aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorDavid Woodhouse <David.Woodhouse@intel.com>2009-01-05 04:50:33 -0500
committerDavid Woodhouse <David.Woodhouse@intel.com>2009-01-05 04:50:33 -0500
commit353816f43d1fb340ff2d9a911dd5d0799c09f6a5 (patch)
tree517290fd884d286fe2971137ac89f89e3567785a /fs
parent160bbab3000dafccbe43688e48208cecf4deb879 (diff)
parentfe0bdec68b77020281dc814805edfe594ae89e0f (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Conflicts: arch/arm/mach-pxa/corgi.c arch/arm/mach-pxa/poodle.c arch/arm/mach-pxa/spitz.c
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/fid.c6
-rw-r--r--fs/9p/v9fs.c2
-rw-r--r--fs/9p/vfs_dentry.c9
-rw-r--r--fs/9p/vfs_inode.c10
-rw-r--r--fs/9p/vfs_super.c4
-rw-r--r--fs/Kconfig39
-rw-r--r--fs/Makefile5
-rw-r--r--fs/affs/file.c2
-rw-r--r--fs/affs/inode.c4
-rw-r--r--fs/affs/super.c4
-rw-r--r--fs/afs/proc.c4
-rw-r--r--fs/afs/server.c9
-rw-r--r--fs/afs/write.c2
-rw-r--r--fs/aio.c100
-rw-r--r--fs/anon_inodes.c11
-rw-r--r--fs/attr.c4
-rw-r--r--fs/autofs/inode.c4
-rw-r--r--fs/autofs4/dev-ioctl.c3
-rw-r--r--fs/autofs4/inode.c4
-rw-r--r--fs/autofs4/waitq.c4
-rw-r--r--fs/bad_inode.c6
-rw-r--r--fs/befs/linuxvfs.c5
-rw-r--r--fs/bfs/dir.c4
-rw-r--r--fs/binfmt_aout.c83
-rw-r--r--fs/binfmt_elf.c22
-rw-r--r--fs/binfmt_elf_fdpic.c19
-rw-r--r--fs/binfmt_flat.c2
-rw-r--r--fs/binfmt_som.c2
-rw-r--r--fs/bio-integrity.c2
-rw-r--r--fs/bio.c325
-rw-r--r--fs/block_dev.c9
-rw-r--r--fs/buffer.c23
-rw-r--r--fs/cifs/AUTHORS2
-rw-r--r--fs/cifs/CHANGES9
-rw-r--r--fs/cifs/Makefile2
-rw-r--r--fs/cifs/README12
-rw-r--r--fs/cifs/cifs_dfs_ref.c48
-rw-r--r--fs/cifs/cifs_fs_sb.h7
-rw-r--r--fs/cifs/cifs_spnego.c6
-rw-r--r--fs/cifs/cifsencrypt.c30
-rw-r--r--fs/cifs/cifsencrypt.h3
-rw-r--r--fs/cifs/cifsfs.c82
-rw-r--r--fs/cifs/cifsfs.h3
-rw-r--r--fs/cifs/cifsglob.h15
-rw-r--r--fs/cifs/cifspdu.h2
-rw-r--r--fs/cifs/cifsproto.h5
-rw-r--r--fs/cifs/cifssmb.c52
-rw-r--r--fs/cifs/connect.c685
-rw-r--r--fs/cifs/dir.c21
-rw-r--r--fs/cifs/fcntl.c118
-rw-r--r--fs/cifs/file.c27
-rw-r--r--fs/cifs/inode.c66
-rw-r--r--fs/cifs/ioctl.c2
-rw-r--r--fs/cifs/misc.c13
-rw-r--r--fs/cifs/sess.c5
-rw-r--r--fs/cifs/smbdes.c5
-rw-r--r--fs/cifs/smbencrypt.c9
-rw-r--r--fs/cifs/transport.c378
-rw-r--r--fs/coda/cache.c6
-rw-r--r--fs/coda/file.c3
-rw-r--r--fs/coda/upcall.c2
-rw-r--r--fs/compat.c42
-rw-r--r--fs/dcache.c25
-rw-r--r--fs/dcookies.c28
-rw-r--r--fs/devpts/inode.c472
-rw-r--r--fs/dlm/netlink.c2
-rw-r--r--fs/dquot.c4
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h3
-rw-r--r--fs/ecryptfs/inode.c3
-rw-r--r--fs/ecryptfs/kthread.c9
-rw-r--r--fs/ecryptfs/main.c3
-rw-r--r--fs/ecryptfs/messaging.c27
-rw-r--r--fs/ecryptfs/miscdev.c27
-rw-r--r--fs/ecryptfs/mmap.c2
-rw-r--r--fs/exec.c245
-rw-r--r--fs/exportfs/expfs.c4
-rw-r--r--fs/ext2/balloc.c2
-rw-r--r--fs/ext2/ialloc.c10
-rw-r--r--fs/ext2/inode.c7
-rw-r--r--fs/ext2/namei.c15
-rw-r--r--fs/ext3/balloc.c2
-rw-r--r--fs/ext3/ialloc.c10
-rw-r--r--fs/ext3/inode.c9
-rw-r--r--fs/ext3/namei.c18
-rw-r--r--fs/ext4/balloc.c6
-rw-r--r--fs/ext4/ext4_sb.h6
-rw-r--r--fs/ext4/ialloc.c10
-rw-r--r--fs/ext4/inode.c11
-rw-r--r--fs/ext4/namei.c17
-rw-r--r--fs/ext4/super.c8
-rw-r--r--fs/fat/dir.c1
-rw-r--r--fs/fat/file.c2
-rw-r--r--fs/fat/inode.c6
-rw-r--r--fs/fat/namei_vfat.c2
-rw-r--r--fs/fcntl.c18
-rw-r--r--fs/file_table.c20
-rw-r--r--fs/freevxfs/vxfs_inode.c4
-rw-r--r--fs/fuse/dev.c4
-rw-r--r--fs/fuse/dir.c23
-rw-r--r--fs/fuse/file.c4
-rw-r--r--fs/gfs2/inode.c10
-rw-r--r--fs/gfs2/ops_address.c2
-rw-r--r--fs/hfs/inode.c4
-rw-r--r--fs/hfs/super.c4
-rw-r--r--fs/hfsplus/inode.c4
-rw-r--r--fs/hfsplus/options.c4
-rw-r--r--fs/hostfs/hostfs_kern.c2
-rw-r--r--fs/hpfs/namei.c24
-rw-r--r--fs/hpfs/super.c4
-rw-r--r--fs/hppfs/hppfs.c6
-rw-r--r--fs/hugetlbfs/inode.c21
-rw-r--r--fs/inode.c268
-rw-r--r--fs/internal.h6
-rw-r--r--fs/ioprio.c18
-rw-r--r--fs/jffs2/file.c2
-rw-r--r--fs/jfs/inode.c8
-rw-r--r--fs/jfs/jfs_inode.c33
-rw-r--r--fs/jfs/namei.c24
-rw-r--r--fs/libfs.c2
-rw-r--r--fs/lockd/clntlock.c23
-rw-r--r--fs/lockd/host.c18
-rw-r--r--fs/lockd/mon.c2
-rw-r--r--fs/lockd/svc.c6
-rw-r--r--fs/locks.c2
-rw-r--r--fs/minix/bitmap.c4
-rw-r--r--fs/namei.c138
-rw-r--r--fs/namespace.c4
-rw-r--r--fs/ncpfs/ioctl.c91
-rw-r--r--fs/nfs/callback.c36
-rw-r--r--fs/nfs/client.c95
-rw-r--r--fs/nfs/delegation.c260
-rw-r--r--fs/nfs/delegation.h33
-rw-r--r--fs/nfs/dir.c24
-rw-r--r--fs/nfs/file.c2
-rw-r--r--fs/nfs/inode.c13
-rw-r--r--fs/nfs/internal.h14
-rw-r--r--fs/nfs/mount_clnt.c34
-rw-r--r--fs/nfs/nfs4_fs.h32
-rw-r--r--fs/nfs/nfs4proc.c431
-rw-r--r--fs/nfs/nfs4renewd.c22
-rw-r--r--fs/nfs/nfs4state.c415
-rw-r--r--fs/nfs/nfs4xdr.c1235
-rw-r--r--fs/nfs/nfsroot.c33
-rw-r--r--fs/nfs/read.c6
-rw-r--r--fs/nfs/super.c50
-rw-r--r--fs/nfs_common/nfsacl.c4
-rw-r--r--fs/nfsctl.c10
-rw-r--r--fs/nfsd/auth.c95
-rw-r--r--fs/nfsd/nfs4callback.c9
-rw-r--r--fs/nfsd/nfs4recover.c72
-rw-r--r--fs/nfsd/nfs4state.c16
-rw-r--r--fs/nfsd/nfsctl.c2
-rw-r--r--fs/nfsd/nfsfh.c11
-rw-r--r--fs/nfsd/vfs.c9
-rw-r--r--fs/notify/Kconfig2
-rw-r--r--fs/notify/Makefile2
-rw-r--r--fs/notify/dnotify/Kconfig10
-rw-r--r--fs/notify/dnotify/Makefile1
-rw-r--r--fs/notify/dnotify/dnotify.c (renamed from fs/dnotify.c)3
-rw-r--r--fs/notify/inotify/Kconfig27
-rw-r--r--fs/notify/inotify/Makefile2
-rw-r--r--fs/notify/inotify/inotify.c (renamed from fs/inotify.c)2
-rw-r--r--fs/notify/inotify/inotify_user.c (renamed from fs/inotify_user.c)4
-rw-r--r--fs/ocfs2/cluster/netdebug.c8
-rw-r--r--fs/ocfs2/cluster/nodemanager.c2
-rw-r--r--fs/ocfs2/cluster/tcp.c29
-rw-r--r--fs/ocfs2/dlm/dlmfs.c8
-rw-r--r--fs/ocfs2/namei.c4
-rw-r--r--fs/ocfs2/ocfs2_fs.h8
-rw-r--r--fs/ocfs2/xattr.c4
-rw-r--r--fs/omfs/inode.c8
-rw-r--r--fs/open.c64
-rw-r--r--fs/pipe.c11
-rw-r--r--fs/posix_acl.c4
-rw-r--r--fs/proc/array.c32
-rw-r--r--fs/proc/base.c38
-rw-r--r--fs/proc/proc_devtree.c3
-rw-r--r--fs/proc/stat.c7
-rw-r--r--fs/proc/task_mmu.c4
-rw-r--r--fs/quota.c4
-rw-r--r--fs/ramfs/inode.c4
-rw-r--r--fs/reiserfs/inode.c17
-rw-r--r--fs/reiserfs/namei.c12
-rw-r--r--fs/seq_file.c27
-rw-r--r--fs/smbfs/dir.c3
-rw-r--r--fs/smbfs/file.c2
-rw-r--r--fs/smbfs/inode.c2
-rw-r--r--fs/smbfs/proc.c2
-rw-r--r--fs/super.c2
-rw-r--r--fs/sysv/ialloc.c4
-rw-r--r--fs/sysv/inode.c6
-rw-r--r--fs/ubifs/budget.c210
-rw-r--r--fs/ubifs/commit.c25
-rw-r--r--fs/ubifs/compress.c18
-rw-r--r--fs/ubifs/debug.c265
-rw-r--r--fs/ubifs/debug.h117
-rw-r--r--fs/ubifs/dir.c4
-rw-r--r--fs/ubifs/file.c17
-rw-r--r--fs/ubifs/ioctl.c2
-rw-r--r--fs/ubifs/journal.c4
-rw-r--r--fs/ubifs/key.h32
-rw-r--r--fs/ubifs/lprops.c14
-rw-r--r--fs/ubifs/lpt.c45
-rw-r--r--fs/ubifs/lpt_commit.c210
-rw-r--r--fs/ubifs/orphan.c2
-rw-r--r--fs/ubifs/replay.c15
-rw-r--r--fs/ubifs/sb.c20
-rw-r--r--fs/ubifs/super.c255
-rw-r--r--fs/ubifs/tnc.c31
-rw-r--r--fs/ubifs/tnc_commit.c9
-rw-r--r--fs/ubifs/ubifs-media.h7
-rw-r--r--fs/ubifs/ubifs.h111
-rw-r--r--fs/udf/ialloc.c4
-rw-r--r--fs/udf/namei.c2
-rw-r--r--fs/ufs/ialloc.c4
-rw-r--r--fs/xfs/Makefile6
-rw-r--r--fs/xfs/linux-2.6/sv.h22
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c66
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.h3
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c87
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h30
-rw-r--r--fs/xfs/linux-2.6/xfs_cred.h12
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c189
-rw-r--r--fs/xfs/linux-2.6/xfs_fs_subr.c23
-rw-r--r--fs/xfs/linux-2.6/xfs_globals.c8
-rw-r--r--fs/xfs/linux-2.6/xfs_globals.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c226
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.h82
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.c849
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.h214
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c122
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h13
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c50
-rw-r--r--fs/xfs/linux-2.6/xfs_stats.c6
-rw-r--r--fs/xfs/linux-2.6/xfs_stats.h65
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c884
-rw-r--r--fs/xfs/linux-2.6/xfs_super.h15
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c762
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h55
-rw-r--r--fs/xfs/linux-2.6/xfs_sysctl.c11
-rw-r--r--fs/xfs/linux-2.6/xfs_sysctl.h3
-rw-r--r--fs/xfs/linux-2.6/xfs_vfs.h77
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.c145
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.h72
-rw-r--r--fs/xfs/quota/xfs_dquot.c39
-rw-r--r--fs/xfs/quota/xfs_dquot.h4
-rw-r--r--fs/xfs/quota/xfs_dquot_item.c45
-rw-r--r--fs/xfs/quota/xfs_qm.c57
-rw-r--r--fs/xfs/quota/xfs_qm.h3
-rw-r--r--fs/xfs/quota/xfs_qm_bhv.c5
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c151
-rw-r--r--fs/xfs/support/debug.c39
-rw-r--r--fs/xfs/support/debug.h2
-rw-r--r--fs/xfs/support/ktrace.c9
-rw-r--r--fs/xfs/xfs.h2
-rw-r--r--fs/xfs/xfs_acl.c8
-rw-r--r--fs/xfs/xfs_ag.h15
-rw-r--r--fs/xfs/xfs_alloc.c264
-rw-r--r--fs/xfs/xfs_alloc.h27
-rw-r--r--fs/xfs/xfs_alloc_btree.c2387
-rw-r--r--fs/xfs/xfs_alloc_btree.h107
-rw-r--r--fs/xfs/xfs_arch.h39
-rw-r--r--fs/xfs/xfs_bit.h3
-rw-r--r--fs/xfs/xfs_bmap.c410
-rw-r--r--fs/xfs/xfs_bmap.h72
-rw-r--r--fs/xfs/xfs_bmap_btree.c2617
-rw-r--r--fs/xfs/xfs_bmap_btree.h171
-rw-r--r--fs/xfs/xfs_btree.c3596
-rw-r--r--fs/xfs/xfs_btree.h392
-rw-r--r--fs/xfs/xfs_btree_trace.c249
-rw-r--r--fs/xfs/xfs_btree_trace.h116
-rw-r--r--fs/xfs/xfs_buf_item.c45
-rw-r--r--fs/xfs/xfs_clnt.h105
-rw-r--r--fs/xfs/xfs_da_btree.h24
-rw-r--r--fs/xfs/xfs_dfrag.c8
-rw-r--r--fs/xfs/xfs_dfrag.h2
-rw-r--r--fs/xfs/xfs_dinode.h148
-rw-r--r--fs/xfs/xfs_dir2_sf.h7
-rw-r--r--fs/xfs/xfs_dmops.c5
-rw-r--r--fs/xfs/xfs_error.c15
-rw-r--r--fs/xfs/xfs_error.h12
-rw-r--r--fs/xfs/xfs_extfree_item.c45
-rw-r--r--fs/xfs/xfs_fs.h22
-rw-r--r--fs/xfs/xfs_fsops.c30
-rw-r--r--fs/xfs/xfs_ialloc.c449
-rw-r--r--fs/xfs/xfs_ialloc.h31
-rw-r--r--fs/xfs/xfs_ialloc_btree.c2193
-rw-r--r--fs/xfs/xfs_ialloc_btree.h111
-rw-r--r--fs/xfs/xfs_iget.c735
-rw-r--r--fs/xfs/xfs_imap.h40
-rw-r--r--fs/xfs/xfs_inode.c587
-rw-r--r--fs/xfs/xfs_inode.h377
-rw-r--r--fs/xfs/xfs_inode_item.c45
-rw-r--r--fs/xfs/xfs_inode_item.h41
-rw-r--r--fs/xfs/xfs_iomap.c28
-rw-r--r--fs/xfs/xfs_itable.c102
-rw-r--r--fs/xfs/xfs_itable.h14
-rw-r--r--fs/xfs/xfs_log.c81
-rw-r--r--fs/xfs/xfs_log.h4
-rw-r--r--fs/xfs/xfs_log_priv.h48
-rw-r--r--fs/xfs/xfs_log_recover.c416
-rw-r--r--fs/xfs/xfs_mount.c81
-rw-r--r--fs/xfs/xfs_mount.h73
-rw-r--r--fs/xfs/xfs_qmops.c5
-rw-r--r--fs/xfs/xfs_quota.h8
-rw-r--r--fs/xfs/xfs_rename.c61
-rw-r--r--fs/xfs/xfs_rtalloc.c41
-rw-r--r--fs/xfs/xfs_rw.c2
-rw-r--r--fs/xfs/xfs_sb.h167
-rw-r--r--fs/xfs/xfs_trans.c22
-rw-r--r--fs/xfs/xfs_trans.h322
-rw-r--r--fs/xfs/xfs_trans_ail.c362
-rw-r--r--fs/xfs/xfs_trans_buf.c7
-rw-r--r--fs/xfs/xfs_trans_inode.c30
-rw-r--r--fs/xfs/xfs_trans_item.c10
-rw-r--r--fs/xfs/xfs_trans_priv.h98
-rw-r--r--fs/xfs/xfs_utils.c12
-rw-r--r--fs/xfs/xfs_vfsops.c757
-rw-r--r--fs/xfs/xfs_vfsops.h16
-rw-r--r--fs/xfs/xfs_vnodeops.c354
-rw-r--r--fs/xfs/xfs_vnodeops.h16
323 files changed, 15676 insertions, 16420 deletions
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 3031e3233dd..14d94420457 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -45,7 +45,7 @@ int v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid)
45 struct v9fs_dentry *dent; 45 struct v9fs_dentry *dent;
46 46
47 P9_DPRINTK(P9_DEBUG_VFS, "fid %d dentry %s\n", 47 P9_DPRINTK(P9_DEBUG_VFS, "fid %d dentry %s\n",
48 fid->fid, dentry->d_iname); 48 fid->fid, dentry->d_name.name);
49 49
50 dent = dentry->d_fsdata; 50 dent = dentry->d_fsdata;
51 if (!dent) { 51 if (!dent) {
@@ -79,7 +79,7 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, u32 uid, int any)
79 struct p9_fid *fid, *ret; 79 struct p9_fid *fid, *ret;
80 80
81 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p) uid %d any %d\n", 81 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p) uid %d any %d\n",
82 dentry->d_iname, dentry, uid, any); 82 dentry->d_name.name, dentry, uid, any);
83 dent = (struct v9fs_dentry *) dentry->d_fsdata; 83 dent = (struct v9fs_dentry *) dentry->d_fsdata;
84 ret = NULL; 84 ret = NULL;
85 if (dent) { 85 if (dent) {
@@ -120,7 +120,7 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
120 switch (access) { 120 switch (access) {
121 case V9FS_ACCESS_SINGLE: 121 case V9FS_ACCESS_SINGLE:
122 case V9FS_ACCESS_USER: 122 case V9FS_ACCESS_USER:
123 uid = current->fsuid; 123 uid = current_fsuid();
124 any = 0; 124 any = 0;
125 break; 125 break;
126 126
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 24eb01087b6..332b5ff02fe 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -160,7 +160,7 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses)
160 v9ses->flags |= V9FS_ACCESS_ANY; 160 v9ses->flags |= V9FS_ACCESS_ANY;
161 else { 161 else {
162 v9ses->flags |= V9FS_ACCESS_SINGLE; 162 v9ses->flags |= V9FS_ACCESS_SINGLE;
163 v9ses->uid = simple_strtol(s, &e, 10); 163 v9ses->uid = simple_strtoul(s, &e, 10);
164 if (*e != '\0') 164 if (*e != '\0')
165 v9ses->uid = ~0; 165 v9ses->uid = ~0;
166 } 166 }
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index f9534f18df0..06dcc7c4f23 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -52,7 +52,8 @@
52 52
53static int v9fs_dentry_delete(struct dentry *dentry) 53static int v9fs_dentry_delete(struct dentry *dentry)
54{ 54{
55 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); 55 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name,
56 dentry);
56 57
57 return 1; 58 return 1;
58} 59}
@@ -69,7 +70,8 @@ static int v9fs_dentry_delete(struct dentry *dentry)
69static int v9fs_cached_dentry_delete(struct dentry *dentry) 70static int v9fs_cached_dentry_delete(struct dentry *dentry)
70{ 71{
71 struct inode *inode = dentry->d_inode; 72 struct inode *inode = dentry->d_inode;
72 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); 73 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name,
74 dentry);
73 75
74 if(!inode) 76 if(!inode)
75 return 1; 77 return 1;
@@ -88,7 +90,8 @@ void v9fs_dentry_release(struct dentry *dentry)
88 struct v9fs_dentry *dent; 90 struct v9fs_dentry *dent;
89 struct p9_fid *temp, *current_fid; 91 struct p9_fid *temp, *current_fid;
90 92
91 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); 93 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name,
94 dentry);
92 dent = dentry->d_fsdata; 95 dent = dentry->d_fsdata;
93 if (dent) { 96 if (dent) {
94 list_for_each_entry_safe(current_fid, temp, &dent->fidlist, 97 list_for_each_entry_safe(current_fid, temp, &dent->fidlist,
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 8314d3f43b7..81f8bbf12f9 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -215,8 +215,8 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
215 inode = new_inode(sb); 215 inode = new_inode(sb);
216 if (inode) { 216 if (inode) {
217 inode->i_mode = mode; 217 inode->i_mode = mode;
218 inode->i_uid = current->fsuid; 218 inode->i_uid = current_fsuid();
219 inode->i_gid = current->fsgid; 219 inode->i_gid = current_fsgid();
220 inode->i_blocks = 0; 220 inode->i_blocks = 0;
221 inode->i_rdev = 0; 221 inode->i_rdev = 0;
222 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 222 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
@@ -963,7 +963,8 @@ static int v9fs_vfs_readlink(struct dentry *dentry, char __user * buffer,
963 if (buflen > PATH_MAX) 963 if (buflen > PATH_MAX)
964 buflen = PATH_MAX; 964 buflen = PATH_MAX;
965 965
966 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); 966 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name,
967 dentry);
967 968
968 retval = v9fs_readlink(dentry, link, buflen); 969 retval = v9fs_readlink(dentry, link, buflen);
969 970
@@ -1022,7 +1023,8 @@ v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
1022{ 1023{
1023 char *s = nd_get_link(nd); 1024 char *s = nd_get_link(nd);
1024 1025
1025 P9_DPRINTK(P9_DEBUG_VFS, " %s %s\n", dentry->d_name.name, s); 1026 P9_DPRINTK(P9_DEBUG_VFS, " %s %s\n", dentry->d_name.name,
1027 IS_ERR(s) ? "<error>" : s);
1026 if (!IS_ERR(s)) 1028 if (!IS_ERR(s))
1027 __putname(s); 1029 __putname(s);
1028} 1030}
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index d6cb1a0ca72..93212e40221 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -113,8 +113,8 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
113 struct v9fs_session_info *v9ses = NULL; 113 struct v9fs_session_info *v9ses = NULL;
114 struct p9_wstat *st = NULL; 114 struct p9_wstat *st = NULL;
115 int mode = S_IRWXUGO | S_ISVTX; 115 int mode = S_IRWXUGO | S_ISVTX;
116 uid_t uid = current->fsuid; 116 uid_t uid = current_fsuid();
117 gid_t gid = current->fsgid; 117 gid_t gid = current_fsgid();
118 struct p9_fid *fid; 118 struct p9_fid *fid;
119 int retval = 0; 119 int retval = 0;
120 120
diff --git a/fs/Kconfig b/fs/Kconfig
index 522469a7eca..ff0e8198020 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -270,44 +270,7 @@ config OCFS2_COMPAT_JBD
270 270
271endif # BLOCK 271endif # BLOCK
272 272
273config DNOTIFY 273source "fs/notify/Kconfig"
274 bool "Dnotify support"
275 default y
276 help
277 Dnotify is a directory-based per-fd file change notification system
278 that uses signals to communicate events to user-space. There exist
279 superior alternatives, but some applications may still rely on
280 dnotify.
281
282 If unsure, say Y.
283
284config INOTIFY
285 bool "Inotify file change notification support"
286 default y
287 ---help---
288 Say Y here to enable inotify support. Inotify is a file change
289 notification system and a replacement for dnotify. Inotify fixes
290 numerous shortcomings in dnotify and introduces several new features
291 including multiple file events, one-shot support, and unmount
292 notification.
293
294 For more information, see <file:Documentation/filesystems/inotify.txt>
295
296 If unsure, say Y.
297
298config INOTIFY_USER
299 bool "Inotify support for userspace"
300 depends on INOTIFY
301 default y
302 ---help---
303 Say Y here to enable inotify support for userspace, including the
304 associated system calls. Inotify allows monitoring of both files and
305 directories via a single open fd. Events are read from the file
306 descriptor, which is also select()- and poll()-able.
307
308 For more information, see <file:Documentation/filesystems/inotify.txt>
309
310 If unsure, say Y.
311 274
312config QUOTA 275config QUOTA
313 bool "Quota support" 276 bool "Quota support"
diff --git a/fs/Makefile b/fs/Makefile
index d9f8afe6f0c..e6f423d1d22 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -20,8 +20,7 @@ obj-y += no-block.o
20endif 20endif
21 21
22obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o 22obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o
23obj-$(CONFIG_INOTIFY) += inotify.o 23obj-y += notify/
24obj-$(CONFIG_INOTIFY_USER) += inotify_user.o
25obj-$(CONFIG_EPOLL) += eventpoll.o 24obj-$(CONFIG_EPOLL) += eventpoll.o
26obj-$(CONFIG_ANON_INODES) += anon_inodes.o 25obj-$(CONFIG_ANON_INODES) += anon_inodes.o
27obj-$(CONFIG_SIGNALFD) += signalfd.o 26obj-$(CONFIG_SIGNALFD) += signalfd.o
@@ -57,8 +56,6 @@ obj-$(CONFIG_QFMT_V1) += quota_v1.o
57obj-$(CONFIG_QFMT_V2) += quota_v2.o 56obj-$(CONFIG_QFMT_V2) += quota_v2.o
58obj-$(CONFIG_QUOTACTL) += quota.o 57obj-$(CONFIG_QUOTACTL) += quota.o
59 58
60obj-$(CONFIG_DNOTIFY) += dnotify.o
61
62obj-$(CONFIG_PROC_FS) += proc/ 59obj-$(CONFIG_PROC_FS) += proc/
63obj-y += partitions/ 60obj-y += partitions/
64obj-$(CONFIG_SYSFS) += sysfs/ 61obj-$(CONFIG_SYSFS) += sysfs/
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 1377b1240b6..9246cb4aa01 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -628,7 +628,7 @@ static int affs_write_begin_ofs(struct file *file, struct address_space *mapping
628 } 628 }
629 629
630 index = pos >> PAGE_CACHE_SHIFT; 630 index = pos >> PAGE_CACHE_SHIFT;
631 page = __grab_cache_page(mapping, index); 631 page = grab_cache_page_write_begin(mapping, index, flags);
632 if (!page) 632 if (!page)
633 return -ENOMEM; 633 return -ENOMEM;
634 *pagep = page; 634 *pagep = page;
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index a13b334a391..415d9c67ac1 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -293,8 +293,8 @@ affs_new_inode(struct inode *dir)
293 mark_buffer_dirty_inode(bh, inode); 293 mark_buffer_dirty_inode(bh, inode);
294 affs_brelse(bh); 294 affs_brelse(bh);
295 295
296 inode->i_uid = current->fsuid; 296 inode->i_uid = current_fsuid();
297 inode->i_gid = current->fsgid; 297 inode->i_gid = current_fsgid();
298 inode->i_ino = block; 298 inode->i_ino = block;
299 inode->i_nlink = 1; 299 inode->i_nlink = 1;
300 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; 300 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 8989c93193e..a19d64b582a 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -163,8 +163,8 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s
163 163
164 /* Fill in defaults */ 164 /* Fill in defaults */
165 165
166 *uid = current->uid; 166 *uid = current_uid();
167 *gid = current->gid; 167 *gid = current_gid();
168 *reserved = 2; 168 *reserved = 2;
169 *root = -1; 169 *root = -1;
170 *blocksize = -1; 170 *blocksize = -1;
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 9f7d1ae7026..7578c1ab9e0 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -646,7 +646,7 @@ static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v)
646 } 646 }
647 647
648 /* display one cell per line on subsequent lines */ 648 /* display one cell per line on subsequent lines */
649 seq_printf(m, "%u.%u.%u.%u\n", NIPQUAD(addr->s_addr)); 649 seq_printf(m, "%pI4\n", &addr->s_addr);
650 return 0; 650 return 0;
651} 651}
652 652
@@ -737,7 +737,7 @@ static int afs_proc_cell_servers_show(struct seq_file *m, void *v)
737 } 737 }
738 738
739 /* display one cell per line on subsequent lines */ 739 /* display one cell per line on subsequent lines */
740 sprintf(ipaddr, "%u.%u.%u.%u", NIPQUAD(server->addr)); 740 sprintf(ipaddr, "%pI4", &server->addr);
741 seq_printf(m, "%3d %-15.15s %5d\n", 741 seq_printf(m, "%3d %-15.15s %5d\n",
742 atomic_read(&server->usage), ipaddr, server->fs_state); 742 atomic_read(&server->usage), ipaddr, server->fs_state);
743 743
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 28f2451419e..f4909951667 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -105,7 +105,7 @@ struct afs_server *afs_lookup_server(struct afs_cell *cell,
105{ 105{
106 struct afs_server *server, *candidate; 106 struct afs_server *server, *candidate;
107 107
108 _enter("%p,"NIPQUAD_FMT, cell, NIPQUAD(addr->s_addr)); 108 _enter("%p,%pI4", cell, &addr->s_addr);
109 109
110 /* quick scan of the list to see if we already have the server */ 110 /* quick scan of the list to see if we already have the server */
111 read_lock(&cell->servers_lock); 111 read_lock(&cell->servers_lock);
@@ -168,9 +168,8 @@ found_server:
168server_in_two_cells: 168server_in_two_cells:
169 write_unlock(&cell->servers_lock); 169 write_unlock(&cell->servers_lock);
170 kfree(candidate); 170 kfree(candidate);
171 printk(KERN_NOTICE "kAFS:" 171 printk(KERN_NOTICE "kAFS: Server %pI4 appears to be in two cells\n",
172 " Server "NIPQUAD_FMT" appears to be in two cells\n", 172 addr);
173 NIPQUAD(*addr));
174 _leave(" = -EEXIST"); 173 _leave(" = -EEXIST");
175 return ERR_PTR(-EEXIST); 174 return ERR_PTR(-EEXIST);
176} 175}
@@ -184,7 +183,7 @@ struct afs_server *afs_find_server(const struct in_addr *_addr)
184 struct rb_node *p; 183 struct rb_node *p;
185 struct in_addr addr = *_addr; 184 struct in_addr addr = *_addr;
186 185
187 _enter(NIPQUAD_FMT, NIPQUAD(addr.s_addr)); 186 _enter("%pI4", &addr.s_addr);
188 187
189 read_lock(&afs_servers_lock); 188 read_lock(&afs_servers_lock);
190 189
diff --git a/fs/afs/write.c b/fs/afs/write.c
index d6b85dab35f..3fb36d43362 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -144,7 +144,7 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
144 candidate->state = AFS_WBACK_PENDING; 144 candidate->state = AFS_WBACK_PENDING;
145 init_waitqueue_head(&candidate->waitq); 145 init_waitqueue_head(&candidate->waitq);
146 146
147 page = __grab_cache_page(mapping, index); 147 page = grab_cache_page_write_begin(mapping, index, flags);
148 if (!page) { 148 if (!page) {
149 kfree(candidate); 149 kfree(candidate);
150 return -ENOMEM; 150 return -ENOMEM;
diff --git a/fs/aio.c b/fs/aio.c
index f658441d566..d6f89d3c15e 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -191,6 +191,20 @@ static int aio_setup_ring(struct kioctx *ctx)
191 kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK), km); \ 191 kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK), km); \
192} while(0) 192} while(0)
193 193
194static void ctx_rcu_free(struct rcu_head *head)
195{
196 struct kioctx *ctx = container_of(head, struct kioctx, rcu_head);
197 unsigned nr_events = ctx->max_reqs;
198
199 kmem_cache_free(kioctx_cachep, ctx);
200
201 if (nr_events) {
202 spin_lock(&aio_nr_lock);
203 BUG_ON(aio_nr - nr_events > aio_nr);
204 aio_nr -= nr_events;
205 spin_unlock(&aio_nr_lock);
206 }
207}
194 208
195/* __put_ioctx 209/* __put_ioctx
196 * Called when the last user of an aio context has gone away, 210 * Called when the last user of an aio context has gone away,
@@ -198,8 +212,6 @@ static int aio_setup_ring(struct kioctx *ctx)
198 */ 212 */
199static void __put_ioctx(struct kioctx *ctx) 213static void __put_ioctx(struct kioctx *ctx)
200{ 214{
201 unsigned nr_events = ctx->max_reqs;
202
203 BUG_ON(ctx->reqs_active); 215 BUG_ON(ctx->reqs_active);
204 216
205 cancel_delayed_work(&ctx->wq); 217 cancel_delayed_work(&ctx->wq);
@@ -208,14 +220,7 @@ static void __put_ioctx(struct kioctx *ctx)
208 mmdrop(ctx->mm); 220 mmdrop(ctx->mm);
209 ctx->mm = NULL; 221 ctx->mm = NULL;
210 pr_debug("__put_ioctx: freeing %p\n", ctx); 222 pr_debug("__put_ioctx: freeing %p\n", ctx);
211 kmem_cache_free(kioctx_cachep, ctx); 223 call_rcu(&ctx->rcu_head, ctx_rcu_free);
212
213 if (nr_events) {
214 spin_lock(&aio_nr_lock);
215 BUG_ON(aio_nr - nr_events > aio_nr);
216 aio_nr -= nr_events;
217 spin_unlock(&aio_nr_lock);
218 }
219} 224}
220 225
221#define get_ioctx(kioctx) do { \ 226#define get_ioctx(kioctx) do { \
@@ -235,6 +240,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
235{ 240{
236 struct mm_struct *mm; 241 struct mm_struct *mm;
237 struct kioctx *ctx; 242 struct kioctx *ctx;
243 int did_sync = 0;
238 244
239 /* Prevent overflows */ 245 /* Prevent overflows */
240 if ((nr_events > (0x10000000U / sizeof(struct io_event))) || 246 if ((nr_events > (0x10000000U / sizeof(struct io_event))) ||
@@ -267,21 +273,30 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
267 goto out_freectx; 273 goto out_freectx;
268 274
269 /* limit the number of system wide aios */ 275 /* limit the number of system wide aios */
270 spin_lock(&aio_nr_lock); 276 do {
271 if (aio_nr + ctx->max_reqs > aio_max_nr || 277 spin_lock_bh(&aio_nr_lock);
272 aio_nr + ctx->max_reqs < aio_nr) 278 if (aio_nr + nr_events > aio_max_nr ||
273 ctx->max_reqs = 0; 279 aio_nr + nr_events < aio_nr)
274 else 280 ctx->max_reqs = 0;
275 aio_nr += ctx->max_reqs; 281 else
276 spin_unlock(&aio_nr_lock); 282 aio_nr += ctx->max_reqs;
283 spin_unlock_bh(&aio_nr_lock);
284 if (ctx->max_reqs || did_sync)
285 break;
286
287 /* wait for rcu callbacks to have completed before giving up */
288 synchronize_rcu();
289 did_sync = 1;
290 ctx->max_reqs = nr_events;
291 } while (1);
292
277 if (ctx->max_reqs == 0) 293 if (ctx->max_reqs == 0)
278 goto out_cleanup; 294 goto out_cleanup;
279 295
280 /* now link into global list. */ 296 /* now link into global list. */
281 write_lock(&mm->ioctx_list_lock); 297 spin_lock(&mm->ioctx_lock);
282 ctx->next = mm->ioctx_list; 298 hlist_add_head_rcu(&ctx->list, &mm->ioctx_list);
283 mm->ioctx_list = ctx; 299 spin_unlock(&mm->ioctx_lock);
284 write_unlock(&mm->ioctx_list_lock);
285 300
286 dprintk("aio: allocated ioctx %p[%ld]: mm=%p mask=0x%x\n", 301 dprintk("aio: allocated ioctx %p[%ld]: mm=%p mask=0x%x\n",
287 ctx, ctx->user_id, current->mm, ctx->ring_info.nr); 302 ctx, ctx->user_id, current->mm, ctx->ring_info.nr);
@@ -375,11 +390,12 @@ ssize_t wait_on_sync_kiocb(struct kiocb *iocb)
375 */ 390 */
376void exit_aio(struct mm_struct *mm) 391void exit_aio(struct mm_struct *mm)
377{ 392{
378 struct kioctx *ctx = mm->ioctx_list; 393 struct kioctx *ctx;
379 mm->ioctx_list = NULL; 394
380 while (ctx) { 395 while (!hlist_empty(&mm->ioctx_list)) {
381 struct kioctx *next = ctx->next; 396 ctx = hlist_entry(mm->ioctx_list.first, struct kioctx, list);
382 ctx->next = NULL; 397 hlist_del_rcu(&ctx->list);
398
383 aio_cancel_all(ctx); 399 aio_cancel_all(ctx);
384 400
385 wait_for_all_aios(ctx); 401 wait_for_all_aios(ctx);
@@ -394,7 +410,6 @@ void exit_aio(struct mm_struct *mm)
394 atomic_read(&ctx->users), ctx->dead, 410 atomic_read(&ctx->users), ctx->dead,
395 ctx->reqs_active); 411 ctx->reqs_active);
396 put_ioctx(ctx); 412 put_ioctx(ctx);
397 ctx = next;
398 } 413 }
399} 414}
400 415
@@ -555,19 +570,21 @@ int aio_put_req(struct kiocb *req)
555 570
556static struct kioctx *lookup_ioctx(unsigned long ctx_id) 571static struct kioctx *lookup_ioctx(unsigned long ctx_id)
557{ 572{
558 struct kioctx *ioctx; 573 struct mm_struct *mm = current->mm;
559 struct mm_struct *mm; 574 struct kioctx *ctx = NULL;
575 struct hlist_node *n;
560 576
561 mm = current->mm; 577 rcu_read_lock();
562 read_lock(&mm->ioctx_list_lock); 578
563 for (ioctx = mm->ioctx_list; ioctx; ioctx = ioctx->next) 579 hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) {
564 if (likely(ioctx->user_id == ctx_id && !ioctx->dead)) { 580 if (ctx->user_id == ctx_id && !ctx->dead) {
565 get_ioctx(ioctx); 581 get_ioctx(ctx);
566 break; 582 break;
567 } 583 }
568 read_unlock(&mm->ioctx_list_lock); 584 }
569 585
570 return ioctx; 586 rcu_read_unlock();
587 return ctx;
571} 588}
572 589
573/* 590/*
@@ -1215,19 +1232,14 @@ out:
1215static void io_destroy(struct kioctx *ioctx) 1232static void io_destroy(struct kioctx *ioctx)
1216{ 1233{
1217 struct mm_struct *mm = current->mm; 1234 struct mm_struct *mm = current->mm;
1218 struct kioctx **tmp;
1219 int was_dead; 1235 int was_dead;
1220 1236
1221 /* delete the entry from the list is someone else hasn't already */ 1237 /* delete the entry from the list is someone else hasn't already */
1222 write_lock(&mm->ioctx_list_lock); 1238 spin_lock(&mm->ioctx_lock);
1223 was_dead = ioctx->dead; 1239 was_dead = ioctx->dead;
1224 ioctx->dead = 1; 1240 ioctx->dead = 1;
1225 for (tmp = &mm->ioctx_list; *tmp && *tmp != ioctx; 1241 hlist_del_rcu(&ioctx->list);
1226 tmp = &(*tmp)->next) 1242 spin_unlock(&mm->ioctx_lock);
1227 ;
1228 if (*tmp)
1229 *tmp = ioctx->next;
1230 write_unlock(&mm->ioctx_list_lock);
1231 1243
1232 dprintk("aio_release(%p)\n", ioctx); 1244 dprintk("aio_release(%p)\n", ioctx);
1233 if (likely(!was_dead)) 1245 if (likely(!was_dead))
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 3662dd44896..3bbdb9d0237 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -79,9 +79,12 @@ int anon_inode_getfd(const char *name, const struct file_operations *fops,
79 if (IS_ERR(anon_inode_inode)) 79 if (IS_ERR(anon_inode_inode))
80 return -ENODEV; 80 return -ENODEV;
81 81
82 if (fops->owner && !try_module_get(fops->owner))
83 return -ENOENT;
84
82 error = get_unused_fd_flags(flags); 85 error = get_unused_fd_flags(flags);
83 if (error < 0) 86 if (error < 0)
84 return error; 87 goto err_module;
85 fd = error; 88 fd = error;
86 89
87 /* 90 /*
@@ -128,6 +131,8 @@ err_dput:
128 dput(dentry); 131 dput(dentry);
129err_put_unused_fd: 132err_put_unused_fd:
130 put_unused_fd(fd); 133 put_unused_fd(fd);
134err_module:
135 module_put(fops->owner);
131 return error; 136 return error;
132} 137}
133EXPORT_SYMBOL_GPL(anon_inode_getfd); 138EXPORT_SYMBOL_GPL(anon_inode_getfd);
@@ -154,8 +159,8 @@ static struct inode *anon_inode_mkinode(void)
154 */ 159 */
155 inode->i_state = I_DIRTY; 160 inode->i_state = I_DIRTY;
156 inode->i_mode = S_IRUSR | S_IWUSR; 161 inode->i_mode = S_IRUSR | S_IWUSR;
157 inode->i_uid = current->fsuid; 162 inode->i_uid = current_fsuid();
158 inode->i_gid = current->fsgid; 163 inode->i_gid = current_fsgid();
159 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 164 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
160 return inode; 165 return inode;
161} 166}
diff --git a/fs/attr.c b/fs/attr.c
index 7a83819f6ba..f4360192a93 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -29,13 +29,13 @@ int inode_change_ok(struct inode *inode, struct iattr *attr)
29 29
30 /* Make sure a caller can chown. */ 30 /* Make sure a caller can chown. */
31 if ((ia_valid & ATTR_UID) && 31 if ((ia_valid & ATTR_UID) &&
32 (current->fsuid != inode->i_uid || 32 (current_fsuid() != inode->i_uid ||
33 attr->ia_uid != inode->i_uid) && !capable(CAP_CHOWN)) 33 attr->ia_uid != inode->i_uid) && !capable(CAP_CHOWN))
34 goto error; 34 goto error;
35 35
36 /* Make sure caller can chgrp. */ 36 /* Make sure caller can chgrp. */
37 if ((ia_valid & ATTR_GID) && 37 if ((ia_valid & ATTR_GID) &&
38 (current->fsuid != inode->i_uid || 38 (current_fsuid() != inode->i_uid ||
39 (!in_group_p(attr->ia_gid) && attr->ia_gid != inode->i_gid)) && 39 (!in_group_p(attr->ia_gid) && attr->ia_gid != inode->i_gid)) &&
40 !capable(CAP_CHOWN)) 40 !capable(CAP_CHOWN))
41 goto error; 41 goto error;
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index b70eea1e8c5..c773680d5c6 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -76,8 +76,8 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid,
76 substring_t args[MAX_OPT_ARGS]; 76 substring_t args[MAX_OPT_ARGS];
77 int option; 77 int option;
78 78
79 *uid = current->uid; 79 *uid = current_uid();
80 *gid = current->gid; 80 *gid = current_gid();
81 *pgrp = task_pgrp_nr(current); 81 *pgrp = task_pgrp_nr(current);
82 82
83 *minproto = *maxproto = AUTOFS_PROTO_VERSION; 83 *minproto = *maxproto = AUTOFS_PROTO_VERSION;
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 33bf8cbfd05..63b7c7afe8d 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -308,7 +308,8 @@ static int autofs_dev_ioctl_open_mountpoint(const char *path, dev_t devid)
308 goto out; 308 goto out;
309 } 309 }
310 310
311 filp = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY); 311 filp = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY,
312 current_cred());
312 if (IS_ERR(filp)) { 313 if (IS_ERR(filp)) {
313 err = PTR_ERR(filp); 314 err = PTR_ERR(filp);
314 goto out; 315 goto out;
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index c7e65bb30ba..7b19802cfef 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -235,8 +235,8 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid,
235 substring_t args[MAX_OPT_ARGS]; 235 substring_t args[MAX_OPT_ARGS];
236 int option; 236 int option;
237 237
238 *uid = current->uid; 238 *uid = current_uid();
239 *gid = current->gid; 239 *gid = current_gid();
240 *pgrp = task_pgrp_nr(current); 240 *pgrp = task_pgrp_nr(current);
241 241
242 *minproto = AUTOFS_MIN_PROTO_VERSION; 242 *minproto = AUTOFS_MIN_PROTO_VERSION;
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 4b67c2a2d77..e02cc8ae5eb 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -391,8 +391,8 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
391 memcpy(&wq->name, &qstr, sizeof(struct qstr)); 391 memcpy(&wq->name, &qstr, sizeof(struct qstr));
392 wq->dev = autofs4_get_dev(sbi); 392 wq->dev = autofs4_get_dev(sbi);
393 wq->ino = autofs4_get_ino(sbi); 393 wq->ino = autofs4_get_ino(sbi);
394 wq->uid = current->uid; 394 wq->uid = current_uid();
395 wq->gid = current->gid; 395 wq->gid = current_gid();
396 wq->pid = current->pid; 396 wq->pid = current->pid;
397 wq->tgid = current->tgid; 397 wq->tgid = current->tgid;
398 wq->status = -EINTR; /* Status return if interrupted */ 398 wq->status = -EINTR; /* Status return if interrupted */
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 5f1538c03b1..a05287a23f6 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -132,11 +132,6 @@ static int bad_file_check_flags(int flags)
132 return -EIO; 132 return -EIO;
133} 133}
134 134
135static int bad_file_dir_notify(struct file *file, unsigned long arg)
136{
137 return -EIO;
138}
139
140static int bad_file_flock(struct file *filp, int cmd, struct file_lock *fl) 135static int bad_file_flock(struct file *filp, int cmd, struct file_lock *fl)
141{ 136{
142 return -EIO; 137 return -EIO;
@@ -179,7 +174,6 @@ static const struct file_operations bad_file_ops =
179 .sendpage = bad_file_sendpage, 174 .sendpage = bad_file_sendpage,
180 .get_unmapped_area = bad_file_get_unmapped_area, 175 .get_unmapped_area = bad_file_get_unmapped_area,
181 .check_flags = bad_file_check_flags, 176 .check_flags = bad_file_check_flags,
182 .dir_notify = bad_file_dir_notify,
183 .flock = bad_file_flock, 177 .flock = bad_file_flock,
184 .splice_write = bad_file_splice_write, 178 .splice_write = bad_file_splice_write,
185 .splice_read = bad_file_splice_read, 179 .splice_read = bad_file_splice_read,
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index b6dfee37c7b..d06cb023ad0 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -378,7 +378,8 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
378 inode->i_size = 0; 378 inode->i_size = 0;
379 inode->i_blocks = befs_sb->block_size / VFS_BLOCK_SIZE; 379 inode->i_blocks = befs_sb->block_size / VFS_BLOCK_SIZE;
380 strncpy(befs_ino->i_data.symlink, raw_inode->data.symlink, 380 strncpy(befs_ino->i_data.symlink, raw_inode->data.symlink,
381 BEFS_SYMLINK_LEN); 381 BEFS_SYMLINK_LEN - 1);
382 befs_ino->i_data.symlink[BEFS_SYMLINK_LEN - 1] = '\0';
382 } else { 383 } else {
383 int num_blks; 384 int num_blks;
384 385
@@ -477,6 +478,8 @@ befs_follow_link(struct dentry *dentry, struct nameidata *nd)
477 kfree(link); 478 kfree(link);
478 befs_error(sb, "Failed to read entire long symlink"); 479 befs_error(sb, "Failed to read entire long symlink");
479 link = ERR_PTR(-EIO); 480 link = ERR_PTR(-EIO);
481 } else {
482 link[len - 1] = '\0';
480 } 483 }
481 } else { 484 } else {
482 link = befs_ino->i_data.symlink; 485 link = befs_ino->i_data.symlink;
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index daae463068e..4dd1b623f93 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -106,8 +106,8 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, int mode,
106 } 106 }
107 set_bit(ino, info->si_imap); 107 set_bit(ino, info->si_imap);
108 info->si_freei--; 108 info->si_freei--;
109 inode->i_uid = current->fsuid; 109 inode->i_uid = current_fsuid();
110 inode->i_gid = (dir->i_mode & S_ISGID) ? dir->i_gid : current->fsgid; 110 inode->i_gid = (dir->i_mode & S_ISGID) ? dir->i_gid : current_fsgid();
111 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; 111 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
112 inode->i_blocks = 0; 112 inode->i_blocks = 0;
113 inode->i_op = &bfs_file_inops; 113 inode->i_op = &bfs_file_inops;
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 204cfd1d767..b639dcf7c77 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -95,92 +95,55 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, u
95 int has_dumped = 0; 95 int has_dumped = 0;
96 unsigned long dump_start, dump_size; 96 unsigned long dump_start, dump_size;
97 struct user dump; 97 struct user dump;
98#if defined(__alpha__) 98#ifdef __alpha__
99# define START_DATA(u) (u.start_data) 99# define START_DATA(u) (u.start_data)
100#elif defined(__arm__) 100#else
101# define START_DATA(u) ((u.u_tsize << PAGE_SHIFT) + u.start_code) 101# define START_DATA(u) ((u.u_tsize << PAGE_SHIFT) + u.start_code)
102#elif defined(__sparc__)
103# define START_DATA(u) (u.u_tsize)
104#elif defined(__i386__) || defined(__mc68000__) || defined(__arch_um__)
105# define START_DATA(u) (u.u_tsize << PAGE_SHIFT)
106#endif 102#endif
107#ifdef __sparc__
108# define START_STACK(u) ((regs->u_regs[UREG_FP]) & ~(PAGE_SIZE - 1))
109#else
110# define START_STACK(u) (u.start_stack) 103# define START_STACK(u) (u.start_stack)
111#endif
112 104
113 fs = get_fs(); 105 fs = get_fs();
114 set_fs(KERNEL_DS); 106 set_fs(KERNEL_DS);
115 has_dumped = 1; 107 has_dumped = 1;
116 current->flags |= PF_DUMPCORE; 108 current->flags |= PF_DUMPCORE;
117 strncpy(dump.u_comm, current->comm, sizeof(dump.u_comm)); 109 strncpy(dump.u_comm, current->comm, sizeof(dump.u_comm));
118#ifndef __sparc__
119 dump.u_ar0 = offsetof(struct user, regs); 110 dump.u_ar0 = offsetof(struct user, regs);
120#endif
121 dump.signal = signr; 111 dump.signal = signr;
122 aout_dump_thread(regs, &dump); 112 aout_dump_thread(regs, &dump);
123 113
124/* If the size of the dump file exceeds the rlimit, then see what would happen 114/* If the size of the dump file exceeds the rlimit, then see what would happen
125 if we wrote the stack, but not the data area. */ 115 if we wrote the stack, but not the data area. */
126#ifdef __sparc__
127 if ((dump.u_dsize + dump.u_ssize) > limit)
128 dump.u_dsize = 0;
129#else
130 if ((dump.u_dsize + dump.u_ssize+1) * PAGE_SIZE > limit) 116 if ((dump.u_dsize + dump.u_ssize+1) * PAGE_SIZE > limit)
131 dump.u_dsize = 0; 117 dump.u_dsize = 0;
132#endif
133 118
134/* Make sure we have enough room to write the stack and data areas. */ 119/* Make sure we have enough room to write the stack and data areas. */
135#ifdef __sparc__
136 if (dump.u_ssize > limit)
137 dump.u_ssize = 0;
138#else
139 if ((dump.u_ssize + 1) * PAGE_SIZE > limit) 120 if ((dump.u_ssize + 1) * PAGE_SIZE > limit)
140 dump.u_ssize = 0; 121 dump.u_ssize = 0;
141#endif
142 122
143/* make sure we actually have a data and stack area to dump */ 123/* make sure we actually have a data and stack area to dump */
144 set_fs(USER_DS); 124 set_fs(USER_DS);
145#ifdef __sparc__
146 if (!access_ok(VERIFY_READ, (void __user *)START_DATA(dump), dump.u_dsize))
147 dump.u_dsize = 0;
148 if (!access_ok(VERIFY_READ, (void __user *)START_STACK(dump), dump.u_ssize))
149 dump.u_ssize = 0;
150#else
151 if (!access_ok(VERIFY_READ, (void __user *)START_DATA(dump), dump.u_dsize << PAGE_SHIFT)) 125 if (!access_ok(VERIFY_READ, (void __user *)START_DATA(dump), dump.u_dsize << PAGE_SHIFT))
152 dump.u_dsize = 0; 126 dump.u_dsize = 0;
153 if (!access_ok(VERIFY_READ, (void __user *)START_STACK(dump), dump.u_ssize << PAGE_SHIFT)) 127 if (!access_ok(VERIFY_READ, (void __user *)START_STACK(dump), dump.u_ssize << PAGE_SHIFT))
154 dump.u_ssize = 0; 128 dump.u_ssize = 0;
155#endif
156 129
157 set_fs(KERNEL_DS); 130 set_fs(KERNEL_DS);
158/* struct user */ 131/* struct user */
159 DUMP_WRITE(&dump,sizeof(dump)); 132 DUMP_WRITE(&dump,sizeof(dump));
160/* Now dump all of the user data. Include malloced stuff as well */ 133/* Now dump all of the user data. Include malloced stuff as well */
161#ifndef __sparc__
162 DUMP_SEEK(PAGE_SIZE); 134 DUMP_SEEK(PAGE_SIZE);
163#endif
164/* now we start writing out the user space info */ 135/* now we start writing out the user space info */
165 set_fs(USER_DS); 136 set_fs(USER_DS);
166/* Dump the data area */ 137/* Dump the data area */
167 if (dump.u_dsize != 0) { 138 if (dump.u_dsize != 0) {
168 dump_start = START_DATA(dump); 139 dump_start = START_DATA(dump);
169#ifdef __sparc__
170 dump_size = dump.u_dsize;
171#else
172 dump_size = dump.u_dsize << PAGE_SHIFT; 140 dump_size = dump.u_dsize << PAGE_SHIFT;
173#endif
174 DUMP_WRITE(dump_start,dump_size); 141 DUMP_WRITE(dump_start,dump_size);
175 } 142 }
176/* Now prepare to dump the stack area */ 143/* Now prepare to dump the stack area */
177 if (dump.u_ssize != 0) { 144 if (dump.u_ssize != 0) {
178 dump_start = START_STACK(dump); 145 dump_start = START_STACK(dump);
179#ifdef __sparc__
180 dump_size = dump.u_ssize;
181#else
182 dump_size = dump.u_ssize << PAGE_SHIFT; 146 dump_size = dump.u_ssize << PAGE_SHIFT;
183#endif
184 DUMP_WRITE(dump_start,dump_size); 147 DUMP_WRITE(dump_start,dump_size);
185 } 148 }
186/* Finally dump the task struct. Not be used by gdb, but could be useful */ 149/* Finally dump the task struct. Not be used by gdb, but could be useful */
@@ -205,29 +168,24 @@ static unsigned long __user *create_aout_tables(char __user *p, struct linux_bin
205 int envc = bprm->envc; 168 int envc = bprm->envc;
206 169
207 sp = (void __user *)((-(unsigned long)sizeof(char *)) & (unsigned long) p); 170 sp = (void __user *)((-(unsigned long)sizeof(char *)) & (unsigned long) p);
208#ifdef __sparc__
209 /* This imposes the proper stack alignment for a new process. */
210 sp = (void __user *) (((unsigned long) sp) & ~7);
211 if ((envc+argc+3)&1) --sp;
212#endif
213#ifdef __alpha__ 171#ifdef __alpha__
214/* whee.. test-programs are so much fun. */ 172/* whee.. test-programs are so much fun. */
215 put_user(0, --sp); 173 put_user(0, --sp);
216 put_user(0, --sp); 174 put_user(0, --sp);
217 if (bprm->loader) { 175 if (bprm->loader) {
218 put_user(0, --sp); 176 put_user(0, --sp);
219 put_user(0x3eb, --sp); 177 put_user(1003, --sp);
220 put_user(bprm->loader, --sp); 178 put_user(bprm->loader, --sp);
221 put_user(0x3ea, --sp); 179 put_user(1002, --sp);
222 } 180 }
223 put_user(bprm->exec, --sp); 181 put_user(bprm->exec, --sp);
224 put_user(0x3e9, --sp); 182 put_user(1001, --sp);
225#endif 183#endif
226 sp -= envc+1; 184 sp -= envc+1;
227 envp = (char __user * __user *) sp; 185 envp = (char __user * __user *) sp;
228 sp -= argc+1; 186 sp -= argc+1;
229 argv = (char __user * __user *) sp; 187 argv = (char __user * __user *) sp;
230#if defined(__i386__) || defined(__mc68000__) || defined(__arm__) || defined(__arch_um__) 188#ifndef __alpha__
231 put_user((unsigned long) envp,--sp); 189 put_user((unsigned long) envp,--sp);
232 put_user((unsigned long) argv,--sp); 190 put_user((unsigned long) argv,--sp);
233#endif 191#endif
@@ -300,13 +258,8 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
300 return retval; 258 return retval;
301 259
302 /* OK, This is the point of no return */ 260 /* OK, This is the point of no return */
303#if defined(__alpha__) 261#ifdef __alpha__
304 SET_AOUT_PERSONALITY(bprm, ex); 262 SET_AOUT_PERSONALITY(bprm, ex);
305#elif defined(__sparc__)
306 set_personality(PER_SUNOS);
307#if !defined(__sparc_v9__)
308 memcpy(&current->thread.core_exec, &ex, sizeof(struct exec));
309#endif
310#else 263#else
311 set_personality(PER_LINUX); 264 set_personality(PER_LINUX);
312#endif 265#endif
@@ -320,26 +273,8 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
320 current->mm->free_area_cache = current->mm->mmap_base; 273 current->mm->free_area_cache = current->mm->mmap_base;
321 current->mm->cached_hole_size = 0; 274 current->mm->cached_hole_size = 0;
322 275
323 compute_creds(bprm); 276 install_exec_creds(bprm);
324 current->flags &= ~PF_FORKNOEXEC; 277 current->flags &= ~PF_FORKNOEXEC;
325#ifdef __sparc__
326 if (N_MAGIC(ex) == NMAGIC) {
327 loff_t pos = fd_offset;
328 /* Fuck me plenty... */
329 /* <AOL></AOL> */
330 down_write(&current->mm->mmap_sem);
331 error = do_brk(N_TXTADDR(ex), ex.a_text);
332 up_write(&current->mm->mmap_sem);
333 bprm->file->f_op->read(bprm->file, (char *) N_TXTADDR(ex),
334 ex.a_text, &pos);
335 down_write(&current->mm->mmap_sem);
336 error = do_brk(N_DATADDR(ex), ex.a_data);
337 up_write(&current->mm->mmap_sem);
338 bprm->file->f_op->read(bprm->file, (char *) N_DATADDR(ex),
339 ex.a_data, &pos);
340 goto beyond_if;
341 }
342#endif
343 278
344 if (N_MAGIC(ex) == OMAGIC) { 279 if (N_MAGIC(ex) == OMAGIC) {
345 unsigned long text_addr, map_size; 280 unsigned long text_addr, map_size;
@@ -347,7 +282,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
347 282
348 text_addr = N_TXTADDR(ex); 283 text_addr = N_TXTADDR(ex);
349 284
350#if defined(__alpha__) || defined(__sparc__) 285#ifdef __alpha__
351 pos = fd_offset; 286 pos = fd_offset;
352 map_size = ex.a_text+ex.a_data + PAGE_SIZE - 1; 287 map_size = ex.a_text+ex.a_data + PAGE_SIZE - 1;
353#else 288#else
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 8fcfa398d35..c41fa2af767 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -157,7 +157,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
157 int items; 157 int items;
158 elf_addr_t *elf_info; 158 elf_addr_t *elf_info;
159 int ei_index = 0; 159 int ei_index = 0;
160 struct task_struct *tsk = current; 160 const struct cred *cred = current_cred();
161 struct vm_area_struct *vma; 161 struct vm_area_struct *vma;
162 162
163 /* 163 /*
@@ -223,10 +223,10 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
223 NEW_AUX_ENT(AT_BASE, interp_load_addr); 223 NEW_AUX_ENT(AT_BASE, interp_load_addr);
224 NEW_AUX_ENT(AT_FLAGS, 0); 224 NEW_AUX_ENT(AT_FLAGS, 0);
225 NEW_AUX_ENT(AT_ENTRY, exec->e_entry); 225 NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
226 NEW_AUX_ENT(AT_UID, tsk->uid); 226 NEW_AUX_ENT(AT_UID, cred->uid);
227 NEW_AUX_ENT(AT_EUID, tsk->euid); 227 NEW_AUX_ENT(AT_EUID, cred->euid);
228 NEW_AUX_ENT(AT_GID, tsk->gid); 228 NEW_AUX_ENT(AT_GID, cred->gid);
229 NEW_AUX_ENT(AT_EGID, tsk->egid); 229 NEW_AUX_ENT(AT_EGID, cred->egid);
230 NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm)); 230 NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
231 NEW_AUX_ENT(AT_EXECFN, bprm->exec); 231 NEW_AUX_ENT(AT_EXECFN, bprm->exec);
232 if (k_platform) { 232 if (k_platform) {
@@ -949,14 +949,14 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
949 set_binfmt(&elf_format); 949 set_binfmt(&elf_format);
950 950
951#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES 951#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
952 retval = arch_setup_additional_pages(bprm, executable_stack); 952 retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
953 if (retval < 0) { 953 if (retval < 0) {
954 send_sig(SIGKILL, current, 0); 954 send_sig(SIGKILL, current, 0);
955 goto out; 955 goto out;
956 } 956 }
957#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */ 957#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
958 958
959 compute_creds(bprm); 959 install_exec_creds(bprm);
960 current->flags &= ~PF_FORKNOEXEC; 960 current->flags &= ~PF_FORKNOEXEC;
961 retval = create_elf_tables(bprm, &loc->elf_ex, 961 retval = create_elf_tables(bprm, &loc->elf_ex,
962 load_addr, interp_load_addr); 962 load_addr, interp_load_addr);
@@ -1361,6 +1361,7 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
1361static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, 1361static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1362 struct mm_struct *mm) 1362 struct mm_struct *mm)
1363{ 1363{
1364 const struct cred *cred;
1364 unsigned int i, len; 1365 unsigned int i, len;
1365 1366
1366 /* first copy the parameters from user space */ 1367 /* first copy the parameters from user space */
@@ -1388,8 +1389,11 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1388 psinfo->pr_zomb = psinfo->pr_sname == 'Z'; 1389 psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1389 psinfo->pr_nice = task_nice(p); 1390 psinfo->pr_nice = task_nice(p);
1390 psinfo->pr_flag = p->flags; 1391 psinfo->pr_flag = p->flags;
1391 SET_UID(psinfo->pr_uid, p->uid); 1392 rcu_read_lock();
1392 SET_GID(psinfo->pr_gid, p->gid); 1393 cred = __task_cred(p);
1394 SET_UID(psinfo->pr_uid, cred->uid);
1395 SET_GID(psinfo->pr_gid, cred->gid);
1396 rcu_read_unlock();
1393 strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname)); 1397 strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1394 1398
1395 return 0; 1399 return 0;
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 5b5424cb339..aa5b43205e3 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -404,7 +404,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
404 current->mm->start_stack = current->mm->start_brk + stack_size; 404 current->mm->start_stack = current->mm->start_brk + stack_size;
405#endif 405#endif
406 406
407 compute_creds(bprm); 407 install_exec_creds(bprm);
408 current->flags &= ~PF_FORKNOEXEC; 408 current->flags &= ~PF_FORKNOEXEC;
409 if (create_elf_fdpic_tables(bprm, current->mm, 409 if (create_elf_fdpic_tables(bprm, current->mm,
410 &exec_params, &interp_params) < 0) 410 &exec_params, &interp_params) < 0)
@@ -475,6 +475,7 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
475 struct elf_fdpic_params *exec_params, 475 struct elf_fdpic_params *exec_params,
476 struct elf_fdpic_params *interp_params) 476 struct elf_fdpic_params *interp_params)
477{ 477{
478 const struct cred *cred = current_cred();
478 unsigned long sp, csp, nitems; 479 unsigned long sp, csp, nitems;
479 elf_caddr_t __user *argv, *envp; 480 elf_caddr_t __user *argv, *envp;
480 size_t platform_len = 0, len; 481 size_t platform_len = 0, len;
@@ -623,10 +624,10 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
623 NEW_AUX_ENT(AT_BASE, interp_params->elfhdr_addr); 624 NEW_AUX_ENT(AT_BASE, interp_params->elfhdr_addr);
624 NEW_AUX_ENT(AT_FLAGS, 0); 625 NEW_AUX_ENT(AT_FLAGS, 0);
625 NEW_AUX_ENT(AT_ENTRY, exec_params->entry_addr); 626 NEW_AUX_ENT(AT_ENTRY, exec_params->entry_addr);
626 NEW_AUX_ENT(AT_UID, (elf_addr_t) current->uid); 627 NEW_AUX_ENT(AT_UID, (elf_addr_t) cred->uid);
627 NEW_AUX_ENT(AT_EUID, (elf_addr_t) current->euid); 628 NEW_AUX_ENT(AT_EUID, (elf_addr_t) cred->euid);
628 NEW_AUX_ENT(AT_GID, (elf_addr_t) current->gid); 629 NEW_AUX_ENT(AT_GID, (elf_addr_t) cred->gid);
629 NEW_AUX_ENT(AT_EGID, (elf_addr_t) current->egid); 630 NEW_AUX_ENT(AT_EGID, (elf_addr_t) cred->egid);
630 NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm)); 631 NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
631 NEW_AUX_ENT(AT_EXECFN, bprm->exec); 632 NEW_AUX_ENT(AT_EXECFN, bprm->exec);
632 633
@@ -1413,6 +1414,7 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
1413static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, 1414static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1414 struct mm_struct *mm) 1415 struct mm_struct *mm)
1415{ 1416{
1417 const struct cred *cred;
1416 unsigned int i, len; 1418 unsigned int i, len;
1417 1419
1418 /* first copy the parameters from user space */ 1420 /* first copy the parameters from user space */
@@ -1440,8 +1442,11 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1440 psinfo->pr_zomb = psinfo->pr_sname == 'Z'; 1442 psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1441 psinfo->pr_nice = task_nice(p); 1443 psinfo->pr_nice = task_nice(p);
1442 psinfo->pr_flag = p->flags; 1444 psinfo->pr_flag = p->flags;
1443 SET_UID(psinfo->pr_uid, p->uid); 1445 rcu_read_lock();
1444 SET_GID(psinfo->pr_gid, p->gid); 1446 cred = __task_cred(p);
1447 SET_UID(psinfo->pr_uid, cred->uid);
1448 SET_GID(psinfo->pr_gid, cred->gid);
1449 rcu_read_unlock();
1445 strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname)); 1450 strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1446 1451
1447 return 0; 1452 return 0;
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index ccb781a6a80..7bbd5c6b372 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -880,7 +880,7 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
880 (libinfo.lib_list[j].loaded)? 880 (libinfo.lib_list[j].loaded)?
881 libinfo.lib_list[j].start_data:UNLOADED_LIB; 881 libinfo.lib_list[j].start_data:UNLOADED_LIB;
882 882
883 compute_creds(bprm); 883 install_exec_creds(bprm);
884 current->flags &= ~PF_FORKNOEXEC; 884 current->flags &= ~PF_FORKNOEXEC;
885 885
886 set_binfmt(&flat_format); 886 set_binfmt(&flat_format);
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index 74e587a5279..08644a61616 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -255,7 +255,7 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs)
255 kfree(hpuxhdr); 255 kfree(hpuxhdr);
256 256
257 set_binfmt(&som_format); 257 set_binfmt(&som_format);
258 compute_creds(bprm); 258 install_exec_creds(bprm);
259 setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT); 259 setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT);
260 260
261 create_som_tables(bprm); 261 create_som_tables(bprm);
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 19caf7c962a..77ebc3c263d 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -111,7 +111,7 @@ void bio_integrity_free(struct bio *bio, struct bio_set *bs)
111 && bip->bip_buf != NULL) 111 && bip->bip_buf != NULL)
112 kfree(bip->bip_buf); 112 kfree(bip->bip_buf);
113 113
114 mempool_free(bip->bip_vec, bs->bvec_pools[bip->bip_pool]); 114 bvec_free_bs(bs, bip->bip_vec, bip->bip_pool);
115 mempool_free(bip, bs->bio_integrity_pool); 115 mempool_free(bip, bs->bio_integrity_pool);
116 116
117 bio->bi_integrity = NULL; 117 bio->bi_integrity = NULL;
diff --git a/fs/bio.c b/fs/bio.c
index 77a55bcceed..711cee10360 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -26,9 +26,16 @@
26#include <linux/mempool.h> 26#include <linux/mempool.h>
27#include <linux/workqueue.h> 27#include <linux/workqueue.h>
28#include <linux/blktrace_api.h> 28#include <linux/blktrace_api.h>
29#include <trace/block.h>
29#include <scsi/sg.h> /* for struct sg_iovec */ 30#include <scsi/sg.h> /* for struct sg_iovec */
30 31
31static struct kmem_cache *bio_slab __read_mostly; 32DEFINE_TRACE(block_split);
33
34/*
35 * Test patch to inline a certain number of bi_io_vec's inside the bio
36 * itself, to shrink a bio data allocation from two mempool calls to one
37 */
38#define BIO_INLINE_VECS 4
32 39
33static mempool_t *bio_split_pool __read_mostly; 40static mempool_t *bio_split_pool __read_mostly;
34 41
@@ -37,9 +44,8 @@ static mempool_t *bio_split_pool __read_mostly;
37 * break badly! cannot be bigger than what you can fit into an 44 * break badly! cannot be bigger than what you can fit into an
38 * unsigned short 45 * unsigned short
39 */ 46 */
40
41#define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) } 47#define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
42static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = { 48struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
43 BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES), 49 BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
44}; 50};
45#undef BV 51#undef BV
@@ -50,12 +56,121 @@ static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
50 */ 56 */
51struct bio_set *fs_bio_set; 57struct bio_set *fs_bio_set;
52 58
59/*
60 * Our slab pool management
61 */
62struct bio_slab {
63 struct kmem_cache *slab;
64 unsigned int slab_ref;
65 unsigned int slab_size;
66 char name[8];
67};
68static DEFINE_MUTEX(bio_slab_lock);
69static struct bio_slab *bio_slabs;
70static unsigned int bio_slab_nr, bio_slab_max;
71
72static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
73{
74 unsigned int sz = sizeof(struct bio) + extra_size;
75 struct kmem_cache *slab = NULL;
76 struct bio_slab *bslab;
77 unsigned int i, entry = -1;
78
79 mutex_lock(&bio_slab_lock);
80
81 i = 0;
82 while (i < bio_slab_nr) {
83 struct bio_slab *bslab = &bio_slabs[i];
84
85 if (!bslab->slab && entry == -1)
86 entry = i;
87 else if (bslab->slab_size == sz) {
88 slab = bslab->slab;
89 bslab->slab_ref++;
90 break;
91 }
92 i++;
93 }
94
95 if (slab)
96 goto out_unlock;
97
98 if (bio_slab_nr == bio_slab_max && entry == -1) {
99 bio_slab_max <<= 1;
100 bio_slabs = krealloc(bio_slabs,
101 bio_slab_max * sizeof(struct bio_slab),
102 GFP_KERNEL);
103 if (!bio_slabs)
104 goto out_unlock;
105 }
106 if (entry == -1)
107 entry = bio_slab_nr++;
108
109 bslab = &bio_slabs[entry];
110
111 snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry);
112 slab = kmem_cache_create(bslab->name, sz, 0, SLAB_HWCACHE_ALIGN, NULL);
113 if (!slab)
114 goto out_unlock;
115
116 printk("bio: create slab <%s> at %d\n", bslab->name, entry);
117 bslab->slab = slab;
118 bslab->slab_ref = 1;
119 bslab->slab_size = sz;
120out_unlock:
121 mutex_unlock(&bio_slab_lock);
122 return slab;
123}
124
125static void bio_put_slab(struct bio_set *bs)
126{
127 struct bio_slab *bslab = NULL;
128 unsigned int i;
129
130 mutex_lock(&bio_slab_lock);
131
132 for (i = 0; i < bio_slab_nr; i++) {
133 if (bs->bio_slab == bio_slabs[i].slab) {
134 bslab = &bio_slabs[i];
135 break;
136 }
137 }
138
139 if (WARN(!bslab, KERN_ERR "bio: unable to find slab!\n"))
140 goto out;
141
142 WARN_ON(!bslab->slab_ref);
143
144 if (--bslab->slab_ref)
145 goto out;
146
147 kmem_cache_destroy(bslab->slab);
148 bslab->slab = NULL;
149
150out:
151 mutex_unlock(&bio_slab_lock);
152}
153
53unsigned int bvec_nr_vecs(unsigned short idx) 154unsigned int bvec_nr_vecs(unsigned short idx)
54{ 155{
55 return bvec_slabs[idx].nr_vecs; 156 return bvec_slabs[idx].nr_vecs;
56} 157}
57 158
58struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct bio_set *bs) 159void bvec_free_bs(struct bio_set *bs, struct bio_vec *bv, unsigned int idx)
160{
161 BIO_BUG_ON(idx >= BIOVEC_NR_POOLS);
162
163 if (idx == BIOVEC_MAX_IDX)
164 mempool_free(bv, bs->bvec_pool);
165 else {
166 struct biovec_slab *bvs = bvec_slabs + idx;
167
168 kmem_cache_free(bvs->slab, bv);
169 }
170}
171
172struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx,
173 struct bio_set *bs)
59{ 174{
60 struct bio_vec *bvl; 175 struct bio_vec *bvl;
61 176
@@ -64,60 +179,85 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct
64 * If not, this is a bio_kmalloc() allocation and just do a 179 * If not, this is a bio_kmalloc() allocation and just do a
65 * kzalloc() for the exact number of vecs right away. 180 * kzalloc() for the exact number of vecs right away.
66 */ 181 */
67 if (bs) { 182 if (!bs)
183 bvl = kmalloc(nr * sizeof(struct bio_vec), gfp_mask);
184
185 /*
186 * see comment near bvec_array define!
187 */
188 switch (nr) {
189 case 1:
190 *idx = 0;
191 break;
192 case 2 ... 4:
193 *idx = 1;
194 break;
195 case 5 ... 16:
196 *idx = 2;
197 break;
198 case 17 ... 64:
199 *idx = 3;
200 break;
201 case 65 ... 128:
202 *idx = 4;
203 break;
204 case 129 ... BIO_MAX_PAGES:
205 *idx = 5;
206 break;
207 default:
208 return NULL;
209 }
210
211 /*
212 * idx now points to the pool we want to allocate from. only the
213 * 1-vec entry pool is mempool backed.
214 */
215 if (*idx == BIOVEC_MAX_IDX) {
216fallback:
217 bvl = mempool_alloc(bs->bvec_pool, gfp_mask);
218 } else {
219 struct biovec_slab *bvs = bvec_slabs + *idx;
220 gfp_t __gfp_mask = gfp_mask & ~(__GFP_WAIT | __GFP_IO);
221
68 /* 222 /*
69 * see comment near bvec_array define! 223 * Make this allocation restricted and don't dump info on
224 * allocation failures, since we'll fallback to the mempool
225 * in case of failure.
70 */ 226 */
71 switch (nr) { 227 __gfp_mask |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
72 case 1:
73 *idx = 0;
74 break;
75 case 2 ... 4:
76 *idx = 1;
77 break;
78 case 5 ... 16:
79 *idx = 2;
80 break;
81 case 17 ... 64:
82 *idx = 3;
83 break;
84 case 65 ... 128:
85 *idx = 4;
86 break;
87 case 129 ... BIO_MAX_PAGES:
88 *idx = 5;
89 break;
90 default:
91 return NULL;
92 }
93 228
94 /* 229 /*
95 * idx now points to the pool we want to allocate from 230 * Try a slab allocation. If this fails and __GFP_WAIT
231 * is set, retry with the 1-entry mempool
96 */ 232 */
97 bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask); 233 bvl = kmem_cache_alloc(bvs->slab, __gfp_mask);
98 if (bvl) 234 if (unlikely(!bvl && (gfp_mask & __GFP_WAIT))) {
99 memset(bvl, 0, 235 *idx = BIOVEC_MAX_IDX;
100 bvec_nr_vecs(*idx) * sizeof(struct bio_vec)); 236 goto fallback;
101 } else 237 }
102 bvl = kzalloc(nr * sizeof(struct bio_vec), gfp_mask); 238 }
103 239
104 return bvl; 240 return bvl;
105} 241}
106 242
107void bio_free(struct bio *bio, struct bio_set *bio_set) 243void bio_free(struct bio *bio, struct bio_set *bs)
108{ 244{
109 if (bio->bi_io_vec) { 245 void *p;
110 const int pool_idx = BIO_POOL_IDX(bio);
111 246
112 BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS); 247 if (bio_has_allocated_vec(bio))
113 248 bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio));
114 mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]);
115 }
116 249
117 if (bio_integrity(bio)) 250 if (bio_integrity(bio))
118 bio_integrity_free(bio, bio_set); 251 bio_integrity_free(bio, bs);
252
253 /*
254 * If we have front padding, adjust the bio pointer before freeing
255 */
256 p = bio;
257 if (bs->front_pad)
258 p -= bs->front_pad;
119 259
120 mempool_free(bio, bio_set->bio_pool); 260 mempool_free(p, bs->bio_pool);
121} 261}
122 262
123/* 263/*
@@ -130,7 +270,8 @@ static void bio_fs_destructor(struct bio *bio)
130 270
131static void bio_kmalloc_destructor(struct bio *bio) 271static void bio_kmalloc_destructor(struct bio *bio)
132{ 272{
133 kfree(bio->bi_io_vec); 273 if (bio_has_allocated_vec(bio))
274 kfree(bio->bi_io_vec);
134 kfree(bio); 275 kfree(bio);
135} 276}
136 277
@@ -154,16 +295,20 @@ void bio_init(struct bio *bio)
154 * for a &struct bio to become free. If a %NULL @bs is passed in, we will 295 * for a &struct bio to become free. If a %NULL @bs is passed in, we will
155 * fall back to just using @kmalloc to allocate the required memory. 296 * fall back to just using @kmalloc to allocate the required memory.
156 * 297 *
157 * allocate bio and iovecs from the memory pools specified by the 298 * Note that the caller must set ->bi_destructor on succesful return
158 * bio_set structure, or @kmalloc if none given. 299 * of a bio, to do the appropriate freeing of the bio once the reference
300 * count drops to zero.
159 **/ 301 **/
160struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) 302struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
161{ 303{
162 struct bio *bio; 304 struct bio *bio = NULL;
305
306 if (bs) {
307 void *p = mempool_alloc(bs->bio_pool, gfp_mask);
163 308
164 if (bs) 309 if (p)
165 bio = mempool_alloc(bs->bio_pool, gfp_mask); 310 bio = p + bs->front_pad;
166 else 311 } else
167 bio = kmalloc(sizeof(*bio), gfp_mask); 312 bio = kmalloc(sizeof(*bio), gfp_mask);
168 313
169 if (likely(bio)) { 314 if (likely(bio)) {
@@ -173,7 +318,15 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
173 if (likely(nr_iovecs)) { 318 if (likely(nr_iovecs)) {
174 unsigned long uninitialized_var(idx); 319 unsigned long uninitialized_var(idx);
175 320
176 bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); 321 if (nr_iovecs <= BIO_INLINE_VECS) {
322 idx = 0;
323 bvl = bio->bi_inline_vecs;
324 nr_iovecs = BIO_INLINE_VECS;
325 } else {
326 bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx,
327 bs);
328 nr_iovecs = bvec_nr_vecs(idx);
329 }
177 if (unlikely(!bvl)) { 330 if (unlikely(!bvl)) {
178 if (bs) 331 if (bs)
179 mempool_free(bio, bs->bio_pool); 332 mempool_free(bio, bs->bio_pool);
@@ -183,7 +336,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
183 goto out; 336 goto out;
184 } 337 }
185 bio->bi_flags |= idx << BIO_POOL_OFFSET; 338 bio->bi_flags |= idx << BIO_POOL_OFFSET;
186 bio->bi_max_vecs = bvec_nr_vecs(idx); 339 bio->bi_max_vecs = nr_iovecs;
187 } 340 }
188 bio->bi_io_vec = bvl; 341 bio->bi_io_vec = bvl;
189 } 342 }
@@ -1263,7 +1416,7 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors)
1263 if (!bp) 1416 if (!bp)
1264 return bp; 1417 return bp;
1265 1418
1266 blk_add_trace_pdu_int(bdev_get_queue(bi->bi_bdev), BLK_TA_SPLIT, bi, 1419 trace_block_split(bdev_get_queue(bi->bi_bdev), bi,
1267 bi->bi_sector + first_sectors); 1420 bi->bi_sector + first_sectors);
1268 1421
1269 BUG_ON(bi->bi_vcnt != 1); 1422 BUG_ON(bi->bi_vcnt != 1);
@@ -1343,30 +1496,18 @@ EXPORT_SYMBOL(bio_sector_offset);
1343 */ 1496 */
1344static int biovec_create_pools(struct bio_set *bs, int pool_entries) 1497static int biovec_create_pools(struct bio_set *bs, int pool_entries)
1345{ 1498{
1346 int i; 1499 struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX;
1347 1500
1348 for (i = 0; i < BIOVEC_NR_POOLS; i++) { 1501 bs->bvec_pool = mempool_create_slab_pool(pool_entries, bp->slab);
1349 struct biovec_slab *bp = bvec_slabs + i; 1502 if (!bs->bvec_pool)
1350 mempool_t **bvp = bs->bvec_pools + i; 1503 return -ENOMEM;
1351 1504
1352 *bvp = mempool_create_slab_pool(pool_entries, bp->slab);
1353 if (!*bvp)
1354 return -ENOMEM;
1355 }
1356 return 0; 1505 return 0;
1357} 1506}
1358 1507
1359static void biovec_free_pools(struct bio_set *bs) 1508static void biovec_free_pools(struct bio_set *bs)
1360{ 1509{
1361 int i; 1510 mempool_destroy(bs->bvec_pool);
1362
1363 for (i = 0; i < BIOVEC_NR_POOLS; i++) {
1364 mempool_t *bvp = bs->bvec_pools[i];
1365
1366 if (bvp)
1367 mempool_destroy(bvp);
1368 }
1369
1370} 1511}
1371 1512
1372void bioset_free(struct bio_set *bs) 1513void bioset_free(struct bio_set *bs)
@@ -1376,25 +1517,49 @@ void bioset_free(struct bio_set *bs)
1376 1517
1377 bioset_integrity_free(bs); 1518 bioset_integrity_free(bs);
1378 biovec_free_pools(bs); 1519 biovec_free_pools(bs);
1520 bio_put_slab(bs);
1379 1521
1380 kfree(bs); 1522 kfree(bs);
1381} 1523}
1382 1524
1383struct bio_set *bioset_create(int bio_pool_size, int bvec_pool_size) 1525/**
1526 * bioset_create - Create a bio_set
1527 * @pool_size: Number of bio and bio_vecs to cache in the mempool
1528 * @front_pad: Number of bytes to allocate in front of the returned bio
1529 *
1530 * Description:
1531 * Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
1532 * to ask for a number of bytes to be allocated in front of the bio.
1533 * Front pad allocation is useful for embedding the bio inside
1534 * another structure, to avoid allocating extra data to go with the bio.
1535 * Note that the bio must be embedded at the END of that structure always,
1536 * or things will break badly.
1537 */
1538struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
1384{ 1539{
1385 struct bio_set *bs = kzalloc(sizeof(*bs), GFP_KERNEL); 1540 unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
1541 struct bio_set *bs;
1386 1542
1543 bs = kzalloc(sizeof(*bs), GFP_KERNEL);
1387 if (!bs) 1544 if (!bs)
1388 return NULL; 1545 return NULL;
1389 1546
1390 bs->bio_pool = mempool_create_slab_pool(bio_pool_size, bio_slab); 1547 bs->front_pad = front_pad;
1548
1549 bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad);
1550 if (!bs->bio_slab) {
1551 kfree(bs);
1552 return NULL;
1553 }
1554
1555 bs->bio_pool = mempool_create_slab_pool(pool_size, bs->bio_slab);
1391 if (!bs->bio_pool) 1556 if (!bs->bio_pool)
1392 goto bad; 1557 goto bad;
1393 1558
1394 if (bioset_integrity_create(bs, bio_pool_size)) 1559 if (bioset_integrity_create(bs, pool_size))
1395 goto bad; 1560 goto bad;
1396 1561
1397 if (!biovec_create_pools(bs, bvec_pool_size)) 1562 if (!biovec_create_pools(bs, pool_size))
1398 return bs; 1563 return bs;
1399 1564
1400bad: 1565bad:
@@ -1418,12 +1583,16 @@ static void __init biovec_init_slabs(void)
1418 1583
1419static int __init init_bio(void) 1584static int __init init_bio(void)
1420{ 1585{
1421 bio_slab = KMEM_CACHE(bio, SLAB_HWCACHE_ALIGN|SLAB_PANIC); 1586 bio_slab_max = 2;
1587 bio_slab_nr = 0;
1588 bio_slabs = kzalloc(bio_slab_max * sizeof(struct bio_slab), GFP_KERNEL);
1589 if (!bio_slabs)
1590 panic("bio: can't allocate bios\n");
1422 1591
1423 bio_integrity_init_slab(); 1592 bio_integrity_init_slab();
1424 biovec_init_slabs(); 1593 biovec_init_slabs();
1425 1594
1426 fs_bio_set = bioset_create(BIO_POOL_SIZE, 2); 1595 fs_bio_set = bioset_create(BIO_POOL_SIZE, 0);
1427 if (!fs_bio_set) 1596 if (!fs_bio_set)
1428 panic("bio: can't allocate bios\n"); 1597 panic("bio: can't allocate bios\n");
1429 1598
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 99e0ae1a4c7..349a26c1000 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -326,12 +326,13 @@ static struct file_system_type bd_type = {
326 .kill_sb = kill_anon_super, 326 .kill_sb = kill_anon_super,
327}; 327};
328 328
329static struct vfsmount *bd_mnt __read_mostly; 329struct super_block *blockdev_superblock __read_mostly;
330struct super_block *blockdev_superblock;
331 330
332void __init bdev_cache_init(void) 331void __init bdev_cache_init(void)
333{ 332{
334 int err; 333 int err;
334 struct vfsmount *bd_mnt;
335
335 bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode), 336 bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
336 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| 337 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
337 SLAB_MEM_SPREAD|SLAB_PANIC), 338 SLAB_MEM_SPREAD|SLAB_PANIC),
@@ -373,7 +374,7 @@ struct block_device *bdget(dev_t dev)
373 struct block_device *bdev; 374 struct block_device *bdev;
374 struct inode *inode; 375 struct inode *inode;
375 376
376 inode = iget5_locked(bd_mnt->mnt_sb, hash(dev), 377 inode = iget5_locked(blockdev_superblock, hash(dev),
377 bdev_test, bdev_set, &dev); 378 bdev_test, bdev_set, &dev);
378 379
379 if (!inode) 380 if (!inode)
@@ -463,7 +464,7 @@ void bd_forget(struct inode *inode)
463 464
464 spin_lock(&bdev_lock); 465 spin_lock(&bdev_lock);
465 if (inode->i_bdev) { 466 if (inode->i_bdev) {
466 if (inode->i_sb != blockdev_superblock) 467 if (!sb_is_blkdev_sb(inode->i_sb))
467 bdev = inode->i_bdev; 468 bdev = inode->i_bdev;
468 __bd_forget(inode); 469 __bd_forget(inode);
469 } 470 }
diff --git a/fs/buffer.c b/fs/buffer.c
index 10179cfa115..a13f09b696f 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -99,10 +99,18 @@ __clear_page_buffers(struct page *page)
99 page_cache_release(page); 99 page_cache_release(page);
100} 100}
101 101
102
103static int quiet_error(struct buffer_head *bh)
104{
105 if (!test_bit(BH_Quiet, &bh->b_state) && printk_ratelimit())
106 return 0;
107 return 1;
108}
109
110
102static void buffer_io_error(struct buffer_head *bh) 111static void buffer_io_error(struct buffer_head *bh)
103{ 112{
104 char b[BDEVNAME_SIZE]; 113 char b[BDEVNAME_SIZE];
105
106 printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n", 114 printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n",
107 bdevname(bh->b_bdev, b), 115 bdevname(bh->b_bdev, b),
108 (unsigned long long)bh->b_blocknr); 116 (unsigned long long)bh->b_blocknr);
@@ -144,7 +152,7 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
144 if (uptodate) { 152 if (uptodate) {
145 set_buffer_uptodate(bh); 153 set_buffer_uptodate(bh);
146 } else { 154 } else {
147 if (!buffer_eopnotsupp(bh) && printk_ratelimit()) { 155 if (!buffer_eopnotsupp(bh) && !quiet_error(bh)) {
148 buffer_io_error(bh); 156 buffer_io_error(bh);
149 printk(KERN_WARNING "lost page write due to " 157 printk(KERN_WARNING "lost page write due to "
150 "I/O error on %s\n", 158 "I/O error on %s\n",
@@ -394,7 +402,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
394 set_buffer_uptodate(bh); 402 set_buffer_uptodate(bh);
395 } else { 403 } else {
396 clear_buffer_uptodate(bh); 404 clear_buffer_uptodate(bh);
397 if (printk_ratelimit()) 405 if (!quiet_error(bh))
398 buffer_io_error(bh); 406 buffer_io_error(bh);
399 SetPageError(page); 407 SetPageError(page);
400 } 408 }
@@ -455,7 +463,7 @@ static void end_buffer_async_write(struct buffer_head *bh, int uptodate)
455 if (uptodate) { 463 if (uptodate) {
456 set_buffer_uptodate(bh); 464 set_buffer_uptodate(bh);
457 } else { 465 } else {
458 if (printk_ratelimit()) { 466 if (!quiet_error(bh)) {
459 buffer_io_error(bh); 467 buffer_io_error(bh);
460 printk(KERN_WARNING "lost page write due to " 468 printk(KERN_WARNING "lost page write due to "
461 "I/O error on %s\n", 469 "I/O error on %s\n",
@@ -1988,7 +1996,7 @@ int block_write_begin(struct file *file, struct address_space *mapping,
1988 page = *pagep; 1996 page = *pagep;
1989 if (page == NULL) { 1997 if (page == NULL) {
1990 ownpage = 1; 1998 ownpage = 1;
1991 page = __grab_cache_page(mapping, index); 1999 page = grab_cache_page_write_begin(mapping, index, flags);
1992 if (!page) { 2000 if (!page) {
1993 status = -ENOMEM; 2001 status = -ENOMEM;
1994 goto out; 2002 goto out;
@@ -2494,7 +2502,7 @@ int nobh_write_begin(struct file *file, struct address_space *mapping,
2494 from = pos & (PAGE_CACHE_SIZE - 1); 2502 from = pos & (PAGE_CACHE_SIZE - 1);
2495 to = from + len; 2503 to = from + len;
2496 2504
2497 page = __grab_cache_page(mapping, index); 2505 page = grab_cache_page_write_begin(mapping, index, flags);
2498 if (!page) 2506 if (!page)
2499 return -ENOMEM; 2507 return -ENOMEM;
2500 *pagep = page; 2508 *pagep = page;
@@ -2913,6 +2921,9 @@ static void end_bio_bh_io_sync(struct bio *bio, int err)
2913 set_bit(BH_Eopnotsupp, &bh->b_state); 2921 set_bit(BH_Eopnotsupp, &bh->b_state);
2914 } 2922 }
2915 2923
2924 if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags)))
2925 set_bit(BH_Quiet, &bh->b_state);
2926
2916 bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags)); 2927 bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags));
2917 bio_put(bio); 2928 bio_put(bio);
2918} 2929}
diff --git a/fs/cifs/AUTHORS b/fs/cifs/AUTHORS
index 9c136d7803d..7f7fa3c302a 100644
--- a/fs/cifs/AUTHORS
+++ b/fs/cifs/AUTHORS
@@ -36,7 +36,9 @@ Miklos Szeredi
36Kazeon team for various fixes especially for 2.4 version. 36Kazeon team for various fixes especially for 2.4 version.
37Asser Ferno (Change Notify support) 37Asser Ferno (Change Notify support)
38Shaggy (Dave Kleikamp) for inumerable small fs suggestions and some good cleanup 38Shaggy (Dave Kleikamp) for inumerable small fs suggestions and some good cleanup
39Gunter Kukkukk (testing and suggestions for support of old servers)
39Igor Mammedov (DFS support) 40Igor Mammedov (DFS support)
41Jeff Layton (many, many fixes, as well as great work on the cifs Kerberos code)
40 42
41Test case and Bug Report contributors 43Test case and Bug Report contributors
42------------------------------------- 44-------------------------------------
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index e078b7aea14..080703a15f4 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,12 @@
1Version 1.56
2------------
3Add "forcemandatorylock" mount option to allow user to use mandatory
4rather than posix (advisory) byte range locks, even though server would
5support posix byte range locks. Fix query of root inode when prefixpath
6specified and user does not have access to query information about the
7top of the share. Fix problem in 2.6.28 resolving DFS paths to
8Samba servers (worked to Windows).
9
1Version 1.55 10Version 1.55
2------------ 11------------
3Various fixes to make delete of open files behavior more predictable 12Various fixes to make delete of open files behavior more predictable
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index 6ba43fb346f..9948c0030e8 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -5,7 +5,7 @@ obj-$(CONFIG_CIFS) += cifs.o
5 5
6cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \ 6cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \
7 link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o \ 7 link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o \
8 md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o fcntl.o \ 8 md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o \
9 readdir.o ioctl.o sess.o export.o cifsacl.o 9 readdir.o ioctl.o sess.o export.o cifsacl.o
10 10
11cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o 11cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o
diff --git a/fs/cifs/README b/fs/cifs/README
index a439dc1739b..da4515e3be2 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -463,9 +463,19 @@ A partial list of the supported mount options follows:
463 with cifs style mandatory byte range locks (and most 463 with cifs style mandatory byte range locks (and most
464 cifs servers do not yet support requesting advisory 464 cifs servers do not yet support requesting advisory
465 byte range locks). 465 byte range locks).
466 forcemandatorylock Even if the server supports posix (advisory) byte range
467 locking, send only mandatory lock requests. For some
468 (presumably rare) applications, originally coded for
469 DOS/Windows, which require Windows style mandatory byte range
470 locking, they may be able to take advantage of this option,
471 forcing the cifs client to only send mandatory locks
472 even if the cifs server would support posix advisory locks.
473 "forcemand" is accepted as a shorter form of this mount
474 option.
466 nodfs Disable DFS (global name space support) even if the 475 nodfs Disable DFS (global name space support) even if the
467 server claims to support it. This can help work around 476 server claims to support it. This can help work around
468 a problem with parsing of DFS paths with Samba 3.0.24 server. 477 a problem with parsing of DFS paths with Samba server
478 versions 3.0.24 and 3.0.25.
469 remount remount the share (often used to change from ro to rw mounts 479 remount remount the share (often used to change from ro to rw mounts
470 or vice versa) 480 or vice versa)
471 cifsacl Report mode bits (e.g. on stat) based on the Windows ACL for 481 cifsacl Report mode bits (e.g. on stat) based on the Windows ACL for
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index e1c18362ba4..85c0a74d034 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -122,7 +122,7 @@ static char *compose_mount_options(const char *sb_mountdata,
122 char **devname) 122 char **devname)
123{ 123{
124 int rc; 124 int rc;
125 char *mountdata; 125 char *mountdata = NULL;
126 int md_len; 126 int md_len;
127 char *tkn_e; 127 char *tkn_e;
128 char *srvIP = NULL; 128 char *srvIP = NULL;
@@ -136,10 +136,9 @@ static char *compose_mount_options(const char *sb_mountdata,
136 *devname = cifs_get_share_name(ref->node_name); 136 *devname = cifs_get_share_name(ref->node_name);
137 rc = dns_resolve_server_name_to_ip(*devname, &srvIP); 137 rc = dns_resolve_server_name_to_ip(*devname, &srvIP);
138 if (rc != 0) { 138 if (rc != 0) {
139 cERROR(1, ("%s: Failed to resolve server part of %s to IP", 139 cERROR(1, ("%s: Failed to resolve server part of %s to IP: %d",
140 __func__, *devname)); 140 __func__, *devname, rc));;
141 mountdata = ERR_PTR(rc); 141 goto compose_mount_options_err;
142 goto compose_mount_options_out;
143 } 142 }
144 /* md_len = strlen(...) + 12 for 'sep+prefixpath=' 143 /* md_len = strlen(...) + 12 for 'sep+prefixpath='
145 * assuming that we have 'unc=' and 'ip=' in 144 * assuming that we have 'unc=' and 'ip=' in
@@ -149,8 +148,8 @@ static char *compose_mount_options(const char *sb_mountdata,
149 strlen(ref->node_name) + 12; 148 strlen(ref->node_name) + 12;
150 mountdata = kzalloc(md_len+1, GFP_KERNEL); 149 mountdata = kzalloc(md_len+1, GFP_KERNEL);
151 if (mountdata == NULL) { 150 if (mountdata == NULL) {
152 mountdata = ERR_PTR(-ENOMEM); 151 rc = -ENOMEM;
153 goto compose_mount_options_out; 152 goto compose_mount_options_err;
154 } 153 }
155 154
156 /* copy all options except of unc,ip,prefixpath */ 155 /* copy all options except of unc,ip,prefixpath */
@@ -197,18 +196,32 @@ static char *compose_mount_options(const char *sb_mountdata,
197 196
198 /* find & copy prefixpath */ 197 /* find & copy prefixpath */
199 tkn_e = strchr(ref->node_name + 2, '\\'); 198 tkn_e = strchr(ref->node_name + 2, '\\');
200 if (tkn_e == NULL) /* invalid unc, missing share name*/ 199 if (tkn_e == NULL) {
201 goto compose_mount_options_out; 200 /* invalid unc, missing share name*/
201 rc = -EINVAL;
202 goto compose_mount_options_err;
203 }
202 204
205 /*
206 * this function gives us a path with a double backslash prefix. We
207 * require a single backslash for DFS. Temporarily increment fullpath
208 * to put it in the proper form and decrement before freeing it.
209 */
203 fullpath = build_path_from_dentry(dentry); 210 fullpath = build_path_from_dentry(dentry);
211 if (!fullpath) {
212 rc = -ENOMEM;
213 goto compose_mount_options_err;
214 }
215 ++fullpath;
204 tkn_e = strchr(tkn_e + 1, '\\'); 216 tkn_e = strchr(tkn_e + 1, '\\');
205 if (tkn_e || strlen(fullpath) - (ref->path_consumed)) { 217 if (tkn_e || (strlen(fullpath) - ref->path_consumed)) {
206 strncat(mountdata, &sep, 1); 218 strncat(mountdata, &sep, 1);
207 strcat(mountdata, "prefixpath="); 219 strcat(mountdata, "prefixpath=");
208 if (tkn_e) 220 if (tkn_e)
209 strcat(mountdata, tkn_e + 1); 221 strcat(mountdata, tkn_e + 1);
210 strcat(mountdata, fullpath + (ref->path_consumed)); 222 strcat(mountdata, fullpath + ref->path_consumed);
211 } 223 }
224 --fullpath;
212 kfree(fullpath); 225 kfree(fullpath);
213 226
214 /*cFYI(1,("%s: parent mountdata: %s", __func__,sb_mountdata));*/ 227 /*cFYI(1,("%s: parent mountdata: %s", __func__,sb_mountdata));*/
@@ -217,6 +230,11 @@ static char *compose_mount_options(const char *sb_mountdata,
217compose_mount_options_out: 230compose_mount_options_out:
218 kfree(srvIP); 231 kfree(srvIP);
219 return mountdata; 232 return mountdata;
233
234compose_mount_options_err:
235 kfree(mountdata);
236 mountdata = ERR_PTR(rc);
237 goto compose_mount_options_out;
220} 238}
221 239
222 240
@@ -309,13 +327,19 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
309 goto out_err; 327 goto out_err;
310 } 328 }
311 329
330 /*
331 * The MSDFS spec states that paths in DFS referral requests and
332 * responses must be prefixed by a single '\' character instead of
333 * the double backslashes usually used in the UNC. This function
334 * gives us the latter, so we must adjust the result.
335 */
312 full_path = build_path_from_dentry(dentry); 336 full_path = build_path_from_dentry(dentry);
313 if (full_path == NULL) { 337 if (full_path == NULL) {
314 rc = -ENOMEM; 338 rc = -ENOMEM;
315 goto out_err; 339 goto out_err;
316 } 340 }
317 341
318 rc = get_dfs_path(xid, ses , full_path, cifs_sb->local_nls, 342 rc = get_dfs_path(xid, ses , full_path + 1, cifs_sb->local_nls,
319 &num_referrals, &referrals, 343 &num_referrals, &referrals,
320 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 344 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
321 345
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 877c85409f1..c4c306f7b06 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -19,8 +19,8 @@
19#define _CIFS_FS_SB_H 19#define _CIFS_FS_SB_H
20 20
21#define CIFS_MOUNT_NO_PERM 1 /* do not do client vfs_perm check */ 21#define CIFS_MOUNT_NO_PERM 1 /* do not do client vfs_perm check */
22#define CIFS_MOUNT_SET_UID 2 /* set current->euid in create etc. */ 22#define CIFS_MOUNT_SET_UID 2 /* set current's euid in create etc. */
23#define CIFS_MOUNT_SERVER_INUM 4 /* inode numbers from uniqueid from server */ 23#define CIFS_MOUNT_SERVER_INUM 4 /* inode numbers from uniqueid from server */
24#define CIFS_MOUNT_DIRECT_IO 8 /* do not write nor read through page cache */ 24#define CIFS_MOUNT_DIRECT_IO 8 /* do not write nor read through page cache */
25#define CIFS_MOUNT_NO_XATTR 0x10 /* if set - disable xattr support */ 25#define CIFS_MOUNT_NO_XATTR 0x10 /* if set - disable xattr support */
26#define CIFS_MOUNT_MAP_SPECIAL_CHR 0x20 /* remap illegal chars in filenames */ 26#define CIFS_MOUNT_MAP_SPECIAL_CHR 0x20 /* remap illegal chars in filenames */
@@ -30,7 +30,8 @@
30#define CIFS_MOUNT_CIFS_ACL 0x200 /* send ACL requests to non-POSIX srv */ 30#define CIFS_MOUNT_CIFS_ACL 0x200 /* send ACL requests to non-POSIX srv */
31#define CIFS_MOUNT_OVERR_UID 0x400 /* override uid returned from server */ 31#define CIFS_MOUNT_OVERR_UID 0x400 /* override uid returned from server */
32#define CIFS_MOUNT_OVERR_GID 0x800 /* override gid returned from server */ 32#define CIFS_MOUNT_OVERR_GID 0x800 /* override gid returned from server */
33#define CIFS_MOUNT_DYNPERM 0x1000 /* allow in-memory only mode setting */ 33#define CIFS_MOUNT_DYNPERM 0x1000 /* allow in-memory only mode setting */
34#define CIFS_MOUNT_NOPOSIXBRL 0x2000 /* mandatory not posix byte range lock */
34 35
35struct cifs_sb_info { 36struct cifs_sb_info {
36 struct cifsTconInfo *tcon; /* primary mount */ 37 struct cifsTconInfo *tcon; /* primary mount */
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 0ab2fb5afef..3fd3a9df043 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -121,11 +121,9 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
121 121
122 /* add the server address */ 122 /* add the server address */
123 if (server->addr.sockAddr.sin_family == AF_INET) 123 if (server->addr.sockAddr.sin_family == AF_INET)
124 sprintf(dp, "ip4=" NIPQUAD_FMT, 124 sprintf(dp, "ip4=%pI4", &server->addr.sockAddr.sin_addr);
125 NIPQUAD(server->addr.sockAddr.sin_addr));
126 else if (server->addr.sockAddr.sin_family == AF_INET6) 125 else if (server->addr.sockAddr.sin_family == AF_INET6)
127 sprintf(dp, "ip6=" NIP6_SEQFMT, 126 sprintf(dp, "ip6=%pi6", &server->addr.sockAddr6.sin6_addr);
128 NIP6(server->addr.sockAddr6.sin6_addr));
129 else 127 else
130 goto out; 128 goto out;
131 129
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index bd5f13d3845..d4839cf0cb2 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -37,7 +37,7 @@
37 37
38extern void mdfour(unsigned char *out, unsigned char *in, int n); 38extern void mdfour(unsigned char *out, unsigned char *in, int n);
39extern void E_md4hash(const unsigned char *passwd, unsigned char *p16); 39extern void E_md4hash(const unsigned char *passwd, unsigned char *p16);
40extern void SMBencrypt(unsigned char *passwd, unsigned char *c8, 40extern void SMBencrypt(unsigned char *passwd, const unsigned char *c8,
41 unsigned char *p24); 41 unsigned char *p24);
42 42
43static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu, 43static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu,
@@ -280,25 +280,22 @@ int CalcNTLMv2_partial_mac_key(struct cifsSesInfo *ses,
280} 280}
281 281
282#ifdef CONFIG_CIFS_WEAK_PW_HASH 282#ifdef CONFIG_CIFS_WEAK_PW_HASH
283void calc_lanman_hash(struct cifsSesInfo *ses, char *lnm_session_key) 283void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
284 char *lnm_session_key)
284{ 285{
285 int i; 286 int i;
286 char password_with_pad[CIFS_ENCPWD_SIZE]; 287 char password_with_pad[CIFS_ENCPWD_SIZE];
287 288
288 if (ses->server == NULL)
289 return;
290
291 memset(password_with_pad, 0, CIFS_ENCPWD_SIZE); 289 memset(password_with_pad, 0, CIFS_ENCPWD_SIZE);
292 if (ses->password) 290 if (password)
293 strncpy(password_with_pad, ses->password, CIFS_ENCPWD_SIZE); 291 strncpy(password_with_pad, password, CIFS_ENCPWD_SIZE);
294 292
295 if ((ses->server->secMode & SECMODE_PW_ENCRYPT) == 0) 293 if (!encrypt && extended_security & CIFSSEC_MAY_PLNTXT) {
296 if (extended_security & CIFSSEC_MAY_PLNTXT) { 294 memset(lnm_session_key, 0, CIFS_SESS_KEY_SIZE);
297 memset(lnm_session_key, 0, CIFS_SESS_KEY_SIZE); 295 memcpy(lnm_session_key, password_with_pad,
298 memcpy(lnm_session_key, password_with_pad, 296 CIFS_ENCPWD_SIZE);
299 CIFS_ENCPWD_SIZE); 297 return;
300 return; 298 }
301 }
302 299
303 /* calculate old style session key */ 300 /* calculate old style session key */
304 /* calling toupper is less broken than repeatedly 301 /* calling toupper is less broken than repeatedly
@@ -314,7 +311,8 @@ void calc_lanman_hash(struct cifsSesInfo *ses, char *lnm_session_key)
314 for (i = 0; i < CIFS_ENCPWD_SIZE; i++) 311 for (i = 0; i < CIFS_ENCPWD_SIZE; i++)
315 password_with_pad[i] = toupper(password_with_pad[i]); 312 password_with_pad[i] = toupper(password_with_pad[i]);
316 313
317 SMBencrypt(password_with_pad, ses->server->cryptKey, lnm_session_key); 314 SMBencrypt(password_with_pad, cryptkey, lnm_session_key);
315
318 /* clear password before we return/free memory */ 316 /* clear password before we return/free memory */
319 memset(password_with_pad, 0, CIFS_ENCPWD_SIZE); 317 memset(password_with_pad, 0, CIFS_ENCPWD_SIZE);
320} 318}
diff --git a/fs/cifs/cifsencrypt.h b/fs/cifs/cifsencrypt.h
index 152fa2dcfc6..15d2ec00647 100644
--- a/fs/cifs/cifsencrypt.h
+++ b/fs/cifs/cifsencrypt.h
@@ -26,7 +26,8 @@
26extern void mdfour(unsigned char *out, unsigned char *in, int n); 26extern void mdfour(unsigned char *out, unsigned char *in, int n);
27/* smbdes.c */ 27/* smbdes.c */
28extern void E_P16(unsigned char *p14, unsigned char *p16); 28extern void E_P16(unsigned char *p14, unsigned char *p16);
29extern void E_P24(unsigned char *p21, unsigned char *c8, unsigned char *p24); 29extern void E_P24(unsigned char *p21, const unsigned char *c8,
30 unsigned char *p24);
30 31
31 32
32 33
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index d9cf467309e..13ea53251dc 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -66,7 +66,9 @@ unsigned int sign_CIFS_PDUs = 1;
66extern struct task_struct *oplockThread; /* remove sparse warning */ 66extern struct task_struct *oplockThread; /* remove sparse warning */
67struct task_struct *oplockThread = NULL; 67struct task_struct *oplockThread = NULL;
68/* extern struct task_struct * dnotifyThread; remove sparse warning */ 68/* extern struct task_struct * dnotifyThread; remove sparse warning */
69#ifdef CONFIG_CIFS_EXPERIMENTAL
69static struct task_struct *dnotifyThread = NULL; 70static struct task_struct *dnotifyThread = NULL;
71#endif
70static const struct super_operations cifs_super_ops; 72static const struct super_operations cifs_super_ops;
71unsigned int CIFSMaxBufSize = CIFS_MAX_MSGSIZE; 73unsigned int CIFSMaxBufSize = CIFS_MAX_MSGSIZE;
72module_param(CIFSMaxBufSize, int, 0); 74module_param(CIFSMaxBufSize, int, 0);
@@ -337,39 +339,58 @@ static int
337cifs_show_options(struct seq_file *s, struct vfsmount *m) 339cifs_show_options(struct seq_file *s, struct vfsmount *m)
338{ 340{
339 struct cifs_sb_info *cifs_sb; 341 struct cifs_sb_info *cifs_sb;
342 struct cifsTconInfo *tcon;
343 struct TCP_Server_Info *server;
340 344
341 cifs_sb = CIFS_SB(m->mnt_sb); 345 cifs_sb = CIFS_SB(m->mnt_sb);
342 346
343 if (cifs_sb) { 347 if (cifs_sb) {
344 if (cifs_sb->tcon) { 348 tcon = cifs_sb->tcon;
345/* BB add prepath to mount options displayed */ 349 if (tcon) {
346 seq_printf(s, ",unc=%s", cifs_sb->tcon->treeName); 350 seq_printf(s, ",unc=%s", cifs_sb->tcon->treeName);
347 if (cifs_sb->tcon->ses) { 351 if (tcon->ses) {
348 if (cifs_sb->tcon->ses->userName) 352 if (tcon->ses->userName)
349 seq_printf(s, ",username=%s", 353 seq_printf(s, ",username=%s",
350 cifs_sb->tcon->ses->userName); 354 tcon->ses->userName);
351 if (cifs_sb->tcon->ses->domainName) 355 if (tcon->ses->domainName)
352 seq_printf(s, ",domain=%s", 356 seq_printf(s, ",domain=%s",
353 cifs_sb->tcon->ses->domainName); 357 tcon->ses->domainName);
358 server = tcon->ses->server;
359 if (server) {
360 seq_printf(s, ",addr=");
361 switch (server->addr.sockAddr6.
362 sin6_family) {
363 case AF_INET6:
364 seq_printf(s, "%pI6",
365 &server->addr.sockAddr6.sin6_addr);
366 break;
367 case AF_INET:
368 seq_printf(s, "%pI4",
369 &server->addr.sockAddr.sin_addr.s_addr);
370 break;
371 }
372 }
354 } 373 }
355 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) || 374 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) ||
356 !(cifs_sb->tcon->unix_ext)) 375 !(tcon->unix_ext))
357 seq_printf(s, ",uid=%d", cifs_sb->mnt_uid); 376 seq_printf(s, ",uid=%d", cifs_sb->mnt_uid);
358 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) || 377 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) ||
359 !(cifs_sb->tcon->unix_ext)) 378 !(tcon->unix_ext))
360 seq_printf(s, ",gid=%d", cifs_sb->mnt_gid); 379 seq_printf(s, ",gid=%d", cifs_sb->mnt_gid);
361 if (!cifs_sb->tcon->unix_ext) { 380 if (!tcon->unix_ext) {
362 seq_printf(s, ",file_mode=0%o,dir_mode=0%o", 381 seq_printf(s, ",file_mode=0%o,dir_mode=0%o",
363 cifs_sb->mnt_file_mode, 382 cifs_sb->mnt_file_mode,
364 cifs_sb->mnt_dir_mode); 383 cifs_sb->mnt_dir_mode);
365 } 384 }
366 if (cifs_sb->tcon->seal) 385 if (tcon->seal)
367 seq_printf(s, ",seal"); 386 seq_printf(s, ",seal");
368 if (cifs_sb->tcon->nocase) 387 if (tcon->nocase)
369 seq_printf(s, ",nocase"); 388 seq_printf(s, ",nocase");
370 if (cifs_sb->tcon->retry) 389 if (tcon->retry)
371 seq_printf(s, ",hard"); 390 seq_printf(s, ",hard");
372 } 391 }
392 if (cifs_sb->prepath)
393 seq_printf(s, ",prepath=%s", cifs_sb->prepath);
373 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) 394 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)
374 seq_printf(s, ",posixpaths"); 395 seq_printf(s, ",posixpaths");
375 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) 396 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)
@@ -417,9 +438,8 @@ int cifs_xquota_set(struct super_block *sb, int quota_type, qid_t qid,
417 xid = GetXid(); 438 xid = GetXid();
418 if (pTcon) { 439 if (pTcon) {
419 cFYI(1, ("set type: 0x%x id: %d", quota_type, qid)); 440 cFYI(1, ("set type: 0x%x id: %d", quota_type, qid));
420 } else { 441 } else
421 rc = -EIO; 442 rc = -EIO;
422 }
423 443
424 FreeXid(xid); 444 FreeXid(xid);
425 return rc; 445 return rc;
@@ -441,9 +461,8 @@ int cifs_xquota_get(struct super_block *sb, int quota_type, qid_t qid,
441 xid = GetXid(); 461 xid = GetXid();
442 if (pTcon) { 462 if (pTcon) {
443 cFYI(1, ("set type: 0x%x id: %d", quota_type, qid)); 463 cFYI(1, ("set type: 0x%x id: %d", quota_type, qid));
444 } else { 464 } else
445 rc = -EIO; 465 rc = -EIO;
446 }
447 466
448 FreeXid(xid); 467 FreeXid(xid);
449 return rc; 468 return rc;
@@ -464,9 +483,8 @@ int cifs_xstate_set(struct super_block *sb, unsigned int flags, int operation)
464 xid = GetXid(); 483 xid = GetXid();
465 if (pTcon) { 484 if (pTcon) {
466 cFYI(1, ("flags: 0x%x operation: 0x%x", flags, operation)); 485 cFYI(1, ("flags: 0x%x operation: 0x%x", flags, operation));
467 } else { 486 } else
468 rc = -EIO; 487 rc = -EIO;
469 }
470 488
471 FreeXid(xid); 489 FreeXid(xid);
472 return rc; 490 return rc;
@@ -479,17 +497,16 @@ int cifs_xstate_get(struct super_block *sb, struct fs_quota_stat *qstats)
479 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 497 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
480 struct cifsTconInfo *pTcon; 498 struct cifsTconInfo *pTcon;
481 499
482 if (cifs_sb) { 500 if (cifs_sb)
483 pTcon = cifs_sb->tcon; 501 pTcon = cifs_sb->tcon;
484 } else { 502 else
485 return -EIO; 503 return -EIO;
486 } 504
487 xid = GetXid(); 505 xid = GetXid();
488 if (pTcon) { 506 if (pTcon) {
489 cFYI(1, ("pqstats %p", qstats)); 507 cFYI(1, ("pqstats %p", qstats));
490 } else { 508 } else
491 rc = -EIO; 509 rc = -EIO;
492 }
493 510
494 FreeXid(xid); 511 FreeXid(xid);
495 return rc; 512 return rc;
@@ -730,7 +747,6 @@ const struct file_operations cifs_file_ops = {
730#endif /* CONFIG_CIFS_POSIX */ 747#endif /* CONFIG_CIFS_POSIX */
731 748
732#ifdef CONFIG_CIFS_EXPERIMENTAL 749#ifdef CONFIG_CIFS_EXPERIMENTAL
733 .dir_notify = cifs_dir_notify,
734 .setlease = cifs_setlease, 750 .setlease = cifs_setlease,
735#endif /* CONFIG_CIFS_EXPERIMENTAL */ 751#endif /* CONFIG_CIFS_EXPERIMENTAL */
736}; 752};
@@ -751,7 +767,6 @@ const struct file_operations cifs_file_direct_ops = {
751#endif /* CONFIG_CIFS_POSIX */ 767#endif /* CONFIG_CIFS_POSIX */
752 .llseek = cifs_llseek, 768 .llseek = cifs_llseek,
753#ifdef CONFIG_CIFS_EXPERIMENTAL 769#ifdef CONFIG_CIFS_EXPERIMENTAL
754 .dir_notify = cifs_dir_notify,
755 .setlease = cifs_setlease, 770 .setlease = cifs_setlease,
756#endif /* CONFIG_CIFS_EXPERIMENTAL */ 771#endif /* CONFIG_CIFS_EXPERIMENTAL */
757}; 772};
@@ -772,7 +787,6 @@ const struct file_operations cifs_file_nobrl_ops = {
772#endif /* CONFIG_CIFS_POSIX */ 787#endif /* CONFIG_CIFS_POSIX */
773 788
774#ifdef CONFIG_CIFS_EXPERIMENTAL 789#ifdef CONFIG_CIFS_EXPERIMENTAL
775 .dir_notify = cifs_dir_notify,
776 .setlease = cifs_setlease, 790 .setlease = cifs_setlease,
777#endif /* CONFIG_CIFS_EXPERIMENTAL */ 791#endif /* CONFIG_CIFS_EXPERIMENTAL */
778}; 792};
@@ -792,7 +806,6 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
792#endif /* CONFIG_CIFS_POSIX */ 806#endif /* CONFIG_CIFS_POSIX */
793 .llseek = cifs_llseek, 807 .llseek = cifs_llseek,
794#ifdef CONFIG_CIFS_EXPERIMENTAL 808#ifdef CONFIG_CIFS_EXPERIMENTAL
795 .dir_notify = cifs_dir_notify,
796 .setlease = cifs_setlease, 809 .setlease = cifs_setlease,
797#endif /* CONFIG_CIFS_EXPERIMENTAL */ 810#endif /* CONFIG_CIFS_EXPERIMENTAL */
798}; 811};
@@ -801,9 +814,6 @@ const struct file_operations cifs_dir_ops = {
801 .readdir = cifs_readdir, 814 .readdir = cifs_readdir,
802 .release = cifs_closedir, 815 .release = cifs_closedir,
803 .read = generic_read_dir, 816 .read = generic_read_dir,
804#ifdef CONFIG_CIFS_EXPERIMENTAL
805 .dir_notify = cifs_dir_notify,
806#endif /* CONFIG_CIFS_EXPERIMENTAL */
807 .unlocked_ioctl = cifs_ioctl, 817 .unlocked_ioctl = cifs_ioctl,
808 .llseek = generic_file_llseek, 818 .llseek = generic_file_llseek,
809}; 819};
@@ -1029,6 +1039,7 @@ static int cifs_oplock_thread(void *dummyarg)
1029 return 0; 1039 return 0;
1030} 1040}
1031 1041
1042#ifdef CONFIG_CIFS_EXPERIMENTAL
1032static int cifs_dnotify_thread(void *dummyarg) 1043static int cifs_dnotify_thread(void *dummyarg)
1033{ 1044{
1034 struct list_head *tmp; 1045 struct list_head *tmp;
@@ -1054,6 +1065,7 @@ static int cifs_dnotify_thread(void *dummyarg)
1054 1065
1055 return 0; 1066 return 0;
1056} 1067}
1068#endif
1057 1069
1058static int __init 1070static int __init
1059init_cifs(void) 1071init_cifs(void)
@@ -1131,16 +1143,20 @@ init_cifs(void)
1131 goto out_unregister_dfs_key_type; 1143 goto out_unregister_dfs_key_type;
1132 } 1144 }
1133 1145
1146#ifdef CONFIG_CIFS_EXPERIMENTAL
1134 dnotifyThread = kthread_run(cifs_dnotify_thread, NULL, "cifsdnotifyd"); 1147 dnotifyThread = kthread_run(cifs_dnotify_thread, NULL, "cifsdnotifyd");
1135 if (IS_ERR(dnotifyThread)) { 1148 if (IS_ERR(dnotifyThread)) {
1136 rc = PTR_ERR(dnotifyThread); 1149 rc = PTR_ERR(dnotifyThread);
1137 cERROR(1, ("error %d create dnotify thread", rc)); 1150 cERROR(1, ("error %d create dnotify thread", rc));
1138 goto out_stop_oplock_thread; 1151 goto out_stop_oplock_thread;
1139 } 1152 }
1153#endif
1140 1154
1141 return 0; 1155 return 0;
1142 1156
1157#ifdef CONFIG_CIFS_EXPERIMENTAL
1143 out_stop_oplock_thread: 1158 out_stop_oplock_thread:
1159#endif
1144 kthread_stop(oplockThread); 1160 kthread_stop(oplockThread);
1145 out_unregister_dfs_key_type: 1161 out_unregister_dfs_key_type:
1146#ifdef CONFIG_CIFS_DFS_UPCALL 1162#ifdef CONFIG_CIFS_DFS_UPCALL
@@ -1179,8 +1195,10 @@ exit_cifs(void)
1179 cifs_destroy_inodecache(); 1195 cifs_destroy_inodecache();
1180 cifs_destroy_mids(); 1196 cifs_destroy_mids();
1181 cifs_destroy_request_bufs(); 1197 cifs_destroy_request_bufs();
1182 kthread_stop(oplockThread); 1198#ifdef CONFIG_CIFS_EXPERIMENTAL
1183 kthread_stop(dnotifyThread); 1199 kthread_stop(dnotifyThread);
1200#endif
1201 kthread_stop(oplockThread);
1184} 1202}
1185 1203
1186MODULE_AUTHOR("Steve French <sfrench@us.ibm.com>"); 1204MODULE_AUTHOR("Steve French <sfrench@us.ibm.com>");
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 074de0b5064..7ac481841f8 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -76,7 +76,6 @@ extern int cifs_file_mmap(struct file * , struct vm_area_struct *);
76extern const struct file_operations cifs_dir_ops; 76extern const struct file_operations cifs_dir_ops;
77extern int cifs_dir_open(struct inode *inode, struct file *file); 77extern int cifs_dir_open(struct inode *inode, struct file *file);
78extern int cifs_readdir(struct file *file, void *direntry, filldir_t filldir); 78extern int cifs_readdir(struct file *file, void *direntry, filldir_t filldir);
79extern int cifs_dir_notify(struct file *, unsigned long arg);
80 79
81/* Functions related to dir entries */ 80/* Functions related to dir entries */
82extern struct dentry_operations cifs_dentry_ops; 81extern struct dentry_operations cifs_dentry_ops;
@@ -101,5 +100,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
101extern const struct export_operations cifs_export_ops; 100extern const struct export_operations cifs_export_ops;
102#endif /* EXPERIMENTAL */ 101#endif /* EXPERIMENTAL */
103 102
104#define CIFS_VERSION "1.55" 103#define CIFS_VERSION "1.56"
105#endif /* _CIFSFS_H */ 104#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index c57c0565547..94c1ca0ec95 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -47,7 +47,11 @@
47 */ 47 */
48#define CIFS_MAX_REQ 50 48#define CIFS_MAX_REQ 50
49 49
50#define SERVER_NAME_LENGTH 15 50#define RFC1001_NAME_LEN 15
51#define RFC1001_NAME_LEN_WITH_NULL (RFC1001_NAME_LEN + 1)
52
53/* currently length of NIP6_FMT */
54#define SERVER_NAME_LENGTH 40
51#define SERVER_NAME_LEN_WITH_NULL (SERVER_NAME_LENGTH + 1) 55#define SERVER_NAME_LEN_WITH_NULL (SERVER_NAME_LENGTH + 1)
52 56
53/* used to define string lengths for reversing unicode strings */ 57/* used to define string lengths for reversing unicode strings */
@@ -125,8 +129,7 @@ struct TCP_Server_Info {
125 struct list_head smb_ses_list; 129 struct list_head smb_ses_list;
126 int srv_count; /* reference counter */ 130 int srv_count; /* reference counter */
127 /* 15 character server name + 0x20 16th byte indicating type = srv */ 131 /* 15 character server name + 0x20 16th byte indicating type = srv */
128 char server_RFC1001_name[SERVER_NAME_LEN_WITH_NULL]; 132 char server_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL];
129 char unicode_server_Name[SERVER_NAME_LEN_WITH_NULL * 2];
130 char *hostname; /* hostname portion of UNC string */ 133 char *hostname; /* hostname portion of UNC string */
131 struct socket *ssocket; 134 struct socket *ssocket;
132 union { 135 union {
@@ -151,7 +154,7 @@ struct TCP_Server_Info {
151 atomic_t num_waiters; /* blocked waiting to get in sendrecv */ 154 atomic_t num_waiters; /* blocked waiting to get in sendrecv */
152#endif 155#endif
153 enum statusEnum tcpStatus; /* what we think the status is */ 156 enum statusEnum tcpStatus; /* what we think the status is */
154 struct semaphore tcpSem; 157 struct mutex srv_mutex;
155 struct task_struct *tsk; 158 struct task_struct *tsk;
156 char server_GUID[16]; 159 char server_GUID[16];
157 char secMode; 160 char secMode;
@@ -171,7 +174,7 @@ struct TCP_Server_Info {
171 __u16 CurrentMid; /* multiplex id - rotating counter */ 174 __u16 CurrentMid; /* multiplex id - rotating counter */
172 char cryptKey[CIFS_CRYPTO_KEY_SIZE]; 175 char cryptKey[CIFS_CRYPTO_KEY_SIZE];
173 /* 16th byte of RFC1001 workstation name is always null */ 176 /* 16th byte of RFC1001 workstation name is always null */
174 char workstation_RFC1001_name[SERVER_NAME_LEN_WITH_NULL]; 177 char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL];
175 __u32 sequence_number; /* needed for CIFS PDU signature */ 178 __u32 sequence_number; /* needed for CIFS PDU signature */
176 struct mac_key mac_signing_key; 179 struct mac_key mac_signing_key;
177 char ntlmv2_hash[16]; 180 char ntlmv2_hash[16];
@@ -239,6 +242,7 @@ struct cifsTconInfo {
239 struct cifsSesInfo *ses; /* pointer to session associated with */ 242 struct cifsSesInfo *ses; /* pointer to session associated with */
240 char treeName[MAX_TREE_SIZE + 1]; /* UNC name of resource in ASCII */ 243 char treeName[MAX_TREE_SIZE + 1]; /* UNC name of resource in ASCII */
241 char *nativeFileSystem; 244 char *nativeFileSystem;
245 char *password; /* for share-level security */
242 __u16 tid; /* The 2 byte tree id */ 246 __u16 tid; /* The 2 byte tree id */
243 __u16 Flags; /* optional support bits */ 247 __u16 Flags; /* optional support bits */
244 enum statusEnum tidStatus; 248 enum statusEnum tidStatus;
@@ -422,7 +426,6 @@ struct mid_q_entry {
422 unsigned long when_sent; /* time when smb send finished */ 426 unsigned long when_sent; /* time when smb send finished */
423 unsigned long when_received; /* when demux complete (taken off wire) */ 427 unsigned long when_received; /* when demux complete (taken off wire) */
424#endif 428#endif
425 struct cifsSesInfo *ses; /* smb was sent to this server */
426 struct task_struct *tsk; /* task waiting for response */ 429 struct task_struct *tsk; /* task waiting for response */
427 struct smb_hdr *resp_buf; /* response buffer */ 430 struct smb_hdr *resp_buf; /* response buffer */
428 int midState; /* wish this were enum but can not pass to wait_event */ 431 int midState; /* wish this were enum but can not pass to wait_event */
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index d2a073edd1b..b4e2e9f0ee3 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -1922,7 +1922,7 @@ typedef struct smb_com_transaction2_get_dfs_refer_req {
1922/* DFS server target type */ 1922/* DFS server target type */
1923#define DFS_TYPE_LINK 0x0000 /* also for sysvol targets */ 1923#define DFS_TYPE_LINK 0x0000 /* also for sysvol targets */
1924#define DFS_TYPE_ROOT 0x0001 1924#define DFS_TYPE_ROOT 0x0001
1925 1925
1926/* Referral Entry Flags */ 1926/* Referral Entry Flags */
1927#define DFS_NAME_LIST_REF 0x0200 1927#define DFS_NAME_LIST_REF 0x0200
1928 1928
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 6f21ecb85ce..06f6779988b 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -39,7 +39,7 @@ extern int smb_send(struct socket *, struct smb_hdr *,
39 unsigned int /* length */ , struct sockaddr *, bool); 39 unsigned int /* length */ , struct sockaddr *, bool);
40extern unsigned int _GetXid(void); 40extern unsigned int _GetXid(void);
41extern void _FreeXid(unsigned int); 41extern void _FreeXid(unsigned int);
42#define GetXid() (int)_GetXid(); cFYI(1,("CIFS VFS: in %s as Xid: %d with uid: %d",__func__, xid,current->fsuid)); 42#define GetXid() (int)_GetXid(); cFYI(1,("CIFS VFS: in %s as Xid: %d with uid: %d",__func__, xid,current_fsuid()));
43#define FreeXid(curr_xid) {_FreeXid(curr_xid); cFYI(1,("CIFS VFS: leaving %s (xid = %d) rc = %d",__func__,curr_xid,(int)rc));} 43#define FreeXid(curr_xid) {_FreeXid(curr_xid); cFYI(1,("CIFS VFS: leaving %s (xid = %d) rc = %d",__func__,curr_xid,(int)rc));}
44extern char *build_path_from_dentry(struct dentry *); 44extern char *build_path_from_dentry(struct dentry *);
45extern char *build_wildcard_path_from_dentry(struct dentry *direntry); 45extern char *build_wildcard_path_from_dentry(struct dentry *direntry);
@@ -330,7 +330,8 @@ extern void CalcNTLMv2_response(const struct cifsSesInfo *, char *);
330extern void setup_ntlmv2_rsp(struct cifsSesInfo *, char *, 330extern void setup_ntlmv2_rsp(struct cifsSesInfo *, char *,
331 const struct nls_table *); 331 const struct nls_table *);
332#ifdef CONFIG_CIFS_WEAK_PW_HASH 332#ifdef CONFIG_CIFS_WEAK_PW_HASH
333extern void calc_lanman_hash(struct cifsSesInfo *ses, char *lnm_session_key); 333extern void calc_lanman_hash(const char *password, const char *cryptkey,
334 bool encrypt, char *lnm_session_key);
334#endif /* CIFS_WEAK_PW_HASH */ 335#endif /* CIFS_WEAK_PW_HASH */
335extern int CIFSSMBCopy(int xid, 336extern int CIFSSMBCopy(int xid,
336 struct cifsTconInfo *source_tcon, 337 struct cifsTconInfo *source_tcon,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 2af8626ced4..552642a507c 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1382,13 +1382,13 @@ openRetry:
1382 if (cpu_to_le32(FILE_CREATE) == pSMBr->CreateAction) 1382 if (cpu_to_le32(FILE_CREATE) == pSMBr->CreateAction)
1383 *pOplock |= CIFS_CREATE_ACTION; 1383 *pOplock |= CIFS_CREATE_ACTION;
1384 if (pfile_info) { 1384 if (pfile_info) {
1385 memcpy((char *)pfile_info, (char *)&pSMBr->CreationTime, 1385 memcpy((char *)pfile_info, (char *)&pSMBr->CreationTime,
1386 36 /* CreationTime to Attributes */); 1386 36 /* CreationTime to Attributes */);
1387 /* the file_info buf is endian converted by caller */ 1387 /* the file_info buf is endian converted by caller */
1388 pfile_info->AllocationSize = pSMBr->AllocationSize; 1388 pfile_info->AllocationSize = pSMBr->AllocationSize;
1389 pfile_info->EndOfFile = pSMBr->EndOfFile; 1389 pfile_info->EndOfFile = pSMBr->EndOfFile;
1390 pfile_info->NumberOfLinks = cpu_to_le32(1); 1390 pfile_info->NumberOfLinks = cpu_to_le32(1);
1391 pfile_info->DeletePending = 0; 1391 pfile_info->DeletePending = 0;
1392 } 1392 }
1393 } 1393 }
1394 1394
@@ -1414,8 +1414,13 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, const int netfid,
1414 cFYI(1, ("Reading %d bytes on fid %d", count, netfid)); 1414 cFYI(1, ("Reading %d bytes on fid %d", count, netfid));
1415 if (tcon->ses->capabilities & CAP_LARGE_FILES) 1415 if (tcon->ses->capabilities & CAP_LARGE_FILES)
1416 wct = 12; 1416 wct = 12;
1417 else 1417 else {
1418 wct = 10; /* old style read */ 1418 wct = 10; /* old style read */
1419 if ((lseek >> 32) > 0) {
1420 /* can not handle this big offset for old */
1421 return -EIO;
1422 }
1423 }
1419 1424
1420 *nbytes = 0; 1425 *nbytes = 0;
1421 rc = small_smb_init(SMB_COM_READ_ANDX, wct, tcon, (void **) &pSMB); 1426 rc = small_smb_init(SMB_COM_READ_ANDX, wct, tcon, (void **) &pSMB);
@@ -1431,8 +1436,6 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, const int netfid,
1431 pSMB->OffsetLow = cpu_to_le32(lseek & 0xFFFFFFFF); 1436 pSMB->OffsetLow = cpu_to_le32(lseek & 0xFFFFFFFF);
1432 if (wct == 12) 1437 if (wct == 12)
1433 pSMB->OffsetHigh = cpu_to_le32(lseek >> 32); 1438 pSMB->OffsetHigh = cpu_to_le32(lseek >> 32);
1434 else if ((lseek >> 32) > 0) /* can not handle this big offset for old */
1435 return -EIO;
1436 1439
1437 pSMB->Remaining = 0; 1440 pSMB->Remaining = 0;
1438 pSMB->MaxCount = cpu_to_le16(count & 0xFFFF); 1441 pSMB->MaxCount = cpu_to_le16(count & 0xFFFF);
@@ -1519,8 +1522,13 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
1519 1522
1520 if (tcon->ses->capabilities & CAP_LARGE_FILES) 1523 if (tcon->ses->capabilities & CAP_LARGE_FILES)
1521 wct = 14; 1524 wct = 14;
1522 else 1525 else {
1523 wct = 12; 1526 wct = 12;
1527 if ((offset >> 32) > 0) {
1528 /* can not handle big offset for old srv */
1529 return -EIO;
1530 }
1531 }
1524 1532
1525 rc = smb_init(SMB_COM_WRITE_ANDX, wct, tcon, (void **) &pSMB, 1533 rc = smb_init(SMB_COM_WRITE_ANDX, wct, tcon, (void **) &pSMB,
1526 (void **) &pSMBr); 1534 (void **) &pSMBr);
@@ -1535,8 +1543,6 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
1535 pSMB->OffsetLow = cpu_to_le32(offset & 0xFFFFFFFF); 1543 pSMB->OffsetLow = cpu_to_le32(offset & 0xFFFFFFFF);
1536 if (wct == 14) 1544 if (wct == 14)
1537 pSMB->OffsetHigh = cpu_to_le32(offset >> 32); 1545 pSMB->OffsetHigh = cpu_to_le32(offset >> 32);
1538 else if ((offset >> 32) > 0) /* can not handle big offset for old srv */
1539 return -EIO;
1540 1546
1541 pSMB->Reserved = 0xFFFFFFFF; 1547 pSMB->Reserved = 0xFFFFFFFF;
1542 pSMB->WriteMode = 0; 1548 pSMB->WriteMode = 0;
@@ -1558,7 +1564,7 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
1558 pSMB->DataOffset = 1564 pSMB->DataOffset =
1559 cpu_to_le16(offsetof(struct smb_com_write_req, Data) - 4); 1565 cpu_to_le16(offsetof(struct smb_com_write_req, Data) - 4);
1560 if (buf) 1566 if (buf)
1561 memcpy(pSMB->Data, buf, bytes_sent); 1567 memcpy(pSMB->Data, buf, bytes_sent);
1562 else if (ubuf) { 1568 else if (ubuf) {
1563 if (copy_from_user(pSMB->Data, ubuf, bytes_sent)) { 1569 if (copy_from_user(pSMB->Data, ubuf, bytes_sent)) {
1564 cifs_buf_release(pSMB); 1570 cifs_buf_release(pSMB);
@@ -1621,10 +1627,15 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
1621 1627
1622 cFYI(1, ("write2 at %lld %d bytes", (long long)offset, count)); 1628 cFYI(1, ("write2 at %lld %d bytes", (long long)offset, count));
1623 1629
1624 if (tcon->ses->capabilities & CAP_LARGE_FILES) 1630 if (tcon->ses->capabilities & CAP_LARGE_FILES) {
1625 wct = 14; 1631 wct = 14;
1626 else 1632 } else {
1627 wct = 12; 1633 wct = 12;
1634 if ((offset >> 32) > 0) {
1635 /* can not handle big offset for old srv */
1636 return -EIO;
1637 }
1638 }
1628 rc = small_smb_init(SMB_COM_WRITE_ANDX, wct, tcon, (void **) &pSMB); 1639 rc = small_smb_init(SMB_COM_WRITE_ANDX, wct, tcon, (void **) &pSMB);
1629 if (rc) 1640 if (rc)
1630 return rc; 1641 return rc;
@@ -1637,8 +1648,6 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
1637 pSMB->OffsetLow = cpu_to_le32(offset & 0xFFFFFFFF); 1648 pSMB->OffsetLow = cpu_to_le32(offset & 0xFFFFFFFF);
1638 if (wct == 14) 1649 if (wct == 14)
1639 pSMB->OffsetHigh = cpu_to_le32(offset >> 32); 1650 pSMB->OffsetHigh = cpu_to_le32(offset >> 32);
1640 else if ((offset >> 32) > 0) /* can not handle big offset for old srv */
1641 return -EIO;
1642 pSMB->Reserved = 0xFFFFFFFF; 1651 pSMB->Reserved = 0xFFFFFFFF;
1643 pSMB->WriteMode = 0; 1652 pSMB->WriteMode = 0;
1644 pSMB->Remaining = 0; 1653 pSMB->Remaining = 0;
@@ -1862,10 +1871,6 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
1862 rc = -EIO; /* bad smb */ 1871 rc = -EIO; /* bad smb */
1863 goto plk_err_exit; 1872 goto plk_err_exit;
1864 } 1873 }
1865 if (pLockData == NULL) {
1866 rc = -EINVAL;
1867 goto plk_err_exit;
1868 }
1869 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 1874 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
1870 data_count = le16_to_cpu(pSMBr->t2.DataCount); 1875 data_count = le16_to_cpu(pSMBr->t2.DataCount);
1871 if (data_count < sizeof(struct cifs_posix_lock)) { 1876 if (data_count < sizeof(struct cifs_posix_lock)) {
@@ -3983,7 +3988,8 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr,
3983 3988
3984 node->flags = le16_to_cpu(pSMBr->DFSFlags); 3989 node->flags = le16_to_cpu(pSMBr->DFSFlags);
3985 if (is_unicode) { 3990 if (is_unicode) {
3986 __le16 *tmp = kmalloc(strlen(searchName)*2, GFP_KERNEL); 3991 __le16 *tmp = kmalloc(strlen(searchName)*2 + 2,
3992 GFP_KERNEL);
3987 cifsConvertToUCS((__le16 *) tmp, searchName, 3993 cifsConvertToUCS((__le16 *) tmp, searchName,
3988 PATH_MAX, nls_codepage, remap); 3994 PATH_MAX, nls_codepage, remap);
3989 node->path_consumed = hostlen_fromUCS(tmp, 3995 node->path_consumed = hostlen_fromUCS(tmp,
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index c7d34171458..e9ea394ee07 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -89,6 +89,7 @@ struct smb_vol {
89 bool nullauth:1; /* attempt to authenticate with null user */ 89 bool nullauth:1; /* attempt to authenticate with null user */
90 bool nocase:1; /* request case insensitive filenames */ 90 bool nocase:1; /* request case insensitive filenames */
91 bool nobrl:1; /* disable sending byte range locks to srv */ 91 bool nobrl:1; /* disable sending byte range locks to srv */
92 bool mand_lock:1; /* send mandatory not posix byte range lock reqs */
92 bool seal:1; /* request transport encryption on share */ 93 bool seal:1; /* request transport encryption on share */
93 bool nodfs:1; /* Do not request DFS, even if available */ 94 bool nodfs:1; /* Do not request DFS, even if available */
94 bool local_lease:1; /* check leases only on local system, not remote */ 95 bool local_lease:1; /* check leases only on local system, not remote */
@@ -101,25 +102,17 @@ struct smb_vol {
101 char *prepath; 102 char *prepath;
102}; 103};
103 104
104static int ipv4_connect(struct sockaddr_in *psin_server, 105static int ipv4_connect(struct TCP_Server_Info *server);
105 struct socket **csocket, 106static int ipv6_connect(struct TCP_Server_Info *server);
106 char *netb_name,
107 char *server_netb_name,
108 bool noblocksnd,
109 bool nosndbuf); /* ipv6 never set sndbuf size */
110static int ipv6_connect(struct sockaddr_in6 *psin_server,
111 struct socket **csocket, bool noblocksnd);
112
113
114 /*
115 * cifs tcp session reconnection
116 *
117 * mark tcp session as reconnecting so temporarily locked
118 * mark all smb sessions as reconnecting for tcp session
119 * reconnect tcp session
120 * wake up waiters on reconnection? - (not needed currently)
121 */
122 107
108/*
109 * cifs tcp session reconnection
110 *
111 * mark tcp session as reconnecting so temporarily locked
112 * mark all smb sessions as reconnecting for tcp session
113 * reconnect tcp session
114 * wake up waiters on reconnection? - (not needed currently)
115 */
123static int 116static int
124cifs_reconnect(struct TCP_Server_Info *server) 117cifs_reconnect(struct TCP_Server_Info *server)
125{ 118{
@@ -156,7 +149,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
156 } 149 }
157 read_unlock(&cifs_tcp_ses_lock); 150 read_unlock(&cifs_tcp_ses_lock);
158 /* do not want to be sending data on a socket we are freeing */ 151 /* do not want to be sending data on a socket we are freeing */
159 down(&server->tcpSem); 152 mutex_lock(&server->srv_mutex);
160 if (server->ssocket) { 153 if (server->ssocket) {
161 cFYI(1, ("State: 0x%x Flags: 0x%lx", server->ssocket->state, 154 cFYI(1, ("State: 0x%x Flags: 0x%lx", server->ssocket->state,
162 server->ssocket->flags)); 155 server->ssocket->flags));
@@ -182,21 +175,15 @@ cifs_reconnect(struct TCP_Server_Info *server)
182 } 175 }
183 } 176 }
184 spin_unlock(&GlobalMid_Lock); 177 spin_unlock(&GlobalMid_Lock);
185 up(&server->tcpSem); 178 mutex_unlock(&server->srv_mutex);
186 179
187 while ((server->tcpStatus != CifsExiting) && 180 while ((server->tcpStatus != CifsExiting) &&
188 (server->tcpStatus != CifsGood)) { 181 (server->tcpStatus != CifsGood)) {
189 try_to_freeze(); 182 try_to_freeze();
190 if (server->addr.sockAddr6.sin6_family == AF_INET6) { 183 if (server->addr.sockAddr6.sin6_family == AF_INET6)
191 rc = ipv6_connect(&server->addr.sockAddr6, 184 rc = ipv6_connect(server);
192 &server->ssocket, server->noautotune); 185 else
193 } else { 186 rc = ipv4_connect(server);
194 rc = ipv4_connect(&server->addr.sockAddr,
195 &server->ssocket,
196 server->workstation_RFC1001_name,
197 server->server_RFC1001_name,
198 server->noblocksnd, server->noautotune);
199 }
200 if (rc) { 187 if (rc) {
201 cFYI(1, ("reconnect error %d", rc)); 188 cFYI(1, ("reconnect error %d", rc));
202 msleep(3000); 189 msleep(3000);
@@ -776,7 +763,7 @@ multi_t2_fnd:
776 set_current_state(TASK_RUNNING); 763 set_current_state(TASK_RUNNING);
777 } 764 }
778 765
779 return 0; 766 module_put_and_exit(0);
780} 767}
781 768
782/* extract the host portion of the UNC string */ 769/* extract the host portion of the UNC string */
@@ -836,8 +823,8 @@ cifs_parse_mount_options(char *options, const char *devname,
836 /* null target name indicates to use *SMBSERVR default called name 823 /* null target name indicates to use *SMBSERVR default called name
837 if we end up sending RFC1001 session initialize */ 824 if we end up sending RFC1001 session initialize */
838 vol->target_rfc1001_name[0] = 0; 825 vol->target_rfc1001_name[0] = 0;
839 vol->linux_uid = current->uid; /* current->euid instead? */ 826 vol->linux_uid = current_uid(); /* use current_euid() instead? */
840 vol->linux_gid = current->gid; 827 vol->linux_gid = current_gid();
841 vol->dir_mode = S_IRWXUGO; 828 vol->dir_mode = S_IRWXUGO;
842 /* 2767 perms indicate mandatory locking support */ 829 /* 2767 perms indicate mandatory locking support */
843 vol->file_mode = (S_IRWXUGO | S_ISGID) & (~S_IXGRP); 830 vol->file_mode = (S_IRWXUGO | S_ISGID) & (~S_IXGRP);
@@ -1260,6 +1247,17 @@ cifs_parse_mount_options(char *options, const char *devname,
1260 if (vol->file_mode == 1247 if (vol->file_mode ==
1261 (S_IALLUGO & ~(S_ISUID | S_IXGRP))) 1248 (S_IALLUGO & ~(S_ISUID | S_IXGRP)))
1262 vol->file_mode = S_IALLUGO; 1249 vol->file_mode = S_IALLUGO;
1250 } else if (strnicmp(data, "forcemandatorylock", 9) == 0) {
1251 /* will take the shorter form "forcemand" as well */
1252 /* This mount option will force use of mandatory
1253 (DOS/Windows style) byte range locks, instead of
1254 using posix advisory byte range locks, even if the
1255 Unix extensions are available and posix locks would
1256 be supported otherwise. If Unix extensions are not
1257 negotiated this has no effect since mandatory locks
1258 would be used (mandatory locks is all that those
1259 those servers support) */
1260 vol->mand_lock = 1;
1263 } else if (strnicmp(data, "setuids", 7) == 0) { 1261 } else if (strnicmp(data, "setuids", 7) == 0) {
1264 vol->setuids = 1; 1262 vol->setuids = 1;
1265 } else if (strnicmp(data, "nosetuids", 9) == 0) { 1263 } else if (strnicmp(data, "nosetuids", 9) == 0) {
@@ -1417,6 +1415,143 @@ cifs_put_tcp_session(struct TCP_Server_Info *server)
1417 force_sig(SIGKILL, task); 1415 force_sig(SIGKILL, task);
1418} 1416}
1419 1417
1418static struct TCP_Server_Info *
1419cifs_get_tcp_session(struct smb_vol *volume_info)
1420{
1421 struct TCP_Server_Info *tcp_ses = NULL;
1422 struct sockaddr addr;
1423 struct sockaddr_in *sin_server = (struct sockaddr_in *) &addr;
1424 struct sockaddr_in6 *sin_server6 = (struct sockaddr_in6 *) &addr;
1425 int rc;
1426
1427 memset(&addr, 0, sizeof(struct sockaddr));
1428
1429 if (volume_info->UNCip && volume_info->UNC) {
1430 rc = cifs_inet_pton(AF_INET, volume_info->UNCip,
1431 &sin_server->sin_addr.s_addr);
1432
1433 if (rc <= 0) {
1434 /* not ipv4 address, try ipv6 */
1435 rc = cifs_inet_pton(AF_INET6, volume_info->UNCip,
1436 &sin_server6->sin6_addr.in6_u);
1437 if (rc > 0)
1438 addr.sa_family = AF_INET6;
1439 } else {
1440 addr.sa_family = AF_INET;
1441 }
1442
1443 if (rc <= 0) {
1444 /* we failed translating address */
1445 rc = -EINVAL;
1446 goto out_err;
1447 }
1448
1449 cFYI(1, ("UNC: %s ip: %s", volume_info->UNC,
1450 volume_info->UNCip));
1451 } else if (volume_info->UNCip) {
1452 /* BB using ip addr as tcp_ses name to connect to the
1453 DFS root below */
1454 cERROR(1, ("Connecting to DFS root not implemented yet"));
1455 rc = -EINVAL;
1456 goto out_err;
1457 } else /* which tcp_sess DFS root would we conect to */ {
1458 cERROR(1,
1459 ("CIFS mount error: No UNC path (e.g. -o "
1460 "unc=//192.168.1.100/public) specified"));
1461 rc = -EINVAL;
1462 goto out_err;
1463 }
1464
1465 /* see if we already have a matching tcp_ses */
1466 tcp_ses = cifs_find_tcp_session(&addr);
1467 if (tcp_ses)
1468 return tcp_ses;
1469
1470 tcp_ses = kzalloc(sizeof(struct TCP_Server_Info), GFP_KERNEL);
1471 if (!tcp_ses) {
1472 rc = -ENOMEM;
1473 goto out_err;
1474 }
1475
1476 tcp_ses->hostname = extract_hostname(volume_info->UNC);
1477 if (IS_ERR(tcp_ses->hostname)) {
1478 rc = PTR_ERR(tcp_ses->hostname);
1479 goto out_err;
1480 }
1481
1482 tcp_ses->noblocksnd = volume_info->noblocksnd;
1483 tcp_ses->noautotune = volume_info->noautotune;
1484 atomic_set(&tcp_ses->inFlight, 0);
1485 init_waitqueue_head(&tcp_ses->response_q);
1486 init_waitqueue_head(&tcp_ses->request_q);
1487 INIT_LIST_HEAD(&tcp_ses->pending_mid_q);
1488 mutex_init(&tcp_ses->srv_mutex);
1489 memcpy(tcp_ses->workstation_RFC1001_name,
1490 volume_info->source_rfc1001_name, RFC1001_NAME_LEN_WITH_NULL);
1491 memcpy(tcp_ses->server_RFC1001_name,
1492 volume_info->target_rfc1001_name, RFC1001_NAME_LEN_WITH_NULL);
1493 tcp_ses->sequence_number = 0;
1494 INIT_LIST_HEAD(&tcp_ses->tcp_ses_list);
1495 INIT_LIST_HEAD(&tcp_ses->smb_ses_list);
1496
1497 /*
1498 * at this point we are the only ones with the pointer
1499 * to the struct since the kernel thread not created yet
1500 * no need to spinlock this init of tcpStatus or srv_count
1501 */
1502 tcp_ses->tcpStatus = CifsNew;
1503 ++tcp_ses->srv_count;
1504
1505 if (addr.sa_family == AF_INET6) {
1506 cFYI(1, ("attempting ipv6 connect"));
1507 /* BB should we allow ipv6 on port 139? */
1508 /* other OS never observed in Wild doing 139 with v6 */
1509 memcpy(&tcp_ses->addr.sockAddr6, sin_server6,
1510 sizeof(struct sockaddr_in6));
1511 sin_server6->sin6_port = htons(volume_info->port);
1512 rc = ipv6_connect(tcp_ses);
1513 } else {
1514 memcpy(&tcp_ses->addr.sockAddr, sin_server,
1515 sizeof(struct sockaddr_in));
1516 sin_server->sin_port = htons(volume_info->port);
1517 rc = ipv4_connect(tcp_ses);
1518 }
1519 if (rc < 0) {
1520 cERROR(1, ("Error connecting to socket. Aborting operation"));
1521 goto out_err;
1522 }
1523
1524 /*
1525 * since we're in a cifs function already, we know that
1526 * this will succeed. No need for try_module_get().
1527 */
1528 __module_get(THIS_MODULE);
1529 tcp_ses->tsk = kthread_run((void *)(void *)cifs_demultiplex_thread,
1530 tcp_ses, "cifsd");
1531 if (IS_ERR(tcp_ses->tsk)) {
1532 rc = PTR_ERR(tcp_ses->tsk);
1533 cERROR(1, ("error %d create cifsd thread", rc));
1534 module_put(THIS_MODULE);
1535 goto out_err;
1536 }
1537
1538 /* thread spawned, put it on the list */
1539 write_lock(&cifs_tcp_ses_lock);
1540 list_add(&tcp_ses->tcp_ses_list, &cifs_tcp_ses_list);
1541 write_unlock(&cifs_tcp_ses_lock);
1542
1543 return tcp_ses;
1544
1545out_err:
1546 if (tcp_ses) {
1547 kfree(tcp_ses->hostname);
1548 if (tcp_ses->ssocket)
1549 sock_release(tcp_ses->ssocket);
1550 kfree(tcp_ses);
1551 }
1552 return ERR_PTR(rc);
1553}
1554
1420static struct cifsSesInfo * 1555static struct cifsSesInfo *
1421cifs_find_smb_ses(struct TCP_Server_Info *server, char *username) 1556cifs_find_smb_ses(struct TCP_Server_Info *server, char *username)
1422{ 1557{
@@ -1593,93 +1728,96 @@ static void rfc1002mangle(char *target, char *source, unsigned int length)
1593 1728
1594 1729
1595static int 1730static int
1596ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, 1731ipv4_connect(struct TCP_Server_Info *server)
1597 char *netbios_name, char *target_name,
1598 bool noblocksnd, bool noautotune)
1599{ 1732{
1600 int rc = 0; 1733 int rc = 0;
1601 int connected = 0; 1734 bool connected = false;
1602 __be16 orig_port = 0; 1735 __be16 orig_port = 0;
1736 struct socket *socket = server->ssocket;
1603 1737
1604 if (*csocket == NULL) { 1738 if (socket == NULL) {
1605 rc = sock_create_kern(PF_INET, SOCK_STREAM, 1739 rc = sock_create_kern(PF_INET, SOCK_STREAM,
1606 IPPROTO_TCP, csocket); 1740 IPPROTO_TCP, &socket);
1607 if (rc < 0) { 1741 if (rc < 0) {
1608 cERROR(1, ("Error %d creating socket", rc)); 1742 cERROR(1, ("Error %d creating socket", rc));
1609 *csocket = NULL;
1610 return rc; 1743 return rc;
1611 } else {
1612 /* BB other socket options to set KEEPALIVE, NODELAY? */
1613 cFYI(1, ("Socket created"));
1614 (*csocket)->sk->sk_allocation = GFP_NOFS;
1615 cifs_reclassify_socket4(*csocket);
1616 } 1744 }
1745
1746 /* BB other socket options to set KEEPALIVE, NODELAY? */
1747 cFYI(1, ("Socket created"));
1748 server->ssocket = socket;
1749 socket->sk->sk_allocation = GFP_NOFS;
1750 cifs_reclassify_socket4(socket);
1617 } 1751 }
1618 1752
1619 psin_server->sin_family = AF_INET; 1753 /* user overrode default port */
1620 if (psin_server->sin_port) { /* user overrode default port */ 1754 if (server->addr.sockAddr.sin_port) {
1621 rc = (*csocket)->ops->connect(*csocket, 1755 rc = socket->ops->connect(socket, (struct sockaddr *)
1622 (struct sockaddr *) psin_server, 1756 &server->addr.sockAddr,
1623 sizeof(struct sockaddr_in), 0); 1757 sizeof(struct sockaddr_in), 0);
1624 if (rc >= 0) 1758 if (rc >= 0)
1625 connected = 1; 1759 connected = true;
1626 } 1760 }
1627 1761
1628 if (!connected) { 1762 if (!connected) {
1629 /* save original port so we can retry user specified port 1763 /* save original port so we can retry user specified port
1630 later if fall back ports fail this time */ 1764 later if fall back ports fail this time */
1631 orig_port = psin_server->sin_port; 1765 orig_port = server->addr.sockAddr.sin_port;
1632 1766
1633 /* do not retry on the same port we just failed on */ 1767 /* do not retry on the same port we just failed on */
1634 if (psin_server->sin_port != htons(CIFS_PORT)) { 1768 if (server->addr.sockAddr.sin_port != htons(CIFS_PORT)) {
1635 psin_server->sin_port = htons(CIFS_PORT); 1769 server->addr.sockAddr.sin_port = htons(CIFS_PORT);
1636 1770 rc = socket->ops->connect(socket,
1637 rc = (*csocket)->ops->connect(*csocket, 1771 (struct sockaddr *)
1638 (struct sockaddr *) psin_server, 1772 &server->addr.sockAddr,
1639 sizeof(struct sockaddr_in), 0); 1773 sizeof(struct sockaddr_in), 0);
1640 if (rc >= 0) 1774 if (rc >= 0)
1641 connected = 1; 1775 connected = true;
1642 } 1776 }
1643 } 1777 }
1644 if (!connected) { 1778 if (!connected) {
1645 psin_server->sin_port = htons(RFC1001_PORT); 1779 server->addr.sockAddr.sin_port = htons(RFC1001_PORT);
1646 rc = (*csocket)->ops->connect(*csocket, (struct sockaddr *) 1780 rc = socket->ops->connect(socket, (struct sockaddr *)
1647 psin_server, 1781 &server->addr.sockAddr,
1648 sizeof(struct sockaddr_in), 0); 1782 sizeof(struct sockaddr_in), 0);
1649 if (rc >= 0) 1783 if (rc >= 0)
1650 connected = 1; 1784 connected = true;
1651 } 1785 }
1652 1786
1653 /* give up here - unless we want to retry on different 1787 /* give up here - unless we want to retry on different
1654 protocol families some day */ 1788 protocol families some day */
1655 if (!connected) { 1789 if (!connected) {
1656 if (orig_port) 1790 if (orig_port)
1657 psin_server->sin_port = orig_port; 1791 server->addr.sockAddr.sin_port = orig_port;
1658 cFYI(1, ("Error %d connecting to server via ipv4", rc)); 1792 cFYI(1, ("Error %d connecting to server via ipv4", rc));
1659 sock_release(*csocket); 1793 sock_release(socket);
1660 *csocket = NULL; 1794 server->ssocket = NULL;
1661 return rc; 1795 return rc;
1662 } 1796 }
1663 /* Eventually check for other socket options to change from 1797
1664 the default. sock_setsockopt not used because it expects 1798
1665 user space buffer */ 1799 /*
1666 cFYI(1, ("sndbuf %d rcvbuf %d rcvtimeo 0x%lx", 1800 * Eventually check for other socket options to change from
1667 (*csocket)->sk->sk_sndbuf, 1801 * the default. sock_setsockopt not used because it expects
1668 (*csocket)->sk->sk_rcvbuf, (*csocket)->sk->sk_rcvtimeo)); 1802 * user space buffer
1669 (*csocket)->sk->sk_rcvtimeo = 7 * HZ; 1803 */
1670 if (!noblocksnd) 1804 socket->sk->sk_rcvtimeo = 7 * HZ;
1671 (*csocket)->sk->sk_sndtimeo = 3 * HZ; 1805 socket->sk->sk_sndtimeo = 3 * HZ;
1672 1806
1673 /* make the bufsizes depend on wsize/rsize and max requests */ 1807 /* make the bufsizes depend on wsize/rsize and max requests */
1674 if (noautotune) { 1808 if (server->noautotune) {
1675 if ((*csocket)->sk->sk_sndbuf < (200 * 1024)) 1809 if (socket->sk->sk_sndbuf < (200 * 1024))
1676 (*csocket)->sk->sk_sndbuf = 200 * 1024; 1810 socket->sk->sk_sndbuf = 200 * 1024;
1677 if ((*csocket)->sk->sk_rcvbuf < (140 * 1024)) 1811 if (socket->sk->sk_rcvbuf < (140 * 1024))
1678 (*csocket)->sk->sk_rcvbuf = 140 * 1024; 1812 socket->sk->sk_rcvbuf = 140 * 1024;
1679 } 1813 }
1680 1814
1815 cFYI(1, ("sndbuf %d rcvbuf %d rcvtimeo 0x%lx",
1816 socket->sk->sk_sndbuf,
1817 socket->sk->sk_rcvbuf, socket->sk->sk_rcvtimeo));
1818
1681 /* send RFC1001 sessinit */ 1819 /* send RFC1001 sessinit */
1682 if (psin_server->sin_port == htons(RFC1001_PORT)) { 1820 if (server->addr.sockAddr.sin_port == htons(RFC1001_PORT)) {
1683 /* some servers require RFC1001 sessinit before sending 1821 /* some servers require RFC1001 sessinit before sending
1684 negprot - BB check reconnection in case where second 1822 negprot - BB check reconnection in case where second
1685 sessinit is sent but no second negprot */ 1823 sessinit is sent but no second negprot */
@@ -1689,31 +1827,42 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket,
1689 GFP_KERNEL); 1827 GFP_KERNEL);
1690 if (ses_init_buf) { 1828 if (ses_init_buf) {
1691 ses_init_buf->trailer.session_req.called_len = 32; 1829 ses_init_buf->trailer.session_req.called_len = 32;
1692 if (target_name && (target_name[0] != 0)) { 1830 if (server->server_RFC1001_name &&
1693 rfc1002mangle(ses_init_buf->trailer.session_req.called_name, 1831 server->server_RFC1001_name[0] != 0)
1694 target_name, 16); 1832 rfc1002mangle(ses_init_buf->trailer.
1695 } else { 1833 session_req.called_name,
1696 rfc1002mangle(ses_init_buf->trailer.session_req.called_name, 1834 server->server_RFC1001_name,
1697 DEFAULT_CIFS_CALLED_NAME, 16); 1835 RFC1001_NAME_LEN_WITH_NULL);
1698 } 1836 else
1837 rfc1002mangle(ses_init_buf->trailer.
1838 session_req.called_name,
1839 DEFAULT_CIFS_CALLED_NAME,
1840 RFC1001_NAME_LEN_WITH_NULL);
1699 1841
1700 ses_init_buf->trailer.session_req.calling_len = 32; 1842 ses_init_buf->trailer.session_req.calling_len = 32;
1843
1701 /* calling name ends in null (byte 16) from old smb 1844 /* calling name ends in null (byte 16) from old smb
1702 convention. */ 1845 convention. */
1703 if (netbios_name && (netbios_name[0] != 0)) { 1846 if (server->workstation_RFC1001_name &&
1704 rfc1002mangle(ses_init_buf->trailer.session_req.calling_name, 1847 server->workstation_RFC1001_name[0] != 0)
1705 netbios_name, 16); 1848 rfc1002mangle(ses_init_buf->trailer.
1706 } else { 1849 session_req.calling_name,
1707 rfc1002mangle(ses_init_buf->trailer.session_req.calling_name, 1850 server->workstation_RFC1001_name,
1708 "LINUX_CIFS_CLNT", 16); 1851 RFC1001_NAME_LEN_WITH_NULL);
1709 } 1852 else
1853 rfc1002mangle(ses_init_buf->trailer.
1854 session_req.calling_name,
1855 "LINUX_CIFS_CLNT",
1856 RFC1001_NAME_LEN_WITH_NULL);
1857
1710 ses_init_buf->trailer.session_req.scope1 = 0; 1858 ses_init_buf->trailer.session_req.scope1 = 0;
1711 ses_init_buf->trailer.session_req.scope2 = 0; 1859 ses_init_buf->trailer.session_req.scope2 = 0;
1712 smb_buf = (struct smb_hdr *)ses_init_buf; 1860 smb_buf = (struct smb_hdr *)ses_init_buf;
1713 /* sizeof RFC1002_SESSION_REQUEST with no scope */ 1861 /* sizeof RFC1002_SESSION_REQUEST with no scope */
1714 smb_buf->smb_buf_length = 0x81000044; 1862 smb_buf->smb_buf_length = 0x81000044;
1715 rc = smb_send(*csocket, smb_buf, 0x44, 1863 rc = smb_send(socket, smb_buf, 0x44,
1716 (struct sockaddr *)psin_server, noblocksnd); 1864 (struct sockaddr *) &server->addr.sockAddr,
1865 server->noblocksnd);
1717 kfree(ses_init_buf); 1866 kfree(ses_init_buf);
1718 msleep(1); /* RFC1001 layer in at least one server 1867 msleep(1); /* RFC1001 layer in at least one server
1719 requires very short break before negprot 1868 requires very short break before negprot
@@ -1733,79 +1882,81 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket,
1733} 1882}
1734 1883
1735static int 1884static int
1736ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket, 1885ipv6_connect(struct TCP_Server_Info *server)
1737 bool noblocksnd)
1738{ 1886{
1739 int rc = 0; 1887 int rc = 0;
1740 int connected = 0; 1888 bool connected = false;
1741 __be16 orig_port = 0; 1889 __be16 orig_port = 0;
1890 struct socket *socket = server->ssocket;
1742 1891
1743 if (*csocket == NULL) { 1892 if (socket == NULL) {
1744 rc = sock_create_kern(PF_INET6, SOCK_STREAM, 1893 rc = sock_create_kern(PF_INET6, SOCK_STREAM,
1745 IPPROTO_TCP, csocket); 1894 IPPROTO_TCP, &socket);
1746 if (rc < 0) { 1895 if (rc < 0) {
1747 cERROR(1, ("Error %d creating ipv6 socket", rc)); 1896 cERROR(1, ("Error %d creating ipv6 socket", rc));
1748 *csocket = NULL; 1897 socket = NULL;
1749 return rc; 1898 return rc;
1750 } else {
1751 /* BB other socket options to set KEEPALIVE, NODELAY? */
1752 cFYI(1, ("ipv6 Socket created"));
1753 (*csocket)->sk->sk_allocation = GFP_NOFS;
1754 cifs_reclassify_socket6(*csocket);
1755 } 1899 }
1756 }
1757 1900
1758 psin_server->sin6_family = AF_INET6; 1901 /* BB other socket options to set KEEPALIVE, NODELAY? */
1902 cFYI(1, ("ipv6 Socket created"));
1903 server->ssocket = socket;
1904 socket->sk->sk_allocation = GFP_NOFS;
1905 cifs_reclassify_socket6(socket);
1906 }
1759 1907
1760 if (psin_server->sin6_port) { /* user overrode default port */ 1908 /* user overrode default port */
1761 rc = (*csocket)->ops->connect(*csocket, 1909 if (server->addr.sockAddr6.sin6_port) {
1762 (struct sockaddr *) psin_server, 1910 rc = socket->ops->connect(socket,
1911 (struct sockaddr *) &server->addr.sockAddr6,
1763 sizeof(struct sockaddr_in6), 0); 1912 sizeof(struct sockaddr_in6), 0);
1764 if (rc >= 0) 1913 if (rc >= 0)
1765 connected = 1; 1914 connected = true;
1766 } 1915 }
1767 1916
1768 if (!connected) { 1917 if (!connected) {
1769 /* save original port so we can retry user specified port 1918 /* save original port so we can retry user specified port
1770 later if fall back ports fail this time */ 1919 later if fall back ports fail this time */
1771 1920
1772 orig_port = psin_server->sin6_port; 1921 orig_port = server->addr.sockAddr6.sin6_port;
1773 /* do not retry on the same port we just failed on */ 1922 /* do not retry on the same port we just failed on */
1774 if (psin_server->sin6_port != htons(CIFS_PORT)) { 1923 if (server->addr.sockAddr6.sin6_port != htons(CIFS_PORT)) {
1775 psin_server->sin6_port = htons(CIFS_PORT); 1924 server->addr.sockAddr6.sin6_port = htons(CIFS_PORT);
1776 1925 rc = socket->ops->connect(socket, (struct sockaddr *)
1777 rc = (*csocket)->ops->connect(*csocket, 1926 &server->addr.sockAddr6,
1778 (struct sockaddr *) psin_server,
1779 sizeof(struct sockaddr_in6), 0); 1927 sizeof(struct sockaddr_in6), 0);
1780 if (rc >= 0) 1928 if (rc >= 0)
1781 connected = 1; 1929 connected = true;
1782 } 1930 }
1783 } 1931 }
1784 if (!connected) { 1932 if (!connected) {
1785 psin_server->sin6_port = htons(RFC1001_PORT); 1933 server->addr.sockAddr6.sin6_port = htons(RFC1001_PORT);
1786 rc = (*csocket)->ops->connect(*csocket, (struct sockaddr *) 1934 rc = socket->ops->connect(socket, (struct sockaddr *)
1787 psin_server, sizeof(struct sockaddr_in6), 0); 1935 &server->addr.sockAddr6,
1936 sizeof(struct sockaddr_in6), 0);
1788 if (rc >= 0) 1937 if (rc >= 0)
1789 connected = 1; 1938 connected = true;
1790 } 1939 }
1791 1940
1792 /* give up here - unless we want to retry on different 1941 /* give up here - unless we want to retry on different
1793 protocol families some day */ 1942 protocol families some day */
1794 if (!connected) { 1943 if (!connected) {
1795 if (orig_port) 1944 if (orig_port)
1796 psin_server->sin6_port = orig_port; 1945 server->addr.sockAddr6.sin6_port = orig_port;
1797 cFYI(1, ("Error %d connecting to server via ipv6", rc)); 1946 cFYI(1, ("Error %d connecting to server via ipv6", rc));
1798 sock_release(*csocket); 1947 sock_release(socket);
1799 *csocket = NULL; 1948 server->ssocket = NULL;
1800 return rc; 1949 return rc;
1801 } 1950 }
1802 /* Eventually check for other socket options to change from
1803 the default. sock_setsockopt not used because it expects
1804 user space buffer */
1805 (*csocket)->sk->sk_rcvtimeo = 7 * HZ;
1806 if (!noblocksnd)
1807 (*csocket)->sk->sk_sndtimeo = 3 * HZ;
1808 1951
1952 /*
1953 * Eventually check for other socket options to change from
1954 * the default. sock_setsockopt not used because it expects
1955 * user space buffer
1956 */
1957 socket->sk->sk_rcvtimeo = 7 * HZ;
1958 socket->sk->sk_sndtimeo = 3 * HZ;
1959 server->ssocket = socket;
1809 1960
1810 return rc; 1961 return rc;
1811} 1962}
@@ -2011,6 +2162,8 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info,
2011 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_UNX_EMUL; 2162 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_UNX_EMUL;
2012 if (pvolume_info->nobrl) 2163 if (pvolume_info->nobrl)
2013 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_BRL; 2164 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_BRL;
2165 if (pvolume_info->mand_lock)
2166 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NOPOSIXBRL;
2014 if (pvolume_info->cifs_acl) 2167 if (pvolume_info->cifs_acl)
2015 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_ACL; 2168 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_ACL;
2016 if (pvolume_info->override_uid) 2169 if (pvolume_info->override_uid)
@@ -2035,32 +2188,30 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2035{ 2188{
2036 int rc = 0; 2189 int rc = 0;
2037 int xid; 2190 int xid;
2038 struct socket *csocket = NULL; 2191 struct smb_vol *volume_info;
2039 struct sockaddr addr;
2040 struct sockaddr_in *sin_server = (struct sockaddr_in *) &addr;
2041 struct sockaddr_in6 *sin_server6 = (struct sockaddr_in6 *) &addr;
2042 struct smb_vol volume_info;
2043 struct cifsSesInfo *pSesInfo = NULL; 2192 struct cifsSesInfo *pSesInfo = NULL;
2044 struct cifsTconInfo *tcon = NULL; 2193 struct cifsTconInfo *tcon = NULL;
2045 struct TCP_Server_Info *srvTcp = NULL; 2194 struct TCP_Server_Info *srvTcp = NULL;
2046 2195
2047 xid = GetXid(); 2196 xid = GetXid();
2048 2197
2049/* cFYI(1, ("Entering cifs_mount. Xid: %d with: %s", xid, mount_data)); */ 2198 volume_info = kzalloc(sizeof(struct smb_vol), GFP_KERNEL);
2199 if (!volume_info) {
2200 rc = -ENOMEM;
2201 goto out;
2202 }
2050 2203
2051 memset(&addr, 0, sizeof(struct sockaddr)); 2204 if (cifs_parse_mount_options(mount_data, devname, volume_info)) {
2052 memset(&volume_info, 0, sizeof(struct smb_vol));
2053 if (cifs_parse_mount_options(mount_data, devname, &volume_info)) {
2054 rc = -EINVAL; 2205 rc = -EINVAL;
2055 goto out; 2206 goto out;
2056 } 2207 }
2057 2208
2058 if (volume_info.nullauth) { 2209 if (volume_info->nullauth) {
2059 cFYI(1, ("null user")); 2210 cFYI(1, ("null user"));
2060 volume_info.username = ""; 2211 volume_info->username = "";
2061 } else if (volume_info.username) { 2212 } else if (volume_info->username) {
2062 /* BB fixme parse for domain name here */ 2213 /* BB fixme parse for domain name here */
2063 cFYI(1, ("Username: %s", volume_info.username)); 2214 cFYI(1, ("Username: %s", volume_info->username));
2064 } else { 2215 } else {
2065 cifserror("No username specified"); 2216 cifserror("No username specified");
2066 /* In userspace mount helper we can get user name from alternate 2217 /* In userspace mount helper we can get user name from alternate
@@ -2069,139 +2220,29 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2069 goto out; 2220 goto out;
2070 } 2221 }
2071 2222
2072 if (volume_info.UNCip && volume_info.UNC) {
2073 rc = cifs_inet_pton(AF_INET, volume_info.UNCip,
2074 &sin_server->sin_addr.s_addr);
2075
2076 if (rc <= 0) {
2077 /* not ipv4 address, try ipv6 */
2078 rc = cifs_inet_pton(AF_INET6, volume_info.UNCip,
2079 &sin_server6->sin6_addr.in6_u);
2080 if (rc > 0)
2081 addr.sa_family = AF_INET6;
2082 } else {
2083 addr.sa_family = AF_INET;
2084 }
2085
2086 if (rc <= 0) {
2087 /* we failed translating address */
2088 rc = -EINVAL;
2089 goto out;
2090 }
2091
2092 cFYI(1, ("UNC: %s ip: %s", volume_info.UNC, volume_info.UNCip));
2093 /* success */
2094 rc = 0;
2095 } else if (volume_info.UNCip) {
2096 /* BB using ip addr as server name to connect to the
2097 DFS root below */
2098 cERROR(1, ("Connecting to DFS root not implemented yet"));
2099 rc = -EINVAL;
2100 goto out;
2101 } else /* which servers DFS root would we conect to */ {
2102 cERROR(1,
2103 ("CIFS mount error: No UNC path (e.g. -o "
2104 "unc=//192.168.1.100/public) specified"));
2105 rc = -EINVAL;
2106 goto out;
2107 }
2108 2223
2109 /* this is needed for ASCII cp to Unicode converts */ 2224 /* this is needed for ASCII cp to Unicode converts */
2110 if (volume_info.iocharset == NULL) { 2225 if (volume_info->iocharset == NULL) {
2111 cifs_sb->local_nls = load_nls_default(); 2226 cifs_sb->local_nls = load_nls_default();
2112 /* load_nls_default can not return null */ 2227 /* load_nls_default can not return null */
2113 } else { 2228 } else {
2114 cifs_sb->local_nls = load_nls(volume_info.iocharset); 2229 cifs_sb->local_nls = load_nls(volume_info->iocharset);
2115 if (cifs_sb->local_nls == NULL) { 2230 if (cifs_sb->local_nls == NULL) {
2116 cERROR(1, ("CIFS mount error: iocharset %s not found", 2231 cERROR(1, ("CIFS mount error: iocharset %s not found",
2117 volume_info.iocharset)); 2232 volume_info->iocharset));
2118 rc = -ELIBACC; 2233 rc = -ELIBACC;
2119 goto out; 2234 goto out;
2120 } 2235 }
2121 } 2236 }
2122 2237
2123 srvTcp = cifs_find_tcp_session(&addr); 2238 /* get a reference to a tcp session */
2124 if (!srvTcp) { /* create socket */ 2239 srvTcp = cifs_get_tcp_session(volume_info);
2125 if (addr.sa_family == AF_INET6) { 2240 if (IS_ERR(srvTcp)) {
2126 cFYI(1, ("attempting ipv6 connect")); 2241 rc = PTR_ERR(srvTcp);
2127 /* BB should we allow ipv6 on port 139? */ 2242 goto out;
2128 /* other OS never observed in Wild doing 139 with v6 */
2129 sin_server6->sin6_port = htons(volume_info.port);
2130 rc = ipv6_connect(sin_server6, &csocket,
2131 volume_info.noblocksnd);
2132 } else {
2133 sin_server->sin_port = htons(volume_info.port);
2134 rc = ipv4_connect(sin_server, &csocket,
2135 volume_info.source_rfc1001_name,
2136 volume_info.target_rfc1001_name,
2137 volume_info.noblocksnd,
2138 volume_info.noautotune);
2139 }
2140 if (rc < 0) {
2141 cERROR(1, ("Error connecting to socket. "
2142 "Aborting operation"));
2143 if (csocket != NULL)
2144 sock_release(csocket);
2145 goto out;
2146 }
2147
2148 srvTcp = kzalloc(sizeof(struct TCP_Server_Info), GFP_KERNEL);
2149 if (!srvTcp) {
2150 rc = -ENOMEM;
2151 sock_release(csocket);
2152 goto out;
2153 } else {
2154 srvTcp->noblocksnd = volume_info.noblocksnd;
2155 srvTcp->noautotune = volume_info.noautotune;
2156 if (addr.sa_family == AF_INET6)
2157 memcpy(&srvTcp->addr.sockAddr6, sin_server6,
2158 sizeof(struct sockaddr_in6));
2159 else
2160 memcpy(&srvTcp->addr.sockAddr, sin_server,
2161 sizeof(struct sockaddr_in));
2162 atomic_set(&srvTcp->inFlight, 0);
2163 /* BB Add code for ipv6 case too */
2164 srvTcp->ssocket = csocket;
2165 srvTcp->hostname = extract_hostname(volume_info.UNC);
2166 if (IS_ERR(srvTcp->hostname)) {
2167 rc = PTR_ERR(srvTcp->hostname);
2168 sock_release(csocket);
2169 goto out;
2170 }
2171 init_waitqueue_head(&srvTcp->response_q);
2172 init_waitqueue_head(&srvTcp->request_q);
2173 INIT_LIST_HEAD(&srvTcp->pending_mid_q);
2174 /* at this point we are the only ones with the pointer
2175 to the struct since the kernel thread not created yet
2176 so no need to spinlock this init of tcpStatus */
2177 srvTcp->tcpStatus = CifsNew;
2178 init_MUTEX(&srvTcp->tcpSem);
2179 srvTcp->tsk = kthread_run((void *)(void *)cifs_demultiplex_thread, srvTcp, "cifsd");
2180 if (IS_ERR(srvTcp->tsk)) {
2181 rc = PTR_ERR(srvTcp->tsk);
2182 cERROR(1, ("error %d create cifsd thread", rc));
2183 srvTcp->tsk = NULL;
2184 sock_release(csocket);
2185 kfree(srvTcp->hostname);
2186 goto out;
2187 }
2188 rc = 0;
2189 memcpy(srvTcp->workstation_RFC1001_name,
2190 volume_info.source_rfc1001_name, 16);
2191 memcpy(srvTcp->server_RFC1001_name,
2192 volume_info.target_rfc1001_name, 16);
2193 srvTcp->sequence_number = 0;
2194 INIT_LIST_HEAD(&srvTcp->tcp_ses_list);
2195 INIT_LIST_HEAD(&srvTcp->smb_ses_list);
2196 ++srvTcp->srv_count;
2197 write_lock(&cifs_tcp_ses_lock);
2198 list_add(&srvTcp->tcp_ses_list,
2199 &cifs_tcp_ses_list);
2200 write_unlock(&cifs_tcp_ses_lock);
2201 }
2202 } 2243 }
2203 2244
2204 pSesInfo = cifs_find_smb_ses(srvTcp, volume_info.username); 2245 pSesInfo = cifs_find_smb_ses(srvTcp, volume_info->username);
2205 if (pSesInfo) { 2246 if (pSesInfo) {
2206 cFYI(1, ("Existing smb sess found (status=%d)", 2247 cFYI(1, ("Existing smb sess found (status=%d)",
2207 pSesInfo->status)); 2248 pSesInfo->status));
@@ -2228,31 +2269,38 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2228 2269
2229 /* new SMB session uses our srvTcp ref */ 2270 /* new SMB session uses our srvTcp ref */
2230 pSesInfo->server = srvTcp; 2271 pSesInfo->server = srvTcp;
2231 sprintf(pSesInfo->serverName, "%u.%u.%u.%u", 2272 if (srvTcp->addr.sockAddr6.sin6_family == AF_INET6)
2232 NIPQUAD(sin_server->sin_addr.s_addr)); 2273 sprintf(pSesInfo->serverName, "%pI6",
2274 &srvTcp->addr.sockAddr6.sin6_addr);
2275 else
2276 sprintf(pSesInfo->serverName, "%pI4",
2277 &srvTcp->addr.sockAddr.sin_addr.s_addr);
2233 2278
2234 write_lock(&cifs_tcp_ses_lock); 2279 write_lock(&cifs_tcp_ses_lock);
2235 list_add(&pSesInfo->smb_ses_list, &srvTcp->smb_ses_list); 2280 list_add(&pSesInfo->smb_ses_list, &srvTcp->smb_ses_list);
2236 write_unlock(&cifs_tcp_ses_lock); 2281 write_unlock(&cifs_tcp_ses_lock);
2237 2282
2238 /* volume_info.password freed at unmount */ 2283 /* volume_info->password freed at unmount */
2239 if (volume_info.password) { 2284 if (volume_info->password) {
2240 pSesInfo->password = volume_info.password; 2285 pSesInfo->password = kstrdup(volume_info->password,
2241 /* set to NULL to prevent freeing on exit */ 2286 GFP_KERNEL);
2242 volume_info.password = NULL; 2287 if (!pSesInfo->password) {
2288 rc = -ENOMEM;
2289 goto mount_fail_check;
2290 }
2243 } 2291 }
2244 if (volume_info.username) 2292 if (volume_info->username)
2245 strncpy(pSesInfo->userName, volume_info.username, 2293 strncpy(pSesInfo->userName, volume_info->username,
2246 MAX_USERNAME_SIZE); 2294 MAX_USERNAME_SIZE);
2247 if (volume_info.domainname) { 2295 if (volume_info->domainname) {
2248 int len = strlen(volume_info.domainname); 2296 int len = strlen(volume_info->domainname);
2249 pSesInfo->domainName = kmalloc(len + 1, GFP_KERNEL); 2297 pSesInfo->domainName = kmalloc(len + 1, GFP_KERNEL);
2250 if (pSesInfo->domainName) 2298 if (pSesInfo->domainName)
2251 strcpy(pSesInfo->domainName, 2299 strcpy(pSesInfo->domainName,
2252 volume_info.domainname); 2300 volume_info->domainname);
2253 } 2301 }
2254 pSesInfo->linux_uid = volume_info.linux_uid; 2302 pSesInfo->linux_uid = volume_info->linux_uid;
2255 pSesInfo->overrideSecFlg = volume_info.secFlg; 2303 pSesInfo->overrideSecFlg = volume_info->secFlg;
2256 down(&pSesInfo->sesSem); 2304 down(&pSesInfo->sesSem);
2257 2305
2258 /* BB FIXME need to pass vol->secFlgs BB */ 2306 /* BB FIXME need to pass vol->secFlgs BB */
@@ -2263,14 +2311,14 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2263 2311
2264 /* search for existing tcon to this server share */ 2312 /* search for existing tcon to this server share */
2265 if (!rc) { 2313 if (!rc) {
2266 setup_cifs_sb(&volume_info, cifs_sb); 2314 setup_cifs_sb(volume_info, cifs_sb);
2267 2315
2268 tcon = cifs_find_tcon(pSesInfo, volume_info.UNC); 2316 tcon = cifs_find_tcon(pSesInfo, volume_info->UNC);
2269 if (tcon) { 2317 if (tcon) {
2270 cFYI(1, ("Found match on UNC path")); 2318 cFYI(1, ("Found match on UNC path"));
2271 /* existing tcon already has a reference */ 2319 /* existing tcon already has a reference */
2272 cifs_put_smb_ses(pSesInfo); 2320 cifs_put_smb_ses(pSesInfo);
2273 if (tcon->seal != volume_info.seal) 2321 if (tcon->seal != volume_info->seal)
2274 cERROR(1, ("transport encryption setting " 2322 cERROR(1, ("transport encryption setting "
2275 "conflicts with existing tid")); 2323 "conflicts with existing tid"));
2276 } else { 2324 } else {
@@ -2279,11 +2327,20 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2279 rc = -ENOMEM; 2327 rc = -ENOMEM;
2280 goto mount_fail_check; 2328 goto mount_fail_check;
2281 } 2329 }
2330
2282 tcon->ses = pSesInfo; 2331 tcon->ses = pSesInfo;
2332 if (volume_info->password) {
2333 tcon->password = kstrdup(volume_info->password,
2334 GFP_KERNEL);
2335 if (!tcon->password) {
2336 rc = -ENOMEM;
2337 goto mount_fail_check;
2338 }
2339 }
2283 2340
2284 /* check for null share name ie connect to dfs root */ 2341 /* check for null share name ie connect to dfs root */
2285 if ((strchr(volume_info.UNC + 3, '\\') == NULL) 2342 if ((strchr(volume_info->UNC + 3, '\\') == NULL)
2286 && (strchr(volume_info.UNC + 3, '/') == NULL)) { 2343 && (strchr(volume_info->UNC + 3, '/') == NULL)) {
2287 /* rc = connect_to_dfs_path(...) */ 2344 /* rc = connect_to_dfs_path(...) */
2288 cFYI(1, ("DFS root not supported")); 2345 cFYI(1, ("DFS root not supported"));
2289 rc = -ENODEV; 2346 rc = -ENODEV;
@@ -2292,10 +2349,10 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2292 /* BB Do we need to wrap sesSem around 2349 /* BB Do we need to wrap sesSem around
2293 * this TCon call and Unix SetFS as 2350 * this TCon call and Unix SetFS as
2294 * we do on SessSetup and reconnect? */ 2351 * we do on SessSetup and reconnect? */
2295 rc = CIFSTCon(xid, pSesInfo, volume_info.UNC, 2352 rc = CIFSTCon(xid, pSesInfo, volume_info->UNC,
2296 tcon, cifs_sb->local_nls); 2353 tcon, cifs_sb->local_nls);
2297 cFYI(1, ("CIFS Tcon rc = %d", rc)); 2354 cFYI(1, ("CIFS Tcon rc = %d", rc));
2298 if (volume_info.nodfs) { 2355 if (volume_info->nodfs) {
2299 tcon->Flags &= ~SMB_SHARE_IS_IN_DFS; 2356 tcon->Flags &= ~SMB_SHARE_IS_IN_DFS;
2300 cFYI(1, ("DFS disabled (%d)", 2357 cFYI(1, ("DFS disabled (%d)",
2301 tcon->Flags)); 2358 tcon->Flags));
@@ -2303,7 +2360,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2303 } 2360 }
2304 if (rc) 2361 if (rc)
2305 goto mount_fail_check; 2362 goto mount_fail_check;
2306 tcon->seal = volume_info.seal; 2363 tcon->seal = volume_info->seal;
2307 write_lock(&cifs_tcp_ses_lock); 2364 write_lock(&cifs_tcp_ses_lock);
2308 list_add(&tcon->tcon_list, &pSesInfo->tcon_list); 2365 list_add(&tcon->tcon_list, &pSesInfo->tcon_list);
2309 write_unlock(&cifs_tcp_ses_lock); 2366 write_unlock(&cifs_tcp_ses_lock);
@@ -2313,9 +2370,9 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2313 to a share so for resources mounted more than once 2370 to a share so for resources mounted more than once
2314 to the same server share the last value passed in 2371 to the same server share the last value passed in
2315 for the retry flag is used */ 2372 for the retry flag is used */
2316 tcon->retry = volume_info.retry; 2373 tcon->retry = volume_info->retry;
2317 tcon->nocase = volume_info.nocase; 2374 tcon->nocase = volume_info->nocase;
2318 tcon->local_lease = volume_info.local_lease; 2375 tcon->local_lease = volume_info->local_lease;
2319 } 2376 }
2320 if (pSesInfo) { 2377 if (pSesInfo) {
2321 if (pSesInfo->capabilities & CAP_LARGE_FILES) { 2378 if (pSesInfo->capabilities & CAP_LARGE_FILES) {
@@ -2352,7 +2409,7 @@ mount_fail_check:
2352 if (tcon->ses->capabilities & CAP_UNIX) 2409 if (tcon->ses->capabilities & CAP_UNIX)
2353 /* reset of caps checks mount to see if unix extensions 2410 /* reset of caps checks mount to see if unix extensions
2354 disabled for just this mount */ 2411 disabled for just this mount */
2355 reset_cifs_unix_caps(xid, tcon, sb, &volume_info); 2412 reset_cifs_unix_caps(xid, tcon, sb, volume_info);
2356 else 2413 else
2357 tcon->unix_ext = 0; /* server does not support them */ 2414 tcon->unix_ext = 0; /* server does not support them */
2358 2415
@@ -2371,18 +2428,22 @@ mount_fail_check:
2371 cifs_sb->rsize = min(cifs_sb->rsize, 2428 cifs_sb->rsize = min(cifs_sb->rsize,
2372 (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE)); 2429 (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE));
2373 2430
2374 /* volume_info.password is freed above when existing session found 2431 /* volume_info->password is freed above when existing session found
2375 (in which case it is not needed anymore) but when new sesion is created 2432 (in which case it is not needed anymore) but when new sesion is created
2376 the password ptr is put in the new session structure (in which case the 2433 the password ptr is put in the new session structure (in which case the
2377 password will be freed at unmount time) */ 2434 password will be freed at unmount time) */
2378out: 2435out:
2379 /* zero out password before freeing */ 2436 /* zero out password before freeing */
2380 if (volume_info.password != NULL) { 2437 if (volume_info) {
2381 memset(volume_info.password, 0, strlen(volume_info.password)); 2438 if (volume_info->password != NULL) {
2382 kfree(volume_info.password); 2439 memset(volume_info->password, 0,
2440 strlen(volume_info->password));
2441 kfree(volume_info->password);
2442 }
2443 kfree(volume_info->UNC);
2444 kfree(volume_info->prepath);
2445 kfree(volume_info);
2383 } 2446 }
2384 kfree(volume_info.UNC);
2385 kfree(volume_info.prepath);
2386 FreeXid(xid); 2447 FreeXid(xid);
2387 return rc; 2448 return rc;
2388} 2449}
@@ -2533,7 +2594,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2533 __u16 action = le16_to_cpu(pSMBr->resp.Action); 2594 __u16 action = le16_to_cpu(pSMBr->resp.Action);
2534 __u16 blob_len = le16_to_cpu(pSMBr->resp.SecurityBlobLength); 2595 __u16 blob_len = le16_to_cpu(pSMBr->resp.SecurityBlobLength);
2535 if (action & GUEST_LOGIN) 2596 if (action & GUEST_LOGIN)
2536 cFYI(1, (" Guest login")); /* BB mark SesInfo struct? */ 2597 cFYI(1, ("Guest login")); /* BB mark SesInfo struct? */
2537 ses->Suid = smb_buffer_response->Uid; /* UID left in wire format 2598 ses->Suid = smb_buffer_response->Uid; /* UID left in wire format
2538 (little endian) */ 2599 (little endian) */
2539 cFYI(1, ("UID = %d ", ses->Suid)); 2600 cFYI(1, ("UID = %d ", ses->Suid));
@@ -2679,13 +2740,11 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2679 len)); 2740 len));
2680 } 2741 }
2681 } else { 2742 } else {
2682 cERROR(1, 2743 cERROR(1, ("Security Blob Length extends beyond "
2683 (" Security Blob Length extends beyond "
2684 "end of SMB")); 2744 "end of SMB"));
2685 } 2745 }
2686 } else { 2746 } else {
2687 cERROR(1, 2747 cERROR(1, ("Invalid Word count %d: ",
2688 (" Invalid Word count %d: ",
2689 smb_buffer_response->WordCount)); 2748 smb_buffer_response->WordCount));
2690 rc = -EIO; 2749 rc = -EIO;
2691 } 2750 }
@@ -2843,7 +2902,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2843 __u16 blob_len = le16_to_cpu(pSMBr->resp.SecurityBlobLength); 2902 __u16 blob_len = le16_to_cpu(pSMBr->resp.SecurityBlobLength);
2844 2903
2845 if (action & GUEST_LOGIN) 2904 if (action & GUEST_LOGIN)
2846 cFYI(1, (" Guest login")); 2905 cFYI(1, ("Guest login"));
2847 /* Do we want to set anything in SesInfo struct when guest login? */ 2906 /* Do we want to set anything in SesInfo struct when guest login? */
2848 2907
2849 bcc_ptr = pByteArea(smb_buffer_response); 2908 bcc_ptr = pByteArea(smb_buffer_response);
@@ -2851,8 +2910,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2851 2910
2852 SecurityBlob2 = (PCHALLENGE_MESSAGE) bcc_ptr; 2911 SecurityBlob2 = (PCHALLENGE_MESSAGE) bcc_ptr;
2853 if (SecurityBlob2->MessageType != NtLmChallenge) { 2912 if (SecurityBlob2->MessageType != NtLmChallenge) {
2854 cFYI(1, 2913 cFYI(1, ("Unexpected NTLMSSP message type received %d",
2855 ("Unexpected NTLMSSP message type received %d",
2856 SecurityBlob2->MessageType)); 2914 SecurityBlob2->MessageType));
2857 } else if (ses) { 2915 } else if (ses) {
2858 ses->Suid = smb_buffer_response->Uid; /* UID left in le format */ 2916 ses->Suid = smb_buffer_response->Uid; /* UID left in le format */
@@ -3024,8 +3082,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
3024 cERROR(1, ("No session structure passed in.")); 3082 cERROR(1, ("No session structure passed in."));
3025 } 3083 }
3026 } else { 3084 } else {
3027 cERROR(1, 3085 cERROR(1, ("Invalid Word count %d:",
3028 (" Invalid Word count %d:",
3029 smb_buffer_response->WordCount)); 3086 smb_buffer_response->WordCount));
3030 rc = -EIO; 3087 rc = -EIO;
3031 } 3088 }
@@ -3264,7 +3321,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
3264 __u16 action = le16_to_cpu(pSMBr->resp.Action); 3321 __u16 action = le16_to_cpu(pSMBr->resp.Action);
3265 __u16 blob_len = le16_to_cpu(pSMBr->resp.SecurityBlobLength); 3322 __u16 blob_len = le16_to_cpu(pSMBr->resp.SecurityBlobLength);
3266 if (action & GUEST_LOGIN) 3323 if (action & GUEST_LOGIN)
3267 cFYI(1, (" Guest login")); /* BB Should we set anything 3324 cFYI(1, ("Guest login")); /* BB Should we set anything
3268 in SesInfo struct ? */ 3325 in SesInfo struct ? */
3269/* if (SecurityBlob2->MessageType != NtLm??) { 3326/* if (SecurityBlob2->MessageType != NtLm??) {
3270 cFYI("Unexpected message type on auth response is %d")); 3327 cFYI("Unexpected message type on auth response is %d"));
@@ -3487,12 +3544,14 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
3487 NTLMv2 password here) */ 3544 NTLMv2 password here) */
3488#ifdef CONFIG_CIFS_WEAK_PW_HASH 3545#ifdef CONFIG_CIFS_WEAK_PW_HASH
3489 if ((extended_security & CIFSSEC_MAY_LANMAN) && 3546 if ((extended_security & CIFSSEC_MAY_LANMAN) &&
3490 (ses->server->secType == LANMAN)) 3547 (ses->server->secType == LANMAN))
3491 calc_lanman_hash(ses, bcc_ptr); 3548 calc_lanman_hash(tcon->password, ses->server->cryptKey,
3549 ses->server->secMode &
3550 SECMODE_PW_ENCRYPT ? true : false,
3551 bcc_ptr);
3492 else 3552 else
3493#endif /* CIFS_WEAK_PW_HASH */ 3553#endif /* CIFS_WEAK_PW_HASH */
3494 SMBNTencrypt(ses->password, 3554 SMBNTencrypt(tcon->password, ses->server->cryptKey,
3495 ses->server->cryptKey,
3496 bcc_ptr); 3555 bcc_ptr);
3497 3556
3498 bcc_ptr += CIFS_SESS_KEY_SIZE; 3557 bcc_ptr += CIFS_SESS_KEY_SIZE;
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index e962e75e6f7..838d9c720a5 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -235,11 +235,11 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
235 }; 235 };
236 236
237 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { 237 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
238 args.uid = (__u64) current->fsuid; 238 args.uid = (__u64) current_fsuid();
239 if (inode->i_mode & S_ISGID) 239 if (inode->i_mode & S_ISGID)
240 args.gid = (__u64) inode->i_gid; 240 args.gid = (__u64) inode->i_gid;
241 else 241 else
242 args.gid = (__u64) current->fsgid; 242 args.gid = (__u64) current_fsgid();
243 } else { 243 } else {
244 args.uid = NO_CHANGE_64; 244 args.uid = NO_CHANGE_64;
245 args.gid = NO_CHANGE_64; 245 args.gid = NO_CHANGE_64;
@@ -271,13 +271,13 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
271 if ((oplock & CIFS_CREATE_ACTION) && 271 if ((oplock & CIFS_CREATE_ACTION) &&
272 (cifs_sb->mnt_cifs_flags & 272 (cifs_sb->mnt_cifs_flags &
273 CIFS_MOUNT_SET_UID)) { 273 CIFS_MOUNT_SET_UID)) {
274 newinode->i_uid = current->fsuid; 274 newinode->i_uid = current_fsuid();
275 if (inode->i_mode & S_ISGID) 275 if (inode->i_mode & S_ISGID)
276 newinode->i_gid = 276 newinode->i_gid =
277 inode->i_gid; 277 inode->i_gid;
278 else 278 else
279 newinode->i_gid = 279 newinode->i_gid =
280 current->fsgid; 280 current_fsgid();
281 } 281 }
282 } 282 }
283 } 283 }
@@ -375,8 +375,8 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
375 .device = device_number, 375 .device = device_number,
376 }; 376 };
377 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { 377 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
378 args.uid = (__u64) current->fsuid; 378 args.uid = (__u64) current_fsuid();
379 args.gid = (__u64) current->fsgid; 379 args.gid = (__u64) current_fsgid();
380 } else { 380 } else {
381 args.uid = NO_CHANGE_64; 381 args.uid = NO_CHANGE_64;
382 args.gid = NO_CHANGE_64; 382 args.gid = NO_CHANGE_64;
@@ -483,7 +483,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
483 483
484 xid = GetXid(); 484 xid = GetXid();
485 485
486 cFYI(1, (" parent inode = 0x%p name is: %s and dentry = 0x%p", 486 cFYI(1, ("parent inode = 0x%p name is: %s and dentry = 0x%p",
487 parent_dir_inode, direntry->d_name.name, direntry)); 487 parent_dir_inode, direntry->d_name.name, direntry));
488 488
489 /* check whether path exists */ 489 /* check whether path exists */
@@ -515,12 +515,11 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
515 } 515 }
516 516
517 if (direntry->d_inode != NULL) { 517 if (direntry->d_inode != NULL) {
518 cFYI(1, (" non-NULL inode in lookup")); 518 cFYI(1, ("non-NULL inode in lookup"));
519 } else { 519 } else {
520 cFYI(1, (" NULL inode in lookup")); 520 cFYI(1, ("NULL inode in lookup"));
521 } 521 }
522 cFYI(1, 522 cFYI(1, ("Full path: %s inode = 0x%p", full_path, direntry->d_inode));
523 (" Full path: %s inode = 0x%p", full_path, direntry->d_inode));
524 523
525 if (pTcon->unix_ext) 524 if (pTcon->unix_ext)
526 rc = cifs_get_inode_info_unix(&newInode, full_path, 525 rc = cifs_get_inode_info_unix(&newInode, full_path,
diff --git a/fs/cifs/fcntl.c b/fs/cifs/fcntl.c
deleted file mode 100644
index 5a57581eb4b..00000000000
--- a/fs/cifs/fcntl.c
+++ /dev/null
@@ -1,118 +0,0 @@
1/*
2 * fs/cifs/fcntl.c
3 *
4 * vfs operations that deal with the file control API
5 *
6 * Copyright (C) International Business Machines Corp., 2003,2004
7 * Author(s): Steve French (sfrench@us.ibm.com)
8 *
9 * This library is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU Lesser General Public License as published
11 * by the Free Software Foundation; either version 2.1 of the License, or
12 * (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
17 * the GNU Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public License
20 * along with this library; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */
23#include <linux/fs.h>
24#include <linux/stat.h>
25#include <linux/fcntl.h>
26#include "cifsglob.h"
27#include "cifsproto.h"
28#include "cifs_unicode.h"
29#include "cifs_debug.h"
30#include "cifsfs.h"
31
32static __u32 convert_to_cifs_notify_flags(unsigned long fcntl_notify_flags)
33{
34 __u32 cifs_ntfy_flags = 0;
35
36 /* No way on Linux VFS to ask to monitor xattr
37 changes (and no stream support either */
38 if (fcntl_notify_flags & DN_ACCESS)
39 cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_ACCESS;
40 if (fcntl_notify_flags & DN_MODIFY) {
41 /* What does this mean on directories? */
42 cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_WRITE |
43 FILE_NOTIFY_CHANGE_SIZE;
44 }
45 if (fcntl_notify_flags & DN_CREATE) {
46 cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_CREATION |
47 FILE_NOTIFY_CHANGE_LAST_WRITE;
48 }
49 if (fcntl_notify_flags & DN_DELETE)
50 cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_WRITE;
51 if (fcntl_notify_flags & DN_RENAME) {
52 /* BB review this - checking various server behaviors */
53 cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_DIR_NAME |
54 FILE_NOTIFY_CHANGE_FILE_NAME;
55 }
56 if (fcntl_notify_flags & DN_ATTRIB) {
57 cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_SECURITY |
58 FILE_NOTIFY_CHANGE_ATTRIBUTES;
59 }
60/* if (fcntl_notify_flags & DN_MULTISHOT) {
61 cifs_ntfy_flags |= ;
62 } */ /* BB fixme - not sure how to handle this with CIFS yet */
63
64 return cifs_ntfy_flags;
65}
66
67int cifs_dir_notify(struct file *file, unsigned long arg)
68{
69 int xid;
70 int rc = -EINVAL;
71 int oplock = 0;
72 struct cifs_sb_info *cifs_sb;
73 struct cifsTconInfo *pTcon;
74 char *full_path = NULL;
75 __u32 filter = FILE_NOTIFY_CHANGE_NAME | FILE_NOTIFY_CHANGE_ATTRIBUTES;
76 __u16 netfid;
77
78 if (experimEnabled == 0)
79 return 0;
80
81 xid = GetXid();
82 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
83 pTcon = cifs_sb->tcon;
84
85 full_path = build_path_from_dentry(file->f_path.dentry);
86
87 if (full_path == NULL) {
88 rc = -ENOMEM;
89 } else {
90 cFYI(1, ("dir notify on file %s Arg 0x%lx", full_path, arg));
91 rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN,
92 GENERIC_READ | SYNCHRONIZE, 0 /* create options */,
93 &netfid, &oplock, NULL, cifs_sb->local_nls,
94 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
95 /* BB fixme - add this handle to a notify handle list */
96 if (rc) {
97 cFYI(1, ("Could not open directory for notify"));
98 } else {
99 filter = convert_to_cifs_notify_flags(arg);
100 if (filter != 0) {
101 rc = CIFSSMBNotify(xid, pTcon,
102 0 /* no subdirs */, netfid,
103 filter, file, arg & DN_MULTISHOT,
104 cifs_sb->local_nls);
105 } else {
106 rc = -EINVAL;
107 }
108 /* BB add code to close file eventually (at unmount
109 it would close automatically but may be a way
110 to do it easily when inode freed or when
111 notify info is cleared/changed */
112 cFYI(1, ("notify rc %d", rc));
113 }
114 }
115
116 FreeXid(xid);
117 return rc;
118}
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index f0a81e631ae..12bb656fbe7 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -644,10 +644,10 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
644 __u64 length; 644 __u64 length;
645 bool wait_flag = false; 645 bool wait_flag = false;
646 struct cifs_sb_info *cifs_sb; 646 struct cifs_sb_info *cifs_sb;
647 struct cifsTconInfo *pTcon; 647 struct cifsTconInfo *tcon;
648 __u16 netfid; 648 __u16 netfid;
649 __u8 lockType = LOCKING_ANDX_LARGE_FILES; 649 __u8 lockType = LOCKING_ANDX_LARGE_FILES;
650 bool posix_locking; 650 bool posix_locking = 0;
651 651
652 length = 1 + pfLock->fl_end - pfLock->fl_start; 652 length = 1 + pfLock->fl_end - pfLock->fl_start;
653 rc = -EACCES; 653 rc = -EACCES;
@@ -698,7 +698,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
698 cFYI(1, ("Unknown type of lock")); 698 cFYI(1, ("Unknown type of lock"));
699 699
700 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 700 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
701 pTcon = cifs_sb->tcon; 701 tcon = cifs_sb->tcon;
702 702
703 if (file->private_data == NULL) { 703 if (file->private_data == NULL) {
704 FreeXid(xid); 704 FreeXid(xid);
@@ -706,9 +706,10 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
706 } 706 }
707 netfid = ((struct cifsFileInfo *)file->private_data)->netfid; 707 netfid = ((struct cifsFileInfo *)file->private_data)->netfid;
708 708
709 posix_locking = (cifs_sb->tcon->ses->capabilities & CAP_UNIX) && 709 if ((tcon->ses->capabilities & CAP_UNIX) &&
710 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(cifs_sb->tcon->fsUnixInfo.Capability)); 710 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
711 711 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
712 posix_locking = 1;
712 /* BB add code here to normalize offset and length to 713 /* BB add code here to normalize offset and length to
713 account for negative length which we can not accept over the 714 account for negative length which we can not accept over the
714 wire */ 715 wire */
@@ -719,7 +720,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
719 posix_lock_type = CIFS_RDLCK; 720 posix_lock_type = CIFS_RDLCK;
720 else 721 else
721 posix_lock_type = CIFS_WRLCK; 722 posix_lock_type = CIFS_WRLCK;
722 rc = CIFSSMBPosixLock(xid, pTcon, netfid, 1 /* get */, 723 rc = CIFSSMBPosixLock(xid, tcon, netfid, 1 /* get */,
723 length, pfLock, 724 length, pfLock,
724 posix_lock_type, wait_flag); 725 posix_lock_type, wait_flag);
725 FreeXid(xid); 726 FreeXid(xid);
@@ -727,10 +728,10 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
727 } 728 }
728 729
729 /* BB we could chain these into one lock request BB */ 730 /* BB we could chain these into one lock request BB */
730 rc = CIFSSMBLock(xid, pTcon, netfid, length, pfLock->fl_start, 731 rc = CIFSSMBLock(xid, tcon, netfid, length, pfLock->fl_start,
731 0, 1, lockType, 0 /* wait flag */ ); 732 0, 1, lockType, 0 /* wait flag */ );
732 if (rc == 0) { 733 if (rc == 0) {
733 rc = CIFSSMBLock(xid, pTcon, netfid, length, 734 rc = CIFSSMBLock(xid, tcon, netfid, length,
734 pfLock->fl_start, 1 /* numUnlock */ , 735 pfLock->fl_start, 1 /* numUnlock */ ,
735 0 /* numLock */ , lockType, 736 0 /* numLock */ , lockType,
736 0 /* wait flag */ ); 737 0 /* wait flag */ );
@@ -767,7 +768,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
767 if (numUnlock == 1) 768 if (numUnlock == 1)
768 posix_lock_type = CIFS_UNLCK; 769 posix_lock_type = CIFS_UNLCK;
769 770
770 rc = CIFSSMBPosixLock(xid, pTcon, netfid, 0 /* set */, 771 rc = CIFSSMBPosixLock(xid, tcon, netfid, 0 /* set */,
771 length, pfLock, 772 length, pfLock,
772 posix_lock_type, wait_flag); 773 posix_lock_type, wait_flag);
773 } else { 774 } else {
@@ -775,7 +776,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
775 (struct cifsFileInfo *)file->private_data; 776 (struct cifsFileInfo *)file->private_data;
776 777
777 if (numLock) { 778 if (numLock) {
778 rc = CIFSSMBLock(xid, pTcon, netfid, length, 779 rc = CIFSSMBLock(xid, tcon, netfid, length,
779 pfLock->fl_start, 780 pfLock->fl_start,
780 0, numLock, lockType, wait_flag); 781 0, numLock, lockType, wait_flag);
781 782
@@ -796,7 +797,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
796 if (pfLock->fl_start <= li->offset && 797 if (pfLock->fl_start <= li->offset &&
797 (pfLock->fl_start + length) >= 798 (pfLock->fl_start + length) >=
798 (li->offset + li->length)) { 799 (li->offset + li->length)) {
799 stored_rc = CIFSSMBLock(xid, pTcon, 800 stored_rc = CIFSSMBLock(xid, tcon,
800 netfid, 801 netfid,
801 li->length, li->offset, 802 li->length, li->offset,
802 1, 0, li->type, false); 803 1, 0, li->type, false);
@@ -2073,7 +2074,7 @@ static int cifs_write_begin(struct file *file, struct address_space *mapping,
2073 2074
2074 cFYI(1, ("write_begin from %lld len %d", (long long)pos, len)); 2075 cFYI(1, ("write_begin from %lld len %d", (long long)pos, len));
2075 2076
2076 page = __grab_cache_page(mapping, index); 2077 page = grab_cache_page_write_begin(mapping, index, flags);
2077 if (!page) { 2078 if (!page) {
2078 rc = -ENOMEM; 2079 rc = -ENOMEM;
2079 goto out; 2080 goto out;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index ff8c68de4a9..f247da9f4ed 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * fs/cifs/inode.c 2 * fs/cifs/inode.c
3 * 3 *
4 * Copyright (C) International Business Machines Corp., 2002,2007 4 * Copyright (C) International Business Machines Corp., 2002,2008
5 * Author(s): Steve French (sfrench@us.ibm.com) 5 * Author(s): Steve French (sfrench@us.ibm.com)
6 * 6 *
7 * This library is free software; you can redistribute it and/or modify 7 * This library is free software; you can redistribute it and/or modify
@@ -621,6 +621,47 @@ static const struct inode_operations cifs_ipc_inode_ops = {
621 .lookup = cifs_lookup, 621 .lookup = cifs_lookup,
622}; 622};
623 623
624static char *build_path_to_root(struct cifs_sb_info *cifs_sb)
625{
626 int pplen = cifs_sb->prepathlen;
627 int dfsplen;
628 char *full_path = NULL;
629
630 /* if no prefix path, simply set path to the root of share to "" */
631 if (pplen == 0) {
632 full_path = kmalloc(1, GFP_KERNEL);
633 if (full_path)
634 full_path[0] = 0;
635 return full_path;
636 }
637
638 if (cifs_sb->tcon && (cifs_sb->tcon->Flags & SMB_SHARE_IS_IN_DFS))
639 dfsplen = strnlen(cifs_sb->tcon->treeName, MAX_TREE_SIZE + 1);
640 else
641 dfsplen = 0;
642
643 full_path = kmalloc(dfsplen + pplen + 1, GFP_KERNEL);
644 if (full_path == NULL)
645 return full_path;
646
647 if (dfsplen) {
648 strncpy(full_path, cifs_sb->tcon->treeName, dfsplen);
649 /* switch slash direction in prepath depending on whether
650 * windows or posix style path names
651 */
652 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) {
653 int i;
654 for (i = 0; i < dfsplen; i++) {
655 if (full_path[i] == '\\')
656 full_path[i] = '/';
657 }
658 }
659 }
660 strncpy(full_path + dfsplen, cifs_sb->prepath, pplen);
661 full_path[dfsplen + pplen] = 0; /* add trailing null */
662 return full_path;
663}
664
624/* gets root inode */ 665/* gets root inode */
625struct inode *cifs_iget(struct super_block *sb, unsigned long ino) 666struct inode *cifs_iget(struct super_block *sb, unsigned long ino)
626{ 667{
@@ -628,6 +669,7 @@ struct inode *cifs_iget(struct super_block *sb, unsigned long ino)
628 struct cifs_sb_info *cifs_sb; 669 struct cifs_sb_info *cifs_sb;
629 struct inode *inode; 670 struct inode *inode;
630 long rc; 671 long rc;
672 char *full_path;
631 673
632 inode = iget_locked(sb, ino); 674 inode = iget_locked(sb, ino);
633 if (!inode) 675 if (!inode)
@@ -636,13 +678,17 @@ struct inode *cifs_iget(struct super_block *sb, unsigned long ino)
636 return inode; 678 return inode;
637 679
638 cifs_sb = CIFS_SB(inode->i_sb); 680 cifs_sb = CIFS_SB(inode->i_sb);
639 xid = GetXid(); 681 full_path = build_path_to_root(cifs_sb);
682 if (full_path == NULL)
683 return ERR_PTR(-ENOMEM);
640 684
685 xid = GetXid();
641 if (cifs_sb->tcon->unix_ext) 686 if (cifs_sb->tcon->unix_ext)
642 rc = cifs_get_inode_info_unix(&inode, "", inode->i_sb, xid); 687 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
688 xid);
643 else 689 else
644 rc = cifs_get_inode_info(&inode, "", NULL, inode->i_sb, xid, 690 rc = cifs_get_inode_info(&inode, full_path, NULL, inode->i_sb,
645 NULL); 691 xid, NULL);
646 if (rc && cifs_sb->tcon->ipc) { 692 if (rc && cifs_sb->tcon->ipc) {
647 cFYI(1, ("ipc connection - fake read inode")); 693 cFYI(1, ("ipc connection - fake read inode"));
648 inode->i_mode |= S_IFDIR; 694 inode->i_mode |= S_IFDIR;
@@ -652,6 +698,7 @@ struct inode *cifs_iget(struct super_block *sb, unsigned long ino)
652 inode->i_uid = cifs_sb->mnt_uid; 698 inode->i_uid = cifs_sb->mnt_uid;
653 inode->i_gid = cifs_sb->mnt_gid; 699 inode->i_gid = cifs_sb->mnt_gid;
654 } else if (rc) { 700 } else if (rc) {
701 kfree(full_path);
655 _FreeXid(xid); 702 _FreeXid(xid);
656 iget_failed(inode); 703 iget_failed(inode);
657 return ERR_PTR(rc); 704 return ERR_PTR(rc);
@@ -659,6 +706,7 @@ struct inode *cifs_iget(struct super_block *sb, unsigned long ino)
659 706
660 unlock_new_inode(inode); 707 unlock_new_inode(inode);
661 708
709 kfree(full_path);
662 /* can not call macro FreeXid here since in a void func 710 /* can not call macro FreeXid here since in a void func
663 * TODO: This is no longer true 711 * TODO: This is no longer true
664 */ 712 */
@@ -1143,11 +1191,11 @@ mkdir_get_info:
1143 .device = 0, 1191 .device = 0,
1144 }; 1192 };
1145 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { 1193 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
1146 args.uid = (__u64)current->fsuid; 1194 args.uid = (__u64)current_fsuid();
1147 if (inode->i_mode & S_ISGID) 1195 if (inode->i_mode & S_ISGID)
1148 args.gid = (__u64)inode->i_gid; 1196 args.gid = (__u64)inode->i_gid;
1149 else 1197 else
1150 args.gid = (__u64)current->fsgid; 1198 args.gid = (__u64)current_fsgid();
1151 } else { 1199 } else {
1152 args.uid = NO_CHANGE_64; 1200 args.uid = NO_CHANGE_64;
1153 args.gid = NO_CHANGE_64; 1201 args.gid = NO_CHANGE_64;
@@ -1184,13 +1232,13 @@ mkdir_get_info:
1184 if (cifs_sb->mnt_cifs_flags & 1232 if (cifs_sb->mnt_cifs_flags &
1185 CIFS_MOUNT_SET_UID) { 1233 CIFS_MOUNT_SET_UID) {
1186 direntry->d_inode->i_uid = 1234 direntry->d_inode->i_uid =
1187 current->fsuid; 1235 current_fsuid();
1188 if (inode->i_mode & S_ISGID) 1236 if (inode->i_mode & S_ISGID)
1189 direntry->d_inode->i_gid = 1237 direntry->d_inode->i_gid =
1190 inode->i_gid; 1238 inode->i_gid;
1191 else 1239 else
1192 direntry->d_inode->i_gid = 1240 direntry->d_inode->i_gid =
1193 current->fsgid; 1241 current_fsgid();
1194 } 1242 }
1195 } 1243 }
1196 } 1244 }
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 0088a5b5256..f94650683a0 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -65,7 +65,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
65 switch (command) { 65 switch (command) {
66 case CIFS_IOC_CHECKUMOUNT: 66 case CIFS_IOC_CHECKUMOUNT:
67 cFYI(1, ("User unmount attempted")); 67 cFYI(1, ("User unmount attempted"));
68 if (cifs_sb->mnt_uid == current->uid) 68 if (cifs_sb->mnt_uid == current_uid())
69 rc = 0; 69 rc = 0;
70 else { 70 else {
71 rc = -EACCES; 71 rc = -EACCES;
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 9ee3f689c2b..4c89c572891 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -97,7 +97,10 @@ sesInfoFree(struct cifsSesInfo *buf_to_free)
97 kfree(buf_to_free->serverOS); 97 kfree(buf_to_free->serverOS);
98 kfree(buf_to_free->serverDomain); 98 kfree(buf_to_free->serverDomain);
99 kfree(buf_to_free->serverNOS); 99 kfree(buf_to_free->serverNOS);
100 kfree(buf_to_free->password); 100 if (buf_to_free->password) {
101 memset(buf_to_free->password, 0, strlen(buf_to_free->password));
102 kfree(buf_to_free->password);
103 }
101 kfree(buf_to_free->domainName); 104 kfree(buf_to_free->domainName);
102 kfree(buf_to_free); 105 kfree(buf_to_free);
103} 106}
@@ -129,6 +132,10 @@ tconInfoFree(struct cifsTconInfo *buf_to_free)
129 } 132 }
130 atomic_dec(&tconInfoAllocCount); 133 atomic_dec(&tconInfoAllocCount);
131 kfree(buf_to_free->nativeFileSystem); 134 kfree(buf_to_free->nativeFileSystem);
135 if (buf_to_free->password) {
136 memset(buf_to_free->password, 0, strlen(buf_to_free->password));
137 kfree(buf_to_free->password);
138 }
132 kfree(buf_to_free); 139 kfree(buf_to_free);
133} 140}
134 141
@@ -338,13 +345,13 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
338 /* BB Add support for establishing new tCon and SMB Session */ 345 /* BB Add support for establishing new tCon and SMB Session */
339 /* with userid/password pairs found on the smb session */ 346 /* with userid/password pairs found on the smb session */
340 /* for other target tcp/ip addresses BB */ 347 /* for other target tcp/ip addresses BB */
341 if (current->fsuid != treeCon->ses->linux_uid) { 348 if (current_fsuid() != treeCon->ses->linux_uid) {
342 cFYI(1, ("Multiuser mode and UID " 349 cFYI(1, ("Multiuser mode and UID "
343 "did not match tcon uid")); 350 "did not match tcon uid"));
344 read_lock(&cifs_tcp_ses_lock); 351 read_lock(&cifs_tcp_ses_lock);
345 list_for_each(temp_item, &treeCon->ses->server->smb_ses_list) { 352 list_for_each(temp_item, &treeCon->ses->server->smb_ses_list) {
346 ses = list_entry(temp_item, struct cifsSesInfo, smb_ses_list); 353 ses = list_entry(temp_item, struct cifsSesInfo, smb_ses_list);
347 if (ses->linux_uid == current->fsuid) { 354 if (ses->linux_uid == current_fsuid()) {
348 if (ses->server == treeCon->ses->server) { 355 if (ses->server == treeCon->ses->server) {
349 cFYI(1, ("found matching uid substitute right smb_uid")); 356 cFYI(1, ("found matching uid substitute right smb_uid"));
350 buffer->Uid = ses->Suid; 357 buffer->Uid = ses->Suid;
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 2851d5da0c8..5f22de7b79a 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -417,7 +417,10 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
417 /* BB calculate hash with password */ 417 /* BB calculate hash with password */
418 /* and copy into bcc */ 418 /* and copy into bcc */
419 419
420 calc_lanman_hash(ses, lnm_session_key); 420 calc_lanman_hash(ses->password, ses->server->cryptKey,
421 ses->server->secMode & SECMODE_PW_ENCRYPT ?
422 true : false, lnm_session_key);
423
421 ses->flags |= CIFS_SES_LANMAN; 424 ses->flags |= CIFS_SES_LANMAN;
422 memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_SESS_KEY_SIZE); 425 memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_SESS_KEY_SIZE);
423 bcc_ptr += CIFS_SESS_KEY_SIZE; 426 bcc_ptr += CIFS_SESS_KEY_SIZE;
diff --git a/fs/cifs/smbdes.c b/fs/cifs/smbdes.c
index 04943c976f9..224a1f47896 100644
--- a/fs/cifs/smbdes.c
+++ b/fs/cifs/smbdes.c
@@ -318,7 +318,8 @@ str_to_key(unsigned char *str, unsigned char *key)
318} 318}
319 319
320static void 320static void
321smbhash(unsigned char *out, unsigned char *in, unsigned char *key, int forw) 321smbhash(unsigned char *out, const unsigned char *in, unsigned char *key,
322 int forw)
322{ 323{
323 int i; 324 int i;
324 char *outb; /* outb[64] */ 325 char *outb; /* outb[64] */
@@ -363,7 +364,7 @@ E_P16(unsigned char *p14, unsigned char *p16)
363} 364}
364 365
365void 366void
366E_P24(unsigned char *p21, unsigned char *c8, unsigned char *p24) 367E_P24(unsigned char *p21, const unsigned char *c8, unsigned char *p24)
367{ 368{
368 smbhash(p24, c8, p21, 1); 369 smbhash(p24, c8, p21, 1);
369 smbhash(p24 + 8, c8, p21 + 7, 1); 370 smbhash(p24 + 8, c8, p21 + 7, 1);
diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c
index ff3232fa101..93fb09a99c6 100644
--- a/fs/cifs/smbencrypt.c
+++ b/fs/cifs/smbencrypt.c
@@ -49,9 +49,10 @@
49 49
50/*The following definitions come from libsmb/smbencrypt.c */ 50/*The following definitions come from libsmb/smbencrypt.c */
51 51
52void SMBencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24); 52void SMBencrypt(unsigned char *passwd, const unsigned char *c8,
53 unsigned char *p24);
53void E_md4hash(const unsigned char *passwd, unsigned char *p16); 54void E_md4hash(const unsigned char *passwd, unsigned char *p16);
54static void SMBOWFencrypt(unsigned char passwd[16], unsigned char *c8, 55static void SMBOWFencrypt(unsigned char passwd[16], const unsigned char *c8,
55 unsigned char p24[24]); 56 unsigned char p24[24]);
56void SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24); 57void SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24);
57 58
@@ -61,7 +62,7 @@ void SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24);
61 encrypted password into p24 */ 62 encrypted password into p24 */
62/* Note that password must be uppercased and null terminated */ 63/* Note that password must be uppercased and null terminated */
63void 64void
64SMBencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24) 65SMBencrypt(unsigned char *passwd, const unsigned char *c8, unsigned char *p24)
65{ 66{
66 unsigned char p14[15], p21[21]; 67 unsigned char p14[15], p21[21];
67 68
@@ -212,7 +213,7 @@ ntv2_owf_gen(const unsigned char owf[16], const char *user_n,
212 213
213/* Does the des encryption from the NT or LM MD4 hash. */ 214/* Does the des encryption from the NT or LM MD4 hash. */
214static void 215static void
215SMBOWFencrypt(unsigned char passwd[16], unsigned char *c8, 216SMBOWFencrypt(unsigned char passwd[16], const unsigned char *c8,
216 unsigned char p24[24]) 217 unsigned char p24[24])
217{ 218{
218 unsigned char p21[21]; 219 unsigned char p21[21];
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index ff8243a8fe3..7ebe6599ed3 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -37,15 +37,11 @@ extern mempool_t *cifs_mid_poolp;
37extern struct kmem_cache *cifs_oplock_cachep; 37extern struct kmem_cache *cifs_oplock_cachep;
38 38
39static struct mid_q_entry * 39static struct mid_q_entry *
40AllocMidQEntry(const struct smb_hdr *smb_buffer, struct cifsSesInfo *ses) 40AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server)
41{ 41{
42 struct mid_q_entry *temp; 42 struct mid_q_entry *temp;
43 43
44 if (ses == NULL) { 44 if (server == NULL) {
45 cERROR(1, ("Null session passed in to AllocMidQEntry"));
46 return NULL;
47 }
48 if (ses->server == NULL) {
49 cERROR(1, ("Null TCP session in AllocMidQEntry")); 45 cERROR(1, ("Null TCP session in AllocMidQEntry"));
50 return NULL; 46 return NULL;
51 } 47 }
@@ -62,12 +58,11 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct cifsSesInfo *ses)
62 /* do_gettimeofday(&temp->when_sent);*/ /* easier to use jiffies */ 58 /* do_gettimeofday(&temp->when_sent);*/ /* easier to use jiffies */
63 /* when mid allocated can be before when sent */ 59 /* when mid allocated can be before when sent */
64 temp->when_alloc = jiffies; 60 temp->when_alloc = jiffies;
65 temp->ses = ses;
66 temp->tsk = current; 61 temp->tsk = current;
67 } 62 }
68 63
69 spin_lock(&GlobalMid_Lock); 64 spin_lock(&GlobalMid_Lock);
70 list_add_tail(&temp->qhead, &ses->server->pending_mid_q); 65 list_add_tail(&temp->qhead, &server->pending_mid_q);
71 atomic_inc(&midCount); 66 atomic_inc(&midCount);
72 temp->midState = MID_REQUEST_ALLOCATED; 67 temp->midState = MID_REQUEST_ALLOCATED;
73 spin_unlock(&GlobalMid_Lock); 68 spin_unlock(&GlobalMid_Lock);
@@ -349,37 +344,38 @@ static int wait_for_free_request(struct cifsSesInfo *ses, const int long_op)
349 if (long_op == CIFS_ASYNC_OP) { 344 if (long_op == CIFS_ASYNC_OP) {
350 /* oplock breaks must not be held up */ 345 /* oplock breaks must not be held up */
351 atomic_inc(&ses->server->inFlight); 346 atomic_inc(&ses->server->inFlight);
352 } else { 347 return 0;
353 spin_lock(&GlobalMid_Lock); 348 }
354 while (1) { 349
355 if (atomic_read(&ses->server->inFlight) >= 350 spin_lock(&GlobalMid_Lock);
356 cifs_max_pending){ 351 while (1) {
357 spin_unlock(&GlobalMid_Lock); 352 if (atomic_read(&ses->server->inFlight) >=
353 cifs_max_pending){
354 spin_unlock(&GlobalMid_Lock);
358#ifdef CONFIG_CIFS_STATS2 355#ifdef CONFIG_CIFS_STATS2
359 atomic_inc(&ses->server->num_waiters); 356 atomic_inc(&ses->server->num_waiters);
360#endif 357#endif
361 wait_event(ses->server->request_q, 358 wait_event(ses->server->request_q,
362 atomic_read(&ses->server->inFlight) 359 atomic_read(&ses->server->inFlight)
363 < cifs_max_pending); 360 < cifs_max_pending);
364#ifdef CONFIG_CIFS_STATS2 361#ifdef CONFIG_CIFS_STATS2
365 atomic_dec(&ses->server->num_waiters); 362 atomic_dec(&ses->server->num_waiters);
366#endif 363#endif
367 spin_lock(&GlobalMid_Lock); 364 spin_lock(&GlobalMid_Lock);
368 } else { 365 } else {
369 if (ses->server->tcpStatus == CifsExiting) { 366 if (ses->server->tcpStatus == CifsExiting) {
370 spin_unlock(&GlobalMid_Lock);
371 return -ENOENT;
372 }
373
374 /* can not count locking commands against total
375 as they are allowed to block on server */
376
377 /* update # of requests on the wire to server */
378 if (long_op != CIFS_BLOCKING_OP)
379 atomic_inc(&ses->server->inFlight);
380 spin_unlock(&GlobalMid_Lock); 367 spin_unlock(&GlobalMid_Lock);
381 break; 368 return -ENOENT;
382 } 369 }
370
371 /* can not count locking commands against total
372 as they are allowed to block on server */
373
374 /* update # of requests on the wire to server */
375 if (long_op != CIFS_BLOCKING_OP)
376 atomic_inc(&ses->server->inFlight);
377 spin_unlock(&GlobalMid_Lock);
378 break;
383 } 379 }
384 } 380 }
385 return 0; 381 return 0;
@@ -390,17 +386,21 @@ static int allocate_mid(struct cifsSesInfo *ses, struct smb_hdr *in_buf,
390{ 386{
391 if (ses->server->tcpStatus == CifsExiting) { 387 if (ses->server->tcpStatus == CifsExiting) {
392 return -ENOENT; 388 return -ENOENT;
393 } else if (ses->server->tcpStatus == CifsNeedReconnect) { 389 }
390
391 if (ses->server->tcpStatus == CifsNeedReconnect) {
394 cFYI(1, ("tcp session dead - return to caller to retry")); 392 cFYI(1, ("tcp session dead - return to caller to retry"));
395 return -EAGAIN; 393 return -EAGAIN;
396 } else if (ses->status != CifsGood) { 394 }
395
396 if (ses->status != CifsGood) {
397 /* check if SMB session is bad because we are setting it up */ 397 /* check if SMB session is bad because we are setting it up */
398 if ((in_buf->Command != SMB_COM_SESSION_SETUP_ANDX) && 398 if ((in_buf->Command != SMB_COM_SESSION_SETUP_ANDX) &&
399 (in_buf->Command != SMB_COM_NEGOTIATE)) 399 (in_buf->Command != SMB_COM_NEGOTIATE))
400 return -EAGAIN; 400 return -EAGAIN;
401 /* else ok - we are setting up session */ 401 /* else ok - we are setting up session */
402 } 402 }
403 *ppmidQ = AllocMidQEntry(in_buf, ses); 403 *ppmidQ = AllocMidQEntry(in_buf, ses->server);
404 if (*ppmidQ == NULL) 404 if (*ppmidQ == NULL)
405 return -ENOMEM; 405 return -ENOMEM;
406 return 0; 406 return 0;
@@ -415,11 +415,8 @@ static int wait_for_response(struct cifsSesInfo *ses,
415 415
416 for (;;) { 416 for (;;) {
417 curr_timeout = timeout + jiffies; 417 curr_timeout = timeout + jiffies;
418 wait_event(ses->server->response_q, 418 wait_event_timeout(ses->server->response_q,
419 (!(midQ->midState == MID_REQUEST_SUBMITTED)) || 419 midQ->midState != MID_REQUEST_SUBMITTED, timeout);
420 time_after(jiffies, curr_timeout) ||
421 ((ses->server->tcpStatus != CifsGood) &&
422 (ses->server->tcpStatus != CifsNew)));
423 420
424 if (time_after(jiffies, curr_timeout) && 421 if (time_after(jiffies, curr_timeout) &&
425 (midQ->midState == MID_REQUEST_SUBMITTED) && 422 (midQ->midState == MID_REQUEST_SUBMITTED) &&
@@ -521,11 +518,11 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
521 and avoid races inside tcp sendmsg code that could cause corruption 518 and avoid races inside tcp sendmsg code that could cause corruption
522 of smb data */ 519 of smb data */
523 520
524 down(&ses->server->tcpSem); 521 mutex_lock(&ses->server->srv_mutex);
525 522
526 rc = allocate_mid(ses, in_buf, &midQ); 523 rc = allocate_mid(ses, in_buf, &midQ);
527 if (rc) { 524 if (rc) {
528 up(&ses->server->tcpSem); 525 mutex_unlock(&ses->server->srv_mutex);
529 cifs_small_buf_release(in_buf); 526 cifs_small_buf_release(in_buf);
530 /* Update # of requests on wire to server */ 527 /* Update # of requests on wire to server */
531 atomic_dec(&ses->server->inFlight); 528 atomic_dec(&ses->server->inFlight);
@@ -533,6 +530,11 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
533 return rc; 530 return rc;
534 } 531 }
535 rc = cifs_sign_smb2(iov, n_vec, ses->server, &midQ->sequence_number); 532 rc = cifs_sign_smb2(iov, n_vec, ses->server, &midQ->sequence_number);
533 if (rc) {
534 mutex_unlock(&ses->server->srv_mutex);
535 cifs_small_buf_release(in_buf);
536 goto out;
537 }
536 538
537 midQ->midState = MID_REQUEST_SUBMITTED; 539 midQ->midState = MID_REQUEST_SUBMITTED;
538#ifdef CONFIG_CIFS_STATS2 540#ifdef CONFIG_CIFS_STATS2
@@ -546,7 +548,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
546 midQ->when_sent = jiffies; 548 midQ->when_sent = jiffies;
547#endif 549#endif
548 550
549 up(&ses->server->tcpSem); 551 mutex_unlock(&ses->server->srv_mutex);
550 cifs_small_buf_release(in_buf); 552 cifs_small_buf_release(in_buf);
551 553
552 if (rc < 0) 554 if (rc < 0)
@@ -581,10 +583,8 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
581 wait_for_response(ses, midQ, timeout, 10 * HZ); 583 wait_for_response(ses, midQ, timeout, 10 * HZ);
582 584
583 spin_lock(&GlobalMid_Lock); 585 spin_lock(&GlobalMid_Lock);
584 if (midQ->resp_buf) { 586
585 spin_unlock(&GlobalMid_Lock); 587 if (midQ->resp_buf == NULL) {
586 receive_len = midQ->resp_buf->smb_buf_length;
587 } else {
588 cERROR(1, ("No response to cmd %d mid %d", 588 cERROR(1, ("No response to cmd %d mid %d",
589 midQ->command, midQ->mid)); 589 midQ->command, midQ->mid));
590 if (midQ->midState == MID_REQUEST_SUBMITTED) { 590 if (midQ->midState == MID_REQUEST_SUBMITTED) {
@@ -612,53 +612,59 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
612 return rc; 612 return rc;
613 } 613 }
614 614
615 spin_unlock(&GlobalMid_Lock);
616 receive_len = midQ->resp_buf->smb_buf_length;
617
615 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) { 618 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) {
616 cERROR(1, ("Frame too large received. Length: %d Xid: %d", 619 cERROR(1, ("Frame too large received. Length: %d Xid: %d",
617 receive_len, xid)); 620 receive_len, xid));
618 rc = -EIO; 621 rc = -EIO;
619 } else { /* rcvd frame is ok */ 622 goto out;
620 if (midQ->resp_buf && 623 }
621 (midQ->midState == MID_RESPONSE_RECEIVED)) { 624
622 625 /* rcvd frame is ok */
623 iov[0].iov_base = (char *)midQ->resp_buf; 626
624 if (midQ->largeBuf) 627 if (midQ->resp_buf &&
625 *pRespBufType = CIFS_LARGE_BUFFER; 628 (midQ->midState == MID_RESPONSE_RECEIVED)) {
626 else 629
627 *pRespBufType = CIFS_SMALL_BUFFER; 630 iov[0].iov_base = (char *)midQ->resp_buf;
628 iov[0].iov_len = receive_len + 4; 631 if (midQ->largeBuf)
629 632 *pRespBufType = CIFS_LARGE_BUFFER;
630 dump_smb(midQ->resp_buf, 80); 633 else
631 /* convert the length into a more usable form */ 634 *pRespBufType = CIFS_SMALL_BUFFER;
632 if ((receive_len > 24) && 635 iov[0].iov_len = receive_len + 4;
633 (ses->server->secMode & (SECMODE_SIGN_REQUIRED | 636
634 SECMODE_SIGN_ENABLED))) { 637 dump_smb(midQ->resp_buf, 80);
635 rc = cifs_verify_signature(midQ->resp_buf, 638 /* convert the length into a more usable form */
639 if ((receive_len > 24) &&
640 (ses->server->secMode & (SECMODE_SIGN_REQUIRED |
641 SECMODE_SIGN_ENABLED))) {
642 rc = cifs_verify_signature(midQ->resp_buf,
636 &ses->server->mac_signing_key, 643 &ses->server->mac_signing_key,
637 midQ->sequence_number+1); 644 midQ->sequence_number+1);
638 if (rc) { 645 if (rc) {
639 cERROR(1, ("Unexpected SMB signature")); 646 cERROR(1, ("Unexpected SMB signature"));
640 /* BB FIXME add code to kill session */ 647 /* BB FIXME add code to kill session */
641 }
642 } 648 }
643
644 /* BB special case reconnect tid and uid here? */
645 rc = map_smb_to_linux_error(midQ->resp_buf,
646 flags & CIFS_LOG_ERROR);
647
648 /* convert ByteCount if necessary */
649 if (receive_len >= sizeof(struct smb_hdr) - 4
650 /* do not count RFC1001 header */ +
651 (2 * midQ->resp_buf->WordCount) + 2 /* bcc */ )
652 BCC(midQ->resp_buf) =
653 le16_to_cpu(BCC_LE(midQ->resp_buf));
654 if ((flags & CIFS_NO_RESP) == 0)
655 midQ->resp_buf = NULL; /* mark it so buf will
656 not be freed by
657 DeleteMidQEntry */
658 } else {
659 rc = -EIO;
660 cFYI(1, ("Bad MID state?"));
661 } 649 }
650
651 /* BB special case reconnect tid and uid here? */
652 rc = map_smb_to_linux_error(midQ->resp_buf,
653 flags & CIFS_LOG_ERROR);
654
655 /* convert ByteCount if necessary */
656 if (receive_len >= sizeof(struct smb_hdr) - 4
657 /* do not count RFC1001 header */ +
658 (2 * midQ->resp_buf->WordCount) + 2 /* bcc */ )
659 BCC(midQ->resp_buf) =
660 le16_to_cpu(BCC_LE(midQ->resp_buf));
661 if ((flags & CIFS_NO_RESP) == 0)
662 midQ->resp_buf = NULL; /* mark it so buf will
663 not be freed by
664 DeleteMidQEntry */
665 } else {
666 rc = -EIO;
667 cFYI(1, ("Bad MID state?"));
662 } 668 }
663 669
664out: 670out:
@@ -695,6 +701,12 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
695 to the same server. We may make this configurable later or 701 to the same server. We may make this configurable later or
696 use ses->maxReq */ 702 use ses->maxReq */
697 703
704 if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) {
705 cERROR(1, ("Illegal length, greater than maximum frame, %d",
706 in_buf->smb_buf_length));
707 return -EIO;
708 }
709
698 rc = wait_for_free_request(ses, long_op); 710 rc = wait_for_free_request(ses, long_op);
699 if (rc) 711 if (rc)
700 return rc; 712 return rc;
@@ -703,29 +715,22 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
703 and avoid races inside tcp sendmsg code that could cause corruption 715 and avoid races inside tcp sendmsg code that could cause corruption
704 of smb data */ 716 of smb data */
705 717
706 down(&ses->server->tcpSem); 718 mutex_lock(&ses->server->srv_mutex);
707 719
708 rc = allocate_mid(ses, in_buf, &midQ); 720 rc = allocate_mid(ses, in_buf, &midQ);
709 if (rc) { 721 if (rc) {
710 up(&ses->server->tcpSem); 722 mutex_unlock(&ses->server->srv_mutex);
711 /* Update # of requests on wire to server */ 723 /* Update # of requests on wire to server */
712 atomic_dec(&ses->server->inFlight); 724 atomic_dec(&ses->server->inFlight);
713 wake_up(&ses->server->request_q); 725 wake_up(&ses->server->request_q);
714 return rc; 726 return rc;
715 } 727 }
716 728
717 if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) {
718 cERROR(1, ("Illegal length, greater than maximum frame, %d",
719 in_buf->smb_buf_length));
720 DeleteMidQEntry(midQ);
721 up(&ses->server->tcpSem);
722 /* Update # of requests on wire to server */
723 atomic_dec(&ses->server->inFlight);
724 wake_up(&ses->server->request_q);
725 return -EIO;
726 }
727
728 rc = cifs_sign_smb(in_buf, ses->server, &midQ->sequence_number); 729 rc = cifs_sign_smb(in_buf, ses->server, &midQ->sequence_number);
730 if (rc) {
731 mutex_unlock(&ses->server->srv_mutex);
732 goto out;
733 }
729 734
730 midQ->midState = MID_REQUEST_SUBMITTED; 735 midQ->midState = MID_REQUEST_SUBMITTED;
731#ifdef CONFIG_CIFS_STATS2 736#ifdef CONFIG_CIFS_STATS2
@@ -738,7 +743,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
738 atomic_dec(&ses->server->inSend); 743 atomic_dec(&ses->server->inSend);
739 midQ->when_sent = jiffies; 744 midQ->when_sent = jiffies;
740#endif 745#endif
741 up(&ses->server->tcpSem); 746 mutex_unlock(&ses->server->srv_mutex);
742 747
743 if (rc < 0) 748 if (rc < 0)
744 goto out; 749 goto out;
@@ -772,10 +777,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
772 wait_for_response(ses, midQ, timeout, 10 * HZ); 777 wait_for_response(ses, midQ, timeout, 10 * HZ);
773 778
774 spin_lock(&GlobalMid_Lock); 779 spin_lock(&GlobalMid_Lock);
775 if (midQ->resp_buf) { 780 if (midQ->resp_buf == NULL) {
776 spin_unlock(&GlobalMid_Lock);
777 receive_len = midQ->resp_buf->smb_buf_length;
778 } else {
779 cERROR(1, ("No response for cmd %d mid %d", 781 cERROR(1, ("No response for cmd %d mid %d",
780 midQ->command, midQ->mid)); 782 midQ->command, midQ->mid));
781 if (midQ->midState == MID_REQUEST_SUBMITTED) { 783 if (midQ->midState == MID_REQUEST_SUBMITTED) {
@@ -803,47 +805,52 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
803 return rc; 805 return rc;
804 } 806 }
805 807
808 spin_unlock(&GlobalMid_Lock);
809 receive_len = midQ->resp_buf->smb_buf_length;
810
806 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) { 811 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) {
807 cERROR(1, ("Frame too large received. Length: %d Xid: %d", 812 cERROR(1, ("Frame too large received. Length: %d Xid: %d",
808 receive_len, xid)); 813 receive_len, xid));
809 rc = -EIO; 814 rc = -EIO;
810 } else { /* rcvd frame is ok */ 815 goto out;
811 816 }
812 if (midQ->resp_buf && out_buf 817
813 && (midQ->midState == MID_RESPONSE_RECEIVED)) { 818 /* rcvd frame is ok */
814 out_buf->smb_buf_length = receive_len; 819
815 memcpy((char *)out_buf + 4, 820 if (midQ->resp_buf && out_buf
816 (char *)midQ->resp_buf + 4, 821 && (midQ->midState == MID_RESPONSE_RECEIVED)) {
817 receive_len); 822 out_buf->smb_buf_length = receive_len;
818 823 memcpy((char *)out_buf + 4,
819 dump_smb(out_buf, 92); 824 (char *)midQ->resp_buf + 4,
820 /* convert the length into a more usable form */ 825 receive_len);
821 if ((receive_len > 24) && 826
822 (ses->server->secMode & (SECMODE_SIGN_REQUIRED | 827 dump_smb(out_buf, 92);
823 SECMODE_SIGN_ENABLED))) { 828 /* convert the length into a more usable form */
824 rc = cifs_verify_signature(out_buf, 829 if ((receive_len > 24) &&
830 (ses->server->secMode & (SECMODE_SIGN_REQUIRED |
831 SECMODE_SIGN_ENABLED))) {
832 rc = cifs_verify_signature(out_buf,
825 &ses->server->mac_signing_key, 833 &ses->server->mac_signing_key,
826 midQ->sequence_number+1); 834 midQ->sequence_number+1);
827 if (rc) { 835 if (rc) {
828 cERROR(1, ("Unexpected SMB signature")); 836 cERROR(1, ("Unexpected SMB signature"));
829 /* BB FIXME add code to kill session */ 837 /* BB FIXME add code to kill session */
830 }
831 } 838 }
839 }
832 840
833 *pbytes_returned = out_buf->smb_buf_length; 841 *pbytes_returned = out_buf->smb_buf_length;
834 842
835 /* BB special case reconnect tid and uid here? */ 843 /* BB special case reconnect tid and uid here? */
836 rc = map_smb_to_linux_error(out_buf, 0 /* no log */ ); 844 rc = map_smb_to_linux_error(out_buf, 0 /* no log */ );
837 845
838 /* convert ByteCount if necessary */ 846 /* convert ByteCount if necessary */
839 if (receive_len >= sizeof(struct smb_hdr) - 4 847 if (receive_len >= sizeof(struct smb_hdr) - 4
840 /* do not count RFC1001 header */ + 848 /* do not count RFC1001 header */ +
841 (2 * out_buf->WordCount) + 2 /* bcc */ ) 849 (2 * out_buf->WordCount) + 2 /* bcc */ )
842 BCC(out_buf) = le16_to_cpu(BCC_LE(out_buf)); 850 BCC(out_buf) = le16_to_cpu(BCC_LE(out_buf));
843 } else { 851 } else {
844 rc = -EIO; 852 rc = -EIO;
845 cERROR(1, ("Bad MID state?")); 853 cERROR(1, ("Bad MID state?"));
846 }
847 } 854 }
848 855
849out: 856out:
@@ -866,16 +873,16 @@ send_nt_cancel(struct cifsTconInfo *tcon, struct smb_hdr *in_buf,
866 873
867 header_assemble(in_buf, SMB_COM_NT_CANCEL, tcon, 0); 874 header_assemble(in_buf, SMB_COM_NT_CANCEL, tcon, 0);
868 in_buf->Mid = mid; 875 in_buf->Mid = mid;
869 down(&ses->server->tcpSem); 876 mutex_lock(&ses->server->srv_mutex);
870 rc = cifs_sign_smb(in_buf, ses->server, &midQ->sequence_number); 877 rc = cifs_sign_smb(in_buf, ses->server, &midQ->sequence_number);
871 if (rc) { 878 if (rc) {
872 up(&ses->server->tcpSem); 879 mutex_unlock(&ses->server->srv_mutex);
873 return rc; 880 return rc;
874 } 881 }
875 rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length, 882 rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length,
876 (struct sockaddr *) &(ses->server->addr.sockAddr), 883 (struct sockaddr *) &(ses->server->addr.sockAddr),
877 ses->server->noblocksnd); 884 ses->server->noblocksnd);
878 up(&ses->server->tcpSem); 885 mutex_unlock(&ses->server->srv_mutex);
879 return rc; 886 return rc;
880} 887}
881 888
@@ -933,6 +940,12 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
933 to the same server. We may make this configurable later or 940 to the same server. We may make this configurable later or
934 use ses->maxReq */ 941 use ses->maxReq */
935 942
943 if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) {
944 cERROR(1, ("Illegal length, greater than maximum frame, %d",
945 in_buf->smb_buf_length));
946 return -EIO;
947 }
948
936 rc = wait_for_free_request(ses, CIFS_BLOCKING_OP); 949 rc = wait_for_free_request(ses, CIFS_BLOCKING_OP);
937 if (rc) 950 if (rc)
938 return rc; 951 return rc;
@@ -941,24 +954,21 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
941 and avoid races inside tcp sendmsg code that could cause corruption 954 and avoid races inside tcp sendmsg code that could cause corruption
942 of smb data */ 955 of smb data */
943 956
944 down(&ses->server->tcpSem); 957 mutex_lock(&ses->server->srv_mutex);
945 958
946 rc = allocate_mid(ses, in_buf, &midQ); 959 rc = allocate_mid(ses, in_buf, &midQ);
947 if (rc) { 960 if (rc) {
948 up(&ses->server->tcpSem); 961 mutex_unlock(&ses->server->srv_mutex);
949 return rc; 962 return rc;
950 } 963 }
951 964
952 if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { 965 rc = cifs_sign_smb(in_buf, ses->server, &midQ->sequence_number);
953 up(&ses->server->tcpSem); 966 if (rc) {
954 cERROR(1, ("Illegal length, greater than maximum frame, %d",
955 in_buf->smb_buf_length));
956 DeleteMidQEntry(midQ); 967 DeleteMidQEntry(midQ);
957 return -EIO; 968 mutex_unlock(&ses->server->srv_mutex);
969 return rc;
958 } 970 }
959 971
960 rc = cifs_sign_smb(in_buf, ses->server, &midQ->sequence_number);
961
962 midQ->midState = MID_REQUEST_SUBMITTED; 972 midQ->midState = MID_REQUEST_SUBMITTED;
963#ifdef CONFIG_CIFS_STATS2 973#ifdef CONFIG_CIFS_STATS2
964 atomic_inc(&ses->server->inSend); 974 atomic_inc(&ses->server->inSend);
@@ -970,7 +980,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
970 atomic_dec(&ses->server->inSend); 980 atomic_dec(&ses->server->inSend);
971 midQ->when_sent = jiffies; 981 midQ->when_sent = jiffies;
972#endif 982#endif
973 up(&ses->server->tcpSem); 983 mutex_unlock(&ses->server->srv_mutex);
974 984
975 if (rc < 0) { 985 if (rc < 0) {
976 DeleteMidQEntry(midQ); 986 DeleteMidQEntry(midQ);
@@ -1052,44 +1062,48 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
1052 cERROR(1, ("Frame too large received. Length: %d Xid: %d", 1062 cERROR(1, ("Frame too large received. Length: %d Xid: %d",
1053 receive_len, xid)); 1063 receive_len, xid));
1054 rc = -EIO; 1064 rc = -EIO;
1055 } else { /* rcvd frame is ok */ 1065 goto out;
1056 1066 }
1057 if (midQ->resp_buf && out_buf
1058 && (midQ->midState == MID_RESPONSE_RECEIVED)) {
1059 out_buf->smb_buf_length = receive_len;
1060 memcpy((char *)out_buf + 4,
1061 (char *)midQ->resp_buf + 4,
1062 receive_len);
1063
1064 dump_smb(out_buf, 92);
1065 /* convert the length into a more usable form */
1066 if ((receive_len > 24) &&
1067 (ses->server->secMode & (SECMODE_SIGN_REQUIRED |
1068 SECMODE_SIGN_ENABLED))) {
1069 rc = cifs_verify_signature(out_buf,
1070 &ses->server->mac_signing_key,
1071 midQ->sequence_number+1);
1072 if (rc) {
1073 cERROR(1, ("Unexpected SMB signature"));
1074 /* BB FIXME add code to kill session */
1075 }
1076 }
1077 1067
1078 *pbytes_returned = out_buf->smb_buf_length; 1068 /* rcvd frame is ok */
1079 1069
1080 /* BB special case reconnect tid and uid here? */ 1070 if ((out_buf == NULL) || (midQ->midState != MID_RESPONSE_RECEIVED)) {
1081 rc = map_smb_to_linux_error(out_buf, 0 /* no log */ ); 1071 rc = -EIO;
1072 cERROR(1, ("Bad MID state?"));
1073 goto out;
1074 }
1082 1075
1083 /* convert ByteCount if necessary */ 1076 out_buf->smb_buf_length = receive_len;
1084 if (receive_len >= sizeof(struct smb_hdr) - 4 1077 memcpy((char *)out_buf + 4,
1085 /* do not count RFC1001 header */ + 1078 (char *)midQ->resp_buf + 4,
1086 (2 * out_buf->WordCount) + 2 /* bcc */ ) 1079 receive_len);
1087 BCC(out_buf) = le16_to_cpu(BCC_LE(out_buf)); 1080
1088 } else { 1081 dump_smb(out_buf, 92);
1089 rc = -EIO; 1082 /* convert the length into a more usable form */
1090 cERROR(1, ("Bad MID state?")); 1083 if ((receive_len > 24) &&
1084 (ses->server->secMode & (SECMODE_SIGN_REQUIRED |
1085 SECMODE_SIGN_ENABLED))) {
1086 rc = cifs_verify_signature(out_buf,
1087 &ses->server->mac_signing_key,
1088 midQ->sequence_number+1);
1089 if (rc) {
1090 cERROR(1, ("Unexpected SMB signature"));
1091 /* BB FIXME add code to kill session */
1091 } 1092 }
1092 } 1093 }
1094
1095 *pbytes_returned = out_buf->smb_buf_length;
1096
1097 /* BB special case reconnect tid and uid here? */
1098 rc = map_smb_to_linux_error(out_buf, 0 /* no log */ );
1099
1100 /* convert ByteCount if necessary */
1101 if (receive_len >= sizeof(struct smb_hdr) - 4
1102 /* do not count RFC1001 header */ +
1103 (2 * out_buf->WordCount) + 2 /* bcc */ )
1104 BCC(out_buf) = le16_to_cpu(BCC_LE(out_buf));
1105
1106out:
1093 DeleteMidQEntry(midQ); 1107 DeleteMidQEntry(midQ);
1094 if (rstart && rc == -EACCES) 1108 if (rstart && rc == -EACCES)
1095 return -ERESTARTSYS; 1109 return -ERESTARTSYS;
diff --git a/fs/coda/cache.c b/fs/coda/cache.c
index 8a2370341c7..a5bf5771a22 100644
--- a/fs/coda/cache.c
+++ b/fs/coda/cache.c
@@ -32,8 +32,8 @@ void coda_cache_enter(struct inode *inode, int mask)
32 struct coda_inode_info *cii = ITOC(inode); 32 struct coda_inode_info *cii = ITOC(inode);
33 33
34 cii->c_cached_epoch = atomic_read(&permission_epoch); 34 cii->c_cached_epoch = atomic_read(&permission_epoch);
35 if (cii->c_uid != current->fsuid) { 35 if (cii->c_uid != current_fsuid()) {
36 cii->c_uid = current->fsuid; 36 cii->c_uid = current_fsuid();
37 cii->c_cached_perm = mask; 37 cii->c_cached_perm = mask;
38 } else 38 } else
39 cii->c_cached_perm |= mask; 39 cii->c_cached_perm |= mask;
@@ -60,7 +60,7 @@ int coda_cache_check(struct inode *inode, int mask)
60 int hit; 60 int hit;
61 61
62 hit = (mask & cii->c_cached_perm) == mask && 62 hit = (mask & cii->c_cached_perm) == mask &&
63 cii->c_uid == current->fsuid && 63 cii->c_uid == current_fsuid() &&
64 cii->c_cached_epoch == atomic_read(&permission_epoch); 64 cii->c_cached_epoch == atomic_read(&permission_epoch);
65 65
66 return hit; 66 return hit;
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 29137ff3ca6..466303db2df 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -13,6 +13,7 @@
13#include <linux/file.h> 13#include <linux/file.h>
14#include <linux/fs.h> 14#include <linux/fs.h>
15#include <linux/stat.h> 15#include <linux/stat.h>
16#include <linux/cred.h>
16#include <linux/errno.h> 17#include <linux/errno.h>
17#include <linux/smp_lock.h> 18#include <linux/smp_lock.h>
18#include <linux/string.h> 19#include <linux/string.h>
@@ -174,7 +175,7 @@ int coda_release(struct inode *coda_inode, struct file *coda_file)
174 BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); 175 BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
175 176
176 err = venus_close(coda_inode->i_sb, coda_i2f(coda_inode), 177 err = venus_close(coda_inode->i_sb, coda_i2f(coda_inode),
177 coda_flags, coda_file->f_uid); 178 coda_flags, coda_file->f_cred->fsuid);
178 179
179 host_inode = cfi->cfi_container->f_path.dentry->d_inode; 180 host_inode = cfi->cfi_container->f_path.dentry->d_inode;
180 cii = ITOC(coda_inode); 181 cii = ITOC(coda_inode);
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index ce432bca95d..c274d949179 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -52,7 +52,7 @@ static void *alloc_upcall(int opcode, int size)
52 inp->ih.opcode = opcode; 52 inp->ih.opcode = opcode;
53 inp->ih.pid = current->pid; 53 inp->ih.pid = current->pid;
54 inp->ih.pgid = task_pgrp_nr(current); 54 inp->ih.pgid = task_pgrp_nr(current);
55 inp->ih.uid = current->fsuid; 55 inp->ih.uid = current_fsuid();
56 56
57 return (void*)inp; 57 return (void*)inp;
58} 58}
diff --git a/fs/compat.c b/fs/compat.c
index e5f49f53850..d1ece79b641 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1393,10 +1393,20 @@ int compat_do_execve(char * filename,
1393 if (!bprm) 1393 if (!bprm)
1394 goto out_ret; 1394 goto out_ret;
1395 1395
1396 retval = mutex_lock_interruptible(&current->cred_exec_mutex);
1397 if (retval < 0)
1398 goto out_free;
1399
1400 retval = -ENOMEM;
1401 bprm->cred = prepare_exec_creds();
1402 if (!bprm->cred)
1403 goto out_unlock;
1404 check_unsafe_exec(bprm);
1405
1396 file = open_exec(filename); 1406 file = open_exec(filename);
1397 retval = PTR_ERR(file); 1407 retval = PTR_ERR(file);
1398 if (IS_ERR(file)) 1408 if (IS_ERR(file))
1399 goto out_kfree; 1409 goto out_unlock;
1400 1410
1401 sched_exec(); 1411 sched_exec();
1402 1412
@@ -1410,14 +1420,10 @@ int compat_do_execve(char * filename,
1410 1420
1411 bprm->argc = compat_count(argv, MAX_ARG_STRINGS); 1421 bprm->argc = compat_count(argv, MAX_ARG_STRINGS);
1412 if ((retval = bprm->argc) < 0) 1422 if ((retval = bprm->argc) < 0)
1413 goto out_mm; 1423 goto out;
1414 1424
1415 bprm->envc = compat_count(envp, MAX_ARG_STRINGS); 1425 bprm->envc = compat_count(envp, MAX_ARG_STRINGS);
1416 if ((retval = bprm->envc) < 0) 1426 if ((retval = bprm->envc) < 0)
1417 goto out_mm;
1418
1419 retval = security_bprm_alloc(bprm);
1420 if (retval)
1421 goto out; 1427 goto out;
1422 1428
1423 retval = prepare_binprm(bprm); 1429 retval = prepare_binprm(bprm);
@@ -1438,19 +1444,16 @@ int compat_do_execve(char * filename,
1438 goto out; 1444 goto out;
1439 1445
1440 retval = search_binary_handler(bprm, regs); 1446 retval = search_binary_handler(bprm, regs);
1441 if (retval >= 0) { 1447 if (retval < 0)
1442 /* execve success */ 1448 goto out;
1443 security_bprm_free(bprm);
1444 acct_update_integrals(current);
1445 free_bprm(bprm);
1446 return retval;
1447 }
1448 1449
1449out: 1450 /* execve succeeded */
1450 if (bprm->security) 1451 mutex_unlock(&current->cred_exec_mutex);
1451 security_bprm_free(bprm); 1452 acct_update_integrals(current);
1453 free_bprm(bprm);
1454 return retval;
1452 1455
1453out_mm: 1456out:
1454 if (bprm->mm) 1457 if (bprm->mm)
1455 mmput(bprm->mm); 1458 mmput(bprm->mm);
1456 1459
@@ -1460,7 +1463,10 @@ out_file:
1460 fput(bprm->file); 1463 fput(bprm->file);
1461 } 1464 }
1462 1465
1463out_kfree: 1466out_unlock:
1467 mutex_unlock(&current->cred_exec_mutex);
1468
1469out_free:
1464 free_bprm(bprm); 1470 free_bprm(bprm);
1465 1471
1466out_ret: 1472out_ret:
diff --git a/fs/dcache.c b/fs/dcache.c
index a1d86c7f3e6..e88c23b85a3 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -34,7 +34,6 @@
34#include <linux/bootmem.h> 34#include <linux/bootmem.h>
35#include "internal.h" 35#include "internal.h"
36 36
37
38int sysctl_vfs_cache_pressure __read_mostly = 100; 37int sysctl_vfs_cache_pressure __read_mostly = 100;
39EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); 38EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
40 39
@@ -948,9 +947,6 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
948 dentry->d_op = NULL; 947 dentry->d_op = NULL;
949 dentry->d_fsdata = NULL; 948 dentry->d_fsdata = NULL;
950 dentry->d_mounted = 0; 949 dentry->d_mounted = 0;
951#ifdef CONFIG_PROFILING
952 dentry->d_cookie = NULL;
953#endif
954 INIT_HLIST_NODE(&dentry->d_hash); 950 INIT_HLIST_NODE(&dentry->d_hash);
955 INIT_LIST_HEAD(&dentry->d_lru); 951 INIT_LIST_HEAD(&dentry->d_lru);
956 INIT_LIST_HEAD(&dentry->d_subdirs); 952 INIT_LIST_HEAD(&dentry->d_subdirs);
@@ -1336,7 +1332,7 @@ err_out:
1336 * 1332 *
1337 * Searches the children of the parent dentry for the name in question. If 1333 * Searches the children of the parent dentry for the name in question. If
1338 * the dentry is found its reference count is incremented and the dentry 1334 * the dentry is found its reference count is incremented and the dentry
1339 * is returned. The caller must use d_put to free the entry when it has 1335 * is returned. The caller must use dput to free the entry when it has
1340 * finished using it. %NULL is returned on failure. 1336 * finished using it. %NULL is returned on failure.
1341 * 1337 *
1342 * __d_lookup is dcache_lock free. The hash list is protected using RCU. 1338 * __d_lookup is dcache_lock free. The hash list is protected using RCU.
@@ -1620,8 +1616,11 @@ static void switch_names(struct dentry *dentry, struct dentry *target)
1620 */ 1616 */
1621 memcpy(dentry->d_iname, target->d_name.name, 1617 memcpy(dentry->d_iname, target->d_name.name,
1622 target->d_name.len + 1); 1618 target->d_name.len + 1);
1619 dentry->d_name.len = target->d_name.len;
1620 return;
1623 } 1621 }
1624 } 1622 }
1623 do_switch(dentry->d_name.len, target->d_name.len);
1625} 1624}
1626 1625
1627/* 1626/*
@@ -1681,7 +1680,6 @@ already_unhashed:
1681 1680
1682 /* Switch the names.. */ 1681 /* Switch the names.. */
1683 switch_names(dentry, target); 1682 switch_names(dentry, target);
1684 do_switch(dentry->d_name.len, target->d_name.len);
1685 do_switch(dentry->d_name.hash, target->d_name.hash); 1683 do_switch(dentry->d_name.hash, target->d_name.hash);
1686 1684
1687 /* ... and switch the parents */ 1685 /* ... and switch the parents */
@@ -1791,7 +1789,6 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
1791 struct dentry *dparent, *aparent; 1789 struct dentry *dparent, *aparent;
1792 1790
1793 switch_names(dentry, anon); 1791 switch_names(dentry, anon);
1794 do_switch(dentry->d_name.len, anon->d_name.len);
1795 do_switch(dentry->d_name.hash, anon->d_name.hash); 1792 do_switch(dentry->d_name.hash, anon->d_name.hash);
1796 1793
1797 dparent = dentry->d_parent; 1794 dparent = dentry->d_parent;
@@ -1911,7 +1908,8 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name)
1911 * Convert a dentry into an ASCII path name. If the entry has been deleted 1908 * Convert a dentry into an ASCII path name. If the entry has been deleted
1912 * the string " (deleted)" is appended. Note that this is ambiguous. 1909 * the string " (deleted)" is appended. Note that this is ambiguous.
1913 * 1910 *
1914 * Returns the buffer or an error code if the path was too long. 1911 * Returns a pointer into the buffer or an error code if the
1912 * path was too long.
1915 * 1913 *
1916 * "buflen" should be positive. Caller holds the dcache_lock. 1914 * "buflen" should be positive. Caller holds the dcache_lock.
1917 * 1915 *
@@ -1987,7 +1985,10 @@ Elong:
1987 * Convert a dentry into an ASCII path name. If the entry has been deleted 1985 * Convert a dentry into an ASCII path name. If the entry has been deleted
1988 * the string " (deleted)" is appended. Note that this is ambiguous. 1986 * the string " (deleted)" is appended. Note that this is ambiguous.
1989 * 1987 *
1990 * Returns the buffer or an error code if the path was too long. 1988 * Returns a pointer into the buffer or an error code if the path was
1989 * too long. Note: Callers should use the returned pointer, not the passed
1990 * in buffer, to use the name! The implementation often starts at an offset
1991 * into the buffer, and may leave 0 bytes at the start.
1991 * 1992 *
1992 * "buflen" should be positive. 1993 * "buflen" should be positive.
1993 */ 1994 */
@@ -2313,9 +2314,6 @@ static void __init dcache_init(void)
2313/* SLAB cache for __getname() consumers */ 2314/* SLAB cache for __getname() consumers */
2314struct kmem_cache *names_cachep __read_mostly; 2315struct kmem_cache *names_cachep __read_mostly;
2315 2316
2316/* SLAB cache for file structures */
2317struct kmem_cache *filp_cachep __read_mostly;
2318
2319EXPORT_SYMBOL(d_genocide); 2317EXPORT_SYMBOL(d_genocide);
2320 2318
2321void __init vfs_caches_init_early(void) 2319void __init vfs_caches_init_early(void)
@@ -2337,9 +2335,6 @@ void __init vfs_caches_init(unsigned long mempages)
2337 names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0, 2335 names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
2338 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); 2336 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
2339 2337
2340 filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
2341 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
2342
2343 dcache_init(); 2338 dcache_init();
2344 inode_init(); 2339 inode_init();
2345 files_init(mempages); 2340 files_init(mempages);
diff --git a/fs/dcookies.c b/fs/dcookies.c
index 855d4b1d619..180e9fec4ad 100644
--- a/fs/dcookies.c
+++ b/fs/dcookies.c
@@ -93,10 +93,15 @@ static struct dcookie_struct *alloc_dcookie(struct path *path)
93{ 93{
94 struct dcookie_struct *dcs = kmem_cache_alloc(dcookie_cache, 94 struct dcookie_struct *dcs = kmem_cache_alloc(dcookie_cache,
95 GFP_KERNEL); 95 GFP_KERNEL);
96 struct dentry *d;
96 if (!dcs) 97 if (!dcs)
97 return NULL; 98 return NULL;
98 99
99 path->dentry->d_cookie = dcs; 100 d = path->dentry;
101 spin_lock(&d->d_lock);
102 d->d_flags |= DCACHE_COOKIE;
103 spin_unlock(&d->d_lock);
104
100 dcs->path = *path; 105 dcs->path = *path;
101 path_get(path); 106 path_get(path);
102 hash_dcookie(dcs); 107 hash_dcookie(dcs);
@@ -119,14 +124,14 @@ int get_dcookie(struct path *path, unsigned long *cookie)
119 goto out; 124 goto out;
120 } 125 }
121 126
122 dcs = path->dentry->d_cookie; 127 if (path->dentry->d_flags & DCACHE_COOKIE) {
123 128 dcs = find_dcookie((unsigned long)path->dentry);
124 if (!dcs) 129 } else {
125 dcs = alloc_dcookie(path); 130 dcs = alloc_dcookie(path);
126 131 if (!dcs) {
127 if (!dcs) { 132 err = -ENOMEM;
128 err = -ENOMEM; 133 goto out;
129 goto out; 134 }
130 } 135 }
131 136
132 *cookie = dcookie_value(dcs); 137 *cookie = dcookie_value(dcs);
@@ -251,7 +256,12 @@ out_kmem:
251 256
252static void free_dcookie(struct dcookie_struct * dcs) 257static void free_dcookie(struct dcookie_struct * dcs)
253{ 258{
254 dcs->path.dentry->d_cookie = NULL; 259 struct dentry *d = dcs->path.dentry;
260
261 spin_lock(&d->d_lock);
262 d->d_flags &= ~DCACHE_COOKIE;
263 spin_unlock(&d->d_lock);
264
255 path_put(&dcs->path); 265 path_put(&dcs->path);
256 kmem_cache_free(dcookie_cache, dcs); 266 kmem_cache_free(dcookie_cache, dcs);
257} 267}
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 4a714f6c1be..fff96e152c0 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -27,25 +27,32 @@
27#define DEVPTS_SUPER_MAGIC 0x1cd1 27#define DEVPTS_SUPER_MAGIC 0x1cd1
28 28
29#define DEVPTS_DEFAULT_MODE 0600 29#define DEVPTS_DEFAULT_MODE 0600
30/*
31 * ptmx is a new node in /dev/pts and will be unused in legacy (single-
32 * instance) mode. To prevent surprises in user space, set permissions of
33 * ptmx to 0. Use 'chmod' or remount with '-o ptmxmode' to set meaningful
34 * permissions.
35 */
36#define DEVPTS_DEFAULT_PTMX_MODE 0000
30#define PTMX_MINOR 2 37#define PTMX_MINOR 2
31 38
32extern int pty_limit; /* Config limit on Unix98 ptys */ 39extern int pty_limit; /* Config limit on Unix98 ptys */
33static DEFINE_IDA(allocated_ptys);
34static DEFINE_MUTEX(allocated_ptys_lock); 40static DEFINE_MUTEX(allocated_ptys_lock);
35 41
36static struct vfsmount *devpts_mnt; 42static struct vfsmount *devpts_mnt;
37static struct dentry *devpts_root;
38 43
39static struct { 44struct pts_mount_opts {
40 int setuid; 45 int setuid;
41 int setgid; 46 int setgid;
42 uid_t uid; 47 uid_t uid;
43 gid_t gid; 48 gid_t gid;
44 umode_t mode; 49 umode_t mode;
45} config = {.mode = DEVPTS_DEFAULT_MODE}; 50 umode_t ptmxmode;
51 int newinstance;
52};
46 53
47enum { 54enum {
48 Opt_uid, Opt_gid, Opt_mode, 55 Opt_uid, Opt_gid, Opt_mode, Opt_ptmxmode, Opt_newinstance,
49 Opt_err 56 Opt_err
50}; 57};
51 58
@@ -53,18 +60,50 @@ static const match_table_t tokens = {
53 {Opt_uid, "uid=%u"}, 60 {Opt_uid, "uid=%u"},
54 {Opt_gid, "gid=%u"}, 61 {Opt_gid, "gid=%u"},
55 {Opt_mode, "mode=%o"}, 62 {Opt_mode, "mode=%o"},
63#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
64 {Opt_ptmxmode, "ptmxmode=%o"},
65 {Opt_newinstance, "newinstance"},
66#endif
56 {Opt_err, NULL} 67 {Opt_err, NULL}
57}; 68};
58 69
59static int devpts_remount(struct super_block *sb, int *flags, char *data) 70struct pts_fs_info {
71 struct ida allocated_ptys;
72 struct pts_mount_opts mount_opts;
73 struct dentry *ptmx_dentry;
74};
75
76static inline struct pts_fs_info *DEVPTS_SB(struct super_block *sb)
77{
78 return sb->s_fs_info;
79}
80
81static inline struct super_block *pts_sb_from_inode(struct inode *inode)
82{
83#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
84 if (inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC)
85 return inode->i_sb;
86#endif
87 return devpts_mnt->mnt_sb;
88}
89
90#define PARSE_MOUNT 0
91#define PARSE_REMOUNT 1
92
93static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts)
60{ 94{
61 char *p; 95 char *p;
62 96
63 config.setuid = 0; 97 opts->setuid = 0;
64 config.setgid = 0; 98 opts->setgid = 0;
65 config.uid = 0; 99 opts->uid = 0;
66 config.gid = 0; 100 opts->gid = 0;
67 config.mode = DEVPTS_DEFAULT_MODE; 101 opts->mode = DEVPTS_DEFAULT_MODE;
102 opts->ptmxmode = DEVPTS_DEFAULT_PTMX_MODE;
103
104 /* newinstance makes sense only on initial mount */
105 if (op == PARSE_MOUNT)
106 opts->newinstance = 0;
68 107
69 while ((p = strsep(&data, ",")) != NULL) { 108 while ((p = strsep(&data, ",")) != NULL) {
70 substring_t args[MAX_OPT_ARGS]; 109 substring_t args[MAX_OPT_ARGS];
@@ -79,20 +118,32 @@ static int devpts_remount(struct super_block *sb, int *flags, char *data)
79 case Opt_uid: 118 case Opt_uid:
80 if (match_int(&args[0], &option)) 119 if (match_int(&args[0], &option))
81 return -EINVAL; 120 return -EINVAL;
82 config.uid = option; 121 opts->uid = option;
83 config.setuid = 1; 122 opts->setuid = 1;
84 break; 123 break;
85 case Opt_gid: 124 case Opt_gid:
86 if (match_int(&args[0], &option)) 125 if (match_int(&args[0], &option))
87 return -EINVAL; 126 return -EINVAL;
88 config.gid = option; 127 opts->gid = option;
89 config.setgid = 1; 128 opts->setgid = 1;
90 break; 129 break;
91 case Opt_mode: 130 case Opt_mode:
92 if (match_octal(&args[0], &option)) 131 if (match_octal(&args[0], &option))
93 return -EINVAL; 132 return -EINVAL;
94 config.mode = option & S_IALLUGO; 133 opts->mode = option & S_IALLUGO;
134 break;
135#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
136 case Opt_ptmxmode:
137 if (match_octal(&args[0], &option))
138 return -EINVAL;
139 opts->ptmxmode = option & S_IALLUGO;
140 break;
141 case Opt_newinstance:
142 /* newinstance makes sense only on initial mount */
143 if (op == PARSE_MOUNT)
144 opts->newinstance = 1;
95 break; 145 break;
146#endif
96 default: 147 default:
97 printk(KERN_ERR "devpts: called with bogus options\n"); 148 printk(KERN_ERR "devpts: called with bogus options\n");
98 return -EINVAL; 149 return -EINVAL;
@@ -102,13 +153,108 @@ static int devpts_remount(struct super_block *sb, int *flags, char *data)
102 return 0; 153 return 0;
103} 154}
104 155
156#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
157static int mknod_ptmx(struct super_block *sb)
158{
159 int mode;
160 int rc = -ENOMEM;
161 struct dentry *dentry;
162 struct inode *inode;
163 struct dentry *root = sb->s_root;
164 struct pts_fs_info *fsi = DEVPTS_SB(sb);
165 struct pts_mount_opts *opts = &fsi->mount_opts;
166
167 mutex_lock(&root->d_inode->i_mutex);
168
169 /* If we have already created ptmx node, return */
170 if (fsi->ptmx_dentry) {
171 rc = 0;
172 goto out;
173 }
174
175 dentry = d_alloc_name(root, "ptmx");
176 if (!dentry) {
177 printk(KERN_NOTICE "Unable to alloc dentry for ptmx node\n");
178 goto out;
179 }
180
181 /*
182 * Create a new 'ptmx' node in this mount of devpts.
183 */
184 inode = new_inode(sb);
185 if (!inode) {
186 printk(KERN_ERR "Unable to alloc inode for ptmx node\n");
187 dput(dentry);
188 goto out;
189 }
190
191 inode->i_ino = 2;
192 inode->i_uid = inode->i_gid = 0;
193 inode->i_blocks = 0;
194 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
195
196 mode = S_IFCHR|opts->ptmxmode;
197 init_special_inode(inode, mode, MKDEV(TTYAUX_MAJOR, 2));
198
199 d_add(dentry, inode);
200
201 fsi->ptmx_dentry = dentry;
202 rc = 0;
203
204 printk(KERN_DEBUG "Created ptmx node in devpts ino %lu\n",
205 inode->i_ino);
206out:
207 mutex_unlock(&root->d_inode->i_mutex);
208 return rc;
209}
210
211static void update_ptmx_mode(struct pts_fs_info *fsi)
212{
213 struct inode *inode;
214 if (fsi->ptmx_dentry) {
215 inode = fsi->ptmx_dentry->d_inode;
216 inode->i_mode = S_IFCHR|fsi->mount_opts.ptmxmode;
217 }
218}
219#else
220static inline void update_ptmx_mode(struct pts_fs_info *fsi)
221{
222 return;
223}
224#endif
225
226static int devpts_remount(struct super_block *sb, int *flags, char *data)
227{
228 int err;
229 struct pts_fs_info *fsi = DEVPTS_SB(sb);
230 struct pts_mount_opts *opts = &fsi->mount_opts;
231
232 err = parse_mount_options(data, PARSE_REMOUNT, opts);
233
234 /*
235 * parse_mount_options() restores options to default values
236 * before parsing and may have changed ptmxmode. So, update the
237 * mode in the inode too. Bogus options don't fail the remount,
238 * so do this even on error return.
239 */
240 update_ptmx_mode(fsi);
241
242 return err;
243}
244
105static int devpts_show_options(struct seq_file *seq, struct vfsmount *vfs) 245static int devpts_show_options(struct seq_file *seq, struct vfsmount *vfs)
106{ 246{
107 if (config.setuid) 247 struct pts_fs_info *fsi = DEVPTS_SB(vfs->mnt_sb);
108 seq_printf(seq, ",uid=%u", config.uid); 248 struct pts_mount_opts *opts = &fsi->mount_opts;
109 if (config.setgid) 249
110 seq_printf(seq, ",gid=%u", config.gid); 250 if (opts->setuid)
111 seq_printf(seq, ",mode=%03o", config.mode); 251 seq_printf(seq, ",uid=%u", opts->uid);
252 if (opts->setgid)
253 seq_printf(seq, ",gid=%u", opts->gid);
254 seq_printf(seq, ",mode=%03o", opts->mode);
255#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
256 seq_printf(seq, ",ptmxmode=%03o", opts->ptmxmode);
257#endif
112 258
113 return 0; 259 return 0;
114} 260}
@@ -119,10 +265,25 @@ static const struct super_operations devpts_sops = {
119 .show_options = devpts_show_options, 265 .show_options = devpts_show_options,
120}; 266};
121 267
268static void *new_pts_fs_info(void)
269{
270 struct pts_fs_info *fsi;
271
272 fsi = kzalloc(sizeof(struct pts_fs_info), GFP_KERNEL);
273 if (!fsi)
274 return NULL;
275
276 ida_init(&fsi->allocated_ptys);
277 fsi->mount_opts.mode = DEVPTS_DEFAULT_MODE;
278 fsi->mount_opts.ptmxmode = DEVPTS_DEFAULT_PTMX_MODE;
279
280 return fsi;
281}
282
122static int 283static int
123devpts_fill_super(struct super_block *s, void *data, int silent) 284devpts_fill_super(struct super_block *s, void *data, int silent)
124{ 285{
125 struct inode * inode; 286 struct inode *inode;
126 287
127 s->s_blocksize = 1024; 288 s->s_blocksize = 1024;
128 s->s_blocksize_bits = 10; 289 s->s_blocksize_bits = 10;
@@ -130,9 +291,13 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
130 s->s_op = &devpts_sops; 291 s->s_op = &devpts_sops;
131 s->s_time_gran = 1; 292 s->s_time_gran = 1;
132 293
294 s->s_fs_info = new_pts_fs_info();
295 if (!s->s_fs_info)
296 goto fail;
297
133 inode = new_inode(s); 298 inode = new_inode(s);
134 if (!inode) 299 if (!inode)
135 goto fail; 300 goto free_fsi;
136 inode->i_ino = 1; 301 inode->i_ino = 1;
137 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 302 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
138 inode->i_blocks = 0; 303 inode->i_blocks = 0;
@@ -142,27 +307,226 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
142 inode->i_fop = &simple_dir_operations; 307 inode->i_fop = &simple_dir_operations;
143 inode->i_nlink = 2; 308 inode->i_nlink = 2;
144 309
145 devpts_root = s->s_root = d_alloc_root(inode); 310 s->s_root = d_alloc_root(inode);
146 if (s->s_root) 311 if (s->s_root)
147 return 0; 312 return 0;
148 313
149 printk("devpts: get root dentry failed\n"); 314 printk(KERN_ERR "devpts: get root dentry failed\n");
150 iput(inode); 315 iput(inode);
316
317free_fsi:
318 kfree(s->s_fs_info);
151fail: 319fail:
152 return -ENOMEM; 320 return -ENOMEM;
153} 321}
154 322
323#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
324static int compare_init_pts_sb(struct super_block *s, void *p)
325{
326 if (devpts_mnt)
327 return devpts_mnt->mnt_sb == s;
328 return 0;
329}
330
331/*
332 * Safely parse the mount options in @data and update @opts.
333 *
334 * devpts ends up parsing options two times during mount, due to the
335 * two modes of operation it supports. The first parse occurs in
336 * devpts_get_sb() when determining the mode (single-instance or
337 * multi-instance mode). The second parse happens in devpts_remount()
338 * or new_pts_mount() depending on the mode.
339 *
340 * Parsing of options modifies the @data making subsequent parsing
341 * incorrect. So make a local copy of @data and parse it.
342 *
343 * Return: 0 On success, -errno on error
344 */
345static int safe_parse_mount_options(void *data, struct pts_mount_opts *opts)
346{
347 int rc;
348 void *datacp;
349
350 if (!data)
351 return 0;
352
353 /* Use kstrdup() ? */
354 datacp = kmalloc(PAGE_SIZE, GFP_KERNEL);
355 if (!datacp)
356 return -ENOMEM;
357
358 memcpy(datacp, data, PAGE_SIZE);
359 rc = parse_mount_options((char *)datacp, PARSE_MOUNT, opts);
360 kfree(datacp);
361
362 return rc;
363}
364
365/*
366 * Mount a new (private) instance of devpts. PTYs created in this
367 * instance are independent of the PTYs in other devpts instances.
368 */
369static int new_pts_mount(struct file_system_type *fs_type, int flags,
370 void *data, struct vfsmount *mnt)
371{
372 int err;
373 struct pts_fs_info *fsi;
374 struct pts_mount_opts *opts;
375
376 printk(KERN_NOTICE "devpts: newinstance mount\n");
377
378 err = get_sb_nodev(fs_type, flags, data, devpts_fill_super, mnt);
379 if (err)
380 return err;
381
382 fsi = DEVPTS_SB(mnt->mnt_sb);
383 opts = &fsi->mount_opts;
384
385 err = parse_mount_options(data, PARSE_MOUNT, opts);
386 if (err)
387 goto fail;
388
389 err = mknod_ptmx(mnt->mnt_sb);
390 if (err)
391 goto fail;
392
393 return 0;
394
395fail:
396 dput(mnt->mnt_sb->s_root);
397 deactivate_super(mnt->mnt_sb);
398 return err;
399}
400
401/*
402 * Check if 'newinstance' mount option was specified in @data.
403 *
404 * Return: -errno on error (eg: invalid mount options specified)
405 * : 1 if 'newinstance' mount option was specified
406 * : 0 if 'newinstance' mount option was NOT specified
407 */
408static int is_new_instance_mount(void *data)
409{
410 int rc;
411 struct pts_mount_opts opts;
412
413 if (!data)
414 return 0;
415
416 rc = safe_parse_mount_options(data, &opts);
417 if (!rc)
418 rc = opts.newinstance;
419
420 return rc;
421}
422
423/*
424 * get_init_pts_sb()
425 *
426 * This interface is needed to support multiple namespace semantics in
427 * devpts while preserving backward compatibility of the current 'single-
428 * namespace' semantics. i.e all mounts of devpts without the 'newinstance'
429 * mount option should bind to the initial kernel mount, like
430 * get_sb_single().
431 *
432 * Mounts with 'newinstance' option create a new private namespace.
433 *
434 * But for single-mount semantics, devpts cannot use get_sb_single(),
435 * because get_sb_single()/sget() find and use the super-block from
436 * the most recent mount of devpts. But that recent mount may be a
437 * 'newinstance' mount and get_sb_single() would pick the newinstance
438 * super-block instead of the initial super-block.
439 *
440 * This interface is identical to get_sb_single() except that it
441 * consistently selects the 'single-namespace' superblock even in the
442 * presence of the private namespace (i.e 'newinstance') super-blocks.
443 */
444static int get_init_pts_sb(struct file_system_type *fs_type, int flags,
445 void *data, struct vfsmount *mnt)
446{
447 struct super_block *s;
448 int error;
449
450 s = sget(fs_type, compare_init_pts_sb, set_anon_super, NULL);
451 if (IS_ERR(s))
452 return PTR_ERR(s);
453
454 if (!s->s_root) {
455 s->s_flags = flags;
456 error = devpts_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
457 if (error) {
458 up_write(&s->s_umount);
459 deactivate_super(s);
460 return error;
461 }
462 s->s_flags |= MS_ACTIVE;
463 }
464 do_remount_sb(s, flags, data, 0);
465 return simple_set_mnt(mnt, s);
466}
467
468/*
469 * Mount or remount the initial kernel mount of devpts. This type of
470 * mount maintains the legacy, single-instance semantics, while the
471 * kernel still allows multiple-instances.
472 */
473static int init_pts_mount(struct file_system_type *fs_type, int flags,
474 void *data, struct vfsmount *mnt)
475{
476 int err;
477
478 err = get_init_pts_sb(fs_type, flags, data, mnt);
479 if (err)
480 return err;
481
482 err = mknod_ptmx(mnt->mnt_sb);
483 if (err) {
484 dput(mnt->mnt_sb->s_root);
485 deactivate_super(mnt->mnt_sb);
486 }
487
488 return err;
489}
490
155static int devpts_get_sb(struct file_system_type *fs_type, 491static int devpts_get_sb(struct file_system_type *fs_type,
156 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 492 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
157{ 493{
494 int new;
495
496 new = is_new_instance_mount(data);
497 if (new < 0)
498 return new;
499
500 if (new)
501 return new_pts_mount(fs_type, flags, data, mnt);
502
503 return init_pts_mount(fs_type, flags, data, mnt);
504}
505#else
506/*
507 * This supports only the legacy single-instance semantics (no
508 * multiple-instance semantics)
509 */
510static int devpts_get_sb(struct file_system_type *fs_type, int flags,
511 const char *dev_name, void *data, struct vfsmount *mnt)
512{
158 return get_sb_single(fs_type, flags, data, devpts_fill_super, mnt); 513 return get_sb_single(fs_type, flags, data, devpts_fill_super, mnt);
159} 514}
515#endif
516
517static void devpts_kill_sb(struct super_block *sb)
518{
519 struct pts_fs_info *fsi = DEVPTS_SB(sb);
520
521 kfree(fsi);
522 kill_litter_super(sb);
523}
160 524
161static struct file_system_type devpts_fs_type = { 525static struct file_system_type devpts_fs_type = {
162 .owner = THIS_MODULE, 526 .owner = THIS_MODULE,
163 .name = "devpts", 527 .name = "devpts",
164 .get_sb = devpts_get_sb, 528 .get_sb = devpts_get_sb,
165 .kill_sb = kill_anon_super, 529 .kill_sb = devpts_kill_sb,
166}; 530};
167 531
168/* 532/*
@@ -172,16 +536,17 @@ static struct file_system_type devpts_fs_type = {
172 536
173int devpts_new_index(struct inode *ptmx_inode) 537int devpts_new_index(struct inode *ptmx_inode)
174{ 538{
539 struct super_block *sb = pts_sb_from_inode(ptmx_inode);
540 struct pts_fs_info *fsi = DEVPTS_SB(sb);
175 int index; 541 int index;
176 int ida_ret; 542 int ida_ret;
177 543
178retry: 544retry:
179 if (!ida_pre_get(&allocated_ptys, GFP_KERNEL)) { 545 if (!ida_pre_get(&fsi->allocated_ptys, GFP_KERNEL))
180 return -ENOMEM; 546 return -ENOMEM;
181 }
182 547
183 mutex_lock(&allocated_ptys_lock); 548 mutex_lock(&allocated_ptys_lock);
184 ida_ret = ida_get_new(&allocated_ptys, &index); 549 ida_ret = ida_get_new(&fsi->allocated_ptys, &index);
185 if (ida_ret < 0) { 550 if (ida_ret < 0) {
186 mutex_unlock(&allocated_ptys_lock); 551 mutex_unlock(&allocated_ptys_lock);
187 if (ida_ret == -EAGAIN) 552 if (ida_ret == -EAGAIN)
@@ -190,7 +555,7 @@ retry:
190 } 555 }
191 556
192 if (index >= pty_limit) { 557 if (index >= pty_limit) {
193 ida_remove(&allocated_ptys, index); 558 ida_remove(&fsi->allocated_ptys, index);
194 mutex_unlock(&allocated_ptys_lock); 559 mutex_unlock(&allocated_ptys_lock);
195 return -EIO; 560 return -EIO;
196 } 561 }
@@ -200,18 +565,26 @@ retry:
200 565
201void devpts_kill_index(struct inode *ptmx_inode, int idx) 566void devpts_kill_index(struct inode *ptmx_inode, int idx)
202{ 567{
568 struct super_block *sb = pts_sb_from_inode(ptmx_inode);
569 struct pts_fs_info *fsi = DEVPTS_SB(sb);
570
203 mutex_lock(&allocated_ptys_lock); 571 mutex_lock(&allocated_ptys_lock);
204 ida_remove(&allocated_ptys, idx); 572 ida_remove(&fsi->allocated_ptys, idx);
205 mutex_unlock(&allocated_ptys_lock); 573 mutex_unlock(&allocated_ptys_lock);
206} 574}
207 575
208int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty) 576int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty)
209{ 577{
210 int number = tty->index; /* tty layer puts index from devpts_new_index() in here */ 578 /* tty layer puts index from devpts_new_index() in here */
579 int number = tty->index;
211 struct tty_driver *driver = tty->driver; 580 struct tty_driver *driver = tty->driver;
212 dev_t device = MKDEV(driver->major, driver->minor_start+number); 581 dev_t device = MKDEV(driver->major, driver->minor_start+number);
213 struct dentry *dentry; 582 struct dentry *dentry;
214 struct inode *inode = new_inode(devpts_mnt->mnt_sb); 583 struct super_block *sb = pts_sb_from_inode(ptmx_inode);
584 struct inode *inode = new_inode(sb);
585 struct dentry *root = sb->s_root;
586 struct pts_fs_info *fsi = DEVPTS_SB(sb);
587 struct pts_mount_opts *opts = &fsi->mount_opts;
215 char s[12]; 588 char s[12];
216 589
217 /* We're supposed to be given the slave end of a pty */ 590 /* We're supposed to be given the slave end of a pty */
@@ -221,25 +594,25 @@ int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty)
221 if (!inode) 594 if (!inode)
222 return -ENOMEM; 595 return -ENOMEM;
223 596
224 inode->i_ino = number+2; 597 inode->i_ino = number + 3;
225 inode->i_uid = config.setuid ? config.uid : current->fsuid; 598 inode->i_uid = opts->setuid ? opts->uid : current_fsuid();
226 inode->i_gid = config.setgid ? config.gid : current->fsgid; 599 inode->i_gid = opts->setgid ? opts->gid : current_fsgid();
227 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 600 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
228 init_special_inode(inode, S_IFCHR|config.mode, device); 601 init_special_inode(inode, S_IFCHR|opts->mode, device);
229 inode->i_private = tty; 602 inode->i_private = tty;
230 tty->driver_data = inode; 603 tty->driver_data = inode;
231 604
232 sprintf(s, "%d", number); 605 sprintf(s, "%d", number);
233 606
234 mutex_lock(&devpts_root->d_inode->i_mutex); 607 mutex_lock(&root->d_inode->i_mutex);
235 608
236 dentry = d_alloc_name(devpts_root, s); 609 dentry = d_alloc_name(root, s);
237 if (!IS_ERR(dentry)) { 610 if (!IS_ERR(dentry)) {
238 d_add(dentry, inode); 611 d_add(dentry, inode);
239 fsnotify_create(devpts_root->d_inode, dentry); 612 fsnotify_create(root->d_inode, dentry);
240 } 613 }
241 614
242 mutex_unlock(&devpts_root->d_inode->i_mutex); 615 mutex_unlock(&root->d_inode->i_mutex);
243 616
244 return 0; 617 return 0;
245} 618}
@@ -256,20 +629,27 @@ struct tty_struct *devpts_get_tty(struct inode *pts_inode, int number)
256void devpts_pty_kill(struct tty_struct *tty) 629void devpts_pty_kill(struct tty_struct *tty)
257{ 630{
258 struct inode *inode = tty->driver_data; 631 struct inode *inode = tty->driver_data;
632 struct super_block *sb = pts_sb_from_inode(inode);
633 struct dentry *root = sb->s_root;
259 struct dentry *dentry; 634 struct dentry *dentry;
260 635
261 BUG_ON(inode->i_rdev == MKDEV(TTYAUX_MAJOR, PTMX_MINOR)); 636 BUG_ON(inode->i_rdev == MKDEV(TTYAUX_MAJOR, PTMX_MINOR));
262 637
263 mutex_lock(&devpts_root->d_inode->i_mutex); 638 mutex_lock(&root->d_inode->i_mutex);
264 639
265 dentry = d_find_alias(inode); 640 dentry = d_find_alias(inode);
266 if (dentry && !IS_ERR(dentry)) { 641 if (IS_ERR(dentry))
642 goto out;
643
644 if (dentry) {
267 inode->i_nlink--; 645 inode->i_nlink--;
268 d_delete(dentry); 646 d_delete(dentry);
269 dput(dentry); 647 dput(dentry); /* d_alloc_name() in devpts_pty_new() */
270 } 648 }
271 649
272 mutex_unlock(&devpts_root->d_inode->i_mutex); 650 dput(dentry); /* d_find_alias above */
651out:
652 mutex_unlock(&root->d_inode->i_mutex);
273} 653}
274 654
275static int __init init_devpts_fs(void) 655static int __init init_devpts_fs(void)
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c
index 18bda83cc89..aa2a5775a02 100644
--- a/fs/dlm/netlink.c
+++ b/fs/dlm/netlink.c
@@ -127,8 +127,8 @@ static void fill_data(struct dlm_lock_data *data, struct dlm_lkb *lkb)
127 127
128void dlm_timeout_warn(struct dlm_lkb *lkb) 128void dlm_timeout_warn(struct dlm_lkb *lkb)
129{ 129{
130 struct sk_buff *uninitialized_var(send_skb);
130 struct dlm_lock_data *data; 131 struct dlm_lock_data *data;
131 struct sk_buff *send_skb;
132 size_t size; 132 size_t size;
133 int rv; 133 int rv;
134 134
diff --git a/fs/dquot.c b/fs/dquot.c
index 5e95261005b..c237ccc8581 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -874,7 +874,7 @@ static inline int need_print_warning(struct dquot *dquot)
874 874
875 switch (dquot->dq_type) { 875 switch (dquot->dq_type) {
876 case USRQUOTA: 876 case USRQUOTA:
877 return current->fsuid == dquot->dq_id; 877 return current_fsuid() == dquot->dq_id;
878 case GRPQUOTA: 878 case GRPQUOTA:
879 return in_group_p(dquot->dq_id); 879 return in_group_p(dquot->dq_id);
880 } 880 }
@@ -981,7 +981,7 @@ static void send_warning(const struct dquot *dquot, const char warntype)
981 MINOR(dquot->dq_sb->s_dev)); 981 MINOR(dquot->dq_sb->s_dev));
982 if (ret) 982 if (ret)
983 goto attr_err_out; 983 goto attr_err_out;
984 ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current->user->uid); 984 ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current_uid());
985 if (ret) 985 if (ret)
986 goto attr_err_out; 986 goto attr_err_out;
987 genlmsg_end(skb, msg_head); 987 genlmsg_end(skb, msg_head);
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 3504cf9df35..a75026d35d1 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -691,7 +691,8 @@ int ecryptfs_init_kthread(void);
691void ecryptfs_destroy_kthread(void); 691void ecryptfs_destroy_kthread(void);
692int ecryptfs_privileged_open(struct file **lower_file, 692int ecryptfs_privileged_open(struct file **lower_file,
693 struct dentry *lower_dentry, 693 struct dentry *lower_dentry,
694 struct vfsmount *lower_mnt); 694 struct vfsmount *lower_mnt,
695 const struct cred *cred);
695int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry); 696int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry);
696 697
697#endif /* #ifndef ECRYPTFS_KERNEL_H */ 698#endif /* #ifndef ECRYPTFS_KERNEL_H */
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 89209f00f9c..5e78fc17988 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -673,10 +673,11 @@ static void *ecryptfs_follow_link(struct dentry *dentry, struct nameidata *nd)
673 ecryptfs_printk(KERN_DEBUG, "Calling readlink w/ " 673 ecryptfs_printk(KERN_DEBUG, "Calling readlink w/ "
674 "dentry->d_name.name = [%s]\n", dentry->d_name.name); 674 "dentry->d_name.name = [%s]\n", dentry->d_name.name);
675 rc = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len); 675 rc = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len);
676 buf[rc] = '\0';
677 set_fs(old_fs); 676 set_fs(old_fs);
678 if (rc < 0) 677 if (rc < 0)
679 goto out_free; 678 goto out_free;
679 else
680 buf[rc] = '\0';
680 rc = 0; 681 rc = 0;
681 nd_set_link(nd, buf); 682 nd_set_link(nd, buf);
682 goto out; 683 goto out;
diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c
index c440c6b58b2..c6d7a4d748a 100644
--- a/fs/ecryptfs/kthread.c
+++ b/fs/ecryptfs/kthread.c
@@ -73,7 +73,7 @@ static int ecryptfs_threadfn(void *ignored)
73 mntget(req->lower_mnt); 73 mntget(req->lower_mnt);
74 (*req->lower_file) = dentry_open( 74 (*req->lower_file) = dentry_open(
75 req->lower_dentry, req->lower_mnt, 75 req->lower_dentry, req->lower_mnt,
76 (O_RDWR | O_LARGEFILE)); 76 (O_RDWR | O_LARGEFILE), current_cred());
77 req->flags |= ECRYPTFS_REQ_PROCESSED; 77 req->flags |= ECRYPTFS_REQ_PROCESSED;
78 } 78 }
79 wake_up(&req->wait); 79 wake_up(&req->wait);
@@ -132,7 +132,8 @@ void ecryptfs_destroy_kthread(void)
132 */ 132 */
133int ecryptfs_privileged_open(struct file **lower_file, 133int ecryptfs_privileged_open(struct file **lower_file,
134 struct dentry *lower_dentry, 134 struct dentry *lower_dentry,
135 struct vfsmount *lower_mnt) 135 struct vfsmount *lower_mnt,
136 const struct cred *cred)
136{ 137{
137 struct ecryptfs_open_req *req; 138 struct ecryptfs_open_req *req;
138 int rc = 0; 139 int rc = 0;
@@ -143,7 +144,7 @@ int ecryptfs_privileged_open(struct file **lower_file,
143 dget(lower_dentry); 144 dget(lower_dentry);
144 mntget(lower_mnt); 145 mntget(lower_mnt);
145 (*lower_file) = dentry_open(lower_dentry, lower_mnt, 146 (*lower_file) = dentry_open(lower_dentry, lower_mnt,
146 (O_RDWR | O_LARGEFILE)); 147 (O_RDWR | O_LARGEFILE), cred);
147 if (!IS_ERR(*lower_file)) 148 if (!IS_ERR(*lower_file))
148 goto out; 149 goto out;
149 req = kmem_cache_alloc(ecryptfs_open_req_cache, GFP_KERNEL); 150 req = kmem_cache_alloc(ecryptfs_open_req_cache, GFP_KERNEL);
@@ -184,7 +185,7 @@ int ecryptfs_privileged_open(struct file **lower_file,
184 dget(lower_dentry); 185 dget(lower_dentry);
185 mntget(lower_mnt); 186 mntget(lower_mnt);
186 (*lower_file) = dentry_open(lower_dentry, lower_mnt, 187 (*lower_file) = dentry_open(lower_dentry, lower_mnt,
187 (O_RDONLY | O_LARGEFILE)); 188 (O_RDONLY | O_LARGEFILE), cred);
188 if (IS_ERR(*lower_file)) { 189 if (IS_ERR(*lower_file)) {
189 rc = PTR_ERR(*req->lower_file); 190 rc = PTR_ERR(*req->lower_file);
190 (*lower_file) = NULL; 191 (*lower_file) = NULL;
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 64d2ba980df..fd630713c5c 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -115,6 +115,7 @@ void __ecryptfs_printk(const char *fmt, ...)
115 */ 115 */
116int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry) 116int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
117{ 117{
118 const struct cred *cred = current_cred();
118 struct ecryptfs_inode_info *inode_info = 119 struct ecryptfs_inode_info *inode_info =
119 ecryptfs_inode_to_private(ecryptfs_dentry->d_inode); 120 ecryptfs_inode_to_private(ecryptfs_dentry->d_inode);
120 int rc = 0; 121 int rc = 0;
@@ -127,7 +128,7 @@ int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
127 128
128 lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); 129 lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
129 rc = ecryptfs_privileged_open(&inode_info->lower_file, 130 rc = ecryptfs_privileged_open(&inode_info->lower_file,
130 lower_dentry, lower_mnt); 131 lower_dentry, lower_mnt, cred);
131 if (rc || IS_ERR(inode_info->lower_file)) { 132 if (rc || IS_ERR(inode_info->lower_file)) {
132 printk(KERN_ERR "Error opening lower persistent file " 133 printk(KERN_ERR "Error opening lower persistent file "
133 "for lower_dentry [0x%p] and lower_mnt [0x%p]; " 134 "for lower_dentry [0x%p] and lower_mnt [0x%p]; "
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index c6983978a31..6913f727624 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -360,7 +360,8 @@ int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid,
360 struct ecryptfs_msg_ctx *msg_ctx; 360 struct ecryptfs_msg_ctx *msg_ctx;
361 size_t msg_size; 361 size_t msg_size;
362 struct nsproxy *nsproxy; 362 struct nsproxy *nsproxy;
363 struct user_namespace *current_user_ns; 363 struct user_namespace *tsk_user_ns;
364 uid_t ctx_euid;
364 int rc; 365 int rc;
365 366
366 if (msg->index >= ecryptfs_message_buf_len) { 367 if (msg->index >= ecryptfs_message_buf_len) {
@@ -384,9 +385,9 @@ int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid,
384 mutex_unlock(&ecryptfs_daemon_hash_mux); 385 mutex_unlock(&ecryptfs_daemon_hash_mux);
385 goto wake_up; 386 goto wake_up;
386 } 387 }
387 current_user_ns = nsproxy->user_ns; 388 tsk_user_ns = __task_cred(msg_ctx->task)->user->user_ns;
388 rc = ecryptfs_find_daemon_by_euid(&daemon, msg_ctx->task->euid, 389 ctx_euid = task_euid(msg_ctx->task);
389 current_user_ns); 390 rc = ecryptfs_find_daemon_by_euid(&daemon, ctx_euid, tsk_user_ns);
390 rcu_read_unlock(); 391 rcu_read_unlock();
391 mutex_unlock(&ecryptfs_daemon_hash_mux); 392 mutex_unlock(&ecryptfs_daemon_hash_mux);
392 if (rc) { 393 if (rc) {
@@ -394,28 +395,28 @@ int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid,
394 printk(KERN_WARNING "%s: User [%d] received a " 395 printk(KERN_WARNING "%s: User [%d] received a "
395 "message response from process [0x%p] but does " 396 "message response from process [0x%p] but does "
396 "not have a registered daemon\n", __func__, 397 "not have a registered daemon\n", __func__,
397 msg_ctx->task->euid, pid); 398 ctx_euid, pid);
398 goto wake_up; 399 goto wake_up;
399 } 400 }
400 if (msg_ctx->task->euid != euid) { 401 if (ctx_euid != euid) {
401 rc = -EBADMSG; 402 rc = -EBADMSG;
402 printk(KERN_WARNING "%s: Received message from user " 403 printk(KERN_WARNING "%s: Received message from user "
403 "[%d]; expected message from user [%d]\n", __func__, 404 "[%d]; expected message from user [%d]\n", __func__,
404 euid, msg_ctx->task->euid); 405 euid, ctx_euid);
405 goto unlock; 406 goto unlock;
406 } 407 }
407 if (current_user_ns != user_ns) { 408 if (tsk_user_ns != user_ns) {
408 rc = -EBADMSG; 409 rc = -EBADMSG;
409 printk(KERN_WARNING "%s: Received message from user_ns " 410 printk(KERN_WARNING "%s: Received message from user_ns "
410 "[0x%p]; expected message from user_ns [0x%p]\n", 411 "[0x%p]; expected message from user_ns [0x%p]\n",
411 __func__, user_ns, nsproxy->user_ns); 412 __func__, user_ns, tsk_user_ns);
412 goto unlock; 413 goto unlock;
413 } 414 }
414 if (daemon->pid != pid) { 415 if (daemon->pid != pid) {
415 rc = -EBADMSG; 416 rc = -EBADMSG;
416 printk(KERN_ERR "%s: User [%d] sent a message response " 417 printk(KERN_ERR "%s: User [%d] sent a message response "
417 "from an unrecognized process [0x%p]\n", 418 "from an unrecognized process [0x%p]\n",
418 __func__, msg_ctx->task->euid, pid); 419 __func__, ctx_euid, pid);
419 goto unlock; 420 goto unlock;
420 } 421 }
421 if (msg_ctx->state != ECRYPTFS_MSG_CTX_STATE_PENDING) { 422 if (msg_ctx->state != ECRYPTFS_MSG_CTX_STATE_PENDING) {
@@ -464,14 +465,14 @@ ecryptfs_send_message_locked(char *data, int data_len, u8 msg_type,
464 struct ecryptfs_msg_ctx **msg_ctx) 465 struct ecryptfs_msg_ctx **msg_ctx)
465{ 466{
466 struct ecryptfs_daemon *daemon; 467 struct ecryptfs_daemon *daemon;
468 uid_t euid = current_euid();
467 int rc; 469 int rc;
468 470
469 rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid, 471 rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns());
470 current->nsproxy->user_ns);
471 if (rc || !daemon) { 472 if (rc || !daemon) {
472 rc = -ENOTCONN; 473 rc = -ENOTCONN;
473 printk(KERN_ERR "%s: User [%d] does not have a daemon " 474 printk(KERN_ERR "%s: User [%d] does not have a daemon "
474 "registered\n", __func__, current->euid); 475 "registered\n", __func__, euid);
475 goto out; 476 goto out;
476 } 477 }
477 mutex_lock(&ecryptfs_msg_ctx_lists_mux); 478 mutex_lock(&ecryptfs_msg_ctx_lists_mux);
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
index b484792a099..efd95a0ed1e 100644
--- a/fs/ecryptfs/miscdev.c
+++ b/fs/ecryptfs/miscdev.c
@@ -42,12 +42,12 @@ ecryptfs_miscdev_poll(struct file *file, poll_table *pt)
42{ 42{
43 struct ecryptfs_daemon *daemon; 43 struct ecryptfs_daemon *daemon;
44 unsigned int mask = 0; 44 unsigned int mask = 0;
45 uid_t euid = current_euid();
45 int rc; 46 int rc;
46 47
47 mutex_lock(&ecryptfs_daemon_hash_mux); 48 mutex_lock(&ecryptfs_daemon_hash_mux);
48 /* TODO: Just use file->private_data? */ 49 /* TODO: Just use file->private_data? */
49 rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid, 50 rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns());
50 current->nsproxy->user_ns);
51 BUG_ON(rc || !daemon); 51 BUG_ON(rc || !daemon);
52 mutex_lock(&daemon->mux); 52 mutex_lock(&daemon->mux);
53 mutex_unlock(&ecryptfs_daemon_hash_mux); 53 mutex_unlock(&ecryptfs_daemon_hash_mux);
@@ -83,6 +83,7 @@ static int
83ecryptfs_miscdev_open(struct inode *inode, struct file *file) 83ecryptfs_miscdev_open(struct inode *inode, struct file *file)
84{ 84{
85 struct ecryptfs_daemon *daemon = NULL; 85 struct ecryptfs_daemon *daemon = NULL;
86 uid_t euid = current_euid();
86 int rc; 87 int rc;
87 88
88 mutex_lock(&ecryptfs_daemon_hash_mux); 89 mutex_lock(&ecryptfs_daemon_hash_mux);
@@ -93,11 +94,9 @@ ecryptfs_miscdev_open(struct inode *inode, struct file *file)
93 "count; rc = [%d]\n", __func__, rc); 94 "count; rc = [%d]\n", __func__, rc);
94 goto out_unlock_daemon_list; 95 goto out_unlock_daemon_list;
95 } 96 }
96 rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid, 97 rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns());
97 current->nsproxy->user_ns);
98 if (rc || !daemon) { 98 if (rc || !daemon) {
99 rc = ecryptfs_spawn_daemon(&daemon, current->euid, 99 rc = ecryptfs_spawn_daemon(&daemon, euid, current_user_ns(),
100 current->nsproxy->user_ns,
101 task_pid(current)); 100 task_pid(current));
102 if (rc) { 101 if (rc) {
103 printk(KERN_ERR "%s: Error attempting to spawn daemon; " 102 printk(KERN_ERR "%s: Error attempting to spawn daemon; "
@@ -147,11 +146,11 @@ static int
147ecryptfs_miscdev_release(struct inode *inode, struct file *file) 146ecryptfs_miscdev_release(struct inode *inode, struct file *file)
148{ 147{
149 struct ecryptfs_daemon *daemon = NULL; 148 struct ecryptfs_daemon *daemon = NULL;
149 uid_t euid = current_euid();
150 int rc; 150 int rc;
151 151
152 mutex_lock(&ecryptfs_daemon_hash_mux); 152 mutex_lock(&ecryptfs_daemon_hash_mux);
153 rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid, 153 rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns());
154 current->nsproxy->user_ns);
155 BUG_ON(rc || !daemon); 154 BUG_ON(rc || !daemon);
156 mutex_lock(&daemon->mux); 155 mutex_lock(&daemon->mux);
157 BUG_ON(daemon->pid != task_pid(current)); 156 BUG_ON(daemon->pid != task_pid(current));
@@ -246,12 +245,12 @@ ecryptfs_miscdev_read(struct file *file, char __user *buf, size_t count,
246 char packet_length[3]; 245 char packet_length[3];
247 size_t i; 246 size_t i;
248 size_t total_length; 247 size_t total_length;
248 uid_t euid = current_euid();
249 int rc; 249 int rc;
250 250
251 mutex_lock(&ecryptfs_daemon_hash_mux); 251 mutex_lock(&ecryptfs_daemon_hash_mux);
252 /* TODO: Just use file->private_data? */ 252 /* TODO: Just use file->private_data? */
253 rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid, 253 rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns());
254 current->nsproxy->user_ns);
255 BUG_ON(rc || !daemon); 254 BUG_ON(rc || !daemon);
256 mutex_lock(&daemon->mux); 255 mutex_lock(&daemon->mux);
257 if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) { 256 if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) {
@@ -290,8 +289,8 @@ check_list:
290 * message from the queue; try again */ 289 * message from the queue; try again */
291 goto check_list; 290 goto check_list;
292 } 291 }
293 BUG_ON(current->euid != daemon->euid); 292 BUG_ON(euid != daemon->euid);
294 BUG_ON(current->nsproxy->user_ns != daemon->user_ns); 293 BUG_ON(current_user_ns() != daemon->user_ns);
295 BUG_ON(task_pid(current) != daemon->pid); 294 BUG_ON(task_pid(current) != daemon->pid);
296 msg_ctx = list_first_entry(&daemon->msg_ctx_out_queue, 295 msg_ctx = list_first_entry(&daemon->msg_ctx_out_queue,
297 struct ecryptfs_msg_ctx, daemon_out_list); 296 struct ecryptfs_msg_ctx, daemon_out_list);
@@ -414,6 +413,7 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf,
414 size_t packet_size, packet_size_length, i; 413 size_t packet_size, packet_size_length, i;
415 ssize_t sz = 0; 414 ssize_t sz = 0;
416 char *data; 415 char *data;
416 uid_t euid = current_euid();
417 int rc; 417 int rc;
418 418
419 if (count == 0) 419 if (count == 0)
@@ -463,8 +463,7 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf,
463 goto out_free; 463 goto out_free;
464 } 464 }
465 rc = ecryptfs_miscdev_response(&data[i], packet_size, 465 rc = ecryptfs_miscdev_response(&data[i], packet_size,
466 current->euid, 466 euid, current_user_ns(),
467 current->nsproxy->user_ns,
468 task_pid(current), seq); 467 task_pid(current), seq);
469 if (rc) 468 if (rc)
470 printk(KERN_WARNING "%s: Failed to deliver miscdev " 469 printk(KERN_WARNING "%s: Failed to deliver miscdev "
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 04d7b3fa1ac..46cec2b6979 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -288,7 +288,7 @@ static int ecryptfs_write_begin(struct file *file,
288 loff_t prev_page_end_size; 288 loff_t prev_page_end_size;
289 int rc = 0; 289 int rc = 0;
290 290
291 page = __grab_cache_page(mapping, index); 291 page = grab_cache_page_write_begin(mapping, index, flags);
292 if (!page) 292 if (!page)
293 return -ENOMEM; 293 return -ENOMEM;
294 *pagep = page; 294 *pagep = page;
diff --git a/fs/exec.c b/fs/exec.c
index 4e834f16d9d..3ef9cf9b187 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -55,11 +55,7 @@
55#include <asm/uaccess.h> 55#include <asm/uaccess.h>
56#include <asm/mmu_context.h> 56#include <asm/mmu_context.h>
57#include <asm/tlb.h> 57#include <asm/tlb.h>
58 58#include "internal.h"
59#ifdef __alpha__
60/* for /sbin/loader handling in search_binary_handler() */
61#include <linux/a.out.h>
62#endif
63 59
64int core_uses_pid; 60int core_uses_pid;
65char core_pattern[CORENAME_MAX_SIZE] = "core"; 61char core_pattern[CORENAME_MAX_SIZE] = "core";
@@ -126,7 +122,8 @@ asmlinkage long sys_uselib(const char __user * library)
126 if (nd.path.mnt->mnt_flags & MNT_NOEXEC) 122 if (nd.path.mnt->mnt_flags & MNT_NOEXEC)
127 goto exit; 123 goto exit;
128 124
129 error = vfs_permission(&nd, MAY_READ | MAY_EXEC | MAY_OPEN); 125 error = inode_permission(nd.path.dentry->d_inode,
126 MAY_READ | MAY_EXEC | MAY_OPEN);
130 if (error) 127 if (error)
131 goto exit; 128 goto exit;
132 129
@@ -679,7 +676,7 @@ struct file *open_exec(const char *name)
679 if (nd.path.mnt->mnt_flags & MNT_NOEXEC) 676 if (nd.path.mnt->mnt_flags & MNT_NOEXEC)
680 goto out_path_put; 677 goto out_path_put;
681 678
682 err = vfs_permission(&nd, MAY_EXEC | MAY_OPEN); 679 err = inode_permission(nd.path.dentry->d_inode, MAY_EXEC | MAY_OPEN);
683 if (err) 680 if (err)
684 goto out_path_put; 681 goto out_path_put;
685 682
@@ -772,7 +769,6 @@ static int de_thread(struct task_struct *tsk)
772 struct signal_struct *sig = tsk->signal; 769 struct signal_struct *sig = tsk->signal;
773 struct sighand_struct *oldsighand = tsk->sighand; 770 struct sighand_struct *oldsighand = tsk->sighand;
774 spinlock_t *lock = &oldsighand->siglock; 771 spinlock_t *lock = &oldsighand->siglock;
775 struct task_struct *leader = NULL;
776 int count; 772 int count;
777 773
778 if (thread_group_empty(tsk)) 774 if (thread_group_empty(tsk))
@@ -810,7 +806,7 @@ static int de_thread(struct task_struct *tsk)
810 * and to assume its PID: 806 * and to assume its PID:
811 */ 807 */
812 if (!thread_group_leader(tsk)) { 808 if (!thread_group_leader(tsk)) {
813 leader = tsk->group_leader; 809 struct task_struct *leader = tsk->group_leader;
814 810
815 sig->notify_count = -1; /* for exit_notify() */ 811 sig->notify_count = -1; /* for exit_notify() */
816 for (;;) { 812 for (;;) {
@@ -862,8 +858,9 @@ static int de_thread(struct task_struct *tsk)
862 858
863 BUG_ON(leader->exit_state != EXIT_ZOMBIE); 859 BUG_ON(leader->exit_state != EXIT_ZOMBIE);
864 leader->exit_state = EXIT_DEAD; 860 leader->exit_state = EXIT_DEAD;
865
866 write_unlock_irq(&tasklist_lock); 861 write_unlock_irq(&tasklist_lock);
862
863 release_task(leader);
867 } 864 }
868 865
869 sig->group_exit_task = NULL; 866 sig->group_exit_task = NULL;
@@ -872,8 +869,6 @@ static int de_thread(struct task_struct *tsk)
872no_thread_group: 869no_thread_group:
873 exit_itimers(sig); 870 exit_itimers(sig);
874 flush_itimer_signals(); 871 flush_itimer_signals();
875 if (leader)
876 release_task(leader);
877 872
878 if (atomic_read(&oldsighand->count) != 1) { 873 if (atomic_read(&oldsighand->count) != 1) {
879 struct sighand_struct *newsighand; 874 struct sighand_struct *newsighand;
@@ -980,7 +975,7 @@ int flush_old_exec(struct linux_binprm * bprm)
980 /* This is the point of no return */ 975 /* This is the point of no return */
981 current->sas_ss_sp = current->sas_ss_size = 0; 976 current->sas_ss_sp = current->sas_ss_size = 0;
982 977
983 if (current->euid == current->uid && current->egid == current->gid) 978 if (current_euid() == current_uid() && current_egid() == current_gid())
984 set_dumpable(current->mm, 1); 979 set_dumpable(current->mm, 1);
985 else 980 else
986 set_dumpable(current->mm, suid_dumpable); 981 set_dumpable(current->mm, suid_dumpable);
@@ -1007,16 +1002,17 @@ int flush_old_exec(struct linux_binprm * bprm)
1007 */ 1002 */
1008 current->mm->task_size = TASK_SIZE; 1003 current->mm->task_size = TASK_SIZE;
1009 1004
1010 if (bprm->e_uid != current->euid || bprm->e_gid != current->egid) { 1005 /* install the new credentials */
1011 suid_keys(current); 1006 if (bprm->cred->uid != current_euid() ||
1012 set_dumpable(current->mm, suid_dumpable); 1007 bprm->cred->gid != current_egid()) {
1013 current->pdeath_signal = 0; 1008 current->pdeath_signal = 0;
1014 } else if (file_permission(bprm->file, MAY_READ) || 1009 } else if (file_permission(bprm->file, MAY_READ) ||
1015 (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) { 1010 bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP) {
1016 suid_keys(current);
1017 set_dumpable(current->mm, suid_dumpable); 1011 set_dumpable(current->mm, suid_dumpable);
1018 } 1012 }
1019 1013
1014 current->personality &= ~bprm->per_clear;
1015
1020 /* An exec changes our domain. We are no longer part of the thread 1016 /* An exec changes our domain. We are no longer part of the thread
1021 group */ 1017 group */
1022 1018
@@ -1033,13 +1029,50 @@ out:
1033 1029
1034EXPORT_SYMBOL(flush_old_exec); 1030EXPORT_SYMBOL(flush_old_exec);
1035 1031
1032/*
1033 * install the new credentials for this executable
1034 */
1035void install_exec_creds(struct linux_binprm *bprm)
1036{
1037 security_bprm_committing_creds(bprm);
1038
1039 commit_creds(bprm->cred);
1040 bprm->cred = NULL;
1041
1042 /* cred_exec_mutex must be held at least to this point to prevent
1043 * ptrace_attach() from altering our determination of the task's
1044 * credentials; any time after this it may be unlocked */
1045
1046 security_bprm_committed_creds(bprm);
1047}
1048EXPORT_SYMBOL(install_exec_creds);
1049
1050/*
1051 * determine how safe it is to execute the proposed program
1052 * - the caller must hold current->cred_exec_mutex to protect against
1053 * PTRACE_ATTACH
1054 */
1055void check_unsafe_exec(struct linux_binprm *bprm)
1056{
1057 struct task_struct *p = current;
1058
1059 bprm->unsafe = tracehook_unsafe_exec(p);
1060
1061 if (atomic_read(&p->fs->count) > 1 ||
1062 atomic_read(&p->files->count) > 1 ||
1063 atomic_read(&p->sighand->count) > 1)
1064 bprm->unsafe |= LSM_UNSAFE_SHARE;
1065}
1066
1036/* 1067/*
1037 * Fill the binprm structure from the inode. 1068 * Fill the binprm structure from the inode.
1038 * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes 1069 * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes
1070 *
1071 * This may be called multiple times for binary chains (scripts for example).
1039 */ 1072 */
1040int prepare_binprm(struct linux_binprm *bprm) 1073int prepare_binprm(struct linux_binprm *bprm)
1041{ 1074{
1042 int mode; 1075 umode_t mode;
1043 struct inode * inode = bprm->file->f_path.dentry->d_inode; 1076 struct inode * inode = bprm->file->f_path.dentry->d_inode;
1044 int retval; 1077 int retval;
1045 1078
@@ -1047,14 +1080,15 @@ int prepare_binprm(struct linux_binprm *bprm)
1047 if (bprm->file->f_op == NULL) 1080 if (bprm->file->f_op == NULL)
1048 return -EACCES; 1081 return -EACCES;
1049 1082
1050 bprm->e_uid = current->euid; 1083 /* clear any previous set[ug]id data from a previous binary */
1051 bprm->e_gid = current->egid; 1084 bprm->cred->euid = current_euid();
1085 bprm->cred->egid = current_egid();
1052 1086
1053 if(!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)) { 1087 if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)) {
1054 /* Set-uid? */ 1088 /* Set-uid? */
1055 if (mode & S_ISUID) { 1089 if (mode & S_ISUID) {
1056 current->personality &= ~PER_CLEAR_ON_SETID; 1090 bprm->per_clear |= PER_CLEAR_ON_SETID;
1057 bprm->e_uid = inode->i_uid; 1091 bprm->cred->euid = inode->i_uid;
1058 } 1092 }
1059 1093
1060 /* Set-gid? */ 1094 /* Set-gid? */
@@ -1064,52 +1098,23 @@ int prepare_binprm(struct linux_binprm *bprm)
1064 * executable. 1098 * executable.
1065 */ 1099 */
1066 if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { 1100 if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
1067 current->personality &= ~PER_CLEAR_ON_SETID; 1101 bprm->per_clear |= PER_CLEAR_ON_SETID;
1068 bprm->e_gid = inode->i_gid; 1102 bprm->cred->egid = inode->i_gid;
1069 } 1103 }
1070 } 1104 }
1071 1105
1072 /* fill in binprm security blob */ 1106 /* fill in binprm security blob */
1073 retval = security_bprm_set(bprm); 1107 retval = security_bprm_set_creds(bprm);
1074 if (retval) 1108 if (retval)
1075 return retval; 1109 return retval;
1110 bprm->cred_prepared = 1;
1076 1111
1077 memset(bprm->buf,0,BINPRM_BUF_SIZE); 1112 memset(bprm->buf, 0, BINPRM_BUF_SIZE);
1078 return kernel_read(bprm->file,0,bprm->buf,BINPRM_BUF_SIZE); 1113 return kernel_read(bprm->file, 0, bprm->buf, BINPRM_BUF_SIZE);
1079} 1114}
1080 1115
1081EXPORT_SYMBOL(prepare_binprm); 1116EXPORT_SYMBOL(prepare_binprm);
1082 1117
1083static int unsafe_exec(struct task_struct *p)
1084{
1085 int unsafe = tracehook_unsafe_exec(p);
1086
1087 if (atomic_read(&p->fs->count) > 1 ||
1088 atomic_read(&p->files->count) > 1 ||
1089 atomic_read(&p->sighand->count) > 1)
1090 unsafe |= LSM_UNSAFE_SHARE;
1091
1092 return unsafe;
1093}
1094
1095void compute_creds(struct linux_binprm *bprm)
1096{
1097 int unsafe;
1098
1099 if (bprm->e_uid != current->uid) {
1100 suid_keys(current);
1101 current->pdeath_signal = 0;
1102 }
1103 exec_keys(current);
1104
1105 task_lock(current);
1106 unsafe = unsafe_exec(current);
1107 security_bprm_apply_creds(bprm, unsafe);
1108 task_unlock(current);
1109 security_bprm_post_apply_creds(bprm);
1110}
1111EXPORT_SYMBOL(compute_creds);
1112
1113/* 1118/*
1114 * Arguments are '\0' separated strings found at the location bprm->p 1119 * Arguments are '\0' separated strings found at the location bprm->p
1115 * points to; chop off the first by relocating brpm->p to right after 1120 * points to; chop off the first by relocating brpm->p to right after
@@ -1159,43 +1164,10 @@ EXPORT_SYMBOL(remove_arg_zero);
1159 */ 1164 */
1160int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) 1165int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
1161{ 1166{
1167 unsigned int depth = bprm->recursion_depth;
1162 int try,retval; 1168 int try,retval;
1163 struct linux_binfmt *fmt; 1169 struct linux_binfmt *fmt;
1164#ifdef __alpha__
1165 /* handle /sbin/loader.. */
1166 {
1167 struct exec * eh = (struct exec *) bprm->buf;
1168
1169 if (!bprm->loader && eh->fh.f_magic == 0x183 &&
1170 (eh->fh.f_flags & 0x3000) == 0x3000)
1171 {
1172 struct file * file;
1173 unsigned long loader;
1174 1170
1175 allow_write_access(bprm->file);
1176 fput(bprm->file);
1177 bprm->file = NULL;
1178
1179 loader = bprm->vma->vm_end - sizeof(void *);
1180
1181 file = open_exec("/sbin/loader");
1182 retval = PTR_ERR(file);
1183 if (IS_ERR(file))
1184 return retval;
1185
1186 /* Remember if the application is TASO. */
1187 bprm->taso = eh->ah.entry < 0x100000000UL;
1188
1189 bprm->file = file;
1190 bprm->loader = loader;
1191 retval = prepare_binprm(bprm);
1192 if (retval<0)
1193 return retval;
1194 /* should call search_binary_handler recursively here,
1195 but it does not matter */
1196 }
1197 }
1198#endif
1199 retval = security_bprm_check(bprm); 1171 retval = security_bprm_check(bprm);
1200 if (retval) 1172 if (retval)
1201 return retval; 1173 return retval;
@@ -1219,8 +1191,15 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
1219 continue; 1191 continue;
1220 read_unlock(&binfmt_lock); 1192 read_unlock(&binfmt_lock);
1221 retval = fn(bprm, regs); 1193 retval = fn(bprm, regs);
1194 /*
1195 * Restore the depth counter to its starting value
1196 * in this call, so we don't have to rely on every
1197 * load_binary function to restore it on return.
1198 */
1199 bprm->recursion_depth = depth;
1222 if (retval >= 0) { 1200 if (retval >= 0) {
1223 tracehook_report_exec(fmt, bprm, regs); 1201 if (depth == 0)
1202 tracehook_report_exec(fmt, bprm, regs);
1224 put_binfmt(fmt); 1203 put_binfmt(fmt);
1225 allow_write_access(bprm->file); 1204 allow_write_access(bprm->file);
1226 if (bprm->file) 1205 if (bprm->file)
@@ -1262,6 +1241,8 @@ EXPORT_SYMBOL(search_binary_handler);
1262void free_bprm(struct linux_binprm *bprm) 1241void free_bprm(struct linux_binprm *bprm)
1263{ 1242{
1264 free_arg_pages(bprm); 1243 free_arg_pages(bprm);
1244 if (bprm->cred)
1245 abort_creds(bprm->cred);
1265 kfree(bprm); 1246 kfree(bprm);
1266} 1247}
1267 1248
@@ -1287,10 +1268,20 @@ int do_execve(char * filename,
1287 if (!bprm) 1268 if (!bprm)
1288 goto out_files; 1269 goto out_files;
1289 1270
1271 retval = mutex_lock_interruptible(&current->cred_exec_mutex);
1272 if (retval < 0)
1273 goto out_free;
1274
1275 retval = -ENOMEM;
1276 bprm->cred = prepare_exec_creds();
1277 if (!bprm->cred)
1278 goto out_unlock;
1279 check_unsafe_exec(bprm);
1280
1290 file = open_exec(filename); 1281 file = open_exec(filename);
1291 retval = PTR_ERR(file); 1282 retval = PTR_ERR(file);
1292 if (IS_ERR(file)) 1283 if (IS_ERR(file))
1293 goto out_kfree; 1284 goto out_unlock;
1294 1285
1295 sched_exec(); 1286 sched_exec();
1296 1287
@@ -1304,14 +1295,10 @@ int do_execve(char * filename,
1304 1295
1305 bprm->argc = count(argv, MAX_ARG_STRINGS); 1296 bprm->argc = count(argv, MAX_ARG_STRINGS);
1306 if ((retval = bprm->argc) < 0) 1297 if ((retval = bprm->argc) < 0)
1307 goto out_mm; 1298 goto out;
1308 1299
1309 bprm->envc = count(envp, MAX_ARG_STRINGS); 1300 bprm->envc = count(envp, MAX_ARG_STRINGS);
1310 if ((retval = bprm->envc) < 0) 1301 if ((retval = bprm->envc) < 0)
1311 goto out_mm;
1312
1313 retval = security_bprm_alloc(bprm);
1314 if (retval)
1315 goto out; 1302 goto out;
1316 1303
1317 retval = prepare_binprm(bprm); 1304 retval = prepare_binprm(bprm);
@@ -1333,21 +1320,18 @@ int do_execve(char * filename,
1333 1320
1334 current->flags &= ~PF_KTHREAD; 1321 current->flags &= ~PF_KTHREAD;
1335 retval = search_binary_handler(bprm,regs); 1322 retval = search_binary_handler(bprm,regs);
1336 if (retval >= 0) { 1323 if (retval < 0)
1337 /* execve success */ 1324 goto out;
1338 security_bprm_free(bprm);
1339 acct_update_integrals(current);
1340 free_bprm(bprm);
1341 if (displaced)
1342 put_files_struct(displaced);
1343 return retval;
1344 }
1345 1325
1346out: 1326 /* execve succeeded */
1347 if (bprm->security) 1327 mutex_unlock(&current->cred_exec_mutex);
1348 security_bprm_free(bprm); 1328 acct_update_integrals(current);
1329 free_bprm(bprm);
1330 if (displaced)
1331 put_files_struct(displaced);
1332 return retval;
1349 1333
1350out_mm: 1334out:
1351 if (bprm->mm) 1335 if (bprm->mm)
1352 mmput (bprm->mm); 1336 mmput (bprm->mm);
1353 1337
@@ -1356,7 +1340,11 @@ out_file:
1356 allow_write_access(bprm->file); 1340 allow_write_access(bprm->file);
1357 fput(bprm->file); 1341 fput(bprm->file);
1358 } 1342 }
1359out_kfree: 1343
1344out_unlock:
1345 mutex_unlock(&current->cred_exec_mutex);
1346
1347out_free:
1360 free_bprm(bprm); 1348 free_bprm(bprm);
1361 1349
1362out_files: 1350out_files:
@@ -1388,6 +1376,7 @@ EXPORT_SYMBOL(set_binfmt);
1388 */ 1376 */
1389static int format_corename(char *corename, long signr) 1377static int format_corename(char *corename, long signr)
1390{ 1378{
1379 const struct cred *cred = current_cred();
1391 const char *pat_ptr = core_pattern; 1380 const char *pat_ptr = core_pattern;
1392 int ispipe = (*pat_ptr == '|'); 1381 int ispipe = (*pat_ptr == '|');
1393 char *out_ptr = corename; 1382 char *out_ptr = corename;
@@ -1424,7 +1413,7 @@ static int format_corename(char *corename, long signr)
1424 /* uid */ 1413 /* uid */
1425 case 'u': 1414 case 'u':
1426 rc = snprintf(out_ptr, out_end - out_ptr, 1415 rc = snprintf(out_ptr, out_end - out_ptr,
1427 "%d", current->uid); 1416 "%d", cred->uid);
1428 if (rc > out_end - out_ptr) 1417 if (rc > out_end - out_ptr)
1429 goto out; 1418 goto out;
1430 out_ptr += rc; 1419 out_ptr += rc;
@@ -1432,7 +1421,7 @@ static int format_corename(char *corename, long signr)
1432 /* gid */ 1421 /* gid */
1433 case 'g': 1422 case 'g':
1434 rc = snprintf(out_ptr, out_end - out_ptr, 1423 rc = snprintf(out_ptr, out_end - out_ptr,
1435 "%d", current->gid); 1424 "%d", cred->gid);
1436 if (rc > out_end - out_ptr) 1425 if (rc > out_end - out_ptr)
1437 goto out; 1426 goto out;
1438 out_ptr += rc; 1427 out_ptr += rc;
@@ -1708,8 +1697,9 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1708 struct linux_binfmt * binfmt; 1697 struct linux_binfmt * binfmt;
1709 struct inode * inode; 1698 struct inode * inode;
1710 struct file * file; 1699 struct file * file;
1700 const struct cred *old_cred;
1701 struct cred *cred;
1711 int retval = 0; 1702 int retval = 0;
1712 int fsuid = current->fsuid;
1713 int flag = 0; 1703 int flag = 0;
1714 int ispipe = 0; 1704 int ispipe = 0;
1715 unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur; 1705 unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
@@ -1722,12 +1712,20 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1722 binfmt = current->binfmt; 1712 binfmt = current->binfmt;
1723 if (!binfmt || !binfmt->core_dump) 1713 if (!binfmt || !binfmt->core_dump)
1724 goto fail; 1714 goto fail;
1715
1716 cred = prepare_creds();
1717 if (!cred) {
1718 retval = -ENOMEM;
1719 goto fail;
1720 }
1721
1725 down_write(&mm->mmap_sem); 1722 down_write(&mm->mmap_sem);
1726 /* 1723 /*
1727 * If another thread got here first, or we are not dumpable, bail out. 1724 * If another thread got here first, or we are not dumpable, bail out.
1728 */ 1725 */
1729 if (mm->core_state || !get_dumpable(mm)) { 1726 if (mm->core_state || !get_dumpable(mm)) {
1730 up_write(&mm->mmap_sem); 1727 up_write(&mm->mmap_sem);
1728 put_cred(cred);
1731 goto fail; 1729 goto fail;
1732 } 1730 }
1733 1731
@@ -1738,12 +1736,16 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1738 */ 1736 */
1739 if (get_dumpable(mm) == 2) { /* Setuid core dump mode */ 1737 if (get_dumpable(mm) == 2) { /* Setuid core dump mode */
1740 flag = O_EXCL; /* Stop rewrite attacks */ 1738 flag = O_EXCL; /* Stop rewrite attacks */
1741 current->fsuid = 0; /* Dump root private */ 1739 cred->fsuid = 0; /* Dump root private */
1742 } 1740 }
1743 1741
1744 retval = coredump_wait(exit_code, &core_state); 1742 retval = coredump_wait(exit_code, &core_state);
1745 if (retval < 0) 1743 if (retval < 0) {
1744 put_cred(cred);
1746 goto fail; 1745 goto fail;
1746 }
1747
1748 old_cred = override_creds(cred);
1747 1749
1748 /* 1750 /*
1749 * Clear any false indication of pending signals that might 1751 * Clear any false indication of pending signals that might
@@ -1815,7 +1817,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1815 * Dont allow local users get cute and trick others to coredump 1817 * Dont allow local users get cute and trick others to coredump
1816 * into their pre-created files: 1818 * into their pre-created files:
1817 */ 1819 */
1818 if (inode->i_uid != current->fsuid) 1820 if (inode->i_uid != current_fsuid())
1819 goto close_fail; 1821 goto close_fail;
1820 if (!file->f_op) 1822 if (!file->f_op)
1821 goto close_fail; 1823 goto close_fail;
@@ -1834,7 +1836,8 @@ fail_unlock:
1834 if (helper_argv) 1836 if (helper_argv)
1835 argv_free(helper_argv); 1837 argv_free(helper_argv);
1836 1838
1837 current->fsuid = fsuid; 1839 revert_creds(old_cred);
1840 put_cred(cred);
1838 coredump_finish(mm); 1841 coredump_finish(mm);
1839fail: 1842fail:
1840 return retval; 1843 return retval;
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 890e0182881..197c7db583c 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -14,6 +14,7 @@
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/mount.h> 15#include <linux/mount.h>
16#include <linux/namei.h> 16#include <linux/namei.h>
17#include <linux/sched.h>
17 18
18#define dprintk(fmt, args...) do{}while(0) 19#define dprintk(fmt, args...) do{}while(0)
19 20
@@ -249,6 +250,7 @@ static int filldir_one(void * __buf, const char * name, int len,
249static int get_name(struct vfsmount *mnt, struct dentry *dentry, 250static int get_name(struct vfsmount *mnt, struct dentry *dentry,
250 char *name, struct dentry *child) 251 char *name, struct dentry *child)
251{ 252{
253 const struct cred *cred = current_cred();
252 struct inode *dir = dentry->d_inode; 254 struct inode *dir = dentry->d_inode;
253 int error; 255 int error;
254 struct file *file; 256 struct file *file;
@@ -263,7 +265,7 @@ static int get_name(struct vfsmount *mnt, struct dentry *dentry,
263 /* 265 /*
264 * Open the directory ... 266 * Open the directory ...
265 */ 267 */
266 file = dentry_open(dget(dentry), mntget(mnt), O_RDONLY); 268 file = dentry_open(dget(dentry), mntget(mnt), O_RDONLY, cred);
267 error = PTR_ERR(file); 269 error = PTR_ERR(file);
268 if (IS_ERR(file)) 270 if (IS_ERR(file))
269 goto out; 271 goto out;
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index 6dac7ba2d22..4a29d637608 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -1193,7 +1193,7 @@ static int ext2_has_free_blocks(struct ext2_sb_info *sbi)
1193 free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); 1193 free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
1194 root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count); 1194 root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count);
1195 if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) && 1195 if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) &&
1196 sbi->s_resuid != current->fsuid && 1196 sbi->s_resuid != current_fsuid() &&
1197 (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) { 1197 (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
1198 return 0; 1198 return 0;
1199 } 1199 }
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index f5974134676..c454d5db28a 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -550,7 +550,7 @@ got:
550 550
551 sb->s_dirt = 1; 551 sb->s_dirt = 1;
552 mark_buffer_dirty(bh2); 552 mark_buffer_dirty(bh2);
553 inode->i_uid = current->fsuid; 553 inode->i_uid = current_fsuid();
554 if (test_opt (sb, GRPID)) 554 if (test_opt (sb, GRPID))
555 inode->i_gid = dir->i_gid; 555 inode->i_gid = dir->i_gid;
556 else if (dir->i_mode & S_ISGID) { 556 else if (dir->i_mode & S_ISGID) {
@@ -558,7 +558,7 @@ got:
558 if (S_ISDIR(mode)) 558 if (S_ISDIR(mode))
559 mode |= S_ISGID; 559 mode |= S_ISGID;
560 } else 560 } else
561 inode->i_gid = current->fsgid; 561 inode->i_gid = current_fsgid();
562 inode->i_mode = mode; 562 inode->i_mode = mode;
563 563
564 inode->i_ino = ino; 564 inode->i_ino = ino;
@@ -585,7 +585,10 @@ got:
585 spin_lock(&sbi->s_next_gen_lock); 585 spin_lock(&sbi->s_next_gen_lock);
586 inode->i_generation = sbi->s_next_generation++; 586 inode->i_generation = sbi->s_next_generation++;
587 spin_unlock(&sbi->s_next_gen_lock); 587 spin_unlock(&sbi->s_next_gen_lock);
588 insert_inode_hash(inode); 588 if (insert_inode_locked(inode) < 0) {
589 err = -EINVAL;
590 goto fail_drop;
591 }
589 592
590 if (DQUOT_ALLOC_INODE(inode)) { 593 if (DQUOT_ALLOC_INODE(inode)) {
591 err = -EDQUOT; 594 err = -EDQUOT;
@@ -612,6 +615,7 @@ fail_drop:
612 DQUOT_DROP(inode); 615 DQUOT_DROP(inode);
613 inode->i_flags |= S_NOQUOTA; 616 inode->i_flags |= S_NOQUOTA;
614 inode->i_nlink = 0; 617 inode->i_nlink = 0;
618 unlock_new_inode(inode);
615 iput(inode); 619 iput(inode);
616 return ERR_PTR(err); 620 return ERR_PTR(err);
617 621
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 7658b33e265..02b39a5deb7 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -32,6 +32,7 @@
32#include <linux/buffer_head.h> 32#include <linux/buffer_head.h>
33#include <linux/mpage.h> 33#include <linux/mpage.h>
34#include <linux/fiemap.h> 34#include <linux/fiemap.h>
35#include <linux/namei.h>
35#include "ext2.h" 36#include "ext2.h"
36#include "acl.h" 37#include "acl.h"
37#include "xip.h" 38#include "xip.h"
@@ -1286,9 +1287,11 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
1286 else 1287 else
1287 inode->i_mapping->a_ops = &ext2_aops; 1288 inode->i_mapping->a_ops = &ext2_aops;
1288 } else if (S_ISLNK(inode->i_mode)) { 1289 } else if (S_ISLNK(inode->i_mode)) {
1289 if (ext2_inode_is_fast_symlink(inode)) 1290 if (ext2_inode_is_fast_symlink(inode)) {
1290 inode->i_op = &ext2_fast_symlink_inode_operations; 1291 inode->i_op = &ext2_fast_symlink_inode_operations;
1291 else { 1292 nd_terminate_link(ei->i_data, inode->i_size,
1293 sizeof(ei->i_data) - 1);
1294 } else {
1292 inode->i_op = &ext2_symlink_inode_operations; 1295 inode->i_op = &ext2_symlink_inode_operations;
1293 if (test_opt(inode->i_sb, NOBH)) 1296 if (test_opt(inode->i_sb, NOBH))
1294 inode->i_mapping->a_ops = &ext2_nobh_aops; 1297 inode->i_mapping->a_ops = &ext2_nobh_aops;
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 2a747252ec1..90ea17998a7 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -41,9 +41,11 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode)
41 int err = ext2_add_link(dentry, inode); 41 int err = ext2_add_link(dentry, inode);
42 if (!err) { 42 if (!err) {
43 d_instantiate(dentry, inode); 43 d_instantiate(dentry, inode);
44 unlock_new_inode(inode);
44 return 0; 45 return 0;
45 } 46 }
46 inode_dec_link_count(inode); 47 inode_dec_link_count(inode);
48 unlock_new_inode(inode);
47 iput(inode); 49 iput(inode);
48 return err; 50 return err;
49} 51}
@@ -170,6 +172,7 @@ out:
170 172
171out_fail: 173out_fail:
172 inode_dec_link_count(inode); 174 inode_dec_link_count(inode);
175 unlock_new_inode(inode);
173 iput (inode); 176 iput (inode);
174 goto out; 177 goto out;
175} 178}
@@ -178,6 +181,7 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir,
178 struct dentry *dentry) 181 struct dentry *dentry)
179{ 182{
180 struct inode *inode = old_dentry->d_inode; 183 struct inode *inode = old_dentry->d_inode;
184 int err;
181 185
182 if (inode->i_nlink >= EXT2_LINK_MAX) 186 if (inode->i_nlink >= EXT2_LINK_MAX)
183 return -EMLINK; 187 return -EMLINK;
@@ -186,7 +190,14 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir,
186 inode_inc_link_count(inode); 190 inode_inc_link_count(inode);
187 atomic_inc(&inode->i_count); 191 atomic_inc(&inode->i_count);
188 192
189 return ext2_add_nondir(dentry, inode); 193 err = ext2_add_link(dentry, inode);
194 if (!err) {
195 d_instantiate(dentry, inode);
196 return 0;
197 }
198 inode_dec_link_count(inode);
199 iput(inode);
200 return err;
190} 201}
191 202
192static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode) 203static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode)
@@ -222,12 +233,14 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode)
222 goto out_fail; 233 goto out_fail;
223 234
224 d_instantiate(dentry, inode); 235 d_instantiate(dentry, inode);
236 unlock_new_inode(inode);
225out: 237out:
226 return err; 238 return err;
227 239
228out_fail: 240out_fail:
229 inode_dec_link_count(inode); 241 inode_dec_link_count(inode);
230 inode_dec_link_count(inode); 242 inode_dec_link_count(inode);
243 unlock_new_inode(inode);
231 iput(inode); 244 iput(inode);
232out_dir: 245out_dir:
233 inode_dec_link_count(dir); 246 inode_dec_link_count(dir);
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index f5b57a2ca35..0dbf1c04847 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -1422,7 +1422,7 @@ static int ext3_has_free_blocks(struct ext3_sb_info *sbi)
1422 free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); 1422 free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
1423 root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count); 1423 root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count);
1424 if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) && 1424 if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) &&
1425 sbi->s_resuid != current->fsuid && 1425 sbi->s_resuid != current_fsuid() &&
1426 (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) { 1426 (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
1427 return 0; 1427 return 0;
1428 } 1428 }
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 47b678d73e7..5655fbcbd11 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -539,7 +539,7 @@ got:
539 percpu_counter_inc(&sbi->s_dirs_counter); 539 percpu_counter_inc(&sbi->s_dirs_counter);
540 sb->s_dirt = 1; 540 sb->s_dirt = 1;
541 541
542 inode->i_uid = current->fsuid; 542 inode->i_uid = current_fsuid();
543 if (test_opt (sb, GRPID)) 543 if (test_opt (sb, GRPID))
544 inode->i_gid = dir->i_gid; 544 inode->i_gid = dir->i_gid;
545 else if (dir->i_mode & S_ISGID) { 545 else if (dir->i_mode & S_ISGID) {
@@ -547,7 +547,7 @@ got:
547 if (S_ISDIR(mode)) 547 if (S_ISDIR(mode))
548 mode |= S_ISGID; 548 mode |= S_ISGID;
549 } else 549 } else
550 inode->i_gid = current->fsgid; 550 inode->i_gid = current_fsgid();
551 inode->i_mode = mode; 551 inode->i_mode = mode;
552 552
553 inode->i_ino = ino; 553 inode->i_ino = ino;
@@ -579,7 +579,10 @@ got:
579 ext3_set_inode_flags(inode); 579 ext3_set_inode_flags(inode);
580 if (IS_DIRSYNC(inode)) 580 if (IS_DIRSYNC(inode))
581 handle->h_sync = 1; 581 handle->h_sync = 1;
582 insert_inode_hash(inode); 582 if (insert_inode_locked(inode) < 0) {
583 err = -EINVAL;
584 goto fail_drop;
585 }
583 spin_lock(&sbi->s_next_gen_lock); 586 spin_lock(&sbi->s_next_gen_lock);
584 inode->i_generation = sbi->s_next_generation++; 587 inode->i_generation = sbi->s_next_generation++;
585 spin_unlock(&sbi->s_next_gen_lock); 588 spin_unlock(&sbi->s_next_gen_lock);
@@ -627,6 +630,7 @@ fail_drop:
627 DQUOT_DROP(inode); 630 DQUOT_DROP(inode);
628 inode->i_flags |= S_NOQUOTA; 631 inode->i_flags |= S_NOQUOTA;
629 inode->i_nlink = 0; 632 inode->i_nlink = 0;
633 unlock_new_inode(inode);
630 iput(inode); 634 iput(inode);
631 brelse(bitmap_bh); 635 brelse(bitmap_bh);
632 return ERR_PTR(err); 636 return ERR_PTR(err);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index f8424ad8997..5fa453b49a6 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -37,6 +37,7 @@
37#include <linux/uio.h> 37#include <linux/uio.h>
38#include <linux/bio.h> 38#include <linux/bio.h>
39#include <linux/fiemap.h> 39#include <linux/fiemap.h>
40#include <linux/namei.h>
40#include "xattr.h" 41#include "xattr.h"
41#include "acl.h" 42#include "acl.h"
42 43
@@ -1160,7 +1161,7 @@ static int ext3_write_begin(struct file *file, struct address_space *mapping,
1160 to = from + len; 1161 to = from + len;
1161 1162
1162retry: 1163retry:
1163 page = __grab_cache_page(mapping, index); 1164 page = grab_cache_page_write_begin(mapping, index, flags);
1164 if (!page) 1165 if (!page)
1165 return -ENOMEM; 1166 return -ENOMEM;
1166 *pagep = page; 1167 *pagep = page;
@@ -2817,9 +2818,11 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
2817 inode->i_op = &ext3_dir_inode_operations; 2818 inode->i_op = &ext3_dir_inode_operations;
2818 inode->i_fop = &ext3_dir_operations; 2819 inode->i_fop = &ext3_dir_operations;
2819 } else if (S_ISLNK(inode->i_mode)) { 2820 } else if (S_ISLNK(inode->i_mode)) {
2820 if (ext3_inode_is_fast_symlink(inode)) 2821 if (ext3_inode_is_fast_symlink(inode)) {
2821 inode->i_op = &ext3_fast_symlink_inode_operations; 2822 inode->i_op = &ext3_fast_symlink_inode_operations;
2822 else { 2823 nd_terminate_link(ei->i_data, inode->i_size,
2824 sizeof(ei->i_data) - 1);
2825 } else {
2823 inode->i_op = &ext3_symlink_inode_operations; 2826 inode->i_op = &ext3_symlink_inode_operations;
2824 ext3_set_aops(inode); 2827 ext3_set_aops(inode);
2825 } 2828 }
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 3e5edc92aa0..1dd2abe6313 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1652,9 +1652,11 @@ static int ext3_add_nondir(handle_t *handle,
1652 if (!err) { 1652 if (!err) {
1653 ext3_mark_inode_dirty(handle, inode); 1653 ext3_mark_inode_dirty(handle, inode);
1654 d_instantiate(dentry, inode); 1654 d_instantiate(dentry, inode);
1655 unlock_new_inode(inode);
1655 return 0; 1656 return 0;
1656 } 1657 }
1657 drop_nlink(inode); 1658 drop_nlink(inode);
1659 unlock_new_inode(inode);
1658 iput(inode); 1660 iput(inode);
1659 return err; 1661 return err;
1660} 1662}
@@ -1765,6 +1767,7 @@ retry:
1765 dir_block = ext3_bread (handle, inode, 0, 1, &err); 1767 dir_block = ext3_bread (handle, inode, 0, 1, &err);
1766 if (!dir_block) { 1768 if (!dir_block) {
1767 drop_nlink(inode); /* is this nlink == 0? */ 1769 drop_nlink(inode); /* is this nlink == 0? */
1770 unlock_new_inode(inode);
1768 ext3_mark_inode_dirty(handle, inode); 1771 ext3_mark_inode_dirty(handle, inode);
1769 iput (inode); 1772 iput (inode);
1770 goto out_stop; 1773 goto out_stop;
@@ -1792,6 +1795,7 @@ retry:
1792 err = ext3_add_entry (handle, dentry, inode); 1795 err = ext3_add_entry (handle, dentry, inode);
1793 if (err) { 1796 if (err) {
1794 inode->i_nlink = 0; 1797 inode->i_nlink = 0;
1798 unlock_new_inode(inode);
1795 ext3_mark_inode_dirty(handle, inode); 1799 ext3_mark_inode_dirty(handle, inode);
1796 iput (inode); 1800 iput (inode);
1797 goto out_stop; 1801 goto out_stop;
@@ -1800,6 +1804,7 @@ retry:
1800 ext3_update_dx_flag(dir); 1804 ext3_update_dx_flag(dir);
1801 ext3_mark_inode_dirty(handle, dir); 1805 ext3_mark_inode_dirty(handle, dir);
1802 d_instantiate(dentry, inode); 1806 d_instantiate(dentry, inode);
1807 unlock_new_inode(inode);
1803out_stop: 1808out_stop:
1804 ext3_journal_stop(handle); 1809 ext3_journal_stop(handle);
1805 if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) 1810 if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
@@ -2170,10 +2175,10 @@ retry:
2170 * We have a transaction open. All is sweetness. It also sets 2175 * We have a transaction open. All is sweetness. It also sets
2171 * i_size in generic_commit_write(). 2176 * i_size in generic_commit_write().
2172 */ 2177 */
2173 err = __page_symlink(inode, symname, l, 2178 err = __page_symlink(inode, symname, l, 1);
2174 mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
2175 if (err) { 2179 if (err) {
2176 drop_nlink(inode); 2180 drop_nlink(inode);
2181 unlock_new_inode(inode);
2177 ext3_mark_inode_dirty(handle, inode); 2182 ext3_mark_inode_dirty(handle, inode);
2178 iput (inode); 2183 iput (inode);
2179 goto out_stop; 2184 goto out_stop;
@@ -2221,7 +2226,14 @@ retry:
2221 inc_nlink(inode); 2226 inc_nlink(inode);
2222 atomic_inc(&inode->i_count); 2227 atomic_inc(&inode->i_count);
2223 2228
2224 err = ext3_add_nondir(handle, dentry, inode); 2229 err = ext3_add_entry(handle, dentry, inode);
2230 if (!err) {
2231 ext3_mark_inode_dirty(handle, inode);
2232 d_instantiate(dentry, inode);
2233 } else {
2234 drop_nlink(inode);
2235 iput(inode);
2236 }
2225 ext3_journal_stop(handle); 2237 ext3_journal_stop(handle);
2226 if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) 2238 if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
2227 goto retry; 2239 goto retry;
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index d2003cdc36a..38b3acf5683 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -609,8 +609,8 @@ int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
609 609
610 if (free_blocks - (nblocks + root_blocks + dirty_blocks) < 610 if (free_blocks - (nblocks + root_blocks + dirty_blocks) <
611 EXT4_FREEBLOCKS_WATERMARK) { 611 EXT4_FREEBLOCKS_WATERMARK) {
612 free_blocks = percpu_counter_sum(fbc); 612 free_blocks = percpu_counter_sum_positive(fbc);
613 dirty_blocks = percpu_counter_sum(dbc); 613 dirty_blocks = percpu_counter_sum_positive(dbc);
614 if (dirty_blocks < 0) { 614 if (dirty_blocks < 0) {
615 printk(KERN_CRIT "Dirty block accounting " 615 printk(KERN_CRIT "Dirty block accounting "
616 "went wrong %lld\n", 616 "went wrong %lld\n",
@@ -624,7 +624,7 @@ int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
624 return 1; 624 return 1;
625 625
626 /* Hm, nope. Are (enough) root reserved blocks available? */ 626 /* Hm, nope. Are (enough) root reserved blocks available? */
627 if (sbi->s_resuid == current->fsuid || 627 if (sbi->s_resuid == current_fsuid() ||
628 ((sbi->s_resgid != 0) && in_group_p(sbi->s_resgid)) || 628 ((sbi->s_resgid != 0) && in_group_p(sbi->s_resgid)) ||
629 capable(CAP_SYS_RESOURCE)) { 629 capable(CAP_SYS_RESOURCE)) {
630 if (free_blocks >= (nblocks + dirty_blocks)) 630 if (free_blocks >= (nblocks + dirty_blocks))
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h
index 445fde603df..b21f16713db 100644
--- a/fs/ext4/ext4_sb.h
+++ b/fs/ext4/ext4_sb.h
@@ -146,4 +146,10 @@ struct ext4_sb_info {
146 struct flex_groups *s_flex_groups; 146 struct flex_groups *s_flex_groups;
147}; 147};
148 148
149static inline spinlock_t *
150sb_bgl_lock(struct ext4_sb_info *sbi, unsigned int block_group)
151{
152 return bgl_lock_ptr(&sbi->s_blockgroup_lock, block_group);
153}
154
149#endif /* _EXT4_SB */ 155#endif /* _EXT4_SB */
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 2a117e286e5..6e6052879aa 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -787,7 +787,7 @@ got:
787 spin_unlock(sb_bgl_lock(sbi, flex_group)); 787 spin_unlock(sb_bgl_lock(sbi, flex_group));
788 } 788 }
789 789
790 inode->i_uid = current->fsuid; 790 inode->i_uid = current_fsuid();
791 if (test_opt(sb, GRPID)) 791 if (test_opt(sb, GRPID))
792 inode->i_gid = dir->i_gid; 792 inode->i_gid = dir->i_gid;
793 else if (dir->i_mode & S_ISGID) { 793 else if (dir->i_mode & S_ISGID) {
@@ -795,7 +795,7 @@ got:
795 if (S_ISDIR(mode)) 795 if (S_ISDIR(mode))
796 mode |= S_ISGID; 796 mode |= S_ISGID;
797 } else 797 } else
798 inode->i_gid = current->fsgid; 798 inode->i_gid = current_fsgid();
799 inode->i_mode = mode; 799 inode->i_mode = mode;
800 800
801 inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb); 801 inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb);
@@ -826,7 +826,10 @@ got:
826 ext4_set_inode_flags(inode); 826 ext4_set_inode_flags(inode);
827 if (IS_DIRSYNC(inode)) 827 if (IS_DIRSYNC(inode))
828 handle->h_sync = 1; 828 handle->h_sync = 1;
829 insert_inode_hash(inode); 829 if (insert_inode_locked(inode) < 0) {
830 err = -EINVAL;
831 goto fail_drop;
832 }
830 spin_lock(&sbi->s_next_gen_lock); 833 spin_lock(&sbi->s_next_gen_lock);
831 inode->i_generation = sbi->s_next_generation++; 834 inode->i_generation = sbi->s_next_generation++;
832 spin_unlock(&sbi->s_next_gen_lock); 835 spin_unlock(&sbi->s_next_gen_lock);
@@ -881,6 +884,7 @@ fail_drop:
881 DQUOT_DROP(inode); 884 DQUOT_DROP(inode);
882 inode->i_flags |= S_NOQUOTA; 885 inode->i_flags |= S_NOQUOTA;
883 inode->i_nlink = 0; 886 inode->i_nlink = 0;
887 unlock_new_inode(inode);
884 iput(inode); 888 iput(inode);
885 brelse(bitmap_bh); 889 brelse(bitmap_bh);
886 return ERR_PTR(err); 890 return ERR_PTR(err);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index be21a5ae33c..6702a49992a 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -34,6 +34,7 @@
34#include <linux/writeback.h> 34#include <linux/writeback.h>
35#include <linux/pagevec.h> 35#include <linux/pagevec.h>
36#include <linux/mpage.h> 36#include <linux/mpage.h>
37#include <linux/namei.h>
37#include <linux/uio.h> 38#include <linux/uio.h>
38#include <linux/bio.h> 39#include <linux/bio.h>
39#include "ext4_jbd2.h" 40#include "ext4_jbd2.h"
@@ -1345,7 +1346,7 @@ retry:
1345 goto out; 1346 goto out;
1346 } 1347 }
1347 1348
1348 page = __grab_cache_page(mapping, index); 1349 page = grab_cache_page_write_begin(mapping, index, flags);
1349 if (!page) { 1350 if (!page) {
1350 ext4_journal_stop(handle); 1351 ext4_journal_stop(handle);
1351 ret = -ENOMEM; 1352 ret = -ENOMEM;
@@ -2549,7 +2550,7 @@ retry:
2549 goto out; 2550 goto out;
2550 } 2551 }
2551 2552
2552 page = __grab_cache_page(mapping, index); 2553 page = grab_cache_page_write_begin(mapping, index, flags);
2553 if (!page) { 2554 if (!page) {
2554 ext4_journal_stop(handle); 2555 ext4_journal_stop(handle);
2555 ret = -ENOMEM; 2556 ret = -ENOMEM;
@@ -4164,9 +4165,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4164 inode->i_op = &ext4_dir_inode_operations; 4165 inode->i_op = &ext4_dir_inode_operations;
4165 inode->i_fop = &ext4_dir_operations; 4166 inode->i_fop = &ext4_dir_operations;
4166 } else if (S_ISLNK(inode->i_mode)) { 4167 } else if (S_ISLNK(inode->i_mode)) {
4167 if (ext4_inode_is_fast_symlink(inode)) 4168 if (ext4_inode_is_fast_symlink(inode)) {
4168 inode->i_op = &ext4_fast_symlink_inode_operations; 4169 inode->i_op = &ext4_fast_symlink_inode_operations;
4169 else { 4170 nd_terminate_link(ei->i_data, inode->i_size,
4171 sizeof(ei->i_data) - 1);
4172 } else {
4170 inode->i_op = &ext4_symlink_inode_operations; 4173 inode->i_op = &ext4_symlink_inode_operations;
4171 ext4_set_aops(inode); 4174 ext4_set_aops(inode);
4172 } 4175 }
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 63adcb79298..9fd2a5e1be4 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1693,9 +1693,11 @@ static int ext4_add_nondir(handle_t *handle,
1693 if (!err) { 1693 if (!err) {
1694 ext4_mark_inode_dirty(handle, inode); 1694 ext4_mark_inode_dirty(handle, inode);
1695 d_instantiate(dentry, inode); 1695 d_instantiate(dentry, inode);
1696 unlock_new_inode(inode);
1696 return 0; 1697 return 0;
1697 } 1698 }
1698 drop_nlink(inode); 1699 drop_nlink(inode);
1700 unlock_new_inode(inode);
1699 iput(inode); 1701 iput(inode);
1700 return err; 1702 return err;
1701} 1703}
@@ -1830,6 +1832,7 @@ retry:
1830 if (err) { 1832 if (err) {
1831out_clear_inode: 1833out_clear_inode:
1832 clear_nlink(inode); 1834 clear_nlink(inode);
1835 unlock_new_inode(inode);
1833 ext4_mark_inode_dirty(handle, inode); 1836 ext4_mark_inode_dirty(handle, inode);
1834 iput(inode); 1837 iput(inode);
1835 goto out_stop; 1838 goto out_stop;
@@ -1838,6 +1841,7 @@ out_clear_inode:
1838 ext4_update_dx_flag(dir); 1841 ext4_update_dx_flag(dir);
1839 ext4_mark_inode_dirty(handle, dir); 1842 ext4_mark_inode_dirty(handle, dir);
1840 d_instantiate(dentry, inode); 1843 d_instantiate(dentry, inode);
1844 unlock_new_inode(inode);
1841out_stop: 1845out_stop:
1842 ext4_journal_stop(handle); 1846 ext4_journal_stop(handle);
1843 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) 1847 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
@@ -2208,10 +2212,10 @@ retry:
2208 * We have a transaction open. All is sweetness. It also sets 2212 * We have a transaction open. All is sweetness. It also sets
2209 * i_size in generic_commit_write(). 2213 * i_size in generic_commit_write().
2210 */ 2214 */
2211 err = __page_symlink(inode, symname, l, 2215 err = __page_symlink(inode, symname, l, 1);
2212 mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
2213 if (err) { 2216 if (err) {
2214 clear_nlink(inode); 2217 clear_nlink(inode);
2218 unlock_new_inode(inode);
2215 ext4_mark_inode_dirty(handle, inode); 2219 ext4_mark_inode_dirty(handle, inode);
2216 iput(inode); 2220 iput(inode);
2217 goto out_stop; 2221 goto out_stop;
@@ -2262,7 +2266,14 @@ retry:
2262 ext4_inc_count(handle, inode); 2266 ext4_inc_count(handle, inode);
2263 atomic_inc(&inode->i_count); 2267 atomic_inc(&inode->i_count);
2264 2268
2265 err = ext4_add_nondir(handle, dentry, inode); 2269 err = ext4_add_entry(handle, dentry, inode);
2270 if (!err) {
2271 ext4_mark_inode_dirty(handle, inode);
2272 d_instantiate(dentry, inode);
2273 } else {
2274 drop_nlink(inode);
2275 iput(inode);
2276 }
2266 ext4_journal_stop(handle); 2277 ext4_journal_stop(handle);
2267 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) 2278 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2268 goto retry; 2279 goto retry;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index e4a241c65db..04158ad74db 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1721,7 +1721,7 @@ static loff_t ext4_max_size(int blkbits, int has_huge_files)
1721 /* small i_blocks in vfs inode? */ 1721 /* small i_blocks in vfs inode? */
1722 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { 1722 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
1723 /* 1723 /*
1724 * CONFIG_LSF is not enabled implies the inode 1724 * CONFIG_LBD is not enabled implies the inode
1725 * i_block represent total blocks in 512 bytes 1725 * i_block represent total blocks in 512 bytes
1726 * 32 == size of vfs inode i_blocks * 8 1726 * 32 == size of vfs inode i_blocks * 8
1727 */ 1727 */
@@ -1764,7 +1764,7 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
1764 1764
1765 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { 1765 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
1766 /* 1766 /*
1767 * !has_huge_files or CONFIG_LSF is not enabled 1767 * !has_huge_files or CONFIG_LBD is not enabled
1768 * implies the inode i_block represent total blocks in 1768 * implies the inode i_block represent total blocks in
1769 * 512 bytes 32 == size of vfs inode i_blocks * 8 1769 * 512 bytes 32 == size of vfs inode i_blocks * 8
1770 */ 1770 */
@@ -2021,13 +2021,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2021 if (has_huge_files) { 2021 if (has_huge_files) {
2022 /* 2022 /*
2023 * Large file size enabled file system can only be 2023 * Large file size enabled file system can only be
2024 * mount if kernel is build with CONFIG_LSF 2024 * mount if kernel is build with CONFIG_LBD
2025 */ 2025 */
2026 if (sizeof(root->i_blocks) < sizeof(u64) && 2026 if (sizeof(root->i_blocks) < sizeof(u64) &&
2027 !(sb->s_flags & MS_RDONLY)) { 2027 !(sb->s_flags & MS_RDONLY)) {
2028 printk(KERN_ERR "EXT4-fs: %s: Filesystem with huge " 2028 printk(KERN_ERR "EXT4-fs: %s: Filesystem with huge "
2029 "files cannot be mounted read-write " 2029 "files cannot be mounted read-write "
2030 "without CONFIG_LSF.\n", sb->s_id); 2030 "without CONFIG_LBD.\n", sb->s_id);
2031 goto failed_mount; 2031 goto failed_mount;
2032 } 2032 }
2033 } 2033 }
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 67e05835709..3a7f603b698 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -841,7 +841,6 @@ const struct file_operations fat_dir_operations = {
841 .compat_ioctl = fat_compat_dir_ioctl, 841 .compat_ioctl = fat_compat_dir_ioctl,
842#endif 842#endif
843 .fsync = file_fsync, 843 .fsync = file_fsync,
844 .llseek = generic_file_llseek,
845}; 844};
846 845
847static int fat_get_short_entry(struct inode *dir, loff_t *pos, 846static int fat_get_short_entry(struct inode *dir, loff_t *pos,
diff --git a/fs/fat/file.c b/fs/fat/file.c
index f06a4e525ec..0a7f4a9918b 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -304,7 +304,7 @@ static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode)
304{ 304{
305 mode_t allow_utime = sbi->options.allow_utime; 305 mode_t allow_utime = sbi->options.allow_utime;
306 306
307 if (current->fsuid != inode->i_uid) { 307 if (current_fsuid() != inode->i_uid) {
308 if (in_group_p(inode->i_gid)) 308 if (in_group_p(inode->i_gid))
309 allow_utime >>= 3; 309 allow_utime >>= 3;
310 if (allow_utime & MAY_WRITE) 310 if (allow_utime & MAY_WRITE)
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index bdd8fb7be2c..6b74d09adbe 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -749,6 +749,8 @@ static struct dentry *fat_get_parent(struct dentry *child)
749 brelse(bh); 749 brelse(bh);
750 750
751 parent = d_obtain_alias(inode); 751 parent = d_obtain_alias(inode);
752 if (!IS_ERR(parent))
753 parent->d_op = sb->s_root->d_op;
752out: 754out:
753 unlock_super(sb); 755 unlock_super(sb);
754 756
@@ -926,8 +928,8 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
926 928
927 opts->isvfat = is_vfat; 929 opts->isvfat = is_vfat;
928 930
929 opts->fs_uid = current->uid; 931 opts->fs_uid = current_uid();
930 opts->fs_gid = current->gid; 932 opts->fs_gid = current_gid();
931 opts->fs_fmask = opts->fs_dmask = current->fs->umask; 933 opts->fs_fmask = opts->fs_dmask = current->fs->umask;
932 opts->allow_utime = -1; 934 opts->allow_utime = -1;
933 opts->codepage = fat_default_codepage; 935 opts->codepage = fat_default_codepage;
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index bf326d4356a..8ae32e37673 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -78,7 +78,7 @@ static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
78 * for creation. 78 * for creation.
79 */ 79 */
80 if (!(nd->flags & (LOOKUP_CONTINUE | LOOKUP_PARENT))) { 80 if (!(nd->flags & (LOOKUP_CONTINUE | LOOKUP_PARENT))) {
81 if (nd->flags & LOOKUP_CREATE) 81 if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
82 return 0; 82 return 0;
83 } 83 }
84 84
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 549daf8005f..cdc14194672 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -212,13 +212,14 @@ static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
212int __f_setown(struct file *filp, struct pid *pid, enum pid_type type, 212int __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
213 int force) 213 int force)
214{ 214{
215 const struct cred *cred = current_cred();
215 int err; 216 int err;
216 217
217 err = security_file_set_fowner(filp); 218 err = security_file_set_fowner(filp);
218 if (err) 219 if (err)
219 return err; 220 return err;
220 221
221 f_modown(filp, pid, type, current->uid, current->euid, force); 222 f_modown(filp, pid, type, cred->uid, cred->euid, force);
222 return 0; 223 return 0;
223} 224}
224EXPORT_SYMBOL(__f_setown); 225EXPORT_SYMBOL(__f_setown);
@@ -407,10 +408,17 @@ static const long band_table[NSIGPOLL] = {
407static inline int sigio_perm(struct task_struct *p, 408static inline int sigio_perm(struct task_struct *p,
408 struct fown_struct *fown, int sig) 409 struct fown_struct *fown, int sig)
409{ 410{
410 return (((fown->euid == 0) || 411 const struct cred *cred;
411 (fown->euid == p->suid) || (fown->euid == p->uid) || 412 int ret;
412 (fown->uid == p->suid) || (fown->uid == p->uid)) && 413
413 !security_file_send_sigiotask(p, fown, sig)); 414 rcu_read_lock();
415 cred = __task_cred(p);
416 ret = ((fown->euid == 0 ||
417 fown->euid == cred->suid || fown->euid == cred->uid ||
418 fown->uid == cred->suid || fown->uid == cred->uid) &&
419 !security_file_send_sigiotask(p, fown, sig));
420 rcu_read_unlock();
421 return ret;
414} 422}
415 423
416static void send_sigio_to_task(struct task_struct *p, 424static void send_sigio_to_task(struct task_struct *p,
diff --git a/fs/file_table.c b/fs/file_table.c
index 5ad0eca6eea..bbeeac6efa1 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -32,11 +32,16 @@ struct files_stat_struct files_stat = {
32/* public. Not pretty! */ 32/* public. Not pretty! */
33__cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock); 33__cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock);
34 34
35/* SLAB cache for file structures */
36static struct kmem_cache *filp_cachep __read_mostly;
37
35static struct percpu_counter nr_files __cacheline_aligned_in_smp; 38static struct percpu_counter nr_files __cacheline_aligned_in_smp;
36 39
37static inline void file_free_rcu(struct rcu_head *head) 40static inline void file_free_rcu(struct rcu_head *head)
38{ 41{
39 struct file *f = container_of(head, struct file, f_u.fu_rcuhead); 42 struct file *f = container_of(head, struct file, f_u.fu_rcuhead);
43
44 put_cred(f->f_cred);
40 kmem_cache_free(filp_cachep, f); 45 kmem_cache_free(filp_cachep, f);
41} 46}
42 47
@@ -94,7 +99,7 @@ int proc_nr_files(ctl_table *table, int write, struct file *filp,
94 */ 99 */
95struct file *get_empty_filp(void) 100struct file *get_empty_filp(void)
96{ 101{
97 struct task_struct *tsk; 102 const struct cred *cred = current_cred();
98 static int old_max; 103 static int old_max;
99 struct file * f; 104 struct file * f;
100 105
@@ -118,12 +123,10 @@ struct file *get_empty_filp(void)
118 if (security_file_alloc(f)) 123 if (security_file_alloc(f))
119 goto fail_sec; 124 goto fail_sec;
120 125
121 tsk = current;
122 INIT_LIST_HEAD(&f->f_u.fu_list); 126 INIT_LIST_HEAD(&f->f_u.fu_list);
123 atomic_long_set(&f->f_count, 1); 127 atomic_long_set(&f->f_count, 1);
124 rwlock_init(&f->f_owner.lock); 128 rwlock_init(&f->f_owner.lock);
125 f->f_uid = tsk->fsuid; 129 f->f_cred = get_cred(cred);
126 f->f_gid = tsk->fsgid;
127 eventpoll_init_file(f); 130 eventpoll_init_file(f);
128 /* f->f_version: 0 */ 131 /* f->f_version: 0 */
129 return f; 132 return f;
@@ -397,7 +400,12 @@ too_bad:
397void __init files_init(unsigned long mempages) 400void __init files_init(unsigned long mempages)
398{ 401{
399 int n; 402 int n;
400 /* One file with associated inode and dcache is very roughly 1K. 403
404 filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
405 SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
406
407 /*
408 * One file with associated inode and dcache is very roughly 1K.
401 * Per default don't use more than 10% of our memory for files. 409 * Per default don't use more than 10% of our memory for files.
402 */ 410 */
403 411
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 9f3f2ceb73f..03a6ea5e99f 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -325,8 +325,10 @@ vxfs_iget(struct super_block *sbp, ino_t ino)
325 if (!VXFS_ISIMMED(vip)) { 325 if (!VXFS_ISIMMED(vip)) {
326 ip->i_op = &page_symlink_inode_operations; 326 ip->i_op = &page_symlink_inode_operations;
327 ip->i_mapping->a_ops = &vxfs_aops; 327 ip->i_mapping->a_ops = &vxfs_aops;
328 } else 328 } else {
329 ip->i_op = &vxfs_immed_symlink_iops; 329 ip->i_op = &vxfs_immed_symlink_iops;
330 vip->vii_immed.vi_immed[ip->i_size] = '\0';
331 }
330 } else 332 } else
331 init_special_inode(ip, ip->i_mode, old_decode_dev(vip->vii_rdev)); 333 init_special_inode(ip, ip->i_mode, old_decode_dev(vip->vii_rdev));
332 334
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index b72361479be..fba571648a8 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -87,8 +87,8 @@ static void __fuse_put_request(struct fuse_req *req)
87 87
88static void fuse_req_init_context(struct fuse_req *req) 88static void fuse_req_init_context(struct fuse_req *req)
89{ 89{
90 req->in.h.uid = current->fsuid; 90 req->in.h.uid = current_fsuid();
91 req->in.h.gid = current->fsgid; 91 req->in.h.gid = current_fsgid();
92 req->in.h.pid = current->pid; 92 req->in.h.pid = current->pid;
93} 93}
94 94
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index fd03330cade..95bc22bdd06 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -869,18 +869,25 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat,
869 */ 869 */
870int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task) 870int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task)
871{ 871{
872 const struct cred *cred;
873 int ret;
874
872 if (fc->flags & FUSE_ALLOW_OTHER) 875 if (fc->flags & FUSE_ALLOW_OTHER)
873 return 1; 876 return 1;
874 877
875 if (task->euid == fc->user_id && 878 rcu_read_lock();
876 task->suid == fc->user_id && 879 ret = 0;
877 task->uid == fc->user_id && 880 cred = __task_cred(task);
878 task->egid == fc->group_id && 881 if (cred->euid == fc->user_id &&
879 task->sgid == fc->group_id && 882 cred->suid == fc->user_id &&
880 task->gid == fc->group_id) 883 cred->uid == fc->user_id &&
881 return 1; 884 cred->egid == fc->group_id &&
885 cred->sgid == fc->group_id &&
886 cred->gid == fc->group_id)
887 ret = 1;
888 rcu_read_unlock();
882 889
883 return 0; 890 return ret;
884} 891}
885 892
886static int fuse_access(struct inode *inode, int mask) 893static int fuse_access(struct inode *inode, int mask)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 34930a964b8..4c9ee701126 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -646,7 +646,7 @@ static int fuse_write_begin(struct file *file, struct address_space *mapping,
646{ 646{
647 pgoff_t index = pos >> PAGE_CACHE_SHIFT; 647 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
648 648
649 *pagep = __grab_cache_page(mapping, index); 649 *pagep = grab_cache_page_write_begin(mapping, index, flags);
650 if (!*pagep) 650 if (!*pagep)
651 return -ENOMEM; 651 return -ENOMEM;
652 return 0; 652 return 0;
@@ -779,7 +779,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
779 break; 779 break;
780 780
781 err = -ENOMEM; 781 err = -ENOMEM;
782 page = __grab_cache_page(mapping, index); 782 page = grab_cache_page_write_begin(mapping, index, 0);
783 if (!page) 783 if (!page)
784 break; 784 break;
785 785
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 7cee695fa44..d57616840e8 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -705,18 +705,18 @@ static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode,
705 (dip->i_inode.i_mode & S_ISUID) && dip->i_inode.i_uid) { 705 (dip->i_inode.i_mode & S_ISUID) && dip->i_inode.i_uid) {
706 if (S_ISDIR(*mode)) 706 if (S_ISDIR(*mode))
707 *mode |= S_ISUID; 707 *mode |= S_ISUID;
708 else if (dip->i_inode.i_uid != current->fsuid) 708 else if (dip->i_inode.i_uid != current_fsuid())
709 *mode &= ~07111; 709 *mode &= ~07111;
710 *uid = dip->i_inode.i_uid; 710 *uid = dip->i_inode.i_uid;
711 } else 711 } else
712 *uid = current->fsuid; 712 *uid = current_fsuid();
713 713
714 if (dip->i_inode.i_mode & S_ISGID) { 714 if (dip->i_inode.i_mode & S_ISGID) {
715 if (S_ISDIR(*mode)) 715 if (S_ISDIR(*mode))
716 *mode |= S_ISGID; 716 *mode |= S_ISGID;
717 *gid = dip->i_inode.i_gid; 717 *gid = dip->i_inode.i_gid;
718 } else 718 } else
719 *gid = current->fsgid; 719 *gid = current_fsgid();
720} 720}
721 721
722static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation) 722static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation)
@@ -1124,8 +1124,8 @@ int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
1124 return -EPERM; 1124 return -EPERM;
1125 1125
1126 if ((dip->i_inode.i_mode & S_ISVTX) && 1126 if ((dip->i_inode.i_mode & S_ISVTX) &&
1127 dip->i_inode.i_uid != current->fsuid && 1127 dip->i_inode.i_uid != current_fsuid() &&
1128 ip->i_inode.i_uid != current->fsuid && !capable(CAP_FOWNER)) 1128 ip->i_inode.i_uid != current_fsuid() && !capable(CAP_FOWNER))
1129 return -EPERM; 1129 return -EPERM;
1130 1130
1131 if (IS_APPEND(&dip->i_inode)) 1131 if (IS_APPEND(&dip->i_inode))
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 27563816e1c..15f710f2d4d 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -675,7 +675,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
675 goto out_trans_fail; 675 goto out_trans_fail;
676 676
677 error = -ENOMEM; 677 error = -ENOMEM;
678 page = __grab_cache_page(mapping, index); 678 page = grab_cache_page_write_begin(mapping, index, flags);
679 *pagep = page; 679 *pagep = page;
680 if (unlikely(!page)) 680 if (unlikely(!page))
681 goto out_endtrans; 681 goto out_endtrans;
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index c69b7ac75bf..9435dda8f1e 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -155,8 +155,8 @@ struct inode *hfs_new_inode(struct inode *dir, struct qstr *name, int mode)
155 hfs_cat_build_key(sb, (btree_key *)&HFS_I(inode)->cat_key, dir->i_ino, name); 155 hfs_cat_build_key(sb, (btree_key *)&HFS_I(inode)->cat_key, dir->i_ino, name);
156 inode->i_ino = HFS_SB(sb)->next_id++; 156 inode->i_ino = HFS_SB(sb)->next_id++;
157 inode->i_mode = mode; 157 inode->i_mode = mode;
158 inode->i_uid = current->fsuid; 158 inode->i_uid = current_fsuid();
159 inode->i_gid = current->fsgid; 159 inode->i_gid = current_fsgid();
160 inode->i_nlink = 1; 160 inode->i_nlink = 1;
161 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; 161 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
162 HFS_I(inode)->flags = 0; 162 HFS_I(inode)->flags = 0;
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 3c7c7637719..c8b5acf4b0b 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -210,8 +210,8 @@ static int parse_options(char *options, struct hfs_sb_info *hsb)
210 int tmp, token; 210 int tmp, token;
211 211
212 /* initialize the sb with defaults */ 212 /* initialize the sb with defaults */
213 hsb->s_uid = current->uid; 213 hsb->s_uid = current_uid();
214 hsb->s_gid = current->gid; 214 hsb->s_gid = current_gid();
215 hsb->s_file_umask = 0133; 215 hsb->s_file_umask = 0133;
216 hsb->s_dir_umask = 0022; 216 hsb->s_dir_umask = 0022;
217 hsb->s_type = hsb->s_creator = cpu_to_be32(0x3f3f3f3f); /* == '????' */ 217 hsb->s_type = hsb->s_creator = cpu_to_be32(0x3f3f3f3f); /* == '????' */
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index b207f0e6fc2..f105ee9e1cc 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -296,8 +296,8 @@ struct inode *hfsplus_new_inode(struct super_block *sb, int mode)
296 296
297 inode->i_ino = HFSPLUS_SB(sb).next_cnid++; 297 inode->i_ino = HFSPLUS_SB(sb).next_cnid++;
298 inode->i_mode = mode; 298 inode->i_mode = mode;
299 inode->i_uid = current->fsuid; 299 inode->i_uid = current_fsuid();
300 inode->i_gid = current->fsgid; 300 inode->i_gid = current_fsgid();
301 inode->i_nlink = 1; 301 inode->i_nlink = 1;
302 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; 302 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
303 INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); 303 INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c
index 9699c56d323..bab7f8d1bdf 100644
--- a/fs/hfsplus/options.c
+++ b/fs/hfsplus/options.c
@@ -49,8 +49,8 @@ void hfsplus_fill_defaults(struct hfsplus_sb_info *opts)
49 opts->creator = HFSPLUS_DEF_CR_TYPE; 49 opts->creator = HFSPLUS_DEF_CR_TYPE;
50 opts->type = HFSPLUS_DEF_CR_TYPE; 50 opts->type = HFSPLUS_DEF_CR_TYPE;
51 opts->umask = current->fs->umask; 51 opts->umask = current->fs->umask;
52 opts->uid = current->uid; 52 opts->uid = current_uid();
53 opts->gid = current->gid; 53 opts->gid = current_gid();
54 opts->part = -1; 54 opts->part = -1;
55 opts->session = -1; 55 opts->session = -1;
56} 56}
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 3a31451ac17..5c538e0ec14 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -501,7 +501,7 @@ int hostfs_write_begin(struct file *file, struct address_space *mapping,
501{ 501{
502 pgoff_t index = pos >> PAGE_CACHE_SHIFT; 502 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
503 503
504 *pagep = __grab_cache_page(mapping, index); 504 *pagep = grab_cache_page_write_begin(mapping, index, flags);
505 if (!*pagep) 505 if (!*pagep)
506 return -ENOMEM; 506 return -ENOMEM;
507 return 0; 507 return 0;
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index 10783f3d265..b649232dde9 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -92,11 +92,11 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
92 inc_nlink(dir); 92 inc_nlink(dir);
93 insert_inode_hash(result); 93 insert_inode_hash(result);
94 94
95 if (result->i_uid != current->fsuid || 95 if (result->i_uid != current_fsuid() ||
96 result->i_gid != current->fsgid || 96 result->i_gid != current_fsgid() ||
97 result->i_mode != (mode | S_IFDIR)) { 97 result->i_mode != (mode | S_IFDIR)) {
98 result->i_uid = current->fsuid; 98 result->i_uid = current_fsuid();
99 result->i_gid = current->fsgid; 99 result->i_gid = current_fsgid();
100 result->i_mode = mode | S_IFDIR; 100 result->i_mode = mode | S_IFDIR;
101 hpfs_write_inode_nolock(result); 101 hpfs_write_inode_nolock(result);
102 } 102 }
@@ -184,11 +184,11 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc
184 184
185 insert_inode_hash(result); 185 insert_inode_hash(result);
186 186
187 if (result->i_uid != current->fsuid || 187 if (result->i_uid != current_fsuid() ||
188 result->i_gid != current->fsgid || 188 result->i_gid != current_fsgid() ||
189 result->i_mode != (mode | S_IFREG)) { 189 result->i_mode != (mode | S_IFREG)) {
190 result->i_uid = current->fsuid; 190 result->i_uid = current_fsuid();
191 result->i_gid = current->fsgid; 191 result->i_gid = current_fsgid();
192 result->i_mode = mode | S_IFREG; 192 result->i_mode = mode | S_IFREG;
193 hpfs_write_inode_nolock(result); 193 hpfs_write_inode_nolock(result);
194 } 194 }
@@ -247,8 +247,8 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t
247 result->i_mtime.tv_nsec = 0; 247 result->i_mtime.tv_nsec = 0;
248 result->i_atime.tv_nsec = 0; 248 result->i_atime.tv_nsec = 0;
249 hpfs_i(result)->i_ea_size = 0; 249 hpfs_i(result)->i_ea_size = 0;
250 result->i_uid = current->fsuid; 250 result->i_uid = current_fsuid();
251 result->i_gid = current->fsgid; 251 result->i_gid = current_fsgid();
252 result->i_nlink = 1; 252 result->i_nlink = 1;
253 result->i_size = 0; 253 result->i_size = 0;
254 result->i_blocks = 1; 254 result->i_blocks = 1;
@@ -325,8 +325,8 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
325 result->i_atime.tv_nsec = 0; 325 result->i_atime.tv_nsec = 0;
326 hpfs_i(result)->i_ea_size = 0; 326 hpfs_i(result)->i_ea_size = 0;
327 result->i_mode = S_IFLNK | 0777; 327 result->i_mode = S_IFLNK | 0777;
328 result->i_uid = current->fsuid; 328 result->i_uid = current_fsuid();
329 result->i_gid = current->fsgid; 329 result->i_gid = current_fsgid();
330 result->i_blocks = 1; 330 result->i_blocks = 1;
331 result->i_nlink = 1; 331 result->i_nlink = 1;
332 result->i_size = strlen(symlink); 332 result->i_size = strlen(symlink);
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 29ad461d568..0d049b8919c 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -475,8 +475,8 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
475 475
476 init_MUTEX(&sbi->hpfs_creation_de); 476 init_MUTEX(&sbi->hpfs_creation_de);
477 477
478 uid = current->uid; 478 uid = current_uid();
479 gid = current->gid; 479 gid = current_gid();
480 umask = current->fs->umask; 480 umask = current->fs->umask;
481 lowercase = 0; 481 lowercase = 0;
482 conv = CONV_BINARY; 482 conv = CONV_BINARY;
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 2b3d1828db9..b278f7f5202 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -426,6 +426,7 @@ static int file_mode(int fmode)
426 426
427static int hppfs_open(struct inode *inode, struct file *file) 427static int hppfs_open(struct inode *inode, struct file *file)
428{ 428{
429 const struct cred *cred = file->f_cred;
429 struct hppfs_private *data; 430 struct hppfs_private *data;
430 struct vfsmount *proc_mnt; 431 struct vfsmount *proc_mnt;
431 struct dentry *proc_dentry; 432 struct dentry *proc_dentry;
@@ -446,7 +447,7 @@ static int hppfs_open(struct inode *inode, struct file *file)
446 447
447 /* XXX This isn't closed anywhere */ 448 /* XXX This isn't closed anywhere */
448 data->proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt), 449 data->proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt),
449 file_mode(file->f_mode)); 450 file_mode(file->f_mode), cred);
450 err = PTR_ERR(data->proc_file); 451 err = PTR_ERR(data->proc_file);
451 if (IS_ERR(data->proc_file)) 452 if (IS_ERR(data->proc_file))
452 goto out_free1; 453 goto out_free1;
@@ -489,6 +490,7 @@ static int hppfs_open(struct inode *inode, struct file *file)
489 490
490static int hppfs_dir_open(struct inode *inode, struct file *file) 491static int hppfs_dir_open(struct inode *inode, struct file *file)
491{ 492{
493 const struct cred *cred = file->f_cred;
492 struct hppfs_private *data; 494 struct hppfs_private *data;
493 struct vfsmount *proc_mnt; 495 struct vfsmount *proc_mnt;
494 struct dentry *proc_dentry; 496 struct dentry *proc_dentry;
@@ -502,7 +504,7 @@ static int hppfs_dir_open(struct inode *inode, struct file *file)
502 proc_dentry = HPPFS_I(inode)->proc_dentry; 504 proc_dentry = HPPFS_I(inode)->proc_dentry;
503 proc_mnt = inode->i_sb->s_fs_info; 505 proc_mnt = inode->i_sb->s_fs_info;
504 data->proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt), 506 data->proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt),
505 file_mode(file->f_mode)); 507 file_mode(file->f_mode), cred);
506 err = PTR_ERR(data->proc_file); 508 err = PTR_ERR(data->proc_file);
507 if (IS_ERR(data->proc_file)) 509 if (IS_ERR(data->proc_file))
508 goto out_free; 510 goto out_free;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 61edc701b0e..7d479ce3ace 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -551,9 +551,9 @@ static int hugetlbfs_mknod(struct inode *dir,
551 if (S_ISDIR(mode)) 551 if (S_ISDIR(mode))
552 mode |= S_ISGID; 552 mode |= S_ISGID;
553 } else { 553 } else {
554 gid = current->fsgid; 554 gid = current_fsgid();
555 } 555 }
556 inode = hugetlbfs_get_inode(dir->i_sb, current->fsuid, gid, mode, dev); 556 inode = hugetlbfs_get_inode(dir->i_sb, current_fsuid(), gid, mode, dev);
557 if (inode) { 557 if (inode) {
558 dir->i_ctime = dir->i_mtime = CURRENT_TIME; 558 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
559 d_instantiate(dentry, inode); 559 d_instantiate(dentry, inode);
@@ -586,9 +586,9 @@ static int hugetlbfs_symlink(struct inode *dir,
586 if (dir->i_mode & S_ISGID) 586 if (dir->i_mode & S_ISGID)
587 gid = dir->i_gid; 587 gid = dir->i_gid;
588 else 588 else
589 gid = current->fsgid; 589 gid = current_fsgid();
590 590
591 inode = hugetlbfs_get_inode(dir->i_sb, current->fsuid, 591 inode = hugetlbfs_get_inode(dir->i_sb, current_fsuid(),
592 gid, S_IFLNK|S_IRWXUGO, 0); 592 gid, S_IFLNK|S_IRWXUGO, 0);
593 if (inode) { 593 if (inode) {
594 int l = strlen(symname)+1; 594 int l = strlen(symname)+1;
@@ -854,8 +854,8 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
854 854
855 config.nr_blocks = -1; /* No limit on size by default */ 855 config.nr_blocks = -1; /* No limit on size by default */
856 config.nr_inodes = -1; /* No limit on number of inodes by default */ 856 config.nr_inodes = -1; /* No limit on number of inodes by default */
857 config.uid = current->fsuid; 857 config.uid = current_fsuid();
858 config.gid = current->fsgid; 858 config.gid = current_fsgid();
859 config.mode = 0755; 859 config.mode = 0755;
860 config.hstate = &default_hstate; 860 config.hstate = &default_hstate;
861 ret = hugetlbfs_parse_options(data, &config); 861 ret = hugetlbfs_parse_options(data, &config);
@@ -951,6 +951,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size)
951 struct inode *inode; 951 struct inode *inode;
952 struct dentry *dentry, *root; 952 struct dentry *dentry, *root;
953 struct qstr quick_string; 953 struct qstr quick_string;
954 struct user_struct *user = current_user();
954 955
955 if (!hugetlbfs_vfsmount) 956 if (!hugetlbfs_vfsmount)
956 return ERR_PTR(-ENOENT); 957 return ERR_PTR(-ENOENT);
@@ -958,7 +959,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size)
958 if (!can_do_hugetlb_shm()) 959 if (!can_do_hugetlb_shm())
959 return ERR_PTR(-EPERM); 960 return ERR_PTR(-EPERM);
960 961
961 if (!user_shm_lock(size, current->user)) 962 if (!user_shm_lock(size, user))
962 return ERR_PTR(-ENOMEM); 963 return ERR_PTR(-ENOMEM);
963 964
964 root = hugetlbfs_vfsmount->mnt_root; 965 root = hugetlbfs_vfsmount->mnt_root;
@@ -970,8 +971,8 @@ struct file *hugetlb_file_setup(const char *name, size_t size)
970 goto out_shm_unlock; 971 goto out_shm_unlock;
971 972
972 error = -ENOSPC; 973 error = -ENOSPC;
973 inode = hugetlbfs_get_inode(root->d_sb, current->fsuid, 974 inode = hugetlbfs_get_inode(root->d_sb, current_fsuid(),
974 current->fsgid, S_IFREG | S_IRWXUGO, 0); 975 current_fsgid(), S_IFREG | S_IRWXUGO, 0);
975 if (!inode) 976 if (!inode)
976 goto out_dentry; 977 goto out_dentry;
977 978
@@ -998,7 +999,7 @@ out_inode:
998out_dentry: 999out_dentry:
999 dput(dentry); 1000 dput(dentry);
1000out_shm_unlock: 1001out_shm_unlock:
1001 user_shm_unlock(size, current->user); 1002 user_shm_unlock(size, user);
1002 return ERR_PTR(error); 1003 return ERR_PTR(error);
1003} 1004}
1004 1005
diff --git a/fs/inode.c b/fs/inode.c
index 0487ddba139..7de1cda9248 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -108,84 +108,100 @@ static void wake_up_inode(struct inode *inode)
108 wake_up_bit(&inode->i_state, __I_LOCK); 108 wake_up_bit(&inode->i_state, __I_LOCK);
109} 109}
110 110
111static struct inode *alloc_inode(struct super_block *sb) 111/**
112 * inode_init_always - perform inode structure intialisation
113 * @sb - superblock inode belongs to.
114 * @inode - inode to initialise
115 *
116 * These are initializations that need to be done on every inode
117 * allocation as the fields are not initialised by slab allocation.
118 */
119struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
112{ 120{
113 static const struct address_space_operations empty_aops; 121 static const struct address_space_operations empty_aops;
114 static struct inode_operations empty_iops; 122 static struct inode_operations empty_iops;
115 static const struct file_operations empty_fops; 123 static const struct file_operations empty_fops;
116 struct inode *inode;
117 124
118 if (sb->s_op->alloc_inode) 125 struct address_space * const mapping = &inode->i_data;
119 inode = sb->s_op->alloc_inode(sb); 126
120 else 127 inode->i_sb = sb;
121 inode = (struct inode *) kmem_cache_alloc(inode_cachep, GFP_KERNEL); 128 inode->i_blkbits = sb->s_blocksize_bits;
122 129 inode->i_flags = 0;
123 if (inode) { 130 atomic_set(&inode->i_count, 1);
124 struct address_space * const mapping = &inode->i_data; 131 inode->i_op = &empty_iops;
125 132 inode->i_fop = &empty_fops;
126 inode->i_sb = sb; 133 inode->i_nlink = 1;
127 inode->i_blkbits = sb->s_blocksize_bits; 134 atomic_set(&inode->i_writecount, 0);
128 inode->i_flags = 0; 135 inode->i_size = 0;
129 atomic_set(&inode->i_count, 1); 136 inode->i_blocks = 0;
130 inode->i_op = &empty_iops; 137 inode->i_bytes = 0;
131 inode->i_fop = &empty_fops; 138 inode->i_generation = 0;
132 inode->i_nlink = 1;
133 atomic_set(&inode->i_writecount, 0);
134 inode->i_size = 0;
135 inode->i_blocks = 0;
136 inode->i_bytes = 0;
137 inode->i_generation = 0;
138#ifdef CONFIG_QUOTA 139#ifdef CONFIG_QUOTA
139 memset(&inode->i_dquot, 0, sizeof(inode->i_dquot)); 140 memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
140#endif 141#endif
141 inode->i_pipe = NULL; 142 inode->i_pipe = NULL;
142 inode->i_bdev = NULL; 143 inode->i_bdev = NULL;
143 inode->i_cdev = NULL; 144 inode->i_cdev = NULL;
144 inode->i_rdev = 0; 145 inode->i_rdev = 0;
145 inode->dirtied_when = 0; 146 inode->dirtied_when = 0;
146 if (security_inode_alloc(inode)) { 147 if (security_inode_alloc(inode)) {
147 if (inode->i_sb->s_op->destroy_inode) 148 if (inode->i_sb->s_op->destroy_inode)
148 inode->i_sb->s_op->destroy_inode(inode); 149 inode->i_sb->s_op->destroy_inode(inode);
149 else 150 else
150 kmem_cache_free(inode_cachep, (inode)); 151 kmem_cache_free(inode_cachep, (inode));
151 return NULL; 152 return NULL;
152 } 153 }
153 154
154 spin_lock_init(&inode->i_lock); 155 spin_lock_init(&inode->i_lock);
155 lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key); 156 lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);
156 157
157 mutex_init(&inode->i_mutex); 158 mutex_init(&inode->i_mutex);
158 lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key); 159 lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key);
159 160
160 init_rwsem(&inode->i_alloc_sem); 161 init_rwsem(&inode->i_alloc_sem);
161 lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key); 162 lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key);
162 163
163 mapping->a_ops = &empty_aops; 164 mapping->a_ops = &empty_aops;
164 mapping->host = inode; 165 mapping->host = inode;
165 mapping->flags = 0; 166 mapping->flags = 0;
166 mapping_set_gfp_mask(mapping, GFP_HIGHUSER_PAGECACHE); 167 mapping_set_gfp_mask(mapping, GFP_HIGHUSER_PAGECACHE);
167 mapping->assoc_mapping = NULL; 168 mapping->assoc_mapping = NULL;
168 mapping->backing_dev_info = &default_backing_dev_info; 169 mapping->backing_dev_info = &default_backing_dev_info;
169 mapping->writeback_index = 0; 170 mapping->writeback_index = 0;
170 171
171 /* 172 /*
172 * If the block_device provides a backing_dev_info for client 173 * If the block_device provides a backing_dev_info for client
173 * inodes then use that. Otherwise the inode share the bdev's 174 * inodes then use that. Otherwise the inode share the bdev's
174 * backing_dev_info. 175 * backing_dev_info.
175 */ 176 */
176 if (sb->s_bdev) { 177 if (sb->s_bdev) {
177 struct backing_dev_info *bdi; 178 struct backing_dev_info *bdi;
178 179
179 bdi = sb->s_bdev->bd_inode_backing_dev_info; 180 bdi = sb->s_bdev->bd_inode_backing_dev_info;
180 if (!bdi) 181 if (!bdi)
181 bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; 182 bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
182 mapping->backing_dev_info = bdi; 183 mapping->backing_dev_info = bdi;
183 }
184 inode->i_private = NULL;
185 inode->i_mapping = mapping;
186 } 184 }
185 inode->i_private = NULL;
186 inode->i_mapping = mapping;
187
187 return inode; 188 return inode;
188} 189}
190EXPORT_SYMBOL(inode_init_always);
191
192static struct inode *alloc_inode(struct super_block *sb)
193{
194 struct inode *inode;
195
196 if (sb->s_op->alloc_inode)
197 inode = sb->s_op->alloc_inode(sb);
198 else
199 inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL);
200
201 if (inode)
202 return inode_init_always(sb, inode);
203 return NULL;
204}
189 205
190void destroy_inode(struct inode *inode) 206void destroy_inode(struct inode *inode)
191{ 207{
@@ -196,6 +212,7 @@ void destroy_inode(struct inode *inode)
196 else 212 else
197 kmem_cache_free(inode_cachep, (inode)); 213 kmem_cache_free(inode_cachep, (inode));
198} 214}
215EXPORT_SYMBOL(destroy_inode);
199 216
200 217
201/* 218/*
@@ -534,6 +551,49 @@ repeat:
534 return node ? inode : NULL; 551 return node ? inode : NULL;
535} 552}
536 553
554static unsigned long hash(struct super_block *sb, unsigned long hashval)
555{
556 unsigned long tmp;
557
558 tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
559 L1_CACHE_BYTES;
560 tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS);
561 return tmp & I_HASHMASK;
562}
563
564static inline void
565__inode_add_to_lists(struct super_block *sb, struct hlist_head *head,
566 struct inode *inode)
567{
568 inodes_stat.nr_inodes++;
569 list_add(&inode->i_list, &inode_in_use);
570 list_add(&inode->i_sb_list, &sb->s_inodes);
571 if (head)
572 hlist_add_head(&inode->i_hash, head);
573}
574
575/**
576 * inode_add_to_lists - add a new inode to relevant lists
577 * @sb - superblock inode belongs to.
578 * @inode - inode to mark in use
579 *
580 * When an inode is allocated it needs to be accounted for, added to the in use
581 * list, the owning superblock and the inode hash. This needs to be done under
582 * the inode_lock, so export a function to do this rather than the inode lock
583 * itself. We calculate the hash list to add to here so it is all internal
584 * which requires the caller to have already set up the inode number in the
585 * inode to add.
586 */
587void inode_add_to_lists(struct super_block *sb, struct inode *inode)
588{
589 struct hlist_head *head = inode_hashtable + hash(sb, inode->i_ino);
590
591 spin_lock(&inode_lock);
592 __inode_add_to_lists(sb, head, inode);
593 spin_unlock(&inode_lock);
594}
595EXPORT_SYMBOL_GPL(inode_add_to_lists);
596
537/** 597/**
538 * new_inode - obtain an inode 598 * new_inode - obtain an inode
539 * @sb: superblock 599 * @sb: superblock
@@ -561,9 +621,7 @@ struct inode *new_inode(struct super_block *sb)
561 inode = alloc_inode(sb); 621 inode = alloc_inode(sb);
562 if (inode) { 622 if (inode) {
563 spin_lock(&inode_lock); 623 spin_lock(&inode_lock);
564 inodes_stat.nr_inodes++; 624 __inode_add_to_lists(sb, NULL, inode);
565 list_add(&inode->i_list, &inode_in_use);
566 list_add(&inode->i_sb_list, &sb->s_inodes);
567 inode->i_ino = ++last_ino; 625 inode->i_ino = ++last_ino;
568 inode->i_state = 0; 626 inode->i_state = 0;
569 spin_unlock(&inode_lock); 627 spin_unlock(&inode_lock);
@@ -622,10 +680,7 @@ static struct inode * get_new_inode(struct super_block *sb, struct hlist_head *h
622 if (set(inode, data)) 680 if (set(inode, data))
623 goto set_failed; 681 goto set_failed;
624 682
625 inodes_stat.nr_inodes++; 683 __inode_add_to_lists(sb, head, inode);
626 list_add(&inode->i_list, &inode_in_use);
627 list_add(&inode->i_sb_list, &sb->s_inodes);
628 hlist_add_head(&inode->i_hash, head);
629 inode->i_state = I_LOCK|I_NEW; 684 inode->i_state = I_LOCK|I_NEW;
630 spin_unlock(&inode_lock); 685 spin_unlock(&inode_lock);
631 686
@@ -671,10 +726,7 @@ static struct inode * get_new_inode_fast(struct super_block *sb, struct hlist_he
671 old = find_inode_fast(sb, head, ino); 726 old = find_inode_fast(sb, head, ino);
672 if (!old) { 727 if (!old) {
673 inode->i_ino = ino; 728 inode->i_ino = ino;
674 inodes_stat.nr_inodes++; 729 __inode_add_to_lists(sb, head, inode);
675 list_add(&inode->i_list, &inode_in_use);
676 list_add(&inode->i_sb_list, &sb->s_inodes);
677 hlist_add_head(&inode->i_hash, head);
678 inode->i_state = I_LOCK|I_NEW; 730 inode->i_state = I_LOCK|I_NEW;
679 spin_unlock(&inode_lock); 731 spin_unlock(&inode_lock);
680 732
@@ -698,16 +750,6 @@ static struct inode * get_new_inode_fast(struct super_block *sb, struct hlist_he
698 return inode; 750 return inode;
699} 751}
700 752
701static unsigned long hash(struct super_block *sb, unsigned long hashval)
702{
703 unsigned long tmp;
704
705 tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
706 L1_CACHE_BYTES;
707 tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS);
708 return tmp & I_HASHMASK;
709}
710
711/** 753/**
712 * iunique - get a unique inode number 754 * iunique - get a unique inode number
713 * @sb: superblock 755 * @sb: superblock
@@ -990,6 +1032,65 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
990 1032
991EXPORT_SYMBOL(iget_locked); 1033EXPORT_SYMBOL(iget_locked);
992 1034
1035int insert_inode_locked(struct inode *inode)
1036{
1037 struct super_block *sb = inode->i_sb;
1038 ino_t ino = inode->i_ino;
1039 struct hlist_head *head = inode_hashtable + hash(sb, ino);
1040 struct inode *old;
1041
1042 inode->i_state |= I_LOCK|I_NEW;
1043 while (1) {
1044 spin_lock(&inode_lock);
1045 old = find_inode_fast(sb, head, ino);
1046 if (likely(!old)) {
1047 hlist_add_head(&inode->i_hash, head);
1048 spin_unlock(&inode_lock);
1049 return 0;
1050 }
1051 __iget(old);
1052 spin_unlock(&inode_lock);
1053 wait_on_inode(old);
1054 if (unlikely(!hlist_unhashed(&old->i_hash))) {
1055 iput(old);
1056 return -EBUSY;
1057 }
1058 iput(old);
1059 }
1060}
1061
1062EXPORT_SYMBOL(insert_inode_locked);
1063
1064int insert_inode_locked4(struct inode *inode, unsigned long hashval,
1065 int (*test)(struct inode *, void *), void *data)
1066{
1067 struct super_block *sb = inode->i_sb;
1068 struct hlist_head *head = inode_hashtable + hash(sb, hashval);
1069 struct inode *old;
1070
1071 inode->i_state |= I_LOCK|I_NEW;
1072
1073 while (1) {
1074 spin_lock(&inode_lock);
1075 old = find_inode(sb, head, test, data);
1076 if (likely(!old)) {
1077 hlist_add_head(&inode->i_hash, head);
1078 spin_unlock(&inode_lock);
1079 return 0;
1080 }
1081 __iget(old);
1082 spin_unlock(&inode_lock);
1083 wait_on_inode(old);
1084 if (unlikely(!hlist_unhashed(&old->i_hash))) {
1085 iput(old);
1086 return -EBUSY;
1087 }
1088 iput(old);
1089 }
1090}
1091
1092EXPORT_SYMBOL(insert_inode_locked4);
1093
993/** 1094/**
994 * __insert_inode_hash - hash an inode 1095 * __insert_inode_hash - hash an inode
995 * @inode: unhashed inode 1096 * @inode: unhashed inode
@@ -1292,6 +1393,7 @@ int inode_wait(void *word)
1292 schedule(); 1393 schedule();
1293 return 0; 1394 return 0;
1294} 1395}
1396EXPORT_SYMBOL(inode_wait);
1295 1397
1296/* 1398/*
1297 * If we try to find an inode in the inode hash while it is being 1399 * If we try to find an inode in the inode hash while it is being
diff --git a/fs/internal.h b/fs/internal.h
index 80aa9a02337..53af885f173 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -10,6 +10,7 @@
10 */ 10 */
11 11
12struct super_block; 12struct super_block;
13struct linux_binprm;
13 14
14/* 15/*
15 * block_dev.c 16 * block_dev.c
@@ -40,6 +41,11 @@ static inline int sb_is_blkdev_sb(struct super_block *sb)
40extern void __init chrdev_init(void); 41extern void __init chrdev_init(void);
41 42
42/* 43/*
44 * exec.c
45 */
46extern void check_unsafe_exec(struct linux_binprm *);
47
48/*
43 * namespace.c 49 * namespace.c
44 */ 50 */
45extern int copy_mount_options(const void __user *, unsigned long *); 51extern int copy_mount_options(const void __user *, unsigned long *);
diff --git a/fs/ioprio.c b/fs/ioprio.c
index da3cc460d4d..3569e0ad86a 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -31,10 +31,16 @@ static int set_task_ioprio(struct task_struct *task, int ioprio)
31{ 31{
32 int err; 32 int err;
33 struct io_context *ioc; 33 struct io_context *ioc;
34 const struct cred *cred = current_cred(), *tcred;
34 35
35 if (task->uid != current->euid && 36 rcu_read_lock();
36 task->uid != current->uid && !capable(CAP_SYS_NICE)) 37 tcred = __task_cred(task);
38 if (tcred->uid != cred->euid &&
39 tcred->uid != cred->uid && !capable(CAP_SYS_NICE)) {
40 rcu_read_unlock();
37 return -EPERM; 41 return -EPERM;
42 }
43 rcu_read_unlock();
38 44
39 err = security_task_setioprio(task, ioprio); 45 err = security_task_setioprio(task, ioprio);
40 if (err) 46 if (err)
@@ -123,7 +129,7 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio)
123 break; 129 break;
124 case IOPRIO_WHO_USER: 130 case IOPRIO_WHO_USER:
125 if (!who) 131 if (!who)
126 user = current->user; 132 user = current_user();
127 else 133 else
128 user = find_user(who); 134 user = find_user(who);
129 135
@@ -131,7 +137,7 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio)
131 break; 137 break;
132 138
133 do_each_thread(g, p) { 139 do_each_thread(g, p) {
134 if (p->uid != who) 140 if (__task_cred(p)->uid != who)
135 continue; 141 continue;
136 ret = set_task_ioprio(p, ioprio); 142 ret = set_task_ioprio(p, ioprio);
137 if (ret) 143 if (ret)
@@ -216,7 +222,7 @@ asmlinkage long sys_ioprio_get(int which, int who)
216 break; 222 break;
217 case IOPRIO_WHO_USER: 223 case IOPRIO_WHO_USER:
218 if (!who) 224 if (!who)
219 user = current->user; 225 user = current_user();
220 else 226 else
221 user = find_user(who); 227 user = find_user(who);
222 228
@@ -224,7 +230,7 @@ asmlinkage long sys_ioprio_get(int which, int who)
224 break; 230 break;
225 231
226 do_each_thread(g, p) { 232 do_each_thread(g, p) {
227 if (p->uid != user->uid) 233 if (__task_cred(p)->uid != user->uid)
228 continue; 234 continue;
229 tmpio = get_task_ioprio(p); 235 tmpio = get_task_ioprio(p);
230 if (tmpio < 0) 236 if (tmpio < 0)
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 5a98aa87c85..5edc2bf2058 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -132,7 +132,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
132 uint32_t pageofs = index << PAGE_CACHE_SHIFT; 132 uint32_t pageofs = index << PAGE_CACHE_SHIFT;
133 int ret = 0; 133 int ret = 0;
134 134
135 pg = __grab_cache_page(mapping, index); 135 pg = grab_cache_page_write_begin(mapping, index, flags);
136 if (!pg) 136 if (!pg)
137 return -ENOMEM; 137 return -ENOMEM;
138 *pagep = pg; 138 *pagep = pg;
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 210339784b5..b00ee9f05a0 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -59,8 +59,14 @@ struct inode *jfs_iget(struct super_block *sb, unsigned long ino)
59 if (inode->i_size >= IDATASIZE) { 59 if (inode->i_size >= IDATASIZE) {
60 inode->i_op = &page_symlink_inode_operations; 60 inode->i_op = &page_symlink_inode_operations;
61 inode->i_mapping->a_ops = &jfs_aops; 61 inode->i_mapping->a_ops = &jfs_aops;
62 } else 62 } else {
63 inode->i_op = &jfs_symlink_inode_operations; 63 inode->i_op = &jfs_symlink_inode_operations;
64 /*
65 * The inline data should be null-terminated, but
66 * don't let on-disk corruption crash the kernel
67 */
68 JFS_IP(inode)->i_inline[inode->i_size] = '\0';
69 }
64 } else { 70 } else {
65 inode->i_op = &jfs_file_inode_operations; 71 inode->i_op = &jfs_file_inode_operations;
66 init_special_inode(inode, inode->i_mode, inode->i_rdev); 72 init_special_inode(inode, inode->i_mode, inode->i_rdev);
diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c
index ed6574bee51..d4d142c2edd 100644
--- a/fs/jfs/jfs_inode.c
+++ b/fs/jfs/jfs_inode.c
@@ -79,7 +79,8 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
79 inode = new_inode(sb); 79 inode = new_inode(sb);
80 if (!inode) { 80 if (!inode) {
81 jfs_warn("ialloc: new_inode returned NULL!"); 81 jfs_warn("ialloc: new_inode returned NULL!");
82 return ERR_PTR(-ENOMEM); 82 rc = -ENOMEM;
83 goto fail;
83 } 84 }
84 85
85 jfs_inode = JFS_IP(inode); 86 jfs_inode = JFS_IP(inode);
@@ -89,17 +90,21 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
89 jfs_warn("ialloc: diAlloc returned %d!", rc); 90 jfs_warn("ialloc: diAlloc returned %d!", rc);
90 if (rc == -EIO) 91 if (rc == -EIO)
91 make_bad_inode(inode); 92 make_bad_inode(inode);
92 iput(inode); 93 goto fail_put;
93 return ERR_PTR(rc);
94 } 94 }
95 95
96 inode->i_uid = current->fsuid; 96 if (insert_inode_locked(inode) < 0) {
97 rc = -EINVAL;
98 goto fail_unlock;
99 }
100
101 inode->i_uid = current_fsuid();
97 if (parent->i_mode & S_ISGID) { 102 if (parent->i_mode & S_ISGID) {
98 inode->i_gid = parent->i_gid; 103 inode->i_gid = parent->i_gid;
99 if (S_ISDIR(mode)) 104 if (S_ISDIR(mode))
100 mode |= S_ISGID; 105 mode |= S_ISGID;
101 } else 106 } else
102 inode->i_gid = current->fsgid; 107 inode->i_gid = current_fsgid();
103 108
104 /* 109 /*
105 * New inodes need to save sane values on disk when 110 * New inodes need to save sane values on disk when
@@ -112,11 +117,8 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
112 * Allocate inode to quota. 117 * Allocate inode to quota.
113 */ 118 */
114 if (DQUOT_ALLOC_INODE(inode)) { 119 if (DQUOT_ALLOC_INODE(inode)) {
115 DQUOT_DROP(inode); 120 rc = -EDQUOT;
116 inode->i_flags |= S_NOQUOTA; 121 goto fail_drop;
117 inode->i_nlink = 0;
118 iput(inode);
119 return ERR_PTR(-EDQUOT);
120 } 122 }
121 123
122 inode->i_mode = mode; 124 inode->i_mode = mode;
@@ -158,4 +160,15 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
158 jfs_info("ialloc returns inode = 0x%p\n", inode); 160 jfs_info("ialloc returns inode = 0x%p\n", inode);
159 161
160 return inode; 162 return inode;
163
164fail_drop:
165 DQUOT_DROP(inode);
166 inode->i_flags |= S_NOQUOTA;
167fail_unlock:
168 inode->i_nlink = 0;
169 unlock_new_inode(inode);
170fail_put:
171 iput(inode);
172fail:
173 return ERR_PTR(rc);
161} 174}
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index cc3cedffbfa..b4de56b851e 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -155,7 +155,6 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
155 ip->i_fop = &jfs_file_operations; 155 ip->i_fop = &jfs_file_operations;
156 ip->i_mapping->a_ops = &jfs_aops; 156 ip->i_mapping->a_ops = &jfs_aops;
157 157
158 insert_inode_hash(ip);
159 mark_inode_dirty(ip); 158 mark_inode_dirty(ip);
160 159
161 dip->i_ctime = dip->i_mtime = CURRENT_TIME; 160 dip->i_ctime = dip->i_mtime = CURRENT_TIME;
@@ -171,9 +170,12 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
171 if (rc) { 170 if (rc) {
172 free_ea_wmap(ip); 171 free_ea_wmap(ip);
173 ip->i_nlink = 0; 172 ip->i_nlink = 0;
173 unlock_new_inode(ip);
174 iput(ip); 174 iput(ip);
175 } else 175 } else {
176 d_instantiate(dentry, ip); 176 d_instantiate(dentry, ip);
177 unlock_new_inode(ip);
178 }
177 179
178 out2: 180 out2:
179 free_UCSname(&dname); 181 free_UCSname(&dname);
@@ -289,7 +291,6 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
289 ip->i_op = &jfs_dir_inode_operations; 291 ip->i_op = &jfs_dir_inode_operations;
290 ip->i_fop = &jfs_dir_operations; 292 ip->i_fop = &jfs_dir_operations;
291 293
292 insert_inode_hash(ip);
293 mark_inode_dirty(ip); 294 mark_inode_dirty(ip);
294 295
295 /* update parent directory inode */ 296 /* update parent directory inode */
@@ -306,9 +307,12 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
306 if (rc) { 307 if (rc) {
307 free_ea_wmap(ip); 308 free_ea_wmap(ip);
308 ip->i_nlink = 0; 309 ip->i_nlink = 0;
310 unlock_new_inode(ip);
309 iput(ip); 311 iput(ip);
310 } else 312 } else {
311 d_instantiate(dentry, ip); 313 d_instantiate(dentry, ip);
314 unlock_new_inode(ip);
315 }
312 316
313 out2: 317 out2:
314 free_UCSname(&dname); 318 free_UCSname(&dname);
@@ -1019,7 +1023,6 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
1019 goto out3; 1023 goto out3;
1020 } 1024 }
1021 1025
1022 insert_inode_hash(ip);
1023 mark_inode_dirty(ip); 1026 mark_inode_dirty(ip);
1024 1027
1025 dip->i_ctime = dip->i_mtime = CURRENT_TIME; 1028 dip->i_ctime = dip->i_mtime = CURRENT_TIME;
@@ -1039,9 +1042,12 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
1039 if (rc) { 1042 if (rc) {
1040 free_ea_wmap(ip); 1043 free_ea_wmap(ip);
1041 ip->i_nlink = 0; 1044 ip->i_nlink = 0;
1045 unlock_new_inode(ip);
1042 iput(ip); 1046 iput(ip);
1043 } else 1047 } else {
1044 d_instantiate(dentry, ip); 1048 d_instantiate(dentry, ip);
1049 unlock_new_inode(ip);
1050 }
1045 1051
1046 out2: 1052 out2:
1047 free_UCSname(&dname); 1053 free_UCSname(&dname);
@@ -1399,7 +1405,6 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
1399 jfs_ip->dev = new_encode_dev(rdev); 1405 jfs_ip->dev = new_encode_dev(rdev);
1400 init_special_inode(ip, ip->i_mode, rdev); 1406 init_special_inode(ip, ip->i_mode, rdev);
1401 1407
1402 insert_inode_hash(ip);
1403 mark_inode_dirty(ip); 1408 mark_inode_dirty(ip);
1404 1409
1405 dir->i_ctime = dir->i_mtime = CURRENT_TIME; 1410 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
@@ -1417,9 +1422,12 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
1417 if (rc) { 1422 if (rc) {
1418 free_ea_wmap(ip); 1423 free_ea_wmap(ip);
1419 ip->i_nlink = 0; 1424 ip->i_nlink = 0;
1425 unlock_new_inode(ip);
1420 iput(ip); 1426 iput(ip);
1421 } else 1427 } else {
1422 d_instantiate(dentry, ip); 1428 d_instantiate(dentry, ip);
1429 unlock_new_inode(ip);
1430 }
1423 1431
1424 out1: 1432 out1:
1425 free_UCSname(&dname); 1433 free_UCSname(&dname);
diff --git a/fs/libfs.c b/fs/libfs.c
index e960a832190..bdaec17fa38 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -360,7 +360,7 @@ int simple_write_begin(struct file *file, struct address_space *mapping,
360 index = pos >> PAGE_CACHE_SHIFT; 360 index = pos >> PAGE_CACHE_SHIFT;
361 from = pos & (PAGE_CACHE_SIZE - 1); 361 from = pos & (PAGE_CACHE_SIZE - 1);
362 362
363 page = __grab_cache_page(mapping, index); 363 page = grab_cache_page_write_begin(mapping, index, flags);
364 if (!page) 364 if (!page)
365 return -ENOMEM; 365 return -ENOMEM;
366 366
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 8307dd64bf4..1f3b0fc0d35 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -14,6 +14,7 @@
14#include <linux/sunrpc/svc.h> 14#include <linux/sunrpc/svc.h>
15#include <linux/lockd/lockd.h> 15#include <linux/lockd/lockd.h>
16#include <linux/smp_lock.h> 16#include <linux/smp_lock.h>
17#include <linux/kthread.h>
17 18
18#define NLMDBG_FACILITY NLMDBG_CLIENT 19#define NLMDBG_FACILITY NLMDBG_CLIENT
19 20
@@ -60,7 +61,7 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init)
60 61
61 host = nlmclnt_lookup_host(nlm_init->address, nlm_init->addrlen, 62 host = nlmclnt_lookup_host(nlm_init->address, nlm_init->addrlen,
62 nlm_init->protocol, nlm_version, 63 nlm_init->protocol, nlm_version,
63 nlm_init->hostname); 64 nlm_init->hostname, nlm_init->noresvport);
64 if (host == NULL) { 65 if (host == NULL) {
65 lockd_down(); 66 lockd_down();
66 return ERR_PTR(-ENOLCK); 67 return ERR_PTR(-ENOLCK);
@@ -191,11 +192,15 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock)
191void 192void
192nlmclnt_recovery(struct nlm_host *host) 193nlmclnt_recovery(struct nlm_host *host)
193{ 194{
195 struct task_struct *task;
196
194 if (!host->h_reclaiming++) { 197 if (!host->h_reclaiming++) {
195 nlm_get_host(host); 198 nlm_get_host(host);
196 __module_get(THIS_MODULE); 199 task = kthread_run(reclaimer, host, "%s-reclaim", host->h_name);
197 if (kernel_thread(reclaimer, host, CLONE_FS | CLONE_FILES) < 0) 200 if (IS_ERR(task))
198 module_put(THIS_MODULE); 201 printk(KERN_ERR "lockd: unable to spawn reclaimer "
202 "thread. Locks for %s won't be reclaimed! "
203 "(%ld)\n", host->h_name, PTR_ERR(task));
199 } 204 }
200} 205}
201 206
@@ -207,7 +212,6 @@ reclaimer(void *ptr)
207 struct file_lock *fl, *next; 212 struct file_lock *fl, *next;
208 u32 nsmstate; 213 u32 nsmstate;
209 214
210 daemonize("%s-reclaim", host->h_name);
211 allow_signal(SIGKILL); 215 allow_signal(SIGKILL);
212 216
213 down_write(&host->h_rwsem); 217 down_write(&host->h_rwsem);
@@ -233,7 +237,12 @@ restart:
233 list_for_each_entry_safe(fl, next, &host->h_reclaim, fl_u.nfs_fl.list) { 237 list_for_each_entry_safe(fl, next, &host->h_reclaim, fl_u.nfs_fl.list) {
234 list_del_init(&fl->fl_u.nfs_fl.list); 238 list_del_init(&fl->fl_u.nfs_fl.list);
235 239
236 /* Why are we leaking memory here? --okir */ 240 /*
241 * sending this thread a SIGKILL will result in any unreclaimed
242 * locks being removed from the h_granted list. This means that
243 * the kernel will not attempt to reclaim them again if a new
244 * reclaimer thread is spawned for this host.
245 */
237 if (signalled()) 246 if (signalled())
238 continue; 247 continue;
239 if (nlmclnt_reclaim(host, fl) != 0) 248 if (nlmclnt_reclaim(host, fl) != 0)
@@ -261,5 +270,5 @@ restart:
261 nlm_release_host(host); 270 nlm_release_host(host);
262 lockd_down(); 271 lockd_down();
263 unlock_kernel(); 272 unlock_kernel();
264 module_put_and_exit(0); 273 return 0;
265} 274}
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 70fc63a1727..abdebf76b82 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -48,6 +48,7 @@ struct nlm_lookup_host_info {
48 const size_t hostname_len; /* it's length */ 48 const size_t hostname_len; /* it's length */
49 const struct sockaddr *src_sap; /* our address (optional) */ 49 const struct sockaddr *src_sap; /* our address (optional) */
50 const size_t src_len; /* it's length */ 50 const size_t src_len; /* it's length */
51 const int noresvport; /* use non-priv port */
51}; 52};
52 53
53/* 54/*
@@ -115,14 +116,14 @@ static void nlm_display_address(const struct sockaddr *sap,
115 snprintf(buf, len, "unspecified"); 116 snprintf(buf, len, "unspecified");
116 break; 117 break;
117 case AF_INET: 118 case AF_INET:
118 snprintf(buf, len, NIPQUAD_FMT, NIPQUAD(sin->sin_addr.s_addr)); 119 snprintf(buf, len, "%pI4", &sin->sin_addr.s_addr);
119 break; 120 break;
120 case AF_INET6: 121 case AF_INET6:
121 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) 122 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
122 snprintf(buf, len, NIPQUAD_FMT, 123 snprintf(buf, len, "%pI4",
123 NIPQUAD(sin6->sin6_addr.s6_addr32[3])); 124 &sin6->sin6_addr.s6_addr32[3]);
124 else 125 else
125 snprintf(buf, len, NIP6_FMT, NIP6(sin6->sin6_addr)); 126 snprintf(buf, len, "%pI6", &sin6->sin6_addr);
126 break; 127 break;
127 default: 128 default:
128 snprintf(buf, len, "unsupported address family"); 129 snprintf(buf, len, "unsupported address family");
@@ -222,6 +223,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni)
222 host->h_nsmstate = 0; /* real NSM state */ 223 host->h_nsmstate = 0; /* real NSM state */
223 host->h_nsmhandle = nsm; 224 host->h_nsmhandle = nsm;
224 host->h_server = ni->server; 225 host->h_server = ni->server;
226 host->h_noresvport = ni->noresvport;
225 hlist_add_head(&host->h_hash, chain); 227 hlist_add_head(&host->h_hash, chain);
226 INIT_LIST_HEAD(&host->h_lockowners); 228 INIT_LIST_HEAD(&host->h_lockowners);
227 spin_lock_init(&host->h_lock); 229 spin_lock_init(&host->h_lock);
@@ -272,6 +274,7 @@ nlm_destroy_host(struct nlm_host *host)
272 * @protocol: transport protocol to use 274 * @protocol: transport protocol to use
273 * @version: NLM protocol version 275 * @version: NLM protocol version
274 * @hostname: '\0'-terminated hostname of server 276 * @hostname: '\0'-terminated hostname of server
277 * @noresvport: 1 if non-privileged port should be used
275 * 278 *
276 * Returns an nlm_host structure that matches the passed-in 279 * Returns an nlm_host structure that matches the passed-in
277 * [server address, transport protocol, NLM version, server hostname]. 280 * [server address, transport protocol, NLM version, server hostname].
@@ -281,7 +284,9 @@ nlm_destroy_host(struct nlm_host *host)
281struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, 284struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
282 const size_t salen, 285 const size_t salen,
283 const unsigned short protocol, 286 const unsigned short protocol,
284 const u32 version, const char *hostname) 287 const u32 version,
288 const char *hostname,
289 int noresvport)
285{ 290{
286 const struct sockaddr source = { 291 const struct sockaddr source = {
287 .sa_family = AF_UNSPEC, 292 .sa_family = AF_UNSPEC,
@@ -296,6 +301,7 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
296 .hostname_len = strlen(hostname), 301 .hostname_len = strlen(hostname),
297 .src_sap = &source, 302 .src_sap = &source,
298 .src_len = sizeof(source), 303 .src_len = sizeof(source),
304 .noresvport = noresvport,
299 }; 305 };
300 306
301 dprintk("lockd: %s(host='%s', vers=%u, proto=%s)\n", __func__, 307 dprintk("lockd: %s(host='%s', vers=%u, proto=%s)\n", __func__,
@@ -417,6 +423,8 @@ nlm_bind_host(struct nlm_host *host)
417 */ 423 */
418 if (!host->h_server) 424 if (!host->h_server)
419 args.flags |= RPC_CLNT_CREATE_HARDRTRY; 425 args.flags |= RPC_CLNT_CREATE_HARDRTRY;
426 if (host->h_noresvport)
427 args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
420 428
421 clnt = rpc_create(&args); 429 clnt = rpc_create(&args);
422 if (!IS_ERR(clnt)) 430 if (!IS_ERR(clnt))
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 4e7e958e8f6..ffd3461f75e 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -179,7 +179,7 @@ static __be32 *xdr_encode_mon_name(__be32 *p, struct nsm_args *argp)
179 179
180 if (!nsm_use_hostnames) { 180 if (!nsm_use_hostnames) {
181 snprintf(buffer, XDR_ADDRBUF_LEN, 181 snprintf(buffer, XDR_ADDRBUF_LEN,
182 NIPQUAD_FMT, NIPQUAD(argp->addr)); 182 "%pI4", &argp->addr);
183 name = buffer; 183 name = buffer;
184 } 184 }
185 185
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 56b076736b5..252d80163d0 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -45,7 +45,7 @@
45static struct svc_program nlmsvc_program; 45static struct svc_program nlmsvc_program;
46 46
47struct nlmsvc_binding * nlmsvc_ops; 47struct nlmsvc_binding * nlmsvc_ops;
48EXPORT_SYMBOL(nlmsvc_ops); 48EXPORT_SYMBOL_GPL(nlmsvc_ops);
49 49
50static DEFINE_MUTEX(nlmsvc_mutex); 50static DEFINE_MUTEX(nlmsvc_mutex);
51static unsigned int nlmsvc_users; 51static unsigned int nlmsvc_users;
@@ -300,7 +300,7 @@ out:
300 mutex_unlock(&nlmsvc_mutex); 300 mutex_unlock(&nlmsvc_mutex);
301 return error; 301 return error;
302} 302}
303EXPORT_SYMBOL(lockd_up); 303EXPORT_SYMBOL_GPL(lockd_up);
304 304
305/* 305/*
306 * Decrement the user count and bring down lockd if we're the last. 306 * Decrement the user count and bring down lockd if we're the last.
@@ -329,7 +329,7 @@ lockd_down(void)
329out: 329out:
330 mutex_unlock(&nlmsvc_mutex); 330 mutex_unlock(&nlmsvc_mutex);
331} 331}
332EXPORT_SYMBOL(lockd_down); 332EXPORT_SYMBOL_GPL(lockd_down);
333 333
334#ifdef CONFIG_SYSCTL 334#ifdef CONFIG_SYSCTL
335 335
diff --git a/fs/locks.c b/fs/locks.c
index 09062e3ff10..46a2e12f7d4 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1349,7 +1349,7 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
1349 struct inode *inode = dentry->d_inode; 1349 struct inode *inode = dentry->d_inode;
1350 int error, rdlease_count = 0, wrlease_count = 0; 1350 int error, rdlease_count = 0, wrlease_count = 0;
1351 1351
1352 if ((current->fsuid != inode->i_uid) && !capable(CAP_LEASE)) 1352 if ((current_fsuid() != inode->i_uid) && !capable(CAP_LEASE))
1353 return -EACCES; 1353 return -EACCES;
1354 if (!S_ISREG(inode->i_mode)) 1354 if (!S_ISREG(inode->i_mode))
1355 return -EINVAL; 1355 return -EINVAL;
diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c
index 703cc35e04b..3aebe322271 100644
--- a/fs/minix/bitmap.c
+++ b/fs/minix/bitmap.c
@@ -262,8 +262,8 @@ struct inode * minix_new_inode(const struct inode * dir, int * error)
262 iput(inode); 262 iput(inode);
263 return NULL; 263 return NULL;
264 } 264 }
265 inode->i_uid = current->fsuid; 265 inode->i_uid = current_fsuid();
266 inode->i_gid = (dir->i_mode & S_ISGID) ? dir->i_gid : current->fsgid; 266 inode->i_gid = (dir->i_mode & S_ISGID) ? dir->i_gid : current_fsgid();
267 inode->i_ino = j; 267 inode->i_ino = j;
268 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; 268 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
269 inode->i_blocks = 0; 269 inode->i_blocks = 0;
diff --git a/fs/namei.c b/fs/namei.c
index d34e0f9681c..df2d3df4f04 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -186,7 +186,7 @@ int generic_permission(struct inode *inode, int mask,
186 186
187 mask &= MAY_READ | MAY_WRITE | MAY_EXEC; 187 mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
188 188
189 if (current->fsuid == inode->i_uid) 189 if (current_fsuid() == inode->i_uid)
190 mode >>= 6; 190 mode >>= 6;
191 else { 191 else {
192 if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) { 192 if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) {
@@ -226,6 +226,16 @@ int generic_permission(struct inode *inode, int mask,
226 return -EACCES; 226 return -EACCES;
227} 227}
228 228
229/**
230 * inode_permission - check for access rights to a given inode
231 * @inode: inode to check permission on
232 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
233 *
234 * Used to check for read/write/execute permissions on an inode.
235 * We use "fsuid" for this, letting us set arbitrary permissions
236 * for filesystem access without changing the "normal" uids which
237 * are used for other things.
238 */
229int inode_permission(struct inode *inode, int mask) 239int inode_permission(struct inode *inode, int mask)
230{ 240{
231 int retval; 241 int retval;
@@ -247,7 +257,6 @@ int inode_permission(struct inode *inode, int mask)
247 return -EACCES; 257 return -EACCES;
248 } 258 }
249 259
250 /* Ordinary permission routines do not understand MAY_APPEND. */
251 if (inode->i_op && inode->i_op->permission) 260 if (inode->i_op && inode->i_op->permission)
252 retval = inode->i_op->permission(inode, mask); 261 retval = inode->i_op->permission(inode, mask);
253 else 262 else
@@ -265,21 +274,6 @@ int inode_permission(struct inode *inode, int mask)
265} 274}
266 275
267/** 276/**
268 * vfs_permission - check for access rights to a given path
269 * @nd: lookup result that describes the path
270 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
271 *
272 * Used to check for read/write/execute permissions on a path.
273 * We use "fsuid" for this, letting us set arbitrary permissions
274 * for filesystem access without changing the "normal" uids which
275 * are used for other things.
276 */
277int vfs_permission(struct nameidata *nd, int mask)
278{
279 return inode_permission(nd->path.dentry->d_inode, mask);
280}
281
282/**
283 * file_permission - check for additional access rights to a given file 277 * file_permission - check for additional access rights to a given file
284 * @file: file to check access rights for 278 * @file: file to check access rights for
285 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) 279 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
@@ -289,7 +283,7 @@ int vfs_permission(struct nameidata *nd, int mask)
289 * 283 *
290 * Note: 284 * Note:
291 * Do not use this function in new code. All access checks should 285 * Do not use this function in new code. All access checks should
292 * be done using vfs_permission(). 286 * be done using inode_permission().
293 */ 287 */
294int file_permission(struct file *file, int mask) 288int file_permission(struct file *file, int mask)
295{ 289{
@@ -441,7 +435,7 @@ static int exec_permission_lite(struct inode *inode)
441 if (inode->i_op && inode->i_op->permission) 435 if (inode->i_op && inode->i_op->permission)
442 return -EAGAIN; 436 return -EAGAIN;
443 437
444 if (current->fsuid == inode->i_uid) 438 if (current_fsuid() == inode->i_uid)
445 mode >>= 6; 439 mode >>= 6;
446 else if (in_group_p(inode->i_gid)) 440 else if (in_group_p(inode->i_gid))
447 mode >>= 3; 441 mode >>= 3;
@@ -527,18 +521,6 @@ out_unlock:
527 return result; 521 return result;
528} 522}
529 523
530/* SMP-safe */
531static __always_inline void
532walk_init_root(const char *name, struct nameidata *nd)
533{
534 struct fs_struct *fs = current->fs;
535
536 read_lock(&fs->lock);
537 nd->path = fs->root;
538 path_get(&fs->root);
539 read_unlock(&fs->lock);
540}
541
542/* 524/*
543 * Wrapper to retry pathname resolution whenever the underlying 525 * Wrapper to retry pathname resolution whenever the underlying
544 * file system returns an ESTALE. 526 * file system returns an ESTALE.
@@ -576,9 +558,16 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
576 goto fail; 558 goto fail;
577 559
578 if (*link == '/') { 560 if (*link == '/') {
561 struct fs_struct *fs = current->fs;
562
579 path_put(&nd->path); 563 path_put(&nd->path);
580 walk_init_root(link, nd); 564
565 read_lock(&fs->lock);
566 nd->path = fs->root;
567 path_get(&fs->root);
568 read_unlock(&fs->lock);
581 } 569 }
570
582 res = link_path_walk(link, nd); 571 res = link_path_walk(link, nd);
583 if (nd->depth || res || nd->last_type!=LAST_NORM) 572 if (nd->depth || res || nd->last_type!=LAST_NORM)
584 return res; 573 return res;
@@ -859,7 +848,8 @@ static int __link_path_walk(const char *name, struct nameidata *nd)
859 nd->flags |= LOOKUP_CONTINUE; 848 nd->flags |= LOOKUP_CONTINUE;
860 err = exec_permission_lite(inode); 849 err = exec_permission_lite(inode);
861 if (err == -EAGAIN) 850 if (err == -EAGAIN)
862 err = vfs_permission(nd, MAY_EXEC); 851 err = inode_permission(nd->path.dentry->d_inode,
852 MAY_EXEC);
863 if (err) 853 if (err)
864 break; 854 break;
865 855
@@ -1334,11 +1324,13 @@ static int user_path_parent(int dfd, const char __user *path,
1334 */ 1324 */
1335static inline int check_sticky(struct inode *dir, struct inode *inode) 1325static inline int check_sticky(struct inode *dir, struct inode *inode)
1336{ 1326{
1327 uid_t fsuid = current_fsuid();
1328
1337 if (!(dir->i_mode & S_ISVTX)) 1329 if (!(dir->i_mode & S_ISVTX))
1338 return 0; 1330 return 0;
1339 if (inode->i_uid == current->fsuid) 1331 if (inode->i_uid == fsuid)
1340 return 0; 1332 return 0;
1341 if (dir->i_uid == current->fsuid) 1333 if (dir->i_uid == fsuid)
1342 return 0; 1334 return 0;
1343 return !capable(CAP_FOWNER); 1335 return !capable(CAP_FOWNER);
1344} 1336}
@@ -1491,9 +1483,9 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
1491 return error; 1483 return error;
1492} 1484}
1493 1485
1494int may_open(struct nameidata *nd, int acc_mode, int flag) 1486int may_open(struct path *path, int acc_mode, int flag)
1495{ 1487{
1496 struct dentry *dentry = nd->path.dentry; 1488 struct dentry *dentry = path->dentry;
1497 struct inode *inode = dentry->d_inode; 1489 struct inode *inode = dentry->d_inode;
1498 int error; 1490 int error;
1499 1491
@@ -1514,13 +1506,13 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
1514 if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { 1506 if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
1515 flag &= ~O_TRUNC; 1507 flag &= ~O_TRUNC;
1516 } else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) { 1508 } else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {
1517 if (nd->path.mnt->mnt_flags & MNT_NODEV) 1509 if (path->mnt->mnt_flags & MNT_NODEV)
1518 return -EACCES; 1510 return -EACCES;
1519 1511
1520 flag &= ~O_TRUNC; 1512 flag &= ~O_TRUNC;
1521 } 1513 }
1522 1514
1523 error = vfs_permission(nd, acc_mode); 1515 error = inode_permission(inode, acc_mode);
1524 if (error) 1516 if (error)
1525 return error; 1517 return error;
1526 /* 1518 /*
@@ -1554,6 +1546,9 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
1554 * Refuse to truncate files with mandatory locks held on them. 1546 * Refuse to truncate files with mandatory locks held on them.
1555 */ 1547 */
1556 error = locks_verify_locked(inode); 1548 error = locks_verify_locked(inode);
1549 if (!error)
1550 error = security_path_truncate(path, 0,
1551 ATTR_MTIME|ATTR_CTIME|ATTR_OPEN);
1557 if (!error) { 1552 if (!error) {
1558 DQUOT_INIT(inode); 1553 DQUOT_INIT(inode);
1559 1554
@@ -1584,14 +1579,18 @@ static int __open_namei_create(struct nameidata *nd, struct path *path,
1584 1579
1585 if (!IS_POSIXACL(dir->d_inode)) 1580 if (!IS_POSIXACL(dir->d_inode))
1586 mode &= ~current->fs->umask; 1581 mode &= ~current->fs->umask;
1582 error = security_path_mknod(&nd->path, path->dentry, mode, 0);
1583 if (error)
1584 goto out_unlock;
1587 error = vfs_create(dir->d_inode, path->dentry, mode, nd); 1585 error = vfs_create(dir->d_inode, path->dentry, mode, nd);
1586out_unlock:
1588 mutex_unlock(&dir->d_inode->i_mutex); 1587 mutex_unlock(&dir->d_inode->i_mutex);
1589 dput(nd->path.dentry); 1588 dput(nd->path.dentry);
1590 nd->path.dentry = path->dentry; 1589 nd->path.dentry = path->dentry;
1591 if (error) 1590 if (error)
1592 return error; 1591 return error;
1593 /* Don't check for write permission, don't truncate */ 1592 /* Don't check for write permission, don't truncate */
1594 return may_open(nd, 0, flag & ~O_TRUNC); 1593 return may_open(&nd->path, 0, flag & ~O_TRUNC);
1595} 1594}
1596 1595
1597/* 1596/*
@@ -1777,7 +1776,7 @@ ok:
1777 if (error) 1776 if (error)
1778 goto exit; 1777 goto exit;
1779 } 1778 }
1780 error = may_open(&nd, acc_mode, flag); 1779 error = may_open(&nd.path, acc_mode, flag);
1781 if (error) { 1780 if (error) {
1782 if (will_write) 1781 if (will_write)
1783 mnt_drop_write(nd.path.mnt); 1782 mnt_drop_write(nd.path.mnt);
@@ -1997,6 +1996,9 @@ asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
1997 error = mnt_want_write(nd.path.mnt); 1996 error = mnt_want_write(nd.path.mnt);
1998 if (error) 1997 if (error)
1999 goto out_dput; 1998 goto out_dput;
1999 error = security_path_mknod(&nd.path, dentry, mode, dev);
2000 if (error)
2001 goto out_drop_write;
2000 switch (mode & S_IFMT) { 2002 switch (mode & S_IFMT) {
2001 case 0: case S_IFREG: 2003 case 0: case S_IFREG:
2002 error = vfs_create(nd.path.dentry->d_inode,dentry,mode,&nd); 2004 error = vfs_create(nd.path.dentry->d_inode,dentry,mode,&nd);
@@ -2009,6 +2011,7 @@ asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
2009 error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode,0); 2011 error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode,0);
2010 break; 2012 break;
2011 } 2013 }
2014out_drop_write:
2012 mnt_drop_write(nd.path.mnt); 2015 mnt_drop_write(nd.path.mnt);
2013out_dput: 2016out_dput:
2014 dput(dentry); 2017 dput(dentry);
@@ -2068,7 +2071,11 @@ asmlinkage long sys_mkdirat(int dfd, const char __user *pathname, int mode)
2068 error = mnt_want_write(nd.path.mnt); 2071 error = mnt_want_write(nd.path.mnt);
2069 if (error) 2072 if (error)
2070 goto out_dput; 2073 goto out_dput;
2074 error = security_path_mkdir(&nd.path, dentry, mode);
2075 if (error)
2076 goto out_drop_write;
2071 error = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode); 2077 error = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode);
2078out_drop_write:
2072 mnt_drop_write(nd.path.mnt); 2079 mnt_drop_write(nd.path.mnt);
2073out_dput: 2080out_dput:
2074 dput(dentry); 2081 dput(dentry);
@@ -2178,7 +2185,11 @@ static long do_rmdir(int dfd, const char __user *pathname)
2178 error = mnt_want_write(nd.path.mnt); 2185 error = mnt_want_write(nd.path.mnt);
2179 if (error) 2186 if (error)
2180 goto exit3; 2187 goto exit3;
2188 error = security_path_rmdir(&nd.path, dentry);
2189 if (error)
2190 goto exit4;
2181 error = vfs_rmdir(nd.path.dentry->d_inode, dentry); 2191 error = vfs_rmdir(nd.path.dentry->d_inode, dentry);
2192exit4:
2182 mnt_drop_write(nd.path.mnt); 2193 mnt_drop_write(nd.path.mnt);
2183exit3: 2194exit3:
2184 dput(dentry); 2195 dput(dentry);
@@ -2263,7 +2274,11 @@ static long do_unlinkat(int dfd, const char __user *pathname)
2263 error = mnt_want_write(nd.path.mnt); 2274 error = mnt_want_write(nd.path.mnt);
2264 if (error) 2275 if (error)
2265 goto exit2; 2276 goto exit2;
2277 error = security_path_unlink(&nd.path, dentry);
2278 if (error)
2279 goto exit3;
2266 error = vfs_unlink(nd.path.dentry->d_inode, dentry); 2280 error = vfs_unlink(nd.path.dentry->d_inode, dentry);
2281exit3:
2267 mnt_drop_write(nd.path.mnt); 2282 mnt_drop_write(nd.path.mnt);
2268 exit2: 2283 exit2:
2269 dput(dentry); 2284 dput(dentry);
@@ -2344,7 +2359,11 @@ asmlinkage long sys_symlinkat(const char __user *oldname,
2344 error = mnt_want_write(nd.path.mnt); 2359 error = mnt_want_write(nd.path.mnt);
2345 if (error) 2360 if (error)
2346 goto out_dput; 2361 goto out_dput;
2362 error = security_path_symlink(&nd.path, dentry, from);
2363 if (error)
2364 goto out_drop_write;
2347 error = vfs_symlink(nd.path.dentry->d_inode, dentry, from); 2365 error = vfs_symlink(nd.path.dentry->d_inode, dentry, from);
2366out_drop_write:
2348 mnt_drop_write(nd.path.mnt); 2367 mnt_drop_write(nd.path.mnt);
2349out_dput: 2368out_dput:
2350 dput(dentry); 2369 dput(dentry);
@@ -2441,7 +2460,11 @@ asmlinkage long sys_linkat(int olddfd, const char __user *oldname,
2441 error = mnt_want_write(nd.path.mnt); 2460 error = mnt_want_write(nd.path.mnt);
2442 if (error) 2461 if (error)
2443 goto out_dput; 2462 goto out_dput;
2463 error = security_path_link(old_path.dentry, &nd.path, new_dentry);
2464 if (error)
2465 goto out_drop_write;
2444 error = vfs_link(old_path.dentry, nd.path.dentry->d_inode, new_dentry); 2466 error = vfs_link(old_path.dentry, nd.path.dentry->d_inode, new_dentry);
2467out_drop_write:
2445 mnt_drop_write(nd.path.mnt); 2468 mnt_drop_write(nd.path.mnt);
2446out_dput: 2469out_dput:
2447 dput(new_dentry); 2470 dput(new_dentry);
@@ -2677,8 +2700,13 @@ asmlinkage long sys_renameat(int olddfd, const char __user *oldname,
2677 error = mnt_want_write(oldnd.path.mnt); 2700 error = mnt_want_write(oldnd.path.mnt);
2678 if (error) 2701 if (error)
2679 goto exit5; 2702 goto exit5;
2703 error = security_path_rename(&oldnd.path, old_dentry,
2704 &newnd.path, new_dentry);
2705 if (error)
2706 goto exit6;
2680 error = vfs_rename(old_dir->d_inode, old_dentry, 2707 error = vfs_rename(old_dir->d_inode, old_dentry,
2681 new_dir->d_inode, new_dentry); 2708 new_dir->d_inode, new_dentry);
2709exit6:
2682 mnt_drop_write(oldnd.path.mnt); 2710 mnt_drop_write(oldnd.path.mnt);
2683exit5: 2711exit5:
2684 dput(new_dentry); 2712 dput(new_dentry);
@@ -2748,13 +2776,16 @@ int vfs_follow_link(struct nameidata *nd, const char *link)
2748/* get the link contents into pagecache */ 2776/* get the link contents into pagecache */
2749static char *page_getlink(struct dentry * dentry, struct page **ppage) 2777static char *page_getlink(struct dentry * dentry, struct page **ppage)
2750{ 2778{
2751 struct page * page; 2779 char *kaddr;
2780 struct page *page;
2752 struct address_space *mapping = dentry->d_inode->i_mapping; 2781 struct address_space *mapping = dentry->d_inode->i_mapping;
2753 page = read_mapping_page(mapping, 0, NULL); 2782 page = read_mapping_page(mapping, 0, NULL);
2754 if (IS_ERR(page)) 2783 if (IS_ERR(page))
2755 return (char*)page; 2784 return (char*)page;
2756 *ppage = page; 2785 *ppage = page;
2757 return kmap(page); 2786 kaddr = kmap(page);
2787 nd_terminate_link(kaddr, dentry->d_inode->i_size, PAGE_SIZE - 1);
2788 return kaddr;
2758} 2789}
2759 2790
2760int page_readlink(struct dentry *dentry, char __user *buffer, int buflen) 2791int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
@@ -2786,18 +2817,23 @@ void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
2786 } 2817 }
2787} 2818}
2788 2819
2789int __page_symlink(struct inode *inode, const char *symname, int len, 2820/*
2790 gfp_t gfp_mask) 2821 * The nofs argument instructs pagecache_write_begin to pass AOP_FLAG_NOFS
2822 */
2823int __page_symlink(struct inode *inode, const char *symname, int len, int nofs)
2791{ 2824{
2792 struct address_space *mapping = inode->i_mapping; 2825 struct address_space *mapping = inode->i_mapping;
2793 struct page *page; 2826 struct page *page;
2794 void *fsdata; 2827 void *fsdata;
2795 int err; 2828 int err;
2796 char *kaddr; 2829 char *kaddr;
2830 unsigned int flags = AOP_FLAG_UNINTERRUPTIBLE;
2831 if (nofs)
2832 flags |= AOP_FLAG_NOFS;
2797 2833
2798retry: 2834retry:
2799 err = pagecache_write_begin(NULL, mapping, 0, len-1, 2835 err = pagecache_write_begin(NULL, mapping, 0, len-1,
2800 AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata); 2836 flags, &page, &fsdata);
2801 if (err) 2837 if (err)
2802 goto fail; 2838 goto fail;
2803 2839
@@ -2821,7 +2857,7 @@ fail:
2821int page_symlink(struct inode *inode, const char *symname, int len) 2857int page_symlink(struct inode *inode, const char *symname, int len)
2822{ 2858{
2823 return __page_symlink(inode, symname, len, 2859 return __page_symlink(inode, symname, len,
2824 mapping_gfp_mask(inode->i_mapping)); 2860 !(mapping_gfp_mask(inode->i_mapping) & __GFP_FS));
2825} 2861}
2826 2862
2827const struct inode_operations page_symlink_inode_operations = { 2863const struct inode_operations page_symlink_inode_operations = {
@@ -2847,7 +2883,6 @@ EXPORT_SYMBOL(path_lookup);
2847EXPORT_SYMBOL(kern_path); 2883EXPORT_SYMBOL(kern_path);
2848EXPORT_SYMBOL(vfs_path_lookup); 2884EXPORT_SYMBOL(vfs_path_lookup);
2849EXPORT_SYMBOL(inode_permission); 2885EXPORT_SYMBOL(inode_permission);
2850EXPORT_SYMBOL(vfs_permission);
2851EXPORT_SYMBOL(file_permission); 2886EXPORT_SYMBOL(file_permission);
2852EXPORT_SYMBOL(unlock_rename); 2887EXPORT_SYMBOL(unlock_rename);
2853EXPORT_SYMBOL(vfs_create); 2888EXPORT_SYMBOL(vfs_create);
@@ -2863,3 +2898,10 @@ EXPORT_SYMBOL(vfs_symlink);
2863EXPORT_SYMBOL(vfs_unlink); 2898EXPORT_SYMBOL(vfs_unlink);
2864EXPORT_SYMBOL(dentry_unhash); 2899EXPORT_SYMBOL(dentry_unhash);
2865EXPORT_SYMBOL(generic_readlink); 2900EXPORT_SYMBOL(generic_readlink);
2901
2902/* to be mentioned only in INIT_TASK */
2903struct fs_struct init_fs = {
2904 .count = ATOMIC_INIT(1),
2905 .lock = __RW_LOCK_UNLOCKED(init_fs.lock),
2906 .umask = 0022,
2907};
diff --git a/fs/namespace.c b/fs/namespace.c
index 65b3dc844c8..a40685d800a 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1176,7 +1176,7 @@ static int mount_is_safe(struct path *path)
1176 if (S_ISLNK(path->dentry->d_inode->i_mode)) 1176 if (S_ISLNK(path->dentry->d_inode->i_mode))
1177 return -EPERM; 1177 return -EPERM;
1178 if (path->dentry->d_inode->i_mode & S_ISVTX) { 1178 if (path->dentry->d_inode->i_mode & S_ISVTX) {
1179 if (current->uid != path->dentry->d_inode->i_uid) 1179 if (current_uid() != path->dentry->d_inode->i_uid)
1180 return -EPERM; 1180 return -EPERM;
1181 } 1181 }
1182 if (inode_permission(path->dentry->d_inode, MAY_WRITE)) 1182 if (inode_permission(path->dentry->d_inode, MAY_WRITE))
@@ -1990,7 +1990,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
1990 if (!new_ns->root) { 1990 if (!new_ns->root) {
1991 up_write(&namespace_sem); 1991 up_write(&namespace_sem);
1992 kfree(new_ns); 1992 kfree(new_ns);
1993 return ERR_PTR(-ENOMEM);; 1993 return ERR_PTR(-ENOMEM);
1994 } 1994 }
1995 spin_lock(&vfsmount_lock); 1995 spin_lock(&vfsmount_lock);
1996 list_add_tail(&new_ns->list, &new_ns->root->mnt_list); 1996 list_add_tail(&new_ns->list, &new_ns->root->mnt_list);
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index 3a97c95e1ca..6d04e050c74 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -40,10 +40,10 @@ ncp_get_fs_info(struct ncp_server * server, struct file *file,
40 struct inode *inode = file->f_path.dentry->d_inode; 40 struct inode *inode = file->f_path.dentry->d_inode;
41 struct ncp_fs_info info; 41 struct ncp_fs_info info;
42 42
43 if ((file_permission(file, MAY_WRITE) != 0) 43 if (file_permission(file, MAY_WRITE) != 0
44 && (current->uid != server->m.mounted_uid)) { 44 && current_uid() != server->m.mounted_uid)
45 return -EACCES; 45 return -EACCES;
46 } 46
47 if (copy_from_user(&info, arg, sizeof(info))) 47 if (copy_from_user(&info, arg, sizeof(info)))
48 return -EFAULT; 48 return -EFAULT;
49 49
@@ -70,10 +70,10 @@ ncp_get_fs_info_v2(struct ncp_server * server, struct file *file,
70 struct inode *inode = file->f_path.dentry->d_inode; 70 struct inode *inode = file->f_path.dentry->d_inode;
71 struct ncp_fs_info_v2 info2; 71 struct ncp_fs_info_v2 info2;
72 72
73 if ((file_permission(file, MAY_WRITE) != 0) 73 if (file_permission(file, MAY_WRITE) != 0
74 && (current->uid != server->m.mounted_uid)) { 74 && current_uid() != server->m.mounted_uid)
75 return -EACCES; 75 return -EACCES;
76 } 76
77 if (copy_from_user(&info2, arg, sizeof(info2))) 77 if (copy_from_user(&info2, arg, sizeof(info2)))
78 return -EFAULT; 78 return -EFAULT;
79 79
@@ -141,10 +141,10 @@ ncp_get_compat_fs_info_v2(struct ncp_server * server, struct file *file,
141 struct inode *inode = file->f_path.dentry->d_inode; 141 struct inode *inode = file->f_path.dentry->d_inode;
142 struct compat_ncp_fs_info_v2 info2; 142 struct compat_ncp_fs_info_v2 info2;
143 143
144 if ((file_permission(file, MAY_WRITE) != 0) 144 if (file_permission(file, MAY_WRITE) != 0
145 && (current->uid != server->m.mounted_uid)) { 145 && current_uid() != server->m.mounted_uid)
146 return -EACCES; 146 return -EACCES;
147 } 147
148 if (copy_from_user(&info2, arg, sizeof(info2))) 148 if (copy_from_user(&info2, arg, sizeof(info2)))
149 return -EFAULT; 149 return -EFAULT;
150 150
@@ -270,16 +270,17 @@ static int __ncp_ioctl(struct inode *inode, struct file *filp,
270 struct ncp_ioctl_request request; 270 struct ncp_ioctl_request request;
271 char* bouncebuffer; 271 char* bouncebuffer;
272 void __user *argp = (void __user *)arg; 272 void __user *argp = (void __user *)arg;
273 uid_t uid = current_uid();
273 274
274 switch (cmd) { 275 switch (cmd) {
275#ifdef CONFIG_COMPAT 276#ifdef CONFIG_COMPAT
276 case NCP_IOC_NCPREQUEST_32: 277 case NCP_IOC_NCPREQUEST_32:
277#endif 278#endif
278 case NCP_IOC_NCPREQUEST: 279 case NCP_IOC_NCPREQUEST:
279 if ((file_permission(filp, MAY_WRITE) != 0) 280 if (file_permission(filp, MAY_WRITE) != 0
280 && (current->uid != server->m.mounted_uid)) { 281 && uid != server->m.mounted_uid)
281 return -EACCES; 282 return -EACCES;
282 } 283
283#ifdef CONFIG_COMPAT 284#ifdef CONFIG_COMPAT
284 if (cmd == NCP_IOC_NCPREQUEST_32) { 285 if (cmd == NCP_IOC_NCPREQUEST_32) {
285 struct compat_ncp_ioctl_request request32; 286 struct compat_ncp_ioctl_request request32;
@@ -356,10 +357,10 @@ static int __ncp_ioctl(struct inode *inode, struct file *filp,
356 case NCP_IOC_GETMOUNTUID16: 357 case NCP_IOC_GETMOUNTUID16:
357 case NCP_IOC_GETMOUNTUID32: 358 case NCP_IOC_GETMOUNTUID32:
358 case NCP_IOC_GETMOUNTUID64: 359 case NCP_IOC_GETMOUNTUID64:
359 if ((file_permission(filp, MAY_READ) != 0) 360 if (file_permission(filp, MAY_READ) != 0
360 && (current->uid != server->m.mounted_uid)) { 361 && uid != server->m.mounted_uid)
361 return -EACCES; 362 return -EACCES;
362 } 363
363 if (cmd == NCP_IOC_GETMOUNTUID16) { 364 if (cmd == NCP_IOC_GETMOUNTUID16) {
364 u16 uid; 365 u16 uid;
365 SET_UID(uid, server->m.mounted_uid); 366 SET_UID(uid, server->m.mounted_uid);
@@ -380,11 +381,10 @@ static int __ncp_ioctl(struct inode *inode, struct file *filp,
380 { 381 {
381 struct ncp_setroot_ioctl sr; 382 struct ncp_setroot_ioctl sr;
382 383
383 if ((file_permission(filp, MAY_READ) != 0) 384 if (file_permission(filp, MAY_READ) != 0
384 && (current->uid != server->m.mounted_uid)) 385 && uid != server->m.mounted_uid)
385 {
386 return -EACCES; 386 return -EACCES;
387 } 387
388 if (server->m.mounted_vol[0]) { 388 if (server->m.mounted_vol[0]) {
389 struct dentry* dentry = inode->i_sb->s_root; 389 struct dentry* dentry = inode->i_sb->s_root;
390 390
@@ -408,6 +408,7 @@ static int __ncp_ioctl(struct inode *inode, struct file *filp,
408 return -EFAULT; 408 return -EFAULT;
409 return 0; 409 return 0;
410 } 410 }
411
411 case NCP_IOC_SETROOT: 412 case NCP_IOC_SETROOT:
412 { 413 {
413 struct ncp_setroot_ioctl sr; 414 struct ncp_setroot_ioctl sr;
@@ -455,11 +456,10 @@ static int __ncp_ioctl(struct inode *inode, struct file *filp,
455 456
456#ifdef CONFIG_NCPFS_PACKET_SIGNING 457#ifdef CONFIG_NCPFS_PACKET_SIGNING
457 case NCP_IOC_SIGN_INIT: 458 case NCP_IOC_SIGN_INIT:
458 if ((file_permission(filp, MAY_WRITE) != 0) 459 if (file_permission(filp, MAY_WRITE) != 0
459 && (current->uid != server->m.mounted_uid)) 460 && uid != server->m.mounted_uid)
460 {
461 return -EACCES; 461 return -EACCES;
462 } 462
463 if (argp) { 463 if (argp) {
464 if (server->sign_wanted) 464 if (server->sign_wanted)
465 { 465 {
@@ -478,24 +478,22 @@ static int __ncp_ioctl(struct inode *inode, struct file *filp,
478 return 0; 478 return 0;
479 479
480 case NCP_IOC_SIGN_WANTED: 480 case NCP_IOC_SIGN_WANTED:
481 if ((file_permission(filp, MAY_READ) != 0) 481 if (file_permission(filp, MAY_READ) != 0
482 && (current->uid != server->m.mounted_uid)) 482 && uid != server->m.mounted_uid)
483 {
484 return -EACCES; 483 return -EACCES;
485 }
486 484
487 if (put_user(server->sign_wanted, (int __user *)argp)) 485 if (put_user(server->sign_wanted, (int __user *)argp))
488 return -EFAULT; 486 return -EFAULT;
489 return 0; 487 return 0;
488
490 case NCP_IOC_SET_SIGN_WANTED: 489 case NCP_IOC_SET_SIGN_WANTED:
491 { 490 {
492 int newstate; 491 int newstate;
493 492
494 if ((file_permission(filp, MAY_WRITE) != 0) 493 if (file_permission(filp, MAY_WRITE) != 0
495 && (current->uid != server->m.mounted_uid)) 494 && uid != server->m.mounted_uid)
496 {
497 return -EACCES; 495 return -EACCES;
498 } 496
499 /* get only low 8 bits... */ 497 /* get only low 8 bits... */
500 if (get_user(newstate, (unsigned char __user *)argp)) 498 if (get_user(newstate, (unsigned char __user *)argp))
501 return -EFAULT; 499 return -EFAULT;
@@ -512,11 +510,10 @@ static int __ncp_ioctl(struct inode *inode, struct file *filp,
512 510
513#ifdef CONFIG_NCPFS_IOCTL_LOCKING 511#ifdef CONFIG_NCPFS_IOCTL_LOCKING
514 case NCP_IOC_LOCKUNLOCK: 512 case NCP_IOC_LOCKUNLOCK:
515 if ((file_permission(filp, MAY_WRITE) != 0) 513 if (file_permission(filp, MAY_WRITE) != 0
516 && (current->uid != server->m.mounted_uid)) 514 && uid != server->m.mounted_uid)
517 {
518 return -EACCES; 515 return -EACCES;
519 } 516
520 { 517 {
521 struct ncp_lock_ioctl rqdata; 518 struct ncp_lock_ioctl rqdata;
522 519
@@ -585,9 +582,8 @@ outrel:
585 582
586#ifdef CONFIG_COMPAT 583#ifdef CONFIG_COMPAT
587 case NCP_IOC_GETOBJECTNAME_32: 584 case NCP_IOC_GETOBJECTNAME_32:
588 if (current->uid != server->m.mounted_uid) { 585 if (uid != server->m.mounted_uid)
589 return -EACCES; 586 return -EACCES;
590 }
591 { 587 {
592 struct compat_ncp_objectname_ioctl user; 588 struct compat_ncp_objectname_ioctl user;
593 size_t outl; 589 size_t outl;
@@ -609,10 +605,10 @@ outrel:
609 return 0; 605 return 0;
610 } 606 }
611#endif 607#endif
608
612 case NCP_IOC_GETOBJECTNAME: 609 case NCP_IOC_GETOBJECTNAME:
613 if (current->uid != server->m.mounted_uid) { 610 if (uid != server->m.mounted_uid)
614 return -EACCES; 611 return -EACCES;
615 }
616 { 612 {
617 struct ncp_objectname_ioctl user; 613 struct ncp_objectname_ioctl user;
618 size_t outl; 614 size_t outl;
@@ -633,13 +629,13 @@ outrel:
633 return -EFAULT; 629 return -EFAULT;
634 return 0; 630 return 0;
635 } 631 }
632
636#ifdef CONFIG_COMPAT 633#ifdef CONFIG_COMPAT
637 case NCP_IOC_SETOBJECTNAME_32: 634 case NCP_IOC_SETOBJECTNAME_32:
638#endif 635#endif
639 case NCP_IOC_SETOBJECTNAME: 636 case NCP_IOC_SETOBJECTNAME:
640 if (current->uid != server->m.mounted_uid) { 637 if (uid != server->m.mounted_uid)
641 return -EACCES; 638 return -EACCES;
642 }
643 { 639 {
644 struct ncp_objectname_ioctl user; 640 struct ncp_objectname_ioctl user;
645 void* newname; 641 void* newname;
@@ -691,13 +687,13 @@ outrel:
691 kfree(oldname); 687 kfree(oldname);
692 return 0; 688 return 0;
693 } 689 }
690
694#ifdef CONFIG_COMPAT 691#ifdef CONFIG_COMPAT
695 case NCP_IOC_GETPRIVATEDATA_32: 692 case NCP_IOC_GETPRIVATEDATA_32:
696#endif 693#endif
697 case NCP_IOC_GETPRIVATEDATA: 694 case NCP_IOC_GETPRIVATEDATA:
698 if (current->uid != server->m.mounted_uid) { 695 if (uid != server->m.mounted_uid)
699 return -EACCES; 696 return -EACCES;
700 }
701 { 697 {
702 struct ncp_privatedata_ioctl user; 698 struct ncp_privatedata_ioctl user;
703 size_t outl; 699 size_t outl;
@@ -736,13 +732,13 @@ outrel:
736 732
737 return 0; 733 return 0;
738 } 734 }
735
739#ifdef CONFIG_COMPAT 736#ifdef CONFIG_COMPAT
740 case NCP_IOC_SETPRIVATEDATA_32: 737 case NCP_IOC_SETPRIVATEDATA_32:
741#endif 738#endif
742 case NCP_IOC_SETPRIVATEDATA: 739 case NCP_IOC_SETPRIVATEDATA:
743 if (current->uid != server->m.mounted_uid) { 740 if (uid != server->m.mounted_uid)
744 return -EACCES; 741 return -EACCES;
745 }
746 { 742 {
747 struct ncp_privatedata_ioctl user; 743 struct ncp_privatedata_ioctl user;
748 void* new; 744 void* new;
@@ -794,9 +790,10 @@ outrel:
794#endif /* CONFIG_NCPFS_NLS */ 790#endif /* CONFIG_NCPFS_NLS */
795 791
796 case NCP_IOC_SETDENTRYTTL: 792 case NCP_IOC_SETDENTRYTTL:
797 if ((file_permission(filp, MAY_WRITE) != 0) && 793 if (file_permission(filp, MAY_WRITE) != 0 &&
798 (current->uid != server->m.mounted_uid)) 794 uid != server->m.mounted_uid)
799 return -EACCES; 795 return -EACCES;
796
800 { 797 {
801 u_int32_t user; 798 u_int32_t user;
802 799
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index c2e9cfd9e5a..3e634f2a108 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -16,6 +16,7 @@
16#include <linux/mutex.h> 16#include <linux/mutex.h>
17#include <linux/freezer.h> 17#include <linux/freezer.h>
18#include <linux/kthread.h> 18#include <linux/kthread.h>
19#include <linux/sunrpc/svcauth_gss.h>
19 20
20#include <net/inet_sock.h> 21#include <net/inet_sock.h>
21 22
@@ -182,10 +183,34 @@ void nfs_callback_down(void)
182 mutex_unlock(&nfs_callback_mutex); 183 mutex_unlock(&nfs_callback_mutex);
183} 184}
184 185
186static int check_gss_callback_principal(struct nfs_client *clp,
187 struct svc_rqst *rqstp)
188{
189 struct rpc_clnt *r = clp->cl_rpcclient;
190 char *p = svc_gss_principal(rqstp);
191
192 /*
193 * It might just be a normal user principal, in which case
194 * userspace won't bother to tell us the name at all.
195 */
196 if (p == NULL)
197 return SVC_DENIED;
198
199 /* Expect a GSS_C_NT_HOSTBASED_NAME like "nfs@serverhostname" */
200
201 if (memcmp(p, "nfs@", 4) != 0)
202 return SVC_DENIED;
203 p += 4;
204 if (strcmp(p, r->cl_server) != 0)
205 return SVC_DENIED;
206 return SVC_OK;
207}
208
185static int nfs_callback_authenticate(struct svc_rqst *rqstp) 209static int nfs_callback_authenticate(struct svc_rqst *rqstp)
186{ 210{
187 struct nfs_client *clp; 211 struct nfs_client *clp;
188 RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); 212 RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
213 int ret = SVC_OK;
189 214
190 /* Don't talk to strangers */ 215 /* Don't talk to strangers */
191 clp = nfs_find_client(svc_addr(rqstp), 4); 216 clp = nfs_find_client(svc_addr(rqstp), 4);
@@ -194,21 +219,22 @@ static int nfs_callback_authenticate(struct svc_rqst *rqstp)
194 219
195 dprintk("%s: %s NFSv4 callback!\n", __func__, 220 dprintk("%s: %s NFSv4 callback!\n", __func__,
196 svc_print_addr(rqstp, buf, sizeof(buf))); 221 svc_print_addr(rqstp, buf, sizeof(buf)));
197 nfs_put_client(clp);
198 222
199 switch (rqstp->rq_authop->flavour) { 223 switch (rqstp->rq_authop->flavour) {
200 case RPC_AUTH_NULL: 224 case RPC_AUTH_NULL:
201 if (rqstp->rq_proc != CB_NULL) 225 if (rqstp->rq_proc != CB_NULL)
202 return SVC_DENIED; 226 ret = SVC_DENIED;
203 break; 227 break;
204 case RPC_AUTH_UNIX: 228 case RPC_AUTH_UNIX:
205 break; 229 break;
206 case RPC_AUTH_GSS: 230 case RPC_AUTH_GSS:
207 /* FIXME: RPCSEC_GSS handling? */ 231 ret = check_gss_callback_principal(clp, rqstp);
232 break;
208 default: 233 default:
209 return SVC_DENIED; 234 ret = SVC_DENIED;
210 } 235 }
211 return SVC_OK; 236 nfs_put_client(clp);
237 return ret;
212} 238}
213 239
214/* 240/*
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 7547600b617..9b728f3565a 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -143,7 +143,6 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
143 clp->cl_proto = cl_init->proto; 143 clp->cl_proto = cl_init->proto;
144 144
145#ifdef CONFIG_NFS_V4 145#ifdef CONFIG_NFS_V4
146 init_rwsem(&clp->cl_sem);
147 INIT_LIST_HEAD(&clp->cl_delegations); 146 INIT_LIST_HEAD(&clp->cl_delegations);
148 spin_lock_init(&clp->cl_lock); 147 spin_lock_init(&clp->cl_lock);
149 INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state); 148 INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state);
@@ -224,31 +223,54 @@ void nfs_put_client(struct nfs_client *clp)
224 } 223 }
225} 224}
226 225
227static int nfs_sockaddr_match_ipaddr4(const struct sockaddr_in *sa1, 226#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
228 const struct sockaddr_in *sa2) 227static const struct in6_addr *nfs_map_ipv4_addr(const struct sockaddr *sa, struct in6_addr *addr_mapped)
229{ 228{
230 return sa1->sin_addr.s_addr == sa2->sin_addr.s_addr; 229 switch (sa->sa_family) {
230 default:
231 return NULL;
232 case AF_INET6:
233 return &((const struct sockaddr_in6 *)sa)->sin6_addr;
234 break;
235 case AF_INET:
236 ipv6_addr_set_v4mapped(((const struct sockaddr_in *)sa)->sin_addr.s_addr,
237 addr_mapped);
238 return addr_mapped;
239 }
231} 240}
232 241
233static int nfs_sockaddr_match_ipaddr6(const struct sockaddr_in6 *sa1, 242static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
234 const struct sockaddr_in6 *sa2) 243 const struct sockaddr *sa2)
244{
245 const struct in6_addr *addr1;
246 const struct in6_addr *addr2;
247 struct in6_addr addr1_mapped;
248 struct in6_addr addr2_mapped;
249
250 addr1 = nfs_map_ipv4_addr(sa1, &addr1_mapped);
251 if (likely(addr1 != NULL)) {
252 addr2 = nfs_map_ipv4_addr(sa2, &addr2_mapped);
253 if (likely(addr2 != NULL))
254 return ipv6_addr_equal(addr1, addr2);
255 }
256 return 0;
257}
258#else
259static int nfs_sockaddr_match_ipaddr4(const struct sockaddr_in *sa1,
260 const struct sockaddr_in *sa2)
235{ 261{
236 return ipv6_addr_equal(&sa1->sin6_addr, &sa2->sin6_addr); 262 return sa1->sin_addr.s_addr == sa2->sin_addr.s_addr;
237} 263}
238 264
239static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1, 265static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
240 const struct sockaddr *sa2) 266 const struct sockaddr *sa2)
241{ 267{
242 switch (sa1->sa_family) { 268 if (unlikely(sa1->sa_family != AF_INET || sa2->sa_family != AF_INET))
243 case AF_INET: 269 return 0;
244 return nfs_sockaddr_match_ipaddr4((const struct sockaddr_in *)sa1, 270 return nfs_sockaddr_match_ipaddr4((const struct sockaddr_in *)sa1,
245 (const struct sockaddr_in *)sa2); 271 (const struct sockaddr_in *)sa2);
246 case AF_INET6:
247 return nfs_sockaddr_match_ipaddr6((const struct sockaddr_in6 *)sa1,
248 (const struct sockaddr_in6 *)sa2);
249 }
250 BUG();
251} 272}
273#endif
252 274
253/* 275/*
254 * Find a client by IP address and protocol version 276 * Find a client by IP address and protocol version
@@ -270,8 +292,6 @@ struct nfs_client *nfs_find_client(const struct sockaddr *addr, u32 nfsversion)
270 if (clp->rpc_ops->version != nfsversion) 292 if (clp->rpc_ops->version != nfsversion)
271 continue; 293 continue;
272 294
273 if (addr->sa_family != clap->sa_family)
274 continue;
275 /* Match only the IP address, not the port number */ 295 /* Match only the IP address, not the port number */
276 if (!nfs_sockaddr_match_ipaddr(addr, clap)) 296 if (!nfs_sockaddr_match_ipaddr(addr, clap))
277 continue; 297 continue;
@@ -305,8 +325,6 @@ struct nfs_client *nfs_find_client_next(struct nfs_client *clp)
305 if (clp->rpc_ops->version != nfsvers) 325 if (clp->rpc_ops->version != nfsvers)
306 continue; 326 continue;
307 327
308 if (sap->sa_family != clap->sa_family)
309 continue;
310 /* Match only the IP address, not the port number */ 328 /* Match only the IP address, not the port number */
311 if (!nfs_sockaddr_match_ipaddr(sap, clap)) 329 if (!nfs_sockaddr_match_ipaddr(sap, clap))
312 continue; 330 continue;
@@ -470,7 +488,7 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
470static int nfs_create_rpc_client(struct nfs_client *clp, 488static int nfs_create_rpc_client(struct nfs_client *clp,
471 const struct rpc_timeout *timeparms, 489 const struct rpc_timeout *timeparms,
472 rpc_authflavor_t flavor, 490 rpc_authflavor_t flavor,
473 int flags) 491 int discrtry, int noresvport)
474{ 492{
475 struct rpc_clnt *clnt = NULL; 493 struct rpc_clnt *clnt = NULL;
476 struct rpc_create_args args = { 494 struct rpc_create_args args = {
@@ -482,9 +500,13 @@ static int nfs_create_rpc_client(struct nfs_client *clp,
482 .program = &nfs_program, 500 .program = &nfs_program,
483 .version = clp->rpc_ops->version, 501 .version = clp->rpc_ops->version,
484 .authflavor = flavor, 502 .authflavor = flavor,
485 .flags = flags,
486 }; 503 };
487 504
505 if (discrtry)
506 args.flags |= RPC_CLNT_CREATE_DISCRTRY;
507 if (noresvport)
508 args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
509
488 if (!IS_ERR(clp->cl_rpcclient)) 510 if (!IS_ERR(clp->cl_rpcclient))
489 return 0; 511 return 0;
490 512
@@ -522,6 +544,8 @@ static int nfs_start_lockd(struct nfs_server *server)
522 .protocol = server->flags & NFS_MOUNT_TCP ? 544 .protocol = server->flags & NFS_MOUNT_TCP ?
523 IPPROTO_TCP : IPPROTO_UDP, 545 IPPROTO_TCP : IPPROTO_UDP,
524 .nfs_version = clp->rpc_ops->version, 546 .nfs_version = clp->rpc_ops->version,
547 .noresvport = server->flags & NFS_MOUNT_NORESVPORT ?
548 1 : 0,
525 }; 549 };
526 550
527 if (nlm_init.nfs_version > 3) 551 if (nlm_init.nfs_version > 3)
@@ -623,7 +647,8 @@ static int nfs_init_client(struct nfs_client *clp,
623 * Create a client RPC handle for doing FSSTAT with UNIX auth only 647 * Create a client RPC handle for doing FSSTAT with UNIX auth only
624 * - RFC 2623, sec 2.3.2 648 * - RFC 2623, sec 2.3.2
625 */ 649 */
626 error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX, 0); 650 error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX,
651 0, data->flags & NFS_MOUNT_NORESVPORT);
627 if (error < 0) 652 if (error < 0)
628 goto error; 653 goto error;
629 nfs_mark_client_ready(clp, NFS_CS_READY); 654 nfs_mark_client_ready(clp, NFS_CS_READY);
@@ -965,7 +990,8 @@ error:
965static int nfs4_init_client(struct nfs_client *clp, 990static int nfs4_init_client(struct nfs_client *clp,
966 const struct rpc_timeout *timeparms, 991 const struct rpc_timeout *timeparms,
967 const char *ip_addr, 992 const char *ip_addr,
968 rpc_authflavor_t authflavour) 993 rpc_authflavor_t authflavour,
994 int flags)
969{ 995{
970 int error; 996 int error;
971 997
@@ -979,7 +1005,7 @@ static int nfs4_init_client(struct nfs_client *clp,
979 clp->rpc_ops = &nfs_v4_clientops; 1005 clp->rpc_ops = &nfs_v4_clientops;
980 1006
981 error = nfs_create_rpc_client(clp, timeparms, authflavour, 1007 error = nfs_create_rpc_client(clp, timeparms, authflavour,
982 RPC_CLNT_CREATE_DISCRTRY); 1008 1, flags & NFS_MOUNT_NORESVPORT);
983 if (error < 0) 1009 if (error < 0)
984 goto error; 1010 goto error;
985 memcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr)); 1011 memcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr));
@@ -1030,7 +1056,8 @@ static int nfs4_set_client(struct nfs_server *server,
1030 error = PTR_ERR(clp); 1056 error = PTR_ERR(clp);
1031 goto error; 1057 goto error;
1032 } 1058 }
1033 error = nfs4_init_client(clp, timeparms, ip_addr, authflavour); 1059 error = nfs4_init_client(clp, timeparms, ip_addr, authflavour,
1060 server->flags);
1034 if (error < 0) 1061 if (error < 0)
1035 goto error_put; 1062 goto error_put;
1036 1063
@@ -1059,6 +1086,10 @@ static int nfs4_init_server(struct nfs_server *server,
1059 nfs_init_timeout_values(&timeparms, data->nfs_server.protocol, 1086 nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
1060 data->timeo, data->retrans); 1087 data->timeo, data->retrans);
1061 1088
1089 /* Initialise the client representation from the mount data */
1090 server->flags = data->flags;
1091 server->caps |= NFS_CAP_ATOMIC_OPEN;
1092
1062 /* Get a client record */ 1093 /* Get a client record */
1063 error = nfs4_set_client(server, 1094 error = nfs4_set_client(server,
1064 data->nfs_server.hostname, 1095 data->nfs_server.hostname,
@@ -1071,10 +1102,6 @@ static int nfs4_init_server(struct nfs_server *server,
1071 if (error < 0) 1102 if (error < 0)
1072 goto error; 1103 goto error;
1073 1104
1074 /* Initialise the client representation from the mount data */
1075 server->flags = data->flags;
1076 server->caps |= NFS_CAP_ATOMIC_OPEN;
1077
1078 if (data->rsize) 1105 if (data->rsize)
1079 server->rsize = nfs_block_size(data->rsize, NULL); 1106 server->rsize = nfs_block_size(data->rsize, NULL);
1080 if (data->wsize) 1107 if (data->wsize)
@@ -1177,6 +1204,10 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
1177 parent_server = NFS_SB(data->sb); 1204 parent_server = NFS_SB(data->sb);
1178 parent_client = parent_server->nfs_client; 1205 parent_client = parent_server->nfs_client;
1179 1206
1207 /* Initialise the client representation from the parent server */
1208 nfs_server_copy_userdata(server, parent_server);
1209 server->caps |= NFS_CAP_ATOMIC_OPEN;
1210
1180 /* Get a client representation. 1211 /* Get a client representation.
1181 * Note: NFSv4 always uses TCP, */ 1212 * Note: NFSv4 always uses TCP, */
1182 error = nfs4_set_client(server, data->hostname, 1213 error = nfs4_set_client(server, data->hostname,
@@ -1189,10 +1220,6 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
1189 if (error < 0) 1220 if (error < 0)
1190 goto error; 1221 goto error;
1191 1222
1192 /* Initialise the client representation from the parent server */
1193 nfs_server_copy_userdata(server, parent_server);
1194 server->caps |= NFS_CAP_ATOMIC_OPEN;
1195
1196 error = nfs_init_server_rpcclient(server, parent_server->client->cl_timeout, data->authflavor); 1223 error = nfs_init_server_rpcclient(server, parent_server->client->cl_timeout, data->authflavor);
1197 if (error < 0) 1224 if (error < 0)
1198 goto error; 1225 goto error;
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index cc563cfa694..968225a8801 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -43,6 +43,27 @@ static void nfs_free_delegation(struct nfs_delegation *delegation)
43 put_rpccred(cred); 43 put_rpccred(cred);
44} 44}
45 45
46void nfs_mark_delegation_referenced(struct nfs_delegation *delegation)
47{
48 set_bit(NFS_DELEGATION_REFERENCED, &delegation->flags);
49}
50
51int nfs_have_delegation(struct inode *inode, fmode_t flags)
52{
53 struct nfs_delegation *delegation;
54 int ret = 0;
55
56 flags &= FMODE_READ|FMODE_WRITE;
57 rcu_read_lock();
58 delegation = rcu_dereference(NFS_I(inode)->delegation);
59 if (delegation != NULL && (delegation->type & flags) == flags) {
60 nfs_mark_delegation_referenced(delegation);
61 ret = 1;
62 }
63 rcu_read_unlock();
64 return ret;
65}
66
46static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state) 67static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state)
47{ 68{
48 struct inode *inode = state->inode; 69 struct inode *inode = state->inode;
@@ -119,7 +140,7 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, st
119 delegation->maxsize = res->maxsize; 140 delegation->maxsize = res->maxsize;
120 oldcred = delegation->cred; 141 oldcred = delegation->cred;
121 delegation->cred = get_rpccred(cred); 142 delegation->cred = get_rpccred(cred);
122 delegation->flags &= ~NFS_DELEGATION_NEED_RECLAIM; 143 clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags);
123 NFS_I(inode)->delegation_state = delegation->type; 144 NFS_I(inode)->delegation_state = delegation->type;
124 smp_wmb(); 145 smp_wmb();
125 put_rpccred(oldcred); 146 put_rpccred(oldcred);
@@ -134,19 +155,35 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *
134 return res; 155 return res;
135} 156}
136 157
158static struct inode *nfs_delegation_grab_inode(struct nfs_delegation *delegation)
159{
160 struct inode *inode = NULL;
161
162 spin_lock(&delegation->lock);
163 if (delegation->inode != NULL)
164 inode = igrab(delegation->inode);
165 spin_unlock(&delegation->lock);
166 return inode;
167}
168
137static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, const nfs4_stateid *stateid) 169static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, const nfs4_stateid *stateid)
138{ 170{
139 struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation); 171 struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation);
140 172
141 if (delegation == NULL) 173 if (delegation == NULL)
142 goto nomatch; 174 goto nomatch;
175 spin_lock(&delegation->lock);
143 if (stateid != NULL && memcmp(delegation->stateid.data, stateid->data, 176 if (stateid != NULL && memcmp(delegation->stateid.data, stateid->data,
144 sizeof(delegation->stateid.data)) != 0) 177 sizeof(delegation->stateid.data)) != 0)
145 goto nomatch; 178 goto nomatch_unlock;
146 list_del_rcu(&delegation->super_list); 179 list_del_rcu(&delegation->super_list);
180 delegation->inode = NULL;
147 nfsi->delegation_state = 0; 181 nfsi->delegation_state = 0;
148 rcu_assign_pointer(nfsi->delegation, NULL); 182 rcu_assign_pointer(nfsi->delegation, NULL);
183 spin_unlock(&delegation->lock);
149 return delegation; 184 return delegation;
185nomatch_unlock:
186 spin_unlock(&delegation->lock);
150nomatch: 187nomatch:
151 return NULL; 188 return NULL;
152} 189}
@@ -172,6 +209,8 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
172 delegation->change_attr = nfsi->change_attr; 209 delegation->change_attr = nfsi->change_attr;
173 delegation->cred = get_rpccred(cred); 210 delegation->cred = get_rpccred(cred);
174 delegation->inode = inode; 211 delegation->inode = inode;
212 delegation->flags = 1<<NFS_DELEGATION_REFERENCED;
213 spin_lock_init(&delegation->lock);
175 214
176 spin_lock(&clp->cl_lock); 215 spin_lock(&clp->cl_lock);
177 if (rcu_dereference(nfsi->delegation) != NULL) { 216 if (rcu_dereference(nfsi->delegation) != NULL) {
@@ -226,22 +265,47 @@ static void nfs_msync_inode(struct inode *inode)
226 */ 265 */
227static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegation *delegation) 266static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegation *delegation)
228{ 267{
229 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
230 struct nfs_inode *nfsi = NFS_I(inode); 268 struct nfs_inode *nfsi = NFS_I(inode);
231 269
232 nfs_msync_inode(inode); 270 nfs_msync_inode(inode);
233 down_read(&clp->cl_sem);
234 /* Guard against new delegated open calls */ 271 /* Guard against new delegated open calls */
235 down_write(&nfsi->rwsem); 272 down_write(&nfsi->rwsem);
236 nfs_delegation_claim_opens(inode, &delegation->stateid); 273 nfs_delegation_claim_opens(inode, &delegation->stateid);
237 up_write(&nfsi->rwsem); 274 up_write(&nfsi->rwsem);
238 up_read(&clp->cl_sem);
239 nfs_msync_inode(inode); 275 nfs_msync_inode(inode);
240 276
241 return nfs_do_return_delegation(inode, delegation, 1); 277 return nfs_do_return_delegation(inode, delegation, 1);
242} 278}
243 279
244/* 280/*
281 * Return all delegations that have been marked for return
282 */
283void nfs_client_return_marked_delegations(struct nfs_client *clp)
284{
285 struct nfs_delegation *delegation;
286 struct inode *inode;
287
288restart:
289 rcu_read_lock();
290 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
291 if (!test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags))
292 continue;
293 inode = nfs_delegation_grab_inode(delegation);
294 if (inode == NULL)
295 continue;
296 spin_lock(&clp->cl_lock);
297 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
298 spin_unlock(&clp->cl_lock);
299 rcu_read_unlock();
300 if (delegation != NULL)
301 __nfs_inode_return_delegation(inode, delegation);
302 iput(inode);
303 goto restart;
304 }
305 rcu_read_unlock();
306}
307
308/*
245 * This function returns the delegation without reclaiming opens 309 * This function returns the delegation without reclaiming opens
246 * or protecting against delegation reclaims. 310 * or protecting against delegation reclaims.
247 * It is therefore really only safe to be called from 311 * It is therefore really only safe to be called from
@@ -279,83 +343,55 @@ int nfs_inode_return_delegation(struct inode *inode)
279 return err; 343 return err;
280} 344}
281 345
346static void nfs_mark_return_delegation(struct nfs_client *clp, struct nfs_delegation *delegation)
347{
348 set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
349 set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
350}
351
282/* 352/*
283 * Return all delegations associated to a super block 353 * Return all delegations associated to a super block
284 */ 354 */
285void nfs_return_all_delegations(struct super_block *sb) 355void nfs_super_return_all_delegations(struct super_block *sb)
286{ 356{
287 struct nfs_client *clp = NFS_SB(sb)->nfs_client; 357 struct nfs_client *clp = NFS_SB(sb)->nfs_client;
288 struct nfs_delegation *delegation; 358 struct nfs_delegation *delegation;
289 struct inode *inode;
290 359
291 if (clp == NULL) 360 if (clp == NULL)
292 return; 361 return;
293restart:
294 rcu_read_lock(); 362 rcu_read_lock();
295 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { 363 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
296 if (delegation->inode->i_sb != sb) 364 spin_lock(&delegation->lock);
297 continue; 365 if (delegation->inode != NULL && delegation->inode->i_sb == sb)
298 inode = igrab(delegation->inode); 366 set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
299 if (inode == NULL) 367 spin_unlock(&delegation->lock);
300 continue;
301 spin_lock(&clp->cl_lock);
302 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
303 spin_unlock(&clp->cl_lock);
304 rcu_read_unlock();
305 if (delegation != NULL)
306 __nfs_inode_return_delegation(inode, delegation);
307 iput(inode);
308 goto restart;
309 } 368 }
310 rcu_read_unlock(); 369 rcu_read_unlock();
370 nfs_client_return_marked_delegations(clp);
311} 371}
312 372
313static int nfs_do_expire_all_delegations(void *ptr) 373static void nfs_client_mark_return_all_delegations(struct nfs_client *clp)
314{ 374{
315 struct nfs_client *clp = ptr;
316 struct nfs_delegation *delegation; 375 struct nfs_delegation *delegation;
317 struct inode *inode;
318 376
319 allow_signal(SIGKILL);
320restart:
321 if (test_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) != 0)
322 goto out;
323 if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0)
324 goto out;
325 rcu_read_lock(); 377 rcu_read_lock();
326 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { 378 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
327 inode = igrab(delegation->inode); 379 set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
328 if (inode == NULL) 380 set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
329 continue;
330 spin_lock(&clp->cl_lock);
331 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
332 spin_unlock(&clp->cl_lock);
333 rcu_read_unlock();
334 if (delegation)
335 __nfs_inode_return_delegation(inode, delegation);
336 iput(inode);
337 goto restart;
338 } 381 }
339 rcu_read_unlock(); 382 rcu_read_unlock();
340out: 383}
341 nfs_put_client(clp); 384
342 module_put_and_exit(0); 385static void nfs_delegation_run_state_manager(struct nfs_client *clp)
386{
387 if (test_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state))
388 nfs4_schedule_state_manager(clp);
343} 389}
344 390
345void nfs_expire_all_delegations(struct nfs_client *clp) 391void nfs_expire_all_delegations(struct nfs_client *clp)
346{ 392{
347 struct task_struct *task; 393 nfs_client_mark_return_all_delegations(clp);
348 394 nfs_delegation_run_state_manager(clp);
349 __module_get(THIS_MODULE);
350 atomic_inc(&clp->cl_count);
351 task = kthread_run(nfs_do_expire_all_delegations, clp,
352 "%s-delegreturn",
353 rpc_peeraddr2str(clp->cl_rpcclient,
354 RPC_DISPLAY_ADDR));
355 if (!IS_ERR(task))
356 return;
357 nfs_put_client(clp);
358 module_put(THIS_MODULE);
359} 395}
360 396
361/* 397/*
@@ -363,68 +399,29 @@ void nfs_expire_all_delegations(struct nfs_client *clp)
363 */ 399 */
364void nfs_handle_cb_pathdown(struct nfs_client *clp) 400void nfs_handle_cb_pathdown(struct nfs_client *clp)
365{ 401{
366 struct nfs_delegation *delegation;
367 struct inode *inode;
368
369 if (clp == NULL) 402 if (clp == NULL)
370 return; 403 return;
371restart: 404 nfs_client_mark_return_all_delegations(clp);
405}
406
407static void nfs_client_mark_return_unreferenced_delegations(struct nfs_client *clp)
408{
409 struct nfs_delegation *delegation;
410
372 rcu_read_lock(); 411 rcu_read_lock();
373 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { 412 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
374 inode = igrab(delegation->inode); 413 if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags))
375 if (inode == NULL)
376 continue; 414 continue;
377 spin_lock(&clp->cl_lock); 415 set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
378 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL); 416 set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
379 spin_unlock(&clp->cl_lock);
380 rcu_read_unlock();
381 if (delegation != NULL)
382 __nfs_inode_return_delegation(inode, delegation);
383 iput(inode);
384 goto restart;
385 } 417 }
386 rcu_read_unlock(); 418 rcu_read_unlock();
387} 419}
388 420
389struct recall_threadargs { 421void nfs_expire_unreferenced_delegations(struct nfs_client *clp)
390 struct inode *inode;
391 struct nfs_client *clp;
392 const nfs4_stateid *stateid;
393
394 struct completion started;
395 int result;
396};
397
398static int recall_thread(void *data)
399{ 422{
400 struct recall_threadargs *args = (struct recall_threadargs *)data; 423 nfs_client_mark_return_unreferenced_delegations(clp);
401 struct inode *inode = igrab(args->inode); 424 nfs_delegation_run_state_manager(clp);
402 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
403 struct nfs_inode *nfsi = NFS_I(inode);
404 struct nfs_delegation *delegation;
405
406 daemonize("nfsv4-delegreturn");
407
408 nfs_msync_inode(inode);
409 down_read(&clp->cl_sem);
410 down_write(&nfsi->rwsem);
411 spin_lock(&clp->cl_lock);
412 delegation = nfs_detach_delegation_locked(nfsi, args->stateid);
413 if (delegation != NULL)
414 args->result = 0;
415 else
416 args->result = -ENOENT;
417 spin_unlock(&clp->cl_lock);
418 complete(&args->started);
419 nfs_delegation_claim_opens(inode, args->stateid);
420 up_write(&nfsi->rwsem);
421 up_read(&clp->cl_sem);
422 nfs_msync_inode(inode);
423
424 if (delegation != NULL)
425 nfs_do_return_delegation(inode, delegation, 1);
426 iput(inode);
427 module_put_and_exit(0);
428} 425}
429 426
430/* 427/*
@@ -432,22 +429,20 @@ static int recall_thread(void *data)
432 */ 429 */
433int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid) 430int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid)
434{ 431{
435 struct recall_threadargs data = { 432 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
436 .inode = inode, 433 struct nfs_delegation *delegation;
437 .stateid = stateid,
438 };
439 int status;
440 434
441 init_completion(&data.started); 435 rcu_read_lock();
442 __module_get(THIS_MODULE); 436 delegation = rcu_dereference(NFS_I(inode)->delegation);
443 status = kernel_thread(recall_thread, &data, CLONE_KERNEL); 437 if (delegation == NULL || memcmp(delegation->stateid.data, stateid->data,
444 if (status < 0) 438 sizeof(delegation->stateid.data)) != 0) {
445 goto out_module_put; 439 rcu_read_unlock();
446 wait_for_completion(&data.started); 440 return -ENOENT;
447 return data.result; 441 }
448out_module_put: 442 nfs_mark_return_delegation(clp, delegation);
449 module_put(THIS_MODULE); 443 rcu_read_unlock();
450 return status; 444 nfs_delegation_run_state_manager(clp);
445 return 0;
451} 446}
452 447
453/* 448/*
@@ -459,10 +454,14 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs
459 struct inode *res = NULL; 454 struct inode *res = NULL;
460 rcu_read_lock(); 455 rcu_read_lock();
461 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { 456 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
462 if (nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) { 457 spin_lock(&delegation->lock);
458 if (delegation->inode != NULL &&
459 nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) {
463 res = igrab(delegation->inode); 460 res = igrab(delegation->inode);
464 break;
465 } 461 }
462 spin_unlock(&delegation->lock);
463 if (res != NULL)
464 break;
466 } 465 }
467 rcu_read_unlock(); 466 rcu_read_unlock();
468 return res; 467 return res;
@@ -476,7 +475,7 @@ void nfs_delegation_mark_reclaim(struct nfs_client *clp)
476 struct nfs_delegation *delegation; 475 struct nfs_delegation *delegation;
477 rcu_read_lock(); 476 rcu_read_lock();
478 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) 477 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list)
479 delegation->flags |= NFS_DELEGATION_NEED_RECLAIM; 478 set_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags);
480 rcu_read_unlock(); 479 rcu_read_unlock();
481} 480}
482 481
@@ -486,17 +485,22 @@ void nfs_delegation_mark_reclaim(struct nfs_client *clp)
486void nfs_delegation_reap_unclaimed(struct nfs_client *clp) 485void nfs_delegation_reap_unclaimed(struct nfs_client *clp)
487{ 486{
488 struct nfs_delegation *delegation; 487 struct nfs_delegation *delegation;
488 struct inode *inode;
489restart: 489restart:
490 rcu_read_lock(); 490 rcu_read_lock();
491 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { 491 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
492 if ((delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0) 492 if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) == 0)
493 continue;
494 inode = nfs_delegation_grab_inode(delegation);
495 if (inode == NULL)
493 continue; 496 continue;
494 spin_lock(&clp->cl_lock); 497 spin_lock(&clp->cl_lock);
495 delegation = nfs_detach_delegation_locked(NFS_I(delegation->inode), NULL); 498 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
496 spin_unlock(&clp->cl_lock); 499 spin_unlock(&clp->cl_lock);
497 rcu_read_unlock(); 500 rcu_read_unlock();
498 if (delegation != NULL) 501 if (delegation != NULL)
499 nfs_free_delegation(delegation); 502 nfs_free_delegation(delegation);
503 iput(inode);
500 goto restart; 504 goto restart;
501 } 505 }
502 rcu_read_unlock(); 506 rcu_read_unlock();
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index f1c5e2a5d88..09f38379517 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -17,14 +17,20 @@ struct nfs_delegation {
17 struct rpc_cred *cred; 17 struct rpc_cred *cred;
18 struct inode *inode; 18 struct inode *inode;
19 nfs4_stateid stateid; 19 nfs4_stateid stateid;
20 int type; 20 fmode_t type;
21#define NFS_DELEGATION_NEED_RECLAIM 1
22 long flags;
23 loff_t maxsize; 21 loff_t maxsize;
24 __u64 change_attr; 22 __u64 change_attr;
23 unsigned long flags;
24 spinlock_t lock;
25 struct rcu_head rcu; 25 struct rcu_head rcu;
26}; 26};
27 27
28enum {
29 NFS_DELEGATION_NEED_RECLAIM = 0,
30 NFS_DELEGATION_RETURN,
31 NFS_DELEGATION_REFERENCED,
32};
33
28int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); 34int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
29void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); 35void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
30int nfs_inode_return_delegation(struct inode *inode); 36int nfs_inode_return_delegation(struct inode *inode);
@@ -32,9 +38,11 @@ int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *s
32void nfs_inode_return_delegation_noreclaim(struct inode *inode); 38void nfs_inode_return_delegation_noreclaim(struct inode *inode);
33 39
34struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle); 40struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
35void nfs_return_all_delegations(struct super_block *sb); 41void nfs_super_return_all_delegations(struct super_block *sb);
36void nfs_expire_all_delegations(struct nfs_client *clp); 42void nfs_expire_all_delegations(struct nfs_client *clp);
43void nfs_expire_unreferenced_delegations(struct nfs_client *clp);
37void nfs_handle_cb_pathdown(struct nfs_client *clp); 44void nfs_handle_cb_pathdown(struct nfs_client *clp);
45void nfs_client_return_marked_delegations(struct nfs_client *clp);
38 46
39void nfs_delegation_mark_reclaim(struct nfs_client *clp); 47void nfs_delegation_mark_reclaim(struct nfs_client *clp);
40void nfs_delegation_reap_unclaimed(struct nfs_client *clp); 48void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
@@ -45,22 +53,11 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
45int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl); 53int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl);
46int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode); 54int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode);
47 55
48static inline int nfs_have_delegation(struct inode *inode, int flags) 56void nfs_mark_delegation_referenced(struct nfs_delegation *delegation);
49{ 57int nfs_have_delegation(struct inode *inode, fmode_t flags);
50 struct nfs_delegation *delegation;
51 int ret = 0;
52
53 flags &= FMODE_READ|FMODE_WRITE;
54 rcu_read_lock();
55 delegation = rcu_dereference(NFS_I(inode)->delegation);
56 if (delegation != NULL && (delegation->type & flags) == flags)
57 ret = 1;
58 rcu_read_unlock();
59 return ret;
60}
61 58
62#else 59#else
63static inline int nfs_have_delegation(struct inode *inode, int flags) 60static inline int nfs_have_delegation(struct inode *inode, fmode_t flags)
64{ 61{
65 return 0; 62 return 0;
66} 63}
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 3e64b98f3a9..e35c8199f82 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -799,6 +799,9 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
799 goto out_bad; 799 goto out_bad;
800 } 800 }
801 801
802 if (nfs_have_delegation(inode, FMODE_READ))
803 goto out_set_verifier;
804
802 /* Force a full look up iff the parent directory has changed */ 805 /* Force a full look up iff the parent directory has changed */
803 if (!nfs_is_exclusive_create(dir, nd) && nfs_check_verifier(dir, dentry)) { 806 if (!nfs_is_exclusive_create(dir, nd) && nfs_check_verifier(dir, dentry)) {
804 if (nfs_lookup_verify_inode(inode, nd)) 807 if (nfs_lookup_verify_inode(inode, nd))
@@ -817,6 +820,7 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
817 if ((error = nfs_refresh_inode(inode, &fattr)) != 0) 820 if ((error = nfs_refresh_inode(inode, &fattr)) != 0)
818 goto out_bad; 821 goto out_bad;
819 822
823out_set_verifier:
820 nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); 824 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
821 out_valid: 825 out_valid:
822 dput(parent); 826 dput(parent);
@@ -973,7 +977,7 @@ struct dentry_operations nfs4_dentry_operations = {
973 * Use intent information to determine whether we need to substitute 977 * Use intent information to determine whether we need to substitute
974 * the NFSv4-style stateful OPEN for the LOOKUP call 978 * the NFSv4-style stateful OPEN for the LOOKUP call
975 */ 979 */
976static int is_atomic_open(struct inode *dir, struct nameidata *nd) 980static int is_atomic_open(struct nameidata *nd)
977{ 981{
978 if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_OPEN) == 0) 982 if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_OPEN) == 0)
979 return 0; 983 return 0;
@@ -996,7 +1000,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
996 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); 1000 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
997 1001
998 /* Check that we are indeed trying to open this file */ 1002 /* Check that we are indeed trying to open this file */
999 if (!is_atomic_open(dir, nd)) 1003 if (!is_atomic_open(nd))
1000 goto no_open; 1004 goto no_open;
1001 1005
1002 if (dentry->d_name.len > NFS_SERVER(dir)->namelen) { 1006 if (dentry->d_name.len > NFS_SERVER(dir)->namelen) {
@@ -1047,10 +1051,10 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
1047 struct inode *dir; 1051 struct inode *dir;
1048 int openflags, ret = 0; 1052 int openflags, ret = 0;
1049 1053
1054 if (!is_atomic_open(nd))
1055 goto no_open;
1050 parent = dget_parent(dentry); 1056 parent = dget_parent(dentry);
1051 dir = parent->d_inode; 1057 dir = parent->d_inode;
1052 if (!is_atomic_open(dir, nd))
1053 goto no_open;
1054 /* We can't create new files in nfs_open_revalidate(), so we 1058 /* We can't create new files in nfs_open_revalidate(), so we
1055 * optimize away revalidation of negative dentries. 1059 * optimize away revalidation of negative dentries.
1056 */ 1060 */
@@ -1062,11 +1066,11 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
1062 1066
1063 /* NFS only supports OPEN on regular files */ 1067 /* NFS only supports OPEN on regular files */
1064 if (!S_ISREG(inode->i_mode)) 1068 if (!S_ISREG(inode->i_mode))
1065 goto no_open; 1069 goto no_open_dput;
1066 openflags = nd->intent.open.flags; 1070 openflags = nd->intent.open.flags;
1067 /* We cannot do exclusive creation on a positive dentry */ 1071 /* We cannot do exclusive creation on a positive dentry */
1068 if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) 1072 if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
1069 goto no_open; 1073 goto no_open_dput;
1070 /* We can't create new files, or truncate existing ones here */ 1074 /* We can't create new files, or truncate existing ones here */
1071 openflags &= ~(O_CREAT|O_TRUNC); 1075 openflags &= ~(O_CREAT|O_TRUNC);
1072 1076
@@ -1081,10 +1085,9 @@ out:
1081 if (!ret) 1085 if (!ret)
1082 d_drop(dentry); 1086 d_drop(dentry);
1083 return ret; 1087 return ret;
1084no_open: 1088no_open_dput:
1085 dput(parent); 1089 dput(parent);
1086 if (inode != NULL && nfs_have_delegation(inode, FMODE_READ)) 1090no_open:
1087 return 1;
1088 return nfs_lookup_revalidate(dentry, nd); 1091 return nfs_lookup_revalidate(dentry, nd);
1089} 1092}
1090#endif /* CONFIG_NFSV4 */ 1093#endif /* CONFIG_NFSV4 */
@@ -1794,7 +1797,8 @@ static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, str
1794 cache = nfs_access_search_rbtree(inode, cred); 1797 cache = nfs_access_search_rbtree(inode, cred);
1795 if (cache == NULL) 1798 if (cache == NULL)
1796 goto out; 1799 goto out;
1797 if (!time_in_range(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo)) 1800 if (!nfs_have_delegation(inode, FMODE_READ) &&
1801 !time_in_range_open(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo))
1798 goto out_stale; 1802 goto out_stale;
1799 res->jiffies = cache->jiffies; 1803 res->jiffies = cache->jiffies;
1800 res->cred = cache->cred; 1804 res->cred = cache->cred;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index d319b49f8f0..90f292b520d 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -354,7 +354,7 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
354 file->f_path.dentry->d_name.name, 354 file->f_path.dentry->d_name.name,
355 mapping->host->i_ino, len, (long long) pos); 355 mapping->host->i_ino, len, (long long) pos);
356 356
357 page = __grab_cache_page(mapping, index); 357 page = grab_cache_page_write_begin(mapping, index, flags);
358 if (!page) 358 if (!page)
359 return -ENOMEM; 359 return -ENOMEM;
360 *pagep = page; 360 *pagep = page;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index d22eb383e1c..0c381686171 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -592,7 +592,7 @@ static void nfs_file_set_open_context(struct file *filp, struct nfs_open_context
592/* 592/*
593 * Given an inode, search for an open context with the desired characteristics 593 * Given an inode, search for an open context with the desired characteristics
594 */ 594 */
595struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode) 595struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, fmode_t mode)
596{ 596{
597 struct nfs_inode *nfsi = NFS_I(inode); 597 struct nfs_inode *nfsi = NFS_I(inode);
598 struct nfs_open_context *pos, *ctx = NULL; 598 struct nfs_open_context *pos, *ctx = NULL;
@@ -712,14 +712,7 @@ int nfs_attribute_timeout(struct inode *inode)
712 712
713 if (nfs_have_delegation(inode, FMODE_READ)) 713 if (nfs_have_delegation(inode, FMODE_READ))
714 return 0; 714 return 0;
715 /* 715 return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo);
716 * Special case: if the attribute timeout is set to 0, then always
717 * treat the cache as having expired (unless holding
718 * a delegation).
719 */
720 if (nfsi->attrtimeo == 0)
721 return 1;
722 return !time_in_range(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo);
723} 716}
724 717
725/** 718/**
@@ -1182,7 +1175,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1182 nfsi->attrtimeo_timestamp = now; 1175 nfsi->attrtimeo_timestamp = now;
1183 nfsi->attr_gencount = nfs_inc_attr_generation_counter(); 1176 nfsi->attr_gencount = nfs_inc_attr_generation_counter();
1184 } else { 1177 } else {
1185 if (!time_in_range(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) { 1178 if (!time_in_range_open(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) {
1186 if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode)) 1179 if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode))
1187 nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); 1180 nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode);
1188 nfsi->attrtimeo_timestamp = now; 1181 nfsi->attrtimeo_timestamp = now;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index d212ee41caf..340ede8f608 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -63,6 +63,20 @@ struct nfs_parsed_mount_data {
63 struct security_mnt_opts lsm_opts; 63 struct security_mnt_opts lsm_opts;
64}; 64};
65 65
66/* mount_clnt.c */
67struct nfs_mount_request {
68 struct sockaddr *sap;
69 size_t salen;
70 char *hostname;
71 char *dirpath;
72 u32 version;
73 unsigned short protocol;
74 struct nfs_fh *fh;
75 int noresvport;
76};
77
78extern int nfs_mount(struct nfs_mount_request *info);
79
66/* client.c */ 80/* client.c */
67extern struct rpc_program nfs_program; 81extern struct rpc_program nfs_program;
68 82
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 086a6830d78..ca905a5bb1b 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -29,47 +29,43 @@ struct mnt_fhstatus {
29 29
30/** 30/**
31 * nfs_mount - Obtain an NFS file handle for the given host and path 31 * nfs_mount - Obtain an NFS file handle for the given host and path
32 * @addr: pointer to server's address 32 * @info: pointer to mount request arguments
33 * @len: size of server's address
34 * @hostname: name of server host, or NULL
35 * @path: pointer to string containing export path to mount
36 * @version: mount version to use for this request
37 * @protocol: transport protocol to use for thie request
38 * @fh: pointer to location to place returned file handle
39 * 33 *
40 * Uses default timeout parameters specified by underlying transport. 34 * Uses default timeout parameters specified by underlying transport.
41 */ 35 */
42int nfs_mount(struct sockaddr *addr, size_t len, char *hostname, char *path, 36int nfs_mount(struct nfs_mount_request *info)
43 int version, int protocol, struct nfs_fh *fh)
44{ 37{
45 struct mnt_fhstatus result = { 38 struct mnt_fhstatus result = {
46 .fh = fh 39 .fh = info->fh
47 }; 40 };
48 struct rpc_message msg = { 41 struct rpc_message msg = {
49 .rpc_argp = path, 42 .rpc_argp = info->dirpath,
50 .rpc_resp = &result, 43 .rpc_resp = &result,
51 }; 44 };
52 struct rpc_create_args args = { 45 struct rpc_create_args args = {
53 .protocol = protocol, 46 .protocol = info->protocol,
54 .address = addr, 47 .address = info->sap,
55 .addrsize = len, 48 .addrsize = info->salen,
56 .servername = hostname, 49 .servername = info->hostname,
57 .program = &mnt_program, 50 .program = &mnt_program,
58 .version = version, 51 .version = info->version,
59 .authflavor = RPC_AUTH_UNIX, 52 .authflavor = RPC_AUTH_UNIX,
60 .flags = 0,
61 }; 53 };
62 struct rpc_clnt *mnt_clnt; 54 struct rpc_clnt *mnt_clnt;
63 int status; 55 int status;
64 56
65 dprintk("NFS: sending MNT request for %s:%s\n", 57 dprintk("NFS: sending MNT request for %s:%s\n",
66 (hostname ? hostname : "server"), path); 58 (info->hostname ? info->hostname : "server"),
59 info->dirpath);
60
61 if (info->noresvport)
62 args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
67 63
68 mnt_clnt = rpc_create(&args); 64 mnt_clnt = rpc_create(&args);
69 if (IS_ERR(mnt_clnt)) 65 if (IS_ERR(mnt_clnt))
70 goto out_clnt_err; 66 goto out_clnt_err;
71 67
72 if (version == NFS_MNT3_VERSION) 68 if (info->version == NFS_MNT3_VERSION)
73 msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT]; 69 msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT];
74 else 70 else
75 msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT]; 71 msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT];
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index ea790645fda..4e4d3320437 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -38,8 +38,12 @@ struct idmap;
38 ((err) != NFSERR_NOFILEHANDLE)) 38 ((err) != NFSERR_NOFILEHANDLE))
39 39
40enum nfs4_client_state { 40enum nfs4_client_state {
41 NFS4CLNT_STATE_RECOVER = 0, 41 NFS4CLNT_MANAGER_RUNNING = 0,
42 NFS4CLNT_CHECK_LEASE,
42 NFS4CLNT_LEASE_EXPIRED, 43 NFS4CLNT_LEASE_EXPIRED,
44 NFS4CLNT_RECLAIM_REBOOT,
45 NFS4CLNT_RECLAIM_NOGRACE,
46 NFS4CLNT_DELEGRETURN,
43}; 47};
44 48
45/* 49/*
@@ -90,12 +94,18 @@ struct nfs4_state_owner {
90 94
91 spinlock_t so_lock; 95 spinlock_t so_lock;
92 atomic_t so_count; 96 atomic_t so_count;
97 unsigned long so_flags;
93 struct list_head so_states; 98 struct list_head so_states;
94 struct list_head so_delegations; 99 struct list_head so_delegations;
95 struct nfs_seqid_counter so_seqid; 100 struct nfs_seqid_counter so_seqid;
96 struct rpc_sequence so_sequence; 101 struct rpc_sequence so_sequence;
97}; 102};
98 103
104enum {
105 NFS_OWNER_RECLAIM_REBOOT,
106 NFS_OWNER_RECLAIM_NOGRACE
107};
108
99/* 109/*
100 * struct nfs4_state maintains the client-side state for a given 110 * struct nfs4_state maintains the client-side state for a given
101 * (state_owner,inode) tuple (OPEN) or state_owner (LOCK). 111 * (state_owner,inode) tuple (OPEN) or state_owner (LOCK).
@@ -128,6 +138,8 @@ enum {
128 NFS_O_RDONLY_STATE, /* OPEN stateid has read-only state */ 138 NFS_O_RDONLY_STATE, /* OPEN stateid has read-only state */
129 NFS_O_WRONLY_STATE, /* OPEN stateid has write-only state */ 139 NFS_O_WRONLY_STATE, /* OPEN stateid has write-only state */
130 NFS_O_RDWR_STATE, /* OPEN stateid has read/write state */ 140 NFS_O_RDWR_STATE, /* OPEN stateid has read/write state */
141 NFS_STATE_RECLAIM_REBOOT, /* OPEN stateid server rebooted */
142 NFS_STATE_RECLAIM_NOGRACE, /* OPEN stateid needs to recover state */
131}; 143};
132 144
133struct nfs4_state { 145struct nfs4_state {
@@ -149,7 +161,7 @@ struct nfs4_state {
149 unsigned int n_rdonly; /* Number of read-only references */ 161 unsigned int n_rdonly; /* Number of read-only references */
150 unsigned int n_wronly; /* Number of write-only references */ 162 unsigned int n_wronly; /* Number of write-only references */
151 unsigned int n_rdwr; /* Number of read/write references */ 163 unsigned int n_rdwr; /* Number of read/write references */
152 int state; /* State on the server (R,W, or RW) */ 164 fmode_t state; /* State on the server (R,W, or RW) */
153 atomic_t count; 165 atomic_t count;
154}; 166};
155 167
@@ -157,9 +169,12 @@ struct nfs4_state {
157struct nfs4_exception { 169struct nfs4_exception {
158 long timeout; 170 long timeout;
159 int retry; 171 int retry;
172 struct nfs4_state *state;
160}; 173};
161 174
162struct nfs4_state_recovery_ops { 175struct nfs4_state_recovery_ops {
176 int owner_flag_bit;
177 int state_flag_bit;
163 int (*recover_open)(struct nfs4_state_owner *, struct nfs4_state *); 178 int (*recover_open)(struct nfs4_state_owner *, struct nfs4_state *);
164 int (*recover_lock)(struct nfs4_state *, struct file_lock *); 179 int (*recover_lock)(struct nfs4_state *, struct file_lock *);
165}; 180};
@@ -174,7 +189,6 @@ extern ssize_t nfs4_listxattr(struct dentry *, char *, size_t);
174 189
175 190
176/* nfs4proc.c */ 191/* nfs4proc.c */
177extern int nfs4_map_errors(int err);
178extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *); 192extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *);
179extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *); 193extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *);
180extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *); 194extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
@@ -187,7 +201,7 @@ extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
187 struct nfs4_fs_locations *fs_locations, struct page *page); 201 struct nfs4_fs_locations *fs_locations, struct page *page);
188 202
189extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops; 203extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops;
190extern struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops; 204extern struct nfs4_state_recovery_ops nfs4_nograce_recovery_ops;
191 205
192extern const u32 nfs4_fattr_bitmap[2]; 206extern const u32 nfs4_fattr_bitmap[2];
193extern const u32 nfs4_statfs_bitmap[2]; 207extern const u32 nfs4_statfs_bitmap[2];
@@ -202,16 +216,18 @@ extern void nfs4_kill_renewd(struct nfs_client *);
202extern void nfs4_renew_state(struct work_struct *); 216extern void nfs4_renew_state(struct work_struct *);
203 217
204/* nfs4state.c */ 218/* nfs4state.c */
205struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp); 219struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp);
206 220
207extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); 221extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
208extern void nfs4_put_state_owner(struct nfs4_state_owner *); 222extern void nfs4_put_state_owner(struct nfs4_state_owner *);
209extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); 223extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
210extern void nfs4_put_open_state(struct nfs4_state *); 224extern void nfs4_put_open_state(struct nfs4_state *);
211extern void nfs4_close_state(struct path *, struct nfs4_state *, mode_t); 225extern void nfs4_close_state(struct path *, struct nfs4_state *, fmode_t);
212extern void nfs4_close_sync(struct path *, struct nfs4_state *, mode_t); 226extern void nfs4_close_sync(struct path *, struct nfs4_state *, fmode_t);
213extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t); 227extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
214extern void nfs4_schedule_state_recovery(struct nfs_client *); 228extern void nfs4_schedule_state_recovery(struct nfs_client *);
229extern void nfs4_schedule_state_manager(struct nfs_client *);
230extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state);
215extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); 231extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
216extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); 232extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
217extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); 233extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 83e700a2b0c..8dde84b988d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -62,14 +62,12 @@
62struct nfs4_opendata; 62struct nfs4_opendata;
63static int _nfs4_proc_open(struct nfs4_opendata *data); 63static int _nfs4_proc_open(struct nfs4_opendata *data);
64static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); 64static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
65static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *); 65static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *);
66static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception);
67static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp);
68static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr); 66static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
69static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); 67static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
70 68
71/* Prevent leaks of NFSv4 errors into userland */ 69/* Prevent leaks of NFSv4 errors into userland */
72int nfs4_map_errors(int err) 70static int nfs4_map_errors(int err)
73{ 71{
74 if (err < -1000) { 72 if (err < -1000) {
75 dprintk("%s could not handle NFSv4 error %d\n", 73 dprintk("%s could not handle NFSv4 error %d\n",
@@ -195,6 +193,83 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
195 kunmap_atomic(start, KM_USER0); 193 kunmap_atomic(start, KM_USER0);
196} 194}
197 195
196static int nfs4_wait_bit_killable(void *word)
197{
198 if (fatal_signal_pending(current))
199 return -ERESTARTSYS;
200 schedule();
201 return 0;
202}
203
204static int nfs4_wait_clnt_recover(struct nfs_client *clp)
205{
206 int res;
207
208 might_sleep();
209
210 res = wait_on_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING,
211 nfs4_wait_bit_killable, TASK_KILLABLE);
212 return res;
213}
214
215static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
216{
217 int res = 0;
218
219 might_sleep();
220
221 if (*timeout <= 0)
222 *timeout = NFS4_POLL_RETRY_MIN;
223 if (*timeout > NFS4_POLL_RETRY_MAX)
224 *timeout = NFS4_POLL_RETRY_MAX;
225 schedule_timeout_killable(*timeout);
226 if (fatal_signal_pending(current))
227 res = -ERESTARTSYS;
228 *timeout <<= 1;
229 return res;
230}
231
232/* This is the error handling routine for processes that are allowed
233 * to sleep.
234 */
235static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
236{
237 struct nfs_client *clp = server->nfs_client;
238 struct nfs4_state *state = exception->state;
239 int ret = errorcode;
240
241 exception->retry = 0;
242 switch(errorcode) {
243 case 0:
244 return 0;
245 case -NFS4ERR_ADMIN_REVOKED:
246 case -NFS4ERR_BAD_STATEID:
247 case -NFS4ERR_OPENMODE:
248 if (state == NULL)
249 break;
250 nfs4_state_mark_reclaim_nograce(clp, state);
251 case -NFS4ERR_STALE_CLIENTID:
252 case -NFS4ERR_STALE_STATEID:
253 case -NFS4ERR_EXPIRED:
254 nfs4_schedule_state_recovery(clp);
255 ret = nfs4_wait_clnt_recover(clp);
256 if (ret == 0)
257 exception->retry = 1;
258 break;
259 case -NFS4ERR_FILE_OPEN:
260 case -NFS4ERR_GRACE:
261 case -NFS4ERR_DELAY:
262 ret = nfs4_delay(server->client, &exception->timeout);
263 if (ret != 0)
264 break;
265 case -NFS4ERR_OLD_STATEID:
266 exception->retry = 1;
267 }
268 /* We failed to handle the error */
269 return nfs4_map_errors(ret);
270}
271
272
198static void renew_lease(const struct nfs_server *server, unsigned long timestamp) 273static void renew_lease(const struct nfs_server *server, unsigned long timestamp)
199{ 274{
200 struct nfs_client *clp = server->nfs_client; 275 struct nfs_client *clp = server->nfs_client;
@@ -248,7 +323,7 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p)
248} 323}
249 324
250static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, 325static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
251 struct nfs4_state_owner *sp, int flags, 326 struct nfs4_state_owner *sp, fmode_t fmode, int flags,
252 const struct iattr *attrs) 327 const struct iattr *attrs)
253{ 328{
254 struct dentry *parent = dget_parent(path->dentry); 329 struct dentry *parent = dget_parent(path->dentry);
@@ -268,7 +343,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
268 p->owner = sp; 343 p->owner = sp;
269 atomic_inc(&sp->so_count); 344 atomic_inc(&sp->so_count);
270 p->o_arg.fh = NFS_FH(dir); 345 p->o_arg.fh = NFS_FH(dir);
271 p->o_arg.open_flags = flags, 346 p->o_arg.open_flags = flags;
347 p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE);
272 p->o_arg.clientid = server->nfs_client->cl_clientid; 348 p->o_arg.clientid = server->nfs_client->cl_clientid;
273 p->o_arg.id = sp->so_owner_id.id; 349 p->o_arg.id = sp->so_owner_id.id;
274 p->o_arg.name = &p->path.dentry->d_name; 350 p->o_arg.name = &p->path.dentry->d_name;
@@ -324,10 +400,13 @@ static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task)
324 return ret; 400 return ret;
325} 401}
326 402
327static int can_open_cached(struct nfs4_state *state, int mode) 403static int can_open_cached(struct nfs4_state *state, fmode_t mode, int open_mode)
328{ 404{
329 int ret = 0; 405 int ret = 0;
330 switch (mode & (FMODE_READ|FMODE_WRITE|O_EXCL)) { 406
407 if (open_mode & O_EXCL)
408 goto out;
409 switch (mode & (FMODE_READ|FMODE_WRITE)) {
331 case FMODE_READ: 410 case FMODE_READ:
332 ret |= test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0; 411 ret |= test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0;
333 break; 412 break;
@@ -337,21 +416,23 @@ static int can_open_cached(struct nfs4_state *state, int mode)
337 case FMODE_READ|FMODE_WRITE: 416 case FMODE_READ|FMODE_WRITE:
338 ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0; 417 ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0;
339 } 418 }
419out:
340 return ret; 420 return ret;
341} 421}
342 422
343static int can_open_delegated(struct nfs_delegation *delegation, mode_t open_flags) 423static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode)
344{ 424{
345 if ((delegation->type & open_flags) != open_flags) 425 if ((delegation->type & fmode) != fmode)
346 return 0; 426 return 0;
347 if (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) 427 if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags))
348 return 0; 428 return 0;
429 nfs_mark_delegation_referenced(delegation);
349 return 1; 430 return 1;
350} 431}
351 432
352static void update_open_stateflags(struct nfs4_state *state, mode_t open_flags) 433static void update_open_stateflags(struct nfs4_state *state, fmode_t fmode)
353{ 434{
354 switch (open_flags) { 435 switch (fmode) {
355 case FMODE_WRITE: 436 case FMODE_WRITE:
356 state->n_wronly++; 437 state->n_wronly++;
357 break; 438 break;
@@ -361,15 +442,15 @@ static void update_open_stateflags(struct nfs4_state *state, mode_t open_flags)
361 case FMODE_READ|FMODE_WRITE: 442 case FMODE_READ|FMODE_WRITE:
362 state->n_rdwr++; 443 state->n_rdwr++;
363 } 444 }
364 nfs4_state_set_mode_locked(state, state->state | open_flags); 445 nfs4_state_set_mode_locked(state, state->state | fmode);
365} 446}
366 447
367static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags) 448static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode)
368{ 449{
369 if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) 450 if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
370 memcpy(state->stateid.data, stateid->data, sizeof(state->stateid.data)); 451 memcpy(state->stateid.data, stateid->data, sizeof(state->stateid.data));
371 memcpy(state->open_stateid.data, stateid->data, sizeof(state->open_stateid.data)); 452 memcpy(state->open_stateid.data, stateid->data, sizeof(state->open_stateid.data));
372 switch (open_flags) { 453 switch (fmode) {
373 case FMODE_READ: 454 case FMODE_READ:
374 set_bit(NFS_O_RDONLY_STATE, &state->flags); 455 set_bit(NFS_O_RDONLY_STATE, &state->flags);
375 break; 456 break;
@@ -381,16 +462,15 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *
381 } 462 }
382} 463}
383 464
384static void nfs_set_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags) 465static void nfs_set_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode)
385{ 466{
386 write_seqlock(&state->seqlock); 467 write_seqlock(&state->seqlock);
387 nfs_set_open_stateid_locked(state, stateid, open_flags); 468 nfs_set_open_stateid_locked(state, stateid, fmode);
388 write_sequnlock(&state->seqlock); 469 write_sequnlock(&state->seqlock);
389} 470}
390 471
391static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, nfs4_stateid *deleg_stateid, int open_flags) 472static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, const nfs4_stateid *deleg_stateid, fmode_t fmode)
392{ 473{
393 open_flags &= (FMODE_READ|FMODE_WRITE);
394 /* 474 /*
395 * Protect the call to nfs4_state_set_mode_locked and 475 * Protect the call to nfs4_state_set_mode_locked and
396 * serialise the stateid update 476 * serialise the stateid update
@@ -401,20 +481,60 @@ static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_sta
401 set_bit(NFS_DELEGATED_STATE, &state->flags); 481 set_bit(NFS_DELEGATED_STATE, &state->flags);
402 } 482 }
403 if (open_stateid != NULL) 483 if (open_stateid != NULL)
404 nfs_set_open_stateid_locked(state, open_stateid, open_flags); 484 nfs_set_open_stateid_locked(state, open_stateid, fmode);
405 write_sequnlock(&state->seqlock); 485 write_sequnlock(&state->seqlock);
406 spin_lock(&state->owner->so_lock); 486 spin_lock(&state->owner->so_lock);
407 update_open_stateflags(state, open_flags); 487 update_open_stateflags(state, fmode);
408 spin_unlock(&state->owner->so_lock); 488 spin_unlock(&state->owner->so_lock);
409} 489}
410 490
411static void nfs4_return_incompatible_delegation(struct inode *inode, mode_t open_flags) 491static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, nfs4_stateid *delegation, fmode_t fmode)
492{
493 struct nfs_inode *nfsi = NFS_I(state->inode);
494 struct nfs_delegation *deleg_cur;
495 int ret = 0;
496
497 fmode &= (FMODE_READ|FMODE_WRITE);
498
499 rcu_read_lock();
500 deleg_cur = rcu_dereference(nfsi->delegation);
501 if (deleg_cur == NULL)
502 goto no_delegation;
503
504 spin_lock(&deleg_cur->lock);
505 if (nfsi->delegation != deleg_cur ||
506 (deleg_cur->type & fmode) != fmode)
507 goto no_delegation_unlock;
508
509 if (delegation == NULL)
510 delegation = &deleg_cur->stateid;
511 else if (memcmp(deleg_cur->stateid.data, delegation->data, NFS4_STATEID_SIZE) != 0)
512 goto no_delegation_unlock;
513
514 nfs_mark_delegation_referenced(deleg_cur);
515 __update_open_stateid(state, open_stateid, &deleg_cur->stateid, fmode);
516 ret = 1;
517no_delegation_unlock:
518 spin_unlock(&deleg_cur->lock);
519no_delegation:
520 rcu_read_unlock();
521
522 if (!ret && open_stateid != NULL) {
523 __update_open_stateid(state, open_stateid, NULL, fmode);
524 ret = 1;
525 }
526
527 return ret;
528}
529
530
531static void nfs4_return_incompatible_delegation(struct inode *inode, fmode_t fmode)
412{ 532{
413 struct nfs_delegation *delegation; 533 struct nfs_delegation *delegation;
414 534
415 rcu_read_lock(); 535 rcu_read_lock();
416 delegation = rcu_dereference(NFS_I(inode)->delegation); 536 delegation = rcu_dereference(NFS_I(inode)->delegation);
417 if (delegation == NULL || (delegation->type & open_flags) == open_flags) { 537 if (delegation == NULL || (delegation->type & fmode) == fmode) {
418 rcu_read_unlock(); 538 rcu_read_unlock();
419 return; 539 return;
420 } 540 }
@@ -427,27 +547,28 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
427 struct nfs4_state *state = opendata->state; 547 struct nfs4_state *state = opendata->state;
428 struct nfs_inode *nfsi = NFS_I(state->inode); 548 struct nfs_inode *nfsi = NFS_I(state->inode);
429 struct nfs_delegation *delegation; 549 struct nfs_delegation *delegation;
430 int open_mode = opendata->o_arg.open_flags & (FMODE_READ|FMODE_WRITE|O_EXCL); 550 int open_mode = opendata->o_arg.open_flags & O_EXCL;
551 fmode_t fmode = opendata->o_arg.fmode;
431 nfs4_stateid stateid; 552 nfs4_stateid stateid;
432 int ret = -EAGAIN; 553 int ret = -EAGAIN;
433 554
434 rcu_read_lock();
435 delegation = rcu_dereference(nfsi->delegation);
436 for (;;) { 555 for (;;) {
437 if (can_open_cached(state, open_mode)) { 556 if (can_open_cached(state, fmode, open_mode)) {
438 spin_lock(&state->owner->so_lock); 557 spin_lock(&state->owner->so_lock);
439 if (can_open_cached(state, open_mode)) { 558 if (can_open_cached(state, fmode, open_mode)) {
440 update_open_stateflags(state, open_mode); 559 update_open_stateflags(state, fmode);
441 spin_unlock(&state->owner->so_lock); 560 spin_unlock(&state->owner->so_lock);
442 rcu_read_unlock();
443 goto out_return_state; 561 goto out_return_state;
444 } 562 }
445 spin_unlock(&state->owner->so_lock); 563 spin_unlock(&state->owner->so_lock);
446 } 564 }
447 if (delegation == NULL) 565 rcu_read_lock();
448 break; 566 delegation = rcu_dereference(nfsi->delegation);
449 if (!can_open_delegated(delegation, open_mode)) 567 if (delegation == NULL ||
568 !can_open_delegated(delegation, fmode)) {
569 rcu_read_unlock();
450 break; 570 break;
571 }
451 /* Save the delegation */ 572 /* Save the delegation */
452 memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data)); 573 memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data));
453 rcu_read_unlock(); 574 rcu_read_unlock();
@@ -455,19 +576,11 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
455 if (ret != 0) 576 if (ret != 0)
456 goto out; 577 goto out;
457 ret = -EAGAIN; 578 ret = -EAGAIN;
458 rcu_read_lock(); 579
459 delegation = rcu_dereference(nfsi->delegation); 580 /* Try to update the stateid using the delegation */
460 /* If no delegation, try a cached open */ 581 if (update_open_stateid(state, NULL, &stateid, fmode))
461 if (delegation == NULL) 582 goto out_return_state;
462 continue;
463 /* Is the delegation still valid? */
464 if (memcmp(stateid.data, delegation->stateid.data, sizeof(stateid.data)) != 0)
465 continue;
466 rcu_read_unlock();
467 update_open_stateid(state, NULL, &stateid, open_mode);
468 goto out_return_state;
469 } 583 }
470 rcu_read_unlock();
471out: 584out:
472 return ERR_PTR(ret); 585 return ERR_PTR(ret);
473out_return_state: 586out_return_state:
@@ -480,7 +593,6 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data
480 struct inode *inode; 593 struct inode *inode;
481 struct nfs4_state *state = NULL; 594 struct nfs4_state *state = NULL;
482 struct nfs_delegation *delegation; 595 struct nfs_delegation *delegation;
483 nfs4_stateid *deleg_stateid = NULL;
484 int ret; 596 int ret;
485 597
486 if (!data->rpc_done) { 598 if (!data->rpc_done) {
@@ -507,7 +619,7 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data
507 if (delegation) 619 if (delegation)
508 delegation_flags = delegation->flags; 620 delegation_flags = delegation->flags;
509 rcu_read_unlock(); 621 rcu_read_unlock();
510 if (!(delegation_flags & NFS_DELEGATION_NEED_RECLAIM)) 622 if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0)
511 nfs_inode_set_delegation(state->inode, 623 nfs_inode_set_delegation(state->inode,
512 data->owner->so_cred, 624 data->owner->so_cred,
513 &data->o_res); 625 &data->o_res);
@@ -516,12 +628,9 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data
516 data->owner->so_cred, 628 data->owner->so_cred,
517 &data->o_res); 629 &data->o_res);
518 } 630 }
519 rcu_read_lock(); 631
520 delegation = rcu_dereference(NFS_I(inode)->delegation); 632 update_open_stateid(state, &data->o_res.stateid, NULL,
521 if (delegation != NULL) 633 data->o_arg.fmode);
522 deleg_stateid = &delegation->stateid;
523 update_open_stateid(state, &data->o_res.stateid, deleg_stateid, data->o_arg.open_flags);
524 rcu_read_unlock();
525 iput(inode); 634 iput(inode);
526out: 635out:
527 return state; 636 return state;
@@ -552,7 +661,7 @@ static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context
552{ 661{
553 struct nfs4_opendata *opendata; 662 struct nfs4_opendata *opendata;
554 663
555 opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, NULL); 664 opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, 0, NULL);
556 if (opendata == NULL) 665 if (opendata == NULL)
557 return ERR_PTR(-ENOMEM); 666 return ERR_PTR(-ENOMEM);
558 opendata->state = state; 667 opendata->state = state;
@@ -560,12 +669,13 @@ static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context
560 return opendata; 669 return opendata;
561} 670}
562 671
563static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, mode_t openflags, struct nfs4_state **res) 672static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmode, struct nfs4_state **res)
564{ 673{
565 struct nfs4_state *newstate; 674 struct nfs4_state *newstate;
566 int ret; 675 int ret;
567 676
568 opendata->o_arg.open_flags = openflags; 677 opendata->o_arg.open_flags = 0;
678 opendata->o_arg.fmode = fmode;
569 memset(&opendata->o_res, 0, sizeof(opendata->o_res)); 679 memset(&opendata->o_res, 0, sizeof(opendata->o_res));
570 memset(&opendata->c_res, 0, sizeof(opendata->c_res)); 680 memset(&opendata->c_res, 0, sizeof(opendata->c_res));
571 nfs4_init_opendata_res(opendata); 681 nfs4_init_opendata_res(opendata);
@@ -575,7 +685,7 @@ static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, mode_t openf
575 newstate = nfs4_opendata_to_nfs4_state(opendata); 685 newstate = nfs4_opendata_to_nfs4_state(opendata);
576 if (IS_ERR(newstate)) 686 if (IS_ERR(newstate))
577 return PTR_ERR(newstate); 687 return PTR_ERR(newstate);
578 nfs4_close_state(&opendata->path, newstate, openflags); 688 nfs4_close_state(&opendata->path, newstate, fmode);
579 *res = newstate; 689 *res = newstate;
580 return 0; 690 return 0;
581} 691}
@@ -631,7 +741,7 @@ static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state
631{ 741{
632 struct nfs_delegation *delegation; 742 struct nfs_delegation *delegation;
633 struct nfs4_opendata *opendata; 743 struct nfs4_opendata *opendata;
634 int delegation_type = 0; 744 fmode_t delegation_type = 0;
635 int status; 745 int status;
636 746
637 opendata = nfs4_open_recoverdata_alloc(ctx, state); 747 opendata = nfs4_open_recoverdata_alloc(ctx, state);
@@ -641,7 +751,7 @@ static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state
641 opendata->o_arg.fh = NFS_FH(state->inode); 751 opendata->o_arg.fh = NFS_FH(state->inode);
642 rcu_read_lock(); 752 rcu_read_lock();
643 delegation = rcu_dereference(NFS_I(state->inode)->delegation); 753 delegation = rcu_dereference(NFS_I(state->inode)->delegation);
644 if (delegation != NULL && (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) != 0) 754 if (delegation != NULL && test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) != 0)
645 delegation_type = delegation->type; 755 delegation_type = delegation->type;
646 rcu_read_unlock(); 756 rcu_read_unlock();
647 opendata->o_arg.u.delegation_type = delegation_type; 757 opendata->o_arg.u.delegation_type = delegation_type;
@@ -744,7 +854,7 @@ static void nfs4_open_confirm_release(void *calldata)
744 goto out_free; 854 goto out_free;
745 state = nfs4_opendata_to_nfs4_state(data); 855 state = nfs4_opendata_to_nfs4_state(data);
746 if (!IS_ERR(state)) 856 if (!IS_ERR(state))
747 nfs4_close_state(&data->path, state, data->o_arg.open_flags); 857 nfs4_close_state(&data->path, state, data->o_arg.fmode);
748out_free: 858out_free:
749 nfs4_opendata_put(data); 859 nfs4_opendata_put(data);
750} 860}
@@ -808,12 +918,12 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
808 if (data->state != NULL) { 918 if (data->state != NULL) {
809 struct nfs_delegation *delegation; 919 struct nfs_delegation *delegation;
810 920
811 if (can_open_cached(data->state, data->o_arg.open_flags & (FMODE_READ|FMODE_WRITE|O_EXCL))) 921 if (can_open_cached(data->state, data->o_arg.fmode, data->o_arg.open_flags))
812 goto out_no_action; 922 goto out_no_action;
813 rcu_read_lock(); 923 rcu_read_lock();
814 delegation = rcu_dereference(NFS_I(data->state->inode)->delegation); 924 delegation = rcu_dereference(NFS_I(data->state->inode)->delegation);
815 if (delegation != NULL && 925 if (delegation != NULL &&
816 (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0) { 926 test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) == 0) {
817 rcu_read_unlock(); 927 rcu_read_unlock();
818 goto out_no_action; 928 goto out_no_action;
819 } 929 }
@@ -877,7 +987,7 @@ static void nfs4_open_release(void *calldata)
877 goto out_free; 987 goto out_free;
878 state = nfs4_opendata_to_nfs4_state(data); 988 state = nfs4_opendata_to_nfs4_state(data);
879 if (!IS_ERR(state)) 989 if (!IS_ERR(state))
880 nfs4_close_state(&data->path, state, data->o_arg.open_flags); 990 nfs4_close_state(&data->path, state, data->o_arg.fmode);
881out_free: 991out_free:
882 nfs4_opendata_put(data); 992 nfs4_opendata_put(data);
883} 993}
@@ -955,10 +1065,11 @@ static int nfs4_recover_expired_lease(struct nfs_server *server)
955 int ret; 1065 int ret;
956 1066
957 for (;;) { 1067 for (;;) {
958 ret = nfs4_wait_clnt_recover(server->client, clp); 1068 ret = nfs4_wait_clnt_recover(clp);
959 if (ret != 0) 1069 if (ret != 0)
960 return ret; 1070 return ret;
961 if (!test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) 1071 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) &&
1072 !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state))
962 break; 1073 break;
963 nfs4_schedule_state_recovery(clp); 1074 nfs4_schedule_state_recovery(clp);
964 } 1075 }
@@ -993,8 +1104,9 @@ static inline int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4
993 1104
994 do { 1105 do {
995 err = _nfs4_open_expired(ctx, state); 1106 err = _nfs4_open_expired(ctx, state);
996 if (err == -NFS4ERR_DELAY) 1107 if (err != -NFS4ERR_DELAY)
997 nfs4_handle_exception(server, err, &exception); 1108 break;
1109 nfs4_handle_exception(server, err, &exception);
998 } while (exception.retry); 1110 } while (exception.retry);
999 return err; 1111 return err;
1000} 1112}
@@ -1031,12 +1143,11 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct
1031/* 1143/*
1032 * Returns a referenced nfs4_state 1144 * Returns a referenced nfs4_state
1033 */ 1145 */
1034static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res) 1146static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
1035{ 1147{
1036 struct nfs4_state_owner *sp; 1148 struct nfs4_state_owner *sp;
1037 struct nfs4_state *state = NULL; 1149 struct nfs4_state *state = NULL;
1038 struct nfs_server *server = NFS_SERVER(dir); 1150 struct nfs_server *server = NFS_SERVER(dir);
1039 struct nfs_client *clp = server->nfs_client;
1040 struct nfs4_opendata *opendata; 1151 struct nfs4_opendata *opendata;
1041 int status; 1152 int status;
1042 1153
@@ -1050,12 +1161,11 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct
1050 if (status != 0) 1161 if (status != 0)
1051 goto err_put_state_owner; 1162 goto err_put_state_owner;
1052 if (path->dentry->d_inode != NULL) 1163 if (path->dentry->d_inode != NULL)
1053 nfs4_return_incompatible_delegation(path->dentry->d_inode, flags & (FMODE_READ|FMODE_WRITE)); 1164 nfs4_return_incompatible_delegation(path->dentry->d_inode, fmode);
1054 down_read(&clp->cl_sem);
1055 status = -ENOMEM; 1165 status = -ENOMEM;
1056 opendata = nfs4_opendata_alloc(path, sp, flags, sattr); 1166 opendata = nfs4_opendata_alloc(path, sp, fmode, flags, sattr);
1057 if (opendata == NULL) 1167 if (opendata == NULL)
1058 goto err_release_rwsem; 1168 goto err_put_state_owner;
1059 1169
1060 if (path->dentry->d_inode != NULL) 1170 if (path->dentry->d_inode != NULL)
1061 opendata->state = nfs4_get_open_state(path->dentry->d_inode, sp); 1171 opendata->state = nfs4_get_open_state(path->dentry->d_inode, sp);
@@ -1073,13 +1183,10 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct
1073 goto err_opendata_put; 1183 goto err_opendata_put;
1074 nfs4_opendata_put(opendata); 1184 nfs4_opendata_put(opendata);
1075 nfs4_put_state_owner(sp); 1185 nfs4_put_state_owner(sp);
1076 up_read(&clp->cl_sem);
1077 *res = state; 1186 *res = state;
1078 return 0; 1187 return 0;
1079err_opendata_put: 1188err_opendata_put:
1080 nfs4_opendata_put(opendata); 1189 nfs4_opendata_put(opendata);
1081err_release_rwsem:
1082 up_read(&clp->cl_sem);
1083err_put_state_owner: 1190err_put_state_owner:
1084 nfs4_put_state_owner(sp); 1191 nfs4_put_state_owner(sp);
1085out_err: 1192out_err:
@@ -1088,14 +1195,14 @@ out_err:
1088} 1195}
1089 1196
1090 1197
1091static struct nfs4_state *nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred) 1198static struct nfs4_state *nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred)
1092{ 1199{
1093 struct nfs4_exception exception = { }; 1200 struct nfs4_exception exception = { };
1094 struct nfs4_state *res; 1201 struct nfs4_state *res;
1095 int status; 1202 int status;
1096 1203
1097 do { 1204 do {
1098 status = _nfs4_do_open(dir, path, flags, sattr, cred, &res); 1205 status = _nfs4_do_open(dir, path, fmode, flags, sattr, cred, &res);
1099 if (status == 0) 1206 if (status == 0)
1100 break; 1207 break;
1101 /* NOTE: BAD_SEQID means the server and client disagree about the 1208 /* NOTE: BAD_SEQID means the server and client disagree about the
@@ -1230,10 +1337,13 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
1230 renew_lease(server, calldata->timestamp); 1337 renew_lease(server, calldata->timestamp);
1231 break; 1338 break;
1232 case -NFS4ERR_STALE_STATEID: 1339 case -NFS4ERR_STALE_STATEID:
1340 case -NFS4ERR_OLD_STATEID:
1341 case -NFS4ERR_BAD_STATEID:
1233 case -NFS4ERR_EXPIRED: 1342 case -NFS4ERR_EXPIRED:
1234 break; 1343 if (calldata->arg.fmode == 0)
1344 break;
1235 default: 1345 default:
1236 if (nfs4_async_handle_error(task, server) == -EAGAIN) { 1346 if (nfs4_async_handle_error(task, server, state) == -EAGAIN) {
1237 rpc_restart_call(task); 1347 rpc_restart_call(task);
1238 return; 1348 return;
1239 } 1349 }
@@ -1272,10 +1382,10 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
1272 nfs_fattr_init(calldata->res.fattr); 1382 nfs_fattr_init(calldata->res.fattr);
1273 if (test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0) { 1383 if (test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0) {
1274 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; 1384 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
1275 calldata->arg.open_flags = FMODE_READ; 1385 calldata->arg.fmode = FMODE_READ;
1276 } else if (test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0) { 1386 } else if (test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0) {
1277 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; 1387 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
1278 calldata->arg.open_flags = FMODE_WRITE; 1388 calldata->arg.fmode = FMODE_WRITE;
1279 } 1389 }
1280 calldata->timestamp = jiffies; 1390 calldata->timestamp = jiffies;
1281 rpc_call_start(task); 1391 rpc_call_start(task);
@@ -1328,6 +1438,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait)
1328 calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid); 1438 calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid);
1329 if (calldata->arg.seqid == NULL) 1439 if (calldata->arg.seqid == NULL)
1330 goto out_free_calldata; 1440 goto out_free_calldata;
1441 calldata->arg.fmode = 0;
1331 calldata->arg.bitmask = server->attr_bitmask; 1442 calldata->arg.bitmask = server->attr_bitmask;
1332 calldata->res.fattr = &calldata->fattr; 1443 calldata->res.fattr = &calldata->fattr;
1333 calldata->res.seqid = calldata->arg.seqid; 1444 calldata->res.seqid = calldata->arg.seqid;
@@ -1354,13 +1465,13 @@ out:
1354 return status; 1465 return status;
1355} 1466}
1356 1467
1357static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct nfs4_state *state) 1468static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct nfs4_state *state, fmode_t fmode)
1358{ 1469{
1359 struct file *filp; 1470 struct file *filp;
1360 int ret; 1471 int ret;
1361 1472
1362 /* If the open_intent is for execute, we have an extra check to make */ 1473 /* If the open_intent is for execute, we have an extra check to make */
1363 if (nd->intent.open.flags & FMODE_EXEC) { 1474 if (fmode & FMODE_EXEC) {
1364 ret = nfs_may_open(state->inode, 1475 ret = nfs_may_open(state->inode,
1365 state->owner->so_cred, 1476 state->owner->so_cred,
1366 nd->intent.open.flags); 1477 nd->intent.open.flags);
@@ -1376,7 +1487,7 @@ static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct
1376 } 1487 }
1377 ret = PTR_ERR(filp); 1488 ret = PTR_ERR(filp);
1378out_close: 1489out_close:
1379 nfs4_close_sync(path, state, nd->intent.open.flags); 1490 nfs4_close_sync(path, state, fmode & (FMODE_READ|FMODE_WRITE));
1380 return ret; 1491 return ret;
1381} 1492}
1382 1493
@@ -1392,6 +1503,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
1392 struct rpc_cred *cred; 1503 struct rpc_cred *cred;
1393 struct nfs4_state *state; 1504 struct nfs4_state *state;
1394 struct dentry *res; 1505 struct dentry *res;
1506 fmode_t fmode = nd->intent.open.flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
1395 1507
1396 if (nd->flags & LOOKUP_CREATE) { 1508 if (nd->flags & LOOKUP_CREATE) {
1397 attr.ia_mode = nd->intent.open.create_mode; 1509 attr.ia_mode = nd->intent.open.create_mode;
@@ -1409,7 +1521,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
1409 parent = dentry->d_parent; 1521 parent = dentry->d_parent;
1410 /* Protect against concurrent sillydeletes */ 1522 /* Protect against concurrent sillydeletes */
1411 nfs_block_sillyrename(parent); 1523 nfs_block_sillyrename(parent);
1412 state = nfs4_do_open(dir, &path, nd->intent.open.flags, &attr, cred); 1524 state = nfs4_do_open(dir, &path, fmode, nd->intent.open.flags, &attr, cred);
1413 put_rpccred(cred); 1525 put_rpccred(cred);
1414 if (IS_ERR(state)) { 1526 if (IS_ERR(state)) {
1415 if (PTR_ERR(state) == -ENOENT) { 1527 if (PTR_ERR(state) == -ENOENT) {
@@ -1424,7 +1536,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
1424 path.dentry = res; 1536 path.dentry = res;
1425 nfs_set_verifier(path.dentry, nfs_save_change_attribute(dir)); 1537 nfs_set_verifier(path.dentry, nfs_save_change_attribute(dir));
1426 nfs_unblock_sillyrename(parent); 1538 nfs_unblock_sillyrename(parent);
1427 nfs4_intent_set_file(nd, &path, state); 1539 nfs4_intent_set_file(nd, &path, state, fmode);
1428 return res; 1540 return res;
1429} 1541}
1430 1542
@@ -1437,11 +1549,12 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st
1437 }; 1549 };
1438 struct rpc_cred *cred; 1550 struct rpc_cred *cred;
1439 struct nfs4_state *state; 1551 struct nfs4_state *state;
1552 fmode_t fmode = openflags & (FMODE_READ | FMODE_WRITE);
1440 1553
1441 cred = rpc_lookup_cred(); 1554 cred = rpc_lookup_cred();
1442 if (IS_ERR(cred)) 1555 if (IS_ERR(cred))
1443 return PTR_ERR(cred); 1556 return PTR_ERR(cred);
1444 state = nfs4_do_open(dir, &path, openflags, NULL, cred); 1557 state = nfs4_do_open(dir, &path, fmode, openflags, NULL, cred);
1445 put_rpccred(cred); 1558 put_rpccred(cred);
1446 if (IS_ERR(state)) { 1559 if (IS_ERR(state)) {
1447 switch (PTR_ERR(state)) { 1560 switch (PTR_ERR(state)) {
@@ -1458,10 +1571,10 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st
1458 } 1571 }
1459 if (state->inode == dentry->d_inode) { 1572 if (state->inode == dentry->d_inode) {
1460 nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); 1573 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1461 nfs4_intent_set_file(nd, &path, state); 1574 nfs4_intent_set_file(nd, &path, state, fmode);
1462 return 1; 1575 return 1;
1463 } 1576 }
1464 nfs4_close_sync(&path, state, openflags); 1577 nfs4_close_sync(&path, state, fmode);
1465out_drop: 1578out_drop:
1466 d_drop(dentry); 1579 d_drop(dentry);
1467 return 0; 1580 return 0;
@@ -1887,6 +2000,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
1887 }; 2000 };
1888 struct nfs4_state *state; 2001 struct nfs4_state *state;
1889 struct rpc_cred *cred; 2002 struct rpc_cred *cred;
2003 fmode_t fmode = flags & (FMODE_READ | FMODE_WRITE);
1890 int status = 0; 2004 int status = 0;
1891 2005
1892 cred = rpc_lookup_cred(); 2006 cred = rpc_lookup_cred();
@@ -1894,7 +2008,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
1894 status = PTR_ERR(cred); 2008 status = PTR_ERR(cred);
1895 goto out; 2009 goto out;
1896 } 2010 }
1897 state = nfs4_do_open(dir, &path, flags, sattr, cred); 2011 state = nfs4_do_open(dir, &path, fmode, flags, sattr, cred);
1898 d_drop(dentry); 2012 d_drop(dentry);
1899 if (IS_ERR(state)) { 2013 if (IS_ERR(state)) {
1900 status = PTR_ERR(state); 2014 status = PTR_ERR(state);
@@ -1910,9 +2024,9 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
1910 nfs_post_op_update_inode(state->inode, &fattr); 2024 nfs_post_op_update_inode(state->inode, &fattr);
1911 } 2025 }
1912 if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0) 2026 if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0)
1913 status = nfs4_intent_set_file(nd, &path, state); 2027 status = nfs4_intent_set_file(nd, &path, state, fmode);
1914 else 2028 else
1915 nfs4_close_sync(&path, state, flags); 2029 nfs4_close_sync(&path, state, fmode);
1916out_putcred: 2030out_putcred:
1917 put_rpccred(cred); 2031 put_rpccred(cred);
1918out: 2032out:
@@ -1974,7 +2088,7 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir)
1974{ 2088{
1975 struct nfs_removeres *res = task->tk_msg.rpc_resp; 2089 struct nfs_removeres *res = task->tk_msg.rpc_resp;
1976 2090
1977 if (nfs4_async_handle_error(task, res->server) == -EAGAIN) 2091 if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN)
1978 return 0; 2092 return 0;
1979 update_changeattr(dir, &res->cinfo); 2093 update_changeattr(dir, &res->cinfo);
1980 nfs_post_op_update_inode(dir, &res->dir_attr); 2094 nfs_post_op_update_inode(dir, &res->dir_attr);
@@ -2402,7 +2516,7 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
2402{ 2516{
2403 struct nfs_server *server = NFS_SERVER(data->inode); 2517 struct nfs_server *server = NFS_SERVER(data->inode);
2404 2518
2405 if (nfs4_async_handle_error(task, server) == -EAGAIN) { 2519 if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {
2406 rpc_restart_call(task); 2520 rpc_restart_call(task);
2407 return -EAGAIN; 2521 return -EAGAIN;
2408 } 2522 }
@@ -2423,7 +2537,7 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
2423{ 2537{
2424 struct inode *inode = data->inode; 2538 struct inode *inode = data->inode;
2425 2539
2426 if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { 2540 if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) {
2427 rpc_restart_call(task); 2541 rpc_restart_call(task);
2428 return -EAGAIN; 2542 return -EAGAIN;
2429 } 2543 }
@@ -2449,7 +2563,7 @@ static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
2449{ 2563{
2450 struct inode *inode = data->inode; 2564 struct inode *inode = data->inode;
2451 2565
2452 if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { 2566 if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) {
2453 rpc_restart_call(task); 2567 rpc_restart_call(task);
2454 return -EAGAIN; 2568 return -EAGAIN;
2455 } 2569 }
@@ -2742,19 +2856,25 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen
2742} 2856}
2743 2857
2744static int 2858static int
2745nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server) 2859nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state)
2746{ 2860{
2747 struct nfs_client *clp = server->nfs_client; 2861 struct nfs_client *clp = server->nfs_client;
2748 2862
2749 if (!clp || task->tk_status >= 0) 2863 if (!clp || task->tk_status >= 0)
2750 return 0; 2864 return 0;
2751 switch(task->tk_status) { 2865 switch(task->tk_status) {
2866 case -NFS4ERR_ADMIN_REVOKED:
2867 case -NFS4ERR_BAD_STATEID:
2868 case -NFS4ERR_OPENMODE:
2869 if (state == NULL)
2870 break;
2871 nfs4_state_mark_reclaim_nograce(clp, state);
2752 case -NFS4ERR_STALE_CLIENTID: 2872 case -NFS4ERR_STALE_CLIENTID:
2753 case -NFS4ERR_STALE_STATEID: 2873 case -NFS4ERR_STALE_STATEID:
2754 case -NFS4ERR_EXPIRED: 2874 case -NFS4ERR_EXPIRED:
2755 rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL); 2875 rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
2756 nfs4_schedule_state_recovery(clp); 2876 nfs4_schedule_state_recovery(clp);
2757 if (test_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) == 0) 2877 if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
2758 rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task); 2878 rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
2759 task->tk_status = 0; 2879 task->tk_status = 0;
2760 return -EAGAIN; 2880 return -EAGAIN;
@@ -2772,79 +2892,6 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server)
2772 return 0; 2892 return 0;
2773} 2893}
2774 2894
2775static int nfs4_wait_bit_killable(void *word)
2776{
2777 if (fatal_signal_pending(current))
2778 return -ERESTARTSYS;
2779 schedule();
2780 return 0;
2781}
2782
2783static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp)
2784{
2785 int res;
2786
2787 might_sleep();
2788
2789 rwsem_acquire(&clp->cl_sem.dep_map, 0, 0, _RET_IP_);
2790
2791 res = wait_on_bit(&clp->cl_state, NFS4CLNT_STATE_RECOVER,
2792 nfs4_wait_bit_killable, TASK_KILLABLE);
2793
2794 rwsem_release(&clp->cl_sem.dep_map, 1, _RET_IP_);
2795 return res;
2796}
2797
2798static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
2799{
2800 int res = 0;
2801
2802 might_sleep();
2803
2804 if (*timeout <= 0)
2805 *timeout = NFS4_POLL_RETRY_MIN;
2806 if (*timeout > NFS4_POLL_RETRY_MAX)
2807 *timeout = NFS4_POLL_RETRY_MAX;
2808 schedule_timeout_killable(*timeout);
2809 if (fatal_signal_pending(current))
2810 res = -ERESTARTSYS;
2811 *timeout <<= 1;
2812 return res;
2813}
2814
2815/* This is the error handling routine for processes that are allowed
2816 * to sleep.
2817 */
2818static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
2819{
2820 struct nfs_client *clp = server->nfs_client;
2821 int ret = errorcode;
2822
2823 exception->retry = 0;
2824 switch(errorcode) {
2825 case 0:
2826 return 0;
2827 case -NFS4ERR_STALE_CLIENTID:
2828 case -NFS4ERR_STALE_STATEID:
2829 case -NFS4ERR_EXPIRED:
2830 nfs4_schedule_state_recovery(clp);
2831 ret = nfs4_wait_clnt_recover(server->client, clp);
2832 if (ret == 0)
2833 exception->retry = 1;
2834 break;
2835 case -NFS4ERR_FILE_OPEN:
2836 case -NFS4ERR_GRACE:
2837 case -NFS4ERR_DELAY:
2838 ret = nfs4_delay(server->client, &exception->timeout);
2839 if (ret != 0)
2840 break;
2841 case -NFS4ERR_OLD_STATEID:
2842 exception->retry = 1;
2843 }
2844 /* We failed to handle the error */
2845 return nfs4_map_errors(ret);
2846}
2847
2848int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short port, struct rpc_cred *cred) 2895int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short port, struct rpc_cred *cred)
2849{ 2896{
2850 nfs4_verifier sc_verifier; 2897 nfs4_verifier sc_verifier;
@@ -2916,7 +2963,6 @@ static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cre
2916 spin_lock(&clp->cl_lock); 2963 spin_lock(&clp->cl_lock);
2917 clp->cl_lease_time = fsinfo.lease_time * HZ; 2964 clp->cl_lease_time = fsinfo.lease_time * HZ;
2918 clp->cl_last_renewal = now; 2965 clp->cl_last_renewal = now;
2919 clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
2920 spin_unlock(&clp->cl_lock); 2966 spin_unlock(&clp->cl_lock);
2921 } 2967 }
2922 return status; 2968 return status;
@@ -3074,7 +3120,6 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
3074 struct nfs4_lock_state *lsp; 3120 struct nfs4_lock_state *lsp;
3075 int status; 3121 int status;
3076 3122
3077 down_read(&clp->cl_sem);
3078 arg.lock_owner.clientid = clp->cl_clientid; 3123 arg.lock_owner.clientid = clp->cl_clientid;
3079 status = nfs4_set_lock_state(state, request); 3124 status = nfs4_set_lock_state(state, request);
3080 if (status != 0) 3125 if (status != 0)
@@ -3091,7 +3136,6 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
3091 } 3136 }
3092 request->fl_ops->fl_release_private(request); 3137 request->fl_ops->fl_release_private(request);
3093out: 3138out:
3094 up_read(&clp->cl_sem);
3095 return status; 3139 return status;
3096} 3140}
3097 3141
@@ -3181,11 +3225,13 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
3181 sizeof(calldata->lsp->ls_stateid.data)); 3225 sizeof(calldata->lsp->ls_stateid.data));
3182 renew_lease(calldata->server, calldata->timestamp); 3226 renew_lease(calldata->server, calldata->timestamp);
3183 break; 3227 break;
3228 case -NFS4ERR_BAD_STATEID:
3229 case -NFS4ERR_OLD_STATEID:
3184 case -NFS4ERR_STALE_STATEID: 3230 case -NFS4ERR_STALE_STATEID:
3185 case -NFS4ERR_EXPIRED: 3231 case -NFS4ERR_EXPIRED:
3186 break; 3232 break;
3187 default: 3233 default:
3188 if (nfs4_async_handle_error(task, calldata->server) == -EAGAIN) 3234 if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN)
3189 rpc_restart_call(task); 3235 rpc_restart_call(task);
3190 } 3236 }
3191} 3237}
@@ -3248,6 +3294,7 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
3248 3294
3249static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) 3295static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
3250{ 3296{
3297 struct nfs_inode *nfsi = NFS_I(state->inode);
3251 struct nfs_seqid *seqid; 3298 struct nfs_seqid *seqid;
3252 struct nfs4_lock_state *lsp; 3299 struct nfs4_lock_state *lsp;
3253 struct rpc_task *task; 3300 struct rpc_task *task;
@@ -3257,8 +3304,12 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
3257 status = nfs4_set_lock_state(state, request); 3304 status = nfs4_set_lock_state(state, request);
3258 /* Unlock _before_ we do the RPC call */ 3305 /* Unlock _before_ we do the RPC call */
3259 request->fl_flags |= FL_EXISTS; 3306 request->fl_flags |= FL_EXISTS;
3260 if (do_vfs_lock(request->fl_file, request) == -ENOENT) 3307 down_read(&nfsi->rwsem);
3308 if (do_vfs_lock(request->fl_file, request) == -ENOENT) {
3309 up_read(&nfsi->rwsem);
3261 goto out; 3310 goto out;
3311 }
3312 up_read(&nfsi->rwsem);
3262 if (status != 0) 3313 if (status != 0)
3263 goto out; 3314 goto out;
3264 /* Is this a delegated lock? */ 3315 /* Is this a delegated lock? */
@@ -3484,7 +3535,7 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request
3484 3535
3485static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) 3536static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
3486{ 3537{
3487 struct nfs_client *clp = state->owner->so_client; 3538 struct nfs_inode *nfsi = NFS_I(state->inode);
3488 unsigned char fl_flags = request->fl_flags; 3539 unsigned char fl_flags = request->fl_flags;
3489 int status; 3540 int status;
3490 3541
@@ -3496,19 +3547,13 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
3496 status = do_vfs_lock(request->fl_file, request); 3547 status = do_vfs_lock(request->fl_file, request);
3497 if (status < 0) 3548 if (status < 0)
3498 goto out; 3549 goto out;
3499 down_read(&clp->cl_sem); 3550 down_read(&nfsi->rwsem);
3500 if (test_bit(NFS_DELEGATED_STATE, &state->flags)) { 3551 if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
3501 struct nfs_inode *nfsi = NFS_I(state->inode);
3502 /* Yes: cache locks! */ 3552 /* Yes: cache locks! */
3503 down_read(&nfsi->rwsem);
3504 /* ...but avoid races with delegation recall... */ 3553 /* ...but avoid races with delegation recall... */
3505 if (test_bit(NFS_DELEGATED_STATE, &state->flags)) { 3554 request->fl_flags = fl_flags & ~FL_SLEEP;
3506 request->fl_flags = fl_flags & ~FL_SLEEP; 3555 status = do_vfs_lock(request->fl_file, request);
3507 status = do_vfs_lock(request->fl_file, request); 3556 goto out_unlock;
3508 up_read(&nfsi->rwsem);
3509 goto out_unlock;
3510 }
3511 up_read(&nfsi->rwsem);
3512 } 3557 }
3513 status = _nfs4_do_setlk(state, cmd, request, 0); 3558 status = _nfs4_do_setlk(state, cmd, request, 0);
3514 if (status != 0) 3559 if (status != 0)
@@ -3518,7 +3563,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
3518 if (do_vfs_lock(request->fl_file, request) < 0) 3563 if (do_vfs_lock(request->fl_file, request) < 0)
3519 printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __func__); 3564 printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __func__);
3520out_unlock: 3565out_unlock:
3521 up_read(&clp->cl_sem); 3566 up_read(&nfsi->rwsem);
3522out: 3567out:
3523 request->fl_flags = fl_flags; 3568 request->fl_flags = fl_flags;
3524 return status; 3569 return status;
@@ -3664,11 +3709,15 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
3664} 3709}
3665 3710
3666struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops = { 3711struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops = {
3712 .owner_flag_bit = NFS_OWNER_RECLAIM_REBOOT,
3713 .state_flag_bit = NFS_STATE_RECLAIM_REBOOT,
3667 .recover_open = nfs4_open_reclaim, 3714 .recover_open = nfs4_open_reclaim,
3668 .recover_lock = nfs4_lock_reclaim, 3715 .recover_lock = nfs4_lock_reclaim,
3669}; 3716};
3670 3717
3671struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops = { 3718struct nfs4_state_recovery_ops nfs4_nograce_recovery_ops = {
3719 .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE,
3720 .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE,
3672 .recover_open = nfs4_open_expired, 3721 .recover_open = nfs4_open_expired,
3673 .recover_lock = nfs4_lock_expired, 3722 .recover_lock = nfs4_lock_expired,
3674}; 3723};
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 3305acbbe2a..f524e932ff7 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -65,7 +65,6 @@ nfs4_renew_state(struct work_struct *work)
65 long lease, timeout; 65 long lease, timeout;
66 unsigned long last, now; 66 unsigned long last, now;
67 67
68 down_read(&clp->cl_sem);
69 dprintk("%s: start\n", __func__); 68 dprintk("%s: start\n", __func__);
70 /* Are there any active superblocks? */ 69 /* Are there any active superblocks? */
71 if (list_empty(&clp->cl_superblocks)) 70 if (list_empty(&clp->cl_superblocks))
@@ -77,17 +76,19 @@ nfs4_renew_state(struct work_struct *work)
77 timeout = (2 * lease) / 3 + (long)last - (long)now; 76 timeout = (2 * lease) / 3 + (long)last - (long)now;
78 /* Are we close to a lease timeout? */ 77 /* Are we close to a lease timeout? */
79 if (time_after(now, last + lease/3)) { 78 if (time_after(now, last + lease/3)) {
80 cred = nfs4_get_renew_cred(clp); 79 cred = nfs4_get_renew_cred_locked(clp);
80 spin_unlock(&clp->cl_lock);
81 if (cred == NULL) { 81 if (cred == NULL) {
82 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); 82 if (list_empty(&clp->cl_delegations)) {
83 spin_unlock(&clp->cl_lock); 83 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
84 goto out;
85 }
84 nfs_expire_all_delegations(clp); 86 nfs_expire_all_delegations(clp);
85 goto out; 87 } else {
88 /* Queue an asynchronous RENEW. */
89 nfs4_proc_async_renew(clp, cred);
90 put_rpccred(cred);
86 } 91 }
87 spin_unlock(&clp->cl_lock);
88 /* Queue an asynchronous RENEW. */
89 nfs4_proc_async_renew(clp, cred);
90 put_rpccred(cred);
91 timeout = (2 * lease) / 3; 92 timeout = (2 * lease) / 3;
92 spin_lock(&clp->cl_lock); 93 spin_lock(&clp->cl_lock);
93 } else 94 } else
@@ -100,12 +101,11 @@ nfs4_renew_state(struct work_struct *work)
100 cancel_delayed_work(&clp->cl_renewd); 101 cancel_delayed_work(&clp->cl_renewd);
101 schedule_delayed_work(&clp->cl_renewd, timeout); 102 schedule_delayed_work(&clp->cl_renewd, timeout);
102 spin_unlock(&clp->cl_lock); 103 spin_unlock(&clp->cl_lock);
104 nfs_expire_unreferenced_delegations(clp);
103out: 105out:
104 up_read(&clp->cl_sem);
105 dprintk("%s: done\n", __func__); 106 dprintk("%s: done\n", __func__);
106} 107}
107 108
108/* Must be called with clp->cl_sem locked for writes */
109void 109void
110nfs4_schedule_state_renewal(struct nfs_client *clp) 110nfs4_schedule_state_renewal(struct nfs_client *clp)
111{ 111{
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 401ef8b28f9..2022fe47966 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -71,14 +71,12 @@ static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred)
71 return status; 71 return status;
72} 72}
73 73
74static struct rpc_cred *nfs4_get_machine_cred(struct nfs_client *clp) 74static struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp)
75{ 75{
76 struct rpc_cred *cred = NULL; 76 struct rpc_cred *cred = NULL;
77 77
78 spin_lock(&clp->cl_lock);
79 if (clp->cl_machine_cred != NULL) 78 if (clp->cl_machine_cred != NULL)
80 cred = get_rpccred(clp->cl_machine_cred); 79 cred = get_rpccred(clp->cl_machine_cred);
81 spin_unlock(&clp->cl_lock);
82 return cred; 80 return cred;
83} 81}
84 82
@@ -94,7 +92,7 @@ static void nfs4_clear_machine_cred(struct nfs_client *clp)
94 put_rpccred(cred); 92 put_rpccred(cred);
95} 93}
96 94
97struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp) 95struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp)
98{ 96{
99 struct nfs4_state_owner *sp; 97 struct nfs4_state_owner *sp;
100 struct rb_node *pos; 98 struct rb_node *pos;
@@ -110,13 +108,24 @@ struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp)
110 return cred; 108 return cred;
111} 109}
112 110
111static struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp)
112{
113 struct rpc_cred *cred;
114
115 spin_lock(&clp->cl_lock);
116 cred = nfs4_get_renew_cred_locked(clp);
117 spin_unlock(&clp->cl_lock);
118 return cred;
119}
120
113static struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp) 121static struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp)
114{ 122{
115 struct nfs4_state_owner *sp; 123 struct nfs4_state_owner *sp;
116 struct rb_node *pos; 124 struct rb_node *pos;
117 struct rpc_cred *cred; 125 struct rpc_cred *cred;
118 126
119 cred = nfs4_get_machine_cred(clp); 127 spin_lock(&clp->cl_lock);
128 cred = nfs4_get_machine_cred_locked(clp);
120 if (cred != NULL) 129 if (cred != NULL)
121 goto out; 130 goto out;
122 pos = rb_first(&clp->cl_state_owners); 131 pos = rb_first(&clp->cl_state_owners);
@@ -125,6 +134,7 @@ static struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp)
125 cred = get_rpccred(sp->so_cred); 134 cred = get_rpccred(sp->so_cred);
126 } 135 }
127out: 136out:
137 spin_unlock(&clp->cl_lock);
128 return cred; 138 return cred;
129} 139}
130 140
@@ -295,10 +305,6 @@ nfs4_drop_state_owner(struct nfs4_state_owner *sp)
295 } 305 }
296} 306}
297 307
298/*
299 * Note: must be called with clp->cl_sem held in order to prevent races
300 * with reboot recovery!
301 */
302struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred) 308struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred)
303{ 309{
304 struct nfs_client *clp = server->nfs_client; 310 struct nfs_client *clp = server->nfs_client;
@@ -327,10 +333,6 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct
327 return sp; 333 return sp;
328} 334}
329 335
330/*
331 * Must be called with clp->cl_sem held in order to avoid races
332 * with state recovery...
333 */
334void nfs4_put_state_owner(struct nfs4_state_owner *sp) 336void nfs4_put_state_owner(struct nfs4_state_owner *sp)
335{ 337{
336 struct nfs_client *clp = sp->so_client; 338 struct nfs_client *clp = sp->so_client;
@@ -361,18 +363,18 @@ nfs4_alloc_open_state(void)
361} 363}
362 364
363void 365void
364nfs4_state_set_mode_locked(struct nfs4_state *state, mode_t mode) 366nfs4_state_set_mode_locked(struct nfs4_state *state, fmode_t fmode)
365{ 367{
366 if (state->state == mode) 368 if (state->state == fmode)
367 return; 369 return;
368 /* NB! List reordering - see the reclaim code for why. */ 370 /* NB! List reordering - see the reclaim code for why. */
369 if ((mode & FMODE_WRITE) != (state->state & FMODE_WRITE)) { 371 if ((fmode & FMODE_WRITE) != (state->state & FMODE_WRITE)) {
370 if (mode & FMODE_WRITE) 372 if (fmode & FMODE_WRITE)
371 list_move(&state->open_states, &state->owner->so_states); 373 list_move(&state->open_states, &state->owner->so_states);
372 else 374 else
373 list_move_tail(&state->open_states, &state->owner->so_states); 375 list_move_tail(&state->open_states, &state->owner->so_states);
374 } 376 }
375 state->state = mode; 377 state->state = fmode;
376} 378}
377 379
378static struct nfs4_state * 380static struct nfs4_state *
@@ -432,10 +434,6 @@ out:
432 return state; 434 return state;
433} 435}
434 436
435/*
436 * Beware! Caller must be holding exactly one
437 * reference to clp->cl_sem!
438 */
439void nfs4_put_open_state(struct nfs4_state *state) 437void nfs4_put_open_state(struct nfs4_state *state)
440{ 438{
441 struct inode *inode = state->inode; 439 struct inode *inode = state->inode;
@@ -456,16 +454,16 @@ void nfs4_put_open_state(struct nfs4_state *state)
456/* 454/*
457 * Close the current file. 455 * Close the current file.
458 */ 456 */
459static void __nfs4_close(struct path *path, struct nfs4_state *state, mode_t mode, int wait) 457static void __nfs4_close(struct path *path, struct nfs4_state *state, fmode_t fmode, int wait)
460{ 458{
461 struct nfs4_state_owner *owner = state->owner; 459 struct nfs4_state_owner *owner = state->owner;
462 int call_close = 0; 460 int call_close = 0;
463 int newstate; 461 fmode_t newstate;
464 462
465 atomic_inc(&owner->so_count); 463 atomic_inc(&owner->so_count);
466 /* Protect against nfs4_find_state() */ 464 /* Protect against nfs4_find_state() */
467 spin_lock(&owner->so_lock); 465 spin_lock(&owner->so_lock);
468 switch (mode & (FMODE_READ | FMODE_WRITE)) { 466 switch (fmode & (FMODE_READ | FMODE_WRITE)) {
469 case FMODE_READ: 467 case FMODE_READ:
470 state->n_rdonly--; 468 state->n_rdonly--;
471 break; 469 break;
@@ -500,14 +498,14 @@ static void __nfs4_close(struct path *path, struct nfs4_state *state, mode_t mod
500 nfs4_do_close(path, state, wait); 498 nfs4_do_close(path, state, wait);
501} 499}
502 500
503void nfs4_close_state(struct path *path, struct nfs4_state *state, mode_t mode) 501void nfs4_close_state(struct path *path, struct nfs4_state *state, fmode_t fmode)
504{ 502{
505 __nfs4_close(path, state, mode, 0); 503 __nfs4_close(path, state, fmode, 0);
506} 504}
507 505
508void nfs4_close_sync(struct path *path, struct nfs4_state *state, mode_t mode) 506void nfs4_close_sync(struct path *path, struct nfs4_state *state, fmode_t fmode)
509{ 507{
510 __nfs4_close(path, state, mode, 1); 508 __nfs4_close(path, state, fmode, 1);
511} 509}
512 510
513/* 511/*
@@ -568,7 +566,6 @@ static void nfs4_free_lock_state(struct nfs4_lock_state *lsp)
568 * Return a compatible lock_state. If no initialized lock_state structure 566 * Return a compatible lock_state. If no initialized lock_state structure
569 * exists, return an uninitialized one. 567 * exists, return an uninitialized one.
570 * 568 *
571 * The caller must be holding clp->cl_sem
572 */ 569 */
573static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner) 570static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner)
574{ 571{
@@ -770,32 +767,34 @@ unlock:
770 return status; 767 return status;
771} 768}
772 769
773static int reclaimer(void *); 770static int nfs4_run_state_manager(void *);
774 771
775static inline void nfs4_clear_recover_bit(struct nfs_client *clp) 772static void nfs4_clear_state_manager_bit(struct nfs_client *clp)
776{ 773{
777 smp_mb__before_clear_bit(); 774 smp_mb__before_clear_bit();
778 clear_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state); 775 clear_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state);
779 smp_mb__after_clear_bit(); 776 smp_mb__after_clear_bit();
780 wake_up_bit(&clp->cl_state, NFS4CLNT_STATE_RECOVER); 777 wake_up_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING);
781 rpc_wake_up(&clp->cl_rpcwaitq); 778 rpc_wake_up(&clp->cl_rpcwaitq);
782} 779}
783 780
784/* 781/*
785 * State recovery routine 782 * Schedule the nfs_client asynchronous state management routine
786 */ 783 */
787static void nfs4_recover_state(struct nfs_client *clp) 784void nfs4_schedule_state_manager(struct nfs_client *clp)
788{ 785{
789 struct task_struct *task; 786 struct task_struct *task;
790 787
788 if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
789 return;
791 __module_get(THIS_MODULE); 790 __module_get(THIS_MODULE);
792 atomic_inc(&clp->cl_count); 791 atomic_inc(&clp->cl_count);
793 task = kthread_run(reclaimer, clp, "%s-reclaim", 792 task = kthread_run(nfs4_run_state_manager, clp, "%s-manager",
794 rpc_peeraddr2str(clp->cl_rpcclient, 793 rpc_peeraddr2str(clp->cl_rpcclient,
795 RPC_DISPLAY_ADDR)); 794 RPC_DISPLAY_ADDR));
796 if (!IS_ERR(task)) 795 if (!IS_ERR(task))
797 return; 796 return;
798 nfs4_clear_recover_bit(clp); 797 nfs4_clear_state_manager_bit(clp);
799 nfs_put_client(clp); 798 nfs_put_client(clp);
800 module_put(THIS_MODULE); 799 module_put(THIS_MODULE);
801} 800}
@@ -807,16 +806,42 @@ void nfs4_schedule_state_recovery(struct nfs_client *clp)
807{ 806{
808 if (!clp) 807 if (!clp)
809 return; 808 return;
810 if (test_and_set_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) == 0) 809 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
811 nfs4_recover_state(clp); 810 set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
811 nfs4_schedule_state_manager(clp);
812} 812}
813 813
814static int nfs4_reclaim_locks(struct nfs4_state_recovery_ops *ops, struct nfs4_state *state) 814static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state)
815{
816
817 set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
818 /* Don't recover state that expired before the reboot */
819 if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags)) {
820 clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
821 return 0;
822 }
823 set_bit(NFS_OWNER_RECLAIM_REBOOT, &state->owner->so_flags);
824 set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
825 return 1;
826}
827
828int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state)
829{
830 set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags);
831 clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
832 set_bit(NFS_OWNER_RECLAIM_NOGRACE, &state->owner->so_flags);
833 set_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state);
834 return 1;
835}
836
837static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops)
815{ 838{
816 struct inode *inode = state->inode; 839 struct inode *inode = state->inode;
840 struct nfs_inode *nfsi = NFS_I(inode);
817 struct file_lock *fl; 841 struct file_lock *fl;
818 int status = 0; 842 int status = 0;
819 843
844 down_write(&nfsi->rwsem);
820 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { 845 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
821 if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) 846 if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
822 continue; 847 continue;
@@ -839,12 +864,14 @@ static int nfs4_reclaim_locks(struct nfs4_state_recovery_ops *ops, struct nfs4_s
839 goto out_err; 864 goto out_err;
840 } 865 }
841 } 866 }
867 up_write(&nfsi->rwsem);
842 return 0; 868 return 0;
843out_err: 869out_err:
870 up_write(&nfsi->rwsem);
844 return status; 871 return status;
845} 872}
846 873
847static int nfs4_reclaim_open_state(struct nfs4_state_recovery_ops *ops, struct nfs4_state_owner *sp) 874static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs4_state_recovery_ops *ops)
848{ 875{
849 struct nfs4_state *state; 876 struct nfs4_state *state;
850 struct nfs4_lock_state *lock; 877 struct nfs4_lock_state *lock;
@@ -858,28 +885,34 @@ static int nfs4_reclaim_open_state(struct nfs4_state_recovery_ops *ops, struct n
858 * recovering after a network partition or a reboot from a 885 * recovering after a network partition or a reboot from a
859 * server that doesn't support a grace period. 886 * server that doesn't support a grace period.
860 */ 887 */
888restart:
889 spin_lock(&sp->so_lock);
861 list_for_each_entry(state, &sp->so_states, open_states) { 890 list_for_each_entry(state, &sp->so_states, open_states) {
891 if (!test_and_clear_bit(ops->state_flag_bit, &state->flags))
892 continue;
862 if (state->state == 0) 893 if (state->state == 0)
863 continue; 894 continue;
895 atomic_inc(&state->count);
896 spin_unlock(&sp->so_lock);
864 status = ops->recover_open(sp, state); 897 status = ops->recover_open(sp, state);
865 if (status >= 0) { 898 if (status >= 0) {
866 status = nfs4_reclaim_locks(ops, state); 899 status = nfs4_reclaim_locks(state, ops);
867 if (status < 0) 900 if (status >= 0) {
868 goto out_err; 901 list_for_each_entry(lock, &state->lock_states, ls_locks) {
869 list_for_each_entry(lock, &state->lock_states, ls_locks) { 902 if (!(lock->ls_flags & NFS_LOCK_INITIALIZED))
870 if (!(lock->ls_flags & NFS_LOCK_INITIALIZED)) 903 printk("%s: Lock reclaim failed!\n",
871 printk("%s: Lock reclaim failed!\n",
872 __func__); 904 __func__);
905 }
906 nfs4_put_open_state(state);
907 goto restart;
873 } 908 }
874 continue;
875 } 909 }
876 switch (status) { 910 switch (status) {
877 default: 911 default:
878 printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n", 912 printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n",
879 __func__, status); 913 __func__, status);
880 case -ENOENT: 914 case -ENOENT:
881 case -NFS4ERR_RECLAIM_BAD: 915 case -ESTALE:
882 case -NFS4ERR_RECLAIM_CONFLICT:
883 /* 916 /*
884 * Open state on this file cannot be recovered 917 * Open state on this file cannot be recovered
885 * All we can do is revert to using the zero stateid. 918 * All we can do is revert to using the zero stateid.
@@ -889,84 +922,176 @@ static int nfs4_reclaim_open_state(struct nfs4_state_recovery_ops *ops, struct n
889 /* Mark the file as being 'closed' */ 922 /* Mark the file as being 'closed' */
890 state->state = 0; 923 state->state = 0;
891 break; 924 break;
925 case -NFS4ERR_RECLAIM_BAD:
926 case -NFS4ERR_RECLAIM_CONFLICT:
927 nfs4_state_mark_reclaim_nograce(sp->so_client, state);
928 break;
892 case -NFS4ERR_EXPIRED: 929 case -NFS4ERR_EXPIRED:
893 case -NFS4ERR_NO_GRACE: 930 case -NFS4ERR_NO_GRACE:
931 nfs4_state_mark_reclaim_nograce(sp->so_client, state);
894 case -NFS4ERR_STALE_CLIENTID: 932 case -NFS4ERR_STALE_CLIENTID:
895 goto out_err; 933 goto out_err;
896 } 934 }
935 nfs4_put_open_state(state);
936 goto restart;
897 } 937 }
938 spin_unlock(&sp->so_lock);
898 return 0; 939 return 0;
899out_err: 940out_err:
941 nfs4_put_open_state(state);
900 return status; 942 return status;
901} 943}
902 944
903static void nfs4_state_mark_reclaim(struct nfs_client *clp) 945static void nfs4_clear_open_state(struct nfs4_state *state)
946{
947 struct nfs4_lock_state *lock;
948
949 clear_bit(NFS_DELEGATED_STATE, &state->flags);
950 clear_bit(NFS_O_RDONLY_STATE, &state->flags);
951 clear_bit(NFS_O_WRONLY_STATE, &state->flags);
952 clear_bit(NFS_O_RDWR_STATE, &state->flags);
953 list_for_each_entry(lock, &state->lock_states, ls_locks) {
954 lock->ls_seqid.flags = 0;
955 lock->ls_flags &= ~NFS_LOCK_INITIALIZED;
956 }
957}
958
959static void nfs4_state_mark_reclaim_helper(struct nfs_client *clp, int (*mark_reclaim)(struct nfs_client *clp, struct nfs4_state *state))
904{ 960{
905 struct nfs4_state_owner *sp; 961 struct nfs4_state_owner *sp;
906 struct rb_node *pos; 962 struct rb_node *pos;
907 struct nfs4_state *state; 963 struct nfs4_state *state;
908 struct nfs4_lock_state *lock;
909 964
910 /* Reset all sequence ids to zero */ 965 /* Reset all sequence ids to zero */
911 for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { 966 for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
912 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); 967 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
913 sp->so_seqid.counter = 0;
914 sp->so_seqid.flags = 0; 968 sp->so_seqid.flags = 0;
915 spin_lock(&sp->so_lock); 969 spin_lock(&sp->so_lock);
916 list_for_each_entry(state, &sp->so_states, open_states) { 970 list_for_each_entry(state, &sp->so_states, open_states) {
917 clear_bit(NFS_DELEGATED_STATE, &state->flags); 971 if (mark_reclaim(clp, state))
918 clear_bit(NFS_O_RDONLY_STATE, &state->flags); 972 nfs4_clear_open_state(state);
919 clear_bit(NFS_O_WRONLY_STATE, &state->flags);
920 clear_bit(NFS_O_RDWR_STATE, &state->flags);
921 list_for_each_entry(lock, &state->lock_states, ls_locks) {
922 lock->ls_seqid.counter = 0;
923 lock->ls_seqid.flags = 0;
924 lock->ls_flags &= ~NFS_LOCK_INITIALIZED;
925 }
926 } 973 }
927 spin_unlock(&sp->so_lock); 974 spin_unlock(&sp->so_lock);
928 } 975 }
929} 976}
930 977
931static int reclaimer(void *ptr) 978static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp)
979{
980 /* Mark all delegations for reclaim */
981 nfs_delegation_mark_reclaim(clp);
982 nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_reboot);
983}
984
985static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp)
932{ 986{
933 struct nfs_client *clp = ptr;
934 struct nfs4_state_owner *sp; 987 struct nfs4_state_owner *sp;
935 struct rb_node *pos; 988 struct rb_node *pos;
936 struct nfs4_state_recovery_ops *ops; 989 struct nfs4_state *state;
937 struct rpc_cred *cred; 990
991 if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
992 return;
993
994 for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
995 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
996 spin_lock(&sp->so_lock);
997 list_for_each_entry(state, &sp->so_states, open_states) {
998 if (!test_and_clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags))
999 continue;
1000 nfs4_state_mark_reclaim_nograce(clp, state);
1001 }
1002 spin_unlock(&sp->so_lock);
1003 }
1004
1005 nfs_delegation_reap_unclaimed(clp);
1006}
1007
1008static void nfs_delegation_clear_all(struct nfs_client *clp)
1009{
1010 nfs_delegation_mark_reclaim(clp);
1011 nfs_delegation_reap_unclaimed(clp);
1012}
1013
1014static void nfs4_state_start_reclaim_nograce(struct nfs_client *clp)
1015{
1016 nfs_delegation_clear_all(clp);
1017 nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_nograce);
1018}
1019
1020static void nfs4_state_end_reclaim_nograce(struct nfs_client *clp)
1021{
1022 clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state);
1023}
1024
1025static void nfs4_recovery_handle_error(struct nfs_client *clp, int error)
1026{
1027 switch (error) {
1028 case -NFS4ERR_CB_PATH_DOWN:
1029 nfs_handle_cb_pathdown(clp);
1030 break;
1031 case -NFS4ERR_STALE_CLIENTID:
1032 case -NFS4ERR_LEASE_MOVED:
1033 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1034 nfs4_state_start_reclaim_reboot(clp);
1035 break;
1036 case -NFS4ERR_EXPIRED:
1037 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1038 nfs4_state_start_reclaim_nograce(clp);
1039 }
1040}
1041
1042static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recovery_ops *ops)
1043{
1044 struct rb_node *pos;
938 int status = 0; 1045 int status = 0;
939 1046
940 allow_signal(SIGKILL); 1047restart:
1048 spin_lock(&clp->cl_lock);
1049 for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
1050 struct nfs4_state_owner *sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
1051 if (!test_and_clear_bit(ops->owner_flag_bit, &sp->so_flags))
1052 continue;
1053 atomic_inc(&sp->so_count);
1054 spin_unlock(&clp->cl_lock);
1055 status = nfs4_reclaim_open_state(sp, ops);
1056 if (status < 0) {
1057 set_bit(ops->owner_flag_bit, &sp->so_flags);
1058 nfs4_put_state_owner(sp);
1059 nfs4_recovery_handle_error(clp, status);
1060 return status;
1061 }
1062 nfs4_put_state_owner(sp);
1063 goto restart;
1064 }
1065 spin_unlock(&clp->cl_lock);
1066 return status;
1067}
941 1068
942 /* Ensure exclusive access to NFSv4 state */ 1069static int nfs4_check_lease(struct nfs_client *clp)
943 down_write(&clp->cl_sem); 1070{
944 /* Are there any NFS mounts out there? */ 1071 struct rpc_cred *cred;
945 if (list_empty(&clp->cl_superblocks)) 1072 int status = -NFS4ERR_EXPIRED;
946 goto out; 1073
947restart_loop: 1074 /* Is the client already known to have an expired lease? */
948 ops = &nfs4_network_partition_recovery_ops; 1075 if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
949 /* Are there any open files on this volume? */ 1076 return 0;
950 cred = nfs4_get_renew_cred(clp); 1077 cred = nfs4_get_renew_cred(clp);
951 if (cred != NULL) { 1078 if (cred == NULL) {
952 /* Yes there are: try to renew the old lease */ 1079 cred = nfs4_get_setclientid_cred(clp);
953 status = nfs4_proc_renew(clp, cred); 1080 if (cred == NULL)
954 put_rpccred(cred); 1081 goto out;
955 switch (status) {
956 case 0:
957 case -NFS4ERR_CB_PATH_DOWN:
958 goto out;
959 case -NFS4ERR_STALE_CLIENTID:
960 case -NFS4ERR_LEASE_MOVED:
961 ops = &nfs4_reboot_recovery_ops;
962 }
963 } else {
964 /* "reboot" to ensure we clear all state on the server */
965 clp->cl_boot_time = CURRENT_TIME;
966 } 1082 }
967 /* We're going to have to re-establish a clientid */ 1083 status = nfs4_proc_renew(clp, cred);
968 nfs4_state_mark_reclaim(clp); 1084 put_rpccred(cred);
969 status = -ENOENT; 1085out:
1086 nfs4_recovery_handle_error(clp, status);
1087 return status;
1088}
1089
1090static int nfs4_reclaim_lease(struct nfs_client *clp)
1091{
1092 struct rpc_cred *cred;
1093 int status = -ENOENT;
1094
970 cred = nfs4_get_setclientid_cred(clp); 1095 cred = nfs4_get_setclientid_cred(clp);
971 if (cred != NULL) { 1096 if (cred != NULL) {
972 status = nfs4_init_client(clp, cred); 1097 status = nfs4_init_client(clp, cred);
@@ -974,42 +1099,90 @@ restart_loop:
974 /* Handle case where the user hasn't set up machine creds */ 1099 /* Handle case where the user hasn't set up machine creds */
975 if (status == -EACCES && cred == clp->cl_machine_cred) { 1100 if (status == -EACCES && cred == clp->cl_machine_cred) {
976 nfs4_clear_machine_cred(clp); 1101 nfs4_clear_machine_cred(clp);
977 goto restart_loop; 1102 status = -EAGAIN;
978 } 1103 }
979 } 1104 }
980 if (status) 1105 return status;
981 goto out_error; 1106}
982 /* Mark all delegations for reclaim */ 1107
983 nfs_delegation_mark_reclaim(clp); 1108static void nfs4_state_manager(struct nfs_client *clp)
984 /* Note: list is protected by exclusive lock on cl->cl_sem */ 1109{
985 for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { 1110 int status = 0;
986 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); 1111
987 status = nfs4_reclaim_open_state(ops, sp); 1112 /* Ensure exclusive access to NFSv4 state */
988 if (status < 0) { 1113 for(;;) {
989 if (status == -NFS4ERR_NO_GRACE) { 1114 if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) {
990 ops = &nfs4_network_partition_recovery_ops; 1115 /* We're going to have to re-establish a clientid */
991 status = nfs4_reclaim_open_state(ops, sp); 1116 status = nfs4_reclaim_lease(clp);
1117 if (status) {
1118 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1119 if (status == -EAGAIN)
1120 continue;
1121 goto out_error;
992 } 1122 }
1123 clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
1124 }
1125
1126 if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) {
1127 status = nfs4_check_lease(clp);
1128 if (status != 0)
1129 continue;
1130 }
1131
1132 /* First recover reboot state... */
1133 if (test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) {
1134 status = nfs4_do_reclaim(clp, &nfs4_reboot_recovery_ops);
993 if (status == -NFS4ERR_STALE_CLIENTID) 1135 if (status == -NFS4ERR_STALE_CLIENTID)
994 goto restart_loop; 1136 continue;
995 if (status == -NFS4ERR_EXPIRED) 1137 nfs4_state_end_reclaim_reboot(clp);
996 goto restart_loop; 1138 continue;
1139 }
1140
1141 /* Now recover expired state... */
1142 if (test_and_clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) {
1143 status = nfs4_do_reclaim(clp, &nfs4_nograce_recovery_ops);
1144 if (status < 0) {
1145 set_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state);
1146 if (status == -NFS4ERR_STALE_CLIENTID)
1147 continue;
1148 if (status == -NFS4ERR_EXPIRED)
1149 continue;
1150 goto out_error;
1151 } else
1152 nfs4_state_end_reclaim_nograce(clp);
1153 continue;
997 } 1154 }
1155
1156 if (test_and_clear_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) {
1157 nfs_client_return_marked_delegations(clp);
1158 continue;
1159 }
1160
1161 nfs4_clear_state_manager_bit(clp);
1162 /* Did we race with an attempt to give us more work? */
1163 if (clp->cl_state == 0)
1164 break;
1165 if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
1166 break;
998 } 1167 }
999 nfs_delegation_reap_unclaimed(clp); 1168 return;
1000out: 1169out_error:
1001 up_write(&clp->cl_sem); 1170 printk(KERN_WARNING "Error: state manager failed on NFSv4 server %s"
1002 if (status == -NFS4ERR_CB_PATH_DOWN) 1171 " with error %d\n", clp->cl_hostname, -status);
1003 nfs_handle_cb_pathdown(clp); 1172 if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
1004 nfs4_clear_recover_bit(clp); 1173 nfs4_state_end_reclaim_reboot(clp);
1174 nfs4_clear_state_manager_bit(clp);
1175}
1176
1177static int nfs4_run_state_manager(void *ptr)
1178{
1179 struct nfs_client *clp = ptr;
1180
1181 allow_signal(SIGKILL);
1182 nfs4_state_manager(clp);
1005 nfs_put_client(clp); 1183 nfs_put_client(clp);
1006 module_put_and_exit(0); 1184 module_put_and_exit(0);
1007 return 0; 1185 return 0;
1008out_error:
1009 printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %s"
1010 " with error %d\n", clp->cl_hostname, -status);
1011 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1012 goto out;
1013} 1186}
1014 1187
1015/* 1188/*
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index b916297d233..d1e4c8f8a0a 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -8,7 +8,7 @@
8 * 8 *
9 * Kendrick Smith <kmsmith@umich.edu> 9 * Kendrick Smith <kmsmith@umich.edu>
10 * Andy Adamson <andros@umich.edu> 10 * Andy Adamson <andros@umich.edu>
11 * 11 *
12 * Redistribution and use in source and binary forms, with or without 12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions 13 * modification, are permitted provided that the following conditions
14 * are met: 14 * are met:
@@ -67,7 +67,7 @@ static int nfs4_stat_to_errno(int);
67#define NFS4_MAXTAGLEN 0 67#define NFS4_MAXTAGLEN 0
68#endif 68#endif
69 69
70/* lock,open owner id: 70/* lock,open owner id:
71 * we currently use size 2 (u64) out of (NFS4_OPAQUE_LIMIT >> 2) 71 * we currently use size 2 (u64) out of (NFS4_OPAQUE_LIMIT >> 2)
72 */ 72 */
73#define open_owner_id_maxsz (1 + 4) 73#define open_owner_id_maxsz (1 + 4)
@@ -541,6 +541,7 @@ static struct {
541struct compound_hdr { 541struct compound_hdr {
542 int32_t status; 542 int32_t status;
543 uint32_t nops; 543 uint32_t nops;
544 __be32 * nops_p;
544 uint32_t taglen; 545 uint32_t taglen;
545 char * tag; 546 char * tag;
546}; 547};
@@ -578,7 +579,7 @@ static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *
578 xdr_encode_opaque(p, str, len); 579 xdr_encode_opaque(p, str, len);
579} 580}
580 581
581static int encode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr) 582static void encode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr)
582{ 583{
583 __be32 *p; 584 __be32 *p;
584 585
@@ -588,8 +589,13 @@ static int encode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr)
588 WRITE32(hdr->taglen); 589 WRITE32(hdr->taglen);
589 WRITEMEM(hdr->tag, hdr->taglen); 590 WRITEMEM(hdr->tag, hdr->taglen);
590 WRITE32(NFS4_MINOR_VERSION); 591 WRITE32(NFS4_MINOR_VERSION);
592 hdr->nops_p = p;
591 WRITE32(hdr->nops); 593 WRITE32(hdr->nops);
592 return 0; 594}
595
596static void encode_nops(struct compound_hdr *hdr)
597{
598 *hdr->nops_p = htonl(hdr->nops);
593} 599}
594 600
595static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *verf) 601static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *verf)
@@ -601,7 +607,7 @@ static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *ve
601 xdr_encode_opaque_fixed(p, verf->data, NFS4_VERIFIER_SIZE); 607 xdr_encode_opaque_fixed(p, verf->data, NFS4_VERIFIER_SIZE);
602} 608}
603 609
604static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const struct nfs_server *server) 610static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const struct nfs_server *server)
605{ 611{
606 char owner_name[IDMAP_NAMESZ]; 612 char owner_name[IDMAP_NAMESZ];
607 char owner_group[IDMAP_NAMESZ]; 613 char owner_group[IDMAP_NAMESZ];
@@ -612,7 +618,6 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s
612 int len; 618 int len;
613 uint32_t bmval0 = 0; 619 uint32_t bmval0 = 0;
614 uint32_t bmval1 = 0; 620 uint32_t bmval1 = 0;
615 int status;
616 621
617 /* 622 /*
618 * We reserve enough space to write the entire attribute buffer at once. 623 * We reserve enough space to write the entire attribute buffer at once.
@@ -709,7 +714,7 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s
709 bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET; 714 bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET;
710 WRITE32(NFS4_SET_TO_SERVER_TIME); 715 WRITE32(NFS4_SET_TO_SERVER_TIME);
711 } 716 }
712 717
713 /* 718 /*
714 * Now we backfill the bitmap and the attribute buffer length. 719 * Now we backfill the bitmap and the attribute buffer length.
715 */ 720 */
@@ -723,23 +728,20 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s
723 *q++ = htonl(bmval1); 728 *q++ = htonl(bmval1);
724 *q++ = htonl(len); 729 *q++ = htonl(len);
725 730
726 status = 0;
727/* out: */ 731/* out: */
728 return status;
729} 732}
730 733
731static int encode_access(struct xdr_stream *xdr, u32 access) 734static void encode_access(struct xdr_stream *xdr, u32 access, struct compound_hdr *hdr)
732{ 735{
733 __be32 *p; 736 __be32 *p;
734 737
735 RESERVE_SPACE(8); 738 RESERVE_SPACE(8);
736 WRITE32(OP_ACCESS); 739 WRITE32(OP_ACCESS);
737 WRITE32(access); 740 WRITE32(access);
738 741 hdr->nops++;
739 return 0;
740} 742}
741 743
742static int encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg) 744static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr)
743{ 745{
744 __be32 *p; 746 __be32 *p;
745 747
@@ -747,26 +749,24 @@ static int encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg)
747 WRITE32(OP_CLOSE); 749 WRITE32(OP_CLOSE);
748 WRITE32(arg->seqid->sequence->counter); 750 WRITE32(arg->seqid->sequence->counter);
749 WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE); 751 WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE);
750 752 hdr->nops++;
751 return 0;
752} 753}
753 754
754static int encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args) 755static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr)
755{ 756{
756 __be32 *p; 757 __be32 *p;
757
758 RESERVE_SPACE(16);
759 WRITE32(OP_COMMIT);
760 WRITE64(args->offset);
761 WRITE32(args->count);
762 758
763 return 0; 759 RESERVE_SPACE(16);
760 WRITE32(OP_COMMIT);
761 WRITE64(args->offset);
762 WRITE32(args->count);
763 hdr->nops++;
764} 764}
765 765
766static int encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *create) 766static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *create, struct compound_hdr *hdr)
767{ 767{
768 __be32 *p; 768 __be32 *p;
769 769
770 RESERVE_SPACE(8); 770 RESERVE_SPACE(8);
771 WRITE32(OP_CREATE); 771 WRITE32(OP_CREATE);
772 WRITE32(create->ftype); 772 WRITE32(create->ftype);
@@ -791,64 +791,62 @@ static int encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *c
791 RESERVE_SPACE(4 + create->name->len); 791 RESERVE_SPACE(4 + create->name->len);
792 WRITE32(create->name->len); 792 WRITE32(create->name->len);
793 WRITEMEM(create->name->name, create->name->len); 793 WRITEMEM(create->name->name, create->name->len);
794 hdr->nops++;
794 795
795 return encode_attrs(xdr, create->attrs, create->server); 796 encode_attrs(xdr, create->attrs, create->server);
796} 797}
797 798
798static int encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap) 799static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct compound_hdr *hdr)
799{ 800{
800 __be32 *p; 801 __be32 *p;
801 802
802 RESERVE_SPACE(12); 803 RESERVE_SPACE(12);
803 WRITE32(OP_GETATTR); 804 WRITE32(OP_GETATTR);
804 WRITE32(1); 805 WRITE32(1);
805 WRITE32(bitmap); 806 WRITE32(bitmap);
806 return 0; 807 hdr->nops++;
807} 808}
808 809
809static int encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1) 810static void encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1, struct compound_hdr *hdr)
810{ 811{
811 __be32 *p; 812 __be32 *p;
812 813
813 RESERVE_SPACE(16); 814 RESERVE_SPACE(16);
814 WRITE32(OP_GETATTR); 815 WRITE32(OP_GETATTR);
815 WRITE32(2); 816 WRITE32(2);
816 WRITE32(bm0); 817 WRITE32(bm0);
817 WRITE32(bm1); 818 WRITE32(bm1);
818 return 0; 819 hdr->nops++;
819} 820}
820 821
821static int encode_getfattr(struct xdr_stream *xdr, const u32* bitmask) 822static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
822{ 823{
823 return encode_getattr_two(xdr, 824 encode_getattr_two(xdr, bitmask[0] & nfs4_fattr_bitmap[0],
824 bitmask[0] & nfs4_fattr_bitmap[0], 825 bitmask[1] & nfs4_fattr_bitmap[1], hdr);
825 bitmask[1] & nfs4_fattr_bitmap[1]);
826} 826}
827 827
828static int encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask) 828static void encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
829{ 829{
830 return encode_getattr_two(xdr, bitmask[0] & nfs4_fsinfo_bitmap[0], 830 encode_getattr_two(xdr, bitmask[0] & nfs4_fsinfo_bitmap[0],
831 bitmask[1] & nfs4_fsinfo_bitmap[1]); 831 bitmask[1] & nfs4_fsinfo_bitmap[1], hdr);
832} 832}
833 833
834static int encode_fs_locations(struct xdr_stream *xdr, const u32* bitmask) 834static void encode_fs_locations(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
835{ 835{
836 return encode_getattr_two(xdr, 836 encode_getattr_two(xdr, bitmask[0] & nfs4_fs_locations_bitmap[0],
837 bitmask[0] & nfs4_fs_locations_bitmap[0], 837 bitmask[1] & nfs4_fs_locations_bitmap[1], hdr);
838 bitmask[1] & nfs4_fs_locations_bitmap[1]);
839} 838}
840 839
841static int encode_getfh(struct xdr_stream *xdr) 840static void encode_getfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
842{ 841{
843 __be32 *p; 842 __be32 *p;
844 843
845 RESERVE_SPACE(4); 844 RESERVE_SPACE(4);
846 WRITE32(OP_GETFH); 845 WRITE32(OP_GETFH);
847 846 hdr->nops++;
848 return 0;
849} 847}
850 848
851static int encode_link(struct xdr_stream *xdr, const struct qstr *name) 849static void encode_link(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
852{ 850{
853 __be32 *p; 851 __be32 *p;
854 852
@@ -856,8 +854,7 @@ static int encode_link(struct xdr_stream *xdr, const struct qstr *name)
856 WRITE32(OP_LINK); 854 WRITE32(OP_LINK);
857 WRITE32(name->len); 855 WRITE32(name->len);
858 WRITEMEM(name->name, name->len); 856 WRITEMEM(name->name, name->len);
859 857 hdr->nops++;
860 return 0;
861} 858}
862 859
863static inline int nfs4_lock_type(struct file_lock *fl, int block) 860static inline int nfs4_lock_type(struct file_lock *fl, int block)
@@ -878,7 +875,7 @@ static inline uint64_t nfs4_lock_length(struct file_lock *fl)
878 * opcode,type,reclaim,offset,length,new_lock_owner = 32 875 * opcode,type,reclaim,offset,length,new_lock_owner = 32
879 * open_seqid,open_stateid,lock_seqid,lock_owner.clientid, lock_owner.id = 40 876 * open_seqid,open_stateid,lock_seqid,lock_owner.clientid, lock_owner.id = 40
880 */ 877 */
881static int encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args) 878static void encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args, struct compound_hdr *hdr)
882{ 879{
883 __be32 *p; 880 __be32 *p;
884 881
@@ -904,11 +901,10 @@ static int encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args)
904 WRITEMEM(args->lock_stateid->data, NFS4_STATEID_SIZE); 901 WRITEMEM(args->lock_stateid->data, NFS4_STATEID_SIZE);
905 WRITE32(args->lock_seqid->sequence->counter); 902 WRITE32(args->lock_seqid->sequence->counter);
906 } 903 }
907 904 hdr->nops++;
908 return 0;
909} 905}
910 906
911static int encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *args) 907static void encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *args, struct compound_hdr *hdr)
912{ 908{
913 __be32 *p; 909 __be32 *p;
914 910
@@ -921,11 +917,10 @@ static int encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *arg
921 WRITE32(16); 917 WRITE32(16);
922 WRITEMEM("lock id:", 8); 918 WRITEMEM("lock id:", 8);
923 WRITE64(args->lock_owner.id); 919 WRITE64(args->lock_owner.id);
924 920 hdr->nops++;
925 return 0;
926} 921}
927 922
928static int encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *args) 923static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *args, struct compound_hdr *hdr)
929{ 924{
930 __be32 *p; 925 __be32 *p;
931 926
@@ -936,11 +931,10 @@ static int encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *arg
936 WRITEMEM(args->stateid->data, NFS4_STATEID_SIZE); 931 WRITEMEM(args->stateid->data, NFS4_STATEID_SIZE);
937 WRITE64(args->fl->fl_start); 932 WRITE64(args->fl->fl_start);
938 WRITE64(nfs4_lock_length(args->fl)); 933 WRITE64(nfs4_lock_length(args->fl));
939 934 hdr->nops++;
940 return 0;
941} 935}
942 936
943static int encode_lookup(struct xdr_stream *xdr, const struct qstr *name) 937static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
944{ 938{
945 int len = name->len; 939 int len = name->len;
946 __be32 *p; 940 __be32 *p;
@@ -949,27 +943,26 @@ static int encode_lookup(struct xdr_stream *xdr, const struct qstr *name)
949 WRITE32(OP_LOOKUP); 943 WRITE32(OP_LOOKUP);
950 WRITE32(len); 944 WRITE32(len);
951 WRITEMEM(name->name, len); 945 WRITEMEM(name->name, len);
952 946 hdr->nops++;
953 return 0;
954} 947}
955 948
956static void encode_share_access(struct xdr_stream *xdr, int open_flags) 949static void encode_share_access(struct xdr_stream *xdr, fmode_t fmode)
957{ 950{
958 __be32 *p; 951 __be32 *p;
959 952
960 RESERVE_SPACE(8); 953 RESERVE_SPACE(8);
961 switch (open_flags & (FMODE_READ|FMODE_WRITE)) { 954 switch (fmode & (FMODE_READ|FMODE_WRITE)) {
962 case FMODE_READ: 955 case FMODE_READ:
963 WRITE32(NFS4_SHARE_ACCESS_READ); 956 WRITE32(NFS4_SHARE_ACCESS_READ);
964 break; 957 break;
965 case FMODE_WRITE: 958 case FMODE_WRITE:
966 WRITE32(NFS4_SHARE_ACCESS_WRITE); 959 WRITE32(NFS4_SHARE_ACCESS_WRITE);
967 break; 960 break;
968 case FMODE_READ|FMODE_WRITE: 961 case FMODE_READ|FMODE_WRITE:
969 WRITE32(NFS4_SHARE_ACCESS_BOTH); 962 WRITE32(NFS4_SHARE_ACCESS_BOTH);
970 break; 963 break;
971 default: 964 default:
972 BUG(); 965 WRITE32(0);
973 } 966 }
974 WRITE32(0); /* for linux, share_deny = 0 always */ 967 WRITE32(0); /* for linux, share_deny = 0 always */
975} 968}
@@ -984,7 +977,7 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena
984 RESERVE_SPACE(8); 977 RESERVE_SPACE(8);
985 WRITE32(OP_OPEN); 978 WRITE32(OP_OPEN);
986 WRITE32(arg->seqid->sequence->counter); 979 WRITE32(arg->seqid->sequence->counter);
987 encode_share_access(xdr, arg->open_flags); 980 encode_share_access(xdr, arg->fmode);
988 RESERVE_SPACE(28); 981 RESERVE_SPACE(28);
989 WRITE64(arg->clientid); 982 WRITE64(arg->clientid);
990 WRITE32(16); 983 WRITE32(16);
@@ -998,13 +991,13 @@ static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_op
998 991
999 RESERVE_SPACE(4); 992 RESERVE_SPACE(4);
1000 switch(arg->open_flags & O_EXCL) { 993 switch(arg->open_flags & O_EXCL) {
1001 case 0: 994 case 0:
1002 WRITE32(NFS4_CREATE_UNCHECKED); 995 WRITE32(NFS4_CREATE_UNCHECKED);
1003 encode_attrs(xdr, arg->u.attrs, arg->server); 996 encode_attrs(xdr, arg->u.attrs, arg->server);
1004 break; 997 break;
1005 default: 998 default:
1006 WRITE32(NFS4_CREATE_EXCLUSIVE); 999 WRITE32(NFS4_CREATE_EXCLUSIVE);
1007 encode_nfs4_verifier(xdr, &arg->u.verifier); 1000 encode_nfs4_verifier(xdr, &arg->u.verifier);
1008 } 1001 }
1009} 1002}
1010 1003
@@ -1014,33 +1007,33 @@ static void encode_opentype(struct xdr_stream *xdr, const struct nfs_openargs *a
1014 1007
1015 RESERVE_SPACE(4); 1008 RESERVE_SPACE(4);
1016 switch (arg->open_flags & O_CREAT) { 1009 switch (arg->open_flags & O_CREAT) {
1017 case 0: 1010 case 0:
1018 WRITE32(NFS4_OPEN_NOCREATE); 1011 WRITE32(NFS4_OPEN_NOCREATE);
1019 break; 1012 break;
1020 default: 1013 default:
1021 BUG_ON(arg->claim != NFS4_OPEN_CLAIM_NULL); 1014 BUG_ON(arg->claim != NFS4_OPEN_CLAIM_NULL);
1022 WRITE32(NFS4_OPEN_CREATE); 1015 WRITE32(NFS4_OPEN_CREATE);
1023 encode_createmode(xdr, arg); 1016 encode_createmode(xdr, arg);
1024 } 1017 }
1025} 1018}
1026 1019
1027static inline void encode_delegation_type(struct xdr_stream *xdr, int delegation_type) 1020static inline void encode_delegation_type(struct xdr_stream *xdr, fmode_t delegation_type)
1028{ 1021{
1029 __be32 *p; 1022 __be32 *p;
1030 1023
1031 RESERVE_SPACE(4); 1024 RESERVE_SPACE(4);
1032 switch (delegation_type) { 1025 switch (delegation_type) {
1033 case 0: 1026 case 0:
1034 WRITE32(NFS4_OPEN_DELEGATE_NONE); 1027 WRITE32(NFS4_OPEN_DELEGATE_NONE);
1035 break; 1028 break;
1036 case FMODE_READ: 1029 case FMODE_READ:
1037 WRITE32(NFS4_OPEN_DELEGATE_READ); 1030 WRITE32(NFS4_OPEN_DELEGATE_READ);
1038 break; 1031 break;
1039 case FMODE_WRITE|FMODE_READ: 1032 case FMODE_WRITE|FMODE_READ:
1040 WRITE32(NFS4_OPEN_DELEGATE_WRITE); 1033 WRITE32(NFS4_OPEN_DELEGATE_WRITE);
1041 break; 1034 break;
1042 default: 1035 default:
1043 BUG(); 1036 BUG();
1044 } 1037 }
1045} 1038}
1046 1039
@@ -1053,7 +1046,7 @@ static inline void encode_claim_null(struct xdr_stream *xdr, const struct qstr *
1053 encode_string(xdr, name->len, name->name); 1046 encode_string(xdr, name->len, name->name);
1054} 1047}
1055 1048
1056static inline void encode_claim_previous(struct xdr_stream *xdr, int type) 1049static inline void encode_claim_previous(struct xdr_stream *xdr, fmode_t type)
1057{ 1050{
1058 __be32 *p; 1051 __be32 *p;
1059 1052
@@ -1072,27 +1065,27 @@ static inline void encode_claim_delegate_cur(struct xdr_stream *xdr, const struc
1072 encode_string(xdr, name->len, name->name); 1065 encode_string(xdr, name->len, name->name);
1073} 1066}
1074 1067
1075static int encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg) 1068static void encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg, struct compound_hdr *hdr)
1076{ 1069{
1077 encode_openhdr(xdr, arg); 1070 encode_openhdr(xdr, arg);
1078 encode_opentype(xdr, arg); 1071 encode_opentype(xdr, arg);
1079 switch (arg->claim) { 1072 switch (arg->claim) {
1080 case NFS4_OPEN_CLAIM_NULL: 1073 case NFS4_OPEN_CLAIM_NULL:
1081 encode_claim_null(xdr, arg->name); 1074 encode_claim_null(xdr, arg->name);
1082 break; 1075 break;
1083 case NFS4_OPEN_CLAIM_PREVIOUS: 1076 case NFS4_OPEN_CLAIM_PREVIOUS:
1084 encode_claim_previous(xdr, arg->u.delegation_type); 1077 encode_claim_previous(xdr, arg->u.delegation_type);
1085 break; 1078 break;
1086 case NFS4_OPEN_CLAIM_DELEGATE_CUR: 1079 case NFS4_OPEN_CLAIM_DELEGATE_CUR:
1087 encode_claim_delegate_cur(xdr, arg->name, &arg->u.delegation); 1080 encode_claim_delegate_cur(xdr, arg->name, &arg->u.delegation);
1088 break; 1081 break;
1089 default: 1082 default:
1090 BUG(); 1083 BUG();
1091 } 1084 }
1092 return 0; 1085 hdr->nops++;
1093} 1086}
1094 1087
1095static int encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_confirmargs *arg) 1088static void encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_confirmargs *arg, struct compound_hdr *hdr)
1096{ 1089{
1097 __be32 *p; 1090 __be32 *p;
1098 1091
@@ -1100,11 +1093,10 @@ static int encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_con
1100 WRITE32(OP_OPEN_CONFIRM); 1093 WRITE32(OP_OPEN_CONFIRM);
1101 WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE); 1094 WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE);
1102 WRITE32(arg->seqid->sequence->counter); 1095 WRITE32(arg->seqid->sequence->counter);
1103 1096 hdr->nops++;
1104 return 0;
1105} 1097}
1106 1098
1107static int encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closeargs *arg) 1099static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr)
1108{ 1100{
1109 __be32 *p; 1101 __be32 *p;
1110 1102
@@ -1112,12 +1104,12 @@ static int encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closea
1112 WRITE32(OP_OPEN_DOWNGRADE); 1104 WRITE32(OP_OPEN_DOWNGRADE);
1113 WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE); 1105 WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE);
1114 WRITE32(arg->seqid->sequence->counter); 1106 WRITE32(arg->seqid->sequence->counter);
1115 encode_share_access(xdr, arg->open_flags); 1107 encode_share_access(xdr, arg->fmode);
1116 return 0; 1108 hdr->nops++;
1117} 1109}
1118 1110
1119static int 1111static void
1120encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh) 1112encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh, struct compound_hdr *hdr)
1121{ 1113{
1122 int len = fh->size; 1114 int len = fh->size;
1123 __be32 *p; 1115 __be32 *p;
@@ -1126,18 +1118,16 @@ encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh)
1126 WRITE32(OP_PUTFH); 1118 WRITE32(OP_PUTFH);
1127 WRITE32(len); 1119 WRITE32(len);
1128 WRITEMEM(fh->data, len); 1120 WRITEMEM(fh->data, len);
1129 1121 hdr->nops++;
1130 return 0;
1131} 1122}
1132 1123
1133static int encode_putrootfh(struct xdr_stream *xdr) 1124static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1134{ 1125{
1135 __be32 *p; 1126 __be32 *p;
1136
1137 RESERVE_SPACE(4);
1138 WRITE32(OP_PUTROOTFH);
1139 1127
1140 return 0; 1128 RESERVE_SPACE(4);
1129 WRITE32(OP_PUTROOTFH);
1130 hdr->nops++;
1141} 1131}
1142 1132
1143static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx) 1133static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx)
@@ -1153,7 +1143,7 @@ static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context
1153 WRITEMEM(zero_stateid.data, NFS4_STATEID_SIZE); 1143 WRITEMEM(zero_stateid.data, NFS4_STATEID_SIZE);
1154} 1144}
1155 1145
1156static int encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args) 1146static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr)
1157{ 1147{
1158 __be32 *p; 1148 __be32 *p;
1159 1149
@@ -1165,11 +1155,10 @@ static int encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args)
1165 RESERVE_SPACE(12); 1155 RESERVE_SPACE(12);
1166 WRITE64(args->offset); 1156 WRITE64(args->offset);
1167 WRITE32(args->count); 1157 WRITE32(args->count);
1168 1158 hdr->nops++;
1169 return 0;
1170} 1159}
1171 1160
1172static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req) 1161static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr)
1173{ 1162{
1174 uint32_t attrs[2] = { 1163 uint32_t attrs[2] = {
1175 FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID, 1164 FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID,
@@ -1191,6 +1180,7 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
1191 attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; 1180 attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
1192 WRITE32(attrs[0] & readdir->bitmask[0]); 1181 WRITE32(attrs[0] & readdir->bitmask[0]);
1193 WRITE32(attrs[1] & readdir->bitmask[1]); 1182 WRITE32(attrs[1] & readdir->bitmask[1]);
1183 hdr->nops++;
1194 dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n", 1184 dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n",
1195 __func__, 1185 __func__,
1196 (unsigned long long)readdir->cookie, 1186 (unsigned long long)readdir->cookie,
@@ -1198,21 +1188,18 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
1198 ((u32 *)readdir->verifier.data)[1], 1188 ((u32 *)readdir->verifier.data)[1],
1199 attrs[0] & readdir->bitmask[0], 1189 attrs[0] & readdir->bitmask[0],
1200 attrs[1] & readdir->bitmask[1]); 1190 attrs[1] & readdir->bitmask[1]);
1201
1202 return 0;
1203} 1191}
1204 1192
1205static int encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req) 1193static void encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req, struct compound_hdr *hdr)
1206{ 1194{
1207 __be32 *p; 1195 __be32 *p;
1208 1196
1209 RESERVE_SPACE(4); 1197 RESERVE_SPACE(4);
1210 WRITE32(OP_READLINK); 1198 WRITE32(OP_READLINK);
1211 1199 hdr->nops++;
1212 return 0;
1213} 1200}
1214 1201
1215static int encode_remove(struct xdr_stream *xdr, const struct qstr *name) 1202static void encode_remove(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
1216{ 1203{
1217 __be32 *p; 1204 __be32 *p;
1218 1205
@@ -1220,11 +1207,10 @@ static int encode_remove(struct xdr_stream *xdr, const struct qstr *name)
1220 WRITE32(OP_REMOVE); 1207 WRITE32(OP_REMOVE);
1221 WRITE32(name->len); 1208 WRITE32(name->len);
1222 WRITEMEM(name->name, name->len); 1209 WRITEMEM(name->name, name->len);
1223 1210 hdr->nops++;
1224 return 0;
1225} 1211}
1226 1212
1227static int encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, const struct qstr *newname) 1213static void encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, const struct qstr *newname, struct compound_hdr *hdr)
1228{ 1214{
1229 __be32 *p; 1215 __be32 *p;
1230 1216
@@ -1232,38 +1218,35 @@ static int encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, con
1232 WRITE32(OP_RENAME); 1218 WRITE32(OP_RENAME);
1233 WRITE32(oldname->len); 1219 WRITE32(oldname->len);
1234 WRITEMEM(oldname->name, oldname->len); 1220 WRITEMEM(oldname->name, oldname->len);
1235 1221
1236 RESERVE_SPACE(4 + newname->len); 1222 RESERVE_SPACE(4 + newname->len);
1237 WRITE32(newname->len); 1223 WRITE32(newname->len);
1238 WRITEMEM(newname->name, newname->len); 1224 WRITEMEM(newname->name, newname->len);
1239 1225 hdr->nops++;
1240 return 0;
1241} 1226}
1242 1227
1243static int encode_renew(struct xdr_stream *xdr, const struct nfs_client *client_stateid) 1228static void encode_renew(struct xdr_stream *xdr, const struct nfs_client *client_stateid, struct compound_hdr *hdr)
1244{ 1229{
1245 __be32 *p; 1230 __be32 *p;
1246 1231
1247 RESERVE_SPACE(12); 1232 RESERVE_SPACE(12);
1248 WRITE32(OP_RENEW); 1233 WRITE32(OP_RENEW);
1249 WRITE64(client_stateid->cl_clientid); 1234 WRITE64(client_stateid->cl_clientid);
1250 1235 hdr->nops++;
1251 return 0;
1252} 1236}
1253 1237
1254static int 1238static void
1255encode_restorefh(struct xdr_stream *xdr) 1239encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1256{ 1240{
1257 __be32 *p; 1241 __be32 *p;
1258 1242
1259 RESERVE_SPACE(4); 1243 RESERVE_SPACE(4);
1260 WRITE32(OP_RESTOREFH); 1244 WRITE32(OP_RESTOREFH);
1261 1245 hdr->nops++;
1262 return 0;
1263} 1246}
1264 1247
1265static int 1248static int
1266encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg) 1249encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compound_hdr *hdr)
1267{ 1250{
1268 __be32 *p; 1251 __be32 *p;
1269 1252
@@ -1278,36 +1261,32 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg)
1278 RESERVE_SPACE(4); 1261 RESERVE_SPACE(4);
1279 WRITE32(arg->acl_len); 1262 WRITE32(arg->acl_len);
1280 xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len); 1263 xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len);
1264 hdr->nops++;
1281 return 0; 1265 return 0;
1282} 1266}
1283 1267
1284static int 1268static void
1285encode_savefh(struct xdr_stream *xdr) 1269encode_savefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1286{ 1270{
1287 __be32 *p; 1271 __be32 *p;
1288 1272
1289 RESERVE_SPACE(4); 1273 RESERVE_SPACE(4);
1290 WRITE32(OP_SAVEFH); 1274 WRITE32(OP_SAVEFH);
1291 1275 hdr->nops++;
1292 return 0;
1293} 1276}
1294 1277
1295static int encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs *arg, const struct nfs_server *server) 1278static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs *arg, const struct nfs_server *server, struct compound_hdr *hdr)
1296{ 1279{
1297 int status;
1298 __be32 *p; 1280 __be32 *p;
1299
1300 RESERVE_SPACE(4+NFS4_STATEID_SIZE);
1301 WRITE32(OP_SETATTR);
1302 WRITEMEM(arg->stateid.data, NFS4_STATEID_SIZE);
1303 1281
1304 if ((status = encode_attrs(xdr, arg->iap, server))) 1282 RESERVE_SPACE(4+NFS4_STATEID_SIZE);
1305 return status; 1283 WRITE32(OP_SETATTR);
1306 1284 WRITEMEM(arg->stateid.data, NFS4_STATEID_SIZE);
1307 return 0; 1285 hdr->nops++;
1286 encode_attrs(xdr, arg->iap, server);
1308} 1287}
1309 1288
1310static int encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclientid *setclientid) 1289static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclientid *setclientid, struct compound_hdr *hdr)
1311{ 1290{
1312 __be32 *p; 1291 __be32 *p;
1313 1292
@@ -1322,23 +1301,21 @@ static int encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclien
1322 encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr); 1301 encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr);
1323 RESERVE_SPACE(4); 1302 RESERVE_SPACE(4);
1324 WRITE32(setclientid->sc_cb_ident); 1303 WRITE32(setclientid->sc_cb_ident);
1325 1304 hdr->nops++;
1326 return 0;
1327} 1305}
1328 1306
1329static int encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_client *client_state) 1307static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_client *client_state, struct compound_hdr *hdr)
1330{ 1308{
1331 __be32 *p; 1309 __be32 *p;
1332
1333 RESERVE_SPACE(12 + NFS4_VERIFIER_SIZE);
1334 WRITE32(OP_SETCLIENTID_CONFIRM);
1335 WRITE64(client_state->cl_clientid);
1336 WRITEMEM(client_state->cl_confirm.data, NFS4_VERIFIER_SIZE);
1337 1310
1338 return 0; 1311 RESERVE_SPACE(12 + NFS4_VERIFIER_SIZE);
1312 WRITE32(OP_SETCLIENTID_CONFIRM);
1313 WRITE64(client_state->cl_clientid);
1314 WRITEMEM(client_state->cl_confirm.data, NFS4_VERIFIER_SIZE);
1315 hdr->nops++;
1339} 1316}
1340 1317
1341static int encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args) 1318static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr)
1342{ 1319{
1343 __be32 *p; 1320 __be32 *p;
1344 1321
@@ -1353,11 +1330,10 @@ static int encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args
1353 WRITE32(args->count); 1330 WRITE32(args->count);
1354 1331
1355 xdr_write_pages(xdr, args->pages, args->pgbase, args->count); 1332 xdr_write_pages(xdr, args->pages, args->pgbase, args->count);
1356 1333 hdr->nops++;
1357 return 0;
1358} 1334}
1359 1335
1360static int encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *stateid) 1336static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *stateid, struct compound_hdr *hdr)
1361{ 1337{
1362 __be32 *p; 1338 __be32 *p;
1363 1339
@@ -1365,8 +1341,7 @@ static int encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *statei
1365 1341
1366 WRITE32(OP_DELEGRETURN); 1342 WRITE32(OP_DELEGRETURN);
1367 WRITEMEM(stateid->data, NFS4_STATEID_SIZE); 1343 WRITEMEM(stateid->data, NFS4_STATEID_SIZE);
1368 return 0; 1344 hdr->nops++;
1369
1370} 1345}
1371/* 1346/*
1372 * END OF "GENERIC" ENCODE ROUTINES. 1347 * END OF "GENERIC" ENCODE ROUTINES.
@@ -1379,21 +1354,16 @@ static int nfs4_xdr_enc_access(struct rpc_rqst *req, __be32 *p, const struct nfs
1379{ 1354{
1380 struct xdr_stream xdr; 1355 struct xdr_stream xdr;
1381 struct compound_hdr hdr = { 1356 struct compound_hdr hdr = {
1382 .nops = 3, 1357 .nops = 0,
1383 }; 1358 };
1384 int status;
1385 1359
1386 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1360 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1387 encode_compound_hdr(&xdr, &hdr); 1361 encode_compound_hdr(&xdr, &hdr);
1388 status = encode_putfh(&xdr, args->fh); 1362 encode_putfh(&xdr, args->fh, &hdr);
1389 if (status != 0) 1363 encode_access(&xdr, args->access, &hdr);
1390 goto out; 1364 encode_getfattr(&xdr, args->bitmask, &hdr);
1391 status = encode_access(&xdr, args->access); 1365 encode_nops(&hdr);
1392 if (status != 0) 1366 return 0;
1393 goto out;
1394 status = encode_getfattr(&xdr, args->bitmask);
1395out:
1396 return status;
1397} 1367}
1398 1368
1399/* 1369/*
@@ -1403,21 +1373,17 @@ static int nfs4_xdr_enc_lookup(struct rpc_rqst *req, __be32 *p, const struct nfs
1403{ 1373{
1404 struct xdr_stream xdr; 1374 struct xdr_stream xdr;
1405 struct compound_hdr hdr = { 1375 struct compound_hdr hdr = {
1406 .nops = 4, 1376 .nops = 0,
1407 }; 1377 };
1408 int status;
1409 1378
1410 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1379 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1411 encode_compound_hdr(&xdr, &hdr); 1380 encode_compound_hdr(&xdr, &hdr);
1412 if ((status = encode_putfh(&xdr, args->dir_fh)) != 0) 1381 encode_putfh(&xdr, args->dir_fh, &hdr);
1413 goto out; 1382 encode_lookup(&xdr, args->name, &hdr);
1414 if ((status = encode_lookup(&xdr, args->name)) != 0) 1383 encode_getfh(&xdr, &hdr);
1415 goto out; 1384 encode_getfattr(&xdr, args->bitmask, &hdr);
1416 if ((status = encode_getfh(&xdr)) != 0) 1385 encode_nops(&hdr);
1417 goto out; 1386 return 0;
1418 status = encode_getfattr(&xdr, args->bitmask);
1419out:
1420 return status;
1421} 1387}
1422 1388
1423/* 1389/*
@@ -1427,18 +1393,16 @@ static int nfs4_xdr_enc_lookup_root(struct rpc_rqst *req, __be32 *p, const struc
1427{ 1393{
1428 struct xdr_stream xdr; 1394 struct xdr_stream xdr;
1429 struct compound_hdr hdr = { 1395 struct compound_hdr hdr = {
1430 .nops = 3, 1396 .nops = 0,
1431 }; 1397 };
1432 int status;
1433 1398
1434 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1399 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1435 encode_compound_hdr(&xdr, &hdr); 1400 encode_compound_hdr(&xdr, &hdr);
1436 if ((status = encode_putrootfh(&xdr)) != 0) 1401 encode_putrootfh(&xdr, &hdr);
1437 goto out; 1402 encode_getfh(&xdr, &hdr);
1438 if ((status = encode_getfh(&xdr)) == 0) 1403 encode_getfattr(&xdr, args->bitmask, &hdr);
1439 status = encode_getfattr(&xdr, args->bitmask); 1404 encode_nops(&hdr);
1440out: 1405 return 0;
1441 return status;
1442} 1406}
1443 1407
1444/* 1408/*
@@ -1448,19 +1412,16 @@ static int nfs4_xdr_enc_remove(struct rpc_rqst *req, __be32 *p, const struct nfs
1448{ 1412{
1449 struct xdr_stream xdr; 1413 struct xdr_stream xdr;
1450 struct compound_hdr hdr = { 1414 struct compound_hdr hdr = {
1451 .nops = 3, 1415 .nops = 0,
1452 }; 1416 };
1453 int status;
1454 1417
1455 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1418 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1456 encode_compound_hdr(&xdr, &hdr); 1419 encode_compound_hdr(&xdr, &hdr);
1457 if ((status = encode_putfh(&xdr, args->fh)) != 0) 1420 encode_putfh(&xdr, args->fh, &hdr);
1458 goto out; 1421 encode_remove(&xdr, &args->name, &hdr);
1459 if ((status = encode_remove(&xdr, &args->name)) != 0) 1422 encode_getfattr(&xdr, args->bitmask, &hdr);
1460 goto out; 1423 encode_nops(&hdr);
1461 status = encode_getfattr(&xdr, args->bitmask); 1424 return 0;
1462out:
1463 return status;
1464} 1425}
1465 1426
1466/* 1427/*
@@ -1470,27 +1431,20 @@ static int nfs4_xdr_enc_rename(struct rpc_rqst *req, __be32 *p, const struct nfs
1470{ 1431{
1471 struct xdr_stream xdr; 1432 struct xdr_stream xdr;
1472 struct compound_hdr hdr = { 1433 struct compound_hdr hdr = {
1473 .nops = 7, 1434 .nops = 0,
1474 }; 1435 };
1475 int status;
1476 1436
1477 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1437 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1478 encode_compound_hdr(&xdr, &hdr); 1438 encode_compound_hdr(&xdr, &hdr);
1479 if ((status = encode_putfh(&xdr, args->old_dir)) != 0) 1439 encode_putfh(&xdr, args->old_dir, &hdr);
1480 goto out; 1440 encode_savefh(&xdr, &hdr);
1481 if ((status = encode_savefh(&xdr)) != 0) 1441 encode_putfh(&xdr, args->new_dir, &hdr);
1482 goto out; 1442 encode_rename(&xdr, args->old_name, args->new_name, &hdr);
1483 if ((status = encode_putfh(&xdr, args->new_dir)) != 0) 1443 encode_getfattr(&xdr, args->bitmask, &hdr);
1484 goto out; 1444 encode_restorefh(&xdr, &hdr);
1485 if ((status = encode_rename(&xdr, args->old_name, args->new_name)) != 0) 1445 encode_getfattr(&xdr, args->bitmask, &hdr);
1486 goto out; 1446 encode_nops(&hdr);
1487 if ((status = encode_getfattr(&xdr, args->bitmask)) != 0) 1447 return 0;
1488 goto out;
1489 if ((status = encode_restorefh(&xdr)) != 0)
1490 goto out;
1491 status = encode_getfattr(&xdr, args->bitmask);
1492out:
1493 return status;
1494} 1448}
1495 1449
1496/* 1450/*
@@ -1500,27 +1454,20 @@ static int nfs4_xdr_enc_link(struct rpc_rqst *req, __be32 *p, const struct nfs4_
1500{ 1454{
1501 struct xdr_stream xdr; 1455 struct xdr_stream xdr;
1502 struct compound_hdr hdr = { 1456 struct compound_hdr hdr = {
1503 .nops = 7, 1457 .nops = 0,
1504 }; 1458 };
1505 int status;
1506 1459
1507 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1460 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1508 encode_compound_hdr(&xdr, &hdr); 1461 encode_compound_hdr(&xdr, &hdr);
1509 if ((status = encode_putfh(&xdr, args->fh)) != 0) 1462 encode_putfh(&xdr, args->fh, &hdr);
1510 goto out; 1463 encode_savefh(&xdr, &hdr);
1511 if ((status = encode_savefh(&xdr)) != 0) 1464 encode_putfh(&xdr, args->dir_fh, &hdr);
1512 goto out; 1465 encode_link(&xdr, args->name, &hdr);
1513 if ((status = encode_putfh(&xdr, args->dir_fh)) != 0) 1466 encode_getfattr(&xdr, args->bitmask, &hdr);
1514 goto out; 1467 encode_restorefh(&xdr, &hdr);
1515 if ((status = encode_link(&xdr, args->name)) != 0) 1468 encode_getfattr(&xdr, args->bitmask, &hdr);
1516 goto out; 1469 encode_nops(&hdr);
1517 if ((status = encode_getfattr(&xdr, args->bitmask)) != 0) 1470 return 0;
1518 goto out;
1519 if ((status = encode_restorefh(&xdr)) != 0)
1520 goto out;
1521 status = encode_getfattr(&xdr, args->bitmask);
1522out:
1523 return status;
1524} 1471}
1525 1472
1526/* 1473/*
@@ -1530,27 +1477,20 @@ static int nfs4_xdr_enc_create(struct rpc_rqst *req, __be32 *p, const struct nfs
1530{ 1477{
1531 struct xdr_stream xdr; 1478 struct xdr_stream xdr;
1532 struct compound_hdr hdr = { 1479 struct compound_hdr hdr = {
1533 .nops = 7, 1480 .nops = 0,
1534 }; 1481 };
1535 int status;
1536 1482
1537 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1483 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1538 encode_compound_hdr(&xdr, &hdr); 1484 encode_compound_hdr(&xdr, &hdr);
1539 if ((status = encode_putfh(&xdr, args->dir_fh)) != 0) 1485 encode_putfh(&xdr, args->dir_fh, &hdr);
1540 goto out; 1486 encode_savefh(&xdr, &hdr);
1541 if ((status = encode_savefh(&xdr)) != 0) 1487 encode_create(&xdr, args, &hdr);
1542 goto out; 1488 encode_getfh(&xdr, &hdr);
1543 if ((status = encode_create(&xdr, args)) != 0) 1489 encode_getfattr(&xdr, args->bitmask, &hdr);
1544 goto out; 1490 encode_restorefh(&xdr, &hdr);
1545 if ((status = encode_getfh(&xdr)) != 0) 1491 encode_getfattr(&xdr, args->bitmask, &hdr);
1546 goto out; 1492 encode_nops(&hdr);
1547 if ((status = encode_getfattr(&xdr, args->bitmask)) != 0) 1493 return 0;
1548 goto out;
1549 if ((status = encode_restorefh(&xdr)) != 0)
1550 goto out;
1551 status = encode_getfattr(&xdr, args->bitmask);
1552out:
1553 return status;
1554} 1494}
1555 1495
1556/* 1496/*
@@ -1568,15 +1508,15 @@ static int nfs4_xdr_enc_getattr(struct rpc_rqst *req, __be32 *p, const struct nf
1568{ 1508{
1569 struct xdr_stream xdr; 1509 struct xdr_stream xdr;
1570 struct compound_hdr hdr = { 1510 struct compound_hdr hdr = {
1571 .nops = 2, 1511 .nops = 0,
1572 }; 1512 };
1573 int status;
1574 1513
1575 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1514 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1576 encode_compound_hdr(&xdr, &hdr); 1515 encode_compound_hdr(&xdr, &hdr);
1577 if ((status = encode_putfh(&xdr, args->fh)) == 0) 1516 encode_putfh(&xdr, args->fh, &hdr);
1578 status = encode_getfattr(&xdr, args->bitmask); 1517 encode_getfattr(&xdr, args->bitmask, &hdr);
1579 return status; 1518 encode_nops(&hdr);
1519 return 0;
1580} 1520}
1581 1521
1582/* 1522/*
@@ -1584,23 +1524,18 @@ static int nfs4_xdr_enc_getattr(struct rpc_rqst *req, __be32 *p, const struct nf
1584 */ 1524 */
1585static int nfs4_xdr_enc_close(struct rpc_rqst *req, __be32 *p, struct nfs_closeargs *args) 1525static int nfs4_xdr_enc_close(struct rpc_rqst *req, __be32 *p, struct nfs_closeargs *args)
1586{ 1526{
1587 struct xdr_stream xdr; 1527 struct xdr_stream xdr;
1588 struct compound_hdr hdr = { 1528 struct compound_hdr hdr = {
1589 .nops = 3, 1529 .nops = 0,
1590 }; 1530 };
1591 int status; 1531
1592 1532 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1593 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1533 encode_compound_hdr(&xdr, &hdr);
1594 encode_compound_hdr(&xdr, &hdr); 1534 encode_putfh(&xdr, args->fh, &hdr);
1595 status = encode_putfh(&xdr, args->fh); 1535 encode_close(&xdr, args, &hdr);
1596 if(status) 1536 encode_getfattr(&xdr, args->bitmask, &hdr);
1597 goto out; 1537 encode_nops(&hdr);
1598 status = encode_close(&xdr, args); 1538 return 0;
1599 if (status != 0)
1600 goto out;
1601 status = encode_getfattr(&xdr, args->bitmask);
1602out:
1603 return status;
1604} 1539}
1605 1540
1606/* 1541/*
@@ -1610,33 +1545,20 @@ static int nfs4_xdr_enc_open(struct rpc_rqst *req, __be32 *p, struct nfs_openarg
1610{ 1545{
1611 struct xdr_stream xdr; 1546 struct xdr_stream xdr;
1612 struct compound_hdr hdr = { 1547 struct compound_hdr hdr = {
1613 .nops = 7, 1548 .nops = 0,
1614 }; 1549 };
1615 int status;
1616 1550
1617 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1551 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1618 encode_compound_hdr(&xdr, &hdr); 1552 encode_compound_hdr(&xdr, &hdr);
1619 status = encode_putfh(&xdr, args->fh); 1553 encode_putfh(&xdr, args->fh, &hdr);
1620 if (status) 1554 encode_savefh(&xdr, &hdr);
1621 goto out; 1555 encode_open(&xdr, args, &hdr);
1622 status = encode_savefh(&xdr); 1556 encode_getfh(&xdr, &hdr);
1623 if (status) 1557 encode_getfattr(&xdr, args->bitmask, &hdr);
1624 goto out; 1558 encode_restorefh(&xdr, &hdr);
1625 status = encode_open(&xdr, args); 1559 encode_getfattr(&xdr, args->bitmask, &hdr);
1626 if (status) 1560 encode_nops(&hdr);
1627 goto out; 1561 return 0;
1628 status = encode_getfh(&xdr);
1629 if (status)
1630 goto out;
1631 status = encode_getfattr(&xdr, args->bitmask);
1632 if (status)
1633 goto out;
1634 status = encode_restorefh(&xdr);
1635 if (status)
1636 goto out;
1637 status = encode_getfattr(&xdr, args->bitmask);
1638out:
1639 return status;
1640} 1562}
1641 1563
1642/* 1564/*
@@ -1646,18 +1568,15 @@ static int nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, __be32 *p, struct nfs
1646{ 1568{
1647 struct xdr_stream xdr; 1569 struct xdr_stream xdr;
1648 struct compound_hdr hdr = { 1570 struct compound_hdr hdr = {
1649 .nops = 2, 1571 .nops = 0,
1650 }; 1572 };
1651 int status;
1652 1573
1653 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1574 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1654 encode_compound_hdr(&xdr, &hdr); 1575 encode_compound_hdr(&xdr, &hdr);
1655 status = encode_putfh(&xdr, args->fh); 1576 encode_putfh(&xdr, args->fh, &hdr);
1656 if(status) 1577 encode_open_confirm(&xdr, args, &hdr);
1657 goto out; 1578 encode_nops(&hdr);
1658 status = encode_open_confirm(&xdr, args); 1579 return 0;
1659out:
1660 return status;
1661} 1580}
1662 1581
1663/* 1582/*
@@ -1667,21 +1586,16 @@ static int nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, __be32 *p, struct nfs_
1667{ 1586{
1668 struct xdr_stream xdr; 1587 struct xdr_stream xdr;
1669 struct compound_hdr hdr = { 1588 struct compound_hdr hdr = {
1670 .nops = 3, 1589 .nops = 0,
1671 }; 1590 };
1672 int status;
1673 1591
1674 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1592 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1675 encode_compound_hdr(&xdr, &hdr); 1593 encode_compound_hdr(&xdr, &hdr);
1676 status = encode_putfh(&xdr, args->fh); 1594 encode_putfh(&xdr, args->fh, &hdr);
1677 if (status) 1595 encode_open(&xdr, args, &hdr);
1678 goto out; 1596 encode_getfattr(&xdr, args->bitmask, &hdr);
1679 status = encode_open(&xdr, args); 1597 encode_nops(&hdr);
1680 if (status) 1598 return 0;
1681 goto out;
1682 status = encode_getfattr(&xdr, args->bitmask);
1683out:
1684 return status;
1685} 1599}
1686 1600
1687/* 1601/*
@@ -1691,21 +1605,16 @@ static int nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, __be32 *p, struct n
1691{ 1605{
1692 struct xdr_stream xdr; 1606 struct xdr_stream xdr;
1693 struct compound_hdr hdr = { 1607 struct compound_hdr hdr = {
1694 .nops = 3, 1608 .nops = 0,
1695 }; 1609 };
1696 int status;
1697 1610
1698 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1611 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1699 encode_compound_hdr(&xdr, &hdr); 1612 encode_compound_hdr(&xdr, &hdr);
1700 status = encode_putfh(&xdr, args->fh); 1613 encode_putfh(&xdr, args->fh, &hdr);
1701 if (status) 1614 encode_open_downgrade(&xdr, args, &hdr);
1702 goto out; 1615 encode_getfattr(&xdr, args->bitmask, &hdr);
1703 status = encode_open_downgrade(&xdr, args); 1616 encode_nops(&hdr);
1704 if (status != 0) 1617 return 0;
1705 goto out;
1706 status = encode_getfattr(&xdr, args->bitmask);
1707out:
1708 return status;
1709} 1618}
1710 1619
1711/* 1620/*
@@ -1715,18 +1624,15 @@ static int nfs4_xdr_enc_lock(struct rpc_rqst *req, __be32 *p, struct nfs_lock_ar
1715{ 1624{
1716 struct xdr_stream xdr; 1625 struct xdr_stream xdr;
1717 struct compound_hdr hdr = { 1626 struct compound_hdr hdr = {
1718 .nops = 2, 1627 .nops = 0,
1719 }; 1628 };
1720 int status;
1721 1629
1722 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1630 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1723 encode_compound_hdr(&xdr, &hdr); 1631 encode_compound_hdr(&xdr, &hdr);
1724 status = encode_putfh(&xdr, args->fh); 1632 encode_putfh(&xdr, args->fh, &hdr);
1725 if(status) 1633 encode_lock(&xdr, args, &hdr);
1726 goto out; 1634 encode_nops(&hdr);
1727 status = encode_lock(&xdr, args); 1635 return 0;
1728out:
1729 return status;
1730} 1636}
1731 1637
1732/* 1638/*
@@ -1736,18 +1642,15 @@ static int nfs4_xdr_enc_lockt(struct rpc_rqst *req, __be32 *p, struct nfs_lockt_
1736{ 1642{
1737 struct xdr_stream xdr; 1643 struct xdr_stream xdr;
1738 struct compound_hdr hdr = { 1644 struct compound_hdr hdr = {
1739 .nops = 2, 1645 .nops = 0,
1740 }; 1646 };
1741 int status;
1742 1647
1743 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1648 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1744 encode_compound_hdr(&xdr, &hdr); 1649 encode_compound_hdr(&xdr, &hdr);
1745 status = encode_putfh(&xdr, args->fh); 1650 encode_putfh(&xdr, args->fh, &hdr);
1746 if(status) 1651 encode_lockt(&xdr, args, &hdr);
1747 goto out; 1652 encode_nops(&hdr);
1748 status = encode_lockt(&xdr, args); 1653 return 0;
1749out:
1750 return status;
1751} 1654}
1752 1655
1753/* 1656/*
@@ -1757,18 +1660,15 @@ static int nfs4_xdr_enc_locku(struct rpc_rqst *req, __be32 *p, struct nfs_locku_
1757{ 1660{
1758 struct xdr_stream xdr; 1661 struct xdr_stream xdr;
1759 struct compound_hdr hdr = { 1662 struct compound_hdr hdr = {
1760 .nops = 2, 1663 .nops = 0,
1761 }; 1664 };
1762 int status;
1763 1665
1764 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1666 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1765 encode_compound_hdr(&xdr, &hdr); 1667 encode_compound_hdr(&xdr, &hdr);
1766 status = encode_putfh(&xdr, args->fh); 1668 encode_putfh(&xdr, args->fh, &hdr);
1767 if(status) 1669 encode_locku(&xdr, args, &hdr);
1768 goto out; 1670 encode_nops(&hdr);
1769 status = encode_locku(&xdr, args); 1671 return 0;
1770out:
1771 return status;
1772} 1672}
1773 1673
1774/* 1674/*
@@ -1778,18 +1678,15 @@ static int nfs4_xdr_enc_readlink(struct rpc_rqst *req, __be32 *p, const struct n
1778{ 1678{
1779 struct xdr_stream xdr; 1679 struct xdr_stream xdr;
1780 struct compound_hdr hdr = { 1680 struct compound_hdr hdr = {
1781 .nops = 2, 1681 .nops = 0,
1782 }; 1682 };
1783 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; 1683 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
1784 unsigned int replen; 1684 unsigned int replen;
1785 int status;
1786 1685
1787 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1686 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1788 encode_compound_hdr(&xdr, &hdr); 1687 encode_compound_hdr(&xdr, &hdr);
1789 status = encode_putfh(&xdr, args->fh); 1688 encode_putfh(&xdr, args->fh, &hdr);
1790 if(status) 1689 encode_readlink(&xdr, args, req, &hdr);
1791 goto out;
1792 status = encode_readlink(&xdr, args, req);
1793 1690
1794 /* set up reply kvec 1691 /* set up reply kvec
1795 * toplevel_status + taglen + rescount + OP_PUTFH + status 1692 * toplevel_status + taglen + rescount + OP_PUTFH + status
@@ -1798,9 +1695,8 @@ static int nfs4_xdr_enc_readlink(struct rpc_rqst *req, __be32 *p, const struct n
1798 replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_readlink_sz) << 2; 1695 replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_readlink_sz) << 2;
1799 xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 1696 xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages,
1800 args->pgbase, args->pglen); 1697 args->pgbase, args->pglen);
1801 1698 encode_nops(&hdr);
1802out: 1699 return 0;
1803 return status;
1804} 1700}
1805 1701
1806/* 1702/*
@@ -1810,18 +1706,15 @@ static int nfs4_xdr_enc_readdir(struct rpc_rqst *req, __be32 *p, const struct nf
1810{ 1706{
1811 struct xdr_stream xdr; 1707 struct xdr_stream xdr;
1812 struct compound_hdr hdr = { 1708 struct compound_hdr hdr = {
1813 .nops = 2, 1709 .nops = 0,
1814 }; 1710 };
1815 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; 1711 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
1816 int replen; 1712 int replen;
1817 int status;
1818 1713
1819 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1714 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1820 encode_compound_hdr(&xdr, &hdr); 1715 encode_compound_hdr(&xdr, &hdr);
1821 status = encode_putfh(&xdr, args->fh); 1716 encode_putfh(&xdr, args->fh, &hdr);
1822 if(status) 1717 encode_readdir(&xdr, args, req, &hdr);
1823 goto out;
1824 status = encode_readdir(&xdr, args, req);
1825 1718
1826 /* set up reply kvec 1719 /* set up reply kvec
1827 * toplevel_status + taglen + rescount + OP_PUTFH + status 1720 * toplevel_status + taglen + rescount + OP_PUTFH + status
@@ -1833,9 +1726,8 @@ static int nfs4_xdr_enc_readdir(struct rpc_rqst *req, __be32 *p, const struct nf
1833 dprintk("%s: inlined page args = (%u, %p, %u, %u)\n", 1726 dprintk("%s: inlined page args = (%u, %p, %u, %u)\n",
1834 __func__, replen, args->pages, 1727 __func__, replen, args->pages,
1835 args->pgbase, args->count); 1728 args->pgbase, args->count);
1836 1729 encode_nops(&hdr);
1837out: 1730 return 0;
1838 return status;
1839} 1731}
1840 1732
1841/* 1733/*
@@ -1846,18 +1738,14 @@ static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readarg
1846 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; 1738 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
1847 struct xdr_stream xdr; 1739 struct xdr_stream xdr;
1848 struct compound_hdr hdr = { 1740 struct compound_hdr hdr = {
1849 .nops = 2, 1741 .nops = 0,
1850 }; 1742 };
1851 int replen, status; 1743 int replen;
1852 1744
1853 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1745 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1854 encode_compound_hdr(&xdr, &hdr); 1746 encode_compound_hdr(&xdr, &hdr);
1855 status = encode_putfh(&xdr, args->fh); 1747 encode_putfh(&xdr, args->fh, &hdr);
1856 if (status) 1748 encode_read(&xdr, args, &hdr);
1857 goto out;
1858 status = encode_read(&xdr, args);
1859 if (status)
1860 goto out;
1861 1749
1862 /* set up reply kvec 1750 /* set up reply kvec
1863 * toplevel status + taglen=0 + rescount + OP_PUTFH + status 1751 * toplevel status + taglen=0 + rescount + OP_PUTFH + status
@@ -1867,33 +1755,27 @@ static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readarg
1867 xdr_inline_pages(&req->rq_rcv_buf, replen, 1755 xdr_inline_pages(&req->rq_rcv_buf, replen,
1868 args->pages, args->pgbase, args->count); 1756 args->pages, args->pgbase, args->count);
1869 req->rq_rcv_buf.flags |= XDRBUF_READ; 1757 req->rq_rcv_buf.flags |= XDRBUF_READ;
1870out: 1758 encode_nops(&hdr);
1871 return status; 1759 return 0;
1872} 1760}
1873 1761
1874/* 1762/*
1875 * Encode an SETATTR request 1763 * Encode an SETATTR request
1876 */ 1764 */
1877static int nfs4_xdr_enc_setattr(struct rpc_rqst *req, __be32 *p, struct nfs_setattrargs *args) 1765static int nfs4_xdr_enc_setattr(struct rpc_rqst *req, __be32 *p, struct nfs_setattrargs *args)
1878
1879{ 1766{
1880 struct xdr_stream xdr; 1767 struct xdr_stream xdr;
1881 struct compound_hdr hdr = { 1768 struct compound_hdr hdr = {
1882 .nops = 3, 1769 .nops = 0,
1883 }; 1770 };
1884 int status; 1771
1885 1772 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1886 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1773 encode_compound_hdr(&xdr, &hdr);
1887 encode_compound_hdr(&xdr, &hdr); 1774 encode_putfh(&xdr, args->fh, &hdr);
1888 status = encode_putfh(&xdr, args->fh); 1775 encode_setattr(&xdr, args, args->server, &hdr);
1889 if(status) 1776 encode_getfattr(&xdr, args->bitmask, &hdr);
1890 goto out; 1777 encode_nops(&hdr);
1891 status = encode_setattr(&xdr, args, args->server); 1778 return 0;
1892 if(status)
1893 goto out;
1894 status = encode_getfattr(&xdr, args->bitmask);
1895out:
1896 return status;
1897} 1779}
1898 1780
1899/* 1781/*
@@ -1906,22 +1788,21 @@ nfs4_xdr_enc_getacl(struct rpc_rqst *req, __be32 *p,
1906 struct xdr_stream xdr; 1788 struct xdr_stream xdr;
1907 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; 1789 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
1908 struct compound_hdr hdr = { 1790 struct compound_hdr hdr = {
1909 .nops = 2, 1791 .nops = 0,
1910 }; 1792 };
1911 int replen, status; 1793 int replen;
1912 1794
1913 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1795 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1914 encode_compound_hdr(&xdr, &hdr); 1796 encode_compound_hdr(&xdr, &hdr);
1915 status = encode_putfh(&xdr, args->fh); 1797 encode_putfh(&xdr, args->fh, &hdr);
1916 if (status) 1798 encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0, &hdr);
1917 goto out; 1799
1918 status = encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0);
1919 /* set up reply buffer: */ 1800 /* set up reply buffer: */
1920 replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_getacl_sz) << 2; 1801 replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_getacl_sz) << 2;
1921 xdr_inline_pages(&req->rq_rcv_buf, replen, 1802 xdr_inline_pages(&req->rq_rcv_buf, replen,
1922 args->acl_pages, args->acl_pgbase, args->acl_len); 1803 args->acl_pages, args->acl_pgbase, args->acl_len);
1923out: 1804 encode_nops(&hdr);
1924 return status; 1805 return 0;
1925} 1806}
1926 1807
1927/* 1808/*
@@ -1931,22 +1812,17 @@ static int nfs4_xdr_enc_write(struct rpc_rqst *req, __be32 *p, struct nfs_writea
1931{ 1812{
1932 struct xdr_stream xdr; 1813 struct xdr_stream xdr;
1933 struct compound_hdr hdr = { 1814 struct compound_hdr hdr = {
1934 .nops = 3, 1815 .nops = 0,
1935 }; 1816 };
1936 int status;
1937 1817
1938 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1818 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1939 encode_compound_hdr(&xdr, &hdr); 1819 encode_compound_hdr(&xdr, &hdr);
1940 status = encode_putfh(&xdr, args->fh); 1820 encode_putfh(&xdr, args->fh, &hdr);
1941 if (status) 1821 encode_write(&xdr, args, &hdr);
1942 goto out;
1943 status = encode_write(&xdr, args);
1944 if (status)
1945 goto out;
1946 req->rq_snd_buf.flags |= XDRBUF_WRITE; 1822 req->rq_snd_buf.flags |= XDRBUF_WRITE;
1947 status = encode_getfattr(&xdr, args->bitmask); 1823 encode_getfattr(&xdr, args->bitmask, &hdr);
1948out: 1824 encode_nops(&hdr);
1949 return status; 1825 return 0;
1950} 1826}
1951 1827
1952/* 1828/*
@@ -1956,21 +1832,16 @@ static int nfs4_xdr_enc_commit(struct rpc_rqst *req, __be32 *p, struct nfs_write
1956{ 1832{
1957 struct xdr_stream xdr; 1833 struct xdr_stream xdr;
1958 struct compound_hdr hdr = { 1834 struct compound_hdr hdr = {
1959 .nops = 3, 1835 .nops = 0,
1960 }; 1836 };
1961 int status;
1962 1837
1963 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1838 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1964 encode_compound_hdr(&xdr, &hdr); 1839 encode_compound_hdr(&xdr, &hdr);
1965 status = encode_putfh(&xdr, args->fh); 1840 encode_putfh(&xdr, args->fh, &hdr);
1966 if (status) 1841 encode_commit(&xdr, args, &hdr);
1967 goto out; 1842 encode_getfattr(&xdr, args->bitmask, &hdr);
1968 status = encode_commit(&xdr, args); 1843 encode_nops(&hdr);
1969 if (status) 1844 return 0;
1970 goto out;
1971 status = encode_getfattr(&xdr, args->bitmask);
1972out:
1973 return status;
1974} 1845}
1975 1846
1976/* 1847/*
@@ -1980,16 +1851,15 @@ static int nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, __be32 *p, struct nfs4_fsin
1980{ 1851{
1981 struct xdr_stream xdr; 1852 struct xdr_stream xdr;
1982 struct compound_hdr hdr = { 1853 struct compound_hdr hdr = {
1983 .nops = 2, 1854 .nops = 0,
1984 }; 1855 };
1985 int status;
1986 1856
1987 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1857 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1988 encode_compound_hdr(&xdr, &hdr); 1858 encode_compound_hdr(&xdr, &hdr);
1989 status = encode_putfh(&xdr, args->fh); 1859 encode_putfh(&xdr, args->fh, &hdr);
1990 if (!status) 1860 encode_fsinfo(&xdr, args->bitmask, &hdr);
1991 status = encode_fsinfo(&xdr, args->bitmask); 1861 encode_nops(&hdr);
1992 return status; 1862 return 0;
1993} 1863}
1994 1864
1995/* 1865/*
@@ -1999,17 +1869,16 @@ static int nfs4_xdr_enc_pathconf(struct rpc_rqst *req, __be32 *p, const struct n
1999{ 1869{
2000 struct xdr_stream xdr; 1870 struct xdr_stream xdr;
2001 struct compound_hdr hdr = { 1871 struct compound_hdr hdr = {
2002 .nops = 2, 1872 .nops = 0,
2003 }; 1873 };
2004 int status;
2005 1874
2006 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1875 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2007 encode_compound_hdr(&xdr, &hdr); 1876 encode_compound_hdr(&xdr, &hdr);
2008 status = encode_putfh(&xdr, args->fh); 1877 encode_putfh(&xdr, args->fh, &hdr);
2009 if (!status) 1878 encode_getattr_one(&xdr, args->bitmask[0] & nfs4_pathconf_bitmap[0],
2010 status = encode_getattr_one(&xdr, 1879 &hdr);
2011 args->bitmask[0] & nfs4_pathconf_bitmap[0]); 1880 encode_nops(&hdr);
2012 return status; 1881 return 0;
2013} 1882}
2014 1883
2015/* 1884/*
@@ -2019,18 +1888,16 @@ static int nfs4_xdr_enc_statfs(struct rpc_rqst *req, __be32 *p, const struct nfs
2019{ 1888{
2020 struct xdr_stream xdr; 1889 struct xdr_stream xdr;
2021 struct compound_hdr hdr = { 1890 struct compound_hdr hdr = {
2022 .nops = 2, 1891 .nops = 0,
2023 }; 1892 };
2024 int status;
2025 1893
2026 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1894 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2027 encode_compound_hdr(&xdr, &hdr); 1895 encode_compound_hdr(&xdr, &hdr);
2028 status = encode_putfh(&xdr, args->fh); 1896 encode_putfh(&xdr, args->fh, &hdr);
2029 if (status == 0) 1897 encode_getattr_two(&xdr, args->bitmask[0] & nfs4_statfs_bitmap[0],
2030 status = encode_getattr_two(&xdr, 1898 args->bitmask[1] & nfs4_statfs_bitmap[1], &hdr);
2031 args->bitmask[0] & nfs4_statfs_bitmap[0], 1899 encode_nops(&hdr);
2032 args->bitmask[1] & nfs4_statfs_bitmap[1]); 1900 return 0;
2033 return status;
2034} 1901}
2035 1902
2036/* 1903/*
@@ -2040,19 +1907,18 @@ static int nfs4_xdr_enc_server_caps(struct rpc_rqst *req, __be32 *p, const struc
2040{ 1907{
2041 struct xdr_stream xdr; 1908 struct xdr_stream xdr;
2042 struct compound_hdr hdr = { 1909 struct compound_hdr hdr = {
2043 .nops = 2, 1910 .nops = 0,
2044 }; 1911 };
2045 int status;
2046 1912
2047 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1913 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2048 encode_compound_hdr(&xdr, &hdr); 1914 encode_compound_hdr(&xdr, &hdr);
2049 status = encode_putfh(&xdr, fhandle); 1915 encode_putfh(&xdr, fhandle, &hdr);
2050 if (status == 0) 1916 encode_getattr_one(&xdr, FATTR4_WORD0_SUPPORTED_ATTRS|
2051 status = encode_getattr_one(&xdr, FATTR4_WORD0_SUPPORTED_ATTRS| 1917 FATTR4_WORD0_LINK_SUPPORT|
2052 FATTR4_WORD0_LINK_SUPPORT| 1918 FATTR4_WORD0_SYMLINK_SUPPORT|
2053 FATTR4_WORD0_SYMLINK_SUPPORT| 1919 FATTR4_WORD0_ACLSUPPORT, &hdr);
2054 FATTR4_WORD0_ACLSUPPORT); 1920 encode_nops(&hdr);
2055 return status; 1921 return 0;
2056} 1922}
2057 1923
2058/* 1924/*
@@ -2062,12 +1928,14 @@ static int nfs4_xdr_enc_renew(struct rpc_rqst *req, __be32 *p, struct nfs_client
2062{ 1928{
2063 struct xdr_stream xdr; 1929 struct xdr_stream xdr;
2064 struct compound_hdr hdr = { 1930 struct compound_hdr hdr = {
2065 .nops = 1, 1931 .nops = 0,
2066 }; 1932 };
2067 1933
2068 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1934 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2069 encode_compound_hdr(&xdr, &hdr); 1935 encode_compound_hdr(&xdr, &hdr);
2070 return encode_renew(&xdr, clp); 1936 encode_renew(&xdr, clp, &hdr);
1937 encode_nops(&hdr);
1938 return 0;
2071} 1939}
2072 1940
2073/* 1941/*
@@ -2077,12 +1945,14 @@ static int nfs4_xdr_enc_setclientid(struct rpc_rqst *req, __be32 *p, struct nfs4
2077{ 1945{
2078 struct xdr_stream xdr; 1946 struct xdr_stream xdr;
2079 struct compound_hdr hdr = { 1947 struct compound_hdr hdr = {
2080 .nops = 1, 1948 .nops = 0,
2081 }; 1949 };
2082 1950
2083 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1951 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2084 encode_compound_hdr(&xdr, &hdr); 1952 encode_compound_hdr(&xdr, &hdr);
2085 return encode_setclientid(&xdr, sc); 1953 encode_setclientid(&xdr, sc, &hdr);
1954 encode_nops(&hdr);
1955 return 0;
2086} 1956}
2087 1957
2088/* 1958/*
@@ -2092,19 +1962,17 @@ static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, __be32 *p, str
2092{ 1962{
2093 struct xdr_stream xdr; 1963 struct xdr_stream xdr;
2094 struct compound_hdr hdr = { 1964 struct compound_hdr hdr = {
2095 .nops = 3, 1965 .nops = 0,
2096 }; 1966 };
2097 const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 }; 1967 const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 };
2098 int status;
2099 1968
2100 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1969 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2101 encode_compound_hdr(&xdr, &hdr); 1970 encode_compound_hdr(&xdr, &hdr);
2102 status = encode_setclientid_confirm(&xdr, clp); 1971 encode_setclientid_confirm(&xdr, clp, &hdr);
2103 if (!status) 1972 encode_putrootfh(&xdr, &hdr);
2104 status = encode_putrootfh(&xdr); 1973 encode_fsinfo(&xdr, lease_bitmap, &hdr);
2105 if (!status) 1974 encode_nops(&hdr);
2106 status = encode_fsinfo(&xdr, lease_bitmap); 1975 return 0;
2107 return status;
2108} 1976}
2109 1977
2110/* 1978/*
@@ -2114,21 +1982,16 @@ static int nfs4_xdr_enc_delegreturn(struct rpc_rqst *req, __be32 *p, const struc
2114{ 1982{
2115 struct xdr_stream xdr; 1983 struct xdr_stream xdr;
2116 struct compound_hdr hdr = { 1984 struct compound_hdr hdr = {
2117 .nops = 3, 1985 .nops = 0,
2118 }; 1986 };
2119 int status;
2120 1987
2121 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1988 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2122 encode_compound_hdr(&xdr, &hdr); 1989 encode_compound_hdr(&xdr, &hdr);
2123 status = encode_putfh(&xdr, args->fhandle); 1990 encode_putfh(&xdr, args->fhandle, &hdr);
2124 if (status != 0) 1991 encode_delegreturn(&xdr, args->stateid, &hdr);
2125 goto out; 1992 encode_getfattr(&xdr, args->bitmask, &hdr);
2126 status = encode_delegreturn(&xdr, args->stateid); 1993 encode_nops(&hdr);
2127 if (status != 0) 1994 return 0;
2128 goto out;
2129 status = encode_getfattr(&xdr, args->bitmask);
2130out:
2131 return status;
2132} 1995}
2133 1996
2134/* 1997/*
@@ -2138,20 +2001,17 @@ static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs
2138{ 2001{
2139 struct xdr_stream xdr; 2002 struct xdr_stream xdr;
2140 struct compound_hdr hdr = { 2003 struct compound_hdr hdr = {
2141 .nops = 3, 2004 .nops = 0,
2142 }; 2005 };
2143 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; 2006 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
2144 int replen; 2007 int replen;
2145 int status;
2146 2008
2147 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2009 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2148 encode_compound_hdr(&xdr, &hdr); 2010 encode_compound_hdr(&xdr, &hdr);
2149 if ((status = encode_putfh(&xdr, args->dir_fh)) != 0) 2011 encode_putfh(&xdr, args->dir_fh, &hdr);
2150 goto out; 2012 encode_lookup(&xdr, args->name, &hdr);
2151 if ((status = encode_lookup(&xdr, args->name)) != 0) 2013 encode_fs_locations(&xdr, args->bitmask, &hdr);
2152 goto out; 2014
2153 if ((status = encode_fs_locations(&xdr, args->bitmask)) != 0)
2154 goto out;
2155 /* set up reply 2015 /* set up reply
2156 * toplevel_status + OP_PUTFH + status 2016 * toplevel_status + OP_PUTFH + status
2157 * + OP_LOOKUP + status + OP_GETATTR + status = 7 2017 * + OP_LOOKUP + status + OP_GETATTR + status = 7
@@ -2159,8 +2019,8 @@ static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs
2159 replen = (RPC_REPHDRSIZE + auth->au_rslack + 7) << 2; 2019 replen = (RPC_REPHDRSIZE + auth->au_rslack + 7) << 2;
2160 xdr_inline_pages(&req->rq_rcv_buf, replen, &args->page, 2020 xdr_inline_pages(&req->rq_rcv_buf, replen, &args->page,
2161 0, PAGE_SIZE); 2021 0, PAGE_SIZE);
2162out: 2022 encode_nops(&hdr);
2163 return status; 2023 return 0;
2164} 2024}
2165 2025
2166/* 2026/*
@@ -2217,11 +2077,13 @@ static int decode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr)
2217 READ_BUF(8); 2077 READ_BUF(8);
2218 READ32(hdr->status); 2078 READ32(hdr->status);
2219 READ32(hdr->taglen); 2079 READ32(hdr->taglen);
2220 2080
2221 READ_BUF(hdr->taglen + 4); 2081 READ_BUF(hdr->taglen + 4);
2222 hdr->tag = (char *)p; 2082 hdr->tag = (char *)p;
2223 p += XDR_QUADLEN(hdr->taglen); 2083 p += XDR_QUADLEN(hdr->taglen);
2224 READ32(hdr->nops); 2084 READ32(hdr->nops);
2085 if (unlikely(hdr->nops < 1))
2086 return nfs4_stat_to_errno(hdr->status);
2225 return 0; 2087 return 0;
2226} 2088}
2227 2089
@@ -3047,8 +2909,7 @@ static int decode_create(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
3047static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_res *res) 2909static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_res *res)
3048{ 2910{
3049 __be32 *savep; 2911 __be32 *savep;
3050 uint32_t attrlen, 2912 uint32_t attrlen, bitmap[2] = {0};
3051 bitmap[2] = {0};
3052 int status; 2913 int status;
3053 2914
3054 if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) 2915 if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
@@ -3070,14 +2931,13 @@ xdr_error:
3070 dprintk("%s: xdr returned %d!\n", __func__, -status); 2931 dprintk("%s: xdr returned %d!\n", __func__, -status);
3071 return status; 2932 return status;
3072} 2933}
3073 2934
3074static int decode_statfs(struct xdr_stream *xdr, struct nfs_fsstat *fsstat) 2935static int decode_statfs(struct xdr_stream *xdr, struct nfs_fsstat *fsstat)
3075{ 2936{
3076 __be32 *savep; 2937 __be32 *savep;
3077 uint32_t attrlen, 2938 uint32_t attrlen, bitmap[2] = {0};
3078 bitmap[2] = {0};
3079 int status; 2939 int status;
3080 2940
3081 if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) 2941 if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
3082 goto xdr_error; 2942 goto xdr_error;
3083 if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) 2943 if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
@@ -3107,10 +2967,9 @@ xdr_error:
3107static int decode_pathconf(struct xdr_stream *xdr, struct nfs_pathconf *pathconf) 2967static int decode_pathconf(struct xdr_stream *xdr, struct nfs_pathconf *pathconf)
3108{ 2968{
3109 __be32 *savep; 2969 __be32 *savep;
3110 uint32_t attrlen, 2970 uint32_t attrlen, bitmap[2] = {0};
3111 bitmap[2] = {0};
3112 int status; 2971 int status;
3113 2972
3114 if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) 2973 if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
3115 goto xdr_error; 2974 goto xdr_error;
3116 if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) 2975 if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
@@ -3256,7 +3115,7 @@ static int decode_getfh(struct xdr_stream *xdr, struct nfs_fh *fh)
3256static int decode_link(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) 3115static int decode_link(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
3257{ 3116{
3258 int status; 3117 int status;
3259 3118
3260 status = decode_op_hdr(xdr, OP_LINK); 3119 status = decode_op_hdr(xdr, OP_LINK);
3261 if (status) 3120 if (status)
3262 return status; 3121 return status;
@@ -3344,27 +3203,27 @@ static int decode_lookup(struct xdr_stream *xdr)
3344/* This is too sick! */ 3203/* This is too sick! */
3345static int decode_space_limit(struct xdr_stream *xdr, u64 *maxsize) 3204static int decode_space_limit(struct xdr_stream *xdr, u64 *maxsize)
3346{ 3205{
3347 __be32 *p; 3206 __be32 *p;
3348 uint32_t limit_type, nblocks, blocksize; 3207 uint32_t limit_type, nblocks, blocksize;
3349 3208
3350 READ_BUF(12); 3209 READ_BUF(12);
3351 READ32(limit_type); 3210 READ32(limit_type);
3352 switch (limit_type) { 3211 switch (limit_type) {
3353 case 1: 3212 case 1:
3354 READ64(*maxsize); 3213 READ64(*maxsize);
3355 break; 3214 break;
3356 case 2: 3215 case 2:
3357 READ32(nblocks); 3216 READ32(nblocks);
3358 READ32(blocksize); 3217 READ32(blocksize);
3359 *maxsize = (uint64_t)nblocks * (uint64_t)blocksize; 3218 *maxsize = (uint64_t)nblocks * (uint64_t)blocksize;
3360 } 3219 }
3361 return 0; 3220 return 0;
3362} 3221}
3363 3222
3364static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res) 3223static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
3365{ 3224{
3366 __be32 *p; 3225 __be32 *p;
3367 uint32_t delegation_type; 3226 uint32_t delegation_type;
3368 3227
3369 READ_BUF(4); 3228 READ_BUF(4);
3370 READ32(delegation_type); 3229 READ32(delegation_type);
@@ -3375,13 +3234,14 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
3375 READ_BUF(NFS4_STATEID_SIZE+4); 3234 READ_BUF(NFS4_STATEID_SIZE+4);
3376 COPYMEM(res->delegation.data, NFS4_STATEID_SIZE); 3235 COPYMEM(res->delegation.data, NFS4_STATEID_SIZE);
3377 READ32(res->do_recall); 3236 READ32(res->do_recall);
3237
3378 switch (delegation_type) { 3238 switch (delegation_type) {
3379 case NFS4_OPEN_DELEGATE_READ: 3239 case NFS4_OPEN_DELEGATE_READ:
3380 res->delegation_type = FMODE_READ; 3240 res->delegation_type = FMODE_READ;
3381 break; 3241 break;
3382 case NFS4_OPEN_DELEGATE_WRITE: 3242 case NFS4_OPEN_DELEGATE_WRITE:
3383 res->delegation_type = FMODE_WRITE|FMODE_READ; 3243 res->delegation_type = FMODE_WRITE|FMODE_READ;
3384 if (decode_space_limit(xdr, &res->maxsize) < 0) 3244 if (decode_space_limit(xdr, &res->maxsize) < 0)
3385 return -EIO; 3245 return -EIO;
3386 } 3246 }
3387 return decode_ace(xdr, NULL, res->server->nfs_client); 3247 return decode_ace(xdr, NULL, res->server->nfs_client);
@@ -3389,27 +3249,27 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
3389 3249
3390static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) 3250static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
3391{ 3251{
3392 __be32 *p; 3252 __be32 *p;
3393 uint32_t savewords, bmlen, i; 3253 uint32_t savewords, bmlen, i;
3394 int status; 3254 int status;
3395 3255
3396 status = decode_op_hdr(xdr, OP_OPEN); 3256 status = decode_op_hdr(xdr, OP_OPEN);
3397 if (status != -EIO) 3257 if (status != -EIO)
3398 nfs_increment_open_seqid(status, res->seqid); 3258 nfs_increment_open_seqid(status, res->seqid);
3399 if (status) 3259 if (status)
3400 return status; 3260 return status;
3401 READ_BUF(NFS4_STATEID_SIZE); 3261 READ_BUF(NFS4_STATEID_SIZE);
3402 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE); 3262 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
3403 3263
3404 decode_change_info(xdr, &res->cinfo); 3264 decode_change_info(xdr, &res->cinfo);
3405 3265
3406 READ_BUF(8); 3266 READ_BUF(8);
3407 READ32(res->rflags); 3267 READ32(res->rflags);
3408 READ32(bmlen); 3268 READ32(bmlen);
3409 if (bmlen > 10) 3269 if (bmlen > 10)
3410 goto xdr_error; 3270 goto xdr_error;
3411 3271
3412 READ_BUF(bmlen << 2); 3272 READ_BUF(bmlen << 2);
3413 savewords = min_t(uint32_t, bmlen, NFS4_BITMAP_SIZE); 3273 savewords = min_t(uint32_t, bmlen, NFS4_BITMAP_SIZE);
3414 for (i = 0; i < savewords; ++i) 3274 for (i = 0; i < savewords; ++i)
3415 READ32(res->attrset[i]); 3275 READ32(res->attrset[i]);
@@ -3424,17 +3284,17 @@ xdr_error:
3424 3284
3425static int decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmres *res) 3285static int decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmres *res)
3426{ 3286{
3427 __be32 *p; 3287 __be32 *p;
3428 int status; 3288 int status;
3429 3289
3430 status = decode_op_hdr(xdr, OP_OPEN_CONFIRM); 3290 status = decode_op_hdr(xdr, OP_OPEN_CONFIRM);
3431 if (status != -EIO) 3291 if (status != -EIO)
3432 nfs_increment_open_seqid(status, res->seqid); 3292 nfs_increment_open_seqid(status, res->seqid);
3433 if (status) 3293 if (status)
3434 return status; 3294 return status;
3435 READ_BUF(NFS4_STATEID_SIZE); 3295 READ_BUF(NFS4_STATEID_SIZE);
3436 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE); 3296 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
3437 return 0; 3297 return 0;
3438} 3298}
3439 3299
3440static int decode_open_downgrade(struct xdr_stream *xdr, struct nfs_closeres *res) 3300static int decode_open_downgrade(struct xdr_stream *xdr, struct nfs_closeres *res)
@@ -3562,7 +3422,7 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
3562 dprintk("NFS: readdir reply truncated!\n"); 3422 dprintk("NFS: readdir reply truncated!\n");
3563 entry[1] = 1; 3423 entry[1] = 1;
3564 } 3424 }
3565out: 3425out:
3566 kunmap_atomic(kaddr, KM_USER0); 3426 kunmap_atomic(kaddr, KM_USER0);
3567 return 0; 3427 return 0;
3568short_pkt: 3428short_pkt:
@@ -3718,7 +3578,6 @@ static int decode_setattr(struct xdr_stream *xdr, struct nfs_setattrres *res)
3718 uint32_t bmlen; 3578 uint32_t bmlen;
3719 int status; 3579 int status;
3720 3580
3721
3722 status = decode_op_hdr(xdr, OP_SETATTR); 3581 status = decode_op_hdr(xdr, OP_SETATTR);
3723 if (status) 3582 if (status)
3724 return status; 3583 return status;
@@ -3738,7 +3597,7 @@ static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp)
3738 READ32(opnum); 3597 READ32(opnum);
3739 if (opnum != OP_SETCLIENTID) { 3598 if (opnum != OP_SETCLIENTID) {
3740 dprintk("nfs: decode_setclientid: Server returned operation" 3599 dprintk("nfs: decode_setclientid: Server returned operation"
3741 " %d\n", opnum); 3600 " %d\n", opnum);
3742 return -EIO; 3601 return -EIO;
3743 } 3602 }
3744 READ32(nfserr); 3603 READ32(nfserr);
@@ -3792,34 +3651,34 @@ static int decode_delegreturn(struct xdr_stream *xdr)
3792} 3651}
3793 3652
3794/* 3653/*
3654 * END OF "GENERIC" DECODE ROUTINES.
3655 */
3656
3657/*
3795 * Decode OPEN_DOWNGRADE response 3658 * Decode OPEN_DOWNGRADE response
3796 */ 3659 */
3797static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, __be32 *p, struct nfs_closeres *res) 3660static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, __be32 *p, struct nfs_closeres *res)
3798{ 3661{
3799 struct xdr_stream xdr; 3662 struct xdr_stream xdr;
3800 struct compound_hdr hdr; 3663 struct compound_hdr hdr;
3801 int status; 3664 int status;
3802 3665
3803 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 3666 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3804 status = decode_compound_hdr(&xdr, &hdr); 3667 status = decode_compound_hdr(&xdr, &hdr);
3805 if (status) 3668 if (status)
3806 goto out; 3669 goto out;
3807 status = decode_putfh(&xdr); 3670 status = decode_putfh(&xdr);
3808 if (status) 3671 if (status)
3809 goto out; 3672 goto out;
3810 status = decode_open_downgrade(&xdr, res); 3673 status = decode_open_downgrade(&xdr, res);
3811 if (status != 0) 3674 if (status != 0)
3812 goto out; 3675 goto out;
3813 decode_getfattr(&xdr, res->fattr, res->server); 3676 decode_getfattr(&xdr, res->fattr, res->server);
3814out: 3677out:
3815 return status; 3678 return status;
3816} 3679}
3817 3680
3818/* 3681/*
3819 * END OF "GENERIC" DECODE ROUTINES.
3820 */
3821
3822/*
3823 * Decode ACCESS response 3682 * Decode ACCESS response
3824 */ 3683 */
3825static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_accessres *res) 3684static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_accessres *res)
@@ -3827,7 +3686,7 @@ static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_ac
3827 struct xdr_stream xdr; 3686 struct xdr_stream xdr;
3828 struct compound_hdr hdr; 3687 struct compound_hdr hdr;
3829 int status; 3688 int status;
3830 3689
3831 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 3690 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3832 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) 3691 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
3833 goto out; 3692 goto out;
@@ -3850,7 +3709,7 @@ static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_lo
3850 struct xdr_stream xdr; 3709 struct xdr_stream xdr;
3851 struct compound_hdr hdr; 3710 struct compound_hdr hdr;
3852 int status; 3711 int status;
3853 3712
3854 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 3713 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3855 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) 3714 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
3856 goto out; 3715 goto out;
@@ -3873,7 +3732,7 @@ static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp, __be32 *p, struct nf
3873 struct xdr_stream xdr; 3732 struct xdr_stream xdr;
3874 struct compound_hdr hdr; 3733 struct compound_hdr hdr;
3875 int status; 3734 int status;
3876 3735
3877 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 3736 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3878 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) 3737 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
3879 goto out; 3738 goto out;
@@ -3893,7 +3752,7 @@ static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, __be32 *p, struct nfs_rem
3893 struct xdr_stream xdr; 3752 struct xdr_stream xdr;
3894 struct compound_hdr hdr; 3753 struct compound_hdr hdr;
3895 int status; 3754 int status;
3896 3755
3897 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 3756 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3898 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) 3757 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
3899 goto out; 3758 goto out;
@@ -3914,7 +3773,7 @@ static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_re
3914 struct xdr_stream xdr; 3773 struct xdr_stream xdr;
3915 struct compound_hdr hdr; 3774 struct compound_hdr hdr;
3916 int status; 3775 int status;
3917 3776
3918 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 3777 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3919 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) 3778 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
3920 goto out; 3779 goto out;
@@ -3944,7 +3803,7 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_link
3944 struct xdr_stream xdr; 3803 struct xdr_stream xdr;
3945 struct compound_hdr hdr; 3804 struct compound_hdr hdr;
3946 int status; 3805 int status;
3947 3806
3948 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 3807 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3949 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) 3808 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
3950 goto out; 3809 goto out;
@@ -3977,7 +3836,7 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_cr
3977 struct xdr_stream xdr; 3836 struct xdr_stream xdr;
3978 struct compound_hdr hdr; 3837 struct compound_hdr hdr;
3979 int status; 3838 int status;
3980 3839
3981 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 3840 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3982 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) 3841 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
3983 goto out; 3842 goto out;
@@ -4014,7 +3873,7 @@ static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_g
4014 struct xdr_stream xdr; 3873 struct xdr_stream xdr;
4015 struct compound_hdr hdr; 3874 struct compound_hdr hdr;
4016 int status; 3875 int status;
4017 3876
4018 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 3877 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
4019 status = decode_compound_hdr(&xdr, &hdr); 3878 status = decode_compound_hdr(&xdr, &hdr);
4020 if (status) 3879 if (status)
@@ -4025,7 +3884,6 @@ static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_g
4025 status = decode_getfattr(&xdr, res->fattr, res->server); 3884 status = decode_getfattr(&xdr, res->fattr, res->server);
4026out: 3885out:
4027 return status; 3886 return status;
4028
4029} 3887}
4030 3888
4031/* 3889/*
@@ -4034,21 +3892,20 @@ out:
4034static int 3892static int
4035nfs4_xdr_enc_setacl(struct rpc_rqst *req, __be32 *p, struct nfs_setaclargs *args) 3893nfs4_xdr_enc_setacl(struct rpc_rqst *req, __be32 *p, struct nfs_setaclargs *args)
4036{ 3894{
4037 struct xdr_stream xdr; 3895 struct xdr_stream xdr;
4038 struct compound_hdr hdr = { 3896 struct compound_hdr hdr = {
4039 .nops = 2, 3897 .nops = 0,
4040 }; 3898 };
4041 int status; 3899 int status;
4042 3900
4043 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 3901 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
4044 encode_compound_hdr(&xdr, &hdr); 3902 encode_compound_hdr(&xdr, &hdr);
4045 status = encode_putfh(&xdr, args->fh); 3903 encode_putfh(&xdr, args->fh, &hdr);
4046 if (status) 3904 status = encode_setacl(&xdr, args, &hdr);
4047 goto out; 3905 encode_nops(&hdr);
4048 status = encode_setacl(&xdr, args); 3906 return status;
4049out:
4050 return status;
4051} 3907}
3908
4052/* 3909/*
4053 * Decode SETACL response 3910 * Decode SETACL response
4054 */ 3911 */
@@ -4099,18 +3956,18 @@ out:
4099 */ 3956 */
4100static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, __be32 *p, struct nfs_closeres *res) 3957static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, __be32 *p, struct nfs_closeres *res)
4101{ 3958{
4102 struct xdr_stream xdr; 3959 struct xdr_stream xdr;
4103 struct compound_hdr hdr; 3960 struct compound_hdr hdr;
4104 int status; 3961 int status;
4105 3962
4106 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 3963 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
4107 status = decode_compound_hdr(&xdr, &hdr); 3964 status = decode_compound_hdr(&xdr, &hdr);
4108 if (status) 3965 if (status)
4109 goto out; 3966 goto out;
4110 status = decode_putfh(&xdr); 3967 status = decode_putfh(&xdr);
4111 if (status) 3968 if (status)
4112 goto out; 3969 goto out;
4113 status = decode_close(&xdr, res); 3970 status = decode_close(&xdr, res);
4114 if (status != 0) 3971 if (status != 0)
4115 goto out; 3972 goto out;
4116 /* 3973 /*
@@ -4121,7 +3978,7 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, __be32 *p, struct nfs_clos
4121 */ 3978 */
4122 decode_getfattr(&xdr, res->fattr, res->server); 3979 decode_getfattr(&xdr, res->fattr, res->server);
4123out: 3980out:
4124 return status; 3981 return status;
4125} 3982}
4126 3983
4127/* 3984/*
@@ -4129,23 +3986,23 @@ out:
4129 */ 3986 */
4130static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openres *res) 3987static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openres *res)
4131{ 3988{
4132 struct xdr_stream xdr; 3989 struct xdr_stream xdr;
4133 struct compound_hdr hdr; 3990 struct compound_hdr hdr;
4134 int status; 3991 int status;
4135 3992
4136 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 3993 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
4137 status = decode_compound_hdr(&xdr, &hdr); 3994 status = decode_compound_hdr(&xdr, &hdr);
4138 if (status) 3995 if (status)
4139 goto out; 3996 goto out;
4140 status = decode_putfh(&xdr); 3997 status = decode_putfh(&xdr);
4141 if (status) 3998 if (status)
4142 goto out; 3999 goto out;
4143 status = decode_savefh(&xdr); 4000 status = decode_savefh(&xdr);
4001 if (status)
4002 goto out;
4003 status = decode_open(&xdr, res);
4144 if (status) 4004 if (status)
4145 goto out; 4005 goto out;
4146 status = decode_open(&xdr, res);
4147 if (status)
4148 goto out;
4149 if (decode_getfh(&xdr, &res->fh) != 0) 4006 if (decode_getfh(&xdr, &res->fh) != 0)
4150 goto out; 4007 goto out;
4151 if (decode_getfattr(&xdr, res->f_attr, res->server) != 0) 4008 if (decode_getfattr(&xdr, res->f_attr, res->server) != 0)
@@ -4154,7 +4011,7 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openr
4154 goto out; 4011 goto out;
4155 decode_getfattr(&xdr, res->dir_attr, res->server); 4012 decode_getfattr(&xdr, res->dir_attr, res->server);
4156out: 4013out:
4157 return status; 4014 return status;
4158} 4015}
4159 4016
4160/* 4017/*
@@ -4162,20 +4019,20 @@ out:
4162 */ 4019 */
4163static int nfs4_xdr_dec_open_confirm(struct rpc_rqst *rqstp, __be32 *p, struct nfs_open_confirmres *res) 4020static int nfs4_xdr_dec_open_confirm(struct rpc_rqst *rqstp, __be32 *p, struct nfs_open_confirmres *res)
4164{ 4021{
4165 struct xdr_stream xdr; 4022 struct xdr_stream xdr;
4166 struct compound_hdr hdr; 4023 struct compound_hdr hdr;
4167 int status; 4024 int status;
4168 4025
4169 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 4026 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
4170 status = decode_compound_hdr(&xdr, &hdr); 4027 status = decode_compound_hdr(&xdr, &hdr);
4171 if (status) 4028 if (status)
4172 goto out; 4029 goto out;
4173 status = decode_putfh(&xdr); 4030 status = decode_putfh(&xdr);
4174 if (status) 4031 if (status)
4175 goto out; 4032 goto out;
4176 status = decode_open_confirm(&xdr, res); 4033 status = decode_open_confirm(&xdr, res);
4177out: 4034out:
4178 return status; 4035 return status;
4179} 4036}
4180 4037
4181/* 4038/*
@@ -4183,23 +4040,23 @@ out:
4183 */ 4040 */
4184static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openres *res) 4041static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openres *res)
4185{ 4042{
4186 struct xdr_stream xdr; 4043 struct xdr_stream xdr;
4187 struct compound_hdr hdr; 4044 struct compound_hdr hdr;
4188 int status; 4045 int status;
4189 4046
4190 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 4047 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
4191 status = decode_compound_hdr(&xdr, &hdr); 4048 status = decode_compound_hdr(&xdr, &hdr);
4192 if (status) 4049 if (status)
4193 goto out; 4050 goto out;
4194 status = decode_putfh(&xdr); 4051 status = decode_putfh(&xdr);
4195 if (status) 4052 if (status)
4196 goto out; 4053 goto out;
4197 status = decode_open(&xdr, res); 4054 status = decode_open(&xdr, res);
4198 if (status) 4055 if (status)
4199 goto out; 4056 goto out;
4200 decode_getfattr(&xdr, res->f_attr, res->server); 4057 decode_getfattr(&xdr, res->f_attr, res->server);
4201out: 4058out:
4202 return status; 4059 return status;
4203} 4060}
4204 4061
4205/* 4062/*
@@ -4207,25 +4064,25 @@ out:
4207 */ 4064 */
4208static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_setattrres *res) 4065static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_setattrres *res)
4209{ 4066{
4210 struct xdr_stream xdr; 4067 struct xdr_stream xdr;
4211 struct compound_hdr hdr; 4068 struct compound_hdr hdr;
4212 int status; 4069 int status;
4213 4070
4214 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 4071 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
4215 status = decode_compound_hdr(&xdr, &hdr); 4072 status = decode_compound_hdr(&xdr, &hdr);
4216 if (status) 4073 if (status)
4217 goto out; 4074 goto out;
4218 status = decode_putfh(&xdr); 4075 status = decode_putfh(&xdr);
4219 if (status) 4076 if (status)
4220 goto out; 4077 goto out;
4221 status = decode_setattr(&xdr, res); 4078 status = decode_setattr(&xdr, res);
4222 if (status) 4079 if (status)
4223 goto out; 4080 goto out;
4224 status = decode_getfattr(&xdr, res->fattr, res->server); 4081 status = decode_getfattr(&xdr, res->fattr, res->server);
4225 if (status == NFS4ERR_DELAY) 4082 if (status == NFS4ERR_DELAY)
4226 status = 0; 4083 status = 0;
4227out: 4084out:
4228 return status; 4085 return status;
4229} 4086}
4230 4087
4231/* 4088/*
@@ -4421,8 +4278,6 @@ static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, __be32 *p, struct nfs_fsinf
4421 status = decode_putfh(&xdr); 4278 status = decode_putfh(&xdr);
4422 if (!status) 4279 if (!status)
4423 status = decode_fsinfo(&xdr, fsinfo); 4280 status = decode_fsinfo(&xdr, fsinfo);
4424 if (!status)
4425 status = nfs4_stat_to_errno(hdr.status);
4426 return status; 4281 return status;
4427} 4282}
4428 4283
@@ -4511,8 +4366,6 @@ static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, __be32 *p,
4511 status = decode_compound_hdr(&xdr, &hdr); 4366 status = decode_compound_hdr(&xdr, &hdr);
4512 if (!status) 4367 if (!status)
4513 status = decode_setclientid(&xdr, clp); 4368 status = decode_setclientid(&xdr, clp);
4514 if (!status)
4515 status = nfs4_stat_to_errno(hdr.status);
4516 return status; 4369 return status;
4517} 4370}
4518 4371
@@ -4533,8 +4386,6 @@ static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, __be32 *p, str
4533 status = decode_putrootfh(&xdr); 4386 status = decode_putrootfh(&xdr);
4534 if (!status) 4387 if (!status)
4535 status = decode_fsinfo(&xdr, fsinfo); 4388 status = decode_fsinfo(&xdr, fsinfo);
4536 if (!status)
4537 status = nfs4_stat_to_errno(hdr.status);
4538 return status; 4389 return status;
4539} 4390}
4540 4391
@@ -4715,7 +4566,7 @@ nfs4_stat_to_errno(int stat)
4715 .p_replen = NFS4_##restype##_sz, \ 4566 .p_replen = NFS4_##restype##_sz, \
4716 .p_statidx = NFSPROC4_CLNT_##proc, \ 4567 .p_statidx = NFSPROC4_CLNT_##proc, \
4717 .p_name = #proc, \ 4568 .p_name = #proc, \
4718 } 4569}
4719 4570
4720struct rpc_procinfo nfs4_procedures[] = { 4571struct rpc_procinfo nfs4_procedures[] = {
4721 PROC(READ, enc_read, dec_read), 4572 PROC(READ, enc_read, dec_read),
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 8478fc25dae..d9ef602fbc5 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -86,6 +86,8 @@
86#include <net/ipconfig.h> 86#include <net/ipconfig.h>
87#include <linux/parser.h> 87#include <linux/parser.h>
88 88
89#include "internal.h"
90
89/* Define this to allow debugging output */ 91/* Define this to allow debugging output */
90#undef NFSROOT_DEBUG 92#undef NFSROOT_DEBUG
91#define NFSDBG_FACILITY NFSDBG_ROOT 93#define NFSDBG_FACILITY NFSDBG_ROOT
@@ -100,7 +102,7 @@ static char nfs_root_name[256] __initdata = "";
100static __be32 servaddr __initdata = 0; 102static __be32 servaddr __initdata = 0;
101 103
102/* Name of directory to mount */ 104/* Name of directory to mount */
103static char nfs_path[NFS_MAXPATHLEN] __initdata = { 0, }; 105static char nfs_export_path[NFS_MAXPATHLEN] __initdata = { 0, };
104 106
105/* NFS-related data */ 107/* NFS-related data */
106static struct nfs_mount_data nfs_data __initdata = { 0, };/* NFS mount info */ 108static struct nfs_mount_data nfs_data __initdata = { 0, };/* NFS mount info */
@@ -312,7 +314,7 @@ static int __init root_nfs_name(char *name)
312 printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n"); 314 printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n");
313 return -1; 315 return -1;
314 } 316 }
315 sprintf(nfs_path, buf, cp); 317 sprintf(nfs_export_path, buf, cp);
316 318
317 return 1; 319 return 1;
318} 320}
@@ -329,7 +331,7 @@ static int __init root_nfs_addr(void)
329 } 331 }
330 332
331 snprintf(nfs_data.hostname, sizeof(nfs_data.hostname), 333 snprintf(nfs_data.hostname, sizeof(nfs_data.hostname),
332 "%u.%u.%u.%u", NIPQUAD(servaddr)); 334 "%pI4", &servaddr);
333 return 0; 335 return 0;
334} 336}
335 337
@@ -340,7 +342,7 @@ static int __init root_nfs_addr(void)
340static void __init root_nfs_print(void) 342static void __init root_nfs_print(void)
341{ 343{
342 printk(KERN_NOTICE "Root-NFS: Mounting %s on server %s as root\n", 344 printk(KERN_NOTICE "Root-NFS: Mounting %s on server %s as root\n",
343 nfs_path, nfs_data.hostname); 345 nfs_export_path, nfs_data.hostname);
344 printk(KERN_NOTICE "Root-NFS: rsize = %d, wsize = %d, timeo = %d, retrans = %d\n", 346 printk(KERN_NOTICE "Root-NFS: rsize = %d, wsize = %d, timeo = %d, retrans = %d\n",
345 nfs_data.rsize, nfs_data.wsize, nfs_data.timeo, nfs_data.retrans); 347 nfs_data.rsize, nfs_data.wsize, nfs_data.timeo, nfs_data.retrans);
346 printk(KERN_NOTICE "Root-NFS: acreg (min,max) = (%d,%d), acdir (min,max) = (%d,%d)\n", 348 printk(KERN_NOTICE "Root-NFS: acreg (min,max) = (%d,%d), acdir (min,max) = (%d,%d)\n",
@@ -421,8 +423,8 @@ static int __init root_nfs_getport(int program, int version, int proto)
421{ 423{
422 struct sockaddr_in sin; 424 struct sockaddr_in sin;
423 425
424 printk(KERN_NOTICE "Looking up port of RPC %d/%d on %u.%u.%u.%u\n", 426 printk(KERN_NOTICE "Looking up port of RPC %d/%d on %pI4\n",
425 program, version, NIPQUAD(servaddr)); 427 program, version, &servaddr);
426 set_sockaddr(&sin, servaddr, 0); 428 set_sockaddr(&sin, servaddr, 0);
427 return rpcb_getport_sync(&sin, program, version, proto); 429 return rpcb_getport_sync(&sin, program, version, proto);
428} 430}
@@ -485,18 +487,23 @@ static int __init root_nfs_get_handle(void)
485{ 487{
486 struct nfs_fh fh; 488 struct nfs_fh fh;
487 struct sockaddr_in sin; 489 struct sockaddr_in sin;
490 struct nfs_mount_request request = {
491 .sap = (struct sockaddr *)&sin,
492 .salen = sizeof(sin),
493 .dirpath = nfs_export_path,
494 .version = (nfs_data.flags & NFS_MOUNT_VER3) ?
495 NFS_MNT3_VERSION : NFS_MNT_VERSION,
496 .protocol = (nfs_data.flags & NFS_MOUNT_TCP) ?
497 XPRT_TRANSPORT_TCP : XPRT_TRANSPORT_UDP,
498 .fh = &fh,
499 };
488 int status; 500 int status;
489 int protocol = (nfs_data.flags & NFS_MOUNT_TCP) ?
490 XPRT_TRANSPORT_TCP : XPRT_TRANSPORT_UDP;
491 int version = (nfs_data.flags & NFS_MOUNT_VER3) ?
492 NFS_MNT3_VERSION : NFS_MNT_VERSION;
493 501
494 set_sockaddr(&sin, servaddr, htons(mount_port)); 502 set_sockaddr(&sin, servaddr, htons(mount_port));
495 status = nfs_mount((struct sockaddr *) &sin, sizeof(sin), NULL, 503 status = nfs_mount(&request);
496 nfs_path, version, protocol, &fh);
497 if (status < 0) 504 if (status < 0)
498 printk(KERN_ERR "Root-NFS: Server returned error %d " 505 printk(KERN_ERR "Root-NFS: Server returned error %d "
499 "while mounting %s\n", status, nfs_path); 506 "while mounting %s\n", status, nfs_export_path);
500 else { 507 else {
501 nfs_data.root.size = fh.size; 508 nfs_data.root.size = fh.size;
502 memcpy(nfs_data.root.data, fh.data, fh.size); 509 memcpy(nfs_data.root.data, fh.data, fh.size);
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 40d17987d0e..f856004bb7f 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -533,12 +533,6 @@ readpage_async_filler(void *data, struct page *page)
533 unsigned int len; 533 unsigned int len;
534 int error; 534 int error;
535 535
536 error = nfs_wb_page(inode, page);
537 if (error)
538 goto out_unlock;
539 if (PageUptodate(page))
540 goto out_unlock;
541
542 len = nfs_page_length(page); 536 len = nfs_page_length(page);
543 if (len == 0) 537 if (len == 0)
544 return nfs_return_empty_page(page); 538 return nfs_return_empty_page(page);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index f48db679a1c..d6686f4786d 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -75,6 +75,7 @@ enum {
75 Opt_acl, Opt_noacl, 75 Opt_acl, Opt_noacl,
76 Opt_rdirplus, Opt_nordirplus, 76 Opt_rdirplus, Opt_nordirplus,
77 Opt_sharecache, Opt_nosharecache, 77 Opt_sharecache, Opt_nosharecache,
78 Opt_resvport, Opt_noresvport,
78 79
79 /* Mount options that take integer arguments */ 80 /* Mount options that take integer arguments */
80 Opt_port, 81 Opt_port,
@@ -129,6 +130,8 @@ static const match_table_t nfs_mount_option_tokens = {
129 { Opt_nordirplus, "nordirplus" }, 130 { Opt_nordirplus, "nordirplus" },
130 { Opt_sharecache, "sharecache" }, 131 { Opt_sharecache, "sharecache" },
131 { Opt_nosharecache, "nosharecache" }, 132 { Opt_nosharecache, "nosharecache" },
133 { Opt_resvport, "resvport" },
134 { Opt_noresvport, "noresvport" },
132 135
133 { Opt_port, "port=%u" }, 136 { Opt_port, "port=%u" },
134 { Opt_rsize, "rsize=%u" }, 137 { Opt_rsize, "rsize=%u" },
@@ -462,14 +465,12 @@ static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss,
462 switch (sap->sa_family) { 465 switch (sap->sa_family) {
463 case AF_INET: { 466 case AF_INET: {
464 struct sockaddr_in *sin = (struct sockaddr_in *)sap; 467 struct sockaddr_in *sin = (struct sockaddr_in *)sap;
465 seq_printf(m, ",mountaddr=" NIPQUAD_FMT, 468 seq_printf(m, ",mountaddr=%pI4", &sin->sin_addr.s_addr);
466 NIPQUAD(sin->sin_addr.s_addr));
467 break; 469 break;
468 } 470 }
469 case AF_INET6: { 471 case AF_INET6: {
470 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; 472 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
471 seq_printf(m, ",mountaddr=" NIP6_FMT, 473 seq_printf(m, ",mountaddr=%pI6", &sin6->sin6_addr);
472 NIP6(sin6->sin6_addr));
473 break; 474 break;
474 } 475 }
475 default: 476 default:
@@ -514,7 +515,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
514 { NFS_MOUNT_NONLM, ",nolock", "" }, 515 { NFS_MOUNT_NONLM, ",nolock", "" },
515 { NFS_MOUNT_NOACL, ",noacl", "" }, 516 { NFS_MOUNT_NOACL, ",noacl", "" },
516 { NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" }, 517 { NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" },
517 { NFS_MOUNT_UNSHARED, ",nosharecache", ""}, 518 { NFS_MOUNT_UNSHARED, ",nosharecache", "" },
519 { NFS_MOUNT_NORESVPORT, ",noresvport", "" },
518 { 0, NULL, NULL } 520 { 0, NULL, NULL }
519 }; 521 };
520 const struct proc_nfs_info *nfs_infop; 522 const struct proc_nfs_info *nfs_infop;
@@ -1035,6 +1037,12 @@ static int nfs_parse_mount_options(char *raw,
1035 case Opt_nosharecache: 1037 case Opt_nosharecache:
1036 mnt->flags |= NFS_MOUNT_UNSHARED; 1038 mnt->flags |= NFS_MOUNT_UNSHARED;
1037 break; 1039 break;
1040 case Opt_resvport:
1041 mnt->flags &= ~NFS_MOUNT_NORESVPORT;
1042 break;
1043 case Opt_noresvport:
1044 mnt->flags |= NFS_MOUNT_NORESVPORT;
1045 break;
1038 1046
1039 /* 1047 /*
1040 * options that take numeric values 1048 * options that take numeric values
@@ -1329,8 +1337,14 @@ out_security_failure:
1329static int nfs_try_mount(struct nfs_parsed_mount_data *args, 1337static int nfs_try_mount(struct nfs_parsed_mount_data *args,
1330 struct nfs_fh *root_fh) 1338 struct nfs_fh *root_fh)
1331{ 1339{
1332 struct sockaddr *sap = (struct sockaddr *)&args->mount_server.address; 1340 struct nfs_mount_request request = {
1333 char *hostname; 1341 .sap = (struct sockaddr *)
1342 &args->mount_server.address,
1343 .dirpath = args->nfs_server.export_path,
1344 .protocol = args->mount_server.protocol,
1345 .fh = root_fh,
1346 .noresvport = args->flags & NFS_MOUNT_NORESVPORT,
1347 };
1334 int status; 1348 int status;
1335 1349
1336 if (args->mount_server.version == 0) { 1350 if (args->mount_server.version == 0) {
@@ -1339,42 +1353,38 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
1339 else 1353 else
1340 args->mount_server.version = NFS_MNT_VERSION; 1354 args->mount_server.version = NFS_MNT_VERSION;
1341 } 1355 }
1356 request.version = args->mount_server.version;
1342 1357
1343 if (args->mount_server.hostname) 1358 if (args->mount_server.hostname)
1344 hostname = args->mount_server.hostname; 1359 request.hostname = args->mount_server.hostname;
1345 else 1360 else
1346 hostname = args->nfs_server.hostname; 1361 request.hostname = args->nfs_server.hostname;
1347 1362
1348 /* 1363 /*
1349 * Construct the mount server's address. 1364 * Construct the mount server's address.
1350 */ 1365 */
1351 if (args->mount_server.address.ss_family == AF_UNSPEC) { 1366 if (args->mount_server.address.ss_family == AF_UNSPEC) {
1352 memcpy(sap, &args->nfs_server.address, 1367 memcpy(request.sap, &args->nfs_server.address,
1353 args->nfs_server.addrlen); 1368 args->nfs_server.addrlen);
1354 args->mount_server.addrlen = args->nfs_server.addrlen; 1369 args->mount_server.addrlen = args->nfs_server.addrlen;
1355 } 1370 }
1371 request.salen = args->mount_server.addrlen;
1356 1372
1357 /* 1373 /*
1358 * autobind will be used if mount_server.port == 0 1374 * autobind will be used if mount_server.port == 0
1359 */ 1375 */
1360 nfs_set_port(sap, args->mount_server.port); 1376 nfs_set_port(request.sap, args->mount_server.port);
1361 1377
1362 /* 1378 /*
1363 * Now ask the mount server to map our export path 1379 * Now ask the mount server to map our export path
1364 * to a file handle. 1380 * to a file handle.
1365 */ 1381 */
1366 status = nfs_mount(sap, 1382 status = nfs_mount(&request);
1367 args->mount_server.addrlen,
1368 hostname,
1369 args->nfs_server.export_path,
1370 args->mount_server.version,
1371 args->mount_server.protocol,
1372 root_fh);
1373 if (status == 0) 1383 if (status == 0)
1374 return 0; 1384 return 0;
1375 1385
1376 dfprintk(MOUNT, "NFS: unable to mount server %s, error %d\n", 1386 dfprintk(MOUNT, "NFS: unable to mount server %s, error %d\n",
1377 hostname, status); 1387 request.hostname, status);
1378 return status; 1388 return status;
1379} 1389}
1380 1390
@@ -2421,7 +2431,7 @@ static void nfs4_kill_super(struct super_block *sb)
2421{ 2431{
2422 struct nfs_server *server = NFS_SB(sb); 2432 struct nfs_server *server = NFS_SB(sb);
2423 2433
2424 nfs_return_all_delegations(sb); 2434 nfs_super_return_all_delegations(sb);
2425 kill_anon_super(sb); 2435 kill_anon_super(sb);
2426 2436
2427 nfs4_renewd_prepare_shutdown(server); 2437 nfs4_renewd_prepare_shutdown(server);
diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c
index c11f5375d7c..04133aacb1e 100644
--- a/fs/nfs_common/nfsacl.c
+++ b/fs/nfs_common/nfsacl.c
@@ -29,8 +29,8 @@
29 29
30MODULE_LICENSE("GPL"); 30MODULE_LICENSE("GPL");
31 31
32EXPORT_SYMBOL(nfsacl_encode); 32EXPORT_SYMBOL_GPL(nfsacl_encode);
33EXPORT_SYMBOL(nfsacl_decode); 33EXPORT_SYMBOL_GPL(nfsacl_decode);
34 34
35struct nfsacl_encode_desc { 35struct nfsacl_encode_desc {
36 struct xdr_array2_desc desc; 36 struct xdr_array2_desc desc;
diff --git a/fs/nfsctl.c b/fs/nfsctl.c
index aed8145d908..b27451909df 100644
--- a/fs/nfsctl.c
+++ b/fs/nfsctl.c
@@ -10,6 +10,8 @@
10#include <linux/sunrpc/svc.h> 10#include <linux/sunrpc/svc.h>
11#include <linux/nfsd/nfsd.h> 11#include <linux/nfsd/nfsd.h>
12#include <linux/nfsd/syscall.h> 12#include <linux/nfsd/syscall.h>
13#include <linux/cred.h>
14#include <linux/sched.h>
13#include <linux/linkage.h> 15#include <linux/linkage.h>
14#include <linux/namei.h> 16#include <linux/namei.h>
15#include <linux/mount.h> 17#include <linux/mount.h>
@@ -36,12 +38,14 @@ static struct file *do_open(char *name, int flags)
36 return ERR_PTR(error); 38 return ERR_PTR(error);
37 39
38 if (flags == O_RDWR) 40 if (flags == O_RDWR)
39 error = may_open(&nd,MAY_READ|MAY_WRITE,FMODE_READ|FMODE_WRITE); 41 error = may_open(&nd.path, MAY_READ|MAY_WRITE,
42 FMODE_READ|FMODE_WRITE);
40 else 43 else
41 error = may_open(&nd, MAY_WRITE, FMODE_WRITE); 44 error = may_open(&nd.path, MAY_WRITE, FMODE_WRITE);
42 45
43 if (!error) 46 if (!error)
44 return dentry_open(nd.path.dentry, nd.path.mnt, flags); 47 return dentry_open(nd.path.dentry, nd.path.mnt, flags,
48 current_cred());
45 49
46 path_put(&nd.path); 50 path_put(&nd.path);
47 return ERR_PTR(error); 51 return ERR_PTR(error);
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index 294992e9bf6..0184fe9b514 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -27,53 +27,70 @@ int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp)
27 27
28int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) 28int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
29{ 29{
30 struct svc_cred cred = rqstp->rq_cred; 30 struct group_info *rqgi;
31 struct group_info *gi;
32 struct cred *new;
31 int i; 33 int i;
32 int flags = nfsexp_flags(rqstp, exp); 34 int flags = nfsexp_flags(rqstp, exp);
33 int ret; 35 int ret;
34 36
37 /* discard any old override before preparing the new set */
38 revert_creds(get_cred(current->real_cred));
39 new = prepare_creds();
40 if (!new)
41 return -ENOMEM;
42
43 new->fsuid = rqstp->rq_cred.cr_uid;
44 new->fsgid = rqstp->rq_cred.cr_gid;
45
46 rqgi = rqstp->rq_cred.cr_group_info;
47
35 if (flags & NFSEXP_ALLSQUASH) { 48 if (flags & NFSEXP_ALLSQUASH) {
36 cred.cr_uid = exp->ex_anon_uid; 49 new->fsuid = exp->ex_anon_uid;
37 cred.cr_gid = exp->ex_anon_gid; 50 new->fsgid = exp->ex_anon_gid;
38 cred.cr_group_info = groups_alloc(0); 51 gi = groups_alloc(0);
39 } else if (flags & NFSEXP_ROOTSQUASH) { 52 } else if (flags & NFSEXP_ROOTSQUASH) {
40 struct group_info *gi; 53 if (!new->fsuid)
41 if (!cred.cr_uid) 54 new->fsuid = exp->ex_anon_uid;
42 cred.cr_uid = exp->ex_anon_uid; 55 if (!new->fsgid)
43 if (!cred.cr_gid) 56 new->fsgid = exp->ex_anon_gid;
44 cred.cr_gid = exp->ex_anon_gid;
45 gi = groups_alloc(cred.cr_group_info->ngroups);
46 if (gi)
47 for (i = 0; i < cred.cr_group_info->ngroups; i++) {
48 if (!GROUP_AT(cred.cr_group_info, i))
49 GROUP_AT(gi, i) = exp->ex_anon_gid;
50 else
51 GROUP_AT(gi, i) = GROUP_AT(cred.cr_group_info, i);
52 }
53 cred.cr_group_info = gi;
54 } else
55 get_group_info(cred.cr_group_info);
56
57 if (cred.cr_uid != (uid_t) -1)
58 current->fsuid = cred.cr_uid;
59 else
60 current->fsuid = exp->ex_anon_uid;
61 if (cred.cr_gid != (gid_t) -1)
62 current->fsgid = cred.cr_gid;
63 else
64 current->fsgid = exp->ex_anon_gid;
65 57
66 if (!cred.cr_group_info) 58 gi = groups_alloc(rqgi->ngroups);
67 return -ENOMEM; 59 if (!gi)
68 ret = set_current_groups(cred.cr_group_info); 60 goto oom;
69 put_group_info(cred.cr_group_info); 61
70 if ((cred.cr_uid)) { 62 for (i = 0; i < rqgi->ngroups; i++) {
71 current->cap_effective = 63 if (!GROUP_AT(rqgi, i))
72 cap_drop_nfsd_set(current->cap_effective); 64 GROUP_AT(gi, i) = exp->ex_anon_gid;
65 else
66 GROUP_AT(gi, i) = GROUP_AT(rqgi, i);
67 }
73 } else { 68 } else {
74 current->cap_effective = 69 gi = get_group_info(rqgi);
75 cap_raise_nfsd_set(current->cap_effective,
76 current->cap_permitted);
77 } 70 }
71
72 if (new->fsuid == (uid_t) -1)
73 new->fsuid = exp->ex_anon_uid;
74 if (new->fsgid == (gid_t) -1)
75 new->fsgid = exp->ex_anon_gid;
76
77 ret = set_groups(new, gi);
78 put_group_info(gi);
79 if (!ret)
80 goto error;
81
82 if (new->uid)
83 new->cap_effective = cap_drop_nfsd_set(new->cap_effective);
84 else
85 new->cap_effective = cap_raise_nfsd_set(new->cap_effective,
86 new->cap_permitted);
87 put_cred(override_creds(new));
88 return 0;
89
90oom:
91 ret = -ENOMEM;
92error:
93 abort_creds(new);
78 return ret; 94 return ret;
79} 95}
96
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 094747a1227..6d7d8c02c19 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -358,6 +358,7 @@ static struct rpc_program cb_program = {
358 .nrvers = ARRAY_SIZE(nfs_cb_version), 358 .nrvers = ARRAY_SIZE(nfs_cb_version),
359 .version = nfs_cb_version, 359 .version = nfs_cb_version,
360 .stats = &cb_stats, 360 .stats = &cb_stats,
361 .pipe_dir_name = "/nfsd4_cb",
361}; 362};
362 363
363/* Reference counting, callback cleanup, etc., all look racy as heck. 364/* Reference counting, callback cleanup, etc., all look racy as heck.
@@ -382,8 +383,9 @@ static int do_probe_callback(void *data)
382 .program = &cb_program, 383 .program = &cb_program,
383 .prognumber = cb->cb_prog, 384 .prognumber = cb->cb_prog,
384 .version = nfs_cb_version[1]->number, 385 .version = nfs_cb_version[1]->number,
385 .authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */ 386 .authflavor = clp->cl_flavor,
386 .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), 387 .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
388 .client_name = clp->cl_principal,
387 }; 389 };
388 struct rpc_message msg = { 390 struct rpc_message msg = {
389 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], 391 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
@@ -392,6 +394,11 @@ static int do_probe_callback(void *data)
392 struct rpc_clnt *client; 394 struct rpc_clnt *client;
393 int status; 395 int status;
394 396
397 if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) {
398 status = nfserr_cb_path_down;
399 goto out_err;
400 }
401
395 /* Initialize address */ 402 /* Initialize address */
396 memset(&addr, 0, sizeof(addr)); 403 memset(&addr, 0, sizeof(addr));
397 addr.sin_family = AF_INET; 404 addr.sin_family = AF_INET;
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index b79ec930d9f..0f9d6efaa62 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -54,20 +54,26 @@
54static struct path rec_dir; 54static struct path rec_dir;
55static int rec_dir_init = 0; 55static int rec_dir_init = 0;
56 56
57static void 57static int
58nfs4_save_user(uid_t *saveuid, gid_t *savegid) 58nfs4_save_creds(const struct cred **original_creds)
59{ 59{
60 *saveuid = current->fsuid; 60 struct cred *new;
61 *savegid = current->fsgid; 61
62 current->fsuid = 0; 62 new = prepare_creds();
63 current->fsgid = 0; 63 if (!new)
64 return -ENOMEM;
65
66 new->fsuid = 0;
67 new->fsgid = 0;
68 *original_creds = override_creds(new);
69 put_cred(new);
70 return 0;
64} 71}
65 72
66static void 73static void
67nfs4_reset_user(uid_t saveuid, gid_t savegid) 74nfs4_reset_creds(const struct cred *original)
68{ 75{
69 current->fsuid = saveuid; 76 revert_creds(original);
70 current->fsgid = savegid;
71} 77}
72 78
73static void 79static void
@@ -129,10 +135,9 @@ nfsd4_sync_rec_dir(void)
129int 135int
130nfsd4_create_clid_dir(struct nfs4_client *clp) 136nfsd4_create_clid_dir(struct nfs4_client *clp)
131{ 137{
138 const struct cred *original_cred;
132 char *dname = clp->cl_recdir; 139 char *dname = clp->cl_recdir;
133 struct dentry *dentry; 140 struct dentry *dentry;
134 uid_t uid;
135 gid_t gid;
136 int status; 141 int status;
137 142
138 dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname); 143 dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname);
@@ -140,7 +145,9 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
140 if (!rec_dir_init || clp->cl_firststate) 145 if (!rec_dir_init || clp->cl_firststate)
141 return 0; 146 return 0;
142 147
143 nfs4_save_user(&uid, &gid); 148 status = nfs4_save_creds(&original_cred);
149 if (status < 0)
150 return status;
144 151
145 /* lock the parent */ 152 /* lock the parent */
146 mutex_lock(&rec_dir.dentry->d_inode->i_mutex); 153 mutex_lock(&rec_dir.dentry->d_inode->i_mutex);
@@ -168,7 +175,7 @@ out_unlock:
168 clp->cl_firststate = 1; 175 clp->cl_firststate = 1;
169 nfsd4_sync_rec_dir(); 176 nfsd4_sync_rec_dir();
170 } 177 }
171 nfs4_reset_user(uid, gid); 178 nfs4_reset_creds(original_cred);
172 dprintk("NFSD: nfsd4_create_clid_dir returns %d\n", status); 179 dprintk("NFSD: nfsd4_create_clid_dir returns %d\n", status);
173 return status; 180 return status;
174} 181}
@@ -211,26 +218,29 @@ nfsd4_build_dentrylist(void *arg, const char *name, int namlen,
211static int 218static int
212nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f) 219nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f)
213{ 220{
221 const struct cred *original_cred;
214 struct file *filp; 222 struct file *filp;
215 struct dentry_list_arg dla = { 223 struct dentry_list_arg dla = {
216 .parent = dir, 224 .parent = dir,
217 }; 225 };
218 struct list_head *dentries = &dla.dentries; 226 struct list_head *dentries = &dla.dentries;
219 struct dentry_list *child; 227 struct dentry_list *child;
220 uid_t uid;
221 gid_t gid;
222 int status; 228 int status;
223 229
224 if (!rec_dir_init) 230 if (!rec_dir_init)
225 return 0; 231 return 0;
226 232
227 nfs4_save_user(&uid, &gid); 233 status = nfs4_save_creds(&original_cred);
234 if (status < 0)
235 return status;
228 INIT_LIST_HEAD(dentries); 236 INIT_LIST_HEAD(dentries);
229 237
230 filp = dentry_open(dget(dir), mntget(rec_dir.mnt), O_RDONLY); 238 filp = dentry_open(dget(dir), mntget(rec_dir.mnt), O_RDONLY,
239 current_cred());
231 status = PTR_ERR(filp); 240 status = PTR_ERR(filp);
232 if (IS_ERR(filp)) 241 if (IS_ERR(filp))
233 goto out; 242 goto out;
243 INIT_LIST_HEAD(dentries);
234 status = vfs_readdir(filp, nfsd4_build_dentrylist, &dla); 244 status = vfs_readdir(filp, nfsd4_build_dentrylist, &dla);
235 fput(filp); 245 fput(filp);
236 while (!list_empty(dentries)) { 246 while (!list_empty(dentries)) {
@@ -249,7 +259,7 @@ out:
249 dput(child->dentry); 259 dput(child->dentry);
250 kfree(child); 260 kfree(child);
251 } 261 }
252 nfs4_reset_user(uid, gid); 262 nfs4_reset_creds(original_cred);
253 return status; 263 return status;
254} 264}
255 265
@@ -311,8 +321,7 @@ out:
311void 321void
312nfsd4_remove_clid_dir(struct nfs4_client *clp) 322nfsd4_remove_clid_dir(struct nfs4_client *clp)
313{ 323{
314 uid_t uid; 324 const struct cred *original_cred;
315 gid_t gid;
316 int status; 325 int status;
317 326
318 if (!rec_dir_init || !clp->cl_firststate) 327 if (!rec_dir_init || !clp->cl_firststate)
@@ -322,9 +331,13 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
322 if (status) 331 if (status)
323 goto out; 332 goto out;
324 clp->cl_firststate = 0; 333 clp->cl_firststate = 0;
325 nfs4_save_user(&uid, &gid); 334
335 status = nfs4_save_creds(&original_cred);
336 if (status < 0)
337 goto out;
338
326 status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1); 339 status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1);
327 nfs4_reset_user(uid, gid); 340 nfs4_reset_creds(original_cred);
328 if (status == 0) 341 if (status == 0)
329 nfsd4_sync_rec_dir(); 342 nfsd4_sync_rec_dir();
330 mnt_drop_write(rec_dir.mnt); 343 mnt_drop_write(rec_dir.mnt);
@@ -401,16 +414,21 @@ nfsd4_recdir_load(void) {
401void 414void
402nfsd4_init_recdir(char *rec_dirname) 415nfsd4_init_recdir(char *rec_dirname)
403{ 416{
404 uid_t uid = 0; 417 const struct cred *original_cred;
405 gid_t gid = 0; 418 int status;
406 int status;
407 419
408 printk("NFSD: Using %s as the NFSv4 state recovery directory\n", 420 printk("NFSD: Using %s as the NFSv4 state recovery directory\n",
409 rec_dirname); 421 rec_dirname);
410 422
411 BUG_ON(rec_dir_init); 423 BUG_ON(rec_dir_init);
412 424
413 nfs4_save_user(&uid, &gid); 425 status = nfs4_save_creds(&original_cred);
426 if (status < 0) {
427 printk("NFSD: Unable to change credentials to find recovery"
428 " directory: error %d\n",
429 status);
430 return;
431 }
414 432
415 status = kern_path(rec_dirname, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, 433 status = kern_path(rec_dirname, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
416 &rec_dir); 434 &rec_dir);
@@ -420,7 +438,7 @@ nfsd4_init_recdir(char *rec_dirname)
420 438
421 if (!status) 439 if (!status)
422 rec_dir_init = 1; 440 rec_dir_init = 1;
423 nfs4_reset_user(uid, gid); 441 nfs4_reset_creds(original_cred);
424} 442}
425 443
426void 444void
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 1a052ac2bde..13e0e074dbb 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -54,6 +54,7 @@
54#include <linux/mutex.h> 54#include <linux/mutex.h>
55#include <linux/lockd/bind.h> 55#include <linux/lockd/bind.h>
56#include <linux/module.h> 56#include <linux/module.h>
57#include <linux/sunrpc/svcauth_gss.h>
57 58
58#define NFSDDBG_FACILITY NFSDDBG_PROC 59#define NFSDDBG_FACILITY NFSDDBG_PROC
59 60
@@ -377,6 +378,7 @@ free_client(struct nfs4_client *clp)
377 shutdown_callback_client(clp); 378 shutdown_callback_client(clp);
378 if (clp->cl_cred.cr_group_info) 379 if (clp->cl_cred.cr_group_info)
379 put_group_info(clp->cl_cred.cr_group_info); 380 put_group_info(clp->cl_cred.cr_group_info);
381 kfree(clp->cl_principal);
380 kfree(clp->cl_name.data); 382 kfree(clp->cl_name.data);
381 kfree(clp); 383 kfree(clp);
382} 384}
@@ -696,6 +698,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
696 unsigned int strhashval; 698 unsigned int strhashval;
697 struct nfs4_client *conf, *unconf, *new; 699 struct nfs4_client *conf, *unconf, *new;
698 __be32 status; 700 __be32 status;
701 char *princ;
699 char dname[HEXDIR_LEN]; 702 char dname[HEXDIR_LEN];
700 703
701 if (!check_name(clname)) 704 if (!check_name(clname))
@@ -719,8 +722,8 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
719 status = nfserr_clid_inuse; 722 status = nfserr_clid_inuse;
720 if (!same_creds(&conf->cl_cred, &rqstp->rq_cred) 723 if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)
721 || conf->cl_addr != sin->sin_addr.s_addr) { 724 || conf->cl_addr != sin->sin_addr.s_addr) {
722 dprintk("NFSD: setclientid: string in use by client" 725 dprintk("NFSD: setclientid: string in use by clientat %pI4\n",
723 "at %u.%u.%u.%u\n", NIPQUAD(conf->cl_addr)); 726 &conf->cl_addr);
724 goto out; 727 goto out;
725 } 728 }
726 } 729 }
@@ -783,6 +786,15 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
783 } 786 }
784 copy_verf(new, &clverifier); 787 copy_verf(new, &clverifier);
785 new->cl_addr = sin->sin_addr.s_addr; 788 new->cl_addr = sin->sin_addr.s_addr;
789 new->cl_flavor = rqstp->rq_flavor;
790 princ = svc_gss_principal(rqstp);
791 if (princ) {
792 new->cl_principal = kstrdup(princ, GFP_KERNEL);
793 if (new->cl_principal == NULL) {
794 free_client(new);
795 goto out;
796 }
797 }
786 copy_cred(&new->cl_cred, &rqstp->rq_cred); 798 copy_cred(&new->cl_cred, &rqstp->rq_cred);
787 gen_confirm(new); 799 gen_confirm(new);
788 gen_callback(new, setclid); 800 gen_callback(new, setclid);
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index e3f9783fdcf..77d7b8c531a 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -330,7 +330,7 @@ static ssize_t failover_unlock_ip(struct file *file, char *buf, size_t size)
330 return -EINVAL; 330 return -EINVAL;
331 331
332 /* get ipv4 address */ 332 /* get ipv4 address */
333 if (sscanf(fo_path, NIPQUAD_FMT "%c", &b1, &b2, &b3, &b4, &c) != 4) 333 if (sscanf(fo_path, "%u.%u.%u.%u%c", &b1, &b2, &b3, &b4, &c) != 4)
334 return -EINVAL; 334 return -EINVAL;
335 if (b1 > 255 || b2 > 255 || b3 > 255 || b4 > 255) 335 if (b1 > 255 || b2 > 255 || b3 > 255 || b4 > 255)
336 return -EINVAL; 336 return -EINVAL;
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index cd25d91895a..f0da7d9c3a9 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -186,9 +186,14 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
186 * access control settings being in effect, we cannot 186 * access control settings being in effect, we cannot
187 * fix that case easily. 187 * fix that case easily.
188 */ 188 */
189 current->cap_effective = 189 struct cred *new = prepare_creds();
190 cap_raise_nfsd_set(current->cap_effective, 190 if (!new)
191 current->cap_permitted); 191 return nfserrno(-ENOMEM);
192 new->cap_effective =
193 cap_raise_nfsd_set(new->cap_effective,
194 new->cap_permitted);
195 put_cred(override_creds(new));
196 put_cred(new);
192 } else { 197 } else {
193 error = nfsd_setuser_and_check_port(rqstp, exp); 198 error = nfsd_setuser_and_check_port(rqstp, exp);
194 if (error) 199 if (error)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 4433c8f0016..d1c5f787b36 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -671,6 +671,7 @@ __be32
671nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, 671nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
672 int access, struct file **filp) 672 int access, struct file **filp)
673{ 673{
674 const struct cred *cred = current_cred();
674 struct dentry *dentry; 675 struct dentry *dentry;
675 struct inode *inode; 676 struct inode *inode;
676 int flags = O_RDONLY|O_LARGEFILE; 677 int flags = O_RDONLY|O_LARGEFILE;
@@ -725,7 +726,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
725 DQUOT_INIT(inode); 726 DQUOT_INIT(inode);
726 } 727 }
727 *filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt), 728 *filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt),
728 flags); 729 flags, cred);
729 if (IS_ERR(*filp)) 730 if (IS_ERR(*filp))
730 host_err = PTR_ERR(*filp); 731 host_err = PTR_ERR(*filp);
731out_nfserr: 732out_nfserr:
@@ -1169,7 +1170,7 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp,
1169 * send along the gid on create when it tries to implement 1170 * send along the gid on create when it tries to implement
1170 * setgid directories via NFS: 1171 * setgid directories via NFS:
1171 */ 1172 */
1172 if (current->fsuid != 0) 1173 if (current_fsuid() != 0)
1173 iap->ia_valid &= ~(ATTR_UID|ATTR_GID); 1174 iap->ia_valid &= ~(ATTR_UID|ATTR_GID);
1174 if (iap->ia_valid) 1175 if (iap->ia_valid)
1175 return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0); 1176 return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
@@ -2001,7 +2002,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
2001 IS_APPEND(inode)? " append" : "", 2002 IS_APPEND(inode)? " append" : "",
2002 __mnt_is_readonly(exp->ex_path.mnt)? " ro" : ""); 2003 __mnt_is_readonly(exp->ex_path.mnt)? " ro" : "");
2003 dprintk(" owner %d/%d user %d/%d\n", 2004 dprintk(" owner %d/%d user %d/%d\n",
2004 inode->i_uid, inode->i_gid, current->fsuid, current->fsgid); 2005 inode->i_uid, inode->i_gid, current_fsuid(), current_fsgid());
2005#endif 2006#endif
2006 2007
2007 /* Normally we reject any write/sattr etc access on a read-only file 2008 /* Normally we reject any write/sattr etc access on a read-only file
@@ -2044,7 +2045,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
2044 * with NFSv3. 2045 * with NFSv3.
2045 */ 2046 */
2046 if ((acc & NFSD_MAY_OWNER_OVERRIDE) && 2047 if ((acc & NFSD_MAY_OWNER_OVERRIDE) &&
2047 inode->i_uid == current->fsuid) 2048 inode->i_uid == current_fsuid())
2048 return 0; 2049 return 0;
2049 2050
2050 /* This assumes NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */ 2051 /* This assumes NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */
diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig
new file mode 100644
index 00000000000..50914d7303c
--- /dev/null
+++ b/fs/notify/Kconfig
@@ -0,0 +1,2 @@
1source "fs/notify/dnotify/Kconfig"
2source "fs/notify/inotify/Kconfig"
diff --git a/fs/notify/Makefile b/fs/notify/Makefile
new file mode 100644
index 00000000000..5a95b6010ce
--- /dev/null
+++ b/fs/notify/Makefile
@@ -0,0 +1,2 @@
1obj-y += dnotify/
2obj-y += inotify/
diff --git a/fs/notify/dnotify/Kconfig b/fs/notify/dnotify/Kconfig
new file mode 100644
index 00000000000..26adf5dfa64
--- /dev/null
+++ b/fs/notify/dnotify/Kconfig
@@ -0,0 +1,10 @@
1config DNOTIFY
2 bool "Dnotify support"
3 default y
4 help
5 Dnotify is a directory-based per-fd file change notification system
6 that uses signals to communicate events to user-space. There exist
7 superior alternatives, but some applications may still rely on
8 dnotify.
9
10 If unsure, say Y.
diff --git a/fs/notify/dnotify/Makefile b/fs/notify/dnotify/Makefile
new file mode 100644
index 00000000000..f145251dcad
--- /dev/null
+++ b/fs/notify/dnotify/Makefile
@@ -0,0 +1 @@
obj-$(CONFIG_DNOTIFY) += dnotify.o
diff --git a/fs/dnotify.c b/fs/notify/dnotify/dnotify.c
index 676073b8dda..b0aa2cde80b 100644
--- a/fs/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -115,9 +115,6 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
115 dn->dn_next = inode->i_dnotify; 115 dn->dn_next = inode->i_dnotify;
116 inode->i_dnotify = dn; 116 inode->i_dnotify = dn;
117 spin_unlock(&inode->i_lock); 117 spin_unlock(&inode->i_lock);
118
119 if (filp->f_op && filp->f_op->dir_notify)
120 return filp->f_op->dir_notify(filp, arg);
121 return 0; 118 return 0;
122 119
123out_free: 120out_free:
diff --git a/fs/notify/inotify/Kconfig b/fs/notify/inotify/Kconfig
new file mode 100644
index 00000000000..44679284102
--- /dev/null
+++ b/fs/notify/inotify/Kconfig
@@ -0,0 +1,27 @@
1config INOTIFY
2 bool "Inotify file change notification support"
3 default y
4 ---help---
5 Say Y here to enable inotify support. Inotify is a file change
6 notification system and a replacement for dnotify. Inotify fixes
7 numerous shortcomings in dnotify and introduces several new features
8 including multiple file events, one-shot support, and unmount
9 notification.
10
11 For more information, see <file:Documentation/filesystems/inotify.txt>
12
13 If unsure, say Y.
14
15config INOTIFY_USER
16 bool "Inotify support for userspace"
17 depends on INOTIFY
18 default y
19 ---help---
20 Say Y here to enable inotify support for userspace, including the
21 associated system calls. Inotify allows monitoring of both files and
22 directories via a single open fd. Events are read from the file
23 descriptor, which is also select()- and poll()-able.
24
25 For more information, see <file:Documentation/filesystems/inotify.txt>
26
27 If unsure, say Y.
diff --git a/fs/notify/inotify/Makefile b/fs/notify/inotify/Makefile
new file mode 100644
index 00000000000..e290f3bb9d8
--- /dev/null
+++ b/fs/notify/inotify/Makefile
@@ -0,0 +1,2 @@
1obj-$(CONFIG_INOTIFY) += inotify.o
2obj-$(CONFIG_INOTIFY_USER) += inotify_user.o
diff --git a/fs/inotify.c b/fs/notify/inotify/inotify.c
index 7bbed1b8982..dae3f28f30d 100644
--- a/fs/inotify.c
+++ b/fs/notify/inotify/inotify.c
@@ -428,11 +428,13 @@ void inotify_unmount_inodes(struct list_head *list)
428 watches = &inode->inotify_watches; 428 watches = &inode->inotify_watches;
429 list_for_each_entry_safe(watch, next_w, watches, i_list) { 429 list_for_each_entry_safe(watch, next_w, watches, i_list) {
430 struct inotify_handle *ih= watch->ih; 430 struct inotify_handle *ih= watch->ih;
431 get_inotify_watch(watch);
431 mutex_lock(&ih->mutex); 432 mutex_lock(&ih->mutex);
432 ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0, 433 ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0,
433 NULL, NULL); 434 NULL, NULL);
434 inotify_remove_watch_locked(ih, watch); 435 inotify_remove_watch_locked(ih, watch);
435 mutex_unlock(&ih->mutex); 436 mutex_unlock(&ih->mutex);
437 put_inotify_watch(watch);
436 } 438 }
437 mutex_unlock(&inode->inotify_mutex); 439 mutex_unlock(&inode->inotify_mutex);
438 iput(inode); 440 iput(inode);
diff --git a/fs/inotify_user.c b/fs/notify/inotify/inotify_user.c
index d367e9b9286..400f8064a54 100644
--- a/fs/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -76,10 +76,10 @@ struct inotify_device {
76 struct mutex ev_mutex; /* protects event queue */ 76 struct mutex ev_mutex; /* protects event queue */
77 struct mutex up_mutex; /* synchronizes watch updates */ 77 struct mutex up_mutex; /* synchronizes watch updates */
78 struct list_head events; /* list of queued events */ 78 struct list_head events; /* list of queued events */
79 atomic_t count; /* reference count */
80 struct user_struct *user; /* user who opened this dev */ 79 struct user_struct *user; /* user who opened this dev */
81 struct inotify_handle *ih; /* inotify handle */ 80 struct inotify_handle *ih; /* inotify handle */
82 struct fasync_struct *fa; /* async notification */ 81 struct fasync_struct *fa; /* async notification */
82 atomic_t count; /* reference count */
83 unsigned int queue_size; /* size of the queue (bytes) */ 83 unsigned int queue_size; /* size of the queue (bytes) */
84 unsigned int event_count; /* number of pending events */ 84 unsigned int event_count; /* number of pending events */
85 unsigned int max_events; /* maximum number of events */ 85 unsigned int max_events; /* maximum number of events */
@@ -601,7 +601,7 @@ asmlinkage long sys_inotify_init1(int flags)
601 goto out_put_fd; 601 goto out_put_fd;
602 } 602 }
603 603
604 user = get_uid(current->user); 604 user = get_current_user();
605 if (unlikely(atomic_read(&user->inotify_devs) >= 605 if (unlikely(atomic_read(&user->inotify_devs) >=
606 inotify_max_user_instances)) { 606 inotify_max_user_instances)) {
607 ret = -EMFILE; 607 ret = -EMFILE;
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c
index 52276c02f71..f8424874fa0 100644
--- a/fs/ocfs2/cluster/netdebug.c
+++ b/fs/ocfs2/cluster/netdebug.c
@@ -304,8 +304,8 @@ static int sc_seq_show(struct seq_file *seq, void *v)
304 * use of it here generates a warning with -Wbitwise */ 304 * use of it here generates a warning with -Wbitwise */
305 seq_printf(seq, "%p:\n" 305 seq_printf(seq, "%p:\n"
306 " krefs: %d\n" 306 " krefs: %d\n"
307 " sock: %u.%u.%u.%u:%u -> " 307 " sock: %pI4:%u -> "
308 "%u.%u.%u.%u:%u\n" 308 "%pI4:%u\n"
309 " remote node: %s\n" 309 " remote node: %s\n"
310 " page off: %zu\n" 310 " page off: %zu\n"
311 " handshake ok: %u\n" 311 " handshake ok: %u\n"
@@ -319,8 +319,8 @@ static int sc_seq_show(struct seq_file *seq, void *v)
319 " func type: %u\n", 319 " func type: %u\n",
320 sc, 320 sc,
321 atomic_read(&sc->sc_kref.refcount), 321 atomic_read(&sc->sc_kref.refcount),
322 NIPQUAD(saddr), inet ? ntohs(sport) : 0, 322 &saddr, inet ? ntohs(sport) : 0,
323 NIPQUAD(daddr), inet ? ntohs(dport) : 0, 323 &daddr, inet ? ntohs(dport) : 0,
324 sc->sc_node->nd_name, 324 sc->sc_node->nd_name,
325 sc->sc_page_off, 325 sc->sc_page_off,
326 sc->sc_handshake_ok, 326 sc->sc_handshake_ok,
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index 816a3f61330..70e8fa9e253 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -250,7 +250,7 @@ static ssize_t o2nm_node_ipv4_port_write(struct o2nm_node *node,
250 250
251static ssize_t o2nm_node_ipv4_address_read(struct o2nm_node *node, char *page) 251static ssize_t o2nm_node_ipv4_address_read(struct o2nm_node *node, char *page)
252{ 252{
253 return sprintf(page, "%u.%u.%u.%u\n", NIPQUAD(node->nd_ipv4_address)); 253 return sprintf(page, "%pI4\n", &node->nd_ipv4_address);
254} 254}
255 255
256static ssize_t o2nm_node_ipv4_address_write(struct o2nm_node *node, 256static ssize_t o2nm_node_ipv4_address_write(struct o2nm_node *node,
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 2bcf706d9dd..9fbe849f634 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1597,8 +1597,8 @@ static void o2net_start_connect(struct work_struct *work)
1597 ret = sock->ops->bind(sock, (struct sockaddr *)&myaddr, 1597 ret = sock->ops->bind(sock, (struct sockaddr *)&myaddr,
1598 sizeof(myaddr)); 1598 sizeof(myaddr));
1599 if (ret) { 1599 if (ret) {
1600 mlog(ML_ERROR, "bind failed with %d at address %u.%u.%u.%u\n", 1600 mlog(ML_ERROR, "bind failed with %d at address %pI4\n",
1601 ret, NIPQUAD(mynode->nd_ipv4_address)); 1601 ret, &mynode->nd_ipv4_address);
1602 goto out; 1602 goto out;
1603 } 1603 }
1604 1604
@@ -1790,17 +1790,16 @@ static int o2net_accept_one(struct socket *sock)
1790 1790
1791 node = o2nm_get_node_by_ip(sin.sin_addr.s_addr); 1791 node = o2nm_get_node_by_ip(sin.sin_addr.s_addr);
1792 if (node == NULL) { 1792 if (node == NULL) {
1793 mlog(ML_NOTICE, "attempt to connect from unknown node at " 1793 mlog(ML_NOTICE, "attempt to connect from unknown node at %pI4:%d\n",
1794 "%u.%u.%u.%u:%d\n", NIPQUAD(sin.sin_addr.s_addr), 1794 &sin.sin_addr.s_addr, ntohs(sin.sin_port));
1795 ntohs(sin.sin_port));
1796 ret = -EINVAL; 1795 ret = -EINVAL;
1797 goto out; 1796 goto out;
1798 } 1797 }
1799 1798
1800 if (o2nm_this_node() > node->nd_num) { 1799 if (o2nm_this_node() > node->nd_num) {
1801 mlog(ML_NOTICE, "unexpected connect attempted from a lower " 1800 mlog(ML_NOTICE, "unexpected connect attempted from a lower "
1802 "numbered node '%s' at " "%u.%u.%u.%u:%d with num %u\n", 1801 "numbered node '%s' at " "%pI4:%d with num %u\n",
1803 node->nd_name, NIPQUAD(sin.sin_addr.s_addr), 1802 node->nd_name, &sin.sin_addr.s_addr,
1804 ntohs(sin.sin_port), node->nd_num); 1803 ntohs(sin.sin_port), node->nd_num);
1805 ret = -EINVAL; 1804 ret = -EINVAL;
1806 goto out; 1805 goto out;
@@ -1810,8 +1809,8 @@ static int o2net_accept_one(struct socket *sock)
1810 * and tries to connect before we see their heartbeat */ 1809 * and tries to connect before we see their heartbeat */
1811 if (!o2hb_check_node_heartbeating_from_callback(node->nd_num)) { 1810 if (!o2hb_check_node_heartbeating_from_callback(node->nd_num)) {
1812 mlog(ML_CONN, "attempt to connect from node '%s' at " 1811 mlog(ML_CONN, "attempt to connect from node '%s' at "
1813 "%u.%u.%u.%u:%d but it isn't heartbeating\n", 1812 "%pI4:%d but it isn't heartbeating\n",
1814 node->nd_name, NIPQUAD(sin.sin_addr.s_addr), 1813 node->nd_name, &sin.sin_addr.s_addr,
1815 ntohs(sin.sin_port)); 1814 ntohs(sin.sin_port));
1816 ret = -EINVAL; 1815 ret = -EINVAL;
1817 goto out; 1816 goto out;
@@ -1827,8 +1826,8 @@ static int o2net_accept_one(struct socket *sock)
1827 spin_unlock(&nn->nn_lock); 1826 spin_unlock(&nn->nn_lock);
1828 if (ret) { 1827 if (ret) {
1829 mlog(ML_NOTICE, "attempt to connect from node '%s' at " 1828 mlog(ML_NOTICE, "attempt to connect from node '%s' at "
1830 "%u.%u.%u.%u:%d but it already has an open connection\n", 1829 "%pI4:%d but it already has an open connection\n",
1831 node->nd_name, NIPQUAD(sin.sin_addr.s_addr), 1830 node->nd_name, &sin.sin_addr.s_addr,
1832 ntohs(sin.sin_port)); 1831 ntohs(sin.sin_port));
1833 goto out; 1832 goto out;
1834 } 1833 }
@@ -1924,15 +1923,15 @@ static int o2net_open_listening_sock(__be32 addr, __be16 port)
1924 sock->sk->sk_reuse = 1; 1923 sock->sk->sk_reuse = 1;
1925 ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin)); 1924 ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin));
1926 if (ret < 0) { 1925 if (ret < 0) {
1927 mlog(ML_ERROR, "unable to bind socket at %u.%u.%u.%u:%u, " 1926 mlog(ML_ERROR, "unable to bind socket at %pI4:%u, "
1928 "ret=%d\n", NIPQUAD(addr), ntohs(port), ret); 1927 "ret=%d\n", &addr, ntohs(port), ret);
1929 goto out; 1928 goto out;
1930 } 1929 }
1931 1930
1932 ret = sock->ops->listen(sock, 64); 1931 ret = sock->ops->listen(sock, 64);
1933 if (ret < 0) { 1932 if (ret < 0) {
1934 mlog(ML_ERROR, "unable to listen on %u.%u.%u.%u:%u, ret=%d\n", 1933 mlog(ML_ERROR, "unable to listen on %pI4:%u, ret=%d\n",
1935 NIPQUAD(addr), ntohs(port), ret); 1934 &addr, ntohs(port), ret);
1936 } 1935 }
1937 1936
1938out: 1937out:
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index ba962d71b34..6f7a77d5402 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -339,8 +339,8 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb)
339 ip = DLMFS_I(inode); 339 ip = DLMFS_I(inode);
340 340
341 inode->i_mode = mode; 341 inode->i_mode = mode;
342 inode->i_uid = current->fsuid; 342 inode->i_uid = current_fsuid();
343 inode->i_gid = current->fsgid; 343 inode->i_gid = current_fsgid();
344 inode->i_blocks = 0; 344 inode->i_blocks = 0;
345 inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; 345 inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info;
346 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 346 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
@@ -365,8 +365,8 @@ static struct inode *dlmfs_get_inode(struct inode *parent,
365 return NULL; 365 return NULL;
366 366
367 inode->i_mode = mode; 367 inode->i_mode = mode;
368 inode->i_uid = current->fsuid; 368 inode->i_uid = current_fsuid();
369 inode->i_gid = current->fsgid; 369 inode->i_gid = current_fsgid();
370 inode->i_blocks = 0; 370 inode->i_blocks = 0;
371 inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; 371 inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info;
372 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 372 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index f4967e634ff..2545e7402ef 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -421,13 +421,13 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
421 fe->i_blkno = cpu_to_le64(fe_blkno); 421 fe->i_blkno = cpu_to_le64(fe_blkno);
422 fe->i_suballoc_bit = cpu_to_le16(suballoc_bit); 422 fe->i_suballoc_bit = cpu_to_le16(suballoc_bit);
423 fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot); 423 fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot);
424 fe->i_uid = cpu_to_le32(current->fsuid); 424 fe->i_uid = cpu_to_le32(current_fsuid());
425 if (dir->i_mode & S_ISGID) { 425 if (dir->i_mode & S_ISGID) {
426 fe->i_gid = cpu_to_le32(dir->i_gid); 426 fe->i_gid = cpu_to_le32(dir->i_gid);
427 if (S_ISDIR(mode)) 427 if (S_ISDIR(mode))
428 mode |= S_ISGID; 428 mode |= S_ISGID;
429 } else 429 } else
430 fe->i_gid = cpu_to_le32(current->fsgid); 430 fe->i_gid = cpu_to_le32(current_fsgid());
431 fe->i_mode = cpu_to_le16(mode); 431 fe->i_mode = cpu_to_le16(mode);
432 if (S_ISCHR(mode) || S_ISBLK(mode)) 432 if (S_ISCHR(mode) || S_ISBLK(mode))
433 fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev)); 433 fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev));
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 5f180cf7abb..5e0c0d0aef7 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -86,7 +86,8 @@
86#define OCFS2_CLEAR_INCOMPAT_FEATURE(sb,mask) \ 86#define OCFS2_CLEAR_INCOMPAT_FEATURE(sb,mask) \
87 OCFS2_SB(sb)->s_feature_incompat &= ~(mask) 87 OCFS2_SB(sb)->s_feature_incompat &= ~(mask)
88 88
89#define OCFS2_FEATURE_COMPAT_SUPP OCFS2_FEATURE_COMPAT_BACKUP_SB 89#define OCFS2_FEATURE_COMPAT_SUPP (OCFS2_FEATURE_COMPAT_BACKUP_SB \
90 | OCFS2_FEATURE_COMPAT_JBD2_SB)
90#define OCFS2_FEATURE_INCOMPAT_SUPP (OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT \ 91#define OCFS2_FEATURE_INCOMPAT_SUPP (OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT \
91 | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC \ 92 | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC \
92 | OCFS2_FEATURE_INCOMPAT_INLINE_DATA \ 93 | OCFS2_FEATURE_INCOMPAT_INLINE_DATA \
@@ -153,6 +154,11 @@
153#define OCFS2_FEATURE_COMPAT_BACKUP_SB 0x0001 154#define OCFS2_FEATURE_COMPAT_BACKUP_SB 0x0001
154 155
155/* 156/*
157 * The filesystem will correctly handle journal feature bits.
158 */
159#define OCFS2_FEATURE_COMPAT_JBD2_SB 0x0002
160
161/*
156 * Unwritten extents support. 162 * Unwritten extents support.
157 */ 163 */
158#define OCFS2_FEATURE_RO_COMPAT_UNWRITTEN 0x0001 164#define OCFS2_FEATURE_RO_COMPAT_UNWRITTEN 0x0001
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 054e2efb0b7..74d7367ade1 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -2645,9 +2645,9 @@ static int ocfs2_xattr_update_xattr_search(struct inode *inode,
2645 return ret; 2645 return ret;
2646 } 2646 }
2647 2647
2648 i = xs->here - old_xh->xh_entries;
2649 xs->here = &xs->header->xh_entries[i];
2650 } 2648 }
2649 i = xs->here - old_xh->xh_entries;
2650 xs->here = &xs->header->xh_entries[i];
2651 } 2651 }
2652 2652
2653 return ret; 2653 return ret;
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index cbf047a847c..6afe57c84f8 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -37,8 +37,8 @@ struct inode *omfs_new_inode(struct inode *dir, int mode)
37 37
38 inode->i_ino = new_block; 38 inode->i_ino = new_block;
39 inode->i_mode = mode; 39 inode->i_mode = mode;
40 inode->i_uid = current->fsuid; 40 inode->i_uid = current_fsuid();
41 inode->i_gid = current->fsgid; 41 inode->i_gid = current_fsgid();
42 inode->i_blocks = 0; 42 inode->i_blocks = 0;
43 inode->i_mapping->a_ops = &omfs_aops; 43 inode->i_mapping->a_ops = &omfs_aops;
44 44
@@ -420,8 +420,8 @@ static int omfs_fill_super(struct super_block *sb, void *data, int silent)
420 420
421 sb->s_fs_info = sbi; 421 sb->s_fs_info = sbi;
422 422
423 sbi->s_uid = current->uid; 423 sbi->s_uid = current_uid();
424 sbi->s_gid = current->gid; 424 sbi->s_gid = current_gid();
425 sbi->s_dmask = sbi->s_fmask = current->fs->umask; 425 sbi->s_dmask = sbi->s_fmask = current->fs->umask;
426 426
427 if (!parse_options((char *) data, sbi)) 427 if (!parse_options((char *) data, sbi))
diff --git a/fs/open.c b/fs/open.c
index 83cdb9dee0c..1cd7d40e999 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -272,6 +272,8 @@ static long do_sys_truncate(const char __user *pathname, loff_t length)
272 goto put_write_and_out; 272 goto put_write_and_out;
273 273
274 error = locks_verify_truncate(inode, NULL, length); 274 error = locks_verify_truncate(inode, NULL, length);
275 if (!error)
276 error = security_path_truncate(&path, length, 0);
275 if (!error) { 277 if (!error) {
276 DQUOT_INIT(inode); 278 DQUOT_INIT(inode);
277 error = do_truncate(path.dentry, length, 0, NULL); 279 error = do_truncate(path.dentry, length, 0, NULL);
@@ -329,6 +331,9 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
329 331
330 error = locks_verify_truncate(inode, file, length); 332 error = locks_verify_truncate(inode, file, length);
331 if (!error) 333 if (!error)
334 error = security_path_truncate(&file->f_path, length,
335 ATTR_MTIME|ATTR_CTIME);
336 if (!error)
332 error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file); 337 error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file);
333out_putf: 338out_putf:
334 fput(file); 339 fput(file);
@@ -425,39 +430,33 @@ out:
425 */ 430 */
426asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) 431asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
427{ 432{
433 const struct cred *old_cred;
434 struct cred *override_cred;
428 struct path path; 435 struct path path;
429 struct inode *inode; 436 struct inode *inode;
430 int old_fsuid, old_fsgid;
431 kernel_cap_t uninitialized_var(old_cap); /* !SECURE_NO_SETUID_FIXUP */
432 int res; 437 int res;
433 438
434 if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ 439 if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */
435 return -EINVAL; 440 return -EINVAL;
436 441
437 old_fsuid = current->fsuid; 442 override_cred = prepare_creds();
438 old_fsgid = current->fsgid; 443 if (!override_cred)
444 return -ENOMEM;
439 445
440 current->fsuid = current->uid; 446 override_cred->fsuid = override_cred->uid;
441 current->fsgid = current->gid; 447 override_cred->fsgid = override_cred->gid;
442 448
443 if (!issecure(SECURE_NO_SETUID_FIXUP)) { 449 if (!issecure(SECURE_NO_SETUID_FIXUP)) {
444 /* 450 /* Clear the capabilities if we switch to a non-root user */
445 * Clear the capabilities if we switch to a non-root user 451 if (override_cred->uid)
446 */ 452 cap_clear(override_cred->cap_effective);
447#ifndef CONFIG_SECURITY_FILE_CAPABILITIES
448 /*
449 * FIXME: There is a race here against sys_capset. The
450 * capabilities can change yet we will restore the old
451 * value below. We should hold task_capabilities_lock,
452 * but we cannot because user_path_at can sleep.
453 */
454#endif /* ndef CONFIG_SECURITY_FILE_CAPABILITIES */
455 if (current->uid)
456 old_cap = cap_set_effective(__cap_empty_set);
457 else 453 else
458 old_cap = cap_set_effective(current->cap_permitted); 454 override_cred->cap_effective =
455 override_cred->cap_permitted;
459 } 456 }
460 457
458 old_cred = override_creds(override_cred);
459
461 res = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path); 460 res = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
462 if (res) 461 if (res)
463 goto out; 462 goto out;
@@ -494,12 +493,8 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
494out_path_release: 493out_path_release:
495 path_put(&path); 494 path_put(&path);
496out: 495out:
497 current->fsuid = old_fsuid; 496 revert_creds(old_cred);
498 current->fsgid = old_fsgid; 497 put_cred(override_cred);
499
500 if (!issecure(SECURE_NO_SETUID_FIXUP))
501 cap_set_effective(old_cap);
502
503 return res; 498 return res;
504} 499}
505 500
@@ -792,7 +787,8 @@ static inline int __get_file_write_access(struct inode *inode,
792 787
793static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, 788static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
794 int flags, struct file *f, 789 int flags, struct file *f,
795 int (*open)(struct inode *, struct file *)) 790 int (*open)(struct inode *, struct file *),
791 const struct cred *cred)
796{ 792{
797 struct inode *inode; 793 struct inode *inode;
798 int error; 794 int error;
@@ -816,7 +812,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
816 f->f_op = fops_get(inode->i_fop); 812 f->f_op = fops_get(inode->i_fop);
817 file_move(f, &inode->i_sb->s_files); 813 file_move(f, &inode->i_sb->s_files);
818 814
819 error = security_dentry_open(f); 815 error = security_dentry_open(f, cred);
820 if (error) 816 if (error)
821 goto cleanup_all; 817 goto cleanup_all;
822 818
@@ -891,6 +887,8 @@ cleanup_file:
891struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry, 887struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry,
892 int (*open)(struct inode *, struct file *)) 888 int (*open)(struct inode *, struct file *))
893{ 889{
890 const struct cred *cred = current_cred();
891
894 if (IS_ERR(nd->intent.open.file)) 892 if (IS_ERR(nd->intent.open.file))
895 goto out; 893 goto out;
896 if (IS_ERR(dentry)) 894 if (IS_ERR(dentry))
@@ -898,7 +896,7 @@ struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry
898 nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->path.mnt), 896 nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->path.mnt),
899 nd->intent.open.flags - 1, 897 nd->intent.open.flags - 1,
900 nd->intent.open.file, 898 nd->intent.open.file,
901 open); 899 open, cred);
902out: 900out:
903 return nd->intent.open.file; 901 return nd->intent.open.file;
904out_err: 902out_err:
@@ -917,6 +915,7 @@ EXPORT_SYMBOL_GPL(lookup_instantiate_filp);
917 */ 915 */
918struct file *nameidata_to_filp(struct nameidata *nd, int flags) 916struct file *nameidata_to_filp(struct nameidata *nd, int flags)
919{ 917{
918 const struct cred *cred = current_cred();
920 struct file *filp; 919 struct file *filp;
921 920
922 /* Pick up the filp from the open intent */ 921 /* Pick up the filp from the open intent */
@@ -924,7 +923,7 @@ struct file *nameidata_to_filp(struct nameidata *nd, int flags)
924 /* Has the filesystem initialised the file for us? */ 923 /* Has the filesystem initialised the file for us? */
925 if (filp->f_path.dentry == NULL) 924 if (filp->f_path.dentry == NULL)
926 filp = __dentry_open(nd->path.dentry, nd->path.mnt, flags, filp, 925 filp = __dentry_open(nd->path.dentry, nd->path.mnt, flags, filp,
927 NULL); 926 NULL, cred);
928 else 927 else
929 path_put(&nd->path); 928 path_put(&nd->path);
930 return filp; 929 return filp;
@@ -934,7 +933,8 @@ struct file *nameidata_to_filp(struct nameidata *nd, int flags)
934 * dentry_open() will have done dput(dentry) and mntput(mnt) if it returns an 933 * dentry_open() will have done dput(dentry) and mntput(mnt) if it returns an
935 * error. 934 * error.
936 */ 935 */
937struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) 936struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags,
937 const struct cred *cred)
938{ 938{
939 int error; 939 int error;
940 struct file *f; 940 struct file *f;
@@ -959,7 +959,7 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
959 return ERR_PTR(error); 959 return ERR_PTR(error);
960 } 960 }
961 961
962 return __dentry_open(dentry, mnt, flags, f, NULL); 962 return __dentry_open(dentry, mnt, flags, f, NULL, cred);
963} 963}
964EXPORT_SYMBOL(dentry_open); 964EXPORT_SYMBOL(dentry_open);
965 965
diff --git a/fs/pipe.c b/fs/pipe.c
index 7aea8b89baa..891697112f6 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -899,8 +899,8 @@ static struct inode * get_pipe_inode(void)
899 */ 899 */
900 inode->i_state = I_DIRTY; 900 inode->i_state = I_DIRTY;
901 inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR; 901 inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
902 inode->i_uid = current->fsuid; 902 inode->i_uid = current_fsuid();
903 inode->i_gid = current->fsgid; 903 inode->i_gid = current_fsgid();
904 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 904 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
905 905
906 return inode; 906 return inode;
@@ -1016,10 +1016,7 @@ int do_pipe_flags(int *fd, int flags)
1016 goto err_fdr; 1016 goto err_fdr;
1017 fdw = error; 1017 fdw = error;
1018 1018
1019 error = audit_fd_pair(fdr, fdw); 1019 audit_fd_pair(fdr, fdw);
1020 if (error < 0)
1021 goto err_fdw;
1022
1023 fd_install(fdr, fr); 1020 fd_install(fdr, fr);
1024 fd_install(fdw, fw); 1021 fd_install(fdw, fw);
1025 fd[0] = fdr; 1022 fd[0] = fdr;
@@ -1027,8 +1024,6 @@ int do_pipe_flags(int *fd, int flags)
1027 1024
1028 return 0; 1025 return 0;
1029 1026
1030 err_fdw:
1031 put_unused_fd(fdw);
1032 err_fdr: 1027 err_fdr:
1033 put_unused_fd(fdr); 1028 put_unused_fd(fdr);
1034 err_read_pipe: 1029 err_read_pipe:
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index aec931e0997..39df95a0ec2 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -217,11 +217,11 @@ posix_acl_permission(struct inode *inode, const struct posix_acl *acl, int want)
217 switch(pa->e_tag) { 217 switch(pa->e_tag) {
218 case ACL_USER_OBJ: 218 case ACL_USER_OBJ:
219 /* (May have been checked already) */ 219 /* (May have been checked already) */
220 if (inode->i_uid == current->fsuid) 220 if (inode->i_uid == current_fsuid())
221 goto check_perm; 221 goto check_perm;
222 break; 222 break;
223 case ACL_USER: 223 case ACL_USER:
224 if (pa->e_id == current->fsuid) 224 if (pa->e_id == current_fsuid())
225 goto mask; 225 goto mask;
226 break; 226 break;
227 case ACL_GROUP_OBJ: 227 case ACL_GROUP_OBJ:
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 6af7fba7abb..7e4877d9dcb 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -159,6 +159,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
159 struct group_info *group_info; 159 struct group_info *group_info;
160 int g; 160 int g;
161 struct fdtable *fdt = NULL; 161 struct fdtable *fdt = NULL;
162 const struct cred *cred;
162 pid_t ppid, tpid; 163 pid_t ppid, tpid;
163 164
164 rcu_read_lock(); 165 rcu_read_lock();
@@ -170,6 +171,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
170 if (tracer) 171 if (tracer)
171 tpid = task_pid_nr_ns(tracer, ns); 172 tpid = task_pid_nr_ns(tracer, ns);
172 } 173 }
174 cred = get_cred((struct cred *) __task_cred(p));
173 seq_printf(m, 175 seq_printf(m,
174 "State:\t%s\n" 176 "State:\t%s\n"
175 "Tgid:\t%d\n" 177 "Tgid:\t%d\n"
@@ -182,8 +184,8 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
182 task_tgid_nr_ns(p, ns), 184 task_tgid_nr_ns(p, ns),
183 pid_nr_ns(pid, ns), 185 pid_nr_ns(pid, ns),
184 ppid, tpid, 186 ppid, tpid,
185 p->uid, p->euid, p->suid, p->fsuid, 187 cred->uid, cred->euid, cred->suid, cred->fsuid,
186 p->gid, p->egid, p->sgid, p->fsgid); 188 cred->gid, cred->egid, cred->sgid, cred->fsgid);
187 189
188 task_lock(p); 190 task_lock(p);
189 if (p->files) 191 if (p->files)
@@ -194,13 +196,12 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
194 fdt ? fdt->max_fds : 0); 196 fdt ? fdt->max_fds : 0);
195 rcu_read_unlock(); 197 rcu_read_unlock();
196 198
197 group_info = p->group_info; 199 group_info = cred->group_info;
198 get_group_info(group_info);
199 task_unlock(p); 200 task_unlock(p);
200 201
201 for (g = 0; g < min(group_info->ngroups, NGROUPS_SMALL); g++) 202 for (g = 0; g < min(group_info->ngroups, NGROUPS_SMALL); g++)
202 seq_printf(m, "%d ", GROUP_AT(group_info, g)); 203 seq_printf(m, "%d ", GROUP_AT(group_info, g));
203 put_group_info(group_info); 204 put_cred(cred);
204 205
205 seq_printf(m, "\n"); 206 seq_printf(m, "\n");
206} 207}
@@ -262,7 +263,7 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p)
262 blocked = p->blocked; 263 blocked = p->blocked;
263 collect_sigign_sigcatch(p, &ignored, &caught); 264 collect_sigign_sigcatch(p, &ignored, &caught);
264 num_threads = atomic_read(&p->signal->count); 265 num_threads = atomic_read(&p->signal->count);
265 qsize = atomic_read(&p->user->sigpending); 266 qsize = atomic_read(&__task_cred(p)->user->sigpending);
266 qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur; 267 qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur;
267 unlock_task_sighand(p, &flags); 268 unlock_task_sighand(p, &flags);
268 } 269 }
@@ -293,10 +294,21 @@ static void render_cap_t(struct seq_file *m, const char *header,
293 294
294static inline void task_cap(struct seq_file *m, struct task_struct *p) 295static inline void task_cap(struct seq_file *m, struct task_struct *p)
295{ 296{
296 render_cap_t(m, "CapInh:\t", &p->cap_inheritable); 297 const struct cred *cred;
297 render_cap_t(m, "CapPrm:\t", &p->cap_permitted); 298 kernel_cap_t cap_inheritable, cap_permitted, cap_effective, cap_bset;
298 render_cap_t(m, "CapEff:\t", &p->cap_effective); 299
299 render_cap_t(m, "CapBnd:\t", &p->cap_bset); 300 rcu_read_lock();
301 cred = __task_cred(p);
302 cap_inheritable = cred->cap_inheritable;
303 cap_permitted = cred->cap_permitted;
304 cap_effective = cred->cap_effective;
305 cap_bset = cred->cap_bset;
306 rcu_read_unlock();
307
308 render_cap_t(m, "CapInh:\t", &cap_inheritable);
309 render_cap_t(m, "CapPrm:\t", &cap_permitted);
310 render_cap_t(m, "CapEff:\t", &cap_effective);
311 render_cap_t(m, "CapBnd:\t", &cap_bset);
300} 312}
301 313
302static inline void task_context_switch_counts(struct seq_file *m, 314static inline void task_context_switch_counts(struct seq_file *m,
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 486cf3fe713..cad92c1ac2b 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -347,8 +347,8 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer)
347static int proc_pid_schedstat(struct task_struct *task, char *buffer) 347static int proc_pid_schedstat(struct task_struct *task, char *buffer)
348{ 348{
349 return sprintf(buffer, "%llu %llu %lu\n", 349 return sprintf(buffer, "%llu %llu %lu\n",
350 task->sched_info.cpu_time, 350 (unsigned long long)task->se.sum_exec_runtime,
351 task->sched_info.run_delay, 351 (unsigned long long)task->sched_info.run_delay,
352 task->sched_info.pcount); 352 task->sched_info.pcount);
353} 353}
354#endif 354#endif
@@ -371,7 +371,7 @@ static int lstats_show_proc(struct seq_file *m, void *v)
371 task->latency_record[i].time, 371 task->latency_record[i].time,
372 task->latency_record[i].max); 372 task->latency_record[i].max);
373 for (q = 0; q < LT_BACKTRACEDEPTH; q++) { 373 for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
374 char sym[KSYM_NAME_LEN]; 374 char sym[KSYM_SYMBOL_LEN];
375 char *c; 375 char *c;
376 if (!task->latency_record[i].backtrace[q]) 376 if (!task->latency_record[i].backtrace[q])
377 break; 377 break;
@@ -1406,6 +1406,7 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st
1406{ 1406{
1407 struct inode * inode; 1407 struct inode * inode;
1408 struct proc_inode *ei; 1408 struct proc_inode *ei;
1409 const struct cred *cred;
1409 1410
1410 /* We need a new inode */ 1411 /* We need a new inode */
1411 1412
@@ -1428,8 +1429,11 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st
1428 inode->i_uid = 0; 1429 inode->i_uid = 0;
1429 inode->i_gid = 0; 1430 inode->i_gid = 0;
1430 if (task_dumpable(task)) { 1431 if (task_dumpable(task)) {
1431 inode->i_uid = task->euid; 1432 rcu_read_lock();
1432 inode->i_gid = task->egid; 1433 cred = __task_cred(task);
1434 inode->i_uid = cred->euid;
1435 inode->i_gid = cred->egid;
1436 rcu_read_unlock();
1433 } 1437 }
1434 security_task_to_inode(task, inode); 1438 security_task_to_inode(task, inode);
1435 1439
@@ -1445,6 +1449,8 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat
1445{ 1449{
1446 struct inode *inode = dentry->d_inode; 1450 struct inode *inode = dentry->d_inode;
1447 struct task_struct *task; 1451 struct task_struct *task;
1452 const struct cred *cred;
1453
1448 generic_fillattr(inode, stat); 1454 generic_fillattr(inode, stat);
1449 1455
1450 rcu_read_lock(); 1456 rcu_read_lock();
@@ -1454,8 +1460,9 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat
1454 if (task) { 1460 if (task) {
1455 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || 1461 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
1456 task_dumpable(task)) { 1462 task_dumpable(task)) {
1457 stat->uid = task->euid; 1463 cred = __task_cred(task);
1458 stat->gid = task->egid; 1464 stat->uid = cred->euid;
1465 stat->gid = cred->egid;
1459 } 1466 }
1460 } 1467 }
1461 rcu_read_unlock(); 1468 rcu_read_unlock();
@@ -1483,11 +1490,16 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
1483{ 1490{
1484 struct inode *inode = dentry->d_inode; 1491 struct inode *inode = dentry->d_inode;
1485 struct task_struct *task = get_proc_task(inode); 1492 struct task_struct *task = get_proc_task(inode);
1493 const struct cred *cred;
1494
1486 if (task) { 1495 if (task) {
1487 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || 1496 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
1488 task_dumpable(task)) { 1497 task_dumpable(task)) {
1489 inode->i_uid = task->euid; 1498 rcu_read_lock();
1490 inode->i_gid = task->egid; 1499 cred = __task_cred(task);
1500 inode->i_uid = cred->euid;
1501 inode->i_gid = cred->egid;
1502 rcu_read_unlock();
1491 } else { 1503 } else {
1492 inode->i_uid = 0; 1504 inode->i_uid = 0;
1493 inode->i_gid = 0; 1505 inode->i_gid = 0;
@@ -1649,6 +1661,7 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
1649 struct task_struct *task = get_proc_task(inode); 1661 struct task_struct *task = get_proc_task(inode);
1650 int fd = proc_fd(inode); 1662 int fd = proc_fd(inode);
1651 struct files_struct *files; 1663 struct files_struct *files;
1664 const struct cred *cred;
1652 1665
1653 if (task) { 1666 if (task) {
1654 files = get_files_struct(task); 1667 files = get_files_struct(task);
@@ -1658,8 +1671,11 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
1658 rcu_read_unlock(); 1671 rcu_read_unlock();
1659 put_files_struct(files); 1672 put_files_struct(files);
1660 if (task_dumpable(task)) { 1673 if (task_dumpable(task)) {
1661 inode->i_uid = task->euid; 1674 rcu_read_lock();
1662 inode->i_gid = task->egid; 1675 cred = __task_cred(task);
1676 inode->i_uid = cred->euid;
1677 inode->i_gid = cred->egid;
1678 rcu_read_unlock();
1663 } else { 1679 } else {
1664 inode->i_uid = 0; 1680 inode->i_uid = 0;
1665 inode->i_gid = 0; 1681 inode->i_gid = 0;
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index d777789b7a8..de2bba5a344 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -218,8 +218,7 @@ void proc_device_tree_add_node(struct device_node *np,
218void __init proc_device_tree_init(void) 218void __init proc_device_tree_init(void)
219{ 219{
220 struct device_node *root; 220 struct device_node *root;
221 if ( !have_of ) 221
222 return;
223 proc_device_tree = proc_mkdir("device-tree", NULL); 222 proc_device_tree = proc_mkdir("device-tree", NULL);
224 if (proc_device_tree == 0) 223 if (proc_device_tree == 0)
225 return; 224 return;
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 81904f07679..f75efa22df5 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -9,6 +9,7 @@
9#include <linux/seq_file.h> 9#include <linux/seq_file.h>
10#include <linux/slab.h> 10#include <linux/slab.h>
11#include <linux/time.h> 11#include <linux/time.h>
12#include <linux/irqnr.h>
12#include <asm/cputime.h> 13#include <asm/cputime.h>
13 14
14#ifndef arch_irq_stat_cpu 15#ifndef arch_irq_stat_cpu
@@ -44,10 +45,9 @@ static int show_stat(struct seq_file *p, void *v)
44 softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); 45 softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq);
45 steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); 46 steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
46 guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); 47 guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
47 48 for_each_irq_nr(j) {
48 for_each_irq_nr(j)
49 sum += kstat_irqs_cpu(j, i); 49 sum += kstat_irqs_cpu(j, i);
50 50 }
51 sum += arch_irq_stat_cpu(i); 51 sum += arch_irq_stat_cpu(i);
52 } 52 }
53 sum += arch_irq_stat(); 53 sum += arch_irq_stat();
@@ -92,7 +92,6 @@ static int show_stat(struct seq_file *p, void *v)
92 /* sum again ? it could be updated? */ 92 /* sum again ? it could be updated? */
93 for_each_irq_nr(j) { 93 for_each_irq_nr(j) {
94 per_irq_sum = 0; 94 per_irq_sum = 0;
95
96 for_each_possible_cpu(i) 95 for_each_possible_cpu(i)
97 per_irq_sum += kstat_irqs_cpu(j, i); 96 per_irq_sum += kstat_irqs_cpu(j, i);
98 97
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index b770c095e45..3a8bdd7f575 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -557,9 +557,9 @@ static u64 swap_pte_to_pagemap_entry(pte_t pte)
557 return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT); 557 return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT);
558} 558}
559 559
560static unsigned long pte_to_pagemap_entry(pte_t pte) 560static u64 pte_to_pagemap_entry(pte_t pte)
561{ 561{
562 unsigned long pme = 0; 562 u64 pme = 0;
563 if (is_swap_pte(pte)) 563 if (is_swap_pte(pte))
564 pme = PM_PFRAME(swap_pte_to_pagemap_entry(pte)) 564 pme = PM_PFRAME(swap_pte_to_pagemap_entry(pte))
565 | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP; 565 | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP;
diff --git a/fs/quota.c b/fs/quota.c
index 7f4386ebc23..b7fe44e0161 100644
--- a/fs/quota.c
+++ b/fs/quota.c
@@ -79,7 +79,7 @@ static int generic_quotactl_valid(struct super_block *sb, int type, int cmd, qid
79 79
80 /* Check privileges */ 80 /* Check privileges */
81 if (cmd == Q_GETQUOTA) { 81 if (cmd == Q_GETQUOTA) {
82 if (((type == USRQUOTA && current->euid != id) || 82 if (((type == USRQUOTA && current_euid() != id) ||
83 (type == GRPQUOTA && !in_egroup_p(id))) && 83 (type == GRPQUOTA && !in_egroup_p(id))) &&
84 !capable(CAP_SYS_ADMIN)) 84 !capable(CAP_SYS_ADMIN))
85 return -EPERM; 85 return -EPERM;
@@ -130,7 +130,7 @@ static int xqm_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t i
130 130
131 /* Check privileges */ 131 /* Check privileges */
132 if (cmd == Q_XGETQUOTA) { 132 if (cmd == Q_XGETQUOTA) {
133 if (((type == XQM_USRQUOTA && current->euid != id) || 133 if (((type == XQM_USRQUOTA && current_euid() != id) ||
134 (type == XQM_GRPQUOTA && !in_egroup_p(id))) && 134 (type == XQM_GRPQUOTA && !in_egroup_p(id))) &&
135 !capable(CAP_SYS_ADMIN)) 135 !capable(CAP_SYS_ADMIN))
136 return -EPERM; 136 return -EPERM;
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index f031d1c925f..a83a3518ae3 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -55,8 +55,8 @@ struct inode *ramfs_get_inode(struct super_block *sb, int mode, dev_t dev)
55 55
56 if (inode) { 56 if (inode) {
57 inode->i_mode = mode; 57 inode->i_mode = mode;
58 inode->i_uid = current->fsuid; 58 inode->i_uid = current_fsuid();
59 inode->i_gid = current->fsgid; 59 inode->i_gid = current_fsgid();
60 inode->i_blocks = 0; 60 inode->i_blocks = 0;
61 inode->i_mapping->a_ops = &ramfs_aops; 61 inode->i_mapping->a_ops = &ramfs_aops;
62 inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info; 62 inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info;
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 6c4c2c69449..ed04f47007f 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1753,6 +1753,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1753 struct inode *inode) 1753 struct inode *inode)
1754{ 1754{
1755 struct super_block *sb; 1755 struct super_block *sb;
1756 struct reiserfs_iget_args args;
1756 INITIALIZE_PATH(path_to_key); 1757 INITIALIZE_PATH(path_to_key);
1757 struct cpu_key key; 1758 struct cpu_key key;
1758 struct item_head ih; 1759 struct item_head ih;
@@ -1780,6 +1781,14 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1780 err = -ENOMEM; 1781 err = -ENOMEM;
1781 goto out_bad_inode; 1782 goto out_bad_inode;
1782 } 1783 }
1784 args.objectid = inode->i_ino = le32_to_cpu(ih.ih_key.k_objectid);
1785 memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE);
1786 args.dirid = le32_to_cpu(ih.ih_key.k_dir_id);
1787 if (insert_inode_locked4(inode, args.objectid,
1788 reiserfs_find_actor, &args) < 0) {
1789 err = -EINVAL;
1790 goto out_bad_inode;
1791 }
1783 if (old_format_only(sb)) 1792 if (old_format_only(sb))
1784 /* not a perfect generation count, as object ids can be reused, but 1793 /* not a perfect generation count, as object ids can be reused, but
1785 ** this is as good as reiserfs can do right now. 1794 ** this is as good as reiserfs can do right now.
@@ -1859,13 +1868,9 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1859 } else { 1868 } else {
1860 inode2sd(&sd, inode, inode->i_size); 1869 inode2sd(&sd, inode, inode->i_size);
1861 } 1870 }
1862 // these do not go to on-disk stat data
1863 inode->i_ino = le32_to_cpu(ih.ih_key.k_objectid);
1864
1865 // store in in-core inode the key of stat data and version all 1871 // store in in-core inode the key of stat data and version all
1866 // object items will have (directory items will have old offset 1872 // object items will have (directory items will have old offset
1867 // format, other new objects will consist of new items) 1873 // format, other new objects will consist of new items)
1868 memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE);
1869 if (old_format_only(sb) || S_ISDIR(mode) || S_ISLNK(mode)) 1874 if (old_format_only(sb) || S_ISDIR(mode) || S_ISLNK(mode))
1870 set_inode_item_key_version(inode, KEY_FORMAT_3_5); 1875 set_inode_item_key_version(inode, KEY_FORMAT_3_5);
1871 else 1876 else
@@ -1929,7 +1934,6 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1929 reiserfs_mark_inode_private(inode); 1934 reiserfs_mark_inode_private(inode);
1930 } 1935 }
1931 1936
1932 insert_inode_hash(inode);
1933 reiserfs_update_sd(th, inode); 1937 reiserfs_update_sd(th, inode);
1934 reiserfs_check_path(&path_to_key); 1938 reiserfs_check_path(&path_to_key);
1935 1939
@@ -1956,6 +1960,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1956 out_inserted_sd: 1960 out_inserted_sd:
1957 inode->i_nlink = 0; 1961 inode->i_nlink = 0;
1958 th->t_trans_id = 0; /* so the caller can't use this handle later */ 1962 th->t_trans_id = 0; /* so the caller can't use this handle later */
1963 unlock_new_inode(inode); /* OK to do even if we hadn't locked it */
1959 1964
1960 /* If we were inheriting an ACL, we need to release the lock so that 1965 /* If we were inheriting an ACL, we need to release the lock so that
1961 * iput doesn't deadlock in reiserfs_delete_xattrs. The locking 1966 * iput doesn't deadlock in reiserfs_delete_xattrs. The locking
@@ -2556,7 +2561,7 @@ static int reiserfs_write_begin(struct file *file,
2556 } 2561 }
2557 2562
2558 index = pos >> PAGE_CACHE_SHIFT; 2563 index = pos >> PAGE_CACHE_SHIFT;
2559 page = __grab_cache_page(mapping, index); 2564 page = grab_cache_page_write_begin(mapping, index, flags);
2560 if (!page) 2565 if (!page)
2561 return -ENOMEM; 2566 return -ENOMEM;
2562 *pagep = page; 2567 *pagep = page;
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index f89ebb943f3..738967f6c8e 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -573,7 +573,7 @@ static int new_inode_init(struct inode *inode, struct inode *dir, int mode)
573 /* the quota init calls have to know who to charge the quota to, so 573 /* the quota init calls have to know who to charge the quota to, so
574 ** we have to set uid and gid here 574 ** we have to set uid and gid here
575 */ 575 */
576 inode->i_uid = current->fsuid; 576 inode->i_uid = current_fsuid();
577 inode->i_mode = mode; 577 inode->i_mode = mode;
578 /* Make inode invalid - just in case we are going to drop it before 578 /* Make inode invalid - just in case we are going to drop it before
579 * the initialization happens */ 579 * the initialization happens */
@@ -584,7 +584,7 @@ static int new_inode_init(struct inode *inode, struct inode *dir, int mode)
584 if (S_ISDIR(mode)) 584 if (S_ISDIR(mode))
585 inode->i_mode |= S_ISGID; 585 inode->i_mode |= S_ISGID;
586 } else { 586 } else {
587 inode->i_gid = current->fsgid; 587 inode->i_gid = current_fsgid();
588 } 588 }
589 DQUOT_INIT(inode); 589 DQUOT_INIT(inode);
590 return 0; 590 return 0;
@@ -646,6 +646,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode,
646 err = journal_end(&th, dir->i_sb, jbegin_count); 646 err = journal_end(&th, dir->i_sb, jbegin_count);
647 if (err) 647 if (err)
648 retval = err; 648 retval = err;
649 unlock_new_inode(inode);
649 iput(inode); 650 iput(inode);
650 goto out_failed; 651 goto out_failed;
651 } 652 }
@@ -653,6 +654,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode,
653 reiserfs_update_inode_transaction(dir); 654 reiserfs_update_inode_transaction(dir);
654 655
655 d_instantiate(dentry, inode); 656 d_instantiate(dentry, inode);
657 unlock_new_inode(inode);
656 retval = journal_end(&th, dir->i_sb, jbegin_count); 658 retval = journal_end(&th, dir->i_sb, jbegin_count);
657 659
658 out_failed: 660 out_failed:
@@ -727,11 +729,13 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
727 err = journal_end(&th, dir->i_sb, jbegin_count); 729 err = journal_end(&th, dir->i_sb, jbegin_count);
728 if (err) 730 if (err)
729 retval = err; 731 retval = err;
732 unlock_new_inode(inode);
730 iput(inode); 733 iput(inode);
731 goto out_failed; 734 goto out_failed;
732 } 735 }
733 736
734 d_instantiate(dentry, inode); 737 d_instantiate(dentry, inode);
738 unlock_new_inode(inode);
735 retval = journal_end(&th, dir->i_sb, jbegin_count); 739 retval = journal_end(&th, dir->i_sb, jbegin_count);
736 740
737 out_failed: 741 out_failed:
@@ -812,6 +816,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
812 err = journal_end(&th, dir->i_sb, jbegin_count); 816 err = journal_end(&th, dir->i_sb, jbegin_count);
813 if (err) 817 if (err)
814 retval = err; 818 retval = err;
819 unlock_new_inode(inode);
815 iput(inode); 820 iput(inode);
816 goto out_failed; 821 goto out_failed;
817 } 822 }
@@ -819,6 +824,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
819 reiserfs_update_sd(&th, dir); 824 reiserfs_update_sd(&th, dir);
820 825
821 d_instantiate(dentry, inode); 826 d_instantiate(dentry, inode);
827 unlock_new_inode(inode);
822 retval = journal_end(&th, dir->i_sb, jbegin_count); 828 retval = journal_end(&th, dir->i_sb, jbegin_count);
823 out_failed: 829 out_failed:
824 if (locked) 830 if (locked)
@@ -1096,11 +1102,13 @@ static int reiserfs_symlink(struct inode *parent_dir,
1096 err = journal_end(&th, parent_dir->i_sb, jbegin_count); 1102 err = journal_end(&th, parent_dir->i_sb, jbegin_count);
1097 if (err) 1103 if (err)
1098 retval = err; 1104 retval = err;
1105 unlock_new_inode(inode);
1099 iput(inode); 1106 iput(inode);
1100 goto out_failed; 1107 goto out_failed;
1101 } 1108 }
1102 1109
1103 d_instantiate(dentry, inode); 1110 d_instantiate(dentry, inode);
1111 unlock_new_inode(inode);
1104 retval = journal_end(&th, parent_dir->i_sb, jbegin_count); 1112 retval = journal_end(&th, parent_dir->i_sb, jbegin_count);
1105 out_failed: 1113 out_failed:
1106 reiserfs_write_unlock(parent_dir->i_sb); 1114 reiserfs_write_unlock(parent_dir->i_sb);
diff --git a/fs/seq_file.c b/fs/seq_file.c
index eba2eabcd2b..b569ff1c4dc 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -357,7 +357,18 @@ int seq_printf(struct seq_file *m, const char *f, ...)
357} 357}
358EXPORT_SYMBOL(seq_printf); 358EXPORT_SYMBOL(seq_printf);
359 359
360static char *mangle_path(char *s, char *p, char *esc) 360/**
361 * mangle_path - mangle and copy path to buffer beginning
362 * @s: buffer start
363 * @p: beginning of path in above buffer
364 * @esc: set of characters that need escaping
365 *
366 * Copy the path from @p to @s, replacing each occurrence of character from
367 * @esc with usual octal escape.
368 * Returns pointer past last written character in @s, or NULL in case of
369 * failure.
370 */
371char *mangle_path(char *s, char *p, char *esc)
361{ 372{
362 while (s <= p) { 373 while (s <= p) {
363 char c = *p++; 374 char c = *p++;
@@ -376,9 +387,16 @@ static char *mangle_path(char *s, char *p, char *esc)
376 } 387 }
377 return NULL; 388 return NULL;
378} 389}
390EXPORT_SYMBOL(mangle_path);
379 391
380/* 392/**
381 * return the absolute path of 'dentry' residing in mount 'mnt'. 393 * seq_path - seq_file interface to print a pathname
394 * @m: the seq_file handle
395 * @path: the struct path to print
396 * @esc: set of characters to escape in the output
397 *
398 * return the absolute path of 'path', as represented by the
399 * dentry / mnt pair in the path parameter.
382 */ 400 */
383int seq_path(struct seq_file *m, struct path *path, char *esc) 401int seq_path(struct seq_file *m, struct path *path, char *esc)
384{ 402{
@@ -450,7 +468,8 @@ int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc)
450 return -1; 468 return -1;
451} 469}
452 470
453int seq_bitmap(struct seq_file *m, unsigned long *bits, unsigned int nr_bits) 471int seq_bitmap(struct seq_file *m, const unsigned long *bits,
472 unsigned int nr_bits)
454{ 473{
455 if (m->count < m->size) { 474 if (m->count < m->size) {
456 int len = bitmap_scnprintf(m->buf + m->count, 475 int len = bitmap_scnprintf(m->buf + m->count,
diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c
index 48da4fa6b7d..e7ddd0328dd 100644
--- a/fs/smbfs/dir.c
+++ b/fs/smbfs/dir.c
@@ -667,8 +667,7 @@ smb_make_node(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
667 667
668 attr.ia_valid = ATTR_MODE | ATTR_UID | ATTR_GID; 668 attr.ia_valid = ATTR_MODE | ATTR_UID | ATTR_GID;
669 attr.ia_mode = mode; 669 attr.ia_mode = mode;
670 attr.ia_uid = current->euid; 670 current_euid_egid(&attr.ia_uid, &attr.ia_gid);
671 attr.ia_gid = current->egid;
672 671
673 if (!new_valid_dev(dev)) 672 if (!new_valid_dev(dev))
674 return -EINVAL; 673 return -EINVAL;
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
index e4f8d51a555..92d5e8ffb63 100644
--- a/fs/smbfs/file.c
+++ b/fs/smbfs/file.c
@@ -297,7 +297,7 @@ static int smb_write_begin(struct file *file, struct address_space *mapping,
297 struct page **pagep, void **fsdata) 297 struct page **pagep, void **fsdata)
298{ 298{
299 pgoff_t index = pos >> PAGE_CACHE_SHIFT; 299 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
300 *pagep = __grab_cache_page(mapping, index); 300 *pagep = grab_cache_page_write_begin(mapping, index, flags);
301 if (!*pagep) 301 if (!*pagep)
302 return -ENOMEM; 302 return -ENOMEM;
303 return 0; 303 return 0;
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 3528f40ffb0..fc27fbfc539 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -586,7 +586,7 @@ static int smb_fill_super(struct super_block *sb, void *raw_data, int silent)
586 if (parse_options(mnt, raw_data)) 586 if (parse_options(mnt, raw_data))
587 goto out_bad_option; 587 goto out_bad_option;
588 } 588 }
589 mnt->mounted_uid = current->uid; 589 mnt->mounted_uid = current_uid();
590 smb_setcodepage(server, &mnt->codepage); 590 smb_setcodepage(server, &mnt->codepage);
591 591
592 /* 592 /*
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c
index ee536e8a649..9468168b9af 100644
--- a/fs/smbfs/proc.c
+++ b/fs/smbfs/proc.c
@@ -864,7 +864,7 @@ smb_newconn(struct smb_sb_info *server, struct smb_conn_opt *opt)
864 goto out; 864 goto out;
865 865
866 error = -EACCES; 866 error = -EACCES;
867 if (current->uid != server->mnt->mounted_uid && 867 if (current_uid() != server->mnt->mounted_uid &&
868 !capable(CAP_SYS_ADMIN)) 868 !capable(CAP_SYS_ADMIN))
869 goto out; 869 goto out;
870 870
diff --git a/fs/super.c b/fs/super.c
index 400a7608f15..ddba069d7a9 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -914,7 +914,7 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
914 goto out_free_secdata; 914 goto out_free_secdata;
915 BUG_ON(!mnt->mnt_sb); 915 BUG_ON(!mnt->mnt_sb);
916 916
917 error = security_sb_kern_mount(mnt->mnt_sb, secdata); 917 error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata);
918 if (error) 918 if (error)
919 goto out_sb; 919 goto out_sb;
920 920
diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c
index 115ab0d6f4b..241e9765cfa 100644
--- a/fs/sysv/ialloc.c
+++ b/fs/sysv/ialloc.c
@@ -165,9 +165,9 @@ struct inode * sysv_new_inode(const struct inode * dir, mode_t mode)
165 if (S_ISDIR(mode)) 165 if (S_ISDIR(mode))
166 mode |= S_ISGID; 166 mode |= S_ISGID;
167 } else 167 } else
168 inode->i_gid = current->fsgid; 168 inode->i_gid = current_fsgid();
169 169
170 inode->i_uid = current->fsuid; 170 inode->i_uid = current_fsuid();
171 inode->i_ino = fs16_to_cpu(sbi, ino); 171 inode->i_ino = fs16_to_cpu(sbi, ino);
172 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; 172 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
173 inode->i_blocks = 0; 173 inode->i_blocks = 0;
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index df0d435baa4..3d81bf58dae 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -27,6 +27,7 @@
27#include <linux/init.h> 27#include <linux/init.h>
28#include <linux/buffer_head.h> 28#include <linux/buffer_head.h>
29#include <linux/vfs.h> 29#include <linux/vfs.h>
30#include <linux/namei.h>
30#include <asm/byteorder.h> 31#include <asm/byteorder.h>
31#include "sysv.h" 32#include "sysv.h"
32 33
@@ -163,8 +164,11 @@ void sysv_set_inode(struct inode *inode, dev_t rdev)
163 if (inode->i_blocks) { 164 if (inode->i_blocks) {
164 inode->i_op = &sysv_symlink_inode_operations; 165 inode->i_op = &sysv_symlink_inode_operations;
165 inode->i_mapping->a_ops = &sysv_aops; 166 inode->i_mapping->a_ops = &sysv_aops;
166 } else 167 } else {
167 inode->i_op = &sysv_fast_symlink_inode_operations; 168 inode->i_op = &sysv_fast_symlink_inode_operations;
169 nd_terminate_link(SYSV_I(inode)->i_data, inode->i_size,
170 sizeof(SYSV_I(inode)->i_data) - 1);
171 }
168 } else 172 } else
169 init_special_inode(inode, inode->i_mode, rdev); 173 init_special_inode(inode, inode->i_mode, rdev);
170} 174}
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 1a4973e1066..0e5e54d8292 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -32,18 +32,15 @@
32 32
33#include "ubifs.h" 33#include "ubifs.h"
34#include <linux/writeback.h> 34#include <linux/writeback.h>
35#include <asm/div64.h> 35#include <linux/math64.h>
36 36
37/* 37/*
38 * When pessimistic budget calculations say that there is no enough space, 38 * When pessimistic budget calculations say that there is no enough space,
39 * UBIFS starts writing back dirty inodes and pages, doing garbage collection, 39 * UBIFS starts writing back dirty inodes and pages, doing garbage collection,
40 * or committing. The below constants define maximum number of times UBIFS 40 * or committing. The below constant defines maximum number of times UBIFS
41 * repeats the operations. 41 * repeats the operations.
42 */ 42 */
43#define MAX_SHRINK_RETRIES 8 43#define MAX_MKSPC_RETRIES 3
44#define MAX_GC_RETRIES 4
45#define MAX_CMT_RETRIES 2
46#define MAX_NOSPC_RETRIES 1
47 44
48/* 45/*
49 * The below constant defines amount of dirty pages which should be written 46 * The below constant defines amount of dirty pages which should be written
@@ -52,30 +49,6 @@
52#define NR_TO_WRITE 16 49#define NR_TO_WRITE 16
53 50
54/** 51/**
55 * struct retries_info - information about re-tries while making free space.
56 * @prev_liability: previous liability
57 * @shrink_cnt: how many times the liability was shrinked
58 * @shrink_retries: count of liability shrink re-tries (increased when
59 * liability does not shrink)
60 * @try_gc: GC should be tried first
61 * @gc_retries: how many times GC was run
62 * @cmt_retries: how many times commit has been done
63 * @nospc_retries: how many times GC returned %-ENOSPC
64 *
65 * Since we consider budgeting to be the fast-path, and this structure has to
66 * be allocated on stack and zeroed out, we make it smaller using bit-fields.
67 */
68struct retries_info {
69 long long prev_liability;
70 unsigned int shrink_cnt;
71 unsigned int shrink_retries:5;
72 unsigned int try_gc:1;
73 unsigned int gc_retries:4;
74 unsigned int cmt_retries:3;
75 unsigned int nospc_retries:1;
76};
77
78/**
79 * shrink_liability - write-back some dirty pages/inodes. 52 * shrink_liability - write-back some dirty pages/inodes.
80 * @c: UBIFS file-system description object 53 * @c: UBIFS file-system description object
81 * @nr_to_write: how many dirty pages to write-back 54 * @nr_to_write: how many dirty pages to write-back
@@ -147,9 +120,25 @@ static int run_gc(struct ubifs_info *c)
147} 120}
148 121
149/** 122/**
123 * get_liability - calculate current liability.
124 * @c: UBIFS file-system description object
125 *
126 * This function calculates and returns current UBIFS liability, i.e. the
127 * amount of bytes UBIFS has "promised" to write to the media.
128 */
129static long long get_liability(struct ubifs_info *c)
130{
131 long long liab;
132
133 spin_lock(&c->space_lock);
134 liab = c->budg_idx_growth + c->budg_data_growth + c->budg_dd_growth;
135 spin_unlock(&c->space_lock);
136 return liab;
137}
138
139/**
150 * make_free_space - make more free space on the file-system. 140 * make_free_space - make more free space on the file-system.
151 * @c: UBIFS file-system description object 141 * @c: UBIFS file-system description object
152 * @ri: information about previous invocations of this function
153 * 142 *
154 * This function is called when an operation cannot be budgeted because there 143 * This function is called when an operation cannot be budgeted because there
155 * is supposedly no free space. But in most cases there is some free space: 144 * is supposedly no free space. But in most cases there is some free space:
@@ -165,87 +154,42 @@ static int run_gc(struct ubifs_info *c)
165 * Returns %-ENOSPC if it couldn't do more free space, and other negative error 154 * Returns %-ENOSPC if it couldn't do more free space, and other negative error
166 * codes on failures. 155 * codes on failures.
167 */ 156 */
168static int make_free_space(struct ubifs_info *c, struct retries_info *ri) 157static int make_free_space(struct ubifs_info *c)
169{ 158{
170 int err; 159 int err, retries = 0;
171 160 long long liab1, liab2;
172 /*
173 * If we have some dirty pages and inodes (liability), try to write
174 * them back unless this was tried too many times without effect
175 * already.
176 */
177 if (ri->shrink_retries < MAX_SHRINK_RETRIES && !ri->try_gc) {
178 long long liability;
179
180 spin_lock(&c->space_lock);
181 liability = c->budg_idx_growth + c->budg_data_growth +
182 c->budg_dd_growth;
183 spin_unlock(&c->space_lock);
184 161
185 if (ri->prev_liability >= liability) { 162 do {
186 /* Liability does not shrink, next time try GC then */ 163 liab1 = get_liability(c);
187 ri->shrink_retries += 1; 164 /*
188 if (ri->gc_retries < MAX_GC_RETRIES) 165 * We probably have some dirty pages or inodes (liability), try
189 ri->try_gc = 1; 166 * to write them back.
190 dbg_budg("liability did not shrink: retries %d of %d", 167 */
191 ri->shrink_retries, MAX_SHRINK_RETRIES); 168 dbg_budg("liability %lld, run write-back", liab1);
192 } 169 shrink_liability(c, NR_TO_WRITE);
193 170
194 dbg_budg("force write-back (count %d)", ri->shrink_cnt); 171 liab2 = get_liability(c);
195 shrink_liability(c, NR_TO_WRITE + ri->shrink_cnt); 172 if (liab2 < liab1)
173 return -EAGAIN;
196 174
197 ri->prev_liability = liability; 175 dbg_budg("new liability %lld (not shrinked)", liab2);
198 ri->shrink_cnt += 1;
199 return -EAGAIN;
200 }
201 176
202 /* 177 /* Liability did not shrink again, try GC */
203 * Try to run garbage collector unless it was already tried too many 178 dbg_budg("Run GC");
204 * times.
205 */
206 if (ri->gc_retries < MAX_GC_RETRIES) {
207 ri->gc_retries += 1;
208 dbg_budg("run GC, retries %d of %d",
209 ri->gc_retries, MAX_GC_RETRIES);
210
211 ri->try_gc = 0;
212 err = run_gc(c); 179 err = run_gc(c);
213 if (!err) 180 if (!err)
214 return -EAGAIN; 181 return -EAGAIN;
215 182
216 if (err == -EAGAIN) { 183 if (err != -EAGAIN && err != -ENOSPC)
217 dbg_budg("GC asked to commit"); 184 /* Some real error happened */
218 err = ubifs_run_commit(c);
219 if (err)
220 return err;
221 return -EAGAIN;
222 }
223
224 if (err != -ENOSPC)
225 return err;
226
227 /*
228 * GC could not make any progress. If this is the first time,
229 * then it makes sense to try to commit, because it might make
230 * some dirty space.
231 */
232 dbg_budg("GC returned -ENOSPC, retries %d",
233 ri->nospc_retries);
234 if (ri->nospc_retries >= MAX_NOSPC_RETRIES)
235 return err; 185 return err;
236 ri->nospc_retries += 1;
237 }
238 186
239 /* Neither GC nor write-back helped, try to commit */ 187 dbg_budg("Run commit (retries %d)", retries);
240 if (ri->cmt_retries < MAX_CMT_RETRIES) {
241 ri->cmt_retries += 1;
242 dbg_budg("run commit, retries %d of %d",
243 ri->cmt_retries, MAX_CMT_RETRIES);
244 err = ubifs_run_commit(c); 188 err = ubifs_run_commit(c);
245 if (err) 189 if (err)
246 return err; 190 return err;
247 return -EAGAIN; 191 } while (retries++ < MAX_MKSPC_RETRIES);
248 } 192
249 return -ENOSPC; 193 return -ENOSPC;
250} 194}
251 195
@@ -258,8 +202,8 @@ static int make_free_space(struct ubifs_info *c, struct retries_info *ri)
258 */ 202 */
259int ubifs_calc_min_idx_lebs(struct ubifs_info *c) 203int ubifs_calc_min_idx_lebs(struct ubifs_info *c)
260{ 204{
261 int ret; 205 int idx_lebs, eff_leb_size = c->leb_size - c->max_idx_node_sz;
262 uint64_t idx_size; 206 long long idx_size;
263 207
264 idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx; 208 idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx;
265 209
@@ -271,23 +215,16 @@ int ubifs_calc_min_idx_lebs(struct ubifs_info *c)
271 * pair, nor similarly the two variables for the new index size, so we 215 * pair, nor similarly the two variables for the new index size, so we
272 * have to do this costly 64-bit division on fast-path. 216 * have to do this costly 64-bit division on fast-path.
273 */ 217 */
274 if (do_div(idx_size, c->leb_size - c->max_idx_node_sz)) 218 idx_size += eff_leb_size - 1;
275 ret = idx_size + 1; 219 idx_lebs = div_u64(idx_size, eff_leb_size);
276 else
277 ret = idx_size;
278 /* 220 /*
279 * The index head is not available for the in-the-gaps method, so add an 221 * The index head is not available for the in-the-gaps method, so add an
280 * extra LEB to compensate. 222 * extra LEB to compensate.
281 */ 223 */
282 ret += 1; 224 idx_lebs += 1;
283 /* 225 if (idx_lebs < MIN_INDEX_LEBS)
284 * At present the index needs at least 2 LEBs: one for the index head 226 idx_lebs = MIN_INDEX_LEBS;
285 * and one for in-the-gaps method (which currently does not cater for 227 return idx_lebs;
286 * the index head and so excludes it from consideration).
287 */
288 if (ret < 2)
289 ret = 2;
290 return ret;
291} 228}
292 229
293/** 230/**
@@ -363,7 +300,7 @@ long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs)
363 */ 300 */
364static int can_use_rp(struct ubifs_info *c) 301static int can_use_rp(struct ubifs_info *c)
365{ 302{
366 if (current->fsuid == c->rp_uid || capable(CAP_SYS_RESOURCE) || 303 if (current_fsuid() == c->rp_uid || capable(CAP_SYS_RESOURCE) ||
367 (c->rp_gid != 0 && in_group_p(c->rp_gid))) 304 (c->rp_gid != 0 && in_group_p(c->rp_gid)))
368 return 1; 305 return 1;
369 return 0; 306 return 0;
@@ -530,8 +467,7 @@ static int calc_dd_growth(const struct ubifs_info *c,
530int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req) 467int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req)
531{ 468{
532 int uninitialized_var(cmt_retries), uninitialized_var(wb_retries); 469 int uninitialized_var(cmt_retries), uninitialized_var(wb_retries);
533 int err, idx_growth, data_growth, dd_growth; 470 int err, idx_growth, data_growth, dd_growth, retried = 0;
534 struct retries_info ri;
535 471
536 ubifs_assert(req->new_page <= 1); 472 ubifs_assert(req->new_page <= 1);
537 ubifs_assert(req->dirtied_page <= 1); 473 ubifs_assert(req->dirtied_page <= 1);
@@ -549,7 +485,6 @@ int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req)
549 if (!data_growth && !dd_growth) 485 if (!data_growth && !dd_growth)
550 return 0; 486 return 0;
551 idx_growth = calc_idx_growth(c, req); 487 idx_growth = calc_idx_growth(c, req);
552 memset(&ri, 0, sizeof(struct retries_info));
553 488
554again: 489again:
555 spin_lock(&c->space_lock); 490 spin_lock(&c->space_lock);
@@ -587,12 +522,17 @@ again:
587 return err; 522 return err;
588 } 523 }
589 524
590 err = make_free_space(c, &ri); 525 err = make_free_space(c);
526 cond_resched();
591 if (err == -EAGAIN) { 527 if (err == -EAGAIN) {
592 dbg_budg("try again"); 528 dbg_budg("try again");
593 cond_resched();
594 goto again; 529 goto again;
595 } else if (err == -ENOSPC) { 530 } else if (err == -ENOSPC) {
531 if (!retried) {
532 retried = 1;
533 dbg_budg("-ENOSPC, but anyway try once again");
534 goto again;
535 }
596 dbg_budg("FS is full, -ENOSPC"); 536 dbg_budg("FS is full, -ENOSPC");
597 c->nospace = 1; 537 c->nospace = 1;
598 if (can_use_rp(c) || c->rp_size == 0) 538 if (can_use_rp(c) || c->rp_size == 0)
@@ -712,9 +652,9 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c,
712 * user-space. User-space application tend to expect that if the file-system 652 * user-space. User-space application tend to expect that if the file-system
713 * (e.g., via the 'statfs()' call) reports that it has N bytes available, they 653 * (e.g., via the 'statfs()' call) reports that it has N bytes available, they
714 * are able to write a file of size N. UBIFS attaches node headers to each data 654 * are able to write a file of size N. UBIFS attaches node headers to each data
715 * node and it has to write indexind nodes as well. This introduces additional 655 * node and it has to write indexing nodes as well. This introduces additional
716 * overhead, and UBIFS it has to report sligtly less free space to meet the 656 * overhead, and UBIFS has to report slightly less free space to meet the above
717 * above expectetion. 657 * expectations.
718 * 658 *
719 * This function assumes free space is made up of uncompressed data nodes and 659 * This function assumes free space is made up of uncompressed data nodes and
720 * full index nodes (one per data node, tripled because we always allow enough 660 * full index nodes (one per data node, tripled because we always allow enough
@@ -723,7 +663,7 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c,
723 * Note, the calculation is pessimistic, which means that most of the time 663 * Note, the calculation is pessimistic, which means that most of the time
724 * UBIFS reports less space than it actually has. 664 * UBIFS reports less space than it actually has.
725 */ 665 */
726long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free) 666long long ubifs_reported_space(const struct ubifs_info *c, long long free)
727{ 667{
728 int divisor, factor, f; 668 int divisor, factor, f;
729 669
@@ -737,7 +677,7 @@ long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free)
737 * of data nodes, f - fanout. Because effective UBIFS fanout is twice 677 * of data nodes, f - fanout. Because effective UBIFS fanout is twice
738 * as less than maximum fanout, we assume that each data node 678 * as less than maximum fanout, we assume that each data node
739 * introduces 3 * @c->max_idx_node_sz / (@c->fanout/2 - 1) bytes. 679 * introduces 3 * @c->max_idx_node_sz / (@c->fanout/2 - 1) bytes.
740 * Note, the multiplier 3 is because UBIFS reseves thrice as more space 680 * Note, the multiplier 3 is because UBIFS reserves thrice as more space
741 * for the index. 681 * for the index.
742 */ 682 */
743 f = c->fanout > 3 ? c->fanout >> 1 : 2; 683 f = c->fanout > 3 ? c->fanout >> 1 : 2;
@@ -745,8 +685,7 @@ long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free)
745 divisor = UBIFS_MAX_DATA_NODE_SZ; 685 divisor = UBIFS_MAX_DATA_NODE_SZ;
746 divisor += (c->max_idx_node_sz * 3) / (f - 1); 686 divisor += (c->max_idx_node_sz * 3) / (f - 1);
747 free *= factor; 687 free *= factor;
748 do_div(free, divisor); 688 return div_u64(free, divisor);
749 return free;
750} 689}
751 690
752/** 691/**
@@ -756,10 +695,10 @@ long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free)
756 * This function calculates amount of free space to report to user-space. 695 * This function calculates amount of free space to report to user-space.
757 * 696 *
758 * Because UBIFS may introduce substantial overhead (the index, node headers, 697 * Because UBIFS may introduce substantial overhead (the index, node headers,
759 * alighment, wastage at the end of eraseblocks, etc), it cannot report real 698 * alignment, wastage at the end of eraseblocks, etc), it cannot report real
760 * amount of free flash space it has (well, because not all dirty space is 699 * amount of free flash space it has (well, because not all dirty space is
761 * reclamable, UBIFS does not actually know the real amount). If UBIFS did so, 700 * reclaimable, UBIFS does not actually know the real amount). If UBIFS did so,
762 * it would bread user expectetion about what free space is. Users seem to 701 * it would bread user expectations about what free space is. Users seem to
763 * accustomed to assume that if the file-system reports N bytes of free space, 702 * accustomed to assume that if the file-system reports N bytes of free space,
764 * they would be able to fit a file of N bytes to the FS. This almost works for 703 * they would be able to fit a file of N bytes to the FS. This almost works for
765 * traditional file-systems, because they have way less overhead than UBIFS. 704 * traditional file-systems, because they have way less overhead than UBIFS.
@@ -771,18 +710,9 @@ long long ubifs_get_free_space(struct ubifs_info *c)
771 long long available, outstanding, free; 710 long long available, outstanding, free;
772 711
773 spin_lock(&c->space_lock); 712 spin_lock(&c->space_lock);
774 min_idx_lebs = ubifs_calc_min_idx_lebs(c); 713 min_idx_lebs = c->min_idx_lebs;
714 ubifs_assert(min_idx_lebs == ubifs_calc_min_idx_lebs(c));
775 outstanding = c->budg_data_growth + c->budg_dd_growth; 715 outstanding = c->budg_data_growth + c->budg_dd_growth;
776
777 /*
778 * Force the amount available to the total size reported if the used
779 * space is zero.
780 */
781 if (c->lst.total_used <= UBIFS_INO_NODE_SZ && !outstanding) {
782 spin_unlock(&c->space_lock);
783 return (long long)c->block_cnt << UBIFS_BLOCK_SHIFT;
784 }
785
786 available = ubifs_calc_available(c, min_idx_lebs); 716 available = ubifs_calc_available(c, min_idx_lebs);
787 717
788 /* 718 /*
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index b49884c8c10..f3a7945527f 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -470,12 +470,12 @@ int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot)
470{ 470{
471 struct ubifs_idx_node *idx; 471 struct ubifs_idx_node *idx;
472 int lnum, offs, len, err = 0; 472 int lnum, offs, len, err = 0;
473 struct ubifs_debug_info *d = c->dbg;
473 474
474 c->old_zroot = *zroot; 475 d->old_zroot = *zroot;
475 476 lnum = d->old_zroot.lnum;
476 lnum = c->old_zroot.lnum; 477 offs = d->old_zroot.offs;
477 offs = c->old_zroot.offs; 478 len = d->old_zroot.len;
478 len = c->old_zroot.len;
479 479
480 idx = kmalloc(c->max_idx_node_sz, GFP_NOFS); 480 idx = kmalloc(c->max_idx_node_sz, GFP_NOFS);
481 if (!idx) 481 if (!idx)
@@ -485,8 +485,8 @@ int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot)
485 if (err) 485 if (err)
486 goto out; 486 goto out;
487 487
488 c->old_zroot_level = le16_to_cpu(idx->level); 488 d->old_zroot_level = le16_to_cpu(idx->level);
489 c->old_zroot_sqnum = le64_to_cpu(idx->ch.sqnum); 489 d->old_zroot_sqnum = le64_to_cpu(idx->ch.sqnum);
490out: 490out:
491 kfree(idx); 491 kfree(idx);
492 return err; 492 return err;
@@ -509,6 +509,7 @@ int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot)
509{ 509{
510 int lnum, offs, len, err = 0, uninitialized_var(last_level), child_cnt; 510 int lnum, offs, len, err = 0, uninitialized_var(last_level), child_cnt;
511 int first = 1, iip; 511 int first = 1, iip;
512 struct ubifs_debug_info *d = c->dbg;
512 union ubifs_key lower_key, upper_key, l_key, u_key; 513 union ubifs_key lower_key, upper_key, l_key, u_key;
513 unsigned long long uninitialized_var(last_sqnum); 514 unsigned long long uninitialized_var(last_sqnum);
514 struct ubifs_idx_node *idx; 515 struct ubifs_idx_node *idx;
@@ -525,9 +526,9 @@ int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot)
525 UBIFS_IDX_NODE_SZ; 526 UBIFS_IDX_NODE_SZ;
526 527
527 /* Start at the old zroot */ 528 /* Start at the old zroot */
528 lnum = c->old_zroot.lnum; 529 lnum = d->old_zroot.lnum;
529 offs = c->old_zroot.offs; 530 offs = d->old_zroot.offs;
530 len = c->old_zroot.len; 531 len = d->old_zroot.len;
531 iip = 0; 532 iip = 0;
532 533
533 /* 534 /*
@@ -560,11 +561,11 @@ int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot)
560 if (first) { 561 if (first) {
561 first = 0; 562 first = 0;
562 /* Check root level and sqnum */ 563 /* Check root level and sqnum */
563 if (le16_to_cpu(idx->level) != c->old_zroot_level) { 564 if (le16_to_cpu(idx->level) != d->old_zroot_level) {
564 err = 2; 565 err = 2;
565 goto out_dump; 566 goto out_dump;
566 } 567 }
567 if (le64_to_cpu(idx->ch.sqnum) != c->old_zroot_sqnum) { 568 if (le64_to_cpu(idx->ch.sqnum) != d->old_zroot_sqnum) {
568 err = 3; 569 err = 3;
569 goto out_dump; 570 goto out_dump;
570 } 571 }
diff --git a/fs/ubifs/compress.c b/fs/ubifs/compress.c
index a0ada596b17..11e4132f314 100644
--- a/fs/ubifs/compress.c
+++ b/fs/ubifs/compress.c
@@ -33,7 +33,7 @@
33/* Fake description object for the "none" compressor */ 33/* Fake description object for the "none" compressor */
34static struct ubifs_compressor none_compr = { 34static struct ubifs_compressor none_compr = {
35 .compr_type = UBIFS_COMPR_NONE, 35 .compr_type = UBIFS_COMPR_NONE,
36 .name = "no compression", 36 .name = "none",
37 .capi_name = "", 37 .capi_name = "",
38}; 38};
39 39
@@ -43,13 +43,13 @@ static DEFINE_MUTEX(lzo_mutex);
43static struct ubifs_compressor lzo_compr = { 43static struct ubifs_compressor lzo_compr = {
44 .compr_type = UBIFS_COMPR_LZO, 44 .compr_type = UBIFS_COMPR_LZO,
45 .comp_mutex = &lzo_mutex, 45 .comp_mutex = &lzo_mutex,
46 .name = "LZO", 46 .name = "lzo",
47 .capi_name = "lzo", 47 .capi_name = "lzo",
48}; 48};
49#else 49#else
50static struct ubifs_compressor lzo_compr = { 50static struct ubifs_compressor lzo_compr = {
51 .compr_type = UBIFS_COMPR_LZO, 51 .compr_type = UBIFS_COMPR_LZO,
52 .name = "LZO", 52 .name = "lzo",
53}; 53};
54#endif 54#endif
55 55
@@ -108,7 +108,7 @@ void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len,
108 if (compr->comp_mutex) 108 if (compr->comp_mutex)
109 mutex_lock(compr->comp_mutex); 109 mutex_lock(compr->comp_mutex);
110 err = crypto_comp_compress(compr->cc, in_buf, in_len, out_buf, 110 err = crypto_comp_compress(compr->cc, in_buf, in_len, out_buf,
111 out_len); 111 (unsigned int *)out_len);
112 if (compr->comp_mutex) 112 if (compr->comp_mutex)
113 mutex_unlock(compr->comp_mutex); 113 mutex_unlock(compr->comp_mutex);
114 if (unlikely(err)) { 114 if (unlikely(err)) {
@@ -119,10 +119,10 @@ void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len,
119 } 119 }
120 120
121 /* 121 /*
122 * Presently, we just require that compression results in less data, 122 * If the data compressed only slightly, it is better to leave it
123 * rather than any defined minimum compression ratio or amount. 123 * uncompressed to improve read speed.
124 */ 124 */
125 if (ALIGN(*out_len, 8) >= ALIGN(in_len, 8)) 125 if (in_len - *out_len < UBIFS_MIN_COMPRESS_DIFF)
126 goto no_compr; 126 goto no_compr;
127 127
128 return; 128 return;
@@ -172,7 +172,7 @@ int ubifs_decompress(const void *in_buf, int in_len, void *out_buf,
172 if (compr->decomp_mutex) 172 if (compr->decomp_mutex)
173 mutex_lock(compr->decomp_mutex); 173 mutex_lock(compr->decomp_mutex);
174 err = crypto_comp_decompress(compr->cc, in_buf, in_len, out_buf, 174 err = crypto_comp_decompress(compr->cc, in_buf, in_len, out_buf,
175 out_len); 175 (unsigned int *)out_len);
176 if (compr->decomp_mutex) 176 if (compr->decomp_mutex)
177 mutex_unlock(compr->decomp_mutex); 177 mutex_unlock(compr->decomp_mutex);
178 if (err) 178 if (err)
@@ -244,7 +244,7 @@ out_lzo:
244/** 244/**
245 * ubifs_compressors_exit - de-initialize UBIFS compressors. 245 * ubifs_compressors_exit - de-initialize UBIFS compressors.
246 */ 246 */
247void __exit ubifs_compressors_exit(void) 247void ubifs_compressors_exit(void)
248{ 248{
249 compr_exit(&lzo_compr); 249 compr_exit(&lzo_compr);
250 compr_exit(&zlib_compr); 250 compr_exit(&zlib_compr);
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 510ffa0bbda..792c5a16c18 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -32,6 +32,8 @@
32#include "ubifs.h" 32#include "ubifs.h"
33#include <linux/module.h> 33#include <linux/module.h>
34#include <linux/moduleparam.h> 34#include <linux/moduleparam.h>
35#include <linux/debugfs.h>
36#include <linux/math64.h>
35 37
36#ifdef CONFIG_UBIFS_FS_DEBUG 38#ifdef CONFIG_UBIFS_FS_DEBUG
37 39
@@ -596,7 +598,9 @@ void dbg_dump_budg(struct ubifs_info *c)
596 struct rb_node *rb; 598 struct rb_node *rb;
597 struct ubifs_bud *bud; 599 struct ubifs_bud *bud;
598 struct ubifs_gced_idx_leb *idx_gc; 600 struct ubifs_gced_idx_leb *idx_gc;
601 long long available, outstanding, free;
599 602
603 ubifs_assert(spin_is_locked(&c->space_lock));
600 spin_lock(&dbg_lock); 604 spin_lock(&dbg_lock);
601 printk(KERN_DEBUG "(pid %d) Budgeting info: budg_data_growth %lld, " 605 printk(KERN_DEBUG "(pid %d) Budgeting info: budg_data_growth %lld, "
602 "budg_dd_growth %lld, budg_idx_growth %lld\n", current->pid, 606 "budg_dd_growth %lld, budg_idx_growth %lld\n", current->pid,
@@ -629,6 +633,17 @@ void dbg_dump_budg(struct ubifs_info *c)
629 printk(KERN_DEBUG "\tGC'ed idx LEB %d unmap %d\n", 633 printk(KERN_DEBUG "\tGC'ed idx LEB %d unmap %d\n",
630 idx_gc->lnum, idx_gc->unmap); 634 idx_gc->lnum, idx_gc->unmap);
631 printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state); 635 printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state);
636
637 /* Print budgeting predictions */
638 available = ubifs_calc_available(c, c->min_idx_lebs);
639 outstanding = c->budg_data_growth + c->budg_dd_growth;
640 if (available > outstanding)
641 free = ubifs_reported_space(c, available - outstanding);
642 else
643 free = 0;
644 printk(KERN_DEBUG "Budgeting predictions:\n");
645 printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n",
646 available, outstanding, free);
632 spin_unlock(&dbg_lock); 647 spin_unlock(&dbg_lock);
633} 648}
634 649
@@ -645,7 +660,8 @@ void dbg_dump_lprops(struct ubifs_info *c)
645 struct ubifs_lprops lp; 660 struct ubifs_lprops lp;
646 struct ubifs_lp_stats lst; 661 struct ubifs_lp_stats lst;
647 662
648 printk(KERN_DEBUG "(pid %d) Dumping LEB properties\n", current->pid); 663 printk(KERN_DEBUG "(pid %d) start dumping LEB properties\n",
664 current->pid);
649 ubifs_get_lp_stats(c, &lst); 665 ubifs_get_lp_stats(c, &lst);
650 dbg_dump_lstats(&lst); 666 dbg_dump_lstats(&lst);
651 667
@@ -656,6 +672,8 @@ void dbg_dump_lprops(struct ubifs_info *c)
656 672
657 dbg_dump_lprop(c, &lp); 673 dbg_dump_lprop(c, &lp);
658 } 674 }
675 printk(KERN_DEBUG "(pid %d) finish dumping LEB properties\n",
676 current->pid);
659} 677}
660 678
661void dbg_dump_lpt_info(struct ubifs_info *c) 679void dbg_dump_lpt_info(struct ubifs_info *c)
@@ -663,6 +681,7 @@ void dbg_dump_lpt_info(struct ubifs_info *c)
663 int i; 681 int i;
664 682
665 spin_lock(&dbg_lock); 683 spin_lock(&dbg_lock);
684 printk(KERN_DEBUG "(pid %d) dumping LPT information\n", current->pid);
666 printk(KERN_DEBUG "\tlpt_sz: %lld\n", c->lpt_sz); 685 printk(KERN_DEBUG "\tlpt_sz: %lld\n", c->lpt_sz);
667 printk(KERN_DEBUG "\tpnode_sz: %d\n", c->pnode_sz); 686 printk(KERN_DEBUG "\tpnode_sz: %d\n", c->pnode_sz);
668 printk(KERN_DEBUG "\tnnode_sz: %d\n", c->nnode_sz); 687 printk(KERN_DEBUG "\tnnode_sz: %d\n", c->nnode_sz);
@@ -684,7 +703,8 @@ void dbg_dump_lpt_info(struct ubifs_info *c)
684 printk(KERN_DEBUG "\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs); 703 printk(KERN_DEBUG "\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs);
685 printk(KERN_DEBUG "\tLPT head is at %d:%d\n", 704 printk(KERN_DEBUG "\tLPT head is at %d:%d\n",
686 c->nhead_lnum, c->nhead_offs); 705 c->nhead_lnum, c->nhead_offs);
687 printk(KERN_DEBUG "\tLPT ltab is at %d:%d\n", c->ltab_lnum, c->ltab_offs); 706 printk(KERN_DEBUG "\tLPT ltab is at %d:%d\n",
707 c->ltab_lnum, c->ltab_offs);
688 if (c->big_lpt) 708 if (c->big_lpt)
689 printk(KERN_DEBUG "\tLPT lsave is at %d:%d\n", 709 printk(KERN_DEBUG "\tLPT lsave is at %d:%d\n",
690 c->lsave_lnum, c->lsave_offs); 710 c->lsave_lnum, c->lsave_offs);
@@ -703,9 +723,9 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum)
703 if (dbg_failure_mode) 723 if (dbg_failure_mode)
704 return; 724 return;
705 725
706 printk(KERN_DEBUG "(pid %d) Dumping LEB %d\n", current->pid, lnum); 726 printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n",
707 727 current->pid, lnum);
708 sleb = ubifs_scan(c, lnum, 0, c->dbg_buf); 728 sleb = ubifs_scan(c, lnum, 0, c->dbg->buf);
709 if (IS_ERR(sleb)) { 729 if (IS_ERR(sleb)) {
710 ubifs_err("scan error %d", (int)PTR_ERR(sleb)); 730 ubifs_err("scan error %d", (int)PTR_ERR(sleb));
711 return; 731 return;
@@ -721,6 +741,8 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum)
721 dbg_dump_node(c, snod->node); 741 dbg_dump_node(c, snod->node);
722 } 742 }
723 743
744 printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n",
745 current->pid, lnum);
724 ubifs_scan_destroy(sleb); 746 ubifs_scan_destroy(sleb);
725 return; 747 return;
726} 748}
@@ -768,7 +790,7 @@ void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat)
768{ 790{
769 int i; 791 int i;
770 792
771 printk(KERN_DEBUG "(pid %d) Dumping heap cat %d (%d elements)\n", 793 printk(KERN_DEBUG "(pid %d) start dumping heap cat %d (%d elements)\n",
772 current->pid, cat, heap->cnt); 794 current->pid, cat, heap->cnt);
773 for (i = 0; i < heap->cnt; i++) { 795 for (i = 0; i < heap->cnt; i++) {
774 struct ubifs_lprops *lprops = heap->arr[i]; 796 struct ubifs_lprops *lprops = heap->arr[i];
@@ -777,6 +799,7 @@ void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat)
777 "flags %d\n", i, lprops->lnum, lprops->hpos, 799 "flags %d\n", i, lprops->lnum, lprops->hpos,
778 lprops->free, lprops->dirty, lprops->flags); 800 lprops->free, lprops->dirty, lprops->flags);
779 } 801 }
802 printk(KERN_DEBUG "(pid %d) finish dumping heap\n", current->pid);
780} 803}
781 804
782void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, 805void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
@@ -784,7 +807,7 @@ void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
784{ 807{
785 int i; 808 int i;
786 809
787 printk(KERN_DEBUG "(pid %d) Dumping pnode:\n", current->pid); 810 printk(KERN_DEBUG "(pid %d) dumping pnode:\n", current->pid);
788 printk(KERN_DEBUG "\taddress %zx parent %zx cnext %zx\n", 811 printk(KERN_DEBUG "\taddress %zx parent %zx cnext %zx\n",
789 (size_t)pnode, (size_t)parent, (size_t)pnode->cnext); 812 (size_t)pnode, (size_t)parent, (size_t)pnode->cnext);
790 printk(KERN_DEBUG "\tflags %lu iip %d level %d num %d\n", 813 printk(KERN_DEBUG "\tflags %lu iip %d level %d num %d\n",
@@ -803,7 +826,7 @@ void dbg_dump_tnc(struct ubifs_info *c)
803 int level; 826 int level;
804 827
805 printk(KERN_DEBUG "\n"); 828 printk(KERN_DEBUG "\n");
806 printk(KERN_DEBUG "(pid %d) Dumping the TNC tree\n", current->pid); 829 printk(KERN_DEBUG "(pid %d) start dumping TNC tree\n", current->pid);
807 znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL); 830 znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL);
808 level = znode->level; 831 level = znode->level;
809 printk(KERN_DEBUG "== Level %d ==\n", level); 832 printk(KERN_DEBUG "== Level %d ==\n", level);
@@ -815,8 +838,7 @@ void dbg_dump_tnc(struct ubifs_info *c)
815 dbg_dump_znode(c, znode); 838 dbg_dump_znode(c, znode);
816 znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode); 839 znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode);
817 } 840 }
818 841 printk(KERN_DEBUG "(pid %d) finish dumping TNC tree\n", current->pid);
819 printk(KERN_DEBUG "\n");
820} 842}
821 843
822static int dump_znode(struct ubifs_info *c, struct ubifs_znode *znode, 844static int dump_znode(struct ubifs_info *c, struct ubifs_znode *znode,
@@ -992,8 +1014,8 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1,
992 zbr1->offs, DBGKEY(&key)); 1014 zbr1->offs, DBGKEY(&key));
993 dbg_err("but it should have key %s according to tnc", 1015 dbg_err("but it should have key %s according to tnc",
994 DBGKEY(&zbr1->key)); 1016 DBGKEY(&zbr1->key));
995 dbg_dump_node(c, dent1); 1017 dbg_dump_node(c, dent1);
996 goto out_free; 1018 goto out_free;
997 } 1019 }
998 1020
999 key_read(c, &dent2->key, &key); 1021 key_read(c, &dent2->key, &key);
@@ -1002,8 +1024,8 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1,
1002 zbr1->offs, DBGKEY(&key)); 1024 zbr1->offs, DBGKEY(&key));
1003 dbg_err("but it should have key %s according to tnc", 1025 dbg_err("but it should have key %s according to tnc",
1004 DBGKEY(&zbr2->key)); 1026 DBGKEY(&zbr2->key));
1005 dbg_dump_node(c, dent2); 1027 dbg_dump_node(c, dent2);
1006 goto out_free; 1028 goto out_free;
1007 } 1029 }
1008 1030
1009 nlen1 = le16_to_cpu(dent1->nlen); 1031 nlen1 = le16_to_cpu(dent1->nlen);
@@ -1020,9 +1042,9 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1,
1020 dbg_err("bad order of colliding key %s", 1042 dbg_err("bad order of colliding key %s",
1021 DBGKEY(&key)); 1043 DBGKEY(&key));
1022 1044
1023 dbg_msg("first node at %d:%d\n", zbr1->lnum, zbr1->offs); 1045 ubifs_msg("first node at %d:%d\n", zbr1->lnum, zbr1->offs);
1024 dbg_dump_node(c, dent1); 1046 dbg_dump_node(c, dent1);
1025 dbg_msg("second node at %d:%d\n", zbr2->lnum, zbr2->offs); 1047 ubifs_msg("second node at %d:%d\n", zbr2->lnum, zbr2->offs);
1026 dbg_dump_node(c, dent2); 1048 dbg_dump_node(c, dent2);
1027 1049
1028out_free: 1050out_free:
@@ -2097,13 +2119,13 @@ static int simple_rand(void)
2097 return (next >> 16) & 32767; 2119 return (next >> 16) & 32767;
2098} 2120}
2099 2121
2100void dbg_failure_mode_registration(struct ubifs_info *c) 2122static void failure_mode_init(struct ubifs_info *c)
2101{ 2123{
2102 struct failure_mode_info *fmi; 2124 struct failure_mode_info *fmi;
2103 2125
2104 fmi = kmalloc(sizeof(struct failure_mode_info), GFP_NOFS); 2126 fmi = kmalloc(sizeof(struct failure_mode_info), GFP_NOFS);
2105 if (!fmi) { 2127 if (!fmi) {
2106 dbg_err("Failed to register failure mode - no memory"); 2128 ubifs_err("Failed to register failure mode - no memory");
2107 return; 2129 return;
2108 } 2130 }
2109 fmi->c = c; 2131 fmi->c = c;
@@ -2112,7 +2134,7 @@ void dbg_failure_mode_registration(struct ubifs_info *c)
2112 spin_unlock(&fmi_lock); 2134 spin_unlock(&fmi_lock);
2113} 2135}
2114 2136
2115void dbg_failure_mode_deregistration(struct ubifs_info *c) 2137static void failure_mode_exit(struct ubifs_info *c)
2116{ 2138{
2117 struct failure_mode_info *fmi, *tmp; 2139 struct failure_mode_info *fmi, *tmp;
2118 2140
@@ -2146,42 +2168,44 @@ static int in_failure_mode(struct ubi_volume_desc *desc)
2146 struct ubifs_info *c = dbg_find_info(desc); 2168 struct ubifs_info *c = dbg_find_info(desc);
2147 2169
2148 if (c && dbg_failure_mode) 2170 if (c && dbg_failure_mode)
2149 return c->failure_mode; 2171 return c->dbg->failure_mode;
2150 return 0; 2172 return 0;
2151} 2173}
2152 2174
2153static int do_fail(struct ubi_volume_desc *desc, int lnum, int write) 2175static int do_fail(struct ubi_volume_desc *desc, int lnum, int write)
2154{ 2176{
2155 struct ubifs_info *c = dbg_find_info(desc); 2177 struct ubifs_info *c = dbg_find_info(desc);
2178 struct ubifs_debug_info *d;
2156 2179
2157 if (!c || !dbg_failure_mode) 2180 if (!c || !dbg_failure_mode)
2158 return 0; 2181 return 0;
2159 if (c->failure_mode) 2182 d = c->dbg;
2183 if (d->failure_mode)
2160 return 1; 2184 return 1;
2161 if (!c->fail_cnt) { 2185 if (!d->fail_cnt) {
2162 /* First call - decide delay to failure */ 2186 /* First call - decide delay to failure */
2163 if (chance(1, 2)) { 2187 if (chance(1, 2)) {
2164 unsigned int delay = 1 << (simple_rand() >> 11); 2188 unsigned int delay = 1 << (simple_rand() >> 11);
2165 2189
2166 if (chance(1, 2)) { 2190 if (chance(1, 2)) {
2167 c->fail_delay = 1; 2191 d->fail_delay = 1;
2168 c->fail_timeout = jiffies + 2192 d->fail_timeout = jiffies +
2169 msecs_to_jiffies(delay); 2193 msecs_to_jiffies(delay);
2170 dbg_rcvry("failing after %ums", delay); 2194 dbg_rcvry("failing after %ums", delay);
2171 } else { 2195 } else {
2172 c->fail_delay = 2; 2196 d->fail_delay = 2;
2173 c->fail_cnt_max = delay; 2197 d->fail_cnt_max = delay;
2174 dbg_rcvry("failing after %u calls", delay); 2198 dbg_rcvry("failing after %u calls", delay);
2175 } 2199 }
2176 } 2200 }
2177 c->fail_cnt += 1; 2201 d->fail_cnt += 1;
2178 } 2202 }
2179 /* Determine if failure delay has expired */ 2203 /* Determine if failure delay has expired */
2180 if (c->fail_delay == 1) { 2204 if (d->fail_delay == 1) {
2181 if (time_before(jiffies, c->fail_timeout)) 2205 if (time_before(jiffies, d->fail_timeout))
2182 return 0; 2206 return 0;
2183 } else if (c->fail_delay == 2) 2207 } else if (d->fail_delay == 2)
2184 if (c->fail_cnt++ < c->fail_cnt_max) 2208 if (d->fail_cnt++ < d->fail_cnt_max)
2185 return 0; 2209 return 0;
2186 if (lnum == UBIFS_SB_LNUM) { 2210 if (lnum == UBIFS_SB_LNUM) {
2187 if (write) { 2211 if (write) {
@@ -2239,7 +2263,7 @@ static int do_fail(struct ubi_volume_desc *desc, int lnum, int write)
2239 dbg_rcvry("failing in bud LEB %d commit not running", lnum); 2263 dbg_rcvry("failing in bud LEB %d commit not running", lnum);
2240 } 2264 }
2241 ubifs_err("*** SETTING FAILURE MODE ON (LEB %d) ***", lnum); 2265 ubifs_err("*** SETTING FAILURE MODE ON (LEB %d) ***", lnum);
2242 c->failure_mode = 1; 2266 d->failure_mode = 1;
2243 dump_stack(); 2267 dump_stack();
2244 return 1; 2268 return 1;
2245} 2269}
@@ -2344,4 +2368,181 @@ int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype)
2344 return 0; 2368 return 0;
2345} 2369}
2346 2370
2371/**
2372 * ubifs_debugging_init - initialize UBIFS debugging.
2373 * @c: UBIFS file-system description object
2374 *
2375 * This function initializes debugging-related data for the file system.
2376 * Returns zero in case of success and a negative error code in case of
2377 * failure.
2378 */
2379int ubifs_debugging_init(struct ubifs_info *c)
2380{
2381 c->dbg = kzalloc(sizeof(struct ubifs_debug_info), GFP_KERNEL);
2382 if (!c->dbg)
2383 return -ENOMEM;
2384
2385 c->dbg->buf = vmalloc(c->leb_size);
2386 if (!c->dbg->buf)
2387 goto out;
2388
2389 failure_mode_init(c);
2390 return 0;
2391
2392out:
2393 kfree(c->dbg);
2394 return -ENOMEM;
2395}
2396
2397/**
2398 * ubifs_debugging_exit - free debugging data.
2399 * @c: UBIFS file-system description object
2400 */
2401void ubifs_debugging_exit(struct ubifs_info *c)
2402{
2403 failure_mode_exit(c);
2404 vfree(c->dbg->buf);
2405 kfree(c->dbg);
2406}
2407
2408/*
2409 * Root directory for UBIFS stuff in debugfs. Contains sub-directories which
2410 * contain the stuff specific to particular file-system mounts.
2411 */
2412static struct dentry *debugfs_rootdir;
2413
2414/**
2415 * dbg_debugfs_init - initialize debugfs file-system.
2416 *
2417 * UBIFS uses debugfs file-system to expose various debugging knobs to
2418 * user-space. This function creates "ubifs" directory in the debugfs
2419 * file-system. Returns zero in case of success and a negative error code in
2420 * case of failure.
2421 */
2422int dbg_debugfs_init(void)
2423{
2424 debugfs_rootdir = debugfs_create_dir("ubifs", NULL);
2425 if (IS_ERR(debugfs_rootdir)) {
2426 int err = PTR_ERR(debugfs_rootdir);
2427 ubifs_err("cannot create \"ubifs\" debugfs directory, "
2428 "error %d\n", err);
2429 return err;
2430 }
2431
2432 return 0;
2433}
2434
2435/**
2436 * dbg_debugfs_exit - remove the "ubifs" directory from debugfs file-system.
2437 */
2438void dbg_debugfs_exit(void)
2439{
2440 debugfs_remove(debugfs_rootdir);
2441}
2442
2443static int open_debugfs_file(struct inode *inode, struct file *file)
2444{
2445 file->private_data = inode->i_private;
2446 return 0;
2447}
2448
2449static ssize_t write_debugfs_file(struct file *file, const char __user *buf,
2450 size_t count, loff_t *ppos)
2451{
2452 struct ubifs_info *c = file->private_data;
2453 struct ubifs_debug_info *d = c->dbg;
2454
2455 if (file->f_path.dentry == d->dump_lprops)
2456 dbg_dump_lprops(c);
2457 else if (file->f_path.dentry == d->dump_budg) {
2458 spin_lock(&c->space_lock);
2459 dbg_dump_budg(c);
2460 spin_unlock(&c->space_lock);
2461 } else if (file->f_path.dentry == d->dump_tnc) {
2462 mutex_lock(&c->tnc_mutex);
2463 dbg_dump_tnc(c);
2464 mutex_unlock(&c->tnc_mutex);
2465 } else
2466 return -EINVAL;
2467
2468 *ppos += count;
2469 return count;
2470}
2471
2472static const struct file_operations debugfs_fops = {
2473 .open = open_debugfs_file,
2474 .write = write_debugfs_file,
2475 .owner = THIS_MODULE,
2476};
2477
2478/**
2479 * dbg_debugfs_init_fs - initialize debugfs for UBIFS instance.
2480 * @c: UBIFS file-system description object
2481 *
2482 * This function creates all debugfs files for this instance of UBIFS. Returns
2483 * zero in case of success and a negative error code in case of failure.
2484 *
2485 * Note, the only reason we have not merged this function with the
2486 * 'ubifs_debugging_init()' function is because it is better to initialize
2487 * debugfs interfaces at the very end of the mount process, and remove them at
2488 * the very beginning of the mount process.
2489 */
2490int dbg_debugfs_init_fs(struct ubifs_info *c)
2491{
2492 int err;
2493 const char *fname;
2494 struct dentry *dent;
2495 struct ubifs_debug_info *d = c->dbg;
2496
2497 sprintf(d->debugfs_dir_name, "ubi%d_%d", c->vi.ubi_num, c->vi.vol_id);
2498 d->debugfs_dir = debugfs_create_dir(d->debugfs_dir_name,
2499 debugfs_rootdir);
2500 if (IS_ERR(d->debugfs_dir)) {
2501 err = PTR_ERR(d->debugfs_dir);
2502 ubifs_err("cannot create \"%s\" debugfs directory, error %d\n",
2503 d->debugfs_dir_name, err);
2504 goto out;
2505 }
2506
2507 fname = "dump_lprops";
2508 dent = debugfs_create_file(fname, S_IWUGO, d->debugfs_dir, c,
2509 &debugfs_fops);
2510 if (IS_ERR(dent))
2511 goto out_remove;
2512 d->dump_lprops = dent;
2513
2514 fname = "dump_budg";
2515 dent = debugfs_create_file(fname, S_IWUGO, d->debugfs_dir, c,
2516 &debugfs_fops);
2517 if (IS_ERR(dent))
2518 goto out_remove;
2519 d->dump_budg = dent;
2520
2521 fname = "dump_tnc";
2522 dent = debugfs_create_file(fname, S_IWUGO, d->debugfs_dir, c,
2523 &debugfs_fops);
2524 if (IS_ERR(dent))
2525 goto out_remove;
2526 d->dump_tnc = dent;
2527
2528 return 0;
2529
2530out_remove:
2531 err = PTR_ERR(dent);
2532 ubifs_err("cannot create \"%s\" debugfs directory, error %d\n",
2533 fname, err);
2534 debugfs_remove_recursive(d->debugfs_dir);
2535out:
2536 return err;
2537}
2538
2539/**
2540 * dbg_debugfs_exit_fs - remove all debugfs files.
2541 * @c: UBIFS file-system description object
2542 */
2543void dbg_debugfs_exit_fs(struct ubifs_info *c)
2544{
2545 debugfs_remove_recursive(c->dbg->debugfs_dir);
2546}
2547
2347#endif /* CONFIG_UBIFS_FS_DEBUG */ 2548#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index 33d6b95071e..9820d6999f7 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -25,7 +25,56 @@
25 25
26#ifdef CONFIG_UBIFS_FS_DEBUG 26#ifdef CONFIG_UBIFS_FS_DEBUG
27 27
28#define UBIFS_DBG(op) op 28/**
29 * ubifs_debug_info - per-FS debugging information.
30 * @buf: a buffer of LEB size, used for various purposes
31 * @old_zroot: old index root - used by 'dbg_check_old_index()'
32 * @old_zroot_level: old index root level - used by 'dbg_check_old_index()'
33 * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()'
34 * @failure_mode: failure mode for recovery testing
35 * @fail_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls
36 * @fail_timeout: time in jiffies when delay of failure mode expires
37 * @fail_cnt: current number of calls to failure mode I/O functions
38 * @fail_cnt_max: number of calls by which to delay failure mode
39 * @chk_lpt_sz: used by LPT tree size checker
40 * @chk_lpt_sz2: used by LPT tree size checker
41 * @chk_lpt_wastage: used by LPT tree size checker
42 * @chk_lpt_lebs: used by LPT tree size checker
43 * @new_nhead_offs: used by LPT tree size checker
44 * @new_ihead_lnum: used by debugging to check ihead_lnum
45 * @new_ihead_offs: used by debugging to check ihead_offs
46 *
47 * debugfs_dir_name: name of debugfs directory containing this file-system's
48 * files
49 * debugfs_dir: direntry object of the file-system debugfs directory
50 * dump_lprops: "dump lprops" debugfs knob
51 * dump_budg: "dump budgeting information" debugfs knob
52 * dump_tnc: "dump TNC" debugfs knob
53 */
54struct ubifs_debug_info {
55 void *buf;
56 struct ubifs_zbranch old_zroot;
57 int old_zroot_level;
58 unsigned long long old_zroot_sqnum;
59 int failure_mode;
60 int fail_delay;
61 unsigned long fail_timeout;
62 unsigned int fail_cnt;
63 unsigned int fail_cnt_max;
64 long long chk_lpt_sz;
65 long long chk_lpt_sz2;
66 long long chk_lpt_wastage;
67 int chk_lpt_lebs;
68 int new_nhead_offs;
69 int new_ihead_lnum;
70 int new_ihead_offs;
71
72 char debugfs_dir_name[100];
73 struct dentry *debugfs_dir;
74 struct dentry *dump_lprops;
75 struct dentry *dump_budg;
76 struct dentry *dump_tnc;
77};
29 78
30#define ubifs_assert(expr) do { \ 79#define ubifs_assert(expr) do { \
31 if (unlikely(!(expr))) { \ 80 if (unlikely(!(expr))) { \
@@ -211,14 +260,18 @@ extern unsigned int ubifs_msg_flags;
211extern unsigned int ubifs_chk_flags; 260extern unsigned int ubifs_chk_flags;
212extern unsigned int ubifs_tst_flags; 261extern unsigned int ubifs_tst_flags;
213 262
214/* Dump functions */ 263int ubifs_debugging_init(struct ubifs_info *c);
264void ubifs_debugging_exit(struct ubifs_info *c);
215 265
266/* Dump functions */
216const char *dbg_ntype(int type); 267const char *dbg_ntype(int type);
217const char *dbg_cstate(int cmt_state); 268const char *dbg_cstate(int cmt_state);
218const char *dbg_get_key_dump(const struct ubifs_info *c, 269const char *dbg_get_key_dump(const struct ubifs_info *c,
219 const union ubifs_key *key); 270 const union ubifs_key *key);
220void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode); 271void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode);
221void dbg_dump_node(const struct ubifs_info *c, const void *node); 272void dbg_dump_node(const struct ubifs_info *c, const void *node);
273void dbg_dump_lpt_node(const struct ubifs_info *c, void *node, int lnum,
274 int offs);
222void dbg_dump_budget_req(const struct ubifs_budget_req *req); 275void dbg_dump_budget_req(const struct ubifs_budget_req *req);
223void dbg_dump_lstats(const struct ubifs_lp_stats *lst); 276void dbg_dump_lstats(const struct ubifs_lp_stats *lst);
224void dbg_dump_budg(struct ubifs_info *c); 277void dbg_dump_budg(struct ubifs_info *c);
@@ -233,9 +286,9 @@ void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
233 struct ubifs_nnode *parent, int iip); 286 struct ubifs_nnode *parent, int iip);
234void dbg_dump_tnc(struct ubifs_info *c); 287void dbg_dump_tnc(struct ubifs_info *c);
235void dbg_dump_index(struct ubifs_info *c); 288void dbg_dump_index(struct ubifs_info *c);
289void dbg_dump_lpt_lebs(const struct ubifs_info *c);
236 290
237/* Checking helper functions */ 291/* Checking helper functions */
238
239typedef int (*dbg_leaf_callback)(struct ubifs_info *c, 292typedef int (*dbg_leaf_callback)(struct ubifs_info *c,
240 struct ubifs_zbranch *zbr, void *priv); 293 struct ubifs_zbranch *zbr, void *priv);
241typedef int (*dbg_znode_callback)(struct ubifs_info *c, 294typedef int (*dbg_znode_callback)(struct ubifs_info *c,
@@ -274,9 +327,6 @@ int dbg_force_in_the_gaps(void);
274 327
275#define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY) 328#define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY)
276 329
277void dbg_failure_mode_registration(struct ubifs_info *c);
278void dbg_failure_mode_deregistration(struct ubifs_info *c);
279
280#ifndef UBIFS_DBG_PRESERVE_UBI 330#ifndef UBIFS_DBG_PRESERVE_UBI
281 331
282#define ubi_leb_read dbg_leb_read 332#define ubi_leb_read dbg_leb_read
@@ -318,9 +368,13 @@ static inline int dbg_change(struct ubi_volume_desc *desc, int lnum,
318 return dbg_leb_change(desc, lnum, buf, len, UBI_UNKNOWN); 368 return dbg_leb_change(desc, lnum, buf, len, UBI_UNKNOWN);
319} 369}
320 370
321#else /* !CONFIG_UBIFS_FS_DEBUG */ 371/* Debugfs-related stuff */
372int dbg_debugfs_init(void);
373void dbg_debugfs_exit(void);
374int dbg_debugfs_init_fs(struct ubifs_info *c);
375void dbg_debugfs_exit_fs(struct ubifs_info *c);
322 376
323#define UBIFS_DBG(op) 377#else /* !CONFIG_UBIFS_FS_DEBUG */
324 378
325/* Use "if (0)" to make compiler check arguments even if debugging is off */ 379/* Use "if (0)" to make compiler check arguments even if debugging is off */
326#define ubifs_assert(expr) do { \ 380#define ubifs_assert(expr) do { \
@@ -360,23 +414,28 @@ static inline int dbg_change(struct ubi_volume_desc *desc, int lnum,
360#define DBGKEY(key) ((char *)(key)) 414#define DBGKEY(key) ((char *)(key))
361#define DBGKEY1(key) ((char *)(key)) 415#define DBGKEY1(key) ((char *)(key))
362 416
363#define dbg_ntype(type) "" 417#define ubifs_debugging_init(c) 0
364#define dbg_cstate(cmt_state) "" 418#define ubifs_debugging_exit(c) ({})
365#define dbg_get_key_dump(c, key) ({}) 419
366#define dbg_dump_inode(c, inode) ({}) 420#define dbg_ntype(type) ""
367#define dbg_dump_node(c, node) ({}) 421#define dbg_cstate(cmt_state) ""
368#define dbg_dump_budget_req(req) ({}) 422#define dbg_get_key_dump(c, key) ({})
369#define dbg_dump_lstats(lst) ({}) 423#define dbg_dump_inode(c, inode) ({})
370#define dbg_dump_budg(c) ({}) 424#define dbg_dump_node(c, node) ({})
371#define dbg_dump_lprop(c, lp) ({}) 425#define dbg_dump_lpt_node(c, node, lnum, offs) ({})
372#define dbg_dump_lprops(c) ({}) 426#define dbg_dump_budget_req(req) ({})
373#define dbg_dump_lpt_info(c) ({}) 427#define dbg_dump_lstats(lst) ({})
374#define dbg_dump_leb(c, lnum) ({}) 428#define dbg_dump_budg(c) ({})
375#define dbg_dump_znode(c, znode) ({}) 429#define dbg_dump_lprop(c, lp) ({})
376#define dbg_dump_heap(c, heap, cat) ({}) 430#define dbg_dump_lprops(c) ({})
377#define dbg_dump_pnode(c, pnode, parent, iip) ({}) 431#define dbg_dump_lpt_info(c) ({})
378#define dbg_dump_tnc(c) ({}) 432#define dbg_dump_leb(c, lnum) ({})
379#define dbg_dump_index(c) ({}) 433#define dbg_dump_znode(c, znode) ({})
434#define dbg_dump_heap(c, heap, cat) ({})
435#define dbg_dump_pnode(c, pnode, parent, iip) ({})
436#define dbg_dump_tnc(c) ({})
437#define dbg_dump_index(c) ({})
438#define dbg_dump_lpt_lebs(c) ({})
380 439
381#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0 440#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0
382#define dbg_old_index_check_init(c, zroot) 0 441#define dbg_old_index_check_init(c, zroot) 0
@@ -396,9 +455,11 @@ static inline int dbg_change(struct ubi_volume_desc *desc, int lnum,
396#define dbg_force_in_the_gaps_enabled 0 455#define dbg_force_in_the_gaps_enabled 0
397#define dbg_force_in_the_gaps() 0 456#define dbg_force_in_the_gaps() 0
398#define dbg_failure_mode 0 457#define dbg_failure_mode 0
399#define dbg_failure_mode_registration(c) ({})
400#define dbg_failure_mode_deregistration(c) ({})
401 458
402#endif /* !CONFIG_UBIFS_FS_DEBUG */ 459#define dbg_debugfs_init() 0
460#define dbg_debugfs_exit()
461#define dbg_debugfs_init_fs(c) 0
462#define dbg_debugfs_exit_fs(c) 0
403 463
464#endif /* !CONFIG_UBIFS_FS_DEBUG */
404#endif /* !__UBIFS_DEBUG_H__ */ 465#endif /* !__UBIFS_DEBUG_H__ */
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 0422c98e179..f448ab1f9c3 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -104,13 +104,13 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir,
104 */ 104 */
105 inode->i_flags |= (S_NOCMTIME); 105 inode->i_flags |= (S_NOCMTIME);
106 106
107 inode->i_uid = current->fsuid; 107 inode->i_uid = current_fsuid();
108 if (dir->i_mode & S_ISGID) { 108 if (dir->i_mode & S_ISGID) {
109 inode->i_gid = dir->i_gid; 109 inode->i_gid = dir->i_gid;
110 if (S_ISDIR(mode)) 110 if (S_ISDIR(mode))
111 mode |= S_ISGID; 111 mode |= S_ISGID;
112 } else 112 } else
113 inode->i_gid = current->fsgid; 113 inode->i_gid = current_fsgid();
114 inode->i_mode = mode; 114 inode->i_mode = mode;
115 inode->i_mtime = inode->i_atime = inode->i_ctime = 115 inode->i_mtime = inode->i_atime = inode->i_ctime =
116 ubifs_current_time(inode); 116 ubifs_current_time(inode);
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 2624411d975..bf37374567f 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -72,8 +72,8 @@ static int read_block(struct inode *inode, void *addr, unsigned int block,
72 return err; 72 return err;
73 } 73 }
74 74
75 ubifs_assert(le64_to_cpu(dn->ch.sqnum) > ubifs_inode(inode)->creat_sqnum); 75 ubifs_assert(le64_to_cpu(dn->ch.sqnum) >
76 76 ubifs_inode(inode)->creat_sqnum);
77 len = le32_to_cpu(dn->size); 77 len = le32_to_cpu(dn->size);
78 if (len <= 0 || len > UBIFS_BLOCK_SIZE) 78 if (len <= 0 || len > UBIFS_BLOCK_SIZE)
79 goto dump; 79 goto dump;
@@ -219,7 +219,8 @@ static void release_existing_page_budget(struct ubifs_info *c)
219} 219}
220 220
221static int write_begin_slow(struct address_space *mapping, 221static int write_begin_slow(struct address_space *mapping,
222 loff_t pos, unsigned len, struct page **pagep) 222 loff_t pos, unsigned len, struct page **pagep,
223 unsigned flags)
223{ 224{
224 struct inode *inode = mapping->host; 225 struct inode *inode = mapping->host;
225 struct ubifs_info *c = inode->i_sb->s_fs_info; 226 struct ubifs_info *c = inode->i_sb->s_fs_info;
@@ -247,14 +248,14 @@ static int write_begin_slow(struct address_space *mapping,
247 if (unlikely(err)) 248 if (unlikely(err))
248 return err; 249 return err;
249 250
250 page = __grab_cache_page(mapping, index); 251 page = grab_cache_page_write_begin(mapping, index, flags);
251 if (unlikely(!page)) { 252 if (unlikely(!page)) {
252 ubifs_release_budget(c, &req); 253 ubifs_release_budget(c, &req);
253 return -ENOMEM; 254 return -ENOMEM;
254 } 255 }
255 256
256 if (!PageUptodate(page)) { 257 if (!PageUptodate(page)) {
257 if (!(pos & PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) 258 if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE)
258 SetPageChecked(page); 259 SetPageChecked(page);
259 else { 260 else {
260 err = do_readpage(page); 261 err = do_readpage(page);
@@ -438,13 +439,13 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
438 return -EROFS; 439 return -EROFS;
439 440
440 /* Try out the fast-path part first */ 441 /* Try out the fast-path part first */
441 page = __grab_cache_page(mapping, index); 442 page = grab_cache_page_write_begin(mapping, index, flags);
442 if (unlikely(!page)) 443 if (unlikely(!page))
443 return -ENOMEM; 444 return -ENOMEM;
444 445
445 if (!PageUptodate(page)) { 446 if (!PageUptodate(page)) {
446 /* The page is not loaded from the flash */ 447 /* The page is not loaded from the flash */
447 if (!(pos & PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) 448 if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE)
448 /* 449 /*
449 * We change whole page so no need to load it. But we 450 * We change whole page so no need to load it. But we
450 * have to set the @PG_checked flag to make the further 451 * have to set the @PG_checked flag to make the further
@@ -483,7 +484,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
483 unlock_page(page); 484 unlock_page(page);
484 page_cache_release(page); 485 page_cache_release(page);
485 486
486 return write_begin_slow(mapping, pos, len, pagep); 487 return write_begin_slow(mapping, pos, len, pagep, flags);
487 } 488 }
488 489
489 /* 490 /*
diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c
index 5e82cffe969..6db7a6be6c9 100644
--- a/fs/ubifs/ioctl.c
+++ b/fs/ubifs/ioctl.c
@@ -154,6 +154,7 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
154 case FS_IOC_GETFLAGS: 154 case FS_IOC_GETFLAGS:
155 flags = ubifs2ioctl(ubifs_inode(inode)->flags); 155 flags = ubifs2ioctl(ubifs_inode(inode)->flags);
156 156
157 dbg_gen("get flags: %#x, i_flags %#x", flags, inode->i_flags);
157 return put_user(flags, (int __user *) arg); 158 return put_user(flags, (int __user *) arg);
158 159
159 case FS_IOC_SETFLAGS: { 160 case FS_IOC_SETFLAGS: {
@@ -176,6 +177,7 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
176 err = mnt_want_write(file->f_path.mnt); 177 err = mnt_want_write(file->f_path.mnt);
177 if (err) 178 if (err)
178 return err; 179 return err;
180 dbg_gen("set flags: %#x, i_flags %#x", flags, inode->i_flags);
179 err = setflags(inode, flags); 181 err = setflags(inode, flags);
180 mnt_drop_write(file->f_path.mnt); 182 mnt_drop_write(file->f_path.mnt);
181 return err; 183 return err;
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index f91b745908e..10ae25b7d1d 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -704,7 +704,7 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
704 data->size = cpu_to_le32(len); 704 data->size = cpu_to_le32(len);
705 zero_data_node_unused(data); 705 zero_data_node_unused(data);
706 706
707 if (!(ui->flags && UBIFS_COMPR_FL)) 707 if (!(ui->flags & UBIFS_COMPR_FL))
708 /* Compression is disabled for this inode */ 708 /* Compression is disabled for this inode */
709 compr_type = UBIFS_COMPR_NONE; 709 compr_type = UBIFS_COMPR_NONE;
710 else 710 else
@@ -1220,7 +1220,7 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
1220 data_key_init(c, &key, inum, blk); 1220 data_key_init(c, &key, inum, blk);
1221 1221
1222 bit = old_size & (UBIFS_BLOCK_SIZE - 1); 1222 bit = old_size & (UBIFS_BLOCK_SIZE - 1);
1223 blk = (old_size >> UBIFS_BLOCK_SHIFT) - (bit ? 0: 1); 1223 blk = (old_size >> UBIFS_BLOCK_SHIFT) - (bit ? 0 : 1);
1224 data_key_init(c, &to_key, inum, blk); 1224 data_key_init(c, &to_key, inum, blk);
1225 1225
1226 err = ubifs_tnc_remove_range(c, &key, &to_key); 1226 err = ubifs_tnc_remove_range(c, &key, &to_key);
diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h
index 3f1f16bc25c..efb3430a258 100644
--- a/fs/ubifs/key.h
+++ b/fs/ubifs/key.h
@@ -38,6 +38,22 @@
38#define __UBIFS_KEY_H__ 38#define __UBIFS_KEY_H__
39 39
40/** 40/**
41 * key_mask_hash - mask a valid hash value.
42 * @val: value to be masked
43 *
44 * We use hash values as offset in directories, so values %0 and %1 are
45 * reserved for "." and "..". %2 is reserved for "end of readdir" marker. This
46 * function makes sure the reserved values are not used.
47 */
48static inline uint32_t key_mask_hash(uint32_t hash)
49{
50 hash &= UBIFS_S_KEY_HASH_MASK;
51 if (unlikely(hash <= 2))
52 hash += 3;
53 return hash;
54}
55
56/**
41 * key_r5_hash - R5 hash function (borrowed from reiserfs). 57 * key_r5_hash - R5 hash function (borrowed from reiserfs).
42 * @s: direntry name 58 * @s: direntry name
43 * @len: name length 59 * @len: name length
@@ -54,16 +70,7 @@ static inline uint32_t key_r5_hash(const char *s, int len)
54 str++; 70 str++;
55 } 71 }
56 72
57 a &= UBIFS_S_KEY_HASH_MASK; 73 return key_mask_hash(a);
58
59 /*
60 * We use hash values as offset in directories, so values %0 and %1 are
61 * reserved for "." and "..". %2 is reserved for "end of readdir"
62 * marker.
63 */
64 if (unlikely(a >= 0 && a <= 2))
65 a += 3;
66 return a;
67} 74}
68 75
69/** 76/**
@@ -77,10 +84,7 @@ static inline uint32_t key_test_hash(const char *str, int len)
77 84
78 len = min_t(uint32_t, len, 4); 85 len = min_t(uint32_t, len, 4);
79 memcpy(&a, str, len); 86 memcpy(&a, str, len);
80 a &= UBIFS_S_KEY_HASH_MASK; 87 return key_mask_hash(a);
81 if (unlikely(a >= 0 && a <= 2))
82 a += 3;
83 return a;
84} 88}
85 89
86/** 90/**
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index f27176e9b70..dfd2bcece27 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -520,13 +520,13 @@ static int is_lprops_dirty(struct ubifs_info *c, struct ubifs_lprops *lprops)
520 * @flags: new flags 520 * @flags: new flags
521 * @idx_gc_cnt: change to the count of idx_gc list 521 * @idx_gc_cnt: change to the count of idx_gc list
522 * 522 *
523 * This function changes LEB properties. This function does not change a LEB 523 * This function changes LEB properties (@free, @dirty or @flag). However, the
524 * property (@free, @dirty or @flag) if the value passed is %LPROPS_NC. 524 * property which has the %LPROPS_NC value is not changed. Returns a pointer to
525 * the updated LEB properties on success and a negative error code on failure.
525 * 526 *
526 * This function returns a pointer to the updated LEB properties on success 527 * Note, the LEB properties may have had to be copied (due to COW) and
527 * and a negative error code on failure. N.B. the LEB properties may have had to 528 * consequently the pointer returned may not be the same as the pointer
528 * be copied (due to COW) and consequently the pointer returned may not be the 529 * passed.
529 * same as the pointer passed.
530 */ 530 */
531const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, 531const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
532 const struct ubifs_lprops *lp, 532 const struct ubifs_lprops *lp,
@@ -1088,7 +1088,7 @@ static int scan_check_cb(struct ubifs_info *c,
1088 } 1088 }
1089 } 1089 }
1090 1090
1091 sleb = ubifs_scan(c, lnum, 0, c->dbg_buf); 1091 sleb = ubifs_scan(c, lnum, 0, c->dbg->buf);
1092 if (IS_ERR(sleb)) { 1092 if (IS_ERR(sleb)) {
1093 /* 1093 /*
1094 * After an unclean unmount, empty and freeable LEBs 1094 * After an unclean unmount, empty and freeable LEBs
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index db8bd0e518b..b2792e84d24 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -36,15 +36,16 @@
36 * can be written into a single eraseblock. In that case, garbage collection 36 * can be written into a single eraseblock. In that case, garbage collection
37 * consists of just writing the whole table, which therefore makes all other 37 * consists of just writing the whole table, which therefore makes all other
38 * eraseblocks reusable. In the case of the big model, dirty eraseblocks are 38 * eraseblocks reusable. In the case of the big model, dirty eraseblocks are
39 * selected for garbage collection, which consists are marking the nodes in 39 * selected for garbage collection, which consists of marking the clean nodes in
40 * that LEB as dirty, and then only the dirty nodes are written out. Also, in 40 * that LEB as dirty, and then only the dirty nodes are written out. Also, in
41 * the case of the big model, a table of LEB numbers is saved so that the entire 41 * the case of the big model, a table of LEB numbers is saved so that the entire
42 * LPT does not to be scanned looking for empty eraseblocks when UBIFS is first 42 * LPT does not to be scanned looking for empty eraseblocks when UBIFS is first
43 * mounted. 43 * mounted.
44 */ 44 */
45 45
46#include <linux/crc16.h>
47#include "ubifs.h" 46#include "ubifs.h"
47#include <linux/crc16.h>
48#include <linux/math64.h>
48 49
49/** 50/**
50 * do_calc_lpt_geom - calculate sizes for the LPT area. 51 * do_calc_lpt_geom - calculate sizes for the LPT area.
@@ -135,15 +136,13 @@ static void do_calc_lpt_geom(struct ubifs_info *c)
135int ubifs_calc_lpt_geom(struct ubifs_info *c) 136int ubifs_calc_lpt_geom(struct ubifs_info *c)
136{ 137{
137 int lebs_needed; 138 int lebs_needed;
138 uint64_t sz; 139 long long sz;
139 140
140 do_calc_lpt_geom(c); 141 do_calc_lpt_geom(c);
141 142
142 /* Verify that lpt_lebs is big enough */ 143 /* Verify that lpt_lebs is big enough */
143 sz = c->lpt_sz * 2; /* Must have at least 2 times the size */ 144 sz = c->lpt_sz * 2; /* Must have at least 2 times the size */
144 sz += c->leb_size - 1; 145 lebs_needed = div_u64(sz + c->leb_size - 1, c->leb_size);
145 do_div(sz, c->leb_size);
146 lebs_needed = sz;
147 if (lebs_needed > c->lpt_lebs) { 146 if (lebs_needed > c->lpt_lebs) {
148 ubifs_err("too few LPT LEBs"); 147 ubifs_err("too few LPT LEBs");
149 return -EINVAL; 148 return -EINVAL;
@@ -156,7 +155,6 @@ int ubifs_calc_lpt_geom(struct ubifs_info *c)
156 } 155 }
157 156
158 c->check_lpt_free = c->big_lpt; 157 c->check_lpt_free = c->big_lpt;
159
160 return 0; 158 return 0;
161} 159}
162 160
@@ -176,7 +174,7 @@ static int calc_dflt_lpt_geom(struct ubifs_info *c, int *main_lebs,
176 int *big_lpt) 174 int *big_lpt)
177{ 175{
178 int i, lebs_needed; 176 int i, lebs_needed;
179 uint64_t sz; 177 long long sz;
180 178
181 /* Start by assuming the minimum number of LPT LEBs */ 179 /* Start by assuming the minimum number of LPT LEBs */
182 c->lpt_lebs = UBIFS_MIN_LPT_LEBS; 180 c->lpt_lebs = UBIFS_MIN_LPT_LEBS;
@@ -203,9 +201,7 @@ static int calc_dflt_lpt_geom(struct ubifs_info *c, int *main_lebs,
203 /* Now check there are enough LPT LEBs */ 201 /* Now check there are enough LPT LEBs */
204 for (i = 0; i < 64 ; i++) { 202 for (i = 0; i < 64 ; i++) {
205 sz = c->lpt_sz * 4; /* Allow 4 times the size */ 203 sz = c->lpt_sz * 4; /* Allow 4 times the size */
206 sz += c->leb_size - 1; 204 lebs_needed = div_u64(sz + c->leb_size - 1, c->leb_size);
207 do_div(sz, c->leb_size);
208 lebs_needed = sz;
209 if (lebs_needed > c->lpt_lebs) { 205 if (lebs_needed > c->lpt_lebs) {
210 /* Not enough LPT LEBs so try again with more */ 206 /* Not enough LPT LEBs so try again with more */
211 c->lpt_lebs = lebs_needed; 207 c->lpt_lebs = lebs_needed;
@@ -558,7 +554,7 @@ static int calc_nnode_num(int row, int col)
558 * This function calculates and returns the nnode number based on the parent's 554 * This function calculates and returns the nnode number based on the parent's
559 * nnode number and the index in parent. 555 * nnode number and the index in parent.
560 */ 556 */
561static int calc_nnode_num_from_parent(struct ubifs_info *c, 557static int calc_nnode_num_from_parent(const struct ubifs_info *c,
562 struct ubifs_nnode *parent, int iip) 558 struct ubifs_nnode *parent, int iip)
563{ 559{
564 int num, shft; 560 int num, shft;
@@ -583,7 +579,7 @@ static int calc_nnode_num_from_parent(struct ubifs_info *c,
583 * This function calculates and returns the pnode number based on the parent's 579 * This function calculates and returns the pnode number based on the parent's
584 * nnode number and the index in parent. 580 * nnode number and the index in parent.
585 */ 581 */
586static int calc_pnode_num_from_parent(struct ubifs_info *c, 582static int calc_pnode_num_from_parent(const struct ubifs_info *c,
587 struct ubifs_nnode *parent, int iip) 583 struct ubifs_nnode *parent, int iip)
588{ 584{
589 int i, n = c->lpt_hght - 1, pnum = parent->num, num = 0; 585 int i, n = c->lpt_hght - 1, pnum = parent->num, num = 0;
@@ -966,7 +962,7 @@ static int check_lpt_type(uint8_t **addr, int *pos, int type)
966 * 962 *
967 * This function returns %0 on success and a negative error code on failure. 963 * This function returns %0 on success and a negative error code on failure.
968 */ 964 */
969static int unpack_pnode(struct ubifs_info *c, void *buf, 965static int unpack_pnode(const struct ubifs_info *c, void *buf,
970 struct ubifs_pnode *pnode) 966 struct ubifs_pnode *pnode)
971{ 967{
972 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; 968 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
@@ -996,15 +992,15 @@ static int unpack_pnode(struct ubifs_info *c, void *buf,
996} 992}
997 993
998/** 994/**
999 * unpack_nnode - unpack a nnode. 995 * ubifs_unpack_nnode - unpack a nnode.
1000 * @c: UBIFS file-system description object 996 * @c: UBIFS file-system description object
1001 * @buf: buffer containing packed nnode to unpack 997 * @buf: buffer containing packed nnode to unpack
1002 * @nnode: nnode structure to fill 998 * @nnode: nnode structure to fill
1003 * 999 *
1004 * This function returns %0 on success and a negative error code on failure. 1000 * This function returns %0 on success and a negative error code on failure.
1005 */ 1001 */
1006static int unpack_nnode(struct ubifs_info *c, void *buf, 1002int ubifs_unpack_nnode(const struct ubifs_info *c, void *buf,
1007 struct ubifs_nnode *nnode) 1003 struct ubifs_nnode *nnode)
1008{ 1004{
1009 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; 1005 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
1010 int i, pos = 0, err; 1006 int i, pos = 0, err;
@@ -1036,7 +1032,7 @@ static int unpack_nnode(struct ubifs_info *c, void *buf,
1036 * 1032 *
1037 * This function returns %0 on success and a negative error code on failure. 1033 * This function returns %0 on success and a negative error code on failure.
1038 */ 1034 */
1039static int unpack_ltab(struct ubifs_info *c, void *buf) 1035static int unpack_ltab(const struct ubifs_info *c, void *buf)
1040{ 1036{
1041 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; 1037 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
1042 int i, pos = 0, err; 1038 int i, pos = 0, err;
@@ -1068,7 +1064,7 @@ static int unpack_ltab(struct ubifs_info *c, void *buf)
1068 * 1064 *
1069 * This function returns %0 on success and a negative error code on failure. 1065 * This function returns %0 on success and a negative error code on failure.
1070 */ 1066 */
1071static int unpack_lsave(struct ubifs_info *c, void *buf) 1067static int unpack_lsave(const struct ubifs_info *c, void *buf)
1072{ 1068{
1073 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; 1069 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
1074 int i, pos = 0, err; 1070 int i, pos = 0, err;
@@ -1096,7 +1092,7 @@ static int unpack_lsave(struct ubifs_info *c, void *buf)
1096 * 1092 *
1097 * This function returns %0 on success and a negative error code on failure. 1093 * This function returns %0 on success and a negative error code on failure.
1098 */ 1094 */
1099static int validate_nnode(struct ubifs_info *c, struct ubifs_nnode *nnode, 1095static int validate_nnode(const struct ubifs_info *c, struct ubifs_nnode *nnode,
1100 struct ubifs_nnode *parent, int iip) 1096 struct ubifs_nnode *parent, int iip)
1101{ 1097{
1102 int i, lvl, max_offs; 1098 int i, lvl, max_offs;
@@ -1140,7 +1136,7 @@ static int validate_nnode(struct ubifs_info *c, struct ubifs_nnode *nnode,
1140 * 1136 *
1141 * This function returns %0 on success and a negative error code on failure. 1137 * This function returns %0 on success and a negative error code on failure.
1142 */ 1138 */
1143static int validate_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, 1139static int validate_pnode(const struct ubifs_info *c, struct ubifs_pnode *pnode,
1144 struct ubifs_nnode *parent, int iip) 1140 struct ubifs_nnode *parent, int iip)
1145{ 1141{
1146 int i; 1142 int i;
@@ -1174,7 +1170,8 @@ static int validate_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
1174 * This function calculates the LEB numbers for the LEB properties it contains 1170 * This function calculates the LEB numbers for the LEB properties it contains
1175 * based on the pnode number. 1171 * based on the pnode number.
1176 */ 1172 */
1177static void set_pnode_lnum(struct ubifs_info *c, struct ubifs_pnode *pnode) 1173static void set_pnode_lnum(const struct ubifs_info *c,
1174 struct ubifs_pnode *pnode)
1178{ 1175{
1179 int i, lnum; 1176 int i, lnum;
1180 1177
@@ -1227,7 +1224,7 @@ int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip)
1227 err = ubi_read(c->ubi, lnum, buf, offs, c->nnode_sz); 1224 err = ubi_read(c->ubi, lnum, buf, offs, c->nnode_sz);
1228 if (err) 1225 if (err)
1229 goto out; 1226 goto out;
1230 err = unpack_nnode(c, buf, nnode); 1227 err = ubifs_unpack_nnode(c, buf, nnode);
1231 if (err) 1228 if (err)
1232 goto out; 1229 goto out;
1233 } 1230 }
@@ -1816,7 +1813,7 @@ static struct ubifs_nnode *scan_get_nnode(struct ubifs_info *c,
1816 c->nnode_sz); 1813 c->nnode_sz);
1817 if (err) 1814 if (err)
1818 return ERR_PTR(err); 1815 return ERR_PTR(err);
1819 err = unpack_nnode(c, buf, nnode); 1816 err = ubifs_unpack_nnode(c, buf, nnode);
1820 if (err) 1817 if (err)
1821 return ERR_PTR(err); 1818 return ERR_PTR(err);
1822 } 1819 }
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index a41434b4278..96ca9570717 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -320,6 +320,8 @@ no_space:
320 dbg_err("LPT out of space at LEB %d:%d needing %d, done_ltab %d, " 320 dbg_err("LPT out of space at LEB %d:%d needing %d, done_ltab %d, "
321 "done_lsave %d", lnum, offs, len, done_ltab, done_lsave); 321 "done_lsave %d", lnum, offs, len, done_ltab, done_lsave);
322 dbg_dump_lpt_info(c); 322 dbg_dump_lpt_info(c);
323 dbg_dump_lpt_lebs(c);
324 dump_stack();
323 return err; 325 return err;
324} 326}
325 327
@@ -546,8 +548,10 @@ static int write_cnodes(struct ubifs_info *c)
546no_space: 548no_space:
547 ubifs_err("LPT out of space mismatch"); 549 ubifs_err("LPT out of space mismatch");
548 dbg_err("LPT out of space mismatch at LEB %d:%d needing %d, done_ltab " 550 dbg_err("LPT out of space mismatch at LEB %d:%d needing %d, done_ltab "
549 "%d, done_lsave %d", lnum, offs, len, done_ltab, done_lsave); 551 "%d, done_lsave %d", lnum, offs, len, done_ltab, done_lsave);
550 dbg_dump_lpt_info(c); 552 dbg_dump_lpt_info(c);
553 dbg_dump_lpt_lebs(c);
554 dump_stack();
551 return err; 555 return err;
552} 556}
553 557
@@ -749,7 +753,7 @@ static void lpt_tgc_start(struct ubifs_info *c)
749 * LPT trivial garbage collection is where a LPT LEB contains only dirty and 753 * LPT trivial garbage collection is where a LPT LEB contains only dirty and
750 * free space and so may be reused as soon as the next commit is completed. 754 * free space and so may be reused as soon as the next commit is completed.
751 * This function is called after the commit is completed (master node has been 755 * This function is called after the commit is completed (master node has been
752 * written) and unmaps LPT LEBs that were marked for trivial GC. 756 * written) and un-maps LPT LEBs that were marked for trivial GC.
753 */ 757 */
754static int lpt_tgc_end(struct ubifs_info *c) 758static int lpt_tgc_end(struct ubifs_info *c)
755{ 759{
@@ -1025,7 +1029,7 @@ static int make_node_dirty(struct ubifs_info *c, int node_type, int node_num,
1025 * @c: UBIFS file-system description object 1029 * @c: UBIFS file-system description object
1026 * @node_type: LPT node type 1030 * @node_type: LPT node type
1027 */ 1031 */
1028static int get_lpt_node_len(struct ubifs_info *c, int node_type) 1032static int get_lpt_node_len(const struct ubifs_info *c, int node_type)
1029{ 1033{
1030 switch (node_type) { 1034 switch (node_type) {
1031 case UBIFS_LPT_NNODE: 1035 case UBIFS_LPT_NNODE:
@@ -1046,7 +1050,7 @@ static int get_lpt_node_len(struct ubifs_info *c, int node_type)
1046 * @buf: buffer 1050 * @buf: buffer
1047 * @len: length of buffer 1051 * @len: length of buffer
1048 */ 1052 */
1049static int get_pad_len(struct ubifs_info *c, uint8_t *buf, int len) 1053static int get_pad_len(const struct ubifs_info *c, uint8_t *buf, int len)
1050{ 1054{
1051 int offs, pad_len; 1055 int offs, pad_len;
1052 1056
@@ -1063,7 +1067,8 @@ static int get_pad_len(struct ubifs_info *c, uint8_t *buf, int len)
1063 * @buf: buffer 1067 * @buf: buffer
1064 * @node_num: node number is returned here 1068 * @node_num: node number is returned here
1065 */ 1069 */
1066static int get_lpt_node_type(struct ubifs_info *c, uint8_t *buf, int *node_num) 1070static int get_lpt_node_type(const struct ubifs_info *c, uint8_t *buf,
1071 int *node_num)
1067{ 1072{
1068 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; 1073 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
1069 int pos = 0, node_type; 1074 int pos = 0, node_type;
@@ -1081,7 +1086,7 @@ static int get_lpt_node_type(struct ubifs_info *c, uint8_t *buf, int *node_num)
1081 * 1086 *
1082 * This function returns %1 if the buffer contains a node or %0 if it does not. 1087 * This function returns %1 if the buffer contains a node or %0 if it does not.
1083 */ 1088 */
1084static int is_a_node(struct ubifs_info *c, uint8_t *buf, int len) 1089static int is_a_node(const struct ubifs_info *c, uint8_t *buf, int len)
1085{ 1090{
1086 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; 1091 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
1087 int pos = 0, node_type, node_len; 1092 int pos = 0, node_type, node_len;
@@ -1105,7 +1110,6 @@ static int is_a_node(struct ubifs_info *c, uint8_t *buf, int len)
1105 return 1; 1110 return 1;
1106} 1111}
1107 1112
1108
1109/** 1113/**
1110 * lpt_gc_lnum - garbage collect a LPT LEB. 1114 * lpt_gc_lnum - garbage collect a LPT LEB.
1111 * @c: UBIFS file-system description object 1115 * @c: UBIFS file-system description object
@@ -1463,7 +1467,7 @@ void ubifs_lpt_free(struct ubifs_info *c, int wr_only)
1463#ifdef CONFIG_UBIFS_FS_DEBUG 1467#ifdef CONFIG_UBIFS_FS_DEBUG
1464 1468
1465/** 1469/**
1466 * dbg_is_all_ff - determine if a buffer contains only 0xff bytes. 1470 * dbg_is_all_ff - determine if a buffer contains only 0xFF bytes.
1467 * @buf: buffer 1471 * @buf: buffer
1468 * @len: buffer length 1472 * @len: buffer length
1469 */ 1473 */
@@ -1488,7 +1492,7 @@ static int dbg_is_nnode_dirty(struct ubifs_info *c, int lnum, int offs)
1488 struct ubifs_nnode *nnode; 1492 struct ubifs_nnode *nnode;
1489 int hght; 1493 int hght;
1490 1494
1491 /* Entire tree is in memory so first_nnode / next_nnode are ok */ 1495 /* Entire tree is in memory so first_nnode / next_nnode are OK */
1492 nnode = first_nnode(c, &hght); 1496 nnode = first_nnode(c, &hght);
1493 for (; nnode; nnode = next_nnode(c, nnode, &hght)) { 1497 for (; nnode; nnode = next_nnode(c, nnode, &hght)) {
1494 struct ubifs_nbranch *branch; 1498 struct ubifs_nbranch *branch;
@@ -1602,7 +1606,10 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum)
1602{ 1606{
1603 int err, len = c->leb_size, dirty = 0, node_type, node_num, node_len; 1607 int err, len = c->leb_size, dirty = 0, node_type, node_num, node_len;
1604 int ret; 1608 int ret;
1605 void *buf = c->dbg_buf; 1609 void *buf = c->dbg->buf;
1610
1611 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
1612 return 0;
1606 1613
1607 dbg_lp("LEB %d", lnum); 1614 dbg_lp("LEB %d", lnum);
1608 err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); 1615 err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size);
@@ -1704,6 +1711,9 @@ int dbg_chk_lpt_free_spc(struct ubifs_info *c)
1704 long long free = 0; 1711 long long free = 0;
1705 int i; 1712 int i;
1706 1713
1714 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
1715 return 0;
1716
1707 for (i = 0; i < c->lpt_lebs; i++) { 1717 for (i = 0; i < c->lpt_lebs; i++) {
1708 if (c->ltab[i].tgc || c->ltab[i].cmt) 1718 if (c->ltab[i].tgc || c->ltab[i].cmt)
1709 continue; 1719 continue;
@@ -1716,6 +1726,8 @@ int dbg_chk_lpt_free_spc(struct ubifs_info *c)
1716 dbg_err("LPT space error: free %lld lpt_sz %lld", 1726 dbg_err("LPT space error: free %lld lpt_sz %lld",
1717 free, c->lpt_sz); 1727 free, c->lpt_sz);
1718 dbg_dump_lpt_info(c); 1728 dbg_dump_lpt_info(c);
1729 dbg_dump_lpt_lebs(c);
1730 dump_stack();
1719 return -EINVAL; 1731 return -EINVAL;
1720 } 1732 }
1721 return 0; 1733 return 0;
@@ -1731,15 +1743,19 @@ int dbg_chk_lpt_free_spc(struct ubifs_info *c)
1731 */ 1743 */
1732int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) 1744int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len)
1733{ 1745{
1746 struct ubifs_debug_info *d = c->dbg;
1734 long long chk_lpt_sz, lpt_sz; 1747 long long chk_lpt_sz, lpt_sz;
1735 int err = 0; 1748 int err = 0;
1736 1749
1750 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
1751 return 0;
1752
1737 switch (action) { 1753 switch (action) {
1738 case 0: 1754 case 0:
1739 c->chk_lpt_sz = 0; 1755 d->chk_lpt_sz = 0;
1740 c->chk_lpt_sz2 = 0; 1756 d->chk_lpt_sz2 = 0;
1741 c->chk_lpt_lebs = 0; 1757 d->chk_lpt_lebs = 0;
1742 c->chk_lpt_wastage = 0; 1758 d->chk_lpt_wastage = 0;
1743 if (c->dirty_pn_cnt > c->pnode_cnt) { 1759 if (c->dirty_pn_cnt > c->pnode_cnt) {
1744 dbg_err("dirty pnodes %d exceed max %d", 1760 dbg_err("dirty pnodes %d exceed max %d",
1745 c->dirty_pn_cnt, c->pnode_cnt); 1761 c->dirty_pn_cnt, c->pnode_cnt);
@@ -1752,35 +1768,35 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len)
1752 } 1768 }
1753 return err; 1769 return err;
1754 case 1: 1770 case 1:
1755 c->chk_lpt_sz += len; 1771 d->chk_lpt_sz += len;
1756 return 0; 1772 return 0;
1757 case 2: 1773 case 2:
1758 c->chk_lpt_sz += len; 1774 d->chk_lpt_sz += len;
1759 c->chk_lpt_wastage += len; 1775 d->chk_lpt_wastage += len;
1760 c->chk_lpt_lebs += 1; 1776 d->chk_lpt_lebs += 1;
1761 return 0; 1777 return 0;
1762 case 3: 1778 case 3:
1763 chk_lpt_sz = c->leb_size; 1779 chk_lpt_sz = c->leb_size;
1764 chk_lpt_sz *= c->chk_lpt_lebs; 1780 chk_lpt_sz *= d->chk_lpt_lebs;
1765 chk_lpt_sz += len - c->nhead_offs; 1781 chk_lpt_sz += len - c->nhead_offs;
1766 if (c->chk_lpt_sz != chk_lpt_sz) { 1782 if (d->chk_lpt_sz != chk_lpt_sz) {
1767 dbg_err("LPT wrote %lld but space used was %lld", 1783 dbg_err("LPT wrote %lld but space used was %lld",
1768 c->chk_lpt_sz, chk_lpt_sz); 1784 d->chk_lpt_sz, chk_lpt_sz);
1769 err = -EINVAL; 1785 err = -EINVAL;
1770 } 1786 }
1771 if (c->chk_lpt_sz > c->lpt_sz) { 1787 if (d->chk_lpt_sz > c->lpt_sz) {
1772 dbg_err("LPT wrote %lld but lpt_sz is %lld", 1788 dbg_err("LPT wrote %lld but lpt_sz is %lld",
1773 c->chk_lpt_sz, c->lpt_sz); 1789 d->chk_lpt_sz, c->lpt_sz);
1774 err = -EINVAL; 1790 err = -EINVAL;
1775 } 1791 }
1776 if (c->chk_lpt_sz2 && c->chk_lpt_sz != c->chk_lpt_sz2) { 1792 if (d->chk_lpt_sz2 && d->chk_lpt_sz != d->chk_lpt_sz2) {
1777 dbg_err("LPT layout size %lld but wrote %lld", 1793 dbg_err("LPT layout size %lld but wrote %lld",
1778 c->chk_lpt_sz, c->chk_lpt_sz2); 1794 d->chk_lpt_sz, d->chk_lpt_sz2);
1779 err = -EINVAL; 1795 err = -EINVAL;
1780 } 1796 }
1781 if (c->chk_lpt_sz2 && c->new_nhead_offs != len) { 1797 if (d->chk_lpt_sz2 && d->new_nhead_offs != len) {
1782 dbg_err("LPT new nhead offs: expected %d was %d", 1798 dbg_err("LPT new nhead offs: expected %d was %d",
1783 c->new_nhead_offs, len); 1799 d->new_nhead_offs, len);
1784 err = -EINVAL; 1800 err = -EINVAL;
1785 } 1801 }
1786 lpt_sz = (long long)c->pnode_cnt * c->pnode_sz; 1802 lpt_sz = (long long)c->pnode_cnt * c->pnode_sz;
@@ -1788,26 +1804,146 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len)
1788 lpt_sz += c->ltab_sz; 1804 lpt_sz += c->ltab_sz;
1789 if (c->big_lpt) 1805 if (c->big_lpt)
1790 lpt_sz += c->lsave_sz; 1806 lpt_sz += c->lsave_sz;
1791 if (c->chk_lpt_sz - c->chk_lpt_wastage > lpt_sz) { 1807 if (d->chk_lpt_sz - d->chk_lpt_wastage > lpt_sz) {
1792 dbg_err("LPT chk_lpt_sz %lld + waste %lld exceeds %lld", 1808 dbg_err("LPT chk_lpt_sz %lld + waste %lld exceeds %lld",
1793 c->chk_lpt_sz, c->chk_lpt_wastage, lpt_sz); 1809 d->chk_lpt_sz, d->chk_lpt_wastage, lpt_sz);
1794 err = -EINVAL; 1810 err = -EINVAL;
1795 } 1811 }
1796 if (err) 1812 if (err) {
1797 dbg_dump_lpt_info(c); 1813 dbg_dump_lpt_info(c);
1798 c->chk_lpt_sz2 = c->chk_lpt_sz; 1814 dbg_dump_lpt_lebs(c);
1799 c->chk_lpt_sz = 0; 1815 dump_stack();
1800 c->chk_lpt_wastage = 0; 1816 }
1801 c->chk_lpt_lebs = 0; 1817 d->chk_lpt_sz2 = d->chk_lpt_sz;
1802 c->new_nhead_offs = len; 1818 d->chk_lpt_sz = 0;
1819 d->chk_lpt_wastage = 0;
1820 d->chk_lpt_lebs = 0;
1821 d->new_nhead_offs = len;
1803 return err; 1822 return err;
1804 case 4: 1823 case 4:
1805 c->chk_lpt_sz += len; 1824 d->chk_lpt_sz += len;
1806 c->chk_lpt_wastage += len; 1825 d->chk_lpt_wastage += len;
1807 return 0; 1826 return 0;
1808 default: 1827 default:
1809 return -EINVAL; 1828 return -EINVAL;
1810 } 1829 }
1811} 1830}
1812 1831
1832/**
1833 * dbg_dump_lpt_leb - dump an LPT LEB.
1834 * @c: UBIFS file-system description object
1835 * @lnum: LEB number to dump
1836 *
1837 * This function dumps an LEB from LPT area. Nodes in this area are very
1838 * different to nodes in the main area (e.g., they do not have common headers,
1839 * they do not have 8-byte alignments, etc), so we have a separate function to
1840 * dump LPT area LEBs. Note, LPT has to be locked by the caller.
1841 */
1842static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
1843{
1844 int err, len = c->leb_size, node_type, node_num, node_len, offs;
1845 void *buf = c->dbg->buf;
1846
1847 printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n",
1848 current->pid, lnum);
1849 err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size);
1850 if (err) {
1851 ubifs_err("cannot read LEB %d, error %d", lnum, err);
1852 return;
1853 }
1854 while (1) {
1855 offs = c->leb_size - len;
1856 if (!is_a_node(c, buf, len)) {
1857 int pad_len;
1858
1859 pad_len = get_pad_len(c, buf, len);
1860 if (pad_len) {
1861 printk(KERN_DEBUG "LEB %d:%d, pad %d bytes\n",
1862 lnum, offs, pad_len);
1863 buf += pad_len;
1864 len -= pad_len;
1865 continue;
1866 }
1867 if (len)
1868 printk(KERN_DEBUG "LEB %d:%d, free %d bytes\n",
1869 lnum, offs, len);
1870 break;
1871 }
1872
1873 node_type = get_lpt_node_type(c, buf, &node_num);
1874 switch (node_type) {
1875 case UBIFS_LPT_PNODE:
1876 {
1877 node_len = c->pnode_sz;
1878 if (c->big_lpt)
1879 printk(KERN_DEBUG "LEB %d:%d, pnode num %d\n",
1880 lnum, offs, node_num);
1881 else
1882 printk(KERN_DEBUG "LEB %d:%d, pnode\n",
1883 lnum, offs);
1884 break;
1885 }
1886 case UBIFS_LPT_NNODE:
1887 {
1888 int i;
1889 struct ubifs_nnode nnode;
1890
1891 node_len = c->nnode_sz;
1892 if (c->big_lpt)
1893 printk(KERN_DEBUG "LEB %d:%d, nnode num %d, ",
1894 lnum, offs, node_num);
1895 else
1896 printk(KERN_DEBUG "LEB %d:%d, nnode, ",
1897 lnum, offs);
1898 err = ubifs_unpack_nnode(c, buf, &nnode);
1899 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
1900 printk("%d:%d", nnode.nbranch[i].lnum,
1901 nnode.nbranch[i].offs);
1902 if (i != UBIFS_LPT_FANOUT - 1)
1903 printk(", ");
1904 }
1905 printk("\n");
1906 break;
1907 }
1908 case UBIFS_LPT_LTAB:
1909 node_len = c->ltab_sz;
1910 printk(KERN_DEBUG "LEB %d:%d, ltab\n",
1911 lnum, offs);
1912 break;
1913 case UBIFS_LPT_LSAVE:
1914 node_len = c->lsave_sz;
1915 printk(KERN_DEBUG "LEB %d:%d, lsave len\n", lnum, offs);
1916 break;
1917 default:
1918 ubifs_err("LPT node type %d not recognized", node_type);
1919 return;
1920 }
1921
1922 buf += node_len;
1923 len -= node_len;
1924 }
1925
1926 printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n",
1927 current->pid, lnum);
1928}
1929
1930/**
1931 * dbg_dump_lpt_lebs - dump LPT lebs.
1932 * @c: UBIFS file-system description object
1933 *
1934 * This function dumps all LPT LEBs. The caller has to make sure the LPT is
1935 * locked.
1936 */
1937void dbg_dump_lpt_lebs(const struct ubifs_info *c)
1938{
1939 int i;
1940
1941 printk(KERN_DEBUG "(pid %d) start dumping all LPT LEBs\n",
1942 current->pid);
1943 for (i = 0; i < c->lpt_lebs; i++)
1944 dump_lpt_leb(c, i + c->lpt_first);
1945 printk(KERN_DEBUG "(pid %d) finish dumping all LPT LEBs\n",
1946 current->pid);
1947}
1948
1813#endif /* CONFIG_UBIFS_FS_DEBUG */ 1949#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index 9bd5a43d452..9e6f403f170 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -899,7 +899,7 @@ static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci)
899 for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { 899 for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) {
900 struct ubifs_scan_leb *sleb; 900 struct ubifs_scan_leb *sleb;
901 901
902 sleb = ubifs_scan(c, lnum, 0, c->dbg_buf); 902 sleb = ubifs_scan(c, lnum, 0, c->dbg->buf);
903 if (IS_ERR(sleb)) { 903 if (IS_ERR(sleb)) {
904 err = PTR_ERR(sleb); 904 err = PTR_ERR(sleb);
905 break; 905 break;
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index 21f7d047c30..ce42a7b0ca5 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -144,7 +144,7 @@ static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r)
144 /* 144 /*
145 * If the replay order was perfect the dirty space would now be 145 * If the replay order was perfect the dirty space would now be
146 * zero. The order is not perfect because the the journal heads 146 * zero. The order is not perfect because the the journal heads
147 * race with eachother. This is not a problem but is does mean 147 * race with each other. This is not a problem but is does mean
148 * that the dirty space may temporarily exceed c->leb_size 148 * that the dirty space may temporarily exceed c->leb_size
149 * during the replay. 149 * during the replay.
150 */ 150 */
@@ -656,7 +656,7 @@ out_dump:
656 * @dirty: amount of dirty space from padding and deletion nodes 656 * @dirty: amount of dirty space from padding and deletion nodes
657 * 657 *
658 * This function inserts a reference node to the replay tree and returns zero 658 * This function inserts a reference node to the replay tree and returns zero
659 * in case of success ort a negative error code in case of failure. 659 * in case of success or a negative error code in case of failure.
660 */ 660 */
661static int insert_ref_node(struct ubifs_info *c, int lnum, int offs, 661static int insert_ref_node(struct ubifs_info *c, int lnum, int offs,
662 unsigned long long sqnum, int free, int dirty) 662 unsigned long long sqnum, int free, int dirty)
@@ -883,7 +883,7 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
883 * This means that we reached end of log and now 883 * This means that we reached end of log and now
884 * look to the older log data, which was already 884 * look to the older log data, which was already
885 * committed but the eraseblock was not erased (UBIFS 885 * committed but the eraseblock was not erased (UBIFS
886 * only unmaps it). So this basically means we have to 886 * only un-maps it). So this basically means we have to
887 * exit with "end of log" code. 887 * exit with "end of log" code.
888 */ 888 */
889 err = 1; 889 err = 1;
@@ -1062,6 +1062,15 @@ int ubifs_replay_journal(struct ubifs_info *c)
1062 if (err) 1062 if (err)
1063 goto out; 1063 goto out;
1064 1064
1065 /*
1066 * UBIFS budgeting calculations use @c->budg_uncommitted_idx variable
1067 * to roughly estimate index growth. Things like @c->min_idx_lebs
1068 * depend on it. This means we have to initialize it to make sure
1069 * budgeting works properly.
1070 */
1071 c->budg_uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt);
1072 c->budg_uncommitted_idx *= c->max_idx_node_sz;
1073
1065 ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery); 1074 ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery);
1066 dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, " 1075 dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, "
1067 "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum, 1076 "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum,
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index 0f392351dc5..e070c643d1b 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -28,6 +28,7 @@
28 28
29#include "ubifs.h" 29#include "ubifs.h"
30#include <linux/random.h> 30#include <linux/random.h>
31#include <linux/math64.h>
31 32
32/* 33/*
33 * Default journal size in logical eraseblocks as a percent of total 34 * Default journal size in logical eraseblocks as a percent of total
@@ -80,7 +81,7 @@ static int create_default_filesystem(struct ubifs_info *c)
80 int err, tmp, jnl_lebs, log_lebs, max_buds, main_lebs, main_first; 81 int err, tmp, jnl_lebs, log_lebs, max_buds, main_lebs, main_first;
81 int lpt_lebs, lpt_first, orph_lebs, big_lpt, ino_waste, sup_flags = 0; 82 int lpt_lebs, lpt_first, orph_lebs, big_lpt, ino_waste, sup_flags = 0;
82 int min_leb_cnt = UBIFS_MIN_LEB_CNT; 83 int min_leb_cnt = UBIFS_MIN_LEB_CNT;
83 uint64_t tmp64, main_bytes; 84 long long tmp64, main_bytes;
84 __le64 tmp_le64; 85 __le64 tmp_le64;
85 86
86 /* Some functions called from here depend on the @c->key_len filed */ 87 /* Some functions called from here depend on the @c->key_len filed */
@@ -160,7 +161,7 @@ static int create_default_filesystem(struct ubifs_info *c)
160 if (!sup) 161 if (!sup)
161 return -ENOMEM; 162 return -ENOMEM;
162 163
163 tmp64 = (uint64_t)max_buds * c->leb_size; 164 tmp64 = (long long)max_buds * c->leb_size;
164 if (big_lpt) 165 if (big_lpt)
165 sup_flags |= UBIFS_FLG_BIGLPT; 166 sup_flags |= UBIFS_FLG_BIGLPT;
166 167
@@ -179,14 +180,16 @@ static int create_default_filesystem(struct ubifs_info *c)
179 sup->fanout = cpu_to_le32(DEFAULT_FANOUT); 180 sup->fanout = cpu_to_le32(DEFAULT_FANOUT);
180 sup->lsave_cnt = cpu_to_le32(c->lsave_cnt); 181 sup->lsave_cnt = cpu_to_le32(c->lsave_cnt);
181 sup->fmt_version = cpu_to_le32(UBIFS_FORMAT_VERSION); 182 sup->fmt_version = cpu_to_le32(UBIFS_FORMAT_VERSION);
182 sup->default_compr = cpu_to_le16(UBIFS_COMPR_LZO);
183 sup->time_gran = cpu_to_le32(DEFAULT_TIME_GRAN); 183 sup->time_gran = cpu_to_le32(DEFAULT_TIME_GRAN);
184 if (c->mount_opts.override_compr)
185 sup->default_compr = cpu_to_le16(c->mount_opts.compr_type);
186 else
187 sup->default_compr = cpu_to_le16(UBIFS_COMPR_LZO);
184 188
185 generate_random_uuid(sup->uuid); 189 generate_random_uuid(sup->uuid);
186 190
187 main_bytes = (uint64_t)main_lebs * c->leb_size; 191 main_bytes = (long long)main_lebs * c->leb_size;
188 tmp64 = main_bytes * DEFAULT_RP_PERCENT; 192 tmp64 = div_u64(main_bytes * DEFAULT_RP_PERCENT, 100);
189 do_div(tmp64, 100);
190 if (tmp64 > DEFAULT_MAX_RP_SIZE) 193 if (tmp64 > DEFAULT_MAX_RP_SIZE)
191 tmp64 = DEFAULT_MAX_RP_SIZE; 194 tmp64 = DEFAULT_MAX_RP_SIZE;
192 sup->rp_size = cpu_to_le64(tmp64); 195 sup->rp_size = cpu_to_le64(tmp64);
@@ -582,16 +585,15 @@ int ubifs_read_superblock(struct ubifs_info *c)
582 c->jhead_cnt = le32_to_cpu(sup->jhead_cnt) + NONDATA_JHEADS_CNT; 585 c->jhead_cnt = le32_to_cpu(sup->jhead_cnt) + NONDATA_JHEADS_CNT;
583 c->fanout = le32_to_cpu(sup->fanout); 586 c->fanout = le32_to_cpu(sup->fanout);
584 c->lsave_cnt = le32_to_cpu(sup->lsave_cnt); 587 c->lsave_cnt = le32_to_cpu(sup->lsave_cnt);
585 c->default_compr = le16_to_cpu(sup->default_compr);
586 c->rp_size = le64_to_cpu(sup->rp_size); 588 c->rp_size = le64_to_cpu(sup->rp_size);
587 c->rp_uid = le32_to_cpu(sup->rp_uid); 589 c->rp_uid = le32_to_cpu(sup->rp_uid);
588 c->rp_gid = le32_to_cpu(sup->rp_gid); 590 c->rp_gid = le32_to_cpu(sup->rp_gid);
589 sup_flags = le32_to_cpu(sup->flags); 591 sup_flags = le32_to_cpu(sup->flags);
592 if (!c->mount_opts.override_compr)
593 c->default_compr = le16_to_cpu(sup->default_compr);
590 594
591 c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran); 595 c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran);
592
593 memcpy(&c->uuid, &sup->uuid, 16); 596 memcpy(&c->uuid, &sup->uuid, 16);
594
595 c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT); 597 c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT);
596 598
597 /* Automatically increase file system size to the maximum size */ 599 /* Automatically increase file system size to the maximum size */
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index d80b2aef42b..0d7564b95f8 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -34,6 +34,8 @@
34#include <linux/parser.h> 34#include <linux/parser.h>
35#include <linux/seq_file.h> 35#include <linux/seq_file.h>
36#include <linux/mount.h> 36#include <linux/mount.h>
37#include <linux/math64.h>
38#include <linux/writeback.h>
37#include "ubifs.h" 39#include "ubifs.h"
38 40
39/* 41/*
@@ -417,39 +419,54 @@ static int ubifs_show_options(struct seq_file *s, struct vfsmount *mnt)
417 else if (c->mount_opts.chk_data_crc == 1) 419 else if (c->mount_opts.chk_data_crc == 1)
418 seq_printf(s, ",no_chk_data_crc"); 420 seq_printf(s, ",no_chk_data_crc");
419 421
422 if (c->mount_opts.override_compr) {
423 seq_printf(s, ",compr=");
424 seq_printf(s, ubifs_compr_name(c->mount_opts.compr_type));
425 }
426
420 return 0; 427 return 0;
421} 428}
422 429
423static int ubifs_sync_fs(struct super_block *sb, int wait) 430static int ubifs_sync_fs(struct super_block *sb, int wait)
424{ 431{
432 int i, err;
425 struct ubifs_info *c = sb->s_fs_info; 433 struct ubifs_info *c = sb->s_fs_info;
426 int i, ret = 0, err; 434 struct writeback_control wbc = {
427 long long bud_bytes; 435 .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_HOLD,
428 436 .range_start = 0,
429 if (c->jheads) { 437 .range_end = LLONG_MAX,
430 for (i = 0; i < c->jhead_cnt; i++) { 438 .nr_to_write = LONG_MAX,
431 err = ubifs_wbuf_sync(&c->jheads[i].wbuf); 439 };
432 if (err && !ret) 440
433 ret = err; 441 if (sb->s_flags & MS_RDONLY)
434 } 442 return 0;
435 443
436 /* Commit the journal unless it has too little data */ 444 /*
437 spin_lock(&c->buds_lock); 445 * Synchronize write buffers, because 'ubifs_run_commit()' does not
438 bud_bytes = c->bud_bytes; 446 * do this if it waits for an already running commit.
439 spin_unlock(&c->buds_lock); 447 */
440 if (bud_bytes > c->leb_size) { 448 for (i = 0; i < c->jhead_cnt; i++) {
441 err = ubifs_run_commit(c); 449 err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
442 if (err) 450 if (err)
443 return err; 451 return err;
444 }
445 } 452 }
446 453
447 /* 454 /*
448 * We ought to call sync for c->ubi but it does not have one. If it had 455 * VFS calls '->sync_fs()' before synchronizing all dirty inodes and
449 * it would in turn call mtd->sync, however mtd operations are 456 * pages, so synchronize them first, then commit the journal. Strictly
450 * synchronous anyway, so we don't lose any sleep here. 457 * speaking, it is not necessary to commit the journal here,
458 * synchronizing write-buffers would be enough. But committing makes
459 * UBIFS free space predictions much more accurate, so we want to let
460 * the user be able to get more accurate results of 'statfs()' after
461 * they synchronize the file system.
451 */ 462 */
452 return ret; 463 generic_sync_sb_inodes(sb, &wbc);
464
465 err = ubifs_run_commit(c);
466 if (err)
467 return err;
468
469 return ubi_sync(c->vi.ubi_num);
453} 470}
454 471
455/** 472/**
@@ -596,7 +613,7 @@ static int bud_wbuf_callback(struct ubifs_info *c, int lnum, int free, int pad)
596} 613}
597 614
598/* 615/*
599 * init_constants_late - initialize UBIFS constants. 616 * init_constants_sb - initialize UBIFS constants.
600 * @c: UBIFS file-system description object 617 * @c: UBIFS file-system description object
601 * 618 *
602 * This is a helper function which initializes various UBIFS constants after 619 * This is a helper function which initializes various UBIFS constants after
@@ -604,10 +621,10 @@ static int bud_wbuf_callback(struct ubifs_info *c, int lnum, int free, int pad)
604 * makes sure they are all right. Returns zero in case of success and a 621 * makes sure they are all right. Returns zero in case of success and a
605 * negative error code in case of failure. 622 * negative error code in case of failure.
606 */ 623 */
607static int init_constants_late(struct ubifs_info *c) 624static int init_constants_sb(struct ubifs_info *c)
608{ 625{
609 int tmp, err; 626 int tmp, err;
610 uint64_t tmp64; 627 long long tmp64;
611 628
612 c->main_bytes = (long long)c->main_lebs * c->leb_size; 629 c->main_bytes = (long long)c->main_lebs * c->leb_size;
613 c->max_znode_sz = sizeof(struct ubifs_znode) + 630 c->max_znode_sz = sizeof(struct ubifs_znode) +
@@ -634,9 +651,8 @@ static int init_constants_late(struct ubifs_info *c)
634 * Make sure that the log is large enough to fit reference nodes for 651 * Make sure that the log is large enough to fit reference nodes for
635 * all buds plus one reserved LEB. 652 * all buds plus one reserved LEB.
636 */ 653 */
637 tmp64 = c->max_bud_bytes; 654 tmp64 = c->max_bud_bytes + c->leb_size - 1;
638 tmp = do_div(tmp64, c->leb_size); 655 c->max_bud_cnt = div_u64(tmp64, c->leb_size);
639 c->max_bud_cnt = tmp64 + !!tmp;
640 tmp = (c->ref_node_alsz * c->max_bud_cnt + c->leb_size - 1); 656 tmp = (c->ref_node_alsz * c->max_bud_cnt + c->leb_size - 1);
641 tmp /= c->leb_size; 657 tmp /= c->leb_size;
642 tmp += 1; 658 tmp += 1;
@@ -672,7 +688,7 @@ static int init_constants_late(struct ubifs_info *c)
672 * Consequently, if the journal is too small, UBIFS will treat it as 688 * Consequently, if the journal is too small, UBIFS will treat it as
673 * always full. 689 * always full.
674 */ 690 */
675 tmp64 = (uint64_t)(c->jhead_cnt + 1) * c->leb_size + 1; 691 tmp64 = (long long)(c->jhead_cnt + 1) * c->leb_size + 1;
676 if (c->bg_bud_bytes < tmp64) 692 if (c->bg_bud_bytes < tmp64)
677 c->bg_bud_bytes = tmp64; 693 c->bg_bud_bytes = tmp64;
678 if (c->max_bud_bytes < tmp64 + c->leb_size) 694 if (c->max_bud_bytes < tmp64 + c->leb_size)
@@ -682,6 +698,21 @@ static int init_constants_late(struct ubifs_info *c)
682 if (err) 698 if (err)
683 return err; 699 return err;
684 700
701 return 0;
702}
703
704/*
705 * init_constants_master - initialize UBIFS constants.
706 * @c: UBIFS file-system description object
707 *
708 * This is a helper function which initializes various UBIFS constants after
709 * the master node has been read. It also checks various UBIFS parameters and
710 * makes sure they are all right.
711 */
712static void init_constants_master(struct ubifs_info *c)
713{
714 long long tmp64;
715
685 c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); 716 c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
686 717
687 /* 718 /*
@@ -690,14 +721,13 @@ static int init_constants_late(struct ubifs_info *c)
690 * necessary to report something for the 'statfs()' call. 721 * necessary to report something for the 'statfs()' call.
691 * 722 *
692 * Subtract the LEB reserved for GC, the LEB which is reserved for 723 * Subtract the LEB reserved for GC, the LEB which is reserved for
693 * deletions, and assume only one journal head is available. 724 * deletions, minimum LEBs for the index, and assume only one journal
725 * head is available.
694 */ 726 */
695 tmp64 = c->main_lebs - 2 - c->jhead_cnt + 1; 727 tmp64 = c->main_lebs - 1 - 1 - MIN_INDEX_LEBS - c->jhead_cnt + 1;
696 tmp64 *= (uint64_t)c->leb_size - c->leb_overhead; 728 tmp64 *= (long long)c->leb_size - c->leb_overhead;
697 tmp64 = ubifs_reported_space(c, tmp64); 729 tmp64 = ubifs_reported_space(c, tmp64);
698 c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT; 730 c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT;
699
700 return 0;
701} 731}
702 732
703/** 733/**
@@ -878,6 +908,7 @@ static int check_volume_empty(struct ubifs_info *c)
878 * Opt_no_bulk_read: disable bulk-reads 908 * Opt_no_bulk_read: disable bulk-reads
879 * Opt_chk_data_crc: check CRCs when reading data nodes 909 * Opt_chk_data_crc: check CRCs when reading data nodes
880 * Opt_no_chk_data_crc: do not check CRCs when reading data nodes 910 * Opt_no_chk_data_crc: do not check CRCs when reading data nodes
911 * Opt_override_compr: override default compressor
881 * Opt_err: just end of array marker 912 * Opt_err: just end of array marker
882 */ 913 */
883enum { 914enum {
@@ -887,6 +918,7 @@ enum {
887 Opt_no_bulk_read, 918 Opt_no_bulk_read,
888 Opt_chk_data_crc, 919 Opt_chk_data_crc,
889 Opt_no_chk_data_crc, 920 Opt_no_chk_data_crc,
921 Opt_override_compr,
890 Opt_err, 922 Opt_err,
891}; 923};
892 924
@@ -897,6 +929,7 @@ static const match_table_t tokens = {
897 {Opt_no_bulk_read, "no_bulk_read"}, 929 {Opt_no_bulk_read, "no_bulk_read"},
898 {Opt_chk_data_crc, "chk_data_crc"}, 930 {Opt_chk_data_crc, "chk_data_crc"},
899 {Opt_no_chk_data_crc, "no_chk_data_crc"}, 931 {Opt_no_chk_data_crc, "no_chk_data_crc"},
932 {Opt_override_compr, "compr=%s"},
900 {Opt_err, NULL}, 933 {Opt_err, NULL},
901}; 934};
902 935
@@ -950,6 +983,28 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options,
950 c->mount_opts.chk_data_crc = 1; 983 c->mount_opts.chk_data_crc = 1;
951 c->no_chk_data_crc = 1; 984 c->no_chk_data_crc = 1;
952 break; 985 break;
986 case Opt_override_compr:
987 {
988 char *name = match_strdup(&args[0]);
989
990 if (!name)
991 return -ENOMEM;
992 if (!strcmp(name, "none"))
993 c->mount_opts.compr_type = UBIFS_COMPR_NONE;
994 else if (!strcmp(name, "lzo"))
995 c->mount_opts.compr_type = UBIFS_COMPR_LZO;
996 else if (!strcmp(name, "zlib"))
997 c->mount_opts.compr_type = UBIFS_COMPR_ZLIB;
998 else {
999 ubifs_err("unknown compressor \"%s\"", name);
1000 kfree(name);
1001 return -EINVAL;
1002 }
1003 kfree(name);
1004 c->mount_opts.override_compr = 1;
1005 c->default_compr = c->mount_opts.compr_type;
1006 break;
1007 }
953 default: 1008 default:
954 ubifs_err("unrecognized mount option \"%s\" " 1009 ubifs_err("unrecognized mount option \"%s\" "
955 "or missing value", p); 1010 "or missing value", p);
@@ -1019,6 +1074,30 @@ again:
1019} 1074}
1020 1075
1021/** 1076/**
1077 * check_free_space - check if there is enough free space to mount.
1078 * @c: UBIFS file-system description object
1079 *
1080 * This function makes sure UBIFS has enough free space to be mounted in
1081 * read/write mode. UBIFS must always have some free space to allow deletions.
1082 */
1083static int check_free_space(struct ubifs_info *c)
1084{
1085 ubifs_assert(c->dark_wm > 0);
1086 if (c->lst.total_free + c->lst.total_dirty < c->dark_wm) {
1087 ubifs_err("insufficient free space to mount in read/write mode");
1088 dbg_dump_budg(c);
1089 dbg_dump_lprops(c);
1090 /*
1091 * We return %-EINVAL instead of %-ENOSPC because it seems to
1092 * be the closest error code mentioned in the mount function
1093 * documentation.
1094 */
1095 return -EINVAL;
1096 }
1097 return 0;
1098}
1099
1100/**
1022 * mount_ubifs - mount UBIFS file-system. 1101 * mount_ubifs - mount UBIFS file-system.
1023 * @c: UBIFS file-system description object 1102 * @c: UBIFS file-system description object
1024 * 1103 *
@@ -1039,11 +1118,9 @@ static int mount_ubifs(struct ubifs_info *c)
1039 if (err) 1118 if (err)
1040 return err; 1119 return err;
1041 1120
1042#ifdef CONFIG_UBIFS_FS_DEBUG 1121 err = ubifs_debugging_init(c);
1043 c->dbg_buf = vmalloc(c->leb_size); 1122 if (err)
1044 if (!c->dbg_buf) 1123 return err;
1045 return -ENOMEM;
1046#endif
1047 1124
1048 err = check_volume_empty(c); 1125 err = check_volume_empty(c);
1049 if (err) 1126 if (err)
@@ -1100,27 +1177,25 @@ static int mount_ubifs(struct ubifs_info *c)
1100 goto out_free; 1177 goto out_free;
1101 1178
1102 /* 1179 /*
1103 * Make sure the compressor which is set as the default on in the 1180 * Make sure the compressor which is set as default in the superblock
1104 * superblock was actually compiled in. 1181 * or overridden by mount options is actually compiled in.
1105 */ 1182 */
1106 if (!ubifs_compr_present(c->default_compr)) { 1183 if (!ubifs_compr_present(c->default_compr)) {
1107 ubifs_warn("'%s' compressor is set by superblock, but not " 1184 ubifs_err("'compressor \"%s\" is not compiled in",
1108 "compiled in", ubifs_compr_name(c->default_compr)); 1185 ubifs_compr_name(c->default_compr));
1109 c->default_compr = UBIFS_COMPR_NONE; 1186 goto out_free;
1110 } 1187 }
1111 1188
1112 dbg_failure_mode_registration(c); 1189 err = init_constants_sb(c);
1113
1114 err = init_constants_late(c);
1115 if (err) 1190 if (err)
1116 goto out_dereg; 1191 goto out_free;
1117 1192
1118 sz = ALIGN(c->max_idx_node_sz, c->min_io_size); 1193 sz = ALIGN(c->max_idx_node_sz, c->min_io_size);
1119 sz = ALIGN(sz + c->max_idx_node_sz, c->min_io_size); 1194 sz = ALIGN(sz + c->max_idx_node_sz, c->min_io_size);
1120 c->cbuf = kmalloc(sz, GFP_NOFS); 1195 c->cbuf = kmalloc(sz, GFP_NOFS);
1121 if (!c->cbuf) { 1196 if (!c->cbuf) {
1122 err = -ENOMEM; 1197 err = -ENOMEM;
1123 goto out_dereg; 1198 goto out_free;
1124 } 1199 }
1125 1200
1126 sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id); 1201 sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id);
@@ -1145,6 +1220,8 @@ static int mount_ubifs(struct ubifs_info *c)
1145 if (err) 1220 if (err)
1146 goto out_master; 1221 goto out_master;
1147 1222
1223 init_constants_master(c);
1224
1148 if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) { 1225 if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) {
1149 ubifs_msg("recovery needed"); 1226 ubifs_msg("recovery needed");
1150 c->need_recovery = 1; 1227 c->need_recovery = 1;
@@ -1183,12 +1260,9 @@ static int mount_ubifs(struct ubifs_info *c)
1183 if (!mounted_read_only) { 1260 if (!mounted_read_only) {
1184 int lnum; 1261 int lnum;
1185 1262
1186 /* Check for enough free space */ 1263 err = check_free_space(c);
1187 if (ubifs_calc_available(c, c->min_idx_lebs) <= 0) { 1264 if (err)
1188 ubifs_err("insufficient available space");
1189 err = -EINVAL;
1190 goto out_orphans; 1265 goto out_orphans;
1191 }
1192 1266
1193 /* Check for enough log space */ 1267 /* Check for enough log space */
1194 lnum = c->lhead_lnum + 1; 1268 lnum = c->lhead_lnum + 1;
@@ -1232,6 +1306,10 @@ static int mount_ubifs(struct ubifs_info *c)
1232 } 1306 }
1233 } 1307 }
1234 1308
1309 err = dbg_debugfs_init_fs(c);
1310 if (err)
1311 goto out_infos;
1312
1235 err = dbg_check_filesystem(c); 1313 err = dbg_check_filesystem(c);
1236 if (err) 1314 if (err)
1237 goto out_infos; 1315 goto out_infos;
@@ -1283,8 +1361,20 @@ static int mount_ubifs(struct ubifs_info *c)
1283 dbg_msg("tree fanout: %d", c->fanout); 1361 dbg_msg("tree fanout: %d", c->fanout);
1284 dbg_msg("reserved GC LEB: %d", c->gc_lnum); 1362 dbg_msg("reserved GC LEB: %d", c->gc_lnum);
1285 dbg_msg("first main LEB: %d", c->main_first); 1363 dbg_msg("first main LEB: %d", c->main_first);
1364 dbg_msg("max. znode size %d", c->max_znode_sz);
1365 dbg_msg("max. index node size %d", c->max_idx_node_sz);
1366 dbg_msg("node sizes: data %zu, inode %zu, dentry %zu",
1367 UBIFS_DATA_NODE_SZ, UBIFS_INO_NODE_SZ, UBIFS_DENT_NODE_SZ);
1368 dbg_msg("node sizes: trun %zu, sb %zu, master %zu",
1369 UBIFS_TRUN_NODE_SZ, UBIFS_SB_NODE_SZ, UBIFS_MST_NODE_SZ);
1370 dbg_msg("node sizes: ref %zu, cmt. start %zu, orph %zu",
1371 UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ);
1372 dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu",
1373 UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ,
1374 UBIFS_MAX_DENT_NODE_SZ);
1286 dbg_msg("dead watermark: %d", c->dead_wm); 1375 dbg_msg("dead watermark: %d", c->dead_wm);
1287 dbg_msg("dark watermark: %d", c->dark_wm); 1376 dbg_msg("dark watermark: %d", c->dark_wm);
1377 dbg_msg("LEB overhead: %d", c->leb_overhead);
1288 x = (long long)c->main_lebs * c->dark_wm; 1378 x = (long long)c->main_lebs * c->dark_wm;
1289 dbg_msg("max. dark space: %lld (%lld KiB, %lld MiB)", 1379 dbg_msg("max. dark space: %lld (%lld KiB, %lld MiB)",
1290 x, x >> 10, x >> 20); 1380 x, x >> 10, x >> 20);
@@ -1320,14 +1410,12 @@ out_wbufs:
1320 free_wbufs(c); 1410 free_wbufs(c);
1321out_cbuf: 1411out_cbuf:
1322 kfree(c->cbuf); 1412 kfree(c->cbuf);
1323out_dereg:
1324 dbg_failure_mode_deregistration(c);
1325out_free: 1413out_free:
1326 kfree(c->bu.buf); 1414 kfree(c->bu.buf);
1327 vfree(c->ileb_buf); 1415 vfree(c->ileb_buf);
1328 vfree(c->sbuf); 1416 vfree(c->sbuf);
1329 kfree(c->bottom_up_buf); 1417 kfree(c->bottom_up_buf);
1330 UBIFS_DBG(vfree(c->dbg_buf)); 1418 ubifs_debugging_exit(c);
1331 return err; 1419 return err;
1332} 1420}
1333 1421
@@ -1345,6 +1433,7 @@ static void ubifs_umount(struct ubifs_info *c)
1345 dbg_gen("un-mounting UBI device %d, volume %d", c->vi.ubi_num, 1433 dbg_gen("un-mounting UBI device %d, volume %d", c->vi.ubi_num,
1346 c->vi.vol_id); 1434 c->vi.vol_id);
1347 1435
1436 dbg_debugfs_exit_fs(c);
1348 spin_lock(&ubifs_infos_lock); 1437 spin_lock(&ubifs_infos_lock);
1349 list_del(&c->infos_list); 1438 list_del(&c->infos_list);
1350 spin_unlock(&ubifs_infos_lock); 1439 spin_unlock(&ubifs_infos_lock);
@@ -1364,8 +1453,7 @@ static void ubifs_umount(struct ubifs_info *c)
1364 vfree(c->ileb_buf); 1453 vfree(c->ileb_buf);
1365 vfree(c->sbuf); 1454 vfree(c->sbuf);
1366 kfree(c->bottom_up_buf); 1455 kfree(c->bottom_up_buf);
1367 UBIFS_DBG(vfree(c->dbg_buf)); 1456 ubifs_debugging_exit(c);
1368 dbg_failure_mode_deregistration(c);
1369} 1457}
1370 1458
1371/** 1459/**
@@ -1387,12 +1475,9 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1387 c->remounting_rw = 1; 1475 c->remounting_rw = 1;
1388 c->always_chk_crc = 1; 1476 c->always_chk_crc = 1;
1389 1477
1390 /* Check for enough free space */ 1478 err = check_free_space(c);
1391 if (ubifs_calc_available(c, c->min_idx_lebs) <= 0) { 1479 if (err)
1392 ubifs_err("insufficient available space");
1393 err = -EINVAL;
1394 goto out; 1480 goto out;
1395 }
1396 1481
1397 if (c->old_leb_cnt != c->leb_cnt) { 1482 if (c->old_leb_cnt != c->leb_cnt) {
1398 struct ubifs_sb_node *sup; 1483 struct ubifs_sb_node *sup;
@@ -1515,20 +1600,24 @@ out:
1515 * @c: UBIFS file-system description object 1600 * @c: UBIFS file-system description object
1516 * 1601 *
1517 * This function is called during un-mounting and re-mounting, and it commits 1602 * This function is called during un-mounting and re-mounting, and it commits
1518 * the journal unless the "fast unmount" mode is enabled. It also avoids 1603 * the journal unless the "fast unmount" mode is enabled.
1519 * committing the journal if it contains too few data.
1520 */ 1604 */
1521static void commit_on_unmount(struct ubifs_info *c) 1605static void commit_on_unmount(struct ubifs_info *c)
1522{ 1606{
1523 if (!c->fast_unmount) { 1607 struct super_block *sb = c->vfs_sb;
1524 long long bud_bytes; 1608 long long bud_bytes;
1525 1609
1526 spin_lock(&c->buds_lock); 1610 /*
1527 bud_bytes = c->bud_bytes; 1611 * This function is called before the background thread is stopped, so
1528 spin_unlock(&c->buds_lock); 1612 * we may race with ongoing commit, which means we have to take
1529 if (bud_bytes > c->leb_size) 1613 * @c->bud_lock to access @c->bud_bytes.
1530 ubifs_run_commit(c); 1614 */
1531 } 1615 spin_lock(&c->buds_lock);
1616 bud_bytes = c->bud_bytes;
1617 spin_unlock(&c->buds_lock);
1618
1619 if (!c->fast_unmount && !(sb->s_flags & MS_RDONLY) && bud_bytes)
1620 ubifs_run_commit(c);
1532} 1621}
1533 1622
1534/** 1623/**
@@ -1849,7 +1938,6 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
1849 goto out_iput; 1938 goto out_iput;
1850 1939
1851 mutex_unlock(&c->umount_mutex); 1940 mutex_unlock(&c->umount_mutex);
1852
1853 return 0; 1941 return 0;
1854 1942
1855out_iput: 1943out_iput:
@@ -1955,7 +2043,7 @@ static void ubifs_kill_sb(struct super_block *sb)
1955 * We do 'commit_on_unmount()' here instead of 'ubifs_put_super()' 2043 * We do 'commit_on_unmount()' here instead of 'ubifs_put_super()'
1956 * in order to be outside BKL. 2044 * in order to be outside BKL.
1957 */ 2045 */
1958 if (sb->s_root && !(sb->s_flags & MS_RDONLY)) 2046 if (sb->s_root)
1959 commit_on_unmount(c); 2047 commit_on_unmount(c);
1960 /* The un-mount routine is actually done in put_super() */ 2048 /* The un-mount routine is actually done in put_super() */
1961 generic_shutdown_super(sb); 2049 generic_shutdown_super(sb);
@@ -2021,6 +2109,14 @@ static int __init ubifs_init(void)
2021 BUILD_BUG_ON(UBIFS_REF_NODE_SZ != 64); 2109 BUILD_BUG_ON(UBIFS_REF_NODE_SZ != 64);
2022 2110
2023 /* 2111 /*
2112 * We use 2 bit wide bit-fields to store compression type, which should
2113 * be amended if more compressors are added. The bit-fields are:
2114 * @compr_type in 'struct ubifs_inode', @default_compr in
2115 * 'struct ubifs_info' and @compr_type in 'struct ubifs_mount_opts'.
2116 */
2117 BUILD_BUG_ON(UBIFS_COMPR_TYPES_CNT > 4);
2118
2119 /*
2024 * We require that PAGE_CACHE_SIZE is greater-than-or-equal-to 2120 * We require that PAGE_CACHE_SIZE is greater-than-or-equal-to
2025 * UBIFS_BLOCK_SIZE. It is assumed that both are powers of 2. 2121 * UBIFS_BLOCK_SIZE. It is assumed that both are powers of 2.
2026 */ 2122 */
@@ -2049,11 +2145,17 @@ static int __init ubifs_init(void)
2049 2145
2050 err = ubifs_compressors_init(); 2146 err = ubifs_compressors_init();
2051 if (err) 2147 if (err)
2148 goto out_shrinker;
2149
2150 err = dbg_debugfs_init();
2151 if (err)
2052 goto out_compr; 2152 goto out_compr;
2053 2153
2054 return 0; 2154 return 0;
2055 2155
2056out_compr: 2156out_compr:
2157 ubifs_compressors_exit();
2158out_shrinker:
2057 unregister_shrinker(&ubifs_shrinker_info); 2159 unregister_shrinker(&ubifs_shrinker_info);
2058 kmem_cache_destroy(ubifs_inode_slab); 2160 kmem_cache_destroy(ubifs_inode_slab);
2059out_reg: 2161out_reg:
@@ -2068,6 +2170,7 @@ static void __exit ubifs_exit(void)
2068 ubifs_assert(list_empty(&ubifs_infos)); 2170 ubifs_assert(list_empty(&ubifs_infos));
2069 ubifs_assert(atomic_long_read(&ubifs_clean_zn_cnt) == 0); 2171 ubifs_assert(atomic_long_read(&ubifs_clean_zn_cnt) == 0);
2070 2172
2173 dbg_debugfs_exit();
2071 ubifs_compressors_exit(); 2174 ubifs_compressors_exit();
2072 unregister_shrinker(&ubifs_shrinker_info); 2175 unregister_shrinker(&ubifs_shrinker_info);
2073 kmem_cache_destroy(ubifs_inode_slab); 2176 kmem_cache_destroy(ubifs_inode_slab);
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index 6eef5344a14..f7e36f54552 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -2245,12 +2245,11 @@ int ubifs_tnc_replace(struct ubifs_info *c, const union ubifs_key *key,
2245 if (found) { 2245 if (found) {
2246 /* Ensure the znode is dirtied */ 2246 /* Ensure the znode is dirtied */
2247 if (znode->cnext || !ubifs_zn_dirty(znode)) { 2247 if (znode->cnext || !ubifs_zn_dirty(znode)) {
2248 znode = dirty_cow_bottom_up(c, 2248 znode = dirty_cow_bottom_up(c, znode);
2249 znode); 2249 if (IS_ERR(znode)) {
2250 if (IS_ERR(znode)) { 2250 err = PTR_ERR(znode);
2251 err = PTR_ERR(znode); 2251 goto out_unlock;
2252 goto out_unlock; 2252 }
2253 }
2254 } 2253 }
2255 zbr = &znode->zbranch[n]; 2254 zbr = &znode->zbranch[n];
2256 lnc_free(zbr); 2255 lnc_free(zbr);
@@ -2317,11 +2316,11 @@ int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key,
2317 2316
2318 /* Ensure the znode is dirtied */ 2317 /* Ensure the znode is dirtied */
2319 if (znode->cnext || !ubifs_zn_dirty(znode)) { 2318 if (znode->cnext || !ubifs_zn_dirty(znode)) {
2320 znode = dirty_cow_bottom_up(c, znode); 2319 znode = dirty_cow_bottom_up(c, znode);
2321 if (IS_ERR(znode)) { 2320 if (IS_ERR(znode)) {
2322 err = PTR_ERR(znode); 2321 err = PTR_ERR(znode);
2323 goto out_unlock; 2322 goto out_unlock;
2324 } 2323 }
2325 } 2324 }
2326 2325
2327 if (found == 1) { 2326 if (found == 1) {
@@ -2627,11 +2626,11 @@ int ubifs_tnc_remove_range(struct ubifs_info *c, union ubifs_key *from_key,
2627 2626
2628 /* Ensure the znode is dirtied */ 2627 /* Ensure the znode is dirtied */
2629 if (znode->cnext || !ubifs_zn_dirty(znode)) { 2628 if (znode->cnext || !ubifs_zn_dirty(znode)) {
2630 znode = dirty_cow_bottom_up(c, znode); 2629 znode = dirty_cow_bottom_up(c, znode);
2631 if (IS_ERR(znode)) { 2630 if (IS_ERR(znode)) {
2632 err = PTR_ERR(znode); 2631 err = PTR_ERR(znode);
2633 goto out_unlock; 2632 goto out_unlock;
2634 } 2633 }
2635 } 2634 }
2636 2635
2637 /* Remove all keys in range except the first */ 2636 /* Remove all keys in range except the first */
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
index 8ac76b1c2d5..fde8d127c76 100644
--- a/fs/ubifs/tnc_commit.c
+++ b/fs/ubifs/tnc_commit.c
@@ -553,8 +553,8 @@ static int layout_in_empty_space(struct ubifs_info *c)
553 } 553 }
554 554
555#ifdef CONFIG_UBIFS_FS_DEBUG 555#ifdef CONFIG_UBIFS_FS_DEBUG
556 c->new_ihead_lnum = lnum; 556 c->dbg->new_ihead_lnum = lnum;
557 c->new_ihead_offs = buf_offs; 557 c->dbg->new_ihead_offs = buf_offs;
558#endif 558#endif
559 559
560 return 0; 560 return 0;
@@ -802,8 +802,10 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot)
802 * budgeting subsystem to assume the index is already committed, 802 * budgeting subsystem to assume the index is already committed,
803 * even though it is not. 803 * even though it is not.
804 */ 804 */
805 ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c));
805 c->old_idx_sz = c->calc_idx_sz; 806 c->old_idx_sz = c->calc_idx_sz;
806 c->budg_uncommitted_idx = 0; 807 c->budg_uncommitted_idx = 0;
808 c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
807 spin_unlock(&c->space_lock); 809 spin_unlock(&c->space_lock);
808 mutex_unlock(&c->tnc_mutex); 810 mutex_unlock(&c->tnc_mutex);
809 811
@@ -1002,7 +1004,8 @@ static int write_index(struct ubifs_info *c)
1002 } 1004 }
1003 1005
1004#ifdef CONFIG_UBIFS_FS_DEBUG 1006#ifdef CONFIG_UBIFS_FS_DEBUG
1005 if (lnum != c->new_ihead_lnum || buf_offs != c->new_ihead_offs) { 1007 if (lnum != c->dbg->new_ihead_lnum ||
1008 buf_offs != c->dbg->new_ihead_offs) {
1006 ubifs_err("inconsistent ihead"); 1009 ubifs_err("inconsistent ihead");
1007 return -EINVAL; 1010 return -EINVAL;
1008 } 1011 }
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h
index 0b378042a3a..b25fc36cf72 100644
--- a/fs/ubifs/ubifs-media.h
+++ b/fs/ubifs/ubifs-media.h
@@ -51,6 +51,13 @@
51 */ 51 */
52#define UBIFS_MIN_COMPR_LEN 128 52#define UBIFS_MIN_COMPR_LEN 128
53 53
54/*
55 * If compressed data length is less than %UBIFS_MIN_COMPRESS_DIFF bytes
56 * shorter than uncompressed data length, UBIFS preferes to leave this data
57 * node uncompress, because it'll be read faster.
58 */
59#define UBIFS_MIN_COMPRESS_DIFF 64
60
54/* Root inode number */ 61/* Root inode number */
55#define UBIFS_ROOT_INO 1 62#define UBIFS_ROOT_INO 1
56 63
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 46b172560a0..fc2a4cc66d0 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -63,6 +63,14 @@
63#define SQNUM_WARN_WATERMARK 0xFFFFFFFF00000000ULL 63#define SQNUM_WARN_WATERMARK 0xFFFFFFFF00000000ULL
64#define SQNUM_WATERMARK 0xFFFFFFFFFF000000ULL 64#define SQNUM_WATERMARK 0xFFFFFFFFFF000000ULL
65 65
66/*
67 * Minimum amount of LEBs reserved for the index. At present the index needs at
68 * least 2 LEBs: one for the index head and one for in-the-gaps method (which
69 * currently does not cater for the index head and so excludes it from
70 * consideration).
71 */
72#define MIN_INDEX_LEBS 2
73
66/* Minimum amount of data UBIFS writes to the flash */ 74/* Minimum amount of data UBIFS writes to the flash */
67#define MIN_WRITE_SZ (UBIFS_DATA_NODE_SZ + 8) 75#define MIN_WRITE_SZ (UBIFS_DATA_NODE_SZ + 8)
68 76
@@ -386,12 +394,12 @@ struct ubifs_inode {
386 unsigned int dirty:1; 394 unsigned int dirty:1;
387 unsigned int xattr:1; 395 unsigned int xattr:1;
388 unsigned int bulk_read:1; 396 unsigned int bulk_read:1;
397 unsigned int compr_type:2;
389 struct mutex ui_mutex; 398 struct mutex ui_mutex;
390 spinlock_t ui_lock; 399 spinlock_t ui_lock;
391 loff_t synced_i_size; 400 loff_t synced_i_size;
392 loff_t ui_size; 401 loff_t ui_size;
393 int flags; 402 int flags;
394 int compr_type;
395 pgoff_t last_page_read; 403 pgoff_t last_page_read;
396 pgoff_t read_in_a_row; 404 pgoff_t read_in_a_row;
397 int data_len; 405 int data_len;
@@ -419,7 +427,7 @@ struct ubifs_unclean_leb {
419 * 427 *
420 * LPROPS_UNCAT: not categorized 428 * LPROPS_UNCAT: not categorized
421 * LPROPS_DIRTY: dirty > 0, not index 429 * LPROPS_DIRTY: dirty > 0, not index
422 * LPROPS_DIRTY_IDX: dirty + free > UBIFS_CH_SZ and index 430 * LPROPS_DIRTY_IDX: dirty + free > @c->min_idx_node_sze and index
423 * LPROPS_FREE: free > 0, not empty, not index 431 * LPROPS_FREE: free > 0, not empty, not index
424 * LPROPS_HEAP_CNT: number of heaps used for storing categorized LEBs 432 * LPROPS_HEAP_CNT: number of heaps used for storing categorized LEBs
425 * LPROPS_EMPTY: LEB is empty, not taken 433 * LPROPS_EMPTY: LEB is empty, not taken
@@ -473,8 +481,8 @@ struct ubifs_lprops {
473struct ubifs_lpt_lprops { 481struct ubifs_lpt_lprops {
474 int free; 482 int free;
475 int dirty; 483 int dirty;
476 unsigned tgc : 1; 484 unsigned tgc:1;
477 unsigned cmt : 1; 485 unsigned cmt:1;
478}; 486};
479 487
480/** 488/**
@@ -482,24 +490,26 @@ struct ubifs_lpt_lprops {
482 * @empty_lebs: number of empty LEBs 490 * @empty_lebs: number of empty LEBs
483 * @taken_empty_lebs: number of taken LEBs 491 * @taken_empty_lebs: number of taken LEBs
484 * @idx_lebs: number of indexing LEBs 492 * @idx_lebs: number of indexing LEBs
485 * @total_free: total free space in bytes 493 * @total_free: total free space in bytes (includes all LEBs)
486 * @total_dirty: total dirty space in bytes 494 * @total_dirty: total dirty space in bytes (includes all LEBs)
487 * @total_used: total used space in bytes (includes only data LEBs) 495 * @total_used: total used space in bytes (does not include index LEBs)
488 * @total_dead: total dead space in bytes (includes only data LEBs) 496 * @total_dead: total dead space in bytes (does not include index LEBs)
489 * @total_dark: total dark space in bytes (includes only data LEBs) 497 * @total_dark: total dark space in bytes (does not include index LEBs)
498 *
499 * The @taken_empty_lebs field counts the LEBs that are in the transient state
500 * of having been "taken" for use but not yet written to. @taken_empty_lebs is
501 * needed to account correctly for @gc_lnum, otherwise @empty_lebs could be
502 * used by itself (in which case 'unused_lebs' would be a better name). In the
503 * case of @gc_lnum, it is "taken" at mount time or whenever a LEB is retained
504 * by GC, but unlike other empty LEBs that are "taken", it may not be written
505 * straight away (i.e. before the next commit start or unmount), so either
506 * @gc_lnum must be specially accounted for, or the current approach followed
507 * i.e. count it under @taken_empty_lebs.
490 * 508 *
491 * N.B. total_dirty and total_used are different to other total_* fields, 509 * @empty_lebs includes @taken_empty_lebs.
492 * because they account _all_ LEBs, not just data LEBs.
493 * 510 *
494 * 'taken_empty_lebs' counts the LEBs that are in the transient state of having 511 * @total_used, @total_dead and @total_dark fields do not account indexing
495 * been 'taken' for use but not yet written to. 'taken_empty_lebs' is needed 512 * LEBs.
496 * to account correctly for gc_lnum, otherwise 'empty_lebs' could be used
497 * by itself (in which case 'unused_lebs' would be a better name). In the case
498 * of gc_lnum, it is 'taken' at mount time or whenever a LEB is retained by GC,
499 * but unlike other empty LEBs that are 'taken', it may not be written straight
500 * away (i.e. before the next commit start or unmount), so either gc_lnum must
501 * be specially accounted for, or the current approach followed i.e. count it
502 * under 'taken_empty_lebs'.
503 */ 513 */
504struct ubifs_lp_stats { 514struct ubifs_lp_stats {
505 int empty_lebs; 515 int empty_lebs;
@@ -893,15 +903,25 @@ struct ubifs_orphan {
893/** 903/**
894 * struct ubifs_mount_opts - UBIFS-specific mount options information. 904 * struct ubifs_mount_opts - UBIFS-specific mount options information.
895 * @unmount_mode: selected unmount mode (%0 default, %1 normal, %2 fast) 905 * @unmount_mode: selected unmount mode (%0 default, %1 normal, %2 fast)
896 * @bulk_read: enable bulk-reads 906 * @bulk_read: enable/disable bulk-reads (%0 default, %1 disabe, %2 enable)
897 * @chk_data_crc: check CRCs when reading data nodes 907 * @chk_data_crc: enable/disable CRC data checking when reading data nodes
908 * (%0 default, %1 disabe, %2 enable)
909 * @override_compr: override default compressor (%0 - do not override and use
910 * superblock compressor, %1 - override and use compressor
911 * specified in @compr_type)
912 * @compr_type: compressor type to override the superblock compressor with
913 * (%UBIFS_COMPR_NONE, etc)
898 */ 914 */
899struct ubifs_mount_opts { 915struct ubifs_mount_opts {
900 unsigned int unmount_mode:2; 916 unsigned int unmount_mode:2;
901 unsigned int bulk_read:2; 917 unsigned int bulk_read:2;
902 unsigned int chk_data_crc:2; 918 unsigned int chk_data_crc:2;
919 unsigned int override_compr:1;
920 unsigned int compr_type:2;
903}; 921};
904 922
923struct ubifs_debug_info;
924
905/** 925/**
906 * struct ubifs_info - UBIFS file-system description data structure 926 * struct ubifs_info - UBIFS file-system description data structure
907 * (per-superblock). 927 * (per-superblock).
@@ -946,6 +966,7 @@ struct ubifs_mount_opts {
946 * @no_chk_data_crc: do not check CRCs when reading data nodes (except during 966 * @no_chk_data_crc: do not check CRCs when reading data nodes (except during
947 * recovery) 967 * recovery)
948 * @bulk_read: enable bulk-reads 968 * @bulk_read: enable bulk-reads
969 * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc)
949 * 970 *
950 * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and 971 * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and
951 * @calc_idx_sz 972 * @calc_idx_sz
@@ -963,8 +984,6 @@ struct ubifs_mount_opts {
963 * @ileb_nxt: next pre-allocated index LEBs 984 * @ileb_nxt: next pre-allocated index LEBs
964 * @old_idx: tree of index nodes obsoleted since the last commit start 985 * @old_idx: tree of index nodes obsoleted since the last commit start
965 * @bottom_up_buf: a buffer which is used by 'dirty_cow_bottom_up()' in tnc.c 986 * @bottom_up_buf: a buffer which is used by 'dirty_cow_bottom_up()' in tnc.c
966 * @new_ihead_lnum: used by debugging to check ihead_lnum
967 * @new_ihead_offs: used by debugging to check ihead_offs
968 * 987 *
969 * @mst_node: master node 988 * @mst_node: master node
970 * @mst_offs: offset of valid master node 989 * @mst_offs: offset of valid master node
@@ -986,7 +1005,6 @@ struct ubifs_mount_opts {
986 * @main_lebs: count of LEBs in the main area 1005 * @main_lebs: count of LEBs in the main area
987 * @main_first: first LEB of the main area 1006 * @main_first: first LEB of the main area
988 * @main_bytes: main area size in bytes 1007 * @main_bytes: main area size in bytes
989 * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc)
990 * 1008 *
991 * @key_hash_type: type of the key hash 1009 * @key_hash_type: type of the key hash
992 * @key_hash: direntry key hash function 1010 * @key_hash: direntry key hash function
@@ -1149,15 +1167,7 @@ struct ubifs_mount_opts {
1149 * @always_chk_crc: always check CRCs (while mounting and remounting rw) 1167 * @always_chk_crc: always check CRCs (while mounting and remounting rw)
1150 * @mount_opts: UBIFS-specific mount options 1168 * @mount_opts: UBIFS-specific mount options
1151 * 1169 *
1152 * @dbg_buf: a buffer of LEB size used for debugging purposes 1170 * @dbg: debugging-related information
1153 * @old_zroot: old index root - used by 'dbg_check_old_index()'
1154 * @old_zroot_level: old index root level - used by 'dbg_check_old_index()'
1155 * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()'
1156 * @failure_mode: failure mode for recovery testing
1157 * @fail_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls
1158 * @fail_timeout: time in jiffies when delay of failure mode expires
1159 * @fail_cnt: current number of calls to failure mode I/O functions
1160 * @fail_cnt_max: number of calls by which to delay failure mode
1161 */ 1171 */
1162struct ubifs_info { 1172struct ubifs_info {
1163 struct super_block *vfs_sb; 1173 struct super_block *vfs_sb;
@@ -1196,6 +1206,7 @@ struct ubifs_info {
1196 unsigned int big_lpt:1; 1206 unsigned int big_lpt:1;
1197 unsigned int no_chk_data_crc:1; 1207 unsigned int no_chk_data_crc:1;
1198 unsigned int bulk_read:1; 1208 unsigned int bulk_read:1;
1209 unsigned int default_compr:2;
1199 1210
1200 struct mutex tnc_mutex; 1211 struct mutex tnc_mutex;
1201 struct ubifs_zbranch zroot; 1212 struct ubifs_zbranch zroot;
@@ -1212,10 +1223,6 @@ struct ubifs_info {
1212 int ileb_nxt; 1223 int ileb_nxt;
1213 struct rb_root old_idx; 1224 struct rb_root old_idx;
1214 int *bottom_up_buf; 1225 int *bottom_up_buf;
1215#ifdef CONFIG_UBIFS_FS_DEBUG
1216 int new_ihead_lnum;
1217 int new_ihead_offs;
1218#endif
1219 1226
1220 struct ubifs_mst_node *mst_node; 1227 struct ubifs_mst_node *mst_node;
1221 int mst_offs; 1228 int mst_offs;
@@ -1237,7 +1244,6 @@ struct ubifs_info {
1237 int main_lebs; 1244 int main_lebs;
1238 int main_first; 1245 int main_first;
1239 long long main_bytes; 1246 long long main_bytes;
1240 int default_compr;
1241 1247
1242 uint8_t key_hash_type; 1248 uint8_t key_hash_type;
1243 uint32_t (*key_hash)(const char *str, int len); 1249 uint32_t (*key_hash)(const char *str, int len);
@@ -1315,8 +1321,8 @@ struct ubifs_info {
1315 void *sbuf; 1321 void *sbuf;
1316 struct list_head idx_gc; 1322 struct list_head idx_gc;
1317 int idx_gc_cnt; 1323 int idx_gc_cnt;
1318 volatile int gc_seq; 1324 int gc_seq;
1319 volatile int gced_lnum; 1325 int gced_lnum;
1320 1326
1321 struct list_head infos_list; 1327 struct list_head infos_list;
1322 struct mutex umount_mutex; 1328 struct mutex umount_mutex;
@@ -1391,21 +1397,7 @@ struct ubifs_info {
1391 struct ubifs_mount_opts mount_opts; 1397 struct ubifs_mount_opts mount_opts;
1392 1398
1393#ifdef CONFIG_UBIFS_FS_DEBUG 1399#ifdef CONFIG_UBIFS_FS_DEBUG
1394 void *dbg_buf; 1400 struct ubifs_debug_info *dbg;
1395 struct ubifs_zbranch old_zroot;
1396 int old_zroot_level;
1397 unsigned long long old_zroot_sqnum;
1398 int failure_mode;
1399 int fail_delay;
1400 unsigned long fail_timeout;
1401 unsigned int fail_cnt;
1402 unsigned int fail_cnt_max;
1403 long long chk_lpt_sz;
1404 long long chk_lpt_sz2;
1405 long long chk_lpt_wastage;
1406 int chk_lpt_lebs;
1407 int new_nhead_lnum;
1408 int new_nhead_offs;
1409#endif 1401#endif
1410}; 1402};
1411 1403
@@ -1505,7 +1497,7 @@ void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode,
1505long long ubifs_get_free_space(struct ubifs_info *c); 1497long long ubifs_get_free_space(struct ubifs_info *c);
1506int ubifs_calc_min_idx_lebs(struct ubifs_info *c); 1498int ubifs_calc_min_idx_lebs(struct ubifs_info *c);
1507void ubifs_convert_page_budget(struct ubifs_info *c); 1499void ubifs_convert_page_budget(struct ubifs_info *c);
1508long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free); 1500long long ubifs_reported_space(const struct ubifs_info *c, long long free);
1509long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); 1501long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs);
1510 1502
1511/* find.c */ 1503/* find.c */
@@ -1639,6 +1631,9 @@ void ubifs_add_lpt_dirt(struct ubifs_info *c, int lnum, int dirty);
1639void ubifs_add_nnode_dirt(struct ubifs_info *c, struct ubifs_nnode *nnode); 1631void ubifs_add_nnode_dirt(struct ubifs_info *c, struct ubifs_nnode *nnode);
1640uint32_t ubifs_unpack_bits(uint8_t **addr, int *pos, int nrbits); 1632uint32_t ubifs_unpack_bits(uint8_t **addr, int *pos, int nrbits);
1641struct ubifs_nnode *ubifs_first_nnode(struct ubifs_info *c, int *hght); 1633struct ubifs_nnode *ubifs_first_nnode(struct ubifs_info *c, int *hght);
1634/* Needed only in debugging code in lpt_commit.c */
1635int ubifs_unpack_nnode(const struct ubifs_info *c, void *buf,
1636 struct ubifs_nnode *nnode);
1642 1637
1643/* lpt_commit.c */ 1638/* lpt_commit.c */
1644int ubifs_lpt_start_commit(struct ubifs_info *c); 1639int ubifs_lpt_start_commit(struct ubifs_info *c);
@@ -1714,7 +1709,7 @@ long ubifs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
1714 1709
1715/* compressor.c */ 1710/* compressor.c */
1716int __init ubifs_compressors_init(void); 1711int __init ubifs_compressors_init(void);
1717void __exit ubifs_compressors_exit(void); 1712void ubifs_compressors_exit(void);
1718void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len, 1713void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len,
1719 int *compr_type); 1714 int *compr_type);
1720int ubifs_decompress(const void *buf, int len, void *out, int *out_len, 1715int ubifs_decompress(const void *buf, int len, void *out, int *out_len,
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index a4f2b3ce45b..31fc84297dd 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -126,13 +126,13 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err)
126 } 126 }
127 mutex_unlock(&sbi->s_alloc_mutex); 127 mutex_unlock(&sbi->s_alloc_mutex);
128 inode->i_mode = mode; 128 inode->i_mode = mode;
129 inode->i_uid = current->fsuid; 129 inode->i_uid = current_fsuid();
130 if (dir->i_mode & S_ISGID) { 130 if (dir->i_mode & S_ISGID) {
131 inode->i_gid = dir->i_gid; 131 inode->i_gid = dir->i_gid;
132 if (S_ISDIR(mode)) 132 if (S_ISDIR(mode))
133 mode |= S_ISGID; 133 mode |= S_ISGID;
134 } else { 134 } else {
135 inode->i_gid = current->fsgid; 135 inode->i_gid = current_fsgid();
136 } 136 }
137 137
138 iinfo->i_location.logicalBlockNum = block; 138 iinfo->i_location.logicalBlockNum = block;
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 082409cd4b8..f84bfaa8d94 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -604,7 +604,7 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, int mode,
604 goto out; 604 goto out;
605 605
606 iinfo = UDF_I(inode); 606 iinfo = UDF_I(inode);
607 inode->i_uid = current->fsuid; 607 inode->i_uid = current_fsuid();
608 init_special_inode(inode, mode, rdev); 608 init_special_inode(inode, mode, rdev);
609 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); 609 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
610 if (!fi) { 610 if (!fi) {
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index ac181f6806a..6f5dcf00609 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -304,13 +304,13 @@ cg_found:
304 304
305 inode->i_ino = cg * uspi->s_ipg + bit; 305 inode->i_ino = cg * uspi->s_ipg + bit;
306 inode->i_mode = mode; 306 inode->i_mode = mode;
307 inode->i_uid = current->fsuid; 307 inode->i_uid = current_fsuid();
308 if (dir->i_mode & S_ISGID) { 308 if (dir->i_mode & S_ISGID) {
309 inode->i_gid = dir->i_gid; 309 inode->i_gid = dir->i_gid;
310 if (S_ISDIR(mode)) 310 if (S_ISDIR(mode))
311 inode->i_mode |= S_ISGID; 311 inode->i_mode |= S_ISGID;
312 } else 312 } else
313 inode->i_gid = current->fsgid; 313 inode->i_gid = current_fsgid();
314 314
315 inode->i_blocks = 0; 315 inode->i_blocks = 0;
316 inode->i_generation = 0; 316 inode->i_generation = 0;
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 737c9a42536..c3dc491fff8 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -85,13 +85,13 @@ xfs-y += xfs_alloc.o \
85 xfs_trans_inode.o \ 85 xfs_trans_inode.o \
86 xfs_trans_item.o \ 86 xfs_trans_item.o \
87 xfs_utils.o \ 87 xfs_utils.o \
88 xfs_vfsops.o \
89 xfs_vnodeops.o \ 88 xfs_vnodeops.o \
90 xfs_rw.o \ 89 xfs_rw.o \
91 xfs_dmops.o \ 90 xfs_dmops.o \
92 xfs_qmops.o 91 xfs_qmops.o
93 92
94xfs-$(CONFIG_XFS_TRACE) += xfs_dir2_trace.o 93xfs-$(CONFIG_XFS_TRACE) += xfs_btree_trace.o \
94 xfs_dir2_trace.o
95 95
96# Objects in linux/ 96# Objects in linux/
97xfs-y += $(addprefix $(XFS_LINUX)/, \ 97xfs-y += $(addprefix $(XFS_LINUX)/, \
@@ -106,7 +106,7 @@ xfs-y += $(addprefix $(XFS_LINUX)/, \
106 xfs_iops.o \ 106 xfs_iops.o \
107 xfs_lrw.o \ 107 xfs_lrw.o \
108 xfs_super.o \ 108 xfs_super.o \
109 xfs_vnode.o \ 109 xfs_sync.o \
110 xfs_xattr.o) 110 xfs_xattr.o)
111 111
112# Objects in support/ 112# Objects in support/
diff --git a/fs/xfs/linux-2.6/sv.h b/fs/xfs/linux-2.6/sv.h
index 351a8f454bd..4dfc7c37081 100644
--- a/fs/xfs/linux-2.6/sv.h
+++ b/fs/xfs/linux-2.6/sv.h
@@ -32,23 +32,15 @@ typedef struct sv_s {
32 wait_queue_head_t waiters; 32 wait_queue_head_t waiters;
33} sv_t; 33} sv_t;
34 34
35#define SV_FIFO 0x0 /* sv_t is FIFO type */ 35static inline void _sv_wait(sv_t *sv, spinlock_t *lock)
36#define SV_LIFO 0x2 /* sv_t is LIFO type */
37#define SV_PRIO 0x4 /* sv_t is PRIO type */
38#define SV_KEYED 0x6 /* sv_t is KEYED type */
39#define SV_DEFAULT SV_FIFO
40
41
42static inline void _sv_wait(sv_t *sv, spinlock_t *lock, int state,
43 unsigned long timeout)
44{ 36{
45 DECLARE_WAITQUEUE(wait, current); 37 DECLARE_WAITQUEUE(wait, current);
46 38
47 add_wait_queue_exclusive(&sv->waiters, &wait); 39 add_wait_queue_exclusive(&sv->waiters, &wait);
48 __set_current_state(state); 40 __set_current_state(TASK_UNINTERRUPTIBLE);
49 spin_unlock(lock); 41 spin_unlock(lock);
50 42
51 schedule_timeout(timeout); 43 schedule();
52 44
53 remove_wait_queue(&sv->waiters, &wait); 45 remove_wait_queue(&sv->waiters, &wait);
54} 46}
@@ -58,13 +50,7 @@ static inline void _sv_wait(sv_t *sv, spinlock_t *lock, int state,
58#define sv_destroy(sv) \ 50#define sv_destroy(sv) \
59 /*NOTHING*/ 51 /*NOTHING*/
60#define sv_wait(sv, pri, lock, s) \ 52#define sv_wait(sv, pri, lock, s) \
61 _sv_wait(sv, lock, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT) 53 _sv_wait(sv, lock)
62#define sv_wait_sig(sv, pri, lock, s) \
63 _sv_wait(sv, lock, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT)
64#define sv_timedwait(sv, pri, lock, s, svf, ts, rts) \
65 _sv_wait(sv, lock, TASK_UNINTERRUPTIBLE, timespec_to_jiffies(ts))
66#define sv_timedwait_sig(sv, pri, lock, s, svf, ts, rts) \
67 _sv_wait(sv, lock, TASK_INTERRUPTIBLE, timespec_to_jiffies(ts))
68#define sv_signal(sv) \ 54#define sv_signal(sv) \
69 wake_up(&(sv)->waiters) 55 wake_up(&(sv)->waiters)
70#define sv_broadcast(sv) \ 56#define sv_broadcast(sv) \
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index a44d68eb50b..de3a198f771 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -42,6 +42,40 @@
42#include <linux/pagevec.h> 42#include <linux/pagevec.h>
43#include <linux/writeback.h> 43#include <linux/writeback.h>
44 44
45
46/*
47 * Prime number of hash buckets since address is used as the key.
48 */
49#define NVSYNC 37
50#define to_ioend_wq(v) (&xfs_ioend_wq[((unsigned long)v) % NVSYNC])
51static wait_queue_head_t xfs_ioend_wq[NVSYNC];
52
53void __init
54xfs_ioend_init(void)
55{
56 int i;
57
58 for (i = 0; i < NVSYNC; i++)
59 init_waitqueue_head(&xfs_ioend_wq[i]);
60}
61
62void
63xfs_ioend_wait(
64 xfs_inode_t *ip)
65{
66 wait_queue_head_t *wq = to_ioend_wq(ip);
67
68 wait_event(*wq, (atomic_read(&ip->i_iocount) == 0));
69}
70
71STATIC void
72xfs_ioend_wake(
73 xfs_inode_t *ip)
74{
75 if (atomic_dec_and_test(&ip->i_iocount))
76 wake_up(to_ioend_wq(ip));
77}
78
45STATIC void 79STATIC void
46xfs_count_page_state( 80xfs_count_page_state(
47 struct page *page, 81 struct page *page,
@@ -146,16 +180,25 @@ xfs_destroy_ioend(
146 xfs_ioend_t *ioend) 180 xfs_ioend_t *ioend)
147{ 181{
148 struct buffer_head *bh, *next; 182 struct buffer_head *bh, *next;
183 struct xfs_inode *ip = XFS_I(ioend->io_inode);
149 184
150 for (bh = ioend->io_buffer_head; bh; bh = next) { 185 for (bh = ioend->io_buffer_head; bh; bh = next) {
151 next = bh->b_private; 186 next = bh->b_private;
152 bh->b_end_io(bh, !ioend->io_error); 187 bh->b_end_io(bh, !ioend->io_error);
153 } 188 }
154 if (unlikely(ioend->io_error)) { 189
155 vn_ioerror(XFS_I(ioend->io_inode), ioend->io_error, 190 /*
156 __FILE__,__LINE__); 191 * Volume managers supporting multiple paths can send back ENODEV
192 * when the final path disappears. In this case continuing to fill
193 * the page cache with dirty data which cannot be written out is
194 * evil, so prevent that.
195 */
196 if (unlikely(ioend->io_error == -ENODEV)) {
197 xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ,
198 __FILE__, __LINE__);
157 } 199 }
158 vn_iowake(XFS_I(ioend->io_inode)); 200
201 xfs_ioend_wake(ip);
159 mempool_free(ioend, xfs_ioend_pool); 202 mempool_free(ioend, xfs_ioend_pool);
160} 203}
161 204
@@ -191,7 +234,7 @@ xfs_setfilesize(
191 ip->i_d.di_size = isize; 234 ip->i_d.di_size = isize;
192 ip->i_update_core = 1; 235 ip->i_update_core = 1;
193 ip->i_update_size = 1; 236 ip->i_update_size = 1;
194 mark_inode_dirty_sync(ioend->io_inode); 237 xfs_mark_inode_dirty_sync(ip);
195 } 238 }
196 239
197 xfs_iunlock(ip, XFS_ILOCK_EXCL); 240 xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -317,14 +360,9 @@ xfs_map_blocks(
317 xfs_iomap_t *mapp, 360 xfs_iomap_t *mapp,
318 int flags) 361 int flags)
319{ 362{
320 xfs_inode_t *ip = XFS_I(inode); 363 int nmaps = 1;
321 int error, nmaps = 1; 364
322 365 return -xfs_iomap(XFS_I(inode), offset, count, flags, mapp, &nmaps);
323 error = xfs_iomap(ip, offset, count,
324 flags, mapp, &nmaps);
325 if (!error && (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)))
326 xfs_iflags_set(ip, XFS_IMODIFIED);
327 return -error;
328} 366}
329 367
330STATIC_INLINE int 368STATIC_INLINE int
@@ -512,7 +550,7 @@ xfs_cancel_ioend(
512 unlock_buffer(bh); 550 unlock_buffer(bh);
513 } while ((bh = next_bh) != NULL); 551 } while ((bh = next_bh) != NULL);
514 552
515 vn_iowake(XFS_I(ioend->io_inode)); 553 xfs_ioend_wake(XFS_I(ioend->io_inode));
516 mempool_free(ioend, xfs_ioend_pool); 554 mempool_free(ioend, xfs_ioend_pool);
517 } while ((ioend = next) != NULL); 555 } while ((ioend = next) != NULL);
518} 556}
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index 3ba0631a381..7b26f5ff969 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -43,4 +43,7 @@ typedef struct xfs_ioend {
43extern const struct address_space_operations xfs_address_space_operations; 43extern const struct address_space_operations xfs_address_space_operations;
44extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int); 44extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int);
45 45
46extern void xfs_ioend_init(void);
47extern void xfs_ioend_wait(struct xfs_inode *);
48
46#endif /* __XFS_AOPS_H__ */ 49#endif /* __XFS_AOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 36d5fcd3f59..cb329edc925 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -630,6 +630,29 @@ xfs_buf_get_flags(
630 return NULL; 630 return NULL;
631} 631}
632 632
633STATIC int
634_xfs_buf_read(
635 xfs_buf_t *bp,
636 xfs_buf_flags_t flags)
637{
638 int status;
639
640 XB_TRACE(bp, "_xfs_buf_read", (unsigned long)flags);
641
642 ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE)));
643 ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
644
645 bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \
646 XBF_READ_AHEAD | _XBF_RUN_QUEUES);
647 bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | \
648 XBF_READ_AHEAD | _XBF_RUN_QUEUES);
649
650 status = xfs_buf_iorequest(bp);
651 if (!status && !(flags & XBF_ASYNC))
652 status = xfs_buf_iowait(bp);
653 return status;
654}
655
633xfs_buf_t * 656xfs_buf_t *
634xfs_buf_read_flags( 657xfs_buf_read_flags(
635 xfs_buftarg_t *target, 658 xfs_buftarg_t *target,
@@ -646,7 +669,7 @@ xfs_buf_read_flags(
646 if (!XFS_BUF_ISDONE(bp)) { 669 if (!XFS_BUF_ISDONE(bp)) {
647 XB_TRACE(bp, "read", (unsigned long)flags); 670 XB_TRACE(bp, "read", (unsigned long)flags);
648 XFS_STATS_INC(xb_get_read); 671 XFS_STATS_INC(xb_get_read);
649 xfs_buf_iostart(bp, flags); 672 _xfs_buf_read(bp, flags);
650 } else if (flags & XBF_ASYNC) { 673 } else if (flags & XBF_ASYNC) {
651 XB_TRACE(bp, "read_async", (unsigned long)flags); 674 XB_TRACE(bp, "read_async", (unsigned long)flags);
652 /* 675 /*
@@ -1048,50 +1071,39 @@ xfs_buf_ioerror(
1048 XB_TRACE(bp, "ioerror", (unsigned long)error); 1071 XB_TRACE(bp, "ioerror", (unsigned long)error);
1049} 1072}
1050 1073
1051/*
1052 * Initiate I/O on a buffer, based on the flags supplied.
1053 * The b_iodone routine in the buffer supplied will only be called
1054 * when all of the subsidiary I/O requests, if any, have been completed.
1055 */
1056int 1074int
1057xfs_buf_iostart( 1075xfs_bawrite(
1058 xfs_buf_t *bp, 1076 void *mp,
1059 xfs_buf_flags_t flags) 1077 struct xfs_buf *bp)
1060{ 1078{
1061 int status = 0; 1079 XB_TRACE(bp, "bawrite", 0);
1062 1080
1063 XB_TRACE(bp, "iostart", (unsigned long)flags); 1081 ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
1064 1082
1065 if (flags & XBF_DELWRI) { 1083 xfs_buf_delwri_dequeue(bp);
1066 bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC);
1067 bp->b_flags |= flags & (XBF_DELWRI | XBF_ASYNC);
1068 xfs_buf_delwri_queue(bp, 1);
1069 return 0;
1070 }
1071 1084
1072 bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \ 1085 bp->b_flags &= ~(XBF_READ | XBF_DELWRI | XBF_READ_AHEAD);
1073 XBF_READ_AHEAD | _XBF_RUN_QUEUES); 1086 bp->b_flags |= (XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES);
1074 bp->b_flags |= flags & (XBF_READ | XBF_WRITE | XBF_ASYNC | \ 1087
1075 XBF_READ_AHEAD | _XBF_RUN_QUEUES); 1088 bp->b_mount = mp;
1089 bp->b_strat = xfs_bdstrat_cb;
1090 return xfs_bdstrat_cb(bp);
1091}
1076 1092
1077 BUG_ON(bp->b_bn == XFS_BUF_DADDR_NULL); 1093void
1094xfs_bdwrite(
1095 void *mp,
1096 struct xfs_buf *bp)
1097{
1098 XB_TRACE(bp, "bdwrite", 0);
1078 1099
1079 /* For writes allow an alternate strategy routine to precede 1100 bp->b_strat = xfs_bdstrat_cb;
1080 * the actual I/O request (which may not be issued at all in 1101 bp->b_mount = mp;
1081 * a shutdown situation, for example).
1082 */
1083 status = (flags & XBF_WRITE) ?
1084 xfs_buf_iostrategy(bp) : xfs_buf_iorequest(bp);
1085 1102
1086 /* Wait for I/O if we are not an async request. 1103 bp->b_flags &= ~XBF_READ;
1087 * Note: async I/O request completion will release the buffer, 1104 bp->b_flags |= (XBF_DELWRI | XBF_ASYNC);
1088 * and that can already be done by this point. So using the
1089 * buffer pointer from here on, after async I/O, is invalid.
1090 */
1091 if (!status && !(flags & XBF_ASYNC))
1092 status = xfs_buf_iowait(bp);
1093 1105
1094 return status; 1106 xfs_buf_delwri_queue(bp, 1);
1095} 1107}
1096 1108
1097STATIC_INLINE void 1109STATIC_INLINE void
@@ -1114,8 +1126,7 @@ xfs_buf_bio_end_io(
1114 unsigned int blocksize = bp->b_target->bt_bsize; 1126 unsigned int blocksize = bp->b_target->bt_bsize;
1115 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 1127 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1116 1128
1117 if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) 1129 xfs_buf_ioerror(bp, -error);
1118 bp->b_error = EIO;
1119 1130
1120 do { 1131 do {
1121 struct page *page = bvec->bv_page; 1132 struct page *page = bvec->bv_page;
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 456519a088c..288ae7c4c80 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -168,7 +168,7 @@ typedef struct xfs_buf {
168 struct completion b_iowait; /* queue for I/O waiters */ 168 struct completion b_iowait; /* queue for I/O waiters */
169 void *b_fspriv; 169 void *b_fspriv;
170 void *b_fspriv2; 170 void *b_fspriv2;
171 void *b_fspriv3; 171 struct xfs_mount *b_mount;
172 unsigned short b_error; /* error code on I/O */ 172 unsigned short b_error; /* error code on I/O */
173 unsigned int b_page_count; /* size of page array */ 173 unsigned int b_page_count; /* size of page array */
174 unsigned int b_offset; /* page offset in first page */ 174 unsigned int b_offset; /* page offset in first page */
@@ -214,9 +214,10 @@ extern void xfs_buf_lock(xfs_buf_t *);
214extern void xfs_buf_unlock(xfs_buf_t *); 214extern void xfs_buf_unlock(xfs_buf_t *);
215 215
216/* Buffer Read and Write Routines */ 216/* Buffer Read and Write Routines */
217extern int xfs_bawrite(void *mp, xfs_buf_t *bp);
218extern void xfs_bdwrite(void *mp, xfs_buf_t *bp);
217extern void xfs_buf_ioend(xfs_buf_t *, int); 219extern void xfs_buf_ioend(xfs_buf_t *, int);
218extern void xfs_buf_ioerror(xfs_buf_t *, int); 220extern void xfs_buf_ioerror(xfs_buf_t *, int);
219extern int xfs_buf_iostart(xfs_buf_t *, xfs_buf_flags_t);
220extern int xfs_buf_iorequest(xfs_buf_t *); 221extern int xfs_buf_iorequest(xfs_buf_t *);
221extern int xfs_buf_iowait(xfs_buf_t *); 222extern int xfs_buf_iowait(xfs_buf_t *);
222extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, xfs_caddr_t, 223extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, xfs_caddr_t,
@@ -311,10 +312,6 @@ extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *);
311#define XFS_BUF_UNORDERED(bp) ((bp)->b_flags &= ~XBF_ORDERED) 312#define XFS_BUF_UNORDERED(bp) ((bp)->b_flags &= ~XBF_ORDERED)
312#define XFS_BUF_ISORDERED(bp) ((bp)->b_flags & XBF_ORDERED) 313#define XFS_BUF_ISORDERED(bp) ((bp)->b_flags & XBF_ORDERED)
313 314
314#define XFS_BUF_SHUT(bp) do { } while (0)
315#define XFS_BUF_UNSHUT(bp) do { } while (0)
316#define XFS_BUF_ISSHUT(bp) (0)
317
318#define XFS_BUF_HOLD(bp) xfs_buf_hold(bp) 315#define XFS_BUF_HOLD(bp) xfs_buf_hold(bp)
319#define XFS_BUF_READ(bp) ((bp)->b_flags |= XBF_READ) 316#define XFS_BUF_READ(bp) ((bp)->b_flags |= XBF_READ)
320#define XFS_BUF_UNREAD(bp) ((bp)->b_flags &= ~XBF_READ) 317#define XFS_BUF_UNREAD(bp) ((bp)->b_flags &= ~XBF_READ)
@@ -334,8 +331,6 @@ extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *);
334#define XFS_BUF_SET_FSPRIVATE(bp, val) ((bp)->b_fspriv = (void*)(val)) 331#define XFS_BUF_SET_FSPRIVATE(bp, val) ((bp)->b_fspriv = (void*)(val))
335#define XFS_BUF_FSPRIVATE2(bp, type) ((type)(bp)->b_fspriv2) 332#define XFS_BUF_FSPRIVATE2(bp, type) ((type)(bp)->b_fspriv2)
336#define XFS_BUF_SET_FSPRIVATE2(bp, val) ((bp)->b_fspriv2 = (void*)(val)) 333#define XFS_BUF_SET_FSPRIVATE2(bp, val) ((bp)->b_fspriv2 = (void*)(val))
337#define XFS_BUF_FSPRIVATE3(bp, type) ((type)(bp)->b_fspriv3)
338#define XFS_BUF_SET_FSPRIVATE3(bp, val) ((bp)->b_fspriv3 = (void*)(val))
339#define XFS_BUF_SET_START(bp) do { } while (0) 334#define XFS_BUF_SET_START(bp) do { } while (0)
340#define XFS_BUF_SET_BRELSE_FUNC(bp, func) ((bp)->b_relse = (func)) 335#define XFS_BUF_SET_BRELSE_FUNC(bp, func) ((bp)->b_relse = (func))
341 336
@@ -366,14 +361,6 @@ extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *);
366#define XFS_BUF_TARGET(bp) ((bp)->b_target) 361#define XFS_BUF_TARGET(bp) ((bp)->b_target)
367#define XFS_BUFTARG_NAME(target) xfs_buf_target_name(target) 362#define XFS_BUFTARG_NAME(target) xfs_buf_target_name(target)
368 363
369static inline int xfs_bawrite(void *mp, xfs_buf_t *bp)
370{
371 bp->b_fspriv3 = mp;
372 bp->b_strat = xfs_bdstrat_cb;
373 xfs_buf_delwri_dequeue(bp);
374 return xfs_buf_iostart(bp, XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES);
375}
376
377static inline void xfs_buf_relse(xfs_buf_t *bp) 364static inline void xfs_buf_relse(xfs_buf_t *bp)
378{ 365{
379 if (!bp->b_relse) 366 if (!bp->b_relse)
@@ -414,17 +401,6 @@ static inline int XFS_bwrite(xfs_buf_t *bp)
414 return error; 401 return error;
415} 402}
416 403
417/*
418 * No error can be returned from xfs_buf_iostart for delwri
419 * buffers as they are queued and no I/O is issued.
420 */
421static inline void xfs_bdwrite(void *mp, xfs_buf_t *bp)
422{
423 bp->b_strat = xfs_bdstrat_cb;
424 bp->b_fspriv3 = mp;
425 (void)xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC);
426}
427
428#define XFS_bdstrat(bp) xfs_buf_iorequest(bp) 404#define XFS_bdstrat(bp) xfs_buf_iorequest(bp)
429 405
430#define xfs_iowait(bp) xfs_buf_iowait(bp) 406#define xfs_iowait(bp) xfs_buf_iowait(bp)
diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h
index 652721ce0ea..55bddf3b609 100644
--- a/fs/xfs/linux-2.6/xfs_cred.h
+++ b/fs/xfs/linux-2.6/xfs_cred.h
@@ -23,16 +23,6 @@
23/* 23/*
24 * Credentials 24 * Credentials
25 */ 25 */
26typedef struct cred { 26typedef const struct cred cred_t;
27 /* EMPTY */
28} cred_t;
29
30extern struct cred *sys_cred;
31
32/* this is a hack.. (assumes sys_cred is the only cred_t in the system) */
33static inline int capable_cred(cred_t *cr, int cid)
34{
35 return (cr == sys_cred) ? 1 : capable(cid);
36}
37 27
38#endif /* __XFS_CRED_H__ */ 28#endif /* __XFS_CRED_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 7f7abec25e1..595751f7835 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -29,7 +29,6 @@
29#include "xfs_vnodeops.h" 29#include "xfs_vnodeops.h"
30#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
31#include "xfs_inode.h" 31#include "xfs_inode.h"
32#include "xfs_vfsops.h"
33 32
34/* 33/*
35 * Note that we only accept fileids which are long enough rather than allow 34 * Note that we only accept fileids which are long enough rather than allow
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 3fee790f138..e14c4e3aea0 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -36,89 +36,54 @@
36#include "xfs_inode.h" 36#include "xfs_inode.h"
37#include "xfs_error.h" 37#include "xfs_error.h"
38#include "xfs_rw.h" 38#include "xfs_rw.h"
39#include "xfs_ioctl32.h"
40#include "xfs_vnodeops.h" 39#include "xfs_vnodeops.h"
40#include "xfs_da_btree.h"
41#include "xfs_ioctl.h"
41 42
42#include <linux/dcache.h> 43#include <linux/dcache.h>
43#include <linux/smp_lock.h> 44#include <linux/smp_lock.h>
44 45
45static struct vm_operations_struct xfs_file_vm_ops; 46static struct vm_operations_struct xfs_file_vm_ops;
46 47
47STATIC_INLINE ssize_t 48STATIC ssize_t
48__xfs_file_read( 49xfs_file_aio_read(
49 struct kiocb *iocb, 50 struct kiocb *iocb,
50 const struct iovec *iov, 51 const struct iovec *iov,
51 unsigned long nr_segs, 52 unsigned long nr_segs,
52 int ioflags,
53 loff_t pos) 53 loff_t pos)
54{ 54{
55 struct file *file = iocb->ki_filp; 55 struct file *file = iocb->ki_filp;
56 int ioflags = IO_ISAIO;
56 57
57 BUG_ON(iocb->ki_pos != pos); 58 BUG_ON(iocb->ki_pos != pos);
58 if (unlikely(file->f_flags & O_DIRECT)) 59 if (unlikely(file->f_flags & O_DIRECT))
59 ioflags |= IO_ISDIRECT; 60 ioflags |= IO_ISDIRECT;
61 if (file->f_mode & FMODE_NOCMTIME)
62 ioflags |= IO_INVIS;
60 return xfs_read(XFS_I(file->f_path.dentry->d_inode), iocb, iov, 63 return xfs_read(XFS_I(file->f_path.dentry->d_inode), iocb, iov,
61 nr_segs, &iocb->ki_pos, ioflags); 64 nr_segs, &iocb->ki_pos, ioflags);
62} 65}
63 66
64STATIC ssize_t 67STATIC ssize_t
65xfs_file_aio_read( 68xfs_file_aio_write(
66 struct kiocb *iocb,
67 const struct iovec *iov,
68 unsigned long nr_segs,
69 loff_t pos)
70{
71 return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO, pos);
72}
73
74STATIC ssize_t
75xfs_file_aio_read_invis(
76 struct kiocb *iocb,
77 const struct iovec *iov,
78 unsigned long nr_segs,
79 loff_t pos)
80{
81 return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos);
82}
83
84STATIC_INLINE ssize_t
85__xfs_file_write(
86 struct kiocb *iocb, 69 struct kiocb *iocb,
87 const struct iovec *iov, 70 const struct iovec *iov,
88 unsigned long nr_segs, 71 unsigned long nr_segs,
89 int ioflags,
90 loff_t pos) 72 loff_t pos)
91{ 73{
92 struct file *file = iocb->ki_filp; 74 struct file *file = iocb->ki_filp;
75 int ioflags = IO_ISAIO;
93 76
94 BUG_ON(iocb->ki_pos != pos); 77 BUG_ON(iocb->ki_pos != pos);
95 if (unlikely(file->f_flags & O_DIRECT)) 78 if (unlikely(file->f_flags & O_DIRECT))
96 ioflags |= IO_ISDIRECT; 79 ioflags |= IO_ISDIRECT;
80 if (file->f_mode & FMODE_NOCMTIME)
81 ioflags |= IO_INVIS;
97 return xfs_write(XFS_I(file->f_mapping->host), iocb, iov, nr_segs, 82 return xfs_write(XFS_I(file->f_mapping->host), iocb, iov, nr_segs,
98 &iocb->ki_pos, ioflags); 83 &iocb->ki_pos, ioflags);
99} 84}
100 85
101STATIC ssize_t 86STATIC ssize_t
102xfs_file_aio_write(
103 struct kiocb *iocb,
104 const struct iovec *iov,
105 unsigned long nr_segs,
106 loff_t pos)
107{
108 return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO, pos);
109}
110
111STATIC ssize_t
112xfs_file_aio_write_invis(
113 struct kiocb *iocb,
114 const struct iovec *iov,
115 unsigned long nr_segs,
116 loff_t pos)
117{
118 return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos);
119}
120
121STATIC ssize_t
122xfs_file_splice_read( 87xfs_file_splice_read(
123 struct file *infilp, 88 struct file *infilp,
124 loff_t *ppos, 89 loff_t *ppos,
@@ -126,20 +91,13 @@ xfs_file_splice_read(
126 size_t len, 91 size_t len,
127 unsigned int flags) 92 unsigned int flags)
128{ 93{
129 return xfs_splice_read(XFS_I(infilp->f_path.dentry->d_inode), 94 int ioflags = 0;
130 infilp, ppos, pipe, len, flags, 0); 95
131} 96 if (infilp->f_mode & FMODE_NOCMTIME)
97 ioflags |= IO_INVIS;
132 98
133STATIC ssize_t
134xfs_file_splice_read_invis(
135 struct file *infilp,
136 loff_t *ppos,
137 struct pipe_inode_info *pipe,
138 size_t len,
139 unsigned int flags)
140{
141 return xfs_splice_read(XFS_I(infilp->f_path.dentry->d_inode), 99 return xfs_splice_read(XFS_I(infilp->f_path.dentry->d_inode),
142 infilp, ppos, pipe, len, flags, IO_INVIS); 100 infilp, ppos, pipe, len, flags, ioflags);
143} 101}
144 102
145STATIC ssize_t 103STATIC ssize_t
@@ -150,30 +108,49 @@ xfs_file_splice_write(
150 size_t len, 108 size_t len,
151 unsigned int flags) 109 unsigned int flags)
152{ 110{
153 return xfs_splice_write(XFS_I(outfilp->f_path.dentry->d_inode), 111 int ioflags = 0;
154 pipe, outfilp, ppos, len, flags, 0); 112
155} 113 if (outfilp->f_mode & FMODE_NOCMTIME)
114 ioflags |= IO_INVIS;
156 115
157STATIC ssize_t
158xfs_file_splice_write_invis(
159 struct pipe_inode_info *pipe,
160 struct file *outfilp,
161 loff_t *ppos,
162 size_t len,
163 unsigned int flags)
164{
165 return xfs_splice_write(XFS_I(outfilp->f_path.dentry->d_inode), 116 return xfs_splice_write(XFS_I(outfilp->f_path.dentry->d_inode),
166 pipe, outfilp, ppos, len, flags, IO_INVIS); 117 pipe, outfilp, ppos, len, flags, ioflags);
167} 118}
168 119
169STATIC int 120STATIC int
170xfs_file_open( 121xfs_file_open(
171 struct inode *inode, 122 struct inode *inode,
172 struct file *filp) 123 struct file *file)
173{ 124{
174 if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) 125 if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
175 return -EFBIG; 126 return -EFBIG;
176 return -xfs_open(XFS_I(inode)); 127 if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
128 return -EIO;
129 return 0;
130}
131
132STATIC int
133xfs_dir_open(
134 struct inode *inode,
135 struct file *file)
136{
137 struct xfs_inode *ip = XFS_I(inode);
138 int mode;
139 int error;
140
141 error = xfs_file_open(inode, file);
142 if (error)
143 return error;
144
145 /*
146 * If there are any blocks, read-ahead block 0 as we're almost
147 * certain to have the next operation be a read there.
148 */
149 mode = xfs_ilock_map_shared(ip);
150 if (ip->i_d.di_nextents > 0)
151 xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK);
152 xfs_iunlock(ip, mode);
153 return 0;
177} 154}
178 155
179STATIC int 156STATIC int
@@ -227,7 +204,7 @@ xfs_file_readdir(
227 * point we can change the ->readdir prototype to include the 204 * point we can change the ->readdir prototype to include the
228 * buffer size. 205 * buffer size.
229 */ 206 */
230 bufsize = (size_t)min_t(loff_t, PAGE_SIZE, inode->i_size); 207 bufsize = (size_t)min_t(loff_t, PAGE_SIZE, ip->i_d.di_size);
231 208
232 error = xfs_readdir(ip, dirent, bufsize, 209 error = xfs_readdir(ip, dirent, bufsize,
233 (xfs_off_t *)&filp->f_pos, filldir); 210 (xfs_off_t *)&filp->f_pos, filldir);
@@ -248,48 +225,6 @@ xfs_file_mmap(
248 return 0; 225 return 0;
249} 226}
250 227
251STATIC long
252xfs_file_ioctl(
253 struct file *filp,
254 unsigned int cmd,
255 unsigned long p)
256{
257 int error;
258 struct inode *inode = filp->f_path.dentry->d_inode;
259
260 error = xfs_ioctl(XFS_I(inode), filp, 0, cmd, (void __user *)p);
261 xfs_iflags_set(XFS_I(inode), XFS_IMODIFIED);
262
263 /* NOTE: some of the ioctl's return positive #'s as a
264 * byte count indicating success, such as
265 * readlink_by_handle. So we don't "sign flip"
266 * like most other routines. This means true
267 * errors need to be returned as a negative value.
268 */
269 return error;
270}
271
272STATIC long
273xfs_file_ioctl_invis(
274 struct file *filp,
275 unsigned int cmd,
276 unsigned long p)
277{
278 int error;
279 struct inode *inode = filp->f_path.dentry->d_inode;
280
281 error = xfs_ioctl(XFS_I(inode), filp, IO_INVIS, cmd, (void __user *)p);
282 xfs_iflags_set(XFS_I(inode), XFS_IMODIFIED);
283
284 /* NOTE: some of the ioctl's return positive #'s as a
285 * byte count indicating success, such as
286 * readlink_by_handle. So we don't "sign flip"
287 * like most other routines. This means true
288 * errors need to be returned as a negative value.
289 */
290 return error;
291}
292
293/* 228/*
294 * mmap()d file has taken write protection fault and is being made 229 * mmap()d file has taken write protection fault and is being made
295 * writable. We can set the page state up correctly for a writable 230 * writable. We can set the page state up correctly for a writable
@@ -325,26 +260,8 @@ const struct file_operations xfs_file_operations = {
325#endif 260#endif
326}; 261};
327 262
328const struct file_operations xfs_invis_file_operations = {
329 .llseek = generic_file_llseek,
330 .read = do_sync_read,
331 .write = do_sync_write,
332 .aio_read = xfs_file_aio_read_invis,
333 .aio_write = xfs_file_aio_write_invis,
334 .splice_read = xfs_file_splice_read_invis,
335 .splice_write = xfs_file_splice_write_invis,
336 .unlocked_ioctl = xfs_file_ioctl_invis,
337#ifdef CONFIG_COMPAT
338 .compat_ioctl = xfs_file_compat_invis_ioctl,
339#endif
340 .mmap = xfs_file_mmap,
341 .open = xfs_file_open,
342 .release = xfs_file_release,
343 .fsync = xfs_file_fsync,
344};
345
346
347const struct file_operations xfs_dir_file_operations = { 263const struct file_operations xfs_dir_file_operations = {
264 .open = xfs_dir_open,
348 .read = generic_read_dir, 265 .read = generic_read_dir,
349 .readdir = xfs_file_readdir, 266 .readdir = xfs_file_readdir,
350 .llseek = generic_file_llseek, 267 .llseek = generic_file_llseek,
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
index 36caa6d957d..5aeb7777696 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -24,6 +24,10 @@ int fs_noerr(void) { return 0; }
24int fs_nosys(void) { return ENOSYS; } 24int fs_nosys(void) { return ENOSYS; }
25void fs_noval(void) { return; } 25void fs_noval(void) { return; }
26 26
27/*
28 * note: all filemap functions return negative error codes. These
29 * need to be inverted before returning to the xfs core functions.
30 */
27void 31void
28xfs_tosspages( 32xfs_tosspages(
29 xfs_inode_t *ip, 33 xfs_inode_t *ip,
@@ -53,7 +57,7 @@ xfs_flushinval_pages(
53 if (!ret) 57 if (!ret)
54 truncate_inode_pages(mapping, first); 58 truncate_inode_pages(mapping, first);
55 } 59 }
56 return ret; 60 return -ret;
57} 61}
58 62
59int 63int
@@ -72,10 +76,23 @@ xfs_flush_pages(
72 xfs_iflags_clear(ip, XFS_ITRUNCATED); 76 xfs_iflags_clear(ip, XFS_ITRUNCATED);
73 ret = filemap_fdatawrite(mapping); 77 ret = filemap_fdatawrite(mapping);
74 if (flags & XFS_B_ASYNC) 78 if (flags & XFS_B_ASYNC)
75 return ret; 79 return -ret;
76 ret2 = filemap_fdatawait(mapping); 80 ret2 = filemap_fdatawait(mapping);
77 if (!ret) 81 if (!ret)
78 ret = ret2; 82 ret = ret2;
79 } 83 }
80 return ret; 84 return -ret;
85}
86
87int
88xfs_wait_on_pages(
89 xfs_inode_t *ip,
90 xfs_off_t first,
91 xfs_off_t last)
92{
93 struct address_space *mapping = VFS_I(ip)->i_mapping;
94
95 if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
96 return -filemap_fdatawait(mapping);
97 return 0;
81} 98}
diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c
index ef90e64641e..2ae8b1ccb02 100644
--- a/fs/xfs/linux-2.6/xfs_globals.c
+++ b/fs/xfs/linux-2.6/xfs_globals.c
@@ -26,7 +26,6 @@
26 */ 26 */
27xfs_param_t xfs_params = { 27xfs_param_t xfs_params = {
28 /* MIN DFLT MAX */ 28 /* MIN DFLT MAX */
29 .restrict_chown = { 0, 1, 1 },
30 .sgid_inherit = { 0, 0, 1 }, 29 .sgid_inherit = { 0, 0, 1 },
31 .symlink_mode = { 0, 0, 1 }, 30 .symlink_mode = { 0, 0, 1 },
32 .panic_mask = { 0, 0, 255 }, 31 .panic_mask = { 0, 0, 255 },
@@ -43,10 +42,3 @@ xfs_param_t xfs_params = {
43 .inherit_nodfrg = { 0, 1, 1 }, 42 .inherit_nodfrg = { 0, 1, 1 },
44 .fstrm_timer = { 1, 30*100, 3600*100}, 43 .fstrm_timer = { 1, 30*100, 3600*100},
45}; 44};
46
47/*
48 * Global system credential structure.
49 */
50static cred_t sys_cred_val;
51cred_t *sys_cred = &sys_cred_val;
52
diff --git a/fs/xfs/linux-2.6/xfs_globals.h b/fs/xfs/linux-2.6/xfs_globals.h
index 2770b0085ee..69f71caf061 100644
--- a/fs/xfs/linux-2.6/xfs_globals.h
+++ b/fs/xfs/linux-2.6/xfs_globals.h
@@ -19,6 +19,5 @@
19#define __XFS_GLOBALS_H__ 19#define __XFS_GLOBALS_H__
20 20
21extern uint64_t xfs_panic_mask; /* set to cause more panics */ 21extern uint64_t xfs_panic_mask; /* set to cause more panics */
22extern struct cred *sys_cred;
23 22
24#endif /* __XFS_GLOBALS_H__ */ 23#endif /* __XFS_GLOBALS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index d3438c72dca..67205f6198b 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -68,26 +68,22 @@
68 * XFS_IOC_PATH_TO_HANDLE 68 * XFS_IOC_PATH_TO_HANDLE
69 * returns full handle for a path 69 * returns full handle for a path
70 */ 70 */
71STATIC int 71int
72xfs_find_handle( 72xfs_find_handle(
73 unsigned int cmd, 73 unsigned int cmd,
74 void __user *arg) 74 xfs_fsop_handlereq_t *hreq)
75{ 75{
76 int hsize; 76 int hsize;
77 xfs_handle_t handle; 77 xfs_handle_t handle;
78 xfs_fsop_handlereq_t hreq;
79 struct inode *inode; 78 struct inode *inode;
80 79
81 if (copy_from_user(&hreq, arg, sizeof(hreq)))
82 return -XFS_ERROR(EFAULT);
83
84 memset((char *)&handle, 0, sizeof(handle)); 80 memset((char *)&handle, 0, sizeof(handle));
85 81
86 switch (cmd) { 82 switch (cmd) {
87 case XFS_IOC_PATH_TO_FSHANDLE: 83 case XFS_IOC_PATH_TO_FSHANDLE:
88 case XFS_IOC_PATH_TO_HANDLE: { 84 case XFS_IOC_PATH_TO_HANDLE: {
89 struct path path; 85 struct path path;
90 int error = user_lpath((const char __user *)hreq.path, &path); 86 int error = user_lpath((const char __user *)hreq->path, &path);
91 if (error) 87 if (error)
92 return error; 88 return error;
93 89
@@ -101,7 +97,7 @@ xfs_find_handle(
101 case XFS_IOC_FD_TO_HANDLE: { 97 case XFS_IOC_FD_TO_HANDLE: {
102 struct file *file; 98 struct file *file;
103 99
104 file = fget(hreq.fd); 100 file = fget(hreq->fd);
105 if (!file) 101 if (!file)
106 return -EBADF; 102 return -EBADF;
107 103
@@ -158,8 +154,8 @@ xfs_find_handle(
158 } 154 }
159 155
160 /* now copy our handle into the user buffer & write out the size */ 156 /* now copy our handle into the user buffer & write out the size */
161 if (copy_to_user(hreq.ohandle, &handle, hsize) || 157 if (copy_to_user(hreq->ohandle, &handle, hsize) ||
162 copy_to_user(hreq.ohandlen, &hsize, sizeof(__s32))) { 158 copy_to_user(hreq->ohandlen, &hsize, sizeof(__s32))) {
163 iput(inode); 159 iput(inode);
164 return -XFS_ERROR(EFAULT); 160 return -XFS_ERROR(EFAULT);
165 } 161 }
@@ -249,27 +245,25 @@ xfs_vget_fsop_handlereq(
249 return 0; 245 return 0;
250} 246}
251 247
252STATIC int 248int
253xfs_open_by_handle( 249xfs_open_by_handle(
254 xfs_mount_t *mp, 250 xfs_mount_t *mp,
255 void __user *arg, 251 xfs_fsop_handlereq_t *hreq,
256 struct file *parfilp, 252 struct file *parfilp,
257 struct inode *parinode) 253 struct inode *parinode)
258{ 254{
255 const struct cred *cred = current_cred();
259 int error; 256 int error;
260 int new_fd; 257 int new_fd;
261 int permflag; 258 int permflag;
262 struct file *filp; 259 struct file *filp;
263 struct inode *inode; 260 struct inode *inode;
264 struct dentry *dentry; 261 struct dentry *dentry;
265 xfs_fsop_handlereq_t hreq;
266 262
267 if (!capable(CAP_SYS_ADMIN)) 263 if (!capable(CAP_SYS_ADMIN))
268 return -XFS_ERROR(EPERM); 264 return -XFS_ERROR(EPERM);
269 if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
270 return -XFS_ERROR(EFAULT);
271 265
272 error = xfs_vget_fsop_handlereq(mp, parinode, &hreq, &inode); 266 error = xfs_vget_fsop_handlereq(mp, parinode, hreq, &inode);
273 if (error) 267 if (error)
274 return -error; 268 return -error;
275 269
@@ -280,10 +274,10 @@ xfs_open_by_handle(
280 } 274 }
281 275
282#if BITS_PER_LONG != 32 276#if BITS_PER_LONG != 32
283 hreq.oflags |= O_LARGEFILE; 277 hreq->oflags |= O_LARGEFILE;
284#endif 278#endif
285 /* Put open permission in namei format. */ 279 /* Put open permission in namei format. */
286 permflag = hreq.oflags; 280 permflag = hreq->oflags;
287 if ((permflag+1) & O_ACCMODE) 281 if ((permflag+1) & O_ACCMODE)
288 permflag++; 282 permflag++;
289 if (permflag & O_TRUNC) 283 if (permflag & O_TRUNC)
@@ -321,15 +315,16 @@ xfs_open_by_handle(
321 mntget(parfilp->f_path.mnt); 315 mntget(parfilp->f_path.mnt);
322 316
323 /* Create file pointer. */ 317 /* Create file pointer. */
324 filp = dentry_open(dentry, parfilp->f_path.mnt, hreq.oflags); 318 filp = dentry_open(dentry, parfilp->f_path.mnt, hreq->oflags, cred);
325 if (IS_ERR(filp)) { 319 if (IS_ERR(filp)) {
326 put_unused_fd(new_fd); 320 put_unused_fd(new_fd);
327 return -XFS_ERROR(-PTR_ERR(filp)); 321 return -XFS_ERROR(-PTR_ERR(filp));
328 } 322 }
323
329 if (inode->i_mode & S_IFREG) { 324 if (inode->i_mode & S_IFREG) {
330 /* invisible operation should not change atime */ 325 /* invisible operation should not change atime */
331 filp->f_flags |= O_NOATIME; 326 filp->f_flags |= O_NOATIME;
332 filp->f_op = &xfs_invis_file_operations; 327 filp->f_mode |= FMODE_NOCMTIME;
333 } 328 }
334 329
335 fd_install(new_fd, filp); 330 fd_install(new_fd, filp);
@@ -362,24 +357,21 @@ do_readlink(
362} 357}
363 358
364 359
365STATIC int 360int
366xfs_readlink_by_handle( 361xfs_readlink_by_handle(
367 xfs_mount_t *mp, 362 xfs_mount_t *mp,
368 void __user *arg, 363 xfs_fsop_handlereq_t *hreq,
369 struct inode *parinode) 364 struct inode *parinode)
370{ 365{
371 struct inode *inode; 366 struct inode *inode;
372 xfs_fsop_handlereq_t hreq;
373 __u32 olen; 367 __u32 olen;
374 void *link; 368 void *link;
375 int error; 369 int error;
376 370
377 if (!capable(CAP_SYS_ADMIN)) 371 if (!capable(CAP_SYS_ADMIN))
378 return -XFS_ERROR(EPERM); 372 return -XFS_ERROR(EPERM);
379 if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
380 return -XFS_ERROR(EFAULT);
381 373
382 error = xfs_vget_fsop_handlereq(mp, parinode, &hreq, &inode); 374 error = xfs_vget_fsop_handlereq(mp, parinode, hreq, &inode);
383 if (error) 375 if (error)
384 return -error; 376 return -error;
385 377
@@ -389,7 +381,7 @@ xfs_readlink_by_handle(
389 goto out_iput; 381 goto out_iput;
390 } 382 }
391 383
392 if (copy_from_user(&olen, hreq.ohandlen, sizeof(__u32))) { 384 if (copy_from_user(&olen, hreq->ohandlen, sizeof(__u32))) {
393 error = -XFS_ERROR(EFAULT); 385 error = -XFS_ERROR(EFAULT);
394 goto out_iput; 386 goto out_iput;
395 } 387 }
@@ -401,7 +393,7 @@ xfs_readlink_by_handle(
401 error = -xfs_readlink(XFS_I(inode), link); 393 error = -xfs_readlink(XFS_I(inode), link);
402 if (error) 394 if (error)
403 goto out_kfree; 395 goto out_kfree;
404 error = do_readlink(hreq.ohandle, olen, link); 396 error = do_readlink(hreq->ohandle, olen, link);
405 if (error) 397 if (error)
406 goto out_kfree; 398 goto out_kfree;
407 399
@@ -500,7 +492,7 @@ xfs_attrlist_by_handle(
500 return -error; 492 return -error;
501} 493}
502 494
503STATIC int 495int
504xfs_attrmulti_attr_get( 496xfs_attrmulti_attr_get(
505 struct inode *inode, 497 struct inode *inode,
506 char *name, 498 char *name,
@@ -529,7 +521,7 @@ xfs_attrmulti_attr_get(
529 return error; 521 return error;
530} 522}
531 523
532STATIC int 524int
533xfs_attrmulti_attr_set( 525xfs_attrmulti_attr_set(
534 struct inode *inode, 526 struct inode *inode,
535 char *name, 527 char *name,
@@ -559,7 +551,7 @@ xfs_attrmulti_attr_set(
559 return error; 551 return error;
560} 552}
561 553
562STATIC int 554int
563xfs_attrmulti_attr_remove( 555xfs_attrmulti_attr_remove(
564 struct inode *inode, 556 struct inode *inode,
565 char *name, 557 char *name,
@@ -661,19 +653,26 @@ xfs_attrmulti_by_handle(
661 return -error; 653 return -error;
662} 654}
663 655
664STATIC int 656int
665xfs_ioc_space( 657xfs_ioc_space(
666 struct xfs_inode *ip, 658 struct xfs_inode *ip,
667 struct inode *inode, 659 struct inode *inode,
668 struct file *filp, 660 struct file *filp,
669 int ioflags, 661 int ioflags,
670 unsigned int cmd, 662 unsigned int cmd,
671 void __user *arg) 663 xfs_flock64_t *bf)
672{ 664{
673 xfs_flock64_t bf;
674 int attr_flags = 0; 665 int attr_flags = 0;
675 int error; 666 int error;
676 667
668 /*
669 * Only allow the sys admin to reserve space unless
670 * unwritten extents are enabled.
671 */
672 if (!xfs_sb_version_hasextflgbit(&ip->i_mount->m_sb) &&
673 !capable(CAP_SYS_ADMIN))
674 return -XFS_ERROR(EPERM);
675
677 if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) 676 if (inode->i_flags & (S_IMMUTABLE|S_APPEND))
678 return -XFS_ERROR(EPERM); 677 return -XFS_ERROR(EPERM);
679 678
@@ -683,16 +682,12 @@ xfs_ioc_space(
683 if (!S_ISREG(inode->i_mode)) 682 if (!S_ISREG(inode->i_mode))
684 return -XFS_ERROR(EINVAL); 683 return -XFS_ERROR(EINVAL);
685 684
686 if (copy_from_user(&bf, arg, sizeof(bf)))
687 return -XFS_ERROR(EFAULT);
688
689 if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) 685 if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
690 attr_flags |= XFS_ATTR_NONBLOCK; 686 attr_flags |= XFS_ATTR_NONBLOCK;
691 if (ioflags & IO_INVIS) 687 if (ioflags & IO_INVIS)
692 attr_flags |= XFS_ATTR_DMI; 688 attr_flags |= XFS_ATTR_DMI;
693 689
694 error = xfs_change_file_space(ip, cmd, &bf, filp->f_pos, 690 error = xfs_change_file_space(ip, cmd, bf, filp->f_pos, attr_flags);
695 NULL, attr_flags);
696 return -error; 691 return -error;
697} 692}
698 693
@@ -1007,7 +1002,7 @@ xfs_ioctl_setattr(
1007 * to the file owner ID, except in cases where the 1002 * to the file owner ID, except in cases where the
1008 * CAP_FSETID capability is applicable. 1003 * CAP_FSETID capability is applicable.
1009 */ 1004 */
1010 if (current->fsuid != ip->i_d.di_uid && !capable(CAP_FOWNER)) { 1005 if (current_fsuid() != ip->i_d.di_uid && !capable(CAP_FOWNER)) {
1011 code = XFS_ERROR(EPERM); 1006 code = XFS_ERROR(EPERM);
1012 goto error_return; 1007 goto error_return;
1013 } 1008 }
@@ -1104,10 +1099,6 @@ xfs_ioctl_setattr(
1104 1099
1105 /* 1100 /*
1106 * Change file ownership. Must be the owner or privileged. 1101 * Change file ownership. Must be the owner or privileged.
1107 * If the system was configured with the "restricted_chown"
1108 * option, the owner is not permitted to give away the file,
1109 * and can change the group id only to a group of which he
1110 * or she is a member.
1111 */ 1102 */
1112 if (mask & FSX_PROJID) { 1103 if (mask & FSX_PROJID) {
1113 /* 1104 /*
@@ -1136,7 +1127,7 @@ xfs_ioctl_setattr(
1136 * the superblock version number since projids didn't 1127 * the superblock version number since projids didn't
1137 * exist before DINODE_VERSION_2 and SB_VERSION_NLINK. 1128 * exist before DINODE_VERSION_2 and SB_VERSION_NLINK.
1138 */ 1129 */
1139 if (ip->i_d.di_version == XFS_DINODE_VERSION_1) 1130 if (ip->i_d.di_version == 1)
1140 xfs_bump_ino_vers2(tp, ip); 1131 xfs_bump_ino_vers2(tp, ip);
1141 } 1132 }
1142 1133
@@ -1255,43 +1246,67 @@ xfs_ioc_setxflags(
1255} 1246}
1256 1247
1257STATIC int 1248STATIC int
1249xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full)
1250{
1251 struct getbmap __user *base = *ap;
1252
1253 /* copy only getbmap portion (not getbmapx) */
1254 if (copy_to_user(base, bmv, sizeof(struct getbmap)))
1255 return XFS_ERROR(EFAULT);
1256
1257 *ap += sizeof(struct getbmap);
1258 return 0;
1259}
1260
1261STATIC int
1258xfs_ioc_getbmap( 1262xfs_ioc_getbmap(
1259 struct xfs_inode *ip, 1263 struct xfs_inode *ip,
1260 int ioflags, 1264 int ioflags,
1261 unsigned int cmd, 1265 unsigned int cmd,
1262 void __user *arg) 1266 void __user *arg)
1263{ 1267{
1264 struct getbmap bm; 1268 struct getbmapx bmx;
1265 int iflags;
1266 int error; 1269 int error;
1267 1270
1268 if (copy_from_user(&bm, arg, sizeof(bm))) 1271 if (copy_from_user(&bmx, arg, sizeof(struct getbmapx)))
1269 return -XFS_ERROR(EFAULT); 1272 return -XFS_ERROR(EFAULT);
1270 1273
1271 if (bm.bmv_count < 2) 1274 if (bmx.bmv_count < 2)
1272 return -XFS_ERROR(EINVAL); 1275 return -XFS_ERROR(EINVAL);
1273 1276
1274 iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0); 1277 bmx.bmv_iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0);
1275 if (ioflags & IO_INVIS) 1278 if (ioflags & IO_INVIS)
1276 iflags |= BMV_IF_NO_DMAPI_READ; 1279 bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ;
1277 1280
1278 error = xfs_getbmap(ip, &bm, (struct getbmap __user *)arg+1, iflags); 1281 error = xfs_getbmap(ip, &bmx, xfs_getbmap_format,
1282 (struct getbmap *)arg+1);
1279 if (error) 1283 if (error)
1280 return -error; 1284 return -error;
1281 1285
1282 if (copy_to_user(arg, &bm, sizeof(bm))) 1286 /* copy back header - only size of getbmap */
1287 if (copy_to_user(arg, &bmx, sizeof(struct getbmap)))
1283 return -XFS_ERROR(EFAULT); 1288 return -XFS_ERROR(EFAULT);
1284 return 0; 1289 return 0;
1285} 1290}
1286 1291
1287STATIC int 1292STATIC int
1293xfs_getbmapx_format(void **ap, struct getbmapx *bmv, int *full)
1294{
1295 struct getbmapx __user *base = *ap;
1296
1297 if (copy_to_user(base, bmv, sizeof(struct getbmapx)))
1298 return XFS_ERROR(EFAULT);
1299
1300 *ap += sizeof(struct getbmapx);
1301 return 0;
1302}
1303
1304STATIC int
1288xfs_ioc_getbmapx( 1305xfs_ioc_getbmapx(
1289 struct xfs_inode *ip, 1306 struct xfs_inode *ip,
1290 void __user *arg) 1307 void __user *arg)
1291{ 1308{
1292 struct getbmapx bmx; 1309 struct getbmapx bmx;
1293 struct getbmap bm;
1294 int iflags;
1295 int error; 1310 int error;
1296 1311
1297 if (copy_from_user(&bmx, arg, sizeof(bmx))) 1312 if (copy_from_user(&bmx, arg, sizeof(bmx)))
@@ -1300,46 +1315,46 @@ xfs_ioc_getbmapx(
1300 if (bmx.bmv_count < 2) 1315 if (bmx.bmv_count < 2)
1301 return -XFS_ERROR(EINVAL); 1316 return -XFS_ERROR(EINVAL);
1302 1317
1303 /* 1318 if (bmx.bmv_iflags & (~BMV_IF_VALID))
1304 * Map input getbmapx structure to a getbmap
1305 * structure for xfs_getbmap.
1306 */
1307 GETBMAP_CONVERT(bmx, bm);
1308
1309 iflags = bmx.bmv_iflags;
1310
1311 if (iflags & (~BMV_IF_VALID))
1312 return -XFS_ERROR(EINVAL); 1319 return -XFS_ERROR(EINVAL);
1313 1320
1314 iflags |= BMV_IF_EXTENDED; 1321 error = xfs_getbmap(ip, &bmx, xfs_getbmapx_format,
1315 1322 (struct getbmapx *)arg+1);
1316 error = xfs_getbmap(ip, &bm, (struct getbmapx __user *)arg+1, iflags);
1317 if (error) 1323 if (error)
1318 return -error; 1324 return -error;
1319 1325
1320 GETBMAP_CONVERT(bm, bmx); 1326 /* copy back header */
1321 1327 if (copy_to_user(arg, &bmx, sizeof(struct getbmapx)))
1322 if (copy_to_user(arg, &bmx, sizeof(bmx)))
1323 return -XFS_ERROR(EFAULT); 1328 return -XFS_ERROR(EFAULT);
1324 1329
1325 return 0; 1330 return 0;
1326} 1331}
1327 1332
1328int 1333/*
1329xfs_ioctl( 1334 * Note: some of the ioctl's return positive numbers as a
1330 xfs_inode_t *ip, 1335 * byte count indicating success, such as readlink_by_handle.
1336 * So we don't "sign flip" like most other routines. This means
1337 * true errors need to be returned as a negative value.
1338 */
1339long
1340xfs_file_ioctl(
1331 struct file *filp, 1341 struct file *filp,
1332 int ioflags,
1333 unsigned int cmd, 1342 unsigned int cmd,
1334 void __user *arg) 1343 unsigned long p)
1335{ 1344{
1336 struct inode *inode = filp->f_path.dentry->d_inode; 1345 struct inode *inode = filp->f_path.dentry->d_inode;
1337 xfs_mount_t *mp = ip->i_mount; 1346 struct xfs_inode *ip = XFS_I(inode);
1347 struct xfs_mount *mp = ip->i_mount;
1348 void __user *arg = (void __user *)p;
1349 int ioflags = 0;
1338 int error; 1350 int error;
1339 1351
1340 xfs_itrace_entry(XFS_I(inode)); 1352 if (filp->f_mode & FMODE_NOCMTIME)
1341 switch (cmd) { 1353 ioflags |= IO_INVIS;
1342 1354
1355 xfs_itrace_entry(ip);
1356
1357 switch (cmd) {
1343 case XFS_IOC_ALLOCSP: 1358 case XFS_IOC_ALLOCSP:
1344 case XFS_IOC_FREESP: 1359 case XFS_IOC_FREESP:
1345 case XFS_IOC_RESVSP: 1360 case XFS_IOC_RESVSP:
@@ -1347,17 +1362,13 @@ xfs_ioctl(
1347 case XFS_IOC_ALLOCSP64: 1362 case XFS_IOC_ALLOCSP64:
1348 case XFS_IOC_FREESP64: 1363 case XFS_IOC_FREESP64:
1349 case XFS_IOC_RESVSP64: 1364 case XFS_IOC_RESVSP64:
1350 case XFS_IOC_UNRESVSP64: 1365 case XFS_IOC_UNRESVSP64: {
1351 /* 1366 xfs_flock64_t bf;
1352 * Only allow the sys admin to reserve space unless
1353 * unwritten extents are enabled.
1354 */
1355 if (!xfs_sb_version_hasextflgbit(&mp->m_sb) &&
1356 !capable(CAP_SYS_ADMIN))
1357 return -EPERM;
1358
1359 return xfs_ioc_space(ip, inode, filp, ioflags, cmd, arg);
1360 1367
1368 if (copy_from_user(&bf, arg, sizeof(bf)))
1369 return -XFS_ERROR(EFAULT);
1370 return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf);
1371 }
1361 case XFS_IOC_DIOINFO: { 1372 case XFS_IOC_DIOINFO: {
1362 struct dioattr da; 1373 struct dioattr da;
1363 xfs_buftarg_t *target = 1374 xfs_buftarg_t *target =
@@ -1417,18 +1428,30 @@ xfs_ioctl(
1417 1428
1418 case XFS_IOC_FD_TO_HANDLE: 1429 case XFS_IOC_FD_TO_HANDLE:
1419 case XFS_IOC_PATH_TO_HANDLE: 1430 case XFS_IOC_PATH_TO_HANDLE:
1420 case XFS_IOC_PATH_TO_FSHANDLE: 1431 case XFS_IOC_PATH_TO_FSHANDLE: {
1421 return xfs_find_handle(cmd, arg); 1432 xfs_fsop_handlereq_t hreq;
1422 1433
1423 case XFS_IOC_OPEN_BY_HANDLE: 1434 if (copy_from_user(&hreq, arg, sizeof(hreq)))
1424 return xfs_open_by_handle(mp, arg, filp, inode); 1435 return -XFS_ERROR(EFAULT);
1436 return xfs_find_handle(cmd, &hreq);
1437 }
1438 case XFS_IOC_OPEN_BY_HANDLE: {
1439 xfs_fsop_handlereq_t hreq;
1425 1440
1441 if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
1442 return -XFS_ERROR(EFAULT);
1443 return xfs_open_by_handle(mp, &hreq, filp, inode);
1444 }
1426 case XFS_IOC_FSSETDM_BY_HANDLE: 1445 case XFS_IOC_FSSETDM_BY_HANDLE:
1427 return xfs_fssetdm_by_handle(mp, arg, inode); 1446 return xfs_fssetdm_by_handle(mp, arg, inode);
1428 1447
1429 case XFS_IOC_READLINK_BY_HANDLE: 1448 case XFS_IOC_READLINK_BY_HANDLE: {
1430 return xfs_readlink_by_handle(mp, arg, inode); 1449 xfs_fsop_handlereq_t hreq;
1431 1450
1451 if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
1452 return -XFS_ERROR(EFAULT);
1453 return xfs_readlink_by_handle(mp, &hreq, inode);
1454 }
1432 case XFS_IOC_ATTRLIST_BY_HANDLE: 1455 case XFS_IOC_ATTRLIST_BY_HANDLE:
1433 return xfs_attrlist_by_handle(mp, arg, inode); 1456 return xfs_attrlist_by_handle(mp, arg, inode);
1434 1457
@@ -1436,7 +1459,11 @@ xfs_ioctl(
1436 return xfs_attrmulti_by_handle(mp, arg, filp, inode); 1459 return xfs_attrmulti_by_handle(mp, arg, filp, inode);
1437 1460
1438 case XFS_IOC_SWAPEXT: { 1461 case XFS_IOC_SWAPEXT: {
1439 error = xfs_swapext((struct xfs_swapext __user *)arg); 1462 struct xfs_swapext sxp;
1463
1464 if (copy_from_user(&sxp, arg, sizeof(xfs_swapext_t)))
1465 return -XFS_ERROR(EFAULT);
1466 error = xfs_swapext(&sxp);
1440 return -error; 1467 return -error;
1441 } 1468 }
1442 1469
@@ -1492,9 +1519,6 @@ xfs_ioctl(
1492 case XFS_IOC_FSGROWFSDATA: { 1519 case XFS_IOC_FSGROWFSDATA: {
1493 xfs_growfs_data_t in; 1520 xfs_growfs_data_t in;
1494 1521
1495 if (!capable(CAP_SYS_ADMIN))
1496 return -EPERM;
1497
1498 if (copy_from_user(&in, arg, sizeof(in))) 1522 if (copy_from_user(&in, arg, sizeof(in)))
1499 return -XFS_ERROR(EFAULT); 1523 return -XFS_ERROR(EFAULT);
1500 1524
@@ -1505,9 +1529,6 @@ xfs_ioctl(
1505 case XFS_IOC_FSGROWFSLOG: { 1529 case XFS_IOC_FSGROWFSLOG: {
1506 xfs_growfs_log_t in; 1530 xfs_growfs_log_t in;
1507 1531
1508 if (!capable(CAP_SYS_ADMIN))
1509 return -EPERM;
1510
1511 if (copy_from_user(&in, arg, sizeof(in))) 1532 if (copy_from_user(&in, arg, sizeof(in)))
1512 return -XFS_ERROR(EFAULT); 1533 return -XFS_ERROR(EFAULT);
1513 1534
@@ -1518,9 +1539,6 @@ xfs_ioctl(
1518 case XFS_IOC_FSGROWFSRT: { 1539 case XFS_IOC_FSGROWFSRT: {
1519 xfs_growfs_rt_t in; 1540 xfs_growfs_rt_t in;
1520 1541
1521 if (!capable(CAP_SYS_ADMIN))
1522 return -EPERM;
1523
1524 if (copy_from_user(&in, arg, sizeof(in))) 1542 if (copy_from_user(&in, arg, sizeof(in)))
1525 return -XFS_ERROR(EFAULT); 1543 return -XFS_ERROR(EFAULT);
1526 1544
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.h b/fs/xfs/linux-2.6/xfs_ioctl.h
new file mode 100644
index 00000000000..8c16bf2d7e0
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_ioctl.h
@@ -0,0 +1,82 @@
1/*
2 * Copyright (c) 2008 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_IOCTL_H__
19#define __XFS_IOCTL_H__
20
21extern int
22xfs_ioc_space(
23 struct xfs_inode *ip,
24 struct inode *inode,
25 struct file *filp,
26 int ioflags,
27 unsigned int cmd,
28 xfs_flock64_t *bf);
29
30extern int
31xfs_find_handle(
32 unsigned int cmd,
33 xfs_fsop_handlereq_t *hreq);
34
35extern int
36xfs_open_by_handle(
37 xfs_mount_t *mp,
38 xfs_fsop_handlereq_t *hreq,
39 struct file *parfilp,
40 struct inode *parinode);
41
42extern int
43xfs_readlink_by_handle(
44 xfs_mount_t *mp,
45 xfs_fsop_handlereq_t *hreq,
46 struct inode *parinode);
47
48extern int
49xfs_attrmulti_attr_get(
50 struct inode *inode,
51 char *name,
52 char __user *ubuf,
53 __uint32_t *len,
54 __uint32_t flags);
55
56extern int
57 xfs_attrmulti_attr_set(
58 struct inode *inode,
59 char *name,
60 const char __user *ubuf,
61 __uint32_t len,
62 __uint32_t flags);
63
64extern int
65xfs_attrmulti_attr_remove(
66 struct inode *inode,
67 char *name,
68 __uint32_t flags);
69
70extern long
71xfs_file_ioctl(
72 struct file *filp,
73 unsigned int cmd,
74 unsigned long p);
75
76extern long
77xfs_file_compat_ioctl(
78 struct file *file,
79 unsigned int cmd,
80 unsigned long arg);
81
82#endif
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index a4b254eb43b..0504cece9f6 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -16,11 +16,7 @@
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18#include <linux/compat.h> 18#include <linux/compat.h>
19#include <linux/init.h>
20#include <linux/ioctl.h> 19#include <linux/ioctl.h>
21#include <linux/syscalls.h>
22#include <linux/types.h>
23#include <linux/fs.h>
24#include <asm/uaccess.h> 20#include <asm/uaccess.h>
25#include "xfs.h" 21#include "xfs.h"
26#include "xfs_fs.h" 22#include "xfs_fs.h"
@@ -36,7 +32,6 @@
36#include "xfs_bmap_btree.h" 32#include "xfs_bmap_btree.h"
37#include "xfs_attr_sf.h" 33#include "xfs_attr_sf.h"
38#include "xfs_dir2_sf.h" 34#include "xfs_dir2_sf.h"
39#include "xfs_vfs.h"
40#include "xfs_vnode.h" 35#include "xfs_vnode.h"
41#include "xfs_dinode.h" 36#include "xfs_dinode.h"
42#include "xfs_inode.h" 37#include "xfs_inode.h"
@@ -44,221 +39,219 @@
44#include "xfs_error.h" 39#include "xfs_error.h"
45#include "xfs_dfrag.h" 40#include "xfs_dfrag.h"
46#include "xfs_vnodeops.h" 41#include "xfs_vnodeops.h"
42#include "xfs_fsops.h"
43#include "xfs_alloc.h"
44#include "xfs_rtalloc.h"
45#include "xfs_attr.h"
46#include "xfs_ioctl.h"
47#include "xfs_ioctl32.h" 47#include "xfs_ioctl32.h"
48 48
49#define _NATIVE_IOC(cmd, type) \ 49#define _NATIVE_IOC(cmd, type) \
50 _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type)) 50 _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type))
51 51
52#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) 52#ifdef BROKEN_X86_ALIGNMENT
53#define BROKEN_X86_ALIGNMENT 53STATIC int
54#define _PACKED __attribute__((packed)) 54xfs_compat_flock64_copyin(
55/* on ia32 l_start is on a 32-bit boundary */ 55 xfs_flock64_t *bf,
56typedef struct xfs_flock64_32 { 56 compat_xfs_flock64_t __user *arg32)
57 __s16 l_type;
58 __s16 l_whence;
59 __s64 l_start __attribute__((packed));
60 /* len == 0 means until end of file */
61 __s64 l_len __attribute__((packed));
62 __s32 l_sysid;
63 __u32 l_pid;
64 __s32 l_pad[4]; /* reserve area */
65} xfs_flock64_32_t;
66
67#define XFS_IOC_ALLOCSP_32 _IOW ('X', 10, struct xfs_flock64_32)
68#define XFS_IOC_FREESP_32 _IOW ('X', 11, struct xfs_flock64_32)
69#define XFS_IOC_ALLOCSP64_32 _IOW ('X', 36, struct xfs_flock64_32)
70#define XFS_IOC_FREESP64_32 _IOW ('X', 37, struct xfs_flock64_32)
71#define XFS_IOC_RESVSP_32 _IOW ('X', 40, struct xfs_flock64_32)
72#define XFS_IOC_UNRESVSP_32 _IOW ('X', 41, struct xfs_flock64_32)
73#define XFS_IOC_RESVSP64_32 _IOW ('X', 42, struct xfs_flock64_32)
74#define XFS_IOC_UNRESVSP64_32 _IOW ('X', 43, struct xfs_flock64_32)
75
76/* just account for different alignment */
77STATIC unsigned long
78xfs_ioctl32_flock(
79 unsigned long arg)
80{ 57{
81 xfs_flock64_32_t __user *p32 = (void __user *)arg; 58 if (get_user(bf->l_type, &arg32->l_type) ||
82 xfs_flock64_t __user *p = compat_alloc_user_space(sizeof(*p)); 59 get_user(bf->l_whence, &arg32->l_whence) ||
83 60 get_user(bf->l_start, &arg32->l_start) ||
84 if (copy_in_user(&p->l_type, &p32->l_type, sizeof(s16)) || 61 get_user(bf->l_len, &arg32->l_len) ||
85 copy_in_user(&p->l_whence, &p32->l_whence, sizeof(s16)) || 62 get_user(bf->l_sysid, &arg32->l_sysid) ||
86 copy_in_user(&p->l_start, &p32->l_start, sizeof(s64)) || 63 get_user(bf->l_pid, &arg32->l_pid) ||
87 copy_in_user(&p->l_len, &p32->l_len, sizeof(s64)) || 64 copy_from_user(bf->l_pad, &arg32->l_pad, 4*sizeof(u32)))
88 copy_in_user(&p->l_sysid, &p32->l_sysid, sizeof(s32)) || 65 return -XFS_ERROR(EFAULT);
89 copy_in_user(&p->l_pid, &p32->l_pid, sizeof(u32)) || 66 return 0;
90 copy_in_user(&p->l_pad, &p32->l_pad, 4*sizeof(u32)))
91 return -EFAULT;
92
93 return (unsigned long)p;
94} 67}
95 68
96typedef struct compat_xfs_fsop_geom_v1 { 69STATIC int
97 __u32 blocksize; /* filesystem (data) block size */ 70xfs_compat_ioc_fsgeometry_v1(
98 __u32 rtextsize; /* realtime extent size */ 71 struct xfs_mount *mp,
99 __u32 agblocks; /* fsblocks in an AG */ 72 compat_xfs_fsop_geom_v1_t __user *arg32)
100 __u32 agcount; /* number of allocation groups */
101 __u32 logblocks; /* fsblocks in the log */
102 __u32 sectsize; /* (data) sector size, bytes */
103 __u32 inodesize; /* inode size in bytes */
104 __u32 imaxpct; /* max allowed inode space(%) */
105 __u64 datablocks; /* fsblocks in data subvolume */
106 __u64 rtblocks; /* fsblocks in realtime subvol */
107 __u64 rtextents; /* rt extents in realtime subvol*/
108 __u64 logstart; /* starting fsblock of the log */
109 unsigned char uuid[16]; /* unique id of the filesystem */
110 __u32 sunit; /* stripe unit, fsblocks */
111 __u32 swidth; /* stripe width, fsblocks */
112 __s32 version; /* structure version */
113 __u32 flags; /* superblock version flags */
114 __u32 logsectsize; /* log sector size, bytes */
115 __u32 rtsectsize; /* realtime sector size, bytes */
116 __u32 dirblocksize; /* directory block size, bytes */
117} __attribute__((packed)) compat_xfs_fsop_geom_v1_t;
118
119#define XFS_IOC_FSGEOMETRY_V1_32 \
120 _IOR ('X', 100, struct compat_xfs_fsop_geom_v1)
121
122STATIC unsigned long xfs_ioctl32_geom_v1(unsigned long arg)
123{ 73{
124 compat_xfs_fsop_geom_v1_t __user *p32 = (void __user *)arg; 74 xfs_fsop_geom_t fsgeo;
125 xfs_fsop_geom_v1_t __user *p = compat_alloc_user_space(sizeof(*p)); 75 int error;
126 76
127 if (copy_in_user(p, p32, sizeof(*p32))) 77 error = xfs_fs_geometry(mp, &fsgeo, 3);
128 return -EFAULT; 78 if (error)
129 return (unsigned long)p; 79 return -error;
80 /* The 32-bit variant simply has some padding at the end */
81 if (copy_to_user(arg32, &fsgeo, sizeof(struct compat_xfs_fsop_geom_v1)))
82 return -XFS_ERROR(EFAULT);
83 return 0;
130} 84}
131 85
132typedef struct compat_xfs_inogrp { 86STATIC int
133 __u64 xi_startino; /* starting inode number */ 87xfs_compat_growfs_data_copyin(
134 __s32 xi_alloccount; /* # bits set in allocmask */ 88 struct xfs_growfs_data *in,
135 __u64 xi_allocmask; /* mask of allocated inodes */ 89 compat_xfs_growfs_data_t __user *arg32)
136} __attribute__((packed)) compat_xfs_inogrp_t;
137
138STATIC int xfs_inumbers_fmt_compat(
139 void __user *ubuffer,
140 const xfs_inogrp_t *buffer,
141 long count,
142 long *written)
143{ 90{
144 compat_xfs_inogrp_t __user *p32 = ubuffer; 91 if (get_user(in->newblocks, &arg32->newblocks) ||
145 long i; 92 get_user(in->imaxpct, &arg32->imaxpct))
93 return -XFS_ERROR(EFAULT);
94 return 0;
95}
96
97STATIC int
98xfs_compat_growfs_rt_copyin(
99 struct xfs_growfs_rt *in,
100 compat_xfs_growfs_rt_t __user *arg32)
101{
102 if (get_user(in->newblocks, &arg32->newblocks) ||
103 get_user(in->extsize, &arg32->extsize))
104 return -XFS_ERROR(EFAULT);
105 return 0;
106}
107
108STATIC int
109xfs_inumbers_fmt_compat(
110 void __user *ubuffer,
111 const xfs_inogrp_t *buffer,
112 long count,
113 long *written)
114{
115 compat_xfs_inogrp_t __user *p32 = ubuffer;
116 long i;
146 117
147 for (i = 0; i < count; i++) { 118 for (i = 0; i < count; i++) {
148 if (put_user(buffer[i].xi_startino, &p32[i].xi_startino) || 119 if (put_user(buffer[i].xi_startino, &p32[i].xi_startino) ||
149 put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) || 120 put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) ||
150 put_user(buffer[i].xi_allocmask, &p32[i].xi_allocmask)) 121 put_user(buffer[i].xi_allocmask, &p32[i].xi_allocmask))
151 return -EFAULT; 122 return -XFS_ERROR(EFAULT);
152 } 123 }
153 *written = count * sizeof(*p32); 124 *written = count * sizeof(*p32);
154 return 0; 125 return 0;
155} 126}
156 127
157#else 128#else
158
159#define xfs_inumbers_fmt_compat xfs_inumbers_fmt 129#define xfs_inumbers_fmt_compat xfs_inumbers_fmt
160#define _PACKED 130#endif /* BROKEN_X86_ALIGNMENT */
161 131
162#endif 132STATIC int
133xfs_ioctl32_bstime_copyin(
134 xfs_bstime_t *bstime,
135 compat_xfs_bstime_t __user *bstime32)
136{
137 compat_time_t sec32; /* tv_sec differs on 64 vs. 32 */
163 138
164/* XFS_IOC_FSBULKSTAT and friends */ 139 if (get_user(sec32, &bstime32->tv_sec) ||
140 get_user(bstime->tv_nsec, &bstime32->tv_nsec))
141 return -XFS_ERROR(EFAULT);
142 bstime->tv_sec = sec32;
143 return 0;
144}
145
146/* xfs_bstat_t has differing alignment on intel, & bstime_t sizes everywhere */
147STATIC int
148xfs_ioctl32_bstat_copyin(
149 xfs_bstat_t *bstat,
150 compat_xfs_bstat_t __user *bstat32)
151{
152 if (get_user(bstat->bs_ino, &bstat32->bs_ino) ||
153 get_user(bstat->bs_mode, &bstat32->bs_mode) ||
154 get_user(bstat->bs_nlink, &bstat32->bs_nlink) ||
155 get_user(bstat->bs_uid, &bstat32->bs_uid) ||
156 get_user(bstat->bs_gid, &bstat32->bs_gid) ||
157 get_user(bstat->bs_rdev, &bstat32->bs_rdev) ||
158 get_user(bstat->bs_blksize, &bstat32->bs_blksize) ||
159 get_user(bstat->bs_size, &bstat32->bs_size) ||
160 xfs_ioctl32_bstime_copyin(&bstat->bs_atime, &bstat32->bs_atime) ||
161 xfs_ioctl32_bstime_copyin(&bstat->bs_mtime, &bstat32->bs_mtime) ||
162 xfs_ioctl32_bstime_copyin(&bstat->bs_ctime, &bstat32->bs_ctime) ||
163 get_user(bstat->bs_blocks, &bstat32->bs_size) ||
164 get_user(bstat->bs_xflags, &bstat32->bs_size) ||
165 get_user(bstat->bs_extsize, &bstat32->bs_extsize) ||
166 get_user(bstat->bs_extents, &bstat32->bs_extents) ||
167 get_user(bstat->bs_gen, &bstat32->bs_gen) ||
168 get_user(bstat->bs_projid, &bstat32->bs_projid) ||
169 get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) ||
170 get_user(bstat->bs_dmstate, &bstat32->bs_dmstate) ||
171 get_user(bstat->bs_aextents, &bstat32->bs_aextents))
172 return -XFS_ERROR(EFAULT);
173 return 0;
174}
165 175
166typedef struct compat_xfs_bstime { 176/* XFS_IOC_FSBULKSTAT and friends */
167 __s32 tv_sec; /* seconds */
168 __s32 tv_nsec; /* and nanoseconds */
169} compat_xfs_bstime_t;
170 177
171STATIC int xfs_bstime_store_compat( 178STATIC int
172 compat_xfs_bstime_t __user *p32, 179xfs_bstime_store_compat(
173 const xfs_bstime_t *p) 180 compat_xfs_bstime_t __user *p32,
181 const xfs_bstime_t *p)
174{ 182{
175 __s32 sec32; 183 __s32 sec32;
176 184
177 sec32 = p->tv_sec; 185 sec32 = p->tv_sec;
178 if (put_user(sec32, &p32->tv_sec) || 186 if (put_user(sec32, &p32->tv_sec) ||
179 put_user(p->tv_nsec, &p32->tv_nsec)) 187 put_user(p->tv_nsec, &p32->tv_nsec))
180 return -EFAULT; 188 return -XFS_ERROR(EFAULT);
181 return 0; 189 return 0;
182} 190}
183 191
184typedef struct compat_xfs_bstat { 192/* Return 0 on success or positive error (to xfs_bulkstat()) */
185 __u64 bs_ino; /* inode number */ 193STATIC int
186 __u16 bs_mode; /* type and mode */ 194xfs_bulkstat_one_fmt_compat(
187 __u16 bs_nlink; /* number of links */
188 __u32 bs_uid; /* user id */
189 __u32 bs_gid; /* group id */
190 __u32 bs_rdev; /* device value */
191 __s32 bs_blksize; /* block size */
192 __s64 bs_size; /* file size */
193 compat_xfs_bstime_t bs_atime; /* access time */
194 compat_xfs_bstime_t bs_mtime; /* modify time */
195 compat_xfs_bstime_t bs_ctime; /* inode change time */
196 int64_t bs_blocks; /* number of blocks */
197 __u32 bs_xflags; /* extended flags */
198 __s32 bs_extsize; /* extent size */
199 __s32 bs_extents; /* number of extents */
200 __u32 bs_gen; /* generation count */
201 __u16 bs_projid; /* project id */
202 unsigned char bs_pad[14]; /* pad space, unused */
203 __u32 bs_dmevmask; /* DMIG event mask */
204 __u16 bs_dmstate; /* DMIG state info */
205 __u16 bs_aextents; /* attribute number of extents */
206} _PACKED compat_xfs_bstat_t;
207
208STATIC int xfs_bulkstat_one_fmt_compat(
209 void __user *ubuffer, 195 void __user *ubuffer,
196 int ubsize,
197 int *ubused,
210 const xfs_bstat_t *buffer) 198 const xfs_bstat_t *buffer)
211{ 199{
212 compat_xfs_bstat_t __user *p32 = ubuffer; 200 compat_xfs_bstat_t __user *p32 = ubuffer;
213 201
214 if (put_user(buffer->bs_ino, &p32->bs_ino) || 202 if (ubsize < sizeof(*p32))
215 put_user(buffer->bs_mode, &p32->bs_mode) || 203 return XFS_ERROR(ENOMEM);
216 put_user(buffer->bs_nlink, &p32->bs_nlink) || 204
217 put_user(buffer->bs_uid, &p32->bs_uid) || 205 if (put_user(buffer->bs_ino, &p32->bs_ino) ||
218 put_user(buffer->bs_gid, &p32->bs_gid) || 206 put_user(buffer->bs_mode, &p32->bs_mode) ||
219 put_user(buffer->bs_rdev, &p32->bs_rdev) || 207 put_user(buffer->bs_nlink, &p32->bs_nlink) ||
220 put_user(buffer->bs_blksize, &p32->bs_blksize) || 208 put_user(buffer->bs_uid, &p32->bs_uid) ||
221 put_user(buffer->bs_size, &p32->bs_size) || 209 put_user(buffer->bs_gid, &p32->bs_gid) ||
210 put_user(buffer->bs_rdev, &p32->bs_rdev) ||
211 put_user(buffer->bs_blksize, &p32->bs_blksize) ||
212 put_user(buffer->bs_size, &p32->bs_size) ||
222 xfs_bstime_store_compat(&p32->bs_atime, &buffer->bs_atime) || 213 xfs_bstime_store_compat(&p32->bs_atime, &buffer->bs_atime) ||
223 xfs_bstime_store_compat(&p32->bs_mtime, &buffer->bs_mtime) || 214 xfs_bstime_store_compat(&p32->bs_mtime, &buffer->bs_mtime) ||
224 xfs_bstime_store_compat(&p32->bs_ctime, &buffer->bs_ctime) || 215 xfs_bstime_store_compat(&p32->bs_ctime, &buffer->bs_ctime) ||
225 put_user(buffer->bs_blocks, &p32->bs_blocks) || 216 put_user(buffer->bs_blocks, &p32->bs_blocks) ||
226 put_user(buffer->bs_xflags, &p32->bs_xflags) || 217 put_user(buffer->bs_xflags, &p32->bs_xflags) ||
227 put_user(buffer->bs_extsize, &p32->bs_extsize) || 218 put_user(buffer->bs_extsize, &p32->bs_extsize) ||
228 put_user(buffer->bs_extents, &p32->bs_extents) || 219 put_user(buffer->bs_extents, &p32->bs_extents) ||
229 put_user(buffer->bs_gen, &p32->bs_gen) || 220 put_user(buffer->bs_gen, &p32->bs_gen) ||
230 put_user(buffer->bs_projid, &p32->bs_projid) || 221 put_user(buffer->bs_projid, &p32->bs_projid) ||
231 put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) || 222 put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) ||
232 put_user(buffer->bs_dmstate, &p32->bs_dmstate) || 223 put_user(buffer->bs_dmstate, &p32->bs_dmstate) ||
233 put_user(buffer->bs_aextents, &p32->bs_aextents)) 224 put_user(buffer->bs_aextents, &p32->bs_aextents))
234 return -EFAULT; 225 return XFS_ERROR(EFAULT);
235 return sizeof(*p32); 226 if (ubused)
227 *ubused = sizeof(*p32);
228 return 0;
236} 229}
237 230
238 231STATIC int
239 232xfs_bulkstat_one_compat(
240typedef struct compat_xfs_fsop_bulkreq { 233 xfs_mount_t *mp, /* mount point for filesystem */
241 compat_uptr_t lastip; /* last inode # pointer */ 234 xfs_ino_t ino, /* inode number to get data for */
242 __s32 icount; /* count of entries in buffer */ 235 void __user *buffer, /* buffer to place output in */
243 compat_uptr_t ubuffer; /* user buffer for inode desc. */ 236 int ubsize, /* size of buffer */
244 compat_uptr_t ocount; /* output count pointer */ 237 void *private_data, /* my private data */
245} compat_xfs_fsop_bulkreq_t; 238 xfs_daddr_t bno, /* starting bno of inode cluster */
246 239 int *ubused, /* bytes used by me */
247#define XFS_IOC_FSBULKSTAT_32 \ 240 void *dibuff, /* on-disk inode buffer */
248 _IOWR('X', 101, struct compat_xfs_fsop_bulkreq) 241 int *stat) /* BULKSTAT_RV_... */
249#define XFS_IOC_FSBULKSTAT_SINGLE_32 \ 242{
250 _IOWR('X', 102, struct compat_xfs_fsop_bulkreq) 243 return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
251#define XFS_IOC_FSINUMBERS_32 \ 244 xfs_bulkstat_one_fmt_compat, bno,
252 _IOWR('X', 103, struct compat_xfs_fsop_bulkreq) 245 ubused, dibuff, stat);
246}
253 247
254/* copied from xfs_ioctl.c */ 248/* copied from xfs_ioctl.c */
255STATIC int 249STATIC int
256xfs_ioc_bulkstat_compat( 250xfs_compat_ioc_bulkstat(
257 xfs_mount_t *mp, 251 xfs_mount_t *mp,
258 unsigned int cmd, 252 unsigned int cmd,
259 void __user *arg) 253 compat_xfs_fsop_bulkreq_t __user *p32)
260{ 254{
261 compat_xfs_fsop_bulkreq_t __user *p32 = (void __user *)arg;
262 u32 addr; 255 u32 addr;
263 xfs_fsop_bulkreq_t bulkreq; 256 xfs_fsop_bulkreq_t bulkreq;
264 int count; /* # of records returned */ 257 int count; /* # of records returned */
@@ -270,20 +263,20 @@ xfs_ioc_bulkstat_compat(
270 /* should be called again (unused here, but used in dmapi) */ 263 /* should be called again (unused here, but used in dmapi) */
271 264
272 if (!capable(CAP_SYS_ADMIN)) 265 if (!capable(CAP_SYS_ADMIN))
273 return -EPERM; 266 return -XFS_ERROR(EPERM);
274 267
275 if (XFS_FORCED_SHUTDOWN(mp)) 268 if (XFS_FORCED_SHUTDOWN(mp))
276 return -XFS_ERROR(EIO); 269 return -XFS_ERROR(EIO);
277 270
278 if (get_user(addr, &p32->lastip)) 271 if (get_user(addr, &p32->lastip))
279 return -EFAULT; 272 return -XFS_ERROR(EFAULT);
280 bulkreq.lastip = compat_ptr(addr); 273 bulkreq.lastip = compat_ptr(addr);
281 if (get_user(bulkreq.icount, &p32->icount) || 274 if (get_user(bulkreq.icount, &p32->icount) ||
282 get_user(addr, &p32->ubuffer)) 275 get_user(addr, &p32->ubuffer))
283 return -EFAULT; 276 return -XFS_ERROR(EFAULT);
284 bulkreq.ubuffer = compat_ptr(addr); 277 bulkreq.ubuffer = compat_ptr(addr);
285 if (get_user(addr, &p32->ocount)) 278 if (get_user(addr, &p32->ocount))
286 return -EFAULT; 279 return -XFS_ERROR(EFAULT);
287 bulkreq.ocount = compat_ptr(addr); 280 bulkreq.ocount = compat_ptr(addr);
288 281
289 if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) 282 if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
@@ -295,17 +288,22 @@ xfs_ioc_bulkstat_compat(
295 if (bulkreq.ubuffer == NULL) 288 if (bulkreq.ubuffer == NULL)
296 return -XFS_ERROR(EINVAL); 289 return -XFS_ERROR(EINVAL);
297 290
298 if (cmd == XFS_IOC_FSINUMBERS) 291 if (cmd == XFS_IOC_FSINUMBERS_32) {
299 error = xfs_inumbers(mp, &inlast, &count, 292 error = xfs_inumbers(mp, &inlast, &count,
300 bulkreq.ubuffer, xfs_inumbers_fmt_compat); 293 bulkreq.ubuffer, xfs_inumbers_fmt_compat);
301 else { 294 } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) {
302 /* declare a var to get a warning in case the type changes */ 295 int res;
303 bulkstat_one_fmt_pf formatter = xfs_bulkstat_one_fmt_compat; 296
297 error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer,
298 sizeof(compat_xfs_bstat_t),
299 NULL, 0, NULL, NULL, &res);
300 } else if (cmd == XFS_IOC_FSBULKSTAT_32) {
304 error = xfs_bulkstat(mp, &inlast, &count, 301 error = xfs_bulkstat(mp, &inlast, &count,
305 xfs_bulkstat_one, formatter, 302 xfs_bulkstat_one_compat, NULL,
306 sizeof(compat_xfs_bstat_t), bulkreq.ubuffer, 303 sizeof(compat_xfs_bstat_t), bulkreq.ubuffer,
307 BULKSTAT_FG_QUICK, &done); 304 BULKSTAT_FG_QUICK, &done);
308 } 305 } else
306 error = XFS_ERROR(EINVAL);
309 if (error) 307 if (error)
310 return -error; 308 return -error;
311 309
@@ -321,63 +319,306 @@ xfs_ioc_bulkstat_compat(
321 return 0; 319 return 0;
322} 320}
323 321
322STATIC int
323xfs_compat_handlereq_copyin(
324 xfs_fsop_handlereq_t *hreq,
325 compat_xfs_fsop_handlereq_t __user *arg32)
326{
327 compat_xfs_fsop_handlereq_t hreq32;
328
329 if (copy_from_user(&hreq32, arg32, sizeof(compat_xfs_fsop_handlereq_t)))
330 return -XFS_ERROR(EFAULT);
331
332 hreq->fd = hreq32.fd;
333 hreq->path = compat_ptr(hreq32.path);
334 hreq->oflags = hreq32.oflags;
335 hreq->ihandle = compat_ptr(hreq32.ihandle);
336 hreq->ihandlen = hreq32.ihandlen;
337 hreq->ohandle = compat_ptr(hreq32.ohandle);
338 hreq->ohandlen = compat_ptr(hreq32.ohandlen);
324 339
340 return 0;
341}
325 342
326typedef struct compat_xfs_fsop_handlereq { 343/*
327 __u32 fd; /* fd for FD_TO_HANDLE */ 344 * Convert userspace handle data into inode.
328 compat_uptr_t path; /* user pathname */ 345 *
329 __u32 oflags; /* open flags */ 346 * We use the fact that all the fsop_handlereq ioctl calls have a data
330 compat_uptr_t ihandle; /* user supplied handle */ 347 * structure argument whose first component is always a xfs_fsop_handlereq_t,
331 __u32 ihandlen; /* user supplied length */ 348 * so we can pass that sub structure into this handy, shared routine.
332 compat_uptr_t ohandle; /* user buffer for handle */ 349 *
333 compat_uptr_t ohandlen; /* user buffer length */ 350 * If no error, caller must always iput the returned inode.
334} compat_xfs_fsop_handlereq_t; 351 */
335 352STATIC int
336#define XFS_IOC_PATH_TO_FSHANDLE_32 \ 353xfs_vget_fsop_handlereq_compat(
337 _IOWR('X', 104, struct compat_xfs_fsop_handlereq) 354 xfs_mount_t *mp,
338#define XFS_IOC_PATH_TO_HANDLE_32 \ 355 struct inode *parinode, /* parent inode pointer */
339 _IOWR('X', 105, struct compat_xfs_fsop_handlereq) 356 compat_xfs_fsop_handlereq_t *hreq,
340#define XFS_IOC_FD_TO_HANDLE_32 \ 357 struct inode **inode)
341 _IOWR('X', 106, struct compat_xfs_fsop_handlereq)
342#define XFS_IOC_OPEN_BY_HANDLE_32 \
343 _IOWR('X', 107, struct compat_xfs_fsop_handlereq)
344#define XFS_IOC_READLINK_BY_HANDLE_32 \
345 _IOWR('X', 108, struct compat_xfs_fsop_handlereq)
346
347STATIC unsigned long xfs_ioctl32_fshandle(unsigned long arg)
348{ 358{
349 compat_xfs_fsop_handlereq_t __user *p32 = (void __user *)arg; 359 void __user *hanp;
350 xfs_fsop_handlereq_t __user *p = compat_alloc_user_space(sizeof(*p)); 360 size_t hlen;
351 u32 addr; 361 xfs_fid_t *xfid;
352 362 xfs_handle_t *handlep;
353 if (copy_in_user(&p->fd, &p32->fd, sizeof(__u32)) || 363 xfs_handle_t handle;
354 get_user(addr, &p32->path) || 364 xfs_inode_t *ip;
355 put_user(compat_ptr(addr), &p->path) || 365 xfs_ino_t ino;
356 copy_in_user(&p->oflags, &p32->oflags, sizeof(__u32)) || 366 __u32 igen;
357 get_user(addr, &p32->ihandle) || 367 int error;
358 put_user(compat_ptr(addr), &p->ihandle) || 368
359 copy_in_user(&p->ihandlen, &p32->ihandlen, sizeof(__u32)) || 369 /*
360 get_user(addr, &p32->ohandle) || 370 * Only allow handle opens under a directory.
361 put_user(compat_ptr(addr), &p->ohandle) || 371 */
362 get_user(addr, &p32->ohandlen) || 372 if (!S_ISDIR(parinode->i_mode))
363 put_user(compat_ptr(addr), &p->ohandlen)) 373 return XFS_ERROR(ENOTDIR);
364 return -EFAULT; 374
365 375 hanp = compat_ptr(hreq->ihandle);
366 return (unsigned long)p; 376 hlen = hreq->ihandlen;
377 handlep = &handle;
378
379 if (hlen < sizeof(handlep->ha_fsid) || hlen > sizeof(*handlep))
380 return XFS_ERROR(EINVAL);
381 if (copy_from_user(handlep, hanp, hlen))
382 return XFS_ERROR(EFAULT);
383 if (hlen < sizeof(*handlep))
384 memset(((char *)handlep) + hlen, 0, sizeof(*handlep) - hlen);
385 if (hlen > sizeof(handlep->ha_fsid)) {
386 if (handlep->ha_fid.fid_len !=
387 (hlen - sizeof(handlep->ha_fsid) -
388 sizeof(handlep->ha_fid.fid_len)) ||
389 handlep->ha_fid.fid_pad)
390 return XFS_ERROR(EINVAL);
391 }
392
393 /*
394 * Crack the handle, obtain the inode # & generation #
395 */
396 xfid = (struct xfs_fid *)&handlep->ha_fid;
397 if (xfid->fid_len == sizeof(*xfid) - sizeof(xfid->fid_len)) {
398 ino = xfid->fid_ino;
399 igen = xfid->fid_gen;
400 } else {
401 return XFS_ERROR(EINVAL);
402 }
403
404 /*
405 * Get the XFS inode, building a Linux inode to go with it.
406 */
407 error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, 0);
408 if (error)
409 return error;
410 if (ip == NULL)
411 return XFS_ERROR(EIO);
412 if (ip->i_d.di_gen != igen) {
413 xfs_iput_new(ip, XFS_ILOCK_SHARED);
414 return XFS_ERROR(ENOENT);
415 }
416
417 xfs_iunlock(ip, XFS_ILOCK_SHARED);
418
419 *inode = VFS_I(ip);
420 return 0;
367} 421}
368 422
423STATIC int
424xfs_compat_attrlist_by_handle(
425 xfs_mount_t *mp,
426 void __user *arg,
427 struct inode *parinode)
428{
429 int error;
430 attrlist_cursor_kern_t *cursor;
431 compat_xfs_fsop_attrlist_handlereq_t al_hreq;
432 struct inode *inode;
433 char *kbuf;
434
435 if (!capable(CAP_SYS_ADMIN))
436 return -XFS_ERROR(EPERM);
437 if (copy_from_user(&al_hreq, arg,
438 sizeof(compat_xfs_fsop_attrlist_handlereq_t)))
439 return -XFS_ERROR(EFAULT);
440 if (al_hreq.buflen > XATTR_LIST_MAX)
441 return -XFS_ERROR(EINVAL);
442
443 /*
444 * Reject flags, only allow namespaces.
445 */
446 if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE))
447 return -XFS_ERROR(EINVAL);
448
449 error = xfs_vget_fsop_handlereq_compat(mp, parinode, &al_hreq.hreq,
450 &inode);
451 if (error)
452 goto out;
453
454 kbuf = kmalloc(al_hreq.buflen, GFP_KERNEL);
455 if (!kbuf)
456 goto out_vn_rele;
457
458 cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
459 error = xfs_attr_list(XFS_I(inode), kbuf, al_hreq.buflen,
460 al_hreq.flags, cursor);
461 if (error)
462 goto out_kfree;
463
464 if (copy_to_user(compat_ptr(al_hreq.buffer), kbuf, al_hreq.buflen))
465 error = -EFAULT;
466
467 out_kfree:
468 kfree(kbuf);
469 out_vn_rele:
470 iput(inode);
471 out:
472 return -error;
473}
369 474
370STATIC long 475STATIC int
371xfs_compat_ioctl( 476xfs_compat_attrmulti_by_handle(
372 int mode, 477 xfs_mount_t *mp,
373 struct file *file, 478 void __user *arg,
374 unsigned cmd, 479 struct inode *parinode)
375 unsigned long arg) 480{
481 int error;
482 compat_xfs_attr_multiop_t *ops;
483 compat_xfs_fsop_attrmulti_handlereq_t am_hreq;
484 struct inode *inode;
485 unsigned int i, size;
486 char *attr_name;
487
488 if (!capable(CAP_SYS_ADMIN))
489 return -XFS_ERROR(EPERM);
490 if (copy_from_user(&am_hreq, arg,
491 sizeof(compat_xfs_fsop_attrmulti_handlereq_t)))
492 return -XFS_ERROR(EFAULT);
493
494 error = xfs_vget_fsop_handlereq_compat(mp, parinode, &am_hreq.hreq,
495 &inode);
496 if (error)
497 goto out;
498
499 error = E2BIG;
500 size = am_hreq.opcount * sizeof(compat_xfs_attr_multiop_t);
501 if (!size || size > 16 * PAGE_SIZE)
502 goto out_vn_rele;
503
504 error = ENOMEM;
505 ops = kmalloc(size, GFP_KERNEL);
506 if (!ops)
507 goto out_vn_rele;
508
509 error = EFAULT;
510 if (copy_from_user(ops, compat_ptr(am_hreq.ops), size))
511 goto out_kfree_ops;
512
513 attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
514 if (!attr_name)
515 goto out_kfree_ops;
516
517
518 error = 0;
519 for (i = 0; i < am_hreq.opcount; i++) {
520 ops[i].am_error = strncpy_from_user(attr_name,
521 compat_ptr(ops[i].am_attrname),
522 MAXNAMELEN);
523 if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
524 error = -ERANGE;
525 if (ops[i].am_error < 0)
526 break;
527
528 switch (ops[i].am_opcode) {
529 case ATTR_OP_GET:
530 ops[i].am_error = xfs_attrmulti_attr_get(inode,
531 attr_name,
532 compat_ptr(ops[i].am_attrvalue),
533 &ops[i].am_length, ops[i].am_flags);
534 break;
535 case ATTR_OP_SET:
536 ops[i].am_error = xfs_attrmulti_attr_set(inode,
537 attr_name,
538 compat_ptr(ops[i].am_attrvalue),
539 ops[i].am_length, ops[i].am_flags);
540 break;
541 case ATTR_OP_REMOVE:
542 ops[i].am_error = xfs_attrmulti_attr_remove(inode,
543 attr_name, ops[i].am_flags);
544 break;
545 default:
546 ops[i].am_error = EINVAL;
547 }
548 }
549
550 if (copy_to_user(compat_ptr(am_hreq.ops), ops, size))
551 error = XFS_ERROR(EFAULT);
552
553 kfree(attr_name);
554 out_kfree_ops:
555 kfree(ops);
556 out_vn_rele:
557 iput(inode);
558 out:
559 return -error;
560}
561
562STATIC int
563xfs_compat_fssetdm_by_handle(
564 xfs_mount_t *mp,
565 void __user *arg,
566 struct inode *parinode)
567{
568 int error;
569 struct fsdmidata fsd;
570 compat_xfs_fsop_setdm_handlereq_t dmhreq;
571 struct inode *inode;
572
573 if (!capable(CAP_MKNOD))
574 return -XFS_ERROR(EPERM);
575 if (copy_from_user(&dmhreq, arg,
576 sizeof(compat_xfs_fsop_setdm_handlereq_t)))
577 return -XFS_ERROR(EFAULT);
578
579 error = xfs_vget_fsop_handlereq_compat(mp, parinode, &dmhreq.hreq,
580 &inode);
581 if (error)
582 return -error;
583
584 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) {
585 error = -XFS_ERROR(EPERM);
586 goto out;
587 }
588
589 if (copy_from_user(&fsd, compat_ptr(dmhreq.data), sizeof(fsd))) {
590 error = -XFS_ERROR(EFAULT);
591 goto out;
592 }
593
594 error = -xfs_set_dmattrs(XFS_I(inode), fsd.fsd_dmevmask,
595 fsd.fsd_dmstate);
596
597out:
598 iput(inode);
599 return error;
600}
601
602long
603xfs_file_compat_ioctl(
604 struct file *filp,
605 unsigned cmd,
606 unsigned long p)
376{ 607{
377 struct inode *inode = file->f_path.dentry->d_inode; 608 struct inode *inode = filp->f_path.dentry->d_inode;
378 int error; 609 struct xfs_inode *ip = XFS_I(inode);
610 struct xfs_mount *mp = ip->i_mount;
611 void __user *arg = (void __user *)p;
612 int ioflags = 0;
613 int error;
614
615 if (filp->f_mode & FMODE_NOCMTIME)
616 ioflags |= IO_INVIS;
617
618 xfs_itrace_entry(ip);
379 619
380 switch (cmd) { 620 switch (cmd) {
621 /* No size or alignment issues on any arch */
381 case XFS_IOC_DIOINFO: 622 case XFS_IOC_DIOINFO:
382 case XFS_IOC_FSGEOMETRY: 623 case XFS_IOC_FSGEOMETRY:
383 case XFS_IOC_FSGETXATTR: 624 case XFS_IOC_FSGETXATTR:
@@ -387,48 +628,18 @@ xfs_compat_ioctl(
387 case XFS_IOC_GETBMAP: 628 case XFS_IOC_GETBMAP:
388 case XFS_IOC_GETBMAPA: 629 case XFS_IOC_GETBMAPA:
389 case XFS_IOC_GETBMAPX: 630 case XFS_IOC_GETBMAPX:
390/* not handled
391 case XFS_IOC_FSSETDM_BY_HANDLE:
392 case XFS_IOC_ATTRLIST_BY_HANDLE:
393 case XFS_IOC_ATTRMULTI_BY_HANDLE:
394*/
395 case XFS_IOC_FSCOUNTS: 631 case XFS_IOC_FSCOUNTS:
396 case XFS_IOC_SET_RESBLKS: 632 case XFS_IOC_SET_RESBLKS:
397 case XFS_IOC_GET_RESBLKS: 633 case XFS_IOC_GET_RESBLKS:
398 case XFS_IOC_FSGROWFSDATA:
399 case XFS_IOC_FSGROWFSLOG: 634 case XFS_IOC_FSGROWFSLOG:
400 case XFS_IOC_FSGROWFSRT:
401 case XFS_IOC_FREEZE: 635 case XFS_IOC_FREEZE:
402 case XFS_IOC_THAW: 636 case XFS_IOC_THAW:
403 case XFS_IOC_GOINGDOWN: 637 case XFS_IOC_GOINGDOWN:
404 case XFS_IOC_ERROR_INJECTION: 638 case XFS_IOC_ERROR_INJECTION:
405 case XFS_IOC_ERROR_CLEARALL: 639 case XFS_IOC_ERROR_CLEARALL:
406 break; 640 return xfs_file_ioctl(filp, cmd, p);
407 641#ifndef BROKEN_X86_ALIGNMENT
408 case XFS_IOC32_GETXFLAGS: 642 /* These are handled fine if no alignment issues */
409 case XFS_IOC32_SETXFLAGS:
410 case XFS_IOC32_GETVERSION:
411 cmd = _NATIVE_IOC(cmd, long);
412 break;
413#ifdef BROKEN_X86_ALIGNMENT
414 /* xfs_flock_t has wrong u32 vs u64 alignment */
415 case XFS_IOC_ALLOCSP_32:
416 case XFS_IOC_FREESP_32:
417 case XFS_IOC_ALLOCSP64_32:
418 case XFS_IOC_FREESP64_32:
419 case XFS_IOC_RESVSP_32:
420 case XFS_IOC_UNRESVSP_32:
421 case XFS_IOC_RESVSP64_32:
422 case XFS_IOC_UNRESVSP64_32:
423 arg = xfs_ioctl32_flock(arg);
424 cmd = _NATIVE_IOC(cmd, struct xfs_flock64);
425 break;
426 case XFS_IOC_FSGEOMETRY_V1_32:
427 arg = xfs_ioctl32_geom_v1(arg);
428 cmd = _NATIVE_IOC(cmd, struct xfs_fsop_geom_v1);
429 break;
430
431#else /* These are handled fine if no alignment issues */
432 case XFS_IOC_ALLOCSP: 643 case XFS_IOC_ALLOCSP:
433 case XFS_IOC_FREESP: 644 case XFS_IOC_FREESP:
434 case XFS_IOC_RESVSP: 645 case XFS_IOC_RESVSP:
@@ -438,51 +649,97 @@ xfs_compat_ioctl(
438 case XFS_IOC_RESVSP64: 649 case XFS_IOC_RESVSP64:
439 case XFS_IOC_UNRESVSP64: 650 case XFS_IOC_UNRESVSP64:
440 case XFS_IOC_FSGEOMETRY_V1: 651 case XFS_IOC_FSGEOMETRY_V1:
441 break; 652 case XFS_IOC_FSGROWFSDATA:
653 case XFS_IOC_FSGROWFSRT:
654 return xfs_file_ioctl(filp, cmd, p);
655#else
656 case XFS_IOC_ALLOCSP_32:
657 case XFS_IOC_FREESP_32:
658 case XFS_IOC_ALLOCSP64_32:
659 case XFS_IOC_FREESP64_32:
660 case XFS_IOC_RESVSP_32:
661 case XFS_IOC_UNRESVSP_32:
662 case XFS_IOC_RESVSP64_32:
663 case XFS_IOC_UNRESVSP64_32: {
664 struct xfs_flock64 bf;
442 665
443 /* xfs_bstat_t still has wrong u32 vs u64 alignment */ 666 if (xfs_compat_flock64_copyin(&bf, arg))
444 case XFS_IOC_SWAPEXT: 667 return -XFS_ERROR(EFAULT);
445 break; 668 cmd = _NATIVE_IOC(cmd, struct xfs_flock64);
669 return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf);
670 }
671 case XFS_IOC_FSGEOMETRY_V1_32:
672 return xfs_compat_ioc_fsgeometry_v1(mp, arg);
673 case XFS_IOC_FSGROWFSDATA_32: {
674 struct xfs_growfs_data in;
675
676 if (xfs_compat_growfs_data_copyin(&in, arg))
677 return -XFS_ERROR(EFAULT);
678 error = xfs_growfs_data(mp, &in);
679 return -error;
680 }
681 case XFS_IOC_FSGROWFSRT_32: {
682 struct xfs_growfs_rt in;
446 683
684 if (xfs_compat_growfs_rt_copyin(&in, arg))
685 return -XFS_ERROR(EFAULT);
686 error = xfs_growfs_rt(mp, &in);
687 return -error;
688 }
447#endif 689#endif
690 /* long changes size, but xfs only copiese out 32 bits */
691 case XFS_IOC_GETXFLAGS_32:
692 case XFS_IOC_SETXFLAGS_32:
693 case XFS_IOC_GETVERSION_32:
694 cmd = _NATIVE_IOC(cmd, long);
695 return xfs_file_ioctl(filp, cmd, p);
696 case XFS_IOC_SWAPEXT: {
697 struct xfs_swapext sxp;
698 struct compat_xfs_swapext __user *sxu = arg;
699
700 /* Bulk copy in up to the sx_stat field, then copy bstat */
701 if (copy_from_user(&sxp, sxu,
702 offsetof(struct xfs_swapext, sx_stat)) ||
703 xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat))
704 return -XFS_ERROR(EFAULT);
705 error = xfs_swapext(&sxp);
706 return -error;
707 }
448 case XFS_IOC_FSBULKSTAT_32: 708 case XFS_IOC_FSBULKSTAT_32:
449 case XFS_IOC_FSBULKSTAT_SINGLE_32: 709 case XFS_IOC_FSBULKSTAT_SINGLE_32:
450 case XFS_IOC_FSINUMBERS_32: 710 case XFS_IOC_FSINUMBERS_32:
451 cmd = _NATIVE_IOC(cmd, struct xfs_fsop_bulkreq); 711 return xfs_compat_ioc_bulkstat(mp, cmd, arg);
452 return xfs_ioc_bulkstat_compat(XFS_I(inode)->i_mount,
453 cmd, (void __user*)arg);
454 case XFS_IOC_FD_TO_HANDLE_32: 712 case XFS_IOC_FD_TO_HANDLE_32:
455 case XFS_IOC_PATH_TO_HANDLE_32: 713 case XFS_IOC_PATH_TO_HANDLE_32:
456 case XFS_IOC_PATH_TO_FSHANDLE_32: 714 case XFS_IOC_PATH_TO_FSHANDLE_32: {
457 case XFS_IOC_OPEN_BY_HANDLE_32: 715 struct xfs_fsop_handlereq hreq;
458 case XFS_IOC_READLINK_BY_HANDLE_32: 716
459 arg = xfs_ioctl32_fshandle(arg); 717 if (xfs_compat_handlereq_copyin(&hreq, arg))
718 return -XFS_ERROR(EFAULT);
460 cmd = _NATIVE_IOC(cmd, struct xfs_fsop_handlereq); 719 cmd = _NATIVE_IOC(cmd, struct xfs_fsop_handlereq);
461 break; 720 return xfs_find_handle(cmd, &hreq);
462 default:
463 return -ENOIOCTLCMD;
464 } 721 }
722 case XFS_IOC_OPEN_BY_HANDLE_32: {
723 struct xfs_fsop_handlereq hreq;
465 724
466 error = xfs_ioctl(XFS_I(inode), file, mode, cmd, (void __user *)arg); 725 if (xfs_compat_handlereq_copyin(&hreq, arg))
467 xfs_iflags_set(XFS_I(inode), XFS_IMODIFIED); 726 return -XFS_ERROR(EFAULT);
468 727 return xfs_open_by_handle(mp, &hreq, filp, inode);
469 return error; 728 }
470} 729 case XFS_IOC_READLINK_BY_HANDLE_32: {
471 730 struct xfs_fsop_handlereq hreq;
472long
473xfs_file_compat_ioctl(
474 struct file *file,
475 unsigned cmd,
476 unsigned long arg)
477{
478 return xfs_compat_ioctl(0, file, cmd, arg);
479}
480 731
481long 732 if (xfs_compat_handlereq_copyin(&hreq, arg))
482xfs_file_compat_invis_ioctl( 733 return -XFS_ERROR(EFAULT);
483 struct file *file, 734 return xfs_readlink_by_handle(mp, &hreq, inode);
484 unsigned cmd, 735 }
485 unsigned long arg) 736 case XFS_IOC_ATTRLIST_BY_HANDLE_32:
486{ 737 return xfs_compat_attrlist_by_handle(mp, arg, inode);
487 return xfs_compat_ioctl(IO_INVIS, file, cmd, arg); 738 case XFS_IOC_ATTRMULTI_BY_HANDLE_32:
739 return xfs_compat_attrmulti_by_handle(mp, arg, inode);
740 case XFS_IOC_FSSETDM_BY_HANDLE_32:
741 return xfs_compat_fssetdm_by_handle(mp, arg, inode);
742 default:
743 return -XFS_ERROR(ENOIOCTLCMD);
744 }
488} 745}
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h
index 02de6e62ee3..1024c4f8ba0 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.h
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.h
@@ -18,7 +18,217 @@
18#ifndef __XFS_IOCTL32_H__ 18#ifndef __XFS_IOCTL32_H__
19#define __XFS_IOCTL32_H__ 19#define __XFS_IOCTL32_H__
20 20
21extern long xfs_file_compat_ioctl(struct file *, unsigned, unsigned long); 21#include <linux/compat.h>
22extern long xfs_file_compat_invis_ioctl(struct file *, unsigned, unsigned long); 22
23/*
24 * on 32-bit arches, ioctl argument structures may have different sizes
25 * and/or alignment. We define compat structures which match the
26 * 32-bit sizes/alignments here, and their associated ioctl numbers.
27 *
28 * xfs_ioctl32.c contains routines to copy these structures in and out.
29 */
30
31/* stock kernel-level ioctls we support */
32#define XFS_IOC_GETXFLAGS_32 FS_IOC32_GETFLAGS
33#define XFS_IOC_SETXFLAGS_32 FS_IOC32_SETFLAGS
34#define XFS_IOC_GETVERSION_32 FS_IOC32_GETVERSION
35
36/*
37 * On intel, even if sizes match, alignment and/or padding may differ.
38 */
39#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
40#define BROKEN_X86_ALIGNMENT
41#define __compat_packed __attribute__((packed))
42#else
43#define __compat_packed
44#endif
45
46typedef struct compat_xfs_bstime {
47 compat_time_t tv_sec; /* seconds */
48 __s32 tv_nsec; /* and nanoseconds */
49} compat_xfs_bstime_t;
50
51typedef struct compat_xfs_bstat {
52 __u64 bs_ino; /* inode number */
53 __u16 bs_mode; /* type and mode */
54 __u16 bs_nlink; /* number of links */
55 __u32 bs_uid; /* user id */
56 __u32 bs_gid; /* group id */
57 __u32 bs_rdev; /* device value */
58 __s32 bs_blksize; /* block size */
59 __s64 bs_size; /* file size */
60 compat_xfs_bstime_t bs_atime; /* access time */
61 compat_xfs_bstime_t bs_mtime; /* modify time */
62 compat_xfs_bstime_t bs_ctime; /* inode change time */
63 int64_t bs_blocks; /* number of blocks */
64 __u32 bs_xflags; /* extended flags */
65 __s32 bs_extsize; /* extent size */
66 __s32 bs_extents; /* number of extents */
67 __u32 bs_gen; /* generation count */
68 __u16 bs_projid; /* project id */
69 unsigned char bs_pad[14]; /* pad space, unused */
70 __u32 bs_dmevmask; /* DMIG event mask */
71 __u16 bs_dmstate; /* DMIG state info */
72 __u16 bs_aextents; /* attribute number of extents */
73} __compat_packed compat_xfs_bstat_t;
74
75typedef struct compat_xfs_fsop_bulkreq {
76 compat_uptr_t lastip; /* last inode # pointer */
77 __s32 icount; /* count of entries in buffer */
78 compat_uptr_t ubuffer; /* user buffer for inode desc. */
79 compat_uptr_t ocount; /* output count pointer */
80} compat_xfs_fsop_bulkreq_t;
81
82#define XFS_IOC_FSBULKSTAT_32 \
83 _IOWR('X', 101, struct compat_xfs_fsop_bulkreq)
84#define XFS_IOC_FSBULKSTAT_SINGLE_32 \
85 _IOWR('X', 102, struct compat_xfs_fsop_bulkreq)
86#define XFS_IOC_FSINUMBERS_32 \
87 _IOWR('X', 103, struct compat_xfs_fsop_bulkreq)
88
89typedef struct compat_xfs_fsop_handlereq {
90 __u32 fd; /* fd for FD_TO_HANDLE */
91 compat_uptr_t path; /* user pathname */
92 __u32 oflags; /* open flags */
93 compat_uptr_t ihandle; /* user supplied handle */
94 __u32 ihandlen; /* user supplied length */
95 compat_uptr_t ohandle; /* user buffer for handle */
96 compat_uptr_t ohandlen; /* user buffer length */
97} compat_xfs_fsop_handlereq_t;
98
99#define XFS_IOC_PATH_TO_FSHANDLE_32 \
100 _IOWR('X', 104, struct compat_xfs_fsop_handlereq)
101#define XFS_IOC_PATH_TO_HANDLE_32 \
102 _IOWR('X', 105, struct compat_xfs_fsop_handlereq)
103#define XFS_IOC_FD_TO_HANDLE_32 \
104 _IOWR('X', 106, struct compat_xfs_fsop_handlereq)
105#define XFS_IOC_OPEN_BY_HANDLE_32 \
106 _IOWR('X', 107, struct compat_xfs_fsop_handlereq)
107#define XFS_IOC_READLINK_BY_HANDLE_32 \
108 _IOWR('X', 108, struct compat_xfs_fsop_handlereq)
109
110/* The bstat field in the swapext struct needs translation */
111typedef struct compat_xfs_swapext {
112 __int64_t sx_version; /* version */
113 __int64_t sx_fdtarget; /* fd of target file */
114 __int64_t sx_fdtmp; /* fd of tmp file */
115 xfs_off_t sx_offset; /* offset into file */
116 xfs_off_t sx_length; /* leng from offset */
117 char sx_pad[16]; /* pad space, unused */
118 compat_xfs_bstat_t sx_stat; /* stat of target b4 copy */
119} __compat_packed compat_xfs_swapext_t;
120
121#define XFS_IOC_SWAPEXT_32 _IOWR('X', 109, struct compat_xfs_swapext)
122
123typedef struct compat_xfs_fsop_attrlist_handlereq {
124 struct compat_xfs_fsop_handlereq hreq; /* handle interface structure */
125 struct xfs_attrlist_cursor pos; /* opaque cookie, list offset */
126 __u32 flags; /* which namespace to use */
127 __u32 buflen; /* length of buffer supplied */
128 compat_uptr_t buffer; /* returned names */
129} __compat_packed compat_xfs_fsop_attrlist_handlereq_t;
130
131/* Note: actually this is read/write */
132#define XFS_IOC_ATTRLIST_BY_HANDLE_32 \
133 _IOW('X', 122, struct compat_xfs_fsop_attrlist_handlereq)
134
135/* am_opcodes defined in xfs_fs.h */
136typedef struct compat_xfs_attr_multiop {
137 __u32 am_opcode;
138 __s32 am_error;
139 compat_uptr_t am_attrname;
140 compat_uptr_t am_attrvalue;
141 __u32 am_length;
142 __u32 am_flags;
143} compat_xfs_attr_multiop_t;
144
145typedef struct compat_xfs_fsop_attrmulti_handlereq {
146 struct compat_xfs_fsop_handlereq hreq; /* handle interface structure */
147 __u32 opcount;/* count of following multiop */
148 /* ptr to compat_xfs_attr_multiop */
149 compat_uptr_t ops; /* attr_multi data */
150} compat_xfs_fsop_attrmulti_handlereq_t;
151
152#define XFS_IOC_ATTRMULTI_BY_HANDLE_32 \
153 _IOW('X', 123, struct compat_xfs_fsop_attrmulti_handlereq)
154
155typedef struct compat_xfs_fsop_setdm_handlereq {
156 struct compat_xfs_fsop_handlereq hreq; /* handle information */
157 /* ptr to struct fsdmidata */
158 compat_uptr_t data; /* DMAPI data */
159} compat_xfs_fsop_setdm_handlereq_t;
160
161#define XFS_IOC_FSSETDM_BY_HANDLE_32 \
162 _IOW('X', 121, struct compat_xfs_fsop_setdm_handlereq)
163
164#ifdef BROKEN_X86_ALIGNMENT
165/* on ia32 l_start is on a 32-bit boundary */
166typedef struct compat_xfs_flock64 {
167 __s16 l_type;
168 __s16 l_whence;
169 __s64 l_start __attribute__((packed));
170 /* len == 0 means until end of file */
171 __s64 l_len __attribute__((packed));
172 __s32 l_sysid;
173 __u32 l_pid;
174 __s32 l_pad[4]; /* reserve area */
175} compat_xfs_flock64_t;
176
177#define XFS_IOC_ALLOCSP_32 _IOW('X', 10, struct compat_xfs_flock64)
178#define XFS_IOC_FREESP_32 _IOW('X', 11, struct compat_xfs_flock64)
179#define XFS_IOC_ALLOCSP64_32 _IOW('X', 36, struct compat_xfs_flock64)
180#define XFS_IOC_FREESP64_32 _IOW('X', 37, struct compat_xfs_flock64)
181#define XFS_IOC_RESVSP_32 _IOW('X', 40, struct compat_xfs_flock64)
182#define XFS_IOC_UNRESVSP_32 _IOW('X', 41, struct compat_xfs_flock64)
183#define XFS_IOC_RESVSP64_32 _IOW('X', 42, struct compat_xfs_flock64)
184#define XFS_IOC_UNRESVSP64_32 _IOW('X', 43, struct compat_xfs_flock64)
185
186typedef struct compat_xfs_fsop_geom_v1 {
187 __u32 blocksize; /* filesystem (data) block size */
188 __u32 rtextsize; /* realtime extent size */
189 __u32 agblocks; /* fsblocks in an AG */
190 __u32 agcount; /* number of allocation groups */
191 __u32 logblocks; /* fsblocks in the log */
192 __u32 sectsize; /* (data) sector size, bytes */
193 __u32 inodesize; /* inode size in bytes */
194 __u32 imaxpct; /* max allowed inode space(%) */
195 __u64 datablocks; /* fsblocks in data subvolume */
196 __u64 rtblocks; /* fsblocks in realtime subvol */
197 __u64 rtextents; /* rt extents in realtime subvol*/
198 __u64 logstart; /* starting fsblock of the log */
199 unsigned char uuid[16]; /* unique id of the filesystem */
200 __u32 sunit; /* stripe unit, fsblocks */
201 __u32 swidth; /* stripe width, fsblocks */
202 __s32 version; /* structure version */
203 __u32 flags; /* superblock version flags */
204 __u32 logsectsize; /* log sector size, bytes */
205 __u32 rtsectsize; /* realtime sector size, bytes */
206 __u32 dirblocksize; /* directory block size, bytes */
207} __attribute__((packed)) compat_xfs_fsop_geom_v1_t;
208
209#define XFS_IOC_FSGEOMETRY_V1_32 \
210 _IOR('X', 100, struct compat_xfs_fsop_geom_v1)
211
212typedef struct compat_xfs_inogrp {
213 __u64 xi_startino; /* starting inode number */
214 __s32 xi_alloccount; /* # bits set in allocmask */
215 __u64 xi_allocmask; /* mask of allocated inodes */
216} __attribute__((packed)) compat_xfs_inogrp_t;
217
218/* These growfs input structures have padding on the end, so must translate */
219typedef struct compat_xfs_growfs_data {
220 __u64 newblocks; /* new data subvol size, fsblocks */
221 __u32 imaxpct; /* new inode space percentage limit */
222} __attribute__((packed)) compat_xfs_growfs_data_t;
223
224typedef struct compat_xfs_growfs_rt {
225 __u64 newblocks; /* new realtime size, fsblocks */
226 __u32 extsize; /* new realtime extent size, fsblocks */
227} __attribute__((packed)) compat_xfs_growfs_rt_t;
228
229#define XFS_IOC_FSGROWFSDATA_32 _IOW('X', 110, struct compat_xfs_growfs_data)
230#define XFS_IOC_FSGROWFSRT_32 _IOW('X', 112, struct compat_xfs_growfs_rt)
231
232#endif /* BROKEN_X86_ALIGNMENT */
23 233
24#endif /* __XFS_IOCTL32_H__ */ 234#endif /* __XFS_IOCTL32_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 095d271f343..7aa53fefc67 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -53,6 +53,7 @@
53#include <linux/namei.h> 53#include <linux/namei.h>
54#include <linux/security.h> 54#include <linux/security.h>
55#include <linux/falloc.h> 55#include <linux/falloc.h>
56#include <linux/fiemap.h>
56 57
57/* 58/*
58 * Bring the atime in the XFS inode uptodate. 59 * Bring the atime in the XFS inode uptodate.
@@ -64,14 +65,14 @@ xfs_synchronize_atime(
64{ 65{
65 struct inode *inode = VFS_I(ip); 66 struct inode *inode = VFS_I(ip);
66 67
67 if (inode) { 68 if (!(inode->i_state & I_CLEAR)) {
68 ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec; 69 ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
69 ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec; 70 ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
70 } 71 }
71} 72}
72 73
73/* 74/*
74 * If the linux inode exists, mark it dirty. 75 * If the linux inode is valid, mark it dirty.
75 * Used when commiting a dirty inode into a transaction so that 76 * Used when commiting a dirty inode into a transaction so that
76 * the inode will get written back by the linux code 77 * the inode will get written back by the linux code
77 */ 78 */
@@ -81,7 +82,7 @@ xfs_mark_inode_dirty_sync(
81{ 82{
82 struct inode *inode = VFS_I(ip); 83 struct inode *inode = VFS_I(ip);
83 84
84 if (inode) 85 if (!(inode->i_state & (I_WILL_FREE|I_FREEING|I_CLEAR)))
85 mark_inode_dirty_sync(inode); 86 mark_inode_dirty_sync(inode);
86} 87}
87 88
@@ -128,7 +129,7 @@ xfs_ichgtime(
128 if (sync_it) { 129 if (sync_it) {
129 SYNCHRONIZE(); 130 SYNCHRONIZE();
130 ip->i_update_core = 1; 131 ip->i_update_core = 1;
131 mark_inode_dirty_sync(inode); 132 xfs_mark_inode_dirty_sync(ip);
132 } 133 }
133} 134}
134 135
@@ -158,8 +159,6 @@ xfs_init_security(
158 } 159 }
159 160
160 error = xfs_attr_set(ip, name, value, length, ATTR_SECURE); 161 error = xfs_attr_set(ip, name, value, length, ATTR_SECURE);
161 if (!error)
162 xfs_iflags_set(ip, XFS_IMODIFIED);
163 162
164 kfree(name); 163 kfree(name);
165 kfree(value); 164 kfree(value);
@@ -260,7 +259,6 @@ xfs_vn_mknod(
260 error = _ACL_INHERIT(inode, mode, default_acl); 259 error = _ACL_INHERIT(inode, mode, default_acl);
261 if (unlikely(error)) 260 if (unlikely(error))
262 goto out_cleanup_inode; 261 goto out_cleanup_inode;
263 xfs_iflags_set(ip, XFS_IMODIFIED);
264 _ACL_FREE(default_acl); 262 _ACL_FREE(default_acl);
265 } 263 }
266 264
@@ -366,21 +364,17 @@ xfs_vn_link(
366 struct inode *dir, 364 struct inode *dir,
367 struct dentry *dentry) 365 struct dentry *dentry)
368{ 366{
369 struct inode *inode; /* inode of guy being linked to */ 367 struct inode *inode = old_dentry->d_inode;
370 struct xfs_name name; 368 struct xfs_name name;
371 int error; 369 int error;
372 370
373 inode = old_dentry->d_inode;
374 xfs_dentry_to_name(&name, dentry); 371 xfs_dentry_to_name(&name, dentry);
375 372
376 igrab(inode);
377 error = xfs_link(XFS_I(dir), XFS_I(inode), &name); 373 error = xfs_link(XFS_I(dir), XFS_I(inode), &name);
378 if (unlikely(error)) { 374 if (unlikely(error))
379 iput(inode);
380 return -error; 375 return -error;
381 }
382 376
383 xfs_iflags_set(XFS_I(dir), XFS_IMODIFIED); 377 atomic_inc(&inode->i_count);
384 d_instantiate(dentry, inode); 378 d_instantiate(dentry, inode);
385 return 0; 379 return 0;
386} 380}
@@ -601,7 +595,7 @@ xfs_vn_setattr(
601 struct dentry *dentry, 595 struct dentry *dentry,
602 struct iattr *iattr) 596 struct iattr *iattr)
603{ 597{
604 return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0, NULL); 598 return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0);
605} 599}
606 600
607/* 601/*
@@ -642,7 +636,7 @@ xfs_vn_fallocate(
642 636
643 xfs_ilock(ip, XFS_IOLOCK_EXCL); 637 xfs_ilock(ip, XFS_IOLOCK_EXCL);
644 error = xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf, 638 error = xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf,
645 0, NULL, XFS_ATTR_NOLOCK); 639 0, XFS_ATTR_NOLOCK);
646 if (!error && !(mode & FALLOC_FL_KEEP_SIZE) && 640 if (!error && !(mode & FALLOC_FL_KEEP_SIZE) &&
647 offset + len > i_size_read(inode)) 641 offset + len > i_size_read(inode))
648 new_size = offset + len; 642 new_size = offset + len;
@@ -653,7 +647,7 @@ xfs_vn_fallocate(
653 647
654 iattr.ia_valid = ATTR_SIZE; 648 iattr.ia_valid = ATTR_SIZE;
655 iattr.ia_size = new_size; 649 iattr.ia_size = new_size;
656 error = xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK, NULL); 650 error = xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK);
657 } 651 }
658 652
659 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 653 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
@@ -661,6 +655,88 @@ out_error:
661 return error; 655 return error;
662} 656}
663 657
658#define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
659
660/*
661 * Call fiemap helper to fill in user data.
662 * Returns positive errors to xfs_getbmap.
663 */
664STATIC int
665xfs_fiemap_format(
666 void **arg,
667 struct getbmapx *bmv,
668 int *full)
669{
670 int error;
671 struct fiemap_extent_info *fieinfo = *arg;
672 u32 fiemap_flags = 0;
673 u64 logical, physical, length;
674
675 /* Do nothing for a hole */
676 if (bmv->bmv_block == -1LL)
677 return 0;
678
679 logical = BBTOB(bmv->bmv_offset);
680 physical = BBTOB(bmv->bmv_block);
681 length = BBTOB(bmv->bmv_length);
682
683 if (bmv->bmv_oflags & BMV_OF_PREALLOC)
684 fiemap_flags |= FIEMAP_EXTENT_UNWRITTEN;
685 else if (bmv->bmv_oflags & BMV_OF_DELALLOC) {
686 fiemap_flags |= FIEMAP_EXTENT_DELALLOC;
687 physical = 0; /* no block yet */
688 }
689 if (bmv->bmv_oflags & BMV_OF_LAST)
690 fiemap_flags |= FIEMAP_EXTENT_LAST;
691
692 error = fiemap_fill_next_extent(fieinfo, logical, physical,
693 length, fiemap_flags);
694 if (error > 0) {
695 error = 0;
696 *full = 1; /* user array now full */
697 }
698
699 return -error;
700}
701
702STATIC int
703xfs_vn_fiemap(
704 struct inode *inode,
705 struct fiemap_extent_info *fieinfo,
706 u64 start,
707 u64 length)
708{
709 xfs_inode_t *ip = XFS_I(inode);
710 struct getbmapx bm;
711 int error;
712
713 error = fiemap_check_flags(fieinfo, XFS_FIEMAP_FLAGS);
714 if (error)
715 return error;
716
717 /* Set up bmap header for xfs internal routine */
718 bm.bmv_offset = BTOBB(start);
719 /* Special case for whole file */
720 if (length == FIEMAP_MAX_OFFSET)
721 bm.bmv_length = -1LL;
722 else
723 bm.bmv_length = BTOBB(length);
724
725 /* our formatter will tell xfs_getbmap when to stop. */
726 bm.bmv_count = MAXEXTNUM;
727 bm.bmv_iflags = BMV_IF_PREALLOC;
728 if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR)
729 bm.bmv_iflags |= BMV_IF_ATTRFORK;
730 if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC))
731 bm.bmv_iflags |= BMV_IF_DELALLOC;
732
733 error = xfs_getbmap(ip, &bm, xfs_fiemap_format, fieinfo);
734 if (error)
735 return -error;
736
737 return 0;
738}
739
664static const struct inode_operations xfs_inode_operations = { 740static const struct inode_operations xfs_inode_operations = {
665 .permission = xfs_vn_permission, 741 .permission = xfs_vn_permission,
666 .truncate = xfs_vn_truncate, 742 .truncate = xfs_vn_truncate,
@@ -671,6 +747,7 @@ static const struct inode_operations xfs_inode_operations = {
671 .removexattr = generic_removexattr, 747 .removexattr = generic_removexattr,
672 .listxattr = xfs_vn_listxattr, 748 .listxattr = xfs_vn_listxattr,
673 .fallocate = xfs_vn_fallocate, 749 .fallocate = xfs_vn_fallocate,
750 .fiemap = xfs_vn_fiemap,
674}; 751};
675 752
676static const struct inode_operations xfs_dir_inode_operations = { 753static const struct inode_operations xfs_dir_inode_operations = {
@@ -766,12 +843,20 @@ xfs_diflags_to_iflags(
766 * When reading existing inodes from disk this is called directly 843 * When reading existing inodes from disk this is called directly
767 * from xfs_iget, when creating a new inode it is called from 844 * from xfs_iget, when creating a new inode it is called from
768 * xfs_ialloc after setting up the inode. 845 * xfs_ialloc after setting up the inode.
846 *
847 * We are always called with an uninitialised linux inode here.
848 * We need to initialise the necessary fields and take a reference
849 * on it.
769 */ 850 */
770void 851void
771xfs_setup_inode( 852xfs_setup_inode(
772 struct xfs_inode *ip) 853 struct xfs_inode *ip)
773{ 854{
774 struct inode *inode = ip->i_vnode; 855 struct inode *inode = &ip->i_vnode;
856
857 inode->i_ino = ip->i_ino;
858 inode->i_state = I_NEW|I_LOCK;
859 inode_add_to_lists(ip->i_mount->m_super, inode);
775 860
776 inode->i_mode = ip->i_d.di_mode; 861 inode->i_mode = ip->i_d.di_mode;
777 inode->i_nlink = ip->i_d.di_nlink; 862 inode->i_nlink = ip->i_d.di_nlink;
@@ -799,7 +884,6 @@ xfs_setup_inode(
799 inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec; 884 inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
800 inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec; 885 inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec;
801 xfs_diflags_to_iflags(inode, ip); 886 xfs_diflags_to_iflags(inode, ip);
802 xfs_iflags_clear(ip, XFS_IMODIFIED);
803 887
804 switch (inode->i_mode & S_IFMT) { 888 switch (inode->i_mode & S_IFMT) {
805 case S_IFREG: 889 case S_IFREG:
diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h
index 8b1a1e31dc2..ef41c92ce66 100644
--- a/fs/xfs/linux-2.6/xfs_iops.h
+++ b/fs/xfs/linux-2.6/xfs_iops.h
@@ -22,7 +22,6 @@ struct xfs_inode;
22 22
23extern const struct file_operations xfs_file_operations; 23extern const struct file_operations xfs_file_operations;
24extern const struct file_operations xfs_dir_file_operations; 24extern const struct file_operations xfs_dir_file_operations;
25extern const struct file_operations xfs_invis_file_operations;
26 25
27extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size); 26extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size);
28 27
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index cc0f7b3a979..507492d6dcc 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -21,18 +21,12 @@
21#include <linux/types.h> 21#include <linux/types.h>
22 22
23/* 23/*
24 * Some types are conditional depending on the target system.
25 * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits. 24 * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits.
26 * XFS_BIG_INUMS needs the VFS inode number to be 64 bits, as well 25 * XFS_BIG_INUMS requires XFS_BIG_BLKNOS to be set.
27 * as requiring XFS_BIG_BLKNOS to be set.
28 */ 26 */
29#if defined(CONFIG_LBD) || (BITS_PER_LONG == 64) 27#if defined(CONFIG_LBD) || (BITS_PER_LONG == 64)
30# define XFS_BIG_BLKNOS 1 28# define XFS_BIG_BLKNOS 1
31# if BITS_PER_LONG == 64 29# define XFS_BIG_INUMS 1
32# define XFS_BIG_INUMS 1
33# else
34# define XFS_BIG_INUMS 0
35# endif
36#else 30#else
37# define XFS_BIG_BLKNOS 0 31# define XFS_BIG_BLKNOS 0
38# define XFS_BIG_INUMS 0 32# define XFS_BIG_INUMS 0
@@ -77,6 +71,7 @@
77#include <linux/spinlock.h> 71#include <linux/spinlock.h>
78#include <linux/random.h> 72#include <linux/random.h>
79#include <linux/ctype.h> 73#include <linux/ctype.h>
74#include <linux/writeback.h>
80 75
81#include <asm/page.h> 76#include <asm/page.h>
82#include <asm/div64.h> 77#include <asm/div64.h>
@@ -85,7 +80,6 @@
85#include <asm/byteorder.h> 80#include <asm/byteorder.h>
86#include <asm/unaligned.h> 81#include <asm/unaligned.h>
87 82
88#include <xfs_vfs.h>
89#include <xfs_cred.h> 83#include <xfs_cred.h>
90#include <xfs_vnode.h> 84#include <xfs_vnode.h>
91#include <xfs_stats.h> 85#include <xfs_stats.h>
@@ -107,7 +101,6 @@
107#undef HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ 101#undef HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
108#endif 102#endif
109 103
110#define restricted_chown xfs_params.restrict_chown.val
111#define irix_sgid_inherit xfs_params.sgid_inherit.val 104#define irix_sgid_inherit xfs_params.sgid_inherit.val
112#define irix_symlink_mode xfs_params.symlink_mode.val 105#define irix_symlink_mode xfs_params.symlink_mode.val
113#define xfs_panic_mask xfs_params.panic_mask.val 106#define xfs_panic_mask xfs_params.panic_mask.val
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 1957e5357d0..7e90daa0d1d 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -51,7 +51,6 @@
51#include "xfs_vnodeops.h" 51#include "xfs_vnodeops.h"
52 52
53#include <linux/capability.h> 53#include <linux/capability.h>
54#include <linux/mount.h>
55#include <linux/writeback.h> 54#include <linux/writeback.h>
56 55
57 56
@@ -243,7 +242,7 @@ xfs_read(
243 242
244 if (unlikely(ioflags & IO_ISDIRECT)) { 243 if (unlikely(ioflags & IO_ISDIRECT)) {
245 if (inode->i_mapping->nrpages) 244 if (inode->i_mapping->nrpages)
246 ret = xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK), 245 ret = -xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK),
247 -1, FI_REMAPF_LOCKED); 246 -1, FI_REMAPF_LOCKED);
248 mutex_unlock(&inode->i_mutex); 247 mutex_unlock(&inode->i_mutex);
249 if (ret) { 248 if (ret) {
@@ -668,15 +667,8 @@ start:
668 if (new_size > xip->i_size) 667 if (new_size > xip->i_size)
669 xip->i_new_size = new_size; 668 xip->i_new_size = new_size;
670 669
671 /* 670 if (likely(!(ioflags & IO_INVIS)))
672 * We're not supposed to change timestamps in readonly-mounted
673 * filesystems. Throw it away if anyone asks us.
674 */
675 if (likely(!(ioflags & IO_INVIS) &&
676 !mnt_want_write(file->f_path.mnt))) {
677 xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 671 xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
678 mnt_drop_write(file->f_path.mnt);
679 }
680 672
681 /* 673 /*
682 * If the offset is beyond the size of the file, we have a couple 674 * If the offset is beyond the size of the file, we have a couple
@@ -715,7 +707,6 @@ start:
715 } 707 }
716 } 708 }
717 709
718retry:
719 /* We can write back this queue in page reclaim */ 710 /* We can write back this queue in page reclaim */
720 current->backing_dev_info = mapping->backing_dev_info; 711 current->backing_dev_info = mapping->backing_dev_info;
721 712
@@ -771,6 +762,17 @@ retry:
771 if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO)) 762 if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO))
772 ret = wait_on_sync_kiocb(iocb); 763 ret = wait_on_sync_kiocb(iocb);
773 764
765 isize = i_size_read(inode);
766 if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize))
767 *offset = isize;
768
769 if (*offset > xip->i_size) {
770 xfs_ilock(xip, XFS_ILOCK_EXCL);
771 if (*offset > xip->i_size)
772 xip->i_size = *offset;
773 xfs_iunlock(xip, XFS_ILOCK_EXCL);
774 }
775
774 if (ret == -ENOSPC && 776 if (ret == -ENOSPC &&
775 DM_EVENT_ENABLED(xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) { 777 DM_EVENT_ENABLED(xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) {
776 xfs_iunlock(xip, iolock); 778 xfs_iunlock(xip, iolock);
@@ -784,20 +786,7 @@ retry:
784 xfs_ilock(xip, iolock); 786 xfs_ilock(xip, iolock);
785 if (error) 787 if (error)
786 goto out_unlock_internal; 788 goto out_unlock_internal;
787 pos = xip->i_size; 789 goto start;
788 ret = 0;
789 goto retry;
790 }
791
792 isize = i_size_read(inode);
793 if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize))
794 *offset = isize;
795
796 if (*offset > xip->i_size) {
797 xfs_ilock(xip, XFS_ILOCK_EXCL);
798 if (*offset > xip->i_size)
799 xip->i_size = *offset;
800 xfs_iunlock(xip, XFS_ILOCK_EXCL);
801 } 790 }
802 791
803 error = -ret; 792 error = -ret;
@@ -855,13 +844,7 @@ retry:
855int 844int
856xfs_bdstrat_cb(struct xfs_buf *bp) 845xfs_bdstrat_cb(struct xfs_buf *bp)
857{ 846{
858 xfs_mount_t *mp; 847 if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
859
860 mp = XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *);
861 if (!XFS_FORCED_SHUTDOWN(mp)) {
862 xfs_buf_iorequest(bp);
863 return 0;
864 } else {
865 xfs_buftrace("XFS__BDSTRAT IOERROR", bp); 848 xfs_buftrace("XFS__BDSTRAT IOERROR", bp);
866 /* 849 /*
867 * Metadata write that didn't get logged but 850 * Metadata write that didn't get logged but
@@ -874,6 +857,9 @@ xfs_bdstrat_cb(struct xfs_buf *bp)
874 else 857 else
875 return (xfs_bioerror(bp)); 858 return (xfs_bioerror(bp));
876 } 859 }
860
861 xfs_buf_iorequest(bp);
862 return 0;
877} 863}
878 864
879/* 865/*
diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c
index 3d5b67c075c..c3526d445f6 100644
--- a/fs/xfs/linux-2.6/xfs_stats.c
+++ b/fs/xfs/linux-2.6/xfs_stats.c
@@ -53,11 +53,15 @@ xfs_read_xfsstats(
53 { "icluster", XFSSTAT_END_INODE_CLUSTER }, 53 { "icluster", XFSSTAT_END_INODE_CLUSTER },
54 { "vnodes", XFSSTAT_END_VNODE_OPS }, 54 { "vnodes", XFSSTAT_END_VNODE_OPS },
55 { "buf", XFSSTAT_END_BUF }, 55 { "buf", XFSSTAT_END_BUF },
56 { "abtb2", XFSSTAT_END_ABTB_V2 },
57 { "abtc2", XFSSTAT_END_ABTC_V2 },
58 { "bmbt2", XFSSTAT_END_BMBT_V2 },
59 { "ibt2", XFSSTAT_END_IBT_V2 },
56 }; 60 };
57 61
58 /* Loop over all stats groups */ 62 /* Loop over all stats groups */
59 for (i=j=len = 0; i < ARRAY_SIZE(xstats); i++) { 63 for (i=j=len = 0; i < ARRAY_SIZE(xstats); i++) {
60 len += sprintf(buffer + len, xstats[i].desc); 64 len += sprintf(buffer + len, "%s", xstats[i].desc);
61 /* inner loop does each group */ 65 /* inner loop does each group */
62 while (j < xstats[i].endpoint) { 66 while (j < xstats[i].endpoint) {
63 val = 0; 67 val = 0;
diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h
index e83820febc9..736854b1ca1 100644
--- a/fs/xfs/linux-2.6/xfs_stats.h
+++ b/fs/xfs/linux-2.6/xfs_stats.h
@@ -118,6 +118,71 @@ struct xfsstats {
118 __uint32_t xb_page_retries; 118 __uint32_t xb_page_retries;
119 __uint32_t xb_page_found; 119 __uint32_t xb_page_found;
120 __uint32_t xb_get_read; 120 __uint32_t xb_get_read;
121/* Version 2 btree counters */
122#define XFSSTAT_END_ABTB_V2 (XFSSTAT_END_BUF+15)
123 __uint32_t xs_abtb_2_lookup;
124 __uint32_t xs_abtb_2_compare;
125 __uint32_t xs_abtb_2_insrec;
126 __uint32_t xs_abtb_2_delrec;
127 __uint32_t xs_abtb_2_newroot;
128 __uint32_t xs_abtb_2_killroot;
129 __uint32_t xs_abtb_2_increment;
130 __uint32_t xs_abtb_2_decrement;
131 __uint32_t xs_abtb_2_lshift;
132 __uint32_t xs_abtb_2_rshift;
133 __uint32_t xs_abtb_2_split;
134 __uint32_t xs_abtb_2_join;
135 __uint32_t xs_abtb_2_alloc;
136 __uint32_t xs_abtb_2_free;
137 __uint32_t xs_abtb_2_moves;
138#define XFSSTAT_END_ABTC_V2 (XFSSTAT_END_ABTB_V2+15)
139 __uint32_t xs_abtc_2_lookup;
140 __uint32_t xs_abtc_2_compare;
141 __uint32_t xs_abtc_2_insrec;
142 __uint32_t xs_abtc_2_delrec;
143 __uint32_t xs_abtc_2_newroot;
144 __uint32_t xs_abtc_2_killroot;
145 __uint32_t xs_abtc_2_increment;
146 __uint32_t xs_abtc_2_decrement;
147 __uint32_t xs_abtc_2_lshift;
148 __uint32_t xs_abtc_2_rshift;
149 __uint32_t xs_abtc_2_split;
150 __uint32_t xs_abtc_2_join;
151 __uint32_t xs_abtc_2_alloc;
152 __uint32_t xs_abtc_2_free;
153 __uint32_t xs_abtc_2_moves;
154#define XFSSTAT_END_BMBT_V2 (XFSSTAT_END_ABTC_V2+15)
155 __uint32_t xs_bmbt_2_lookup;
156 __uint32_t xs_bmbt_2_compare;
157 __uint32_t xs_bmbt_2_insrec;
158 __uint32_t xs_bmbt_2_delrec;
159 __uint32_t xs_bmbt_2_newroot;
160 __uint32_t xs_bmbt_2_killroot;
161 __uint32_t xs_bmbt_2_increment;
162 __uint32_t xs_bmbt_2_decrement;
163 __uint32_t xs_bmbt_2_lshift;
164 __uint32_t xs_bmbt_2_rshift;
165 __uint32_t xs_bmbt_2_split;
166 __uint32_t xs_bmbt_2_join;
167 __uint32_t xs_bmbt_2_alloc;
168 __uint32_t xs_bmbt_2_free;
169 __uint32_t xs_bmbt_2_moves;
170#define XFSSTAT_END_IBT_V2 (XFSSTAT_END_BMBT_V2+15)
171 __uint32_t xs_ibt_2_lookup;
172 __uint32_t xs_ibt_2_compare;
173 __uint32_t xs_ibt_2_insrec;
174 __uint32_t xs_ibt_2_delrec;
175 __uint32_t xs_ibt_2_newroot;
176 __uint32_t xs_ibt_2_killroot;
177 __uint32_t xs_ibt_2_increment;
178 __uint32_t xs_ibt_2_decrement;
179 __uint32_t xs_ibt_2_lshift;
180 __uint32_t xs_ibt_2_rshift;
181 __uint32_t xs_ibt_2_split;
182 __uint32_t xs_ibt_2_join;
183 __uint32_t xs_ibt_2_alloc;
184 __uint32_t xs_ibt_2_free;
185 __uint32_t xs_ibt_2_moves;
121/* Extra precision counters */ 186/* Extra precision counters */
122 __uint64_t xs_xstrat_bytes; 187 __uint64_t xs_xstrat_bytes;
123 __uint64_t xs_write_bytes; 188 __uint64_t xs_write_bytes;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 37ebe36056e..36f6cc703ef 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -18,7 +18,6 @@
18#include "xfs.h" 18#include "xfs.h"
19#include "xfs_bit.h" 19#include "xfs_bit.h"
20#include "xfs_log.h" 20#include "xfs_log.h"
21#include "xfs_clnt.h"
22#include "xfs_inum.h" 21#include "xfs_inum.h"
23#include "xfs_trans.h" 22#include "xfs_trans.h"
24#include "xfs_sb.h" 23#include "xfs_sb.h"
@@ -36,6 +35,7 @@
36#include "xfs_dinode.h" 35#include "xfs_dinode.h"
37#include "xfs_inode.h" 36#include "xfs_inode.h"
38#include "xfs_btree.h" 37#include "xfs_btree.h"
38#include "xfs_btree_trace.h"
39#include "xfs_ialloc.h" 39#include "xfs_ialloc.h"
40#include "xfs_bmap.h" 40#include "xfs_bmap.h"
41#include "xfs_rtalloc.h" 41#include "xfs_rtalloc.h"
@@ -48,7 +48,6 @@
48#include "xfs_buf_item.h" 48#include "xfs_buf_item.h"
49#include "xfs_utils.h" 49#include "xfs_utils.h"
50#include "xfs_vnodeops.h" 50#include "xfs_vnodeops.h"
51#include "xfs_vfsops.h"
52#include "xfs_version.h" 51#include "xfs_version.h"
53#include "xfs_log_priv.h" 52#include "xfs_log_priv.h"
54#include "xfs_trans_priv.h" 53#include "xfs_trans_priv.h"
@@ -58,6 +57,7 @@
58#include "xfs_extfree_item.h" 57#include "xfs_extfree_item.h"
59#include "xfs_mru_cache.h" 58#include "xfs_mru_cache.h"
60#include "xfs_inode_item.h" 59#include "xfs_inode_item.h"
60#include "xfs_sync.h"
61 61
62#include <linux/namei.h> 62#include <linux/namei.h>
63#include <linux/init.h> 63#include <linux/init.h>
@@ -70,36 +70,9 @@
70 70
71static struct quotactl_ops xfs_quotactl_operations; 71static struct quotactl_ops xfs_quotactl_operations;
72static struct super_operations xfs_super_operations; 72static struct super_operations xfs_super_operations;
73static kmem_zone_t *xfs_vnode_zone;
74static kmem_zone_t *xfs_ioend_zone; 73static kmem_zone_t *xfs_ioend_zone;
75mempool_t *xfs_ioend_pool; 74mempool_t *xfs_ioend_pool;
76 75
77STATIC struct xfs_mount_args *
78xfs_args_allocate(
79 struct super_block *sb,
80 int silent)
81{
82 struct xfs_mount_args *args;
83
84 args = kzalloc(sizeof(struct xfs_mount_args), GFP_KERNEL);
85 if (!args)
86 return NULL;
87
88 args->logbufs = args->logbufsize = -1;
89 strncpy(args->fsname, sb->s_id, MAXNAMELEN);
90
91 /* Copy the already-parsed mount(2) flags we're interested in */
92 if (sb->s_flags & MS_DIRSYNC)
93 args->flags |= XFSMNT_DIRSYNC;
94 if (sb->s_flags & MS_SYNCHRONOUS)
95 args->flags |= XFSMNT_WSYNC;
96 if (silent)
97 args->flags |= XFSMNT_QUIET;
98 args->flags |= XFSMNT_32BITINODES;
99
100 return args;
101}
102
103#define MNTOPT_LOGBUFS "logbufs" /* number of XFS log buffers */ 76#define MNTOPT_LOGBUFS "logbufs" /* number of XFS log buffers */
104#define MNTOPT_LOGBSIZE "logbsize" /* size of XFS log buffers */ 77#define MNTOPT_LOGBSIZE "logbsize" /* size of XFS log buffers */
105#define MNTOPT_LOGDEV "logdev" /* log device */ 78#define MNTOPT_LOGDEV "logdev" /* log device */
@@ -188,26 +161,54 @@ suffix_strtoul(char *s, char **endp, unsigned int base)
188 return simple_strtoul((const char *)s, endp, base) << shift_left_factor; 161 return simple_strtoul((const char *)s, endp, base) << shift_left_factor;
189} 162}
190 163
164/*
165 * This function fills in xfs_mount_t fields based on mount args.
166 * Note: the superblock has _not_ yet been read in.
167 *
168 * Note that this function leaks the various device name allocations on
169 * failure. The caller takes care of them.
170 */
191STATIC int 171STATIC int
192xfs_parseargs( 172xfs_parseargs(
193 struct xfs_mount *mp, 173 struct xfs_mount *mp,
194 char *options, 174 char *options,
195 struct xfs_mount_args *args, 175 char **mtpt)
196 int update)
197{ 176{
177 struct super_block *sb = mp->m_super;
198 char *this_char, *value, *eov; 178 char *this_char, *value, *eov;
199 int dsunit, dswidth, vol_dsunit, vol_dswidth; 179 int dsunit = 0;
200 int iosize; 180 int dswidth = 0;
181 int iosize = 0;
201 int dmapi_implies_ikeep = 1; 182 int dmapi_implies_ikeep = 1;
183 uchar_t iosizelog = 0;
184
185 /*
186 * Copy binary VFS mount flags we are interested in.
187 */
188 if (sb->s_flags & MS_RDONLY)
189 mp->m_flags |= XFS_MOUNT_RDONLY;
190 if (sb->s_flags & MS_DIRSYNC)
191 mp->m_flags |= XFS_MOUNT_DIRSYNC;
192 if (sb->s_flags & MS_SYNCHRONOUS)
193 mp->m_flags |= XFS_MOUNT_WSYNC;
194
195 /*
196 * Set some default flags that could be cleared by the mount option
197 * parsing.
198 */
199 mp->m_flags |= XFS_MOUNT_BARRIER;
200 mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
201 mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
202 202
203 args->flags |= XFSMNT_BARRIER; 203 /*
204 args->flags2 |= XFSMNT2_COMPAT_IOSIZE; 204 * These can be overridden by the mount option parsing.
205 */
206 mp->m_logbufs = -1;
207 mp->m_logbsize = -1;
205 208
206 if (!options) 209 if (!options)
207 goto done; 210 goto done;
208 211
209 iosize = dsunit = dswidth = vol_dsunit = vol_dswidth = 0;
210
211 while ((this_char = strsep(&options, ",")) != NULL) { 212 while ((this_char = strsep(&options, ",")) != NULL) {
212 if (!*this_char) 213 if (!*this_char)
213 continue; 214 continue;
@@ -221,7 +222,7 @@ xfs_parseargs(
221 this_char); 222 this_char);
222 return EINVAL; 223 return EINVAL;
223 } 224 }
224 args->logbufs = simple_strtoul(value, &eov, 10); 225 mp->m_logbufs = simple_strtoul(value, &eov, 10);
225 } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) { 226 } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
226 if (!value || !*value) { 227 if (!value || !*value) {
227 cmn_err(CE_WARN, 228 cmn_err(CE_WARN,
@@ -229,7 +230,7 @@ xfs_parseargs(
229 this_char); 230 this_char);
230 return EINVAL; 231 return EINVAL;
231 } 232 }
232 args->logbufsize = suffix_strtoul(value, &eov, 10); 233 mp->m_logbsize = suffix_strtoul(value, &eov, 10);
233 } else if (!strcmp(this_char, MNTOPT_LOGDEV)) { 234 } else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
234 if (!value || !*value) { 235 if (!value || !*value) {
235 cmn_err(CE_WARN, 236 cmn_err(CE_WARN,
@@ -237,7 +238,9 @@ xfs_parseargs(
237 this_char); 238 this_char);
238 return EINVAL; 239 return EINVAL;
239 } 240 }
240 strncpy(args->logname, value, MAXNAMELEN); 241 mp->m_logname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
242 if (!mp->m_logname)
243 return ENOMEM;
241 } else if (!strcmp(this_char, MNTOPT_MTPT)) { 244 } else if (!strcmp(this_char, MNTOPT_MTPT)) {
242 if (!value || !*value) { 245 if (!value || !*value) {
243 cmn_err(CE_WARN, 246 cmn_err(CE_WARN,
@@ -245,7 +248,9 @@ xfs_parseargs(
245 this_char); 248 this_char);
246 return EINVAL; 249 return EINVAL;
247 } 250 }
248 strncpy(args->mtpt, value, MAXNAMELEN); 251 *mtpt = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
252 if (!*mtpt)
253 return ENOMEM;
249 } else if (!strcmp(this_char, MNTOPT_RTDEV)) { 254 } else if (!strcmp(this_char, MNTOPT_RTDEV)) {
250 if (!value || !*value) { 255 if (!value || !*value) {
251 cmn_err(CE_WARN, 256 cmn_err(CE_WARN,
@@ -253,7 +258,9 @@ xfs_parseargs(
253 this_char); 258 this_char);
254 return EINVAL; 259 return EINVAL;
255 } 260 }
256 strncpy(args->rtname, value, MAXNAMELEN); 261 mp->m_rtname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
262 if (!mp->m_rtname)
263 return ENOMEM;
257 } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) { 264 } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) {
258 if (!value || !*value) { 265 if (!value || !*value) {
259 cmn_err(CE_WARN, 266 cmn_err(CE_WARN,
@@ -262,8 +269,7 @@ xfs_parseargs(
262 return EINVAL; 269 return EINVAL;
263 } 270 }
264 iosize = simple_strtoul(value, &eov, 10); 271 iosize = simple_strtoul(value, &eov, 10);
265 args->flags |= XFSMNT_IOSIZE; 272 iosizelog = ffs(iosize) - 1;
266 args->iosizelog = (uint8_t) iosize;
267 } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) { 273 } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) {
268 if (!value || !*value) { 274 if (!value || !*value) {
269 cmn_err(CE_WARN, 275 cmn_err(CE_WARN,
@@ -272,8 +278,7 @@ xfs_parseargs(
272 return EINVAL; 278 return EINVAL;
273 } 279 }
274 iosize = suffix_strtoul(value, &eov, 10); 280 iosize = suffix_strtoul(value, &eov, 10);
275 args->flags |= XFSMNT_IOSIZE; 281 iosizelog = ffs(iosize) - 1;
276 args->iosizelog = ffs(iosize) - 1;
277 } else if (!strcmp(this_char, MNTOPT_GRPID) || 282 } else if (!strcmp(this_char, MNTOPT_GRPID) ||
278 !strcmp(this_char, MNTOPT_BSDGROUPS)) { 283 !strcmp(this_char, MNTOPT_BSDGROUPS)) {
279 mp->m_flags |= XFS_MOUNT_GRPID; 284 mp->m_flags |= XFS_MOUNT_GRPID;
@@ -281,23 +286,25 @@ xfs_parseargs(
281 !strcmp(this_char, MNTOPT_SYSVGROUPS)) { 286 !strcmp(this_char, MNTOPT_SYSVGROUPS)) {
282 mp->m_flags &= ~XFS_MOUNT_GRPID; 287 mp->m_flags &= ~XFS_MOUNT_GRPID;
283 } else if (!strcmp(this_char, MNTOPT_WSYNC)) { 288 } else if (!strcmp(this_char, MNTOPT_WSYNC)) {
284 args->flags |= XFSMNT_WSYNC; 289 mp->m_flags |= XFS_MOUNT_WSYNC;
285 } else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) { 290 } else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) {
286 args->flags |= XFSMNT_OSYNCISOSYNC; 291 mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
287 } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) { 292 } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) {
288 args->flags |= XFSMNT_NORECOVERY; 293 mp->m_flags |= XFS_MOUNT_NORECOVERY;
289 } else if (!strcmp(this_char, MNTOPT_INO64)) { 294 } else if (!strcmp(this_char, MNTOPT_INO64)) {
290 args->flags |= XFSMNT_INO64; 295#if XFS_BIG_INUMS
291#if !XFS_BIG_INUMS 296 mp->m_flags |= XFS_MOUNT_INO64;
297 mp->m_inoadd = XFS_INO64_OFFSET;
298#else
292 cmn_err(CE_WARN, 299 cmn_err(CE_WARN,
293 "XFS: %s option not allowed on this system", 300 "XFS: %s option not allowed on this system",
294 this_char); 301 this_char);
295 return EINVAL; 302 return EINVAL;
296#endif 303#endif
297 } else if (!strcmp(this_char, MNTOPT_NOALIGN)) { 304 } else if (!strcmp(this_char, MNTOPT_NOALIGN)) {
298 args->flags |= XFSMNT_NOALIGN; 305 mp->m_flags |= XFS_MOUNT_NOALIGN;
299 } else if (!strcmp(this_char, MNTOPT_SWALLOC)) { 306 } else if (!strcmp(this_char, MNTOPT_SWALLOC)) {
300 args->flags |= XFSMNT_SWALLOC; 307 mp->m_flags |= XFS_MOUNT_SWALLOC;
301 } else if (!strcmp(this_char, MNTOPT_SUNIT)) { 308 } else if (!strcmp(this_char, MNTOPT_SUNIT)) {
302 if (!value || !*value) { 309 if (!value || !*value) {
303 cmn_err(CE_WARN, 310 cmn_err(CE_WARN,
@@ -315,7 +322,7 @@ xfs_parseargs(
315 } 322 }
316 dswidth = simple_strtoul(value, &eov, 10); 323 dswidth = simple_strtoul(value, &eov, 10);
317 } else if (!strcmp(this_char, MNTOPT_64BITINODE)) { 324 } else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
318 args->flags &= ~XFSMNT_32BITINODES; 325 mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
319#if !XFS_BIG_INUMS 326#if !XFS_BIG_INUMS
320 cmn_err(CE_WARN, 327 cmn_err(CE_WARN,
321 "XFS: %s option not allowed on this system", 328 "XFS: %s option not allowed on this system",
@@ -323,56 +330,61 @@ xfs_parseargs(
323 return EINVAL; 330 return EINVAL;
324#endif 331#endif
325 } else if (!strcmp(this_char, MNTOPT_NOUUID)) { 332 } else if (!strcmp(this_char, MNTOPT_NOUUID)) {
326 args->flags |= XFSMNT_NOUUID; 333 mp->m_flags |= XFS_MOUNT_NOUUID;
327 } else if (!strcmp(this_char, MNTOPT_BARRIER)) { 334 } else if (!strcmp(this_char, MNTOPT_BARRIER)) {
328 args->flags |= XFSMNT_BARRIER; 335 mp->m_flags |= XFS_MOUNT_BARRIER;
329 } else if (!strcmp(this_char, MNTOPT_NOBARRIER)) { 336 } else if (!strcmp(this_char, MNTOPT_NOBARRIER)) {
330 args->flags &= ~XFSMNT_BARRIER; 337 mp->m_flags &= ~XFS_MOUNT_BARRIER;
331 } else if (!strcmp(this_char, MNTOPT_IKEEP)) { 338 } else if (!strcmp(this_char, MNTOPT_IKEEP)) {
332 args->flags |= XFSMNT_IKEEP; 339 mp->m_flags |= XFS_MOUNT_IKEEP;
333 } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) { 340 } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
334 dmapi_implies_ikeep = 0; 341 dmapi_implies_ikeep = 0;
335 args->flags &= ~XFSMNT_IKEEP; 342 mp->m_flags &= ~XFS_MOUNT_IKEEP;
336 } else if (!strcmp(this_char, MNTOPT_LARGEIO)) { 343 } else if (!strcmp(this_char, MNTOPT_LARGEIO)) {
337 args->flags2 &= ~XFSMNT2_COMPAT_IOSIZE; 344 mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE;
338 } else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) { 345 } else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) {
339 args->flags2 |= XFSMNT2_COMPAT_IOSIZE; 346 mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
340 } else if (!strcmp(this_char, MNTOPT_ATTR2)) { 347 } else if (!strcmp(this_char, MNTOPT_ATTR2)) {
341 args->flags |= XFSMNT_ATTR2; 348 mp->m_flags |= XFS_MOUNT_ATTR2;
342 } else if (!strcmp(this_char, MNTOPT_NOATTR2)) { 349 } else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
343 args->flags &= ~XFSMNT_ATTR2; 350 mp->m_flags &= ~XFS_MOUNT_ATTR2;
344 args->flags |= XFSMNT_NOATTR2; 351 mp->m_flags |= XFS_MOUNT_NOATTR2;
345 } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) { 352 } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
346 args->flags2 |= XFSMNT2_FILESTREAMS; 353 mp->m_flags |= XFS_MOUNT_FILESTREAMS;
347 } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) { 354 } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) {
348 args->flags &= ~(XFSMNT_UQUOTAENF|XFSMNT_UQUOTA); 355 mp->m_qflags &= ~(XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
349 args->flags &= ~(XFSMNT_GQUOTAENF|XFSMNT_GQUOTA); 356 XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
357 XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
358 XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD);
350 } else if (!strcmp(this_char, MNTOPT_QUOTA) || 359 } else if (!strcmp(this_char, MNTOPT_QUOTA) ||
351 !strcmp(this_char, MNTOPT_UQUOTA) || 360 !strcmp(this_char, MNTOPT_UQUOTA) ||
352 !strcmp(this_char, MNTOPT_USRQUOTA)) { 361 !strcmp(this_char, MNTOPT_USRQUOTA)) {
353 args->flags |= XFSMNT_UQUOTA | XFSMNT_UQUOTAENF; 362 mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
363 XFS_UQUOTA_ENFD);
354 } else if (!strcmp(this_char, MNTOPT_QUOTANOENF) || 364 } else if (!strcmp(this_char, MNTOPT_QUOTANOENF) ||
355 !strcmp(this_char, MNTOPT_UQUOTANOENF)) { 365 !strcmp(this_char, MNTOPT_UQUOTANOENF)) {
356 args->flags |= XFSMNT_UQUOTA; 366 mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
357 args->flags &= ~XFSMNT_UQUOTAENF; 367 mp->m_qflags &= ~XFS_UQUOTA_ENFD;
358 } else if (!strcmp(this_char, MNTOPT_PQUOTA) || 368 } else if (!strcmp(this_char, MNTOPT_PQUOTA) ||
359 !strcmp(this_char, MNTOPT_PRJQUOTA)) { 369 !strcmp(this_char, MNTOPT_PRJQUOTA)) {
360 args->flags |= XFSMNT_PQUOTA | XFSMNT_PQUOTAENF; 370 mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
371 XFS_OQUOTA_ENFD);
361 } else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) { 372 } else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) {
362 args->flags |= XFSMNT_PQUOTA; 373 mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
363 args->flags &= ~XFSMNT_PQUOTAENF; 374 mp->m_qflags &= ~XFS_OQUOTA_ENFD;
364 } else if (!strcmp(this_char, MNTOPT_GQUOTA) || 375 } else if (!strcmp(this_char, MNTOPT_GQUOTA) ||
365 !strcmp(this_char, MNTOPT_GRPQUOTA)) { 376 !strcmp(this_char, MNTOPT_GRPQUOTA)) {
366 args->flags |= XFSMNT_GQUOTA | XFSMNT_GQUOTAENF; 377 mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
378 XFS_OQUOTA_ENFD);
367 } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) { 379 } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) {
368 args->flags |= XFSMNT_GQUOTA; 380 mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
369 args->flags &= ~XFSMNT_GQUOTAENF; 381 mp->m_qflags &= ~XFS_OQUOTA_ENFD;
370 } else if (!strcmp(this_char, MNTOPT_DMAPI)) { 382 } else if (!strcmp(this_char, MNTOPT_DMAPI)) {
371 args->flags |= XFSMNT_DMAPI; 383 mp->m_flags |= XFS_MOUNT_DMAPI;
372 } else if (!strcmp(this_char, MNTOPT_XDSM)) { 384 } else if (!strcmp(this_char, MNTOPT_XDSM)) {
373 args->flags |= XFSMNT_DMAPI; 385 mp->m_flags |= XFS_MOUNT_DMAPI;
374 } else if (!strcmp(this_char, MNTOPT_DMI)) { 386 } else if (!strcmp(this_char, MNTOPT_DMI)) {
375 args->flags |= XFSMNT_DMAPI; 387 mp->m_flags |= XFS_MOUNT_DMAPI;
376 } else if (!strcmp(this_char, "ihashsize")) { 388 } else if (!strcmp(this_char, "ihashsize")) {
377 cmn_err(CE_WARN, 389 cmn_err(CE_WARN,
378 "XFS: ihashsize no longer used, option is deprecated."); 390 "XFS: ihashsize no longer used, option is deprecated.");
@@ -390,27 +402,29 @@ xfs_parseargs(
390 } 402 }
391 } 403 }
392 404
393 if (args->flags & XFSMNT_NORECOVERY) { 405 /*
394 if ((mp->m_flags & XFS_MOUNT_RDONLY) == 0) { 406 * no recovery flag requires a read-only mount
395 cmn_err(CE_WARN, 407 */
396 "XFS: no-recovery mounts must be read-only."); 408 if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
397 return EINVAL; 409 !(mp->m_flags & XFS_MOUNT_RDONLY)) {
398 } 410 cmn_err(CE_WARN, "XFS: no-recovery mounts must be read-only.");
411 return EINVAL;
399 } 412 }
400 413
401 if ((args->flags & XFSMNT_NOALIGN) && (dsunit || dswidth)) { 414 if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) {
402 cmn_err(CE_WARN, 415 cmn_err(CE_WARN,
403 "XFS: sunit and swidth options incompatible with the noalign option"); 416 "XFS: sunit and swidth options incompatible with the noalign option");
404 return EINVAL; 417 return EINVAL;
405 } 418 }
406 419
407 if ((args->flags & XFSMNT_GQUOTA) && (args->flags & XFSMNT_PQUOTA)) { 420 if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
421 (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) {
408 cmn_err(CE_WARN, 422 cmn_err(CE_WARN,
409 "XFS: cannot mount with both project and group quota"); 423 "XFS: cannot mount with both project and group quota");
410 return EINVAL; 424 return EINVAL;
411 } 425 }
412 426
413 if ((args->flags & XFSMNT_DMAPI) && *args->mtpt == '\0') { 427 if ((mp->m_flags & XFS_MOUNT_DMAPI) && (!*mtpt || *mtpt[0] == '\0')) {
414 printk("XFS: %s option needs the mount point option as well\n", 428 printk("XFS: %s option needs the mount point option as well\n",
415 MNTOPT_DMAPI); 429 MNTOPT_DMAPI);
416 return EINVAL; 430 return EINVAL;
@@ -438,27 +452,66 @@ xfs_parseargs(
438 * Note that if "ikeep" or "noikeep" mount options are 452 * Note that if "ikeep" or "noikeep" mount options are
439 * supplied, then they are honored. 453 * supplied, then they are honored.
440 */ 454 */
441 if ((args->flags & XFSMNT_DMAPI) && dmapi_implies_ikeep) 455 if ((mp->m_flags & XFS_MOUNT_DMAPI) && dmapi_implies_ikeep)
442 args->flags |= XFSMNT_IKEEP; 456 mp->m_flags |= XFS_MOUNT_IKEEP;
443 457
444 if ((args->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) { 458done:
459 if (!(mp->m_flags & XFS_MOUNT_NOALIGN)) {
460 /*
461 * At this point the superblock has not been read
462 * in, therefore we do not know the block size.
463 * Before the mount call ends we will convert
464 * these to FSBs.
465 */
445 if (dsunit) { 466 if (dsunit) {
446 args->sunit = dsunit; 467 mp->m_dalign = dsunit;
447 args->flags |= XFSMNT_RETERR; 468 mp->m_flags |= XFS_MOUNT_RETERR;
448 } else {
449 args->sunit = vol_dsunit;
450 } 469 }
451 dswidth ? (args->swidth = dswidth) : 470
452 (args->swidth = vol_dswidth); 471 if (dswidth)
453 } else { 472 mp->m_swidth = dswidth;
454 args->sunit = args->swidth = 0; 473 }
474
475 if (mp->m_logbufs != -1 &&
476 mp->m_logbufs != 0 &&
477 (mp->m_logbufs < XLOG_MIN_ICLOGS ||
478 mp->m_logbufs > XLOG_MAX_ICLOGS)) {
479 cmn_err(CE_WARN,
480 "XFS: invalid logbufs value: %d [not %d-%d]",
481 mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
482 return XFS_ERROR(EINVAL);
483 }
484 if (mp->m_logbsize != -1 &&
485 mp->m_logbsize != 0 &&
486 (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
487 mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
488 !is_power_of_2(mp->m_logbsize))) {
489 cmn_err(CE_WARN,
490 "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
491 mp->m_logbsize);
492 return XFS_ERROR(EINVAL);
493 }
494
495 mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL);
496 if (!mp->m_fsname)
497 return ENOMEM;
498 mp->m_fsname_len = strlen(mp->m_fsname) + 1;
499
500 if (iosizelog) {
501 if (iosizelog > XFS_MAX_IO_LOG ||
502 iosizelog < XFS_MIN_IO_LOG) {
503 cmn_err(CE_WARN,
504 "XFS: invalid log iosize: %d [not %d-%d]",
505 iosizelog, XFS_MIN_IO_LOG,
506 XFS_MAX_IO_LOG);
507 return XFS_ERROR(EINVAL);
508 }
509
510 mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
511 mp->m_readio_log = iosizelog;
512 mp->m_writeio_log = iosizelog;
455 } 513 }
456 514
457done:
458 if (args->flags & XFSMNT_32BITINODES)
459 mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
460 if (args->flags2)
461 args->flags |= XFSMNT_FLAGS2;
462 return 0; 515 return 0;
463} 516}
464 517
@@ -704,8 +757,7 @@ xfs_close_devices(
704 */ 757 */
705STATIC int 758STATIC int
706xfs_open_devices( 759xfs_open_devices(
707 struct xfs_mount *mp, 760 struct xfs_mount *mp)
708 struct xfs_mount_args *args)
709{ 761{
710 struct block_device *ddev = mp->m_super->s_bdev; 762 struct block_device *ddev = mp->m_super->s_bdev;
711 struct block_device *logdev = NULL, *rtdev = NULL; 763 struct block_device *logdev = NULL, *rtdev = NULL;
@@ -714,14 +766,14 @@ xfs_open_devices(
714 /* 766 /*
715 * Open real time and log devices - order is important. 767 * Open real time and log devices - order is important.
716 */ 768 */
717 if (args->logname[0]) { 769 if (mp->m_logname) {
718 error = xfs_blkdev_get(mp, args->logname, &logdev); 770 error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
719 if (error) 771 if (error)
720 goto out; 772 goto out;
721 } 773 }
722 774
723 if (args->rtname[0]) { 775 if (mp->m_rtname) {
724 error = xfs_blkdev_get(mp, args->rtname, &rtdev); 776 error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev);
725 if (error) 777 if (error)
726 goto out_close_logdev; 778 goto out_close_logdev;
727 779
@@ -813,18 +865,18 @@ xfs_setup_devices(
813 */ 865 */
814void 866void
815xfsaild_wakeup( 867xfsaild_wakeup(
816 xfs_mount_t *mp, 868 struct xfs_ail *ailp,
817 xfs_lsn_t threshold_lsn) 869 xfs_lsn_t threshold_lsn)
818{ 870{
819 mp->m_ail.xa_target = threshold_lsn; 871 ailp->xa_target = threshold_lsn;
820 wake_up_process(mp->m_ail.xa_task); 872 wake_up_process(ailp->xa_task);
821} 873}
822 874
823int 875int
824xfsaild( 876xfsaild(
825 void *data) 877 void *data)
826{ 878{
827 xfs_mount_t *mp = (xfs_mount_t *)data; 879 struct xfs_ail *ailp = data;
828 xfs_lsn_t last_pushed_lsn = 0; 880 xfs_lsn_t last_pushed_lsn = 0;
829 long tout = 0; 881 long tout = 0;
830 882
@@ -836,11 +888,11 @@ xfsaild(
836 /* swsusp */ 888 /* swsusp */
837 try_to_freeze(); 889 try_to_freeze();
838 890
839 ASSERT(mp->m_log); 891 ASSERT(ailp->xa_mount->m_log);
840 if (XFS_FORCED_SHUTDOWN(mp)) 892 if (XFS_FORCED_SHUTDOWN(ailp->xa_mount))
841 continue; 893 continue;
842 894
843 tout = xfsaild_push(mp, &last_pushed_lsn); 895 tout = xfsaild_push(ailp, &last_pushed_lsn);
844 } 896 }
845 897
846 return 0; 898 return 0;
@@ -848,43 +900,82 @@ xfsaild(
848 900
849int 901int
850xfsaild_start( 902xfsaild_start(
851 xfs_mount_t *mp) 903 struct xfs_ail *ailp)
852{ 904{
853 mp->m_ail.xa_target = 0; 905 ailp->xa_target = 0;
854 mp->m_ail.xa_task = kthread_run(xfsaild, mp, "xfsaild"); 906 ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild");
855 if (IS_ERR(mp->m_ail.xa_task)) 907 if (IS_ERR(ailp->xa_task))
856 return -PTR_ERR(mp->m_ail.xa_task); 908 return -PTR_ERR(ailp->xa_task);
857 return 0; 909 return 0;
858} 910}
859 911
860void 912void
861xfsaild_stop( 913xfsaild_stop(
862 xfs_mount_t *mp) 914 struct xfs_ail *ailp)
863{ 915{
864 kthread_stop(mp->m_ail.xa_task); 916 kthread_stop(ailp->xa_task);
865} 917}
866 918
867 919
868 920/* Catch misguided souls that try to use this interface on XFS */
869STATIC struct inode * 921STATIC struct inode *
870xfs_fs_alloc_inode( 922xfs_fs_alloc_inode(
871 struct super_block *sb) 923 struct super_block *sb)
872{ 924{
873 return kmem_zone_alloc(xfs_vnode_zone, KM_SLEEP); 925 BUG();
926 return NULL;
874} 927}
875 928
929/*
930 * Now that the generic code is guaranteed not to be accessing
931 * the linux inode, we can reclaim the inode.
932 */
876STATIC void 933STATIC void
877xfs_fs_destroy_inode( 934xfs_fs_destroy_inode(
878 struct inode *inode) 935 struct inode *inode)
879{ 936{
880 kmem_zone_free(xfs_vnode_zone, inode); 937 xfs_inode_t *ip = XFS_I(inode);
938
939 XFS_STATS_INC(vn_reclaim);
940 if (xfs_reclaim(ip))
941 panic("%s: cannot reclaim 0x%p\n", __func__, inode);
881} 942}
882 943
944/*
945 * Slab object creation initialisation for the XFS inode.
946 * This covers only the idempotent fields in the XFS inode;
947 * all other fields need to be initialised on allocation
948 * from the slab. This avoids the need to repeatedly intialise
949 * fields in the xfs inode that left in the initialise state
950 * when freeing the inode.
951 */
883STATIC void 952STATIC void
884xfs_fs_inode_init_once( 953xfs_fs_inode_init_once(
885 void *vnode) 954 void *inode)
886{ 955{
887 inode_init_once((struct inode *)vnode); 956 struct xfs_inode *ip = inode;
957
958 memset(ip, 0, sizeof(struct xfs_inode));
959
960 /* vfs inode */
961 inode_init_once(VFS_I(ip));
962
963 /* xfs inode */
964 atomic_set(&ip->i_iocount, 0);
965 atomic_set(&ip->i_pincount, 0);
966 spin_lock_init(&ip->i_flags_lock);
967 init_waitqueue_head(&ip->i_ipin_wait);
968 /*
969 * Because we want to use a counting completion, complete
970 * the flush completion once to allow a single access to
971 * the flush completion without blocking.
972 */
973 init_completion(&ip->i_flush);
974 complete(&ip->i_flush);
975
976 mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
977 "xfsino", ip->i_ino);
978 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
888} 979}
889 980
890/* 981/*
@@ -898,21 +989,26 @@ xfs_fs_write_inode(
898 struct inode *inode, 989 struct inode *inode,
899 int sync) 990 int sync)
900{ 991{
992 struct xfs_inode *ip = XFS_I(inode);
901 int error = 0; 993 int error = 0;
902 int flags = 0; 994 int flags = 0;
903 995
904 xfs_itrace_entry(XFS_I(inode)); 996 xfs_itrace_entry(ip);
905 if (sync) { 997 if (sync) {
906 filemap_fdatawait(inode->i_mapping); 998 error = xfs_wait_on_pages(ip, 0, -1);
999 if (error)
1000 goto out_error;
907 flags |= FLUSH_SYNC; 1001 flags |= FLUSH_SYNC;
908 } 1002 }
909 error = xfs_inode_flush(XFS_I(inode), flags); 1003 error = xfs_inode_flush(ip, flags);
1004
1005out_error:
910 /* 1006 /*
911 * if we failed to write out the inode then mark 1007 * if we failed to write out the inode then mark
912 * it dirty again so we'll try again later. 1008 * it dirty again so we'll try again later.
913 */ 1009 */
914 if (error) 1010 if (error)
915 mark_inode_dirty_sync(inode); 1011 xfs_mark_inode_dirty_sync(ip);
916 1012
917 return -error; 1013 return -error;
918} 1014}
@@ -923,164 +1019,12 @@ xfs_fs_clear_inode(
923{ 1019{
924 xfs_inode_t *ip = XFS_I(inode); 1020 xfs_inode_t *ip = XFS_I(inode);
925 1021
926 /* 1022 xfs_itrace_entry(ip);
927 * ip can be null when xfs_iget_core calls xfs_idestroy if we 1023 XFS_STATS_INC(vn_rele);
928 * find an inode with di_mode == 0 but without IGET_CREATE set. 1024 XFS_STATS_INC(vn_remove);
929 */ 1025 XFS_STATS_DEC(vn_active);
930 if (ip) {
931 xfs_itrace_entry(ip);
932 XFS_STATS_INC(vn_rele);
933 XFS_STATS_INC(vn_remove);
934 XFS_STATS_INC(vn_reclaim);
935 XFS_STATS_DEC(vn_active);
936
937 xfs_inactive(ip);
938 xfs_iflags_clear(ip, XFS_IMODIFIED);
939 if (xfs_reclaim(ip))
940 panic("%s: cannot reclaim 0x%p\n", __func__, inode);
941 }
942
943 ASSERT(XFS_I(inode) == NULL);
944}
945 1026
946/* 1027 xfs_inactive(ip);
947 * Enqueue a work item to be picked up by the vfs xfssyncd thread.
948 * Doing this has two advantages:
949 * - It saves on stack space, which is tight in certain situations
950 * - It can be used (with care) as a mechanism to avoid deadlocks.
951 * Flushing while allocating in a full filesystem requires both.
952 */
953STATIC void
954xfs_syncd_queue_work(
955 struct xfs_mount *mp,
956 void *data,
957 void (*syncer)(struct xfs_mount *, void *))
958{
959 struct bhv_vfs_sync_work *work;
960
961 work = kmem_alloc(sizeof(struct bhv_vfs_sync_work), KM_SLEEP);
962 INIT_LIST_HEAD(&work->w_list);
963 work->w_syncer = syncer;
964 work->w_data = data;
965 work->w_mount = mp;
966 spin_lock(&mp->m_sync_lock);
967 list_add_tail(&work->w_list, &mp->m_sync_list);
968 spin_unlock(&mp->m_sync_lock);
969 wake_up_process(mp->m_sync_task);
970}
971
972/*
973 * Flush delayed allocate data, attempting to free up reserved space
974 * from existing allocations. At this point a new allocation attempt
975 * has failed with ENOSPC and we are in the process of scratching our
976 * heads, looking about for more room...
977 */
978STATIC void
979xfs_flush_inode_work(
980 struct xfs_mount *mp,
981 void *arg)
982{
983 struct inode *inode = arg;
984 filemap_flush(inode->i_mapping);
985 iput(inode);
986}
987
988void
989xfs_flush_inode(
990 xfs_inode_t *ip)
991{
992 struct inode *inode = VFS_I(ip);
993
994 igrab(inode);
995 xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work);
996 delay(msecs_to_jiffies(500));
997}
998
999/*
1000 * This is the "bigger hammer" version of xfs_flush_inode_work...
1001 * (IOW, "If at first you don't succeed, use a Bigger Hammer").
1002 */
1003STATIC void
1004xfs_flush_device_work(
1005 struct xfs_mount *mp,
1006 void *arg)
1007{
1008 struct inode *inode = arg;
1009 sync_blockdev(mp->m_super->s_bdev);
1010 iput(inode);
1011}
1012
1013void
1014xfs_flush_device(
1015 xfs_inode_t *ip)
1016{
1017 struct inode *inode = VFS_I(ip);
1018
1019 igrab(inode);
1020 xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work);
1021 delay(msecs_to_jiffies(500));
1022 xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
1023}
1024
1025STATIC void
1026xfs_sync_worker(
1027 struct xfs_mount *mp,
1028 void *unused)
1029{
1030 int error;
1031
1032 if (!(mp->m_flags & XFS_MOUNT_RDONLY))
1033 error = xfs_sync(mp, SYNC_FSDATA | SYNC_BDFLUSH | SYNC_ATTR);
1034 mp->m_sync_seq++;
1035 wake_up(&mp->m_wait_single_sync_task);
1036}
1037
1038STATIC int
1039xfssyncd(
1040 void *arg)
1041{
1042 struct xfs_mount *mp = arg;
1043 long timeleft;
1044 bhv_vfs_sync_work_t *work, *n;
1045 LIST_HEAD (tmp);
1046
1047 set_freezable();
1048 timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
1049 for (;;) {
1050 timeleft = schedule_timeout_interruptible(timeleft);
1051 /* swsusp */
1052 try_to_freeze();
1053 if (kthread_should_stop() && list_empty(&mp->m_sync_list))
1054 break;
1055
1056 spin_lock(&mp->m_sync_lock);
1057 /*
1058 * We can get woken by laptop mode, to do a sync -
1059 * that's the (only!) case where the list would be
1060 * empty with time remaining.
1061 */
1062 if (!timeleft || list_empty(&mp->m_sync_list)) {
1063 if (!timeleft)
1064 timeleft = xfs_syncd_centisecs *
1065 msecs_to_jiffies(10);
1066 INIT_LIST_HEAD(&mp->m_sync_work.w_list);
1067 list_add_tail(&mp->m_sync_work.w_list,
1068 &mp->m_sync_list);
1069 }
1070 list_for_each_entry_safe(work, n, &mp->m_sync_list, w_list)
1071 list_move(&work->w_list, &tmp);
1072 spin_unlock(&mp->m_sync_lock);
1073
1074 list_for_each_entry_safe(work, n, &tmp, w_list) {
1075 (*work->w_syncer)(mp, work->w_data);
1076 list_del(&work->w_list);
1077 if (work == &mp->m_sync_work)
1078 continue;
1079 kmem_free(work);
1080 }
1081 }
1082
1083 return 0;
1084} 1028}
1085 1029
1086STATIC void 1030STATIC void
@@ -1099,11 +1043,9 @@ xfs_fs_put_super(
1099 struct xfs_mount *mp = XFS_M(sb); 1043 struct xfs_mount *mp = XFS_M(sb);
1100 struct xfs_inode *rip = mp->m_rootip; 1044 struct xfs_inode *rip = mp->m_rootip;
1101 int unmount_event_flags = 0; 1045 int unmount_event_flags = 0;
1102 int error;
1103 1046
1104 kthread_stop(mp->m_sync_task); 1047 xfs_syncd_stop(mp);
1105 1048 xfs_sync_inodes(mp, SYNC_ATTR|SYNC_DELWRI);
1106 xfs_sync(mp, SYNC_ATTR | SYNC_DELWRI);
1107 1049
1108#ifdef HAVE_DMAPI 1050#ifdef HAVE_DMAPI
1109 if (mp->m_flags & XFS_MOUNT_DMAPI) { 1051 if (mp->m_flags & XFS_MOUNT_DMAPI) {
@@ -1128,18 +1070,6 @@ xfs_fs_put_super(
1128 xfs_filestream_unmount(mp); 1070 xfs_filestream_unmount(mp);
1129 1071
1130 XFS_bflush(mp->m_ddev_targp); 1072 XFS_bflush(mp->m_ddev_targp);
1131 error = xfs_unmount_flush(mp, 0);
1132 WARN_ON(error);
1133
1134 /*
1135 * If we're forcing a shutdown, typically because of a media error,
1136 * we want to make sure we invalidate dirty pages that belong to
1137 * referenced vnodes as well.
1138 */
1139 if (XFS_FORCED_SHUTDOWN(mp)) {
1140 error = xfs_sync(mp, SYNC_WAIT | SYNC_CLOSE);
1141 ASSERT(error != EFSCORRUPTED);
1142 }
1143 1073
1144 if (mp->m_flags & XFS_MOUNT_DMAPI) { 1074 if (mp->m_flags & XFS_MOUNT_DMAPI) {
1145 XFS_SEND_UNMOUNT(mp, rip, DM_RIGHT_NULL, 0, 0, 1075 XFS_SEND_UNMOUNT(mp, rip, DM_RIGHT_NULL, 0, 0,
@@ -1161,7 +1091,7 @@ xfs_fs_write_super(
1161 struct super_block *sb) 1091 struct super_block *sb)
1162{ 1092{
1163 if (!(sb->s_flags & MS_RDONLY)) 1093 if (!(sb->s_flags & MS_RDONLY))
1164 xfs_sync(XFS_M(sb), SYNC_FSDATA); 1094 xfs_sync_fsdata(XFS_M(sb), 0);
1165 sb->s_dirt = 0; 1095 sb->s_dirt = 0;
1166} 1096}
1167 1097
@@ -1172,7 +1102,6 @@ xfs_fs_sync_super(
1172{ 1102{
1173 struct xfs_mount *mp = XFS_M(sb); 1103 struct xfs_mount *mp = XFS_M(sb);
1174 int error; 1104 int error;
1175 int flags;
1176 1105
1177 /* 1106 /*
1178 * Treat a sync operation like a freeze. This is to work 1107 * Treat a sync operation like a freeze. This is to work
@@ -1186,20 +1115,10 @@ xfs_fs_sync_super(
1186 * dirty the Linux inode until after the transaction I/O 1115 * dirty the Linux inode until after the transaction I/O
1187 * completes. 1116 * completes.
1188 */ 1117 */
1189 if (wait || unlikely(sb->s_frozen == SB_FREEZE_WRITE)) { 1118 if (wait || unlikely(sb->s_frozen == SB_FREEZE_WRITE))
1190 /* 1119 error = xfs_quiesce_data(mp);
1191 * First stage of freeze - no more writers will make progress 1120 else
1192 * now we are here, so we flush delwri and delalloc buffers 1121 error = xfs_sync_fsdata(mp, 0);
1193 * here, then wait for all I/O to complete. Data is frozen at
1194 * that point. Metadata is not frozen, transactions can still
1195 * occur here so don't bother flushing the buftarg (i.e
1196 * SYNC_QUIESCE) because it'll just get dirty again.
1197 */
1198 flags = SYNC_DATA_QUIESCE;
1199 } else
1200 flags = SYNC_FSDATA;
1201
1202 error = xfs_sync(mp, flags);
1203 sb->s_dirt = 0; 1122 sb->s_dirt = 0;
1204 1123
1205 if (unlikely(laptop_mode)) { 1124 if (unlikely(laptop_mode)) {
@@ -1337,9 +1256,8 @@ xfs_fs_remount(
1337 1256
1338 /* rw -> ro */ 1257 /* rw -> ro */
1339 if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) { 1258 if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) {
1340 xfs_filestream_flush(mp); 1259 xfs_quiesce_data(mp);
1341 xfs_sync(mp, SYNC_DATA_QUIESCE); 1260 xfs_quiesce_attr(mp);
1342 xfs_attr_quiesce(mp);
1343 mp->m_flags |= XFS_MOUNT_RDONLY; 1261 mp->m_flags |= XFS_MOUNT_RDONLY;
1344 } 1262 }
1345 1263
@@ -1348,7 +1266,7 @@ xfs_fs_remount(
1348 1266
1349/* 1267/*
1350 * Second stage of a freeze. The data is already frozen so we only 1268 * Second stage of a freeze. The data is already frozen so we only
1351 * need to take care of themetadata. Once that's done write a dummy 1269 * need to take care of the metadata. Once that's done write a dummy
1352 * record to dirty the log in case of a crash while frozen. 1270 * record to dirty the log in case of a crash while frozen.
1353 */ 1271 */
1354STATIC void 1272STATIC void
@@ -1357,7 +1275,7 @@ xfs_fs_lockfs(
1357{ 1275{
1358 struct xfs_mount *mp = XFS_M(sb); 1276 struct xfs_mount *mp = XFS_M(sb);
1359 1277
1360 xfs_attr_quiesce(mp); 1278 xfs_quiesce_attr(mp);
1361 xfs_fs_log_dummy(mp); 1279 xfs_fs_log_dummy(mp);
1362} 1280}
1363 1281
@@ -1422,175 +1340,28 @@ xfs_fs_setxquota(
1422 1340
1423/* 1341/*
1424 * This function fills in xfs_mount_t fields based on mount args. 1342 * This function fills in xfs_mount_t fields based on mount args.
1425 * Note: the superblock has _not_ yet been read in.
1426 */
1427STATIC int
1428xfs_start_flags(
1429 struct xfs_mount_args *ap,
1430 struct xfs_mount *mp)
1431{
1432 int error;
1433
1434 /* Values are in BBs */
1435 if ((ap->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) {
1436 /*
1437 * At this point the superblock has not been read
1438 * in, therefore we do not know the block size.
1439 * Before the mount call ends we will convert
1440 * these to FSBs.
1441 */
1442 mp->m_dalign = ap->sunit;
1443 mp->m_swidth = ap->swidth;
1444 }
1445
1446 if (ap->logbufs != -1 &&
1447 ap->logbufs != 0 &&
1448 (ap->logbufs < XLOG_MIN_ICLOGS ||
1449 ap->logbufs > XLOG_MAX_ICLOGS)) {
1450 cmn_err(CE_WARN,
1451 "XFS: invalid logbufs value: %d [not %d-%d]",
1452 ap->logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
1453 return XFS_ERROR(EINVAL);
1454 }
1455 mp->m_logbufs = ap->logbufs;
1456 if (ap->logbufsize != -1 &&
1457 ap->logbufsize != 0 &&
1458 (ap->logbufsize < XLOG_MIN_RECORD_BSIZE ||
1459 ap->logbufsize > XLOG_MAX_RECORD_BSIZE ||
1460 !is_power_of_2(ap->logbufsize))) {
1461 cmn_err(CE_WARN,
1462 "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
1463 ap->logbufsize);
1464 return XFS_ERROR(EINVAL);
1465 }
1466
1467 error = ENOMEM;
1468
1469 mp->m_logbsize = ap->logbufsize;
1470 mp->m_fsname_len = strlen(ap->fsname) + 1;
1471
1472 mp->m_fsname = kstrdup(ap->fsname, GFP_KERNEL);
1473 if (!mp->m_fsname)
1474 goto out;
1475
1476 if (ap->rtname[0]) {
1477 mp->m_rtname = kstrdup(ap->rtname, GFP_KERNEL);
1478 if (!mp->m_rtname)
1479 goto out_free_fsname;
1480
1481 }
1482
1483 if (ap->logname[0]) {
1484 mp->m_logname = kstrdup(ap->logname, GFP_KERNEL);
1485 if (!mp->m_logname)
1486 goto out_free_rtname;
1487 }
1488
1489 if (ap->flags & XFSMNT_WSYNC)
1490 mp->m_flags |= XFS_MOUNT_WSYNC;
1491#if XFS_BIG_INUMS
1492 if (ap->flags & XFSMNT_INO64) {
1493 mp->m_flags |= XFS_MOUNT_INO64;
1494 mp->m_inoadd = XFS_INO64_OFFSET;
1495 }
1496#endif
1497 if (ap->flags & XFSMNT_RETERR)
1498 mp->m_flags |= XFS_MOUNT_RETERR;
1499 if (ap->flags & XFSMNT_NOALIGN)
1500 mp->m_flags |= XFS_MOUNT_NOALIGN;
1501 if (ap->flags & XFSMNT_SWALLOC)
1502 mp->m_flags |= XFS_MOUNT_SWALLOC;
1503 if (ap->flags & XFSMNT_OSYNCISOSYNC)
1504 mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
1505 if (ap->flags & XFSMNT_32BITINODES)
1506 mp->m_flags |= XFS_MOUNT_32BITINODES;
1507
1508 if (ap->flags & XFSMNT_IOSIZE) {
1509 if (ap->iosizelog > XFS_MAX_IO_LOG ||
1510 ap->iosizelog < XFS_MIN_IO_LOG) {
1511 cmn_err(CE_WARN,
1512 "XFS: invalid log iosize: %d [not %d-%d]",
1513 ap->iosizelog, XFS_MIN_IO_LOG,
1514 XFS_MAX_IO_LOG);
1515 return XFS_ERROR(EINVAL);
1516 }
1517
1518 mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
1519 mp->m_readio_log = mp->m_writeio_log = ap->iosizelog;
1520 }
1521
1522 if (ap->flags & XFSMNT_IKEEP)
1523 mp->m_flags |= XFS_MOUNT_IKEEP;
1524 if (ap->flags & XFSMNT_DIRSYNC)
1525 mp->m_flags |= XFS_MOUNT_DIRSYNC;
1526 if (ap->flags & XFSMNT_ATTR2)
1527 mp->m_flags |= XFS_MOUNT_ATTR2;
1528 if (ap->flags & XFSMNT_NOATTR2)
1529 mp->m_flags |= XFS_MOUNT_NOATTR2;
1530
1531 if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE)
1532 mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
1533
1534 /*
1535 * no recovery flag requires a read-only mount
1536 */
1537 if (ap->flags & XFSMNT_NORECOVERY) {
1538 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
1539 cmn_err(CE_WARN,
1540 "XFS: tried to mount a FS read-write without recovery!");
1541 return XFS_ERROR(EINVAL);
1542 }
1543 mp->m_flags |= XFS_MOUNT_NORECOVERY;
1544 }
1545
1546 if (ap->flags & XFSMNT_NOUUID)
1547 mp->m_flags |= XFS_MOUNT_NOUUID;
1548 if (ap->flags & XFSMNT_BARRIER)
1549 mp->m_flags |= XFS_MOUNT_BARRIER;
1550 else
1551 mp->m_flags &= ~XFS_MOUNT_BARRIER;
1552
1553 if (ap->flags2 & XFSMNT2_FILESTREAMS)
1554 mp->m_flags |= XFS_MOUNT_FILESTREAMS;
1555
1556 if (ap->flags & XFSMNT_DMAPI)
1557 mp->m_flags |= XFS_MOUNT_DMAPI;
1558 return 0;
1559
1560
1561 out_free_rtname:
1562 kfree(mp->m_rtname);
1563 out_free_fsname:
1564 kfree(mp->m_fsname);
1565 out:
1566 return error;
1567}
1568
1569/*
1570 * This function fills in xfs_mount_t fields based on mount args.
1571 * Note: the superblock _has_ now been read in. 1343 * Note: the superblock _has_ now been read in.
1572 */ 1344 */
1573STATIC int 1345STATIC int
1574xfs_finish_flags( 1346xfs_finish_flags(
1575 struct xfs_mount_args *ap,
1576 struct xfs_mount *mp) 1347 struct xfs_mount *mp)
1577{ 1348{
1578 int ronly = (mp->m_flags & XFS_MOUNT_RDONLY); 1349 int ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
1579 1350
1580 /* Fail a mount where the logbuf is smaller then the log stripe */ 1351 /* Fail a mount where the logbuf is smaller then the log stripe */
1581 if (xfs_sb_version_haslogv2(&mp->m_sb)) { 1352 if (xfs_sb_version_haslogv2(&mp->m_sb)) {
1582 if ((ap->logbufsize <= 0) && 1353 if (mp->m_logbsize <= 0 &&
1583 (mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) { 1354 mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
1584 mp->m_logbsize = mp->m_sb.sb_logsunit; 1355 mp->m_logbsize = mp->m_sb.sb_logsunit;
1585 } else if (ap->logbufsize > 0 && 1356 } else if (mp->m_logbsize > 0 &&
1586 ap->logbufsize < mp->m_sb.sb_logsunit) { 1357 mp->m_logbsize < mp->m_sb.sb_logsunit) {
1587 cmn_err(CE_WARN, 1358 cmn_err(CE_WARN,
1588 "XFS: logbuf size must be greater than or equal to log stripe size"); 1359 "XFS: logbuf size must be greater than or equal to log stripe size");
1589 return XFS_ERROR(EINVAL); 1360 return XFS_ERROR(EINVAL);
1590 } 1361 }
1591 } else { 1362 } else {
1592 /* Fail a mount if the logbuf is larger than 32K */ 1363 /* Fail a mount if the logbuf is larger than 32K */
1593 if (ap->logbufsize > XLOG_BIG_RECORD_BSIZE) { 1364 if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
1594 cmn_err(CE_WARN, 1365 cmn_err(CE_WARN,
1595 "XFS: logbuf size for version 1 logs must be 16K or 32K"); 1366 "XFS: logbuf size for version 1 logs must be 16K or 32K");
1596 return XFS_ERROR(EINVAL); 1367 return XFS_ERROR(EINVAL);
@@ -1602,7 +1373,7 @@ xfs_finish_flags(
1602 * told by noattr2 to turn it off 1373 * told by noattr2 to turn it off
1603 */ 1374 */
1604 if (xfs_sb_version_hasattr2(&mp->m_sb) && 1375 if (xfs_sb_version_hasattr2(&mp->m_sb) &&
1605 !(ap->flags & XFSMNT_NOATTR2)) 1376 !(mp->m_flags & XFS_MOUNT_NOATTR2))
1606 mp->m_flags |= XFS_MOUNT_ATTR2; 1377 mp->m_flags |= XFS_MOUNT_ATTR2;
1607 1378
1608 /* 1379 /*
@@ -1614,48 +1385,6 @@ xfs_finish_flags(
1614 return XFS_ERROR(EROFS); 1385 return XFS_ERROR(EROFS);
1615 } 1386 }
1616 1387
1617 /*
1618 * check for shared mount.
1619 */
1620 if (ap->flags & XFSMNT_SHARED) {
1621 if (!xfs_sb_version_hasshared(&mp->m_sb))
1622 return XFS_ERROR(EINVAL);
1623
1624 /*
1625 * For IRIX 6.5, shared mounts must have the shared
1626 * version bit set, have the persistent readonly
1627 * field set, must be version 0 and can only be mounted
1628 * read-only.
1629 */
1630 if (!ronly || !(mp->m_sb.sb_flags & XFS_SBF_READONLY) ||
1631 (mp->m_sb.sb_shared_vn != 0))
1632 return XFS_ERROR(EINVAL);
1633
1634 mp->m_flags |= XFS_MOUNT_SHARED;
1635
1636 /*
1637 * Shared XFS V0 can't deal with DMI. Return EINVAL.
1638 */
1639 if (mp->m_sb.sb_shared_vn == 0 && (ap->flags & XFSMNT_DMAPI))
1640 return XFS_ERROR(EINVAL);
1641 }
1642
1643 if (ap->flags & XFSMNT_UQUOTA) {
1644 mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
1645 if (ap->flags & XFSMNT_UQUOTAENF)
1646 mp->m_qflags |= XFS_UQUOTA_ENFD;
1647 }
1648
1649 if (ap->flags & XFSMNT_GQUOTA) {
1650 mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
1651 if (ap->flags & XFSMNT_GQUOTAENF)
1652 mp->m_qflags |= XFS_OQUOTA_ENFD;
1653 } else if (ap->flags & XFSMNT_PQUOTA) {
1654 mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
1655 if (ap->flags & XFSMNT_PQUOTAENF)
1656 mp->m_qflags |= XFS_OQUOTA_ENFD;
1657 }
1658
1659 return 0; 1388 return 0;
1660} 1389}
1661 1390
@@ -1667,19 +1396,14 @@ xfs_fs_fill_super(
1667{ 1396{
1668 struct inode *root; 1397 struct inode *root;
1669 struct xfs_mount *mp = NULL; 1398 struct xfs_mount *mp = NULL;
1670 struct xfs_mount_args *args;
1671 int flags = 0, error = ENOMEM; 1399 int flags = 0, error = ENOMEM;
1672 1400 char *mtpt = NULL;
1673 args = xfs_args_allocate(sb, silent);
1674 if (!args)
1675 return -ENOMEM;
1676 1401
1677 mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL); 1402 mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
1678 if (!mp) 1403 if (!mp)
1679 goto out_free_args; 1404 goto out;
1680 1405
1681 spin_lock_init(&mp->m_sb_lock); 1406 spin_lock_init(&mp->m_sb_lock);
1682 mutex_init(&mp->m_ilock);
1683 mutex_init(&mp->m_growlock); 1407 mutex_init(&mp->m_growlock);
1684 atomic_set(&mp->m_active_trans, 0); 1408 atomic_set(&mp->m_active_trans, 0);
1685 INIT_LIST_HEAD(&mp->m_sync_list); 1409 INIT_LIST_HEAD(&mp->m_sync_list);
@@ -1689,12 +1413,9 @@ xfs_fs_fill_super(
1689 mp->m_super = sb; 1413 mp->m_super = sb;
1690 sb->s_fs_info = mp; 1414 sb->s_fs_info = mp;
1691 1415
1692 if (sb->s_flags & MS_RDONLY) 1416 error = xfs_parseargs(mp, (char *)data, &mtpt);
1693 mp->m_flags |= XFS_MOUNT_RDONLY;
1694
1695 error = xfs_parseargs(mp, (char *)data, args, 0);
1696 if (error) 1417 if (error)
1697 goto out_free_mp; 1418 goto out_free_fsname;
1698 1419
1699 sb_min_blocksize(sb, BBSIZE); 1420 sb_min_blocksize(sb, BBSIZE);
1700 sb->s_xattr = xfs_xattr_handlers; 1421 sb->s_xattr = xfs_xattr_handlers;
@@ -1702,33 +1423,28 @@ xfs_fs_fill_super(
1702 sb->s_qcop = &xfs_quotactl_operations; 1423 sb->s_qcop = &xfs_quotactl_operations;
1703 sb->s_op = &xfs_super_operations; 1424 sb->s_op = &xfs_super_operations;
1704 1425
1705 error = xfs_dmops_get(mp, args); 1426 error = xfs_dmops_get(mp);
1706 if (error) 1427 if (error)
1707 goto out_free_mp; 1428 goto out_free_fsname;
1708 error = xfs_qmops_get(mp, args); 1429 error = xfs_qmops_get(mp);
1709 if (error) 1430 if (error)
1710 goto out_put_dmops; 1431 goto out_put_dmops;
1711 1432
1712 if (args->flags & XFSMNT_QUIET) 1433 if (silent)
1713 flags |= XFS_MFSI_QUIET; 1434 flags |= XFS_MFSI_QUIET;
1714 1435
1715 error = xfs_open_devices(mp, args); 1436 error = xfs_open_devices(mp);
1716 if (error) 1437 if (error)
1717 goto out_put_qmops; 1438 goto out_put_qmops;
1718 1439
1719 if (xfs_icsb_init_counters(mp)) 1440 if (xfs_icsb_init_counters(mp))
1720 mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB; 1441 mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
1721 1442
1722 /*
1723 * Setup flags based on mount(2) options and then the superblock
1724 */
1725 error = xfs_start_flags(args, mp);
1726 if (error)
1727 goto out_free_fsname;
1728 error = xfs_readsb(mp, flags); 1443 error = xfs_readsb(mp, flags);
1729 if (error) 1444 if (error)
1730 goto out_free_fsname; 1445 goto out_destroy_counters;
1731 error = xfs_finish_flags(args, mp); 1446
1447 error = xfs_finish_flags(mp);
1732 if (error) 1448 if (error)
1733 goto out_free_sb; 1449 goto out_free_sb;
1734 1450
@@ -1747,7 +1463,7 @@ xfs_fs_fill_super(
1747 if (error) 1463 if (error)
1748 goto out_filestream_unmount; 1464 goto out_filestream_unmount;
1749 1465
1750 XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, args->mtpt, args->fsname); 1466 XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, mtpt, mp->m_fsname);
1751 1467
1752 sb->s_dirt = 1; 1468 sb->s_dirt = 1;
1753 sb->s_magic = XFS_SB_MAGIC; 1469 sb->s_magic = XFS_SB_MAGIC;
@@ -1772,35 +1488,31 @@ xfs_fs_fill_super(
1772 goto fail_vnrele; 1488 goto fail_vnrele;
1773 } 1489 }
1774 1490
1775 mp->m_sync_work.w_syncer = xfs_sync_worker; 1491 error = xfs_syncd_init(mp);
1776 mp->m_sync_work.w_mount = mp; 1492 if (error)
1777 mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd");
1778 if (IS_ERR(mp->m_sync_task)) {
1779 error = -PTR_ERR(mp->m_sync_task);
1780 goto fail_vnrele; 1493 goto fail_vnrele;
1781 }
1782 1494
1783 xfs_itrace_exit(XFS_I(sb->s_root->d_inode)); 1495 kfree(mtpt);
1784 1496
1785 kfree(args); 1497 xfs_itrace_exit(XFS_I(sb->s_root->d_inode));
1786 return 0; 1498 return 0;
1787 1499
1788 out_filestream_unmount: 1500 out_filestream_unmount:
1789 xfs_filestream_unmount(mp); 1501 xfs_filestream_unmount(mp);
1790 out_free_sb: 1502 out_free_sb:
1791 xfs_freesb(mp); 1503 xfs_freesb(mp);
1792 out_free_fsname: 1504 out_destroy_counters:
1793 xfs_free_fsname(mp);
1794 xfs_icsb_destroy_counters(mp); 1505 xfs_icsb_destroy_counters(mp);
1795 xfs_close_devices(mp); 1506 xfs_close_devices(mp);
1796 out_put_qmops: 1507 out_put_qmops:
1797 xfs_qmops_put(mp); 1508 xfs_qmops_put(mp);
1798 out_put_dmops: 1509 out_put_dmops:
1799 xfs_dmops_put(mp); 1510 xfs_dmops_put(mp);
1800 out_free_mp: 1511 out_free_fsname:
1512 xfs_free_fsname(mp);
1513 kfree(mtpt);
1801 kfree(mp); 1514 kfree(mp);
1802 out_free_args: 1515 out:
1803 kfree(args);
1804 return -error; 1516 return -error;
1805 1517
1806 fail_vnrele: 1518 fail_vnrele:
@@ -1820,8 +1532,6 @@ xfs_fs_fill_super(
1820 xfs_filestream_unmount(mp); 1532 xfs_filestream_unmount(mp);
1821 1533
1822 XFS_bflush(mp->m_ddev_targp); 1534 XFS_bflush(mp->m_ddev_targp);
1823 error = xfs_unmount_flush(mp, 0);
1824 WARN_ON(error);
1825 1535
1826 xfs_unmountfs(mp); 1536 xfs_unmountfs(mp);
1827 goto out_free_sb; 1537 goto out_free_sb;
@@ -1882,10 +1592,19 @@ xfs_alloc_trace_bufs(void)
1882 if (!xfs_bmap_trace_buf) 1592 if (!xfs_bmap_trace_buf)
1883 goto out_free_alloc_trace; 1593 goto out_free_alloc_trace;
1884#endif 1594#endif
1885#ifdef XFS_BMBT_TRACE 1595#ifdef XFS_BTREE_TRACE
1596 xfs_allocbt_trace_buf = ktrace_alloc(XFS_ALLOCBT_TRACE_SIZE,
1597 KM_MAYFAIL);
1598 if (!xfs_allocbt_trace_buf)
1599 goto out_free_bmap_trace;
1600
1601 xfs_inobt_trace_buf = ktrace_alloc(XFS_INOBT_TRACE_SIZE, KM_MAYFAIL);
1602 if (!xfs_inobt_trace_buf)
1603 goto out_free_allocbt_trace;
1604
1886 xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_MAYFAIL); 1605 xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_MAYFAIL);
1887 if (!xfs_bmbt_trace_buf) 1606 if (!xfs_bmbt_trace_buf)
1888 goto out_free_bmap_trace; 1607 goto out_free_inobt_trace;
1889#endif 1608#endif
1890#ifdef XFS_ATTR_TRACE 1609#ifdef XFS_ATTR_TRACE
1891 xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_MAYFAIL); 1610 xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_MAYFAIL);
@@ -1907,8 +1626,12 @@ xfs_alloc_trace_bufs(void)
1907 ktrace_free(xfs_attr_trace_buf); 1626 ktrace_free(xfs_attr_trace_buf);
1908 out_free_bmbt_trace: 1627 out_free_bmbt_trace:
1909#endif 1628#endif
1910#ifdef XFS_BMBT_TRACE 1629#ifdef XFS_BTREE_TRACE
1911 ktrace_free(xfs_bmbt_trace_buf); 1630 ktrace_free(xfs_bmbt_trace_buf);
1631 out_free_inobt_trace:
1632 ktrace_free(xfs_inobt_trace_buf);
1633 out_free_allocbt_trace:
1634 ktrace_free(xfs_allocbt_trace_buf);
1912 out_free_bmap_trace: 1635 out_free_bmap_trace:
1913#endif 1636#endif
1914#ifdef XFS_BMAP_TRACE 1637#ifdef XFS_BMAP_TRACE
@@ -1931,8 +1654,10 @@ xfs_free_trace_bufs(void)
1931#ifdef XFS_ATTR_TRACE 1654#ifdef XFS_ATTR_TRACE
1932 ktrace_free(xfs_attr_trace_buf); 1655 ktrace_free(xfs_attr_trace_buf);
1933#endif 1656#endif
1934#ifdef XFS_BMBT_TRACE 1657#ifdef XFS_BTREE_TRACE
1935 ktrace_free(xfs_bmbt_trace_buf); 1658 ktrace_free(xfs_bmbt_trace_buf);
1659 ktrace_free(xfs_inobt_trace_buf);
1660 ktrace_free(xfs_allocbt_trace_buf);
1936#endif 1661#endif
1937#ifdef XFS_BMAP_TRACE 1662#ifdef XFS_BMAP_TRACE
1938 ktrace_free(xfs_bmap_trace_buf); 1663 ktrace_free(xfs_bmap_trace_buf);
@@ -1945,16 +1670,10 @@ xfs_free_trace_bufs(void)
1945STATIC int __init 1670STATIC int __init
1946xfs_init_zones(void) 1671xfs_init_zones(void)
1947{ 1672{
1948 xfs_vnode_zone = kmem_zone_init_flags(sizeof(struct inode), "xfs_vnode",
1949 KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
1950 KM_ZONE_SPREAD,
1951 xfs_fs_inode_init_once);
1952 if (!xfs_vnode_zone)
1953 goto out;
1954 1673
1955 xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend"); 1674 xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
1956 if (!xfs_ioend_zone) 1675 if (!xfs_ioend_zone)
1957 goto out_destroy_vnode_zone; 1676 goto out;
1958 1677
1959 xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE, 1678 xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
1960 xfs_ioend_zone); 1679 xfs_ioend_zone);
@@ -1970,6 +1689,7 @@ xfs_init_zones(void)
1970 "xfs_bmap_free_item"); 1689 "xfs_bmap_free_item");
1971 if (!xfs_bmap_free_item_zone) 1690 if (!xfs_bmap_free_item_zone)
1972 goto out_destroy_log_ticket_zone; 1691 goto out_destroy_log_ticket_zone;
1692
1973 xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t), 1693 xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
1974 "xfs_btree_cur"); 1694 "xfs_btree_cur");
1975 if (!xfs_btree_cur_zone) 1695 if (!xfs_btree_cur_zone)
@@ -2017,8 +1737,8 @@ xfs_init_zones(void)
2017 1737
2018 xfs_inode_zone = 1738 xfs_inode_zone =
2019 kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode", 1739 kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
2020 KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | 1740 KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD,
2021 KM_ZONE_SPREAD, NULL); 1741 xfs_fs_inode_init_once);
2022 if (!xfs_inode_zone) 1742 if (!xfs_inode_zone)
2023 goto out_destroy_efi_zone; 1743 goto out_destroy_efi_zone;
2024 1744
@@ -2066,8 +1786,6 @@ xfs_init_zones(void)
2066 mempool_destroy(xfs_ioend_pool); 1786 mempool_destroy(xfs_ioend_pool);
2067 out_destroy_ioend_zone: 1787 out_destroy_ioend_zone:
2068 kmem_zone_destroy(xfs_ioend_zone); 1788 kmem_zone_destroy(xfs_ioend_zone);
2069 out_destroy_vnode_zone:
2070 kmem_zone_destroy(xfs_vnode_zone);
2071 out: 1789 out:
2072 return -ENOMEM; 1790 return -ENOMEM;
2073} 1791}
@@ -2092,7 +1810,6 @@ xfs_destroy_zones(void)
2092 kmem_zone_destroy(xfs_log_ticket_zone); 1810 kmem_zone_destroy(xfs_log_ticket_zone);
2093 mempool_destroy(xfs_ioend_pool); 1811 mempool_destroy(xfs_ioend_pool);
2094 kmem_zone_destroy(xfs_ioend_zone); 1812 kmem_zone_destroy(xfs_ioend_zone);
2095 kmem_zone_destroy(xfs_vnode_zone);
2096 1813
2097} 1814}
2098 1815
@@ -2100,13 +1817,12 @@ STATIC int __init
2100init_xfs_fs(void) 1817init_xfs_fs(void)
2101{ 1818{
2102 int error; 1819 int error;
2103 static char message[] __initdata = KERN_INFO \
2104 XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled\n";
2105 1820
2106 printk(message); 1821 printk(KERN_INFO XFS_VERSION_STRING " with "
1822 XFS_BUILD_OPTIONS " enabled\n");
2107 1823
2108 ktrace_init(64); 1824 ktrace_init(64);
2109 vn_init(); 1825 xfs_ioend_init();
2110 xfs_dir_startup(); 1826 xfs_dir_startup();
2111 1827
2112 error = xfs_init_zones(); 1828 error = xfs_init_zones();
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index fe2ef4e6a0f..d5d776d4cd6 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -20,24 +20,12 @@
20 20
21#include <linux/exportfs.h> 21#include <linux/exportfs.h>
22 22
23#ifdef CONFIG_XFS_DMAPI
24# define vfs_insertdmapi(vfs) vfs_insertops(vfsp, &xfs_dmops)
25# define vfs_initdmapi() dmapi_init()
26# define vfs_exitdmapi() dmapi_uninit()
27#else
28# define vfs_insertdmapi(vfs) do { } while (0)
29# define vfs_initdmapi() do { } while (0)
30# define vfs_exitdmapi() do { } while (0)
31#endif
32
33#ifdef CONFIG_XFS_QUOTA 23#ifdef CONFIG_XFS_QUOTA
34# define vfs_insertquota(vfs) vfs_insertops(vfsp, &xfs_qmops)
35extern void xfs_qm_init(void); 24extern void xfs_qm_init(void);
36extern void xfs_qm_exit(void); 25extern void xfs_qm_exit(void);
37# define vfs_initquota() xfs_qm_init() 26# define vfs_initquota() xfs_qm_init()
38# define vfs_exitquota() xfs_qm_exit() 27# define vfs_exitquota() xfs_qm_exit()
39#else 28#else
40# define vfs_insertquota(vfs) do { } while (0)
41# define vfs_initquota() do { } while (0) 29# define vfs_initquota() do { } while (0)
42# define vfs_exitquota() do { } while (0) 30# define vfs_exitquota() do { } while (0)
43#endif 31#endif
@@ -101,9 +89,6 @@ struct block_device;
101 89
102extern __uint64_t xfs_max_file_offset(unsigned int); 90extern __uint64_t xfs_max_file_offset(unsigned int);
103 91
104extern void xfs_flush_inode(struct xfs_inode *);
105extern void xfs_flush_device(struct xfs_inode *);
106
107extern void xfs_blkdev_issue_flush(struct xfs_buftarg *); 92extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
108 93
109extern const struct export_operations xfs_export_operations; 94extern const struct export_operations xfs_export_operations;
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
new file mode 100644
index 00000000000..2ed035354c2
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -0,0 +1,762 @@
1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_types.h"
21#include "xfs_bit.h"
22#include "xfs_log.h"
23#include "xfs_inum.h"
24#include "xfs_trans.h"
25#include "xfs_sb.h"
26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h"
30#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h"
33#include "xfs_btree.h"
34#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h"
36#include "xfs_inode.h"
37#include "xfs_dinode.h"
38#include "xfs_error.h"
39#include "xfs_mru_cache.h"
40#include "xfs_filestream.h"
41#include "xfs_vnodeops.h"
42#include "xfs_utils.h"
43#include "xfs_buf_item.h"
44#include "xfs_inode_item.h"
45#include "xfs_rw.h"
46
47#include <linux/kthread.h>
48#include <linux/freezer.h>
49
50/*
51 * Sync all the inodes in the given AG according to the
52 * direction given by the flags.
53 */
54STATIC int
55xfs_sync_inodes_ag(
56 xfs_mount_t *mp,
57 int ag,
58 int flags)
59{
60 xfs_perag_t *pag = &mp->m_perag[ag];
61 int nr_found;
62 uint32_t first_index = 0;
63 int error = 0;
64 int last_error = 0;
65 int fflag = XFS_B_ASYNC;
66
67 if (flags & SYNC_DELWRI)
68 fflag = XFS_B_DELWRI;
69 if (flags & SYNC_WAIT)
70 fflag = 0; /* synchronous overrides all */
71
72 do {
73 struct inode *inode;
74 xfs_inode_t *ip = NULL;
75 int lock_flags = XFS_ILOCK_SHARED;
76
77 /*
78 * use a gang lookup to find the next inode in the tree
79 * as the tree is sparse and a gang lookup walks to find
80 * the number of objects requested.
81 */
82 read_lock(&pag->pag_ici_lock);
83 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
84 (void**)&ip, first_index, 1);
85
86 if (!nr_found) {
87 read_unlock(&pag->pag_ici_lock);
88 break;
89 }
90
91 /*
92 * Update the index for the next lookup. Catch overflows
93 * into the next AG range which can occur if we have inodes
94 * in the last block of the AG and we are currently
95 * pointing to the last inode.
96 */
97 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
98 if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
99 read_unlock(&pag->pag_ici_lock);
100 break;
101 }
102
103 /* nothing to sync during shutdown */
104 if (XFS_FORCED_SHUTDOWN(mp)) {
105 read_unlock(&pag->pag_ici_lock);
106 return 0;
107 }
108
109 /*
110 * If we can't get a reference on the inode, it must be
111 * in reclaim. Leave it for the reclaim code to flush.
112 */
113 inode = VFS_I(ip);
114 if (!igrab(inode)) {
115 read_unlock(&pag->pag_ici_lock);
116 continue;
117 }
118 read_unlock(&pag->pag_ici_lock);
119
120 /* avoid new or bad inodes */
121 if (is_bad_inode(inode) ||
122 xfs_iflags_test(ip, XFS_INEW)) {
123 IRELE(ip);
124 continue;
125 }
126
127 /*
128 * If we have to flush data or wait for I/O completion
129 * we need to hold the iolock.
130 */
131 if ((flags & SYNC_DELWRI) && VN_DIRTY(inode)) {
132 xfs_ilock(ip, XFS_IOLOCK_SHARED);
133 lock_flags |= XFS_IOLOCK_SHARED;
134 error = xfs_flush_pages(ip, 0, -1, fflag, FI_NONE);
135 if (flags & SYNC_IOWAIT)
136 xfs_ioend_wait(ip);
137 }
138 xfs_ilock(ip, XFS_ILOCK_SHARED);
139
140 if ((flags & SYNC_ATTR) && !xfs_inode_clean(ip)) {
141 if (flags & SYNC_WAIT) {
142 xfs_iflock(ip);
143 if (!xfs_inode_clean(ip))
144 error = xfs_iflush(ip, XFS_IFLUSH_SYNC);
145 else
146 xfs_ifunlock(ip);
147 } else if (xfs_iflock_nowait(ip)) {
148 if (!xfs_inode_clean(ip))
149 error = xfs_iflush(ip, XFS_IFLUSH_DELWRI);
150 else
151 xfs_ifunlock(ip);
152 }
153 }
154 xfs_iput(ip, lock_flags);
155
156 if (error)
157 last_error = error;
158 /*
159 * bail out if the filesystem is corrupted.
160 */
161 if (error == EFSCORRUPTED)
162 return XFS_ERROR(error);
163
164 } while (nr_found);
165
166 return last_error;
167}
168
169int
170xfs_sync_inodes(
171 xfs_mount_t *mp,
172 int flags)
173{
174 int error;
175 int last_error;
176 int i;
177 int lflags = XFS_LOG_FORCE;
178
179 if (mp->m_flags & XFS_MOUNT_RDONLY)
180 return 0;
181 error = 0;
182 last_error = 0;
183
184 if (flags & SYNC_WAIT)
185 lflags |= XFS_LOG_SYNC;
186
187 for (i = 0; i < mp->m_sb.sb_agcount; i++) {
188 if (!mp->m_perag[i].pag_ici_init)
189 continue;
190 error = xfs_sync_inodes_ag(mp, i, flags);
191 if (error)
192 last_error = error;
193 if (error == EFSCORRUPTED)
194 break;
195 }
196 if (flags & SYNC_DELWRI)
197 xfs_log_force(mp, 0, lflags);
198
199 return XFS_ERROR(last_error);
200}
201
202STATIC int
203xfs_commit_dummy_trans(
204 struct xfs_mount *mp,
205 uint log_flags)
206{
207 struct xfs_inode *ip = mp->m_rootip;
208 struct xfs_trans *tp;
209 int error;
210
211 /*
212 * Put a dummy transaction in the log to tell recovery
213 * that all others are OK.
214 */
215 tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1);
216 error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
217 if (error) {
218 xfs_trans_cancel(tp, 0);
219 return error;
220 }
221
222 xfs_ilock(ip, XFS_ILOCK_EXCL);
223
224 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
225 xfs_trans_ihold(tp, ip);
226 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
227 /* XXX(hch): ignoring the error here.. */
228 error = xfs_trans_commit(tp, 0);
229
230 xfs_iunlock(ip, XFS_ILOCK_EXCL);
231
232 xfs_log_force(mp, 0, log_flags);
233 return 0;
234}
235
236int
237xfs_sync_fsdata(
238 struct xfs_mount *mp,
239 int flags)
240{
241 struct xfs_buf *bp;
242 struct xfs_buf_log_item *bip;
243 int error = 0;
244
245 /*
246 * If this is xfssyncd() then only sync the superblock if we can
247 * lock it without sleeping and it is not pinned.
248 */
249 if (flags & SYNC_BDFLUSH) {
250 ASSERT(!(flags & SYNC_WAIT));
251
252 bp = xfs_getsb(mp, XFS_BUF_TRYLOCK);
253 if (!bp)
254 goto out;
255
256 bip = XFS_BUF_FSPRIVATE(bp, struct xfs_buf_log_item *);
257 if (!bip || !xfs_buf_item_dirty(bip) || XFS_BUF_ISPINNED(bp))
258 goto out_brelse;
259 } else {
260 bp = xfs_getsb(mp, 0);
261
262 /*
263 * If the buffer is pinned then push on the log so we won't
264 * get stuck waiting in the write for someone, maybe
265 * ourselves, to flush the log.
266 *
267 * Even though we just pushed the log above, we did not have
268 * the superblock buffer locked at that point so it can
269 * become pinned in between there and here.
270 */
271 if (XFS_BUF_ISPINNED(bp))
272 xfs_log_force(mp, 0, XFS_LOG_FORCE);
273 }
274
275
276 if (flags & SYNC_WAIT)
277 XFS_BUF_UNASYNC(bp);
278 else
279 XFS_BUF_ASYNC(bp);
280
281 return xfs_bwrite(mp, bp);
282
283 out_brelse:
284 xfs_buf_relse(bp);
285 out:
286 return error;
287}
288
289/*
290 * When remounting a filesystem read-only or freezing the filesystem, we have
291 * two phases to execute. This first phase is syncing the data before we
292 * quiesce the filesystem, and the second is flushing all the inodes out after
293 * we've waited for all the transactions created by the first phase to
294 * complete. The second phase ensures that the inodes are written to their
295 * location on disk rather than just existing in transactions in the log. This
296 * means after a quiesce there is no log replay required to write the inodes to
297 * disk (this is the main difference between a sync and a quiesce).
298 */
299/*
300 * First stage of freeze - no writers will make progress now we are here,
301 * so we flush delwri and delalloc buffers here, then wait for all I/O to
302 * complete. Data is frozen at that point. Metadata is not frozen,
303 * transactions can still occur here so don't bother flushing the buftarg
304 * because it'll just get dirty again.
305 */
306int
307xfs_quiesce_data(
308 struct xfs_mount *mp)
309{
310 int error;
311
312 /* push non-blocking */
313 xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_BDFLUSH);
314 XFS_QM_DQSYNC(mp, SYNC_BDFLUSH);
315 xfs_filestream_flush(mp);
316
317 /* push and block */
318 xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_WAIT|SYNC_IOWAIT);
319 XFS_QM_DQSYNC(mp, SYNC_WAIT);
320
321 /* write superblock and hoover up shutdown errors */
322 error = xfs_sync_fsdata(mp, 0);
323
324 /* flush data-only devices */
325 if (mp->m_rtdev_targp)
326 XFS_bflush(mp->m_rtdev_targp);
327
328 return error;
329}
330
331STATIC void
332xfs_quiesce_fs(
333 struct xfs_mount *mp)
334{
335 int count = 0, pincount;
336
337 xfs_flush_buftarg(mp->m_ddev_targp, 0);
338 xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
339
340 /*
341 * This loop must run at least twice. The first instance of the loop
342 * will flush most meta data but that will generate more meta data
343 * (typically directory updates). Which then must be flushed and
344 * logged before we can write the unmount record.
345 */
346 do {
347 xfs_sync_inodes(mp, SYNC_ATTR|SYNC_WAIT);
348 pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
349 if (!pincount) {
350 delay(50);
351 count++;
352 }
353 } while (count < 2);
354}
355
356/*
357 * Second stage of a quiesce. The data is already synced, now we have to take
358 * care of the metadata. New transactions are already blocked, so we need to
359 * wait for any remaining transactions to drain out before proceding.
360 */
361void
362xfs_quiesce_attr(
363 struct xfs_mount *mp)
364{
365 int error = 0;
366
367 /* wait for all modifications to complete */
368 while (atomic_read(&mp->m_active_trans) > 0)
369 delay(100);
370
371 /* flush inodes and push all remaining buffers out to disk */
372 xfs_quiesce_fs(mp);
373
374 ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0);
375
376 /* Push the superblock and write an unmount record */
377 error = xfs_log_sbcount(mp, 1);
378 if (error)
379 xfs_fs_cmn_err(CE_WARN, mp,
380 "xfs_attr_quiesce: failed to log sb changes. "
381 "Frozen image may not be consistent.");
382 xfs_log_unmount_write(mp);
383 xfs_unmountfs_writesb(mp);
384}
385
386/*
387 * Enqueue a work item to be picked up by the vfs xfssyncd thread.
388 * Doing this has two advantages:
389 * - It saves on stack space, which is tight in certain situations
390 * - It can be used (with care) as a mechanism to avoid deadlocks.
391 * Flushing while allocating in a full filesystem requires both.
392 */
393STATIC void
394xfs_syncd_queue_work(
395 struct xfs_mount *mp,
396 void *data,
397 void (*syncer)(struct xfs_mount *, void *))
398{
399 struct bhv_vfs_sync_work *work;
400
401 work = kmem_alloc(sizeof(struct bhv_vfs_sync_work), KM_SLEEP);
402 INIT_LIST_HEAD(&work->w_list);
403 work->w_syncer = syncer;
404 work->w_data = data;
405 work->w_mount = mp;
406 spin_lock(&mp->m_sync_lock);
407 list_add_tail(&work->w_list, &mp->m_sync_list);
408 spin_unlock(&mp->m_sync_lock);
409 wake_up_process(mp->m_sync_task);
410}
411
412/*
413 * Flush delayed allocate data, attempting to free up reserved space
414 * from existing allocations. At this point a new allocation attempt
415 * has failed with ENOSPC and we are in the process of scratching our
416 * heads, looking about for more room...
417 */
418STATIC void
419xfs_flush_inode_work(
420 struct xfs_mount *mp,
421 void *arg)
422{
423 struct inode *inode = arg;
424 filemap_flush(inode->i_mapping);
425 iput(inode);
426}
427
428void
429xfs_flush_inode(
430 xfs_inode_t *ip)
431{
432 struct inode *inode = VFS_I(ip);
433
434 igrab(inode);
435 xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work);
436 delay(msecs_to_jiffies(500));
437}
438
439/*
440 * This is the "bigger hammer" version of xfs_flush_inode_work...
441 * (IOW, "If at first you don't succeed, use a Bigger Hammer").
442 */
443STATIC void
444xfs_flush_device_work(
445 struct xfs_mount *mp,
446 void *arg)
447{
448 struct inode *inode = arg;
449 sync_blockdev(mp->m_super->s_bdev);
450 iput(inode);
451}
452
453void
454xfs_flush_device(
455 xfs_inode_t *ip)
456{
457 struct inode *inode = VFS_I(ip);
458
459 igrab(inode);
460 xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work);
461 delay(msecs_to_jiffies(500));
462 xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
463}
464
465/*
466 * Every sync period we need to unpin all items, reclaim inodes, sync
467 * quota and write out the superblock. We might need to cover the log
468 * to indicate it is idle.
469 */
470STATIC void
471xfs_sync_worker(
472 struct xfs_mount *mp,
473 void *unused)
474{
475 int error;
476
477 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
478 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
479 xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
480 /* dgc: errors ignored here */
481 error = XFS_QM_DQSYNC(mp, SYNC_BDFLUSH);
482 error = xfs_sync_fsdata(mp, SYNC_BDFLUSH);
483 if (xfs_log_need_covered(mp))
484 error = xfs_commit_dummy_trans(mp, XFS_LOG_FORCE);
485 }
486 mp->m_sync_seq++;
487 wake_up(&mp->m_wait_single_sync_task);
488}
489
490STATIC int
491xfssyncd(
492 void *arg)
493{
494 struct xfs_mount *mp = arg;
495 long timeleft;
496 bhv_vfs_sync_work_t *work, *n;
497 LIST_HEAD (tmp);
498
499 set_freezable();
500 timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
501 for (;;) {
502 timeleft = schedule_timeout_interruptible(timeleft);
503 /* swsusp */
504 try_to_freeze();
505 if (kthread_should_stop() && list_empty(&mp->m_sync_list))
506 break;
507
508 spin_lock(&mp->m_sync_lock);
509 /*
510 * We can get woken by laptop mode, to do a sync -
511 * that's the (only!) case where the list would be
512 * empty with time remaining.
513 */
514 if (!timeleft || list_empty(&mp->m_sync_list)) {
515 if (!timeleft)
516 timeleft = xfs_syncd_centisecs *
517 msecs_to_jiffies(10);
518 INIT_LIST_HEAD(&mp->m_sync_work.w_list);
519 list_add_tail(&mp->m_sync_work.w_list,
520 &mp->m_sync_list);
521 }
522 list_for_each_entry_safe(work, n, &mp->m_sync_list, w_list)
523 list_move(&work->w_list, &tmp);
524 spin_unlock(&mp->m_sync_lock);
525
526 list_for_each_entry_safe(work, n, &tmp, w_list) {
527 (*work->w_syncer)(mp, work->w_data);
528 list_del(&work->w_list);
529 if (work == &mp->m_sync_work)
530 continue;
531 kmem_free(work);
532 }
533 }
534
535 return 0;
536}
537
538int
539xfs_syncd_init(
540 struct xfs_mount *mp)
541{
542 mp->m_sync_work.w_syncer = xfs_sync_worker;
543 mp->m_sync_work.w_mount = mp;
544 mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd");
545 if (IS_ERR(mp->m_sync_task))
546 return -PTR_ERR(mp->m_sync_task);
547 return 0;
548}
549
550void
551xfs_syncd_stop(
552 struct xfs_mount *mp)
553{
554 kthread_stop(mp->m_sync_task);
555}
556
557int
558xfs_reclaim_inode(
559 xfs_inode_t *ip,
560 int locked,
561 int sync_mode)
562{
563 xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino);
564
565 /* The hash lock here protects a thread in xfs_iget_core from
566 * racing with us on linking the inode back with a vnode.
567 * Once we have the XFS_IRECLAIM flag set it will not touch
568 * us.
569 */
570 write_lock(&pag->pag_ici_lock);
571 spin_lock(&ip->i_flags_lock);
572 if (__xfs_iflags_test(ip, XFS_IRECLAIM) ||
573 !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
574 spin_unlock(&ip->i_flags_lock);
575 write_unlock(&pag->pag_ici_lock);
576 if (locked) {
577 xfs_ifunlock(ip);
578 xfs_iunlock(ip, XFS_ILOCK_EXCL);
579 }
580 return 1;
581 }
582 __xfs_iflags_set(ip, XFS_IRECLAIM);
583 spin_unlock(&ip->i_flags_lock);
584 write_unlock(&pag->pag_ici_lock);
585 xfs_put_perag(ip->i_mount, pag);
586
587 /*
588 * If the inode is still dirty, then flush it out. If the inode
589 * is not in the AIL, then it will be OK to flush it delwri as
590 * long as xfs_iflush() does not keep any references to the inode.
591 * We leave that decision up to xfs_iflush() since it has the
592 * knowledge of whether it's OK to simply do a delwri flush of
593 * the inode or whether we need to wait until the inode is
594 * pulled from the AIL.
595 * We get the flush lock regardless, though, just to make sure
596 * we don't free it while it is being flushed.
597 */
598 if (!locked) {
599 xfs_ilock(ip, XFS_ILOCK_EXCL);
600 xfs_iflock(ip);
601 }
602
603 /*
604 * In the case of a forced shutdown we rely on xfs_iflush() to
605 * wait for the inode to be unpinned before returning an error.
606 */
607 if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) {
608 /* synchronize with xfs_iflush_done */
609 xfs_iflock(ip);
610 xfs_ifunlock(ip);
611 }
612
613 xfs_iunlock(ip, XFS_ILOCK_EXCL);
614 xfs_ireclaim(ip);
615 return 0;
616}
617
618/*
619 * We set the inode flag atomically with the radix tree tag.
620 * Once we get tag lookups on the radix tree, this inode flag
621 * can go away.
622 */
623void
624xfs_inode_set_reclaim_tag(
625 xfs_inode_t *ip)
626{
627 xfs_mount_t *mp = ip->i_mount;
628 xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino);
629
630 read_lock(&pag->pag_ici_lock);
631 spin_lock(&ip->i_flags_lock);
632 radix_tree_tag_set(&pag->pag_ici_root,
633 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
634 __xfs_iflags_set(ip, XFS_IRECLAIMABLE);
635 spin_unlock(&ip->i_flags_lock);
636 read_unlock(&pag->pag_ici_lock);
637 xfs_put_perag(mp, pag);
638}
639
640void
641__xfs_inode_clear_reclaim_tag(
642 xfs_mount_t *mp,
643 xfs_perag_t *pag,
644 xfs_inode_t *ip)
645{
646 radix_tree_tag_clear(&pag->pag_ici_root,
647 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
648}
649
650void
651xfs_inode_clear_reclaim_tag(
652 xfs_inode_t *ip)
653{
654 xfs_mount_t *mp = ip->i_mount;
655 xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino);
656
657 read_lock(&pag->pag_ici_lock);
658 spin_lock(&ip->i_flags_lock);
659 __xfs_inode_clear_reclaim_tag(mp, pag, ip);
660 spin_unlock(&ip->i_flags_lock);
661 read_unlock(&pag->pag_ici_lock);
662 xfs_put_perag(mp, pag);
663}
664
665
666STATIC void
667xfs_reclaim_inodes_ag(
668 xfs_mount_t *mp,
669 int ag,
670 int noblock,
671 int mode)
672{
673 xfs_inode_t *ip = NULL;
674 xfs_perag_t *pag = &mp->m_perag[ag];
675 int nr_found;
676 uint32_t first_index;
677 int skipped;
678
679restart:
680 first_index = 0;
681 skipped = 0;
682 do {
683 /*
684 * use a gang lookup to find the next inode in the tree
685 * as the tree is sparse and a gang lookup walks to find
686 * the number of objects requested.
687 */
688 read_lock(&pag->pag_ici_lock);
689 nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
690 (void**)&ip, first_index, 1,
691 XFS_ICI_RECLAIM_TAG);
692
693 if (!nr_found) {
694 read_unlock(&pag->pag_ici_lock);
695 break;
696 }
697
698 /*
699 * Update the index for the next lookup. Catch overflows
700 * into the next AG range which can occur if we have inodes
701 * in the last block of the AG and we are currently
702 * pointing to the last inode.
703 */
704 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
705 if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
706 read_unlock(&pag->pag_ici_lock);
707 break;
708 }
709
710 /* ignore if already under reclaim */
711 if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
712 read_unlock(&pag->pag_ici_lock);
713 continue;
714 }
715
716 if (noblock) {
717 if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
718 read_unlock(&pag->pag_ici_lock);
719 continue;
720 }
721 if (xfs_ipincount(ip) ||
722 !xfs_iflock_nowait(ip)) {
723 xfs_iunlock(ip, XFS_ILOCK_EXCL);
724 read_unlock(&pag->pag_ici_lock);
725 continue;
726 }
727 }
728 read_unlock(&pag->pag_ici_lock);
729
730 /*
731 * hmmm - this is an inode already in reclaim. Do
732 * we even bother catching it here?
733 */
734 if (xfs_reclaim_inode(ip, noblock, mode))
735 skipped++;
736 } while (nr_found);
737
738 if (skipped) {
739 delay(1);
740 goto restart;
741 }
742 return;
743
744}
745
746int
747xfs_reclaim_inodes(
748 xfs_mount_t *mp,
749 int noblock,
750 int mode)
751{
752 int i;
753
754 for (i = 0; i < mp->m_sb.sb_agcount; i++) {
755 if (!mp->m_perag[i].pag_ici_init)
756 continue;
757 xfs_reclaim_inodes_ag(mp, i, noblock, mode);
758 }
759 return 0;
760}
761
762
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
new file mode 100644
index 00000000000..5f6de1efe1f
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -0,0 +1,55 @@
1/*
2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef XFS_SYNC_H
19#define XFS_SYNC_H 1
20
21struct xfs_mount;
22
23typedef struct bhv_vfs_sync_work {
24 struct list_head w_list;
25 struct xfs_mount *w_mount;
26 void *w_data; /* syncer routine argument */
27 void (*w_syncer)(struct xfs_mount *, void *);
28} bhv_vfs_sync_work_t;
29
30#define SYNC_ATTR 0x0001 /* sync attributes */
31#define SYNC_DELWRI 0x0002 /* look at delayed writes */
32#define SYNC_WAIT 0x0004 /* wait for i/o to complete */
33#define SYNC_BDFLUSH 0x0008 /* BDFLUSH is calling -- don't block */
34#define SYNC_IOWAIT 0x0010 /* wait for all I/O to complete */
35
36int xfs_syncd_init(struct xfs_mount *mp);
37void xfs_syncd_stop(struct xfs_mount *mp);
38
39int xfs_sync_inodes(struct xfs_mount *mp, int flags);
40int xfs_sync_fsdata(struct xfs_mount *mp, int flags);
41
42int xfs_quiesce_data(struct xfs_mount *mp);
43void xfs_quiesce_attr(struct xfs_mount *mp);
44
45void xfs_flush_inode(struct xfs_inode *ip);
46void xfs_flush_device(struct xfs_inode *ip);
47
48int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode);
49int xfs_reclaim_inodes(struct xfs_mount *mp, int noblock, int mode);
50
51void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
52void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip);
53void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
54 struct xfs_inode *ip);
55#endif
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
index 7dacb5bbde3..916c0ffb608 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ b/fs/xfs/linux-2.6/xfs_sysctl.c
@@ -56,17 +56,6 @@ xfs_stats_clear_proc_handler(
56 56
57static ctl_table xfs_table[] = { 57static ctl_table xfs_table[] = {
58 { 58 {
59 .ctl_name = XFS_RESTRICT_CHOWN,
60 .procname = "restrict_chown",
61 .data = &xfs_params.restrict_chown.val,
62 .maxlen = sizeof(int),
63 .mode = 0644,
64 .proc_handler = &proc_dointvec_minmax,
65 .strategy = &sysctl_intvec,
66 .extra1 = &xfs_params.restrict_chown.min,
67 .extra2 = &xfs_params.restrict_chown.max
68 },
69 {
70 .ctl_name = XFS_SGID_INHERIT, 59 .ctl_name = XFS_SGID_INHERIT,
71 .procname = "irix_sgid_inherit", 60 .procname = "irix_sgid_inherit",
72 .data = &xfs_params.sgid_inherit.val, 61 .data = &xfs_params.sgid_inherit.val,
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/linux-2.6/xfs_sysctl.h
index 4aadb8056c3..b9937d450f8 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.h
+++ b/fs/xfs/linux-2.6/xfs_sysctl.h
@@ -31,7 +31,6 @@ typedef struct xfs_sysctl_val {
31} xfs_sysctl_val_t; 31} xfs_sysctl_val_t;
32 32
33typedef struct xfs_param { 33typedef struct xfs_param {
34 xfs_sysctl_val_t restrict_chown;/* Root/non-root can give away files.*/
35 xfs_sysctl_val_t sgid_inherit; /* Inherit S_ISGID if process' GID is 34 xfs_sysctl_val_t sgid_inherit; /* Inherit S_ISGID if process' GID is
36 * not a member of parent dir GID. */ 35 * not a member of parent dir GID. */
37 xfs_sysctl_val_t symlink_mode; /* Link creat mode affected by umask */ 36 xfs_sysctl_val_t symlink_mode; /* Link creat mode affected by umask */
@@ -68,7 +67,7 @@ typedef struct xfs_param {
68enum { 67enum {
69 /* XFS_REFCACHE_SIZE = 1 */ 68 /* XFS_REFCACHE_SIZE = 1 */
70 /* XFS_REFCACHE_PURGE = 2 */ 69 /* XFS_REFCACHE_PURGE = 2 */
71 XFS_RESTRICT_CHOWN = 3, 70 /* XFS_RESTRICT_CHOWN = 3 */
72 XFS_SGID_INHERIT = 4, 71 XFS_SGID_INHERIT = 4,
73 XFS_SYMLINK_MODE = 5, 72 XFS_SYMLINK_MODE = 5,
74 XFS_PANIC_MASK = 6, 73 XFS_PANIC_MASK = 6,
diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h
deleted file mode 100644
index 7e60c7776b1..00000000000
--- a/fs/xfs/linux-2.6/xfs_vfs.h
+++ /dev/null
@@ -1,77 +0,0 @@
1/*
2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_VFS_H__
19#define __XFS_VFS_H__
20
21#include <linux/vfs.h>
22#include "xfs_fs.h"
23
24struct inode;
25
26struct fid;
27struct cred;
28struct seq_file;
29struct super_block;
30struct xfs_inode;
31struct xfs_mount;
32struct xfs_mount_args;
33
34typedef struct kstatfs bhv_statvfs_t;
35
36typedef struct bhv_vfs_sync_work {
37 struct list_head w_list;
38 struct xfs_mount *w_mount;
39 void *w_data; /* syncer routine argument */
40 void (*w_syncer)(struct xfs_mount *, void *);
41} bhv_vfs_sync_work_t;
42
43#define SYNC_ATTR 0x0001 /* sync attributes */
44#define SYNC_CLOSE 0x0002 /* close file system down */
45#define SYNC_DELWRI 0x0004 /* look at delayed writes */
46#define SYNC_WAIT 0x0008 /* wait for i/o to complete */
47#define SYNC_BDFLUSH 0x0010 /* BDFLUSH is calling -- don't block */
48#define SYNC_FSDATA 0x0020 /* flush fs data (e.g. superblocks) */
49#define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */
50#define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */
51#define SYNC_IOWAIT 0x0100 /* wait for all I/O to complete */
52
53/*
54 * When remounting a filesystem read-only or freezing the filesystem,
55 * we have two phases to execute. This first phase is syncing the data
56 * before we quiesce the fielsystem, and the second is flushing all the
57 * inodes out after we've waited for all the transactions created by
58 * the first phase to complete. The second phase uses SYNC_INODE_QUIESCE
59 * to ensure that the inodes are written to their location on disk
60 * rather than just existing in transactions in the log. This means
61 * after a quiesce there is no log replay required to write the inodes
62 * to disk (this is the main difference between a sync and a quiesce).
63 */
64#define SYNC_DATA_QUIESCE (SYNC_DELWRI|SYNC_FSDATA|SYNC_WAIT|SYNC_IOWAIT)
65#define SYNC_INODE_QUIESCE (SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT)
66
67#define SHUTDOWN_META_IO_ERROR 0x0001 /* write attempt to metadata failed */
68#define SHUTDOWN_LOG_IO_ERROR 0x0002 /* write attempt to the log failed */
69#define SHUTDOWN_FORCE_UMOUNT 0x0004 /* shutdown from a forced unmount */
70#define SHUTDOWN_CORRUPT_INCORE 0x0008 /* corrupt in-memory data structures */
71#define SHUTDOWN_REMOTE_REQ 0x0010 /* shutdown came from remote cell */
72#define SHUTDOWN_DEVICE_REQ 0x0020 /* failed all paths to the device */
73
74#define xfs_test_for_freeze(mp) ((mp)->m_super->s_frozen)
75#define xfs_wait_for_freeze(mp,l) vfs_check_frozen((mp)->m_super, (l))
76
77#endif /* __XFS_VFS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
deleted file mode 100644
index b52528bbbff..00000000000
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ /dev/null
@@ -1,145 +0,0 @@
1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_vnodeops.h"
20#include "xfs_bmap_btree.h"
21#include "xfs_inode.h"
22
23/*
24 * And this gunk is needed for xfs_mount.h"
25 */
26#include "xfs_log.h"
27#include "xfs_trans.h"
28#include "xfs_sb.h"
29#include "xfs_dmapi.h"
30#include "xfs_inum.h"
31#include "xfs_ag.h"
32#include "xfs_mount.h"
33
34
35/*
36 * Dedicated vnode inactive/reclaim sync wait queues.
37 * Prime number of hash buckets since address is used as the key.
38 */
39#define NVSYNC 37
40#define vptosync(v) (&vsync[((unsigned long)v) % NVSYNC])
41static wait_queue_head_t vsync[NVSYNC];
42
43void __init
44vn_init(void)
45{
46 int i;
47
48 for (i = 0; i < NVSYNC; i++)
49 init_waitqueue_head(&vsync[i]);
50}
51
52void
53vn_iowait(
54 xfs_inode_t *ip)
55{
56 wait_queue_head_t *wq = vptosync(ip);
57
58 wait_event(*wq, (atomic_read(&ip->i_iocount) == 0));
59}
60
61void
62vn_iowake(
63 xfs_inode_t *ip)
64{
65 if (atomic_dec_and_test(&ip->i_iocount))
66 wake_up(vptosync(ip));
67}
68
69/*
70 * Volume managers supporting multiple paths can send back ENODEV when the
71 * final path disappears. In this case continuing to fill the page cache
72 * with dirty data which cannot be written out is evil, so prevent that.
73 */
74void
75vn_ioerror(
76 xfs_inode_t *ip,
77 int error,
78 char *f,
79 int l)
80{
81 if (unlikely(error == -ENODEV))
82 xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ, f, l);
83}
84
85#ifdef XFS_INODE_TRACE
86
87/*
88 * Reference count of Linux inode if present, -1 if the xfs_inode
89 * has no associated Linux inode.
90 */
91static inline int xfs_icount(struct xfs_inode *ip)
92{
93 struct inode *vp = VFS_I(ip);
94
95 if (vp)
96 return vn_count(vp);
97 return -1;
98}
99
100#define KTRACE_ENTER(ip, vk, s, line, ra) \
101 ktrace_enter( (ip)->i_trace, \
102/* 0 */ (void *)(__psint_t)(vk), \
103/* 1 */ (void *)(s), \
104/* 2 */ (void *)(__psint_t) line, \
105/* 3 */ (void *)(__psint_t)xfs_icount(ip), \
106/* 4 */ (void *)(ra), \
107/* 5 */ NULL, \
108/* 6 */ (void *)(__psint_t)current_cpu(), \
109/* 7 */ (void *)(__psint_t)current_pid(), \
110/* 8 */ (void *)__return_address, \
111/* 9 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL)
112
113/*
114 * Vnode tracing code.
115 */
116void
117_xfs_itrace_entry(xfs_inode_t *ip, const char *func, inst_t *ra)
118{
119 KTRACE_ENTER(ip, INODE_KTRACE_ENTRY, func, 0, ra);
120}
121
122void
123_xfs_itrace_exit(xfs_inode_t *ip, const char *func, inst_t *ra)
124{
125 KTRACE_ENTER(ip, INODE_KTRACE_EXIT, func, 0, ra);
126}
127
128void
129xfs_itrace_hold(xfs_inode_t *ip, char *file, int line, inst_t *ra)
130{
131 KTRACE_ENTER(ip, INODE_KTRACE_HOLD, file, line, ra);
132}
133
134void
135_xfs_itrace_ref(xfs_inode_t *ip, char *file, int line, inst_t *ra)
136{
137 KTRACE_ENTER(ip, INODE_KTRACE_REF, file, line, ra);
138}
139
140void
141xfs_itrace_rele(xfs_inode_t *ip, char *file, int line, inst_t *ra)
142{
143 KTRACE_ENTER(ip, INODE_KTRACE_RELE, file, line, ra);
144}
145#endif /* XFS_INODE_TRACE */
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 683ce16210f..f65983a230d 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -18,7 +18,10 @@
18#ifndef __XFS_VNODE_H__ 18#ifndef __XFS_VNODE_H__
19#define __XFS_VNODE_H__ 19#define __XFS_VNODE_H__
20 20
21#include "xfs_fs.h"
22
21struct file; 23struct file;
24struct xfs_inode;
22struct xfs_iomap; 25struct xfs_iomap;
23struct attrlist_cursor_kern; 26struct attrlist_cursor_kern;
24 27
@@ -51,40 +54,6 @@ struct attrlist_cursor_kern;
51 Prevent VM access to the pages until 54 Prevent VM access to the pages until
52 the operation completes. */ 55 the operation completes. */
53 56
54
55extern void vn_init(void);
56
57/*
58 * Yeah, these don't take vnode anymore at all, all this should be
59 * cleaned up at some point.
60 */
61extern void vn_iowait(struct xfs_inode *ip);
62extern void vn_iowake(struct xfs_inode *ip);
63extern void vn_ioerror(struct xfs_inode *ip, int error, char *f, int l);
64
65static inline int vn_count(struct inode *vp)
66{
67 return atomic_read(&vp->i_count);
68}
69
70#define IHOLD(ip) \
71do { \
72 ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \
73 atomic_inc(&(VFS_I(ip)->i_count)); \
74 xfs_itrace_hold((ip), __FILE__, __LINE__, (inst_t *)__return_address); \
75} while (0)
76
77#define IRELE(ip) \
78do { \
79 xfs_itrace_rele((ip), __FILE__, __LINE__, (inst_t *)__return_address); \
80 iput(VFS_I(ip)); \
81} while (0)
82
83static inline struct inode *vn_grab(struct inode *vp)
84{
85 return igrab(vp);
86}
87
88/* 57/*
89 * Dealing with bad inodes 58 * Dealing with bad inodes
90 */ 59 */
@@ -121,39 +90,4 @@ static inline void vn_atime_to_time_t(struct inode *vp, time_t *tt)
121 PAGECACHE_TAG_DIRTY) 90 PAGECACHE_TAG_DIRTY)
122 91
123 92
124/*
125 * Tracking vnode activity.
126 */
127#if defined(XFS_INODE_TRACE)
128
129#define INODE_TRACE_SIZE 16 /* number of trace entries */
130#define INODE_KTRACE_ENTRY 1
131#define INODE_KTRACE_EXIT 2
132#define INODE_KTRACE_HOLD 3
133#define INODE_KTRACE_REF 4
134#define INODE_KTRACE_RELE 5
135
136extern void _xfs_itrace_entry(struct xfs_inode *, const char *, inst_t *);
137extern void _xfs_itrace_exit(struct xfs_inode *, const char *, inst_t *);
138extern void xfs_itrace_hold(struct xfs_inode *, char *, int, inst_t *);
139extern void _xfs_itrace_ref(struct xfs_inode *, char *, int, inst_t *);
140extern void xfs_itrace_rele(struct xfs_inode *, char *, int, inst_t *);
141#define xfs_itrace_entry(ip) \
142 _xfs_itrace_entry(ip, __func__, (inst_t *)__return_address)
143#define xfs_itrace_exit(ip) \
144 _xfs_itrace_exit(ip, __func__, (inst_t *)__return_address)
145#define xfs_itrace_exit_tag(ip, tag) \
146 _xfs_itrace_exit(ip, tag, (inst_t *)__return_address)
147#define xfs_itrace_ref(ip) \
148 _xfs_itrace_ref(ip, __FILE__, __LINE__, (inst_t *)__return_address)
149
150#else
151#define xfs_itrace_entry(a)
152#define xfs_itrace_exit(a)
153#define xfs_itrace_exit_tag(a, b)
154#define xfs_itrace_hold(a, b, c, d)
155#define xfs_itrace_ref(a)
156#define xfs_itrace_rele(a, b, c, d)
157#endif
158
159#endif /* __XFS_VNODE_H__ */ 93#endif /* __XFS_VNODE_H__ */
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index f2705f2fd43..591ca6602bf 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -101,7 +101,7 @@ xfs_qm_dqinit(
101 if (brandnewdquot) { 101 if (brandnewdquot) {
102 dqp->dq_flnext = dqp->dq_flprev = dqp; 102 dqp->dq_flnext = dqp->dq_flprev = dqp;
103 mutex_init(&dqp->q_qlock); 103 mutex_init(&dqp->q_qlock);
104 sv_init(&dqp->q_pinwait, SV_DEFAULT, "pdq"); 104 init_waitqueue_head(&dqp->q_pinwait);
105 105
106 /* 106 /*
107 * Because we want to use a counting completion, complete 107 * Because we want to use a counting completion, complete
@@ -131,7 +131,7 @@ xfs_qm_dqinit(
131 dqp->q_res_bcount = 0; 131 dqp->q_res_bcount = 0;
132 dqp->q_res_icount = 0; 132 dqp->q_res_icount = 0;
133 dqp->q_res_rtbcount = 0; 133 dqp->q_res_rtbcount = 0;
134 dqp->q_pincount = 0; 134 atomic_set(&dqp->q_pincount, 0);
135 dqp->q_hash = NULL; 135 dqp->q_hash = NULL;
136 ASSERT(dqp->dq_flnext == dqp->dq_flprev); 136 ASSERT(dqp->dq_flnext == dqp->dq_flprev);
137 137
@@ -1221,16 +1221,14 @@ xfs_qm_dqflush(
1221 xfs_dqtrace_entry(dqp, "DQFLUSH"); 1221 xfs_dqtrace_entry(dqp, "DQFLUSH");
1222 1222
1223 /* 1223 /*
1224 * If not dirty, nada. 1224 * If not dirty, or it's pinned and we are not supposed to
1225 * block, nada.
1225 */ 1226 */
1226 if (!XFS_DQ_IS_DIRTY(dqp)) { 1227 if (!XFS_DQ_IS_DIRTY(dqp) ||
1228 (!(flags & XFS_QMOPT_SYNC) && atomic_read(&dqp->q_pincount) > 0)) {
1227 xfs_dqfunlock(dqp); 1229 xfs_dqfunlock(dqp);
1228 return (0); 1230 return 0;
1229 } 1231 }
1230
1231 /*
1232 * Cant flush a pinned dquot. Wait for it.
1233 */
1234 xfs_qm_dqunpin_wait(dqp); 1232 xfs_qm_dqunpin_wait(dqp);
1235 1233
1236 /* 1234 /*
@@ -1274,10 +1272,8 @@ xfs_qm_dqflush(
1274 dqp->dq_flags &= ~(XFS_DQ_DIRTY); 1272 dqp->dq_flags &= ~(XFS_DQ_DIRTY);
1275 mp = dqp->q_mount; 1273 mp = dqp->q_mount;
1276 1274
1277 /* lsn is 64 bits */ 1275 xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn,
1278 spin_lock(&mp->m_ail_lock); 1276 &dqp->q_logitem.qli_item.li_lsn);
1279 dqp->q_logitem.qli_flush_lsn = dqp->q_logitem.qli_item.li_lsn;
1280 spin_unlock(&mp->m_ail_lock);
1281 1277
1282 /* 1278 /*
1283 * Attach an iodone routine so that we can remove this dquot from the 1279 * Attach an iodone routine so that we can remove this dquot from the
@@ -1323,8 +1319,10 @@ xfs_qm_dqflush_done(
1323 xfs_dq_logitem_t *qip) 1319 xfs_dq_logitem_t *qip)
1324{ 1320{
1325 xfs_dquot_t *dqp; 1321 xfs_dquot_t *dqp;
1322 struct xfs_ail *ailp;
1326 1323
1327 dqp = qip->qli_dquot; 1324 dqp = qip->qli_dquot;
1325 ailp = qip->qli_item.li_ailp;
1328 1326
1329 /* 1327 /*
1330 * We only want to pull the item from the AIL if its 1328 * We only want to pull the item from the AIL if its
@@ -1337,15 +1335,12 @@ xfs_qm_dqflush_done(
1337 if ((qip->qli_item.li_flags & XFS_LI_IN_AIL) && 1335 if ((qip->qli_item.li_flags & XFS_LI_IN_AIL) &&
1338 qip->qli_item.li_lsn == qip->qli_flush_lsn) { 1336 qip->qli_item.li_lsn == qip->qli_flush_lsn) {
1339 1337
1340 spin_lock(&dqp->q_mount->m_ail_lock); 1338 /* xfs_trans_ail_delete() drops the AIL lock. */
1341 /* 1339 spin_lock(&ailp->xa_lock);
1342 * xfs_trans_delete_ail() drops the AIL lock.
1343 */
1344 if (qip->qli_item.li_lsn == qip->qli_flush_lsn) 1340 if (qip->qli_item.li_lsn == qip->qli_flush_lsn)
1345 xfs_trans_delete_ail(dqp->q_mount, 1341 xfs_trans_ail_delete(ailp, (xfs_log_item_t*)qip);
1346 (xfs_log_item_t*)qip);
1347 else 1342 else
1348 spin_unlock(&dqp->q_mount->m_ail_lock); 1343 spin_unlock(&ailp->xa_lock);
1349 } 1344 }
1350 1345
1351 /* 1346 /*
@@ -1375,7 +1370,7 @@ xfs_dqunlock(
1375 mutex_unlock(&(dqp->q_qlock)); 1370 mutex_unlock(&(dqp->q_qlock));
1376 if (dqp->q_logitem.qli_dquot == dqp) { 1371 if (dqp->q_logitem.qli_dquot == dqp) {
1377 /* Once was dqp->q_mount, but might just have been cleared */ 1372 /* Once was dqp->q_mount, but might just have been cleared */
1378 xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_mountp, 1373 xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_ailp,
1379 (xfs_log_item_t*)&(dqp->q_logitem)); 1374 (xfs_log_item_t*)&(dqp->q_logitem));
1380 } 1375 }
1381} 1376}
@@ -1489,7 +1484,7 @@ xfs_qm_dqpurge(
1489 "xfs_qm_dqpurge: dquot %p flush failed", dqp); 1484 "xfs_qm_dqpurge: dquot %p flush failed", dqp);
1490 xfs_dqflock(dqp); 1485 xfs_dqflock(dqp);
1491 } 1486 }
1492 ASSERT(dqp->q_pincount == 0); 1487 ASSERT(atomic_read(&dqp->q_pincount) == 0);
1493 ASSERT(XFS_FORCED_SHUTDOWN(mp) || 1488 ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
1494 !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL)); 1489 !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
1495 1490
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index 8958d0faf8d..7e455337e2b 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -83,8 +83,8 @@ typedef struct xfs_dquot {
83 xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */ 83 xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */
84 mutex_t q_qlock; /* quota lock */ 84 mutex_t q_qlock; /* quota lock */
85 struct completion q_flush; /* flush completion queue */ 85 struct completion q_flush; /* flush completion queue */
86 uint q_pincount; /* pin count for this dquot */ 86 atomic_t q_pincount; /* dquot pin count */
87 sv_t q_pinwait; /* sync var for pinning */ 87 wait_queue_head_t q_pinwait; /* dquot pinning wait queue */
88#ifdef XFS_DQUOT_TRACE 88#ifdef XFS_DQUOT_TRACE
89 struct ktrace *q_trace; /* trace header structure */ 89 struct ktrace *q_trace; /* trace header structure */
90#endif 90#endif
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index f028644caa5..1728f6a7c4f 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -88,25 +88,22 @@ xfs_qm_dquot_logitem_format(
88 88
89/* 89/*
90 * Increment the pin count of the given dquot. 90 * Increment the pin count of the given dquot.
91 * This value is protected by pinlock spinlock in the xQM structure.
92 */ 91 */
93STATIC void 92STATIC void
94xfs_qm_dquot_logitem_pin( 93xfs_qm_dquot_logitem_pin(
95 xfs_dq_logitem_t *logitem) 94 xfs_dq_logitem_t *logitem)
96{ 95{
97 xfs_dquot_t *dqp; 96 xfs_dquot_t *dqp = logitem->qli_dquot;
98 97
99 dqp = logitem->qli_dquot;
100 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 98 ASSERT(XFS_DQ_IS_LOCKED(dqp));
101 spin_lock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock)); 99 atomic_inc(&dqp->q_pincount);
102 dqp->q_pincount++;
103 spin_unlock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock));
104} 100}
105 101
106/* 102/*
107 * Decrement the pin count of the given dquot, and wake up 103 * Decrement the pin count of the given dquot, and wake up
108 * anyone in xfs_dqwait_unpin() if the count goes to 0. The 104 * anyone in xfs_dqwait_unpin() if the count goes to 0. The
109 * dquot must have been previously pinned with a call to xfs_dqpin(). 105 * dquot must have been previously pinned with a call to
106 * xfs_qm_dquot_logitem_pin().
110 */ 107 */
111/* ARGSUSED */ 108/* ARGSUSED */
112STATIC void 109STATIC void
@@ -114,16 +111,11 @@ xfs_qm_dquot_logitem_unpin(
114 xfs_dq_logitem_t *logitem, 111 xfs_dq_logitem_t *logitem,
115 int stale) 112 int stale)
116{ 113{
117 xfs_dquot_t *dqp; 114 xfs_dquot_t *dqp = logitem->qli_dquot;
118 115
119 dqp = logitem->qli_dquot; 116 ASSERT(atomic_read(&dqp->q_pincount) > 0);
120 ASSERT(dqp->q_pincount > 0); 117 if (atomic_dec_and_test(&dqp->q_pincount))
121 spin_lock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock)); 118 wake_up(&dqp->q_pinwait);
122 dqp->q_pincount--;
123 if (dqp->q_pincount == 0) {
124 sv_broadcast(&dqp->q_pinwait);
125 }
126 spin_unlock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock));
127} 119}
128 120
129/* ARGSUSED */ 121/* ARGSUSED */
@@ -193,21 +185,14 @@ xfs_qm_dqunpin_wait(
193 xfs_dquot_t *dqp) 185 xfs_dquot_t *dqp)
194{ 186{
195 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 187 ASSERT(XFS_DQ_IS_LOCKED(dqp));
196 if (dqp->q_pincount == 0) { 188 if (atomic_read(&dqp->q_pincount) == 0)
197 return; 189 return;
198 }
199 190
200 /* 191 /*
201 * Give the log a push so we don't wait here too long. 192 * Give the log a push so we don't wait here too long.
202 */ 193 */
203 xfs_log_force(dqp->q_mount, (xfs_lsn_t)0, XFS_LOG_FORCE); 194 xfs_log_force(dqp->q_mount, (xfs_lsn_t)0, XFS_LOG_FORCE);
204 spin_lock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock)); 195 wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0));
205 if (dqp->q_pincount == 0) {
206 spin_unlock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock));
207 return;
208 }
209 sv_wait(&(dqp->q_pinwait), PINOD,
210 &(XFS_DQ_TO_QINF(dqp)->qi_pinlock), s);
211} 196}
212 197
213/* 198/*
@@ -310,7 +295,7 @@ xfs_qm_dquot_logitem_trylock(
310 uint retval; 295 uint retval;
311 296
312 dqp = qip->qli_dquot; 297 dqp = qip->qli_dquot;
313 if (dqp->q_pincount > 0) 298 if (atomic_read(&dqp->q_pincount) > 0)
314 return (XFS_ITEM_PINNED); 299 return (XFS_ITEM_PINNED);
315 300
316 if (! xfs_qm_dqlock_nowait(dqp)) 301 if (! xfs_qm_dqlock_nowait(dqp))
@@ -568,14 +553,16 @@ xfs_qm_qoffend_logitem_committed(
568 xfs_lsn_t lsn) 553 xfs_lsn_t lsn)
569{ 554{
570 xfs_qoff_logitem_t *qfs; 555 xfs_qoff_logitem_t *qfs;
556 struct xfs_ail *ailp;
571 557
572 qfs = qfe->qql_start_lip; 558 qfs = qfe->qql_start_lip;
573 spin_lock(&qfs->qql_item.li_mountp->m_ail_lock); 559 ailp = qfs->qql_item.li_ailp;
560 spin_lock(&ailp->xa_lock);
574 /* 561 /*
575 * Delete the qoff-start logitem from the AIL. 562 * Delete the qoff-start logitem from the AIL.
576 * xfs_trans_delete_ail() drops the AIL lock. 563 * xfs_trans_ail_delete() drops the AIL lock.
577 */ 564 */
578 xfs_trans_delete_ail(qfs->qql_item.li_mountp, (xfs_log_item_t *)qfs); 565 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)qfs);
579 kmem_free(qfs); 566 kmem_free(qfs);
580 kmem_free(qfe); 567 kmem_free(qfe);
581 return (xfs_lsn_t)-1; 568 return (xfs_lsn_t)-1;
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index df0ffef9775..6b13960cf31 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -20,7 +20,6 @@
20#include "xfs_bit.h" 20#include "xfs_bit.h"
21#include "xfs_log.h" 21#include "xfs_log.h"
22#include "xfs_inum.h" 22#include "xfs_inum.h"
23#include "xfs_clnt.h"
24#include "xfs_trans.h" 23#include "xfs_trans.h"
25#include "xfs_sb.h" 24#include "xfs_sb.h"
26#include "xfs_ag.h" 25#include "xfs_ag.h"
@@ -396,13 +395,10 @@ xfs_qm_mount_quotas(
396/* 395/*
397 * Called from the vfsops layer. 396 * Called from the vfsops layer.
398 */ 397 */
399int 398void
400xfs_qm_unmount_quotas( 399xfs_qm_unmount_quotas(
401 xfs_mount_t *mp) 400 xfs_mount_t *mp)
402{ 401{
403 xfs_inode_t *uqp, *gqp;
404 int error = 0;
405
406 /* 402 /*
407 * Release the dquots that root inode, et al might be holding, 403 * Release the dquots that root inode, et al might be holding,
408 * before we flush quotas and blow away the quotainfo structure. 404 * before we flush quotas and blow away the quotainfo structure.
@@ -415,43 +411,18 @@ xfs_qm_unmount_quotas(
415 xfs_qm_dqdetach(mp->m_rsumip); 411 xfs_qm_dqdetach(mp->m_rsumip);
416 412
417 /* 413 /*
418 * Flush out the quota inodes. 414 * Release the quota inodes.
419 */ 415 */
420 uqp = gqp = NULL;
421 if (mp->m_quotainfo) { 416 if (mp->m_quotainfo) {
422 if ((uqp = mp->m_quotainfo->qi_uquotaip) != NULL) { 417 if (mp->m_quotainfo->qi_uquotaip) {
423 xfs_ilock(uqp, XFS_ILOCK_EXCL); 418 IRELE(mp->m_quotainfo->qi_uquotaip);
424 xfs_iflock(uqp); 419 mp->m_quotainfo->qi_uquotaip = NULL;
425 error = xfs_iflush(uqp, XFS_IFLUSH_SYNC);
426 xfs_iunlock(uqp, XFS_ILOCK_EXCL);
427 if (unlikely(error == EFSCORRUPTED)) {
428 XFS_ERROR_REPORT("xfs_qm_unmount_quotas(1)",
429 XFS_ERRLEVEL_LOW, mp);
430 goto out;
431 }
432 } 420 }
433 if ((gqp = mp->m_quotainfo->qi_gquotaip) != NULL) { 421 if (mp->m_quotainfo->qi_gquotaip) {
434 xfs_ilock(gqp, XFS_ILOCK_EXCL); 422 IRELE(mp->m_quotainfo->qi_gquotaip);
435 xfs_iflock(gqp); 423 mp->m_quotainfo->qi_gquotaip = NULL;
436 error = xfs_iflush(gqp, XFS_IFLUSH_SYNC);
437 xfs_iunlock(gqp, XFS_ILOCK_EXCL);
438 if (unlikely(error == EFSCORRUPTED)) {
439 XFS_ERROR_REPORT("xfs_qm_unmount_quotas(2)",
440 XFS_ERRLEVEL_LOW, mp);
441 goto out;
442 }
443 } 424 }
444 } 425 }
445 if (uqp) {
446 IRELE(uqp);
447 mp->m_quotainfo->qi_uquotaip = NULL;
448 }
449 if (gqp) {
450 IRELE(gqp);
451 mp->m_quotainfo->qi_gquotaip = NULL;
452 }
453out:
454 return XFS_ERROR(error);
455} 426}
456 427
457/* 428/*
@@ -987,14 +958,10 @@ xfs_qm_dqdetach(
987} 958}
988 959
989/* 960/*
990 * This is called by VFS_SYNC and flags arg determines the caller, 961 * This is called to sync quotas. We can be told to use non-blocking
991 * and its motives, as done in xfs_sync. 962 * semantics by either the SYNC_BDFLUSH flag or the absence of the
992 * 963 * SYNC_WAIT flag.
993 * vfs_sync: SYNC_FSDATA|SYNC_ATTR|SYNC_BDFLUSH 0x31
994 * syscall sync: SYNC_FSDATA|SYNC_ATTR|SYNC_DELWRI 0x25
995 * umountroot : SYNC_WAIT | SYNC_CLOSE | SYNC_ATTR | SYNC_FSDATA
996 */ 964 */
997
998int 965int
999xfs_qm_sync( 966xfs_qm_sync(
1000 xfs_mount_t *mp, 967 xfs_mount_t *mp,
@@ -1137,7 +1104,6 @@ xfs_qm_init_quotainfo(
1137 return error; 1104 return error;
1138 } 1105 }
1139 1106
1140 spin_lock_init(&qinf->qi_pinlock);
1141 xfs_qm_list_init(&qinf->qi_dqlist, "mpdqlist", 0); 1107 xfs_qm_list_init(&qinf->qi_dqlist, "mpdqlist", 0);
1142 qinf->qi_dqreclaims = 0; 1108 qinf->qi_dqreclaims = 0;
1143 1109
@@ -1234,7 +1200,6 @@ xfs_qm_destroy_quotainfo(
1234 */ 1200 */
1235 xfs_qm_rele_quotafs_ref(mp); 1201 xfs_qm_rele_quotafs_ref(mp);
1236 1202
1237 spinlock_destroy(&qi->qi_pinlock);
1238 xfs_qm_list_destroy(&qi->qi_dqlist); 1203 xfs_qm_list_destroy(&qi->qi_dqlist);
1239 1204
1240 if (qi->qi_uquotaip) { 1205 if (qi->qi_uquotaip) {
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index 44f25349e47..ddf09166387 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -106,7 +106,6 @@ typedef struct xfs_qm {
106typedef struct xfs_quotainfo { 106typedef struct xfs_quotainfo {
107 xfs_inode_t *qi_uquotaip; /* user quota inode */ 107 xfs_inode_t *qi_uquotaip; /* user quota inode */
108 xfs_inode_t *qi_gquotaip; /* group quota inode */ 108 xfs_inode_t *qi_gquotaip; /* group quota inode */
109 spinlock_t qi_pinlock; /* dquot pinning lock */
110 xfs_dqlist_t qi_dqlist; /* all dquots in filesys */ 109 xfs_dqlist_t qi_dqlist; /* all dquots in filesys */
111 int qi_dqreclaims; /* a change here indicates 110 int qi_dqreclaims; /* a change here indicates
112 a removal in the dqlist */ 111 a removal in the dqlist */
@@ -168,7 +167,7 @@ extern void xfs_qm_destroy_quotainfo(xfs_mount_t *);
168extern void xfs_qm_mount_quotas(xfs_mount_t *); 167extern void xfs_qm_mount_quotas(xfs_mount_t *);
169extern int xfs_qm_quotacheck(xfs_mount_t *); 168extern int xfs_qm_quotacheck(xfs_mount_t *);
170extern void xfs_qm_unmount_quotadestroy(xfs_mount_t *); 169extern void xfs_qm_unmount_quotadestroy(xfs_mount_t *);
171extern int xfs_qm_unmount_quotas(xfs_mount_t *); 170extern void xfs_qm_unmount_quotas(xfs_mount_t *);
172extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t); 171extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t);
173extern int xfs_qm_sync(xfs_mount_t *, int); 172extern int xfs_qm_sync(xfs_mount_t *, int);
174 173
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index eea2e60b456..bc6c5cca3e1 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -20,7 +20,6 @@
20#include "xfs_bit.h" 20#include "xfs_bit.h"
21#include "xfs_log.h" 21#include "xfs_log.h"
22#include "xfs_inum.h" 22#include "xfs_inum.h"
23#include "xfs_clnt.h"
24#include "xfs_trans.h" 23#include "xfs_trans.h"
25#include "xfs_sb.h" 24#include "xfs_sb.h"
26#include "xfs_ag.h" 25#include "xfs_ag.h"
@@ -51,7 +50,7 @@
51 50
52STATIC void 51STATIC void
53xfs_fill_statvfs_from_dquot( 52xfs_fill_statvfs_from_dquot(
54 bhv_statvfs_t *statp, 53 struct kstatfs *statp,
55 xfs_disk_dquot_t *dp) 54 xfs_disk_dquot_t *dp)
56{ 55{
57 __uint64_t limit; 56 __uint64_t limit;
@@ -88,7 +87,7 @@ xfs_fill_statvfs_from_dquot(
88STATIC void 87STATIC void
89xfs_qm_statvfs( 88xfs_qm_statvfs(
90 xfs_inode_t *ip, 89 xfs_inode_t *ip,
91 bhv_statvfs_t *statp) 90 struct kstatfs *statp)
92{ 91{
93 xfs_mount_t *mp = ip->i_mount; 92 xfs_mount_t *mp = ip->i_mount;
94 xfs_dquot_t *dqp; 93 xfs_dquot_t *dqp;
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 1a3b803dfa5..68139b38aed 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -127,7 +127,7 @@ xfs_qm_quotactl(
127 break; 127 break;
128 128
129 case Q_XQUOTASYNC: 129 case Q_XQUOTASYNC:
130 return (xfs_sync_inodes(mp, SYNC_DELWRI, NULL)); 130 return xfs_sync_inodes(mp, SYNC_DELWRI);
131 131
132 default: 132 default:
133 break; 133 break;
@@ -1022,101 +1022,104 @@ xfs_qm_export_flags(
1022 1022
1023 1023
1024/* 1024/*
1025 * Go thru all the inodes in the file system, releasing their dquots. 1025 * Release all the dquots on the inodes in an AG.
1026 * Note that the mount structure gets modified to indicate that quotas are off
1027 * AFTER this, in the case of quotaoff. This also gets called from
1028 * xfs_rootumount.
1029 */ 1026 */
1030void 1027STATIC void
1031xfs_qm_dqrele_all_inodes( 1028xfs_qm_dqrele_inodes_ag(
1032 struct xfs_mount *mp, 1029 xfs_mount_t *mp,
1033 uint flags) 1030 int ag,
1031 uint flags)
1034{ 1032{
1035 xfs_inode_t *ip, *topino; 1033 xfs_inode_t *ip = NULL;
1036 uint ireclaims; 1034 xfs_perag_t *pag = &mp->m_perag[ag];
1037 struct inode *vp; 1035 int first_index = 0;
1038 boolean_t vnode_refd; 1036 int nr_found;
1039 1037
1040 ASSERT(mp->m_quotainfo);
1041
1042 XFS_MOUNT_ILOCK(mp);
1043again:
1044 ip = mp->m_inodes;
1045 if (ip == NULL) {
1046 XFS_MOUNT_IUNLOCK(mp);
1047 return;
1048 }
1049 do { 1038 do {
1050 /* Skip markers inserted by xfs_sync */ 1039 /*
1051 if (ip->i_mount == NULL) { 1040 * use a gang lookup to find the next inode in the tree
1052 ip = ip->i_mnext; 1041 * as the tree is sparse and a gang lookup walks to find
1053 continue; 1042 * the number of objects requested.
1043 */
1044 read_lock(&pag->pag_ici_lock);
1045 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
1046 (void**)&ip, first_index, 1);
1047
1048 if (!nr_found) {
1049 read_unlock(&pag->pag_ici_lock);
1050 break;
1054 } 1051 }
1055 /* Root inode, rbmip and rsumip have associated blocks */ 1052
1053 /*
1054 * Update the index for the next lookup. Catch overflows
1055 * into the next AG range which can occur if we have inodes
1056 * in the last block of the AG and we are currently
1057 * pointing to the last inode.
1058 */
1059 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
1060 if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
1061 read_unlock(&pag->pag_ici_lock);
1062 break;
1063 }
1064
1065 /* skip quota inodes */
1056 if (ip == XFS_QI_UQIP(mp) || ip == XFS_QI_GQIP(mp)) { 1066 if (ip == XFS_QI_UQIP(mp) || ip == XFS_QI_GQIP(mp)) {
1057 ASSERT(ip->i_udquot == NULL); 1067 ASSERT(ip->i_udquot == NULL);
1058 ASSERT(ip->i_gdquot == NULL); 1068 ASSERT(ip->i_gdquot == NULL);
1059 ip = ip->i_mnext; 1069 read_unlock(&pag->pag_ici_lock);
1060 continue; 1070 continue;
1061 } 1071 }
1062 vp = VFS_I(ip); 1072
1063 if (!vp) { 1073 /*
1064 ASSERT(ip->i_udquot == NULL); 1074 * If we can't get a reference on the inode, it must be
1065 ASSERT(ip->i_gdquot == NULL); 1075 * in reclaim. Leave it for the reclaim code to flush.
1066 ip = ip->i_mnext; 1076 */
1077 if (!igrab(VFS_I(ip))) {
1078 read_unlock(&pag->pag_ici_lock);
1067 continue; 1079 continue;
1068 } 1080 }
1069 vnode_refd = B_FALSE; 1081 read_unlock(&pag->pag_ici_lock);
1070 if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) { 1082
1071 ireclaims = mp->m_ireclaims; 1083 /* avoid new inodes though we shouldn't find any here */
1072 topino = mp->m_inodes; 1084 if (xfs_iflags_test(ip, XFS_INEW)) {
1073 vp = vn_grab(vp); 1085 IRELE(ip);
1074 if (!vp) 1086 continue;
1075 goto again;
1076
1077 XFS_MOUNT_IUNLOCK(mp);
1078 /* XXX restart limit ? */
1079 xfs_ilock(ip, XFS_ILOCK_EXCL);
1080 vnode_refd = B_TRUE;
1081 } else {
1082 ireclaims = mp->m_ireclaims;
1083 topino = mp->m_inodes;
1084 XFS_MOUNT_IUNLOCK(mp);
1085 } 1087 }
1086 1088
1087 /* 1089 xfs_ilock(ip, XFS_ILOCK_EXCL);
1088 * We don't keep the mountlock across the dqrele() call,
1089 * since it can take a while..
1090 */
1091 if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) { 1090 if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) {
1092 xfs_qm_dqrele(ip->i_udquot); 1091 xfs_qm_dqrele(ip->i_udquot);
1093 ip->i_udquot = NULL; 1092 ip->i_udquot = NULL;
1094 } 1093 }
1095 if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) { 1094 if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) &&
1095 ip->i_gdquot) {
1096 xfs_qm_dqrele(ip->i_gdquot); 1096 xfs_qm_dqrele(ip->i_gdquot);
1097 ip->i_gdquot = NULL; 1097 ip->i_gdquot = NULL;
1098 } 1098 }
1099 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1099 xfs_iput(ip, XFS_ILOCK_EXCL);
1100 /* 1100
1101 * Wait until we've dropped the ilock and mountlock to 1101 } while (nr_found);
1102 * do the vn_rele. Or be condemned to an eternity in the 1102}
1103 * inactive code in hell. 1103
1104 */ 1104/*
1105 if (vnode_refd) 1105 * Go thru all the inodes in the file system, releasing their dquots.
1106 IRELE(ip); 1106 * Note that the mount structure gets modified to indicate that quotas are off
1107 XFS_MOUNT_ILOCK(mp); 1107 * AFTER this, in the case of quotaoff. This also gets called from
1108 /* 1108 * xfs_rootumount.
1109 * If an inode was inserted or removed, we gotta 1109 */
1110 * start over again. 1110void
1111 */ 1111xfs_qm_dqrele_all_inodes(
1112 if (topino != mp->m_inodes || mp->m_ireclaims != ireclaims) { 1112 struct xfs_mount *mp,
1113 /* XXX use a sentinel */ 1113 uint flags)
1114 goto again; 1114{
1115 } 1115 int i;
1116 ip = ip->i_mnext;
1117 } while (ip != mp->m_inodes);
1118 1116
1119 XFS_MOUNT_IUNLOCK(mp); 1117 ASSERT(mp->m_quotainfo);
1118 for (i = 0; i < mp->m_sb.sb_agcount; i++) {
1119 if (!mp->m_perag[i].pag_ici_init)
1120 continue;
1121 xfs_qm_dqrele_inodes_ag(mp, i, flags);
1122 }
1120} 1123}
1121 1124
1122/*------------------------------------------------------------------------*/ 1125/*------------------------------------------------------------------------*/
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
index c27abef7b84..ae548296542 100644
--- a/fs/xfs/support/debug.c
+++ b/fs/xfs/support/debug.c
@@ -18,6 +18,13 @@
18#include <xfs.h> 18#include <xfs.h>
19#include "debug.h" 19#include "debug.h"
20 20
21/* xfs_mount.h drags a lot of crap in, sorry.. */
22#include "xfs_sb.h"
23#include "xfs_inum.h"
24#include "xfs_ag.h"
25#include "xfs_dmapi.h"
26#include "xfs_mount.h"
27
21static char message[1024]; /* keep it off the stack */ 28static char message[1024]; /* keep it off the stack */
22static DEFINE_SPINLOCK(xfs_err_lock); 29static DEFINE_SPINLOCK(xfs_err_lock);
23 30
@@ -55,22 +62,42 @@ cmn_err(register int level, char *fmt, ...)
55} 62}
56 63
57void 64void
58icmn_err(register int level, char *fmt, va_list ap) 65xfs_fs_vcmn_err(
66 int level,
67 struct xfs_mount *mp,
68 char *fmt,
69 va_list ap)
59{ 70{
60 ulong flags; 71 unsigned long flags;
61 int len; 72 int len = 0;
62 73
63 level &= XFS_ERR_MASK; 74 level &= XFS_ERR_MASK;
64 if(level > XFS_MAX_ERR_LEVEL) 75 if (level > XFS_MAX_ERR_LEVEL)
65 level = XFS_MAX_ERR_LEVEL; 76 level = XFS_MAX_ERR_LEVEL;
77
66 spin_lock_irqsave(&xfs_err_lock,flags); 78 spin_lock_irqsave(&xfs_err_lock,flags);
67 len = vsnprintf(message, sizeof(message), fmt, ap); 79
80 if (mp) {
81 len = sprintf(message, "Filesystem \"%s\": ", mp->m_fsname);
82
83 /*
84 * Skip the printk if we can't print anything useful
85 * due to an over-long device name.
86 */
87 if (len >= sizeof(message))
88 goto out;
89 }
90
91 len = vsnprintf(message + len, sizeof(message) - len, fmt, ap);
68 if (len >= sizeof(message)) 92 if (len >= sizeof(message))
69 len = sizeof(message) - 1; 93 len = sizeof(message) - 1;
70 if (message[len-1] == '\n') 94 if (message[len-1] == '\n')
71 message[len-1] = 0; 95 message[len-1] = 0;
96
72 printk("%s%s\n", err_level[level], message); 97 printk("%s%s\n", err_level[level], message);
98 out:
73 spin_unlock_irqrestore(&xfs_err_lock,flags); 99 spin_unlock_irqrestore(&xfs_err_lock,flags);
100
74 BUG_ON(level == CE_PANIC); 101 BUG_ON(level == CE_PANIC);
75} 102}
76 103
@@ -84,5 +111,5 @@ assfail(char *expr, char *file, int line)
84void 111void
85xfs_hex_dump(void *p, int length) 112xfs_hex_dump(void *p, int length)
86{ 113{
87 print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_OFFSET, 16, 1, p, length, 1); 114 print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_ADDRESS, 16, 1, p, length, 1);
88} 115}
diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h
index 75845f95081..6f4fd37c67a 100644
--- a/fs/xfs/support/debug.h
+++ b/fs/xfs/support/debug.h
@@ -27,8 +27,6 @@
27#define CE_ALERT 1 /* alert */ 27#define CE_ALERT 1 /* alert */
28#define CE_PANIC 0 /* panic */ 28#define CE_PANIC 0 /* panic */
29 29
30extern void icmn_err(int, char *, va_list)
31 __attribute__ ((format (printf, 2, 0)));
32extern void cmn_err(int, char *, ...) 30extern void cmn_err(int, char *, ...)
33 __attribute__ ((format (printf, 2, 3))); 31 __attribute__ ((format (printf, 2, 3)));
34extern void assfail(char *expr, char *f, int l); 32extern void assfail(char *expr, char *f, int l);
diff --git a/fs/xfs/support/ktrace.c b/fs/xfs/support/ktrace.c
index a34ef05489b..2d494c26717 100644
--- a/fs/xfs/support/ktrace.c
+++ b/fs/xfs/support/ktrace.c
@@ -113,21 +113,16 @@ ktrace_alloc(int nentries, unsigned int __nocast sleep)
113void 113void
114ktrace_free(ktrace_t *ktp) 114ktrace_free(ktrace_t *ktp)
115{ 115{
116 int entries_size;
117
118 if (ktp == (ktrace_t *)NULL) 116 if (ktp == (ktrace_t *)NULL)
119 return; 117 return;
120 118
121 /* 119 /*
122 * Special treatment for the Vnode trace buffer. 120 * Special treatment for the Vnode trace buffer.
123 */ 121 */
124 if (ktp->kt_nentries == ktrace_zentries) { 122 if (ktp->kt_nentries == ktrace_zentries)
125 kmem_zone_free(ktrace_ent_zone, ktp->kt_entries); 123 kmem_zone_free(ktrace_ent_zone, ktp->kt_entries);
126 } else { 124 else
127 entries_size = (int)(ktp->kt_nentries * sizeof(ktrace_entry_t));
128
129 kmem_free(ktp->kt_entries); 125 kmem_free(ktp->kt_entries);
130 }
131 126
132 kmem_zone_free(ktrace_hdr_zone, ktp); 127 kmem_zone_free(ktrace_hdr_zone, ktp);
133} 128}
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index 540e4c98982..17254b529c5 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -30,7 +30,7 @@
30#define XFS_ATTR_TRACE 1 30#define XFS_ATTR_TRACE 1
31#define XFS_BLI_TRACE 1 31#define XFS_BLI_TRACE 1
32#define XFS_BMAP_TRACE 1 32#define XFS_BMAP_TRACE 1
33#define XFS_BMBT_TRACE 1 33#define XFS_BTREE_TRACE 1
34#define XFS_DIR2_TRACE 1 34#define XFS_DIR2_TRACE 1
35#define XFS_DQUOT_TRACE 1 35#define XFS_DQUOT_TRACE 1
36#define XFS_ILOCK_TRACE 1 36#define XFS_ILOCK_TRACE 1
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index b2f639a1416..a8cdd73999a 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -366,7 +366,7 @@ xfs_acl_allow_set(
366 return ENOTDIR; 366 return ENOTDIR;
367 if (vp->i_sb->s_flags & MS_RDONLY) 367 if (vp->i_sb->s_flags & MS_RDONLY)
368 return EROFS; 368 return EROFS;
369 if (XFS_I(vp)->i_d.di_uid != current->fsuid && !capable(CAP_FOWNER)) 369 if (XFS_I(vp)->i_d.di_uid != current_fsuid() && !capable(CAP_FOWNER))
370 return EPERM; 370 return EPERM;
371 return 0; 371 return 0;
372} 372}
@@ -413,13 +413,13 @@ xfs_acl_access(
413 switch (fap->acl_entry[i].ae_tag) { 413 switch (fap->acl_entry[i].ae_tag) {
414 case ACL_USER_OBJ: 414 case ACL_USER_OBJ:
415 seen_userobj = 1; 415 seen_userobj = 1;
416 if (fuid != current->fsuid) 416 if (fuid != current_fsuid())
417 continue; 417 continue;
418 matched.ae_tag = ACL_USER_OBJ; 418 matched.ae_tag = ACL_USER_OBJ;
419 matched.ae_perm = allows; 419 matched.ae_perm = allows;
420 break; 420 break;
421 case ACL_USER: 421 case ACL_USER:
422 if (fap->acl_entry[i].ae_id != current->fsuid) 422 if (fap->acl_entry[i].ae_id != current_fsuid())
423 continue; 423 continue;
424 matched.ae_tag = ACL_USER; 424 matched.ae_tag = ACL_USER;
425 matched.ae_perm = allows; 425 matched.ae_perm = allows;
@@ -758,7 +758,7 @@ xfs_acl_setmode(
758 if (gap && nomask) 758 if (gap && nomask)
759 iattr.ia_mode |= gap->ae_perm << 3; 759 iattr.ia_mode |= gap->ae_perm << 3;
760 760
761 return xfs_setattr(XFS_I(vp), &iattr, 0, sys_cred); 761 return xfs_setattr(XFS_I(vp), &iattr, 0);
762} 762}
763 763
764/* 764/*
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 61b292a9fb4..f2e21817a22 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -91,6 +91,8 @@ typedef struct xfs_agf {
91#define XFS_AGF_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGF_DADDR(mp)) 91#define XFS_AGF_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGF_DADDR(mp))
92#define XFS_BUF_TO_AGF(bp) ((xfs_agf_t *)XFS_BUF_PTR(bp)) 92#define XFS_BUF_TO_AGF(bp) ((xfs_agf_t *)XFS_BUF_PTR(bp))
93 93
94extern int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp,
95 xfs_agnumber_t agno, int flags, struct xfs_buf **bpp);
94 96
95/* 97/*
96 * Size of the unlinked inode hash table in the agi. 98 * Size of the unlinked inode hash table in the agi.
@@ -142,6 +144,9 @@ typedef struct xfs_agi {
142#define XFS_AGI_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGI_DADDR(mp)) 144#define XFS_AGI_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGI_DADDR(mp))
143#define XFS_BUF_TO_AGI(bp) ((xfs_agi_t *)XFS_BUF_PTR(bp)) 145#define XFS_BUF_TO_AGI(bp) ((xfs_agi_t *)XFS_BUF_PTR(bp))
144 146
147extern int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp,
148 xfs_agnumber_t agno, struct xfs_buf **bpp);
149
145/* 150/*
146 * The third a.g. block contains the a.g. freelist, an array 151 * The third a.g. block contains the a.g. freelist, an array
147 * of block pointers to blocks owned by the allocation btree code. 152 * of block pointers to blocks owned by the allocation btree code.
@@ -192,17 +197,23 @@ typedef struct xfs_perag
192 xfs_agino_t pagi_freecount; /* number of free inodes */ 197 xfs_agino_t pagi_freecount; /* number of free inodes */
193 xfs_agino_t pagi_count; /* number of allocated inodes */ 198 xfs_agino_t pagi_count; /* number of allocated inodes */
194 int pagb_count; /* pagb slots in use */ 199 int pagb_count; /* pagb slots in use */
200 xfs_perag_busy_t *pagb_list; /* unstable blocks */
195#ifdef __KERNEL__ 201#ifdef __KERNEL__
196 spinlock_t pagb_lock; /* lock for pagb_list */ 202 spinlock_t pagb_lock; /* lock for pagb_list */
197#endif 203
198 xfs_perag_busy_t *pagb_list; /* unstable blocks */
199 atomic_t pagf_fstrms; /* # of filestreams active in this AG */ 204 atomic_t pagf_fstrms; /* # of filestreams active in this AG */
200 205
201 int pag_ici_init; /* incore inode cache initialised */ 206 int pag_ici_init; /* incore inode cache initialised */
202 rwlock_t pag_ici_lock; /* incore inode lock */ 207 rwlock_t pag_ici_lock; /* incore inode lock */
203 struct radix_tree_root pag_ici_root; /* incore inode cache root */ 208 struct radix_tree_root pag_ici_root; /* incore inode cache root */
209#endif
204} xfs_perag_t; 210} xfs_perag_t;
205 211
212/*
213 * tags for inode radix tree
214 */
215#define XFS_ICI_RECLAIM_TAG 0 /* inode is to be reclaimed */
216
206#define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels) 217#define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels)
207#define XFS_MIN_FREELIST_RAW(bl,cl,mp) \ 218#define XFS_MIN_FREELIST_RAW(bl,cl,mp) \
208 (MIN(bl + 1, XFS_AG_MAXLEVELS(mp)) + MIN(cl + 1, XFS_AG_MAXLEVELS(mp))) 219 (MIN(bl + 1, XFS_AG_MAXLEVELS(mp)) + MIN(cl + 1, XFS_AG_MAXLEVELS(mp)))
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 1956f83489f..028e44e58ea 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -90,6 +90,92 @@ STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
90 */ 90 */
91 91
92/* 92/*
93 * Lookup the record equal to [bno, len] in the btree given by cur.
94 */
95STATIC int /* error */
96xfs_alloc_lookup_eq(
97 struct xfs_btree_cur *cur, /* btree cursor */
98 xfs_agblock_t bno, /* starting block of extent */
99 xfs_extlen_t len, /* length of extent */
100 int *stat) /* success/failure */
101{
102 cur->bc_rec.a.ar_startblock = bno;
103 cur->bc_rec.a.ar_blockcount = len;
104 return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
105}
106
107/*
108 * Lookup the first record greater than or equal to [bno, len]
109 * in the btree given by cur.
110 */
111STATIC int /* error */
112xfs_alloc_lookup_ge(
113 struct xfs_btree_cur *cur, /* btree cursor */
114 xfs_agblock_t bno, /* starting block of extent */
115 xfs_extlen_t len, /* length of extent */
116 int *stat) /* success/failure */
117{
118 cur->bc_rec.a.ar_startblock = bno;
119 cur->bc_rec.a.ar_blockcount = len;
120 return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
121}
122
123/*
124 * Lookup the first record less than or equal to [bno, len]
125 * in the btree given by cur.
126 */
127STATIC int /* error */
128xfs_alloc_lookup_le(
129 struct xfs_btree_cur *cur, /* btree cursor */
130 xfs_agblock_t bno, /* starting block of extent */
131 xfs_extlen_t len, /* length of extent */
132 int *stat) /* success/failure */
133{
134 cur->bc_rec.a.ar_startblock = bno;
135 cur->bc_rec.a.ar_blockcount = len;
136 return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
137}
138
139/*
140 * Update the record referred to by cur to the value given
141 * by [bno, len].
142 * This either works (return 0) or gets an EFSCORRUPTED error.
143 */
144STATIC int /* error */
145xfs_alloc_update(
146 struct xfs_btree_cur *cur, /* btree cursor */
147 xfs_agblock_t bno, /* starting block of extent */
148 xfs_extlen_t len) /* length of extent */
149{
150 union xfs_btree_rec rec;
151
152 rec.alloc.ar_startblock = cpu_to_be32(bno);
153 rec.alloc.ar_blockcount = cpu_to_be32(len);
154 return xfs_btree_update(cur, &rec);
155}
156
157/*
158 * Get the data from the pointed-to record.
159 */
160STATIC int /* error */
161xfs_alloc_get_rec(
162 struct xfs_btree_cur *cur, /* btree cursor */
163 xfs_agblock_t *bno, /* output: starting block of extent */
164 xfs_extlen_t *len, /* output: length of extent */
165 int *stat) /* output: success/failure */
166{
167 union xfs_btree_rec *rec;
168 int error;
169
170 error = xfs_btree_get_rec(cur, &rec, stat);
171 if (!error && *stat == 1) {
172 *bno = be32_to_cpu(rec->alloc.ar_startblock);
173 *len = be32_to_cpu(rec->alloc.ar_blockcount);
174 }
175 return error;
176}
177
178/*
93 * Compute aligned version of the found extent. 179 * Compute aligned version of the found extent.
94 * Takes alignment and min length into account. 180 * Takes alignment and min length into account.
95 */ 181 */
@@ -294,21 +380,20 @@ xfs_alloc_fixup_trees(
294 return error; 380 return error;
295 XFS_WANT_CORRUPTED_RETURN(i == 1); 381 XFS_WANT_CORRUPTED_RETURN(i == 1);
296 } 382 }
383
297#ifdef DEBUG 384#ifdef DEBUG
298 { 385 if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
299 xfs_alloc_block_t *bnoblock; 386 struct xfs_btree_block *bnoblock;
300 xfs_alloc_block_t *cntblock; 387 struct xfs_btree_block *cntblock;
301 388
302 if (bno_cur->bc_nlevels == 1 && 389 bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
303 cnt_cur->bc_nlevels == 1) { 390 cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
304 bnoblock = XFS_BUF_TO_ALLOC_BLOCK(bno_cur->bc_bufs[0]); 391
305 cntblock = XFS_BUF_TO_ALLOC_BLOCK(cnt_cur->bc_bufs[0]); 392 XFS_WANT_CORRUPTED_RETURN(
306 XFS_WANT_CORRUPTED_RETURN( 393 bnoblock->bb_numrecs == cntblock->bb_numrecs);
307 be16_to_cpu(bnoblock->bb_numrecs) ==
308 be16_to_cpu(cntblock->bb_numrecs));
309 }
310 } 394 }
311#endif 395#endif
396
312 /* 397 /*
313 * Deal with all four cases: the allocated record is contained 398 * Deal with all four cases: the allocated record is contained
314 * within the freespace record, so we can have new freespace 399 * within the freespace record, so we can have new freespace
@@ -333,7 +418,7 @@ xfs_alloc_fixup_trees(
333 /* 418 /*
334 * Delete the entry from the by-size btree. 419 * Delete the entry from the by-size btree.
335 */ 420 */
336 if ((error = xfs_alloc_delete(cnt_cur, &i))) 421 if ((error = xfs_btree_delete(cnt_cur, &i)))
337 return error; 422 return error;
338 XFS_WANT_CORRUPTED_RETURN(i == 1); 423 XFS_WANT_CORRUPTED_RETURN(i == 1);
339 /* 424 /*
@@ -343,7 +428,7 @@ xfs_alloc_fixup_trees(
343 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i))) 428 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
344 return error; 429 return error;
345 XFS_WANT_CORRUPTED_RETURN(i == 0); 430 XFS_WANT_CORRUPTED_RETURN(i == 0);
346 if ((error = xfs_alloc_insert(cnt_cur, &i))) 431 if ((error = xfs_btree_insert(cnt_cur, &i)))
347 return error; 432 return error;
348 XFS_WANT_CORRUPTED_RETURN(i == 1); 433 XFS_WANT_CORRUPTED_RETURN(i == 1);
349 } 434 }
@@ -351,7 +436,7 @@ xfs_alloc_fixup_trees(
351 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i))) 436 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
352 return error; 437 return error;
353 XFS_WANT_CORRUPTED_RETURN(i == 0); 438 XFS_WANT_CORRUPTED_RETURN(i == 0);
354 if ((error = xfs_alloc_insert(cnt_cur, &i))) 439 if ((error = xfs_btree_insert(cnt_cur, &i)))
355 return error; 440 return error;
356 XFS_WANT_CORRUPTED_RETURN(i == 1); 441 XFS_WANT_CORRUPTED_RETURN(i == 1);
357 } 442 }
@@ -362,7 +447,7 @@ xfs_alloc_fixup_trees(
362 /* 447 /*
363 * No remaining freespace, just delete the by-block tree entry. 448 * No remaining freespace, just delete the by-block tree entry.
364 */ 449 */
365 if ((error = xfs_alloc_delete(bno_cur, &i))) 450 if ((error = xfs_btree_delete(bno_cur, &i)))
366 return error; 451 return error;
367 XFS_WANT_CORRUPTED_RETURN(i == 1); 452 XFS_WANT_CORRUPTED_RETURN(i == 1);
368 } else { 453 } else {
@@ -379,7 +464,7 @@ xfs_alloc_fixup_trees(
379 if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i))) 464 if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
380 return error; 465 return error;
381 XFS_WANT_CORRUPTED_RETURN(i == 0); 466 XFS_WANT_CORRUPTED_RETURN(i == 0);
382 if ((error = xfs_alloc_insert(bno_cur, &i))) 467 if ((error = xfs_btree_insert(bno_cur, &i)))
383 return error; 468 return error;
384 XFS_WANT_CORRUPTED_RETURN(i == 1); 469 XFS_WANT_CORRUPTED_RETURN(i == 1);
385 } 470 }
@@ -640,8 +725,8 @@ xfs_alloc_ag_vextent_exact(
640 /* 725 /*
641 * Allocate/initialize a cursor for the by-number freespace btree. 726 * Allocate/initialize a cursor for the by-number freespace btree.
642 */ 727 */
643 bno_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, 728 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
644 args->agno, XFS_BTNUM_BNO, NULL, 0); 729 args->agno, XFS_BTNUM_BNO);
645 /* 730 /*
646 * Lookup bno and minlen in the btree (minlen is irrelevant, really). 731 * Lookup bno and minlen in the btree (minlen is irrelevant, really).
647 * Look for the closest free block <= bno, it must contain bno 732 * Look for the closest free block <= bno, it must contain bno
@@ -696,8 +781,8 @@ xfs_alloc_ag_vextent_exact(
696 * We are allocating agbno for rlen [agbno .. end] 781 * We are allocating agbno for rlen [agbno .. end]
697 * Allocate/initialize a cursor for the by-size btree. 782 * Allocate/initialize a cursor for the by-size btree.
698 */ 783 */
699 cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, 784 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
700 args->agno, XFS_BTNUM_CNT, NULL, 0); 785 args->agno, XFS_BTNUM_CNT);
701 ASSERT(args->agbno + args->len <= 786 ASSERT(args->agbno + args->len <=
702 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); 787 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
703 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, 788 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
@@ -759,8 +844,8 @@ xfs_alloc_ag_vextent_near(
759 /* 844 /*
760 * Get a cursor for the by-size btree. 845 * Get a cursor for the by-size btree.
761 */ 846 */
762 cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, 847 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
763 args->agno, XFS_BTNUM_CNT, NULL, 0); 848 args->agno, XFS_BTNUM_CNT);
764 ltlen = 0; 849 ltlen = 0;
765 bno_cur_lt = bno_cur_gt = NULL; 850 bno_cur_lt = bno_cur_gt = NULL;
766 /* 851 /*
@@ -818,7 +903,7 @@ xfs_alloc_ag_vextent_near(
818 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 903 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
819 if (ltlen >= args->minlen) 904 if (ltlen >= args->minlen)
820 break; 905 break;
821 if ((error = xfs_alloc_increment(cnt_cur, 0, &i))) 906 if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
822 goto error0; 907 goto error0;
823 } while (i); 908 } while (i);
824 ASSERT(ltlen >= args->minlen); 909 ASSERT(ltlen >= args->minlen);
@@ -828,7 +913,7 @@ xfs_alloc_ag_vextent_near(
828 i = cnt_cur->bc_ptrs[0]; 913 i = cnt_cur->bc_ptrs[0];
829 for (j = 1, blen = 0, bdiff = 0; 914 for (j = 1, blen = 0, bdiff = 0;
830 !error && j && (blen < args->maxlen || bdiff > 0); 915 !error && j && (blen < args->maxlen || bdiff > 0);
831 error = xfs_alloc_increment(cnt_cur, 0, &j)) { 916 error = xfs_btree_increment(cnt_cur, 0, &j)) {
832 /* 917 /*
833 * For each entry, decide if it's better than 918 * For each entry, decide if it's better than
834 * the previous best entry. 919 * the previous best entry.
@@ -886,8 +971,8 @@ xfs_alloc_ag_vextent_near(
886 /* 971 /*
887 * Set up a cursor for the by-bno tree. 972 * Set up a cursor for the by-bno tree.
888 */ 973 */
889 bno_cur_lt = xfs_btree_init_cursor(args->mp, args->tp, 974 bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp,
890 args->agbp, args->agno, XFS_BTNUM_BNO, NULL, 0); 975 args->agbp, args->agno, XFS_BTNUM_BNO);
891 /* 976 /*
892 * Fix up the btree entries. 977 * Fix up the btree entries.
893 */ 978 */
@@ -914,8 +999,8 @@ xfs_alloc_ag_vextent_near(
914 /* 999 /*
915 * Allocate and initialize the cursor for the leftward search. 1000 * Allocate and initialize the cursor for the leftward search.
916 */ 1001 */
917 bno_cur_lt = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, 1002 bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
918 args->agno, XFS_BTNUM_BNO, NULL, 0); 1003 args->agno, XFS_BTNUM_BNO);
919 /* 1004 /*
920 * Lookup <= bno to find the leftward search's starting point. 1005 * Lookup <= bno to find the leftward search's starting point.
921 */ 1006 */
@@ -938,7 +1023,7 @@ xfs_alloc_ag_vextent_near(
938 * Increment the cursor, so we will point at the entry just right 1023 * Increment the cursor, so we will point at the entry just right
939 * of the leftward entry if any, or to the leftmost entry. 1024 * of the leftward entry if any, or to the leftmost entry.
940 */ 1025 */
941 if ((error = xfs_alloc_increment(bno_cur_gt, 0, &i))) 1026 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
942 goto error0; 1027 goto error0;
943 if (!i) { 1028 if (!i) {
944 /* 1029 /*
@@ -961,7 +1046,7 @@ xfs_alloc_ag_vextent_near(
961 args->minlen, &ltbnoa, &ltlena); 1046 args->minlen, &ltbnoa, &ltlena);
962 if (ltlena >= args->minlen) 1047 if (ltlena >= args->minlen)
963 break; 1048 break;
964 if ((error = xfs_alloc_decrement(bno_cur_lt, 0, &i))) 1049 if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
965 goto error0; 1050 goto error0;
966 if (!i) { 1051 if (!i) {
967 xfs_btree_del_cursor(bno_cur_lt, 1052 xfs_btree_del_cursor(bno_cur_lt,
@@ -977,7 +1062,7 @@ xfs_alloc_ag_vextent_near(
977 args->minlen, &gtbnoa, &gtlena); 1062 args->minlen, &gtbnoa, &gtlena);
978 if (gtlena >= args->minlen) 1063 if (gtlena >= args->minlen)
979 break; 1064 break;
980 if ((error = xfs_alloc_increment(bno_cur_gt, 0, &i))) 1065 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
981 goto error0; 1066 goto error0;
982 if (!i) { 1067 if (!i) {
983 xfs_btree_del_cursor(bno_cur_gt, 1068 xfs_btree_del_cursor(bno_cur_gt,
@@ -1066,7 +1151,7 @@ xfs_alloc_ag_vextent_near(
1066 /* 1151 /*
1067 * Fell off the right end. 1152 * Fell off the right end.
1068 */ 1153 */
1069 if ((error = xfs_alloc_increment( 1154 if ((error = xfs_btree_increment(
1070 bno_cur_gt, 0, &i))) 1155 bno_cur_gt, 0, &i)))
1071 goto error0; 1156 goto error0;
1072 if (!i) { 1157 if (!i) {
@@ -1162,7 +1247,7 @@ xfs_alloc_ag_vextent_near(
1162 /* 1247 /*
1163 * Fell off the left end. 1248 * Fell off the left end.
1164 */ 1249 */
1165 if ((error = xfs_alloc_decrement( 1250 if ((error = xfs_btree_decrement(
1166 bno_cur_lt, 0, &i))) 1251 bno_cur_lt, 0, &i)))
1167 goto error0; 1252 goto error0;
1168 if (!i) { 1253 if (!i) {
@@ -1267,8 +1352,8 @@ xfs_alloc_ag_vextent_size(
1267 /* 1352 /*
1268 * Allocate and initialize a cursor for the by-size btree. 1353 * Allocate and initialize a cursor for the by-size btree.
1269 */ 1354 */
1270 cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, 1355 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1271 args->agno, XFS_BTNUM_CNT, NULL, 0); 1356 args->agno, XFS_BTNUM_CNT);
1272 bno_cur = NULL; 1357 bno_cur = NULL;
1273 /* 1358 /*
1274 * Look for an entry >= maxlen+alignment-1 blocks. 1359 * Look for an entry >= maxlen+alignment-1 blocks.
@@ -1321,7 +1406,7 @@ xfs_alloc_ag_vextent_size(
1321 bestflen = flen; 1406 bestflen = flen;
1322 bestfbno = fbno; 1407 bestfbno = fbno;
1323 for (;;) { 1408 for (;;) {
1324 if ((error = xfs_alloc_decrement(cnt_cur, 0, &i))) 1409 if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
1325 goto error0; 1410 goto error0;
1326 if (i == 0) 1411 if (i == 0)
1327 break; 1412 break;
@@ -1372,8 +1457,8 @@ xfs_alloc_ag_vextent_size(
1372 /* 1457 /*
1373 * Allocate and initialize a cursor for the by-block tree. 1458 * Allocate and initialize a cursor for the by-block tree.
1374 */ 1459 */
1375 bno_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, 1460 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1376 args->agno, XFS_BTNUM_BNO, NULL, 0); 1461 args->agno, XFS_BTNUM_BNO);
1377 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, 1462 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
1378 rbno, rlen, XFSA_FIXUP_CNT_OK))) 1463 rbno, rlen, XFSA_FIXUP_CNT_OK)))
1379 goto error0; 1464 goto error0;
@@ -1416,7 +1501,7 @@ xfs_alloc_ag_vextent_small(
1416 xfs_extlen_t flen; 1501 xfs_extlen_t flen;
1417 int i; 1502 int i;
1418 1503
1419 if ((error = xfs_alloc_decrement(ccur, 0, &i))) 1504 if ((error = xfs_btree_decrement(ccur, 0, &i)))
1420 goto error0; 1505 goto error0;
1421 if (i) { 1506 if (i) {
1422 if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i))) 1507 if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
@@ -1515,8 +1600,7 @@ xfs_free_ag_extent(
1515 /* 1600 /*
1516 * Allocate and initialize a cursor for the by-block btree. 1601 * Allocate and initialize a cursor for the by-block btree.
1517 */ 1602 */
1518 bno_cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO, NULL, 1603 bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
1519 0);
1520 cnt_cur = NULL; 1604 cnt_cur = NULL;
1521 /* 1605 /*
1522 * Look for a neighboring block on the left (lower block numbers) 1606 * Look for a neighboring block on the left (lower block numbers)
@@ -1549,7 +1633,7 @@ xfs_free_ag_extent(
1549 * Look for a neighboring block on the right (higher block numbers) 1633 * Look for a neighboring block on the right (higher block numbers)
1550 * that is contiguous with this space. 1634 * that is contiguous with this space.
1551 */ 1635 */
1552 if ((error = xfs_alloc_increment(bno_cur, 0, &haveright))) 1636 if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
1553 goto error0; 1637 goto error0;
1554 if (haveright) { 1638 if (haveright) {
1555 /* 1639 /*
@@ -1575,8 +1659,7 @@ xfs_free_ag_extent(
1575 /* 1659 /*
1576 * Now allocate and initialize a cursor for the by-size tree. 1660 * Now allocate and initialize a cursor for the by-size tree.
1577 */ 1661 */
1578 cnt_cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT, NULL, 1662 cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
1579 0);
1580 /* 1663 /*
1581 * Have both left and right contiguous neighbors. 1664 * Have both left and right contiguous neighbors.
1582 * Merge all three into a single free block. 1665 * Merge all three into a single free block.
@@ -1588,7 +1671,7 @@ xfs_free_ag_extent(
1588 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) 1671 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
1589 goto error0; 1672 goto error0;
1590 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1673 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1591 if ((error = xfs_alloc_delete(cnt_cur, &i))) 1674 if ((error = xfs_btree_delete(cnt_cur, &i)))
1592 goto error0; 1675 goto error0;
1593 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1676 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1594 /* 1677 /*
@@ -1597,19 +1680,19 @@ xfs_free_ag_extent(
1597 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) 1680 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
1598 goto error0; 1681 goto error0;
1599 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1682 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1600 if ((error = xfs_alloc_delete(cnt_cur, &i))) 1683 if ((error = xfs_btree_delete(cnt_cur, &i)))
1601 goto error0; 1684 goto error0;
1602 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1685 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1603 /* 1686 /*
1604 * Delete the old by-block entry for the right block. 1687 * Delete the old by-block entry for the right block.
1605 */ 1688 */
1606 if ((error = xfs_alloc_delete(bno_cur, &i))) 1689 if ((error = xfs_btree_delete(bno_cur, &i)))
1607 goto error0; 1690 goto error0;
1608 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1691 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1609 /* 1692 /*
1610 * Move the by-block cursor back to the left neighbor. 1693 * Move the by-block cursor back to the left neighbor.
1611 */ 1694 */
1612 if ((error = xfs_alloc_decrement(bno_cur, 0, &i))) 1695 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
1613 goto error0; 1696 goto error0;
1614 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1697 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1615#ifdef DEBUG 1698#ifdef DEBUG
@@ -1648,14 +1731,14 @@ xfs_free_ag_extent(
1648 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) 1731 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
1649 goto error0; 1732 goto error0;
1650 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1733 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1651 if ((error = xfs_alloc_delete(cnt_cur, &i))) 1734 if ((error = xfs_btree_delete(cnt_cur, &i)))
1652 goto error0; 1735 goto error0;
1653 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1736 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1654 /* 1737 /*
1655 * Back up the by-block cursor to the left neighbor, and 1738 * Back up the by-block cursor to the left neighbor, and
1656 * update its length. 1739 * update its length.
1657 */ 1740 */
1658 if ((error = xfs_alloc_decrement(bno_cur, 0, &i))) 1741 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
1659 goto error0; 1742 goto error0;
1660 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1743 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1661 nbno = ltbno; 1744 nbno = ltbno;
@@ -1674,7 +1757,7 @@ xfs_free_ag_extent(
1674 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) 1757 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
1675 goto error0; 1758 goto error0;
1676 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1759 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1677 if ((error = xfs_alloc_delete(cnt_cur, &i))) 1760 if ((error = xfs_btree_delete(cnt_cur, &i)))
1678 goto error0; 1761 goto error0;
1679 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1762 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1680 /* 1763 /*
@@ -1693,7 +1776,7 @@ xfs_free_ag_extent(
1693 else { 1776 else {
1694 nbno = bno; 1777 nbno = bno;
1695 nlen = len; 1778 nlen = len;
1696 if ((error = xfs_alloc_insert(bno_cur, &i))) 1779 if ((error = xfs_btree_insert(bno_cur, &i)))
1697 goto error0; 1780 goto error0;
1698 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1781 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1699 } 1782 }
@@ -1705,7 +1788,7 @@ xfs_free_ag_extent(
1705 if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i))) 1788 if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
1706 goto error0; 1789 goto error0;
1707 XFS_WANT_CORRUPTED_GOTO(i == 0, error0); 1790 XFS_WANT_CORRUPTED_GOTO(i == 0, error0);
1708 if ((error = xfs_alloc_insert(cnt_cur, &i))) 1791 if ((error = xfs_btree_insert(cnt_cur, &i)))
1709 goto error0; 1792 goto error0;
1710 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1793 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1711 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1794 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
@@ -2150,51 +2233,83 @@ xfs_alloc_put_freelist(
2150 * Read in the allocation group header (free/alloc section). 2233 * Read in the allocation group header (free/alloc section).
2151 */ 2234 */
2152int /* error */ 2235int /* error */
2153xfs_alloc_read_agf( 2236xfs_read_agf(
2154 xfs_mount_t *mp, /* mount point structure */ 2237 struct xfs_mount *mp, /* mount point structure */
2155 xfs_trans_t *tp, /* transaction pointer */ 2238 struct xfs_trans *tp, /* transaction pointer */
2156 xfs_agnumber_t agno, /* allocation group number */ 2239 xfs_agnumber_t agno, /* allocation group number */
2157 int flags, /* XFS_ALLOC_FLAG_... */ 2240 int flags, /* XFS_BUF_ */
2158 xfs_buf_t **bpp) /* buffer for the ag freelist header */ 2241 struct xfs_buf **bpp) /* buffer for the ag freelist header */
2159{ 2242{
2160 xfs_agf_t *agf; /* ag freelist header */ 2243 struct xfs_agf *agf; /* ag freelist header */
2161 int agf_ok; /* set if agf is consistent */ 2244 int agf_ok; /* set if agf is consistent */
2162 xfs_buf_t *bp; /* return value */
2163 xfs_perag_t *pag; /* per allocation group data */
2164 int error; 2245 int error;
2165 2246
2166 ASSERT(agno != NULLAGNUMBER); 2247 ASSERT(agno != NULLAGNUMBER);
2167 error = xfs_trans_read_buf( 2248 error = xfs_trans_read_buf(
2168 mp, tp, mp->m_ddev_targp, 2249 mp, tp, mp->m_ddev_targp,
2169 XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)), 2250 XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
2170 XFS_FSS_TO_BB(mp, 1), 2251 XFS_FSS_TO_BB(mp, 1), flags, bpp);
2171 (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XFS_BUF_TRYLOCK : 0U,
2172 &bp);
2173 if (error) 2252 if (error)
2174 return error; 2253 return error;
2175 ASSERT(!bp || !XFS_BUF_GETERROR(bp)); 2254 if (!*bpp)
2176 if (!bp) {
2177 *bpp = NULL;
2178 return 0; 2255 return 0;
2179 } 2256
2257 ASSERT(!XFS_BUF_GETERROR(*bpp));
2258 agf = XFS_BUF_TO_AGF(*bpp);
2259
2180 /* 2260 /*
2181 * Validate the magic number of the agf block. 2261 * Validate the magic number of the agf block.
2182 */ 2262 */
2183 agf = XFS_BUF_TO_AGF(bp);
2184 agf_ok = 2263 agf_ok =
2185 be32_to_cpu(agf->agf_magicnum) == XFS_AGF_MAGIC && 2264 be32_to_cpu(agf->agf_magicnum) == XFS_AGF_MAGIC &&
2186 XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) && 2265 XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
2187 be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) && 2266 be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
2188 be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) && 2267 be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
2189 be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) && 2268 be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
2190 be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp); 2269 be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp) &&
2270 be32_to_cpu(agf->agf_seqno) == agno;
2271 if (xfs_sb_version_haslazysbcount(&mp->m_sb))
2272 agf_ok = agf_ok && be32_to_cpu(agf->agf_btreeblks) <=
2273 be32_to_cpu(agf->agf_length);
2191 if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF, 2274 if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF,
2192 XFS_RANDOM_ALLOC_READ_AGF))) { 2275 XFS_RANDOM_ALLOC_READ_AGF))) {
2193 XFS_CORRUPTION_ERROR("xfs_alloc_read_agf", 2276 XFS_CORRUPTION_ERROR("xfs_alloc_read_agf",
2194 XFS_ERRLEVEL_LOW, mp, agf); 2277 XFS_ERRLEVEL_LOW, mp, agf);
2195 xfs_trans_brelse(tp, bp); 2278 xfs_trans_brelse(tp, *bpp);
2196 return XFS_ERROR(EFSCORRUPTED); 2279 return XFS_ERROR(EFSCORRUPTED);
2197 } 2280 }
2281
2282 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGF, XFS_AGF_REF);
2283 return 0;
2284}
2285
2286/*
2287 * Read in the allocation group header (free/alloc section).
2288 */
2289int /* error */
2290xfs_alloc_read_agf(
2291 struct xfs_mount *mp, /* mount point structure */
2292 struct xfs_trans *tp, /* transaction pointer */
2293 xfs_agnumber_t agno, /* allocation group number */
2294 int flags, /* XFS_ALLOC_FLAG_... */
2295 struct xfs_buf **bpp) /* buffer for the ag freelist header */
2296{
2297 struct xfs_agf *agf; /* ag freelist header */
2298 struct xfs_perag *pag; /* per allocation group data */
2299 int error;
2300
2301 ASSERT(agno != NULLAGNUMBER);
2302
2303 error = xfs_read_agf(mp, tp, agno,
2304 (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XFS_BUF_TRYLOCK : 0,
2305 bpp);
2306 if (error)
2307 return error;
2308 if (!*bpp)
2309 return 0;
2310 ASSERT(!XFS_BUF_GETERROR(*bpp));
2311
2312 agf = XFS_BUF_TO_AGF(*bpp);
2198 pag = &mp->m_perag[agno]; 2313 pag = &mp->m_perag[agno];
2199 if (!pag->pagf_init) { 2314 if (!pag->pagf_init) {
2200 pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks); 2315 pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
@@ -2213,6 +2328,7 @@ xfs_alloc_read_agf(
2213#ifdef DEBUG 2328#ifdef DEBUG
2214 else if (!XFS_FORCED_SHUTDOWN(mp)) { 2329 else if (!XFS_FORCED_SHUTDOWN(mp)) {
2215 ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks)); 2330 ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
2331 ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
2216 ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount)); 2332 ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
2217 ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest)); 2333 ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
2218 ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] == 2334 ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
@@ -2221,8 +2337,6 @@ xfs_alloc_read_agf(
2221 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi])); 2337 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
2222 } 2338 }
2223#endif 2339#endif
2224 XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGF, XFS_AGF_REF);
2225 *bpp = bp;
2226 return 0; 2340 return 0;
2227} 2341}
2228 2342
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 5aec15d0651..588172796f7 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -121,6 +121,19 @@ extern ktrace_t *xfs_alloc_trace_buf;
121#define XFS_ALLOC_KTRACE_BUSYSEARCH 6 121#define XFS_ALLOC_KTRACE_BUSYSEARCH 6
122#endif 122#endif
123 123
124void
125xfs_alloc_mark_busy(xfs_trans_t *tp,
126 xfs_agnumber_t agno,
127 xfs_agblock_t bno,
128 xfs_extlen_t len);
129
130void
131xfs_alloc_clear_busy(xfs_trans_t *tp,
132 xfs_agnumber_t ag,
133 int idx);
134
135#endif /* __KERNEL__ */
136
124/* 137/*
125 * Compute and fill in value of m_ag_maxlevels. 138 * Compute and fill in value of m_ag_maxlevels.
126 */ 139 */
@@ -196,18 +209,4 @@ xfs_free_extent(
196 xfs_fsblock_t bno, /* starting block number of extent */ 209 xfs_fsblock_t bno, /* starting block number of extent */
197 xfs_extlen_t len); /* length of extent */ 210 xfs_extlen_t len); /* length of extent */
198 211
199void
200xfs_alloc_mark_busy(xfs_trans_t *tp,
201 xfs_agnumber_t agno,
202 xfs_agblock_t bno,
203 xfs_extlen_t len);
204
205void
206xfs_alloc_clear_busy(xfs_trans_t *tp,
207 xfs_agnumber_t ag,
208 int idx);
209
210
211#endif /* __KERNEL__ */
212
213#endif /* __XFS_ALLOC_H__ */ 212#endif /* __XFS_ALLOC_H__ */
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index 3ce2645508a..733cb75a8c5 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -35,2177 +35,464 @@
35#include "xfs_dinode.h" 35#include "xfs_dinode.h"
36#include "xfs_inode.h" 36#include "xfs_inode.h"
37#include "xfs_btree.h" 37#include "xfs_btree.h"
38#include "xfs_btree_trace.h"
38#include "xfs_ialloc.h" 39#include "xfs_ialloc.h"
39#include "xfs_alloc.h" 40#include "xfs_alloc.h"
40#include "xfs_error.h" 41#include "xfs_error.h"
41 42
42/*
43 * Prototypes for internal functions.
44 */
45 43
46STATIC void xfs_alloc_log_block(xfs_trans_t *, xfs_buf_t *, int); 44STATIC struct xfs_btree_cur *
47STATIC void xfs_alloc_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int); 45xfs_allocbt_dup_cursor(
48STATIC void xfs_alloc_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int); 46 struct xfs_btree_cur *cur)
49STATIC void xfs_alloc_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int); 47{
50STATIC int xfs_alloc_lshift(xfs_btree_cur_t *, int, int *); 48 return xfs_allocbt_init_cursor(cur->bc_mp, cur->bc_tp,
51STATIC int xfs_alloc_newroot(xfs_btree_cur_t *, int *); 49 cur->bc_private.a.agbp, cur->bc_private.a.agno,
52STATIC int xfs_alloc_rshift(xfs_btree_cur_t *, int, int *); 50 cur->bc_btnum);
53STATIC int xfs_alloc_split(xfs_btree_cur_t *, int, xfs_agblock_t *, 51}
54 xfs_alloc_key_t *, xfs_btree_cur_t **, int *);
55STATIC int xfs_alloc_updkey(xfs_btree_cur_t *, xfs_alloc_key_t *, int);
56 52
57/* 53STATIC void
58 * Internal functions. 54xfs_allocbt_set_root(
59 */ 55 struct xfs_btree_cur *cur,
56 union xfs_btree_ptr *ptr,
57 int inc)
58{
59 struct xfs_buf *agbp = cur->bc_private.a.agbp;
60 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
61 xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno);
62 int btnum = cur->bc_btnum;
60 63
61/* 64 ASSERT(ptr->s != 0);
62 * Single level of the xfs_alloc_delete record deletion routine. 65
63 * Delete record pointed to by cur/level. 66 agf->agf_roots[btnum] = ptr->s;
64 * Remove the record from its block then rebalance the tree. 67 be32_add_cpu(&agf->agf_levels[btnum], inc);
65 * Return 0 for error, 1 for done, 2 to go on to the next level. 68 cur->bc_mp->m_perag[seqno].pagf_levels[btnum] += inc;
66 */ 69
67STATIC int /* error */ 70 xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
68xfs_alloc_delrec( 71}
69 xfs_btree_cur_t *cur, /* btree cursor */ 72
70 int level, /* level removing record from */ 73STATIC int
71 int *stat) /* fail/done/go-on */ 74xfs_allocbt_alloc_block(
75 struct xfs_btree_cur *cur,
76 union xfs_btree_ptr *start,
77 union xfs_btree_ptr *new,
78 int length,
79 int *stat)
72{ 80{
73 xfs_agf_t *agf; /* allocation group freelist header */ 81 int error;
74 xfs_alloc_block_t *block; /* btree block record/key lives in */ 82 xfs_agblock_t bno;
75 xfs_agblock_t bno; /* btree block number */
76 xfs_buf_t *bp; /* buffer for block */
77 int error; /* error return value */
78 int i; /* loop index */
79 xfs_alloc_key_t key; /* kp points here if block is level 0 */
80 xfs_agblock_t lbno; /* left block's block number */
81 xfs_buf_t *lbp; /* left block's buffer pointer */
82 xfs_alloc_block_t *left; /* left btree block */
83 xfs_alloc_key_t *lkp=NULL; /* left block key pointer */
84 xfs_alloc_ptr_t *lpp=NULL; /* left block address pointer */
85 int lrecs=0; /* number of records in left block */
86 xfs_alloc_rec_t *lrp; /* left block record pointer */
87 xfs_mount_t *mp; /* mount structure */
88 int ptr; /* index in btree block for this rec */
89 xfs_agblock_t rbno; /* right block's block number */
90 xfs_buf_t *rbp; /* right block's buffer pointer */
91 xfs_alloc_block_t *right; /* right btree block */
92 xfs_alloc_key_t *rkp; /* right block key pointer */
93 xfs_alloc_ptr_t *rpp; /* right block address pointer */
94 int rrecs=0; /* number of records in right block */
95 int numrecs;
96 xfs_alloc_rec_t *rrp; /* right block record pointer */
97 xfs_btree_cur_t *tcur; /* temporary btree cursor */
98 83
99 /* 84 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
100 * Get the index of the entry being deleted, check for nothing there. 85
101 */ 86 /* Allocate the new block from the freelist. If we can't, give up. */
102 ptr = cur->bc_ptrs[level]; 87 error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
103 if (ptr == 0) { 88 &bno, 1);
104 *stat = 0; 89 if (error) {
105 return 0; 90 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
106 }
107 /*
108 * Get the buffer & block containing the record or key/ptr.
109 */
110 bp = cur->bc_bufs[level];
111 block = XFS_BUF_TO_ALLOC_BLOCK(bp);
112#ifdef DEBUG
113 if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
114 return error; 91 return error;
115#endif 92 }
116 /* 93
117 * Fail if we're off the end of the block. 94 if (bno == NULLAGBLOCK) {
118 */ 95 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
119 numrecs = be16_to_cpu(block->bb_numrecs);
120 if (ptr > numrecs) {
121 *stat = 0; 96 *stat = 0;
122 return 0; 97 return 0;
123 } 98 }
124 XFS_STATS_INC(xs_abt_delrec);
125 /*
126 * It's a nonleaf. Excise the key and ptr being deleted, by
127 * sliding the entries past them down one.
128 * Log the changed areas of the block.
129 */
130 if (level > 0) {
131 lkp = XFS_ALLOC_KEY_ADDR(block, 1, cur);
132 lpp = XFS_ALLOC_PTR_ADDR(block, 1, cur);
133#ifdef DEBUG
134 for (i = ptr; i < numrecs; i++) {
135 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(lpp[i]), level)))
136 return error;
137 }
138#endif
139 if (ptr < numrecs) {
140 memmove(&lkp[ptr - 1], &lkp[ptr],
141 (numrecs - ptr) * sizeof(*lkp));
142 memmove(&lpp[ptr - 1], &lpp[ptr],
143 (numrecs - ptr) * sizeof(*lpp));
144 xfs_alloc_log_ptrs(cur, bp, ptr, numrecs - 1);
145 xfs_alloc_log_keys(cur, bp, ptr, numrecs - 1);
146 }
147 }
148 /*
149 * It's a leaf. Excise the record being deleted, by sliding the
150 * entries past it down one. Log the changed areas of the block.
151 */
152 else {
153 lrp = XFS_ALLOC_REC_ADDR(block, 1, cur);
154 if (ptr < numrecs) {
155 memmove(&lrp[ptr - 1], &lrp[ptr],
156 (numrecs - ptr) * sizeof(*lrp));
157 xfs_alloc_log_recs(cur, bp, ptr, numrecs - 1);
158 }
159 /*
160 * If it's the first record in the block, we'll need a key
161 * structure to pass up to the next level (updkey).
162 */
163 if (ptr == 1) {
164 key.ar_startblock = lrp->ar_startblock;
165 key.ar_blockcount = lrp->ar_blockcount;
166 lkp = &key;
167 }
168 }
169 /*
170 * Decrement and log the number of entries in the block.
171 */
172 numrecs--;
173 block->bb_numrecs = cpu_to_be16(numrecs);
174 xfs_alloc_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
175 /*
176 * See if the longest free extent in the allocation group was
177 * changed by this operation. True if it's the by-size btree, and
178 * this is the leaf level, and there is no right sibling block,
179 * and this was the last record.
180 */
181 agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
182 mp = cur->bc_mp;
183 99
184 if (level == 0 && 100 xfs_trans_agbtree_delta(cur->bc_tp, 1);
185 cur->bc_btnum == XFS_BTNUM_CNT && 101 new->s = cpu_to_be32(bno);
186 be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK &&
187 ptr > numrecs) {
188 ASSERT(ptr == numrecs + 1);
189 /*
190 * There are still records in the block. Grab the size
191 * from the last one.
192 */
193 if (numrecs) {
194 rrp = XFS_ALLOC_REC_ADDR(block, numrecs, cur);
195 agf->agf_longest = rrp->ar_blockcount;
196 }
197 /*
198 * No free extents left.
199 */
200 else
201 agf->agf_longest = 0;
202 mp->m_perag[be32_to_cpu(agf->agf_seqno)].pagf_longest =
203 be32_to_cpu(agf->agf_longest);
204 xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
205 XFS_AGF_LONGEST);
206 }
207 /*
208 * Is this the root level? If so, we're almost done.
209 */
210 if (level == cur->bc_nlevels - 1) {
211 /*
212 * If this is the root level,
213 * and there's only one entry left,
214 * and it's NOT the leaf level,
215 * then we can get rid of this level.
216 */
217 if (numrecs == 1 && level > 0) {
218 /*
219 * lpp is still set to the first pointer in the block.
220 * Make it the new root of the btree.
221 */
222 bno = be32_to_cpu(agf->agf_roots[cur->bc_btnum]);
223 agf->agf_roots[cur->bc_btnum] = *lpp;
224 be32_add_cpu(&agf->agf_levels[cur->bc_btnum], -1);
225 mp->m_perag[be32_to_cpu(agf->agf_seqno)].pagf_levels[cur->bc_btnum]--;
226 /*
227 * Put this buffer/block on the ag's freelist.
228 */
229 error = xfs_alloc_put_freelist(cur->bc_tp,
230 cur->bc_private.a.agbp, NULL, bno, 1);
231 if (error)
232 return error;
233 /*
234 * Since blocks move to the free list without the
235 * coordination used in xfs_bmap_finish, we can't allow
236 * block to be available for reallocation and
237 * non-transaction writing (user data) until we know
238 * that the transaction that moved it to the free list
239 * is permanently on disk. We track the blocks by
240 * declaring these blocks as "busy"; the busy list is
241 * maintained on a per-ag basis and each transaction
242 * records which entries should be removed when the
243 * iclog commits to disk. If a busy block is
244 * allocated, the iclog is pushed up to the LSN
245 * that freed the block.
246 */
247 xfs_alloc_mark_busy(cur->bc_tp,
248 be32_to_cpu(agf->agf_seqno), bno, 1);
249 102
250 xfs_trans_agbtree_delta(cur->bc_tp, -1); 103 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
251 xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, 104 *stat = 1;
252 XFS_AGF_ROOTS | XFS_AGF_LEVELS); 105 return 0;
253 /* 106}
254 * Update the cursor so there's one fewer level.
255 */
256 xfs_btree_setbuf(cur, level, NULL);
257 cur->bc_nlevels--;
258 } else if (level > 0 &&
259 (error = xfs_alloc_decrement(cur, level, &i)))
260 return error;
261 *stat = 1;
262 return 0;
263 }
264 /*
265 * If we deleted the leftmost entry in the block, update the
266 * key values above us in the tree.
267 */
268 if (ptr == 1 && (error = xfs_alloc_updkey(cur, lkp, level + 1)))
269 return error;
270 /*
271 * If the number of records remaining in the block is at least
272 * the minimum, we're done.
273 */
274 if (numrecs >= XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
275 if (level > 0 && (error = xfs_alloc_decrement(cur, level, &i)))
276 return error;
277 *stat = 1;
278 return 0;
279 }
280 /*
281 * Otherwise, we have to move some records around to keep the
282 * tree balanced. Look at the left and right sibling blocks to
283 * see if we can re-balance by moving only one record.
284 */
285 rbno = be32_to_cpu(block->bb_rightsib);
286 lbno = be32_to_cpu(block->bb_leftsib);
287 bno = NULLAGBLOCK;
288 ASSERT(rbno != NULLAGBLOCK || lbno != NULLAGBLOCK);
289 /*
290 * Duplicate the cursor so our btree manipulations here won't
291 * disrupt the next level up.
292 */
293 if ((error = xfs_btree_dup_cursor(cur, &tcur)))
294 return error;
295 /*
296 * If there's a right sibling, see if it's ok to shift an entry
297 * out of it.
298 */
299 if (rbno != NULLAGBLOCK) {
300 /*
301 * Move the temp cursor to the last entry in the next block.
302 * Actually any entry but the first would suffice.
303 */
304 i = xfs_btree_lastrec(tcur, level);
305 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
306 if ((error = xfs_alloc_increment(tcur, level, &i)))
307 goto error0;
308 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
309 i = xfs_btree_lastrec(tcur, level);
310 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
311 /*
312 * Grab a pointer to the block.
313 */
314 rbp = tcur->bc_bufs[level];
315 right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
316#ifdef DEBUG
317 if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
318 goto error0;
319#endif
320 /*
321 * Grab the current block number, for future use.
322 */
323 bno = be32_to_cpu(right->bb_leftsib);
324 /*
325 * If right block is full enough so that removing one entry
326 * won't make it too empty, and left-shifting an entry out
327 * of right to us works, we're done.
328 */
329 if (be16_to_cpu(right->bb_numrecs) - 1 >=
330 XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
331 if ((error = xfs_alloc_lshift(tcur, level, &i)))
332 goto error0;
333 if (i) {
334 ASSERT(be16_to_cpu(block->bb_numrecs) >=
335 XFS_ALLOC_BLOCK_MINRECS(level, cur));
336 xfs_btree_del_cursor(tcur,
337 XFS_BTREE_NOERROR);
338 if (level > 0 &&
339 (error = xfs_alloc_decrement(cur, level,
340 &i)))
341 return error;
342 *stat = 1;
343 return 0;
344 }
345 }
346 /*
347 * Otherwise, grab the number of records in right for
348 * future reference, and fix up the temp cursor to point
349 * to our block again (last record).
350 */
351 rrecs = be16_to_cpu(right->bb_numrecs);
352 if (lbno != NULLAGBLOCK) {
353 i = xfs_btree_firstrec(tcur, level);
354 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
355 if ((error = xfs_alloc_decrement(tcur, level, &i)))
356 goto error0;
357 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
358 }
359 }
360 /*
361 * If there's a left sibling, see if it's ok to shift an entry
362 * out of it.
363 */
364 if (lbno != NULLAGBLOCK) {
365 /*
366 * Move the temp cursor to the first entry in the
367 * previous block.
368 */
369 i = xfs_btree_firstrec(tcur, level);
370 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
371 if ((error = xfs_alloc_decrement(tcur, level, &i)))
372 goto error0;
373 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
374 xfs_btree_firstrec(tcur, level);
375 /*
376 * Grab a pointer to the block.
377 */
378 lbp = tcur->bc_bufs[level];
379 left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
380#ifdef DEBUG
381 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
382 goto error0;
383#endif
384 /*
385 * Grab the current block number, for future use.
386 */
387 bno = be32_to_cpu(left->bb_rightsib);
388 /*
389 * If left block is full enough so that removing one entry
390 * won't make it too empty, and right-shifting an entry out
391 * of left to us works, we're done.
392 */
393 if (be16_to_cpu(left->bb_numrecs) - 1 >=
394 XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
395 if ((error = xfs_alloc_rshift(tcur, level, &i)))
396 goto error0;
397 if (i) {
398 ASSERT(be16_to_cpu(block->bb_numrecs) >=
399 XFS_ALLOC_BLOCK_MINRECS(level, cur));
400 xfs_btree_del_cursor(tcur,
401 XFS_BTREE_NOERROR);
402 if (level == 0)
403 cur->bc_ptrs[0]++;
404 *stat = 1;
405 return 0;
406 }
407 }
408 /*
409 * Otherwise, grab the number of records in right for
410 * future reference.
411 */
412 lrecs = be16_to_cpu(left->bb_numrecs);
413 }
414 /*
415 * Delete the temp cursor, we're done with it.
416 */
417 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
418 /*
419 * If here, we need to do a join to keep the tree balanced.
420 */
421 ASSERT(bno != NULLAGBLOCK);
422 /*
423 * See if we can join with the left neighbor block.
424 */
425 if (lbno != NULLAGBLOCK &&
426 lrecs + numrecs <= XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
427 /*
428 * Set "right" to be the starting block,
429 * "left" to be the left neighbor.
430 */
431 rbno = bno;
432 right = block;
433 rrecs = be16_to_cpu(right->bb_numrecs);
434 rbp = bp;
435 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
436 cur->bc_private.a.agno, lbno, 0, &lbp,
437 XFS_ALLOC_BTREE_REF)))
438 return error;
439 left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
440 lrecs = be16_to_cpu(left->bb_numrecs);
441 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
442 return error;
443 }
444 /*
445 * If that won't work, see if we can join with the right neighbor block.
446 */
447 else if (rbno != NULLAGBLOCK &&
448 rrecs + numrecs <= XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
449 /*
450 * Set "left" to be the starting block,
451 * "right" to be the right neighbor.
452 */
453 lbno = bno;
454 left = block;
455 lrecs = be16_to_cpu(left->bb_numrecs);
456 lbp = bp;
457 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
458 cur->bc_private.a.agno, rbno, 0, &rbp,
459 XFS_ALLOC_BTREE_REF)))
460 return error;
461 right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
462 rrecs = be16_to_cpu(right->bb_numrecs);
463 if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
464 return error;
465 }
466 /*
467 * Otherwise, we can't fix the imbalance.
468 * Just return. This is probably a logic error, but it's not fatal.
469 */
470 else {
471 if (level > 0 && (error = xfs_alloc_decrement(cur, level, &i)))
472 return error;
473 *stat = 1;
474 return 0;
475 }
476 /*
477 * We're now going to join "left" and "right" by moving all the stuff
478 * in "right" to "left" and deleting "right".
479 */
480 if (level > 0) {
481 /*
482 * It's a non-leaf. Move keys and pointers.
483 */
484 lkp = XFS_ALLOC_KEY_ADDR(left, lrecs + 1, cur);
485 lpp = XFS_ALLOC_PTR_ADDR(left, lrecs + 1, cur);
486 rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
487 rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
488#ifdef DEBUG
489 for (i = 0; i < rrecs; i++) {
490 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level)))
491 return error;
492 }
493#endif
494 memcpy(lkp, rkp, rrecs * sizeof(*lkp));
495 memcpy(lpp, rpp, rrecs * sizeof(*lpp));
496 xfs_alloc_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
497 xfs_alloc_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
498 } else {
499 /*
500 * It's a leaf. Move records.
501 */
502 lrp = XFS_ALLOC_REC_ADDR(left, lrecs + 1, cur);
503 rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
504 memcpy(lrp, rrp, rrecs * sizeof(*lrp));
505 xfs_alloc_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
506 }
507 /*
508 * If we joined with the left neighbor, set the buffer in the
509 * cursor to the left block, and fix up the index.
510 */
511 if (bp != lbp) {
512 xfs_btree_setbuf(cur, level, lbp);
513 cur->bc_ptrs[level] += lrecs;
514 }
515 /*
516 * If we joined with the right neighbor and there's a level above
517 * us, increment the cursor at that level.
518 */
519 else if (level + 1 < cur->bc_nlevels &&
520 (error = xfs_alloc_increment(cur, level + 1, &i)))
521 return error;
522 /*
523 * Fix up the number of records in the surviving block.
524 */
525 lrecs += rrecs;
526 left->bb_numrecs = cpu_to_be16(lrecs);
527 /*
528 * Fix up the right block pointer in the surviving block, and log it.
529 */
530 left->bb_rightsib = right->bb_rightsib;
531 xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
532 /*
533 * If there is a right sibling now, make it point to the
534 * remaining block.
535 */
536 if (be32_to_cpu(left->bb_rightsib) != NULLAGBLOCK) {
537 xfs_alloc_block_t *rrblock;
538 xfs_buf_t *rrbp;
539 107
540 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, 108STATIC int
541 cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib), 0, 109xfs_allocbt_free_block(
542 &rrbp, XFS_ALLOC_BTREE_REF))) 110 struct xfs_btree_cur *cur,
543 return error; 111 struct xfs_buf *bp)
544 rrblock = XFS_BUF_TO_ALLOC_BLOCK(rrbp); 112{
545 if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp))) 113 struct xfs_buf *agbp = cur->bc_private.a.agbp;
546 return error; 114 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
547 rrblock->bb_leftsib = cpu_to_be32(lbno); 115 xfs_agblock_t bno;
548 xfs_alloc_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB); 116 int error;
549 } 117
550 /* 118 bno = XFS_DADDR_TO_AGBNO(cur->bc_mp, XFS_BUF_ADDR(bp));
551 * Free the deleting block by putting it on the freelist. 119 error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1);
552 */
553 error = xfs_alloc_put_freelist(cur->bc_tp,
554 cur->bc_private.a.agbp, NULL, rbno, 1);
555 if (error) 120 if (error)
556 return error; 121 return error;
122
557 /* 123 /*
558 * Since blocks move to the free list without the coordination 124 * Since blocks move to the free list without the coordination used in
559 * used in xfs_bmap_finish, we can't allow block to be available 125 * xfs_bmap_finish, we can't allow block to be available for
560 * for reallocation and non-transaction writing (user data) 126 * reallocation and non-transaction writing (user data) until we know
561 * until we know that the transaction that moved it to the free 127 * that the transaction that moved it to the free list is permanently
562 * list is permanently on disk. We track the blocks by declaring 128 * on disk. We track the blocks by declaring these blocks as "busy";
563 * these blocks as "busy"; the busy list is maintained on a 129 * the busy list is maintained on a per-ag basis and each transaction
564 * per-ag basis and each transaction records which entries 130 * records which entries should be removed when the iclog commits to
565 * should be removed when the iclog commits to disk. If a 131 * disk. If a busy block is allocated, the iclog is pushed up to the
566 * busy block is allocated, the iclog is pushed up to the
567 * LSN that freed the block. 132 * LSN that freed the block.
568 */ 133 */
569 xfs_alloc_mark_busy(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1); 134 xfs_alloc_mark_busy(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1);
570 xfs_trans_agbtree_delta(cur->bc_tp, -1); 135 xfs_trans_agbtree_delta(cur->bc_tp, -1);
571
572 /*
573 * Adjust the current level's cursor so that we're left referring
574 * to the right node, after we're done.
575 * If this leaves the ptr value 0 our caller will fix it up.
576 */
577 if (level > 0)
578 cur->bc_ptrs[level]--;
579 /*
580 * Return value means the next level up has something to do.
581 */
582 *stat = 2;
583 return 0; 136 return 0;
584
585error0:
586 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
587 return error;
588} 137}
589 138
590/* 139/*
591 * Insert one record/level. Return information to the caller 140 * Update the longest extent in the AGF
592 * allowing the next level up to proceed if necessary.
593 */ 141 */
594STATIC int /* error */ 142STATIC void
595xfs_alloc_insrec( 143xfs_allocbt_update_lastrec(
596 xfs_btree_cur_t *cur, /* btree cursor */ 144 struct xfs_btree_cur *cur,
597 int level, /* level to insert record at */ 145 struct xfs_btree_block *block,
598 xfs_agblock_t *bnop, /* i/o: block number inserted */ 146 union xfs_btree_rec *rec,
599 xfs_alloc_rec_t *recp, /* i/o: record data inserted */ 147 int ptr,
600 xfs_btree_cur_t **curp, /* output: new cursor replacing cur */ 148 int reason)
601 int *stat) /* output: success/failure */
602{ 149{
603 xfs_agf_t *agf; /* allocation group freelist header */ 150 struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
604 xfs_alloc_block_t *block; /* btree block record/key lives in */ 151 xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno);
605 xfs_buf_t *bp; /* buffer for block */ 152 __be32 len;
606 int error; /* error return value */
607 int i; /* loop index */
608 xfs_alloc_key_t key; /* key value being inserted */
609 xfs_alloc_key_t *kp; /* pointer to btree keys */
610 xfs_agblock_t nbno; /* block number of allocated block */
611 xfs_btree_cur_t *ncur; /* new cursor to be used at next lvl */
612 xfs_alloc_key_t nkey; /* new key value, from split */
613 xfs_alloc_rec_t nrec; /* new record value, for caller */
614 int numrecs; 153 int numrecs;
615 int optr; /* old ptr value */
616 xfs_alloc_ptr_t *pp; /* pointer to btree addresses */
617 int ptr; /* index in btree block for this rec */
618 xfs_alloc_rec_t *rp; /* pointer to btree records */
619 154
620 ASSERT(be32_to_cpu(recp->ar_blockcount) > 0); 155 ASSERT(cur->bc_btnum == XFS_BTNUM_CNT);
156
157 switch (reason) {
158 case LASTREC_UPDATE:
159 /*
160 * If this is the last leaf block and it's the last record,
161 * then update the size of the longest extent in the AG.
162 */
163 if (ptr != xfs_btree_get_numrecs(block))
164 return;
165 len = rec->alloc.ar_blockcount;
166 break;
167 case LASTREC_INSREC:
168 if (be32_to_cpu(rec->alloc.ar_blockcount) <=
169 be32_to_cpu(agf->agf_longest))
170 return;
171 len = rec->alloc.ar_blockcount;
172 break;
173 case LASTREC_DELREC:
174 numrecs = xfs_btree_get_numrecs(block);
175 if (ptr <= numrecs)
176 return;
177 ASSERT(ptr == numrecs + 1);
621 178
622 /* 179 if (numrecs) {
623 * GCC doesn't understand the (arguably complex) control flow in 180 xfs_alloc_rec_t *rrp;
624 * this function and complains about uninitialized structure fields
625 * without this.
626 */
627 memset(&nrec, 0, sizeof(nrec));
628 181
629 /* 182 rrp = XFS_ALLOC_REC_ADDR(cur->bc_mp, block, numrecs);
630 * If we made it to the root level, allocate a new root block 183 len = rrp->ar_blockcount;
631 * and we're done.
632 */
633 if (level >= cur->bc_nlevels) {
634 XFS_STATS_INC(xs_abt_insrec);
635 if ((error = xfs_alloc_newroot(cur, &i)))
636 return error;
637 *bnop = NULLAGBLOCK;
638 *stat = i;
639 return 0;
640 }
641 /*
642 * Make a key out of the record data to be inserted, and save it.
643 */
644 key.ar_startblock = recp->ar_startblock;
645 key.ar_blockcount = recp->ar_blockcount;
646 optr = ptr = cur->bc_ptrs[level];
647 /*
648 * If we're off the left edge, return failure.
649 */
650 if (ptr == 0) {
651 *stat = 0;
652 return 0;
653 }
654 XFS_STATS_INC(xs_abt_insrec);
655 /*
656 * Get pointers to the btree buffer and block.
657 */
658 bp = cur->bc_bufs[level];
659 block = XFS_BUF_TO_ALLOC_BLOCK(bp);
660 numrecs = be16_to_cpu(block->bb_numrecs);
661#ifdef DEBUG
662 if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
663 return error;
664 /*
665 * Check that the new entry is being inserted in the right place.
666 */
667 if (ptr <= numrecs) {
668 if (level == 0) {
669 rp = XFS_ALLOC_REC_ADDR(block, ptr, cur);
670 xfs_btree_check_rec(cur->bc_btnum, recp, rp);
671 } else { 184 } else {
672 kp = XFS_ALLOC_KEY_ADDR(block, ptr, cur); 185 len = 0;
673 xfs_btree_check_key(cur->bc_btnum, &key, kp);
674 }
675 }
676#endif
677 nbno = NULLAGBLOCK;
678 ncur = NULL;
679 /*
680 * If the block is full, we can't insert the new entry until we
681 * make the block un-full.
682 */
683 if (numrecs == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
684 /*
685 * First, try shifting an entry to the right neighbor.
686 */
687 if ((error = xfs_alloc_rshift(cur, level, &i)))
688 return error;
689 if (i) {
690 /* nothing */
691 }
692 /*
693 * Next, try shifting an entry to the left neighbor.
694 */
695 else {
696 if ((error = xfs_alloc_lshift(cur, level, &i)))
697 return error;
698 if (i)
699 optr = ptr = cur->bc_ptrs[level];
700 else {
701 /*
702 * Next, try splitting the current block in
703 * half. If this works we have to re-set our
704 * variables because we could be in a
705 * different block now.
706 */
707 if ((error = xfs_alloc_split(cur, level, &nbno,
708 &nkey, &ncur, &i)))
709 return error;
710 if (i) {
711 bp = cur->bc_bufs[level];
712 block = XFS_BUF_TO_ALLOC_BLOCK(bp);
713#ifdef DEBUG
714 if ((error =
715 xfs_btree_check_sblock(cur,
716 block, level, bp)))
717 return error;
718#endif
719 ptr = cur->bc_ptrs[level];
720 nrec.ar_startblock = nkey.ar_startblock;
721 nrec.ar_blockcount = nkey.ar_blockcount;
722 }
723 /*
724 * Otherwise the insert fails.
725 */
726 else {
727 *stat = 0;
728 return 0;
729 }
730 }
731 }
732 }
733 /*
734 * At this point we know there's room for our new entry in the block
735 * we're pointing at.
736 */
737 numrecs = be16_to_cpu(block->bb_numrecs);
738 if (level > 0) {
739 /*
740 * It's a non-leaf entry. Make a hole for the new data
741 * in the key and ptr regions of the block.
742 */
743 kp = XFS_ALLOC_KEY_ADDR(block, 1, cur);
744 pp = XFS_ALLOC_PTR_ADDR(block, 1, cur);
745#ifdef DEBUG
746 for (i = numrecs; i >= ptr; i--) {
747 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i - 1]), level)))
748 return error;
749 } 186 }
750#endif
751 memmove(&kp[ptr], &kp[ptr - 1],
752 (numrecs - ptr + 1) * sizeof(*kp));
753 memmove(&pp[ptr], &pp[ptr - 1],
754 (numrecs - ptr + 1) * sizeof(*pp));
755#ifdef DEBUG
756 if ((error = xfs_btree_check_sptr(cur, *bnop, level)))
757 return error;
758#endif
759 /*
760 * Now stuff the new data in, bump numrecs and log the new data.
761 */
762 kp[ptr - 1] = key;
763 pp[ptr - 1] = cpu_to_be32(*bnop);
764 numrecs++;
765 block->bb_numrecs = cpu_to_be16(numrecs);
766 xfs_alloc_log_keys(cur, bp, ptr, numrecs);
767 xfs_alloc_log_ptrs(cur, bp, ptr, numrecs);
768#ifdef DEBUG
769 if (ptr < numrecs)
770 xfs_btree_check_key(cur->bc_btnum, kp + ptr - 1,
771 kp + ptr);
772#endif
773 } else {
774 /*
775 * It's a leaf entry. Make a hole for the new record.
776 */
777 rp = XFS_ALLOC_REC_ADDR(block, 1, cur);
778 memmove(&rp[ptr], &rp[ptr - 1],
779 (numrecs - ptr + 1) * sizeof(*rp));
780 /*
781 * Now stuff the new record in, bump numrecs
782 * and log the new data.
783 */
784 rp[ptr - 1] = *recp;
785 numrecs++;
786 block->bb_numrecs = cpu_to_be16(numrecs);
787 xfs_alloc_log_recs(cur, bp, ptr, numrecs);
788#ifdef DEBUG
789 if (ptr < numrecs)
790 xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1,
791 rp + ptr);
792#endif
793 }
794 /*
795 * Log the new number of records in the btree header.
796 */
797 xfs_alloc_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
798 /*
799 * If we inserted at the start of a block, update the parents' keys.
800 */
801 if (optr == 1 && (error = xfs_alloc_updkey(cur, &key, level + 1)))
802 return error;
803 /*
804 * Look to see if the longest extent in the allocation group
805 * needs to be updated.
806 */
807 187
808 agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); 188 break;
809 if (level == 0 && 189 default:
810 cur->bc_btnum == XFS_BTNUM_CNT && 190 ASSERT(0);
811 be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK && 191 return;
812 be32_to_cpu(recp->ar_blockcount) > be32_to_cpu(agf->agf_longest)) {
813 /*
814 * If this is a leaf in the by-size btree and there
815 * is no right sibling block and this block is bigger
816 * than the previous longest block, update it.
817 */
818 agf->agf_longest = recp->ar_blockcount;
819 cur->bc_mp->m_perag[be32_to_cpu(agf->agf_seqno)].pagf_longest
820 = be32_to_cpu(recp->ar_blockcount);
821 xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
822 XFS_AGF_LONGEST);
823 } 192 }
824 /* 193
825 * Return the new block number, if any. 194 agf->agf_longest = len;
826 * If there is one, give back a record value and a cursor too. 195 cur->bc_mp->m_perag[seqno].pagf_longest = be32_to_cpu(len);
827 */ 196 xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, XFS_AGF_LONGEST);
828 *bnop = nbno;
829 if (nbno != NULLAGBLOCK) {
830 *recp = nrec;
831 *curp = ncur;
832 }
833 *stat = 1;
834 return 0;
835} 197}
836 198
837/* 199STATIC int
838 * Log header fields from a btree block. 200xfs_allocbt_get_minrecs(
839 */ 201 struct xfs_btree_cur *cur,
840STATIC void 202 int level)
841xfs_alloc_log_block(
842 xfs_trans_t *tp, /* transaction pointer */
843 xfs_buf_t *bp, /* buffer containing btree block */
844 int fields) /* mask of fields: XFS_BB_... */
845{ 203{
846 int first; /* first byte offset logged */ 204 return cur->bc_mp->m_alloc_mnr[level != 0];
847 int last; /* last byte offset logged */ 205}
848 static const short offsets[] = { /* table of offsets */
849 offsetof(xfs_alloc_block_t, bb_magic),
850 offsetof(xfs_alloc_block_t, bb_level),
851 offsetof(xfs_alloc_block_t, bb_numrecs),
852 offsetof(xfs_alloc_block_t, bb_leftsib),
853 offsetof(xfs_alloc_block_t, bb_rightsib),
854 sizeof(xfs_alloc_block_t)
855 };
856 206
857 xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &first, &last); 207STATIC int
858 xfs_trans_log_buf(tp, bp, first, last); 208xfs_allocbt_get_maxrecs(
209 struct xfs_btree_cur *cur,
210 int level)
211{
212 return cur->bc_mp->m_alloc_mxr[level != 0];
859} 213}
860 214
861/*
862 * Log keys from a btree block (nonleaf).
863 */
864STATIC void 215STATIC void
865xfs_alloc_log_keys( 216xfs_allocbt_init_key_from_rec(
866 xfs_btree_cur_t *cur, /* btree cursor */ 217 union xfs_btree_key *key,
867 xfs_buf_t *bp, /* buffer containing btree block */ 218 union xfs_btree_rec *rec)
868 int kfirst, /* index of first key to log */
869 int klast) /* index of last key to log */
870{ 219{
871 xfs_alloc_block_t *block; /* btree block to log from */ 220 ASSERT(rec->alloc.ar_startblock != 0);
872 int first; /* first byte offset logged */
873 xfs_alloc_key_t *kp; /* key pointer in btree block */
874 int last; /* last byte offset logged */
875 221
876 block = XFS_BUF_TO_ALLOC_BLOCK(bp); 222 key->alloc.ar_startblock = rec->alloc.ar_startblock;
877 kp = XFS_ALLOC_KEY_ADDR(block, 1, cur); 223 key->alloc.ar_blockcount = rec->alloc.ar_blockcount;
878 first = (int)((xfs_caddr_t)&kp[kfirst - 1] - (xfs_caddr_t)block);
879 last = (int)(((xfs_caddr_t)&kp[klast] - 1) - (xfs_caddr_t)block);
880 xfs_trans_log_buf(cur->bc_tp, bp, first, last);
881} 224}
882 225
883/*
884 * Log block pointer fields from a btree block (nonleaf).
885 */
886STATIC void 226STATIC void
887xfs_alloc_log_ptrs( 227xfs_allocbt_init_rec_from_key(
888 xfs_btree_cur_t *cur, /* btree cursor */ 228 union xfs_btree_key *key,
889 xfs_buf_t *bp, /* buffer containing btree block */ 229 union xfs_btree_rec *rec)
890 int pfirst, /* index of first pointer to log */
891 int plast) /* index of last pointer to log */
892{ 230{
893 xfs_alloc_block_t *block; /* btree block to log from */ 231 ASSERT(key->alloc.ar_startblock != 0);
894 int first; /* first byte offset logged */
895 int last; /* last byte offset logged */
896 xfs_alloc_ptr_t *pp; /* block-pointer pointer in btree blk */
897 232
898 block = XFS_BUF_TO_ALLOC_BLOCK(bp); 233 rec->alloc.ar_startblock = key->alloc.ar_startblock;
899 pp = XFS_ALLOC_PTR_ADDR(block, 1, cur); 234 rec->alloc.ar_blockcount = key->alloc.ar_blockcount;
900 first = (int)((xfs_caddr_t)&pp[pfirst - 1] - (xfs_caddr_t)block);
901 last = (int)(((xfs_caddr_t)&pp[plast] - 1) - (xfs_caddr_t)block);
902 xfs_trans_log_buf(cur->bc_tp, bp, first, last);
903} 235}
904 236
905/*
906 * Log records from a btree block (leaf).
907 */
908STATIC void 237STATIC void
909xfs_alloc_log_recs( 238xfs_allocbt_init_rec_from_cur(
910 xfs_btree_cur_t *cur, /* btree cursor */ 239 struct xfs_btree_cur *cur,
911 xfs_buf_t *bp, /* buffer containing btree block */ 240 union xfs_btree_rec *rec)
912 int rfirst, /* index of first record to log */
913 int rlast) /* index of last record to log */
914{ 241{
915 xfs_alloc_block_t *block; /* btree block to log from */ 242 ASSERT(cur->bc_rec.a.ar_startblock != 0);
916 int first; /* first byte offset logged */
917 int last; /* last byte offset logged */
918 xfs_alloc_rec_t *rp; /* record pointer for btree block */
919
920 243
921 block = XFS_BUF_TO_ALLOC_BLOCK(bp); 244 rec->alloc.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock);
922 rp = XFS_ALLOC_REC_ADDR(block, 1, cur); 245 rec->alloc.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount);
923#ifdef DEBUG
924 {
925 xfs_agf_t *agf;
926 xfs_alloc_rec_t *p;
927
928 agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
929 for (p = &rp[rfirst - 1]; p <= &rp[rlast - 1]; p++)
930 ASSERT(be32_to_cpu(p->ar_startblock) +
931 be32_to_cpu(p->ar_blockcount) <=
932 be32_to_cpu(agf->agf_length));
933 }
934#endif
935 first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block);
936 last = (int)(((xfs_caddr_t)&rp[rlast] - 1) - (xfs_caddr_t)block);
937 xfs_trans_log_buf(cur->bc_tp, bp, first, last);
938} 246}
939 247
940/* 248STATIC void
941 * Lookup the record. The cursor is made to point to it, based on dir. 249xfs_allocbt_init_ptr_from_cur(
942 * Return 0 if can't find any such record, 1 for success. 250 struct xfs_btree_cur *cur,
943 */ 251 union xfs_btree_ptr *ptr)
944STATIC int /* error */
945xfs_alloc_lookup(
946 xfs_btree_cur_t *cur, /* btree cursor */
947 xfs_lookup_t dir, /* <=, ==, or >= */
948 int *stat) /* success/failure */
949{ 252{
950 xfs_agblock_t agbno; /* a.g. relative btree block number */ 253 struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
951 xfs_agnumber_t agno; /* allocation group number */
952 xfs_alloc_block_t *block=NULL; /* current btree block */
953 int diff; /* difference for the current key */
954 int error; /* error return value */
955 int keyno=0; /* current key number */
956 int level; /* level in the btree */
957 xfs_mount_t *mp; /* file system mount point */
958
959 XFS_STATS_INC(xs_abt_lookup);
960 /*
961 * Get the allocation group header, and the root block number.
962 */
963 mp = cur->bc_mp;
964
965 {
966 xfs_agf_t *agf; /* a.g. freespace header */
967
968 agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
969 agno = be32_to_cpu(agf->agf_seqno);
970 agbno = be32_to_cpu(agf->agf_roots[cur->bc_btnum]);
971 }
972 /*
973 * Iterate over each level in the btree, starting at the root.
974 * For each level above the leaves, find the key we need, based
975 * on the lookup record, then follow the corresponding block
976 * pointer down to the next level.
977 */
978 for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
979 xfs_buf_t *bp; /* buffer pointer for btree block */
980 xfs_daddr_t d; /* disk address of btree block */
981
982 /*
983 * Get the disk address we're looking for.
984 */
985 d = XFS_AGB_TO_DADDR(mp, agno, agbno);
986 /*
987 * If the old buffer at this level is for a different block,
988 * throw it away, otherwise just use it.
989 */
990 bp = cur->bc_bufs[level];
991 if (bp && XFS_BUF_ADDR(bp) != d)
992 bp = NULL;
993 if (!bp) {
994 /*
995 * Need to get a new buffer. Read it, then
996 * set it in the cursor, releasing the old one.
997 */
998 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, agno,
999 agbno, 0, &bp, XFS_ALLOC_BTREE_REF)))
1000 return error;
1001 xfs_btree_setbuf(cur, level, bp);
1002 /*
1003 * Point to the btree block, now that we have the buffer
1004 */
1005 block = XFS_BUF_TO_ALLOC_BLOCK(bp);
1006 if ((error = xfs_btree_check_sblock(cur, block, level,
1007 bp)))
1008 return error;
1009 } else
1010 block = XFS_BUF_TO_ALLOC_BLOCK(bp);
1011 /*
1012 * If we already had a key match at a higher level, we know
1013 * we need to use the first entry in this block.
1014 */
1015 if (diff == 0)
1016 keyno = 1;
1017 /*
1018 * Otherwise we need to search this block. Do a binary search.
1019 */
1020 else {
1021 int high; /* high entry number */
1022 xfs_alloc_key_t *kkbase=NULL;/* base of keys in block */
1023 xfs_alloc_rec_t *krbase=NULL;/* base of records in block */
1024 int low; /* low entry number */
1025
1026 /*
1027 * Get a pointer to keys or records.
1028 */
1029 if (level > 0)
1030 kkbase = XFS_ALLOC_KEY_ADDR(block, 1, cur);
1031 else
1032 krbase = XFS_ALLOC_REC_ADDR(block, 1, cur);
1033 /*
1034 * Set low and high entry numbers, 1-based.
1035 */
1036 low = 1;
1037 if (!(high = be16_to_cpu(block->bb_numrecs))) {
1038 /*
1039 * If the block is empty, the tree must
1040 * be an empty leaf.
1041 */
1042 ASSERT(level == 0 && cur->bc_nlevels == 1);
1043 cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
1044 *stat = 0;
1045 return 0;
1046 }
1047 /*
1048 * Binary search the block.
1049 */
1050 while (low <= high) {
1051 xfs_extlen_t blockcount; /* key value */
1052 xfs_agblock_t startblock; /* key value */
1053
1054 XFS_STATS_INC(xs_abt_compare);
1055 /*
1056 * keyno is average of low and high.
1057 */
1058 keyno = (low + high) >> 1;
1059 /*
1060 * Get startblock & blockcount.
1061 */
1062 if (level > 0) {
1063 xfs_alloc_key_t *kkp;
1064
1065 kkp = kkbase + keyno - 1;
1066 startblock = be32_to_cpu(kkp->ar_startblock);
1067 blockcount = be32_to_cpu(kkp->ar_blockcount);
1068 } else {
1069 xfs_alloc_rec_t *krp;
1070 254
1071 krp = krbase + keyno - 1; 255 ASSERT(cur->bc_private.a.agno == be32_to_cpu(agf->agf_seqno));
1072 startblock = be32_to_cpu(krp->ar_startblock); 256 ASSERT(agf->agf_roots[cur->bc_btnum] != 0);
1073 blockcount = be32_to_cpu(krp->ar_blockcount);
1074 }
1075 /*
1076 * Compute difference to get next direction.
1077 */
1078 if (cur->bc_btnum == XFS_BTNUM_BNO)
1079 diff = (int)startblock -
1080 (int)cur->bc_rec.a.ar_startblock;
1081 else if (!(diff = (int)blockcount -
1082 (int)cur->bc_rec.a.ar_blockcount))
1083 diff = (int)startblock -
1084 (int)cur->bc_rec.a.ar_startblock;
1085 /*
1086 * Less than, move right.
1087 */
1088 if (diff < 0)
1089 low = keyno + 1;
1090 /*
1091 * Greater than, move left.
1092 */
1093 else if (diff > 0)
1094 high = keyno - 1;
1095 /*
1096 * Equal, we're done.
1097 */
1098 else
1099 break;
1100 }
1101 }
1102 /*
1103 * If there are more levels, set up for the next level
1104 * by getting the block number and filling in the cursor.
1105 */
1106 if (level > 0) {
1107 /*
1108 * If we moved left, need the previous key number,
1109 * unless there isn't one.
1110 */
1111 if (diff > 0 && --keyno < 1)
1112 keyno = 1;
1113 agbno = be32_to_cpu(*XFS_ALLOC_PTR_ADDR(block, keyno, cur));
1114#ifdef DEBUG
1115 if ((error = xfs_btree_check_sptr(cur, agbno, level)))
1116 return error;
1117#endif
1118 cur->bc_ptrs[level] = keyno;
1119 }
1120 }
1121 /*
1122 * Done with the search.
1123 * See if we need to adjust the results.
1124 */
1125 if (dir != XFS_LOOKUP_LE && diff < 0) {
1126 keyno++;
1127 /*
1128 * If ge search and we went off the end of the block, but it's
1129 * not the last block, we're in the wrong block.
1130 */
1131 if (dir == XFS_LOOKUP_GE &&
1132 keyno > be16_to_cpu(block->bb_numrecs) &&
1133 be32_to_cpu(block->bb_rightsib) != NULLAGBLOCK) {
1134 int i;
1135 257
1136 cur->bc_ptrs[0] = keyno; 258 ptr->s = agf->agf_roots[cur->bc_btnum];
1137 if ((error = xfs_alloc_increment(cur, 0, &i)))
1138 return error;
1139 XFS_WANT_CORRUPTED_RETURN(i == 1);
1140 *stat = 1;
1141 return 0;
1142 }
1143 }
1144 else if (dir == XFS_LOOKUP_LE && diff > 0)
1145 keyno--;
1146 cur->bc_ptrs[0] = keyno;
1147 /*
1148 * Return if we succeeded or not.
1149 */
1150 if (keyno == 0 || keyno > be16_to_cpu(block->bb_numrecs))
1151 *stat = 0;
1152 else
1153 *stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0));
1154 return 0;
1155} 259}
1156 260
1157/* 261STATIC __int64_t
1158 * Move 1 record left from cur/level if possible. 262xfs_allocbt_key_diff(
1159 * Update cur to reflect the new path. 263 struct xfs_btree_cur *cur,
1160 */ 264 union xfs_btree_key *key)
1161STATIC int /* error */
1162xfs_alloc_lshift(
1163 xfs_btree_cur_t *cur, /* btree cursor */
1164 int level, /* level to shift record on */
1165 int *stat) /* success/failure */
1166{ 265{
1167 int error; /* error return value */ 266 xfs_alloc_rec_incore_t *rec = &cur->bc_rec.a;
1168#ifdef DEBUG 267 xfs_alloc_key_t *kp = &key->alloc;
1169 int i; /* loop index */ 268 __int64_t diff;
1170#endif
1171 xfs_alloc_key_t key; /* key value for leaf level upward */
1172 xfs_buf_t *lbp; /* buffer for left neighbor block */
1173 xfs_alloc_block_t *left; /* left neighbor btree block */
1174 int nrec; /* new number of left block entries */
1175 xfs_buf_t *rbp; /* buffer for right (current) block */
1176 xfs_alloc_block_t *right; /* right (current) btree block */
1177 xfs_alloc_key_t *rkp=NULL; /* key pointer for right block */
1178 xfs_alloc_ptr_t *rpp=NULL; /* address pointer for right block */
1179 xfs_alloc_rec_t *rrp=NULL; /* record pointer for right block */
1180 269
1181 /* 270 if (cur->bc_btnum == XFS_BTNUM_BNO) {
1182 * Set up variables for this block as "right". 271 return (__int64_t)be32_to_cpu(kp->ar_startblock) -
1183 */ 272 rec->ar_startblock;
1184 rbp = cur->bc_bufs[level];
1185 right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
1186#ifdef DEBUG
1187 if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
1188 return error;
1189#endif
1190 /*
1191 * If we've got no left sibling then we can't shift an entry left.
1192 */
1193 if (be32_to_cpu(right->bb_leftsib) == NULLAGBLOCK) {
1194 *stat = 0;
1195 return 0;
1196 }
1197 /*
1198 * If the cursor entry is the one that would be moved, don't
1199 * do it... it's too complicated.
1200 */
1201 if (cur->bc_ptrs[level] <= 1) {
1202 *stat = 0;
1203 return 0;
1204 }
1205 /*
1206 * Set up the left neighbor as "left".
1207 */
1208 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1209 cur->bc_private.a.agno, be32_to_cpu(right->bb_leftsib),
1210 0, &lbp, XFS_ALLOC_BTREE_REF)))
1211 return error;
1212 left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
1213 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1214 return error;
1215 /*
1216 * If it's full, it can't take another entry.
1217 */
1218 if (be16_to_cpu(left->bb_numrecs) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
1219 *stat = 0;
1220 return 0;
1221 } 273 }
1222 nrec = be16_to_cpu(left->bb_numrecs) + 1;
1223 /*
1224 * If non-leaf, copy a key and a ptr to the left block.
1225 */
1226 if (level > 0) {
1227 xfs_alloc_key_t *lkp; /* key pointer for left block */
1228 xfs_alloc_ptr_t *lpp; /* address pointer for left block */
1229 274
1230 lkp = XFS_ALLOC_KEY_ADDR(left, nrec, cur); 275 diff = (__int64_t)be32_to_cpu(kp->ar_blockcount) - rec->ar_blockcount;
1231 rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur); 276 if (diff)
1232 *lkp = *rkp; 277 return diff;
1233 xfs_alloc_log_keys(cur, lbp, nrec, nrec);
1234 lpp = XFS_ALLOC_PTR_ADDR(left, nrec, cur);
1235 rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
1236#ifdef DEBUG
1237 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*rpp), level)))
1238 return error;
1239#endif
1240 *lpp = *rpp;
1241 xfs_alloc_log_ptrs(cur, lbp, nrec, nrec);
1242 xfs_btree_check_key(cur->bc_btnum, lkp - 1, lkp);
1243 }
1244 /*
1245 * If leaf, copy a record to the left block.
1246 */
1247 else {
1248 xfs_alloc_rec_t *lrp; /* record pointer for left block */
1249 278
1250 lrp = XFS_ALLOC_REC_ADDR(left, nrec, cur); 279 return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
1251 rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
1252 *lrp = *rrp;
1253 xfs_alloc_log_recs(cur, lbp, nrec, nrec);
1254 xfs_btree_check_rec(cur->bc_btnum, lrp - 1, lrp);
1255 }
1256 /*
1257 * Bump and log left's numrecs, decrement and log right's numrecs.
1258 */
1259 be16_add_cpu(&left->bb_numrecs, 1);
1260 xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
1261 be16_add_cpu(&right->bb_numrecs, -1);
1262 xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
1263 /*
1264 * Slide the contents of right down one entry.
1265 */
1266 if (level > 0) {
1267#ifdef DEBUG
1268 for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
1269 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i + 1]),
1270 level)))
1271 return error;
1272 }
1273#endif
1274 memmove(rkp, rkp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
1275 memmove(rpp, rpp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1276 xfs_alloc_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1277 xfs_alloc_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1278 } else {
1279 memmove(rrp, rrp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1280 xfs_alloc_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1281 key.ar_startblock = rrp->ar_startblock;
1282 key.ar_blockcount = rrp->ar_blockcount;
1283 rkp = &key;
1284 }
1285 /*
1286 * Update the parent key values of right.
1287 */
1288 if ((error = xfs_alloc_updkey(cur, rkp, level + 1)))
1289 return error;
1290 /*
1291 * Slide the cursor value left one.
1292 */
1293 cur->bc_ptrs[level]--;
1294 *stat = 1;
1295 return 0;
1296} 280}
1297 281
1298/* 282STATIC int
1299 * Allocate a new root block, fill it in. 283xfs_allocbt_kill_root(
1300 */ 284 struct xfs_btree_cur *cur,
1301STATIC int /* error */ 285 struct xfs_buf *bp,
1302xfs_alloc_newroot( 286 int level,
1303 xfs_btree_cur_t *cur, /* btree cursor */ 287 union xfs_btree_ptr *newroot)
1304 int *stat) /* success/failure */
1305{ 288{
1306 int error; /* error return value */ 289 int error;
1307 xfs_agblock_t lbno; /* left block number */
1308 xfs_buf_t *lbp; /* left btree buffer */
1309 xfs_alloc_block_t *left; /* left btree block */
1310 xfs_mount_t *mp; /* mount structure */
1311 xfs_agblock_t nbno; /* new block number */
1312 xfs_buf_t *nbp; /* new (root) buffer */
1313 xfs_alloc_block_t *new; /* new (root) btree block */
1314 int nptr; /* new value for key index, 1 or 2 */
1315 xfs_agblock_t rbno; /* right block number */
1316 xfs_buf_t *rbp; /* right btree buffer */
1317 xfs_alloc_block_t *right; /* right btree block */
1318
1319 mp = cur->bc_mp;
1320 290
1321 ASSERT(cur->bc_nlevels < XFS_AG_MAXLEVELS(mp)); 291 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1322 /* 292 XFS_BTREE_STATS_INC(cur, killroot);
1323 * Get a buffer from the freelist blocks, for the new root.
1324 */
1325 error = xfs_alloc_get_freelist(cur->bc_tp,
1326 cur->bc_private.a.agbp, &nbno, 1);
1327 if (error)
1328 return error;
1329 /*
1330 * None available, we fail.
1331 */
1332 if (nbno == NULLAGBLOCK) {
1333 *stat = 0;
1334 return 0;
1335 }
1336 xfs_trans_agbtree_delta(cur->bc_tp, 1);
1337 nbp = xfs_btree_get_bufs(mp, cur->bc_tp, cur->bc_private.a.agno, nbno,
1338 0);
1339 new = XFS_BUF_TO_ALLOC_BLOCK(nbp);
1340 /*
1341 * Set the root data in the a.g. freespace structure.
1342 */
1343 {
1344 xfs_agf_t *agf; /* a.g. freespace header */
1345 xfs_agnumber_t seqno;
1346 293
1347 agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
1348 agf->agf_roots[cur->bc_btnum] = cpu_to_be32(nbno);
1349 be32_add_cpu(&agf->agf_levels[cur->bc_btnum], 1);
1350 seqno = be32_to_cpu(agf->agf_seqno);
1351 mp->m_perag[seqno].pagf_levels[cur->bc_btnum]++;
1352 xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
1353 XFS_AGF_ROOTS | XFS_AGF_LEVELS);
1354 }
1355 /* 294 /*
1356 * At the previous root level there are now two blocks: the old 295 * Update the root pointer, decreasing the level by 1 and then
1357 * root, and the new block generated when it was split. 296 * free the old root.
1358 * We don't know which one the cursor is pointing at, so we
1359 * set up variables "left" and "right" for each case.
1360 */ 297 */
1361 lbp = cur->bc_bufs[cur->bc_nlevels - 1]; 298 xfs_allocbt_set_root(cur, newroot, -1);
1362 left = XFS_BUF_TO_ALLOC_BLOCK(lbp); 299 error = xfs_allocbt_free_block(cur, bp);
1363#ifdef DEBUG 300 if (error) {
1364 if ((error = xfs_btree_check_sblock(cur, left, cur->bc_nlevels - 1, lbp))) 301 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1365 return error; 302 return error;
1366#endif
1367 if (be32_to_cpu(left->bb_rightsib) != NULLAGBLOCK) {
1368 /*
1369 * Our block is left, pick up the right block.
1370 */
1371 lbno = XFS_DADDR_TO_AGBNO(mp, XFS_BUF_ADDR(lbp));
1372 rbno = be32_to_cpu(left->bb_rightsib);
1373 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
1374 cur->bc_private.a.agno, rbno, 0, &rbp,
1375 XFS_ALLOC_BTREE_REF)))
1376 return error;
1377 right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
1378 if ((error = xfs_btree_check_sblock(cur, right,
1379 cur->bc_nlevels - 1, rbp)))
1380 return error;
1381 nptr = 1;
1382 } else {
1383 /*
1384 * Our block is right, pick up the left block.
1385 */
1386 rbp = lbp;
1387 right = left;
1388 rbno = XFS_DADDR_TO_AGBNO(mp, XFS_BUF_ADDR(rbp));
1389 lbno = be32_to_cpu(right->bb_leftsib);
1390 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
1391 cur->bc_private.a.agno, lbno, 0, &lbp,
1392 XFS_ALLOC_BTREE_REF)))
1393 return error;
1394 left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
1395 if ((error = xfs_btree_check_sblock(cur, left,
1396 cur->bc_nlevels - 1, lbp)))
1397 return error;
1398 nptr = 2;
1399 } 303 }
1400 /*
1401 * Fill in the new block's btree header and log it.
1402 */
1403 new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
1404 new->bb_level = cpu_to_be16(cur->bc_nlevels);
1405 new->bb_numrecs = cpu_to_be16(2);
1406 new->bb_leftsib = cpu_to_be32(NULLAGBLOCK);
1407 new->bb_rightsib = cpu_to_be32(NULLAGBLOCK);
1408 xfs_alloc_log_block(cur->bc_tp, nbp, XFS_BB_ALL_BITS);
1409 ASSERT(lbno != NULLAGBLOCK && rbno != NULLAGBLOCK);
1410 /*
1411 * Fill in the key data in the new root.
1412 */
1413 {
1414 xfs_alloc_key_t *kp; /* btree key pointer */
1415 304
1416 kp = XFS_ALLOC_KEY_ADDR(new, 1, cur); 305 XFS_BTREE_STATS_INC(cur, free);
1417 if (be16_to_cpu(left->bb_level) > 0) {
1418 kp[0] = *XFS_ALLOC_KEY_ADDR(left, 1, cur);
1419 kp[1] = *XFS_ALLOC_KEY_ADDR(right, 1, cur);
1420 } else {
1421 xfs_alloc_rec_t *rp; /* btree record pointer */
1422 306
1423 rp = XFS_ALLOC_REC_ADDR(left, 1, cur); 307 xfs_btree_setbuf(cur, level, NULL);
1424 kp[0].ar_startblock = rp->ar_startblock; 308 cur->bc_nlevels--;
1425 kp[0].ar_blockcount = rp->ar_blockcount;
1426 rp = XFS_ALLOC_REC_ADDR(right, 1, cur);
1427 kp[1].ar_startblock = rp->ar_startblock;
1428 kp[1].ar_blockcount = rp->ar_blockcount;
1429 }
1430 }
1431 xfs_alloc_log_keys(cur, nbp, 1, 2);
1432 /*
1433 * Fill in the pointer data in the new root.
1434 */
1435 {
1436 xfs_alloc_ptr_t *pp; /* btree address pointer */
1437 309
1438 pp = XFS_ALLOC_PTR_ADDR(new, 1, cur); 310 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1439 pp[0] = cpu_to_be32(lbno);
1440 pp[1] = cpu_to_be32(rbno);
1441 }
1442 xfs_alloc_log_ptrs(cur, nbp, 1, 2);
1443 /*
1444 * Fix up the cursor.
1445 */
1446 xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
1447 cur->bc_ptrs[cur->bc_nlevels] = nptr;
1448 cur->bc_nlevels++;
1449 *stat = 1;
1450 return 0; 311 return 0;
1451} 312}
1452 313
1453/*
1454 * Move 1 record right from cur/level if possible.
1455 * Update cur to reflect the new path.
1456 */
1457STATIC int /* error */
1458xfs_alloc_rshift(
1459 xfs_btree_cur_t *cur, /* btree cursor */
1460 int level, /* level to shift record on */
1461 int *stat) /* success/failure */
1462{
1463 int error; /* error return value */
1464 int i; /* loop index */
1465 xfs_alloc_key_t key; /* key value for leaf level upward */
1466 xfs_buf_t *lbp; /* buffer for left (current) block */
1467 xfs_alloc_block_t *left; /* left (current) btree block */
1468 xfs_buf_t *rbp; /* buffer for right neighbor block */
1469 xfs_alloc_block_t *right; /* right neighbor btree block */
1470 xfs_alloc_key_t *rkp; /* key pointer for right block */
1471 xfs_btree_cur_t *tcur; /* temporary cursor */
1472
1473 /*
1474 * Set up variables for this block as "left".
1475 */
1476 lbp = cur->bc_bufs[level];
1477 left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
1478#ifdef DEBUG
1479 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1480 return error;
1481#endif
1482 /*
1483 * If we've got no right sibling then we can't shift an entry right.
1484 */
1485 if (be32_to_cpu(left->bb_rightsib) == NULLAGBLOCK) {
1486 *stat = 0;
1487 return 0;
1488 }
1489 /*
1490 * If the cursor entry is the one that would be moved, don't
1491 * do it... it's too complicated.
1492 */
1493 if (cur->bc_ptrs[level] >= be16_to_cpu(left->bb_numrecs)) {
1494 *stat = 0;
1495 return 0;
1496 }
1497 /*
1498 * Set up the right neighbor as "right".
1499 */
1500 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1501 cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib),
1502 0, &rbp, XFS_ALLOC_BTREE_REF)))
1503 return error;
1504 right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
1505 if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
1506 return error;
1507 /*
1508 * If it's full, it can't take another entry.
1509 */
1510 if (be16_to_cpu(right->bb_numrecs) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
1511 *stat = 0;
1512 return 0;
1513 }
1514 /*
1515 * Make a hole at the start of the right neighbor block, then
1516 * copy the last left block entry to the hole.
1517 */
1518 if (level > 0) {
1519 xfs_alloc_key_t *lkp; /* key pointer for left block */
1520 xfs_alloc_ptr_t *lpp; /* address pointer for left block */
1521 xfs_alloc_ptr_t *rpp; /* address pointer for right block */
1522
1523 lkp = XFS_ALLOC_KEY_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
1524 lpp = XFS_ALLOC_PTR_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
1525 rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
1526 rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
1527#ifdef DEBUG 314#ifdef DEBUG
1528 for (i = be16_to_cpu(right->bb_numrecs) - 1; i >= 0; i--) { 315STATIC int
1529 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level))) 316xfs_allocbt_keys_inorder(
1530 return error; 317 struct xfs_btree_cur *cur,
1531 } 318 union xfs_btree_key *k1,
1532#endif 319 union xfs_btree_key *k2)
1533 memmove(rkp + 1, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp)); 320{
1534 memmove(rpp + 1, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp)); 321 if (cur->bc_btnum == XFS_BTNUM_BNO) {
1535#ifdef DEBUG 322 return be32_to_cpu(k1->alloc.ar_startblock) <
1536 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*lpp), level))) 323 be32_to_cpu(k2->alloc.ar_startblock);
1537 return error;
1538#endif
1539 *rkp = *lkp;
1540 *rpp = *lpp;
1541 xfs_alloc_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1542 xfs_alloc_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1543 xfs_btree_check_key(cur->bc_btnum, rkp, rkp + 1);
1544 } else { 324 } else {
1545 xfs_alloc_rec_t *lrp; /* record pointer for left block */ 325 return be32_to_cpu(k1->alloc.ar_blockcount) <
1546 xfs_alloc_rec_t *rrp; /* record pointer for right block */ 326 be32_to_cpu(k2->alloc.ar_blockcount) ||
1547 327 (k1->alloc.ar_blockcount == k2->alloc.ar_blockcount &&
1548 lrp = XFS_ALLOC_REC_ADDR(left, be16_to_cpu(left->bb_numrecs), cur); 328 be32_to_cpu(k1->alloc.ar_startblock) <
1549 rrp = XFS_ALLOC_REC_ADDR(right, 1, cur); 329 be32_to_cpu(k2->alloc.ar_startblock));
1550 memmove(rrp + 1, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1551 *rrp = *lrp;
1552 xfs_alloc_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1553 key.ar_startblock = rrp->ar_startblock;
1554 key.ar_blockcount = rrp->ar_blockcount;
1555 rkp = &key;
1556 xfs_btree_check_rec(cur->bc_btnum, rrp, rrp + 1);
1557 } 330 }
1558 /*
1559 * Decrement and log left's numrecs, bump and log right's numrecs.
1560 */
1561 be16_add_cpu(&left->bb_numrecs, -1);
1562 xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
1563 be16_add_cpu(&right->bb_numrecs, 1);
1564 xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
1565 /*
1566 * Using a temporary cursor, update the parent key values of the
1567 * block on the right.
1568 */
1569 if ((error = xfs_btree_dup_cursor(cur, &tcur)))
1570 return error;
1571 i = xfs_btree_lastrec(tcur, level);
1572 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1573 if ((error = xfs_alloc_increment(tcur, level, &i)) ||
1574 (error = xfs_alloc_updkey(tcur, rkp, level + 1)))
1575 goto error0;
1576 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
1577 *stat = 1;
1578 return 0;
1579error0:
1580 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
1581 return error;
1582} 331}
1583 332
1584/* 333STATIC int
1585 * Split cur/level block in half. 334xfs_allocbt_recs_inorder(
1586 * Return new block number and its first record (to be inserted into parent). 335 struct xfs_btree_cur *cur,
1587 */ 336 union xfs_btree_rec *r1,
1588STATIC int /* error */ 337 union xfs_btree_rec *r2)
1589xfs_alloc_split(
1590 xfs_btree_cur_t *cur, /* btree cursor */
1591 int level, /* level to split */
1592 xfs_agblock_t *bnop, /* output: block number allocated */
1593 xfs_alloc_key_t *keyp, /* output: first key of new block */
1594 xfs_btree_cur_t **curp, /* output: new cursor */
1595 int *stat) /* success/failure */
1596{ 338{
1597 int error; /* error return value */ 339 if (cur->bc_btnum == XFS_BTNUM_BNO) {
1598 int i; /* loop index/record number */ 340 return be32_to_cpu(r1->alloc.ar_startblock) +
1599 xfs_agblock_t lbno; /* left (current) block number */ 341 be32_to_cpu(r1->alloc.ar_blockcount) <=
1600 xfs_buf_t *lbp; /* buffer for left block */ 342 be32_to_cpu(r2->alloc.ar_startblock);
1601 xfs_alloc_block_t *left; /* left (current) btree block */ 343 } else {
1602 xfs_agblock_t rbno; /* right (new) block number */ 344 return be32_to_cpu(r1->alloc.ar_blockcount) <
1603 xfs_buf_t *rbp; /* buffer for right block */ 345 be32_to_cpu(r2->alloc.ar_blockcount) ||
1604 xfs_alloc_block_t *right; /* right (new) btree block */ 346 (r1->alloc.ar_blockcount == r2->alloc.ar_blockcount &&
1605 347 be32_to_cpu(r1->alloc.ar_startblock) <
1606 /* 348 be32_to_cpu(r2->alloc.ar_startblock));
1607 * Allocate the new block from the freelist.
1608 * If we can't do it, we're toast. Give up.
1609 */
1610 error = xfs_alloc_get_freelist(cur->bc_tp,
1611 cur->bc_private.a.agbp, &rbno, 1);
1612 if (error)
1613 return error;
1614 if (rbno == NULLAGBLOCK) {
1615 *stat = 0;
1616 return 0;
1617 }
1618 xfs_trans_agbtree_delta(cur->bc_tp, 1);
1619 rbp = xfs_btree_get_bufs(cur->bc_mp, cur->bc_tp, cur->bc_private.a.agno,
1620 rbno, 0);
1621 /*
1622 * Set up the new block as "right".
1623 */
1624 right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
1625 /*
1626 * "Left" is the current (according to the cursor) block.
1627 */
1628 lbp = cur->bc_bufs[level];
1629 left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
1630#ifdef DEBUG
1631 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1632 return error;
1633#endif
1634 /*
1635 * Fill in the btree header for the new block.
1636 */
1637 right->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
1638 right->bb_level = left->bb_level;
1639 right->bb_numrecs = cpu_to_be16(be16_to_cpu(left->bb_numrecs) / 2);
1640 /*
1641 * Make sure that if there's an odd number of entries now, that
1642 * each new block will have the same number of entries.
1643 */
1644 if ((be16_to_cpu(left->bb_numrecs) & 1) &&
1645 cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1)
1646 be16_add_cpu(&right->bb_numrecs, 1);
1647 i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1;
1648 /*
1649 * For non-leaf blocks, copy keys and addresses over to the new block.
1650 */
1651 if (level > 0) {
1652 xfs_alloc_key_t *lkp; /* left btree key pointer */
1653 xfs_alloc_ptr_t *lpp; /* left btree address pointer */
1654 xfs_alloc_key_t *rkp; /* right btree key pointer */
1655 xfs_alloc_ptr_t *rpp; /* right btree address pointer */
1656
1657 lkp = XFS_ALLOC_KEY_ADDR(left, i, cur);
1658 lpp = XFS_ALLOC_PTR_ADDR(left, i, cur);
1659 rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
1660 rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
1661#ifdef DEBUG
1662 for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
1663 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(lpp[i]), level)))
1664 return error;
1665 }
1666#endif
1667 memcpy(rkp, lkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
1668 memcpy(rpp, lpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1669 xfs_alloc_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1670 xfs_alloc_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1671 *keyp = *rkp;
1672 } 349 }
1673 /* 350}
1674 * For leaf blocks, copy records over to the new block. 351#endif /* DEBUG */
1675 */
1676 else {
1677 xfs_alloc_rec_t *lrp; /* left btree record pointer */
1678 xfs_alloc_rec_t *rrp; /* right btree record pointer */
1679 352
1680 lrp = XFS_ALLOC_REC_ADDR(left, i, cur); 353#ifdef XFS_BTREE_TRACE
1681 rrp = XFS_ALLOC_REC_ADDR(right, 1, cur); 354ktrace_t *xfs_allocbt_trace_buf;
1682 memcpy(rrp, lrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1683 xfs_alloc_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1684 keyp->ar_startblock = rrp->ar_startblock;
1685 keyp->ar_blockcount = rrp->ar_blockcount;
1686 }
1687 /*
1688 * Find the left block number by looking in the buffer.
1689 * Adjust numrecs, sibling pointers.
1690 */
1691 lbno = XFS_DADDR_TO_AGBNO(cur->bc_mp, XFS_BUF_ADDR(lbp));
1692 be16_add_cpu(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs)));
1693 right->bb_rightsib = left->bb_rightsib;
1694 left->bb_rightsib = cpu_to_be32(rbno);
1695 right->bb_leftsib = cpu_to_be32(lbno);
1696 xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_ALL_BITS);
1697 xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
1698 /*
1699 * If there's a block to the new block's right, make that block
1700 * point back to right instead of to left.
1701 */
1702 if (be32_to_cpu(right->bb_rightsib) != NULLAGBLOCK) {
1703 xfs_alloc_block_t *rrblock; /* rr btree block */
1704 xfs_buf_t *rrbp; /* buffer for rrblock */
1705 355
1706 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, 356STATIC void
1707 cur->bc_private.a.agno, be32_to_cpu(right->bb_rightsib), 0, 357xfs_allocbt_trace_enter(
1708 &rrbp, XFS_ALLOC_BTREE_REF))) 358 struct xfs_btree_cur *cur,
1709 return error; 359 const char *func,
1710 rrblock = XFS_BUF_TO_ALLOC_BLOCK(rrbp); 360 char *s,
1711 if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp))) 361 int type,
1712 return error; 362 int line,
1713 rrblock->bb_leftsib = cpu_to_be32(rbno); 363 __psunsigned_t a0,
1714 xfs_alloc_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB); 364 __psunsigned_t a1,
1715 } 365 __psunsigned_t a2,
1716 /* 366 __psunsigned_t a3,
1717 * If the cursor is really in the right block, move it there. 367 __psunsigned_t a4,
1718 * If it's just pointing past the last entry in left, then we'll 368 __psunsigned_t a5,
1719 * insert there, so don't change anything in that case. 369 __psunsigned_t a6,
1720 */ 370 __psunsigned_t a7,
1721 if (cur->bc_ptrs[level] > be16_to_cpu(left->bb_numrecs) + 1) { 371 __psunsigned_t a8,
1722 xfs_btree_setbuf(cur, level, rbp); 372 __psunsigned_t a9,
1723 cur->bc_ptrs[level] -= be16_to_cpu(left->bb_numrecs); 373 __psunsigned_t a10)
1724 } 374{
1725 /* 375 ktrace_enter(xfs_allocbt_trace_buf, (void *)(__psint_t)type,
1726 * If there are more levels, we'll need another cursor which refers to 376 (void *)func, (void *)s, NULL, (void *)cur,
1727 * the right block, no matter where this cursor was. 377 (void *)a0, (void *)a1, (void *)a2, (void *)a3,
1728 */ 378 (void *)a4, (void *)a5, (void *)a6, (void *)a7,
1729 if (level + 1 < cur->bc_nlevels) { 379 (void *)a8, (void *)a9, (void *)a10);
1730 if ((error = xfs_btree_dup_cursor(cur, curp)))
1731 return error;
1732 (*curp)->bc_ptrs[level + 1]++;
1733 }
1734 *bnop = rbno;
1735 *stat = 1;
1736 return 0;
1737} 380}
1738 381
1739/* 382STATIC void
1740 * Update keys at all levels from here to the root along the cursor's path. 383xfs_allocbt_trace_cursor(
1741 */ 384 struct xfs_btree_cur *cur,
1742STATIC int /* error */ 385 __uint32_t *s0,
1743xfs_alloc_updkey( 386 __uint64_t *l0,
1744 xfs_btree_cur_t *cur, /* btree cursor */ 387 __uint64_t *l1)
1745 xfs_alloc_key_t *keyp, /* new key value to update to */
1746 int level) /* starting level for update */
1747{ 388{
1748 int ptr; /* index of key in block */ 389 *s0 = cur->bc_private.a.agno;
1749 390 *l0 = cur->bc_rec.a.ar_startblock;
1750 /* 391 *l1 = cur->bc_rec.a.ar_blockcount;
1751 * Go up the tree from this level toward the root.
1752 * At each level, update the key value to the value input.
1753 * Stop when we reach a level where the cursor isn't pointing
1754 * at the first entry in the block.
1755 */
1756 for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
1757 xfs_alloc_block_t *block; /* btree block */
1758 xfs_buf_t *bp; /* buffer for block */
1759#ifdef DEBUG
1760 int error; /* error return value */
1761#endif
1762 xfs_alloc_key_t *kp; /* ptr to btree block keys */
1763
1764 bp = cur->bc_bufs[level];
1765 block = XFS_BUF_TO_ALLOC_BLOCK(bp);
1766#ifdef DEBUG
1767 if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
1768 return error;
1769#endif
1770 ptr = cur->bc_ptrs[level];
1771 kp = XFS_ALLOC_KEY_ADDR(block, ptr, cur);
1772 *kp = *keyp;
1773 xfs_alloc_log_keys(cur, bp, ptr, ptr);
1774 }
1775 return 0;
1776} 392}
1777 393
1778/* 394STATIC void
1779 * Externally visible routines. 395xfs_allocbt_trace_key(
1780 */ 396 struct xfs_btree_cur *cur,
1781 397 union xfs_btree_key *key,
1782/* 398 __uint64_t *l0,
1783 * Decrement cursor by one record at the level. 399 __uint64_t *l1)
1784 * For nonzero levels the leaf-ward information is untouched.
1785 */
1786int /* error */
1787xfs_alloc_decrement(
1788 xfs_btree_cur_t *cur, /* btree cursor */
1789 int level, /* level in btree, 0 is leaf */
1790 int *stat) /* success/failure */
1791{ 400{
1792 xfs_alloc_block_t *block; /* btree block */ 401 *l0 = be32_to_cpu(key->alloc.ar_startblock);
1793 int error; /* error return value */ 402 *l1 = be32_to_cpu(key->alloc.ar_blockcount);
1794 int lev; /* btree level */
1795
1796 ASSERT(level < cur->bc_nlevels);
1797 /*
1798 * Read-ahead to the left at this level.
1799 */
1800 xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
1801 /*
1802 * Decrement the ptr at this level. If we're still in the block
1803 * then we're done.
1804 */
1805 if (--cur->bc_ptrs[level] > 0) {
1806 *stat = 1;
1807 return 0;
1808 }
1809 /*
1810 * Get a pointer to the btree block.
1811 */
1812 block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[level]);
1813#ifdef DEBUG
1814 if ((error = xfs_btree_check_sblock(cur, block, level,
1815 cur->bc_bufs[level])))
1816 return error;
1817#endif
1818 /*
1819 * If we just went off the left edge of the tree, return failure.
1820 */
1821 if (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK) {
1822 *stat = 0;
1823 return 0;
1824 }
1825 /*
1826 * March up the tree decrementing pointers.
1827 * Stop when we don't go off the left edge of a block.
1828 */
1829 for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1830 if (--cur->bc_ptrs[lev] > 0)
1831 break;
1832 /*
1833 * Read-ahead the left block, we're going to read it
1834 * in the next loop.
1835 */
1836 xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
1837 }
1838 /*
1839 * If we went off the root then we are seriously confused.
1840 */
1841 ASSERT(lev < cur->bc_nlevels);
1842 /*
1843 * Now walk back down the tree, fixing up the cursor's buffer
1844 * pointers and key numbers.
1845 */
1846 for (block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[lev]); lev > level; ) {
1847 xfs_agblock_t agbno; /* block number of btree block */
1848 xfs_buf_t *bp; /* buffer pointer for block */
1849
1850 agbno = be32_to_cpu(*XFS_ALLOC_PTR_ADDR(block, cur->bc_ptrs[lev], cur));
1851 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1852 cur->bc_private.a.agno, agbno, 0, &bp,
1853 XFS_ALLOC_BTREE_REF)))
1854 return error;
1855 lev--;
1856 xfs_btree_setbuf(cur, lev, bp);
1857 block = XFS_BUF_TO_ALLOC_BLOCK(bp);
1858 if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
1859 return error;
1860 cur->bc_ptrs[lev] = be16_to_cpu(block->bb_numrecs);
1861 }
1862 *stat = 1;
1863 return 0;
1864} 403}
1865 404
1866/* 405STATIC void
1867 * Delete the record pointed to by cur. 406xfs_allocbt_trace_record(
1868 * The cursor refers to the place where the record was (could be inserted) 407 struct xfs_btree_cur *cur,
1869 * when the operation returns. 408 union xfs_btree_rec *rec,
1870 */ 409 __uint64_t *l0,
1871int /* error */ 410 __uint64_t *l1,
1872xfs_alloc_delete( 411 __uint64_t *l2)
1873 xfs_btree_cur_t *cur, /* btree cursor */
1874 int *stat) /* success/failure */
1875{ 412{
1876 int error; /* error return value */ 413 *l0 = be32_to_cpu(rec->alloc.ar_startblock);
1877 int i; /* result code */ 414 *l1 = be32_to_cpu(rec->alloc.ar_blockcount);
1878 int level; /* btree level */ 415 *l2 = 0;
1879
1880 /*
1881 * Go up the tree, starting at leaf level.
1882 * If 2 is returned then a join was done; go to the next level.
1883 * Otherwise we are done.
1884 */
1885 for (level = 0, i = 2; i == 2; level++) {
1886 if ((error = xfs_alloc_delrec(cur, level, &i)))
1887 return error;
1888 }
1889 if (i == 0) {
1890 for (level = 1; level < cur->bc_nlevels; level++) {
1891 if (cur->bc_ptrs[level] == 0) {
1892 if ((error = xfs_alloc_decrement(cur, level, &i)))
1893 return error;
1894 break;
1895 }
1896 }
1897 }
1898 *stat = i;
1899 return 0;
1900} 416}
417#endif /* XFS_BTREE_TRACE */
418
419static const struct xfs_btree_ops xfs_allocbt_ops = {
420 .rec_len = sizeof(xfs_alloc_rec_t),
421 .key_len = sizeof(xfs_alloc_key_t),
422
423 .dup_cursor = xfs_allocbt_dup_cursor,
424 .set_root = xfs_allocbt_set_root,
425 .kill_root = xfs_allocbt_kill_root,
426 .alloc_block = xfs_allocbt_alloc_block,
427 .free_block = xfs_allocbt_free_block,
428 .update_lastrec = xfs_allocbt_update_lastrec,
429 .get_minrecs = xfs_allocbt_get_minrecs,
430 .get_maxrecs = xfs_allocbt_get_maxrecs,
431 .init_key_from_rec = xfs_allocbt_init_key_from_rec,
432 .init_rec_from_key = xfs_allocbt_init_rec_from_key,
433 .init_rec_from_cur = xfs_allocbt_init_rec_from_cur,
434 .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur,
435 .key_diff = xfs_allocbt_key_diff,
1901 436
1902/*
1903 * Get the data from the pointed-to record.
1904 */
1905int /* error */
1906xfs_alloc_get_rec(
1907 xfs_btree_cur_t *cur, /* btree cursor */
1908 xfs_agblock_t *bno, /* output: starting block of extent */
1909 xfs_extlen_t *len, /* output: length of extent */
1910 int *stat) /* output: success/failure */
1911{
1912 xfs_alloc_block_t *block; /* btree block */
1913#ifdef DEBUG 437#ifdef DEBUG
1914 int error; /* error return value */ 438 .keys_inorder = xfs_allocbt_keys_inorder,
439 .recs_inorder = xfs_allocbt_recs_inorder,
1915#endif 440#endif
1916 int ptr; /* record number */
1917 441
1918 ptr = cur->bc_ptrs[0]; 442#ifdef XFS_BTREE_TRACE
1919 block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[0]); 443 .trace_enter = xfs_allocbt_trace_enter,
1920#ifdef DEBUG 444 .trace_cursor = xfs_allocbt_trace_cursor,
1921 if ((error = xfs_btree_check_sblock(cur, block, 0, cur->bc_bufs[0]))) 445 .trace_key = xfs_allocbt_trace_key,
1922 return error; 446 .trace_record = xfs_allocbt_trace_record,
1923#endif 447#endif
1924 /* 448};
1925 * Off the right end or left end, return failure.
1926 */
1927 if (ptr > be16_to_cpu(block->bb_numrecs) || ptr <= 0) {
1928 *stat = 0;
1929 return 0;
1930 }
1931 /*
1932 * Point to the record and extract its data.
1933 */
1934 {
1935 xfs_alloc_rec_t *rec; /* record data */
1936
1937 rec = XFS_ALLOC_REC_ADDR(block, ptr, cur);
1938 *bno = be32_to_cpu(rec->ar_startblock);
1939 *len = be32_to_cpu(rec->ar_blockcount);
1940 }
1941 *stat = 1;
1942 return 0;
1943}
1944 449
1945/* 450/*
1946 * Increment cursor by one record at the level. 451 * Allocate a new allocation btree cursor.
1947 * For nonzero levels the leaf-ward information is untouched.
1948 */ 452 */
1949int /* error */ 453struct xfs_btree_cur * /* new alloc btree cursor */
1950xfs_alloc_increment( 454xfs_allocbt_init_cursor(
1951 xfs_btree_cur_t *cur, /* btree cursor */ 455 struct xfs_mount *mp, /* file system mount point */
1952 int level, /* level in btree, 0 is leaf */ 456 struct xfs_trans *tp, /* transaction pointer */
1953 int *stat) /* success/failure */ 457 struct xfs_buf *agbp, /* buffer for agf structure */
458 xfs_agnumber_t agno, /* allocation group number */
459 xfs_btnum_t btnum) /* btree identifier */
1954{ 460{
1955 xfs_alloc_block_t *block; /* btree block */ 461 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
1956 xfs_buf_t *bp; /* tree block buffer */ 462 struct xfs_btree_cur *cur;
1957 int error; /* error return value */
1958 int lev; /* btree level */
1959
1960 ASSERT(level < cur->bc_nlevels);
1961 /*
1962 * Read-ahead to the right at this level.
1963 */
1964 xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
1965 /*
1966 * Get a pointer to the btree block.
1967 */
1968 bp = cur->bc_bufs[level];
1969 block = XFS_BUF_TO_ALLOC_BLOCK(bp);
1970#ifdef DEBUG
1971 if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
1972 return error;
1973#endif
1974 /*
1975 * Increment the ptr at this level. If we're still in the block
1976 * then we're done.
1977 */
1978 if (++cur->bc_ptrs[level] <= be16_to_cpu(block->bb_numrecs)) {
1979 *stat = 1;
1980 return 0;
1981 }
1982 /*
1983 * If we just went off the right edge of the tree, return failure.
1984 */
1985 if (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK) {
1986 *stat = 0;
1987 return 0;
1988 }
1989 /*
1990 * March up the tree incrementing pointers.
1991 * Stop when we don't go off the right edge of a block.
1992 */
1993 for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1994 bp = cur->bc_bufs[lev];
1995 block = XFS_BUF_TO_ALLOC_BLOCK(bp);
1996#ifdef DEBUG
1997 if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
1998 return error;
1999#endif
2000 if (++cur->bc_ptrs[lev] <= be16_to_cpu(block->bb_numrecs))
2001 break;
2002 /*
2003 * Read-ahead the right block, we're going to read it
2004 * in the next loop.
2005 */
2006 xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
2007 }
2008 /*
2009 * If we went off the root then we are seriously confused.
2010 */
2011 ASSERT(lev < cur->bc_nlevels);
2012 /*
2013 * Now walk back down the tree, fixing up the cursor's buffer
2014 * pointers and key numbers.
2015 */
2016 for (bp = cur->bc_bufs[lev], block = XFS_BUF_TO_ALLOC_BLOCK(bp);
2017 lev > level; ) {
2018 xfs_agblock_t agbno; /* block number of btree block */
2019 463
2020 agbno = be32_to_cpu(*XFS_ALLOC_PTR_ADDR(block, cur->bc_ptrs[lev], cur)); 464 ASSERT(btnum == XFS_BTNUM_BNO || btnum == XFS_BTNUM_CNT);
2021 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
2022 cur->bc_private.a.agno, agbno, 0, &bp,
2023 XFS_ALLOC_BTREE_REF)))
2024 return error;
2025 lev--;
2026 xfs_btree_setbuf(cur, lev, bp);
2027 block = XFS_BUF_TO_ALLOC_BLOCK(bp);
2028 if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
2029 return error;
2030 cur->bc_ptrs[lev] = 1;
2031 }
2032 *stat = 1;
2033 return 0;
2034}
2035 465
2036/* 466 cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
2037 * Insert the current record at the point referenced by cur.
2038 * The cursor may be inconsistent on return if splits have been done.
2039 */
2040int /* error */
2041xfs_alloc_insert(
2042 xfs_btree_cur_t *cur, /* btree cursor */
2043 int *stat) /* success/failure */
2044{
2045 int error; /* error return value */
2046 int i; /* result value, 0 for failure */
2047 int level; /* current level number in btree */
2048 xfs_agblock_t nbno; /* new block number (split result) */
2049 xfs_btree_cur_t *ncur; /* new cursor (split result) */
2050 xfs_alloc_rec_t nrec; /* record being inserted this level */
2051 xfs_btree_cur_t *pcur; /* previous level's cursor */
2052 467
2053 level = 0; 468 cur->bc_tp = tp;
2054 nbno = NULLAGBLOCK; 469 cur->bc_mp = mp;
2055 nrec.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock); 470 cur->bc_nlevels = be32_to_cpu(agf->agf_levels[btnum]);
2056 nrec.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount); 471 cur->bc_btnum = btnum;
2057 ncur = NULL; 472 cur->bc_blocklog = mp->m_sb.sb_blocklog;
2058 pcur = cur;
2059 /*
2060 * Loop going up the tree, starting at the leaf level.
2061 * Stop when we don't get a split block, that must mean that
2062 * the insert is finished with this level.
2063 */
2064 do {
2065 /*
2066 * Insert nrec/nbno into this level of the tree.
2067 * Note if we fail, nbno will be null.
2068 */
2069 if ((error = xfs_alloc_insrec(pcur, level++, &nbno, &nrec, &ncur,
2070 &i))) {
2071 if (pcur != cur)
2072 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
2073 return error;
2074 }
2075 /*
2076 * See if the cursor we just used is trash.
2077 * Can't trash the caller's cursor, but otherwise we should
2078 * if ncur is a new cursor or we're about to be done.
2079 */
2080 if (pcur != cur && (ncur || nbno == NULLAGBLOCK)) {
2081 cur->bc_nlevels = pcur->bc_nlevels;
2082 xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
2083 }
2084 /*
2085 * If we got a new cursor, switch to it.
2086 */
2087 if (ncur) {
2088 pcur = ncur;
2089 ncur = NULL;
2090 }
2091 } while (nbno != NULLAGBLOCK);
2092 *stat = i;
2093 return 0;
2094}
2095 473
2096/* 474 cur->bc_ops = &xfs_allocbt_ops;
2097 * Lookup the record equal to [bno, len] in the btree given by cur. 475 if (btnum == XFS_BTNUM_CNT)
2098 */ 476 cur->bc_flags = XFS_BTREE_LASTREC_UPDATE;
2099int /* error */
2100xfs_alloc_lookup_eq(
2101 xfs_btree_cur_t *cur, /* btree cursor */
2102 xfs_agblock_t bno, /* starting block of extent */
2103 xfs_extlen_t len, /* length of extent */
2104 int *stat) /* success/failure */
2105{
2106 cur->bc_rec.a.ar_startblock = bno;
2107 cur->bc_rec.a.ar_blockcount = len;
2108 return xfs_alloc_lookup(cur, XFS_LOOKUP_EQ, stat);
2109}
2110 477
2111/* 478 cur->bc_private.a.agbp = agbp;
2112 * Lookup the first record greater than or equal to [bno, len] 479 cur->bc_private.a.agno = agno;
2113 * in the btree given by cur.
2114 */
2115int /* error */
2116xfs_alloc_lookup_ge(
2117 xfs_btree_cur_t *cur, /* btree cursor */
2118 xfs_agblock_t bno, /* starting block of extent */
2119 xfs_extlen_t len, /* length of extent */
2120 int *stat) /* success/failure */
2121{
2122 cur->bc_rec.a.ar_startblock = bno;
2123 cur->bc_rec.a.ar_blockcount = len;
2124 return xfs_alloc_lookup(cur, XFS_LOOKUP_GE, stat);
2125}
2126 480
2127/* 481 return cur;
2128 * Lookup the first record less than or equal to [bno, len]
2129 * in the btree given by cur.
2130 */
2131int /* error */
2132xfs_alloc_lookup_le(
2133 xfs_btree_cur_t *cur, /* btree cursor */
2134 xfs_agblock_t bno, /* starting block of extent */
2135 xfs_extlen_t len, /* length of extent */
2136 int *stat) /* success/failure */
2137{
2138 cur->bc_rec.a.ar_startblock = bno;
2139 cur->bc_rec.a.ar_blockcount = len;
2140 return xfs_alloc_lookup(cur, XFS_LOOKUP_LE, stat);
2141} 482}
2142 483
2143/* 484/*
2144 * Update the record referred to by cur, to the value given by [bno, len]. 485 * Calculate number of records in an alloc btree block.
2145 * This either works (return 0) or gets an EFSCORRUPTED error.
2146 */ 486 */
2147int /* error */ 487int
2148xfs_alloc_update( 488xfs_allocbt_maxrecs(
2149 xfs_btree_cur_t *cur, /* btree cursor */ 489 struct xfs_mount *mp,
2150 xfs_agblock_t bno, /* starting block of extent */ 490 int blocklen,
2151 xfs_extlen_t len) /* length of extent */ 491 int leaf)
2152{ 492{
2153 xfs_alloc_block_t *block; /* btree block to update */ 493 blocklen -= XFS_ALLOC_BLOCK_LEN(mp);
2154 int error; /* error return value */
2155 int ptr; /* current record number (updating) */
2156 494
2157 ASSERT(len > 0); 495 if (leaf)
2158 /* 496 return blocklen / sizeof(xfs_alloc_rec_t);
2159 * Pick up the a.g. freelist struct and the current block. 497 return blocklen / (sizeof(xfs_alloc_key_t) + sizeof(xfs_alloc_ptr_t));
2160 */
2161 block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[0]);
2162#ifdef DEBUG
2163 if ((error = xfs_btree_check_sblock(cur, block, 0, cur->bc_bufs[0])))
2164 return error;
2165#endif
2166 /*
2167 * Get the address of the rec to be updated.
2168 */
2169 ptr = cur->bc_ptrs[0];
2170 {
2171 xfs_alloc_rec_t *rp; /* pointer to updated record */
2172
2173 rp = XFS_ALLOC_REC_ADDR(block, ptr, cur);
2174 /*
2175 * Fill in the new contents and log them.
2176 */
2177 rp->ar_startblock = cpu_to_be32(bno);
2178 rp->ar_blockcount = cpu_to_be32(len);
2179 xfs_alloc_log_recs(cur, cur->bc_bufs[0], ptr, ptr);
2180 }
2181 /*
2182 * If it's the by-size btree and it's the last leaf block and
2183 * it's the last record... then update the size of the longest
2184 * extent in the a.g., which we cache in the a.g. freelist header.
2185 */
2186 if (cur->bc_btnum == XFS_BTNUM_CNT &&
2187 be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK &&
2188 ptr == be16_to_cpu(block->bb_numrecs)) {
2189 xfs_agf_t *agf; /* a.g. freespace header */
2190 xfs_agnumber_t seqno;
2191
2192 agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
2193 seqno = be32_to_cpu(agf->agf_seqno);
2194 cur->bc_mp->m_perag[seqno].pagf_longest = len;
2195 agf->agf_longest = cpu_to_be32(len);
2196 xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
2197 XFS_AGF_LONGEST);
2198 }
2199 /*
2200 * Updating first record in leaf. Pass new key value up to our parent.
2201 */
2202 if (ptr == 1) {
2203 xfs_alloc_key_t key; /* key containing [bno, len] */
2204
2205 key.ar_startblock = cpu_to_be32(bno);
2206 key.ar_blockcount = cpu_to_be32(len);
2207 if ((error = xfs_alloc_updkey(cur, &key, 1)))
2208 return error;
2209 }
2210 return 0;
2211} 498}
diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h
index 5bd1a2c8bd0..a6caa0022c9 100644
--- a/fs/xfs/xfs_alloc_btree.h
+++ b/fs/xfs/xfs_alloc_btree.h
@@ -24,7 +24,6 @@
24 24
25struct xfs_buf; 25struct xfs_buf;
26struct xfs_btree_cur; 26struct xfs_btree_cur;
27struct xfs_btree_sblock;
28struct xfs_mount; 27struct xfs_mount;
29 28
30/* 29/*
@@ -50,16 +49,6 @@ typedef struct xfs_alloc_rec_incore {
50 49
51/* btree pointer type */ 50/* btree pointer type */
52typedef __be32 xfs_alloc_ptr_t; 51typedef __be32 xfs_alloc_ptr_t;
53/* btree block header type */
54typedef struct xfs_btree_sblock xfs_alloc_block_t;
55
56#define XFS_BUF_TO_ALLOC_BLOCK(bp) ((xfs_alloc_block_t *)XFS_BUF_PTR(bp))
57
58/*
59 * Real block structures have a size equal to the disk block size.
60 */
61#define XFS_ALLOC_BLOCK_MAXRECS(lev,cur) ((cur)->bc_mp->m_alloc_mxr[lev != 0])
62#define XFS_ALLOC_BLOCK_MINRECS(lev,cur) ((cur)->bc_mp->m_alloc_mnr[lev != 0])
63 52
64/* 53/*
65 * Minimum and maximum blocksize and sectorsize. 54 * Minimum and maximum blocksize and sectorsize.
@@ -83,73 +72,39 @@ typedef struct xfs_btree_sblock xfs_alloc_block_t;
83#define XFS_CNT_BLOCK(mp) ((xfs_agblock_t)(XFS_BNO_BLOCK(mp) + 1)) 72#define XFS_CNT_BLOCK(mp) ((xfs_agblock_t)(XFS_BNO_BLOCK(mp) + 1))
84 73
85/* 74/*
86 * Record, key, and pointer address macros for btree blocks. 75 * Btree block header size depends on a superblock flag.
87 */ 76 *
88#define XFS_ALLOC_REC_ADDR(bb,i,cur) \ 77 * (not quite yet, but soon)
89 XFS_BTREE_REC_ADDR(xfs_alloc, bb, i)
90
91#define XFS_ALLOC_KEY_ADDR(bb,i,cur) \
92 XFS_BTREE_KEY_ADDR(xfs_alloc, bb, i)
93
94#define XFS_ALLOC_PTR_ADDR(bb,i,cur) \
95 XFS_BTREE_PTR_ADDR(xfs_alloc, bb, i, XFS_ALLOC_BLOCK_MAXRECS(1, cur))
96
97/*
98 * Decrement cursor by one record at the level.
99 * For nonzero levels the leaf-ward information is untouched.
100 */
101extern int xfs_alloc_decrement(struct xfs_btree_cur *cur, int level, int *stat);
102
103/*
104 * Delete the record pointed to by cur.
105 * The cursor refers to the place where the record was (could be inserted)
106 * when the operation returns.
107 */
108extern int xfs_alloc_delete(struct xfs_btree_cur *cur, int *stat);
109
110/*
111 * Get the data from the pointed-to record.
112 */
113extern int xfs_alloc_get_rec(struct xfs_btree_cur *cur, xfs_agblock_t *bno,
114 xfs_extlen_t *len, int *stat);
115
116/*
117 * Increment cursor by one record at the level.
118 * For nonzero levels the leaf-ward information is untouched.
119 */
120extern int xfs_alloc_increment(struct xfs_btree_cur *cur, int level, int *stat);
121
122/*
123 * Insert the current record at the point referenced by cur.
124 * The cursor may be inconsistent on return if splits have been done.
125 */
126extern int xfs_alloc_insert(struct xfs_btree_cur *cur, int *stat);
127
128/*
129 * Lookup the record equal to [bno, len] in the btree given by cur.
130 */
131extern int xfs_alloc_lookup_eq(struct xfs_btree_cur *cur, xfs_agblock_t bno,
132 xfs_extlen_t len, int *stat);
133
134/*
135 * Lookup the first record greater than or equal to [bno, len]
136 * in the btree given by cur.
137 */
138extern int xfs_alloc_lookup_ge(struct xfs_btree_cur *cur, xfs_agblock_t bno,
139 xfs_extlen_t len, int *stat);
140
141/*
142 * Lookup the first record less than or equal to [bno, len]
143 * in the btree given by cur.
144 */ 78 */
145extern int xfs_alloc_lookup_le(struct xfs_btree_cur *cur, xfs_agblock_t bno, 79#define XFS_ALLOC_BLOCK_LEN(mp) XFS_BTREE_SBLOCK_LEN
146 xfs_extlen_t len, int *stat);
147 80
148/* 81/*
149 * Update the record referred to by cur, to the value given by [bno, len]. 82 * Record, key, and pointer address macros for btree blocks.
150 * This either works (return 0) or gets an EFSCORRUPTED error. 83 *
151 */ 84 * (note that some of these may appear unused, but they are used in userspace)
152extern int xfs_alloc_update(struct xfs_btree_cur *cur, xfs_agblock_t bno, 85 */
153 xfs_extlen_t len); 86#define XFS_ALLOC_REC_ADDR(mp, block, index) \
87 ((xfs_alloc_rec_t *) \
88 ((char *)(block) + \
89 XFS_ALLOC_BLOCK_LEN(mp) + \
90 (((index) - 1) * sizeof(xfs_alloc_rec_t))))
91
92#define XFS_ALLOC_KEY_ADDR(mp, block, index) \
93 ((xfs_alloc_key_t *) \
94 ((char *)(block) + \
95 XFS_ALLOC_BLOCK_LEN(mp) + \
96 ((index) - 1) * sizeof(xfs_alloc_key_t)))
97
98#define XFS_ALLOC_PTR_ADDR(mp, block, index, maxrecs) \
99 ((xfs_alloc_ptr_t *) \
100 ((char *)(block) + \
101 XFS_ALLOC_BLOCK_LEN(mp) + \
102 (maxrecs) * sizeof(xfs_alloc_key_t) + \
103 ((index) - 1) * sizeof(xfs_alloc_ptr_t)))
104
105extern struct xfs_btree_cur *xfs_allocbt_init_cursor(struct xfs_mount *,
106 struct xfs_trans *, struct xfs_buf *,
107 xfs_agnumber_t, xfs_btnum_t);
108extern int xfs_allocbt_maxrecs(struct xfs_mount *, int, int);
154 109
155#endif /* __XFS_ALLOC_BTREE_H__ */ 110#endif /* __XFS_ALLOC_BTREE_H__ */
diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h
index 0b3b5efe848..53d5e70d136 100644
--- a/fs/xfs/xfs_arch.h
+++ b/fs/xfs/xfs_arch.h
@@ -41,21 +41,36 @@
41#endif 41#endif
42 42
43#ifdef XFS_NATIVE_HOST 43#ifdef XFS_NATIVE_HOST
44#define cpu_to_be16(val) ((__be16)(val)) 44#define cpu_to_be16(val) ((__force __be16)(__u16)(val))
45#define cpu_to_be32(val) ((__be32)(val)) 45#define cpu_to_be32(val) ((__force __be32)(__u32)(val))
46#define cpu_to_be64(val) ((__be64)(val)) 46#define cpu_to_be64(val) ((__force __be64)(__u64)(val))
47#define be16_to_cpu(val) ((__uint16_t)(val)) 47#define be16_to_cpu(val) ((__force __u16)(__be16)(val))
48#define be32_to_cpu(val) ((__uint32_t)(val)) 48#define be32_to_cpu(val) ((__force __u32)(__be32)(val))
49#define be64_to_cpu(val) ((__uint64_t)(val)) 49#define be64_to_cpu(val) ((__force __u64)(__be64)(val))
50#else 50#else
51#define cpu_to_be16(val) (__swab16((__uint16_t)(val))) 51#define cpu_to_be16(val) ((__force __be16)__swab16((__u16)(val)))
52#define cpu_to_be32(val) (__swab32((__uint32_t)(val))) 52#define cpu_to_be32(val) ((__force __be32)__swab32((__u32)(val)))
53#define cpu_to_be64(val) (__swab64((__uint64_t)(val))) 53#define cpu_to_be64(val) ((__force __be64)__swab64((__u64)(val)))
54#define be16_to_cpu(val) (__swab16((__be16)(val))) 54#define be16_to_cpu(val) (__swab16((__force __u16)(__be16)(val)))
55#define be32_to_cpu(val) (__swab32((__be32)(val))) 55#define be32_to_cpu(val) (__swab32((__force __u32)(__be32)(val)))
56#define be64_to_cpu(val) (__swab64((__be64)(val))) 56#define be64_to_cpu(val) (__swab64((__force __u64)(__be64)(val)))
57#endif 57#endif
58 58
59static inline void be16_add_cpu(__be16 *a, __s16 b)
60{
61 *a = cpu_to_be16(be16_to_cpu(*a) + b);
62}
63
64static inline void be32_add_cpu(__be32 *a, __s32 b)
65{
66 *a = cpu_to_be32(be32_to_cpu(*a) + b);
67}
68
69static inline void be64_add_cpu(__be64 *a, __s64 b)
70{
71 *a = cpu_to_be64(be64_to_cpu(*a) + b);
72}
73
59#endif /* __KERNEL__ */ 74#endif /* __KERNEL__ */
60 75
61/* do we need conversion? */ 76/* do we need conversion? */
diff --git a/fs/xfs/xfs_bit.h b/fs/xfs/xfs_bit.h
index 8e0e463dae2..bca7b243c31 100644
--- a/fs/xfs/xfs_bit.h
+++ b/fs/xfs/xfs_bit.h
@@ -61,8 +61,7 @@ static inline int xfs_highbit64(__uint64_t v)
61/* Get low bit set out of 32-bit argument, -1 if none set */ 61/* Get low bit set out of 32-bit argument, -1 if none set */
62static inline int xfs_lowbit32(__uint32_t v) 62static inline int xfs_lowbit32(__uint32_t v)
63{ 63{
64 unsigned long t = v; 64 return ffs(v) - 1;
65 return (v) ? find_first_bit(&t, 32) : -1;
66} 65}
67 66
68/* Get low bit set out of 64-bit argument, -1 if none set */ 67/* Get low bit set out of 64-bit argument, -1 if none set */
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index a1aab9275d5..138308e70d1 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -393,8 +393,8 @@ xfs_bmap_count_leaves(
393 393
394STATIC void 394STATIC void
395xfs_bmap_disk_count_leaves( 395xfs_bmap_disk_count_leaves(
396 xfs_extnum_t idx, 396 struct xfs_mount *mp,
397 xfs_bmbt_block_t *block, 397 struct xfs_btree_block *block,
398 int numrecs, 398 int numrecs,
399 int *count); 399 int *count);
400 400
@@ -402,6 +402,53 @@ xfs_bmap_disk_count_leaves(
402 * Bmap internal routines. 402 * Bmap internal routines.
403 */ 403 */
404 404
405STATIC int /* error */
406xfs_bmbt_lookup_eq(
407 struct xfs_btree_cur *cur,
408 xfs_fileoff_t off,
409 xfs_fsblock_t bno,
410 xfs_filblks_t len,
411 int *stat) /* success/failure */
412{
413 cur->bc_rec.b.br_startoff = off;
414 cur->bc_rec.b.br_startblock = bno;
415 cur->bc_rec.b.br_blockcount = len;
416 return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
417}
418
419STATIC int /* error */
420xfs_bmbt_lookup_ge(
421 struct xfs_btree_cur *cur,
422 xfs_fileoff_t off,
423 xfs_fsblock_t bno,
424 xfs_filblks_t len,
425 int *stat) /* success/failure */
426{
427 cur->bc_rec.b.br_startoff = off;
428 cur->bc_rec.b.br_startblock = bno;
429 cur->bc_rec.b.br_blockcount = len;
430 return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
431}
432
433/*
434* Update the record referred to by cur to the value given
435 * by [off, bno, len, state].
436 * This either works (return 0) or gets an EFSCORRUPTED error.
437 */
438STATIC int
439xfs_bmbt_update(
440 struct xfs_btree_cur *cur,
441 xfs_fileoff_t off,
442 xfs_fsblock_t bno,
443 xfs_filblks_t len,
444 xfs_exntst_t state)
445{
446 union xfs_btree_rec rec;
447
448 xfs_bmbt_disk_set_allf(&rec.bmbt, off, bno, len, state);
449 return xfs_btree_update(cur, &rec);
450}
451
405/* 452/*
406 * Called from xfs_bmap_add_attrfork to handle btree format files. 453 * Called from xfs_bmap_add_attrfork to handle btree format files.
407 */ 454 */
@@ -422,15 +469,14 @@ xfs_bmap_add_attrfork_btree(
422 if (ip->i_df.if_broot_bytes <= XFS_IFORK_DSIZE(ip)) 469 if (ip->i_df.if_broot_bytes <= XFS_IFORK_DSIZE(ip))
423 *flags |= XFS_ILOG_DBROOT; 470 *flags |= XFS_ILOG_DBROOT;
424 else { 471 else {
425 cur = xfs_btree_init_cursor(mp, tp, NULL, 0, XFS_BTNUM_BMAP, ip, 472 cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
426 XFS_DATA_FORK);
427 cur->bc_private.b.flist = flist; 473 cur->bc_private.b.flist = flist;
428 cur->bc_private.b.firstblock = *firstblock; 474 cur->bc_private.b.firstblock = *firstblock;
429 if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat))) 475 if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat)))
430 goto error0; 476 goto error0;
431 /* must be at least one entry */ 477 /* must be at least one entry */
432 XFS_WANT_CORRUPTED_GOTO(stat == 1, error0); 478 XFS_WANT_CORRUPTED_GOTO(stat == 1, error0);
433 if ((error = xfs_bmbt_newroot(cur, flags, &stat))) 479 if ((error = xfs_btree_new_iroot(cur, flags, &stat)))
434 goto error0; 480 goto error0;
435 if (stat == 0) { 481 if (stat == 0) {
436 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 482 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
@@ -818,10 +864,10 @@ xfs_bmap_add_extent_delay_real(
818 RIGHT.br_blockcount, &i))) 864 RIGHT.br_blockcount, &i)))
819 goto done; 865 goto done;
820 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 866 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
821 if ((error = xfs_bmbt_delete(cur, &i))) 867 if ((error = xfs_btree_delete(cur, &i)))
822 goto done; 868 goto done;
823 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 869 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
824 if ((error = xfs_bmbt_decrement(cur, 0, &i))) 870 if ((error = xfs_btree_decrement(cur, 0, &i)))
825 goto done; 871 goto done;
826 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 872 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
827 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, 873 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
@@ -931,7 +977,7 @@ xfs_bmap_add_extent_delay_real(
931 goto done; 977 goto done;
932 XFS_WANT_CORRUPTED_GOTO(i == 0, done); 978 XFS_WANT_CORRUPTED_GOTO(i == 0, done);
933 cur->bc_rec.b.br_state = XFS_EXT_NORM; 979 cur->bc_rec.b.br_state = XFS_EXT_NORM;
934 if ((error = xfs_bmbt_insert(cur, &i))) 980 if ((error = xfs_btree_insert(cur, &i)))
935 goto done; 981 goto done;
936 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 982 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
937 } 983 }
@@ -1007,7 +1053,7 @@ xfs_bmap_add_extent_delay_real(
1007 goto done; 1053 goto done;
1008 XFS_WANT_CORRUPTED_GOTO(i == 0, done); 1054 XFS_WANT_CORRUPTED_GOTO(i == 0, done);
1009 cur->bc_rec.b.br_state = XFS_EXT_NORM; 1055 cur->bc_rec.b.br_state = XFS_EXT_NORM;
1010 if ((error = xfs_bmbt_insert(cur, &i))) 1056 if ((error = xfs_btree_insert(cur, &i)))
1011 goto done; 1057 goto done;
1012 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1058 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1013 } 1059 }
@@ -1097,7 +1143,7 @@ xfs_bmap_add_extent_delay_real(
1097 goto done; 1143 goto done;
1098 XFS_WANT_CORRUPTED_GOTO(i == 0, done); 1144 XFS_WANT_CORRUPTED_GOTO(i == 0, done);
1099 cur->bc_rec.b.br_state = XFS_EXT_NORM; 1145 cur->bc_rec.b.br_state = XFS_EXT_NORM;
1100 if ((error = xfs_bmbt_insert(cur, &i))) 1146 if ((error = xfs_btree_insert(cur, &i)))
1101 goto done; 1147 goto done;
1102 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1148 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1103 } 1149 }
@@ -1152,7 +1198,7 @@ xfs_bmap_add_extent_delay_real(
1152 goto done; 1198 goto done;
1153 XFS_WANT_CORRUPTED_GOTO(i == 0, done); 1199 XFS_WANT_CORRUPTED_GOTO(i == 0, done);
1154 cur->bc_rec.b.br_state = XFS_EXT_NORM; 1200 cur->bc_rec.b.br_state = XFS_EXT_NORM;
1155 if ((error = xfs_bmbt_insert(cur, &i))) 1201 if ((error = xfs_btree_insert(cur, &i)))
1156 goto done; 1202 goto done;
1157 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1203 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1158 } 1204 }
@@ -1379,16 +1425,16 @@ xfs_bmap_add_extent_unwritten_real(
1379 RIGHT.br_blockcount, &i))) 1425 RIGHT.br_blockcount, &i)))
1380 goto done; 1426 goto done;
1381 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1427 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1382 if ((error = xfs_bmbt_delete(cur, &i))) 1428 if ((error = xfs_btree_delete(cur, &i)))
1383 goto done; 1429 goto done;
1384 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1430 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1385 if ((error = xfs_bmbt_decrement(cur, 0, &i))) 1431 if ((error = xfs_btree_decrement(cur, 0, &i)))
1386 goto done; 1432 goto done;
1387 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1433 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1388 if ((error = xfs_bmbt_delete(cur, &i))) 1434 if ((error = xfs_btree_delete(cur, &i)))
1389 goto done; 1435 goto done;
1390 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1436 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1391 if ((error = xfs_bmbt_decrement(cur, 0, &i))) 1437 if ((error = xfs_btree_decrement(cur, 0, &i)))
1392 goto done; 1438 goto done;
1393 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1439 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1394 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, 1440 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
@@ -1428,10 +1474,10 @@ xfs_bmap_add_extent_unwritten_real(
1428 &i))) 1474 &i)))
1429 goto done; 1475 goto done;
1430 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1476 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1431 if ((error = xfs_bmbt_delete(cur, &i))) 1477 if ((error = xfs_btree_delete(cur, &i)))
1432 goto done; 1478 goto done;
1433 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1479 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1434 if ((error = xfs_bmbt_decrement(cur, 0, &i))) 1480 if ((error = xfs_btree_decrement(cur, 0, &i)))
1435 goto done; 1481 goto done;
1436 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1482 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1437 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, 1483 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
@@ -1471,10 +1517,10 @@ xfs_bmap_add_extent_unwritten_real(
1471 RIGHT.br_blockcount, &i))) 1517 RIGHT.br_blockcount, &i)))
1472 goto done; 1518 goto done;
1473 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1519 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1474 if ((error = xfs_bmbt_delete(cur, &i))) 1520 if ((error = xfs_btree_delete(cur, &i)))
1475 goto done; 1521 goto done;
1476 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1522 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1477 if ((error = xfs_bmbt_decrement(cur, 0, &i))) 1523 if ((error = xfs_btree_decrement(cur, 0, &i)))
1478 goto done; 1524 goto done;
1479 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1525 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1480 if ((error = xfs_bmbt_update(cur, new->br_startoff, 1526 if ((error = xfs_bmbt_update(cur, new->br_startoff,
@@ -1557,7 +1603,7 @@ xfs_bmap_add_extent_unwritten_real(
1557 PREV.br_blockcount - new->br_blockcount, 1603 PREV.br_blockcount - new->br_blockcount,
1558 oldext))) 1604 oldext)))
1559 goto done; 1605 goto done;
1560 if ((error = xfs_bmbt_decrement(cur, 0, &i))) 1606 if ((error = xfs_btree_decrement(cur, 0, &i)))
1561 goto done; 1607 goto done;
1562 if (xfs_bmbt_update(cur, LEFT.br_startoff, 1608 if (xfs_bmbt_update(cur, LEFT.br_startoff,
1563 LEFT.br_startblock, 1609 LEFT.br_startblock,
@@ -1605,7 +1651,7 @@ xfs_bmap_add_extent_unwritten_real(
1605 oldext))) 1651 oldext)))
1606 goto done; 1652 goto done;
1607 cur->bc_rec.b = *new; 1653 cur->bc_rec.b = *new;
1608 if ((error = xfs_bmbt_insert(cur, &i))) 1654 if ((error = xfs_btree_insert(cur, &i)))
1609 goto done; 1655 goto done;
1610 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1656 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1611 } 1657 }
@@ -1647,7 +1693,7 @@ xfs_bmap_add_extent_unwritten_real(
1647 PREV.br_blockcount - new->br_blockcount, 1693 PREV.br_blockcount - new->br_blockcount,
1648 oldext))) 1694 oldext)))
1649 goto done; 1695 goto done;
1650 if ((error = xfs_bmbt_increment(cur, 0, &i))) 1696 if ((error = xfs_btree_increment(cur, 0, &i)))
1651 goto done; 1697 goto done;
1652 if ((error = xfs_bmbt_update(cur, new->br_startoff, 1698 if ((error = xfs_bmbt_update(cur, new->br_startoff,
1653 new->br_startblock, 1699 new->br_startblock,
@@ -1695,7 +1741,7 @@ xfs_bmap_add_extent_unwritten_real(
1695 goto done; 1741 goto done;
1696 XFS_WANT_CORRUPTED_GOTO(i == 0, done); 1742 XFS_WANT_CORRUPTED_GOTO(i == 0, done);
1697 cur->bc_rec.b.br_state = XFS_EXT_NORM; 1743 cur->bc_rec.b.br_state = XFS_EXT_NORM;
1698 if ((error = xfs_bmbt_insert(cur, &i))) 1744 if ((error = xfs_btree_insert(cur, &i)))
1699 goto done; 1745 goto done;
1700 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1746 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1701 } 1747 }
@@ -1743,7 +1789,7 @@ xfs_bmap_add_extent_unwritten_real(
1743 cur->bc_rec.b = PREV; 1789 cur->bc_rec.b = PREV;
1744 cur->bc_rec.b.br_blockcount = 1790 cur->bc_rec.b.br_blockcount =
1745 new->br_startoff - PREV.br_startoff; 1791 new->br_startoff - PREV.br_startoff;
1746 if ((error = xfs_bmbt_insert(cur, &i))) 1792 if ((error = xfs_btree_insert(cur, &i)))
1747 goto done; 1793 goto done;
1748 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1794 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1749 /* 1795 /*
@@ -1758,7 +1804,7 @@ xfs_bmap_add_extent_unwritten_real(
1758 XFS_WANT_CORRUPTED_GOTO(i == 0, done); 1804 XFS_WANT_CORRUPTED_GOTO(i == 0, done);
1759 /* new middle extent - newext */ 1805 /* new middle extent - newext */
1760 cur->bc_rec.b.br_state = new->br_state; 1806 cur->bc_rec.b.br_state = new->br_state;
1761 if ((error = xfs_bmbt_insert(cur, &i))) 1807 if ((error = xfs_btree_insert(cur, &i)))
1762 goto done; 1808 goto done;
1763 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1809 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1764 } 1810 }
@@ -2106,10 +2152,10 @@ xfs_bmap_add_extent_hole_real(
2106 right.br_blockcount, &i))) 2152 right.br_blockcount, &i)))
2107 goto done; 2153 goto done;
2108 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 2154 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
2109 if ((error = xfs_bmbt_delete(cur, &i))) 2155 if ((error = xfs_btree_delete(cur, &i)))
2110 goto done; 2156 goto done;
2111 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 2157 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
2112 if ((error = xfs_bmbt_decrement(cur, 0, &i))) 2158 if ((error = xfs_btree_decrement(cur, 0, &i)))
2113 goto done; 2159 goto done;
2114 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 2160 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
2115 if ((error = xfs_bmbt_update(cur, left.br_startoff, 2161 if ((error = xfs_bmbt_update(cur, left.br_startoff,
@@ -2218,7 +2264,7 @@ xfs_bmap_add_extent_hole_real(
2218 goto done; 2264 goto done;
2219 XFS_WANT_CORRUPTED_GOTO(i == 0, done); 2265 XFS_WANT_CORRUPTED_GOTO(i == 0, done);
2220 cur->bc_rec.b.br_state = new->br_state; 2266 cur->bc_rec.b.br_state = new->br_state;
2221 if ((error = xfs_bmbt_insert(cur, &i))) 2267 if ((error = xfs_btree_insert(cur, &i)))
2222 goto done; 2268 goto done;
2223 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 2269 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
2224 } 2270 }
@@ -2996,24 +3042,24 @@ xfs_bmap_btree_to_extents(
2996 int whichfork) /* data or attr fork */ 3042 int whichfork) /* data or attr fork */
2997{ 3043{
2998 /* REFERENCED */ 3044 /* REFERENCED */
2999 xfs_bmbt_block_t *cblock;/* child btree block */ 3045 struct xfs_btree_block *cblock;/* child btree block */
3000 xfs_fsblock_t cbno; /* child block number */ 3046 xfs_fsblock_t cbno; /* child block number */
3001 xfs_buf_t *cbp; /* child block's buffer */ 3047 xfs_buf_t *cbp; /* child block's buffer */
3002 int error; /* error return value */ 3048 int error; /* error return value */
3003 xfs_ifork_t *ifp; /* inode fork data */ 3049 xfs_ifork_t *ifp; /* inode fork data */
3004 xfs_mount_t *mp; /* mount point structure */ 3050 xfs_mount_t *mp; /* mount point structure */
3005 __be64 *pp; /* ptr to block address */ 3051 __be64 *pp; /* ptr to block address */
3006 xfs_bmbt_block_t *rblock;/* root btree block */ 3052 struct xfs_btree_block *rblock;/* root btree block */
3007 3053
3054 mp = ip->i_mount;
3008 ifp = XFS_IFORK_PTR(ip, whichfork); 3055 ifp = XFS_IFORK_PTR(ip, whichfork);
3009 ASSERT(ifp->if_flags & XFS_IFEXTENTS); 3056 ASSERT(ifp->if_flags & XFS_IFEXTENTS);
3010 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE); 3057 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
3011 rblock = ifp->if_broot; 3058 rblock = ifp->if_broot;
3012 ASSERT(be16_to_cpu(rblock->bb_level) == 1); 3059 ASSERT(be16_to_cpu(rblock->bb_level) == 1);
3013 ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1); 3060 ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
3014 ASSERT(XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes) == 1); 3061 ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1);
3015 mp = ip->i_mount; 3062 pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes);
3016 pp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, ifp->if_broot_bytes);
3017 cbno = be64_to_cpu(*pp); 3063 cbno = be64_to_cpu(*pp);
3018 *logflagsp = 0; 3064 *logflagsp = 0;
3019#ifdef DEBUG 3065#ifdef DEBUG
@@ -3023,8 +3069,8 @@ xfs_bmap_btree_to_extents(
3023 if ((error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, 3069 if ((error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp,
3024 XFS_BMAP_BTREE_REF))) 3070 XFS_BMAP_BTREE_REF)))
3025 return error; 3071 return error;
3026 cblock = XFS_BUF_TO_BMBT_BLOCK(cbp); 3072 cblock = XFS_BUF_TO_BLOCK(cbp);
3027 if ((error = xfs_btree_check_lblock(cur, cblock, 0, cbp))) 3073 if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
3028 return error; 3074 return error;
3029 xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp); 3075 xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp);
3030 ip->i_d.di_nblocks--; 3076 ip->i_d.di_nblocks--;
@@ -3170,7 +3216,7 @@ xfs_bmap_del_extent(
3170 flags |= XFS_ILOG_FEXT(whichfork); 3216 flags |= XFS_ILOG_FEXT(whichfork);
3171 break; 3217 break;
3172 } 3218 }
3173 if ((error = xfs_bmbt_delete(cur, &i))) 3219 if ((error = xfs_btree_delete(cur, &i)))
3174 goto done; 3220 goto done;
3175 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 3221 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
3176 break; 3222 break;
@@ -3254,10 +3300,10 @@ xfs_bmap_del_extent(
3254 got.br_startblock, temp, 3300 got.br_startblock, temp,
3255 got.br_state))) 3301 got.br_state)))
3256 goto done; 3302 goto done;
3257 if ((error = xfs_bmbt_increment(cur, 0, &i))) 3303 if ((error = xfs_btree_increment(cur, 0, &i)))
3258 goto done; 3304 goto done;
3259 cur->bc_rec.b = new; 3305 cur->bc_rec.b = new;
3260 error = xfs_bmbt_insert(cur, &i); 3306 error = xfs_btree_insert(cur, &i);
3261 if (error && error != ENOSPC) 3307 if (error && error != ENOSPC)
3262 goto done; 3308 goto done;
3263 /* 3309 /*
@@ -3404,11 +3450,11 @@ xfs_bmap_extents_to_btree(
3404 int *logflagsp, /* inode logging flags */ 3450 int *logflagsp, /* inode logging flags */
3405 int whichfork) /* data or attr fork */ 3451 int whichfork) /* data or attr fork */
3406{ 3452{
3407 xfs_bmbt_block_t *ablock; /* allocated (child) bt block */ 3453 struct xfs_btree_block *ablock; /* allocated (child) bt block */
3408 xfs_buf_t *abp; /* buffer for ablock */ 3454 xfs_buf_t *abp; /* buffer for ablock */
3409 xfs_alloc_arg_t args; /* allocation arguments */ 3455 xfs_alloc_arg_t args; /* allocation arguments */
3410 xfs_bmbt_rec_t *arp; /* child record pointer */ 3456 xfs_bmbt_rec_t *arp; /* child record pointer */
3411 xfs_bmbt_block_t *block; /* btree root block */ 3457 struct xfs_btree_block *block; /* btree root block */
3412 xfs_btree_cur_t *cur; /* bmap btree cursor */ 3458 xfs_btree_cur_t *cur; /* bmap btree cursor */
3413 xfs_bmbt_rec_host_t *ep; /* extent record pointer */ 3459 xfs_bmbt_rec_host_t *ep; /* extent record pointer */
3414 int error; /* error return value */ 3460 int error; /* error return value */
@@ -3428,6 +3474,7 @@ xfs_bmap_extents_to_btree(
3428 */ 3474 */
3429 xfs_iroot_realloc(ip, 1, whichfork); 3475 xfs_iroot_realloc(ip, 1, whichfork);
3430 ifp->if_flags |= XFS_IFBROOT; 3476 ifp->if_flags |= XFS_IFBROOT;
3477
3431 /* 3478 /*
3432 * Fill in the root. 3479 * Fill in the root.
3433 */ 3480 */
@@ -3435,14 +3482,14 @@ xfs_bmap_extents_to_btree(
3435 block->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC); 3482 block->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC);
3436 block->bb_level = cpu_to_be16(1); 3483 block->bb_level = cpu_to_be16(1);
3437 block->bb_numrecs = cpu_to_be16(1); 3484 block->bb_numrecs = cpu_to_be16(1);
3438 block->bb_leftsib = cpu_to_be64(NULLDFSBNO); 3485 block->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
3439 block->bb_rightsib = cpu_to_be64(NULLDFSBNO); 3486 block->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
3487
3440 /* 3488 /*
3441 * Need a cursor. Can't allocate until bb_level is filled in. 3489 * Need a cursor. Can't allocate until bb_level is filled in.
3442 */ 3490 */
3443 mp = ip->i_mount; 3491 mp = ip->i_mount;
3444 cur = xfs_btree_init_cursor(mp, tp, NULL, 0, XFS_BTNUM_BMAP, ip, 3492 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
3445 whichfork);
3446 cur->bc_private.b.firstblock = *firstblock; 3493 cur->bc_private.b.firstblock = *firstblock;
3447 cur->bc_private.b.flist = flist; 3494 cur->bc_private.b.flist = flist;
3448 cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0; 3495 cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
@@ -3489,12 +3536,12 @@ xfs_bmap_extents_to_btree(
3489 /* 3536 /*
3490 * Fill in the child block. 3537 * Fill in the child block.
3491 */ 3538 */
3492 ablock = XFS_BUF_TO_BMBT_BLOCK(abp); 3539 ablock = XFS_BUF_TO_BLOCK(abp);
3493 ablock->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC); 3540 ablock->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC);
3494 ablock->bb_level = 0; 3541 ablock->bb_level = 0;
3495 ablock->bb_leftsib = cpu_to_be64(NULLDFSBNO); 3542 ablock->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
3496 ablock->bb_rightsib = cpu_to_be64(NULLDFSBNO); 3543 ablock->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
3497 arp = XFS_BMAP_REC_IADDR(ablock, 1, cur); 3544 arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
3498 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 3545 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
3499 for (cnt = i = 0; i < nextents; i++) { 3546 for (cnt = i = 0; i < nextents; i++) {
3500 ep = xfs_iext_get_ext(ifp, i); 3547 ep = xfs_iext_get_ext(ifp, i);
@@ -3505,21 +3552,24 @@ xfs_bmap_extents_to_btree(
3505 } 3552 }
3506 } 3553 }
3507 ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork)); 3554 ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork));
3508 ablock->bb_numrecs = cpu_to_be16(cnt); 3555 xfs_btree_set_numrecs(ablock, cnt);
3556
3509 /* 3557 /*
3510 * Fill in the root key and pointer. 3558 * Fill in the root key and pointer.
3511 */ 3559 */
3512 kp = XFS_BMAP_KEY_IADDR(block, 1, cur); 3560 kp = XFS_BMBT_KEY_ADDR(mp, block, 1);
3513 arp = XFS_BMAP_REC_IADDR(ablock, 1, cur); 3561 arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
3514 kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp)); 3562 kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
3515 pp = XFS_BMAP_PTR_IADDR(block, 1, cur); 3563 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur,
3564 be16_to_cpu(block->bb_level)));
3516 *pp = cpu_to_be64(args.fsbno); 3565 *pp = cpu_to_be64(args.fsbno);
3566
3517 /* 3567 /*
3518 * Do all this logging at the end so that 3568 * Do all this logging at the end so that
3519 * the root is at the right level. 3569 * the root is at the right level.
3520 */ 3570 */
3521 xfs_bmbt_log_block(cur, abp, XFS_BB_ALL_BITS); 3571 xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS);
3522 xfs_bmbt_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs)); 3572 xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
3523 ASSERT(*curp == NULL); 3573 ASSERT(*curp == NULL);
3524 *curp = cur; 3574 *curp = cur;
3525 *logflagsp = XFS_ILOG_CORE | XFS_ILOG_FBROOT(whichfork); 3575 *logflagsp = XFS_ILOG_CORE | XFS_ILOG_FBROOT(whichfork);
@@ -4176,7 +4226,7 @@ xfs_bmap_compute_maxlevels(
4176 maxleafents = MAXAEXTNUM; 4226 maxleafents = MAXAEXTNUM;
4177 sz = XFS_BMDR_SPACE_CALC(MINABTPTRS); 4227 sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
4178 } 4228 }
4179 maxrootrecs = (int)XFS_BTREE_BLOCK_MAXRECS(sz, xfs_bmdr, 0); 4229 maxrootrecs = xfs_bmdr_maxrecs(mp, sz, 0);
4180 minleafrecs = mp->m_bmap_dmnr[0]; 4230 minleafrecs = mp->m_bmap_dmnr[0];
4181 minnoderecs = mp->m_bmap_dmnr[1]; 4231 minnoderecs = mp->m_bmap_dmnr[1];
4182 maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs; 4232 maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
@@ -4242,9 +4292,15 @@ xfs_bmap_finish(
4242 * We have a new transaction, so we should return committed=1, 4292 * We have a new transaction, so we should return committed=1,
4243 * even though we're returning an error. 4293 * even though we're returning an error.
4244 */ 4294 */
4245 if (error) { 4295 if (error)
4246 return error; 4296 return error;
4247 } 4297
4298 /*
4299 * transaction commit worked ok so we can drop the extra ticket
4300 * reference that we gained in xfs_trans_dup()
4301 */
4302 xfs_log_ticket_put(ntp->t_ticket);
4303
4248 if ((error = xfs_trans_reserve(ntp, 0, logres, 0, XFS_TRANS_PERM_LOG_RES, 4304 if ((error = xfs_trans_reserve(ntp, 0, logres, 0, XFS_TRANS_PERM_LOG_RES,
4249 logcount))) 4305 logcount)))
4250 return error; 4306 return error;
@@ -4474,6 +4530,22 @@ xfs_bmap_one_block(
4474 return rval; 4530 return rval;
4475} 4531}
4476 4532
4533STATIC int
4534xfs_bmap_sanity_check(
4535 struct xfs_mount *mp,
4536 struct xfs_buf *bp,
4537 int level)
4538{
4539 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
4540
4541 if (be32_to_cpu(block->bb_magic) != XFS_BMAP_MAGIC ||
4542 be16_to_cpu(block->bb_level) != level ||
4543 be16_to_cpu(block->bb_numrecs) == 0 ||
4544 be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
4545 return 0;
4546 return 1;
4547}
4548
4477/* 4549/*
4478 * Read in the extents to if_extents. 4550 * Read in the extents to if_extents.
4479 * All inode fields are set up by caller, we just traverse the btree 4551 * All inode fields are set up by caller, we just traverse the btree
@@ -4486,7 +4558,7 @@ xfs_bmap_read_extents(
4486 xfs_inode_t *ip, /* incore inode */ 4558 xfs_inode_t *ip, /* incore inode */
4487 int whichfork) /* data or attr fork */ 4559 int whichfork) /* data or attr fork */
4488{ 4560{
4489 xfs_bmbt_block_t *block; /* current btree block */ 4561 struct xfs_btree_block *block; /* current btree block */
4490 xfs_fsblock_t bno; /* block # of "block" */ 4562 xfs_fsblock_t bno; /* block # of "block" */
4491 xfs_buf_t *bp; /* buffer for "block" */ 4563 xfs_buf_t *bp; /* buffer for "block" */
4492 int error; /* error return value */ 4564 int error; /* error return value */
@@ -4510,7 +4582,7 @@ xfs_bmap_read_extents(
4510 */ 4582 */
4511 level = be16_to_cpu(block->bb_level); 4583 level = be16_to_cpu(block->bb_level);
4512 ASSERT(level > 0); 4584 ASSERT(level > 0);
4513 pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes); 4585 pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
4514 bno = be64_to_cpu(*pp); 4586 bno = be64_to_cpu(*pp);
4515 ASSERT(bno != NULLDFSBNO); 4587 ASSERT(bno != NULLDFSBNO);
4516 ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); 4588 ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
@@ -4523,13 +4595,13 @@ xfs_bmap_read_extents(
4523 if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, 4595 if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
4524 XFS_BMAP_BTREE_REF))) 4596 XFS_BMAP_BTREE_REF)))
4525 return error; 4597 return error;
4526 block = XFS_BUF_TO_BMBT_BLOCK(bp); 4598 block = XFS_BUF_TO_BLOCK(bp);
4527 XFS_WANT_CORRUPTED_GOTO( 4599 XFS_WANT_CORRUPTED_GOTO(
4528 XFS_BMAP_SANITY_CHECK(mp, block, level), 4600 xfs_bmap_sanity_check(mp, bp, level),
4529 error0); 4601 error0);
4530 if (level == 0) 4602 if (level == 0)
4531 break; 4603 break;
4532 pp = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]); 4604 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
4533 bno = be64_to_cpu(*pp); 4605 bno = be64_to_cpu(*pp);
4534 XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0); 4606 XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
4535 xfs_trans_brelse(tp, bp); 4607 xfs_trans_brelse(tp, bp);
@@ -4549,7 +4621,7 @@ xfs_bmap_read_extents(
4549 xfs_extnum_t start; 4621 xfs_extnum_t start;
4550 4622
4551 4623
4552 num_recs = be16_to_cpu(block->bb_numrecs); 4624 num_recs = xfs_btree_get_numrecs(block);
4553 if (unlikely(i + num_recs > room)) { 4625 if (unlikely(i + num_recs > room)) {
4554 ASSERT(i + num_recs <= room); 4626 ASSERT(i + num_recs <= room);
4555 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 4627 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
@@ -4561,18 +4633,18 @@ xfs_bmap_read_extents(
4561 goto error0; 4633 goto error0;
4562 } 4634 }
4563 XFS_WANT_CORRUPTED_GOTO( 4635 XFS_WANT_CORRUPTED_GOTO(
4564 XFS_BMAP_SANITY_CHECK(mp, block, 0), 4636 xfs_bmap_sanity_check(mp, bp, 0),
4565 error0); 4637 error0);
4566 /* 4638 /*
4567 * Read-ahead the next leaf block, if any. 4639 * Read-ahead the next leaf block, if any.
4568 */ 4640 */
4569 nextbno = be64_to_cpu(block->bb_rightsib); 4641 nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
4570 if (nextbno != NULLFSBLOCK) 4642 if (nextbno != NULLFSBLOCK)
4571 xfs_btree_reada_bufl(mp, nextbno, 1); 4643 xfs_btree_reada_bufl(mp, nextbno, 1);
4572 /* 4644 /*
4573 * Copy records into the extent records. 4645 * Copy records into the extent records.
4574 */ 4646 */
4575 frp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, 1); 4647 frp = XFS_BMBT_REC_ADDR(mp, block, 1);
4576 start = i; 4648 start = i;
4577 for (j = 0; j < num_recs; j++, i++, frp++) { 4649 for (j = 0; j < num_recs; j++, i++, frp++) {
4578 xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i); 4650 xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i);
@@ -4603,7 +4675,7 @@ xfs_bmap_read_extents(
4603 if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, 4675 if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
4604 XFS_BMAP_BTREE_REF))) 4676 XFS_BMAP_BTREE_REF)))
4605 return error; 4677 return error;
4606 block = XFS_BUF_TO_BMBT_BLOCK(bp); 4678 block = XFS_BUF_TO_BLOCK(bp);
4607 } 4679 }
4608 ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); 4680 ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
4609 ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork)); 4681 ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork));
@@ -5029,8 +5101,7 @@ xfs_bmapi(
5029 if (abno == NULLFSBLOCK) 5101 if (abno == NULLFSBLOCK)
5030 break; 5102 break;
5031 if ((ifp->if_flags & XFS_IFBROOT) && !cur) { 5103 if ((ifp->if_flags & XFS_IFBROOT) && !cur) {
5032 cur = xfs_btree_init_cursor(mp, 5104 cur = xfs_bmbt_init_cursor(mp, tp,
5033 tp, NULL, 0, XFS_BTNUM_BMAP,
5034 ip, whichfork); 5105 ip, whichfork);
5035 cur->bc_private.b.firstblock = 5106 cur->bc_private.b.firstblock =
5036 *firstblock; 5107 *firstblock;
@@ -5147,9 +5218,8 @@ xfs_bmapi(
5147 */ 5218 */
5148 ASSERT(mval->br_blockcount <= len); 5219 ASSERT(mval->br_blockcount <= len);
5149 if ((ifp->if_flags & XFS_IFBROOT) && !cur) { 5220 if ((ifp->if_flags & XFS_IFBROOT) && !cur) {
5150 cur = xfs_btree_init_cursor(mp, 5221 cur = xfs_bmbt_init_cursor(mp,
5151 tp, NULL, 0, XFS_BTNUM_BMAP, 5222 tp, ip, whichfork);
5152 ip, whichfork);
5153 cur->bc_private.b.firstblock = 5223 cur->bc_private.b.firstblock =
5154 *firstblock; 5224 *firstblock;
5155 cur->bc_private.b.flist = flist; 5225 cur->bc_private.b.flist = flist;
@@ -5440,8 +5510,7 @@ xfs_bunmapi(
5440 logflags = 0; 5510 logflags = 0;
5441 if (ifp->if_flags & XFS_IFBROOT) { 5511 if (ifp->if_flags & XFS_IFBROOT) {
5442 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE); 5512 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
5443 cur = xfs_btree_init_cursor(mp, tp, NULL, 0, XFS_BTNUM_BMAP, ip, 5513 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5444 whichfork);
5445 cur->bc_private.b.firstblock = *firstblock; 5514 cur->bc_private.b.firstblock = *firstblock;
5446 cur->bc_private.b.flist = flist; 5515 cur->bc_private.b.flist = flist;
5447 cur->bc_private.b.flags = 0; 5516 cur->bc_private.b.flags = 0;
@@ -5742,14 +5811,17 @@ error0:
5742STATIC int 5811STATIC int
5743xfs_getbmapx_fix_eof_hole( 5812xfs_getbmapx_fix_eof_hole(
5744 xfs_inode_t *ip, /* xfs incore inode pointer */ 5813 xfs_inode_t *ip, /* xfs incore inode pointer */
5745 struct getbmap *out, /* output structure */ 5814 struct getbmapx *out, /* output structure */
5746 int prealloced, /* this is a file with 5815 int prealloced, /* this is a file with
5747 * preallocated data space */ 5816 * preallocated data space */
5748 __int64_t end, /* last block requested */ 5817 __int64_t end, /* last block requested */
5749 xfs_fsblock_t startblock) 5818 xfs_fsblock_t startblock)
5750{ 5819{
5751 __int64_t fixlen; 5820 __int64_t fixlen;
5752 xfs_mount_t *mp; /* file system mount point */ 5821 xfs_mount_t *mp; /* file system mount point */
5822 xfs_ifork_t *ifp; /* inode fork pointer */
5823 xfs_extnum_t lastx; /* last extent pointer */
5824 xfs_fileoff_t fileblock;
5753 5825
5754 if (startblock == HOLESTARTBLOCK) { 5826 if (startblock == HOLESTARTBLOCK) {
5755 mp = ip->i_mount; 5827 mp = ip->i_mount;
@@ -5763,21 +5835,33 @@ xfs_getbmapx_fix_eof_hole(
5763 out->bmv_length = fixlen; 5835 out->bmv_length = fixlen;
5764 } 5836 }
5765 } else { 5837 } else {
5766 out->bmv_block = XFS_FSB_TO_DB(ip, startblock); 5838 if (startblock == DELAYSTARTBLOCK)
5839 out->bmv_block = -2;
5840 else
5841 out->bmv_block = XFS_FSB_TO_DB(ip, startblock);
5842 fileblock = XFS_BB_TO_FSB(ip->i_mount, out->bmv_offset);
5843 ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
5844 if (xfs_iext_bno_to_ext(ifp, fileblock, &lastx) &&
5845 (lastx == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))-1))
5846 out->bmv_oflags |= BMV_OF_LAST;
5767 } 5847 }
5768 5848
5769 return 1; 5849 return 1;
5770} 5850}
5771 5851
5772/* 5852/*
5773 * Fcntl interface to xfs_bmapi. 5853 * Get inode's extents as described in bmv, and format for output.
5854 * Calls formatter to fill the user's buffer until all extents
5855 * are mapped, until the passed-in bmv->bmv_count slots have
5856 * been filled, or until the formatter short-circuits the loop,
5857 * if it is tracking filled-in extents on its own.
5774 */ 5858 */
5775int /* error code */ 5859int /* error code */
5776xfs_getbmap( 5860xfs_getbmap(
5777 xfs_inode_t *ip, 5861 xfs_inode_t *ip,
5778 struct getbmap *bmv, /* user bmap structure */ 5862 struct getbmapx *bmv, /* user bmap structure */
5779 void __user *ap, /* pointer to user's array */ 5863 xfs_bmap_format_t formatter, /* format to user */
5780 int interface) /* interface flags */ 5864 void *arg) /* formatter arg */
5781{ 5865{
5782 __int64_t bmvend; /* last block requested */ 5866 __int64_t bmvend; /* last block requested */
5783 int error; /* return value */ 5867 int error; /* return value */
@@ -5790,19 +5874,17 @@ xfs_getbmap(
5790 int nexleft; /* # of user extents left */ 5874 int nexleft; /* # of user extents left */
5791 int subnex; /* # of bmapi's can do */ 5875 int subnex; /* # of bmapi's can do */
5792 int nmap; /* number of map entries */ 5876 int nmap; /* number of map entries */
5793 struct getbmap out; /* output structure */ 5877 struct getbmapx out; /* output structure */
5794 int whichfork; /* data or attr fork */ 5878 int whichfork; /* data or attr fork */
5795 int prealloced; /* this is a file with 5879 int prealloced; /* this is a file with
5796 * preallocated data space */ 5880 * preallocated data space */
5797 int sh_unwritten; /* true, if unwritten */ 5881 int iflags; /* interface flags */
5798 /* extents listed separately */
5799 int bmapi_flags; /* flags for xfs_bmapi */ 5882 int bmapi_flags; /* flags for xfs_bmapi */
5800 __int32_t oflags; /* getbmapx bmv_oflags field */
5801 5883
5802 mp = ip->i_mount; 5884 mp = ip->i_mount;
5885 iflags = bmv->bmv_iflags;
5803 5886
5804 whichfork = interface & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK; 5887 whichfork = iflags & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK;
5805 sh_unwritten = (interface & BMV_IF_PREALLOC) != 0;
5806 5888
5807 /* If the BMV_IF_NO_DMAPI_READ interface bit specified, do not 5889 /* If the BMV_IF_NO_DMAPI_READ interface bit specified, do not
5808 * generate a DMAPI read event. Otherwise, if the DM_EVENT_READ 5890 * generate a DMAPI read event. Otherwise, if the DM_EVENT_READ
@@ -5817,7 +5899,7 @@ xfs_getbmap(
5817 * could misinterpret holes in a DMAPI file as true holes, 5899 * could misinterpret holes in a DMAPI file as true holes,
5818 * when in fact they may represent offline user data. 5900 * when in fact they may represent offline user data.
5819 */ 5901 */
5820 if ((interface & BMV_IF_NO_DMAPI_READ) == 0 && 5902 if ((iflags & BMV_IF_NO_DMAPI_READ) == 0 &&
5821 DM_EVENT_ENABLED(ip, DM_EVENT_READ) && 5903 DM_EVENT_ENABLED(ip, DM_EVENT_READ) &&
5822 whichfork == XFS_DATA_FORK) { 5904 whichfork == XFS_DATA_FORK) {
5823 error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, 0, 0, 0, NULL); 5905 error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, 0, 0, 0, NULL);
@@ -5873,8 +5955,9 @@ xfs_getbmap(
5873 5955
5874 xfs_ilock(ip, XFS_IOLOCK_SHARED); 5956 xfs_ilock(ip, XFS_IOLOCK_SHARED);
5875 5957
5876 if (whichfork == XFS_DATA_FORK && 5958 if (((iflags & BMV_IF_DELALLOC) == 0) &&
5877 (ip->i_delayed_blks || ip->i_size > ip->i_d.di_size)) { 5959 (whichfork == XFS_DATA_FORK) &&
5960 (ip->i_delayed_blks || ip->i_size > ip->i_d.di_size)) {
5878 /* xfs_fsize_t last_byte = xfs_file_last_byte(ip); */ 5961 /* xfs_fsize_t last_byte = xfs_file_last_byte(ip); */
5879 error = xfs_flush_pages(ip, (xfs_off_t)0, 5962 error = xfs_flush_pages(ip, (xfs_off_t)0,
5880 -1, 0, FI_REMAPF); 5963 -1, 0, FI_REMAPF);
@@ -5884,7 +5967,8 @@ xfs_getbmap(
5884 } 5967 }
5885 } 5968 }
5886 5969
5887 ASSERT(whichfork == XFS_ATTR_FORK || ip->i_delayed_blks == 0); 5970 ASSERT(whichfork == XFS_ATTR_FORK || (iflags & BMV_IF_DELALLOC) ||
5971 ip->i_delayed_blks == 0);
5888 5972
5889 lock = xfs_ilock_map_shared(ip); 5973 lock = xfs_ilock_map_shared(ip);
5890 5974
@@ -5896,7 +5980,7 @@ xfs_getbmap(
5896 nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1; 5980 nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1;
5897 5981
5898 bmapi_flags = XFS_BMAPI_AFLAG(whichfork) | 5982 bmapi_flags = XFS_BMAPI_AFLAG(whichfork) |
5899 ((sh_unwritten) ? 0 : XFS_BMAPI_IGSTATE); 5983 ((iflags & BMV_IF_PREALLOC) ? 0 : XFS_BMAPI_IGSTATE);
5900 5984
5901 /* 5985 /*
5902 * Allocate enough space to handle "subnex" maps at a time. 5986 * Allocate enough space to handle "subnex" maps at a time.
@@ -5906,9 +5990,12 @@ xfs_getbmap(
5906 5990
5907 bmv->bmv_entries = 0; 5991 bmv->bmv_entries = 0;
5908 5992
5909 if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0) { 5993 if ((XFS_IFORK_NEXTENTS(ip, whichfork) == 0)) {
5910 error = 0; 5994 if (((iflags & BMV_IF_DELALLOC) == 0) ||
5911 goto unlock_and_return; 5995 whichfork == XFS_ATTR_FORK) {
5996 error = 0;
5997 goto unlock_and_return;
5998 }
5912 } 5999 }
5913 6000
5914 nexleft = nex; 6001 nexleft = nex;
@@ -5924,52 +6011,40 @@ xfs_getbmap(
5924 ASSERT(nmap <= subnex); 6011 ASSERT(nmap <= subnex);
5925 6012
5926 for (i = 0; i < nmap && nexleft && bmv->bmv_length; i++) { 6013 for (i = 0; i < nmap && nexleft && bmv->bmv_length; i++) {
5927 nexleft--; 6014 out.bmv_oflags = 0;
5928 oflags = (map[i].br_state == XFS_EXT_UNWRITTEN) ? 6015 if (map[i].br_state == XFS_EXT_UNWRITTEN)
5929 BMV_OF_PREALLOC : 0; 6016 out.bmv_oflags |= BMV_OF_PREALLOC;
6017 else if (map[i].br_startblock == DELAYSTARTBLOCK)
6018 out.bmv_oflags |= BMV_OF_DELALLOC;
5930 out.bmv_offset = XFS_FSB_TO_BB(mp, map[i].br_startoff); 6019 out.bmv_offset = XFS_FSB_TO_BB(mp, map[i].br_startoff);
5931 out.bmv_length = XFS_FSB_TO_BB(mp, map[i].br_blockcount); 6020 out.bmv_length = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
5932 ASSERT(map[i].br_startblock != DELAYSTARTBLOCK); 6021 out.bmv_unused1 = out.bmv_unused2 = 0;
6022 ASSERT(((iflags & BMV_IF_DELALLOC) != 0) ||
6023 (map[i].br_startblock != DELAYSTARTBLOCK));
5933 if (map[i].br_startblock == HOLESTARTBLOCK && 6024 if (map[i].br_startblock == HOLESTARTBLOCK &&
5934 whichfork == XFS_ATTR_FORK) { 6025 whichfork == XFS_ATTR_FORK) {
5935 /* came to the end of attribute fork */ 6026 /* came to the end of attribute fork */
6027 out.bmv_oflags |= BMV_OF_LAST;
5936 goto unlock_and_return; 6028 goto unlock_and_return;
5937 } else { 6029 } else {
6030 int full = 0; /* user array is full */
6031
5938 if (!xfs_getbmapx_fix_eof_hole(ip, &out, 6032 if (!xfs_getbmapx_fix_eof_hole(ip, &out,
5939 prealloced, bmvend, 6033 prealloced, bmvend,
5940 map[i].br_startblock)) { 6034 map[i].br_startblock)) {
5941 goto unlock_and_return; 6035 goto unlock_and_return;
5942 } 6036 }
5943 6037
5944 /* return either getbmap/getbmapx structure. */ 6038 /* format results & advance arg */
5945 if (interface & BMV_IF_EXTENDED) { 6039 error = formatter(&arg, &out, &full);
5946 struct getbmapx outx; 6040 if (error || full)
5947 6041 goto unlock_and_return;
5948 GETBMAP_CONVERT(out,outx); 6042 nexleft--;
5949 outx.bmv_oflags = oflags;
5950 outx.bmv_unused1 = outx.bmv_unused2 = 0;
5951 if (copy_to_user(ap, &outx,
5952 sizeof(outx))) {
5953 error = XFS_ERROR(EFAULT);
5954 goto unlock_and_return;
5955 }
5956 } else {
5957 if (copy_to_user(ap, &out,
5958 sizeof(out))) {
5959 error = XFS_ERROR(EFAULT);
5960 goto unlock_and_return;
5961 }
5962 }
5963 bmv->bmv_offset = 6043 bmv->bmv_offset =
5964 out.bmv_offset + out.bmv_length; 6044 out.bmv_offset + out.bmv_length;
5965 bmv->bmv_length = MAX((__int64_t)0, 6045 bmv->bmv_length = MAX((__int64_t)0,
5966 (__int64_t)(bmvend - bmv->bmv_offset)); 6046 (__int64_t)(bmvend - bmv->bmv_offset));
5967 bmv->bmv_entries++; 6047 bmv->bmv_entries++;
5968 ap = (interface & BMV_IF_EXTENDED) ?
5969 (void __user *)
5970 ((struct getbmapx __user *)ap + 1) :
5971 (void __user *)
5972 ((struct getbmap __user *)ap + 1);
5973 } 6048 }
5974 } 6049 }
5975 } while (nmap && nexleft && bmv->bmv_length); 6050 } while (nmap && nexleft && bmv->bmv_length);
@@ -6131,7 +6206,7 @@ xfs_bmap_get_bp(
6131 6206
6132void 6207void
6133xfs_check_block( 6208xfs_check_block(
6134 xfs_bmbt_block_t *block, 6209 struct xfs_btree_block *block,
6135 xfs_mount_t *mp, 6210 xfs_mount_t *mp,
6136 int root, 6211 int root,
6137 short sz) 6212 short sz)
@@ -6143,36 +6218,29 @@ xfs_check_block(
6143 ASSERT(be16_to_cpu(block->bb_level) > 0); 6218 ASSERT(be16_to_cpu(block->bb_level) > 0);
6144 6219
6145 prevp = NULL; 6220 prevp = NULL;
6146 for( i = 1; i <= be16_to_cpu(block->bb_numrecs); i++) { 6221 for( i = 1; i <= xfs_btree_get_numrecs(block); i++) {
6147 dmxr = mp->m_bmap_dmxr[0]; 6222 dmxr = mp->m_bmap_dmxr[0];
6148 6223 keyp = XFS_BMBT_KEY_ADDR(mp, block, i);
6149 if (root) {
6150 keyp = XFS_BMAP_BROOT_KEY_ADDR(block, i, sz);
6151 } else {
6152 keyp = XFS_BTREE_KEY_ADDR(xfs_bmbt, block, i);
6153 }
6154 6224
6155 if (prevp) { 6225 if (prevp) {
6156 xfs_btree_check_key(XFS_BTNUM_BMAP, prevp, keyp); 6226 ASSERT(be64_to_cpu(prevp->br_startoff) <
6227 be64_to_cpu(keyp->br_startoff));
6157 } 6228 }
6158 prevp = keyp; 6229 prevp = keyp;
6159 6230
6160 /* 6231 /*
6161 * Compare the block numbers to see if there are dups. 6232 * Compare the block numbers to see if there are dups.
6162 */ 6233 */
6234 if (root)
6235 pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz);
6236 else
6237 pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr);
6163 6238
6164 if (root) {
6165 pp = XFS_BMAP_BROOT_PTR_ADDR(block, i, sz);
6166 } else {
6167 pp = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, i, dmxr);
6168 }
6169 for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) { 6239 for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) {
6170 if (root) { 6240 if (root)
6171 thispa = XFS_BMAP_BROOT_PTR_ADDR(block, j, sz); 6241 thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz);
6172 } else { 6242 else
6173 thispa = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, j, 6243 thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
6174 dmxr);
6175 }
6176 if (*thispa == *pp) { 6244 if (*thispa == *pp) {
6177 cmn_err(CE_WARN, "%s: thispa(%d) == pp(%d) %Ld", 6245 cmn_err(CE_WARN, "%s: thispa(%d) == pp(%d) %Ld",
6178 __func__, j, i, 6246 __func__, j, i,
@@ -6195,7 +6263,7 @@ xfs_bmap_check_leaf_extents(
6195 xfs_inode_t *ip, /* incore inode pointer */ 6263 xfs_inode_t *ip, /* incore inode pointer */
6196 int whichfork) /* data or attr fork */ 6264 int whichfork) /* data or attr fork */
6197{ 6265{
6198 xfs_bmbt_block_t *block; /* current btree block */ 6266 struct xfs_btree_block *block; /* current btree block */
6199 xfs_fsblock_t bno; /* block # of "block" */ 6267 xfs_fsblock_t bno; /* block # of "block" */
6200 xfs_buf_t *bp; /* buffer for "block" */ 6268 xfs_buf_t *bp; /* buffer for "block" */
6201 int error; /* error return value */ 6269 int error; /* error return value */
@@ -6223,7 +6291,7 @@ xfs_bmap_check_leaf_extents(
6223 level = be16_to_cpu(block->bb_level); 6291 level = be16_to_cpu(block->bb_level);
6224 ASSERT(level > 0); 6292 ASSERT(level > 0);
6225 xfs_check_block(block, mp, 1, ifp->if_broot_bytes); 6293 xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
6226 pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes); 6294 pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
6227 bno = be64_to_cpu(*pp); 6295 bno = be64_to_cpu(*pp);
6228 6296
6229 ASSERT(bno != NULLDFSBNO); 6297 ASSERT(bno != NULLDFSBNO);
@@ -6245,9 +6313,9 @@ xfs_bmap_check_leaf_extents(
6245 if (!bp && (error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp, 6313 if (!bp && (error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
6246 XFS_BMAP_BTREE_REF))) 6314 XFS_BMAP_BTREE_REF)))
6247 goto error_norelse; 6315 goto error_norelse;
6248 block = XFS_BUF_TO_BMBT_BLOCK(bp); 6316 block = XFS_BUF_TO_BLOCK(bp);
6249 XFS_WANT_CORRUPTED_GOTO( 6317 XFS_WANT_CORRUPTED_GOTO(
6250 XFS_BMAP_SANITY_CHECK(mp, block, level), 6318 xfs_bmap_sanity_check(mp, bp, level),
6251 error0); 6319 error0);
6252 if (level == 0) 6320 if (level == 0)
6253 break; 6321 break;
@@ -6258,7 +6326,7 @@ xfs_bmap_check_leaf_extents(
6258 */ 6326 */
6259 6327
6260 xfs_check_block(block, mp, 0, 0); 6328 xfs_check_block(block, mp, 0, 0);
6261 pp = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]); 6329 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
6262 bno = be64_to_cpu(*pp); 6330 bno = be64_to_cpu(*pp);
6263 XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0); 6331 XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
6264 if (bp_release) { 6332 if (bp_release) {
@@ -6280,13 +6348,13 @@ xfs_bmap_check_leaf_extents(
6280 xfs_extnum_t num_recs; 6348 xfs_extnum_t num_recs;
6281 6349
6282 6350
6283 num_recs = be16_to_cpu(block->bb_numrecs); 6351 num_recs = xfs_btree_get_numrecs(block);
6284 6352
6285 /* 6353 /*
6286 * Read-ahead the next leaf block, if any. 6354 * Read-ahead the next leaf block, if any.
6287 */ 6355 */
6288 6356
6289 nextbno = be64_to_cpu(block->bb_rightsib); 6357 nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
6290 6358
6291 /* 6359 /*
6292 * Check all the extents to make sure they are OK. 6360 * Check all the extents to make sure they are OK.
@@ -6294,13 +6362,17 @@ xfs_bmap_check_leaf_extents(
6294 * conform with the first entry in this one. 6362 * conform with the first entry in this one.
6295 */ 6363 */
6296 6364
6297 ep = XFS_BTREE_REC_ADDR(xfs_bmbt, block, 1); 6365 ep = XFS_BMBT_REC_ADDR(mp, block, 1);
6298 if (i) { 6366 if (i) {
6299 xfs_btree_check_rec(XFS_BTNUM_BMAP, &last, ep); 6367 ASSERT(xfs_bmbt_disk_get_startoff(&last) +
6368 xfs_bmbt_disk_get_blockcount(&last) <=
6369 xfs_bmbt_disk_get_startoff(ep));
6300 } 6370 }
6301 for (j = 1; j < num_recs; j++) { 6371 for (j = 1; j < num_recs; j++) {
6302 nextp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, j + 1); 6372 nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1);
6303 xfs_btree_check_rec(XFS_BTNUM_BMAP, ep, nextp); 6373 ASSERT(xfs_bmbt_disk_get_startoff(ep) +
6374 xfs_bmbt_disk_get_blockcount(ep) <=
6375 xfs_bmbt_disk_get_startoff(nextp));
6304 ep = nextp; 6376 ep = nextp;
6305 } 6377 }
6306 6378
@@ -6326,7 +6398,7 @@ xfs_bmap_check_leaf_extents(
6326 if (!bp && (error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp, 6398 if (!bp && (error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
6327 XFS_BMAP_BTREE_REF))) 6399 XFS_BMAP_BTREE_REF)))
6328 goto error_norelse; 6400 goto error_norelse;
6329 block = XFS_BUF_TO_BMBT_BLOCK(bp); 6401 block = XFS_BUF_TO_BLOCK(bp);
6330 } 6402 }
6331 if (bp_release) { 6403 if (bp_release) {
6332 bp_release = 0; 6404 bp_release = 0;
@@ -6356,7 +6428,7 @@ xfs_bmap_count_blocks(
6356 int whichfork, /* data or attr fork */ 6428 int whichfork, /* data or attr fork */
6357 int *count) /* out: count of blocks */ 6429 int *count) /* out: count of blocks */
6358{ 6430{
6359 xfs_bmbt_block_t *block; /* current btree block */ 6431 struct xfs_btree_block *block; /* current btree block */
6360 xfs_fsblock_t bno; /* block # of "block" */ 6432 xfs_fsblock_t bno; /* block # of "block" */
6361 xfs_ifork_t *ifp; /* fork structure */ 6433 xfs_ifork_t *ifp; /* fork structure */
6362 int level; /* btree level, for checking */ 6434 int level; /* btree level, for checking */
@@ -6379,7 +6451,7 @@ xfs_bmap_count_blocks(
6379 block = ifp->if_broot; 6451 block = ifp->if_broot;
6380 level = be16_to_cpu(block->bb_level); 6452 level = be16_to_cpu(block->bb_level);
6381 ASSERT(level > 0); 6453 ASSERT(level > 0);
6382 pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes); 6454 pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
6383 bno = be64_to_cpu(*pp); 6455 bno = be64_to_cpu(*pp);
6384 ASSERT(bno != NULLDFSBNO); 6456 ASSERT(bno != NULLDFSBNO);
6385 ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); 6457 ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
@@ -6413,29 +6485,29 @@ xfs_bmap_count_tree(
6413 __be64 *pp; 6485 __be64 *pp;
6414 xfs_fsblock_t bno = blockno; 6486 xfs_fsblock_t bno = blockno;
6415 xfs_fsblock_t nextbno; 6487 xfs_fsblock_t nextbno;
6416 xfs_bmbt_block_t *block, *nextblock; 6488 struct xfs_btree_block *block, *nextblock;
6417 int numrecs; 6489 int numrecs;
6418 6490
6419 if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF))) 6491 if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF)))
6420 return error; 6492 return error;
6421 *count += 1; 6493 *count += 1;
6422 block = XFS_BUF_TO_BMBT_BLOCK(bp); 6494 block = XFS_BUF_TO_BLOCK(bp);
6423 6495
6424 if (--level) { 6496 if (--level) {
6425 /* Not at node above leafs, count this level of nodes */ 6497 /* Not at node above leafs, count this level of nodes */
6426 nextbno = be64_to_cpu(block->bb_rightsib); 6498 nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
6427 while (nextbno != NULLFSBLOCK) { 6499 while (nextbno != NULLFSBLOCK) {
6428 if ((error = xfs_btree_read_bufl(mp, tp, nextbno, 6500 if ((error = xfs_btree_read_bufl(mp, tp, nextbno,
6429 0, &nbp, XFS_BMAP_BTREE_REF))) 6501 0, &nbp, XFS_BMAP_BTREE_REF)))
6430 return error; 6502 return error;
6431 *count += 1; 6503 *count += 1;
6432 nextblock = XFS_BUF_TO_BMBT_BLOCK(nbp); 6504 nextblock = XFS_BUF_TO_BLOCK(nbp);
6433 nextbno = be64_to_cpu(nextblock->bb_rightsib); 6505 nextbno = be64_to_cpu(nextblock->bb_u.l.bb_rightsib);
6434 xfs_trans_brelse(tp, nbp); 6506 xfs_trans_brelse(tp, nbp);
6435 } 6507 }
6436 6508
6437 /* Dive to the next level */ 6509 /* Dive to the next level */
6438 pp = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]); 6510 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
6439 bno = be64_to_cpu(*pp); 6511 bno = be64_to_cpu(*pp);
6440 if (unlikely((error = 6512 if (unlikely((error =
6441 xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) { 6513 xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) {
@@ -6448,9 +6520,9 @@ xfs_bmap_count_tree(
6448 } else { 6520 } else {
6449 /* count all level 1 nodes and their leaves */ 6521 /* count all level 1 nodes and their leaves */
6450 for (;;) { 6522 for (;;) {
6451 nextbno = be64_to_cpu(block->bb_rightsib); 6523 nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
6452 numrecs = be16_to_cpu(block->bb_numrecs); 6524 numrecs = be16_to_cpu(block->bb_numrecs);
6453 xfs_bmap_disk_count_leaves(0, block, numrecs, count); 6525 xfs_bmap_disk_count_leaves(mp, block, numrecs, count);
6454 xfs_trans_brelse(tp, bp); 6526 xfs_trans_brelse(tp, bp);
6455 if (nextbno == NULLFSBLOCK) 6527 if (nextbno == NULLFSBLOCK)
6456 break; 6528 break;
@@ -6459,7 +6531,7 @@ xfs_bmap_count_tree(
6459 XFS_BMAP_BTREE_REF))) 6531 XFS_BMAP_BTREE_REF)))
6460 return error; 6532 return error;
6461 *count += 1; 6533 *count += 1;
6462 block = XFS_BUF_TO_BMBT_BLOCK(bp); 6534 block = XFS_BUF_TO_BLOCK(bp);
6463 } 6535 }
6464 } 6536 }
6465 return 0; 6537 return 0;
@@ -6489,8 +6561,8 @@ xfs_bmap_count_leaves(
6489 */ 6561 */
6490STATIC void 6562STATIC void
6491xfs_bmap_disk_count_leaves( 6563xfs_bmap_disk_count_leaves(
6492 xfs_extnum_t idx, 6564 struct xfs_mount *mp,
6493 xfs_bmbt_block_t *block, 6565 struct xfs_btree_block *block,
6494 int numrecs, 6566 int numrecs,
6495 int *count) 6567 int *count)
6496{ 6568{
@@ -6498,7 +6570,7 @@ xfs_bmap_disk_count_leaves(
6498 xfs_bmbt_rec_t *frp; 6570 xfs_bmbt_rec_t *frp;
6499 6571
6500 for (b = 1; b <= numrecs; b++) { 6572 for (b = 1; b <= numrecs; b++) {
6501 frp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, idx + b); 6573 frp = XFS_BMBT_REC_ADDR(mp, block, b);
6502 *count += xfs_bmbt_disk_get_blockcount(frp); 6574 *count += xfs_bmbt_disk_get_blockcount(frp);
6503 } 6575 }
6504} 6576}
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 9f3e3a836d1..284571c05ed 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -137,9 +137,7 @@ typedef struct xfs_bmalloca {
137 char conv; /* overwriting unwritten extents */ 137 char conv; /* overwriting unwritten extents */
138} xfs_bmalloca_t; 138} xfs_bmalloca_t;
139 139
140#ifdef __KERNEL__ 140#if defined(__KERNEL__) && defined(XFS_BMAP_TRACE)
141
142#if defined(XFS_BMAP_TRACE)
143/* 141/*
144 * Trace operations for bmap extent tracing 142 * Trace operations for bmap extent tracing
145 */ 143 */
@@ -163,9 +161,12 @@ xfs_bmap_trace_exlist(
163 int whichfork); /* data or attr fork */ 161 int whichfork); /* data or attr fork */
164#define XFS_BMAP_TRACE_EXLIST(ip,c,w) \ 162#define XFS_BMAP_TRACE_EXLIST(ip,c,w) \
165 xfs_bmap_trace_exlist(__func__,ip,c,w) 163 xfs_bmap_trace_exlist(__func__,ip,c,w)
166#else 164
165#else /* __KERNEL__ && XFS_BMAP_TRACE */
166
167#define XFS_BMAP_TRACE_EXLIST(ip,c,w) 167#define XFS_BMAP_TRACE_EXLIST(ip,c,w)
168#endif 168
169#endif /* __KERNEL__ && XFS_BMAP_TRACE */
169 170
170/* 171/*
171 * Convert inode from non-attributed to attributed. 172 * Convert inode from non-attributed to attributed.
@@ -206,20 +207,6 @@ xfs_bmap_compute_maxlevels(
206 int whichfork); /* data or attr fork */ 207 int whichfork); /* data or attr fork */
207 208
208/* 209/*
209 * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi
210 * caller. Frees all the extents that need freeing, which must be done
211 * last due to locking considerations.
212 *
213 * Return 1 if the given transaction was committed and a new one allocated,
214 * and 0 otherwise.
215 */
216int /* error */
217xfs_bmap_finish(
218 struct xfs_trans **tp, /* transaction pointer addr */
219 xfs_bmap_free_t *flist, /* i/o: list extents to free */
220 int *committed); /* xact committed or not */
221
222/*
223 * Returns the file-relative block number of the first unused block in the file. 210 * Returns the file-relative block number of the first unused block in the file.
224 * This is the lowest-address hole if the file has holes, else the first block 211 * This is the lowest-address hole if the file has holes, else the first block
225 * past the end of file. 212 * past the end of file.
@@ -344,14 +331,43 @@ xfs_bunmapi(
344 int *done); /* set if not done yet */ 331 int *done); /* set if not done yet */
345 332
346/* 333/*
347 * Fcntl interface to xfs_bmapi. 334 * Check an extent list, which has just been read, for
335 * any bit in the extent flag field.
336 */
337int
338xfs_check_nostate_extents(
339 struct xfs_ifork *ifp,
340 xfs_extnum_t idx,
341 xfs_extnum_t num);
342
343#ifdef __KERNEL__
344
345/*
346 * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi
347 * caller. Frees all the extents that need freeing, which must be done
348 * last due to locking considerations.
349 *
350 * Return 1 if the given transaction was committed and a new one allocated,
351 * and 0 otherwise.
352 */
353int /* error */
354xfs_bmap_finish(
355 struct xfs_trans **tp, /* transaction pointer addr */
356 xfs_bmap_free_t *flist, /* i/o: list extents to free */
357 int *committed); /* xact committed or not */
358
359/* bmap to userspace formatter - copy to user & advance pointer */
360typedef int (*xfs_bmap_format_t)(void **, struct getbmapx *, int *);
361
362/*
363 * Get inode's extents as described in bmv, and format for output.
348 */ 364 */
349int /* error code */ 365int /* error code */
350xfs_getbmap( 366xfs_getbmap(
351 xfs_inode_t *ip, 367 xfs_inode_t *ip,
352 struct getbmap *bmv, /* user bmap structure */ 368 struct getbmapx *bmv, /* user bmap structure */
353 void __user *ap, /* pointer to user's array */ 369 xfs_bmap_format_t formatter, /* format to user */
354 int iflags); /* interface flags */ 370 void *arg); /* formatter arg */
355 371
356/* 372/*
357 * Check if the endoff is outside the last extent. If so the caller will grow 373 * Check if the endoff is outside the last extent. If so the caller will grow
@@ -375,16 +391,6 @@ xfs_bmap_count_blocks(
375 int *count); 391 int *count);
376 392
377/* 393/*
378 * Check an extent list, which has just been read, for
379 * any bit in the extent flag field.
380 */
381int
382xfs_check_nostate_extents(
383 struct xfs_ifork *ifp,
384 xfs_extnum_t idx,
385 xfs_extnum_t num);
386
387/*
388 * Search the extent records for the entry containing block bno. 394 * Search the extent records for the entry containing block bno.
389 * If bno lies in a hole, point to the next entry. If bno lies 395 * If bno lies in a hole, point to the next entry. If bno lies
390 * past eof, *eofp will be set, and *prevp will contain the last 396 * past eof, *eofp will be set, and *prevp will contain the last
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 23efad29a5c..8f1ec73725d 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -37,1406 +37,13 @@
37#include "xfs_inode_item.h" 37#include "xfs_inode_item.h"
38#include "xfs_alloc.h" 38#include "xfs_alloc.h"
39#include "xfs_btree.h" 39#include "xfs_btree.h"
40#include "xfs_btree_trace.h"
40#include "xfs_ialloc.h" 41#include "xfs_ialloc.h"
41#include "xfs_itable.h" 42#include "xfs_itable.h"
42#include "xfs_bmap.h" 43#include "xfs_bmap.h"
43#include "xfs_error.h" 44#include "xfs_error.h"
44#include "xfs_quota.h" 45#include "xfs_quota.h"
45 46
46#if defined(XFS_BMBT_TRACE)
47ktrace_t *xfs_bmbt_trace_buf;
48#endif
49
50/*
51 * Prototypes for internal btree functions.
52 */
53
54
55STATIC int xfs_bmbt_killroot(xfs_btree_cur_t *);
56STATIC void xfs_bmbt_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int);
57STATIC void xfs_bmbt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
58STATIC int xfs_bmbt_lshift(xfs_btree_cur_t *, int, int *);
59STATIC int xfs_bmbt_rshift(xfs_btree_cur_t *, int, int *);
60STATIC int xfs_bmbt_split(xfs_btree_cur_t *, int, xfs_fsblock_t *,
61 __uint64_t *, xfs_btree_cur_t **, int *);
62STATIC int xfs_bmbt_updkey(xfs_btree_cur_t *, xfs_bmbt_key_t *, int);
63
64
65#if defined(XFS_BMBT_TRACE)
66
67static char ARGS[] = "args";
68static char ENTRY[] = "entry";
69static char ERROR[] = "error";
70#undef EXIT
71static char EXIT[] = "exit";
72
73/*
74 * Add a trace buffer entry for the arguments given to the routine,
75 * generic form.
76 */
77STATIC void
78xfs_bmbt_trace_enter(
79 const char *func,
80 xfs_btree_cur_t *cur,
81 char *s,
82 int type,
83 int line,
84 __psunsigned_t a0,
85 __psunsigned_t a1,
86 __psunsigned_t a2,
87 __psunsigned_t a3,
88 __psunsigned_t a4,
89 __psunsigned_t a5,
90 __psunsigned_t a6,
91 __psunsigned_t a7,
92 __psunsigned_t a8,
93 __psunsigned_t a9,
94 __psunsigned_t a10)
95{
96 xfs_inode_t *ip;
97 int whichfork;
98
99 ip = cur->bc_private.b.ip;
100 whichfork = cur->bc_private.b.whichfork;
101 ktrace_enter(xfs_bmbt_trace_buf,
102 (void *)((__psint_t)type | (whichfork << 8) | (line << 16)),
103 (void *)func, (void *)s, (void *)ip, (void *)cur,
104 (void *)a0, (void *)a1, (void *)a2, (void *)a3,
105 (void *)a4, (void *)a5, (void *)a6, (void *)a7,
106 (void *)a8, (void *)a9, (void *)a10);
107 ASSERT(ip->i_btrace);
108 ktrace_enter(ip->i_btrace,
109 (void *)((__psint_t)type | (whichfork << 8) | (line << 16)),
110 (void *)func, (void *)s, (void *)ip, (void *)cur,
111 (void *)a0, (void *)a1, (void *)a2, (void *)a3,
112 (void *)a4, (void *)a5, (void *)a6, (void *)a7,
113 (void *)a8, (void *)a9, (void *)a10);
114}
115/*
116 * Add a trace buffer entry for arguments, for a buffer & 1 integer arg.
117 */
118STATIC void
119xfs_bmbt_trace_argbi(
120 const char *func,
121 xfs_btree_cur_t *cur,
122 xfs_buf_t *b,
123 int i,
124 int line)
125{
126 xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGBI, line,
127 (__psunsigned_t)b, i, 0, 0,
128 0, 0, 0, 0,
129 0, 0, 0);
130}
131
132/*
133 * Add a trace buffer entry for arguments, for a buffer & 2 integer args.
134 */
135STATIC void
136xfs_bmbt_trace_argbii(
137 const char *func,
138 xfs_btree_cur_t *cur,
139 xfs_buf_t *b,
140 int i0,
141 int i1,
142 int line)
143{
144 xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGBII, line,
145 (__psunsigned_t)b, i0, i1, 0,
146 0, 0, 0, 0,
147 0, 0, 0);
148}
149
150/*
151 * Add a trace buffer entry for arguments, for 3 block-length args
152 * and an integer arg.
153 */
154STATIC void
155xfs_bmbt_trace_argfffi(
156 const char *func,
157 xfs_btree_cur_t *cur,
158 xfs_dfiloff_t o,
159 xfs_dfsbno_t b,
160 xfs_dfilblks_t i,
161 int j,
162 int line)
163{
164 xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGFFFI, line,
165 o >> 32, (int)o, b >> 32, (int)b,
166 i >> 32, (int)i, (int)j, 0,
167 0, 0, 0);
168}
169
170/*
171 * Add a trace buffer entry for arguments, for one integer arg.
172 */
173STATIC void
174xfs_bmbt_trace_argi(
175 const char *func,
176 xfs_btree_cur_t *cur,
177 int i,
178 int line)
179{
180 xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGI, line,
181 i, 0, 0, 0,
182 0, 0, 0, 0,
183 0, 0, 0);
184}
185
186/*
187 * Add a trace buffer entry for arguments, for int, fsblock, key.
188 */
189STATIC void
190xfs_bmbt_trace_argifk(
191 const char *func,
192 xfs_btree_cur_t *cur,
193 int i,
194 xfs_fsblock_t f,
195 xfs_dfiloff_t o,
196 int line)
197{
198 xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGIFK, line,
199 i, (xfs_dfsbno_t)f >> 32, (int)f, o >> 32,
200 (int)o, 0, 0, 0,
201 0, 0, 0);
202}
203
204/*
205 * Add a trace buffer entry for arguments, for int, fsblock, rec.
206 */
207STATIC void
208xfs_bmbt_trace_argifr(
209 const char *func,
210 xfs_btree_cur_t *cur,
211 int i,
212 xfs_fsblock_t f,
213 xfs_bmbt_rec_t *r,
214 int line)
215{
216 xfs_dfsbno_t b;
217 xfs_dfilblks_t c;
218 xfs_dfsbno_t d;
219 xfs_dfiloff_t o;
220 xfs_bmbt_irec_t s;
221
222 d = (xfs_dfsbno_t)f;
223 xfs_bmbt_disk_get_all(r, &s);
224 o = (xfs_dfiloff_t)s.br_startoff;
225 b = (xfs_dfsbno_t)s.br_startblock;
226 c = s.br_blockcount;
227 xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGIFR, line,
228 i, d >> 32, (int)d, o >> 32,
229 (int)o, b >> 32, (int)b, c >> 32,
230 (int)c, 0, 0);
231}
232
233/*
234 * Add a trace buffer entry for arguments, for int, key.
235 */
236STATIC void
237xfs_bmbt_trace_argik(
238 const char *func,
239 xfs_btree_cur_t *cur,
240 int i,
241 xfs_bmbt_key_t *k,
242 int line)
243{
244 xfs_dfiloff_t o;
245
246 o = be64_to_cpu(k->br_startoff);
247 xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGIFK, line,
248 i, o >> 32, (int)o, 0,
249 0, 0, 0, 0,
250 0, 0, 0);
251}
252
253/*
254 * Add a trace buffer entry for the cursor/operation.
255 */
256STATIC void
257xfs_bmbt_trace_cursor(
258 const char *func,
259 xfs_btree_cur_t *cur,
260 char *s,
261 int line)
262{
263 xfs_bmbt_rec_host_t r;
264
265 xfs_bmbt_set_all(&r, &cur->bc_rec.b);
266 xfs_bmbt_trace_enter(func, cur, s, XFS_BMBT_KTRACE_CUR, line,
267 (cur->bc_nlevels << 24) | (cur->bc_private.b.flags << 16) |
268 cur->bc_private.b.allocated,
269 r.l0 >> 32, (int)r.l0,
270 r.l1 >> 32, (int)r.l1,
271 (unsigned long)cur->bc_bufs[0], (unsigned long)cur->bc_bufs[1],
272 (unsigned long)cur->bc_bufs[2], (unsigned long)cur->bc_bufs[3],
273 (cur->bc_ptrs[0] << 16) | cur->bc_ptrs[1],
274 (cur->bc_ptrs[2] << 16) | cur->bc_ptrs[3]);
275}
276
277#define XFS_BMBT_TRACE_ARGBI(c,b,i) \
278 xfs_bmbt_trace_argbi(__func__, c, b, i, __LINE__)
279#define XFS_BMBT_TRACE_ARGBII(c,b,i,j) \
280 xfs_bmbt_trace_argbii(__func__, c, b, i, j, __LINE__)
281#define XFS_BMBT_TRACE_ARGFFFI(c,o,b,i,j) \
282 xfs_bmbt_trace_argfffi(__func__, c, o, b, i, j, __LINE__)
283#define XFS_BMBT_TRACE_ARGI(c,i) \
284 xfs_bmbt_trace_argi(__func__, c, i, __LINE__)
285#define XFS_BMBT_TRACE_ARGIFK(c,i,f,s) \
286 xfs_bmbt_trace_argifk(__func__, c, i, f, s, __LINE__)
287#define XFS_BMBT_TRACE_ARGIFR(c,i,f,r) \
288 xfs_bmbt_trace_argifr(__func__, c, i, f, r, __LINE__)
289#define XFS_BMBT_TRACE_ARGIK(c,i,k) \
290 xfs_bmbt_trace_argik(__func__, c, i, k, __LINE__)
291#define XFS_BMBT_TRACE_CURSOR(c,s) \
292 xfs_bmbt_trace_cursor(__func__, c, s, __LINE__)
293#else
294#define XFS_BMBT_TRACE_ARGBI(c,b,i)
295#define XFS_BMBT_TRACE_ARGBII(c,b,i,j)
296#define XFS_BMBT_TRACE_ARGFFFI(c,o,b,i,j)
297#define XFS_BMBT_TRACE_ARGI(c,i)
298#define XFS_BMBT_TRACE_ARGIFK(c,i,f,s)
299#define XFS_BMBT_TRACE_ARGIFR(c,i,f,r)
300#define XFS_BMBT_TRACE_ARGIK(c,i,k)
301#define XFS_BMBT_TRACE_CURSOR(c,s)
302#endif /* XFS_BMBT_TRACE */
303
304
305/*
306 * Internal functions.
307 */
308
309/*
310 * Delete record pointed to by cur/level.
311 */
312STATIC int /* error */
313xfs_bmbt_delrec(
314 xfs_btree_cur_t *cur,
315 int level,
316 int *stat) /* success/failure */
317{
318 xfs_bmbt_block_t *block; /* bmap btree block */
319 xfs_fsblock_t bno; /* fs-relative block number */
320 xfs_buf_t *bp; /* buffer for block */
321 int error; /* error return value */
322 int i; /* loop counter */
323 int j; /* temp state */
324 xfs_bmbt_key_t key; /* bmap btree key */
325 xfs_bmbt_key_t *kp=NULL; /* pointer to bmap btree key */
326 xfs_fsblock_t lbno; /* left sibling block number */
327 xfs_buf_t *lbp; /* left buffer pointer */
328 xfs_bmbt_block_t *left; /* left btree block */
329 xfs_bmbt_key_t *lkp; /* left btree key */
330 xfs_bmbt_ptr_t *lpp; /* left address pointer */
331 int lrecs=0; /* left record count */
332 xfs_bmbt_rec_t *lrp; /* left record pointer */
333 xfs_mount_t *mp; /* file system mount point */
334 xfs_bmbt_ptr_t *pp; /* pointer to bmap block addr */
335 int ptr; /* key/record index */
336 xfs_fsblock_t rbno; /* right sibling block number */
337 xfs_buf_t *rbp; /* right buffer pointer */
338 xfs_bmbt_block_t *right; /* right btree block */
339 xfs_bmbt_key_t *rkp; /* right btree key */
340 xfs_bmbt_rec_t *rp; /* pointer to bmap btree rec */
341 xfs_bmbt_ptr_t *rpp; /* right address pointer */
342 xfs_bmbt_block_t *rrblock; /* right-right btree block */
343 xfs_buf_t *rrbp; /* right-right buffer pointer */
344 int rrecs=0; /* right record count */
345 xfs_bmbt_rec_t *rrp; /* right record pointer */
346 xfs_btree_cur_t *tcur; /* temporary btree cursor */
347 int numrecs; /* temporary numrec count */
348 int numlrecs, numrrecs;
349
350 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
351 XFS_BMBT_TRACE_ARGI(cur, level);
352 ptr = cur->bc_ptrs[level];
353 tcur = NULL;
354 if (ptr == 0) {
355 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
356 *stat = 0;
357 return 0;
358 }
359 block = xfs_bmbt_get_block(cur, level, &bp);
360 numrecs = be16_to_cpu(block->bb_numrecs);
361#ifdef DEBUG
362 if ((error = xfs_btree_check_lblock(cur, block, level, bp))) {
363 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
364 goto error0;
365 }
366#endif
367 if (ptr > numrecs) {
368 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
369 *stat = 0;
370 return 0;
371 }
372 XFS_STATS_INC(xs_bmbt_delrec);
373 if (level > 0) {
374 kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
375 pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
376#ifdef DEBUG
377 for (i = ptr; i < numrecs; i++) {
378 if ((error = xfs_btree_check_lptr_disk(cur, pp[i], level))) {
379 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
380 goto error0;
381 }
382 }
383#endif
384 if (ptr < numrecs) {
385 memmove(&kp[ptr - 1], &kp[ptr],
386 (numrecs - ptr) * sizeof(*kp));
387 memmove(&pp[ptr - 1], &pp[ptr],
388 (numrecs - ptr) * sizeof(*pp));
389 xfs_bmbt_log_ptrs(cur, bp, ptr, numrecs - 1);
390 xfs_bmbt_log_keys(cur, bp, ptr, numrecs - 1);
391 }
392 } else {
393 rp = XFS_BMAP_REC_IADDR(block, 1, cur);
394 if (ptr < numrecs) {
395 memmove(&rp[ptr - 1], &rp[ptr],
396 (numrecs - ptr) * sizeof(*rp));
397 xfs_bmbt_log_recs(cur, bp, ptr, numrecs - 1);
398 }
399 if (ptr == 1) {
400 key.br_startoff =
401 cpu_to_be64(xfs_bmbt_disk_get_startoff(rp));
402 kp = &key;
403 }
404 }
405 numrecs--;
406 block->bb_numrecs = cpu_to_be16(numrecs);
407 xfs_bmbt_log_block(cur, bp, XFS_BB_NUMRECS);
408 /*
409 * We're at the root level.
410 * First, shrink the root block in-memory.
411 * Try to get rid of the next level down.
412 * If we can't then there's nothing left to do.
413 */
414 if (level == cur->bc_nlevels - 1) {
415 xfs_iroot_realloc(cur->bc_private.b.ip, -1,
416 cur->bc_private.b.whichfork);
417 if ((error = xfs_bmbt_killroot(cur))) {
418 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
419 goto error0;
420 }
421 if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &j))) {
422 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
423 goto error0;
424 }
425 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
426 *stat = 1;
427 return 0;
428 }
429 if (ptr == 1 && (error = xfs_bmbt_updkey(cur, kp, level + 1))) {
430 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
431 goto error0;
432 }
433 if (numrecs >= XFS_BMAP_BLOCK_IMINRECS(level, cur)) {
434 if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &j))) {
435 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
436 goto error0;
437 }
438 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
439 *stat = 1;
440 return 0;
441 }
442 rbno = be64_to_cpu(block->bb_rightsib);
443 lbno = be64_to_cpu(block->bb_leftsib);
444 /*
445 * One child of root, need to get a chance to copy its contents
446 * into the root and delete it. Can't go up to next level,
447 * there's nothing to delete there.
448 */
449 if (lbno == NULLFSBLOCK && rbno == NULLFSBLOCK &&
450 level == cur->bc_nlevels - 2) {
451 if ((error = xfs_bmbt_killroot(cur))) {
452 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
453 goto error0;
454 }
455 if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &i))) {
456 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
457 goto error0;
458 }
459 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
460 *stat = 1;
461 return 0;
462 }
463 ASSERT(rbno != NULLFSBLOCK || lbno != NULLFSBLOCK);
464 if ((error = xfs_btree_dup_cursor(cur, &tcur))) {
465 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
466 goto error0;
467 }
468 bno = NULLFSBLOCK;
469 if (rbno != NULLFSBLOCK) {
470 i = xfs_btree_lastrec(tcur, level);
471 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
472 if ((error = xfs_bmbt_increment(tcur, level, &i))) {
473 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
474 goto error0;
475 }
476 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
477 i = xfs_btree_lastrec(tcur, level);
478 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
479 rbp = tcur->bc_bufs[level];
480 right = XFS_BUF_TO_BMBT_BLOCK(rbp);
481#ifdef DEBUG
482 if ((error = xfs_btree_check_lblock(cur, right, level, rbp))) {
483 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
484 goto error0;
485 }
486#endif
487 bno = be64_to_cpu(right->bb_leftsib);
488 if (be16_to_cpu(right->bb_numrecs) - 1 >=
489 XFS_BMAP_BLOCK_IMINRECS(level, cur)) {
490 if ((error = xfs_bmbt_lshift(tcur, level, &i))) {
491 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
492 goto error0;
493 }
494 if (i) {
495 ASSERT(be16_to_cpu(block->bb_numrecs) >=
496 XFS_BMAP_BLOCK_IMINRECS(level, tcur));
497 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
498 tcur = NULL;
499 if (level > 0) {
500 if ((error = xfs_bmbt_decrement(cur,
501 level, &i))) {
502 XFS_BMBT_TRACE_CURSOR(cur,
503 ERROR);
504 goto error0;
505 }
506 }
507 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
508 *stat = 1;
509 return 0;
510 }
511 }
512 rrecs = be16_to_cpu(right->bb_numrecs);
513 if (lbno != NULLFSBLOCK) {
514 i = xfs_btree_firstrec(tcur, level);
515 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
516 if ((error = xfs_bmbt_decrement(tcur, level, &i))) {
517 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
518 goto error0;
519 }
520 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
521 }
522 }
523 if (lbno != NULLFSBLOCK) {
524 i = xfs_btree_firstrec(tcur, level);
525 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
526 /*
527 * decrement to last in block
528 */
529 if ((error = xfs_bmbt_decrement(tcur, level, &i))) {
530 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
531 goto error0;
532 }
533 i = xfs_btree_firstrec(tcur, level);
534 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
535 lbp = tcur->bc_bufs[level];
536 left = XFS_BUF_TO_BMBT_BLOCK(lbp);
537#ifdef DEBUG
538 if ((error = xfs_btree_check_lblock(cur, left, level, lbp))) {
539 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
540 goto error0;
541 }
542#endif
543 bno = be64_to_cpu(left->bb_rightsib);
544 if (be16_to_cpu(left->bb_numrecs) - 1 >=
545 XFS_BMAP_BLOCK_IMINRECS(level, cur)) {
546 if ((error = xfs_bmbt_rshift(tcur, level, &i))) {
547 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
548 goto error0;
549 }
550 if (i) {
551 ASSERT(be16_to_cpu(block->bb_numrecs) >=
552 XFS_BMAP_BLOCK_IMINRECS(level, tcur));
553 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
554 tcur = NULL;
555 if (level == 0)
556 cur->bc_ptrs[0]++;
557 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
558 *stat = 1;
559 return 0;
560 }
561 }
562 lrecs = be16_to_cpu(left->bb_numrecs);
563 }
564 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
565 tcur = NULL;
566 mp = cur->bc_mp;
567 ASSERT(bno != NULLFSBLOCK);
568 if (lbno != NULLFSBLOCK &&
569 lrecs + be16_to_cpu(block->bb_numrecs) <= XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
570 rbno = bno;
571 right = block;
572 rbp = bp;
573 if ((error = xfs_btree_read_bufl(mp, cur->bc_tp, lbno, 0, &lbp,
574 XFS_BMAP_BTREE_REF))) {
575 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
576 goto error0;
577 }
578 left = XFS_BUF_TO_BMBT_BLOCK(lbp);
579 if ((error = xfs_btree_check_lblock(cur, left, level, lbp))) {
580 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
581 goto error0;
582 }
583 } else if (rbno != NULLFSBLOCK &&
584 rrecs + be16_to_cpu(block->bb_numrecs) <=
585 XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
586 lbno = bno;
587 left = block;
588 lbp = bp;
589 if ((error = xfs_btree_read_bufl(mp, cur->bc_tp, rbno, 0, &rbp,
590 XFS_BMAP_BTREE_REF))) {
591 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
592 goto error0;
593 }
594 right = XFS_BUF_TO_BMBT_BLOCK(rbp);
595 if ((error = xfs_btree_check_lblock(cur, right, level, rbp))) {
596 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
597 goto error0;
598 }
599 lrecs = be16_to_cpu(left->bb_numrecs);
600 } else {
601 if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &i))) {
602 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
603 goto error0;
604 }
605 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
606 *stat = 1;
607 return 0;
608 }
609 numlrecs = be16_to_cpu(left->bb_numrecs);
610 numrrecs = be16_to_cpu(right->bb_numrecs);
611 if (level > 0) {
612 lkp = XFS_BMAP_KEY_IADDR(left, numlrecs + 1, cur);
613 lpp = XFS_BMAP_PTR_IADDR(left, numlrecs + 1, cur);
614 rkp = XFS_BMAP_KEY_IADDR(right, 1, cur);
615 rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
616#ifdef DEBUG
617 for (i = 0; i < numrrecs; i++) {
618 if ((error = xfs_btree_check_lptr_disk(cur, rpp[i], level))) {
619 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
620 goto error0;
621 }
622 }
623#endif
624 memcpy(lkp, rkp, numrrecs * sizeof(*lkp));
625 memcpy(lpp, rpp, numrrecs * sizeof(*lpp));
626 xfs_bmbt_log_keys(cur, lbp, numlrecs + 1, numlrecs + numrrecs);
627 xfs_bmbt_log_ptrs(cur, lbp, numlrecs + 1, numlrecs + numrrecs);
628 } else {
629 lrp = XFS_BMAP_REC_IADDR(left, numlrecs + 1, cur);
630 rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
631 memcpy(lrp, rrp, numrrecs * sizeof(*lrp));
632 xfs_bmbt_log_recs(cur, lbp, numlrecs + 1, numlrecs + numrrecs);
633 }
634 be16_add_cpu(&left->bb_numrecs, numrrecs);
635 left->bb_rightsib = right->bb_rightsib;
636 xfs_bmbt_log_block(cur, lbp, XFS_BB_RIGHTSIB | XFS_BB_NUMRECS);
637 if (be64_to_cpu(left->bb_rightsib) != NULLDFSBNO) {
638 if ((error = xfs_btree_read_bufl(mp, cur->bc_tp,
639 be64_to_cpu(left->bb_rightsib),
640 0, &rrbp, XFS_BMAP_BTREE_REF))) {
641 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
642 goto error0;
643 }
644 rrblock = XFS_BUF_TO_BMBT_BLOCK(rrbp);
645 if ((error = xfs_btree_check_lblock(cur, rrblock, level, rrbp))) {
646 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
647 goto error0;
648 }
649 rrblock->bb_leftsib = cpu_to_be64(lbno);
650 xfs_bmbt_log_block(cur, rrbp, XFS_BB_LEFTSIB);
651 }
652 xfs_bmap_add_free(XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(rbp)), 1,
653 cur->bc_private.b.flist, mp);
654 cur->bc_private.b.ip->i_d.di_nblocks--;
655 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, XFS_ILOG_CORE);
656 XFS_TRANS_MOD_DQUOT_BYINO(mp, cur->bc_tp, cur->bc_private.b.ip,
657 XFS_TRANS_DQ_BCOUNT, -1L);
658 xfs_trans_binval(cur->bc_tp, rbp);
659 if (bp != lbp) {
660 cur->bc_bufs[level] = lbp;
661 cur->bc_ptrs[level] += lrecs;
662 cur->bc_ra[level] = 0;
663 } else if ((error = xfs_bmbt_increment(cur, level + 1, &i))) {
664 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
665 goto error0;
666 }
667 if (level > 0)
668 cur->bc_ptrs[level]--;
669 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
670 *stat = 2;
671 return 0;
672
673error0:
674 if (tcur)
675 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
676 return error;
677}
678
679/*
680 * Insert one record/level. Return information to the caller
681 * allowing the next level up to proceed if necessary.
682 */
683STATIC int /* error */
684xfs_bmbt_insrec(
685 xfs_btree_cur_t *cur,
686 int level,
687 xfs_fsblock_t *bnop,
688 xfs_bmbt_rec_t *recp,
689 xfs_btree_cur_t **curp,
690 int *stat) /* no-go/done/continue */
691{
692 xfs_bmbt_block_t *block; /* bmap btree block */
693 xfs_buf_t *bp; /* buffer for block */
694 int error; /* error return value */
695 int i; /* loop index */
696 xfs_bmbt_key_t key; /* bmap btree key */
697 xfs_bmbt_key_t *kp=NULL; /* pointer to bmap btree key */
698 int logflags; /* inode logging flags */
699 xfs_fsblock_t nbno; /* new block number */
700 struct xfs_btree_cur *ncur; /* new btree cursor */
701 __uint64_t startoff; /* new btree key value */
702 xfs_bmbt_rec_t nrec; /* new record count */
703 int optr; /* old key/record index */
704 xfs_bmbt_ptr_t *pp; /* pointer to bmap block addr */
705 int ptr; /* key/record index */
706 xfs_bmbt_rec_t *rp=NULL; /* pointer to bmap btree rec */
707 int numrecs;
708
709 ASSERT(level < cur->bc_nlevels);
710 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
711 XFS_BMBT_TRACE_ARGIFR(cur, level, *bnop, recp);
712 ncur = NULL;
713 key.br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(recp));
714 optr = ptr = cur->bc_ptrs[level];
715 if (ptr == 0) {
716 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
717 *stat = 0;
718 return 0;
719 }
720 XFS_STATS_INC(xs_bmbt_insrec);
721 block = xfs_bmbt_get_block(cur, level, &bp);
722 numrecs = be16_to_cpu(block->bb_numrecs);
723#ifdef DEBUG
724 if ((error = xfs_btree_check_lblock(cur, block, level, bp))) {
725 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
726 return error;
727 }
728 if (ptr <= numrecs) {
729 if (level == 0) {
730 rp = XFS_BMAP_REC_IADDR(block, ptr, cur);
731 xfs_btree_check_rec(XFS_BTNUM_BMAP, recp, rp);
732 } else {
733 kp = XFS_BMAP_KEY_IADDR(block, ptr, cur);
734 xfs_btree_check_key(XFS_BTNUM_BMAP, &key, kp);
735 }
736 }
737#endif
738 nbno = NULLFSBLOCK;
739 if (numrecs == XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
740 if (numrecs < XFS_BMAP_BLOCK_DMAXRECS(level, cur)) {
741 /*
742 * A root block, that can be made bigger.
743 */
744 xfs_iroot_realloc(cur->bc_private.b.ip, 1,
745 cur->bc_private.b.whichfork);
746 block = xfs_bmbt_get_block(cur, level, &bp);
747 } else if (level == cur->bc_nlevels - 1) {
748 if ((error = xfs_bmbt_newroot(cur, &logflags, stat)) ||
749 *stat == 0) {
750 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
751 return error;
752 }
753 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
754 logflags);
755 block = xfs_bmbt_get_block(cur, level, &bp);
756 } else {
757 if ((error = xfs_bmbt_rshift(cur, level, &i))) {
758 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
759 return error;
760 }
761 if (i) {
762 /* nothing */
763 } else {
764 if ((error = xfs_bmbt_lshift(cur, level, &i))) {
765 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
766 return error;
767 }
768 if (i) {
769 optr = ptr = cur->bc_ptrs[level];
770 } else {
771 if ((error = xfs_bmbt_split(cur, level,
772 &nbno, &startoff, &ncur,
773 &i))) {
774 XFS_BMBT_TRACE_CURSOR(cur,
775 ERROR);
776 return error;
777 }
778 if (i) {
779 block = xfs_bmbt_get_block(
780 cur, level, &bp);
781#ifdef DEBUG
782 if ((error =
783 xfs_btree_check_lblock(cur,
784 block, level, bp))) {
785 XFS_BMBT_TRACE_CURSOR(
786 cur, ERROR);
787 return error;
788 }
789#endif
790 ptr = cur->bc_ptrs[level];
791 xfs_bmbt_disk_set_allf(&nrec,
792 startoff, 0, 0,
793 XFS_EXT_NORM);
794 } else {
795 XFS_BMBT_TRACE_CURSOR(cur,
796 EXIT);
797 *stat = 0;
798 return 0;
799 }
800 }
801 }
802 }
803 }
804 numrecs = be16_to_cpu(block->bb_numrecs);
805 if (level > 0) {
806 kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
807 pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
808#ifdef DEBUG
809 for (i = numrecs; i >= ptr; i--) {
810 if ((error = xfs_btree_check_lptr_disk(cur, pp[i - 1],
811 level))) {
812 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
813 return error;
814 }
815 }
816#endif
817 memmove(&kp[ptr], &kp[ptr - 1],
818 (numrecs - ptr + 1) * sizeof(*kp));
819 memmove(&pp[ptr], &pp[ptr - 1],
820 (numrecs - ptr + 1) * sizeof(*pp));
821#ifdef DEBUG
822 if ((error = xfs_btree_check_lptr(cur, *bnop, level))) {
823 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
824 return error;
825 }
826#endif
827 kp[ptr - 1] = key;
828 pp[ptr - 1] = cpu_to_be64(*bnop);
829 numrecs++;
830 block->bb_numrecs = cpu_to_be16(numrecs);
831 xfs_bmbt_log_keys(cur, bp, ptr, numrecs);
832 xfs_bmbt_log_ptrs(cur, bp, ptr, numrecs);
833 } else {
834 rp = XFS_BMAP_REC_IADDR(block, 1, cur);
835 memmove(&rp[ptr], &rp[ptr - 1],
836 (numrecs - ptr + 1) * sizeof(*rp));
837 rp[ptr - 1] = *recp;
838 numrecs++;
839 block->bb_numrecs = cpu_to_be16(numrecs);
840 xfs_bmbt_log_recs(cur, bp, ptr, numrecs);
841 }
842 xfs_bmbt_log_block(cur, bp, XFS_BB_NUMRECS);
843#ifdef DEBUG
844 if (ptr < numrecs) {
845 if (level == 0)
846 xfs_btree_check_rec(XFS_BTNUM_BMAP, rp + ptr - 1,
847 rp + ptr);
848 else
849 xfs_btree_check_key(XFS_BTNUM_BMAP, kp + ptr - 1,
850 kp + ptr);
851 }
852#endif
853 if (optr == 1 && (error = xfs_bmbt_updkey(cur, &key, level + 1))) {
854 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
855 return error;
856 }
857 *bnop = nbno;
858 if (nbno != NULLFSBLOCK) {
859 *recp = nrec;
860 *curp = ncur;
861 }
862 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
863 *stat = 1;
864 return 0;
865}
866
867STATIC int
868xfs_bmbt_killroot(
869 xfs_btree_cur_t *cur)
870{
871 xfs_bmbt_block_t *block;
872 xfs_bmbt_block_t *cblock;
873 xfs_buf_t *cbp;
874 xfs_bmbt_key_t *ckp;
875 xfs_bmbt_ptr_t *cpp;
876#ifdef DEBUG
877 int error;
878#endif
879 int i;
880 xfs_bmbt_key_t *kp;
881 xfs_inode_t *ip;
882 xfs_ifork_t *ifp;
883 int level;
884 xfs_bmbt_ptr_t *pp;
885
886 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
887 level = cur->bc_nlevels - 1;
888 ASSERT(level >= 1);
889 /*
890 * Don't deal with the root block needs to be a leaf case.
891 * We're just going to turn the thing back into extents anyway.
892 */
893 if (level == 1) {
894 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
895 return 0;
896 }
897 block = xfs_bmbt_get_block(cur, level, &cbp);
898 /*
899 * Give up if the root has multiple children.
900 */
901 if (be16_to_cpu(block->bb_numrecs) != 1) {
902 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
903 return 0;
904 }
905 /*
906 * Only do this if the next level will fit.
907 * Then the data must be copied up to the inode,
908 * instead of freeing the root you free the next level.
909 */
910 cbp = cur->bc_bufs[level - 1];
911 cblock = XFS_BUF_TO_BMBT_BLOCK(cbp);
912 if (be16_to_cpu(cblock->bb_numrecs) > XFS_BMAP_BLOCK_DMAXRECS(level, cur)) {
913 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
914 return 0;
915 }
916 ASSERT(be64_to_cpu(cblock->bb_leftsib) == NULLDFSBNO);
917 ASSERT(be64_to_cpu(cblock->bb_rightsib) == NULLDFSBNO);
918 ip = cur->bc_private.b.ip;
919 ifp = XFS_IFORK_PTR(ip, cur->bc_private.b.whichfork);
920 ASSERT(XFS_BMAP_BLOCK_IMAXRECS(level, cur) ==
921 XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes));
922 i = (int)(be16_to_cpu(cblock->bb_numrecs) - XFS_BMAP_BLOCK_IMAXRECS(level, cur));
923 if (i) {
924 xfs_iroot_realloc(ip, i, cur->bc_private.b.whichfork);
925 block = ifp->if_broot;
926 }
927 be16_add_cpu(&block->bb_numrecs, i);
928 ASSERT(block->bb_numrecs == cblock->bb_numrecs);
929 kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
930 ckp = XFS_BMAP_KEY_IADDR(cblock, 1, cur);
931 memcpy(kp, ckp, be16_to_cpu(block->bb_numrecs) * sizeof(*kp));
932 pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
933 cpp = XFS_BMAP_PTR_IADDR(cblock, 1, cur);
934#ifdef DEBUG
935 for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
936 if ((error = xfs_btree_check_lptr_disk(cur, cpp[i], level - 1))) {
937 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
938 return error;
939 }
940 }
941#endif
942 memcpy(pp, cpp, be16_to_cpu(block->bb_numrecs) * sizeof(*pp));
943 xfs_bmap_add_free(XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(cbp)), 1,
944 cur->bc_private.b.flist, cur->bc_mp);
945 ip->i_d.di_nblocks--;
946 XFS_TRANS_MOD_DQUOT_BYINO(cur->bc_mp, cur->bc_tp, ip,
947 XFS_TRANS_DQ_BCOUNT, -1L);
948 xfs_trans_binval(cur->bc_tp, cbp);
949 cur->bc_bufs[level - 1] = NULL;
950 be16_add_cpu(&block->bb_level, -1);
951 xfs_trans_log_inode(cur->bc_tp, ip,
952 XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork));
953 cur->bc_nlevels--;
954 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
955 return 0;
956}
957
958/*
959 * Log key values from the btree block.
960 */
961STATIC void
962xfs_bmbt_log_keys(
963 xfs_btree_cur_t *cur,
964 xfs_buf_t *bp,
965 int kfirst,
966 int klast)
967{
968 xfs_trans_t *tp;
969
970 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
971 XFS_BMBT_TRACE_ARGBII(cur, bp, kfirst, klast);
972 tp = cur->bc_tp;
973 if (bp) {
974 xfs_bmbt_block_t *block;
975 int first;
976 xfs_bmbt_key_t *kp;
977 int last;
978
979 block = XFS_BUF_TO_BMBT_BLOCK(bp);
980 kp = XFS_BMAP_KEY_DADDR(block, 1, cur);
981 first = (int)((xfs_caddr_t)&kp[kfirst - 1] - (xfs_caddr_t)block);
982 last = (int)(((xfs_caddr_t)&kp[klast] - 1) - (xfs_caddr_t)block);
983 xfs_trans_log_buf(tp, bp, first, last);
984 } else {
985 xfs_inode_t *ip;
986
987 ip = cur->bc_private.b.ip;
988 xfs_trans_log_inode(tp, ip,
989 XFS_ILOG_FBROOT(cur->bc_private.b.whichfork));
990 }
991 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
992}
993
994/*
995 * Log pointer values from the btree block.
996 */
997STATIC void
998xfs_bmbt_log_ptrs(
999 xfs_btree_cur_t *cur,
1000 xfs_buf_t *bp,
1001 int pfirst,
1002 int plast)
1003{
1004 xfs_trans_t *tp;
1005
1006 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
1007 XFS_BMBT_TRACE_ARGBII(cur, bp, pfirst, plast);
1008 tp = cur->bc_tp;
1009 if (bp) {
1010 xfs_bmbt_block_t *block;
1011 int first;
1012 int last;
1013 xfs_bmbt_ptr_t *pp;
1014
1015 block = XFS_BUF_TO_BMBT_BLOCK(bp);
1016 pp = XFS_BMAP_PTR_DADDR(block, 1, cur);
1017 first = (int)((xfs_caddr_t)&pp[pfirst - 1] - (xfs_caddr_t)block);
1018 last = (int)(((xfs_caddr_t)&pp[plast] - 1) - (xfs_caddr_t)block);
1019 xfs_trans_log_buf(tp, bp, first, last);
1020 } else {
1021 xfs_inode_t *ip;
1022
1023 ip = cur->bc_private.b.ip;
1024 xfs_trans_log_inode(tp, ip,
1025 XFS_ILOG_FBROOT(cur->bc_private.b.whichfork));
1026 }
1027 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1028}
1029
1030/*
1031 * Lookup the record. The cursor is made to point to it, based on dir.
1032 */
1033STATIC int /* error */
1034xfs_bmbt_lookup(
1035 xfs_btree_cur_t *cur,
1036 xfs_lookup_t dir,
1037 int *stat) /* success/failure */
1038{
1039 xfs_bmbt_block_t *block=NULL;
1040 xfs_buf_t *bp;
1041 xfs_daddr_t d;
1042 xfs_sfiloff_t diff;
1043 int error; /* error return value */
1044 xfs_fsblock_t fsbno=0;
1045 int high;
1046 int i;
1047 int keyno=0;
1048 xfs_bmbt_key_t *kkbase=NULL;
1049 xfs_bmbt_key_t *kkp;
1050 xfs_bmbt_rec_t *krbase=NULL;
1051 xfs_bmbt_rec_t *krp;
1052 int level;
1053 int low;
1054 xfs_mount_t *mp;
1055 xfs_bmbt_ptr_t *pp;
1056 xfs_bmbt_irec_t *rp;
1057 xfs_fileoff_t startoff;
1058 xfs_trans_t *tp;
1059
1060 XFS_STATS_INC(xs_bmbt_lookup);
1061 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
1062 XFS_BMBT_TRACE_ARGI(cur, (int)dir);
1063 tp = cur->bc_tp;
1064 mp = cur->bc_mp;
1065 rp = &cur->bc_rec.b;
1066 for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
1067 if (level < cur->bc_nlevels - 1) {
1068 d = XFS_FSB_TO_DADDR(mp, fsbno);
1069 bp = cur->bc_bufs[level];
1070 if (bp && XFS_BUF_ADDR(bp) != d)
1071 bp = NULL;
1072 if (!bp) {
1073 if ((error = xfs_btree_read_bufl(mp, tp, fsbno,
1074 0, &bp, XFS_BMAP_BTREE_REF))) {
1075 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1076 return error;
1077 }
1078 xfs_btree_setbuf(cur, level, bp);
1079 block = XFS_BUF_TO_BMBT_BLOCK(bp);
1080 if ((error = xfs_btree_check_lblock(cur, block,
1081 level, bp))) {
1082 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1083 return error;
1084 }
1085 } else
1086 block = XFS_BUF_TO_BMBT_BLOCK(bp);
1087 } else
1088 block = xfs_bmbt_get_block(cur, level, &bp);
1089 if (diff == 0)
1090 keyno = 1;
1091 else {
1092 if (level > 0)
1093 kkbase = XFS_BMAP_KEY_IADDR(block, 1, cur);
1094 else
1095 krbase = XFS_BMAP_REC_IADDR(block, 1, cur);
1096 low = 1;
1097 if (!(high = be16_to_cpu(block->bb_numrecs))) {
1098 ASSERT(level == 0);
1099 cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
1100 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1101 *stat = 0;
1102 return 0;
1103 }
1104 while (low <= high) {
1105 XFS_STATS_INC(xs_bmbt_compare);
1106 keyno = (low + high) >> 1;
1107 if (level > 0) {
1108 kkp = kkbase + keyno - 1;
1109 startoff = be64_to_cpu(kkp->br_startoff);
1110 } else {
1111 krp = krbase + keyno - 1;
1112 startoff = xfs_bmbt_disk_get_startoff(krp);
1113 }
1114 diff = (xfs_sfiloff_t)
1115 (startoff - rp->br_startoff);
1116 if (diff < 0)
1117 low = keyno + 1;
1118 else if (diff > 0)
1119 high = keyno - 1;
1120 else
1121 break;
1122 }
1123 }
1124 if (level > 0) {
1125 if (diff > 0 && --keyno < 1)
1126 keyno = 1;
1127 pp = XFS_BMAP_PTR_IADDR(block, keyno, cur);
1128 fsbno = be64_to_cpu(*pp);
1129#ifdef DEBUG
1130 if ((error = xfs_btree_check_lptr(cur, fsbno, level))) {
1131 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1132 return error;
1133 }
1134#endif
1135 cur->bc_ptrs[level] = keyno;
1136 }
1137 }
1138 if (dir != XFS_LOOKUP_LE && diff < 0) {
1139 keyno++;
1140 /*
1141 * If ge search and we went off the end of the block, but it's
1142 * not the last block, we're in the wrong block.
1143 */
1144 if (dir == XFS_LOOKUP_GE && keyno > be16_to_cpu(block->bb_numrecs) &&
1145 be64_to_cpu(block->bb_rightsib) != NULLDFSBNO) {
1146 cur->bc_ptrs[0] = keyno;
1147 if ((error = xfs_bmbt_increment(cur, 0, &i))) {
1148 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1149 return error;
1150 }
1151 XFS_WANT_CORRUPTED_RETURN(i == 1);
1152 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1153 *stat = 1;
1154 return 0;
1155 }
1156 }
1157 else if (dir == XFS_LOOKUP_LE && diff > 0)
1158 keyno--;
1159 cur->bc_ptrs[0] = keyno;
1160 if (keyno == 0 || keyno > be16_to_cpu(block->bb_numrecs)) {
1161 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1162 *stat = 0;
1163 } else {
1164 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1165 *stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0));
1166 }
1167 return 0;
1168}
1169
1170/*
1171 * Move 1 record left from cur/level if possible.
1172 * Update cur to reflect the new path.
1173 */
STATIC int					/* error */
xfs_bmbt_lshift(
	xfs_btree_cur_t		*cur,
	int			level,
	int			*stat)		/* success/failure */
{
	int			error;		/* error return value */
#ifdef DEBUG
	int			i;		/* loop counter */
#endif
	xfs_bmbt_key_t		key;		/* bmap btree key */
	xfs_buf_t		*lbp;		/* left buffer pointer */
	xfs_bmbt_block_t	*left;		/* left btree block */
	xfs_bmbt_key_t		*lkp=NULL;	/* left btree key */
	xfs_bmbt_ptr_t		*lpp;		/* left address pointer */
	int			lrecs;		/* left record count */
	xfs_bmbt_rec_t		*lrp=NULL;	/* left record pointer */
	xfs_mount_t		*mp;		/* file system mount point */
	xfs_buf_t		*rbp;		/* right buffer pointer */
	xfs_bmbt_block_t	*right;		/* right btree block */
	xfs_bmbt_key_t		*rkp=NULL;	/* right btree key */
	xfs_bmbt_ptr_t		*rpp=NULL;	/* right address pointer */
	xfs_bmbt_rec_t		*rrp=NULL;	/* right record pointer */
	int			rrecs;		/* right record count */

	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
	XFS_BMBT_TRACE_ARGI(cur, level);
	/* The root level has no left sibling to shift into. */
	if (level == cur->bc_nlevels - 1) {
		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
		*stat = 0;
		return 0;
	}
	/* The block the cursor currently points at becomes the "right" block. */
	rbp = cur->bc_bufs[level];
	right = XFS_BUF_TO_BMBT_BLOCK(rbp);
#ifdef DEBUG
	if ((error = xfs_btree_check_lblock(cur, right, level, rbp))) {
		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
		return error;
	}
#endif
	/* No left sibling block: nothing to shift into. */
	if (be64_to_cpu(right->bb_leftsib) == NULLDFSBNO) {
		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
		*stat = 0;
		return 0;
	}
	/*
	 * Cursor points at the first entry; shifting it left would move the
	 * entry the cursor refers to out of this block, so refuse.
	 */
	if (cur->bc_ptrs[level] <= 1) {
		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
		*stat = 0;
		return 0;
	}
	mp = cur->bc_mp;
	if ((error = xfs_btree_read_bufl(mp, cur->bc_tp, be64_to_cpu(right->bb_leftsib), 0,
			&lbp, XFS_BMAP_BTREE_REF))) {
		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
		return error;
	}
	left = XFS_BUF_TO_BMBT_BLOCK(lbp);
	if ((error = xfs_btree_check_lblock(cur, left, level, lbp))) {
		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
		return error;
	}
	/* Left sibling already full: no room for one more entry. */
	if (be16_to_cpu(left->bb_numrecs) == XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
		*stat = 0;
		return 0;
	}
	/* Index of the new last slot in the left block. */
	lrecs = be16_to_cpu(left->bb_numrecs) + 1;
	/*
	 * Copy the right block's first entry into the left block's new last
	 * slot: key+ptr pair at interior levels, a record at the leaf level.
	 * Each modified region is logged as it is written.
	 */
	if (level > 0) {
		lkp = XFS_BMAP_KEY_IADDR(left, lrecs, cur);
		rkp = XFS_BMAP_KEY_IADDR(right, 1, cur);
		*lkp = *rkp;
		xfs_bmbt_log_keys(cur, lbp, lrecs, lrecs);
		lpp = XFS_BMAP_PTR_IADDR(left, lrecs, cur);
		rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
#ifdef DEBUG
		if ((error = xfs_btree_check_lptr_disk(cur, *rpp, level))) {
			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
			return error;
		}
#endif
		*lpp = *rpp;
		xfs_bmbt_log_ptrs(cur, lbp, lrecs, lrecs);
	} else {
		lrp = XFS_BMAP_REC_IADDR(left, lrecs, cur);
		rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
		*lrp = *rrp;
		xfs_bmbt_log_recs(cur, lbp, lrecs, lrecs);
	}
	left->bb_numrecs = cpu_to_be16(lrecs);
	xfs_bmbt_log_block(cur, lbp, XFS_BB_NUMRECS);
#ifdef DEBUG
	/* The moved entry must still sort after the left block's old last one. */
	if (level > 0)
		xfs_btree_check_key(XFS_BTNUM_BMAP, lkp - 1, lkp);
	else
		xfs_btree_check_rec(XFS_BTNUM_BMAP, lrp - 1, lrp);
#endif
	rrecs = be16_to_cpu(right->bb_numrecs) - 1;
	right->bb_numrecs = cpu_to_be16(rrecs);
	xfs_bmbt_log_block(cur, rbp, XFS_BB_NUMRECS);
	/* Close the hole left at slot 1 of the right block. */
	if (level > 0) {
#ifdef DEBUG
		for (i = 0; i < rrecs; i++) {
			if ((error = xfs_btree_check_lptr_disk(cur, rpp[i + 1],
					level))) {
				XFS_BMBT_TRACE_CURSOR(cur, ERROR);
				return error;
			}
		}
#endif
		memmove(rkp, rkp + 1, rrecs * sizeof(*rkp));
		memmove(rpp, rpp + 1, rrecs * sizeof(*rpp));
		xfs_bmbt_log_keys(cur, rbp, 1, rrecs);
		xfs_bmbt_log_ptrs(cur, rbp, 1, rrecs);
	} else {
		memmove(rrp, rrp + 1, rrecs * sizeof(*rrp));
		xfs_bmbt_log_recs(cur, rbp, 1, rrecs);
		/*
		 * Leaf level has no keys on disk; synthesize the right
		 * block's new first key for the parent update below.
		 */
		key.br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(rrp));
		rkp = &key;
	}
	/* The right block's first key changed; fix the parent's copy of it. */
	if ((error = xfs_bmbt_updkey(cur, rkp, level + 1))) {
		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
		return error;
	}
	/* The entry the cursor points at moved down one slot. */
	cur->bc_ptrs[level]--;
	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
	*stat = 1;
	return 0;
}
1302
1303/*
1304 * Move 1 record right from cur/level if possible.
1305 * Update cur to reflect the new path.
1306 */
STATIC int					/* error */
xfs_bmbt_rshift(
	xfs_btree_cur_t		*cur,
	int			level,
	int			*stat)		/* success/failure */
{
	int			error;		/* error return value */
	int			i;		/* loop counter */
	xfs_bmbt_key_t		key;		/* bmap btree key */
	xfs_buf_t		*lbp;		/* left buffer pointer */
	xfs_bmbt_block_t	*left;		/* left btree block */
	xfs_bmbt_key_t		*lkp;		/* left btree key */
	xfs_bmbt_ptr_t		*lpp;		/* left address pointer */
	xfs_bmbt_rec_t		*lrp;		/* left record pointer */
	xfs_mount_t		*mp;		/* file system mount point */
	xfs_buf_t		*rbp;		/* right buffer pointer */
	xfs_bmbt_block_t	*right;		/* right btree block */
	xfs_bmbt_key_t		*rkp;		/* right btree key */
	xfs_bmbt_ptr_t		*rpp;		/* right address pointer */
	xfs_bmbt_rec_t		*rrp=NULL;	/* right record pointer */
	struct xfs_btree_cur	*tcur;		/* temporary btree cursor */

	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
	XFS_BMBT_TRACE_ARGI(cur, level);
	/* The root level has no right sibling to shift into. */
	if (level == cur->bc_nlevels - 1) {
		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
		*stat = 0;
		return 0;
	}
	/* The block the cursor currently points at becomes the "left" block. */
	lbp = cur->bc_bufs[level];
	left = XFS_BUF_TO_BMBT_BLOCK(lbp);
#ifdef DEBUG
	if ((error = xfs_btree_check_lblock(cur, left, level, lbp))) {
		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
		return error;
	}
#endif
	/* No right sibling block: nothing to shift into. */
	if (be64_to_cpu(left->bb_rightsib) == NULLDFSBNO) {
		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
		*stat = 0;
		return 0;
	}
	/*
	 * Cursor points at (or past) the last entry; shifting it right would
	 * move the entry the cursor refers to, so refuse.
	 */
	if (cur->bc_ptrs[level] >= be16_to_cpu(left->bb_numrecs)) {
		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
		*stat = 0;
		return 0;
	}
	mp = cur->bc_mp;
	if ((error = xfs_btree_read_bufl(mp, cur->bc_tp, be64_to_cpu(left->bb_rightsib), 0,
			&rbp, XFS_BMAP_BTREE_REF))) {
		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
		return error;
	}
	right = XFS_BUF_TO_BMBT_BLOCK(rbp);
	if ((error = xfs_btree_check_lblock(cur, right, level, rbp))) {
		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
		return error;
	}
	/* Right sibling already full: no room for one more entry. */
	if (be16_to_cpu(right->bb_numrecs) == XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
		*stat = 0;
		return 0;
	}
	/*
	 * Open up slot 1 in the right block and copy the left block's last
	 * entry into it: key+ptr pair at interior levels, a record at the
	 * leaf level.
	 */
	if (level > 0) {
		lkp = XFS_BMAP_KEY_IADDR(left, be16_to_cpu(left->bb_numrecs), cur);
		lpp = XFS_BMAP_PTR_IADDR(left, be16_to_cpu(left->bb_numrecs), cur);
		rkp = XFS_BMAP_KEY_IADDR(right, 1, cur);
		rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
#ifdef DEBUG
		for (i = be16_to_cpu(right->bb_numrecs) - 1; i >= 0; i--) {
			if ((error = xfs_btree_check_lptr_disk(cur, rpp[i], level))) {
				XFS_BMBT_TRACE_CURSOR(cur, ERROR);
				return error;
			}
		}
#endif
		memmove(rkp + 1, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
		memmove(rpp + 1, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
#ifdef DEBUG
		if ((error = xfs_btree_check_lptr_disk(cur, *lpp, level))) {
			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
			return error;
		}
#endif
		*rkp = *lkp;
		*rpp = *lpp;
		xfs_bmbt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
		xfs_bmbt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
	} else {
		lrp = XFS_BMAP_REC_IADDR(left, be16_to_cpu(left->bb_numrecs), cur);
		rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
		memmove(rrp + 1, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
		*rrp = *lrp;
		xfs_bmbt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
		/*
		 * Leaf level has no keys on disk; synthesize the right
		 * block's new first key for the parent update below.
		 */
		key.br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(rrp));
		rkp = &key;
	}
	be16_add_cpu(&left->bb_numrecs, -1);
	xfs_bmbt_log_block(cur, lbp, XFS_BB_NUMRECS);
	be16_add_cpu(&right->bb_numrecs, 1);
#ifdef DEBUG
	/* The moved entry must still sort before the right block's old first. */
	if (level > 0)
		xfs_btree_check_key(XFS_BTNUM_BMAP, rkp, rkp + 1);
	else
		xfs_btree_check_rec(XFS_BTNUM_BMAP, rrp, rrp + 1);
#endif
	xfs_bmbt_log_block(cur, rbp, XFS_BB_NUMRECS);
	/*
	 * The right block's first key changed, but the parent entry for it
	 * lives on a different path than cur's.  Use a duplicate cursor,
	 * step it to the right sibling, and update the key from there.
	 */
	if ((error = xfs_btree_dup_cursor(cur, &tcur))) {
		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
		return error;
	}
	i = xfs_btree_lastrec(tcur, level);
	XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
	if ((error = xfs_bmbt_increment(tcur, level, &i))) {
		XFS_BMBT_TRACE_CURSOR(tcur, ERROR);
		goto error1;
	}
	XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
	if ((error = xfs_bmbt_updkey(tcur, rkp, level + 1))) {
		XFS_BMBT_TRACE_CURSOR(tcur, ERROR);
		goto error1;
	}
	xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
	*stat = 1;
	return 0;
error0:
	XFS_BMBT_TRACE_CURSOR(cur, ERROR);
	/* error0 falls through: tcur is torn down on both error paths. */
error1:
	xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
	return error;
}
1439
1440/* 47/*
1441 * Determine the extent state. 48 * Determine the extent state.
1442 */ 49 */
@@ -1453,229 +60,15 @@ xfs_extent_state(
1453 return XFS_EXT_NORM; 60 return XFS_EXT_NORM;
1454} 61}
1455 62
1456
1457/*
1458 * Split cur/level block in half.
1459 * Return new block number and its first record (to be inserted into parent).
1460 */
STATIC int					/* error */
xfs_bmbt_split(
	xfs_btree_cur_t		*cur,
	int			level,
	xfs_fsblock_t		*bnop,
	__uint64_t		*startoff,
	xfs_btree_cur_t		**curp,
	int			*stat)		/* success/failure */
{
	xfs_alloc_arg_t		args;		/* block allocation args */
	int			error;		/* error return value */
	int			i;		/* loop counter */
	xfs_fsblock_t		lbno;		/* left sibling block number */
	xfs_buf_t		*lbp;		/* left buffer pointer */
	xfs_bmbt_block_t	*left;		/* left btree block */
	xfs_bmbt_key_t		*lkp;		/* left btree key */
	xfs_bmbt_ptr_t		*lpp;		/* left address pointer */
	xfs_bmbt_rec_t		*lrp;		/* left record pointer */
	xfs_buf_t		*rbp;		/* right buffer pointer */
	xfs_bmbt_block_t	*right;		/* right btree block */
	xfs_bmbt_key_t		*rkp;		/* right btree key */
	xfs_bmbt_ptr_t		*rpp;		/* right address pointer */
	xfs_bmbt_block_t	*rrblock;	/* right-right btree block */
	xfs_buf_t		*rrbp;		/* right-right buffer pointer */
	xfs_bmbt_rec_t		*rrp;		/* right record pointer */

	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
	XFS_BMBT_TRACE_ARGIFK(cur, level, *bnop, *startoff);
	/* The existing block becomes the left half; allocate a new right half. */
	args.tp = cur->bc_tp;
	args.mp = cur->bc_mp;
	lbp = cur->bc_bufs[level];
	lbno = XFS_DADDR_TO_FSB(args.mp, XFS_BUF_ADDR(lbp));
	left = XFS_BUF_TO_BMBT_BLOCK(lbp);
	args.fsbno = cur->bc_private.b.firstblock;
	args.firstblock = args.fsbno;
	args.minleft = 0;
	if (args.fsbno == NULLFSBLOCK) {
		args.fsbno = lbno;
		args.type = XFS_ALLOCTYPE_START_BNO;
		/*
		 * Make sure there is sufficient room left in the AG to
		 * complete a full tree split for an extent insert.  If
		 * we are converting the middle part of an extent then
		 * we may need space for two tree splits.
		 *
		 * We are relying on the caller to make the correct block
		 * reservation for this operation to succeed.  If the
		 * reservation amount is insufficient then we may fail a
		 * block allocation here and corrupt the filesystem.
		 */
		args.minleft = xfs_trans_get_block_res(args.tp);
	} else if (cur->bc_private.b.flist->xbf_low)
		args.type = XFS_ALLOCTYPE_START_BNO;
	else
		args.type = XFS_ALLOCTYPE_NEAR_BNO;
	args.mod = args.alignment = args.total = args.isfl =
		args.userdata = args.minalignslop = 0;
	args.minlen = args.maxlen = args.prod = 1;
	args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
	if (!args.wasdel && xfs_trans_get_block_res(args.tp) == 0) {
		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
		return XFS_ERROR(ENOSPC);
	}
	if ((error = xfs_alloc_vextent(&args))) {
		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
		return error;
	}
	if (args.fsbno == NULLFSBLOCK && args.minleft) {
		/*
		 * Could not find an AG with enough free space to satisfy
		 * a full btree split.  Try again without minleft and if
		 * successful activate the lowspace algorithm.
		 */
		args.fsbno = 0;
		args.type = XFS_ALLOCTYPE_FIRST_AG;
		args.minleft = 0;
		if ((error = xfs_alloc_vextent(&args))) {
			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
			return error;
		}
		cur->bc_private.b.flist->xbf_low = 1;
	}
	if (args.fsbno == NULLFSBLOCK) {
		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
		*stat = 0;
		return 0;
	}
	ASSERT(args.len == 1);
	/* Account the new btree block against the inode and its quota. */
	cur->bc_private.b.firstblock = args.fsbno;
	cur->bc_private.b.allocated++;
	cur->bc_private.b.ip->i_d.di_nblocks++;
	xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE);
	XFS_TRANS_MOD_DQUOT_BYINO(args.mp, args.tp, cur->bc_private.b.ip,
			XFS_TRANS_DQ_BCOUNT, 1L);
	rbp = xfs_btree_get_bufl(args.mp, args.tp, args.fsbno, 0);
	right = XFS_BUF_TO_BMBT_BLOCK(rbp);
#ifdef DEBUG
	/*
	 * NOTE(review): this validates 'left' while passing the new right
	 * buffer 'rbp' — looks odd; confirm against xfs_btree_check_lblock's
	 * use of the bp argument before changing it.
	 */
	if ((error = xfs_btree_check_lblock(cur, left, level, rbp))) {
		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
		return error;
	}
#endif
	/*
	 * Initialize the right block header; it takes roughly half the
	 * entries, biased so the cursor's entry stays in a legal slot.
	 */
	right->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC);
	right->bb_level = left->bb_level;
	right->bb_numrecs = cpu_to_be16(be16_to_cpu(left->bb_numrecs) / 2);
	if ((be16_to_cpu(left->bb_numrecs) & 1) &&
	    cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1)
		be16_add_cpu(&right->bb_numrecs, 1);
	/* i = first left-block slot that moves to the right block. */
	i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1;
	if (level > 0) {
		lkp = XFS_BMAP_KEY_IADDR(left, i, cur);
		lpp = XFS_BMAP_PTR_IADDR(left, i, cur);
		rkp = XFS_BMAP_KEY_IADDR(right, 1, cur);
		rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
#ifdef DEBUG
		for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
			if ((error = xfs_btree_check_lptr_disk(cur, lpp[i], level))) {
				XFS_BMBT_TRACE_CURSOR(cur, ERROR);
				return error;
			}
		}
#endif
		memcpy(rkp, lkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
		memcpy(rpp, lpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
		xfs_bmbt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
		xfs_bmbt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
		/* First key of the new block, to be inserted in the parent. */
		*startoff = be64_to_cpu(rkp->br_startoff);
	} else {
		lrp = XFS_BMAP_REC_IADDR(left, i, cur);
		rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
		memcpy(rrp, lrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
		xfs_bmbt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
		*startoff = xfs_bmbt_disk_get_startoff(rrp);
	}
	/* Link the new block into the sibling chain: left <-> right <-> rr. */
	be16_add_cpu(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs)));
	right->bb_rightsib = left->bb_rightsib;
	left->bb_rightsib = cpu_to_be64(args.fsbno);
	right->bb_leftsib = cpu_to_be64(lbno);
	xfs_bmbt_log_block(cur, rbp, XFS_BB_ALL_BITS);
	xfs_bmbt_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
	if (be64_to_cpu(right->bb_rightsib) != NULLDFSBNO) {
		if ((error = xfs_btree_read_bufl(args.mp, args.tp,
				be64_to_cpu(right->bb_rightsib), 0, &rrbp,
				XFS_BMAP_BTREE_REF))) {
			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
			return error;
		}
		rrblock = XFS_BUF_TO_BMBT_BLOCK(rrbp);
		if ((error = xfs_btree_check_lblock(cur, rrblock, level, rrbp))) {
			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
			return error;
		}
		rrblock->bb_leftsib = cpu_to_be64(args.fsbno);
		xfs_bmbt_log_block(cur, rrbp, XFS_BB_LEFTSIB);
	}
	/* If the cursor's entry moved to the new block, follow it there. */
	if (cur->bc_ptrs[level] > be16_to_cpu(left->bb_numrecs) + 1) {
		xfs_btree_setbuf(cur, level, rbp);
		cur->bc_ptrs[level] -= be16_to_cpu(left->bb_numrecs);
	}
	/*
	 * Hand the caller a second cursor, positioned for inserting the
	 * new block's key into the parent level.
	 */
	if (level + 1 < cur->bc_nlevels) {
		if ((error = xfs_btree_dup_cursor(cur, curp))) {
			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
			return error;
		}
		(*curp)->bc_ptrs[level + 1]++;
	}
	*bnop = args.fsbno;
	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
	*stat = 1;
	return 0;
}
1632
1633
1634/*
1635 * Update keys for the record.
1636 */
STATIC int
xfs_bmbt_updkey(
	xfs_btree_cur_t		*cur,
	xfs_bmbt_key_t		*keyp,	/* on-disk format */
	int			level)
{
	xfs_bmbt_block_t	*block;
	xfs_buf_t		*bp;
#ifdef DEBUG
	int			error;
#endif
	xfs_bmbt_key_t		*kp;
	int			ptr;

	ASSERT(level >= 1);
	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
	XFS_BMBT_TRACE_ARGIK(cur, level, keyp);
	/*
	 * Write the new key into the cursor's slot at each level, walking
	 * up the tree.  Propagation continues only while the updated entry
	 * is the first one (ptr == 1) in its block, since only then is the
	 * block's first key mirrored in its parent.
	 */
	for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
		block = xfs_bmbt_get_block(cur, level, &bp);
#ifdef DEBUG
		if ((error = xfs_btree_check_lblock(cur, block, level, bp))) {
			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
			return error;
		}
#endif
		ptr = cur->bc_ptrs[level];
		kp = XFS_BMAP_KEY_IADDR(block, ptr, cur);
		*kp = *keyp;
		xfs_bmbt_log_keys(cur, bp, ptr, ptr);
	}
	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
	return 0;
}
1670
1671/* 63/*
1672 * Convert on-disk form of btree root to in-memory form. 64 * Convert on-disk form of btree root to in-memory form.
1673 */ 65 */
1674void 66void
1675xfs_bmdr_to_bmbt( 67xfs_bmdr_to_bmbt(
68 struct xfs_mount *mp,
1676 xfs_bmdr_block_t *dblock, 69 xfs_bmdr_block_t *dblock,
1677 int dblocklen, 70 int dblocklen,
1678 xfs_bmbt_block_t *rblock, 71 struct xfs_btree_block *rblock,
1679 int rblocklen) 72 int rblocklen)
1680{ 73{
1681 int dmxr; 74 int dmxr;
@@ -1688,129 +81,19 @@ xfs_bmdr_to_bmbt(
1688 rblock->bb_level = dblock->bb_level; 81 rblock->bb_level = dblock->bb_level;
1689 ASSERT(be16_to_cpu(rblock->bb_level) > 0); 82 ASSERT(be16_to_cpu(rblock->bb_level) > 0);
1690 rblock->bb_numrecs = dblock->bb_numrecs; 83 rblock->bb_numrecs = dblock->bb_numrecs;
1691 rblock->bb_leftsib = cpu_to_be64(NULLDFSBNO); 84 rblock->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
1692 rblock->bb_rightsib = cpu_to_be64(NULLDFSBNO); 85 rblock->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
1693 dmxr = (int)XFS_BTREE_BLOCK_MAXRECS(dblocklen, xfs_bmdr, 0); 86 dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0);
1694 fkp = XFS_BTREE_KEY_ADDR(xfs_bmdr, dblock, 1); 87 fkp = XFS_BMDR_KEY_ADDR(dblock, 1);
1695 tkp = XFS_BMAP_BROOT_KEY_ADDR(rblock, 1, rblocklen); 88 tkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1);
1696 fpp = XFS_BTREE_PTR_ADDR(xfs_bmdr, dblock, 1, dmxr); 89 fpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr);
1697 tpp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, rblocklen); 90 tpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen);
1698 dmxr = be16_to_cpu(dblock->bb_numrecs); 91 dmxr = be16_to_cpu(dblock->bb_numrecs);
1699 memcpy(tkp, fkp, sizeof(*fkp) * dmxr); 92 memcpy(tkp, fkp, sizeof(*fkp) * dmxr);
1700 memcpy(tpp, fpp, sizeof(*fpp) * dmxr); 93 memcpy(tpp, fpp, sizeof(*fpp) * dmxr);
1701} 94}
1702 95
1703/* 96/*
1704 * Decrement cursor by one record at the level.
1705 * For nonzero levels the leaf-ward information is untouched.
1706 */
int					/* error */
xfs_bmbt_decrement(
	xfs_btree_cur_t		*cur,
	int			level,
	int			*stat)	/* success/failure */
{
	xfs_bmbt_block_t	*block;
	xfs_buf_t		*bp;
	int			error;	/* error return value */
	xfs_fsblock_t		fsbno;
	int			lev;
	xfs_mount_t		*mp;
	xfs_trans_t		*tp;

	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
	XFS_BMBT_TRACE_ARGI(cur, level);
	ASSERT(level < cur->bc_nlevels);
	/* Prime readahead for the left sibling we may be about to visit. */
	if (level < cur->bc_nlevels - 1)
		xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
	/* Easy case: still within the current block after stepping back. */
	if (--cur->bc_ptrs[level] > 0) {
		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
		*stat = 1;
		return 0;
	}
	block = xfs_bmbt_get_block(cur, level, &bp);
#ifdef DEBUG
	if ((error = xfs_btree_check_lblock(cur, block, level, bp))) {
		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
		return error;
	}
#endif
	/* Fell off the front of the leftmost block: no previous record. */
	if (be64_to_cpu(block->bb_leftsib) == NULLDFSBNO) {
		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
		*stat = 0;
		return 0;
	}
	/* Walk up until some ancestor level can step back one entry. */
	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
		if (--cur->bc_ptrs[lev] > 0)
			break;
		if (lev < cur->bc_nlevels - 1)
			xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
	}
	/* Ran off the top of the tree: no previous record exists. */
	if (lev == cur->bc_nlevels) {
		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
		*stat = 0;
		return 0;
	}
	tp = cur->bc_tp;
	mp = cur->bc_mp;
	/*
	 * Walk back down, following the rightmost path: at each level read
	 * the child and position the cursor on its last entry.
	 */
	for (block = xfs_bmbt_get_block(cur, lev, &bp); lev > level; ) {
		fsbno = be64_to_cpu(*XFS_BMAP_PTR_IADDR(block, cur->bc_ptrs[lev], cur));
		if ((error = xfs_btree_read_bufl(mp, tp, fsbno, 0, &bp,
				XFS_BMAP_BTREE_REF))) {
			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
			return error;
		}
		lev--;
		xfs_btree_setbuf(cur, lev, bp);
		block = XFS_BUF_TO_BMBT_BLOCK(bp);
		if ((error = xfs_btree_check_lblock(cur, block, lev, bp))) {
			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
			return error;
		}
		cur->bc_ptrs[lev] = be16_to_cpu(block->bb_numrecs);
	}
	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
	*stat = 1;
	return 0;
}
1776
1777/*
1778 * Delete the record pointed to by cur.
1779 */
int					/* error */
xfs_bmbt_delete(
	xfs_btree_cur_t	*cur,
	int		*stat)		/* success/failure */
{
	int		error;		/* error return value */
	int		i;
	int		level;

	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
	/*
	 * Delete the record at the leaf, then keep calling delrec at each
	 * higher level for as long as it reports 2 ("join worked, continue
	 * one level up").  0/1 from delrec ends the loop.
	 */
	for (level = 0, i = 2; i == 2; level++) {
		if ((error = xfs_bmbt_delrec(cur, level, &i))) {
			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
			return error;
		}
	}
	/*
	 * Nothing was deleted (i == 0): delrec may have left an interior
	 * level pointing at slot 0; step that level back to make the
	 * cursor's path valid again.
	 */
	if (i == 0) {
		for (level = 1; level < cur->bc_nlevels; level++) {
			if (cur->bc_ptrs[level] == 0) {
				if ((error = xfs_bmbt_decrement(cur, level,
						&i))) {
					XFS_BMBT_TRACE_CURSOR(cur, ERROR);
					return error;
				}
				break;
			}
		}
	}
	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
	*stat = i;
	return 0;
}
1812
1813/*
1814 * Convert a compressed bmap extent record to an uncompressed form. 97 * Convert a compressed bmap extent record to an uncompressed form.
1815 * This code must be in sync with the routines xfs_bmbt_get_startoff, 98 * This code must be in sync with the routines xfs_bmbt_get_startoff,
1816 * xfs_bmbt_get_startblock, xfs_bmbt_get_blockcount and xfs_bmbt_get_state. 99 * xfs_bmbt_get_startblock, xfs_bmbt_get_blockcount and xfs_bmbt_get_state.
@@ -1864,31 +147,6 @@ xfs_bmbt_get_all(
1864} 147}
1865 148
1866/* 149/*
1867 * Get the block pointer for the given level of the cursor.
1868 * Fill in the buffer pointer, if applicable.
1869 */
1870xfs_bmbt_block_t *
1871xfs_bmbt_get_block(
1872 xfs_btree_cur_t *cur,
1873 int level,
1874 xfs_buf_t **bpp)
1875{
1876 xfs_ifork_t *ifp;
1877 xfs_bmbt_block_t *rval;
1878
1879 if (level < cur->bc_nlevels - 1) {
1880 *bpp = cur->bc_bufs[level];
1881 rval = XFS_BUF_TO_BMBT_BLOCK(*bpp);
1882 } else {
1883 *bpp = NULL;
1884 ifp = XFS_IFORK_PTR(cur->bc_private.b.ip,
1885 cur->bc_private.b.whichfork);
1886 rval = ifp->if_broot;
1887 }
1888 return rval;
1889}
1890
1891/*
1892 * Extract the blockcount field from an in memory bmap extent record. 150 * Extract the blockcount field from an in memory bmap extent record.
1893 */ 151 */
1894xfs_filblks_t 152xfs_filblks_t
@@ -1950,7 +208,8 @@ xfs_bmbt_disk_get_all(
1950 xfs_bmbt_rec_t *r, 208 xfs_bmbt_rec_t *r,
1951 xfs_bmbt_irec_t *s) 209 xfs_bmbt_irec_t *s)
1952{ 210{
1953 __xfs_bmbt_get_all(be64_to_cpu(r->l0), be64_to_cpu(r->l1), s); 211 __xfs_bmbt_get_all(get_unaligned_be64(&r->l0),
212 get_unaligned_be64(&r->l1), s);
1954} 213}
1955 214
1956/* 215/*
@@ -1974,348 +233,6 @@ xfs_bmbt_disk_get_startoff(
1974 XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN)) >> 9; 233 XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
1975} 234}
1976 235
1977/*
1978 * Increment cursor by one record at the level.
1979 * For nonzero levels the leaf-ward information is untouched.
1980 */
int					/* error */
xfs_bmbt_increment(
	xfs_btree_cur_t		*cur,
	int			level,
	int			*stat)	/* success/failure */
{
	xfs_bmbt_block_t	*block;
	xfs_buf_t		*bp;
	int			error;	/* error return value */
	xfs_fsblock_t		fsbno;
	int			lev;
	xfs_mount_t		*mp;
	xfs_trans_t		*tp;

	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
	XFS_BMBT_TRACE_ARGI(cur, level);
	ASSERT(level < cur->bc_nlevels);
	/* Prime readahead for the right sibling we may be about to visit. */
	if (level < cur->bc_nlevels - 1)
		xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
	block = xfs_bmbt_get_block(cur, level, &bp);
#ifdef DEBUG
	if ((error = xfs_btree_check_lblock(cur, block, level, bp))) {
		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
		return error;
	}
#endif
	/* Easy case: still within the current block after stepping forward. */
	if (++cur->bc_ptrs[level] <= be16_to_cpu(block->bb_numrecs)) {
		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
		*stat = 1;
		return 0;
	}
	/* Fell off the end of the rightmost block: no next record. */
	if (be64_to_cpu(block->bb_rightsib) == NULLDFSBNO) {
		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
		*stat = 0;
		return 0;
	}
	/* Walk up until some ancestor level can step forward one entry. */
	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
		block = xfs_bmbt_get_block(cur, lev, &bp);
#ifdef DEBUG
		if ((error = xfs_btree_check_lblock(cur, block, lev, bp))) {
			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
			return error;
		}
#endif
		if (++cur->bc_ptrs[lev] <= be16_to_cpu(block->bb_numrecs))
			break;
		if (lev < cur->bc_nlevels - 1)
			xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
	}
	/* Ran off the top of the tree: no next record exists. */
	if (lev == cur->bc_nlevels) {
		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
		*stat = 0;
		return 0;
	}
	tp = cur->bc_tp;
	mp = cur->bc_mp;
	/*
	 * Walk back down, following the leftmost path: at each level read
	 * the child and position the cursor on its first entry.
	 */
	for (block = xfs_bmbt_get_block(cur, lev, &bp); lev > level; ) {
		fsbno = be64_to_cpu(*XFS_BMAP_PTR_IADDR(block, cur->bc_ptrs[lev], cur));
		if ((error = xfs_btree_read_bufl(mp, tp, fsbno, 0, &bp,
				XFS_BMAP_BTREE_REF))) {
			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
			return error;
		}
		lev--;
		xfs_btree_setbuf(cur, lev, bp);
		block = XFS_BUF_TO_BMBT_BLOCK(bp);
		if ((error = xfs_btree_check_lblock(cur, block, lev, bp))) {
			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
			return error;
		}
		cur->bc_ptrs[lev] = 1;
	}
	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
	*stat = 1;
	return 0;
}
2057
2058/*
2059 * Insert the current record at the point referenced by cur.
2060 *
2061 * A multi-level split of the tree on insert will invalidate the original
2062 * cursor. All callers of this function should assume that the cursor is
2063 * no longer valid and revalidate it.
2064 */
int					/* error */
xfs_bmbt_insert(
	xfs_btree_cur_t	*cur,
	int		*stat)		/* success/failure */
{
	int		error;		/* error return value */
	int		i;
	int		level;
	xfs_fsblock_t	nbno;
	xfs_btree_cur_t	*ncur;
	xfs_bmbt_rec_t	nrec;
	xfs_btree_cur_t	*pcur;

	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
	level = 0;
	nbno = NULLFSBLOCK;
	/* The record to insert was staged in the cursor by the caller. */
	xfs_bmbt_disk_set_all(&nrec, &cur->bc_rec.b);
	ncur = NULL;
	pcur = cur;
	/*
	 * Insert at the leaf; each insrec that splits a block hands back a
	 * new block number (nbno), a key record (nrec), and possibly a new
	 * cursor (ncur) positioned for inserting that key one level up.
	 * Loop until a level absorbs the insert without splitting.
	 */
	do {
		if ((error = xfs_bmbt_insrec(pcur, level++, &nbno, &nrec, &ncur,
				&i))) {
			if (pcur != cur)
				xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
			return error;
		}
		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
		/*
		 * Done with a temporary cursor: fold its allocation and
		 * firstblock state back into the caller's cursor before
		 * freeing it.
		 */
		if (pcur != cur && (ncur || nbno == NULLFSBLOCK)) {
			cur->bc_nlevels = pcur->bc_nlevels;
			cur->bc_private.b.allocated +=
				pcur->bc_private.b.allocated;
			pcur->bc_private.b.allocated = 0;
			ASSERT((cur->bc_private.b.firstblock != NULLFSBLOCK) ||
			       XFS_IS_REALTIME_INODE(cur->bc_private.b.ip));
			cur->bc_private.b.firstblock =
				pcur->bc_private.b.firstblock;
			ASSERT(cur->bc_private.b.flist ==
			       pcur->bc_private.b.flist);
			xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
		}
		if (ncur) {
			pcur = ncur;
			ncur = NULL;
		}
	} while (nbno != NULLFSBLOCK);
	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
	*stat = i;
	return 0;
error0:
	XFS_BMBT_TRACE_CURSOR(cur, ERROR);
	return error;
}
2118
2119/*
2120 * Log fields from the btree block header.
2121 */
void
xfs_bmbt_log_block(
	xfs_btree_cur_t		*cur,
	xfs_buf_t		*bp,
	int			fields)
{
	int			first;
	int			last;
	xfs_trans_t		*tp;
	/* Byte offset of each loggable header field, terminated by the size. */
	static const short	offsets[] = {
		offsetof(xfs_bmbt_block_t, bb_magic),
		offsetof(xfs_bmbt_block_t, bb_level),
		offsetof(xfs_bmbt_block_t, bb_numrecs),
		offsetof(xfs_bmbt_block_t, bb_leftsib),
		offsetof(xfs_bmbt_block_t, bb_rightsib),
		sizeof(xfs_bmbt_block_t)
	};

	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
	XFS_BMBT_TRACE_ARGBI(cur, bp, fields);
	tp = cur->bc_tp;
	if (bp) {
		/* Map the field bitmask to a byte range and log that range. */
		xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &first,
				  &last);
		xfs_trans_log_buf(tp, bp, first, last);
	} else
		/* No buffer: this is the root block, held in the inode fork. */
		xfs_trans_log_inode(tp, cur->bc_private.b.ip,
			XFS_ILOG_FBROOT(cur->bc_private.b.whichfork));
	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
}
2152
2153/*
2154 * Log record values from the btree block.
2155 */
2156void
2157xfs_bmbt_log_recs(
2158 xfs_btree_cur_t *cur,
2159 xfs_buf_t *bp,
2160 int rfirst,
2161 int rlast)
2162{
2163 xfs_bmbt_block_t *block;
2164 int first;
2165 int last;
2166 xfs_bmbt_rec_t *rp;
2167 xfs_trans_t *tp;
2168
2169 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
2170 XFS_BMBT_TRACE_ARGBII(cur, bp, rfirst, rlast);
2171 ASSERT(bp);
2172 tp = cur->bc_tp;
2173 block = XFS_BUF_TO_BMBT_BLOCK(bp);
2174 rp = XFS_BMAP_REC_DADDR(block, 1, cur);
2175 first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block);
2176 last = (int)(((xfs_caddr_t)&rp[rlast] - 1) - (xfs_caddr_t)block);
2177 xfs_trans_log_buf(tp, bp, first, last);
2178 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
2179}
2180
2181int /* error */
2182xfs_bmbt_lookup_eq(
2183 xfs_btree_cur_t *cur,
2184 xfs_fileoff_t off,
2185 xfs_fsblock_t bno,
2186 xfs_filblks_t len,
2187 int *stat) /* success/failure */
2188{
2189 cur->bc_rec.b.br_startoff = off;
2190 cur->bc_rec.b.br_startblock = bno;
2191 cur->bc_rec.b.br_blockcount = len;
2192 return xfs_bmbt_lookup(cur, XFS_LOOKUP_EQ, stat);
2193}
2194
2195int /* error */
2196xfs_bmbt_lookup_ge(
2197 xfs_btree_cur_t *cur,
2198 xfs_fileoff_t off,
2199 xfs_fsblock_t bno,
2200 xfs_filblks_t len,
2201 int *stat) /* success/failure */
2202{
2203 cur->bc_rec.b.br_startoff = off;
2204 cur->bc_rec.b.br_startblock = bno;
2205 cur->bc_rec.b.br_blockcount = len;
2206 return xfs_bmbt_lookup(cur, XFS_LOOKUP_GE, stat);
2207}
2208
2209/*
2210 * Give the bmap btree a new root block. Copy the old broot contents
2211 * down into a real block and make the broot point to it.
2212 */
int					/* error */
xfs_bmbt_newroot(
	xfs_btree_cur_t		*cur,		/* btree cursor */
	int			*logflags,	/* logging flags for inode */
	int			*stat)		/* return status - 0 fail */
{
	xfs_alloc_arg_t		args;		/* allocation arguments */
	xfs_bmbt_block_t	*block;		/* bmap btree block */
	xfs_buf_t		*bp;		/* buffer for block */
	xfs_bmbt_block_t	*cblock;	/* child btree block */
	xfs_bmbt_key_t		*ckp;		/* child key pointer */
	xfs_bmbt_ptr_t		*cpp;		/* child ptr pointer */
	int			error;		/* error return code */
#ifdef DEBUG
	int			i;		/* loop counter */
#endif
	xfs_bmbt_key_t		*kp;		/* pointer to bmap btree key */
	int			level;		/* btree level */
	xfs_bmbt_ptr_t		*pp;		/* pointer to bmap block addr */

	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
	level = cur->bc_nlevels - 1;
	block = xfs_bmbt_get_block(cur, level, &bp);
	/*
	 * Copy the root into a real block.
	 */
	args.mp = cur->bc_mp;
	pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
	args.tp = cur->bc_tp;
	args.fsbno = cur->bc_private.b.firstblock;
	args.mod = args.minleft = args.alignment = args.total = args.isfl =
		args.userdata = args.minalignslop = 0;
	args.minlen = args.maxlen = args.prod = 1;
	args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
	args.firstblock = args.fsbno;
	/* Pick an allocation target near the root's first child block. */
	if (args.fsbno == NULLFSBLOCK) {
#ifdef DEBUG
		if ((error = xfs_btree_check_lptr_disk(cur, *pp, level))) {
			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
			return error;
		}
#endif
		args.fsbno = be64_to_cpu(*pp);
		args.type = XFS_ALLOCTYPE_START_BNO;
	} else if (cur->bc_private.b.flist->xbf_low)
		args.type = XFS_ALLOCTYPE_START_BNO;
	else
		args.type = XFS_ALLOCTYPE_NEAR_BNO;
	if ((error = xfs_alloc_vextent(&args))) {
		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
		return error;
	}
	if (args.fsbno == NULLFSBLOCK) {
		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
		*stat = 0;
		return 0;
	}
	ASSERT(args.len == 1);
	/* Account the new btree block against the inode and its quota. */
	cur->bc_private.b.firstblock = args.fsbno;
	cur->bc_private.b.allocated++;
	cur->bc_private.b.ip->i_d.di_nblocks++;
	XFS_TRANS_MOD_DQUOT_BYINO(args.mp, args.tp, cur->bc_private.b.ip,
			  XFS_TRANS_DQ_BCOUNT, 1L);
	bp = xfs_btree_get_bufl(args.mp, cur->bc_tp, args.fsbno, 0);
	cblock = XFS_BUF_TO_BMBT_BLOCK(bp);
	/* The new child starts as a copy of the (old) root header. */
	*cblock = *block;
	/* The root gains a level and now holds exactly one entry. */
	be16_add_cpu(&block->bb_level, 1);
	block->bb_numrecs = cpu_to_be16(1);
	cur->bc_nlevels++;
	cur->bc_ptrs[level + 1] = 1;
	/* Move the old root's keys and ptrs down into the child block. */
	kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
	ckp = XFS_BMAP_KEY_IADDR(cblock, 1, cur);
	memcpy(ckp, kp, be16_to_cpu(cblock->bb_numrecs) * sizeof(*kp));
	cpp = XFS_BMAP_PTR_IADDR(cblock, 1, cur);
#ifdef DEBUG
	for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
		if ((error = xfs_btree_check_lptr_disk(cur, pp[i], level))) {
			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
			return error;
		}
	}
#endif
	memcpy(cpp, pp, be16_to_cpu(cblock->bb_numrecs) * sizeof(*pp));
#ifdef DEBUG
	if ((error = xfs_btree_check_lptr(cur, args.fsbno, level))) {
		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
		return error;
	}
#endif
	/* The root's single pointer now refers to the new child block. */
	*pp = cpu_to_be64(args.fsbno);
	/* Shrink the inode-fork root to its new one-entry size. */
	xfs_iroot_realloc(cur->bc_private.b.ip, 1 - be16_to_cpu(cblock->bb_numrecs),
		cur->bc_private.b.whichfork);
	xfs_btree_setbuf(cur, level, bp);
	/*
	 * Do all this logging at the end so that
	 * the root is at the right level.
	 */
	xfs_bmbt_log_block(cur, bp, XFS_BB_ALL_BITS);
	xfs_bmbt_log_keys(cur, bp, 1, be16_to_cpu(cblock->bb_numrecs));
	xfs_bmbt_log_ptrs(cur, bp, 1, be16_to_cpu(cblock->bb_numrecs));
	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
	*logflags |=
		XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork);
	*stat = 1;
	return 0;
}
2319 236
2320/* 237/*
2321 * Set all the fields in a bmap extent record from the arguments. 238 * Set all the fields in a bmap extent record from the arguments.
@@ -2512,7 +429,8 @@ xfs_bmbt_set_state(
2512 */ 429 */
2513void 430void
2514xfs_bmbt_to_bmdr( 431xfs_bmbt_to_bmdr(
2515 xfs_bmbt_block_t *rblock, 432 struct xfs_mount *mp,
433 struct xfs_btree_block *rblock,
2516 int rblocklen, 434 int rblocklen,
2517 xfs_bmdr_block_t *dblock, 435 xfs_bmdr_block_t *dblock,
2518 int dblocklen) 436 int dblocklen)
@@ -2524,67 +442,22 @@ xfs_bmbt_to_bmdr(
2524 __be64 *tpp; 442 __be64 *tpp;
2525 443
2526 ASSERT(be32_to_cpu(rblock->bb_magic) == XFS_BMAP_MAGIC); 444 ASSERT(be32_to_cpu(rblock->bb_magic) == XFS_BMAP_MAGIC);
2527 ASSERT(be64_to_cpu(rblock->bb_leftsib) == NULLDFSBNO); 445 ASSERT(be64_to_cpu(rblock->bb_u.l.bb_leftsib) == NULLDFSBNO);
2528 ASSERT(be64_to_cpu(rblock->bb_rightsib) == NULLDFSBNO); 446 ASSERT(be64_to_cpu(rblock->bb_u.l.bb_rightsib) == NULLDFSBNO);
2529 ASSERT(be16_to_cpu(rblock->bb_level) > 0); 447 ASSERT(be16_to_cpu(rblock->bb_level) > 0);
2530 dblock->bb_level = rblock->bb_level; 448 dblock->bb_level = rblock->bb_level;
2531 dblock->bb_numrecs = rblock->bb_numrecs; 449 dblock->bb_numrecs = rblock->bb_numrecs;
2532 dmxr = (int)XFS_BTREE_BLOCK_MAXRECS(dblocklen, xfs_bmdr, 0); 450 dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0);
2533 fkp = XFS_BMAP_BROOT_KEY_ADDR(rblock, 1, rblocklen); 451 fkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1);
2534 tkp = XFS_BTREE_KEY_ADDR(xfs_bmdr, dblock, 1); 452 tkp = XFS_BMDR_KEY_ADDR(dblock, 1);
2535 fpp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, rblocklen); 453 fpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen);
2536 tpp = XFS_BTREE_PTR_ADDR(xfs_bmdr, dblock, 1, dmxr); 454 tpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr);
2537 dmxr = be16_to_cpu(dblock->bb_numrecs); 455 dmxr = be16_to_cpu(dblock->bb_numrecs);
2538 memcpy(tkp, fkp, sizeof(*fkp) * dmxr); 456 memcpy(tkp, fkp, sizeof(*fkp) * dmxr);
2539 memcpy(tpp, fpp, sizeof(*fpp) * dmxr); 457 memcpy(tpp, fpp, sizeof(*fpp) * dmxr);
2540} 458}
2541 459
2542/* 460/*
2543 * Update the record to the passed values.
2544 */
2545int
2546xfs_bmbt_update(
2547 xfs_btree_cur_t *cur,
2548 xfs_fileoff_t off,
2549 xfs_fsblock_t bno,
2550 xfs_filblks_t len,
2551 xfs_exntst_t state)
2552{
2553 xfs_bmbt_block_t *block;
2554 xfs_buf_t *bp;
2555 int error;
2556 xfs_bmbt_key_t key;
2557 int ptr;
2558 xfs_bmbt_rec_t *rp;
2559
2560 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
2561 XFS_BMBT_TRACE_ARGFFFI(cur, (xfs_dfiloff_t)off, (xfs_dfsbno_t)bno,
2562 (xfs_dfilblks_t)len, (int)state);
2563 block = xfs_bmbt_get_block(cur, 0, &bp);
2564#ifdef DEBUG
2565 if ((error = xfs_btree_check_lblock(cur, block, 0, bp))) {
2566 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
2567 return error;
2568 }
2569#endif
2570 ptr = cur->bc_ptrs[0];
2571 rp = XFS_BMAP_REC_IADDR(block, ptr, cur);
2572 xfs_bmbt_disk_set_allf(rp, off, bno, len, state);
2573 xfs_bmbt_log_recs(cur, bp, ptr, ptr);
2574 if (ptr > 1) {
2575 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
2576 return 0;
2577 }
2578 key.br_startoff = cpu_to_be64(off);
2579 if ((error = xfs_bmbt_updkey(cur, &key, 1))) {
2580 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
2581 return error;
2582 }
2583 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
2584 return 0;
2585}
2586
2587/*
2588 * Check extent records, which have just been read, for 461 * Check extent records, which have just been read, for
2589 * any bit in the extent flag field. ASSERT on debug 462 * any bit in the extent flag field. ASSERT on debug
2590 * kernels, as this condition should not occur. 463 * kernels, as this condition should not occur.
@@ -2608,3 +481,451 @@ xfs_check_nostate_extents(
2608 } 481 }
2609 return 0; 482 return 0;
2610} 483}
484
485
486STATIC struct xfs_btree_cur *
487xfs_bmbt_dup_cursor(
488 struct xfs_btree_cur *cur)
489{
490 struct xfs_btree_cur *new;
491
492 new = xfs_bmbt_init_cursor(cur->bc_mp, cur->bc_tp,
493 cur->bc_private.b.ip, cur->bc_private.b.whichfork);
494
495 /*
496 * Copy the firstblock, flist, and flags values,
497 * since init cursor doesn't get them.
498 */
499 new->bc_private.b.firstblock = cur->bc_private.b.firstblock;
500 new->bc_private.b.flist = cur->bc_private.b.flist;
501 new->bc_private.b.flags = cur->bc_private.b.flags;
502
503 return new;
504}
505
506STATIC void
507xfs_bmbt_update_cursor(
508 struct xfs_btree_cur *src,
509 struct xfs_btree_cur *dst)
510{
511 ASSERT((dst->bc_private.b.firstblock != NULLFSBLOCK) ||
512 (dst->bc_private.b.ip->i_d.di_flags & XFS_DIFLAG_REALTIME));
513 ASSERT(dst->bc_private.b.flist == src->bc_private.b.flist);
514
515 dst->bc_private.b.allocated += src->bc_private.b.allocated;
516 dst->bc_private.b.firstblock = src->bc_private.b.firstblock;
517
518 src->bc_private.b.allocated = 0;
519}
520
521STATIC int
522xfs_bmbt_alloc_block(
523 struct xfs_btree_cur *cur,
524 union xfs_btree_ptr *start,
525 union xfs_btree_ptr *new,
526 int length,
527 int *stat)
528{
529 xfs_alloc_arg_t args; /* block allocation args */
530 int error; /* error return value */
531
532 memset(&args, 0, sizeof(args));
533 args.tp = cur->bc_tp;
534 args.mp = cur->bc_mp;
535 args.fsbno = cur->bc_private.b.firstblock;
536 args.firstblock = args.fsbno;
537
538 if (args.fsbno == NULLFSBLOCK) {
539 args.fsbno = be64_to_cpu(start->l);
540 args.type = XFS_ALLOCTYPE_START_BNO;
541 /*
542 * Make sure there is sufficient room left in the AG to
543 * complete a full tree split for an extent insert. If
544 * we are converting the middle part of an extent then
545 * we may need space for two tree splits.
546 *
547 * We are relying on the caller to make the correct block
548 * reservation for this operation to succeed. If the
549 * reservation amount is insufficient then we may fail a
550 * block allocation here and corrupt the filesystem.
551 */
552 args.minleft = xfs_trans_get_block_res(args.tp);
553 } else if (cur->bc_private.b.flist->xbf_low) {
554 args.type = XFS_ALLOCTYPE_START_BNO;
555 } else {
556 args.type = XFS_ALLOCTYPE_NEAR_BNO;
557 }
558
559 args.minlen = args.maxlen = args.prod = 1;
560 args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
561 if (!args.wasdel && xfs_trans_get_block_res(args.tp) == 0) {
562 error = XFS_ERROR(ENOSPC);
563 goto error0;
564 }
565 error = xfs_alloc_vextent(&args);
566 if (error)
567 goto error0;
568
569 if (args.fsbno == NULLFSBLOCK && args.minleft) {
570 /*
571 * Could not find an AG with enough free space to satisfy
572 * a full btree split. Try again without minleft and if
573 * successful activate the lowspace algorithm.
574 */
575 args.fsbno = 0;
576 args.type = XFS_ALLOCTYPE_FIRST_AG;
577 args.minleft = 0;
578 error = xfs_alloc_vextent(&args);
579 if (error)
580 goto error0;
581 cur->bc_private.b.flist->xbf_low = 1;
582 }
583 if (args.fsbno == NULLFSBLOCK) {
584 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
585 *stat = 0;
586 return 0;
587 }
588 ASSERT(args.len == 1);
589 cur->bc_private.b.firstblock = args.fsbno;
590 cur->bc_private.b.allocated++;
591 cur->bc_private.b.ip->i_d.di_nblocks++;
592 xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE);
593 XFS_TRANS_MOD_DQUOT_BYINO(args.mp, args.tp, cur->bc_private.b.ip,
594 XFS_TRANS_DQ_BCOUNT, 1L);
595
596 new->l = cpu_to_be64(args.fsbno);
597
598 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
599 *stat = 1;
600 return 0;
601
602 error0:
603 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
604 return error;
605}
606
607STATIC int
608xfs_bmbt_free_block(
609 struct xfs_btree_cur *cur,
610 struct xfs_buf *bp)
611{
612 struct xfs_mount *mp = cur->bc_mp;
613 struct xfs_inode *ip = cur->bc_private.b.ip;
614 struct xfs_trans *tp = cur->bc_tp;
615 xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp));
616
617 xfs_bmap_add_free(fsbno, 1, cur->bc_private.b.flist, mp);
618 ip->i_d.di_nblocks--;
619
620 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
621 XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
622 xfs_trans_binval(tp, bp);
623 return 0;
624}
625
626STATIC int
627xfs_bmbt_get_minrecs(
628 struct xfs_btree_cur *cur,
629 int level)
630{
631 if (level == cur->bc_nlevels - 1) {
632 struct xfs_ifork *ifp;
633
634 ifp = XFS_IFORK_PTR(cur->bc_private.b.ip,
635 cur->bc_private.b.whichfork);
636
637 return xfs_bmbt_maxrecs(cur->bc_mp,
638 ifp->if_broot_bytes, level == 0) / 2;
639 }
640
641 return cur->bc_mp->m_bmap_dmnr[level != 0];
642}
643
644int
645xfs_bmbt_get_maxrecs(
646 struct xfs_btree_cur *cur,
647 int level)
648{
649 if (level == cur->bc_nlevels - 1) {
650 struct xfs_ifork *ifp;
651
652 ifp = XFS_IFORK_PTR(cur->bc_private.b.ip,
653 cur->bc_private.b.whichfork);
654
655 return xfs_bmbt_maxrecs(cur->bc_mp,
656 ifp->if_broot_bytes, level == 0);
657 }
658
659 return cur->bc_mp->m_bmap_dmxr[level != 0];
660
661}
662
663/*
664 * Get the maximum records we could store in the on-disk format.
665 *
666 * For non-root nodes this is equivalent to xfs_bmbt_get_maxrecs, but
667 * for the root node this checks the available space in the dinode fork
668 * so that we can resize the in-memory buffer to match it. After a
669 * resize to the maximum size this function returns the same value
670 * as xfs_bmbt_get_maxrecs for the root node, too.
671 */
672STATIC int
673xfs_bmbt_get_dmaxrecs(
674 struct xfs_btree_cur *cur,
675 int level)
676{
677 if (level != cur->bc_nlevels - 1)
678 return cur->bc_mp->m_bmap_dmxr[level != 0];
679 return xfs_bmdr_maxrecs(cur->bc_mp, cur->bc_private.b.forksize,
680 level == 0);
681}
682
683STATIC void
684xfs_bmbt_init_key_from_rec(
685 union xfs_btree_key *key,
686 union xfs_btree_rec *rec)
687{
688 key->bmbt.br_startoff =
689 cpu_to_be64(xfs_bmbt_disk_get_startoff(&rec->bmbt));
690}
691
692STATIC void
693xfs_bmbt_init_rec_from_key(
694 union xfs_btree_key *key,
695 union xfs_btree_rec *rec)
696{
697 ASSERT(key->bmbt.br_startoff != 0);
698
699 xfs_bmbt_disk_set_allf(&rec->bmbt, be64_to_cpu(key->bmbt.br_startoff),
700 0, 0, XFS_EXT_NORM);
701}
702
703STATIC void
704xfs_bmbt_init_rec_from_cur(
705 struct xfs_btree_cur *cur,
706 union xfs_btree_rec *rec)
707{
708 xfs_bmbt_disk_set_all(&rec->bmbt, &cur->bc_rec.b);
709}
710
711STATIC void
712xfs_bmbt_init_ptr_from_cur(
713 struct xfs_btree_cur *cur,
714 union xfs_btree_ptr *ptr)
715{
716 ptr->l = 0;
717}
718
719STATIC __int64_t
720xfs_bmbt_key_diff(
721 struct xfs_btree_cur *cur,
722 union xfs_btree_key *key)
723{
724 return (__int64_t)be64_to_cpu(key->bmbt.br_startoff) -
725 cur->bc_rec.b.br_startoff;
726}
727
#ifdef DEBUG
/* Debug check: keys at the same level must be strictly increasing. */
STATIC int
xfs_bmbt_keys_inorder(
	struct xfs_btree_cur	*cur,
	union xfs_btree_key	*k1,
	union xfs_btree_key	*k2)
{
	xfs_fileoff_t		off1 = be64_to_cpu(k1->bmbt.br_startoff);
	xfs_fileoff_t		off2 = be64_to_cpu(k2->bmbt.br_startoff);

	return off1 < off2;
}

/* Debug check: consecutive leaf records must not overlap. */
STATIC int
xfs_bmbt_recs_inorder(
	struct xfs_btree_cur	*cur,
	union xfs_btree_rec	*r1,
	union xfs_btree_rec	*r2)
{
	xfs_fileoff_t		end1;

	end1 = xfs_bmbt_disk_get_startoff(&r1->bmbt) +
	       xfs_bmbt_disk_get_blockcount(&r1->bmbt);
	return end1 <= xfs_bmbt_disk_get_startoff(&r2->bmbt);
}
#endif	/* DEBUG */
750
#ifdef XFS_BTREE_TRACE
ktrace_t	*xfs_bmbt_trace_buf;

/*
 * Log one trace event into both the global bmbt trace buffer and the
 * owning inode's private trace buffer.
 */
STATIC void
xfs_bmbt_trace_enter(
	struct xfs_btree_cur	*cur,
	const char		*func,
	char			*s,
	int			type,
	int			line,
	__psunsigned_t		a0,
	__psunsigned_t		a1,
	__psunsigned_t		a2,
	__psunsigned_t		a3,
	__psunsigned_t		a4,
	__psunsigned_t		a5,
	__psunsigned_t		a6,
	__psunsigned_t		a7,
	__psunsigned_t		a8,
	__psunsigned_t		a9,
	__psunsigned_t		a10)
{
	struct xfs_inode	*ip = cur->bc_private.b.ip;
	int			whichfork = cur->bc_private.b.whichfork;
	void			*tag;

	/* First trace word packs the event type, fork and source line. */
	tag = (void *)((__psint_t)type | (whichfork << 8) | (line << 16));

	ktrace_enter(xfs_bmbt_trace_buf, tag,
		(void *)func, (void *)s, (void *)ip, (void *)cur,
		(void *)a0, (void *)a1, (void *)a2, (void *)a3,
		(void *)a4, (void *)a5, (void *)a6, (void *)a7,
		(void *)a8, (void *)a9, (void *)a10);
	ktrace_enter(ip->i_btrace, tag,
		(void *)func, (void *)s, (void *)ip, (void *)cur,
		(void *)a0, (void *)a1, (void *)a2, (void *)a3,
		(void *)a4, (void *)a5, (void *)a6, (void *)a7,
		(void *)a8, (void *)a9, (void *)a10);
}

/* Snapshot cursor state: levels, flags, allocated count, current record. */
STATIC void
xfs_bmbt_trace_cursor(
	struct xfs_btree_cur	*cur,
	__uint32_t		*s0,
	__uint64_t		*l0,
	__uint64_t		*l1)
{
	struct xfs_bmbt_rec_host rec;

	xfs_bmbt_set_all(&rec, &cur->bc_rec.b);

	*s0 = (cur->bc_nlevels << 24) |
	      (cur->bc_private.b.flags << 16) |
	      cur->bc_private.b.allocated;
	*l0 = rec.l0;
	*l1 = rec.l1;
}

/* Decode a btree key (startoff only) for tracing. */
STATIC void
xfs_bmbt_trace_key(
	struct xfs_btree_cur	*cur,
	union xfs_btree_key	*key,
	__uint64_t		*l0,
	__uint64_t		*l1)
{
	*l0 = be64_to_cpu(key->bmbt.br_startoff);
	*l1 = 0;
}

/* Decode a btree record (startoff/startblock/blockcount) for tracing. */
STATIC void
xfs_bmbt_trace_record(
	struct xfs_btree_cur	*cur,
	union xfs_btree_rec	*rec,
	__uint64_t		*l0,
	__uint64_t		*l1,
	__uint64_t		*l2)
{
	struct xfs_bmbt_irec	irec;

	xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
	*l0 = irec.br_startoff;
	*l1 = irec.br_startblock;
	*l2 = irec.br_blockcount;
}
#endif	/* XFS_BTREE_TRACE */
835
836static const struct xfs_btree_ops xfs_bmbt_ops = {
837 .rec_len = sizeof(xfs_bmbt_rec_t),
838 .key_len = sizeof(xfs_bmbt_key_t),
839
840 .dup_cursor = xfs_bmbt_dup_cursor,
841 .update_cursor = xfs_bmbt_update_cursor,
842 .alloc_block = xfs_bmbt_alloc_block,
843 .free_block = xfs_bmbt_free_block,
844 .get_maxrecs = xfs_bmbt_get_maxrecs,
845 .get_minrecs = xfs_bmbt_get_minrecs,
846 .get_dmaxrecs = xfs_bmbt_get_dmaxrecs,
847 .init_key_from_rec = xfs_bmbt_init_key_from_rec,
848 .init_rec_from_key = xfs_bmbt_init_rec_from_key,
849 .init_rec_from_cur = xfs_bmbt_init_rec_from_cur,
850 .init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur,
851 .key_diff = xfs_bmbt_key_diff,
852
853#ifdef DEBUG
854 .keys_inorder = xfs_bmbt_keys_inorder,
855 .recs_inorder = xfs_bmbt_recs_inorder,
856#endif
857
858#ifdef XFS_BTREE_TRACE
859 .trace_enter = xfs_bmbt_trace_enter,
860 .trace_cursor = xfs_bmbt_trace_cursor,
861 .trace_key = xfs_bmbt_trace_key,
862 .trace_record = xfs_bmbt_trace_record,
863#endif
864};
865
866/*
867 * Allocate a new bmap btree cursor.
868 */
869struct xfs_btree_cur * /* new bmap btree cursor */
870xfs_bmbt_init_cursor(
871 struct xfs_mount *mp, /* file system mount point */
872 struct xfs_trans *tp, /* transaction pointer */
873 struct xfs_inode *ip, /* inode owning the btree */
874 int whichfork) /* data or attr fork */
875{
876 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
877 struct xfs_btree_cur *cur;
878
879 cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
880
881 cur->bc_tp = tp;
882 cur->bc_mp = mp;
883 cur->bc_nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1;
884 cur->bc_btnum = XFS_BTNUM_BMAP;
885 cur->bc_blocklog = mp->m_sb.sb_blocklog;
886
887 cur->bc_ops = &xfs_bmbt_ops;
888 cur->bc_flags = XFS_BTREE_LONG_PTRS | XFS_BTREE_ROOT_IN_INODE;
889
890 cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork);
891 cur->bc_private.b.ip = ip;
892 cur->bc_private.b.firstblock = NULLFSBLOCK;
893 cur->bc_private.b.flist = NULL;
894 cur->bc_private.b.allocated = 0;
895 cur->bc_private.b.flags = 0;
896 cur->bc_private.b.whichfork = whichfork;
897
898 return cur;
899}
900
901/*
902 * Calculate number of records in a bmap btree block.
903 */
904int
905xfs_bmbt_maxrecs(
906 struct xfs_mount *mp,
907 int blocklen,
908 int leaf)
909{
910 blocklen -= XFS_BMBT_BLOCK_LEN(mp);
911
912 if (leaf)
913 return blocklen / sizeof(xfs_bmbt_rec_t);
914 return blocklen / (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t));
915}
916
917/*
918 * Calculate number of records in a bmap btree inode root.
919 */
920int
921xfs_bmdr_maxrecs(
922 struct xfs_mount *mp,
923 int blocklen,
924 int leaf)
925{
926 blocklen -= sizeof(xfs_bmdr_block_t);
927
928 if (leaf)
929 return blocklen / sizeof(xfs_bmdr_rec_t);
930 return blocklen / (sizeof(xfs_bmdr_key_t) + sizeof(xfs_bmdr_ptr_t));
931}
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h
index cd0d4b4bb81..a4555abb662 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h
@@ -21,9 +21,10 @@
21#define XFS_BMAP_MAGIC 0x424d4150 /* 'BMAP' */ 21#define XFS_BMAP_MAGIC 0x424d4150 /* 'BMAP' */
22 22
23struct xfs_btree_cur; 23struct xfs_btree_cur;
24struct xfs_btree_lblock; 24struct xfs_btree_block;
25struct xfs_mount; 25struct xfs_mount;
26struct xfs_inode; 26struct xfs_inode;
27struct xfs_trans;
27 28
28/* 29/*
29 * Bmap root header, on-disk form only. 30 * Bmap root header, on-disk form only.
@@ -145,71 +146,60 @@ typedef struct xfs_bmbt_key {
145/* btree pointer type */ 146/* btree pointer type */
146typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t; 147typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t;
147 148
148/* btree block header type */ 149/*
149typedef struct xfs_btree_lblock xfs_bmbt_block_t; 150 * Btree block header size depends on a superblock flag.
150 151 *
151#define XFS_BUF_TO_BMBT_BLOCK(bp) ((xfs_bmbt_block_t *)XFS_BUF_PTR(bp)) 152 * (not quite yet, but soon)
152 153 */
153#define XFS_BMAP_RBLOCK_DSIZE(lev,cur) ((cur)->bc_private.b.forksize) 154#define XFS_BMBT_BLOCK_LEN(mp) XFS_BTREE_LBLOCK_LEN
154#define XFS_BMAP_RBLOCK_ISIZE(lev,cur) \ 155
155 ((int)XFS_IFORK_PTR((cur)->bc_private.b.ip, \ 156#define XFS_BMBT_REC_ADDR(mp, block, index) \
156 (cur)->bc_private.b.whichfork)->if_broot_bytes) 157 ((xfs_bmbt_rec_t *) \
157 158 ((char *)(block) + \
158#define XFS_BMAP_BLOCK_DMAXRECS(lev,cur) \ 159 XFS_BMBT_BLOCK_LEN(mp) + \
159 (((lev) == (cur)->bc_nlevels - 1 ? \ 160 ((index) - 1) * sizeof(xfs_bmbt_rec_t)))
160 XFS_BTREE_BLOCK_MAXRECS(XFS_BMAP_RBLOCK_DSIZE(lev,cur), \ 161
161 xfs_bmdr, (lev) == 0) : \ 162#define XFS_BMBT_KEY_ADDR(mp, block, index) \
162 ((cur)->bc_mp->m_bmap_dmxr[(lev) != 0]))) 163 ((xfs_bmbt_key_t *) \
163#define XFS_BMAP_BLOCK_IMAXRECS(lev,cur) \ 164 ((char *)(block) + \
164 (((lev) == (cur)->bc_nlevels - 1 ? \ 165 XFS_BMBT_BLOCK_LEN(mp) + \
165 XFS_BTREE_BLOCK_MAXRECS(XFS_BMAP_RBLOCK_ISIZE(lev,cur),\ 166 ((index) - 1) * sizeof(xfs_bmbt_key_t)))
166 xfs_bmbt, (lev) == 0) : \ 167
167 ((cur)->bc_mp->m_bmap_dmxr[(lev) != 0]))) 168#define XFS_BMBT_PTR_ADDR(mp, block, index, maxrecs) \
168 169 ((xfs_bmbt_ptr_t *) \
169#define XFS_BMAP_BLOCK_DMINRECS(lev,cur) \ 170 ((char *)(block) + \
170 (((lev) == (cur)->bc_nlevels - 1 ? \ 171 XFS_BMBT_BLOCK_LEN(mp) + \
171 XFS_BTREE_BLOCK_MINRECS(XFS_BMAP_RBLOCK_DSIZE(lev,cur),\ 172 (maxrecs) * sizeof(xfs_bmbt_key_t) + \
172 xfs_bmdr, (lev) == 0) : \ 173 ((index) - 1) * sizeof(xfs_bmbt_ptr_t)))
173 ((cur)->bc_mp->m_bmap_dmnr[(lev) != 0]))) 174
174#define XFS_BMAP_BLOCK_IMINRECS(lev,cur) \ 175#define XFS_BMDR_REC_ADDR(block, index) \
175 (((lev) == (cur)->bc_nlevels - 1 ? \ 176 ((xfs_bmdr_rec_t *) \
176 XFS_BTREE_BLOCK_MINRECS(XFS_BMAP_RBLOCK_ISIZE(lev,cur),\ 177 ((char *)(block) + \
177 xfs_bmbt, (lev) == 0) : \ 178 sizeof(struct xfs_bmdr_block) + \
178 ((cur)->bc_mp->m_bmap_dmnr[(lev) != 0]))) 179 ((index) - 1) * sizeof(xfs_bmdr_rec_t)))
179 180
180#define XFS_BMAP_REC_DADDR(bb,i,cur) (XFS_BTREE_REC_ADDR(xfs_bmbt, bb, i)) 181#define XFS_BMDR_KEY_ADDR(block, index) \
181 182 ((xfs_bmdr_key_t *) \
182#define XFS_BMAP_REC_IADDR(bb,i,cur) (XFS_BTREE_REC_ADDR(xfs_bmbt, bb, i)) 183 ((char *)(block) + \
183 184 sizeof(struct xfs_bmdr_block) + \
184#define XFS_BMAP_KEY_DADDR(bb,i,cur) \ 185 ((index) - 1) * sizeof(xfs_bmdr_key_t)))
185 (XFS_BTREE_KEY_ADDR(xfs_bmbt, bb, i)) 186
186 187#define XFS_BMDR_PTR_ADDR(block, index, maxrecs) \
187#define XFS_BMAP_KEY_IADDR(bb,i,cur) \ 188 ((xfs_bmdr_ptr_t *) \
188 (XFS_BTREE_KEY_ADDR(xfs_bmbt, bb, i)) 189 ((char *)(block) + \
189 190 sizeof(struct xfs_bmdr_block) + \
190#define XFS_BMAP_PTR_DADDR(bb,i,cur) \ 191 (maxrecs) * sizeof(xfs_bmdr_key_t) + \
191 (XFS_BTREE_PTR_ADDR(xfs_bmbt, bb, i, XFS_BMAP_BLOCK_DMAXRECS( \ 192 ((index) - 1) * sizeof(xfs_bmdr_ptr_t)))
192 be16_to_cpu((bb)->bb_level), cur)))
193#define XFS_BMAP_PTR_IADDR(bb,i,cur) \
194 (XFS_BTREE_PTR_ADDR(xfs_bmbt, bb, i, XFS_BMAP_BLOCK_IMAXRECS( \
195 be16_to_cpu((bb)->bb_level), cur)))
196 193
197/* 194/*
198 * These are to be used when we know the size of the block and 195 * These are to be used when we know the size of the block and
199 * we don't have a cursor. 196 * we don't have a cursor.
200 */ 197 */
201#define XFS_BMAP_BROOT_REC_ADDR(bb,i,sz) \ 198#define XFS_BMAP_BROOT_PTR_ADDR(mp, bb, i, sz) \
202 (XFS_BTREE_REC_ADDR(xfs_bmbt,bb,i)) 199 XFS_BMBT_PTR_ADDR(mp, bb, i, xfs_bmbt_maxrecs(mp, sz, 0))
203#define XFS_BMAP_BROOT_KEY_ADDR(bb,i,sz) \
204 (XFS_BTREE_KEY_ADDR(xfs_bmbt,bb,i))
205#define XFS_BMAP_BROOT_PTR_ADDR(bb,i,sz) \
206 (XFS_BTREE_PTR_ADDR(xfs_bmbt,bb,i,XFS_BMAP_BROOT_MAXRECS(sz)))
207
208#define XFS_BMAP_BROOT_NUMRECS(bb) be16_to_cpu((bb)->bb_numrecs)
209#define XFS_BMAP_BROOT_MAXRECS(sz) XFS_BTREE_BLOCK_MAXRECS(sz,xfs_bmbt,0)
210 200
211#define XFS_BMAP_BROOT_SPACE_CALC(nrecs) \ 201#define XFS_BMAP_BROOT_SPACE_CALC(nrecs) \
212 (int)(sizeof(xfs_bmbt_block_t) + \ 202 (int)(XFS_BTREE_LBLOCK_LEN + \
213 ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)))) 203 ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t))))
214 204
215#define XFS_BMAP_BROOT_SPACE(bb) \ 205#define XFS_BMAP_BROOT_SPACE(bb) \
@@ -223,42 +213,12 @@ typedef struct xfs_btree_lblock xfs_bmbt_block_t;
223 */ 213 */
224#define XFS_BM_MAXLEVELS(mp,w) ((mp)->m_bm_maxlevels[(w)]) 214#define XFS_BM_MAXLEVELS(mp,w) ((mp)->m_bm_maxlevels[(w)])
225 215
226#define XFS_BMAP_SANITY_CHECK(mp,bb,level) \
227 (be32_to_cpu((bb)->bb_magic) == XFS_BMAP_MAGIC && \
228 be16_to_cpu((bb)->bb_level) == level && \
229 be16_to_cpu((bb)->bb_numrecs) > 0 && \
230 be16_to_cpu((bb)->bb_numrecs) <= (mp)->m_bmap_dmxr[(level) != 0])
231
232
233#ifdef __KERNEL__
234
235#if defined(XFS_BMBT_TRACE)
236/*
237 * Trace buffer entry types.
238 */
239#define XFS_BMBT_KTRACE_ARGBI 1
240#define XFS_BMBT_KTRACE_ARGBII 2
241#define XFS_BMBT_KTRACE_ARGFFFI 3
242#define XFS_BMBT_KTRACE_ARGI 4
243#define XFS_BMBT_KTRACE_ARGIFK 5
244#define XFS_BMBT_KTRACE_ARGIFR 6
245#define XFS_BMBT_KTRACE_ARGIK 7
246#define XFS_BMBT_KTRACE_CUR 8
247
248#define XFS_BMBT_TRACE_SIZE 4096 /* size of global trace buffer */
249#define XFS_BMBT_KTRACE_SIZE 32 /* size of per-inode trace buffer */
250extern ktrace_t *xfs_bmbt_trace_buf;
251#endif
252
253/* 216/*
254 * Prototypes for xfs_bmap.c to call. 217 * Prototypes for xfs_bmap.c to call.
255 */ 218 */
256extern void xfs_bmdr_to_bmbt(xfs_bmdr_block_t *, int, xfs_bmbt_block_t *, int); 219extern void xfs_bmdr_to_bmbt(struct xfs_mount *, xfs_bmdr_block_t *, int,
257extern int xfs_bmbt_decrement(struct xfs_btree_cur *, int, int *); 220 struct xfs_btree_block *, int);
258extern int xfs_bmbt_delete(struct xfs_btree_cur *, int *);
259extern void xfs_bmbt_get_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s); 221extern void xfs_bmbt_get_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s);
260extern xfs_bmbt_block_t *xfs_bmbt_get_block(struct xfs_btree_cur *cur,
261 int, struct xfs_buf **bpp);
262extern xfs_filblks_t xfs_bmbt_get_blockcount(xfs_bmbt_rec_host_t *r); 222extern xfs_filblks_t xfs_bmbt_get_blockcount(xfs_bmbt_rec_host_t *r);
263extern xfs_fsblock_t xfs_bmbt_get_startblock(xfs_bmbt_rec_host_t *r); 223extern xfs_fsblock_t xfs_bmbt_get_startblock(xfs_bmbt_rec_host_t *r);
264extern xfs_fileoff_t xfs_bmbt_get_startoff(xfs_bmbt_rec_host_t *r); 224extern xfs_fileoff_t xfs_bmbt_get_startoff(xfs_bmbt_rec_host_t *r);
@@ -268,22 +228,6 @@ extern void xfs_bmbt_disk_get_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s);
268extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(xfs_bmbt_rec_t *r); 228extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(xfs_bmbt_rec_t *r);
269extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(xfs_bmbt_rec_t *r); 229extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(xfs_bmbt_rec_t *r);
270 230
271extern int xfs_bmbt_increment(struct xfs_btree_cur *, int, int *);
272extern int xfs_bmbt_insert(struct xfs_btree_cur *, int *);
273extern void xfs_bmbt_log_block(struct xfs_btree_cur *, struct xfs_buf *, int);
274extern void xfs_bmbt_log_recs(struct xfs_btree_cur *, struct xfs_buf *, int,
275 int);
276extern int xfs_bmbt_lookup_eq(struct xfs_btree_cur *, xfs_fileoff_t,
277 xfs_fsblock_t, xfs_filblks_t, int *);
278extern int xfs_bmbt_lookup_ge(struct xfs_btree_cur *, xfs_fileoff_t,
279 xfs_fsblock_t, xfs_filblks_t, int *);
280
281/*
282 * Give the bmap btree a new root block. Copy the old broot contents
283 * down into a real block and make the broot point to it.
284 */
285extern int xfs_bmbt_newroot(struct xfs_btree_cur *cur, int *lflags, int *stat);
286
287extern void xfs_bmbt_set_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s); 231extern void xfs_bmbt_set_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s);
288extern void xfs_bmbt_set_allf(xfs_bmbt_rec_host_t *r, xfs_fileoff_t o, 232extern void xfs_bmbt_set_allf(xfs_bmbt_rec_host_t *r, xfs_fileoff_t o,
289 xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v); 233 xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v);
@@ -296,10 +240,15 @@ extern void xfs_bmbt_disk_set_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s);
296extern void xfs_bmbt_disk_set_allf(xfs_bmbt_rec_t *r, xfs_fileoff_t o, 240extern void xfs_bmbt_disk_set_allf(xfs_bmbt_rec_t *r, xfs_fileoff_t o,
297 xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v); 241 xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v);
298 242
299extern void xfs_bmbt_to_bmdr(xfs_bmbt_block_t *, int, xfs_bmdr_block_t *, int); 243extern void xfs_bmbt_to_bmdr(struct xfs_mount *, struct xfs_btree_block *, int,
300extern int xfs_bmbt_update(struct xfs_btree_cur *, xfs_fileoff_t, 244 xfs_bmdr_block_t *, int);
301 xfs_fsblock_t, xfs_filblks_t, xfs_exntst_t); 245
246extern int xfs_bmbt_get_maxrecs(struct xfs_btree_cur *, int level);
247extern int xfs_bmdr_maxrecs(struct xfs_mount *, int blocklen, int leaf);
248extern int xfs_bmbt_maxrecs(struct xfs_mount *, int blocklen, int leaf);
249
250extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *,
251 struct xfs_trans *, struct xfs_inode *, int);
302 252
303#endif /* __KERNEL__ */
304 253
305#endif /* __XFS_BMAP_BTREE_H__ */ 254#endif /* __XFS_BMAP_BTREE_H__ */
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index cc593a84c34..7ed59267420 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -34,7 +34,9 @@
34#include "xfs_attr_sf.h" 34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 35#include "xfs_dinode.h"
36#include "xfs_inode.h" 36#include "xfs_inode.h"
37#include "xfs_inode_item.h"
37#include "xfs_btree.h" 38#include "xfs_btree.h"
39#include "xfs_btree_trace.h"
38#include "xfs_ialloc.h" 40#include "xfs_ialloc.h"
39#include "xfs_error.h" 41#include "xfs_error.h"
40 42
@@ -50,135 +52,33 @@ const __uint32_t xfs_magics[XFS_BTNUM_MAX] = {
50 XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC 52 XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC
51}; 53};
52 54
53/*
54 * Checking routine: return maxrecs for the block.
55 */
56STATIC int /* number of records fitting in block */
57xfs_btree_maxrecs(
58 xfs_btree_cur_t *cur, /* btree cursor */
59 xfs_btree_block_t *block) /* generic btree block pointer */
60{
61 switch (cur->bc_btnum) {
62 case XFS_BTNUM_BNO:
63 case XFS_BTNUM_CNT:
64 return (int)XFS_ALLOC_BLOCK_MAXRECS(
65 be16_to_cpu(block->bb_h.bb_level), cur);
66 case XFS_BTNUM_BMAP:
67 return (int)XFS_BMAP_BLOCK_IMAXRECS(
68 be16_to_cpu(block->bb_h.bb_level), cur);
69 case XFS_BTNUM_INO:
70 return (int)XFS_INOBT_BLOCK_MAXRECS(
71 be16_to_cpu(block->bb_h.bb_level), cur);
72 default:
73 ASSERT(0);
74 return 0;
75 }
76}
77
78/*
79 * External routines.
80 */
81
82#ifdef DEBUG
83/*
84 * Debug routine: check that block header is ok.
85 */
86void
87xfs_btree_check_block(
88 xfs_btree_cur_t *cur, /* btree cursor */
89 xfs_btree_block_t *block, /* generic btree block pointer */
90 int level, /* level of the btree block */
91 xfs_buf_t *bp) /* buffer containing block, if any */
92{
93 if (XFS_BTREE_LONG_PTRS(cur->bc_btnum))
94 xfs_btree_check_lblock(cur, (xfs_btree_lblock_t *)block, level,
95 bp);
96 else
97 xfs_btree_check_sblock(cur, (xfs_btree_sblock_t *)block, level,
98 bp);
99}
100
101/*
102 * Debug routine: check that keys are in the right order.
103 */
104void
105xfs_btree_check_key(
106 xfs_btnum_t btnum, /* btree identifier */
107 void *ak1, /* pointer to left (lower) key */
108 void *ak2) /* pointer to right (higher) key */
109{
110 switch (btnum) {
111 case XFS_BTNUM_BNO: {
112 xfs_alloc_key_t *k1;
113 xfs_alloc_key_t *k2;
114
115 k1 = ak1;
116 k2 = ak2;
117 ASSERT(be32_to_cpu(k1->ar_startblock) < be32_to_cpu(k2->ar_startblock));
118 break;
119 }
120 case XFS_BTNUM_CNT: {
121 xfs_alloc_key_t *k1;
122 xfs_alloc_key_t *k2;
123
124 k1 = ak1;
125 k2 = ak2;
126 ASSERT(be32_to_cpu(k1->ar_blockcount) < be32_to_cpu(k2->ar_blockcount) ||
127 (k1->ar_blockcount == k2->ar_blockcount &&
128 be32_to_cpu(k1->ar_startblock) < be32_to_cpu(k2->ar_startblock)));
129 break;
130 }
131 case XFS_BTNUM_BMAP: {
132 xfs_bmbt_key_t *k1;
133 xfs_bmbt_key_t *k2;
134
135 k1 = ak1;
136 k2 = ak2;
137 ASSERT(be64_to_cpu(k1->br_startoff) < be64_to_cpu(k2->br_startoff));
138 break;
139 }
140 case XFS_BTNUM_INO: {
141 xfs_inobt_key_t *k1;
142 xfs_inobt_key_t *k2;
143
144 k1 = ak1;
145 k2 = ak2;
146 ASSERT(be32_to_cpu(k1->ir_startino) < be32_to_cpu(k2->ir_startino));
147 break;
148 }
149 default:
150 ASSERT(0);
151 }
152}
153#endif /* DEBUG */
154 55
155/* 56STATIC int /* error (0 or EFSCORRUPTED) */
156 * Checking routine: check that long form block header is ok.
157 */
158/* ARGSUSED */
159int /* error (0 or EFSCORRUPTED) */
160xfs_btree_check_lblock( 57xfs_btree_check_lblock(
161 xfs_btree_cur_t *cur, /* btree cursor */ 58 struct xfs_btree_cur *cur, /* btree cursor */
162 xfs_btree_lblock_t *block, /* btree long form block pointer */ 59 struct xfs_btree_block *block, /* btree long form block pointer */
163 int level, /* level of the btree block */ 60 int level, /* level of the btree block */
164 xfs_buf_t *bp) /* buffer for block, if any */ 61 struct xfs_buf *bp) /* buffer for block, if any */
165{ 62{
166 int lblock_ok; /* block passes checks */ 63 int lblock_ok; /* block passes checks */
167 xfs_mount_t *mp; /* file system mount point */ 64 struct xfs_mount *mp; /* file system mount point */
168 65
169 mp = cur->bc_mp; 66 mp = cur->bc_mp;
170 lblock_ok = 67 lblock_ok =
171 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] && 68 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
172 be16_to_cpu(block->bb_level) == level && 69 be16_to_cpu(block->bb_level) == level &&
173 be16_to_cpu(block->bb_numrecs) <= 70 be16_to_cpu(block->bb_numrecs) <=
174 xfs_btree_maxrecs(cur, (xfs_btree_block_t *)block) && 71 cur->bc_ops->get_maxrecs(cur, level) &&
175 block->bb_leftsib && 72 block->bb_u.l.bb_leftsib &&
176 (be64_to_cpu(block->bb_leftsib) == NULLDFSBNO || 73 (be64_to_cpu(block->bb_u.l.bb_leftsib) == NULLDFSBNO ||
177 XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_leftsib))) && 74 XFS_FSB_SANITY_CHECK(mp,
178 block->bb_rightsib && 75 be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
179 (be64_to_cpu(block->bb_rightsib) == NULLDFSBNO || 76 block->bb_u.l.bb_rightsib &&
180 XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_rightsib))); 77 (be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO ||
181 if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp, XFS_ERRTAG_BTREE_CHECK_LBLOCK, 78 XFS_FSB_SANITY_CHECK(mp,
79 be64_to_cpu(block->bb_u.l.bb_rightsib)));
80 if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
81 XFS_ERRTAG_BTREE_CHECK_LBLOCK,
182 XFS_RANDOM_BTREE_CHECK_LBLOCK))) { 82 XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
183 if (bp) 83 if (bp)
184 xfs_buftrace("LBTREE ERROR", bp); 84 xfs_buftrace("LBTREE ERROR", bp);
@@ -189,98 +89,15 @@ xfs_btree_check_lblock(
189 return 0; 89 return 0;
190} 90}
191 91
192/* 92STATIC int /* error (0 or EFSCORRUPTED) */
193 * Checking routine: check that (long) pointer is ok.
194 */
195int /* error (0 or EFSCORRUPTED) */
196xfs_btree_check_lptr(
197 xfs_btree_cur_t *cur, /* btree cursor */
198 xfs_dfsbno_t ptr, /* btree block disk address */
199 int level) /* btree block level */
200{
201 xfs_mount_t *mp; /* file system mount point */
202
203 mp = cur->bc_mp;
204 XFS_WANT_CORRUPTED_RETURN(
205 level > 0 &&
206 ptr != NULLDFSBNO &&
207 XFS_FSB_SANITY_CHECK(mp, ptr));
208 return 0;
209}
210
211#ifdef DEBUG
212/*
213 * Debug routine: check that records are in the right order.
214 */
215void
216xfs_btree_check_rec(
217 xfs_btnum_t btnum, /* btree identifier */
218 void *ar1, /* pointer to left (lower) record */
219 void *ar2) /* pointer to right (higher) record */
220{
221 switch (btnum) {
222 case XFS_BTNUM_BNO: {
223 xfs_alloc_rec_t *r1;
224 xfs_alloc_rec_t *r2;
225
226 r1 = ar1;
227 r2 = ar2;
228 ASSERT(be32_to_cpu(r1->ar_startblock) +
229 be32_to_cpu(r1->ar_blockcount) <=
230 be32_to_cpu(r2->ar_startblock));
231 break;
232 }
233 case XFS_BTNUM_CNT: {
234 xfs_alloc_rec_t *r1;
235 xfs_alloc_rec_t *r2;
236
237 r1 = ar1;
238 r2 = ar2;
239 ASSERT(be32_to_cpu(r1->ar_blockcount) < be32_to_cpu(r2->ar_blockcount) ||
240 (r1->ar_blockcount == r2->ar_blockcount &&
241 be32_to_cpu(r1->ar_startblock) < be32_to_cpu(r2->ar_startblock)));
242 break;
243 }
244 case XFS_BTNUM_BMAP: {
245 xfs_bmbt_rec_t *r1;
246 xfs_bmbt_rec_t *r2;
247
248 r1 = ar1;
249 r2 = ar2;
250 ASSERT(xfs_bmbt_disk_get_startoff(r1) +
251 xfs_bmbt_disk_get_blockcount(r1) <=
252 xfs_bmbt_disk_get_startoff(r2));
253 break;
254 }
255 case XFS_BTNUM_INO: {
256 xfs_inobt_rec_t *r1;
257 xfs_inobt_rec_t *r2;
258
259 r1 = ar1;
260 r2 = ar2;
261 ASSERT(be32_to_cpu(r1->ir_startino) + XFS_INODES_PER_CHUNK <=
262 be32_to_cpu(r2->ir_startino));
263 break;
264 }
265 default:
266 ASSERT(0);
267 }
268}
269#endif /* DEBUG */
270
271/*
272 * Checking routine: check that block header is ok.
273 */
274/* ARGSUSED */
275int /* error (0 or EFSCORRUPTED) */
276xfs_btree_check_sblock( 93xfs_btree_check_sblock(
277 xfs_btree_cur_t *cur, /* btree cursor */ 94 struct xfs_btree_cur *cur, /* btree cursor */
278 xfs_btree_sblock_t *block, /* btree short form block pointer */ 95 struct xfs_btree_block *block, /* btree short form block pointer */
279 int level, /* level of the btree block */ 96 int level, /* level of the btree block */
280 xfs_buf_t *bp) /* buffer containing block */ 97 struct xfs_buf *bp) /* buffer containing block */
281{ 98{
282 xfs_buf_t *agbp; /* buffer for ag. freespace struct */ 99 struct xfs_buf *agbp; /* buffer for ag. freespace struct */
283 xfs_agf_t *agf; /* ag. freespace structure */ 100 struct xfs_agf *agf; /* ag. freespace structure */
284 xfs_agblock_t agflen; /* native ag. freespace length */ 101 xfs_agblock_t agflen; /* native ag. freespace length */
285 int sblock_ok; /* block passes checks */ 102 int sblock_ok; /* block passes checks */
286 103
@@ -291,13 +108,13 @@ xfs_btree_check_sblock(
291 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] && 108 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
292 be16_to_cpu(block->bb_level) == level && 109 be16_to_cpu(block->bb_level) == level &&
293 be16_to_cpu(block->bb_numrecs) <= 110 be16_to_cpu(block->bb_numrecs) <=
294 xfs_btree_maxrecs(cur, (xfs_btree_block_t *)block) && 111 cur->bc_ops->get_maxrecs(cur, level) &&
295 (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK || 112 (be32_to_cpu(block->bb_u.s.bb_leftsib) == NULLAGBLOCK ||
296 be32_to_cpu(block->bb_leftsib) < agflen) && 113 be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) &&
297 block->bb_leftsib && 114 block->bb_u.s.bb_leftsib &&
298 (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK || 115 (be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK ||
299 be32_to_cpu(block->bb_rightsib) < agflen) && 116 be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) &&
300 block->bb_rightsib; 117 block->bb_u.s.bb_rightsib;
301 if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp, 118 if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp,
302 XFS_ERRTAG_BTREE_CHECK_SBLOCK, 119 XFS_ERRTAG_BTREE_CHECK_SBLOCK,
303 XFS_RANDOM_BTREE_CHECK_SBLOCK))) { 120 XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
@@ -311,27 +128,78 @@ xfs_btree_check_sblock(
311} 128}
312 129
313/* 130/*
314 * Checking routine: check that (short) pointer is ok. 131 * Debug routine: check that block header is ok.
132 */
133int
134xfs_btree_check_block(
135 struct xfs_btree_cur *cur, /* btree cursor */
136 struct xfs_btree_block *block, /* generic btree block pointer */
137 int level, /* level of the btree block */
138 struct xfs_buf *bp) /* buffer containing block, if any */
139{
140 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
141 return xfs_btree_check_lblock(cur, block, level, bp);
142 else
143 return xfs_btree_check_sblock(cur, block, level, bp);
144}
145
146/*
147 * Check that (long) pointer is ok.
315 */ 148 */
316int /* error (0 or EFSCORRUPTED) */ 149int /* error (0 or EFSCORRUPTED) */
150xfs_btree_check_lptr(
151 struct xfs_btree_cur *cur, /* btree cursor */
152 xfs_dfsbno_t bno, /* btree block disk address */
153 int level) /* btree block level */
154{
155 XFS_WANT_CORRUPTED_RETURN(
156 level > 0 &&
157 bno != NULLDFSBNO &&
158 XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
159 return 0;
160}
161
162#ifdef DEBUG
163/*
164 * Check that (short) pointer is ok.
165 */
166STATIC int /* error (0 or EFSCORRUPTED) */
317xfs_btree_check_sptr( 167xfs_btree_check_sptr(
318 xfs_btree_cur_t *cur, /* btree cursor */ 168 struct xfs_btree_cur *cur, /* btree cursor */
319 xfs_agblock_t ptr, /* btree block disk address */ 169 xfs_agblock_t bno, /* btree block disk address */
320 int level) /* btree block level */ 170 int level) /* btree block level */
321{ 171{
322 xfs_buf_t *agbp; /* buffer for ag. freespace struct */ 172 xfs_agblock_t agblocks = cur->bc_mp->m_sb.sb_agblocks;
323 xfs_agf_t *agf; /* ag. freespace structure */
324 173
325 agbp = cur->bc_private.a.agbp;
326 agf = XFS_BUF_TO_AGF(agbp);
327 XFS_WANT_CORRUPTED_RETURN( 174 XFS_WANT_CORRUPTED_RETURN(
328 level > 0 && 175 level > 0 &&
329 ptr != NULLAGBLOCK && ptr != 0 && 176 bno != NULLAGBLOCK &&
330 ptr < be32_to_cpu(agf->agf_length)); 177 bno != 0 &&
178 bno < agblocks);
331 return 0; 179 return 0;
332} 180}
333 181
334/* 182/*
183 * Check that block ptr is ok.
184 */
185STATIC int /* error (0 or EFSCORRUPTED) */
186xfs_btree_check_ptr(
187 struct xfs_btree_cur *cur, /* btree cursor */
188 union xfs_btree_ptr *ptr, /* btree block disk address */
189 int index, /* offset from ptr to check */
190 int level) /* btree block level */
191{
192 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
193 return xfs_btree_check_lptr(cur,
194 be64_to_cpu((&ptr->l)[index]), level);
195 } else {
196 return xfs_btree_check_sptr(cur,
197 be32_to_cpu((&ptr->s)[index]), level);
198 }
199}
200#endif
201
202/*
335 * Delete the btree cursor. 203 * Delete the btree cursor.
336 */ 204 */
337void 205void
@@ -387,16 +255,17 @@ xfs_btree_dup_cursor(
387 255
388 tp = cur->bc_tp; 256 tp = cur->bc_tp;
389 mp = cur->bc_mp; 257 mp = cur->bc_mp;
258
390 /* 259 /*
391 * Allocate a new cursor like the old one. 260 * Allocate a new cursor like the old one.
392 */ 261 */
393 new = xfs_btree_init_cursor(mp, tp, cur->bc_private.a.agbp, 262 new = cur->bc_ops->dup_cursor(cur);
394 cur->bc_private.a.agno, cur->bc_btnum, cur->bc_private.b.ip, 263
395 cur->bc_private.b.whichfork);
396 /* 264 /*
397 * Copy the record currently in the cursor. 265 * Copy the record currently in the cursor.
398 */ 266 */
399 new->bc_rec = cur->bc_rec; 267 new->bc_rec = cur->bc_rec;
268
400 /* 269 /*
401 * For each level current, re-get the buffer and copy the ptr value. 270 * For each level current, re-get the buffer and copy the ptr value.
402 */ 271 */
@@ -416,46 +285,174 @@ xfs_btree_dup_cursor(
416 } else 285 } else
417 new->bc_bufs[i] = NULL; 286 new->bc_bufs[i] = NULL;
418 } 287 }
419 /*
420 * For bmap btrees, copy the firstblock, flist, and flags values,
421 * since init cursor doesn't get them.
422 */
423 if (new->bc_btnum == XFS_BTNUM_BMAP) {
424 new->bc_private.b.firstblock = cur->bc_private.b.firstblock;
425 new->bc_private.b.flist = cur->bc_private.b.flist;
426 new->bc_private.b.flags = cur->bc_private.b.flags;
427 }
428 *ncur = new; 288 *ncur = new;
429 return 0; 289 return 0;
430} 290}
431 291
432/* 292/*
293 * XFS btree block layout and addressing:
294 *
295 * There are two types of blocks in the btree: leaf and non-leaf blocks.
296 *
297 * The leaf record start with a header then followed by records containing
298 * the values. A non-leaf block also starts with the same header, and
299 * then first contains lookup keys followed by an equal number of pointers
300 * to the btree blocks at the previous level.
301 *
302 * +--------+-------+-------+-------+-------+-------+-------+
303 * Leaf: | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
304 * +--------+-------+-------+-------+-------+-------+-------+
305 *
306 * +--------+-------+-------+-------+-------+-------+-------+
307 * Non-Leaf: | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
308 * +--------+-------+-------+-------+-------+-------+-------+
309 *
310 * The header is called struct xfs_btree_block for reasons better left unknown
311 * and comes in different versions for short (32bit) and long (64bit) block
312 * pointers. The record and key structures are defined by the btree instances
313 * and opaque to the btree core. The block pointers are simple disk endian
314 * integers, available in a short (32bit) and long (64bit) variant.
315 *
316 * The helpers below calculate the offset of a given record, key or pointer
317 * into a btree block (xfs_btree_*_offset) or return a pointer to the given
318 * record, key or pointer (xfs_btree_*_addr). Note that all addressing
319 * inside the btree block is done using indices starting at one, not zero!
320 */
321
322/*
323 * Return size of the btree block header for this btree instance.
324 */
325static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
326{
327 return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
328 XFS_BTREE_LBLOCK_LEN :
329 XFS_BTREE_SBLOCK_LEN;
330}
331
332/*
333 * Return size of btree block pointers for this btree instance.
334 */
335static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
336{
337 return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
338 sizeof(__be64) : sizeof(__be32);
339}
340
341/*
342 * Calculate offset of the n-th record in a btree block.
343 */
344STATIC size_t
345xfs_btree_rec_offset(
346 struct xfs_btree_cur *cur,
347 int n)
348{
349 return xfs_btree_block_len(cur) +
350 (n - 1) * cur->bc_ops->rec_len;
351}
352
353/*
354 * Calculate offset of the n-th key in a btree block.
355 */
356STATIC size_t
357xfs_btree_key_offset(
358 struct xfs_btree_cur *cur,
359 int n)
360{
361 return xfs_btree_block_len(cur) +
362 (n - 1) * cur->bc_ops->key_len;
363}
364
365/*
366 * Calculate offset of the n-th block pointer in a btree block.
367 */
368STATIC size_t
369xfs_btree_ptr_offset(
370 struct xfs_btree_cur *cur,
371 int n,
372 int level)
373{
374 return xfs_btree_block_len(cur) +
375 cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
376 (n - 1) * xfs_btree_ptr_len(cur);
377}
378
379/*
380 * Return a pointer to the n-th record in the btree block.
381 */
382STATIC union xfs_btree_rec *
383xfs_btree_rec_addr(
384 struct xfs_btree_cur *cur,
385 int n,
386 struct xfs_btree_block *block)
387{
388 return (union xfs_btree_rec *)
389 ((char *)block + xfs_btree_rec_offset(cur, n));
390}
391
392/*
393 * Return a pointer to the n-th key in the btree block.
394 */
395STATIC union xfs_btree_key *
396xfs_btree_key_addr(
397 struct xfs_btree_cur *cur,
398 int n,
399 struct xfs_btree_block *block)
400{
401 return (union xfs_btree_key *)
402 ((char *)block + xfs_btree_key_offset(cur, n));
403}
404
405/*
406 * Return a pointer to the n-th block pointer in the btree block.
407 */
408STATIC union xfs_btree_ptr *
409xfs_btree_ptr_addr(
410 struct xfs_btree_cur *cur,
411 int n,
412 struct xfs_btree_block *block)
413{
414 int level = xfs_btree_get_level(block);
415
416 ASSERT(block->bb_level != 0);
417
418 return (union xfs_btree_ptr *)
419 ((char *)block + xfs_btree_ptr_offset(cur, n, level));
420}
421
422/*
423 * Get a the root block which is stored in the inode.
424 *
425 * For now this btree implementation assumes the btree root is always
426 * stored in the if_broot field of an inode fork.
427 */
428STATIC struct xfs_btree_block *
429xfs_btree_get_iroot(
430 struct xfs_btree_cur *cur)
431{
432 struct xfs_ifork *ifp;
433
434 ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork);
435 return (struct xfs_btree_block *)ifp->if_broot;
436}
437
438/*
433 * Retrieve the block pointer from the cursor at the given level. 439 * Retrieve the block pointer from the cursor at the given level.
434 * This may be a bmap btree root or from a buffer. 440 * This may be an inode btree root or from a buffer.
435 */ 441 */
436STATIC xfs_btree_block_t * /* generic btree block pointer */ 442STATIC struct xfs_btree_block * /* generic btree block pointer */
437xfs_btree_get_block( 443xfs_btree_get_block(
438 xfs_btree_cur_t *cur, /* btree cursor */ 444 struct xfs_btree_cur *cur, /* btree cursor */
439 int level, /* level in btree */ 445 int level, /* level in btree */
440 xfs_buf_t **bpp) /* buffer containing the block */ 446 struct xfs_buf **bpp) /* buffer containing the block */
441{ 447{
442 xfs_btree_block_t *block; /* return value */ 448 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
443 xfs_buf_t *bp; /* return buffer */ 449 (level == cur->bc_nlevels - 1)) {
444 xfs_ifork_t *ifp; /* inode fork pointer */ 450 *bpp = NULL;
445 int whichfork; /* data or attr fork */ 451 return xfs_btree_get_iroot(cur);
446
447 if (cur->bc_btnum == XFS_BTNUM_BMAP && level == cur->bc_nlevels - 1) {
448 whichfork = cur->bc_private.b.whichfork;
449 ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, whichfork);
450 block = (xfs_btree_block_t *)ifp->if_broot;
451 bp = NULL;
452 } else {
453 bp = cur->bc_bufs[level];
454 block = XFS_BUF_TO_BLOCK(bp);
455 } 452 }
456 ASSERT(block != NULL); 453
457 *bpp = bp; 454 *bpp = cur->bc_bufs[level];
458 return block; 455 return XFS_BUF_TO_BLOCK(*bpp);
459} 456}
460 457
461/* 458/*
@@ -505,97 +502,6 @@ xfs_btree_get_bufs(
505} 502}
506 503
507/* 504/*
508 * Allocate a new btree cursor.
509 * The cursor is either for allocation (A) or bmap (B) or inodes (I).
510 */
511xfs_btree_cur_t * /* new btree cursor */
512xfs_btree_init_cursor(
513 xfs_mount_t *mp, /* file system mount point */
514 xfs_trans_t *tp, /* transaction pointer */
515 xfs_buf_t *agbp, /* (A only) buffer for agf structure */
516 /* (I only) buffer for agi structure */
517 xfs_agnumber_t agno, /* (AI only) allocation group number */
518 xfs_btnum_t btnum, /* btree identifier */
519 xfs_inode_t *ip, /* (B only) inode owning the btree */
520 int whichfork) /* (B only) data or attr fork */
521{
522 xfs_agf_t *agf; /* (A) allocation group freespace */
523 xfs_agi_t *agi; /* (I) allocation group inodespace */
524 xfs_btree_cur_t *cur; /* return value */
525 xfs_ifork_t *ifp; /* (I) inode fork pointer */
526 int nlevels=0; /* number of levels in the btree */
527
528 ASSERT(xfs_btree_cur_zone != NULL);
529 /*
530 * Allocate a new cursor.
531 */
532 cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
533 /*
534 * Deduce the number of btree levels from the arguments.
535 */
536 switch (btnum) {
537 case XFS_BTNUM_BNO:
538 case XFS_BTNUM_CNT:
539 agf = XFS_BUF_TO_AGF(agbp);
540 nlevels = be32_to_cpu(agf->agf_levels[btnum]);
541 break;
542 case XFS_BTNUM_BMAP:
543 ifp = XFS_IFORK_PTR(ip, whichfork);
544 nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1;
545 break;
546 case XFS_BTNUM_INO:
547 agi = XFS_BUF_TO_AGI(agbp);
548 nlevels = be32_to_cpu(agi->agi_level);
549 break;
550 default:
551 ASSERT(0);
552 }
553 /*
554 * Fill in the common fields.
555 */
556 cur->bc_tp = tp;
557 cur->bc_mp = mp;
558 cur->bc_nlevels = nlevels;
559 cur->bc_btnum = btnum;
560 cur->bc_blocklog = mp->m_sb.sb_blocklog;
561 /*
562 * Fill in private fields.
563 */
564 switch (btnum) {
565 case XFS_BTNUM_BNO:
566 case XFS_BTNUM_CNT:
567 /*
568 * Allocation btree fields.
569 */
570 cur->bc_private.a.agbp = agbp;
571 cur->bc_private.a.agno = agno;
572 break;
573 case XFS_BTNUM_INO:
574 /*
575 * Inode allocation btree fields.
576 */
577 cur->bc_private.a.agbp = agbp;
578 cur->bc_private.a.agno = agno;
579 break;
580 case XFS_BTNUM_BMAP:
581 /*
582 * Bmap btree fields.
583 */
584 cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork);
585 cur->bc_private.b.ip = ip;
586 cur->bc_private.b.firstblock = NULLFSBLOCK;
587 cur->bc_private.b.flist = NULL;
588 cur->bc_private.b.allocated = 0;
589 cur->bc_private.b.flags = 0;
590 cur->bc_private.b.whichfork = whichfork;
591 break;
592 default:
593 ASSERT(0);
594 }
595 return cur;
596}
597
598/*
599 * Check for the cursor referring to the last block at the given level. 505 * Check for the cursor referring to the last block at the given level.
600 */ 506 */
601int /* 1=is last block, 0=not last block */ 507int /* 1=is last block, 0=not last block */
@@ -603,12 +509,12 @@ xfs_btree_islastblock(
603 xfs_btree_cur_t *cur, /* btree cursor */ 509 xfs_btree_cur_t *cur, /* btree cursor */
604 int level) /* level to check */ 510 int level) /* level to check */
605{ 511{
606 xfs_btree_block_t *block; /* generic btree block pointer */ 512 struct xfs_btree_block *block; /* generic btree block pointer */
607 xfs_buf_t *bp; /* buffer containing block */ 513 xfs_buf_t *bp; /* buffer containing block */
608 514
609 block = xfs_btree_get_block(cur, level, &bp); 515 block = xfs_btree_get_block(cur, level, &bp);
610 xfs_btree_check_block(cur, block, level, bp); 516 xfs_btree_check_block(cur, block, level, bp);
611 if (XFS_BTREE_LONG_PTRS(cur->bc_btnum)) 517 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
612 return be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO; 518 return be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO;
613 else 519 else
614 return be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK; 520 return be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK;
@@ -618,12 +524,12 @@ xfs_btree_islastblock(
618 * Change the cursor to point to the first record at the given level. 524 * Change the cursor to point to the first record at the given level.
619 * Other levels are unaffected. 525 * Other levels are unaffected.
620 */ 526 */
621int /* success=1, failure=0 */ 527STATIC int /* success=1, failure=0 */
622xfs_btree_firstrec( 528xfs_btree_firstrec(
623 xfs_btree_cur_t *cur, /* btree cursor */ 529 xfs_btree_cur_t *cur, /* btree cursor */
624 int level) /* level to change */ 530 int level) /* level to change */
625{ 531{
626 xfs_btree_block_t *block; /* generic btree block pointer */ 532 struct xfs_btree_block *block; /* generic btree block pointer */
627 xfs_buf_t *bp; /* buffer containing block */ 533 xfs_buf_t *bp; /* buffer containing block */
628 534
629 /* 535 /*
@@ -634,7 +540,7 @@ xfs_btree_firstrec(
634 /* 540 /*
635 * It's empty, there is no such record. 541 * It's empty, there is no such record.
636 */ 542 */
637 if (!block->bb_h.bb_numrecs) 543 if (!block->bb_numrecs)
638 return 0; 544 return 0;
639 /* 545 /*
640 * Set the ptr value to 1, that's the first record/key. 546 * Set the ptr value to 1, that's the first record/key.
@@ -647,12 +553,12 @@ xfs_btree_firstrec(
647 * Change the cursor to point to the last record in the current block 553 * Change the cursor to point to the last record in the current block
648 * at the given level. Other levels are unaffected. 554 * at the given level. Other levels are unaffected.
649 */ 555 */
650int /* success=1, failure=0 */ 556STATIC int /* success=1, failure=0 */
651xfs_btree_lastrec( 557xfs_btree_lastrec(
652 xfs_btree_cur_t *cur, /* btree cursor */ 558 xfs_btree_cur_t *cur, /* btree cursor */
653 int level) /* level to change */ 559 int level) /* level to change */
654{ 560{
655 xfs_btree_block_t *block; /* generic btree block pointer */ 561 struct xfs_btree_block *block; /* generic btree block pointer */
656 xfs_buf_t *bp; /* buffer containing block */ 562 xfs_buf_t *bp; /* buffer containing block */
657 563
658 /* 564 /*
@@ -663,12 +569,12 @@ xfs_btree_lastrec(
663 /* 569 /*
664 * It's empty, there is no such record. 570 * It's empty, there is no such record.
665 */ 571 */
666 if (!block->bb_h.bb_numrecs) 572 if (!block->bb_numrecs)
667 return 0; 573 return 0;
668 /* 574 /*
669 * Set the ptr value to numrecs, that's the last record/key. 575 * Set the ptr value to numrecs, that's the last record/key.
670 */ 576 */
671 cur->bc_ptrs[level] = be16_to_cpu(block->bb_h.bb_numrecs); 577 cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs);
672 return 1; 578 return 1;
673} 579}
674 580
@@ -817,66 +723,84 @@ xfs_btree_reada_bufs(
817 xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count); 723 xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
818} 724}
819 725
726STATIC int
727xfs_btree_readahead_lblock(
728 struct xfs_btree_cur *cur,
729 int lr,
730 struct xfs_btree_block *block)
731{
732 int rval = 0;
733 xfs_fsblock_t left = be64_to_cpu(block->bb_u.l.bb_leftsib);
734 xfs_fsblock_t right = be64_to_cpu(block->bb_u.l.bb_rightsib);
735
736 if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) {
737 xfs_btree_reada_bufl(cur->bc_mp, left, 1);
738 rval++;
739 }
740
741 if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) {
742 xfs_btree_reada_bufl(cur->bc_mp, right, 1);
743 rval++;
744 }
745
746 return rval;
747}
748
749STATIC int
750xfs_btree_readahead_sblock(
751 struct xfs_btree_cur *cur,
752 int lr,
753 struct xfs_btree_block *block)
754{
755 int rval = 0;
756 xfs_agblock_t left = be32_to_cpu(block->bb_u.s.bb_leftsib);
757 xfs_agblock_t right = be32_to_cpu(block->bb_u.s.bb_rightsib);
758
759
760 if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
761 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
762 left, 1);
763 rval++;
764 }
765
766 if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
767 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
768 right, 1);
769 rval++;
770 }
771
772 return rval;
773}
774
820/* 775/*
821 * Read-ahead btree blocks, at the given level. 776 * Read-ahead btree blocks, at the given level.
822 * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA. 777 * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
823 */ 778 */
824int 779STATIC int
825xfs_btree_readahead_core( 780xfs_btree_readahead(
826 xfs_btree_cur_t *cur, /* btree cursor */ 781 struct xfs_btree_cur *cur, /* btree cursor */
827 int lev, /* level in btree */ 782 int lev, /* level in btree */
828 int lr) /* left/right bits */ 783 int lr) /* left/right bits */
829{ 784{
830 xfs_alloc_block_t *a; 785 struct xfs_btree_block *block;
831 xfs_bmbt_block_t *b; 786
832 xfs_inobt_block_t *i; 787 /*
833 int rval = 0; 788 * No readahead needed if we are at the root level and the
789 * btree root is stored in the inode.
790 */
791 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
792 (lev == cur->bc_nlevels - 1))
793 return 0;
794
795 if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
796 return 0;
834 797
835 ASSERT(cur->bc_bufs[lev] != NULL);
836 cur->bc_ra[lev] |= lr; 798 cur->bc_ra[lev] |= lr;
837 switch (cur->bc_btnum) { 799 block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]);
838 case XFS_BTNUM_BNO: 800
839 case XFS_BTNUM_CNT: 801 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
840 a = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[lev]); 802 return xfs_btree_readahead_lblock(cur, lr, block);
841 if ((lr & XFS_BTCUR_LEFTRA) && be32_to_cpu(a->bb_leftsib) != NULLAGBLOCK) { 803 return xfs_btree_readahead_sblock(cur, lr, block);
842 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
843 be32_to_cpu(a->bb_leftsib), 1);
844 rval++;
845 }
846 if ((lr & XFS_BTCUR_RIGHTRA) && be32_to_cpu(a->bb_rightsib) != NULLAGBLOCK) {
847 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
848 be32_to_cpu(a->bb_rightsib), 1);
849 rval++;
850 }
851 break;
852 case XFS_BTNUM_BMAP:
853 b = XFS_BUF_TO_BMBT_BLOCK(cur->bc_bufs[lev]);
854 if ((lr & XFS_BTCUR_LEFTRA) && be64_to_cpu(b->bb_leftsib) != NULLDFSBNO) {
855 xfs_btree_reada_bufl(cur->bc_mp, be64_to_cpu(b->bb_leftsib), 1);
856 rval++;
857 }
858 if ((lr & XFS_BTCUR_RIGHTRA) && be64_to_cpu(b->bb_rightsib) != NULLDFSBNO) {
859 xfs_btree_reada_bufl(cur->bc_mp, be64_to_cpu(b->bb_rightsib), 1);
860 rval++;
861 }
862 break;
863 case XFS_BTNUM_INO:
864 i = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]);
865 if ((lr & XFS_BTCUR_LEFTRA) && be32_to_cpu(i->bb_leftsib) != NULLAGBLOCK) {
866 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
867 be32_to_cpu(i->bb_leftsib), 1);
868 rval++;
869 }
870 if ((lr & XFS_BTCUR_RIGHTRA) && be32_to_cpu(i->bb_rightsib) != NULLAGBLOCK) {
871 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
872 be32_to_cpu(i->bb_rightsib), 1);
873 rval++;
874 }
875 break;
876 default:
877 ASSERT(0);
878 }
879 return rval;
880} 804}
881 805
882/* 806/*
@@ -889,7 +813,7 @@ xfs_btree_setbuf(
889 int lev, /* level in btree */ 813 int lev, /* level in btree */
890 xfs_buf_t *bp) /* new buffer to set */ 814 xfs_buf_t *bp) /* new buffer to set */
891{ 815{
892 xfs_btree_block_t *b; /* btree block */ 816 struct xfs_btree_block *b; /* btree block */
893 xfs_buf_t *obp; /* old buffer pointer */ 817 xfs_buf_t *obp; /* old buffer pointer */
894 818
895 obp = cur->bc_bufs[lev]; 819 obp = cur->bc_bufs[lev];
@@ -900,7 +824,7 @@ xfs_btree_setbuf(
900 if (!bp) 824 if (!bp)
901 return; 825 return;
902 b = XFS_BUF_TO_BLOCK(bp); 826 b = XFS_BUF_TO_BLOCK(bp);
903 if (XFS_BTREE_LONG_PTRS(cur->bc_btnum)) { 827 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
904 if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO) 828 if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO)
905 cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; 829 cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
906 if (be64_to_cpu(b->bb_u.l.bb_rightsib) == NULLDFSBNO) 830 if (be64_to_cpu(b->bb_u.l.bb_rightsib) == NULLDFSBNO)
@@ -912,3 +836,2855 @@ xfs_btree_setbuf(
912 cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; 836 cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
913 } 837 }
914} 838}
839
840STATIC int
841xfs_btree_ptr_is_null(
842 struct xfs_btree_cur *cur,
843 union xfs_btree_ptr *ptr)
844{
845 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
846 return be64_to_cpu(ptr->l) == NULLFSBLOCK;
847 else
848 return be32_to_cpu(ptr->s) == NULLAGBLOCK;
849}
850
851STATIC void
852xfs_btree_set_ptr_null(
853 struct xfs_btree_cur *cur,
854 union xfs_btree_ptr *ptr)
855{
856 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
857 ptr->l = cpu_to_be64(NULLFSBLOCK);
858 else
859 ptr->s = cpu_to_be32(NULLAGBLOCK);
860}
861
862/*
863 * Get/set/init sibling pointers
864 */
865STATIC void
866xfs_btree_get_sibling(
867 struct xfs_btree_cur *cur,
868 struct xfs_btree_block *block,
869 union xfs_btree_ptr *ptr,
870 int lr)
871{
872 ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
873
874 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
875 if (lr == XFS_BB_RIGHTSIB)
876 ptr->l = block->bb_u.l.bb_rightsib;
877 else
878 ptr->l = block->bb_u.l.bb_leftsib;
879 } else {
880 if (lr == XFS_BB_RIGHTSIB)
881 ptr->s = block->bb_u.s.bb_rightsib;
882 else
883 ptr->s = block->bb_u.s.bb_leftsib;
884 }
885}
886
887STATIC void
888xfs_btree_set_sibling(
889 struct xfs_btree_cur *cur,
890 struct xfs_btree_block *block,
891 union xfs_btree_ptr *ptr,
892 int lr)
893{
894 ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
895
896 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
897 if (lr == XFS_BB_RIGHTSIB)
898 block->bb_u.l.bb_rightsib = ptr->l;
899 else
900 block->bb_u.l.bb_leftsib = ptr->l;
901 } else {
902 if (lr == XFS_BB_RIGHTSIB)
903 block->bb_u.s.bb_rightsib = ptr->s;
904 else
905 block->bb_u.s.bb_leftsib = ptr->s;
906 }
907}
908
909STATIC void
910xfs_btree_init_block(
911 struct xfs_btree_cur *cur,
912 int level,
913 int numrecs,
914 struct xfs_btree_block *new) /* new block */
915{
916 new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
917 new->bb_level = cpu_to_be16(level);
918 new->bb_numrecs = cpu_to_be16(numrecs);
919
920 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
921 new->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK);
922 new->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK);
923 } else {
924 new->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
925 new->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
926 }
927}
928
929/*
930 * Return true if ptr is the last record in the btree and
931 * we need to track updateѕ to this record. The decision
932 * will be further refined in the update_lastrec method.
933 */
934STATIC int
935xfs_btree_is_lastrec(
936 struct xfs_btree_cur *cur,
937 struct xfs_btree_block *block,
938 int level)
939{
940 union xfs_btree_ptr ptr;
941
942 if (level > 0)
943 return 0;
944 if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
945 return 0;
946
947 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
948 if (!xfs_btree_ptr_is_null(cur, &ptr))
949 return 0;
950 return 1;
951}
952
953STATIC void
954xfs_btree_buf_to_ptr(
955 struct xfs_btree_cur *cur,
956 struct xfs_buf *bp,
957 union xfs_btree_ptr *ptr)
958{
959 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
960 ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
961 XFS_BUF_ADDR(bp)));
962 else {
963 ptr->s = cpu_to_be32(XFS_DADDR_TO_AGBNO(cur->bc_mp,
964 XFS_BUF_ADDR(bp)));
965 }
966}
967
968STATIC xfs_daddr_t
969xfs_btree_ptr_to_daddr(
970 struct xfs_btree_cur *cur,
971 union xfs_btree_ptr *ptr)
972{
973 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
974 ASSERT(be64_to_cpu(ptr->l) != NULLFSBLOCK);
975
976 return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
977 } else {
978 ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
979 ASSERT(be32_to_cpu(ptr->s) != NULLAGBLOCK);
980
981 return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
982 be32_to_cpu(ptr->s));
983 }
984}
985
986STATIC void
987xfs_btree_set_refs(
988 struct xfs_btree_cur *cur,
989 struct xfs_buf *bp)
990{
991 switch (cur->bc_btnum) {
992 case XFS_BTNUM_BNO:
993 case XFS_BTNUM_CNT:
994 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_ALLOC_BTREE_REF);
995 break;
996 case XFS_BTNUM_INO:
997 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_INOMAP, XFS_INO_BTREE_REF);
998 break;
999 case XFS_BTNUM_BMAP:
1000 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_BMAP_BTREE_REF);
1001 break;
1002 default:
1003 ASSERT(0);
1004 }
1005}
1006
1007STATIC int
1008xfs_btree_get_buf_block(
1009 struct xfs_btree_cur *cur,
1010 union xfs_btree_ptr *ptr,
1011 int flags,
1012 struct xfs_btree_block **block,
1013 struct xfs_buf **bpp)
1014{
1015 struct xfs_mount *mp = cur->bc_mp;
1016 xfs_daddr_t d;
1017
1018 /* need to sort out how callers deal with failures first */
1019 ASSERT(!(flags & XFS_BUF_TRYLOCK));
1020
1021 d = xfs_btree_ptr_to_daddr(cur, ptr);
1022 *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
1023 mp->m_bsize, flags);
1024
1025 ASSERT(*bpp);
1026 ASSERT(!XFS_BUF_GETERROR(*bpp));
1027
1028 *block = XFS_BUF_TO_BLOCK(*bpp);
1029 return 0;
1030}
1031
1032/*
1033 * Read in the buffer at the given ptr and return the buffer and
1034 * the block pointer within the buffer.
1035 */
1036STATIC int
1037xfs_btree_read_buf_block(
1038 struct xfs_btree_cur *cur,
1039 union xfs_btree_ptr *ptr,
1040 int level,
1041 int flags,
1042 struct xfs_btree_block **block,
1043 struct xfs_buf **bpp)
1044{
1045 struct xfs_mount *mp = cur->bc_mp;
1046 xfs_daddr_t d;
1047 int error;
1048
1049 /* need to sort out how callers deal with failures first */
1050 ASSERT(!(flags & XFS_BUF_TRYLOCK));
1051
1052 d = xfs_btree_ptr_to_daddr(cur, ptr);
1053 error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
1054 mp->m_bsize, flags, bpp);
1055 if (error)
1056 return error;
1057
1058 ASSERT(*bpp != NULL);
1059 ASSERT(!XFS_BUF_GETERROR(*bpp));
1060
1061 xfs_btree_set_refs(cur, *bpp);
1062 *block = XFS_BUF_TO_BLOCK(*bpp);
1063
1064 error = xfs_btree_check_block(cur, *block, level, *bpp);
1065 if (error)
1066 xfs_trans_brelse(cur->bc_tp, *bpp);
1067 return error;
1068}
1069
1070/*
1071 * Copy keys from one btree block to another.
1072 */
1073STATIC void
1074xfs_btree_copy_keys(
1075 struct xfs_btree_cur *cur,
1076 union xfs_btree_key *dst_key,
1077 union xfs_btree_key *src_key,
1078 int numkeys)
1079{
1080 ASSERT(numkeys >= 0);
1081 memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
1082}
1083
1084/*
1085 * Copy records from one btree block to another.
1086 */
1087STATIC void
1088xfs_btree_copy_recs(
1089 struct xfs_btree_cur *cur,
1090 union xfs_btree_rec *dst_rec,
1091 union xfs_btree_rec *src_rec,
1092 int numrecs)
1093{
1094 ASSERT(numrecs >= 0);
1095 memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len);
1096}
1097
1098/*
1099 * Copy block pointers from one btree block to another.
1100 */
1101STATIC void
1102xfs_btree_copy_ptrs(
1103 struct xfs_btree_cur *cur,
1104 union xfs_btree_ptr *dst_ptr,
1105 union xfs_btree_ptr *src_ptr,
1106 int numptrs)
1107{
1108 ASSERT(numptrs >= 0);
1109 memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur));
1110}
1111
1112/*
1113 * Shift keys one index left/right inside a single btree block.
1114 */
1115STATIC void
1116xfs_btree_shift_keys(
1117 struct xfs_btree_cur *cur,
1118 union xfs_btree_key *key,
1119 int dir,
1120 int numkeys)
1121{
1122 char *dst_key;
1123
1124 ASSERT(numkeys >= 0);
1125 ASSERT(dir == 1 || dir == -1);
1126
1127 dst_key = (char *)key + (dir * cur->bc_ops->key_len);
1128 memmove(dst_key, key, numkeys * cur->bc_ops->key_len);
1129}
1130
1131/*
1132 * Shift records one index left/right inside a single btree block.
1133 */
1134STATIC void
1135xfs_btree_shift_recs(
1136 struct xfs_btree_cur *cur,
1137 union xfs_btree_rec *rec,
1138 int dir,
1139 int numrecs)
1140{
1141 char *dst_rec;
1142
1143 ASSERT(numrecs >= 0);
1144 ASSERT(dir == 1 || dir == -1);
1145
1146 dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len);
1147 memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len);
1148}
1149
1150/*
1151 * Shift block pointers one index left/right inside a single btree block.
1152 */
1153STATIC void
1154xfs_btree_shift_ptrs(
1155 struct xfs_btree_cur *cur,
1156 union xfs_btree_ptr *ptr,
1157 int dir,
1158 int numptrs)
1159{
1160 char *dst_ptr;
1161
1162 ASSERT(numptrs >= 0);
1163 ASSERT(dir == 1 || dir == -1);
1164
1165 dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur));
1166 memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur));
1167}
1168
1169/*
1170 * Log key values from the btree block.
1171 */
1172STATIC void
1173xfs_btree_log_keys(
1174 struct xfs_btree_cur *cur,
1175 struct xfs_buf *bp,
1176 int first,
1177 int last)
1178{
1179 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1180 XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1181
1182 if (bp) {
1183 xfs_trans_log_buf(cur->bc_tp, bp,
1184 xfs_btree_key_offset(cur, first),
1185 xfs_btree_key_offset(cur, last + 1) - 1);
1186 } else {
1187 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1188 xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1189 }
1190
1191 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1192}
1193
/*
 * Log record values from the btree block.
 *
 * Marks the byte range covering records @first through @last
 * (inclusive, 1-based) in buffer @bp dirty in the current transaction.
 * Note there is no NULL-bp (inode-root) case here, unlike
 * xfs_btree_log_keys/xfs_btree_log_ptrs: a buffer must be supplied.
 */
void
xfs_btree_log_recs(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp,
	int			first,
	int			last)
{
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);

	/* From the start of record "first" to the last byte of "last". */
	xfs_trans_log_buf(cur->bc_tp, bp,
			  xfs_btree_rec_offset(cur, first),
			  xfs_btree_rec_offset(cur, last + 1) - 1);

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
}
1213
1214/*
1215 * Log block pointer fields from a btree block (nonleaf).
1216 */
1217STATIC void
1218xfs_btree_log_ptrs(
1219 struct xfs_btree_cur *cur, /* btree cursor */
1220 struct xfs_buf *bp, /* buffer containing btree block */
1221 int first, /* index of first pointer to log */
1222 int last) /* index of last pointer to log */
1223{
1224 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1225 XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1226
1227 if (bp) {
1228 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
1229 int level = xfs_btree_get_level(block);
1230
1231 xfs_trans_log_buf(cur->bc_tp, bp,
1232 xfs_btree_ptr_offset(cur, first, level),
1233 xfs_btree_ptr_offset(cur, last + 1, level) - 1);
1234 } else {
1235 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1236 xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1237 }
1238
1239 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1240}
1241
/*
 * Log fields from a btree block header.
 *
 * @fields is a mask of XFS_BB_... bits selecting which header fields
 * to log.  For a buffer-backed block the mask is converted to a byte
 * range via the offset table matching the block's pointer format and
 * logged against the buffer; for an inode root (@bp == NULL) the
 * whole fork root is logged against the inode instead.
 */
void
xfs_btree_log_block(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	struct xfs_buf		*bp,	/* buffer containing btree block */
	int			fields)	/* mask of fields: XFS_BB_... */
{
	int			first;	/* first byte offset logged */
	int			last;	/* last byte offset logged */
	static const short	soffsets[] = {	/* table of offsets (short) */
		offsetof(struct xfs_btree_block, bb_magic),
		offsetof(struct xfs_btree_block, bb_level),
		offsetof(struct xfs_btree_block, bb_numrecs),
		offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
		offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
		XFS_BTREE_SBLOCK_LEN	/* sentinel: total header length */
	};
	static const short	loffsets[] = {	/* table of offsets (long) */
		offsetof(struct xfs_btree_block, bb_magic),
		offsetof(struct xfs_btree_block, bb_level),
		offsetof(struct xfs_btree_block, bb_numrecs),
		offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
		offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
		XFS_BTREE_LBLOCK_LEN	/* sentinel: total header length */
	};

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGBI(cur, bp, fields);

	if (bp) {
		/* Map the field mask to a contiguous byte range and log it. */
		xfs_btree_offsets(fields,
				  (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
					loffsets : soffsets,
				  XFS_BB_NUM_BITS, &first, &last);
		xfs_trans_log_buf(cur->bc_tp, bp, first, last);
	} else {
		/* Root lives in the inode fork; log the fork root. */
		xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
				xfs_ilog_fbroot(cur->bc_private.b.whichfork));
	}

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
}
1286
/*
 * Increment cursor by one record at the level.
 * For nonzero levels the leaf-ward information is untouched.
 *
 * On return *stat is 1 if the cursor now points at the next record,
 * or 0 if the cursor was already at the last record of the tree at
 * this level.  Returns 0 on success or a positive error code.
 */
int						/* error */
xfs_btree_increment(
	struct xfs_btree_cur	*cur,
	int			level,
	int			*stat)		/* success/failure */
{
	struct xfs_btree_block	*block;
	union xfs_btree_ptr	ptr;
	struct xfs_buf		*bp;
	int			error;		/* error return value */
	int			lev;		/* level we walked up to */

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGI(cur, level);

	ASSERT(level < cur->bc_nlevels);

	/* Read-ahead to the right at this level. */
	xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);

	/* Get a pointer to the btree block. */
	block = xfs_btree_get_block(cur, level, &bp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, level, bp);
	if (error)
		goto error0;
#endif

	/* We're done if we remain in the block after the increment. */
	if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block))
		goto out1;

	/* Fail if we just went off the right edge of the tree. */
	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
	if (xfs_btree_ptr_is_null(cur, &ptr))
		goto out0;

	XFS_BTREE_STATS_INC(cur, increment);

	/*
	 * March up the tree incrementing pointers.
	 * Stop when we don't go off the right edge of a block.
	 */
	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
		block = xfs_btree_get_block(cur, lev, &bp);

#ifdef DEBUG
		error = xfs_btree_check_block(cur, block, lev, bp);
		if (error)
			goto error0;
#endif

		if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block))
			break;

		/* Read-ahead the right block for the next loop. */
		xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
	}

	/*
	 * If we went off the root then we are either seriously
	 * confused or have the tree root in an inode.
	 * (The right-sibling check above should have caught a genuine
	 * right edge for disk-rooted trees, hence the assert.)
	 */
	if (lev == cur->bc_nlevels) {
		if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
			goto out0;
		ASSERT(0);
		error = EFSCORRUPTED;
		goto error0;
	}
	ASSERT(lev < cur->bc_nlevels);

	/*
	 * Now walk back down the tree, fixing up the cursor's buffer
	 * pointers and key numbers.  Each level's cursor slot is reset
	 * to the first entry of the newly-read block.
	 */
	for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
		union xfs_btree_ptr	*ptrp;

		ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
		error = xfs_btree_read_buf_block(cur, ptrp, --lev,
							0, &block, &bp);
		if (error)
			goto error0;

		xfs_btree_setbuf(cur, lev, bp);
		cur->bc_ptrs[lev] = 1;
	}
out1:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 1;
	return 0;

out0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 0;
	return 0;

error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;
}
1394
/*
 * Decrement cursor by one record at the level.
 * For nonzero levels the leaf-ward information is untouched.
 *
 * On return *stat is 1 if the cursor now points at the previous
 * record, or 0 if the cursor was already at the first record of the
 * tree at this level.  Returns 0 on success or a positive error code.
 */
int						/* error */
xfs_btree_decrement(
	struct xfs_btree_cur	*cur,
	int			level,
	int			*stat)		/* success/failure */
{
	struct xfs_btree_block	*block;
	xfs_buf_t		*bp;
	int			error;		/* error return value */
	int			lev;		/* level we walked up to */
	union xfs_btree_ptr	ptr;

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGI(cur, level);

	ASSERT(level < cur->bc_nlevels);

	/* Read-ahead to the left at this level. */
	xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);

	/* We're done if we remain in the block after the decrement. */
	if (--cur->bc_ptrs[level] > 0)
		goto out1;

	/* Get a pointer to the btree block. */
	block = xfs_btree_get_block(cur, level, &bp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, level, bp);
	if (error)
		goto error0;
#endif

	/* Fail if we just went off the left edge of the tree. */
	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
	if (xfs_btree_ptr_is_null(cur, &ptr))
		goto out0;

	XFS_BTREE_STATS_INC(cur, decrement);

	/*
	 * March up the tree decrementing pointers.
	 * Stop when we don't go off the left edge of a block.
	 */
	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
		if (--cur->bc_ptrs[lev] > 0)
			break;
		/* Read-ahead the left block for the next loop. */
		xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
	}

	/*
	 * If we went off the root then we are either seriously
	 * confused or the root of the tree is in an inode.
	 */
	if (lev == cur->bc_nlevels) {
		if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
			goto out0;
		ASSERT(0);
		error = EFSCORRUPTED;
		goto error0;
	}
	ASSERT(lev < cur->bc_nlevels);

	/*
	 * Now walk back down the tree, fixing up the cursor's buffer
	 * pointers and key numbers.  Each level's cursor slot is reset
	 * to the last entry of the newly-read block.
	 */
	for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
		union xfs_btree_ptr	*ptrp;

		ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
		error = xfs_btree_read_buf_block(cur, ptrp, --lev,
							0, &block, &bp);
		if (error)
			goto error0;
		xfs_btree_setbuf(cur, lev, bp);
		cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block);
	}
out1:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 1;
	return 0;

out0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 0;
	return 0;

error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;
}
1492
1493STATIC int
1494xfs_btree_lookup_get_block(
1495 struct xfs_btree_cur *cur, /* btree cursor */
1496 int level, /* level in the btree */
1497 union xfs_btree_ptr *pp, /* ptr to btree block */
1498 struct xfs_btree_block **blkp) /* return btree block */
1499{
1500 struct xfs_buf *bp; /* buffer pointer for btree block */
1501 int error = 0;
1502
1503 /* special case the root block if in an inode */
1504 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1505 (level == cur->bc_nlevels - 1)) {
1506 *blkp = xfs_btree_get_iroot(cur);
1507 return 0;
1508 }
1509
1510 /*
1511 * If the old buffer at this level for the disk address we are
1512 * looking for re-use it.
1513 *
1514 * Otherwise throw it away and get a new one.
1515 */
1516 bp = cur->bc_bufs[level];
1517 if (bp && XFS_BUF_ADDR(bp) == xfs_btree_ptr_to_daddr(cur, pp)) {
1518 *blkp = XFS_BUF_TO_BLOCK(bp);
1519 return 0;
1520 }
1521
1522 error = xfs_btree_read_buf_block(cur, pp, level, 0, blkp, &bp);
1523 if (error)
1524 return error;
1525
1526 xfs_btree_setbuf(cur, level, bp);
1527 return 0;
1528}
1529
1530/*
1531 * Get current search key. For level 0 we don't actually have a key
1532 * structure so we make one up from the record. For all other levels
1533 * we just return the right key.
1534 */
1535STATIC union xfs_btree_key *
1536xfs_lookup_get_search_key(
1537 struct xfs_btree_cur *cur,
1538 int level,
1539 int keyno,
1540 struct xfs_btree_block *block,
1541 union xfs_btree_key *kp)
1542{
1543 if (level == 0) {
1544 cur->bc_ops->init_key_from_rec(kp,
1545 xfs_btree_rec_addr(cur, keyno, block));
1546 return kp;
1547 }
1548
1549 return xfs_btree_key_addr(cur, keyno, block);
1550}
1551
/*
 * Lookup the record.  The cursor is made to point to it, based on dir.
 * Return 0 if can't find any such record, 1 for success.
 *
 * @dir selects which record satisfies the lookup: the last one <= the
 * search key (XFS_LOOKUP_LE), the first one >= it (XFS_LOOKUP_GE), or
 * an exact match (XFS_LOOKUP_EQ).  The search key itself is implicit
 * in the cursor (compared via bc_ops->key_diff).  *stat reports
 * found/not-found; the function's return value is 0 or an error code.
 */
int					/* error */
xfs_btree_lookup(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_lookup_t		dir,	/* <=, ==, or >= */
	int			*stat)	/* success/failure */
{
	struct xfs_btree_block	*block;	/* current btree block */
	__int64_t		diff;	/* difference for the current key */
	int			error;	/* error return value */
	int			keyno;	/* current key number */
	int			level;	/* level in the btree */
	union xfs_btree_ptr	*pp;	/* ptr to btree block */
	union xfs_btree_ptr	ptr;	/* ptr to btree block */

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGI(cur, dir);

	XFS_BTREE_STATS_INC(cur, lookup);

	block = NULL;
	keyno = 0;

	/* initialise start pointer from cursor */
	cur->bc_ops->init_ptr_from_cur(cur, &ptr);
	pp = &ptr;

	/*
	 * Iterate over each level in the btree, starting at the root.
	 * For each level above the leaves, find the key we need, based
	 * on the lookup record, then follow the corresponding block
	 * pointer down to the next level.
	 */
	for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
		/* Get the block we need to do the lookup on. */
		error = xfs_btree_lookup_get_block(cur, level, pp, &block);
		if (error)
			goto error0;

		if (diff == 0) {
			/*
			 * If we already had a key match at a higher level, we
			 * know we need to use the first entry in this block.
			 */
			keyno = 1;
		} else {
			/* Otherwise search this block. Do a binary search. */

			int	high;	/* high entry number */
			int	low;	/* low entry number */

			/* Set low and high entry numbers, 1-based. */
			low = 1;
			high = xfs_btree_get_numrecs(block);
			if (!high) {
				/* Block is empty, must be an empty leaf. */
				ASSERT(level == 0 && cur->bc_nlevels == 1);

				/* LE lookup fails (0); GE/EQ point at slot 1. */
				cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
				XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
				*stat = 0;
				return 0;
			}

			/* Binary search the block. */
			while (low <= high) {
				union xfs_btree_key	key;
				union xfs_btree_key	*kp;

				XFS_BTREE_STATS_INC(cur, compare);

				/* keyno is average of low and high. */
				keyno = (low + high) >> 1;

				/* Get current search key */
				kp = xfs_lookup_get_search_key(cur, level,
						keyno, block, &key);

				/*
				 * Compute difference to get next direction:
				 *  - less than, move right
				 *  - greater than, move left
				 *  - equal, we're done
				 */
				diff = cur->bc_ops->key_diff(cur, kp);
				if (diff < 0)
					low = keyno + 1;
				else if (diff > 0)
					high = keyno - 1;
				else
					break;
			}
		}

		/*
		 * If there are more levels, set up for the next level
		 * by getting the block number and filling in the cursor.
		 */
		if (level > 0) {
			/*
			 * If we moved left, need the previous key number,
			 * unless there isn't one.
			 */
			if (diff > 0 && --keyno < 1)
				keyno = 1;
			pp = xfs_btree_ptr_addr(cur, keyno, block);

#ifdef DEBUG
			error = xfs_btree_check_ptr(cur, pp, 0, level);
			if (error)
				goto error0;
#endif
			cur->bc_ptrs[level] = keyno;
		}
	}

	/* Done with the search. See if we need to adjust the results. */
	if (dir != XFS_LOOKUP_LE && diff < 0) {
		keyno++;
		/*
		 * If ge search and we went off the end of the block, but it's
		 * not the last block, we're in the wrong block.
		 */
		xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
		if (dir == XFS_LOOKUP_GE &&
		    keyno > xfs_btree_get_numrecs(block) &&
		    !xfs_btree_ptr_is_null(cur, &ptr)) {
			int	i;

			/* Move to the first record of the next block. */
			cur->bc_ptrs[0] = keyno;
			error = xfs_btree_increment(cur, 0, &i);
			if (error)
				goto error0;
			XFS_WANT_CORRUPTED_RETURN(i == 1);
			XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
			*stat = 1;
			return 0;
		}
	} else if (dir == XFS_LOOKUP_LE && diff > 0)
		keyno--;
	cur->bc_ptrs[0] = keyno;

	/* Return if we succeeded or not. */
	if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
		*stat = 0;
	else if (dir != XFS_LOOKUP_EQ || diff == 0)
		*stat = 1;
	else
		*stat = 0;
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	return 0;

error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;
}
1711
/*
 * Update keys at all levels from here to the root along the cursor's path.
 *
 * Starting at @level, copies @keyp into the key slot the cursor points
 * at and logs it, then repeats one level up, stopping as soon as the
 * cursor is not at the first entry of a block (higher-level keys only
 * depend on a block's first entry).
 */
STATIC int
xfs_btree_updkey(
	struct xfs_btree_cur	*cur,
	union xfs_btree_key	*keyp,
	int			level)
{
	struct xfs_btree_block	*block;
	struct xfs_buf		*bp;
	union xfs_btree_key	*kp;
	int			ptr;

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGIK(cur, level, keyp);

	/* Inode-rooted trees have no keys at level 0. */
	ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || level >= 1);

	/*
	 * Go up the tree from this level toward the root.
	 * At each level, update the key value to the value input.
	 * Stop when we reach a level where the cursor isn't pointing
	 * at the first entry in the block.
	 */
	for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
#ifdef DEBUG
		int		error;
#endif
		block = xfs_btree_get_block(cur, level, &bp);
#ifdef DEBUG
		error = xfs_btree_check_block(cur, block, level, bp);
		if (error) {
			XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
			return error;
		}
#endif
		/* Re-read ptr: only continue while we sit on entry 1. */
		ptr = cur->bc_ptrs[level];
		kp = xfs_btree_key_addr(cur, ptr, block);
		xfs_btree_copy_keys(cur, kp, keyp, 1);
		xfs_btree_log_keys(cur, bp, ptr, ptr);
	}

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	return 0;
}
1758
/*
 * Update the record referred to by cur to the value in the
 * given record. This either works (return 0) or gets an
 * EFSCORRUPTED error.
 *
 * The cursor must already point at the target leaf record (e.g. from
 * a prior lookup).  If the updated record is the first in its block,
 * the new key is propagated to the parent levels; if it is the last
 * record at the tree's right edge and the btree tracks that, the
 * owner's last-record bookkeeping is updated too.
 */
int
xfs_btree_update(
	struct xfs_btree_cur	*cur,
	union xfs_btree_rec	*rec)
{
	struct xfs_btree_block	*block;
	struct xfs_buf		*bp;
	int			error;
	int			ptr;
	union xfs_btree_rec	*rp;

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGR(cur, rec);

	/* Pick up the current block. */
	block = xfs_btree_get_block(cur, 0, &bp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, 0, bp);
	if (error)
		goto error0;
#endif
	/* Get the address of the rec to be updated. */
	ptr = cur->bc_ptrs[0];
	rp = xfs_btree_rec_addr(cur, ptr, block);

	/* Fill in the new contents and log them. */
	xfs_btree_copy_recs(cur, rp, rec, 1);
	xfs_btree_log_recs(cur, bp, ptr, ptr);

	/*
	 * If we are tracking the last record in the tree and
	 * we are at the far right edge of the tree, update it.
	 */
	if (xfs_btree_is_lastrec(cur, block, 0)) {
		cur->bc_ops->update_lastrec(cur, block, rec,
					    ptr, LASTREC_UPDATE);
	}

	/* Updating first rec in leaf. Pass new key value up to our parent. */
	if (ptr == 1) {
		union xfs_btree_key	key;

		cur->bc_ops->init_key_from_rec(&key, rec);
		error = xfs_btree_updkey(cur, &key, 1);
		if (error)
			goto error0;
	}

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	return 0;

error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;
}
1820
/*
 * Move 1 record left from cur/level if possible.
 * Update cur to reflect the new path.
 *
 * Shifts the first entry of the cursor's block at @level into the end
 * of its left sibling, making room in this block.  *stat is 1 if an
 * entry was moved, 0 if not possible (no left sibling, the sibling is
 * full, the block is an inode root, or the cursor sits on the entry
 * that would move).  Returns 0 or a positive error code.
 */
STATIC int					/* error */
xfs_btree_lshift(
	struct xfs_btree_cur	*cur,
	int			level,
	int			*stat)		/* success/failure */
{
	union xfs_btree_key	key;		/* btree key */
	struct xfs_buf		*lbp;		/* left buffer pointer */
	struct xfs_btree_block	*left;		/* left btree block */
	int			lrecs;		/* left record count */
	struct xfs_buf		*rbp;		/* right buffer pointer */
	struct xfs_btree_block	*right;		/* right btree block */
	int			rrecs;		/* right record count */
	union xfs_btree_ptr	lptr;		/* left btree pointer */
	union xfs_btree_key	*rkp = NULL;	/* right btree key */
	union xfs_btree_ptr	*rpp = NULL;	/* right address pointer */
	union xfs_btree_rec	*rrp = NULL;	/* right record pointer */
	int			error;		/* error return value */

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGI(cur, level);

	/* An inode root has no siblings to shift into. */
	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
	    level == cur->bc_nlevels - 1)
		goto out0;

	/* Set up variables for this block as "right". */
	right = xfs_btree_get_block(cur, level, &rbp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, right, level, rbp);
	if (error)
		goto error0;
#endif

	/* If we've got no left sibling then we can't shift an entry left. */
	xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
	if (xfs_btree_ptr_is_null(cur, &lptr))
		goto out0;

	/*
	 * If the cursor entry is the one that would be moved, don't
	 * do it... it's too complicated.
	 */
	if (cur->bc_ptrs[level] <= 1)
		goto out0;

	/* Set up the left neighbor as "left". */
	error = xfs_btree_read_buf_block(cur, &lptr, level, 0, &left, &lbp);
	if (error)
		goto error0;

	/* If it's full, it can't take another entry. */
	lrecs = xfs_btree_get_numrecs(left);
	if (lrecs == cur->bc_ops->get_maxrecs(cur, level))
		goto out0;

	rrecs = xfs_btree_get_numrecs(right);

	/*
	 * We add one entry to the left side and remove one for the right side.
	 * Account for it here, the changes will be updated on disk and logged
	 * later.
	 */
	lrecs++;
	rrecs--;

	XFS_BTREE_STATS_INC(cur, lshift);
	XFS_BTREE_STATS_ADD(cur, moves, 1);

	/*
	 * If non-leaf, copy a key and a ptr to the left block.
	 * Log the changes to the left block.
	 */
	if (level > 0) {
		/* It's a non-leaf. Move keys and pointers. */
		union xfs_btree_key	*lkp;	/* left btree key */
		union xfs_btree_ptr	*lpp;	/* left address pointer */

		lkp = xfs_btree_key_addr(cur, lrecs, left);
		rkp = xfs_btree_key_addr(cur, 1, right);

		lpp = xfs_btree_ptr_addr(cur, lrecs, left);
		rpp = xfs_btree_ptr_addr(cur, 1, right);
#ifdef DEBUG
		error = xfs_btree_check_ptr(cur, rpp, 0, level);
		if (error)
			goto error0;
#endif
		xfs_btree_copy_keys(cur, lkp, rkp, 1);
		xfs_btree_copy_ptrs(cur, lpp, rpp, 1);

		xfs_btree_log_keys(cur, lbp, lrecs, lrecs);
		xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs);

		ASSERT(cur->bc_ops->keys_inorder(cur,
			xfs_btree_key_addr(cur, lrecs - 1, left), lkp));
	} else {
		/* It's a leaf. Move records. */
		union xfs_btree_rec	*lrp;	/* left record pointer */

		lrp = xfs_btree_rec_addr(cur, lrecs, left);
		rrp = xfs_btree_rec_addr(cur, 1, right);

		xfs_btree_copy_recs(cur, lrp, rrp, 1);
		xfs_btree_log_recs(cur, lbp, lrecs, lrecs);

		ASSERT(cur->bc_ops->recs_inorder(cur,
			xfs_btree_rec_addr(cur, lrecs - 1, left), lrp));
	}

	xfs_btree_set_numrecs(left, lrecs);
	xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);

	xfs_btree_set_numrecs(right, rrecs);
	xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);

	/*
	 * Slide the contents of right down one entry.
	 */
	XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1);
	if (level > 0) {
		/* It's a nonleaf. operate on keys and ptrs */
#ifdef DEBUG
		int			i;	/* loop index */

		for (i = 0; i < rrecs; i++) {
			error = xfs_btree_check_ptr(cur, rpp, i + 1, level);
			if (error)
				goto error0;
		}
#endif
		xfs_btree_shift_keys(cur,
				xfs_btree_key_addr(cur, 2, right),
				-1, rrecs);
		xfs_btree_shift_ptrs(cur,
				xfs_btree_ptr_addr(cur, 2, right),
				-1, rrecs);

		xfs_btree_log_keys(cur, rbp, 1, rrecs);
		xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
	} else {
		/* It's a leaf. operate on records */
		xfs_btree_shift_recs(cur,
			xfs_btree_rec_addr(cur, 2, right),
			-1, rrecs);
		xfs_btree_log_recs(cur, rbp, 1, rrecs);

		/*
		 * If it's the first record in the block, we'll need a key
		 * structure to pass up to the next level (updkey).
		 */
		cur->bc_ops->init_key_from_rec(&key,
			xfs_btree_rec_addr(cur, 1, right));
		rkp = &key;
	}

	/* Update the parent key values of right. */
	error = xfs_btree_updkey(cur, rkp, level + 1);
	if (error)
		goto error0;

	/* Slide the cursor value left one. */
	cur->bc_ptrs[level]--;

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 1;
	return 0;

out0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 0;
	return 0;

error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;
}
2003
/*
 * Move 1 record right from cur/level if possible.
 * Update cur to reflect the new path.
 *
 * Shifts the last entry of the cursor's block at @level into the front
 * of its right sibling, making room in this block.  *stat is 1 if an
 * entry was moved, 0 if not possible (no right sibling, the sibling is
 * full, the block is an inode root, or the cursor sits on the entry
 * that would move).  A temporary duplicate cursor is used to update
 * the right sibling's parent key.  Returns 0 or a positive error code.
 */
STATIC int					/* error */
xfs_btree_rshift(
	struct xfs_btree_cur	*cur,
	int			level,
	int			*stat)		/* success/failure */
{
	union xfs_btree_key	key;		/* btree key */
	struct xfs_buf		*lbp;		/* left buffer pointer */
	struct xfs_btree_block	*left;		/* left btree block */
	struct xfs_buf		*rbp;		/* right buffer pointer */
	struct xfs_btree_block	*right;		/* right btree block */
	struct xfs_btree_cur	*tcur;		/* temporary btree cursor */
	union xfs_btree_ptr	rptr;		/* right block pointer */
	union xfs_btree_key	*rkp;		/* right btree key */
	int			rrecs;		/* right record count */
	int			lrecs;		/* left record count */
	int			error;		/* error return value */
	int			i;		/* loop counter */

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGI(cur, level);

	/* An inode root has no siblings to shift into. */
	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
	    (level == cur->bc_nlevels - 1))
		goto out0;

	/* Set up variables for this block as "left". */
	left = xfs_btree_get_block(cur, level, &lbp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, left, level, lbp);
	if (error)
		goto error0;
#endif

	/* If we've got no right sibling then we can't shift an entry right. */
	xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
	if (xfs_btree_ptr_is_null(cur, &rptr))
		goto out0;

	/*
	 * If the cursor entry is the one that would be moved, don't
	 * do it... it's too complicated.
	 */
	lrecs = xfs_btree_get_numrecs(left);
	if (cur->bc_ptrs[level] >= lrecs)
		goto out0;

	/* Set up the right neighbor as "right". */
	error = xfs_btree_read_buf_block(cur, &rptr, level, 0, &right, &rbp);
	if (error)
		goto error0;

	/* If it's full, it can't take another entry. */
	rrecs = xfs_btree_get_numrecs(right);
	if (rrecs == cur->bc_ops->get_maxrecs(cur, level))
		goto out0;

	XFS_BTREE_STATS_INC(cur, rshift);
	XFS_BTREE_STATS_ADD(cur, moves, rrecs);

	/*
	 * Make a hole at the start of the right neighbor block, then
	 * copy the last left block entry to the hole.
	 */
	if (level > 0) {
		/* It's a nonleaf. make a hole in the keys and ptrs */
		union xfs_btree_key	*lkp;
		union xfs_btree_ptr	*lpp;
		union xfs_btree_ptr	*rpp;

		lkp = xfs_btree_key_addr(cur, lrecs, left);
		lpp = xfs_btree_ptr_addr(cur, lrecs, left);
		rkp = xfs_btree_key_addr(cur, 1, right);
		rpp = xfs_btree_ptr_addr(cur, 1, right);

#ifdef DEBUG
		for (i = rrecs - 1; i >= 0; i--) {
			error = xfs_btree_check_ptr(cur, rpp, i, level);
			if (error)
				goto error0;
		}
#endif

		xfs_btree_shift_keys(cur, rkp, 1, rrecs);
		xfs_btree_shift_ptrs(cur, rpp, 1, rrecs);

#ifdef DEBUG
		error = xfs_btree_check_ptr(cur, lpp, 0, level);
		if (error)
			goto error0;
#endif

		/* Now put the new data in, and log it. */
		xfs_btree_copy_keys(cur, rkp, lkp, 1);
		xfs_btree_copy_ptrs(cur, rpp, lpp, 1);

		xfs_btree_log_keys(cur, rbp, 1, rrecs + 1);
		xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1);

		ASSERT(cur->bc_ops->keys_inorder(cur, rkp,
			xfs_btree_key_addr(cur, 2, right)));
	} else {
		/* It's a leaf. make a hole in the records */
		union xfs_btree_rec	*lrp;
		union xfs_btree_rec	*rrp;

		lrp = xfs_btree_rec_addr(cur, lrecs, left);
		rrp = xfs_btree_rec_addr(cur, 1, right);

		xfs_btree_shift_recs(cur, rrp, 1, rrecs);

		/* Now put the new data in, and log it. */
		xfs_btree_copy_recs(cur, rrp, lrp, 1);
		xfs_btree_log_recs(cur, rbp, 1, rrecs + 1);

		/* Build the key for updkey from the new first record. */
		cur->bc_ops->init_key_from_rec(&key, rrp);
		rkp = &key;

		ASSERT(cur->bc_ops->recs_inorder(cur, rrp,
			xfs_btree_rec_addr(cur, 2, right)));
	}

	/*
	 * Decrement and log left's numrecs, bump and log right's numrecs.
	 */
	xfs_btree_set_numrecs(left, --lrecs);
	xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);

	xfs_btree_set_numrecs(right, ++rrecs);
	xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);

	/*
	 * Using a temporary cursor, update the parent key values of the
	 * block on the right.
	 */
	error = xfs_btree_dup_cursor(cur, &tcur);
	if (error)
		goto error0;
	i = xfs_btree_lastrec(tcur, level);
	XFS_WANT_CORRUPTED_GOTO(i == 1, error0);

	error = xfs_btree_increment(tcur, level, &i);
	if (error)
		goto error1;

	error = xfs_btree_updkey(tcur, rkp, level + 1);
	if (error)
		goto error1;

	xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 1;
	return 0;

out0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 0;
	return 0;

error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;

error1:
	/* tcur was duplicated successfully; tear it down on its errors. */
	XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR);
	xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
	return error;
}
2178
/*
 * Split cur/level block in half.
 * Return new block number and the key to its first
 * record (to be inserted into parent).
 */
STATIC int					/* error */
xfs_btree_split(
	struct xfs_btree_cur	*cur,		/* btree cursor */
	int			level,		/* level of the block to split */
	union xfs_btree_ptr	*ptrp,		/* out: ptr to the new right block */
	union xfs_btree_key	*key,		/* out: key of right block's 1st entry */
	struct xfs_btree_cur	**curp,		/* out: new cursor, if more levels */
	int			*stat)		/* success/failure */
{
	union xfs_btree_ptr	lptr;		/* left sibling block ptr */
	struct xfs_buf		*lbp;		/* left buffer pointer */
	struct xfs_btree_block	*left;		/* left btree block */
	union xfs_btree_ptr	rptr;		/* right sibling block ptr */
	struct xfs_buf		*rbp;		/* right buffer pointer */
	struct xfs_btree_block	*right;		/* right btree block */
	union xfs_btree_ptr	rrptr;		/* right-right sibling ptr */
	struct xfs_buf		*rrbp;		/* right-right buffer pointer */
	struct xfs_btree_block	*rrblock;	/* right-right btree block */
	int			lrecs;		/* records kept in the left block */
	int			rrecs;		/* records moved to the right block */
	int			src_index;	/* index of first entry to move */
	int			error;		/* error return value */
#ifdef DEBUG
	int			i;
#endif

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGIPK(cur, level, *ptrp, key);

	XFS_BTREE_STATS_INC(cur, split);

	/* Set up left block (current one). */
	left = xfs_btree_get_block(cur, level, &lbp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, left, level, lbp);
	if (error)
		goto error0;
#endif

	xfs_btree_buf_to_ptr(cur, lbp, &lptr);

	/* Allocate the new block. If we can't do it, we're toast. Give up. */
	error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, 1, stat);
	if (error)
		goto error0;
	if (*stat == 0)
		goto out0;
	XFS_BTREE_STATS_INC(cur, alloc);

	/* Set up the new block as "right". */
	error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp);
	if (error)
		goto error0;

	/* Fill in the btree header for the new right block. */
	xfs_btree_init_block(cur, xfs_btree_get_level(left), 0, right);

	/*
	 * Split the entries between the old and the new block evenly.
	 * Make sure that if there's an odd number of entries now, that
	 * each new block will have the same number of entries.
	 */
	lrecs = xfs_btree_get_numrecs(left);
	rrecs = lrecs / 2;
	/*
	 * If the count is odd, put the extra entry in whichever half the
	 * cursor's insertion point falls into, so both ends keep room.
	 */
	if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1)
		rrecs++;
	src_index = (lrecs - rrecs + 1);

	XFS_BTREE_STATS_ADD(cur, moves, rrecs);

	/*
	 * Copy btree block entries from the left block over to the
	 * new block, the right. Update the right block and log the
	 * changes.
	 */
	if (level > 0) {
		/* It's a non-leaf. Move keys and pointers. */
		union xfs_btree_key	*lkp;	/* left btree key */
		union xfs_btree_ptr	*lpp;	/* left address pointer */
		union xfs_btree_key	*rkp;	/* right btree key */
		union xfs_btree_ptr	*rpp;	/* right address pointer */

		lkp = xfs_btree_key_addr(cur, src_index, left);
		lpp = xfs_btree_ptr_addr(cur, src_index, left);
		rkp = xfs_btree_key_addr(cur, 1, right);
		rpp = xfs_btree_ptr_addr(cur, 1, right);

#ifdef DEBUG
		/* Sanity-check the child pointers before moving them. */
		for (i = src_index; i < rrecs; i++) {
			error = xfs_btree_check_ptr(cur, lpp, i, level);
			if (error)
				goto error0;
		}
#endif

		xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
		xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);

		xfs_btree_log_keys(cur, rbp, 1, rrecs);
		xfs_btree_log_ptrs(cur, rbp, 1, rrecs);

		/* Grab the keys to the entries moved to the right block */
		xfs_btree_copy_keys(cur, key, rkp, 1);
	} else {
		/* It's a leaf. Move records. */
		union xfs_btree_rec	*lrp;	/* left record pointer */
		union xfs_btree_rec	*rrp;	/* right record pointer */

		lrp = xfs_btree_rec_addr(cur, src_index, left);
		rrp = xfs_btree_rec_addr(cur, 1, right);

		xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
		xfs_btree_log_recs(cur, rbp, 1, rrecs);

		/* Leaves have no keys; derive one from the first record. */
		cur->bc_ops->init_key_from_rec(key,
			xfs_btree_rec_addr(cur, 1, right));
	}


	/*
	 * Find the left block number by looking in the buffer.
	 * Adjust numrecs, sibling pointers.
	 */
	xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
	xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
	xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
	xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);

	lrecs -= rrecs;
	xfs_btree_set_numrecs(left, lrecs);
	xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);

	/* The right block is brand new; log everything in it. */
	xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS);
	xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);

	/*
	 * If there's a block to the new block's right, make that block
	 * point back to right instead of to left.
	 */
	if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
		error = xfs_btree_read_buf_block(cur, &rrptr, level,
							0, &rrblock, &rrbp);
		if (error)
			goto error0;
		xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
		xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
	}
	/*
	 * If the cursor is really in the right block, move it there.
	 * If it's just pointing past the last entry in left, then we'll
	 * insert there, so don't change anything in that case.
	 */
	if (cur->bc_ptrs[level] > lrecs + 1) {
		xfs_btree_setbuf(cur, level, rbp);
		cur->bc_ptrs[level] -= lrecs;
	}
	/*
	 * If there are more levels, we'll need another cursor which refers
	 * the right block, no matter where this cursor was.
	 */
	if (level + 1 < cur->bc_nlevels) {
		error = xfs_btree_dup_cursor(cur, curp);
		if (error)
			goto error0;
		/* Point the new cursor past the split point in the parent. */
		(*curp)->bc_ptrs[level + 1]++;
	}
	*ptrp = rptr;
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 1;
	return 0;
out0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 0;
	return 0;

error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;
}
2364
/*
 * Copy the old inode root contents into a real block and make the
 * broot point to it.
 */
int						/* error */
xfs_btree_new_iroot(
	struct xfs_btree_cur	*cur,		/* btree cursor */
	int			*logflags,	/* logging flags for inode */
	int			*stat)		/* return status - 0 fail */
{
	struct xfs_buf		*cbp;		/* buffer for cblock */
	struct xfs_btree_block	*block;		/* btree block */
	struct xfs_btree_block	*cblock;	/* child btree block */
	union xfs_btree_key	*ckp;		/* child key pointer */
	union xfs_btree_ptr	*cpp;		/* child ptr pointer */
	union xfs_btree_key	*kp;		/* pointer to btree key */
	union xfs_btree_ptr	*pp;		/* pointer to block addr */
	union xfs_btree_ptr	nptr;		/* new block addr */
	int			level;		/* btree level */
	int			error;		/* error return code */
#ifdef DEBUG
	int			i;		/* loop counter */
#endif

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_STATS_INC(cur, newroot);

	/* Only trees rooted in an inode fork can take this path. */
	ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);

	level = cur->bc_nlevels - 1;

	block = xfs_btree_get_iroot(cur);
	pp = xfs_btree_ptr_addr(cur, 1, block);

	/* Allocate the new block. If we can't do it, we're toast. Give up. */
	error = cur->bc_ops->alloc_block(cur, pp, &nptr, 1, stat);
	if (error)
		goto error0;
	if (*stat == 0) {
		XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
		return 0;
	}
	XFS_BTREE_STATS_INC(cur, alloc);

	/* Copy the root into a real block. */
	error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp);
	if (error)
		goto error0;

	memcpy(cblock, block, xfs_btree_block_len(cur));

	/* The tree grows one level; the root keeps a single entry. */
	be16_add_cpu(&block->bb_level, 1);
	xfs_btree_set_numrecs(block, 1);
	cur->bc_nlevels++;
	cur->bc_ptrs[level + 1] = 1;

	kp = xfs_btree_key_addr(cur, 1, block);
	ckp = xfs_btree_key_addr(cur, 1, cblock);
	xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock));

	cpp = xfs_btree_ptr_addr(cur, 1, cblock);
#ifdef DEBUG
	/* Sanity-check each child pointer before it goes into cblock. */
	for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
		error = xfs_btree_check_ptr(cur, pp, i, level);
		if (error)
			goto error0;
	}
#endif
	xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock));

#ifdef DEBUG
	error = xfs_btree_check_ptr(cur, &nptr, 0, level);
	if (error)
		goto error0;
#endif
	/* The in-inode root now holds one pointer: the new child block. */
	xfs_btree_copy_ptrs(cur, pp, &nptr, 1);

	/* Shrink the root fork allocation down to its single entry. */
	xfs_iroot_realloc(cur->bc_private.b.ip,
			  1 - xfs_btree_get_numrecs(cblock),
			  cur->bc_private.b.whichfork);

	xfs_btree_setbuf(cur, level, cbp);

	/*
	 * Do all this logging at the end so that
	 * the root is at the right level.
	 */
	xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
	xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
	xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));

	*logflags |=
		XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork);
	*stat = 1;
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	return 0;
error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;
}
2465
/*
 * Allocate a new root block, fill it in.
 */
STATIC int				/* error */
xfs_btree_new_root(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	int			*stat)	/* success/failure */
{
	struct xfs_btree_block	*block;	/* one half of the old root block */
	struct xfs_buf		*bp;	/* buffer containing block */
	int			error;	/* error return value */
	struct xfs_buf		*lbp;	/* left buffer pointer */
	struct xfs_btree_block	*left;	/* left btree block */
	struct xfs_buf		*nbp;	/* new (root) buffer */
	struct xfs_btree_block	*new;	/* new (root) btree block */
	int			nptr;	/* new value for key index, 1 or 2 */
	struct xfs_buf		*rbp;	/* right buffer pointer */
	struct xfs_btree_block	*right;	/* right btree block */
	union xfs_btree_ptr	rptr;	/* right block ptr */
	union xfs_btree_ptr	lptr;	/* left block ptr */

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_STATS_INC(cur, newroot);

	/* initialise our start point from the cursor */
	cur->bc_ops->init_ptr_from_cur(cur, &rptr);

	/* Allocate the new block. If we can't do it, we're toast. Give up. */
	error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, 1, stat);
	if (error)
		goto error0;
	if (*stat == 0)
		goto out0;
	XFS_BTREE_STATS_INC(cur, alloc);

	/* Set up the new block. */
	error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp);
	if (error)
		goto error0;

	/* Set the root in the holding structure increasing the level by 1. */
	cur->bc_ops->set_root(cur, &lptr, 1);

	/*
	 * At the previous root level there are now two blocks: the old root,
	 * and the new block generated when it was split. We don't know which
	 * one the cursor is pointing at, so we set up variables "left" and
	 * "right" for each case.
	 */
	block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp);
	if (error)
		goto error0;
#endif

	/* A non-null right sibling means our block is the left half. */
	xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
	if (!xfs_btree_ptr_is_null(cur, &rptr)) {
		/* Our block is left, pick up the right block. */
		lbp = bp;
		xfs_btree_buf_to_ptr(cur, lbp, &lptr);
		left = block;
		error = xfs_btree_read_buf_block(cur, &rptr,
					cur->bc_nlevels - 1, 0, &right, &rbp);
		if (error)
			goto error0;
		bp = rbp;
		nptr = 1;
	} else {
		/* Our block is right, pick up the left block. */
		rbp = bp;
		xfs_btree_buf_to_ptr(cur, rbp, &rptr);
		right = block;
		xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
		error = xfs_btree_read_buf_block(cur, &lptr,
					cur->bc_nlevels - 1, 0, &left, &lbp);
		if (error)
			goto error0;
		bp = lbp;
		nptr = 2;
	}
	/* Fill in the new block's btree header and log it. */
	xfs_btree_init_block(cur, cur->bc_nlevels, 2, new);
	xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
	ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
			!xfs_btree_ptr_is_null(cur, &rptr));

	/* Fill in the key data in the new root. */
	if (xfs_btree_get_level(left) > 0) {
		/* Children are non-leaves: copy their first keys up. */
		xfs_btree_copy_keys(cur,
				xfs_btree_key_addr(cur, 1, new),
				xfs_btree_key_addr(cur, 1, left), 1);
		xfs_btree_copy_keys(cur,
				xfs_btree_key_addr(cur, 2, new),
				xfs_btree_key_addr(cur, 1, right), 1);
	} else {
		/* Children are leaves: derive keys from their first records. */
		cur->bc_ops->init_key_from_rec(
				xfs_btree_key_addr(cur, 1, new),
				xfs_btree_rec_addr(cur, 1, left));
		cur->bc_ops->init_key_from_rec(
				xfs_btree_key_addr(cur, 2, new),
				xfs_btree_rec_addr(cur, 1, right));
	}
	xfs_btree_log_keys(cur, nbp, 1, 2);

	/* Fill in the pointer data in the new root. */
	xfs_btree_copy_ptrs(cur,
		xfs_btree_ptr_addr(cur, 1, new), &lptr, 1);
	xfs_btree_copy_ptrs(cur,
		xfs_btree_ptr_addr(cur, 2, new), &rptr, 1);
	xfs_btree_log_ptrs(cur, nbp, 1, 2);

	/* Fix up the cursor. */
	xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
	cur->bc_ptrs[cur->bc_nlevels] = nptr;
	cur->bc_nlevels++;
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 1;
	return 0;
error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;
out0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 0;
	return 0;
}
2594
2595STATIC int
2596xfs_btree_make_block_unfull(
2597 struct xfs_btree_cur *cur, /* btree cursor */
2598 int level, /* btree level */
2599 int numrecs,/* # of recs in block */
2600 int *oindex,/* old tree index */
2601 int *index, /* new tree index */
2602 union xfs_btree_ptr *nptr, /* new btree ptr */
2603 struct xfs_btree_cur **ncur, /* new btree cursor */
2604 union xfs_btree_rec *nrec, /* new record */
2605 int *stat)
2606{
2607 union xfs_btree_key key; /* new btree key value */
2608 int error = 0;
2609
2610 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2611 level == cur->bc_nlevels - 1) {
2612 struct xfs_inode *ip = cur->bc_private.b.ip;
2613
2614 if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
2615 /* A root block that can be made bigger. */
2616
2617 xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork);
2618 } else {
2619 /* A root block that needs replacing */
2620 int logflags = 0;
2621
2622 error = xfs_btree_new_iroot(cur, &logflags, stat);
2623 if (error || *stat == 0)
2624 return error;
2625
2626 xfs_trans_log_inode(cur->bc_tp, ip, logflags);
2627 }
2628
2629 return 0;
2630 }
2631
2632 /* First, try shifting an entry to the right neighbor. */
2633 error = xfs_btree_rshift(cur, level, stat);
2634 if (error || *stat)
2635 return error;
2636
2637 /* Next, try shifting an entry to the left neighbor. */
2638 error = xfs_btree_lshift(cur, level, stat);
2639 if (error)
2640 return error;
2641
2642 if (*stat) {
2643 *oindex = *index = cur->bc_ptrs[level];
2644 return 0;
2645 }
2646
2647 /*
2648 * Next, try splitting the current block in half.
2649 *
2650 * If this works we have to re-set our variables because we
2651 * could be in a different block now.
2652 */
2653 error = xfs_btree_split(cur, level, nptr, &key, ncur, stat);
2654 if (error || *stat == 0)
2655 return error;
2656
2657
2658 *index = cur->bc_ptrs[level];
2659 cur->bc_ops->init_rec_from_key(&key, nrec);
2660 return 0;
2661}
2662
/*
 * Insert one record/level. Return information to the caller
 * allowing the next level up to proceed if necessary.
 */
STATIC int
xfs_btree_insrec(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	int			level,	/* level to insert record at */
	union xfs_btree_ptr	*ptrp,	/* i/o: block number inserted */
	union xfs_btree_rec	*recp,	/* i/o: record data inserted */
	struct xfs_btree_cur	**curp,	/* output: new cursor replacing cur */
	int			*stat)	/* success/failure */
{
	struct xfs_btree_block	*block;	/* btree block */
	struct xfs_buf		*bp;	/* buffer for block */
	union xfs_btree_key	key;	/* btree key */
	union xfs_btree_ptr	nptr;	/* new block ptr */
	struct xfs_btree_cur	*ncur;	/* new btree cursor */
	union xfs_btree_rec	nrec;	/* record for a block split off here */
	int			optr;	/* old key/record index */
	int			ptr;	/* key/record index */
	int			numrecs;/* number of records */
	int			error;	/* error return value */
#ifdef DEBUG
	int			i;
#endif

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, recp);

	ncur = NULL;

	/*
	 * If we have an external root pointer, and we've made it to the
	 * root level, allocate a new root block and we're done.
	 */
	if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
	    (level >= cur->bc_nlevels)) {
		error = xfs_btree_new_root(cur, stat);
		xfs_btree_set_ptr_null(cur, ptrp);

		XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
		return error;
	}

	/* If we're off the left edge, return failure. */
	ptr = cur->bc_ptrs[level];
	if (ptr == 0) {
		XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
		*stat = 0;
		return 0;
	}

	/* Make a key out of the record data to be inserted, and save it. */
	cur->bc_ops->init_key_from_rec(&key, recp);

	/* Remember the original index; needed for the updkey check below. */
	optr = ptr;

	XFS_BTREE_STATS_INC(cur, insrec);

	/* Get pointers to the btree buffer and block. */
	block = xfs_btree_get_block(cur, level, &bp);
	numrecs = xfs_btree_get_numrecs(block);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, level, bp);
	if (error)
		goto error0;

	/* Check that the new entry is being inserted in the right place. */
	if (ptr <= numrecs) {
		if (level == 0) {
			ASSERT(cur->bc_ops->recs_inorder(cur, recp,
				xfs_btree_rec_addr(cur, ptr, block)));
		} else {
			ASSERT(cur->bc_ops->keys_inorder(cur, &key,
				xfs_btree_key_addr(cur, ptr, block)));
		}
	}
#endif

	/*
	 * If the block is full, we can't insert the new entry until we
	 * make the block un-full.
	 */
	xfs_btree_set_ptr_null(cur, &nptr);
	if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
		error = xfs_btree_make_block_unfull(cur, level, numrecs,
					&optr, &ptr, &nptr, &ncur, &nrec, stat);
		if (error || *stat == 0)
			goto error0;
	}

	/*
	 * The current block may have changed if the block was
	 * previously full and we have just made space in it.
	 */
	block = xfs_btree_get_block(cur, level, &bp);
	numrecs = xfs_btree_get_numrecs(block);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, level, bp);
	if (error)
		return error;
#endif

	/*
	 * At this point we know there's room for our new entry in the block
	 * we're pointing at.
	 */
	XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1);

	if (level > 0) {
		/* It's a nonleaf. make a hole in the keys and ptrs */
		union xfs_btree_key	*kp;
		union xfs_btree_ptr	*pp;

		kp = xfs_btree_key_addr(cur, ptr, block);
		pp = xfs_btree_ptr_addr(cur, ptr, block);

#ifdef DEBUG
		/* Validate the child pointers that are about to be shifted. */
		for (i = numrecs - ptr; i >= 0; i--) {
			error = xfs_btree_check_ptr(cur, pp, i, level);
			if (error)
				return error;
		}
#endif

		xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
		xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1);

#ifdef DEBUG
		error = xfs_btree_check_ptr(cur, ptrp, 0, level);
		if (error)
			goto error0;
#endif

		/* Now put the new data in, bump numrecs and log it. */
		xfs_btree_copy_keys(cur, kp, &key, 1);
		xfs_btree_copy_ptrs(cur, pp, ptrp, 1);
		numrecs++;
		xfs_btree_set_numrecs(block, numrecs);
		xfs_btree_log_ptrs(cur, bp, ptr, numrecs);
		xfs_btree_log_keys(cur, bp, ptr, numrecs);
#ifdef DEBUG
		if (ptr < numrecs) {
			ASSERT(cur->bc_ops->keys_inorder(cur, kp,
				xfs_btree_key_addr(cur, ptr + 1, block)));
		}
#endif
	} else {
		/* It's a leaf. make a hole in the records */
		union xfs_btree_rec             *rp;

		rp = xfs_btree_rec_addr(cur, ptr, block);

		xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);

		/* Now put the new data in, bump numrecs and log it. */
		xfs_btree_copy_recs(cur, rp, recp, 1);
		xfs_btree_set_numrecs(block, ++numrecs);
		xfs_btree_log_recs(cur, bp, ptr, numrecs);
#ifdef DEBUG
		if (ptr < numrecs) {
			ASSERT(cur->bc_ops->recs_inorder(cur, rp,
				xfs_btree_rec_addr(cur, ptr + 1, block)));
		}
#endif
	}

	/* Log the new number of records in the btree header. */
	xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);

	/* If we inserted at the start of a block, update the parents' keys. */
	if (optr == 1) {
		error = xfs_btree_updkey(cur, &key, level + 1);
		if (error)
			goto error0;
	}

	/*
	 * If we are tracking the last record in the tree and
	 * we are at the far right edge of the tree, update it.
	 */
	if (xfs_btree_is_lastrec(cur, block, level)) {
		cur->bc_ops->update_lastrec(cur, block, recp,
					    ptr, LASTREC_INSREC);
	}

	/*
	 * Return the new block number, if any.
	 * If there is one, give back a record value and a cursor too.
	 */
	*ptrp = nptr;
	if (!xfs_btree_ptr_is_null(cur, &nptr)) {
		*recp = nrec;
		*curp = ncur;
	}

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 1;
	return 0;

error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;
}
2870
/*
 * Insert the record at the point referenced by cur.
 *
 * A multi-level split of the tree on insert will invalidate the original
 * cursor.  All callers of this function should assume that the cursor is
 * no longer valid and revalidate it.
 */
int
xfs_btree_insert(
	struct xfs_btree_cur	*cur,
	int			*stat)	/* success/failure */
{
	int			error;	/* error return value */
	int			i;	/* result value, 0 for failure */
	int			level;	/* current level number in btree */
	union xfs_btree_ptr	nptr;	/* new block number (split result) */
	struct xfs_btree_cur	*ncur;	/* new cursor (split result) */
	struct xfs_btree_cur	*pcur;	/* previous level's cursor */
	union xfs_btree_rec	rec;	/* record to insert */

	level = 0;
	ncur = NULL;
	pcur = cur;

	xfs_btree_set_ptr_null(cur, &nptr);
	/* Build the record to insert from the cursor's current state. */
	cur->bc_ops->init_rec_from_cur(cur, &rec);

	/*
	 * Loop going up the tree, starting at the leaf level.
	 * Stop when we don't get a split block, that must mean that
	 * the insert is finished with this level.
	 */
	do {
		/*
		 * Insert nrec/nptr into this level of the tree.
		 * Note if we fail, nptr will be null.
		 */
		error = xfs_btree_insrec(pcur, level, &nptr, &rec, &ncur, &i);
		if (error) {
			/* Never free the caller's cursor, only our copies. */
			if (pcur != cur)
				xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
			goto error0;
		}

		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
		level++;

		/*
		 * See if the cursor we just used is trash.
		 * Can't trash the caller's cursor, but otherwise we should
		 * if ncur is a new cursor or we're about to be done.
		 */
		if (pcur != cur &&
		    (ncur || xfs_btree_ptr_is_null(cur, &nptr))) {
			/* Save the state from the cursor before we trash it */
			if (cur->bc_ops->update_cursor)
				cur->bc_ops->update_cursor(pcur, cur);
			cur->bc_nlevels = pcur->bc_nlevels;
			xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
		}
		/* If we got a new cursor, switch to it. */
		if (ncur) {
			pcur = ncur;
			ncur = NULL;
		}
	} while (!xfs_btree_ptr_is_null(cur, &nptr));

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = i;
	return 0;
error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;
}
2945
/*
 * Try to merge a non-leaf block back into the inode root.
 *
 * Note: the killroot name comes from the fact that we're effectively
 * killing the old root block.  But because we can't just delete the
 * inode we have to copy the single block it was pointing to into the
 * inode.
 */
int
xfs_btree_kill_iroot(
	struct xfs_btree_cur	*cur)
{
	int			whichfork = cur->bc_private.b.whichfork;
	struct xfs_inode	*ip = cur->bc_private.b.ip;
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	struct xfs_btree_block	*block;		/* in-inode root block */
	struct xfs_btree_block	*cblock;	/* only child of the root */
	union xfs_btree_key	*kp;
	union xfs_btree_key	*ckp;
	union xfs_btree_ptr	*pp;
	union xfs_btree_ptr	*cpp;
	struct xfs_buf		*cbp;
	int			level;
	int			index;
	int			numrecs;
#ifdef DEBUG
	union xfs_btree_ptr	ptr;
	int			i;
#endif

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);

	ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
	ASSERT(cur->bc_nlevels > 1);

	/*
	 * Don't deal with the root block needs to be a leaf case.
	 * We're just going to turn the thing back into extents anyway.
	 */
	level = cur->bc_nlevels - 1;
	if (level == 1)
		goto out0;

	/*
	 * Give up if the root has multiple children.
	 */
	block = xfs_btree_get_iroot(cur);
	if (xfs_btree_get_numrecs(block) != 1)
		goto out0;

	cblock = xfs_btree_get_block(cur, level - 1, &cbp);
	numrecs = xfs_btree_get_numrecs(cblock);

	/*
	 * Only do this if the next level will fit.
	 * Then the data must be copied up to the inode,
	 * instead of freeing the root you free the next level.
	 */
	if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level))
		goto out0;

	XFS_BTREE_STATS_INC(cur, killroot);

#ifdef DEBUG
	/* An in-inode root must not have siblings. */
	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
	ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
	ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
#endif

	/* Resize the root fork so the child's entries fit into it. */
	index = numrecs - cur->bc_ops->get_maxrecs(cur, level);
	if (index) {
		xfs_iroot_realloc(cur->bc_private.b.ip, index,
				  cur->bc_private.b.whichfork);
		/* if_broot may have moved during the realloc. */
		block = ifp->if_broot;
	}

	be16_add_cpu(&block->bb_numrecs, index);
	ASSERT(block->bb_numrecs == cblock->bb_numrecs);

	/* Copy the child's keys and pointers up into the root. */
	kp = xfs_btree_key_addr(cur, 1, block);
	ckp = xfs_btree_key_addr(cur, 1, cblock);
	xfs_btree_copy_keys(cur, kp, ckp, numrecs);

	pp = xfs_btree_ptr_addr(cur, 1, block);
	cpp = xfs_btree_ptr_addr(cur, 1, cblock);
#ifdef DEBUG
	for (i = 0; i < numrecs; i++) {
		int		error;

		error = xfs_btree_check_ptr(cur, cpp, i, level - 1);
		if (error) {
			XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
			return error;
		}
	}
#endif
	xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);

	/* The child block is now redundant; free it. */
	cur->bc_ops->free_block(cur, cbp);
	XFS_BTREE_STATS_INC(cur, free);

	/* The tree lost one level; log the inode core and broot. */
	cur->bc_bufs[level - 1] = NULL;
	be16_add_cpu(&block->bb_level, -1);
	xfs_trans_log_inode(cur->bc_tp, ip,
		XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork));
	cur->bc_nlevels--;
out0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	return 0;
}
3057
3058STATIC int
3059xfs_btree_dec_cursor(
3060 struct xfs_btree_cur *cur,
3061 int level,
3062 int *stat)
3063{
3064 int error;
3065 int i;
3066
3067 if (level > 0) {
3068 error = xfs_btree_decrement(cur, level, &i);
3069 if (error)
3070 return error;
3071 }
3072
3073 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3074 *stat = 1;
3075 return 0;
3076}
3077
3078/*
3079 * Single level of the btree record deletion routine.
3080 * Delete record pointed to by cur/level.
3081 * Remove the record from its block then rebalance the tree.
3082 * Return 0 for error, 1 for done, 2 to go on to the next level.
3083 */
3084STATIC int /* error */
3085xfs_btree_delrec(
3086 struct xfs_btree_cur *cur, /* btree cursor */
3087 int level, /* level removing record from */
3088 int *stat) /* fail/done/go-on */
3089{
3090 struct xfs_btree_block *block; /* btree block */
3091 union xfs_btree_ptr cptr; /* current block ptr */
3092 struct xfs_buf *bp; /* buffer for block */
3093 int error; /* error return value */
3094 int i; /* loop counter */
3095 union xfs_btree_key key; /* storage for keyp */
3096 union xfs_btree_key *keyp = &key; /* passed to the next level */
3097 union xfs_btree_ptr lptr; /* left sibling block ptr */
3098 struct xfs_buf *lbp; /* left buffer pointer */
3099 struct xfs_btree_block *left; /* left btree block */
3100 int lrecs = 0; /* left record count */
3101 int ptr; /* key/record index */
3102 union xfs_btree_ptr rptr; /* right sibling block ptr */
3103 struct xfs_buf *rbp; /* right buffer pointer */
3104 struct xfs_btree_block *right; /* right btree block */
3105 struct xfs_btree_block *rrblock; /* right-right btree block */
3106 struct xfs_buf *rrbp; /* right-right buffer pointer */
3107 int rrecs = 0; /* right record count */
3108 struct xfs_btree_cur *tcur; /* temporary btree cursor */
3109 int numrecs; /* temporary numrec count */
3110
3111 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3112 XFS_BTREE_TRACE_ARGI(cur, level);
3113
3114 tcur = NULL;
3115
3116 /* Get the index of the entry being deleted, check for nothing there. */
3117 ptr = cur->bc_ptrs[level];
3118 if (ptr == 0) {
3119 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3120 *stat = 0;
3121 return 0;
3122 }
3123
3124 /* Get the buffer & block containing the record or key/ptr. */
3125 block = xfs_btree_get_block(cur, level, &bp);
3126 numrecs = xfs_btree_get_numrecs(block);
3127
3128#ifdef DEBUG
3129 error = xfs_btree_check_block(cur, block, level, bp);
3130 if (error)
3131 goto error0;
3132#endif
3133
3134 /* Fail if we're off the end of the block. */
3135 if (ptr > numrecs) {
3136 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3137 *stat = 0;
3138 return 0;
3139 }
3140
3141 XFS_BTREE_STATS_INC(cur, delrec);
3142 XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr);
3143
3144 /* Excise the entries being deleted. */
3145 if (level > 0) {
3146 /* It's a nonleaf. operate on keys and ptrs */
3147 union xfs_btree_key *lkp;
3148 union xfs_btree_ptr *lpp;
3149
3150 lkp = xfs_btree_key_addr(cur, ptr + 1, block);
3151 lpp = xfs_btree_ptr_addr(cur, ptr + 1, block);
3152
3153#ifdef DEBUG
3154 for (i = 0; i < numrecs - ptr; i++) {
3155 error = xfs_btree_check_ptr(cur, lpp, i, level);
3156 if (error)
3157 goto error0;
3158 }
3159#endif
3160
3161 if (ptr < numrecs) {
3162 xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr);
3163 xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr);
3164 xfs_btree_log_keys(cur, bp, ptr, numrecs - 1);
3165 xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1);
3166 }
3167
3168 /*
3169 * If it's the first record in the block, we'll need to pass a
3170 * key up to the next level (updkey).
3171 */
3172 if (ptr == 1)
3173 keyp = xfs_btree_key_addr(cur, 1, block);
3174 } else {
3175 /* It's a leaf. operate on records */
3176 if (ptr < numrecs) {
3177 xfs_btree_shift_recs(cur,
3178 xfs_btree_rec_addr(cur, ptr + 1, block),
3179 -1, numrecs - ptr);
3180 xfs_btree_log_recs(cur, bp, ptr, numrecs - 1);
3181 }
3182
3183 /*
3184 * If it's the first record in the block, we'll need a key
3185 * structure to pass up to the next level (updkey).
3186 */
3187 if (ptr == 1) {
3188 cur->bc_ops->init_key_from_rec(&key,
3189 xfs_btree_rec_addr(cur, 1, block));
3190 keyp = &key;
3191 }
3192 }
3193
3194 /*
3195 * Decrement and log the number of entries in the block.
3196 */
3197 xfs_btree_set_numrecs(block, --numrecs);
3198 xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
3199
3200 /*
3201 * If we are tracking the last record in the tree and
3202 * we are at the far right edge of the tree, update it.
3203 */
3204 if (xfs_btree_is_lastrec(cur, block, level)) {
3205 cur->bc_ops->update_lastrec(cur, block, NULL,
3206 ptr, LASTREC_DELREC);
3207 }
3208
3209 /*
3210 * We're at the root level. First, shrink the root block in-memory.
3211 * Try to get rid of the next level down. If we can't then there's
3212 * nothing left to do.
3213 */
3214 if (level == cur->bc_nlevels - 1) {
3215 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3216 xfs_iroot_realloc(cur->bc_private.b.ip, -1,
3217 cur->bc_private.b.whichfork);
3218
3219 error = xfs_btree_kill_iroot(cur);
3220 if (error)
3221 goto error0;
3222
3223 error = xfs_btree_dec_cursor(cur, level, stat);
3224 if (error)
3225 goto error0;
3226 *stat = 1;
3227 return 0;
3228 }
3229
3230 /*
3231 * If this is the root level, and there's only one entry left,
3232 * and it's NOT the leaf level, then we can get rid of this
3233 * level.
3234 */
3235 if (numrecs == 1 && level > 0) {
3236 union xfs_btree_ptr *pp;
3237 /*
3238 * pp is still set to the first pointer in the block.
3239 * Make it the new root of the btree.
3240 */
3241 pp = xfs_btree_ptr_addr(cur, 1, block);
3242 error = cur->bc_ops->kill_root(cur, bp, level, pp);
3243 if (error)
3244 goto error0;
3245 } else if (level > 0) {
3246 error = xfs_btree_dec_cursor(cur, level, stat);
3247 if (error)
3248 goto error0;
3249 }
3250 *stat = 1;
3251 return 0;
3252 }
3253
3254 /*
3255 * If we deleted the leftmost entry in the block, update the
3256 * key values above us in the tree.
3257 */
3258 if (ptr == 1) {
3259 error = xfs_btree_updkey(cur, keyp, level + 1);
3260 if (error)
3261 goto error0;
3262 }
3263
3264 /*
3265 * If the number of records remaining in the block is at least
3266 * the minimum, we're done.
3267 */
3268 if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) {
3269 error = xfs_btree_dec_cursor(cur, level, stat);
3270 if (error)
3271 goto error0;
3272 return 0;
3273 }
3274
3275 /*
3276 * Otherwise, we have to move some records around to keep the
3277 * tree balanced. Look at the left and right sibling blocks to
3278 * see if we can re-balance by moving only one record.
3279 */
3280 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
3281 xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB);
3282
3283 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3284 /*
3285 * One child of root, need to get a chance to copy its contents
3286 * into the root and delete it. Can't go up to next level,
3287 * there's nothing to delete there.
3288 */
3289 if (xfs_btree_ptr_is_null(cur, &rptr) &&
3290 xfs_btree_ptr_is_null(cur, &lptr) &&
3291 level == cur->bc_nlevels - 2) {
3292 error = xfs_btree_kill_iroot(cur);
3293 if (!error)
3294 error = xfs_btree_dec_cursor(cur, level, stat);
3295 if (error)
3296 goto error0;
3297 return 0;
3298 }
3299 }
3300
3301 ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) ||
3302 !xfs_btree_ptr_is_null(cur, &lptr));
3303
3304 /*
3305 * Duplicate the cursor so our btree manipulations here won't
3306 * disrupt the next level up.
3307 */
3308 error = xfs_btree_dup_cursor(cur, &tcur);
3309 if (error)
3310 goto error0;
3311
3312 /*
3313 * If there's a right sibling, see if it's ok to shift an entry
3314 * out of it.
3315 */
3316 if (!xfs_btree_ptr_is_null(cur, &rptr)) {
3317 /*
3318 * Move the temp cursor to the last entry in the next block.
3319 * Actually any entry but the first would suffice.
3320 */
3321 i = xfs_btree_lastrec(tcur, level);
3322 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3323
3324 error = xfs_btree_increment(tcur, level, &i);
3325 if (error)
3326 goto error0;
3327 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3328
3329 i = xfs_btree_lastrec(tcur, level);
3330 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3331
3332 /* Grab a pointer to the block. */
3333 right = xfs_btree_get_block(tcur, level, &rbp);
3334#ifdef DEBUG
3335 error = xfs_btree_check_block(tcur, right, level, rbp);
3336 if (error)
3337 goto error0;
3338#endif
3339 /* Grab the current block number, for future use. */
3340 xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB);
3341
3342 /*
3343 * If right block is full enough so that removing one entry
3344 * won't make it too empty, and left-shifting an entry out
3345 * of right to us works, we're done.
3346 */
3347 if (xfs_btree_get_numrecs(right) - 1 >=
3348 cur->bc_ops->get_minrecs(tcur, level)) {
3349 error = xfs_btree_lshift(tcur, level, &i);
3350 if (error)
3351 goto error0;
3352 if (i) {
3353 ASSERT(xfs_btree_get_numrecs(block) >=
3354 cur->bc_ops->get_minrecs(tcur, level));
3355
3356 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3357 tcur = NULL;
3358
3359 error = xfs_btree_dec_cursor(cur, level, stat);
3360 if (error)
3361 goto error0;
3362 return 0;
3363 }
3364 }
3365
3366 /*
3367 * Otherwise, grab the number of records in right for
3368 * future reference, and fix up the temp cursor to point
3369 * to our block again (last record).
3370 */
3371 rrecs = xfs_btree_get_numrecs(right);
3372 if (!xfs_btree_ptr_is_null(cur, &lptr)) {
3373 i = xfs_btree_firstrec(tcur, level);
3374 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3375
3376 error = xfs_btree_decrement(tcur, level, &i);
3377 if (error)
3378 goto error0;
3379 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3380 }
3381 }
3382
3383 /*
3384 * If there's a left sibling, see if it's ok to shift an entry
3385 * out of it.
3386 */
3387 if (!xfs_btree_ptr_is_null(cur, &lptr)) {
3388 /*
3389 * Move the temp cursor to the first entry in the
3390 * previous block.
3391 */
3392 i = xfs_btree_firstrec(tcur, level);
3393 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3394
3395 error = xfs_btree_decrement(tcur, level, &i);
3396 if (error)
3397 goto error0;
3398 i = xfs_btree_firstrec(tcur, level);
3399 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3400
3401 /* Grab a pointer to the block. */
3402 left = xfs_btree_get_block(tcur, level, &lbp);
3403#ifdef DEBUG
3404 error = xfs_btree_check_block(cur, left, level, lbp);
3405 if (error)
3406 goto error0;
3407#endif
3408 /* Grab the current block number, for future use. */
3409 xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB);
3410
3411 /*
3412 * If left block is full enough so that removing one entry
3413 * won't make it too empty, and right-shifting an entry out
3414 * of left to us works, we're done.
3415 */
3416 if (xfs_btree_get_numrecs(left) - 1 >=
3417 cur->bc_ops->get_minrecs(tcur, level)) {
3418 error = xfs_btree_rshift(tcur, level, &i);
3419 if (error)
3420 goto error0;
3421 if (i) {
3422 ASSERT(xfs_btree_get_numrecs(block) >=
3423 cur->bc_ops->get_minrecs(tcur, level));
3424 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3425 tcur = NULL;
3426 if (level == 0)
3427 cur->bc_ptrs[0]++;
3428 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3429 *stat = 1;
3430 return 0;
3431 }
3432 }
3433
3434 /*
3435 * Otherwise, grab the number of records in right for
3436 * future reference.
3437 */
3438 lrecs = xfs_btree_get_numrecs(left);
3439 }
3440
3441 /* Delete the temp cursor, we're done with it. */
3442 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3443 tcur = NULL;
3444
3445 /* If here, we need to do a join to keep the tree balanced. */
3446 ASSERT(!xfs_btree_ptr_is_null(cur, &cptr));
3447
3448 if (!xfs_btree_ptr_is_null(cur, &lptr) &&
3449 lrecs + xfs_btree_get_numrecs(block) <=
3450 cur->bc_ops->get_maxrecs(cur, level)) {
3451 /*
3452 * Set "right" to be the starting block,
3453 * "left" to be the left neighbor.
3454 */
3455 rptr = cptr;
3456 right = block;
3457 rbp = bp;
3458 error = xfs_btree_read_buf_block(cur, &lptr, level,
3459 0, &left, &lbp);
3460 if (error)
3461 goto error0;
3462
3463 /*
3464 * If that won't work, see if we can join with the right neighbor block.
3465 */
3466 } else if (!xfs_btree_ptr_is_null(cur, &rptr) &&
3467 rrecs + xfs_btree_get_numrecs(block) <=
3468 cur->bc_ops->get_maxrecs(cur, level)) {
3469 /*
3470 * Set "left" to be the starting block,
3471 * "right" to be the right neighbor.
3472 */
3473 lptr = cptr;
3474 left = block;
3475 lbp = bp;
3476 error = xfs_btree_read_buf_block(cur, &rptr, level,
3477 0, &right, &rbp);
3478 if (error)
3479 goto error0;
3480
3481 /*
3482 * Otherwise, we can't fix the imbalance.
3483 * Just return. This is probably a logic error, but it's not fatal.
3484 */
3485 } else {
3486 error = xfs_btree_dec_cursor(cur, level, stat);
3487 if (error)
3488 goto error0;
3489 return 0;
3490 }
3491
3492 rrecs = xfs_btree_get_numrecs(right);
3493 lrecs = xfs_btree_get_numrecs(left);
3494
3495 /*
3496 * We're now going to join "left" and "right" by moving all the stuff
3497 * in "right" to "left" and deleting "right".
3498 */
3499 XFS_BTREE_STATS_ADD(cur, moves, rrecs);
3500 if (level > 0) {
3501 /* It's a non-leaf. Move keys and pointers. */
3502 union xfs_btree_key *lkp; /* left btree key */
3503 union xfs_btree_ptr *lpp; /* left address pointer */
3504 union xfs_btree_key *rkp; /* right btree key */
3505 union xfs_btree_ptr *rpp; /* right address pointer */
3506
3507 lkp = xfs_btree_key_addr(cur, lrecs + 1, left);
3508 lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left);
3509 rkp = xfs_btree_key_addr(cur, 1, right);
3510 rpp = xfs_btree_ptr_addr(cur, 1, right);
3511#ifdef DEBUG
3512 for (i = 1; i < rrecs; i++) {
3513 error = xfs_btree_check_ptr(cur, rpp, i, level);
3514 if (error)
3515 goto error0;
3516 }
3517#endif
3518 xfs_btree_copy_keys(cur, lkp, rkp, rrecs);
3519 xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs);
3520
3521 xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
3522 xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
3523 } else {
3524 /* It's a leaf. Move records. */
3525 union xfs_btree_rec *lrp; /* left record pointer */
3526 union xfs_btree_rec *rrp; /* right record pointer */
3527
3528 lrp = xfs_btree_rec_addr(cur, lrecs + 1, left);
3529 rrp = xfs_btree_rec_addr(cur, 1, right);
3530
3531 xfs_btree_copy_recs(cur, lrp, rrp, rrecs);
3532 xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
3533 }
3534
3535 XFS_BTREE_STATS_INC(cur, join);
3536
3537 /*
 3538 * Fix up the number of records and right block pointer in the
3539 * surviving block, and log it.
3540 */
3541 xfs_btree_set_numrecs(left, lrecs + rrecs);
3542 xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB),
3543 xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
3544 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
3545
3546 /* If there is a right sibling, point it to the remaining block. */
3547 xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
3548 if (!xfs_btree_ptr_is_null(cur, &cptr)) {
3549 error = xfs_btree_read_buf_block(cur, &cptr, level,
3550 0, &rrblock, &rrbp);
3551 if (error)
3552 goto error0;
3553 xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);
3554 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
3555 }
3556
3557 /* Free the deleted block. */
3558 error = cur->bc_ops->free_block(cur, rbp);
3559 if (error)
3560 goto error0;
3561 XFS_BTREE_STATS_INC(cur, free);
3562
3563 /*
3564 * If we joined with the left neighbor, set the buffer in the
3565 * cursor to the left block, and fix up the index.
3566 */
3567 if (bp != lbp) {
3568 cur->bc_bufs[level] = lbp;
3569 cur->bc_ptrs[level] += lrecs;
3570 cur->bc_ra[level] = 0;
3571 }
3572 /*
3573 * If we joined with the right neighbor and there's a level above
3574 * us, increment the cursor at that level.
3575 */
3576 else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) ||
3577 (level + 1 < cur->bc_nlevels)) {
3578 error = xfs_btree_increment(cur, level + 1, &i);
3579 if (error)
3580 goto error0;
3581 }
3582
3583 /*
3584 * Readjust the ptr at this level if it's not a leaf, since it's
3585 * still pointing at the deletion point, which makes the cursor
3586 * inconsistent. If this makes the ptr 0, the caller fixes it up.
3587 * We can't use decrement because it would change the next level up.
3588 */
3589 if (level > 0)
3590 cur->bc_ptrs[level]--;
3591
3592 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3593 /* Return value means the next level up has something to do. */
3594 *stat = 2;
3595 return 0;
3596
3597error0:
3598 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3599 if (tcur)
3600 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
3601 return error;
3602}
3603
3604/*
3605 * Delete the record pointed to by cur.
3606 * The cursor refers to the place where the record was (could be inserted)
3607 * when the operation returns.
3608 */
3609int /* error */
3610xfs_btree_delete(
3611 struct xfs_btree_cur *cur,
3612 int *stat) /* success/failure */
3613{
3614 int error; /* error return value */
3615 int level;
3616 int i;
3617
3618 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3619
3620 /*
3621 * Go up the tree, starting at leaf level.
3622 *
3623 * If 2 is returned then a join was done; go to the next level.
3624 * Otherwise we are done.
3625 */
3626 for (level = 0, i = 2; i == 2; level++) {
3627 error = xfs_btree_delrec(cur, level, &i);
3628 if (error)
3629 goto error0;
3630 }
3631
3632 if (i == 0) {
3633 for (level = 1; level < cur->bc_nlevels; level++) {
3634 if (cur->bc_ptrs[level] == 0) {
3635 error = xfs_btree_decrement(cur, level, &i);
3636 if (error)
3637 goto error0;
3638 break;
3639 }
3640 }
3641 }
3642
3643 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3644 *stat = i;
3645 return 0;
3646error0:
3647 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3648 return error;
3649}
3650
3651/*
3652 * Get the data from the pointed-to record.
3653 */
3654int /* error */
3655xfs_btree_get_rec(
3656 struct xfs_btree_cur *cur, /* btree cursor */
3657 union xfs_btree_rec **recp, /* output: btree record */
3658 int *stat) /* output: success/failure */
3659{
3660 struct xfs_btree_block *block; /* btree block */
3661 struct xfs_buf *bp; /* buffer pointer */
3662 int ptr; /* record number */
3663#ifdef DEBUG
3664 int error; /* error return value */
3665#endif
3666
3667 ptr = cur->bc_ptrs[0];
3668 block = xfs_btree_get_block(cur, 0, &bp);
3669
3670#ifdef DEBUG
3671 error = xfs_btree_check_block(cur, block, 0, bp);
3672 if (error)
3673 return error;
3674#endif
3675
3676 /*
3677 * Off the right end or left end, return failure.
3678 */
3679 if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) {
3680 *stat = 0;
3681 return 0;
3682 }
3683
3684 /*
3685 * Point to the record and extract its data.
3686 */
3687 *recp = xfs_btree_rec_addr(cur, ptr, block);
3688 *stat = 1;
3689 return 0;
3690}
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 1f528a2a375..789fffdf8b2 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -39,39 +39,19 @@ extern kmem_zone_t *xfs_btree_cur_zone;
39#define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi) 39#define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi)
40 40
41/* 41/*
42 * Short form header: space allocation btrees. 42 * Generic btree header.
43 */ 43 *
 44typedef struct xfs_btree_sblock { 44 * This is a combination of the actual format used on disk for short and
45 __be32 bb_magic; /* magic number for block type */ 45 * format btrees. The first three fields are shared by both format, but
46 __be16 bb_level; /* 0 is a leaf */ 46 * the pointers are different and should be used with care.
47 __be16 bb_numrecs; /* current # of data records */ 47 *
48 __be32 bb_leftsib; /* left sibling block or NULLAGBLOCK */ 48 * To get the size of the actual short or long form headers please use
49 __be32 bb_rightsib; /* right sibling block or NULLAGBLOCK */ 49 * the size macros below. Never use sizeof(xfs_btree_block).
50} xfs_btree_sblock_t;
51
52/*
53 * Long form header: bmap btrees.
54 */
55typedef struct xfs_btree_lblock {
56 __be32 bb_magic; /* magic number for block type */
57 __be16 bb_level; /* 0 is a leaf */
58 __be16 bb_numrecs; /* current # of data records */
59 __be64 bb_leftsib; /* left sibling block or NULLDFSBNO */
60 __be64 bb_rightsib; /* right sibling block or NULLDFSBNO */
61} xfs_btree_lblock_t;
62
63/*
64 * Combined header and structure, used by common code.
65 */ 50 */
66typedef struct xfs_btree_hdr 51struct xfs_btree_block {
67{
68 __be32 bb_magic; /* magic number for block type */ 52 __be32 bb_magic; /* magic number for block type */
69 __be16 bb_level; /* 0 is a leaf */ 53 __be16 bb_level; /* 0 is a leaf */
70 __be16 bb_numrecs; /* current # of data records */ 54 __be16 bb_numrecs; /* current # of data records */
71} xfs_btree_hdr_t;
72
73typedef struct xfs_btree_block {
74 xfs_btree_hdr_t bb_h; /* header */
75 union { 55 union {
76 struct { 56 struct {
77 __be32 bb_leftsib; 57 __be32 bb_leftsib;
@@ -82,7 +62,36 @@ typedef struct xfs_btree_block {
82 __be64 bb_rightsib; 62 __be64 bb_rightsib;
83 } l; /* long form pointers */ 63 } l; /* long form pointers */
84 } bb_u; /* rest */ 64 } bb_u; /* rest */
85} xfs_btree_block_t; 65};
66
67#define XFS_BTREE_SBLOCK_LEN 16 /* size of a short form block */
68#define XFS_BTREE_LBLOCK_LEN 24 /* size of a long form block */
69
70
71/*
72 * Generic key, ptr and record wrapper structures.
73 *
74 * These are disk format structures, and are converted where necessary
75 * by the btree specific code that needs to interpret them.
76 */
77union xfs_btree_ptr {
78 __be32 s; /* short form ptr */
79 __be64 l; /* long form ptr */
80};
81
82union xfs_btree_key {
83 xfs_bmbt_key_t bmbt;
84 xfs_bmdr_key_t bmbr; /* bmbt root block */
85 xfs_alloc_key_t alloc;
86 xfs_inobt_key_t inobt;
87};
88
89union xfs_btree_rec {
90 xfs_bmbt_rec_t bmbt;
91 xfs_bmdr_rec_t bmbr; /* bmbt root block */
92 xfs_alloc_rec_t alloc;
93 xfs_inobt_rec_t inobt;
94};
86 95
87/* 96/*
88 * For logging record fields. 97 * For logging record fields.
@@ -96,46 +105,131 @@ typedef struct xfs_btree_block {
96#define XFS_BB_ALL_BITS ((1 << XFS_BB_NUM_BITS) - 1) 105#define XFS_BB_ALL_BITS ((1 << XFS_BB_NUM_BITS) - 1)
97 106
98/* 107/*
99 * Boolean to select which form of xfs_btree_block_t.bb_u to use.
100 */
101#define XFS_BTREE_LONG_PTRS(btnum) ((btnum) == XFS_BTNUM_BMAP)
102
103/*
104 * Magic numbers for btree blocks. 108 * Magic numbers for btree blocks.
105 */ 109 */
106extern const __uint32_t xfs_magics[]; 110extern const __uint32_t xfs_magics[];
107 111
108/* 112/*
109 * Maximum and minimum records in a btree block. 113 * Generic stats interface
110 * Given block size, type prefix, and leaf flag (0 or 1). 114 */
111 * The divisor below is equivalent to lf ? (e1) : (e2) but that produces 115#define __XFS_BTREE_STATS_INC(type, stat) \
112 * compiler warnings. 116 XFS_STATS_INC(xs_ ## type ## _2_ ## stat)
113 */ 117#define XFS_BTREE_STATS_INC(cur, stat) \
114#define XFS_BTREE_BLOCK_MAXRECS(bsz,t,lf) \ 118do { \
115 ((int)(((bsz) - (uint)sizeof(t ## _block_t)) / \ 119 switch (cur->bc_btnum) { \
116 (((lf) * (uint)sizeof(t ## _rec_t)) + \ 120 case XFS_BTNUM_BNO: __XFS_BTREE_STATS_INC(abtb, stat); break; \
117 ((1 - (lf)) * \ 121 case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(abtc, stat); break; \
118 ((uint)sizeof(t ## _key_t) + (uint)sizeof(t ## _ptr_t)))))) 122 case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(bmbt, stat); break; \
119#define XFS_BTREE_BLOCK_MINRECS(bsz,t,lf) \ 123 case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(ibt, stat); break; \
 120 (XFS_BTREE_BLOCK_MAXRECS(bsz,t,lf) / 2) 124 case XFS_BTNUM_MAX: ASSERT(0); /* quiet gcc */ ; break; \
121 125 } \
122/* 126} while (0)
123 * Record, key, and pointer address calculation macros. 127
124 * Given block size, type prefix, block pointer, and index of requested entry 128#define __XFS_BTREE_STATS_ADD(type, stat, val) \
125 * (first entry numbered 1). 129 XFS_STATS_ADD(xs_ ## type ## _2_ ## stat, val)
126 */ 130#define XFS_BTREE_STATS_ADD(cur, stat, val) \
127#define XFS_BTREE_REC_ADDR(t,bb,i) \ 131do { \
128 ((t ## _rec_t *)((char *)(bb) + sizeof(t ## _block_t) + \ 132 switch (cur->bc_btnum) { \
129 ((i) - 1) * sizeof(t ## _rec_t))) 133 case XFS_BTNUM_BNO: __XFS_BTREE_STATS_ADD(abtb, stat, val); break; \
130#define XFS_BTREE_KEY_ADDR(t,bb,i) \ 134 case XFS_BTNUM_CNT: __XFS_BTREE_STATS_ADD(abtc, stat, val); break; \
131 ((t ## _key_t *)((char *)(bb) + sizeof(t ## _block_t) + \ 135 case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_ADD(bmbt, stat, val); break; \
132 ((i) - 1) * sizeof(t ## _key_t))) 136 case XFS_BTNUM_INO: __XFS_BTREE_STATS_ADD(ibt, stat, val); break; \
133#define XFS_BTREE_PTR_ADDR(t,bb,i,mxr) \ 137 case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
 134 ((t ## _ptr_t *)((char *)(bb) + sizeof(t ## _block_t) + \ 138 case XFS_BTNUM_MAX: ASSERT(0); /* quiet gcc */ ; break; \
135 (mxr) * sizeof(t ## _key_t) + ((i) - 1) * sizeof(t ## _ptr_t))) 139} while (0)
136 140
137#define XFS_BTREE_MAXLEVELS 8 /* max of all btrees */ 141#define XFS_BTREE_MAXLEVELS 8 /* max of all btrees */
138 142
143struct xfs_btree_ops {
144 /* size of the key and record structures */
145 size_t key_len;
146 size_t rec_len;
147
148 /* cursor operations */
149 struct xfs_btree_cur *(*dup_cursor)(struct xfs_btree_cur *);
150 void (*update_cursor)(struct xfs_btree_cur *src,
151 struct xfs_btree_cur *dst);
152
153 /* update btree root pointer */
154 void (*set_root)(struct xfs_btree_cur *cur,
155 union xfs_btree_ptr *nptr, int level_change);
156 int (*kill_root)(struct xfs_btree_cur *cur, struct xfs_buf *bp,
157 int level, union xfs_btree_ptr *newroot);
158
159 /* block allocation / freeing */
160 int (*alloc_block)(struct xfs_btree_cur *cur,
161 union xfs_btree_ptr *start_bno,
162 union xfs_btree_ptr *new_bno,
163 int length, int *stat);
164 int (*free_block)(struct xfs_btree_cur *cur, struct xfs_buf *bp);
165
166 /* update last record information */
167 void (*update_lastrec)(struct xfs_btree_cur *cur,
168 struct xfs_btree_block *block,
169 union xfs_btree_rec *rec,
170 int ptr, int reason);
171
172 /* records in block/level */
173 int (*get_minrecs)(struct xfs_btree_cur *cur, int level);
174 int (*get_maxrecs)(struct xfs_btree_cur *cur, int level);
175
176 /* records on disk. Matter for the root in inode case. */
177 int (*get_dmaxrecs)(struct xfs_btree_cur *cur, int level);
178
179 /* init values of btree structures */
180 void (*init_key_from_rec)(union xfs_btree_key *key,
181 union xfs_btree_rec *rec);
182 void (*init_rec_from_key)(union xfs_btree_key *key,
183 union xfs_btree_rec *rec);
184 void (*init_rec_from_cur)(struct xfs_btree_cur *cur,
185 union xfs_btree_rec *rec);
186 void (*init_ptr_from_cur)(struct xfs_btree_cur *cur,
187 union xfs_btree_ptr *ptr);
188
189 /* difference between key value and cursor value */
190 __int64_t (*key_diff)(struct xfs_btree_cur *cur,
191 union xfs_btree_key *key);
192
193#ifdef DEBUG
194 /* check that k1 is lower than k2 */
195 int (*keys_inorder)(struct xfs_btree_cur *cur,
196 union xfs_btree_key *k1,
197 union xfs_btree_key *k2);
198
199 /* check that r1 is lower than r2 */
200 int (*recs_inorder)(struct xfs_btree_cur *cur,
201 union xfs_btree_rec *r1,
202 union xfs_btree_rec *r2);
203#endif
204
205 /* btree tracing */
206#ifdef XFS_BTREE_TRACE
207 void (*trace_enter)(struct xfs_btree_cur *, const char *,
208 char *, int, int, __psunsigned_t,
209 __psunsigned_t, __psunsigned_t,
210 __psunsigned_t, __psunsigned_t,
211 __psunsigned_t, __psunsigned_t,
212 __psunsigned_t, __psunsigned_t,
213 __psunsigned_t, __psunsigned_t);
214 void (*trace_cursor)(struct xfs_btree_cur *, __uint32_t *,
215 __uint64_t *, __uint64_t *);
216 void (*trace_key)(struct xfs_btree_cur *,
217 union xfs_btree_key *, __uint64_t *,
218 __uint64_t *);
219 void (*trace_record)(struct xfs_btree_cur *,
220 union xfs_btree_rec *, __uint64_t *,
221 __uint64_t *, __uint64_t *);
222#endif
223};
224
225/*
226 * Reasons for the update_lastrec method to be called.
227 */
228#define LASTREC_UPDATE 0
229#define LASTREC_INSREC 1
230#define LASTREC_DELREC 2
231
232
139/* 233/*
140 * Btree cursor structure. 234 * Btree cursor structure.
141 * This collects all information needed by the btree code in one place. 235 * This collects all information needed by the btree code in one place.
@@ -144,6 +238,8 @@ typedef struct xfs_btree_cur
144{ 238{
145 struct xfs_trans *bc_tp; /* transaction we're in, if any */ 239 struct xfs_trans *bc_tp; /* transaction we're in, if any */
146 struct xfs_mount *bc_mp; /* file system mount struct */ 240 struct xfs_mount *bc_mp; /* file system mount struct */
241 const struct xfs_btree_ops *bc_ops;
242 uint bc_flags; /* btree features - below */
147 union { 243 union {
148 xfs_alloc_rec_incore_t a; 244 xfs_alloc_rec_incore_t a;
149 xfs_bmbt_irec_t b; 245 xfs_bmbt_irec_t b;
@@ -175,94 +271,40 @@ typedef struct xfs_btree_cur
175 } bc_private; /* per-btree type data */ 271 } bc_private; /* per-btree type data */
176} xfs_btree_cur_t; 272} xfs_btree_cur_t;
177 273
274/* cursor flags */
275#define XFS_BTREE_LONG_PTRS (1<<0) /* pointers are 64bits long */
276#define XFS_BTREE_ROOT_IN_INODE (1<<1) /* root may be variable size */
277#define XFS_BTREE_LASTREC_UPDATE (1<<2) /* track last rec externally */
278
279
178#define XFS_BTREE_NOERROR 0 280#define XFS_BTREE_NOERROR 0
179#define XFS_BTREE_ERROR 1 281#define XFS_BTREE_ERROR 1
180 282
181/* 283/*
182 * Convert from buffer to btree block header. 284 * Convert from buffer to btree block header.
183 */ 285 */
184#define XFS_BUF_TO_BLOCK(bp) ((xfs_btree_block_t *)XFS_BUF_PTR(bp)) 286#define XFS_BUF_TO_BLOCK(bp) ((struct xfs_btree_block *)XFS_BUF_PTR(bp))
185#define XFS_BUF_TO_LBLOCK(bp) ((xfs_btree_lblock_t *)XFS_BUF_PTR(bp))
186#define XFS_BUF_TO_SBLOCK(bp) ((xfs_btree_sblock_t *)XFS_BUF_PTR(bp))
187 287
188 288
189#ifdef __KERNEL__
190
191#ifdef DEBUG
192/* 289/*
193 * Debug routine: check that block header is ok. 290 * Check that block header is ok.
194 */ 291 */
195void 292int
196xfs_btree_check_block( 293xfs_btree_check_block(
197 xfs_btree_cur_t *cur, /* btree cursor */ 294 struct xfs_btree_cur *cur, /* btree cursor */
198 xfs_btree_block_t *block, /* generic btree block pointer */ 295 struct xfs_btree_block *block, /* generic btree block pointer */
199 int level, /* level of the btree block */
200 struct xfs_buf *bp); /* buffer containing block, if any */
201
202/*
203 * Debug routine: check that keys are in the right order.
204 */
205void
206xfs_btree_check_key(
207 xfs_btnum_t btnum, /* btree identifier */
208 void *ak1, /* pointer to left (lower) key */
209 void *ak2); /* pointer to right (higher) key */
210
211/*
212 * Debug routine: check that records are in the right order.
213 */
214void
215xfs_btree_check_rec(
216 xfs_btnum_t btnum, /* btree identifier */
217 void *ar1, /* pointer to left (lower) record */
218 void *ar2); /* pointer to right (higher) record */
219#else
220#define xfs_btree_check_block(a,b,c,d)
221#define xfs_btree_check_key(a,b,c)
222#define xfs_btree_check_rec(a,b,c)
223#endif /* DEBUG */
224
225/*
226 * Checking routine: check that long form block header is ok.
227 */
228int /* error (0 or EFSCORRUPTED) */
229xfs_btree_check_lblock(
230 xfs_btree_cur_t *cur, /* btree cursor */
231 xfs_btree_lblock_t *block, /* btree long form block pointer */
232 int level, /* level of the btree block */ 296 int level, /* level of the btree block */
233 struct xfs_buf *bp); /* buffer containing block, if any */ 297 struct xfs_buf *bp); /* buffer containing block, if any */
234 298
235/* 299/*
236 * Checking routine: check that (long) pointer is ok. 300 * Check that (long) pointer is ok.
237 */ 301 */
238int /* error (0 or EFSCORRUPTED) */ 302int /* error (0 or EFSCORRUPTED) */
239xfs_btree_check_lptr( 303xfs_btree_check_lptr(
240 xfs_btree_cur_t *cur, /* btree cursor */ 304 struct xfs_btree_cur *cur, /* btree cursor */
241 xfs_dfsbno_t ptr, /* btree block disk address */ 305 xfs_dfsbno_t ptr, /* btree block disk address */
242 int level); /* btree block level */ 306 int level); /* btree block level */
243 307
244#define xfs_btree_check_lptr_disk(cur, ptr, level) \
245 xfs_btree_check_lptr(cur, be64_to_cpu(ptr), level)
246
247/*
248 * Checking routine: check that short form block header is ok.
249 */
250int /* error (0 or EFSCORRUPTED) */
251xfs_btree_check_sblock(
252 xfs_btree_cur_t *cur, /* btree cursor */
253 xfs_btree_sblock_t *block, /* btree short form block pointer */
254 int level, /* level of the btree block */
255 struct xfs_buf *bp); /* buffer containing block */
256
257/*
258 * Checking routine: check that (short) pointer is ok.
259 */
260int /* error (0 or EFSCORRUPTED) */
261xfs_btree_check_sptr(
262 xfs_btree_cur_t *cur, /* btree cursor */
263 xfs_agblock_t ptr, /* btree block disk address */
264 int level); /* btree block level */
265
266/* 308/*
267 * Delete the btree cursor. 309 * Delete the btree cursor.
268 */ 310 */
@@ -281,15 +323,6 @@ xfs_btree_dup_cursor(
281 xfs_btree_cur_t **ncur);/* output cursor */ 323 xfs_btree_cur_t **ncur);/* output cursor */
282 324
283/* 325/*
284 * Change the cursor to point to the first record in the current block
285 * at the given level. Other levels are unaffected.
286 */
287int /* success=1, failure=0 */
288xfs_btree_firstrec(
289 xfs_btree_cur_t *cur, /* btree cursor */
290 int level); /* level to change */
291
292/*
293 * Get a buffer for the block, return it with no data read. 326 * Get a buffer for the block, return it with no data read.
294 * Long-form addressing. 327 * Long-form addressing.
295 */ 328 */
@@ -313,20 +346,6 @@ xfs_btree_get_bufs(
313 uint lock); /* lock flags for get_buf */ 346 uint lock); /* lock flags for get_buf */
314 347
315/* 348/*
316 * Allocate a new btree cursor.
317 * The cursor is either for allocation (A) or bmap (B).
318 */
319xfs_btree_cur_t * /* new btree cursor */
320xfs_btree_init_cursor(
321 struct xfs_mount *mp, /* file system mount point */
322 struct xfs_trans *tp, /* transaction pointer */
323 struct xfs_buf *agbp, /* (A only) buffer for agf structure */
324 xfs_agnumber_t agno, /* (A only) allocation group number */
325 xfs_btnum_t btnum, /* btree identifier */
326 struct xfs_inode *ip, /* (B only) inode owning the btree */
327 int whichfork); /* (B only) data/attr fork */
328
329/*
330 * Check for the cursor referring to the last block at the given level. 349 * Check for the cursor referring to the last block at the given level.
331 */ 350 */
332int /* 1=is last block, 0=not last block */ 351int /* 1=is last block, 0=not last block */
@@ -335,15 +354,6 @@ xfs_btree_islastblock(
335 int level); /* level to check */ 354 int level); /* level to check */
336 355
337/* 356/*
338 * Change the cursor to point to the last record in the current block
339 * at the given level. Other levels are unaffected.
340 */
341int /* success=1, failure=0 */
342xfs_btree_lastrec(
343 xfs_btree_cur_t *cur, /* btree cursor */
344 int level); /* level to change */
345
346/*
347 * Compute first and last byte offsets for the fields given. 357 * Compute first and last byte offsets for the fields given.
348 * Interprets the offsets table, which contains struct field offsets. 358 * Interprets the offsets table, which contains struct field offsets.
349 */ 359 */
@@ -404,39 +414,53 @@ xfs_btree_reada_bufs(
404 xfs_extlen_t count); /* count of filesystem blocks */ 414 xfs_extlen_t count); /* count of filesystem blocks */
405 415
406/* 416/*
407 * Read-ahead btree blocks, at the given level. 417 * Set the buffer for level "lev" in the cursor to bp, releasing
408 * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA. 418 * any previous buffer.
409 */ 419 */
410int /* readahead block count */ 420void
411xfs_btree_readahead_core( 421xfs_btree_setbuf(
412 xfs_btree_cur_t *cur, /* btree cursor */ 422 xfs_btree_cur_t *cur, /* btree cursor */
413 int lev, /* level in btree */ 423 int lev, /* level in btree */
414 int lr); /* left/right bits */ 424 struct xfs_buf *bp); /* new buffer to set */
415 425
416static inline int /* readahead block count */
417xfs_btree_readahead(
418 xfs_btree_cur_t *cur, /* btree cursor */
419 int lev, /* level in btree */
420 int lr) /* left/right bits */
421{
422 if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
423 return 0;
424 426
425 return xfs_btree_readahead_core(cur, lev, lr); 427/*
426} 428 * Common btree core entry points.
429 */
430int xfs_btree_increment(struct xfs_btree_cur *, int, int *);
431int xfs_btree_decrement(struct xfs_btree_cur *, int, int *);
432int xfs_btree_lookup(struct xfs_btree_cur *, xfs_lookup_t, int *);
433int xfs_btree_update(struct xfs_btree_cur *, union xfs_btree_rec *);
434int xfs_btree_new_iroot(struct xfs_btree_cur *, int *, int *);
435int xfs_btree_kill_iroot(struct xfs_btree_cur *);
436int xfs_btree_insert(struct xfs_btree_cur *, int *);
437int xfs_btree_delete(struct xfs_btree_cur *, int *);
438int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *);
427 439
440/*
441 * Internal btree helpers also used by xfs_bmap.c.
442 */
443void xfs_btree_log_block(struct xfs_btree_cur *, struct xfs_buf *, int);
444void xfs_btree_log_recs(struct xfs_btree_cur *, struct xfs_buf *, int, int);
428 445
429/* 446/*
430 * Set the buffer for level "lev" in the cursor to bp, releasing 447 * Helpers.
431 * any previous buffer.
432 */ 448 */
433void 449static inline int xfs_btree_get_numrecs(struct xfs_btree_block *block)
434xfs_btree_setbuf( 450{
435 xfs_btree_cur_t *cur, /* btree cursor */ 451 return be16_to_cpu(block->bb_numrecs);
436 int lev, /* level in btree */ 452}
437 struct xfs_buf *bp); /* new buffer to set */ 453
454static inline void xfs_btree_set_numrecs(struct xfs_btree_block *block,
455 __uint16_t numrecs)
456{
457 block->bb_numrecs = cpu_to_be16(numrecs);
458}
438 459
439#endif /* __KERNEL__ */ 460static inline int xfs_btree_get_level(struct xfs_btree_block *block)
461{
462 return be16_to_cpu(block->bb_level);
463}
440 464
441 465
442/* 466/*
diff --git a/fs/xfs/xfs_btree_trace.c b/fs/xfs/xfs_btree_trace.c
new file mode 100644
index 00000000000..44ff942a0fd
--- /dev/null
+++ b/fs/xfs/xfs_btree_trace.c
@@ -0,0 +1,249 @@
1/*
2 * Copyright (c) 2008 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_types.h"
20#include "xfs_inum.h"
21#include "xfs_bmap_btree.h"
22#include "xfs_alloc_btree.h"
23#include "xfs_ialloc_btree.h"
24#include "xfs_inode.h"
25#include "xfs_btree.h"
26#include "xfs_btree_trace.h"
27
28STATIC void
29xfs_btree_trace_ptr(
30 struct xfs_btree_cur *cur,
31 union xfs_btree_ptr ptr,
32 __psunsigned_t *high,
33 __psunsigned_t *low)
34{
35 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
36 __u64 val = be64_to_cpu(ptr.l);
37 *high = val >> 32;
38 *low = (int)val;
39 } else {
40 *high = 0;
41 *low = be32_to_cpu(ptr.s);
42 }
43}
44
45/*
46 * Add a trace buffer entry for arguments, for a buffer & 1 integer arg.
47 */
48void
49xfs_btree_trace_argbi(
50 const char *func,
51 struct xfs_btree_cur *cur,
52 struct xfs_buf *b,
53 int i,
54 int line)
55{
56 cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGBI,
57 line, (__psunsigned_t)b, i, 0, 0, 0, 0, 0,
58 0, 0, 0, 0);
59}
60
61/*
62 * Add a trace buffer entry for arguments, for a buffer & 2 integer args.
63 */
64void
65xfs_btree_trace_argbii(
66 const char *func,
67 struct xfs_btree_cur *cur,
68 struct xfs_buf *b,
69 int i0,
70 int i1,
71 int line)
72{
73 cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGBII,
74 line, (__psunsigned_t)b, i0, i1, 0, 0, 0, 0,
75 0, 0, 0, 0);
76}
77
78/*
79 * Add a trace buffer entry for arguments, for 3 block-length args
80 * and an integer arg.
81 */
82void
83xfs_btree_trace_argfffi(
84 const char *func,
85 struct xfs_btree_cur *cur,
86 xfs_dfiloff_t o,
87 xfs_dfsbno_t b,
88 xfs_dfilblks_t i,
89 int j,
90 int line)
91{
92 cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGFFFI,
93 line,
94 o >> 32, (int)o,
95 b >> 32, (int)b,
96 i >> 32, (int)i,
97 (int)j, 0, 0, 0, 0);
98}
99
100/*
101 * Add a trace buffer entry for arguments, for one integer arg.
102 */
103void
104xfs_btree_trace_argi(
105 const char *func,
106 struct xfs_btree_cur *cur,
107 int i,
108 int line)
109{
110 cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGI,
111 line, i, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
112}
113
114/*
115 * Add a trace buffer entry for arguments, for int, fsblock, key.
116 */
117void
118xfs_btree_trace_argipk(
119 const char *func,
120 struct xfs_btree_cur *cur,
121 int i,
122 union xfs_btree_ptr ptr,
123 union xfs_btree_key *key,
124 int line)
125{
126 __psunsigned_t high, low;
127 __uint64_t l0, l1;
128
129 xfs_btree_trace_ptr(cur, ptr, &high, &low);
130 cur->bc_ops->trace_key(cur, key, &l0, &l1);
131 cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIPK,
132 line, i, high, low,
133 l0 >> 32, (int)l0,
134 l1 >> 32, (int)l1,
135 0, 0, 0, 0);
136}
137
138/*
139 * Add a trace buffer entry for arguments, for int, fsblock, rec.
140 */
141void
142xfs_btree_trace_argipr(
143 const char *func,
144 struct xfs_btree_cur *cur,
145 int i,
146 union xfs_btree_ptr ptr,
147 union xfs_btree_rec *rec,
148 int line)
149{
150 __psunsigned_t high, low;
151 __uint64_t l0, l1, l2;
152
153 xfs_btree_trace_ptr(cur, ptr, &high, &low);
154 cur->bc_ops->trace_record(cur, rec, &l0, &l1, &l2);
155 cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIPR,
156 line, i,
157 high, low,
158 l0 >> 32, (int)l0,
159 l1 >> 32, (int)l1,
160 l2 >> 32, (int)l2,
161 0, 0);
162}
163
164/*
165 * Add a trace buffer entry for arguments, for int, key.
166 */
167void
168xfs_btree_trace_argik(
169 const char *func,
170 struct xfs_btree_cur *cur,
171 int i,
172 union xfs_btree_key *key,
173 int line)
174{
175 __uint64_t l0, l1;
176
177 cur->bc_ops->trace_key(cur, key, &l0, &l1);
178 cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIK,
179 line, i,
180 l0 >> 32, (int)l0,
181 l1 >> 32, (int)l1,
182 0, 0, 0, 0, 0, 0);
183}
184
185/*
186 * Add a trace buffer entry for arguments, for record.
187 */
188void
189xfs_btree_trace_argr(
190 const char *func,
191 struct xfs_btree_cur *cur,
192 union xfs_btree_rec *rec,
193 int line)
194{
195 __uint64_t l0, l1, l2;
196
197 cur->bc_ops->trace_record(cur, rec, &l0, &l1, &l2);
198 cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGR,
199 line,
200 l0 >> 32, (int)l0,
201 l1 >> 32, (int)l1,
202 l2 >> 32, (int)l2,
203 0, 0, 0, 0, 0);
204}
205
206/*
207 * Add a trace buffer entry for the cursor/operation.
208 */
209void
210xfs_btree_trace_cursor(
211 const char *func,
212 struct xfs_btree_cur *cur,
213 int type,
214 int line)
215{
216 __uint32_t s0;
217 __uint64_t l0, l1;
218 char *s;
219
220 switch (type) {
221 case XBT_ARGS:
222 s = "args";
223 break;
224 case XBT_ENTRY:
225 s = "entry";
226 break;
227 case XBT_ERROR:
228 s = "error";
229 break;
230 case XBT_EXIT:
231 s = "exit";
232 break;
233 default:
234 s = "unknown";
235 break;
236 }
237
238 cur->bc_ops->trace_cursor(cur, &s0, &l0, &l1);
239 cur->bc_ops->trace_enter(cur, func, s, XFS_BTREE_KTRACE_CUR, line,
240 s0,
241 l0 >> 32, (int)l0,
242 l1 >> 32, (int)l1,
243 (__psunsigned_t)cur->bc_bufs[0],
244 (__psunsigned_t)cur->bc_bufs[1],
245 (__psunsigned_t)cur->bc_bufs[2],
246 (__psunsigned_t)cur->bc_bufs[3],
247 (cur->bc_ptrs[0] << 16) | cur->bc_ptrs[1],
248 (cur->bc_ptrs[2] << 16) | cur->bc_ptrs[3]);
249}
diff --git a/fs/xfs/xfs_btree_trace.h b/fs/xfs/xfs_btree_trace.h
new file mode 100644
index 00000000000..b3f5eb3c3c6
--- /dev/null
+++ b/fs/xfs/xfs_btree_trace.h
@@ -0,0 +1,116 @@
1/*
2 * Copyright (c) 2008 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_BTREE_TRACE_H__
19#define __XFS_BTREE_TRACE_H__
20
21struct xfs_btree_cur;
22struct xfs_buf;
23
24
25/*
26 * Trace hooks.
27 * i,j = integer (32 bit)
28 * b = btree block buffer (xfs_buf_t)
29 * p = btree ptr
30 * r = btree record
31 * k = btree key
32 */
33
34#ifdef XFS_BTREE_TRACE
35
36/*
37 * Trace buffer entry types.
38 */
39#define XFS_BTREE_KTRACE_ARGBI 1
40#define XFS_BTREE_KTRACE_ARGBII 2
41#define XFS_BTREE_KTRACE_ARGFFFI 3
42#define XFS_BTREE_KTRACE_ARGI 4
43#define XFS_BTREE_KTRACE_ARGIPK 5
44#define XFS_BTREE_KTRACE_ARGIPR 6
45#define XFS_BTREE_KTRACE_ARGIK 7
46#define XFS_BTREE_KTRACE_ARGR 8
47#define XFS_BTREE_KTRACE_CUR 9
48
49/*
50 * Sub-types for cursor traces.
51 */
52#define XBT_ARGS 0
53#define XBT_ENTRY 1
54#define XBT_ERROR 2
55#define XBT_EXIT 3
56
57void xfs_btree_trace_argbi(const char *, struct xfs_btree_cur *,
58 struct xfs_buf *, int, int);
59void xfs_btree_trace_argbii(const char *, struct xfs_btree_cur *,
60 struct xfs_buf *, int, int, int);
61void xfs_btree_trace_argfffi(const char *, struct xfs_btree_cur *,
62 xfs_dfiloff_t, xfs_dfsbno_t, xfs_dfilblks_t, int, int);
63void xfs_btree_trace_argi(const char *, struct xfs_btree_cur *, int, int);
64void xfs_btree_trace_argipk(const char *, struct xfs_btree_cur *, int,
65 union xfs_btree_ptr, union xfs_btree_key *, int);
66void xfs_btree_trace_argipr(const char *, struct xfs_btree_cur *, int,
67 union xfs_btree_ptr, union xfs_btree_rec *, int);
68void xfs_btree_trace_argik(const char *, struct xfs_btree_cur *, int,
69 union xfs_btree_key *, int);
70void xfs_btree_trace_argr(const char *, struct xfs_btree_cur *,
71 union xfs_btree_rec *, int);
72void xfs_btree_trace_cursor(const char *, struct xfs_btree_cur *, int, int);
73
74
75#define XFS_ALLOCBT_TRACE_SIZE 4096 /* size of global trace buffer */
76extern ktrace_t *xfs_allocbt_trace_buf;
77
78#define XFS_INOBT_TRACE_SIZE 4096 /* size of global trace buffer */
79extern ktrace_t *xfs_inobt_trace_buf;
80
81#define XFS_BMBT_TRACE_SIZE 4096 /* size of global trace buffer */
82#define XFS_BMBT_KTRACE_SIZE 32 /* size of per-inode trace buffer */
83extern ktrace_t *xfs_bmbt_trace_buf;
84
85
86#define XFS_BTREE_TRACE_ARGBI(c, b, i) \
87 xfs_btree_trace_argbi(__func__, c, b, i, __LINE__)
88#define XFS_BTREE_TRACE_ARGBII(c, b, i, j) \
89 xfs_btree_trace_argbii(__func__, c, b, i, j, __LINE__)
90#define XFS_BTREE_TRACE_ARGFFFI(c, o, b, i, j) \
91 xfs_btree_trace_argfffi(__func__, c, o, b, i, j, __LINE__)
92#define XFS_BTREE_TRACE_ARGI(c, i) \
93 xfs_btree_trace_argi(__func__, c, i, __LINE__)
94#define XFS_BTREE_TRACE_ARGIPK(c, i, p, k) \
95 xfs_btree_trace_argipk(__func__, c, i, p, k, __LINE__)
96#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r) \
97 xfs_btree_trace_argipr(__func__, c, i, p, r, __LINE__)
98#define XFS_BTREE_TRACE_ARGIK(c, i, k) \
99 xfs_btree_trace_argik(__func__, c, i, k, __LINE__)
100#define XFS_BTREE_TRACE_ARGR(c, r) \
101 xfs_btree_trace_argr(__func__, c, r, __LINE__)
102#define XFS_BTREE_TRACE_CURSOR(c, t) \
103 xfs_btree_trace_cursor(__func__, c, t, __LINE__)
104#else
105#define XFS_BTREE_TRACE_ARGBI(c, b, i)
106#define XFS_BTREE_TRACE_ARGBII(c, b, i, j)
107#define XFS_BTREE_TRACE_ARGFFFI(c, o, b, i, j)
108#define XFS_BTREE_TRACE_ARGI(c, i)
109#define XFS_BTREE_TRACE_ARGIPK(c, i, p, s)
110#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r)
111#define XFS_BTREE_TRACE_ARGIK(c, i, k)
112#define XFS_BTREE_TRACE_ARGR(c, r)
113#define XFS_BTREE_TRACE_CURSOR(c, t)
114#endif /* XFS_BTREE_TRACE */
115
116#endif /* __XFS_BTREE_TRACE_H__ */
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 002fc2617c8..92af4098c7e 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -375,7 +375,7 @@ xfs_buf_item_unpin(
375 xfs_buf_log_item_t *bip, 375 xfs_buf_log_item_t *bip,
376 int stale) 376 int stale)
377{ 377{
378 xfs_mount_t *mp; 378 struct xfs_ail *ailp;
379 xfs_buf_t *bp; 379 xfs_buf_t *bp;
380 int freed; 380 int freed;
381 381
@@ -387,7 +387,7 @@ xfs_buf_item_unpin(
387 xfs_buftrace("XFS_UNPIN", bp); 387 xfs_buftrace("XFS_UNPIN", bp);
388 388
389 freed = atomic_dec_and_test(&bip->bli_refcount); 389 freed = atomic_dec_and_test(&bip->bli_refcount);
390 mp = bip->bli_item.li_mountp; 390 ailp = bip->bli_item.li_ailp;
391 xfs_bunpin(bp); 391 xfs_bunpin(bp);
392 if (freed && stale) { 392 if (freed && stale) {
393 ASSERT(bip->bli_flags & XFS_BLI_STALE); 393 ASSERT(bip->bli_flags & XFS_BLI_STALE);
@@ -399,17 +399,17 @@ xfs_buf_item_unpin(
399 xfs_buftrace("XFS_UNPIN STALE", bp); 399 xfs_buftrace("XFS_UNPIN STALE", bp);
400 /* 400 /*
401 * If we get called here because of an IO error, we may 401 * If we get called here because of an IO error, we may
402 * or may not have the item on the AIL. xfs_trans_delete_ail() 402 * or may not have the item on the AIL. xfs_trans_ail_delete()
403 * will take care of that situation. 403 * will take care of that situation.
404 * xfs_trans_delete_ail() drops the AIL lock. 404 * xfs_trans_ail_delete() drops the AIL lock.
405 */ 405 */
406 if (bip->bli_flags & XFS_BLI_STALE_INODE) { 406 if (bip->bli_flags & XFS_BLI_STALE_INODE) {
407 xfs_buf_do_callbacks(bp, (xfs_log_item_t *)bip); 407 xfs_buf_do_callbacks(bp, (xfs_log_item_t *)bip);
408 XFS_BUF_SET_FSPRIVATE(bp, NULL); 408 XFS_BUF_SET_FSPRIVATE(bp, NULL);
409 XFS_BUF_CLR_IODONE_FUNC(bp); 409 XFS_BUF_CLR_IODONE_FUNC(bp);
410 } else { 410 } else {
411 spin_lock(&mp->m_ail_lock); 411 spin_lock(&ailp->xa_lock);
412 xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip); 412 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip);
413 xfs_buf_item_relse(bp); 413 xfs_buf_item_relse(bp);
414 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL); 414 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL);
415 } 415 }
@@ -707,8 +707,8 @@ xfs_buf_item_init(
707 * the first. If we do already have one, there is 707 * the first. If we do already have one, there is
708 * nothing to do here so return. 708 * nothing to do here so return.
709 */ 709 */
710 if (XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *) != mp) 710 if (bp->b_mount != mp)
711 XFS_BUF_SET_FSPRIVATE3(bp, mp); 711 bp->b_mount = mp;
712 XFS_BUF_SET_BDSTRAT_FUNC(bp, xfs_bdstrat_cb); 712 XFS_BUF_SET_BDSTRAT_FUNC(bp, xfs_bdstrat_cb);
713 if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { 713 if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) {
714 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); 714 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
@@ -731,6 +731,7 @@ xfs_buf_item_init(
731 bip->bli_item.li_type = XFS_LI_BUF; 731 bip->bli_item.li_type = XFS_LI_BUF;
732 bip->bli_item.li_ops = &xfs_buf_item_ops; 732 bip->bli_item.li_ops = &xfs_buf_item_ops;
733 bip->bli_item.li_mountp = mp; 733 bip->bli_item.li_mountp = mp;
734 bip->bli_item.li_ailp = mp->m_ail;
734 bip->bli_buf = bp; 735 bip->bli_buf = bp;
735 xfs_buf_hold(bp); 736 xfs_buf_hold(bp);
736 bip->bli_format.blf_type = XFS_LI_BUF; 737 bip->bli_format.blf_type = XFS_LI_BUF;
@@ -997,21 +998,7 @@ xfs_buf_iodone_callbacks(
997 xfs_buf_do_callbacks(bp, lip); 998 xfs_buf_do_callbacks(bp, lip);
998 XFS_BUF_SET_FSPRIVATE(bp, NULL); 999 XFS_BUF_SET_FSPRIVATE(bp, NULL);
999 XFS_BUF_CLR_IODONE_FUNC(bp); 1000 XFS_BUF_CLR_IODONE_FUNC(bp);
1000 1001 xfs_biodone(bp);
1001 /*
1002 * XFS_SHUT flag gets set when we go thru the
1003 * entire buffer cache and deliberately start
1004 * throwing away delayed write buffers.
1005 * Since there's no biowait done on those,
1006 * we should just brelse them.
1007 */
1008 if (XFS_BUF_ISSHUT(bp)) {
1009 XFS_BUF_UNSHUT(bp);
1010 xfs_buf_relse(bp);
1011 } else {
1012 xfs_biodone(bp);
1013 }
1014
1015 return; 1002 return;
1016 } 1003 }
1017 1004
@@ -1122,27 +1109,23 @@ xfs_buf_iodone(
1122 xfs_buf_t *bp, 1109 xfs_buf_t *bp,
1123 xfs_buf_log_item_t *bip) 1110 xfs_buf_log_item_t *bip)
1124{ 1111{
1125 struct xfs_mount *mp; 1112 struct xfs_ail *ailp = bip->bli_item.li_ailp;
1126 1113
1127 ASSERT(bip->bli_buf == bp); 1114 ASSERT(bip->bli_buf == bp);
1128 1115
1129 xfs_buf_rele(bp); 1116 xfs_buf_rele(bp);
1130 mp = bip->bli_item.li_mountp;
1131 1117
1132 /* 1118 /*
1133 * If we are forcibly shutting down, this may well be 1119 * If we are forcibly shutting down, this may well be
1134 * off the AIL already. That's because we simulate the 1120 * off the AIL already. That's because we simulate the
1135 * log-committed callbacks to unpin these buffers. Or we may never 1121 * log-committed callbacks to unpin these buffers. Or we may never
1136 * have put this item on AIL because of the transaction was 1122 * have put this item on AIL because of the transaction was
1137 * aborted forcibly. xfs_trans_delete_ail() takes care of these. 1123 * aborted forcibly. xfs_trans_ail_delete() takes care of these.
1138 * 1124 *
1139 * Either way, AIL is useless if we're forcing a shutdown. 1125 * Either way, AIL is useless if we're forcing a shutdown.
1140 */ 1126 */
1141 spin_lock(&mp->m_ail_lock); 1127 spin_lock(&ailp->xa_lock);
1142 /* 1128 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip);
1143 * xfs_trans_delete_ail() drops the AIL lock.
1144 */
1145 xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip);
1146 xfs_buf_item_free(bip); 1129 xfs_buf_item_free(bip);
1147} 1130}
1148 1131
diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h
deleted file mode 100644
index d2ce5dd70d8..00000000000
--- a/fs/xfs/xfs_clnt.h
+++ /dev/null
@@ -1,105 +0,0 @@
1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_CLNT_H__
19#define __XFS_CLNT_H__
20
21/*
22 * XFS arguments structure, constructed from the arguments we
23 * are passed via the mount system call.
24 *
25 * NOTE: The mount system call is handled differently between
26 * Linux and IRIX. In IRIX we worked work with a binary data
27 * structure coming in across the syscall interface from user
28 * space (the mount userspace knows about each filesystem type
29 * and the set of valid options for it, and converts the users
30 * argument string into a binary structure _before_ making the
31 * system call), and the ABI issues that this implies.
32 *
33 * In Linux, we are passed a comma separated set of options;
34 * ie. a NULL terminated string of characters. Userspace mount
35 * code does not have any knowledge of mount options expected by
36 * each filesystem type and so each filesystem parses its mount
37 * options in kernel space.
38 *
39 * For the Linux port, we kept this structure pretty much intact
40 * and use it internally (because the existing code groks it).
41 */
42struct xfs_mount_args {
43 int flags; /* flags -> see XFSMNT_... macros below */
44 int flags2; /* flags -> see XFSMNT2_... macros below */
45 int logbufs; /* Number of log buffers, -1 to default */
46 int logbufsize; /* Size of log buffers, -1 to default */
47 char fsname[MAXNAMELEN+1]; /* data device name */
48 char rtname[MAXNAMELEN+1]; /* realtime device filename */
49 char logname[MAXNAMELEN+1]; /* journal device filename */
50 char mtpt[MAXNAMELEN+1]; /* filesystem mount point */
51 int sunit; /* stripe unit (BBs) */
52 int swidth; /* stripe width (BBs), multiple of sunit */
53 uchar_t iosizelog; /* log2 of the preferred I/O size */
54 int ihashsize; /* inode hash table size (buckets) */
55};
56
57/*
58 * XFS mount option flags -- args->flags1
59 */
60#define XFSMNT_ATTR2 0x00000001 /* allow ATTR2 EA format */
61#define XFSMNT_WSYNC 0x00000002 /* safe mode nfs mount
62 * compatible */
63#define XFSMNT_INO64 0x00000004 /* move inode numbers up
64 * past 2^32 */
65#define XFSMNT_UQUOTA 0x00000008 /* user quota accounting */
66#define XFSMNT_PQUOTA 0x00000010 /* IRIX prj quota accounting */
67#define XFSMNT_UQUOTAENF 0x00000020 /* user quota limit
68 * enforcement */
69#define XFSMNT_PQUOTAENF 0x00000040 /* IRIX project quota limit
70 * enforcement */
71#define XFSMNT_QUIET 0x00000080 /* don't report mount errors */
72#define XFSMNT_NOALIGN 0x00000200 /* don't allocate at
73 * stripe boundaries*/
74#define XFSMNT_RETERR 0x00000400 /* return error to user */
75#define XFSMNT_NORECOVERY 0x00000800 /* no recovery, implies
76 * read-only mount */
77#define XFSMNT_SHARED 0x00001000 /* shared XFS mount */
78#define XFSMNT_IOSIZE 0x00002000 /* optimize for I/O size */
79#define XFSMNT_OSYNCISOSYNC 0x00004000 /* o_sync is REALLY o_sync */
80 /* (osyncisdsync is default) */
81#define XFSMNT_NOATTR2 0x00008000 /* turn off ATTR2 EA format */
82#define XFSMNT_32BITINODES 0x00200000 /* restrict inodes to 32
83 * bits of address space */
84#define XFSMNT_GQUOTA 0x00400000 /* group quota accounting */
85#define XFSMNT_GQUOTAENF 0x00800000 /* group quota limit
86 * enforcement */
87#define XFSMNT_NOUUID 0x01000000 /* Ignore fs uuid */
88#define XFSMNT_DMAPI 0x02000000 /* enable dmapi/xdsm */
89#define XFSMNT_BARRIER 0x04000000 /* use write barriers */
90#define XFSMNT_IKEEP 0x08000000 /* inode cluster delete */
91#define XFSMNT_SWALLOC 0x10000000 /* turn on stripe width
92 * allocation */
93#define XFSMNT_DIRSYNC 0x40000000 /* sync creat,link,unlink,rename
94 * symlink,mkdir,rmdir,mknod */
95#define XFSMNT_FLAGS2 0x80000000 /* more flags set in flags2 */
96
97/*
98 * XFS mount option flags -- args->flags2
99 */
100#define XFSMNT2_COMPAT_IOSIZE 0x00000001 /* don't report large preferred
101 * I/O size in stat(2) */
102#define XFSMNT2_FILESTREAMS 0x00000002 /* enable the filestreams
103 * allocator */
104
105#endif /* __XFS_CLNT_H__ */
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index 8be0b00ede9..70b710c1792 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -72,27 +72,7 @@ typedef struct xfs_da_intnode {
72typedef struct xfs_da_node_hdr xfs_da_node_hdr_t; 72typedef struct xfs_da_node_hdr xfs_da_node_hdr_t;
73typedef struct xfs_da_node_entry xfs_da_node_entry_t; 73typedef struct xfs_da_node_entry xfs_da_node_entry_t;
74 74
75#define XFS_DA_MAXHASH ((xfs_dahash_t)-1) /* largest valid hash value */
76
77#define XFS_LBSIZE(mp) (mp)->m_sb.sb_blocksize 75#define XFS_LBSIZE(mp) (mp)->m_sb.sb_blocksize
78#define XFS_LBLOG(mp) (mp)->m_sb.sb_blocklog
79
80#define XFS_DA_MAKE_BNOENTRY(mp,bno,entry) \
81 (((bno) << (mp)->m_dircook_elog) | (entry))
82#define XFS_DA_MAKE_COOKIE(mp,bno,entry,hash) \
83 (((xfs_off_t)XFS_DA_MAKE_BNOENTRY(mp, bno, entry) << 32) | (hash))
84#define XFS_DA_COOKIE_HASH(mp,cookie) ((xfs_dahash_t)cookie)
85#define XFS_DA_COOKIE_BNO(mp,cookie) \
86 ((((xfs_off_t)(cookie) >> 31) == -1LL ? \
87 (xfs_dablk_t)0 : \
88 (xfs_dablk_t)((xfs_off_t)(cookie) >> \
89 ((mp)->m_dircook_elog + 32))))
90#define XFS_DA_COOKIE_ENTRY(mp,cookie) \
91 ((((xfs_off_t)(cookie) >> 31) == -1LL ? \
92 (xfs_dablk_t)0 : \
93 (xfs_dablk_t)(((xfs_off_t)(cookie) >> 32) & \
94 ((1 << (mp)->m_dircook_elog) - 1))))
95
96 76
97/*======================================================================== 77/*========================================================================
98 * Btree searching and modification structure definitions. 78 * Btree searching and modification structure definitions.
@@ -226,9 +206,8 @@ struct xfs_nameops {
226}; 206};
227 207
228 208
229#ifdef __KERNEL__
230/*======================================================================== 209/*========================================================================
231 * Function prototypes for the kernel. 210 * Function prototypes.
232 *========================================================================*/ 211 *========================================================================*/
233 212
234/* 213/*
@@ -289,6 +268,5 @@ xfs_daddr_t xfs_da_blkno(xfs_dabuf_t *dabuf);
289 268
290extern struct kmem_zone *xfs_da_state_zone; 269extern struct kmem_zone *xfs_da_state_zone;
291extern struct kmem_zone *xfs_dabuf_zone; 270extern struct kmem_zone *xfs_dabuf_zone;
292#endif /* __KERNEL__ */
293 271
294#endif /* __XFS_DA_BTREE_H__ */ 272#endif /* __XFS_DA_BTREE_H__ */
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 75b0cd4da0e..b4c1ee71349 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -49,9 +49,8 @@
49 */ 49 */
50int 50int
51xfs_swapext( 51xfs_swapext(
52 xfs_swapext_t __user *sxu) 52 xfs_swapext_t *sxp)
53{ 53{
54 xfs_swapext_t *sxp;
55 xfs_inode_t *ip, *tip; 54 xfs_inode_t *ip, *tip;
56 struct file *file, *target_file; 55 struct file *file, *target_file;
57 int error = 0; 56 int error = 0;
@@ -62,11 +61,6 @@ xfs_swapext(
62 goto out; 61 goto out;
63 } 62 }
64 63
65 if (copy_from_user(sxp, sxu, sizeof(xfs_swapext_t))) {
66 error = XFS_ERROR(EFAULT);
67 goto out_free_sxp;
68 }
69
70 /* Pull information for the target fd */ 64 /* Pull information for the target fd */
71 file = fget((int)sxp->sx_fdtarget); 65 file = fget((int)sxp->sx_fdtarget);
72 if (!file) { 66 if (!file) {
diff --git a/fs/xfs/xfs_dfrag.h b/fs/xfs/xfs_dfrag.h
index da178205be6..4f55a630655 100644
--- a/fs/xfs/xfs_dfrag.h
+++ b/fs/xfs/xfs_dfrag.h
@@ -46,7 +46,7 @@ typedef struct xfs_swapext
46/* 46/*
47 * Syscall interface for xfs_swapext 47 * Syscall interface for xfs_swapext
48 */ 48 */
49int xfs_swapext(struct xfs_swapext __user *sx); 49int xfs_swapext(struct xfs_swapext *sx);
50 50
51int xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip, 51int xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip,
52 struct xfs_swapext *sxp); 52 struct xfs_swapext *sxp);
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index c9065eaf2a4..162e8726df5 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -18,32 +18,29 @@
18#ifndef __XFS_DINODE_H__ 18#ifndef __XFS_DINODE_H__
19#define __XFS_DINODE_H__ 19#define __XFS_DINODE_H__
20 20
21struct xfs_buf; 21#define XFS_DINODE_MAGIC 0x494e /* 'IN' */
22struct xfs_mount; 22#define XFS_DINODE_GOOD_VERSION(v) (((v) == 1 || (v) == 2))
23 23
24#define XFS_DINODE_VERSION_1 1
25#define XFS_DINODE_VERSION_2 2
26#define XFS_DINODE_GOOD_VERSION(v) \
27 (((v) == XFS_DINODE_VERSION_1 || (v) == XFS_DINODE_VERSION_2))
28#define XFS_DINODE_MAGIC 0x494e /* 'IN' */
29
30/*
31 * Disk inode structure.
32 * This is just the header; the inode is expanded to fill a variable size
33 * with the last field expanding. It is split into the core and "other"
34 * because we only need the core part in the in-core inode.
35 */
36typedef struct xfs_timestamp { 24typedef struct xfs_timestamp {
37 __be32 t_sec; /* timestamp seconds */ 25 __be32 t_sec; /* timestamp seconds */
38 __be32 t_nsec; /* timestamp nanoseconds */ 26 __be32 t_nsec; /* timestamp nanoseconds */
39} xfs_timestamp_t; 27} xfs_timestamp_t;
40 28
41/* 29/*
42 * Note: Coordinate changes to this structure with the XFS_DI_* #defines 30 * On-disk inode structure.
43 * below, the offsets table in xfs_ialloc_log_di() and struct xfs_icdinode 31 *
44 * in xfs_inode.h. 32 * This is just the header or "dinode core", the inode is expanded to fill a
33 * variable size the leftover area split into a data and an attribute fork.
34 * The format of the data and attribute fork depends on the format of the
35 * inode as indicated by di_format and di_aformat. To access the data and
36 * attribute use the XFS_DFORK_PTR, XFS_DFORK_DPTR, and XFS_DFORK_PTR macros
37 * below.
38 *
39 * There is a very similar struct icdinode in xfs_inode which matches the
40 * layout of the first 96 bytes of this structure, but is kept in native
41 * format instead of big endian.
45 */ 42 */
46typedef struct xfs_dinode_core { 43typedef struct xfs_dinode {
47 __be16 di_magic; /* inode magic # = XFS_DINODE_MAGIC */ 44 __be16 di_magic; /* inode magic # = XFS_DINODE_MAGIC */
48 __be16 di_mode; /* mode and type of file */ 45 __be16 di_mode; /* mode and type of file */
49 __u8 di_version; /* inode version */ 46 __u8 di_version; /* inode version */
@@ -69,34 +66,12 @@ typedef struct xfs_dinode_core {
69 __be16 di_dmstate; /* DMIG state info */ 66 __be16 di_dmstate; /* DMIG state info */
70 __be16 di_flags; /* random flags, XFS_DIFLAG_... */ 67 __be16 di_flags; /* random flags, XFS_DIFLAG_... */
71 __be32 di_gen; /* generation number */ 68 __be32 di_gen; /* generation number */
72} xfs_dinode_core_t;
73 69
74#define DI_MAX_FLUSH 0xffff 70 /* di_next_unlinked is the only non-core field in the old dinode */
71 __be32 di_next_unlinked;/* agi unlinked list ptr */
72} __attribute__((packed)) xfs_dinode_t;
75 73
76typedef struct xfs_dinode 74#define DI_MAX_FLUSH 0xffff
77{
78 xfs_dinode_core_t di_core;
79 /*
80 * In adding anything between the core and the union, be
81 * sure to update the macros like XFS_LITINO below and
82 * XFS_BMAP_RBLOCK_DSIZE in xfs_bmap_btree.h.
83 */
84 __be32 di_next_unlinked;/* agi unlinked list ptr */
85 union {
86 xfs_bmdr_block_t di_bmbt; /* btree root block */
87 xfs_bmbt_rec_32_t di_bmx[1]; /* extent list */
88 xfs_dir2_sf_t di_dir2sf; /* shortform directory v2 */
89 char di_c[1]; /* local contents */
90 __be32 di_dev; /* device for S_IFCHR/S_IFBLK */
91 uuid_t di_muuid; /* mount point value */
92 char di_symlink[1]; /* local symbolic link */
93 } di_u;
94 union {
95 xfs_bmdr_block_t di_abmbt; /* btree root block */
96 xfs_bmbt_rec_32_t di_abmx[1]; /* extent list */
97 xfs_attr_shortform_t di_attrsf; /* shortform attribute list */
98 } di_a;
99} xfs_dinode_t;
100 75
101/* 76/*
102 * The 32 bit link count in the inode theoretically maxes out at UINT_MAX. 77 * The 32 bit link count in the inode theoretically maxes out at UINT_MAX.
@@ -107,50 +82,14 @@ typedef struct xfs_dinode
107#define XFS_MAXLINK_1 65535U 82#define XFS_MAXLINK_1 65535U
108 83
109/* 84/*
110 * Bit names for logging disk inodes only
111 */
112#define XFS_DI_MAGIC 0x0000001
113#define XFS_DI_MODE 0x0000002
114#define XFS_DI_VERSION 0x0000004
115#define XFS_DI_FORMAT 0x0000008
116#define XFS_DI_ONLINK 0x0000010
117#define XFS_DI_UID 0x0000020
118#define XFS_DI_GID 0x0000040
119#define XFS_DI_NLINK 0x0000080
120#define XFS_DI_PROJID 0x0000100
121#define XFS_DI_PAD 0x0000200
122#define XFS_DI_ATIME 0x0000400
123#define XFS_DI_MTIME 0x0000800
124#define XFS_DI_CTIME 0x0001000
125#define XFS_DI_SIZE 0x0002000
126#define XFS_DI_NBLOCKS 0x0004000
127#define XFS_DI_EXTSIZE 0x0008000
128#define XFS_DI_NEXTENTS 0x0010000
129#define XFS_DI_NAEXTENTS 0x0020000
130#define XFS_DI_FORKOFF 0x0040000
131#define XFS_DI_AFORMAT 0x0080000
132#define XFS_DI_DMEVMASK 0x0100000
133#define XFS_DI_DMSTATE 0x0200000
134#define XFS_DI_FLAGS 0x0400000
135#define XFS_DI_GEN 0x0800000
136#define XFS_DI_NEXT_UNLINKED 0x1000000
137#define XFS_DI_U 0x2000000
138#define XFS_DI_A 0x4000000
139#define XFS_DI_NUM_BITS 27
140#define XFS_DI_ALL_BITS ((1 << XFS_DI_NUM_BITS) - 1)
141#define XFS_DI_CORE_BITS (XFS_DI_ALL_BITS & ~(XFS_DI_U|XFS_DI_A))
142
143/*
144 * Values for di_format 85 * Values for di_format
145 */ 86 */
146typedef enum xfs_dinode_fmt 87typedef enum xfs_dinode_fmt {
147{ 88 XFS_DINODE_FMT_DEV, /* xfs_dev_t */
148 XFS_DINODE_FMT_DEV, /* CHR, BLK: di_dev */ 89 XFS_DINODE_FMT_LOCAL, /* bulk data */
149 XFS_DINODE_FMT_LOCAL, /* DIR, REG: di_c */ 90 XFS_DINODE_FMT_EXTENTS, /* struct xfs_bmbt_rec */
150 /* LNK: di_symlink */ 91 XFS_DINODE_FMT_BTREE, /* struct xfs_bmdr_block */
151 XFS_DINODE_FMT_EXTENTS, /* DIR, REG, LNK: di_bmx */ 92 XFS_DINODE_FMT_UUID /* uuid_t */
152 XFS_DINODE_FMT_BTREE, /* DIR, REG, LNK: di_bmbt */
153 XFS_DINODE_FMT_UUID /* MNT: di_uuid */
154} xfs_dinode_fmt_t; 93} xfs_dinode_fmt_t;
155 94
156/* 95/*
@@ -166,13 +105,13 @@ typedef enum xfs_dinode_fmt
166 */ 105 */
167#define XFS_LITINO(mp) ((mp)->m_litino) 106#define XFS_LITINO(mp) ((mp)->m_litino)
168#define XFS_BROOT_SIZE_ADJ \ 107#define XFS_BROOT_SIZE_ADJ \
169 (sizeof(xfs_bmbt_block_t) - sizeof(xfs_bmdr_block_t)) 108 (XFS_BTREE_LBLOCK_LEN - sizeof(xfs_bmdr_block_t))
170 109
171/* 110/*
172 * Inode data & attribute fork sizes, per inode. 111 * Inode data & attribute fork sizes, per inode.
173 */ 112 */
174#define XFS_DFORK_Q(dip) ((dip)->di_core.di_forkoff != 0) 113#define XFS_DFORK_Q(dip) ((dip)->di_forkoff != 0)
175#define XFS_DFORK_BOFF(dip) ((int)((dip)->di_core.di_forkoff << 3)) 114#define XFS_DFORK_BOFF(dip) ((int)((dip)->di_forkoff << 3))
176 115
177#define XFS_DFORK_DSIZE(dip,mp) \ 116#define XFS_DFORK_DSIZE(dip,mp) \
178 (XFS_DFORK_Q(dip) ? \ 117 (XFS_DFORK_Q(dip) ? \
@@ -187,23 +126,42 @@ typedef enum xfs_dinode_fmt
187 XFS_DFORK_DSIZE(dip, mp) : \ 126 XFS_DFORK_DSIZE(dip, mp) : \
188 XFS_DFORK_ASIZE(dip, mp)) 127 XFS_DFORK_ASIZE(dip, mp))
189 128
190#define XFS_DFORK_DPTR(dip) ((dip)->di_u.di_c) 129/*
130 * Return pointers to the data or attribute forks.
131 */
132#define XFS_DFORK_DPTR(dip) \
133 ((char *)(dip) + sizeof(struct xfs_dinode))
191#define XFS_DFORK_APTR(dip) \ 134#define XFS_DFORK_APTR(dip) \
192 ((dip)->di_u.di_c + XFS_DFORK_BOFF(dip)) 135 (XFS_DFORK_DPTR(dip) + XFS_DFORK_BOFF(dip))
193#define XFS_DFORK_PTR(dip,w) \ 136#define XFS_DFORK_PTR(dip,w) \
194 ((w) == XFS_DATA_FORK ? XFS_DFORK_DPTR(dip) : XFS_DFORK_APTR(dip)) 137 ((w) == XFS_DATA_FORK ? XFS_DFORK_DPTR(dip) : XFS_DFORK_APTR(dip))
138
195#define XFS_DFORK_FORMAT(dip,w) \ 139#define XFS_DFORK_FORMAT(dip,w) \
196 ((w) == XFS_DATA_FORK ? \ 140 ((w) == XFS_DATA_FORK ? \
197 (dip)->di_core.di_format : \ 141 (dip)->di_format : \
198 (dip)->di_core.di_aformat) 142 (dip)->di_aformat)
199#define XFS_DFORK_NEXTENTS(dip,w) \ 143#define XFS_DFORK_NEXTENTS(dip,w) \
200 ((w) == XFS_DATA_FORK ? \ 144 ((w) == XFS_DATA_FORK ? \
201 be32_to_cpu((dip)->di_core.di_nextents) : \ 145 be32_to_cpu((dip)->di_nextents) : \
202 be16_to_cpu((dip)->di_core.di_anextents)) 146 be16_to_cpu((dip)->di_anextents))
203 147
204#define XFS_BUF_TO_DINODE(bp) ((xfs_dinode_t *)XFS_BUF_PTR(bp)) 148#define XFS_BUF_TO_DINODE(bp) ((xfs_dinode_t *)XFS_BUF_PTR(bp))
205 149
206/* 150/*
151 * For block and character special files the 32bit dev_t is stored at the
152 * beginning of the data fork.
153 */
154static inline xfs_dev_t xfs_dinode_get_rdev(struct xfs_dinode *dip)
155{
156 return be32_to_cpu(*(__be32 *)XFS_DFORK_DPTR(dip));
157}
158
159static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
160{
161 *(__be32 *)XFS_DFORK_DPTR(dip) = cpu_to_be32(rdev);
162}
163
164/*
207 * Values for di_flags 165 * Values for di_flags
208 * There should be a one-to-one correspondence between these flags and the 166 * There should be a one-to-one correspondence between these flags and the
209 * XFS_XFLAG_s. 167 * XFS_XFLAG_s.
diff --git a/fs/xfs/xfs_dir2_sf.h b/fs/xfs/xfs_dir2_sf.h
index deecc9d238f..6ac44b550d3 100644
--- a/fs/xfs/xfs_dir2_sf.h
+++ b/fs/xfs/xfs_dir2_sf.h
@@ -34,13 +34,6 @@ struct xfs_mount;
34struct xfs_trans; 34struct xfs_trans;
35 35
36/* 36/*
37 * Maximum size of a shortform directory.
38 */
39#define XFS_DIR2_SF_MAX_SIZE \
40 (XFS_DINODE_MAX_SIZE - (uint)sizeof(xfs_dinode_core_t) - \
41 (uint)sizeof(xfs_agino_t))
42
43/*
44 * Inode number stored as 8 8-bit values. 37 * Inode number stored as 8 8-bit values.
45 */ 38 */
46typedef struct { __uint8_t i[8]; } xfs_dir2_ino8_t; 39typedef struct { __uint8_t i[8]; } xfs_dir2_ino8_t;
diff --git a/fs/xfs/xfs_dmops.c b/fs/xfs/xfs_dmops.c
index a1e55fb9d5d..e71e2581c0c 100644
--- a/fs/xfs/xfs_dmops.c
+++ b/fs/xfs/xfs_dmops.c
@@ -25,7 +25,6 @@
25#include "xfs_inum.h" 25#include "xfs_inum.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_mount.h" 27#include "xfs_mount.h"
28#include "xfs_clnt.h"
29 28
30 29
31static struct xfs_dmops xfs_dmcore_stub = { 30static struct xfs_dmops xfs_dmcore_stub = {
@@ -38,9 +37,9 @@ static struct xfs_dmops xfs_dmcore_stub = {
38}; 37};
39 38
40int 39int
41xfs_dmops_get(struct xfs_mount *mp, struct xfs_mount_args *args) 40xfs_dmops_get(struct xfs_mount *mp)
42{ 41{
43 if (args->flags & XFSMNT_DMAPI) { 42 if (mp->m_flags & XFS_MOUNT_DMAPI) {
44 cmn_err(CE_WARN, 43 cmn_err(CE_WARN,
45 "XFS: dmapi support not available in this kernel."); 44 "XFS: dmapi support not available in this kernel.");
46 return EINVAL; 45 return EINVAL;
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index f227ecd1a29..92d5cd5bf4f 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -153,21 +153,6 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud)
153} 153}
154#endif /* DEBUG */ 154#endif /* DEBUG */
155 155
156static void
157xfs_fs_vcmn_err(int level, xfs_mount_t *mp, char *fmt, va_list ap)
158{
159 if (mp != NULL) {
160 char *newfmt;
161 int len = 16 + mp->m_fsname_len + strlen(fmt);
162
163 newfmt = kmem_alloc(len, KM_SLEEP);
164 sprintf(newfmt, "Filesystem \"%s\": %s", mp->m_fsname, fmt);
165 icmn_err(level, newfmt, ap);
166 kmem_free(newfmt);
167 } else {
168 icmn_err(level, fmt, ap);
169 }
170}
171 156
172void 157void
173xfs_fs_cmn_err(int level, xfs_mount_t *mp, char *fmt, ...) 158xfs_fs_cmn_err(int level, xfs_mount_t *mp, char *fmt, ...)
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 11543f10b0c..0c93051c465 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -159,11 +159,15 @@ extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud);
159#define XFS_PTAG_FSBLOCK_ZERO 0x00000080 159#define XFS_PTAG_FSBLOCK_ZERO 0x00000080
160 160
161struct xfs_mount; 161struct xfs_mount;
162/* PRINTFLIKE4 */ 162
163extern void xfs_fs_vcmn_err(int level, struct xfs_mount *mp,
164 char *fmt, va_list ap)
165 __attribute__ ((format (printf, 3, 0)));
163extern void xfs_cmn_err(int panic_tag, int level, struct xfs_mount *mp, 166extern void xfs_cmn_err(int panic_tag, int level, struct xfs_mount *mp,
164 char *fmt, ...); 167 char *fmt, ...)
165/* PRINTFLIKE3 */ 168 __attribute__ ((format (printf, 4, 5)));
166extern void xfs_fs_cmn_err(int level, struct xfs_mount *mp, char *fmt, ...); 169extern void xfs_fs_cmn_err(int level, struct xfs_mount *mp, char *fmt, ...)
170 __attribute__ ((format (printf, 3, 4)));
167 171
168extern void xfs_hex_dump(void *p, int length); 172extern void xfs_hex_dump(void *p, int length);
169 173
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 8aa28f751b2..05a4bdd4be3 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -108,19 +108,16 @@ xfs_efi_item_pin(xfs_efi_log_item_t *efip)
108STATIC void 108STATIC void
109xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale) 109xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale)
110{ 110{
111 xfs_mount_t *mp; 111 struct xfs_ail *ailp = efip->efi_item.li_ailp;
112 112
113 mp = efip->efi_item.li_mountp; 113 spin_lock(&ailp->xa_lock);
114 spin_lock(&mp->m_ail_lock);
115 if (efip->efi_flags & XFS_EFI_CANCELED) { 114 if (efip->efi_flags & XFS_EFI_CANCELED) {
116 /* 115 /* xfs_trans_ail_delete() drops the AIL lock. */
117 * xfs_trans_delete_ail() drops the AIL lock. 116 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)efip);
118 */
119 xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip);
120 xfs_efi_item_free(efip); 117 xfs_efi_item_free(efip);
121 } else { 118 } else {
122 efip->efi_flags |= XFS_EFI_COMMITTED; 119 efip->efi_flags |= XFS_EFI_COMMITTED;
123 spin_unlock(&mp->m_ail_lock); 120 spin_unlock(&ailp->xa_lock);
124 } 121 }
125} 122}
126 123
@@ -134,26 +131,23 @@ xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale)
134STATIC void 131STATIC void
135xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp) 132xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp)
136{ 133{
137 xfs_mount_t *mp; 134 struct xfs_ail *ailp = efip->efi_item.li_ailp;
138 xfs_log_item_desc_t *lidp; 135 xfs_log_item_desc_t *lidp;
139 136
140 mp = efip->efi_item.li_mountp; 137 spin_lock(&ailp->xa_lock);
141 spin_lock(&mp->m_ail_lock);
142 if (efip->efi_flags & XFS_EFI_CANCELED) { 138 if (efip->efi_flags & XFS_EFI_CANCELED) {
143 /* 139 /*
144 * free the xaction descriptor pointing to this item 140 * free the xaction descriptor pointing to this item
145 */ 141 */
146 lidp = xfs_trans_find_item(tp, (xfs_log_item_t *) efip); 142 lidp = xfs_trans_find_item(tp, (xfs_log_item_t *) efip);
147 xfs_trans_free_item(tp, lidp); 143 xfs_trans_free_item(tp, lidp);
148 /* 144
149 * pull the item off the AIL. 145 /* xfs_trans_ail_delete() drops the AIL lock. */
150 * xfs_trans_delete_ail() drops the AIL lock. 146 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)efip);
151 */
152 xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip);
153 xfs_efi_item_free(efip); 147 xfs_efi_item_free(efip);
154 } else { 148 } else {
155 efip->efi_flags |= XFS_EFI_COMMITTED; 149 efip->efi_flags |= XFS_EFI_COMMITTED;
156 spin_unlock(&mp->m_ail_lock); 150 spin_unlock(&ailp->xa_lock);
157 } 151 }
158} 152}
159 153
@@ -268,6 +262,7 @@ xfs_efi_init(xfs_mount_t *mp,
268 efip->efi_item.li_type = XFS_LI_EFI; 262 efip->efi_item.li_type = XFS_LI_EFI;
269 efip->efi_item.li_ops = &xfs_efi_item_ops; 263 efip->efi_item.li_ops = &xfs_efi_item_ops;
270 efip->efi_item.li_mountp = mp; 264 efip->efi_item.li_mountp = mp;
265 efip->efi_item.li_ailp = mp->m_ail;
271 efip->efi_format.efi_nextents = nextents; 266 efip->efi_format.efi_nextents = nextents;
272 efip->efi_format.efi_id = (__psint_t)(void*)efip; 267 efip->efi_format.efi_id = (__psint_t)(void*)efip;
273 268
@@ -345,25 +340,22 @@ void
345xfs_efi_release(xfs_efi_log_item_t *efip, 340xfs_efi_release(xfs_efi_log_item_t *efip,
346 uint nextents) 341 uint nextents)
347{ 342{
348 xfs_mount_t *mp; 343 struct xfs_ail *ailp = efip->efi_item.li_ailp;
349 int extents_left; 344 int extents_left;
350 345
351 mp = efip->efi_item.li_mountp;
352 ASSERT(efip->efi_next_extent > 0); 346 ASSERT(efip->efi_next_extent > 0);
353 ASSERT(efip->efi_flags & XFS_EFI_COMMITTED); 347 ASSERT(efip->efi_flags & XFS_EFI_COMMITTED);
354 348
355 spin_lock(&mp->m_ail_lock); 349 spin_lock(&ailp->xa_lock);
356 ASSERT(efip->efi_next_extent >= nextents); 350 ASSERT(efip->efi_next_extent >= nextents);
357 efip->efi_next_extent -= nextents; 351 efip->efi_next_extent -= nextents;
358 extents_left = efip->efi_next_extent; 352 extents_left = efip->efi_next_extent;
359 if (extents_left == 0) { 353 if (extents_left == 0) {
360 /* 354 /* xfs_trans_ail_delete() drops the AIL lock. */
361 * xfs_trans_delete_ail() drops the AIL lock. 355 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)efip);
362 */
363 xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip);
364 xfs_efi_item_free(efip); 356 xfs_efi_item_free(efip);
365 } else { 357 } else {
366 spin_unlock(&mp->m_ail_lock); 358 spin_unlock(&ailp->xa_lock);
367 } 359 }
368} 360}
369 361
@@ -565,6 +557,7 @@ xfs_efd_init(xfs_mount_t *mp,
565 efdp->efd_item.li_type = XFS_LI_EFD; 557 efdp->efd_item.li_type = XFS_LI_EFD;
566 efdp->efd_item.li_ops = &xfs_efd_item_ops; 558 efdp->efd_item.li_ops = &xfs_efd_item_ops;
567 efdp->efd_item.li_mountp = mp; 559 efdp->efd_item.li_mountp = mp;
560 efdp->efd_item.li_ailp = mp->m_ail;
568 efdp->efd_efip = efip; 561 efdp->efd_efip = efip;
569 efdp->efd_format.efd_nextents = nextents; 562 efdp->efd_format.efd_nextents = nextents;
570 efdp->efd_format.efd_efi_id = efip->efi_format.efi_id; 563 efdp->efd_format.efd_efi_id = efip->efi_format.efi_id;
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 01c0cc88d3f..589c41c3844 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -113,22 +113,14 @@ struct getbmapx {
113#define BMV_IF_ATTRFORK 0x1 /* return attr fork rather than data */ 113#define BMV_IF_ATTRFORK 0x1 /* return attr fork rather than data */
114#define BMV_IF_NO_DMAPI_READ 0x2 /* Do not generate DMAPI read event */ 114#define BMV_IF_NO_DMAPI_READ 0x2 /* Do not generate DMAPI read event */
115#define BMV_IF_PREALLOC 0x4 /* rtn status BMV_OF_PREALLOC if req */ 115#define BMV_IF_PREALLOC 0x4 /* rtn status BMV_OF_PREALLOC if req */
116#define BMV_IF_VALID (BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC) 116#define BMV_IF_DELALLOC 0x8 /* rtn status BMV_OF_DELALLOC if req */
117#ifdef __KERNEL__ 117#define BMV_IF_VALID \
118#define BMV_IF_EXTENDED 0x40000000 /* getpmapx if set */ 118 (BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC|BMV_IF_DELALLOC)
119#endif
120 119
121/* bmv_oflags values - returned for for each non-header segment */ 120/* bmv_oflags values - returned for for each non-header segment */
122#define BMV_OF_PREALLOC 0x1 /* segment = unwritten pre-allocation */ 121#define BMV_OF_PREALLOC 0x1 /* segment = unwritten pre-allocation */
123 122#define BMV_OF_DELALLOC 0x2 /* segment = delayed allocation */
124/* Convert getbmap <-> getbmapx - move fields from p1 to p2. */ 123#define BMV_OF_LAST 0x4 /* segment is the last in the file */
125#define GETBMAP_CONVERT(p1,p2) { \
126 p2.bmv_offset = p1.bmv_offset; \
127 p2.bmv_block = p1.bmv_block; \
128 p2.bmv_length = p1.bmv_length; \
129 p2.bmv_count = p1.bmv_count; \
130 p2.bmv_entries = p1.bmv_entries; }
131
132 124
133/* 125/*
134 * Structure for XFS_IOC_FSSETDM. 126 * Structure for XFS_IOC_FSSETDM.
@@ -426,10 +418,6 @@ typedef struct xfs_handle {
426#define XFS_IOC_GETXFLAGS FS_IOC_GETFLAGS 418#define XFS_IOC_GETXFLAGS FS_IOC_GETFLAGS
427#define XFS_IOC_SETXFLAGS FS_IOC_SETFLAGS 419#define XFS_IOC_SETXFLAGS FS_IOC_SETFLAGS
428#define XFS_IOC_GETVERSION FS_IOC_GETVERSION 420#define XFS_IOC_GETVERSION FS_IOC_GETVERSION
429/* 32-bit compat counterparts */
430#define XFS_IOC32_GETXFLAGS FS_IOC32_GETFLAGS
431#define XFS_IOC32_SETXFLAGS FS_IOC32_SETFLAGS
432#define XFS_IOC32_GETVERSION FS_IOC32_GETVERSION
433 421
434/* 422/*
435 * ioctl commands that replace IRIX fcntl()'s 423 * ioctl commands that replace IRIX fcntl()'s
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 84583cf73db..852b6d32e8d 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -126,7 +126,7 @@ xfs_growfs_data_private(
126 xfs_extlen_t agsize; 126 xfs_extlen_t agsize;
127 xfs_extlen_t tmpsize; 127 xfs_extlen_t tmpsize;
128 xfs_alloc_rec_t *arec; 128 xfs_alloc_rec_t *arec;
129 xfs_btree_sblock_t *block; 129 struct xfs_btree_block *block;
130 xfs_buf_t *bp; 130 xfs_buf_t *bp;
131 int bucket; 131 int bucket;
132 int dpct; 132 int dpct;
@@ -251,14 +251,14 @@ xfs_growfs_data_private(
251 bp = xfs_buf_get(mp->m_ddev_targp, 251 bp = xfs_buf_get(mp->m_ddev_targp,
252 XFS_AGB_TO_DADDR(mp, agno, XFS_BNO_BLOCK(mp)), 252 XFS_AGB_TO_DADDR(mp, agno, XFS_BNO_BLOCK(mp)),
253 BTOBB(mp->m_sb.sb_blocksize), 0); 253 BTOBB(mp->m_sb.sb_blocksize), 0);
254 block = XFS_BUF_TO_SBLOCK(bp); 254 block = XFS_BUF_TO_BLOCK(bp);
255 memset(block, 0, mp->m_sb.sb_blocksize); 255 memset(block, 0, mp->m_sb.sb_blocksize);
256 block->bb_magic = cpu_to_be32(XFS_ABTB_MAGIC); 256 block->bb_magic = cpu_to_be32(XFS_ABTB_MAGIC);
257 block->bb_level = 0; 257 block->bb_level = 0;
258 block->bb_numrecs = cpu_to_be16(1); 258 block->bb_numrecs = cpu_to_be16(1);
259 block->bb_leftsib = cpu_to_be32(NULLAGBLOCK); 259 block->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
260 block->bb_rightsib = cpu_to_be32(NULLAGBLOCK); 260 block->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
261 arec = XFS_BTREE_REC_ADDR(xfs_alloc, block, 1); 261 arec = XFS_ALLOC_REC_ADDR(mp, block, 1);
262 arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp)); 262 arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp));
263 arec->ar_blockcount = cpu_to_be32( 263 arec->ar_blockcount = cpu_to_be32(
264 agsize - be32_to_cpu(arec->ar_startblock)); 264 agsize - be32_to_cpu(arec->ar_startblock));
@@ -272,14 +272,14 @@ xfs_growfs_data_private(
272 bp = xfs_buf_get(mp->m_ddev_targp, 272 bp = xfs_buf_get(mp->m_ddev_targp,
273 XFS_AGB_TO_DADDR(mp, agno, XFS_CNT_BLOCK(mp)), 273 XFS_AGB_TO_DADDR(mp, agno, XFS_CNT_BLOCK(mp)),
274 BTOBB(mp->m_sb.sb_blocksize), 0); 274 BTOBB(mp->m_sb.sb_blocksize), 0);
275 block = XFS_BUF_TO_SBLOCK(bp); 275 block = XFS_BUF_TO_BLOCK(bp);
276 memset(block, 0, mp->m_sb.sb_blocksize); 276 memset(block, 0, mp->m_sb.sb_blocksize);
277 block->bb_magic = cpu_to_be32(XFS_ABTC_MAGIC); 277 block->bb_magic = cpu_to_be32(XFS_ABTC_MAGIC);
278 block->bb_level = 0; 278 block->bb_level = 0;
279 block->bb_numrecs = cpu_to_be16(1); 279 block->bb_numrecs = cpu_to_be16(1);
280 block->bb_leftsib = cpu_to_be32(NULLAGBLOCK); 280 block->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
281 block->bb_rightsib = cpu_to_be32(NULLAGBLOCK); 281 block->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
282 arec = XFS_BTREE_REC_ADDR(xfs_alloc, block, 1); 282 arec = XFS_ALLOC_REC_ADDR(mp, block, 1);
283 arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp)); 283 arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp));
284 arec->ar_blockcount = cpu_to_be32( 284 arec->ar_blockcount = cpu_to_be32(
285 agsize - be32_to_cpu(arec->ar_startblock)); 285 agsize - be32_to_cpu(arec->ar_startblock));
@@ -294,13 +294,13 @@ xfs_growfs_data_private(
294 bp = xfs_buf_get(mp->m_ddev_targp, 294 bp = xfs_buf_get(mp->m_ddev_targp,
295 XFS_AGB_TO_DADDR(mp, agno, XFS_IBT_BLOCK(mp)), 295 XFS_AGB_TO_DADDR(mp, agno, XFS_IBT_BLOCK(mp)),
296 BTOBB(mp->m_sb.sb_blocksize), 0); 296 BTOBB(mp->m_sb.sb_blocksize), 0);
297 block = XFS_BUF_TO_SBLOCK(bp); 297 block = XFS_BUF_TO_BLOCK(bp);
298 memset(block, 0, mp->m_sb.sb_blocksize); 298 memset(block, 0, mp->m_sb.sb_blocksize);
299 block->bb_magic = cpu_to_be32(XFS_IBT_MAGIC); 299 block->bb_magic = cpu_to_be32(XFS_IBT_MAGIC);
300 block->bb_level = 0; 300 block->bb_level = 0;
301 block->bb_numrecs = 0; 301 block->bb_numrecs = 0;
302 block->bb_leftsib = cpu_to_be32(NULLAGBLOCK); 302 block->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
303 block->bb_rightsib = cpu_to_be32(NULLAGBLOCK); 303 block->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
304 error = xfs_bwrite(mp, bp); 304 error = xfs_bwrite(mp, bp);
305 if (error) { 305 if (error) {
306 goto error0; 306 goto error0;
@@ -435,6 +435,9 @@ xfs_growfs_data(
435 xfs_growfs_data_t *in) 435 xfs_growfs_data_t *in)
436{ 436{
437 int error; 437 int error;
438
439 if (!capable(CAP_SYS_ADMIN))
440 return XFS_ERROR(EPERM);
438 if (!mutex_trylock(&mp->m_growlock)) 441 if (!mutex_trylock(&mp->m_growlock))
439 return XFS_ERROR(EWOULDBLOCK); 442 return XFS_ERROR(EWOULDBLOCK);
440 error = xfs_growfs_data_private(mp, in); 443 error = xfs_growfs_data_private(mp, in);
@@ -448,6 +451,9 @@ xfs_growfs_log(
448 xfs_growfs_log_t *in) 451 xfs_growfs_log_t *in)
449{ 452{
450 int error; 453 int error;
454
455 if (!capable(CAP_SYS_ADMIN))
456 return XFS_ERROR(EPERM);
451 if (!mutex_trylock(&mp->m_growlock)) 457 if (!mutex_trylock(&mp->m_growlock))
452 return XFS_ERROR(EWOULDBLOCK); 458 return XFS_ERROR(EWOULDBLOCK);
453 error = xfs_growfs_log_private(mp, in); 459 error = xfs_growfs_log_private(mp, in);
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index aad8c5da38a..e6ebbaeb4dc 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -41,68 +41,6 @@
41#include "xfs_error.h" 41#include "xfs_error.h"
42#include "xfs_bmap.h" 42#include "xfs_bmap.h"
43 43
44/*
45 * Log specified fields for the inode given by bp and off.
46 */
47STATIC void
48xfs_ialloc_log_di(
49 xfs_trans_t *tp, /* transaction pointer */
50 xfs_buf_t *bp, /* inode buffer */
51 int off, /* index of inode in buffer */
52 int fields) /* bitmask of fields to log */
53{
54 int first; /* first byte number */
55 int ioffset; /* off in bytes */
56 int last; /* last byte number */
57 xfs_mount_t *mp; /* mount point structure */
58 static const short offsets[] = { /* field offsets */
59 /* keep in sync with bits */
60 offsetof(xfs_dinode_core_t, di_magic),
61 offsetof(xfs_dinode_core_t, di_mode),
62 offsetof(xfs_dinode_core_t, di_version),
63 offsetof(xfs_dinode_core_t, di_format),
64 offsetof(xfs_dinode_core_t, di_onlink),
65 offsetof(xfs_dinode_core_t, di_uid),
66 offsetof(xfs_dinode_core_t, di_gid),
67 offsetof(xfs_dinode_core_t, di_nlink),
68 offsetof(xfs_dinode_core_t, di_projid),
69 offsetof(xfs_dinode_core_t, di_pad),
70 offsetof(xfs_dinode_core_t, di_atime),
71 offsetof(xfs_dinode_core_t, di_mtime),
72 offsetof(xfs_dinode_core_t, di_ctime),
73 offsetof(xfs_dinode_core_t, di_size),
74 offsetof(xfs_dinode_core_t, di_nblocks),
75 offsetof(xfs_dinode_core_t, di_extsize),
76 offsetof(xfs_dinode_core_t, di_nextents),
77 offsetof(xfs_dinode_core_t, di_anextents),
78 offsetof(xfs_dinode_core_t, di_forkoff),
79 offsetof(xfs_dinode_core_t, di_aformat),
80 offsetof(xfs_dinode_core_t, di_dmevmask),
81 offsetof(xfs_dinode_core_t, di_dmstate),
82 offsetof(xfs_dinode_core_t, di_flags),
83 offsetof(xfs_dinode_core_t, di_gen),
84 offsetof(xfs_dinode_t, di_next_unlinked),
85 offsetof(xfs_dinode_t, di_u),
86 offsetof(xfs_dinode_t, di_a),
87 sizeof(xfs_dinode_t)
88 };
89
90
91 ASSERT(offsetof(xfs_dinode_t, di_core) == 0);
92 ASSERT((fields & (XFS_DI_U|XFS_DI_A)) == 0);
93 mp = tp->t_mountp;
94 /*
95 * Get the inode-relative first and last bytes for these fields
96 */
97 xfs_btree_offsets(fields, offsets, XFS_DI_NUM_BITS, &first, &last);
98 /*
99 * Convert to buffer offsets and log it.
100 */
101 ioffset = off << mp->m_sb.sb_inodelog;
102 first += ioffset;
103 last += ioffset;
104 xfs_trans_log_buf(tp, bp, first, last);
105}
106 44
107/* 45/*
108 * Allocation group level functions. 46 * Allocation group level functions.
@@ -119,6 +57,102 @@ xfs_ialloc_cluster_alignment(
119} 57}
120 58
121/* 59/*
60 * Lookup the record equal to ino in the btree given by cur.
61 */
62STATIC int /* error */
63xfs_inobt_lookup_eq(
64 struct xfs_btree_cur *cur, /* btree cursor */
65 xfs_agino_t ino, /* starting inode of chunk */
66 __int32_t fcnt, /* free inode count */
67 xfs_inofree_t free, /* free inode mask */
68 int *stat) /* success/failure */
69{
70 cur->bc_rec.i.ir_startino = ino;
71 cur->bc_rec.i.ir_freecount = fcnt;
72 cur->bc_rec.i.ir_free = free;
73 return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
74}
75
76/*
77 * Lookup the first record greater than or equal to ino
78 * in the btree given by cur.
79 */
80int /* error */
81xfs_inobt_lookup_ge(
82 struct xfs_btree_cur *cur, /* btree cursor */
83 xfs_agino_t ino, /* starting inode of chunk */
84 __int32_t fcnt, /* free inode count */
85 xfs_inofree_t free, /* free inode mask */
86 int *stat) /* success/failure */
87{
88 cur->bc_rec.i.ir_startino = ino;
89 cur->bc_rec.i.ir_freecount = fcnt;
90 cur->bc_rec.i.ir_free = free;
91 return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
92}
93
94/*
95 * Lookup the first record less than or equal to ino
96 * in the btree given by cur.
97 */
98int /* error */
99xfs_inobt_lookup_le(
100 struct xfs_btree_cur *cur, /* btree cursor */
101 xfs_agino_t ino, /* starting inode of chunk */
102 __int32_t fcnt, /* free inode count */
103 xfs_inofree_t free, /* free inode mask */
104 int *stat) /* success/failure */
105{
106 cur->bc_rec.i.ir_startino = ino;
107 cur->bc_rec.i.ir_freecount = fcnt;
108 cur->bc_rec.i.ir_free = free;
109 return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
110}
111
112/*
113 * Update the record referred to by cur to the value given
114 * by [ino, fcnt, free].
115 * This either works (return 0) or gets an EFSCORRUPTED error.
116 */
117STATIC int /* error */
118xfs_inobt_update(
119 struct xfs_btree_cur *cur, /* btree cursor */
120 xfs_agino_t ino, /* starting inode of chunk */
121 __int32_t fcnt, /* free inode count */
122 xfs_inofree_t free) /* free inode mask */
123{
124 union xfs_btree_rec rec;
125
126 rec.inobt.ir_startino = cpu_to_be32(ino);
127 rec.inobt.ir_freecount = cpu_to_be32(fcnt);
128 rec.inobt.ir_free = cpu_to_be64(free);
129 return xfs_btree_update(cur, &rec);
130}
131
132/*
133 * Get the data from the pointed-to record.
134 */
135int /* error */
136xfs_inobt_get_rec(
137 struct xfs_btree_cur *cur, /* btree cursor */
138 xfs_agino_t *ino, /* output: starting inode of chunk */
139 __int32_t *fcnt, /* output: number of free inodes */
140 xfs_inofree_t *free, /* output: free inode mask */
141 int *stat) /* output: success/failure */
142{
143 union xfs_btree_rec *rec;
144 int error;
145
146 error = xfs_btree_get_rec(cur, &rec, stat);
147 if (!error && *stat == 1) {
148 *ino = be32_to_cpu(rec->inobt.ir_startino);
149 *fcnt = be32_to_cpu(rec->inobt.ir_freecount);
150 *free = be64_to_cpu(rec->inobt.ir_free);
151 }
152 return error;
153}
154
155/*
122 * Allocate new inodes in the allocation group specified by agbp. 156 * Allocate new inodes in the allocation group specified by agbp.
123 * Return 0 for success, else error code. 157 * Return 0 for success, else error code.
124 */ 158 */
@@ -287,9 +321,9 @@ xfs_ialloc_ag_alloc(
287 * able to use the file system. 321 * able to use the file system.
288 */ 322 */
289 if (xfs_sb_version_hasnlink(&args.mp->m_sb)) 323 if (xfs_sb_version_hasnlink(&args.mp->m_sb))
290 version = XFS_DINODE_VERSION_2; 324 version = 2;
291 else 325 else
292 version = XFS_DINODE_VERSION_1; 326 version = 1;
293 327
294 /* 328 /*
295 * Seed the new inode cluster with a random generation number. This 329 * Seed the new inode cluster with a random generation number. This
@@ -310,18 +344,25 @@ xfs_ialloc_ag_alloc(
310 XFS_BUF_LOCK); 344 XFS_BUF_LOCK);
311 ASSERT(fbuf); 345 ASSERT(fbuf);
312 ASSERT(!XFS_BUF_GETERROR(fbuf)); 346 ASSERT(!XFS_BUF_GETERROR(fbuf));
347
313 /* 348 /*
314 * Set initial values for the inodes in this buffer. 349 * Initialize all inodes in this buffer and then log them.
350 *
351 * XXX: It would be much better if we had just one transaction to
352 * log a whole cluster of inodes instead of all the indivdual
353 * transactions causing a lot of log traffic.
315 */ 354 */
316 xfs_biozero(fbuf, 0, ninodes << args.mp->m_sb.sb_inodelog); 355 xfs_biozero(fbuf, 0, ninodes << args.mp->m_sb.sb_inodelog);
317 for (i = 0; i < ninodes; i++) { 356 for (i = 0; i < ninodes; i++) {
357 int ioffset = i << args.mp->m_sb.sb_inodelog;
358 uint isize = sizeof(struct xfs_dinode);
359
318 free = XFS_MAKE_IPTR(args.mp, fbuf, i); 360 free = XFS_MAKE_IPTR(args.mp, fbuf, i);
319 free->di_core.di_magic = cpu_to_be16(XFS_DINODE_MAGIC); 361 free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
320 free->di_core.di_version = version; 362 free->di_version = version;
321 free->di_core.di_gen = cpu_to_be32(gen); 363 free->di_gen = cpu_to_be32(gen);
322 free->di_next_unlinked = cpu_to_be32(NULLAGINO); 364 free->di_next_unlinked = cpu_to_be32(NULLAGINO);
323 xfs_ialloc_log_di(tp, fbuf, i, 365 xfs_trans_log_buf(tp, fbuf, ioffset, ioffset + isize - 1);
324 XFS_DI_CORE_BITS | XFS_DI_NEXT_UNLINKED);
325 } 366 }
326 xfs_trans_inode_alloc_buf(tp, fbuf); 367 xfs_trans_inode_alloc_buf(tp, fbuf);
327 } 368 }
@@ -335,8 +376,7 @@ xfs_ialloc_ag_alloc(
335 /* 376 /*
336 * Insert records describing the new inode chunk into the btree. 377 * Insert records describing the new inode chunk into the btree.
337 */ 378 */
338 cur = xfs_btree_init_cursor(args.mp, tp, agbp, agno, 379 cur = xfs_inobt_init_cursor(args.mp, tp, agbp, agno);
339 XFS_BTNUM_INO, (xfs_inode_t *)0, 0);
340 for (thisino = newino; 380 for (thisino = newino;
341 thisino < newino + newlen; 381 thisino < newino + newlen;
342 thisino += XFS_INODES_PER_CHUNK) { 382 thisino += XFS_INODES_PER_CHUNK) {
@@ -346,7 +386,7 @@ xfs_ialloc_ag_alloc(
346 return error; 386 return error;
347 } 387 }
348 ASSERT(i == 0); 388 ASSERT(i == 0);
349 if ((error = xfs_inobt_insert(cur, &i))) { 389 if ((error = xfs_btree_insert(cur, &i))) {
350 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 390 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
351 return error; 391 return error;
352 } 392 }
@@ -676,8 +716,7 @@ nextag:
676 */ 716 */
677 agno = tagno; 717 agno = tagno;
678 *IO_agbp = NULL; 718 *IO_agbp = NULL;
679 cur = xfs_btree_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno), 719 cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno));
680 XFS_BTNUM_INO, (xfs_inode_t *)0, 0);
681 /* 720 /*
682 * If pagino is 0 (this is the root inode allocation) use newino. 721 * If pagino is 0 (this is the root inode allocation) use newino.
683 * This must work because we've just allocated some. 722 * This must work because we've just allocated some.
@@ -697,7 +736,7 @@ nextag:
697 goto error0; 736 goto error0;
698 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 737 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
699 freecount += rec.ir_freecount; 738 freecount += rec.ir_freecount;
700 if ((error = xfs_inobt_increment(cur, 0, &i))) 739 if ((error = xfs_btree_increment(cur, 0, &i)))
701 goto error0; 740 goto error0;
702 } while (i == 1); 741 } while (i == 1);
703 742
@@ -741,7 +780,7 @@ nextag:
741 /* 780 /*
742 * Search left with tcur, back up 1 record. 781 * Search left with tcur, back up 1 record.
743 */ 782 */
744 if ((error = xfs_inobt_decrement(tcur, 0, &i))) 783 if ((error = xfs_btree_decrement(tcur, 0, &i)))
745 goto error1; 784 goto error1;
746 doneleft = !i; 785 doneleft = !i;
747 if (!doneleft) { 786 if (!doneleft) {
@@ -755,7 +794,7 @@ nextag:
755 /* 794 /*
756 * Search right with cur, go forward 1 record. 795 * Search right with cur, go forward 1 record.
757 */ 796 */
758 if ((error = xfs_inobt_increment(cur, 0, &i))) 797 if ((error = xfs_btree_increment(cur, 0, &i)))
759 goto error1; 798 goto error1;
760 doneright = !i; 799 doneright = !i;
761 if (!doneright) { 800 if (!doneright) {
@@ -817,7 +856,7 @@ nextag:
817 * further left. 856 * further left.
818 */ 857 */
819 if (useleft) { 858 if (useleft) {
820 if ((error = xfs_inobt_decrement(tcur, 0, 859 if ((error = xfs_btree_decrement(tcur, 0,
821 &i))) 860 &i)))
822 goto error1; 861 goto error1;
823 doneleft = !i; 862 doneleft = !i;
@@ -837,7 +876,7 @@ nextag:
837 * further right. 876 * further right.
838 */ 877 */
839 else { 878 else {
840 if ((error = xfs_inobt_increment(cur, 0, 879 if ((error = xfs_btree_increment(cur, 0,
841 &i))) 880 &i)))
842 goto error1; 881 goto error1;
843 doneright = !i; 882 doneright = !i;
@@ -892,7 +931,7 @@ nextag:
892 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 931 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
893 if (rec.ir_freecount > 0) 932 if (rec.ir_freecount > 0)
894 break; 933 break;
895 if ((error = xfs_inobt_increment(cur, 0, &i))) 934 if ((error = xfs_btree_increment(cur, 0, &i)))
896 goto error0; 935 goto error0;
897 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 936 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
898 } 937 }
@@ -926,7 +965,7 @@ nextag:
926 goto error0; 965 goto error0;
927 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 966 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
928 freecount += rec.ir_freecount; 967 freecount += rec.ir_freecount;
929 if ((error = xfs_inobt_increment(cur, 0, &i))) 968 if ((error = xfs_btree_increment(cur, 0, &i)))
930 goto error0; 969 goto error0;
931 } while (i == 1); 970 } while (i == 1);
932 ASSERT(freecount == be32_to_cpu(agi->agi_freecount) || 971 ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
@@ -1022,8 +1061,7 @@ xfs_difree(
1022 /* 1061 /*
1023 * Initialize the cursor. 1062 * Initialize the cursor.
1024 */ 1063 */
1025 cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO, 1064 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
1026 (xfs_inode_t *)0, 0);
1027#ifdef DEBUG 1065#ifdef DEBUG
1028 if (cur->bc_nlevels == 1) { 1066 if (cur->bc_nlevels == 1) {
1029 int freecount = 0; 1067 int freecount = 0;
@@ -1036,7 +1074,7 @@ xfs_difree(
1036 goto error0; 1074 goto error0;
1037 if (i) { 1075 if (i) {
1038 freecount += rec.ir_freecount; 1076 freecount += rec.ir_freecount;
1039 if ((error = xfs_inobt_increment(cur, 0, &i))) 1077 if ((error = xfs_btree_increment(cur, 0, &i)))
1040 goto error0; 1078 goto error0;
1041 } 1079 }
1042 } while (i == 1); 1080 } while (i == 1);
@@ -1098,8 +1136,8 @@ xfs_difree(
1098 xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen); 1136 xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen);
1099 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1)); 1137 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1));
1100 1138
1101 if ((error = xfs_inobt_delete(cur, &i))) { 1139 if ((error = xfs_btree_delete(cur, &i))) {
1102 cmn_err(CE_WARN, "xfs_difree: xfs_inobt_delete returned an error %d on %s.\n", 1140 cmn_err(CE_WARN, "xfs_difree: xfs_btree_delete returned an error %d on %s.\n",
1103 error, mp->m_fsname); 1141 error, mp->m_fsname);
1104 goto error0; 1142 goto error0;
1105 } 1143 }
@@ -1141,7 +1179,7 @@ xfs_difree(
1141 goto error0; 1179 goto error0;
1142 if (i) { 1180 if (i) {
1143 freecount += rec.ir_freecount; 1181 freecount += rec.ir_freecount;
1144 if ((error = xfs_inobt_increment(cur, 0, &i))) 1182 if ((error = xfs_btree_increment(cur, 0, &i)))
1145 goto error0; 1183 goto error0;
1146 } 1184 }
1147 } while (i == 1); 1185 } while (i == 1);
@@ -1158,36 +1196,28 @@ error0:
1158} 1196}
1159 1197
1160/* 1198/*
1161 * Return the location of the inode in bno/off, for mapping it into a buffer. 1199 * Return the location of the inode in imap, for mapping it into a buffer.
1162 */ 1200 */
1163/*ARGSUSED*/
1164int 1201int
1165xfs_dilocate( 1202xfs_imap(
1166 xfs_mount_t *mp, /* file system mount structure */ 1203 xfs_mount_t *mp, /* file system mount structure */
1167 xfs_trans_t *tp, /* transaction pointer */ 1204 xfs_trans_t *tp, /* transaction pointer */
1168 xfs_ino_t ino, /* inode to locate */ 1205 xfs_ino_t ino, /* inode to locate */
1169 xfs_fsblock_t *bno, /* output: block containing inode */ 1206 struct xfs_imap *imap, /* location map structure */
1170 int *len, /* output: num blocks in inode cluster */ 1207 uint flags) /* flags for inode btree lookup */
1171 int *off, /* output: index in block of inode */
1172 uint flags) /* flags concerning inode lookup */
1173{ 1208{
1174 xfs_agblock_t agbno; /* block number of inode in the alloc group */ 1209 xfs_agblock_t agbno; /* block number of inode in the alloc group */
1175 xfs_buf_t *agbp; /* agi buffer */
1176 xfs_agino_t agino; /* inode number within alloc group */ 1210 xfs_agino_t agino; /* inode number within alloc group */
1177 xfs_agnumber_t agno; /* allocation group number */ 1211 xfs_agnumber_t agno; /* allocation group number */
1178 int blks_per_cluster; /* num blocks per inode cluster */ 1212 int blks_per_cluster; /* num blocks per inode cluster */
1179 xfs_agblock_t chunk_agbno; /* first block in inode chunk */ 1213 xfs_agblock_t chunk_agbno; /* first block in inode chunk */
1180 xfs_agino_t chunk_agino; /* first agino in inode chunk */
1181 __int32_t chunk_cnt; /* count of free inodes in chunk */
1182 xfs_inofree_t chunk_free; /* mask of free inodes in chunk */
1183 xfs_agblock_t cluster_agbno; /* first block in inode cluster */ 1214 xfs_agblock_t cluster_agbno; /* first block in inode cluster */
1184 xfs_btree_cur_t *cur; /* inode btree cursor */
1185 int error; /* error code */ 1215 int error; /* error code */
1186 int i; /* temp state */
1187 int offset; /* index of inode in its buffer */ 1216 int offset; /* index of inode in its buffer */
1188 int offset_agbno; /* blks from chunk start to inode */ 1217 int offset_agbno; /* blks from chunk start to inode */
1189 1218
1190 ASSERT(ino != NULLFSINO); 1219 ASSERT(ino != NULLFSINO);
1220
1191 /* 1221 /*
1192 * Split up the inode number into its parts. 1222 * Split up the inode number into its parts.
1193 */ 1223 */
@@ -1198,24 +1228,24 @@ xfs_dilocate(
1198 ino != XFS_AGINO_TO_INO(mp, agno, agino)) { 1228 ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
1199#ifdef DEBUG 1229#ifdef DEBUG
1200 /* no diagnostics for bulkstat, ino comes from userspace */ 1230 /* no diagnostics for bulkstat, ino comes from userspace */
1201 if (flags & XFS_IMAP_BULKSTAT) 1231 if (flags & XFS_IGET_BULKSTAT)
1202 return XFS_ERROR(EINVAL); 1232 return XFS_ERROR(EINVAL);
1203 if (agno >= mp->m_sb.sb_agcount) { 1233 if (agno >= mp->m_sb.sb_agcount) {
1204 xfs_fs_cmn_err(CE_ALERT, mp, 1234 xfs_fs_cmn_err(CE_ALERT, mp,
1205 "xfs_dilocate: agno (%d) >= " 1235 "xfs_imap: agno (%d) >= "
1206 "mp->m_sb.sb_agcount (%d)", 1236 "mp->m_sb.sb_agcount (%d)",
1207 agno, mp->m_sb.sb_agcount); 1237 agno, mp->m_sb.sb_agcount);
1208 } 1238 }
1209 if (agbno >= mp->m_sb.sb_agblocks) { 1239 if (agbno >= mp->m_sb.sb_agblocks) {
1210 xfs_fs_cmn_err(CE_ALERT, mp, 1240 xfs_fs_cmn_err(CE_ALERT, mp,
1211 "xfs_dilocate: agbno (0x%llx) >= " 1241 "xfs_imap: agbno (0x%llx) >= "
1212 "mp->m_sb.sb_agblocks (0x%lx)", 1242 "mp->m_sb.sb_agblocks (0x%lx)",
1213 (unsigned long long) agbno, 1243 (unsigned long long) agbno,
1214 (unsigned long) mp->m_sb.sb_agblocks); 1244 (unsigned long) mp->m_sb.sb_agblocks);
1215 } 1245 }
1216 if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) { 1246 if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
1217 xfs_fs_cmn_err(CE_ALERT, mp, 1247 xfs_fs_cmn_err(CE_ALERT, mp,
1218 "xfs_dilocate: ino (0x%llx) != " 1248 "xfs_imap: ino (0x%llx) != "
1219 "XFS_AGINO_TO_INO(mp, agno, agino) " 1249 "XFS_AGINO_TO_INO(mp, agno, agino) "
1220 "(0x%llx)", 1250 "(0x%llx)",
1221 ino, XFS_AGINO_TO_INO(mp, agno, agino)); 1251 ino, XFS_AGINO_TO_INO(mp, agno, agino));
@@ -1224,65 +1254,89 @@ xfs_dilocate(
1224#endif /* DEBUG */ 1254#endif /* DEBUG */
1225 return XFS_ERROR(EINVAL); 1255 return XFS_ERROR(EINVAL);
1226 } 1256 }
1227 if ((mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) || 1257
1228 !(flags & XFS_IMAP_LOOKUP)) { 1258 /*
1259 * If the inode cluster size is the same as the blocksize or
1260 * smaller we get to the buffer by simple arithmetics.
1261 */
1262 if (XFS_INODE_CLUSTER_SIZE(mp) <= mp->m_sb.sb_blocksize) {
1229 offset = XFS_INO_TO_OFFSET(mp, ino); 1263 offset = XFS_INO_TO_OFFSET(mp, ino);
1230 ASSERT(offset < mp->m_sb.sb_inopblock); 1264 ASSERT(offset < mp->m_sb.sb_inopblock);
1231 *bno = XFS_AGB_TO_FSB(mp, agno, agbno); 1265
1232 *off = offset; 1266 imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno);
1233 *len = 1; 1267 imap->im_len = XFS_FSB_TO_BB(mp, 1);
1268 imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog);
1234 return 0; 1269 return 0;
1235 } 1270 }
1271
1236 blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog; 1272 blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog;
1237 if (*bno != NULLFSBLOCK) { 1273
1274 /*
1275 * If we get a block number passed from bulkstat we can use it to
1276 * find the buffer easily.
1277 */
1278 if (imap->im_blkno) {
1238 offset = XFS_INO_TO_OFFSET(mp, ino); 1279 offset = XFS_INO_TO_OFFSET(mp, ino);
1239 ASSERT(offset < mp->m_sb.sb_inopblock); 1280 ASSERT(offset < mp->m_sb.sb_inopblock);
1240 cluster_agbno = XFS_FSB_TO_AGBNO(mp, *bno); 1281
1241 *off = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) + 1282 cluster_agbno = XFS_DADDR_TO_AGBNO(mp, imap->im_blkno);
1242 offset; 1283 offset += (agbno - cluster_agbno) * mp->m_sb.sb_inopblock;
1243 *len = blks_per_cluster; 1284
1285 imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster);
1286 imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog);
1244 return 0; 1287 return 0;
1245 } 1288 }
1289
1290 /*
1291 * If the inode chunks are aligned then use simple maths to
1292 * find the location. Otherwise we have to do a btree
1293 * lookup to find the location.
1294 */
1246 if (mp->m_inoalign_mask) { 1295 if (mp->m_inoalign_mask) {
1247 offset_agbno = agbno & mp->m_inoalign_mask; 1296 offset_agbno = agbno & mp->m_inoalign_mask;
1248 chunk_agbno = agbno - offset_agbno; 1297 chunk_agbno = agbno - offset_agbno;
1249 } else { 1298 } else {
1299 xfs_btree_cur_t *cur; /* inode btree cursor */
1300 xfs_agino_t chunk_agino; /* first agino in inode chunk */
1301 __int32_t chunk_cnt; /* count of free inodes in chunk */
1302 xfs_inofree_t chunk_free; /* mask of free inodes in chunk */
1303 xfs_buf_t *agbp; /* agi buffer */
1304 int i; /* temp state */
1305
1250 down_read(&mp->m_peraglock); 1306 down_read(&mp->m_peraglock);
1251 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); 1307 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
1252 up_read(&mp->m_peraglock); 1308 up_read(&mp->m_peraglock);
1253 if (error) { 1309 if (error) {
1254#ifdef DEBUG 1310 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1255 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: "
1256 "xfs_ialloc_read_agi() returned " 1311 "xfs_ialloc_read_agi() returned "
1257 "error %d, agno %d", 1312 "error %d, agno %d",
1258 error, agno); 1313 error, agno);
1259#endif /* DEBUG */
1260 return error; 1314 return error;
1261 } 1315 }
1262 cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO, 1316
1263 (xfs_inode_t *)0, 0); 1317 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
1264 if ((error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i))) { 1318 error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i);
1265#ifdef DEBUG 1319 if (error) {
1266 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: " 1320 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1267 "xfs_inobt_lookup_le() failed"); 1321 "xfs_inobt_lookup_le() failed");
1268#endif /* DEBUG */
1269 goto error0; 1322 goto error0;
1270 } 1323 }
1271 if ((error = xfs_inobt_get_rec(cur, &chunk_agino, &chunk_cnt, 1324
1272 &chunk_free, &i))) { 1325 error = xfs_inobt_get_rec(cur, &chunk_agino, &chunk_cnt,
1273#ifdef DEBUG 1326 &chunk_free, &i);
1274 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: " 1327 if (error) {
1328 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1275 "xfs_inobt_get_rec() failed"); 1329 "xfs_inobt_get_rec() failed");
1276#endif /* DEBUG */
1277 goto error0; 1330 goto error0;
1278 } 1331 }
1279 if (i == 0) { 1332 if (i == 0) {
1280#ifdef DEBUG 1333#ifdef DEBUG
1281 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: " 1334 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1282 "xfs_inobt_get_rec() failed"); 1335 "xfs_inobt_get_rec() failed");
1283#endif /* DEBUG */ 1336#endif /* DEBUG */
1284 error = XFS_ERROR(EINVAL); 1337 error = XFS_ERROR(EINVAL);
1285 } 1338 }
1339 error0:
1286 xfs_trans_brelse(tp, agbp); 1340 xfs_trans_brelse(tp, agbp);
1287 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 1341 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1288 if (error) 1342 if (error)
@@ -1290,19 +1344,35 @@ xfs_dilocate(
1290 chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_agino); 1344 chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_agino);
1291 offset_agbno = agbno - chunk_agbno; 1345 offset_agbno = agbno - chunk_agbno;
1292 } 1346 }
1347
1293 ASSERT(agbno >= chunk_agbno); 1348 ASSERT(agbno >= chunk_agbno);
1294 cluster_agbno = chunk_agbno + 1349 cluster_agbno = chunk_agbno +
1295 ((offset_agbno / blks_per_cluster) * blks_per_cluster); 1350 ((offset_agbno / blks_per_cluster) * blks_per_cluster);
1296 offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) + 1351 offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) +
1297 XFS_INO_TO_OFFSET(mp, ino); 1352 XFS_INO_TO_OFFSET(mp, ino);
1298 *bno = XFS_AGB_TO_FSB(mp, agno, cluster_agbno); 1353
1299 *off = offset; 1354 imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, cluster_agbno);
1300 *len = blks_per_cluster; 1355 imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster);
1356 imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog);
1357
1358 /*
1359 * If the inode number maps to a block outside the bounds
1360 * of the file system then return NULL rather than calling
1361 * read_buf and panicing when we get an error from the
1362 * driver.
1363 */
1364 if ((imap->im_blkno + imap->im_len) >
1365 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
1366 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1367 "(imap->im_blkno (0x%llx) + imap->im_len (0x%llx)) > "
1368 " XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) (0x%llx)",
1369 (unsigned long long) imap->im_blkno,
1370 (unsigned long long) imap->im_len,
1371 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
1372 return XFS_ERROR(EINVAL);
1373 }
1374
1301 return 0; 1375 return 0;
1302error0:
1303 xfs_trans_brelse(tp, agbp);
1304 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
1305 return error;
1306} 1376}
1307 1377
1308/* 1378/*
@@ -1370,70 +1440,95 @@ xfs_ialloc_log_agi(
1370 xfs_trans_log_buf(tp, bp, first, last); 1440 xfs_trans_log_buf(tp, bp, first, last);
1371} 1441}
1372 1442
1443#ifdef DEBUG
1444STATIC void
1445xfs_check_agi_unlinked(
1446 struct xfs_agi *agi)
1447{
1448 int i;
1449
1450 for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++)
1451 ASSERT(agi->agi_unlinked[i]);
1452}
1453#else
1454#define xfs_check_agi_unlinked(agi)
1455#endif
1456
1373/* 1457/*
1374 * Read in the allocation group header (inode allocation section) 1458 * Read in the allocation group header (inode allocation section)
1375 */ 1459 */
1376int 1460int
1377xfs_ialloc_read_agi( 1461xfs_read_agi(
1378 xfs_mount_t *mp, /* file system mount structure */ 1462 struct xfs_mount *mp, /* file system mount structure */
1379 xfs_trans_t *tp, /* transaction pointer */ 1463 struct xfs_trans *tp, /* transaction pointer */
1380 xfs_agnumber_t agno, /* allocation group number */ 1464 xfs_agnumber_t agno, /* allocation group number */
1381 xfs_buf_t **bpp) /* allocation group hdr buf */ 1465 struct xfs_buf **bpp) /* allocation group hdr buf */
1382{ 1466{
1383 xfs_agi_t *agi; /* allocation group header */ 1467 struct xfs_agi *agi; /* allocation group header */
1384 int agi_ok; /* agi is consistent */ 1468 int agi_ok; /* agi is consistent */
1385 xfs_buf_t *bp; /* allocation group hdr buf */ 1469 int error;
1386 xfs_perag_t *pag; /* per allocation group data */
1387 int error;
1388 1470
1389 ASSERT(agno != NULLAGNUMBER); 1471 ASSERT(agno != NULLAGNUMBER);
1390 error = xfs_trans_read_buf( 1472
1391 mp, tp, mp->m_ddev_targp, 1473 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
1392 XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), 1474 XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
1393 XFS_FSS_TO_BB(mp, 1), 0, &bp); 1475 XFS_FSS_TO_BB(mp, 1), 0, bpp);
1394 if (error) 1476 if (error)
1395 return error; 1477 return error;
1396 ASSERT(bp && !XFS_BUF_GETERROR(bp)); 1478
1479 ASSERT(*bpp && !XFS_BUF_GETERROR(*bpp));
1480 agi = XFS_BUF_TO_AGI(*bpp);
1397 1481
1398 /* 1482 /*
1399 * Validate the magic number of the agi block. 1483 * Validate the magic number of the agi block.
1400 */ 1484 */
1401 agi = XFS_BUF_TO_AGI(bp); 1485 agi_ok = be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC &&
1402 agi_ok = 1486 XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)) &&
1403 be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC && 1487 be32_to_cpu(agi->agi_seqno) == agno;
1404 XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum));
1405 if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI, 1488 if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI,
1406 XFS_RANDOM_IALLOC_READ_AGI))) { 1489 XFS_RANDOM_IALLOC_READ_AGI))) {
1407 XFS_CORRUPTION_ERROR("xfs_ialloc_read_agi", XFS_ERRLEVEL_LOW, 1490 XFS_CORRUPTION_ERROR("xfs_read_agi", XFS_ERRLEVEL_LOW,
1408 mp, agi); 1491 mp, agi);
1409 xfs_trans_brelse(tp, bp); 1492 xfs_trans_brelse(tp, *bpp);
1410 return XFS_ERROR(EFSCORRUPTED); 1493 return XFS_ERROR(EFSCORRUPTED);
1411 } 1494 }
1495
1496 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGI, XFS_AGI_REF);
1497
1498 xfs_check_agi_unlinked(agi);
1499 return 0;
1500}
1501
1502int
1503xfs_ialloc_read_agi(
1504 struct xfs_mount *mp, /* file system mount structure */
1505 struct xfs_trans *tp, /* transaction pointer */
1506 xfs_agnumber_t agno, /* allocation group number */
1507 struct xfs_buf **bpp) /* allocation group hdr buf */
1508{
1509 struct xfs_agi *agi; /* allocation group header */
1510 struct xfs_perag *pag; /* per allocation group data */
1511 int error;
1512
1513 error = xfs_read_agi(mp, tp, agno, bpp);
1514 if (error)
1515 return error;
1516
1517 agi = XFS_BUF_TO_AGI(*bpp);
1412 pag = &mp->m_perag[agno]; 1518 pag = &mp->m_perag[agno];
1519
1413 if (!pag->pagi_init) { 1520 if (!pag->pagi_init) {
1414 pag->pagi_freecount = be32_to_cpu(agi->agi_freecount); 1521 pag->pagi_freecount = be32_to_cpu(agi->agi_freecount);
1415 pag->pagi_count = be32_to_cpu(agi->agi_count); 1522 pag->pagi_count = be32_to_cpu(agi->agi_count);
1416 pag->pagi_init = 1; 1523 pag->pagi_init = 1;
1417 } else {
1418 /*
1419 * It's possible for these to be out of sync if
1420 * we are in the middle of a forced shutdown.
1421 */
1422 ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) ||
1423 XFS_FORCED_SHUTDOWN(mp));
1424 } 1524 }
1425 1525
1426#ifdef DEBUG 1526 /*
1427 { 1527 * It's possible for these to be out of sync if
1428 int i; 1528 * we are in the middle of a forced shutdown.
1429 1529 */
1430 for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) 1530 ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) ||
1431 ASSERT(agi->agi_unlinked[i]); 1531 XFS_FORCED_SHUTDOWN(mp));
1432 }
1433#endif
1434
1435 XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGI, XFS_AGI_REF);
1436 *bpp = bp;
1437 return 0; 1532 return 0;
1438} 1533}
1439 1534
diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h
index 4e30ec1d13b..50f558a4e0a 100644
--- a/fs/xfs/xfs_ialloc.h
+++ b/fs/xfs/xfs_ialloc.h
@@ -20,6 +20,7 @@
20 20
21struct xfs_buf; 21struct xfs_buf;
22struct xfs_dinode; 22struct xfs_dinode;
23struct xfs_imap;
23struct xfs_mount; 24struct xfs_mount;
24struct xfs_trans; 25struct xfs_trans;
25 26
@@ -56,7 +57,6 @@ static inline int xfs_ialloc_find_free(xfs_inofree_t *fp)
56} 57}
57 58
58 59
59#ifdef __KERNEL__
60/* 60/*
61 * Allocate an inode on disk. 61 * Allocate an inode on disk.
62 * Mode is used to tell whether the new inode will need space, and whether 62 * Mode is used to tell whether the new inode will need space, and whether
@@ -105,17 +105,14 @@ xfs_difree(
105 xfs_ino_t *first_ino); /* first inode in deleted cluster */ 105 xfs_ino_t *first_ino); /* first inode in deleted cluster */
106 106
107/* 107/*
108 * Return the location of the inode in bno/len/off, 108 * Return the location of the inode in imap, for mapping it into a buffer.
109 * for mapping it into a buffer.
110 */ 109 */
111int 110int
112xfs_dilocate( 111xfs_imap(
113 struct xfs_mount *mp, /* file system mount structure */ 112 struct xfs_mount *mp, /* file system mount structure */
114 struct xfs_trans *tp, /* transaction pointer */ 113 struct xfs_trans *tp, /* transaction pointer */
115 xfs_ino_t ino, /* inode to locate */ 114 xfs_ino_t ino, /* inode to locate */
116 xfs_fsblock_t *bno, /* output: block containing inode */ 115 struct xfs_imap *imap, /* location map structure */
117 int *len, /* output: num blocks in cluster*/
118 int *off, /* output: index in block of inode */
119 uint flags); /* flags for inode btree lookup */ 116 uint flags); /* flags for inode btree lookup */
120 117
121/* 118/*
@@ -154,6 +151,24 @@ xfs_ialloc_pagi_init(
154 struct xfs_trans *tp, /* transaction pointer */ 151 struct xfs_trans *tp, /* transaction pointer */
155 xfs_agnumber_t agno); /* allocation group number */ 152 xfs_agnumber_t agno); /* allocation group number */
156 153
157#endif /* __KERNEL__ */ 154/*
155 * Lookup the first record greater than or equal to ino
156 * in the btree given by cur.
157 */
158int xfs_inobt_lookup_ge(struct xfs_btree_cur *cur, xfs_agino_t ino,
159 __int32_t fcnt, xfs_inofree_t free, int *stat);
160
161/*
162 * Lookup the first record less than or equal to ino
163 * in the btree given by cur.
164 */
165int xfs_inobt_lookup_le(struct xfs_btree_cur *cur, xfs_agino_t ino,
166 __int32_t fcnt, xfs_inofree_t free, int *stat);
167
168/*
169 * Get the data from the pointed-to record.
170 */
171extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur, xfs_agino_t *ino,
172 __int32_t *fcnt, xfs_inofree_t *free, int *stat);
158 173
159#endif /* __XFS_IALLOC_H__ */ 174#endif /* __XFS_IALLOC_H__ */
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
index 83502f3edef..99f2408e8d8 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -35,2044 +35,349 @@
35#include "xfs_dinode.h" 35#include "xfs_dinode.h"
36#include "xfs_inode.h" 36#include "xfs_inode.h"
37#include "xfs_btree.h" 37#include "xfs_btree.h"
38#include "xfs_btree_trace.h"
38#include "xfs_ialloc.h" 39#include "xfs_ialloc.h"
39#include "xfs_alloc.h" 40#include "xfs_alloc.h"
40#include "xfs_error.h" 41#include "xfs_error.h"
41 42
42STATIC void xfs_inobt_log_block(xfs_trans_t *, xfs_buf_t *, int);
43STATIC void xfs_inobt_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int);
44STATIC void xfs_inobt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
45STATIC void xfs_inobt_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
46STATIC int xfs_inobt_lshift(xfs_btree_cur_t *, int, int *);
47STATIC int xfs_inobt_newroot(xfs_btree_cur_t *, int *);
48STATIC int xfs_inobt_rshift(xfs_btree_cur_t *, int, int *);
49STATIC int xfs_inobt_split(xfs_btree_cur_t *, int, xfs_agblock_t *,
50 xfs_inobt_key_t *, xfs_btree_cur_t **, int *);
51STATIC int xfs_inobt_updkey(xfs_btree_cur_t *, xfs_inobt_key_t *, int);
52 43
53/* 44STATIC int
54 * Single level of the xfs_inobt_delete record deletion routine. 45xfs_inobt_get_minrecs(
55 * Delete record pointed to by cur/level. 46 struct xfs_btree_cur *cur,
56 * Remove the record from its block then rebalance the tree. 47 int level)
57 * Return 0 for error, 1 for done, 2 to go on to the next level.
58 */
59STATIC int /* error */
60xfs_inobt_delrec(
61 xfs_btree_cur_t *cur, /* btree cursor */
62 int level, /* level removing record from */
63 int *stat) /* fail/done/go-on */
64{ 48{
65 xfs_buf_t *agbp; /* buffer for a.g. inode header */ 49 return cur->bc_mp->m_inobt_mnr[level != 0];
66 xfs_mount_t *mp; /* mount structure */ 50}
67 xfs_agi_t *agi; /* allocation group inode header */
68 xfs_inobt_block_t *block; /* btree block record/key lives in */
69 xfs_agblock_t bno; /* btree block number */
70 xfs_buf_t *bp; /* buffer for block */
71 int error; /* error return value */
72 int i; /* loop index */
73 xfs_inobt_key_t key; /* kp points here if block is level 0 */
74 xfs_inobt_key_t *kp = NULL; /* pointer to btree keys */
75 xfs_agblock_t lbno; /* left block's block number */
76 xfs_buf_t *lbp; /* left block's buffer pointer */
77 xfs_inobt_block_t *left; /* left btree block */
78 xfs_inobt_key_t *lkp; /* left block key pointer */
79 xfs_inobt_ptr_t *lpp; /* left block address pointer */
80 int lrecs = 0; /* number of records in left block */
81 xfs_inobt_rec_t *lrp; /* left block record pointer */
82 xfs_inobt_ptr_t *pp = NULL; /* pointer to btree addresses */
83 int ptr; /* index in btree block for this rec */
84 xfs_agblock_t rbno; /* right block's block number */
85 xfs_buf_t *rbp; /* right block's buffer pointer */
86 xfs_inobt_block_t *right; /* right btree block */
87 xfs_inobt_key_t *rkp; /* right block key pointer */
88 xfs_inobt_rec_t *rp; /* pointer to btree records */
89 xfs_inobt_ptr_t *rpp; /* right block address pointer */
90 int rrecs = 0; /* number of records in right block */
91 int numrecs;
92 xfs_inobt_rec_t *rrp; /* right block record pointer */
93 xfs_btree_cur_t *tcur; /* temporary btree cursor */
94
95 mp = cur->bc_mp;
96
97 /*
98 * Get the index of the entry being deleted, check for nothing there.
99 */
100 ptr = cur->bc_ptrs[level];
101 if (ptr == 0) {
102 *stat = 0;
103 return 0;
104 }
105
106 /*
107 * Get the buffer & block containing the record or key/ptr.
108 */
109 bp = cur->bc_bufs[level];
110 block = XFS_BUF_TO_INOBT_BLOCK(bp);
111#ifdef DEBUG
112 if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
113 return error;
114#endif
115 /*
116 * Fail if we're off the end of the block.
117 */
118 51
119 numrecs = be16_to_cpu(block->bb_numrecs); 52STATIC struct xfs_btree_cur *
120 if (ptr > numrecs) { 53xfs_inobt_dup_cursor(
121 *stat = 0; 54 struct xfs_btree_cur *cur)
122 return 0; 55{
123 } 56 return xfs_inobt_init_cursor(cur->bc_mp, cur->bc_tp,
124 /* 57 cur->bc_private.a.agbp, cur->bc_private.a.agno);
125 * It's a nonleaf. Excise the key and ptr being deleted, by 58}
126 * sliding the entries past them down one.
127 * Log the changed areas of the block.
128 */
129 if (level > 0) {
130 kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
131 pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
132#ifdef DEBUG
133 for (i = ptr; i < numrecs; i++) {
134 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i]), level)))
135 return error;
136 }
137#endif
138 if (ptr < numrecs) {
139 memmove(&kp[ptr - 1], &kp[ptr],
140 (numrecs - ptr) * sizeof(*kp));
141 memmove(&pp[ptr - 1], &pp[ptr],
142 (numrecs - ptr) * sizeof(*kp));
143 xfs_inobt_log_keys(cur, bp, ptr, numrecs - 1);
144 xfs_inobt_log_ptrs(cur, bp, ptr, numrecs - 1);
145 }
146 }
147 /*
148 * It's a leaf. Excise the record being deleted, by sliding the
149 * entries past it down one. Log the changed areas of the block.
150 */
151 else {
152 rp = XFS_INOBT_REC_ADDR(block, 1, cur);
153 if (ptr < numrecs) {
154 memmove(&rp[ptr - 1], &rp[ptr],
155 (numrecs - ptr) * sizeof(*rp));
156 xfs_inobt_log_recs(cur, bp, ptr, numrecs - 1);
157 }
158 /*
159 * If it's the first record in the block, we'll need a key
160 * structure to pass up to the next level (updkey).
161 */
162 if (ptr == 1) {
163 key.ir_startino = rp->ir_startino;
164 kp = &key;
165 }
166 }
167 /*
168 * Decrement and log the number of entries in the block.
169 */
170 numrecs--;
171 block->bb_numrecs = cpu_to_be16(numrecs);
172 xfs_inobt_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
173 /*
174 * Is this the root level? If so, we're almost done.
175 */
176 if (level == cur->bc_nlevels - 1) {
177 /*
178 * If this is the root level,
179 * and there's only one entry left,
180 * and it's NOT the leaf level,
181 * then we can get rid of this level.
182 */
183 if (numrecs == 1 && level > 0) {
184 agbp = cur->bc_private.a.agbp;
185 agi = XFS_BUF_TO_AGI(agbp);
186 /*
187 * pp is still set to the first pointer in the block.
188 * Make it the new root of the btree.
189 */
190 bno = be32_to_cpu(agi->agi_root);
191 agi->agi_root = *pp;
192 be32_add_cpu(&agi->agi_level, -1);
193 /*
194 * Free the block.
195 */
196 if ((error = xfs_free_extent(cur->bc_tp,
197 XFS_AGB_TO_FSB(mp, cur->bc_private.a.agno, bno), 1)))
198 return error;
199 xfs_trans_binval(cur->bc_tp, bp);
200 xfs_ialloc_log_agi(cur->bc_tp, agbp,
201 XFS_AGI_ROOT | XFS_AGI_LEVEL);
202 /*
203 * Update the cursor so there's one fewer level.
204 */
205 cur->bc_bufs[level] = NULL;
206 cur->bc_nlevels--;
207 } else if (level > 0 &&
208 (error = xfs_inobt_decrement(cur, level, &i)))
209 return error;
210 *stat = 1;
211 return 0;
212 }
213 /*
214 * If we deleted the leftmost entry in the block, update the
215 * key values above us in the tree.
216 */
217 if (ptr == 1 && (error = xfs_inobt_updkey(cur, kp, level + 1)))
218 return error;
219 /*
220 * If the number of records remaining in the block is at least
221 * the minimum, we're done.
222 */
223 if (numrecs >= XFS_INOBT_BLOCK_MINRECS(level, cur)) {
224 if (level > 0 &&
225 (error = xfs_inobt_decrement(cur, level, &i)))
226 return error;
227 *stat = 1;
228 return 0;
229 }
230 /*
231 * Otherwise, we have to move some records around to keep the
232 * tree balanced. Look at the left and right sibling blocks to
233 * see if we can re-balance by moving only one record.
234 */
235 rbno = be32_to_cpu(block->bb_rightsib);
236 lbno = be32_to_cpu(block->bb_leftsib);
237 bno = NULLAGBLOCK;
238 ASSERT(rbno != NULLAGBLOCK || lbno != NULLAGBLOCK);
239 /*
240 * Duplicate the cursor so our btree manipulations here won't
241 * disrupt the next level up.
242 */
243 if ((error = xfs_btree_dup_cursor(cur, &tcur)))
244 return error;
245 /*
246 * If there's a right sibling, see if it's ok to shift an entry
247 * out of it.
248 */
249 if (rbno != NULLAGBLOCK) {
250 /*
251 * Move the temp cursor to the last entry in the next block.
252 * Actually any entry but the first would suffice.
253 */
254 i = xfs_btree_lastrec(tcur, level);
255 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
256 if ((error = xfs_inobt_increment(tcur, level, &i)))
257 goto error0;
258 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
259 i = xfs_btree_lastrec(tcur, level);
260 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
261 /*
262 * Grab a pointer to the block.
263 */
264 rbp = tcur->bc_bufs[level];
265 right = XFS_BUF_TO_INOBT_BLOCK(rbp);
266#ifdef DEBUG
267 if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
268 goto error0;
269#endif
270 /*
271 * Grab the current block number, for future use.
272 */
273 bno = be32_to_cpu(right->bb_leftsib);
274 /*
275 * If right block is full enough so that removing one entry
276 * won't make it too empty, and left-shifting an entry out
277 * of right to us works, we're done.
278 */
279 if (be16_to_cpu(right->bb_numrecs) - 1 >=
280 XFS_INOBT_BLOCK_MINRECS(level, cur)) {
281 if ((error = xfs_inobt_lshift(tcur, level, &i)))
282 goto error0;
283 if (i) {
284 ASSERT(be16_to_cpu(block->bb_numrecs) >=
285 XFS_INOBT_BLOCK_MINRECS(level, cur));
286 xfs_btree_del_cursor(tcur,
287 XFS_BTREE_NOERROR);
288 if (level > 0 &&
289 (error = xfs_inobt_decrement(cur, level,
290 &i)))
291 return error;
292 *stat = 1;
293 return 0;
294 }
295 }
296 /*
297 * Otherwise, grab the number of records in right for
298 * future reference, and fix up the temp cursor to point
299 * to our block again (last record).
300 */
301 rrecs = be16_to_cpu(right->bb_numrecs);
302 if (lbno != NULLAGBLOCK) {
303 xfs_btree_firstrec(tcur, level);
304 if ((error = xfs_inobt_decrement(tcur, level, &i)))
305 goto error0;
306 }
307 }
308 /*
309 * If there's a left sibling, see if it's ok to shift an entry
310 * out of it.
311 */
312 if (lbno != NULLAGBLOCK) {
313 /*
314 * Move the temp cursor to the first entry in the
315 * previous block.
316 */
317 xfs_btree_firstrec(tcur, level);
318 if ((error = xfs_inobt_decrement(tcur, level, &i)))
319 goto error0;
320 xfs_btree_firstrec(tcur, level);
321 /*
322 * Grab a pointer to the block.
323 */
324 lbp = tcur->bc_bufs[level];
325 left = XFS_BUF_TO_INOBT_BLOCK(lbp);
326#ifdef DEBUG
327 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
328 goto error0;
329#endif
330 /*
331 * Grab the current block number, for future use.
332 */
333 bno = be32_to_cpu(left->bb_rightsib);
334 /*
335 * If left block is full enough so that removing one entry
336 * won't make it too empty, and right-shifting an entry out
337 * of left to us works, we're done.
338 */
339 if (be16_to_cpu(left->bb_numrecs) - 1 >=
340 XFS_INOBT_BLOCK_MINRECS(level, cur)) {
341 if ((error = xfs_inobt_rshift(tcur, level, &i)))
342 goto error0;
343 if (i) {
344 ASSERT(be16_to_cpu(block->bb_numrecs) >=
345 XFS_INOBT_BLOCK_MINRECS(level, cur));
346 xfs_btree_del_cursor(tcur,
347 XFS_BTREE_NOERROR);
348 if (level == 0)
349 cur->bc_ptrs[0]++;
350 *stat = 1;
351 return 0;
352 }
353 }
354 /*
355 * Otherwise, grab the number of records in right for
356 * future reference.
357 */
358 lrecs = be16_to_cpu(left->bb_numrecs);
359 }
360 /*
361 * Delete the temp cursor, we're done with it.
362 */
363 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
364 /*
365 * If here, we need to do a join to keep the tree balanced.
366 */
367 ASSERT(bno != NULLAGBLOCK);
368 /*
369 * See if we can join with the left neighbor block.
370 */
371 if (lbno != NULLAGBLOCK &&
372 lrecs + numrecs <= XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
373 /*
374 * Set "right" to be the starting block,
375 * "left" to be the left neighbor.
376 */
377 rbno = bno;
378 right = block;
379 rrecs = be16_to_cpu(right->bb_numrecs);
380 rbp = bp;
381 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
382 cur->bc_private.a.agno, lbno, 0, &lbp,
383 XFS_INO_BTREE_REF)))
384 return error;
385 left = XFS_BUF_TO_INOBT_BLOCK(lbp);
386 lrecs = be16_to_cpu(left->bb_numrecs);
387 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
388 return error;
389 }
390 /*
391 * If that won't work, see if we can join with the right neighbor block.
392 */
393 else if (rbno != NULLAGBLOCK &&
394 rrecs + numrecs <= XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
395 /*
396 * Set "left" to be the starting block,
397 * "right" to be the right neighbor.
398 */
399 lbno = bno;
400 left = block;
401 lrecs = be16_to_cpu(left->bb_numrecs);
402 lbp = bp;
403 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
404 cur->bc_private.a.agno, rbno, 0, &rbp,
405 XFS_INO_BTREE_REF)))
406 return error;
407 right = XFS_BUF_TO_INOBT_BLOCK(rbp);
408 rrecs = be16_to_cpu(right->bb_numrecs);
409 if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
410 return error;
411 }
412 /*
413 * Otherwise, we can't fix the imbalance.
414 * Just return. This is probably a logic error, but it's not fatal.
415 */
416 else {
417 if (level > 0 && (error = xfs_inobt_decrement(cur, level, &i)))
418 return error;
419 *stat = 1;
420 return 0;
421 }
422 /*
423 * We're now going to join "left" and "right" by moving all the stuff
424 * in "right" to "left" and deleting "right".
425 */
426 if (level > 0) {
427 /*
428 * It's a non-leaf. Move keys and pointers.
429 */
430 lkp = XFS_INOBT_KEY_ADDR(left, lrecs + 1, cur);
431 lpp = XFS_INOBT_PTR_ADDR(left, lrecs + 1, cur);
432 rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
433 rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
434#ifdef DEBUG
435 for (i = 0; i < rrecs; i++) {
436 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level)))
437 return error;
438 }
439#endif
440 memcpy(lkp, rkp, rrecs * sizeof(*lkp));
441 memcpy(lpp, rpp, rrecs * sizeof(*lpp));
442 xfs_inobt_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
443 xfs_inobt_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
444 } else {
445 /*
446 * It's a leaf. Move records.
447 */
448 lrp = XFS_INOBT_REC_ADDR(left, lrecs + 1, cur);
449 rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
450 memcpy(lrp, rrp, rrecs * sizeof(*lrp));
451 xfs_inobt_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
452 }
453 /*
454 * If we joined with the left neighbor, set the buffer in the
455 * cursor to the left block, and fix up the index.
456 */
457 if (bp != lbp) {
458 xfs_btree_setbuf(cur, level, lbp);
459 cur->bc_ptrs[level] += lrecs;
460 }
461 /*
462 * If we joined with the right neighbor and there's a level above
463 * us, increment the cursor at that level.
464 */
465 else if (level + 1 < cur->bc_nlevels &&
466 (error = xfs_alloc_increment(cur, level + 1, &i)))
467 return error;
468 /*
469 * Fix up the number of records in the surviving block.
470 */
471 lrecs += rrecs;
472 left->bb_numrecs = cpu_to_be16(lrecs);
473 /*
474 * Fix up the right block pointer in the surviving block, and log it.
475 */
476 left->bb_rightsib = right->bb_rightsib;
477 xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
478 /*
479 * If there is a right sibling now, make it point to the
480 * remaining block.
481 */
482 if (be32_to_cpu(left->bb_rightsib) != NULLAGBLOCK) {
483 xfs_inobt_block_t *rrblock;
484 xfs_buf_t *rrbp;
485 59
486 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, 60STATIC void
487 cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib), 0, 61xfs_inobt_set_root(
488 &rrbp, XFS_INO_BTREE_REF))) 62 struct xfs_btree_cur *cur,
489 return error; 63 union xfs_btree_ptr *nptr,
490 rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp); 64 int inc) /* level change */
491 if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp))) 65{
492 return error; 66 struct xfs_buf *agbp = cur->bc_private.a.agbp;
493 rrblock->bb_leftsib = cpu_to_be32(lbno); 67 struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
494 xfs_inobt_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB);
495 }
496 /*
497 * Free the deleting block.
498 */
499 if ((error = xfs_free_extent(cur->bc_tp, XFS_AGB_TO_FSB(mp,
500 cur->bc_private.a.agno, rbno), 1)))
501 return error;
502 xfs_trans_binval(cur->bc_tp, rbp);
503 /*
504 * Readjust the ptr at this level if it's not a leaf, since it's
505 * still pointing at the deletion point, which makes the cursor
506 * inconsistent. If this makes the ptr 0, the caller fixes it up.
507 * We can't use decrement because it would change the next level up.
508 */
509 if (level > 0)
510 cur->bc_ptrs[level]--;
511 /*
512 * Return value means the next level up has something to do.
513 */
514 *stat = 2;
515 return 0;
516 68
517error0: 69 agi->agi_root = nptr->s;
518 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); 70 be32_add_cpu(&agi->agi_level, inc);
519 return error; 71 xfs_ialloc_log_agi(cur->bc_tp, agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL);
520} 72}
521 73
522/* 74STATIC int
523 * Insert one record/level. Return information to the caller 75xfs_inobt_alloc_block(
524 * allowing the next level up to proceed if necessary. 76 struct xfs_btree_cur *cur,
525 */ 77 union xfs_btree_ptr *start,
526STATIC int /* error */ 78 union xfs_btree_ptr *new,
527xfs_inobt_insrec( 79 int length,
528 xfs_btree_cur_t *cur, /* btree cursor */ 80 int *stat)
529 int level, /* level to insert record at */
530 xfs_agblock_t *bnop, /* i/o: block number inserted */
531 xfs_inobt_rec_t *recp, /* i/o: record data inserted */
532 xfs_btree_cur_t **curp, /* output: new cursor replacing cur */
533 int *stat) /* success/failure */
534{ 81{
535 xfs_inobt_block_t *block; /* btree block record/key lives in */ 82 xfs_alloc_arg_t args; /* block allocation args */
536 xfs_buf_t *bp; /* buffer for block */ 83 int error; /* error return value */
537 int error; /* error return value */ 84 xfs_agblock_t sbno = be32_to_cpu(start->s);
538 int i; /* loop index */
539 xfs_inobt_key_t key; /* key value being inserted */
540 xfs_inobt_key_t *kp=NULL; /* pointer to btree keys */
541 xfs_agblock_t nbno; /* block number of allocated block */
542 xfs_btree_cur_t *ncur; /* new cursor to be used at next lvl */
543 xfs_inobt_key_t nkey; /* new key value, from split */
544 xfs_inobt_rec_t nrec; /* new record value, for caller */
545 int numrecs;
546 int optr; /* old ptr value */
547 xfs_inobt_ptr_t *pp; /* pointer to btree addresses */
548 int ptr; /* index in btree block for this rec */
549 xfs_inobt_rec_t *rp=NULL; /* pointer to btree records */
550 85
551 /* 86 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
552 * GCC doesn't understand the (arguably complex) control flow in
553 * this function and complains about uninitialized structure fields
554 * without this.
555 */
556 memset(&nrec, 0, sizeof(nrec));
557 87
558 /* 88 memset(&args, 0, sizeof(args));
559 * If we made it to the root level, allocate a new root block 89 args.tp = cur->bc_tp;
560 * and we're done. 90 args.mp = cur->bc_mp;
561 */ 91 args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, sbno);
562 if (level >= cur->bc_nlevels) { 92 args.minlen = 1;
563 error = xfs_inobt_newroot(cur, &i); 93 args.maxlen = 1;
564 *bnop = NULLAGBLOCK; 94 args.prod = 1;
565 *stat = i; 95 args.type = XFS_ALLOCTYPE_NEAR_BNO;
96
97 error = xfs_alloc_vextent(&args);
98 if (error) {
99 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
566 return error; 100 return error;
567 } 101 }
568 /* 102 if (args.fsbno == NULLFSBLOCK) {
569 * Make a key out of the record data to be inserted, and save it. 103 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
570 */
571 key.ir_startino = recp->ir_startino;
572 optr = ptr = cur->bc_ptrs[level];
573 /*
574 * If we're off the left edge, return failure.
575 */
576 if (ptr == 0) {
577 *stat = 0; 104 *stat = 0;
578 return 0; 105 return 0;
579 } 106 }
580 /* 107 ASSERT(args.len == 1);
581 * Get pointers to the btree buffer and block. 108 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
582 */ 109
583 bp = cur->bc_bufs[level]; 110 new->s = cpu_to_be32(XFS_FSB_TO_AGBNO(args.mp, args.fsbno));
584 block = XFS_BUF_TO_INOBT_BLOCK(bp);
585 numrecs = be16_to_cpu(block->bb_numrecs);
586#ifdef DEBUG
587 if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
588 return error;
589 /*
590 * Check that the new entry is being inserted in the right place.
591 */
592 if (ptr <= numrecs) {
593 if (level == 0) {
594 rp = XFS_INOBT_REC_ADDR(block, ptr, cur);
595 xfs_btree_check_rec(cur->bc_btnum, recp, rp);
596 } else {
597 kp = XFS_INOBT_KEY_ADDR(block, ptr, cur);
598 xfs_btree_check_key(cur->bc_btnum, &key, kp);
599 }
600 }
601#endif
602 nbno = NULLAGBLOCK;
603 ncur = NULL;
604 /*
605 * If the block is full, we can't insert the new entry until we
606 * make the block un-full.
607 */
608 if (numrecs == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
609 /*
610 * First, try shifting an entry to the right neighbor.
611 */
612 if ((error = xfs_inobt_rshift(cur, level, &i)))
613 return error;
614 if (i) {
615 /* nothing */
616 }
617 /*
618 * Next, try shifting an entry to the left neighbor.
619 */
620 else {
621 if ((error = xfs_inobt_lshift(cur, level, &i)))
622 return error;
623 if (i) {
624 optr = ptr = cur->bc_ptrs[level];
625 } else {
626 /*
627 * Next, try splitting the current block
628 * in half. If this works we have to
629 * re-set our variables because
630 * we could be in a different block now.
631 */
632 if ((error = xfs_inobt_split(cur, level, &nbno,
633 &nkey, &ncur, &i)))
634 return error;
635 if (i) {
636 bp = cur->bc_bufs[level];
637 block = XFS_BUF_TO_INOBT_BLOCK(bp);
638#ifdef DEBUG
639 if ((error = xfs_btree_check_sblock(cur,
640 block, level, bp)))
641 return error;
642#endif
643 ptr = cur->bc_ptrs[level];
644 nrec.ir_startino = nkey.ir_startino;
645 } else {
646 /*
647 * Otherwise the insert fails.
648 */
649 *stat = 0;
650 return 0;
651 }
652 }
653 }
654 }
655 /*
656 * At this point we know there's room for our new entry in the block
657 * we're pointing at.
658 */
659 numrecs = be16_to_cpu(block->bb_numrecs);
660 if (level > 0) {
661 /*
662 * It's a non-leaf entry. Make a hole for the new data
663 * in the key and ptr regions of the block.
664 */
665 kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
666 pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
667#ifdef DEBUG
668 for (i = numrecs; i >= ptr; i--) {
669 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i - 1]), level)))
670 return error;
671 }
672#endif
673 memmove(&kp[ptr], &kp[ptr - 1],
674 (numrecs - ptr + 1) * sizeof(*kp));
675 memmove(&pp[ptr], &pp[ptr - 1],
676 (numrecs - ptr + 1) * sizeof(*pp));
677 /*
678 * Now stuff the new data in, bump numrecs and log the new data.
679 */
680#ifdef DEBUG
681 if ((error = xfs_btree_check_sptr(cur, *bnop, level)))
682 return error;
683#endif
684 kp[ptr - 1] = key;
685 pp[ptr - 1] = cpu_to_be32(*bnop);
686 numrecs++;
687 block->bb_numrecs = cpu_to_be16(numrecs);
688 xfs_inobt_log_keys(cur, bp, ptr, numrecs);
689 xfs_inobt_log_ptrs(cur, bp, ptr, numrecs);
690 } else {
691 /*
692 * It's a leaf entry. Make a hole for the new record.
693 */
694 rp = XFS_INOBT_REC_ADDR(block, 1, cur);
695 memmove(&rp[ptr], &rp[ptr - 1],
696 (numrecs - ptr + 1) * sizeof(*rp));
697 /*
698 * Now stuff the new record in, bump numrecs
699 * and log the new data.
700 */
701 rp[ptr - 1] = *recp;
702 numrecs++;
703 block->bb_numrecs = cpu_to_be16(numrecs);
704 xfs_inobt_log_recs(cur, bp, ptr, numrecs);
705 }
706 /*
707 * Log the new number of records in the btree header.
708 */
709 xfs_inobt_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
710#ifdef DEBUG
711 /*
712 * Check that the key/record is in the right place, now.
713 */
714 if (ptr < numrecs) {
715 if (level == 0)
716 xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1,
717 rp + ptr);
718 else
719 xfs_btree_check_key(cur->bc_btnum, kp + ptr - 1,
720 kp + ptr);
721 }
722#endif
723 /*
724 * If we inserted at the start of a block, update the parents' keys.
725 */
726 if (optr == 1 && (error = xfs_inobt_updkey(cur, &key, level + 1)))
727 return error;
728 /*
729 * Return the new block number, if any.
730 * If there is one, give back a record value and a cursor too.
731 */
732 *bnop = nbno;
733 if (nbno != NULLAGBLOCK) {
734 *recp = nrec;
735 *curp = ncur;
736 }
737 *stat = 1; 111 *stat = 1;
738 return 0; 112 return 0;
739} 113}
740 114
741/* 115STATIC int
742 * Log header fields from a btree block. 116xfs_inobt_free_block(
743 */ 117 struct xfs_btree_cur *cur,
744STATIC void 118 struct xfs_buf *bp)
745xfs_inobt_log_block(
746 xfs_trans_t *tp, /* transaction pointer */
747 xfs_buf_t *bp, /* buffer containing btree block */
748 int fields) /* mask of fields: XFS_BB_... */
749{ 119{
750 int first; /* first byte offset logged */ 120 xfs_fsblock_t fsbno;
751 int last; /* last byte offset logged */ 121 int error;
752 static const short offsets[] = { /* table of offsets */
753 offsetof(xfs_inobt_block_t, bb_magic),
754 offsetof(xfs_inobt_block_t, bb_level),
755 offsetof(xfs_inobt_block_t, bb_numrecs),
756 offsetof(xfs_inobt_block_t, bb_leftsib),
757 offsetof(xfs_inobt_block_t, bb_rightsib),
758 sizeof(xfs_inobt_block_t)
759 };
760 122
761 xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &first, &last); 123 fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp));
762 xfs_trans_log_buf(tp, bp, first, last); 124 error = xfs_free_extent(cur->bc_tp, fsbno, 1);
125 if (error)
126 return error;
127
128 xfs_trans_binval(cur->bc_tp, bp);
129 return error;
763} 130}
764 131
765/* 132STATIC int
766 * Log keys from a btree block (nonleaf). 133xfs_inobt_get_maxrecs(
767 */ 134 struct xfs_btree_cur *cur,
768STATIC void 135 int level)
769xfs_inobt_log_keys(
770 xfs_btree_cur_t *cur, /* btree cursor */
771 xfs_buf_t *bp, /* buffer containing btree block */
772 int kfirst, /* index of first key to log */
773 int klast) /* index of last key to log */
774{ 136{
775 xfs_inobt_block_t *block; /* btree block to log from */ 137 return cur->bc_mp->m_inobt_mxr[level != 0];
776 int first; /* first byte offset logged */
777 xfs_inobt_key_t *kp; /* key pointer in btree block */
778 int last; /* last byte offset logged */
779
780 block = XFS_BUF_TO_INOBT_BLOCK(bp);
781 kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
782 first = (int)((xfs_caddr_t)&kp[kfirst - 1] - (xfs_caddr_t)block);
783 last = (int)(((xfs_caddr_t)&kp[klast] - 1) - (xfs_caddr_t)block);
784 xfs_trans_log_buf(cur->bc_tp, bp, first, last);
785} 138}
786 139
787/*
788 * Log block pointer fields from a btree block (nonleaf).
789 */
790STATIC void 140STATIC void
791xfs_inobt_log_ptrs( 141xfs_inobt_init_key_from_rec(
792 xfs_btree_cur_t *cur, /* btree cursor */ 142 union xfs_btree_key *key,
793 xfs_buf_t *bp, /* buffer containing btree block */ 143 union xfs_btree_rec *rec)
794 int pfirst, /* index of first pointer to log */
795 int plast) /* index of last pointer to log */
796{ 144{
797 xfs_inobt_block_t *block; /* btree block to log from */ 145 key->inobt.ir_startino = rec->inobt.ir_startino;
798 int first; /* first byte offset logged */
799 int last; /* last byte offset logged */
800 xfs_inobt_ptr_t *pp; /* block-pointer pointer in btree blk */
801
802 block = XFS_BUF_TO_INOBT_BLOCK(bp);
803 pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
804 first = (int)((xfs_caddr_t)&pp[pfirst - 1] - (xfs_caddr_t)block);
805 last = (int)(((xfs_caddr_t)&pp[plast] - 1) - (xfs_caddr_t)block);
806 xfs_trans_log_buf(cur->bc_tp, bp, first, last);
807} 146}
808 147
809/*
810 * Log records from a btree block (leaf).
811 */
812STATIC void 148STATIC void
813xfs_inobt_log_recs( 149xfs_inobt_init_rec_from_key(
814 xfs_btree_cur_t *cur, /* btree cursor */ 150 union xfs_btree_key *key,
815 xfs_buf_t *bp, /* buffer containing btree block */ 151 union xfs_btree_rec *rec)
816 int rfirst, /* index of first record to log */
817 int rlast) /* index of last record to log */
818{ 152{
819 xfs_inobt_block_t *block; /* btree block to log from */ 153 rec->inobt.ir_startino = key->inobt.ir_startino;
820 int first; /* first byte offset logged */ 154}
821 int last; /* last byte offset logged */
822 xfs_inobt_rec_t *rp; /* record pointer for btree block */
823 155
824 block = XFS_BUF_TO_INOBT_BLOCK(bp); 156STATIC void
825 rp = XFS_INOBT_REC_ADDR(block, 1, cur); 157xfs_inobt_init_rec_from_cur(
826 first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block); 158 struct xfs_btree_cur *cur,
827 last = (int)(((xfs_caddr_t)&rp[rlast] - 1) - (xfs_caddr_t)block); 159 union xfs_btree_rec *rec)
828 xfs_trans_log_buf(cur->bc_tp, bp, first, last); 160{
161 rec->inobt.ir_startino = cpu_to_be32(cur->bc_rec.i.ir_startino);
162 rec->inobt.ir_freecount = cpu_to_be32(cur->bc_rec.i.ir_freecount);
163 rec->inobt.ir_free = cpu_to_be64(cur->bc_rec.i.ir_free);
829} 164}
830 165
831/* 166/*
832 * Lookup the record. The cursor is made to point to it, based on dir. 167 * intial value of ptr for lookup
833 * Return 0 if can't find any such record, 1 for success.
834 */ 168 */
835STATIC int /* error */ 169STATIC void
836xfs_inobt_lookup( 170xfs_inobt_init_ptr_from_cur(
837 xfs_btree_cur_t *cur, /* btree cursor */ 171 struct xfs_btree_cur *cur,
838 xfs_lookup_t dir, /* <=, ==, or >= */ 172 union xfs_btree_ptr *ptr)
839 int *stat) /* success/failure */
840{ 173{
841 xfs_agblock_t agbno; /* a.g. relative btree block number */ 174 struct xfs_agi *agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp);
842 xfs_agnumber_t agno; /* allocation group number */
843 xfs_inobt_block_t *block=NULL; /* current btree block */
844 __int64_t diff; /* difference for the current key */
845 int error; /* error return value */
846 int keyno=0; /* current key number */
847 int level; /* level in the btree */
848 xfs_mount_t *mp; /* file system mount point */
849
850 /*
851 * Get the allocation group header, and the root block number.
852 */
853 mp = cur->bc_mp;
854 {
855 xfs_agi_t *agi; /* a.g. inode header */
856
857 agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp);
858 agno = be32_to_cpu(agi->agi_seqno);
859 agbno = be32_to_cpu(agi->agi_root);
860 }
861 /*
862 * Iterate over each level in the btree, starting at the root.
863 * For each level above the leaves, find the key we need, based
864 * on the lookup record, then follow the corresponding block
865 * pointer down to the next level.
866 */
867 for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
868 xfs_buf_t *bp; /* buffer pointer for btree block */
869 xfs_daddr_t d; /* disk address of btree block */
870
871 /*
872 * Get the disk address we're looking for.
873 */
874 d = XFS_AGB_TO_DADDR(mp, agno, agbno);
875 /*
876 * If the old buffer at this level is for a different block,
877 * throw it away, otherwise just use it.
878 */
879 bp = cur->bc_bufs[level];
880 if (bp && XFS_BUF_ADDR(bp) != d)
881 bp = NULL;
882 if (!bp) {
883 /*
884 * Need to get a new buffer. Read it, then
885 * set it in the cursor, releasing the old one.
886 */
887 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
888 agno, agbno, 0, &bp, XFS_INO_BTREE_REF)))
889 return error;
890 xfs_btree_setbuf(cur, level, bp);
891 /*
892 * Point to the btree block, now that we have the buffer
893 */
894 block = XFS_BUF_TO_INOBT_BLOCK(bp);
895 if ((error = xfs_btree_check_sblock(cur, block, level,
896 bp)))
897 return error;
898 } else
899 block = XFS_BUF_TO_INOBT_BLOCK(bp);
900 /*
901 * If we already had a key match at a higher level, we know
902 * we need to use the first entry in this block.
903 */
904 if (diff == 0)
905 keyno = 1;
906 /*
907 * Otherwise we need to search this block. Do a binary search.
908 */
909 else {
910 int high; /* high entry number */
911 xfs_inobt_key_t *kkbase=NULL;/* base of keys in block */
912 xfs_inobt_rec_t *krbase=NULL;/* base of records in block */
913 int low; /* low entry number */
914 175
915 /* 176 ASSERT(cur->bc_private.a.agno == be32_to_cpu(agi->agi_seqno));
916 * Get a pointer to keys or records.
917 */
918 if (level > 0)
919 kkbase = XFS_INOBT_KEY_ADDR(block, 1, cur);
920 else
921 krbase = XFS_INOBT_REC_ADDR(block, 1, cur);
922 /*
923 * Set low and high entry numbers, 1-based.
924 */
925 low = 1;
926 if (!(high = be16_to_cpu(block->bb_numrecs))) {
927 /*
928 * If the block is empty, the tree must
929 * be an empty leaf.
930 */
931 ASSERT(level == 0 && cur->bc_nlevels == 1);
932 cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
933 *stat = 0;
934 return 0;
935 }
936 /*
937 * Binary search the block.
938 */
939 while (low <= high) {
940 xfs_agino_t startino; /* key value */
941
942 /*
943 * keyno is average of low and high.
944 */
945 keyno = (low + high) >> 1;
946 /*
947 * Get startino.
948 */
949 if (level > 0) {
950 xfs_inobt_key_t *kkp;
951
952 kkp = kkbase + keyno - 1;
953 startino = be32_to_cpu(kkp->ir_startino);
954 } else {
955 xfs_inobt_rec_t *krp;
956
957 krp = krbase + keyno - 1;
958 startino = be32_to_cpu(krp->ir_startino);
959 }
960 /*
961 * Compute difference to get next direction.
962 */
963 diff = (__int64_t)
964 startino - cur->bc_rec.i.ir_startino;
965 /*
966 * Less than, move right.
967 */
968 if (diff < 0)
969 low = keyno + 1;
970 /*
971 * Greater than, move left.
972 */
973 else if (diff > 0)
974 high = keyno - 1;
975 /*
976 * Equal, we're done.
977 */
978 else
979 break;
980 }
981 }
982 /*
983 * If there are more levels, set up for the next level
984 * by getting the block number and filling in the cursor.
985 */
986 if (level > 0) {
987 /*
988 * If we moved left, need the previous key number,
989 * unless there isn't one.
990 */
991 if (diff > 0 && --keyno < 1)
992 keyno = 1;
993 agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, keyno, cur));
994#ifdef DEBUG
995 if ((error = xfs_btree_check_sptr(cur, agbno, level)))
996 return error;
997#endif
998 cur->bc_ptrs[level] = keyno;
999 }
1000 }
1001 /*
1002 * Done with the search.
1003 * See if we need to adjust the results.
1004 */
1005 if (dir != XFS_LOOKUP_LE && diff < 0) {
1006 keyno++;
1007 /*
1008 * If ge search and we went off the end of the block, but it's
1009 * not the last block, we're in the wrong block.
1010 */
1011 if (dir == XFS_LOOKUP_GE &&
1012 keyno > be16_to_cpu(block->bb_numrecs) &&
1013 be32_to_cpu(block->bb_rightsib) != NULLAGBLOCK) {
1014 int i;
1015 177
1016 cur->bc_ptrs[0] = keyno; 178 ptr->s = agi->agi_root;
1017 if ((error = xfs_inobt_increment(cur, 0, &i)))
1018 return error;
1019 ASSERT(i == 1);
1020 *stat = 1;
1021 return 0;
1022 }
1023 }
1024 else if (dir == XFS_LOOKUP_LE && diff > 0)
1025 keyno--;
1026 cur->bc_ptrs[0] = keyno;
1027 /*
1028 * Return if we succeeded or not.
1029 */
1030 if (keyno == 0 || keyno > be16_to_cpu(block->bb_numrecs))
1031 *stat = 0;
1032 else
1033 *stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0));
1034 return 0;
1035} 179}
1036 180
1037/* 181STATIC __int64_t
1038 * Move 1 record left from cur/level if possible. 182xfs_inobt_key_diff(
1039 * Update cur to reflect the new path. 183 struct xfs_btree_cur *cur,
1040 */ 184 union xfs_btree_key *key)
1041STATIC int /* error */
1042xfs_inobt_lshift(
1043 xfs_btree_cur_t *cur, /* btree cursor */
1044 int level, /* level to shift record on */
1045 int *stat) /* success/failure */
1046{ 185{
1047 int error; /* error return value */ 186 return (__int64_t)be32_to_cpu(key->inobt.ir_startino) -
1048#ifdef DEBUG 187 cur->bc_rec.i.ir_startino;
1049 int i; /* loop index */
1050#endif
1051 xfs_inobt_key_t key; /* key value for leaf level upward */
1052 xfs_buf_t *lbp; /* buffer for left neighbor block */
1053 xfs_inobt_block_t *left; /* left neighbor btree block */
1054 xfs_inobt_key_t *lkp=NULL; /* key pointer for left block */
1055 xfs_inobt_ptr_t *lpp; /* address pointer for left block */
1056 xfs_inobt_rec_t *lrp=NULL; /* record pointer for left block */
1057 int nrec; /* new number of left block entries */
1058 xfs_buf_t *rbp; /* buffer for right (current) block */
1059 xfs_inobt_block_t *right; /* right (current) btree block */
1060 xfs_inobt_key_t *rkp=NULL; /* key pointer for right block */
1061 xfs_inobt_ptr_t *rpp=NULL; /* address pointer for right block */
1062 xfs_inobt_rec_t *rrp=NULL; /* record pointer for right block */
1063
1064 /*
1065 * Set up variables for this block as "right".
1066 */
1067 rbp = cur->bc_bufs[level];
1068 right = XFS_BUF_TO_INOBT_BLOCK(rbp);
1069#ifdef DEBUG
1070 if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
1071 return error;
1072#endif
1073 /*
1074 * If we've got no left sibling then we can't shift an entry left.
1075 */
1076 if (be32_to_cpu(right->bb_leftsib) == NULLAGBLOCK) {
1077 *stat = 0;
1078 return 0;
1079 }
1080 /*
1081 * If the cursor entry is the one that would be moved, don't
1082 * do it... it's too complicated.
1083 */
1084 if (cur->bc_ptrs[level] <= 1) {
1085 *stat = 0;
1086 return 0;
1087 }
1088 /*
1089 * Set up the left neighbor as "left".
1090 */
1091 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1092 cur->bc_private.a.agno, be32_to_cpu(right->bb_leftsib),
1093 0, &lbp, XFS_INO_BTREE_REF)))
1094 return error;
1095 left = XFS_BUF_TO_INOBT_BLOCK(lbp);
1096 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1097 return error;
1098 /*
1099 * If it's full, it can't take another entry.
1100 */
1101 if (be16_to_cpu(left->bb_numrecs) == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
1102 *stat = 0;
1103 return 0;
1104 }
1105 nrec = be16_to_cpu(left->bb_numrecs) + 1;
1106 /*
1107 * If non-leaf, copy a key and a ptr to the left block.
1108 */
1109 if (level > 0) {
1110 lkp = XFS_INOBT_KEY_ADDR(left, nrec, cur);
1111 rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
1112 *lkp = *rkp;
1113 xfs_inobt_log_keys(cur, lbp, nrec, nrec);
1114 lpp = XFS_INOBT_PTR_ADDR(left, nrec, cur);
1115 rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
1116#ifdef DEBUG
1117 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*rpp), level)))
1118 return error;
1119#endif
1120 *lpp = *rpp;
1121 xfs_inobt_log_ptrs(cur, lbp, nrec, nrec);
1122 }
1123 /*
1124 * If leaf, copy a record to the left block.
1125 */
1126 else {
1127 lrp = XFS_INOBT_REC_ADDR(left, nrec, cur);
1128 rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
1129 *lrp = *rrp;
1130 xfs_inobt_log_recs(cur, lbp, nrec, nrec);
1131 }
1132 /*
1133 * Bump and log left's numrecs, decrement and log right's numrecs.
1134 */
1135 be16_add_cpu(&left->bb_numrecs, 1);
1136 xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
1137#ifdef DEBUG
1138 if (level > 0)
1139 xfs_btree_check_key(cur->bc_btnum, lkp - 1, lkp);
1140 else
1141 xfs_btree_check_rec(cur->bc_btnum, lrp - 1, lrp);
1142#endif
1143 be16_add_cpu(&right->bb_numrecs, -1);
1144 xfs_inobt_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
1145 /*
1146 * Slide the contents of right down one entry.
1147 */
1148 if (level > 0) {
1149#ifdef DEBUG
1150 for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
1151 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i + 1]),
1152 level)))
1153 return error;
1154 }
1155#endif
1156 memmove(rkp, rkp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
1157 memmove(rpp, rpp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1158 xfs_inobt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1159 xfs_inobt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1160 } else {
1161 memmove(rrp, rrp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1162 xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1163 key.ir_startino = rrp->ir_startino;
1164 rkp = &key;
1165 }
1166 /*
1167 * Update the parent key values of right.
1168 */
1169 if ((error = xfs_inobt_updkey(cur, rkp, level + 1)))
1170 return error;
1171 /*
1172 * Slide the cursor value left one.
1173 */
1174 cur->bc_ptrs[level]--;
1175 *stat = 1;
1176 return 0;
1177} 188}
1178 189
1179/* 190STATIC int
1180 * Allocate a new root block, fill it in. 191xfs_inobt_kill_root(
1181 */ 192 struct xfs_btree_cur *cur,
1182STATIC int /* error */ 193 struct xfs_buf *bp,
1183xfs_inobt_newroot( 194 int level,
1184 xfs_btree_cur_t *cur, /* btree cursor */ 195 union xfs_btree_ptr *newroot)
1185 int *stat) /* success/failure */
1186{ 196{
1187 xfs_agi_t *agi; /* a.g. inode header */ 197 int error;
1188 xfs_alloc_arg_t args; /* allocation argument structure */
1189 xfs_inobt_block_t *block; /* one half of the old root block */
1190 xfs_buf_t *bp; /* buffer containing block */
1191 int error; /* error return value */
1192 xfs_inobt_key_t *kp; /* btree key pointer */
1193 xfs_agblock_t lbno; /* left block number */
1194 xfs_buf_t *lbp; /* left buffer pointer */
1195 xfs_inobt_block_t *left; /* left btree block */
1196 xfs_buf_t *nbp; /* new (root) buffer */
1197 xfs_inobt_block_t *new; /* new (root) btree block */
1198 int nptr; /* new value for key index, 1 or 2 */
1199 xfs_inobt_ptr_t *pp; /* btree address pointer */
1200 xfs_agblock_t rbno; /* right block number */
1201 xfs_buf_t *rbp; /* right buffer pointer */
1202 xfs_inobt_block_t *right; /* right btree block */
1203 xfs_inobt_rec_t *rp; /* btree record pointer */
1204 198
1205 ASSERT(cur->bc_nlevels < XFS_IN_MAXLEVELS(cur->bc_mp)); 199 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
200 XFS_BTREE_STATS_INC(cur, killroot);
1206 201
1207 /* 202 /*
1208 * Get a block & a buffer. 203 * Update the root pointer, decreasing the level by 1 and then
204 * free the old root.
1209 */ 205 */
1210 agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp); 206 xfs_inobt_set_root(cur, newroot, -1);
1211 args.tp = cur->bc_tp; 207 error = xfs_inobt_free_block(cur, bp);
1212 args.mp = cur->bc_mp; 208 if (error) {
1213 args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, 209 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1214 be32_to_cpu(agi->agi_root));
1215 args.mod = args.minleft = args.alignment = args.total = args.wasdel =
1216 args.isfl = args.userdata = args.minalignslop = 0;
1217 args.minlen = args.maxlen = args.prod = 1;
1218 args.type = XFS_ALLOCTYPE_NEAR_BNO;
1219 if ((error = xfs_alloc_vextent(&args)))
1220 return error; 210 return error;
1221 /*
1222 * None available, we fail.
1223 */
1224 if (args.fsbno == NULLFSBLOCK) {
1225 *stat = 0;
1226 return 0;
1227 }
1228 ASSERT(args.len == 1);
1229 nbp = xfs_btree_get_bufs(args.mp, args.tp, args.agno, args.agbno, 0);
1230 new = XFS_BUF_TO_INOBT_BLOCK(nbp);
1231 /*
1232 * Set the root data in the a.g. inode structure.
1233 */
1234 agi->agi_root = cpu_to_be32(args.agbno);
1235 be32_add_cpu(&agi->agi_level, 1);
1236 xfs_ialloc_log_agi(args.tp, cur->bc_private.a.agbp,
1237 XFS_AGI_ROOT | XFS_AGI_LEVEL);
1238 /*
1239 * At the previous root level there are now two blocks: the old
1240 * root, and the new block generated when it was split.
1241 * We don't know which one the cursor is pointing at, so we
1242 * set up variables "left" and "right" for each case.
1243 */
1244 bp = cur->bc_bufs[cur->bc_nlevels - 1];
1245 block = XFS_BUF_TO_INOBT_BLOCK(bp);
1246#ifdef DEBUG
1247 if ((error = xfs_btree_check_sblock(cur, block, cur->bc_nlevels - 1, bp)))
1248 return error;
1249#endif
1250 if (be32_to_cpu(block->bb_rightsib) != NULLAGBLOCK) {
1251 /*
1252 * Our block is left, pick up the right block.
1253 */
1254 lbp = bp;
1255 lbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(lbp));
1256 left = block;
1257 rbno = be32_to_cpu(left->bb_rightsib);
1258 if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
1259 rbno, 0, &rbp, XFS_INO_BTREE_REF)))
1260 return error;
1261 bp = rbp;
1262 right = XFS_BUF_TO_INOBT_BLOCK(rbp);
1263 if ((error = xfs_btree_check_sblock(cur, right,
1264 cur->bc_nlevels - 1, rbp)))
1265 return error;
1266 nptr = 1;
1267 } else {
1268 /*
1269 * Our block is right, pick up the left block.
1270 */
1271 rbp = bp;
1272 rbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(rbp));
1273 right = block;
1274 lbno = be32_to_cpu(right->bb_leftsib);
1275 if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
1276 lbno, 0, &lbp, XFS_INO_BTREE_REF)))
1277 return error;
1278 bp = lbp;
1279 left = XFS_BUF_TO_INOBT_BLOCK(lbp);
1280 if ((error = xfs_btree_check_sblock(cur, left,
1281 cur->bc_nlevels - 1, lbp)))
1282 return error;
1283 nptr = 2;
1284 }
1285 /*
1286 * Fill in the new block's btree header and log it.
1287 */
1288 new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
1289 new->bb_level = cpu_to_be16(cur->bc_nlevels);
1290 new->bb_numrecs = cpu_to_be16(2);
1291 new->bb_leftsib = cpu_to_be32(NULLAGBLOCK);
1292 new->bb_rightsib = cpu_to_be32(NULLAGBLOCK);
1293 xfs_inobt_log_block(args.tp, nbp, XFS_BB_ALL_BITS);
1294 ASSERT(lbno != NULLAGBLOCK && rbno != NULLAGBLOCK);
1295 /*
1296 * Fill in the key data in the new root.
1297 */
1298 kp = XFS_INOBT_KEY_ADDR(new, 1, cur);
1299 if (be16_to_cpu(left->bb_level) > 0) {
1300 kp[0] = *XFS_INOBT_KEY_ADDR(left, 1, cur);
1301 kp[1] = *XFS_INOBT_KEY_ADDR(right, 1, cur);
1302 } else {
1303 rp = XFS_INOBT_REC_ADDR(left, 1, cur);
1304 kp[0].ir_startino = rp->ir_startino;
1305 rp = XFS_INOBT_REC_ADDR(right, 1, cur);
1306 kp[1].ir_startino = rp->ir_startino;
1307 } 211 }
1308 xfs_inobt_log_keys(cur, nbp, 1, 2);
1309 /*
1310 * Fill in the pointer data in the new root.
1311 */
1312 pp = XFS_INOBT_PTR_ADDR(new, 1, cur);
1313 pp[0] = cpu_to_be32(lbno);
1314 pp[1] = cpu_to_be32(rbno);
1315 xfs_inobt_log_ptrs(cur, nbp, 1, 2);
1316 /*
1317 * Fix up the cursor.
1318 */
1319 xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
1320 cur->bc_ptrs[cur->bc_nlevels] = nptr;
1321 cur->bc_nlevels++;
1322 *stat = 1;
1323 return 0;
1324}
1325 212
1326/* 213 XFS_BTREE_STATS_INC(cur, free);
1327 * Move 1 record right from cur/level if possible.
1328 * Update cur to reflect the new path.
1329 */
1330STATIC int /* error */
1331xfs_inobt_rshift(
1332 xfs_btree_cur_t *cur, /* btree cursor */
1333 int level, /* level to shift record on */
1334 int *stat) /* success/failure */
1335{
1336 int error; /* error return value */
1337 int i; /* loop index */
1338 xfs_inobt_key_t key; /* key value for leaf level upward */
1339 xfs_buf_t *lbp; /* buffer for left (current) block */
1340 xfs_inobt_block_t *left; /* left (current) btree block */
1341 xfs_inobt_key_t *lkp; /* key pointer for left block */
1342 xfs_inobt_ptr_t *lpp; /* address pointer for left block */
1343 xfs_inobt_rec_t *lrp; /* record pointer for left block */
1344 xfs_buf_t *rbp; /* buffer for right neighbor block */
1345 xfs_inobt_block_t *right; /* right neighbor btree block */
1346 xfs_inobt_key_t *rkp; /* key pointer for right block */
1347 xfs_inobt_ptr_t *rpp; /* address pointer for right block */
1348 xfs_inobt_rec_t *rrp=NULL; /* record pointer for right block */
1349 xfs_btree_cur_t *tcur; /* temporary cursor */
1350 214
1351 /* 215 cur->bc_bufs[level] = NULL;
1352 * Set up variables for this block as "left". 216 cur->bc_nlevels--;
1353 */ 217
1354 lbp = cur->bc_bufs[level]; 218 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1355 left = XFS_BUF_TO_INOBT_BLOCK(lbp);
1356#ifdef DEBUG
1357 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1358 return error;
1359#endif
1360 /*
1361 * If we've got no right sibling then we can't shift an entry right.
1362 */
1363 if (be32_to_cpu(left->bb_rightsib) == NULLAGBLOCK) {
1364 *stat = 0;
1365 return 0;
1366 }
1367 /*
1368 * If the cursor entry is the one that would be moved, don't
1369 * do it... it's too complicated.
1370 */
1371 if (cur->bc_ptrs[level] >= be16_to_cpu(left->bb_numrecs)) {
1372 *stat = 0;
1373 return 0;
1374 }
1375 /*
1376 * Set up the right neighbor as "right".
1377 */
1378 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1379 cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib),
1380 0, &rbp, XFS_INO_BTREE_REF)))
1381 return error;
1382 right = XFS_BUF_TO_INOBT_BLOCK(rbp);
1383 if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
1384 return error;
1385 /*
1386 * If it's full, it can't take another entry.
1387 */
1388 if (be16_to_cpu(right->bb_numrecs) == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
1389 *stat = 0;
1390 return 0;
1391 }
1392 /*
1393 * Make a hole at the start of the right neighbor block, then
1394 * copy the last left block entry to the hole.
1395 */
1396 if (level > 0) {
1397 lkp = XFS_INOBT_KEY_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
1398 lpp = XFS_INOBT_PTR_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
1399 rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
1400 rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
1401#ifdef DEBUG
1402 for (i = be16_to_cpu(right->bb_numrecs) - 1; i >= 0; i--) {
1403 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level)))
1404 return error;
1405 }
1406#endif
1407 memmove(rkp + 1, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
1408 memmove(rpp + 1, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1409#ifdef DEBUG
1410 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*lpp), level)))
1411 return error;
1412#endif
1413 *rkp = *lkp;
1414 *rpp = *lpp;
1415 xfs_inobt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1416 xfs_inobt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1417 } else {
1418 lrp = XFS_INOBT_REC_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
1419 rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
1420 memmove(rrp + 1, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1421 *rrp = *lrp;
1422 xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1423 key.ir_startino = rrp->ir_startino;
1424 rkp = &key;
1425 }
1426 /*
1427 * Decrement and log left's numrecs, bump and log right's numrecs.
1428 */
1429 be16_add_cpu(&left->bb_numrecs, -1);
1430 xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
1431 be16_add_cpu(&right->bb_numrecs, 1);
1432#ifdef DEBUG
1433 if (level > 0)
1434 xfs_btree_check_key(cur->bc_btnum, rkp, rkp + 1);
1435 else
1436 xfs_btree_check_rec(cur->bc_btnum, rrp, rrp + 1);
1437#endif
1438 xfs_inobt_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
1439 /*
1440 * Using a temporary cursor, update the parent key values of the
1441 * block on the right.
1442 */
1443 if ((error = xfs_btree_dup_cursor(cur, &tcur)))
1444 return error;
1445 xfs_btree_lastrec(tcur, level);
1446 if ((error = xfs_inobt_increment(tcur, level, &i)) ||
1447 (error = xfs_inobt_updkey(tcur, rkp, level + 1))) {
1448 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
1449 return error;
1450 }
1451 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
1452 *stat = 1;
1453 return 0; 219 return 0;
1454} 220}
1455 221
1456/*
1457 * Split cur/level block in half.
1458 * Return new block number and its first record (to be inserted into parent).
1459 */
1460STATIC int /* error */
1461xfs_inobt_split(
1462 xfs_btree_cur_t *cur, /* btree cursor */
1463 int level, /* level to split */
1464 xfs_agblock_t *bnop, /* output: block number allocated */
1465 xfs_inobt_key_t *keyp, /* output: first key of new block */
1466 xfs_btree_cur_t **curp, /* output: new cursor */
1467 int *stat) /* success/failure */
1468{
1469 xfs_alloc_arg_t args; /* allocation argument structure */
1470 int error; /* error return value */
1471 int i; /* loop index/record number */
1472 xfs_agblock_t lbno; /* left (current) block number */
1473 xfs_buf_t *lbp; /* buffer for left block */
1474 xfs_inobt_block_t *left; /* left (current) btree block */
1475 xfs_inobt_key_t *lkp; /* left btree key pointer */
1476 xfs_inobt_ptr_t *lpp; /* left btree address pointer */
1477 xfs_inobt_rec_t *lrp; /* left btree record pointer */
1478 xfs_buf_t *rbp; /* buffer for right block */
1479 xfs_inobt_block_t *right; /* right (new) btree block */
1480 xfs_inobt_key_t *rkp; /* right btree key pointer */
1481 xfs_inobt_ptr_t *rpp; /* right btree address pointer */
1482 xfs_inobt_rec_t *rrp; /* right btree record pointer */
1483
1484 /*
1485 * Set up left block (current one).
1486 */
1487 lbp = cur->bc_bufs[level];
1488 args.tp = cur->bc_tp;
1489 args.mp = cur->bc_mp;
1490 lbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(lbp));
1491 /*
1492 * Allocate the new block.
1493 * If we can't do it, we're toast. Give up.
1494 */
1495 args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, lbno);
1496 args.mod = args.minleft = args.alignment = args.total = args.wasdel =
1497 args.isfl = args.userdata = args.minalignslop = 0;
1498 args.minlen = args.maxlen = args.prod = 1;
1499 args.type = XFS_ALLOCTYPE_NEAR_BNO;
1500 if ((error = xfs_alloc_vextent(&args)))
1501 return error;
1502 if (args.fsbno == NULLFSBLOCK) {
1503 *stat = 0;
1504 return 0;
1505 }
1506 ASSERT(args.len == 1);
1507 rbp = xfs_btree_get_bufs(args.mp, args.tp, args.agno, args.agbno, 0);
1508 /*
1509 * Set up the new block as "right".
1510 */
1511 right = XFS_BUF_TO_INOBT_BLOCK(rbp);
1512 /*
1513 * "Left" is the current (according to the cursor) block.
1514 */
1515 left = XFS_BUF_TO_INOBT_BLOCK(lbp);
1516#ifdef DEBUG 222#ifdef DEBUG
1517 if ((error = xfs_btree_check_sblock(cur, left, level, lbp))) 223STATIC int
1518 return error; 224xfs_inobt_keys_inorder(
1519#endif 225 struct xfs_btree_cur *cur,
1520 /* 226 union xfs_btree_key *k1,
1521 * Fill in the btree header for the new block. 227 union xfs_btree_key *k2)
1522 */ 228{
1523 right->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]); 229 return be32_to_cpu(k1->inobt.ir_startino) <
1524 right->bb_level = left->bb_level; 230 be32_to_cpu(k2->inobt.ir_startino);
1525 right->bb_numrecs = cpu_to_be16(be16_to_cpu(left->bb_numrecs) / 2);
1526 /*
1527 * Make sure that if there's an odd number of entries now, that
1528 * each new block will have the same number of entries.
1529 */
1530 if ((be16_to_cpu(left->bb_numrecs) & 1) &&
1531 cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1)
1532 be16_add_cpu(&right->bb_numrecs, 1);
1533 i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1;
1534 /*
1535 * For non-leaf blocks, copy keys and addresses over to the new block.
1536 */
1537 if (level > 0) {
1538 lkp = XFS_INOBT_KEY_ADDR(left, i, cur);
1539 lpp = XFS_INOBT_PTR_ADDR(left, i, cur);
1540 rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
1541 rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
1542#ifdef DEBUG
1543 for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
1544 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(lpp[i]), level)))
1545 return error;
1546 }
1547#endif
1548 memcpy(rkp, lkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
1549 memcpy(rpp, lpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1550 xfs_inobt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1551 xfs_inobt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1552 *keyp = *rkp;
1553 }
1554 /*
1555 * For leaf blocks, copy records over to the new block.
1556 */
1557 else {
1558 lrp = XFS_INOBT_REC_ADDR(left, i, cur);
1559 rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
1560 memcpy(rrp, lrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1561 xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1562 keyp->ir_startino = rrp->ir_startino;
1563 }
1564 /*
1565 * Find the left block number by looking in the buffer.
1566 * Adjust numrecs, sibling pointers.
1567 */
1568 be16_add_cpu(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs)));
1569 right->bb_rightsib = left->bb_rightsib;
1570 left->bb_rightsib = cpu_to_be32(args.agbno);
1571 right->bb_leftsib = cpu_to_be32(lbno);
1572 xfs_inobt_log_block(args.tp, rbp, XFS_BB_ALL_BITS);
1573 xfs_inobt_log_block(args.tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
1574 /*
1575 * If there's a block to the new block's right, make that block
1576 * point back to right instead of to left.
1577 */
1578 if (be32_to_cpu(right->bb_rightsib) != NULLAGBLOCK) {
1579 xfs_inobt_block_t *rrblock; /* rr btree block */
1580 xfs_buf_t *rrbp; /* buffer for rrblock */
1581
1582 if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
1583 be32_to_cpu(right->bb_rightsib), 0, &rrbp,
1584 XFS_INO_BTREE_REF)))
1585 return error;
1586 rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp);
1587 if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
1588 return error;
1589 rrblock->bb_leftsib = cpu_to_be32(args.agbno);
1590 xfs_inobt_log_block(args.tp, rrbp, XFS_BB_LEFTSIB);
1591 }
1592 /*
1593 * If the cursor is really in the right block, move it there.
1594 * If it's just pointing past the last entry in left, then we'll
1595 * insert there, so don't change anything in that case.
1596 */
1597 if (cur->bc_ptrs[level] > be16_to_cpu(left->bb_numrecs) + 1) {
1598 xfs_btree_setbuf(cur, level, rbp);
1599 cur->bc_ptrs[level] -= be16_to_cpu(left->bb_numrecs);
1600 }
1601 /*
1602 * If there are more levels, we'll need another cursor which refers
1603 * the right block, no matter where this cursor was.
1604 */
1605 if (level + 1 < cur->bc_nlevels) {
1606 if ((error = xfs_btree_dup_cursor(cur, curp)))
1607 return error;
1608 (*curp)->bc_ptrs[level + 1]++;
1609 }
1610 *bnop = args.agbno;
1611 *stat = 1;
1612 return 0;
1613} 231}
1614 232
1615/* 233STATIC int
1616 * Update keys at all levels from here to the root along the cursor's path. 234xfs_inobt_recs_inorder(
1617 */ 235 struct xfs_btree_cur *cur,
1618STATIC int /* error */ 236 union xfs_btree_rec *r1,
1619xfs_inobt_updkey( 237 union xfs_btree_rec *r2)
1620 xfs_btree_cur_t *cur, /* btree cursor */
1621 xfs_inobt_key_t *keyp, /* new key value to update to */
1622 int level) /* starting level for update */
1623{ 238{
1624 int ptr; /* index of key in block */ 239 return be32_to_cpu(r1->inobt.ir_startino) + XFS_INODES_PER_CHUNK <=
1625 240 be32_to_cpu(r2->inobt.ir_startino);
1626 /*
1627 * Go up the tree from this level toward the root.
1628 * At each level, update the key value to the value input.
1629 * Stop when we reach a level where the cursor isn't pointing
1630 * at the first entry in the block.
1631 */
1632 for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
1633 xfs_buf_t *bp; /* buffer for block */
1634 xfs_inobt_block_t *block; /* btree block */
1635#ifdef DEBUG
1636 int error; /* error return value */
1637#endif
1638 xfs_inobt_key_t *kp; /* ptr to btree block keys */
1639
1640 bp = cur->bc_bufs[level];
1641 block = XFS_BUF_TO_INOBT_BLOCK(bp);
1642#ifdef DEBUG
1643 if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
1644 return error;
1645#endif
1646 ptr = cur->bc_ptrs[level];
1647 kp = XFS_INOBT_KEY_ADDR(block, ptr, cur);
1648 *kp = *keyp;
1649 xfs_inobt_log_keys(cur, bp, ptr, ptr);
1650 }
1651 return 0;
1652} 241}
242#endif /* DEBUG */
1653 243
1654/* 244#ifdef XFS_BTREE_TRACE
1655 * Externally visible routines. 245ktrace_t *xfs_inobt_trace_buf;
1656 */
1657 246
1658/* 247STATIC void
1659 * Decrement cursor by one record at the level. 248xfs_inobt_trace_enter(
1660 * For nonzero levels the leaf-ward information is untouched. 249 struct xfs_btree_cur *cur,
1661 */ 250 const char *func,
1662int /* error */ 251 char *s,
1663xfs_inobt_decrement( 252 int type,
1664 xfs_btree_cur_t *cur, /* btree cursor */ 253 int line,
1665 int level, /* level in btree, 0 is leaf */ 254 __psunsigned_t a0,
1666 int *stat) /* success/failure */ 255 __psunsigned_t a1,
256 __psunsigned_t a2,
257 __psunsigned_t a3,
258 __psunsigned_t a4,
259 __psunsigned_t a5,
260 __psunsigned_t a6,
261 __psunsigned_t a7,
262 __psunsigned_t a8,
263 __psunsigned_t a9,
264 __psunsigned_t a10)
1667{ 265{
1668 xfs_inobt_block_t *block; /* btree block */ 266 ktrace_enter(xfs_inobt_trace_buf, (void *)(__psint_t)type,
1669 int error; 267 (void *)func, (void *)s, NULL, (void *)cur,
1670 int lev; /* btree level */ 268 (void *)a0, (void *)a1, (void *)a2, (void *)a3,
1671 269 (void *)a4, (void *)a5, (void *)a6, (void *)a7,
1672 ASSERT(level < cur->bc_nlevels); 270 (void *)a8, (void *)a9, (void *)a10);
1673 /*
1674 * Read-ahead to the left at this level.
1675 */
1676 xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
1677 /*
1678 * Decrement the ptr at this level. If we're still in the block
1679 * then we're done.
1680 */
1681 if (--cur->bc_ptrs[level] > 0) {
1682 *stat = 1;
1683 return 0;
1684 }
1685 /*
1686 * Get a pointer to the btree block.
1687 */
1688 block = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[level]);
1689#ifdef DEBUG
1690 if ((error = xfs_btree_check_sblock(cur, block, level,
1691 cur->bc_bufs[level])))
1692 return error;
1693#endif
1694 /*
1695 * If we just went off the left edge of the tree, return failure.
1696 */
1697 if (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK) {
1698 *stat = 0;
1699 return 0;
1700 }
1701 /*
1702 * March up the tree decrementing pointers.
1703 * Stop when we don't go off the left edge of a block.
1704 */
1705 for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1706 if (--cur->bc_ptrs[lev] > 0)
1707 break;
1708 /*
1709 * Read-ahead the left block, we're going to read it
1710 * in the next loop.
1711 */
1712 xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
1713 }
1714 /*
1715 * If we went off the root then we are seriously confused.
1716 */
1717 ASSERT(lev < cur->bc_nlevels);
1718 /*
1719 * Now walk back down the tree, fixing up the cursor's buffer
1720 * pointers and key numbers.
1721 */
1722 for (block = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]); lev > level; ) {
1723 xfs_agblock_t agbno; /* block number of btree block */
1724 xfs_buf_t *bp; /* buffer containing btree block */
1725
1726 agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur));
1727 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1728 cur->bc_private.a.agno, agbno, 0, &bp,
1729 XFS_INO_BTREE_REF)))
1730 return error;
1731 lev--;
1732 xfs_btree_setbuf(cur, lev, bp);
1733 block = XFS_BUF_TO_INOBT_BLOCK(bp);
1734 if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
1735 return error;
1736 cur->bc_ptrs[lev] = be16_to_cpu(block->bb_numrecs);
1737 }
1738 *stat = 1;
1739 return 0;
1740} 271}
1741 272
1742/* 273STATIC void
1743 * Delete the record pointed to by cur. 274xfs_inobt_trace_cursor(
1744 * The cursor refers to the place where the record was (could be inserted) 275 struct xfs_btree_cur *cur,
1745 * when the operation returns. 276 __uint32_t *s0,
1746 */ 277 __uint64_t *l0,
1747int /* error */ 278 __uint64_t *l1)
1748xfs_inobt_delete(
1749 xfs_btree_cur_t *cur, /* btree cursor */
1750 int *stat) /* success/failure */
1751{ 279{
1752 int error; 280 *s0 = cur->bc_private.a.agno;
1753 int i; /* result code */ 281 *l0 = cur->bc_rec.i.ir_startino;
1754 int level; /* btree level */ 282 *l1 = cur->bc_rec.i.ir_free;
1755
1756 /*
1757 * Go up the tree, starting at leaf level.
1758 * If 2 is returned then a join was done; go to the next level.
1759 * Otherwise we are done.
1760 */
1761 for (level = 0, i = 2; i == 2; level++) {
1762 if ((error = xfs_inobt_delrec(cur, level, &i)))
1763 return error;
1764 }
1765 if (i == 0) {
1766 for (level = 1; level < cur->bc_nlevels; level++) {
1767 if (cur->bc_ptrs[level] == 0) {
1768 if ((error = xfs_inobt_decrement(cur, level, &i)))
1769 return error;
1770 break;
1771 }
1772 }
1773 }
1774 *stat = i;
1775 return 0;
1776} 283}
1777 284
1778 285STATIC void
1779/* 286xfs_inobt_trace_key(
1780 * Get the data from the pointed-to record. 287 struct xfs_btree_cur *cur,
1781 */ 288 union xfs_btree_key *key,
1782int /* error */ 289 __uint64_t *l0,
1783xfs_inobt_get_rec( 290 __uint64_t *l1)
1784 xfs_btree_cur_t *cur, /* btree cursor */
1785 xfs_agino_t *ino, /* output: starting inode of chunk */
1786 __int32_t *fcnt, /* output: number of free inodes */
1787 xfs_inofree_t *free, /* output: free inode mask */
1788 int *stat) /* output: success/failure */
1789{ 291{
1790 xfs_inobt_block_t *block; /* btree block */ 292 *l0 = be32_to_cpu(key->inobt.ir_startino);
1791 xfs_buf_t *bp; /* buffer containing btree block */ 293 *l1 = 0;
1792#ifdef DEBUG
1793 int error; /* error return value */
1794#endif
1795 int ptr; /* record number */
1796 xfs_inobt_rec_t *rec; /* record data */
1797
1798 bp = cur->bc_bufs[0];
1799 ptr = cur->bc_ptrs[0];
1800 block = XFS_BUF_TO_INOBT_BLOCK(bp);
1801#ifdef DEBUG
1802 if ((error = xfs_btree_check_sblock(cur, block, 0, bp)))
1803 return error;
1804#endif
1805 /*
1806 * Off the right end or left end, return failure.
1807 */
1808 if (ptr > be16_to_cpu(block->bb_numrecs) || ptr <= 0) {
1809 *stat = 0;
1810 return 0;
1811 }
1812 /*
1813 * Point to the record and extract its data.
1814 */
1815 rec = XFS_INOBT_REC_ADDR(block, ptr, cur);
1816 *ino = be32_to_cpu(rec->ir_startino);
1817 *fcnt = be32_to_cpu(rec->ir_freecount);
1818 *free = be64_to_cpu(rec->ir_free);
1819 *stat = 1;
1820 return 0;
1821} 294}
1822 295
1823/* 296STATIC void
1824 * Increment cursor by one record at the level. 297xfs_inobt_trace_record(
1825 * For nonzero levels the leaf-ward information is untouched. 298 struct xfs_btree_cur *cur,
1826 */ 299 union xfs_btree_rec *rec,
1827int /* error */ 300 __uint64_t *l0,
1828xfs_inobt_increment( 301 __uint64_t *l1,
1829 xfs_btree_cur_t *cur, /* btree cursor */ 302 __uint64_t *l2)
1830 int level, /* level in btree, 0 is leaf */
1831 int *stat) /* success/failure */
1832{ 303{
1833 xfs_inobt_block_t *block; /* btree block */ 304 *l0 = be32_to_cpu(rec->inobt.ir_startino);
1834 xfs_buf_t *bp; /* buffer containing btree block */ 305 *l1 = be32_to_cpu(rec->inobt.ir_freecount);
1835 int error; /* error return value */ 306 *l2 = be64_to_cpu(rec->inobt.ir_free);
1836 int lev; /* btree level */ 307}
308#endif /* XFS_BTREE_TRACE */
309
310static const struct xfs_btree_ops xfs_inobt_ops = {
311 .rec_len = sizeof(xfs_inobt_rec_t),
312 .key_len = sizeof(xfs_inobt_key_t),
313
314 .dup_cursor = xfs_inobt_dup_cursor,
315 .set_root = xfs_inobt_set_root,
316 .kill_root = xfs_inobt_kill_root,
317 .alloc_block = xfs_inobt_alloc_block,
318 .free_block = xfs_inobt_free_block,
319 .get_minrecs = xfs_inobt_get_minrecs,
320 .get_maxrecs = xfs_inobt_get_maxrecs,
321 .init_key_from_rec = xfs_inobt_init_key_from_rec,
322 .init_rec_from_key = xfs_inobt_init_rec_from_key,
323 .init_rec_from_cur = xfs_inobt_init_rec_from_cur,
324 .init_ptr_from_cur = xfs_inobt_init_ptr_from_cur,
325 .key_diff = xfs_inobt_key_diff,
1837 326
1838 ASSERT(level < cur->bc_nlevels);
1839 /*
1840 * Read-ahead to the right at this level.
1841 */
1842 xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
1843 /*
1844 * Get a pointer to the btree block.
1845 */
1846 bp = cur->bc_bufs[level];
1847 block = XFS_BUF_TO_INOBT_BLOCK(bp);
1848#ifdef DEBUG
1849 if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
1850 return error;
1851#endif
1852 /*
1853 * Increment the ptr at this level. If we're still in the block
1854 * then we're done.
1855 */
1856 if (++cur->bc_ptrs[level] <= be16_to_cpu(block->bb_numrecs)) {
1857 *stat = 1;
1858 return 0;
1859 }
1860 /*
1861 * If we just went off the right edge of the tree, return failure.
1862 */
1863 if (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK) {
1864 *stat = 0;
1865 return 0;
1866 }
1867 /*
1868 * March up the tree incrementing pointers.
1869 * Stop when we don't go off the right edge of a block.
1870 */
1871 for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1872 bp = cur->bc_bufs[lev];
1873 block = XFS_BUF_TO_INOBT_BLOCK(bp);
1874#ifdef DEBUG 327#ifdef DEBUG
1875 if ((error = xfs_btree_check_sblock(cur, block, lev, bp))) 328 .keys_inorder = xfs_inobt_keys_inorder,
1876 return error; 329 .recs_inorder = xfs_inobt_recs_inorder,
1877#endif 330#endif
1878 if (++cur->bc_ptrs[lev] <= be16_to_cpu(block->bb_numrecs))
1879 break;
1880 /*
1881 * Read-ahead the right block, we're going to read it
1882 * in the next loop.
1883 */
1884 xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
1885 }
1886 /*
1887 * If we went off the root then we are seriously confused.
1888 */
1889 ASSERT(lev < cur->bc_nlevels);
1890 /*
1891 * Now walk back down the tree, fixing up the cursor's buffer
1892 * pointers and key numbers.
1893 */
1894 for (bp = cur->bc_bufs[lev], block = XFS_BUF_TO_INOBT_BLOCK(bp);
1895 lev > level; ) {
1896 xfs_agblock_t agbno; /* block number of btree block */
1897 331
1898 agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur)); 332#ifdef XFS_BTREE_TRACE
1899 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, 333 .trace_enter = xfs_inobt_trace_enter,
1900 cur->bc_private.a.agno, agbno, 0, &bp, 334 .trace_cursor = xfs_inobt_trace_cursor,
1901 XFS_INO_BTREE_REF))) 335 .trace_key = xfs_inobt_trace_key,
1902 return error; 336 .trace_record = xfs_inobt_trace_record,
1903 lev--; 337#endif
1904 xfs_btree_setbuf(cur, lev, bp); 338};
1905 block = XFS_BUF_TO_INOBT_BLOCK(bp);
1906 if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
1907 return error;
1908 cur->bc_ptrs[lev] = 1;
1909 }
1910 *stat = 1;
1911 return 0;
1912}
1913 339
1914/* 340/*
1915 * Insert the current record at the point referenced by cur. 341 * Allocate a new inode btree cursor.
1916 * The cursor may be inconsistent on return if splits have been done.
1917 */ 342 */
1918int /* error */ 343struct xfs_btree_cur * /* new inode btree cursor */
1919xfs_inobt_insert( 344xfs_inobt_init_cursor(
1920 xfs_btree_cur_t *cur, /* btree cursor */ 345 struct xfs_mount *mp, /* file system mount point */
1921 int *stat) /* success/failure */ 346 struct xfs_trans *tp, /* transaction pointer */
347 struct xfs_buf *agbp, /* buffer for agi structure */
348 xfs_agnumber_t agno) /* allocation group number */
1922{ 349{
1923 int error; /* error return value */ 350 struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
1924 int i; /* result value, 0 for failure */ 351 struct xfs_btree_cur *cur;
1925 int level; /* current level number in btree */
1926 xfs_agblock_t nbno; /* new block number (split result) */
1927 xfs_btree_cur_t *ncur; /* new cursor (split result) */
1928 xfs_inobt_rec_t nrec; /* record being inserted this level */
1929 xfs_btree_cur_t *pcur; /* previous level's cursor */
1930 352
1931 level = 0; 353 cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
1932 nbno = NULLAGBLOCK;
1933 nrec.ir_startino = cpu_to_be32(cur->bc_rec.i.ir_startino);
1934 nrec.ir_freecount = cpu_to_be32(cur->bc_rec.i.ir_freecount);
1935 nrec.ir_free = cpu_to_be64(cur->bc_rec.i.ir_free);
1936 ncur = NULL;
1937 pcur = cur;
1938 /*
1939 * Loop going up the tree, starting at the leaf level.
1940 * Stop when we don't get a split block, that must mean that
1941 * the insert is finished with this level.
1942 */
1943 do {
1944 /*
1945 * Insert nrec/nbno into this level of the tree.
1946 * Note if we fail, nbno will be null.
1947 */
1948 if ((error = xfs_inobt_insrec(pcur, level++, &nbno, &nrec, &ncur,
1949 &i))) {
1950 if (pcur != cur)
1951 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
1952 return error;
1953 }
1954 /*
1955 * See if the cursor we just used is trash.
1956 * Can't trash the caller's cursor, but otherwise we should
1957 * if ncur is a new cursor or we're about to be done.
1958 */
1959 if (pcur != cur && (ncur || nbno == NULLAGBLOCK)) {
1960 cur->bc_nlevels = pcur->bc_nlevels;
1961 xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
1962 }
1963 /*
1964 * If we got a new cursor, switch to it.
1965 */
1966 if (ncur) {
1967 pcur = ncur;
1968 ncur = NULL;
1969 }
1970 } while (nbno != NULLAGBLOCK);
1971 *stat = i;
1972 return 0;
1973}
1974 354
1975/* 355 cur->bc_tp = tp;
1976 * Lookup the record equal to ino in the btree given by cur. 356 cur->bc_mp = mp;
1977 */ 357 cur->bc_nlevels = be32_to_cpu(agi->agi_level);
1978int /* error */ 358 cur->bc_btnum = XFS_BTNUM_INO;
1979xfs_inobt_lookup_eq( 359 cur->bc_blocklog = mp->m_sb.sb_blocklog;
1980 xfs_btree_cur_t *cur, /* btree cursor */
1981 xfs_agino_t ino, /* starting inode of chunk */
1982 __int32_t fcnt, /* free inode count */
1983 xfs_inofree_t free, /* free inode mask */
1984 int *stat) /* success/failure */
1985{
1986 cur->bc_rec.i.ir_startino = ino;
1987 cur->bc_rec.i.ir_freecount = fcnt;
1988 cur->bc_rec.i.ir_free = free;
1989 return xfs_inobt_lookup(cur, XFS_LOOKUP_EQ, stat);
1990}
1991 360
1992/* 361 cur->bc_ops = &xfs_inobt_ops;
1993 * Lookup the first record greater than or equal to ino
1994 * in the btree given by cur.
1995 */
1996int /* error */
1997xfs_inobt_lookup_ge(
1998 xfs_btree_cur_t *cur, /* btree cursor */
1999 xfs_agino_t ino, /* starting inode of chunk */
2000 __int32_t fcnt, /* free inode count */
2001 xfs_inofree_t free, /* free inode mask */
2002 int *stat) /* success/failure */
2003{
2004 cur->bc_rec.i.ir_startino = ino;
2005 cur->bc_rec.i.ir_freecount = fcnt;
2006 cur->bc_rec.i.ir_free = free;
2007 return xfs_inobt_lookup(cur, XFS_LOOKUP_GE, stat);
2008}
2009 362
2010/* 363 cur->bc_private.a.agbp = agbp;
2011 * Lookup the first record less than or equal to ino 364 cur->bc_private.a.agno = agno;
2012 * in the btree given by cur. 365
2013 */ 366 return cur;
2014int /* error */
2015xfs_inobt_lookup_le(
2016 xfs_btree_cur_t *cur, /* btree cursor */
2017 xfs_agino_t ino, /* starting inode of chunk */
2018 __int32_t fcnt, /* free inode count */
2019 xfs_inofree_t free, /* free inode mask */
2020 int *stat) /* success/failure */
2021{
2022 cur->bc_rec.i.ir_startino = ino;
2023 cur->bc_rec.i.ir_freecount = fcnt;
2024 cur->bc_rec.i.ir_free = free;
2025 return xfs_inobt_lookup(cur, XFS_LOOKUP_LE, stat);
2026} 367}
2027 368
2028/* 369/*
2029 * Update the record referred to by cur, to the value given 370 * Calculate number of records in an inobt btree block.
2030 * by [ino, fcnt, free].
2031 * This either works (return 0) or gets an EFSCORRUPTED error.
2032 */ 371 */
2033int /* error */ 372int
2034xfs_inobt_update( 373xfs_inobt_maxrecs(
2035 xfs_btree_cur_t *cur, /* btree cursor */ 374 struct xfs_mount *mp,
2036 xfs_agino_t ino, /* starting inode of chunk */ 375 int blocklen,
2037 __int32_t fcnt, /* free inode count */ 376 int leaf)
2038 xfs_inofree_t free) /* free inode mask */
2039{ 377{
2040 xfs_inobt_block_t *block; /* btree block to update */ 378 blocklen -= XFS_INOBT_BLOCK_LEN(mp);
2041 xfs_buf_t *bp; /* buffer containing btree block */
2042 int error; /* error return value */
2043 int ptr; /* current record number (updating) */
2044 xfs_inobt_rec_t *rp; /* pointer to updated record */
2045 379
2046 /* 380 if (leaf)
2047 * Pick up the current block. 381 return blocklen / sizeof(xfs_inobt_rec_t);
2048 */ 382 return blocklen / (sizeof(xfs_inobt_key_t) + sizeof(xfs_inobt_ptr_t));
2049 bp = cur->bc_bufs[0];
2050 block = XFS_BUF_TO_INOBT_BLOCK(bp);
2051#ifdef DEBUG
2052 if ((error = xfs_btree_check_sblock(cur, block, 0, bp)))
2053 return error;
2054#endif
2055 /*
2056 * Get the address of the rec to be updated.
2057 */
2058 ptr = cur->bc_ptrs[0];
2059 rp = XFS_INOBT_REC_ADDR(block, ptr, cur);
2060 /*
2061 * Fill in the new contents and log them.
2062 */
2063 rp->ir_startino = cpu_to_be32(ino);
2064 rp->ir_freecount = cpu_to_be32(fcnt);
2065 rp->ir_free = cpu_to_be64(free);
2066 xfs_inobt_log_recs(cur, bp, ptr, ptr);
2067 /*
2068 * Updating first record in leaf. Pass new key value up to our parent.
2069 */
2070 if (ptr == 1) {
2071 xfs_inobt_key_t key; /* key containing [ino] */
2072
2073 key.ir_startino = cpu_to_be32(ino);
2074 if ((error = xfs_inobt_updkey(cur, &key, 1)))
2075 return error;
2076 }
2077 return 0;
2078} 383}
diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h
index 8efc4a5b8b9..37e5dd01a57 100644
--- a/fs/xfs/xfs_ialloc_btree.h
+++ b/fs/xfs/xfs_ialloc_btree.h
@@ -24,7 +24,6 @@
24 24
25struct xfs_buf; 25struct xfs_buf;
26struct xfs_btree_cur; 26struct xfs_btree_cur;
27struct xfs_btree_sblock;
28struct xfs_mount; 27struct xfs_mount;
29 28
30/* 29/*
@@ -70,11 +69,6 @@ typedef struct xfs_inobt_key {
70/* btree pointer type */ 69/* btree pointer type */
71typedef __be32 xfs_inobt_ptr_t; 70typedef __be32 xfs_inobt_ptr_t;
72 71
73/* btree block header type */
74typedef struct xfs_btree_sblock xfs_inobt_block_t;
75
76#define XFS_BUF_TO_INOBT_BLOCK(bp) ((xfs_inobt_block_t *)XFS_BUF_PTR(bp))
77
78/* 72/*
79 * Bit manipulations for ir_free. 73 * Bit manipulations for ir_free.
80 */ 74 */
@@ -85,14 +79,6 @@ typedef struct xfs_btree_sblock xfs_inobt_block_t;
85#define XFS_INOBT_CLR_FREE(rp,i) ((rp)->ir_free &= ~XFS_INOBT_MASK(i)) 79#define XFS_INOBT_CLR_FREE(rp,i) ((rp)->ir_free &= ~XFS_INOBT_MASK(i))
86 80
87/* 81/*
88 * Real block structures have a size equal to the disk block size.
89 */
90#define XFS_INOBT_BLOCK_MAXRECS(lev,cur) ((cur)->bc_mp->m_inobt_mxr[lev != 0])
91#define XFS_INOBT_BLOCK_MINRECS(lev,cur) ((cur)->bc_mp->m_inobt_mnr[lev != 0])
92#define XFS_INOBT_IS_LAST_REC(cur) \
93 ((cur)->bc_ptrs[0] == be16_to_cpu(XFS_BUF_TO_INOBT_BLOCK((cur)->bc_bufs[0])->bb_numrecs))
94
95/*
96 * Maximum number of inode btree levels. 82 * Maximum number of inode btree levels.
97 */ 83 */
98#define XFS_IN_MAXLEVELS(mp) ((mp)->m_in_maxlevels) 84#define XFS_IN_MAXLEVELS(mp) ((mp)->m_in_maxlevels)
@@ -104,75 +90,38 @@ typedef struct xfs_btree_sblock xfs_inobt_block_t;
104#define XFS_PREALLOC_BLOCKS(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1)) 90#define XFS_PREALLOC_BLOCKS(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1))
105 91
106/* 92/*
107 * Record, key, and pointer address macros for btree blocks. 93 * Btree block header size depends on a superblock flag.
108 */ 94 *
109#define XFS_INOBT_REC_ADDR(bb,i,cur) \ 95 * (not quite yet, but soon)
110 (XFS_BTREE_REC_ADDR(xfs_inobt, bb, i))
111
112#define XFS_INOBT_KEY_ADDR(bb,i,cur) \
113 (XFS_BTREE_KEY_ADDR(xfs_inobt, bb, i))
114
115#define XFS_INOBT_PTR_ADDR(bb,i,cur) \
116 (XFS_BTREE_PTR_ADDR(xfs_inobt, bb, \
117 i, XFS_INOBT_BLOCK_MAXRECS(1, cur)))
118
119/*
120 * Decrement cursor by one record at the level.
121 * For nonzero levels the leaf-ward information is untouched.
122 */
123extern int xfs_inobt_decrement(struct xfs_btree_cur *cur, int level, int *stat);
124
125/*
126 * Delete the record pointed to by cur.
127 * The cursor refers to the place where the record was (could be inserted)
128 * when the operation returns.
129 */
130extern int xfs_inobt_delete(struct xfs_btree_cur *cur, int *stat);
131
132/*
133 * Get the data from the pointed-to record.
134 */
135extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur, xfs_agino_t *ino,
136 __int32_t *fcnt, xfs_inofree_t *free, int *stat);
137
138/*
139 * Increment cursor by one record at the level.
140 * For nonzero levels the leaf-ward information is untouched.
141 */
142extern int xfs_inobt_increment(struct xfs_btree_cur *cur, int level, int *stat);
143
144/*
145 * Insert the current record at the point referenced by cur.
146 * The cursor may be inconsistent on return if splits have been done.
147 */
148extern int xfs_inobt_insert(struct xfs_btree_cur *cur, int *stat);
149
150/*
151 * Lookup the record equal to ino in the btree given by cur.
152 */
153extern int xfs_inobt_lookup_eq(struct xfs_btree_cur *cur, xfs_agino_t ino,
154 __int32_t fcnt, xfs_inofree_t free, int *stat);
155
156/*
157 * Lookup the first record greater than or equal to ino
158 * in the btree given by cur.
159 */
160extern int xfs_inobt_lookup_ge(struct xfs_btree_cur *cur, xfs_agino_t ino,
161 __int32_t fcnt, xfs_inofree_t free, int *stat);
162
163/*
164 * Lookup the first record less than or equal to ino
165 * in the btree given by cur.
166 */ 96 */
167extern int xfs_inobt_lookup_le(struct xfs_btree_cur *cur, xfs_agino_t ino, 97#define XFS_INOBT_BLOCK_LEN(mp) XFS_BTREE_SBLOCK_LEN
168 __int32_t fcnt, xfs_inofree_t free, int *stat);
169 98
170/* 99/*
171 * Update the record referred to by cur, to the value given 100 * Record, key, and pointer address macros for btree blocks.
172 * by [ino, fcnt, free]. 101 *
173 * This either works (return 0) or gets an EFSCORRUPTED error. 102 * (note that some of these may appear unused, but they are used in userspace)
174 */ 103 */
175extern int xfs_inobt_update(struct xfs_btree_cur *cur, xfs_agino_t ino, 104#define XFS_INOBT_REC_ADDR(mp, block, index) \
176 __int32_t fcnt, xfs_inofree_t free); 105 ((xfs_inobt_rec_t *) \
106 ((char *)(block) + \
107 XFS_INOBT_BLOCK_LEN(mp) + \
108 (((index) - 1) * sizeof(xfs_inobt_rec_t))))
109
110#define XFS_INOBT_KEY_ADDR(mp, block, index) \
111 ((xfs_inobt_key_t *) \
112 ((char *)(block) + \
113 XFS_INOBT_BLOCK_LEN(mp) + \
114 ((index) - 1) * sizeof(xfs_inobt_key_t)))
115
116#define XFS_INOBT_PTR_ADDR(mp, block, index, maxrecs) \
117 ((xfs_inobt_ptr_t *) \
118 ((char *)(block) + \
119 XFS_INOBT_BLOCK_LEN(mp) + \
120 (maxrecs) * sizeof(xfs_inobt_key_t) + \
121 ((index) - 1) * sizeof(xfs_inobt_ptr_t)))
122
123extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *,
124 struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t);
125extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int);
177 126
178#endif /* __XFS_IALLOC_BTREE_H__ */ 127#endif /* __XFS_IALLOC_BTREE_H__ */
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index e229e9e001c..e2fb6210d4c 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -38,281 +38,283 @@
38#include "xfs_ialloc.h" 38#include "xfs_ialloc.h"
39#include "xfs_quota.h" 39#include "xfs_quota.h"
40#include "xfs_utils.h" 40#include "xfs_utils.h"
41#include "xfs_trans_priv.h"
42#include "xfs_inode_item.h"
43#include "xfs_bmap.h"
44#include "xfs_btree_trace.h"
45#include "xfs_dir2_trace.h"
46
41 47
42/* 48/*
43 * Look up an inode by number in the given file system. 49 * Allocate and initialise an xfs_inode.
44 * The inode is looked up in the cache held in each AG.
45 * If the inode is found in the cache, attach it to the provided
46 * vnode.
47 *
48 * If it is not in core, read it in from the file system's device,
49 * add it to the cache and attach the provided vnode.
50 *
51 * The inode is locked according to the value of the lock_flags parameter.
52 * This flag parameter indicates how and if the inode's IO lock and inode lock
53 * should be taken.
54 *
55 * mp -- the mount point structure for the current file system. It points
56 * to the inode hash table.
57 * tp -- a pointer to the current transaction if there is one. This is
58 * simply passed through to the xfs_iread() call.
59 * ino -- the number of the inode desired. This is the unique identifier
60 * within the file system for the inode being requested.
61 * lock_flags -- flags indicating how to lock the inode. See the comment
62 * for xfs_ilock() for a list of valid values.
63 * bno -- the block number starting the buffer containing the inode,
64 * if known (as by bulkstat), else 0.
65 */ 50 */
66STATIC int 51STATIC struct xfs_inode *
67xfs_iget_core( 52xfs_inode_alloc(
68 struct inode *inode, 53 struct xfs_mount *mp,
69 xfs_mount_t *mp, 54 xfs_ino_t ino)
70 xfs_trans_t *tp,
71 xfs_ino_t ino,
72 uint flags,
73 uint lock_flags,
74 xfs_inode_t **ipp,
75 xfs_daddr_t bno)
76{ 55{
77 struct inode *old_inode; 56 struct xfs_inode *ip;
78 xfs_inode_t *ip;
79 xfs_inode_t *iq;
80 int error;
81 unsigned long first_index, mask;
82 xfs_perag_t *pag;
83 xfs_agino_t agino;
84 57
85 /* the radix tree exists only in inode capable AGs */ 58 /*
86 if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_maxagi) 59 * if this didn't occur in transactions, we could use
87 return EINVAL; 60 * KM_MAYFAIL and return NULL here on ENOMEM. Set the
61 * code up to do this anyway.
62 */
63 ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP);
64 if (!ip)
65 return NULL;
88 66
89 /* get the perag structure and ensure that it's inode capable */ 67 ASSERT(atomic_read(&ip->i_iocount) == 0);
90 pag = xfs_get_perag(mp, ino); 68 ASSERT(atomic_read(&ip->i_pincount) == 0);
91 if (!pag->pagi_inodeok) 69 ASSERT(!spin_is_locked(&ip->i_flags_lock));
92 return EINVAL; 70 ASSERT(completion_done(&ip->i_flush));
93 ASSERT(pag->pag_ici_init);
94 agino = XFS_INO_TO_AGINO(mp, ino);
95 71
96again: 72 /*
97 read_lock(&pag->pag_ici_lock); 73 * initialise the VFS inode here to get failures
98 ip = radix_tree_lookup(&pag->pag_ici_root, agino); 74 * out of the way early.
75 */
76 if (!inode_init_always(mp->m_super, VFS_I(ip))) {
77 kmem_zone_free(xfs_inode_zone, ip);
78 return NULL;
79 }
80
81 /* initialise the xfs inode */
82 ip->i_ino = ino;
83 ip->i_mount = mp;
84 memset(&ip->i_imap, 0, sizeof(struct xfs_imap));
85 ip->i_afp = NULL;
86 memset(&ip->i_df, 0, sizeof(xfs_ifork_t));
87 ip->i_flags = 0;
88 ip->i_update_core = 0;
89 ip->i_update_size = 0;
90 ip->i_delayed_blks = 0;
91 memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
92 ip->i_size = 0;
93 ip->i_new_size = 0;
94
95 /*
96 * Initialize inode's trace buffers.
97 */
98#ifdef XFS_INODE_TRACE
99 ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_NOFS);
100#endif
101#ifdef XFS_BMAP_TRACE
102 ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_NOFS);
103#endif
104#ifdef XFS_BTREE_TRACE
105 ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_NOFS);
106#endif
107#ifdef XFS_RW_TRACE
108 ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_NOFS);
109#endif
110#ifdef XFS_ILOCK_TRACE
111 ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_NOFS);
112#endif
113#ifdef XFS_DIR2_TRACE
114 ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS);
115#endif
116
117 return ip;
118}
119
120/*
121 * Check the validity of the inode we just found it the cache
122 */
123static int
124xfs_iget_cache_hit(
125 struct xfs_perag *pag,
126 struct xfs_inode *ip,
127 int flags,
128 int lock_flags) __releases(pag->pag_ici_lock)
129{
130 struct xfs_mount *mp = ip->i_mount;
131 int error = EAGAIN;
132
133 /*
134 * If INEW is set this inode is being set up
135 * If IRECLAIM is set this inode is being torn down
136 * Pause and try again.
137 */
138 if (xfs_iflags_test(ip, (XFS_INEW|XFS_IRECLAIM))) {
139 XFS_STATS_INC(xs_ig_frecycle);
140 goto out_error;
141 }
142
143 /* If IRECLAIMABLE is set, we've torn down the vfs inode part */
144 if (xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
99 145
100 if (ip != NULL) {
101 /* 146 /*
102 * If INEW is set this inode is being set up 147 * If lookup is racing with unlink, then we should return an
103 * we need to pause and try again. 148 * error immediately so we don't remove it from the reclaim
149 * list and potentially leak the inode.
104 */ 150 */
105 if (xfs_iflags_test(ip, XFS_INEW)) { 151 if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
106 read_unlock(&pag->pag_ici_lock); 152 error = ENOENT;
107 delay(1); 153 goto out_error;
108 XFS_STATS_INC(xs_ig_frecycle);
109
110 goto again;
111 } 154 }
112 155
113 old_inode = ip->i_vnode; 156 xfs_itrace_exit_tag(ip, "xfs_iget.alloc");
114 if (old_inode == NULL) {
115 /*
116 * If IRECLAIM is set this inode is
117 * on its way out of the system,
118 * we need to pause and try again.
119 */
120 if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
121 read_unlock(&pag->pag_ici_lock);
122 delay(1);
123 XFS_STATS_INC(xs_ig_frecycle);
124
125 goto again;
126 }
127 ASSERT(xfs_iflags_test(ip, XFS_IRECLAIMABLE));
128
129 /*
130 * If lookup is racing with unlink, then we
131 * should return an error immediately so we
132 * don't remove it from the reclaim list and
133 * potentially leak the inode.
134 */
135 if ((ip->i_d.di_mode == 0) &&
136 !(flags & XFS_IGET_CREATE)) {
137 read_unlock(&pag->pag_ici_lock);
138 xfs_put_perag(mp, pag);
139 return ENOENT;
140 }
141
142 xfs_itrace_exit_tag(ip, "xfs_iget.alloc");
143
144 XFS_STATS_INC(xs_ig_found);
145 xfs_iflags_clear(ip, XFS_IRECLAIMABLE);
146 read_unlock(&pag->pag_ici_lock);
147
148 XFS_MOUNT_ILOCK(mp);
149 list_del_init(&ip->i_reclaim);
150 XFS_MOUNT_IUNLOCK(mp);
151
152 goto finish_inode;
153
154 } else if (inode != old_inode) {
155 /* The inode is being torn down, pause and
156 * try again.
157 */
158 if (old_inode->i_state & (I_FREEING | I_CLEAR)) {
159 read_unlock(&pag->pag_ici_lock);
160 delay(1);
161 XFS_STATS_INC(xs_ig_frecycle);
162
163 goto again;
164 }
165/* Chances are the other vnode (the one in the inode) is being torn
166* down right now, and we landed on top of it. Question is, what do
167* we do? Unhook the old inode and hook up the new one?
168*/
169 cmn_err(CE_PANIC,
170 "xfs_iget_core: ambiguous vns: vp/0x%p, invp/0x%p",
171 old_inode, inode);
172 }
173 157
174 /* 158 /*
175 * Inode cache hit 159 * We need to re-initialise the VFS inode as it has been
160 * 'freed' by the VFS. Do this here so we can deal with
161 * errors cleanly, then tag it so it can be set up correctly
162 * later.
176 */ 163 */
177 read_unlock(&pag->pag_ici_lock); 164 if (!inode_init_always(mp->m_super, VFS_I(ip))) {
178 XFS_STATS_INC(xs_ig_found); 165 error = ENOMEM;
179 166 goto out_error;
180finish_inode:
181 if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
182 xfs_put_perag(mp, pag);
183 return ENOENT;
184 } 167 }
185 168
186 if (lock_flags != 0) 169 /*
187 xfs_ilock(ip, lock_flags); 170 * We must set the XFS_INEW flag before clearing the
171 * XFS_IRECLAIMABLE flag so that if a racing lookup does
172 * not find the XFS_IRECLAIMABLE above but has the igrab()
173 * below succeed we can safely check XFS_INEW to detect
174 * that this inode is still being initialised.
175 */
176 xfs_iflags_set(ip, XFS_INEW);
177 xfs_iflags_clear(ip, XFS_IRECLAIMABLE);
178
179 /* clear the radix tree reclaim flag as well. */
180 __xfs_inode_clear_reclaim_tag(mp, pag, ip);
181 } else if (!igrab(VFS_I(ip))) {
182 /* If the VFS inode is being torn down, pause and try again. */
183 XFS_STATS_INC(xs_ig_frecycle);
184 goto out_error;
185 } else if (xfs_iflags_test(ip, XFS_INEW)) {
186 /*
187 * We are racing with another cache hit that is
188 * currently recycling this inode out of the XFS_IRECLAIMABLE
189 * state. Wait for the initialisation to complete before
190 * continuing.
191 */
192 wait_on_inode(VFS_I(ip));
193 }
188 194
189 xfs_iflags_clear(ip, XFS_ISTALE); 195 if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
190 xfs_itrace_exit_tag(ip, "xfs_iget.found"); 196 error = ENOENT;
191 goto return_ip; 197 iput(VFS_I(ip));
198 goto out_error;
192 } 199 }
193 200
194 /* 201 /* We've got a live one. */
195 * Inode cache miss
196 */
197 read_unlock(&pag->pag_ici_lock); 202 read_unlock(&pag->pag_ici_lock);
198 XFS_STATS_INC(xs_ig_missed);
199 203
200 /* 204 if (lock_flags != 0)
201 * Read the disk inode attributes into a new inode structure and get 205 xfs_ilock(ip, lock_flags);
202 * a new vnode for it. This should also initialize i_ino and i_mount.
203 */
204 error = xfs_iread(mp, tp, ino, &ip, bno,
205 (flags & XFS_IGET_BULKSTAT) ? XFS_IMAP_BULKSTAT : 0);
206 if (error) {
207 xfs_put_perag(mp, pag);
208 return error;
209 }
210 206
211 xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); 207 xfs_iflags_clear(ip, XFS_ISTALE);
208 xfs_itrace_exit_tag(ip, "xfs_iget.found");
209 XFS_STATS_INC(xs_ig_found);
210 return 0;
211
212out_error:
213 read_unlock(&pag->pag_ici_lock);
214 return error;
215}
212 216
213 217
214 mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, 218static int
215 "xfsino", ip->i_ino); 219xfs_iget_cache_miss(
216 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); 220 struct xfs_mount *mp,
217 init_waitqueue_head(&ip->i_ipin_wait); 221 struct xfs_perag *pag,
218 atomic_set(&ip->i_pincount, 0); 222 xfs_trans_t *tp,
223 xfs_ino_t ino,
224 struct xfs_inode **ipp,
225 xfs_daddr_t bno,
226 int flags,
227 int lock_flags) __releases(pag->pag_ici_lock)
228{
229 struct xfs_inode *ip;
230 int error;
231 unsigned long first_index, mask;
232 xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ino);
219 233
220 /* 234 ip = xfs_inode_alloc(mp, ino);
221 * Because we want to use a counting completion, complete 235 if (!ip)
222 * the flush completion once to allow a single access to 236 return ENOMEM;
223 * the flush completion without blocking.
224 */
225 init_completion(&ip->i_flush);
226 complete(&ip->i_flush);
227 237
228 if (lock_flags) 238 error = xfs_iread(mp, tp, ip, bno, flags);
229 xfs_ilock(ip, lock_flags); 239 if (error)
240 goto out_destroy;
241
242 xfs_itrace_exit_tag(ip, "xfs_iget.alloc");
230 243
231 if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { 244 if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
232 xfs_idestroy(ip); 245 error = ENOENT;
233 xfs_put_perag(mp, pag); 246 goto out_destroy;
234 return ENOENT;
235 } 247 }
236 248
249 if (lock_flags)
250 xfs_ilock(ip, lock_flags);
251
237 /* 252 /*
238 * Preload the radix tree so we can insert safely under the 253 * Preload the radix tree so we can insert safely under the
239 * write spinlock. 254 * write spinlock. Note that we cannot sleep inside the preload
255 * region.
240 */ 256 */
241 if (radix_tree_preload(GFP_KERNEL)) { 257 if (radix_tree_preload(GFP_KERNEL)) {
242 xfs_idestroy(ip); 258 error = EAGAIN;
243 delay(1); 259 goto out_unlock;
244 goto again;
245 } 260 }
261
246 mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); 262 mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
247 first_index = agino & mask; 263 first_index = agino & mask;
248 write_lock(&pag->pag_ici_lock); 264 write_lock(&pag->pag_ici_lock);
249 /* 265
250 * insert the new inode 266 /* insert the new inode */
251 */
252 error = radix_tree_insert(&pag->pag_ici_root, agino, ip); 267 error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
253 if (unlikely(error)) { 268 if (unlikely(error)) {
254 BUG_ON(error != -EEXIST); 269 WARN_ON(error != -EEXIST);
255 write_unlock(&pag->pag_ici_lock);
256 radix_tree_preload_end();
257 xfs_idestroy(ip);
258 XFS_STATS_INC(xs_ig_dup); 270 XFS_STATS_INC(xs_ig_dup);
259 goto again; 271 error = EAGAIN;
272 goto out_preload_end;
260 } 273 }
261 274
262 /* 275 /* These values _must_ be set before releasing the radix tree lock! */
263 * These values _must_ be set before releasing the radix tree lock!
264 */
265 ip->i_udquot = ip->i_gdquot = NULL; 276 ip->i_udquot = ip->i_gdquot = NULL;
266 xfs_iflags_set(ip, XFS_INEW); 277 xfs_iflags_set(ip, XFS_INEW);
267 278
268 write_unlock(&pag->pag_ici_lock); 279 write_unlock(&pag->pag_ici_lock);
269 radix_tree_preload_end(); 280 radix_tree_preload_end();
270
271 /*
272 * Link ip to its mount and thread it on the mount's inode list.
273 */
274 XFS_MOUNT_ILOCK(mp);
275 if ((iq = mp->m_inodes)) {
276 ASSERT(iq->i_mprev->i_mnext == iq);
277 ip->i_mprev = iq->i_mprev;
278 iq->i_mprev->i_mnext = ip;
279 iq->i_mprev = ip;
280 ip->i_mnext = iq;
281 } else {
282 ip->i_mnext = ip;
283 ip->i_mprev = ip;
284 }
285 mp->m_inodes = ip;
286
287 XFS_MOUNT_IUNLOCK(mp);
288 xfs_put_perag(mp, pag);
289
290 return_ip:
291 ASSERT(ip->i_df.if_ext_max ==
292 XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t));
293
294 xfs_iflags_set(ip, XFS_IMODIFIED);
295 *ipp = ip; 281 *ipp = ip;
296
297 /*
298 * Set up the Linux with the Linux inode.
299 */
300 ip->i_vnode = inode;
301 inode->i_private = ip;
302
303 /*
304 * If we have a real type for an on-disk inode, we can set ops(&unlock)
305 * now. If it's a new inode being created, xfs_ialloc will handle it.
306 */
307 if (ip->i_d.di_mode != 0)
308 xfs_setup_inode(ip);
309 return 0; 282 return 0;
310}
311 283
284out_preload_end:
285 write_unlock(&pag->pag_ici_lock);
286 radix_tree_preload_end();
287out_unlock:
288 if (lock_flags)
289 xfs_iunlock(ip, lock_flags);
290out_destroy:
291 xfs_destroy_inode(ip);
292 return error;
293}
312 294
313/* 295/*
314 * The 'normal' internal xfs_iget, if needed it will 296 * Look up an inode by number in the given file system.
315 * 'allocate', or 'get', the vnode. 297 * The inode is looked up in the cache held in each AG.
298 * If the inode is found in the cache, initialise the vfs inode
299 * if necessary.
300 *
301 * If it is not in core, read it in from the file system's device,
302 * add it to the cache and initialise the vfs inode.
303 *
304 * The inode is locked according to the value of the lock_flags parameter.
305 * This flag parameter indicates how and if the inode's IO lock and inode lock
306 * should be taken.
307 *
308 * mp -- the mount point structure for the current file system. It points
309 * to the inode hash table.
310 * tp -- a pointer to the current transaction if there is one. This is
311 * simply passed through to the xfs_iread() call.
312 * ino -- the number of the inode desired. This is the unique identifier
313 * within the file system for the inode being requested.
314 * lock_flags -- flags indicating how to lock the inode. See the comment
315 * for xfs_ilock() for a list of valid values.
316 * bno -- the block number starting the buffer containing the inode,
317 * if known (as by bulkstat), else 0.
316 */ 318 */
317int 319int
318xfs_iget( 320xfs_iget(
@@ -324,61 +326,64 @@ xfs_iget(
324 xfs_inode_t **ipp, 326 xfs_inode_t **ipp,
325 xfs_daddr_t bno) 327 xfs_daddr_t bno)
326{ 328{
327 struct inode *inode;
328 xfs_inode_t *ip; 329 xfs_inode_t *ip;
329 int error; 330 int error;
331 xfs_perag_t *pag;
332 xfs_agino_t agino;
330 333
331 XFS_STATS_INC(xs_ig_attempts); 334 /* the radix tree exists only in inode capable AGs */
335 if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_maxagi)
336 return EINVAL;
332 337
333retry: 338 /* get the perag structure and ensure that it's inode capable */
334 inode = iget_locked(mp->m_super, ino); 339 pag = xfs_get_perag(mp, ino);
335 if (!inode) 340 if (!pag->pagi_inodeok)
336 /* If we got no inode we are out of memory */ 341 return EINVAL;
337 return ENOMEM; 342 ASSERT(pag->pag_ici_init);
343 agino = XFS_INO_TO_AGINO(mp, ino);
338 344
339 if (inode->i_state & I_NEW) { 345again:
340 XFS_STATS_INC(vn_active); 346 error = 0;
341 XFS_STATS_INC(vn_alloc); 347 read_lock(&pag->pag_ici_lock);
342 348 ip = radix_tree_lookup(&pag->pag_ici_root, agino);
343 error = xfs_iget_core(inode, mp, tp, ino, flags, 349
344 lock_flags, ipp, bno); 350 if (ip) {
345 if (error) { 351 error = xfs_iget_cache_hit(pag, ip, flags, lock_flags);
346 make_bad_inode(inode); 352 if (error)
347 if (inode->i_state & I_NEW) 353 goto out_error_or_again;
348 unlock_new_inode(inode); 354 } else {
349 iput(inode); 355 read_unlock(&pag->pag_ici_lock);
350 } 356 XFS_STATS_INC(xs_ig_missed);
351 return error; 357
358 error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, bno,
359 flags, lock_flags);
360 if (error)
361 goto out_error_or_again;
352 } 362 }
363 xfs_put_perag(mp, pag);
353 364
365 *ipp = ip;
366
367 ASSERT(ip->i_df.if_ext_max ==
368 XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t));
354 /* 369 /*
355 * If the inode is not fully constructed due to 370 * If we have a real type for an on-disk inode, we can set ops(&unlock)
356 * filehandle mismatches wait for the inode to go 371 * now. If it's a new inode being created, xfs_ialloc will handle it.
357 * away and try again.
358 *
359 * iget_locked will call __wait_on_freeing_inode
360 * to wait for the inode to go away.
361 */ 372 */
362 if (is_bad_inode(inode)) { 373 if (xfs_iflags_test(ip, XFS_INEW) && ip->i_d.di_mode != 0)
363 iput(inode); 374 xfs_setup_inode(ip);
364 delay(1); 375 return 0;
365 goto retry;
366 }
367 376
368 ip = XFS_I(inode); 377out_error_or_again:
369 if (!ip) { 378 if (error == EAGAIN) {
370 iput(inode);
371 delay(1); 379 delay(1);
372 goto retry; 380 goto again;
373 } 381 }
374 382 xfs_put_perag(mp, pag);
375 if (lock_flags != 0) 383 return error;
376 xfs_ilock(ip, lock_flags);
377 XFS_STATS_INC(xs_ig_found);
378 *ipp = ip;
379 return 0;
380} 384}
381 385
386
382/* 387/*
383 * Look for the inode corresponding to the given ino in the hash table. 388 * Look for the inode corresponding to the given ino in the hash table.
384 * If it is there and its i_transp pointer matches tp, return it. 389 * If it is there and its i_transp pointer matches tp, return it.
@@ -444,99 +449,109 @@ xfs_iput_new(
444 IRELE(ip); 449 IRELE(ip);
445} 450}
446 451
447
448/* 452/*
449 * This routine embodies the part of the reclaim code that pulls 453 * This is called free all the memory associated with an inode.
450 * the inode from the inode hash table and the mount structure's 454 * It must free the inode itself and any buffers allocated for
451 * inode list. 455 * if_extents/if_data and if_broot. It must also free the lock
452 * This should only be called from xfs_reclaim(). 456 * associated with the inode.
457 *
458 * Note: because we don't initialise everything on reallocation out
459 * of the zone, we must ensure we nullify everything correctly before
460 * freeing the structure.
453 */ 461 */
454void 462void
455xfs_ireclaim(xfs_inode_t *ip) 463xfs_ireclaim(
464 struct xfs_inode *ip)
456{ 465{
457 /* 466 struct xfs_mount *mp = ip->i_mount;
458 * Remove from old hash list and mount list. 467 struct xfs_perag *pag;
459 */
460 XFS_STATS_INC(xs_ig_reclaims);
461 468
462 xfs_iextract(ip); 469 XFS_STATS_INC(xs_ig_reclaims);
463
464 /*
465 * Here we do a spurious inode lock in order to coordinate with
466 * xfs_sync(). This is because xfs_sync() references the inodes
467 * in the mount list without taking references on the corresponding
468 * vnodes. We make that OK here by ensuring that we wait until
469 * the inode is unlocked in xfs_sync() before we go ahead and
470 * free it. We get both the regular lock and the io lock because
471 * the xfs_sync() code may need to drop the regular one but will
472 * still hold the io lock.
473 */
474 xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
475
476 /*
477 * Release dquots (and their references) if any. An inode may escape
478 * xfs_inactive and get here via vn_alloc->vn_reclaim path.
479 */
480 XFS_QM_DQDETACH(ip->i_mount, ip);
481
482 /*
483 * Pull our behavior descriptor from the vnode chain.
484 */
485 if (ip->i_vnode) {
486 ip->i_vnode->i_private = NULL;
487 ip->i_vnode = NULL;
488 }
489 470
490 /* 471 /*
491 * Free all memory associated with the inode. 472 * Remove the inode from the per-AG radix tree. It doesn't matter
473 * if it was never added to it because radix_tree_delete can deal
474 * with that case just fine.
492 */ 475 */
493 xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 476 pag = xfs_get_perag(mp, ip->i_ino);
494 xfs_idestroy(ip);
495}
496
497/*
498 * This routine removes an about-to-be-destroyed inode from
499 * all of the lists in which it is located with the exception
500 * of the behavior chain.
501 */
502void
503xfs_iextract(
504 xfs_inode_t *ip)
505{
506 xfs_mount_t *mp = ip->i_mount;
507 xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino);
508 xfs_inode_t *iq;
509
510 write_lock(&pag->pag_ici_lock); 477 write_lock(&pag->pag_ici_lock);
511 radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino)); 478 radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino));
512 write_unlock(&pag->pag_ici_lock); 479 write_unlock(&pag->pag_ici_lock);
513 xfs_put_perag(mp, pag); 480 xfs_put_perag(mp, pag);
514 481
515 /* 482 /*
516 * Remove from mount's inode list. 483 * Here we do an (almost) spurious inode lock in order to coordinate
484 * with inode cache radix tree lookups. This is because the lookup
485 * can reference the inodes in the cache without taking references.
486 *
487 * We make that OK here by ensuring that we wait until the inode is
488 * unlocked after the lookup before we go ahead and free it. We get
489 * both the ilock and the iolock because the code may need to drop the
490 * ilock one but will still hold the iolock.
517 */ 491 */
518 XFS_MOUNT_ILOCK(mp); 492 xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
519 ASSERT((ip->i_mnext != NULL) && (ip->i_mprev != NULL));
520 iq = ip->i_mnext;
521 iq->i_mprev = ip->i_mprev;
522 ip->i_mprev->i_mnext = iq;
523
524 /* 493 /*
525 * Fix up the head pointer if it points to the inode being deleted. 494 * Release dquots (and their references) if any.
526 */ 495 */
527 if (mp->m_inodes == ip) { 496 XFS_QM_DQDETACH(ip->i_mount, ip);
528 if (ip == iq) { 497 xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
529 mp->m_inodes = NULL; 498
530 } else { 499 switch (ip->i_d.di_mode & S_IFMT) {
531 mp->m_inodes = iq; 500 case S_IFREG:
532 } 501 case S_IFDIR:
502 case S_IFLNK:
503 xfs_idestroy_fork(ip, XFS_DATA_FORK);
504 break;
533 } 505 }
534 506
535 /* Deal with the deleted inodes list */ 507 if (ip->i_afp)
536 list_del_init(&ip->i_reclaim); 508 xfs_idestroy_fork(ip, XFS_ATTR_FORK);
537 509
538 mp->m_ireclaims++; 510#ifdef XFS_INODE_TRACE
539 XFS_MOUNT_IUNLOCK(mp); 511 ktrace_free(ip->i_trace);
512#endif
513#ifdef XFS_BMAP_TRACE
514 ktrace_free(ip->i_xtrace);
515#endif
516#ifdef XFS_BTREE_TRACE
517 ktrace_free(ip->i_btrace);
518#endif
519#ifdef XFS_RW_TRACE
520 ktrace_free(ip->i_rwtrace);
521#endif
522#ifdef XFS_ILOCK_TRACE
523 ktrace_free(ip->i_lock_trace);
524#endif
525#ifdef XFS_DIR2_TRACE
526 ktrace_free(ip->i_dir_trace);
527#endif
528 if (ip->i_itemp) {
529 /*
530 * Only if we are shutting down the fs will we see an
531 * inode still in the AIL. If it is there, we should remove
532 * it to prevent a use-after-free from occurring.
533 */
534 xfs_log_item_t *lip = &ip->i_itemp->ili_item;
535 struct xfs_ail *ailp = lip->li_ailp;
536
537 ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) ||
538 XFS_FORCED_SHUTDOWN(ip->i_mount));
539 if (lip->li_flags & XFS_LI_IN_AIL) {
540 spin_lock(&ailp->xa_lock);
541 if (lip->li_flags & XFS_LI_IN_AIL)
542 xfs_trans_ail_delete(ailp, lip);
543 else
544 spin_unlock(&ailp->xa_lock);
545 }
546 xfs_inode_item_destroy(ip);
547 ip->i_itemp = NULL;
548 }
549 /* asserts to verify all state is correct here */
550 ASSERT(atomic_read(&ip->i_iocount) == 0);
551 ASSERT(atomic_read(&ip->i_pincount) == 0);
552 ASSERT(!spin_is_locked(&ip->i_flags_lock));
553 ASSERT(completion_done(&ip->i_flush));
554 kmem_zone_free(xfs_inode_zone, ip);
540} 555}
541 556
542/* 557/*
@@ -737,7 +752,7 @@ xfs_iunlock(
737 * it is in the AIL and anyone is waiting on it. Don't do 752 * it is in the AIL and anyone is waiting on it. Don't do
738 * this if the caller has asked us not to. 753 * this if the caller has asked us not to.
739 */ 754 */
740 xfs_trans_unlocked_item(ip->i_mount, 755 xfs_trans_unlocked_item(ip->i_itemp->ili_item.li_ailp,
741 (xfs_log_item_t*)(ip->i_itemp)); 756 (xfs_log_item_t*)(ip->i_itemp));
742 } 757 }
743 xfs_ilock_trace(ip, 3, lock_flags, (inst_t *)__return_address); 758 xfs_ilock_trace(ip, 3, lock_flags, (inst_t *)__return_address);
@@ -790,3 +805,51 @@ xfs_isilocked(
790} 805}
791#endif 806#endif
792 807
808#ifdef XFS_INODE_TRACE
809
810#define KTRACE_ENTER(ip, vk, s, line, ra) \
811 ktrace_enter((ip)->i_trace, \
812/* 0 */ (void *)(__psint_t)(vk), \
813/* 1 */ (void *)(s), \
814/* 2 */ (void *)(__psint_t) line, \
815/* 3 */ (void *)(__psint_t)atomic_read(&VFS_I(ip)->i_count), \
816/* 4 */ (void *)(ra), \
817/* 5 */ NULL, \
818/* 6 */ (void *)(__psint_t)current_cpu(), \
819/* 7 */ (void *)(__psint_t)current_pid(), \
820/* 8 */ (void *)__return_address, \
821/* 9 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL)
822
823/*
824 * Vnode tracing code.
825 */
826void
827_xfs_itrace_entry(xfs_inode_t *ip, const char *func, inst_t *ra)
828{
829 KTRACE_ENTER(ip, INODE_KTRACE_ENTRY, func, 0, ra);
830}
831
832void
833_xfs_itrace_exit(xfs_inode_t *ip, const char *func, inst_t *ra)
834{
835 KTRACE_ENTER(ip, INODE_KTRACE_EXIT, func, 0, ra);
836}
837
838void
839xfs_itrace_hold(xfs_inode_t *ip, char *file, int line, inst_t *ra)
840{
841 KTRACE_ENTER(ip, INODE_KTRACE_HOLD, file, line, ra);
842}
843
844void
845_xfs_itrace_ref(xfs_inode_t *ip, char *file, int line, inst_t *ra)
846{
847 KTRACE_ENTER(ip, INODE_KTRACE_REF, file, line, ra);
848}
849
850void
851xfs_itrace_rele(xfs_inode_t *ip, char *file, int line, inst_t *ra)
852{
853 KTRACE_ENTER(ip, INODE_KTRACE_RELE, file, line, ra);
854}
855#endif /* XFS_INODE_TRACE */
diff --git a/fs/xfs/xfs_imap.h b/fs/xfs/xfs_imap.h
deleted file mode 100644
index d3645000398..00000000000
--- a/fs/xfs/xfs_imap.h
+++ /dev/null
@@ -1,40 +0,0 @@
1/*
2 * Copyright (c) 2000,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_IMAP_H__
19#define __XFS_IMAP_H__
20
21/*
22 * This is the structure passed to xfs_imap() to map
23 * an inode number to its on disk location.
24 */
25typedef struct xfs_imap {
26 xfs_daddr_t im_blkno; /* starting BB of inode chunk */
27 uint im_len; /* length in BBs of inode chunk */
28 xfs_agblock_t im_agblkno; /* logical block of inode chunk in ag */
29 ushort im_ioffset; /* inode offset in block in "inodes" */
30 ushort im_boffset; /* inode offset in block in bytes */
31} xfs_imap_t;
32
33#ifdef __KERNEL__
34struct xfs_mount;
35struct xfs_trans;
36int xfs_imap(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
37 xfs_imap_t *, uint);
38#endif
39
40#endif /* __XFS_IMAP_H__ */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index a391b955df0..5a5e035e5d3 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -23,7 +23,6 @@
23#include "xfs_bit.h" 23#include "xfs_bit.h"
24#include "xfs_log.h" 24#include "xfs_log.h"
25#include "xfs_inum.h" 25#include "xfs_inum.h"
26#include "xfs_imap.h"
27#include "xfs_trans.h" 26#include "xfs_trans.h"
28#include "xfs_trans_priv.h" 27#include "xfs_trans_priv.h"
29#include "xfs_sb.h" 28#include "xfs_sb.h"
@@ -41,6 +40,7 @@
41#include "xfs_buf_item.h" 40#include "xfs_buf_item.h"
42#include "xfs_inode_item.h" 41#include "xfs_inode_item.h"
43#include "xfs_btree.h" 42#include "xfs_btree.h"
43#include "xfs_btree_trace.h"
44#include "xfs_alloc.h" 44#include "xfs_alloc.h"
45#include "xfs_ialloc.h" 45#include "xfs_ialloc.h"
46#include "xfs_bmap.h" 46#include "xfs_bmap.h"
@@ -133,10 +133,10 @@ STATIC int
133xfs_imap_to_bp( 133xfs_imap_to_bp(
134 xfs_mount_t *mp, 134 xfs_mount_t *mp,
135 xfs_trans_t *tp, 135 xfs_trans_t *tp,
136 xfs_imap_t *imap, 136 struct xfs_imap *imap,
137 xfs_buf_t **bpp, 137 xfs_buf_t **bpp,
138 uint buf_flags, 138 uint buf_flags,
139 uint imap_flags) 139 uint iget_flags)
140{ 140{
141 int error; 141 int error;
142 int i; 142 int i;
@@ -173,12 +173,12 @@ xfs_imap_to_bp(
173 173
174 dip = (xfs_dinode_t *)xfs_buf_offset(bp, 174 dip = (xfs_dinode_t *)xfs_buf_offset(bp,
175 (i << mp->m_sb.sb_inodelog)); 175 (i << mp->m_sb.sb_inodelog));
176 di_ok = be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC && 176 di_ok = be16_to_cpu(dip->di_magic) == XFS_DINODE_MAGIC &&
177 XFS_DINODE_GOOD_VERSION(dip->di_core.di_version); 177 XFS_DINODE_GOOD_VERSION(dip->di_version);
178 if (unlikely(XFS_TEST_ERROR(!di_ok, mp, 178 if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
179 XFS_ERRTAG_ITOBP_INOTOBP, 179 XFS_ERRTAG_ITOBP_INOTOBP,
180 XFS_RANDOM_ITOBP_INOTOBP))) { 180 XFS_RANDOM_ITOBP_INOTOBP))) {
181 if (imap_flags & XFS_IMAP_BULKSTAT) { 181 if (iget_flags & XFS_IGET_BULKSTAT) {
182 xfs_trans_brelse(tp, bp); 182 xfs_trans_brelse(tp, bp);
183 return XFS_ERROR(EINVAL); 183 return XFS_ERROR(EINVAL);
184 } 184 }
@@ -190,7 +190,7 @@ xfs_imap_to_bp(
190 "daddr %lld #%d (magic=%x)", 190 "daddr %lld #%d (magic=%x)",
191 XFS_BUFTARG_NAME(mp->m_ddev_targp), 191 XFS_BUFTARG_NAME(mp->m_ddev_targp),
192 (unsigned long long)imap->im_blkno, i, 192 (unsigned long long)imap->im_blkno, i,
193 be16_to_cpu(dip->di_core.di_magic)); 193 be16_to_cpu(dip->di_magic));
194#endif 194#endif
195 xfs_trans_brelse(tp, bp); 195 xfs_trans_brelse(tp, bp);
196 return XFS_ERROR(EFSCORRUPTED); 196 return XFS_ERROR(EFSCORRUPTED);
@@ -221,25 +221,26 @@ xfs_imap_to_bp(
221 * Use xfs_imap() to determine the size and location of the 221 * Use xfs_imap() to determine the size and location of the
222 * buffer to read from disk. 222 * buffer to read from disk.
223 */ 223 */
224STATIC int 224int
225xfs_inotobp( 225xfs_inotobp(
226 xfs_mount_t *mp, 226 xfs_mount_t *mp,
227 xfs_trans_t *tp, 227 xfs_trans_t *tp,
228 xfs_ino_t ino, 228 xfs_ino_t ino,
229 xfs_dinode_t **dipp, 229 xfs_dinode_t **dipp,
230 xfs_buf_t **bpp, 230 xfs_buf_t **bpp,
231 int *offset) 231 int *offset,
232 uint imap_flags)
232{ 233{
233 xfs_imap_t imap; 234 struct xfs_imap imap;
234 xfs_buf_t *bp; 235 xfs_buf_t *bp;
235 int error; 236 int error;
236 237
237 imap.im_blkno = 0; 238 imap.im_blkno = 0;
238 error = xfs_imap(mp, tp, ino, &imap, XFS_IMAP_LOOKUP); 239 error = xfs_imap(mp, tp, ino, &imap, imap_flags);
239 if (error) 240 if (error)
240 return error; 241 return error;
241 242
242 error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, 0); 243 error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, imap_flags);
243 if (error) 244 if (error)
244 return error; 245 return error;
245 246
@@ -260,15 +261,11 @@ xfs_inotobp(
260 * If a non-zero error is returned, then the contents of bpp and 261 * If a non-zero error is returned, then the contents of bpp and
261 * dipp are undefined. 262 * dipp are undefined.
262 * 263 *
263 * If the inode is new and has not yet been initialized, use xfs_imap() 264 * The inode is expected to already been mapped to its buffer and read
264 * to determine the size and location of the buffer to read from disk. 265 * in once, thus we can use the mapping information stored in the inode
265 * If the inode has already been mapped to its buffer and read in once, 266 * rather than calling xfs_imap(). This allows us to avoid the overhead
266 * then use the mapping information stored in the inode rather than 267 * of looking at the inode btree for small block file systems
267 * calling xfs_imap(). This allows us to avoid the overhead of looking 268 * (see xfs_imap()).
268 * at the inode btree for small block file systems (see xfs_dilocate()).
269 * We can tell whether the inode has been mapped in before by comparing
270 * its disk block address to 0. Only uninitialized inodes will have
271 * 0 for the disk block address.
272 */ 269 */
273int 270int
274xfs_itobp( 271xfs_itobp(
@@ -277,40 +274,14 @@ xfs_itobp(
277 xfs_inode_t *ip, 274 xfs_inode_t *ip,
278 xfs_dinode_t **dipp, 275 xfs_dinode_t **dipp,
279 xfs_buf_t **bpp, 276 xfs_buf_t **bpp,
280 xfs_daddr_t bno,
281 uint imap_flags,
282 uint buf_flags) 277 uint buf_flags)
283{ 278{
284 xfs_imap_t imap;
285 xfs_buf_t *bp; 279 xfs_buf_t *bp;
286 int error; 280 int error;
287 281
288 if (ip->i_blkno == (xfs_daddr_t)0) { 282 ASSERT(ip->i_imap.im_blkno != 0);
289 imap.im_blkno = bno;
290 error = xfs_imap(mp, tp, ip->i_ino, &imap,
291 XFS_IMAP_LOOKUP | imap_flags);
292 if (error)
293 return error;
294 283
295 /* 284 error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, buf_flags, 0);
296 * Fill in the fields in the inode that will be used to
297 * map the inode to its buffer from now on.
298 */
299 ip->i_blkno = imap.im_blkno;
300 ip->i_len = imap.im_len;
301 ip->i_boffset = imap.im_boffset;
302 } else {
303 /*
304 * We've already mapped the inode once, so just use the
305 * mapping that we saved the first time.
306 */
307 imap.im_blkno = ip->i_blkno;
308 imap.im_len = ip->i_len;
309 imap.im_boffset = ip->i_boffset;
310 }
311 ASSERT(bno == 0 || bno == imap.im_blkno);
312
313 error = xfs_imap_to_bp(mp, tp, &imap, &bp, buf_flags, imap_flags);
314 if (error) 285 if (error)
315 return error; 286 return error;
316 287
@@ -321,7 +292,7 @@ xfs_itobp(
321 return EAGAIN; 292 return EAGAIN;
322 } 293 }
323 294
324 *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); 295 *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
325 *bpp = bp; 296 *bpp = bp;
326 return 0; 297 return 0;
327} 298}
@@ -348,26 +319,26 @@ xfs_iformat(
348 XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); 319 XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
349 error = 0; 320 error = 0;
350 321
351 if (unlikely(be32_to_cpu(dip->di_core.di_nextents) + 322 if (unlikely(be32_to_cpu(dip->di_nextents) +
352 be16_to_cpu(dip->di_core.di_anextents) > 323 be16_to_cpu(dip->di_anextents) >
353 be64_to_cpu(dip->di_core.di_nblocks))) { 324 be64_to_cpu(dip->di_nblocks))) {
354 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 325 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
355 "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.", 326 "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.",
356 (unsigned long long)ip->i_ino, 327 (unsigned long long)ip->i_ino,
357 (int)(be32_to_cpu(dip->di_core.di_nextents) + 328 (int)(be32_to_cpu(dip->di_nextents) +
358 be16_to_cpu(dip->di_core.di_anextents)), 329 be16_to_cpu(dip->di_anextents)),
359 (unsigned long long) 330 (unsigned long long)
360 be64_to_cpu(dip->di_core.di_nblocks)); 331 be64_to_cpu(dip->di_nblocks));
361 XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW, 332 XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW,
362 ip->i_mount, dip); 333 ip->i_mount, dip);
363 return XFS_ERROR(EFSCORRUPTED); 334 return XFS_ERROR(EFSCORRUPTED);
364 } 335 }
365 336
366 if (unlikely(dip->di_core.di_forkoff > ip->i_mount->m_sb.sb_inodesize)) { 337 if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) {
367 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 338 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
368 "corrupt dinode %Lu, forkoff = 0x%x.", 339 "corrupt dinode %Lu, forkoff = 0x%x.",
369 (unsigned long long)ip->i_ino, 340 (unsigned long long)ip->i_ino,
370 dip->di_core.di_forkoff); 341 dip->di_forkoff);
371 XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW, 342 XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
372 ip->i_mount, dip); 343 ip->i_mount, dip);
373 return XFS_ERROR(EFSCORRUPTED); 344 return XFS_ERROR(EFSCORRUPTED);
@@ -378,25 +349,25 @@ xfs_iformat(
378 case S_IFCHR: 349 case S_IFCHR:
379 case S_IFBLK: 350 case S_IFBLK:
380 case S_IFSOCK: 351 case S_IFSOCK:
381 if (unlikely(dip->di_core.di_format != XFS_DINODE_FMT_DEV)) { 352 if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) {
382 XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW, 353 XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW,
383 ip->i_mount, dip); 354 ip->i_mount, dip);
384 return XFS_ERROR(EFSCORRUPTED); 355 return XFS_ERROR(EFSCORRUPTED);
385 } 356 }
386 ip->i_d.di_size = 0; 357 ip->i_d.di_size = 0;
387 ip->i_size = 0; 358 ip->i_size = 0;
388 ip->i_df.if_u2.if_rdev = be32_to_cpu(dip->di_u.di_dev); 359 ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip);
389 break; 360 break;
390 361
391 case S_IFREG: 362 case S_IFREG:
392 case S_IFLNK: 363 case S_IFLNK:
393 case S_IFDIR: 364 case S_IFDIR:
394 switch (dip->di_core.di_format) { 365 switch (dip->di_format) {
395 case XFS_DINODE_FMT_LOCAL: 366 case XFS_DINODE_FMT_LOCAL:
396 /* 367 /*
397 * no local regular files yet 368 * no local regular files yet
398 */ 369 */
399 if (unlikely((be16_to_cpu(dip->di_core.di_mode) & S_IFMT) == S_IFREG)) { 370 if (unlikely((be16_to_cpu(dip->di_mode) & S_IFMT) == S_IFREG)) {
400 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 371 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
401 "corrupt inode %Lu " 372 "corrupt inode %Lu "
402 "(local format for regular file).", 373 "(local format for regular file).",
@@ -407,7 +378,7 @@ xfs_iformat(
407 return XFS_ERROR(EFSCORRUPTED); 378 return XFS_ERROR(EFSCORRUPTED);
408 } 379 }
409 380
410 di_size = be64_to_cpu(dip->di_core.di_size); 381 di_size = be64_to_cpu(dip->di_size);
411 if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) { 382 if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) {
412 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 383 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
413 "corrupt inode %Lu " 384 "corrupt inode %Lu "
@@ -449,7 +420,7 @@ xfs_iformat(
449 ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP); 420 ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
450 ip->i_afp->if_ext_max = 421 ip->i_afp->if_ext_max =
451 XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); 422 XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
452 switch (dip->di_core.di_aformat) { 423 switch (dip->di_aformat) {
453 case XFS_DINODE_FMT_LOCAL: 424 case XFS_DINODE_FMT_LOCAL:
454 atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip); 425 atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip);
455 size = be16_to_cpu(atp->hdr.totsize); 426 size = be16_to_cpu(atp->hdr.totsize);
@@ -621,7 +592,7 @@ xfs_iformat_btree(
621 ifp = XFS_IFORK_PTR(ip, whichfork); 592 ifp = XFS_IFORK_PTR(ip, whichfork);
622 dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork); 593 dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
623 size = XFS_BMAP_BROOT_SPACE(dfp); 594 size = XFS_BMAP_BROOT_SPACE(dfp);
624 nrecs = XFS_BMAP_BROOT_NUMRECS(dfp); 595 nrecs = be16_to_cpu(dfp->bb_numrecs);
625 596
626 /* 597 /*
627 * blow out if -- fork has less extents than can fit in 598 * blow out if -- fork has less extents than can fit in
@@ -649,8 +620,9 @@ xfs_iformat_btree(
649 * Copy and convert from the on-disk structure 620 * Copy and convert from the on-disk structure
650 * to the in-memory structure. 621 * to the in-memory structure.
651 */ 622 */
652 xfs_bmdr_to_bmbt(dfp, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork), 623 xfs_bmdr_to_bmbt(ip->i_mount, dfp,
653 ifp->if_broot, size); 624 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork),
625 ifp->if_broot, size);
654 ifp->if_flags &= ~XFS_IFEXTENTS; 626 ifp->if_flags &= ~XFS_IFEXTENTS;
655 ifp->if_flags |= XFS_IFBROOT; 627 ifp->if_flags |= XFS_IFBROOT;
656 628
@@ -660,7 +632,7 @@ xfs_iformat_btree(
660void 632void
661xfs_dinode_from_disk( 633xfs_dinode_from_disk(
662 xfs_icdinode_t *to, 634 xfs_icdinode_t *to,
663 xfs_dinode_core_t *from) 635 xfs_dinode_t *from)
664{ 636{
665 to->di_magic = be16_to_cpu(from->di_magic); 637 to->di_magic = be16_to_cpu(from->di_magic);
666 to->di_mode = be16_to_cpu(from->di_mode); 638 to->di_mode = be16_to_cpu(from->di_mode);
@@ -694,7 +666,7 @@ xfs_dinode_from_disk(
694 666
695void 667void
696xfs_dinode_to_disk( 668xfs_dinode_to_disk(
697 xfs_dinode_core_t *to, 669 xfs_dinode_t *to,
698 xfs_icdinode_t *from) 670 xfs_icdinode_t *from)
699{ 671{
700 to->di_magic = cpu_to_be16(from->di_magic); 672 to->di_magic = cpu_to_be16(from->di_magic);
@@ -781,93 +753,57 @@ uint
781xfs_dic2xflags( 753xfs_dic2xflags(
782 xfs_dinode_t *dip) 754 xfs_dinode_t *dip)
783{ 755{
784 xfs_dinode_core_t *dic = &dip->di_core; 756 return _xfs_dic2xflags(be16_to_cpu(dip->di_flags)) |
785
786 return _xfs_dic2xflags(be16_to_cpu(dic->di_flags)) |
787 (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0); 757 (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0);
788} 758}
789 759
790/* 760/*
791 * Given a mount structure and an inode number, return a pointer 761 * Read the disk inode attributes into the in-core inode structure.
792 * to a newly allocated in-core inode corresponding to the given
793 * inode number.
794 *
795 * Initialize the inode's attributes and extent pointers if it
796 * already has them (it will not if the inode has no links).
797 */ 762 */
798int 763int
799xfs_iread( 764xfs_iread(
800 xfs_mount_t *mp, 765 xfs_mount_t *mp,
801 xfs_trans_t *tp, 766 xfs_trans_t *tp,
802 xfs_ino_t ino, 767 xfs_inode_t *ip,
803 xfs_inode_t **ipp,
804 xfs_daddr_t bno, 768 xfs_daddr_t bno,
805 uint imap_flags) 769 uint iget_flags)
806{ 770{
807 xfs_buf_t *bp; 771 xfs_buf_t *bp;
808 xfs_dinode_t *dip; 772 xfs_dinode_t *dip;
809 xfs_inode_t *ip;
810 int error; 773 int error;
811 774
812 ASSERT(xfs_inode_zone != NULL);
813
814 ip = kmem_zone_zalloc(xfs_inode_zone, KM_SLEEP);
815 ip->i_ino = ino;
816 ip->i_mount = mp;
817 atomic_set(&ip->i_iocount, 0);
818 spin_lock_init(&ip->i_flags_lock);
819
820 /* 775 /*
821 * Get pointer's to the on-disk inode and the buffer containing it. 776 * Fill in the location information in the in-core inode.
822 * If the inode number refers to a block outside the file system
823 * then xfs_itobp() will return NULL. In this case we should
824 * return NULL as well. Set i_blkno to 0 so that xfs_itobp() will
825 * know that this is a new incore inode.
826 */ 777 */
827 error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags, XFS_BUF_LOCK); 778 ip->i_imap.im_blkno = bno;
828 if (error) { 779 error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags);
829 kmem_zone_free(xfs_inode_zone, ip); 780 if (error)
830 return error; 781 return error;
831 } 782 ASSERT(bno == 0 || bno == ip->i_imap.im_blkno);
832 783
833 /* 784 /*
834 * Initialize inode's trace buffers. 785 * Get pointers to the on-disk inode and the buffer containing it.
835 * Do this before xfs_iformat in case it adds entries.
836 */ 786 */
837#ifdef XFS_INODE_TRACE 787 error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp,
838 ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_NOFS); 788 XFS_BUF_LOCK, iget_flags);
839#endif 789 if (error)
840#ifdef XFS_BMAP_TRACE 790 return error;
841 ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_NOFS); 791 dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
842#endif
843#ifdef XFS_BMBT_TRACE
844 ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_NOFS);
845#endif
846#ifdef XFS_RW_TRACE
847 ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_NOFS);
848#endif
849#ifdef XFS_ILOCK_TRACE
850 ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_NOFS);
851#endif
852#ifdef XFS_DIR2_TRACE
853 ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS);
854#endif
855 792
856 /* 793 /*
857 * If we got something that isn't an inode it means someone 794 * If we got something that isn't an inode it means someone
858 * (nfs or dmi) has a stale handle. 795 * (nfs or dmi) has a stale handle.
859 */ 796 */
860 if (be16_to_cpu(dip->di_core.di_magic) != XFS_DINODE_MAGIC) { 797 if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC) {
861 kmem_zone_free(xfs_inode_zone, ip);
862 xfs_trans_brelse(tp, bp);
863#ifdef DEBUG 798#ifdef DEBUG
864 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: " 799 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: "
865 "dip->di_core.di_magic (0x%x) != " 800 "dip->di_magic (0x%x) != "
866 "XFS_DINODE_MAGIC (0x%x)", 801 "XFS_DINODE_MAGIC (0x%x)",
867 be16_to_cpu(dip->di_core.di_magic), 802 be16_to_cpu(dip->di_magic),
868 XFS_DINODE_MAGIC); 803 XFS_DINODE_MAGIC);
869#endif /* DEBUG */ 804#endif /* DEBUG */
870 return XFS_ERROR(EINVAL); 805 error = XFS_ERROR(EINVAL);
806 goto out_brelse;
871 } 807 }
872 808
873 /* 809 /*
@@ -877,24 +813,22 @@ xfs_iread(
877 * specific information. 813 * specific information.
878 * Otherwise, just get the truly permanent information. 814 * Otherwise, just get the truly permanent information.
879 */ 815 */
880 if (dip->di_core.di_mode) { 816 if (dip->di_mode) {
881 xfs_dinode_from_disk(&ip->i_d, &dip->di_core); 817 xfs_dinode_from_disk(&ip->i_d, dip);
882 error = xfs_iformat(ip, dip); 818 error = xfs_iformat(ip, dip);
883 if (error) { 819 if (error) {
884 kmem_zone_free(xfs_inode_zone, ip);
885 xfs_trans_brelse(tp, bp);
886#ifdef DEBUG 820#ifdef DEBUG
887 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: " 821 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: "
888 "xfs_iformat() returned error %d", 822 "xfs_iformat() returned error %d",
889 error); 823 error);
890#endif /* DEBUG */ 824#endif /* DEBUG */
891 return error; 825 goto out_brelse;
892 } 826 }
893 } else { 827 } else {
894 ip->i_d.di_magic = be16_to_cpu(dip->di_core.di_magic); 828 ip->i_d.di_magic = be16_to_cpu(dip->di_magic);
895 ip->i_d.di_version = dip->di_core.di_version; 829 ip->i_d.di_version = dip->di_version;
896 ip->i_d.di_gen = be32_to_cpu(dip->di_core.di_gen); 830 ip->i_d.di_gen = be32_to_cpu(dip->di_gen);
897 ip->i_d.di_flushiter = be16_to_cpu(dip->di_core.di_flushiter); 831 ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);
898 /* 832 /*
899 * Make sure to pull in the mode here as well in 833 * Make sure to pull in the mode here as well in
900 * case the inode is released without being used. 834 * case the inode is released without being used.
@@ -911,8 +845,6 @@ xfs_iread(
911 XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); 845 XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
912 } 846 }
913 847
914 INIT_LIST_HEAD(&ip->i_reclaim);
915
916 /* 848 /*
917 * The inode format changed when we moved the link count and 849 * The inode format changed when we moved the link count and
918 * made it 32 bits long. If this is an old format inode, 850 * made it 32 bits long. If this is an old format inode,
@@ -924,7 +856,7 @@ xfs_iread(
924 * the new format. We don't change the version number so that we 856 * the new format. We don't change the version number so that we
925 * can distinguish this from a real new format inode. 857 * can distinguish this from a real new format inode.
926 */ 858 */
927 if (ip->i_d.di_version == XFS_DINODE_VERSION_1) { 859 if (ip->i_d.di_version == 1) {
928 ip->i_d.di_nlink = ip->i_d.di_onlink; 860 ip->i_d.di_nlink = ip->i_d.di_onlink;
929 ip->i_d.di_onlink = 0; 861 ip->i_d.di_onlink = 0;
930 ip->i_d.di_projid = 0; 862 ip->i_d.di_projid = 0;
@@ -938,7 +870,7 @@ xfs_iread(
938 * around for a while. This helps to keep recently accessed 870 * around for a while. This helps to keep recently accessed
939 * meta-data in-core longer. 871 * meta-data in-core longer.
940 */ 872 */
941 XFS_BUF_SET_REF(bp, XFS_INO_REF); 873 XFS_BUF_SET_REF(bp, XFS_INO_REF);
942 874
943 /* 875 /*
944 * Use xfs_trans_brelse() to release the buffer containing the 876 * Use xfs_trans_brelse() to release the buffer containing the
@@ -953,9 +885,9 @@ xfs_iread(
953 * to worry about the inode being changed just because we released 885 * to worry about the inode being changed just because we released
954 * the buffer. 886 * the buffer.
955 */ 887 */
888 out_brelse:
956 xfs_trans_brelse(tp, bp); 889 xfs_trans_brelse(tp, bp);
957 *ipp = ip; 890 return error;
958 return 0;
959} 891}
960 892
961/* 893/*
@@ -1049,6 +981,7 @@ xfs_ialloc(
1049 uint flags; 981 uint flags;
1050 int error; 982 int error;
1051 timespec_t tv; 983 timespec_t tv;
984 int filestreams = 0;
1052 985
1053 /* 986 /*
1054 * Call the space management code to pick 987 * Call the space management code to pick
@@ -1056,9 +989,8 @@ xfs_ialloc(
1056 */ 989 */
1057 error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc, 990 error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc,
1058 ialloc_context, call_again, &ino); 991 ialloc_context, call_again, &ino);
1059 if (error != 0) { 992 if (error)
1060 return error; 993 return error;
1061 }
1062 if (*call_again || ino == NULLFSINO) { 994 if (*call_again || ino == NULLFSINO) {
1063 *ipp = NULL; 995 *ipp = NULL;
1064 return 0; 996 return 0;
@@ -1072,9 +1004,8 @@ xfs_ialloc(
1072 */ 1004 */
1073 error = xfs_trans_iget(tp->t_mountp, tp, ino, 1005 error = xfs_trans_iget(tp->t_mountp, tp, ino,
1074 XFS_IGET_CREATE, XFS_ILOCK_EXCL, &ip); 1006 XFS_IGET_CREATE, XFS_ILOCK_EXCL, &ip);
1075 if (error != 0) { 1007 if (error)
1076 return error; 1008 return error;
1077 }
1078 ASSERT(ip != NULL); 1009 ASSERT(ip != NULL);
1079 1010
1080 ip->i_d.di_mode = (__uint16_t)mode; 1011 ip->i_d.di_mode = (__uint16_t)mode;
@@ -1093,8 +1024,8 @@ xfs_ialloc(
1093 * here rather than here and in the flush/logging code. 1024 * here rather than here and in the flush/logging code.
1094 */ 1025 */
1095 if (xfs_sb_version_hasnlink(&tp->t_mountp->m_sb) && 1026 if (xfs_sb_version_hasnlink(&tp->t_mountp->m_sb) &&
1096 ip->i_d.di_version == XFS_DINODE_VERSION_1) { 1027 ip->i_d.di_version == 1) {
1097 ip->i_d.di_version = XFS_DINODE_VERSION_2; 1028 ip->i_d.di_version = 2;
1098 /* 1029 /*
1099 * We've already zeroed the old link count, the projid field, 1030 * We've already zeroed the old link count, the projid field,
1100 * and the pad field. 1031 * and the pad field.
@@ -1104,7 +1035,7 @@ xfs_ialloc(
1104 /* 1035 /*
1105 * Project ids won't be stored on disk if we are using a version 1 inode. 1036 * Project ids won't be stored on disk if we are using a version 1 inode.
1106 */ 1037 */
1107 if ((prid != 0) && (ip->i_d.di_version == XFS_DINODE_VERSION_1)) 1038 if ((prid != 0) && (ip->i_d.di_version == 1))
1108 xfs_bump_ino_vers2(tp, ip); 1039 xfs_bump_ino_vers2(tp, ip);
1109 1040
1110 if (pip && XFS_INHERIT_GID(pip)) { 1041 if (pip && XFS_INHERIT_GID(pip)) {
@@ -1155,13 +1086,12 @@ xfs_ialloc(
1155 flags |= XFS_ILOG_DEV; 1086 flags |= XFS_ILOG_DEV;
1156 break; 1087 break;
1157 case S_IFREG: 1088 case S_IFREG:
1158 if (pip && xfs_inode_is_filestream(pip)) { 1089 /*
1159 error = xfs_filestream_associate(pip, ip); 1090 * we can't set up filestreams until after the VFS inode
1160 if (error < 0) 1091 * is set up properly.
1161 return -error; 1092 */
1162 if (!error) 1093 if (pip && xfs_inode_is_filestream(pip))
1163 xfs_iflags_set(ip, XFS_IFILESTREAM); 1094 filestreams = 1;
1164 }
1165 /* fall through */ 1095 /* fall through */
1166 case S_IFDIR: 1096 case S_IFDIR:
1167 if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) { 1097 if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
@@ -1227,6 +1157,15 @@ xfs_ialloc(
1227 /* now that we have an i_mode we can setup inode ops and unlock */ 1157 /* now that we have an i_mode we can setup inode ops and unlock */
1228 xfs_setup_inode(ip); 1158 xfs_setup_inode(ip);
1229 1159
1160 /* now we have set up the vfs inode we can associate the filestream */
1161 if (filestreams) {
1162 error = xfs_filestream_associate(pip, ip);
1163 if (error < 0)
1164 return -error;
1165 if (!error)
1166 xfs_iflags_set(ip, XFS_IFILESTREAM);
1167 }
1168
1230 *ipp = ip; 1169 *ipp = ip;
1231 return 0; 1170 return 0;
1232} 1171}
@@ -1383,8 +1322,8 @@ xfs_itrunc_trace(
1383 * direct I/O with the truncate operation. Also, because we hold 1322 * direct I/O with the truncate operation. Also, because we hold
1384 * the IOLOCK in exclusive mode, we prevent new direct I/Os from being 1323 * the IOLOCK in exclusive mode, we prevent new direct I/Os from being
1385 * started until the truncate completes and drops the lock. Essentially, 1324 * started until the truncate completes and drops the lock. Essentially,
1386 * the vn_iowait() call forms an I/O barrier that provides strict ordering 1325 * the xfs_ioend_wait() call forms an I/O barrier that provides strict
1387 * between direct I/Os and the truncate operation. 1326 * ordering between direct I/Os and the truncate operation.
1388 * 1327 *
1389 * The flags parameter can have either the value XFS_ITRUNC_DEFINITE 1328 * The flags parameter can have either the value XFS_ITRUNC_DEFINITE
1390 * or XFS_ITRUNC_MAYBE. The XFS_ITRUNC_MAYBE value should be used 1329 * or XFS_ITRUNC_MAYBE. The XFS_ITRUNC_MAYBE value should be used
@@ -1415,7 +1354,7 @@ xfs_itruncate_start(
1415 1354
1416 /* wait for the completion of any pending DIOs */ 1355 /* wait for the completion of any pending DIOs */
1417 if (new_size == 0 || new_size < ip->i_size) 1356 if (new_size == 0 || new_size < ip->i_size)
1418 vn_iowait(ip); 1357 xfs_ioend_wait(ip);
1419 1358
1420 /* 1359 /*
1421 * Call toss_pages or flushinval_pages to get rid of pages 1360 * Call toss_pages or flushinval_pages to get rid of pages
@@ -1726,8 +1665,14 @@ xfs_itruncate_finish(
1726 xfs_trans_ijoin(ntp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 1665 xfs_trans_ijoin(ntp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
1727 xfs_trans_ihold(ntp, ip); 1666 xfs_trans_ihold(ntp, ip);
1728 1667
1729 if (!error) 1668 if (error)
1730 error = xfs_trans_reserve(ntp, 0, 1669 return error;
1670 /*
1671 * transaction commit worked ok so we can drop the extra ticket
1672 * reference that we gained in xfs_trans_dup()
1673 */
1674 xfs_log_ticket_put(ntp->t_ticket);
1675 error = xfs_trans_reserve(ntp, 0,
1731 XFS_ITRUNCATE_LOG_RES(mp), 0, 1676 XFS_ITRUNCATE_LOG_RES(mp), 0,
1732 XFS_TRANS_PERM_LOG_RES, 1677 XFS_TRANS_PERM_LOG_RES,
1733 XFS_ITRUNCATE_LOG_COUNT); 1678 XFS_ITRUNCATE_LOG_COUNT);
@@ -1781,13 +1726,10 @@ xfs_iunlink(
1781 xfs_dinode_t *dip; 1726 xfs_dinode_t *dip;
1782 xfs_buf_t *agibp; 1727 xfs_buf_t *agibp;
1783 xfs_buf_t *ibp; 1728 xfs_buf_t *ibp;
1784 xfs_agnumber_t agno;
1785 xfs_daddr_t agdaddr;
1786 xfs_agino_t agino; 1729 xfs_agino_t agino;
1787 short bucket_index; 1730 short bucket_index;
1788 int offset; 1731 int offset;
1789 int error; 1732 int error;
1790 int agi_ok;
1791 1733
1792 ASSERT(ip->i_d.di_nlink == 0); 1734 ASSERT(ip->i_d.di_nlink == 0);
1793 ASSERT(ip->i_d.di_mode != 0); 1735 ASSERT(ip->i_d.di_mode != 0);
@@ -1795,31 +1737,15 @@ xfs_iunlink(
1795 1737
1796 mp = tp->t_mountp; 1738 mp = tp->t_mountp;
1797 1739
1798 agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
1799 agdaddr = XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp));
1800
1801 /* 1740 /*
1802 * Get the agi buffer first. It ensures lock ordering 1741 * Get the agi buffer first. It ensures lock ordering
1803 * on the list. 1742 * on the list.
1804 */ 1743 */
1805 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, agdaddr, 1744 error = xfs_read_agi(mp, tp, XFS_INO_TO_AGNO(mp, ip->i_ino), &agibp);
1806 XFS_FSS_TO_BB(mp, 1), 0, &agibp);
1807 if (error) 1745 if (error)
1808 return error; 1746 return error;
1809
1810 /*
1811 * Validate the magic number of the agi block.
1812 */
1813 agi = XFS_BUF_TO_AGI(agibp); 1747 agi = XFS_BUF_TO_AGI(agibp);
1814 agi_ok = 1748
1815 be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC &&
1816 XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum));
1817 if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IUNLINK,
1818 XFS_RANDOM_IUNLINK))) {
1819 XFS_CORRUPTION_ERROR("xfs_iunlink", XFS_ERRLEVEL_LOW, mp, agi);
1820 xfs_trans_brelse(tp, agibp);
1821 return XFS_ERROR(EFSCORRUPTED);
1822 }
1823 /* 1749 /*
1824 * Get the index into the agi hash table for the 1750 * Get the index into the agi hash table for the
1825 * list this inode will go on. 1751 * list this inode will go on.
@@ -1837,14 +1763,14 @@ xfs_iunlink(
1837 * Here we put the head pointer into our next pointer, 1763 * Here we put the head pointer into our next pointer,
1838 * and then we fall through to point the head at us. 1764 * and then we fall through to point the head at us.
1839 */ 1765 */
1840 error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); 1766 error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK);
1841 if (error) 1767 if (error)
1842 return error; 1768 return error;
1843 1769
1844 ASSERT(be32_to_cpu(dip->di_next_unlinked) == NULLAGINO); 1770 ASSERT(be32_to_cpu(dip->di_next_unlinked) == NULLAGINO);
1845 /* both on-disk, don't endian flip twice */ 1771 /* both on-disk, don't endian flip twice */
1846 dip->di_next_unlinked = agi->agi_unlinked[bucket_index]; 1772 dip->di_next_unlinked = agi->agi_unlinked[bucket_index];
1847 offset = ip->i_boffset + 1773 offset = ip->i_imap.im_boffset +
1848 offsetof(xfs_dinode_t, di_next_unlinked); 1774 offsetof(xfs_dinode_t, di_next_unlinked);
1849 xfs_trans_inode_buf(tp, ibp); 1775 xfs_trans_inode_buf(tp, ibp);
1850 xfs_trans_log_buf(tp, ibp, offset, 1776 xfs_trans_log_buf(tp, ibp, offset,
@@ -1879,7 +1805,6 @@ xfs_iunlink_remove(
1879 xfs_buf_t *agibp; 1805 xfs_buf_t *agibp;
1880 xfs_buf_t *ibp; 1806 xfs_buf_t *ibp;
1881 xfs_agnumber_t agno; 1807 xfs_agnumber_t agno;
1882 xfs_daddr_t agdaddr;
1883 xfs_agino_t agino; 1808 xfs_agino_t agino;
1884 xfs_agino_t next_agino; 1809 xfs_agino_t next_agino;
1885 xfs_buf_t *last_ibp; 1810 xfs_buf_t *last_ibp;
@@ -1887,45 +1812,20 @@ xfs_iunlink_remove(
1887 short bucket_index; 1812 short bucket_index;
1888 int offset, last_offset = 0; 1813 int offset, last_offset = 0;
1889 int error; 1814 int error;
1890 int agi_ok;
1891 1815
1892 /*
1893 * First pull the on-disk inode from the AGI unlinked list.
1894 */
1895 mp = tp->t_mountp; 1816 mp = tp->t_mountp;
1896
1897 agno = XFS_INO_TO_AGNO(mp, ip->i_ino); 1817 agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
1898 agdaddr = XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp));
1899 1818
1900 /* 1819 /*
1901 * Get the agi buffer first. It ensures lock ordering 1820 * Get the agi buffer first. It ensures lock ordering
1902 * on the list. 1821 * on the list.
1903 */ 1822 */
1904 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, agdaddr, 1823 error = xfs_read_agi(mp, tp, agno, &agibp);
1905 XFS_FSS_TO_BB(mp, 1), 0, &agibp); 1824 if (error)
1906 if (error) {
1907 cmn_err(CE_WARN,
1908 "xfs_iunlink_remove: xfs_trans_read_buf() returned an error %d on %s. Returning error.",
1909 error, mp->m_fsname);
1910 return error; 1825 return error;
1911 } 1826
1912 /*
1913 * Validate the magic number of the agi block.
1914 */
1915 agi = XFS_BUF_TO_AGI(agibp); 1827 agi = XFS_BUF_TO_AGI(agibp);
1916 agi_ok = 1828
1917 be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC &&
1918 XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum));
1919 if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IUNLINK_REMOVE,
1920 XFS_RANDOM_IUNLINK_REMOVE))) {
1921 XFS_CORRUPTION_ERROR("xfs_iunlink_remove", XFS_ERRLEVEL_LOW,
1922 mp, agi);
1923 xfs_trans_brelse(tp, agibp);
1924 cmn_err(CE_WARN,
1925 "xfs_iunlink_remove: XFS_TEST_ERROR() returned an error on %s. Returning EFSCORRUPTED.",
1926 mp->m_fsname);
1927 return XFS_ERROR(EFSCORRUPTED);
1928 }
1929 /* 1829 /*
1930 * Get the index into the agi hash table for the 1830 * Get the index into the agi hash table for the
1931 * list this inode will go on. 1831 * list this inode will go on.
@@ -1945,7 +1845,7 @@ xfs_iunlink_remove(
1945 * of dealing with the buffer when there is no need to 1845 * of dealing with the buffer when there is no need to
1946 * change it. 1846 * change it.
1947 */ 1847 */
1948 error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); 1848 error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK);
1949 if (error) { 1849 if (error) {
1950 cmn_err(CE_WARN, 1850 cmn_err(CE_WARN,
1951 "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", 1851 "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.",
@@ -1956,7 +1856,7 @@ xfs_iunlink_remove(
1956 ASSERT(next_agino != 0); 1856 ASSERT(next_agino != 0);
1957 if (next_agino != NULLAGINO) { 1857 if (next_agino != NULLAGINO) {
1958 dip->di_next_unlinked = cpu_to_be32(NULLAGINO); 1858 dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
1959 offset = ip->i_boffset + 1859 offset = ip->i_imap.im_boffset +
1960 offsetof(xfs_dinode_t, di_next_unlinked); 1860 offsetof(xfs_dinode_t, di_next_unlinked);
1961 xfs_trans_inode_buf(tp, ibp); 1861 xfs_trans_inode_buf(tp, ibp);
1962 xfs_trans_log_buf(tp, ibp, offset, 1862 xfs_trans_log_buf(tp, ibp, offset,
@@ -1992,7 +1892,7 @@ xfs_iunlink_remove(
1992 } 1892 }
1993 next_ino = XFS_AGINO_TO_INO(mp, agno, next_agino); 1893 next_ino = XFS_AGINO_TO_INO(mp, agno, next_agino);
1994 error = xfs_inotobp(mp, tp, next_ino, &last_dip, 1894 error = xfs_inotobp(mp, tp, next_ino, &last_dip,
1995 &last_ibp, &last_offset); 1895 &last_ibp, &last_offset, 0);
1996 if (error) { 1896 if (error) {
1997 cmn_err(CE_WARN, 1897 cmn_err(CE_WARN,
1998 "xfs_iunlink_remove: xfs_inotobp() returned an error %d on %s. Returning error.", 1898 "xfs_iunlink_remove: xfs_inotobp() returned an error %d on %s. Returning error.",
@@ -2007,7 +1907,7 @@ xfs_iunlink_remove(
2007 * Now last_ibp points to the buffer previous to us on 1907 * Now last_ibp points to the buffer previous to us on
2008 * the unlinked list. Pull us from the list. 1908 * the unlinked list. Pull us from the list.
2009 */ 1909 */
2010 error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); 1910 error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK);
2011 if (error) { 1911 if (error) {
2012 cmn_err(CE_WARN, 1912 cmn_err(CE_WARN,
2013 "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", 1913 "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.",
@@ -2019,7 +1919,7 @@ xfs_iunlink_remove(
2019 ASSERT(next_agino != agino); 1919 ASSERT(next_agino != agino);
2020 if (next_agino != NULLAGINO) { 1920 if (next_agino != NULLAGINO) {
2021 dip->di_next_unlinked = cpu_to_be32(NULLAGINO); 1921 dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
2022 offset = ip->i_boffset + 1922 offset = ip->i_imap.im_boffset +
2023 offsetof(xfs_dinode_t, di_next_unlinked); 1923 offsetof(xfs_dinode_t, di_next_unlinked);
2024 xfs_trans_inode_buf(tp, ibp); 1924 xfs_trans_inode_buf(tp, ibp);
2025 xfs_trans_log_buf(tp, ibp, offset, 1925 xfs_trans_log_buf(tp, ibp, offset,
@@ -2160,9 +2060,9 @@ xfs_ifree_cluster(
2160 iip = (xfs_inode_log_item_t *)lip; 2060 iip = (xfs_inode_log_item_t *)lip;
2161 ASSERT(iip->ili_logged == 1); 2061 ASSERT(iip->ili_logged == 1);
2162 lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done; 2062 lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done;
2163 spin_lock(&mp->m_ail_lock); 2063 xfs_trans_ail_copy_lsn(mp->m_ail,
2164 iip->ili_flush_lsn = iip->ili_item.li_lsn; 2064 &iip->ili_flush_lsn,
2165 spin_unlock(&mp->m_ail_lock); 2065 &iip->ili_item.li_lsn);
2166 xfs_iflags_set(iip->ili_inode, XFS_ISTALE); 2066 xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
2167 pre_flushed++; 2067 pre_flushed++;
2168 } 2068 }
@@ -2183,9 +2083,8 @@ xfs_ifree_cluster(
2183 iip->ili_last_fields = iip->ili_format.ilf_fields; 2083 iip->ili_last_fields = iip->ili_format.ilf_fields;
2184 iip->ili_format.ilf_fields = 0; 2084 iip->ili_format.ilf_fields = 0;
2185 iip->ili_logged = 1; 2085 iip->ili_logged = 1;
2186 spin_lock(&mp->m_ail_lock); 2086 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
2187 iip->ili_flush_lsn = iip->ili_item.li_lsn; 2087 &iip->ili_item.li_lsn);
2188 spin_unlock(&mp->m_ail_lock);
2189 2088
2190 xfs_buf_attach_iodone(bp, 2089 xfs_buf_attach_iodone(bp,
2191 (void(*)(xfs_buf_t*,xfs_log_item_t*)) 2090 (void(*)(xfs_buf_t*,xfs_log_item_t*))
@@ -2263,7 +2162,7 @@ xfs_ifree(
2263 2162
2264 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 2163 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
2265 2164
2266 error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); 2165 error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, XFS_BUF_LOCK);
2267 if (error) 2166 if (error)
2268 return error; 2167 return error;
2269 2168
@@ -2279,7 +2178,7 @@ xfs_ifree(
2279 * This is a temporary hack that would require a proper fix 2178 * This is a temporary hack that would require a proper fix
2280 * in the future. 2179 * in the future.
2281 */ 2180 */
2282 dip->di_core.di_mode = 0; 2181 dip->di_mode = 0;
2283 2182
2284 if (delete) { 2183 if (delete) {
2285 xfs_ifree_cluster(ip, tp, first_ino); 2184 xfs_ifree_cluster(ip, tp, first_ino);
@@ -2312,9 +2211,10 @@ xfs_iroot_realloc(
2312 int rec_diff, 2211 int rec_diff,
2313 int whichfork) 2212 int whichfork)
2314{ 2213{
2214 struct xfs_mount *mp = ip->i_mount;
2315 int cur_max; 2215 int cur_max;
2316 xfs_ifork_t *ifp; 2216 xfs_ifork_t *ifp;
2317 xfs_bmbt_block_t *new_broot; 2217 struct xfs_btree_block *new_broot;
2318 int new_max; 2218 int new_max;
2319 size_t new_size; 2219 size_t new_size;
2320 char *np; 2220 char *np;
@@ -2335,8 +2235,7 @@ xfs_iroot_realloc(
2335 */ 2235 */
2336 if (ifp->if_broot_bytes == 0) { 2236 if (ifp->if_broot_bytes == 0) {
2337 new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(rec_diff); 2237 new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(rec_diff);
2338 ifp->if_broot = (xfs_bmbt_block_t*)kmem_alloc(new_size, 2238 ifp->if_broot = kmem_alloc(new_size, KM_SLEEP);
2339 KM_SLEEP);
2340 ifp->if_broot_bytes = (int)new_size; 2239 ifp->if_broot_bytes = (int)new_size;
2341 return; 2240 return;
2342 } 2241 }
@@ -2347,18 +2246,16 @@ xfs_iroot_realloc(
2347 * location. The records don't change location because 2246 * location. The records don't change location because
2348 * they are kept butted up against the btree block header. 2247 * they are kept butted up against the btree block header.
2349 */ 2248 */
2350 cur_max = XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes); 2249 cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
2351 new_max = cur_max + rec_diff; 2250 new_max = cur_max + rec_diff;
2352 new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max); 2251 new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max);
2353 ifp->if_broot = (xfs_bmbt_block_t *) 2252 ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
2354 kmem_realloc(ifp->if_broot,
2355 new_size,
2356 (size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */ 2253 (size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */
2357 KM_SLEEP); 2254 KM_SLEEP);
2358 op = (char *)XFS_BMAP_BROOT_PTR_ADDR(ifp->if_broot, 1, 2255 op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
2359 ifp->if_broot_bytes); 2256 ifp->if_broot_bytes);
2360 np = (char *)XFS_BMAP_BROOT_PTR_ADDR(ifp->if_broot, 1, 2257 np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
2361 (int)new_size); 2258 (int)new_size);
2362 ifp->if_broot_bytes = (int)new_size; 2259 ifp->if_broot_bytes = (int)new_size;
2363 ASSERT(ifp->if_broot_bytes <= 2260 ASSERT(ifp->if_broot_bytes <=
2364 XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ); 2261 XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ);
@@ -2372,7 +2269,7 @@ xfs_iroot_realloc(
2372 * records, just get rid of the root and clear the status bit. 2269 * records, just get rid of the root and clear the status bit.
2373 */ 2270 */
2374 ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0)); 2271 ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0));
2375 cur_max = XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes); 2272 cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
2376 new_max = cur_max + rec_diff; 2273 new_max = cur_max + rec_diff;
2377 ASSERT(new_max >= 0); 2274 ASSERT(new_max >= 0);
2378 if (new_max > 0) 2275 if (new_max > 0)
@@ -2380,11 +2277,11 @@ xfs_iroot_realloc(
2380 else 2277 else
2381 new_size = 0; 2278 new_size = 0;
2382 if (new_size > 0) { 2279 if (new_size > 0) {
2383 new_broot = (xfs_bmbt_block_t *)kmem_alloc(new_size, KM_SLEEP); 2280 new_broot = kmem_alloc(new_size, KM_SLEEP);
2384 /* 2281 /*
2385 * First copy over the btree block header. 2282 * First copy over the btree block header.
2386 */ 2283 */
2387 memcpy(new_broot, ifp->if_broot, sizeof(xfs_bmbt_block_t)); 2284 memcpy(new_broot, ifp->if_broot, XFS_BTREE_LBLOCK_LEN);
2388 } else { 2285 } else {
2389 new_broot = NULL; 2286 new_broot = NULL;
2390 ifp->if_flags &= ~XFS_IFBROOT; 2287 ifp->if_flags &= ~XFS_IFBROOT;
@@ -2397,18 +2294,16 @@ xfs_iroot_realloc(
2397 /* 2294 /*
2398 * First copy the records. 2295 * First copy the records.
2399 */ 2296 */
2400 op = (char *)XFS_BMAP_BROOT_REC_ADDR(ifp->if_broot, 1, 2297 op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1);
2401 ifp->if_broot_bytes); 2298 np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1);
2402 np = (char *)XFS_BMAP_BROOT_REC_ADDR(new_broot, 1,
2403 (int)new_size);
2404 memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t)); 2299 memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t));
2405 2300
2406 /* 2301 /*
2407 * Then copy the pointers. 2302 * Then copy the pointers.
2408 */ 2303 */
2409 op = (char *)XFS_BMAP_BROOT_PTR_ADDR(ifp->if_broot, 1, 2304 op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
2410 ifp->if_broot_bytes); 2305 ifp->if_broot_bytes);
2411 np = (char *)XFS_BMAP_BROOT_PTR_ADDR(new_broot, 1, 2306 np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1,
2412 (int)new_size); 2307 (int)new_size);
2413 memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t)); 2308 memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t));
2414 } 2309 }
@@ -2511,64 +2406,6 @@ xfs_idata_realloc(
2511 ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); 2406 ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
2512} 2407}
2513 2408
2514
2515
2516
2517/*
2518 * Map inode to disk block and offset.
2519 *
2520 * mp -- the mount point structure for the current file system
2521 * tp -- the current transaction
2522 * ino -- the inode number of the inode to be located
2523 * imap -- this structure is filled in with the information necessary
2524 * to retrieve the given inode from disk
2525 * flags -- flags to pass to xfs_dilocate indicating whether or not
2526 * lookups in the inode btree were OK or not
2527 */
2528int
2529xfs_imap(
2530 xfs_mount_t *mp,
2531 xfs_trans_t *tp,
2532 xfs_ino_t ino,
2533 xfs_imap_t *imap,
2534 uint flags)
2535{
2536 xfs_fsblock_t fsbno;
2537 int len;
2538 int off;
2539 int error;
2540
2541 fsbno = imap->im_blkno ?
2542 XFS_DADDR_TO_FSB(mp, imap->im_blkno) : NULLFSBLOCK;
2543 error = xfs_dilocate(mp, tp, ino, &fsbno, &len, &off, flags);
2544 if (error)
2545 return error;
2546
2547 imap->im_blkno = XFS_FSB_TO_DADDR(mp, fsbno);
2548 imap->im_len = XFS_FSB_TO_BB(mp, len);
2549 imap->im_agblkno = XFS_FSB_TO_AGBNO(mp, fsbno);
2550 imap->im_ioffset = (ushort)off;
2551 imap->im_boffset = (ushort)(off << mp->m_sb.sb_inodelog);
2552
2553 /*
2554 * If the inode number maps to a block outside the bounds
2555 * of the file system then return NULL rather than calling
2556 * read_buf and panicing when we get an error from the
2557 * driver.
2558 */
2559 if ((imap->im_blkno + imap->im_len) >
2560 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
2561 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
2562 "(imap->im_blkno (0x%llx) + imap->im_len (0x%llx)) > "
2563 " XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) (0x%llx)",
2564 (unsigned long long) imap->im_blkno,
2565 (unsigned long long) imap->im_len,
2566 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
2567 return EINVAL;
2568 }
2569 return 0;
2570}
2571
2572void 2409void
2573xfs_idestroy_fork( 2410xfs_idestroy_fork(
2574 xfs_inode_t *ip, 2411 xfs_inode_t *ip,
@@ -2613,70 +2450,6 @@ xfs_idestroy_fork(
2613} 2450}
2614 2451
2615/* 2452/*
2616 * This is called free all the memory associated with an inode.
2617 * It must free the inode itself and any buffers allocated for
2618 * if_extents/if_data and if_broot. It must also free the lock
2619 * associated with the inode.
2620 */
2621void
2622xfs_idestroy(
2623 xfs_inode_t *ip)
2624{
2625 switch (ip->i_d.di_mode & S_IFMT) {
2626 case S_IFREG:
2627 case S_IFDIR:
2628 case S_IFLNK:
2629 xfs_idestroy_fork(ip, XFS_DATA_FORK);
2630 break;
2631 }
2632 if (ip->i_afp)
2633 xfs_idestroy_fork(ip, XFS_ATTR_FORK);
2634 mrfree(&ip->i_lock);
2635 mrfree(&ip->i_iolock);
2636
2637#ifdef XFS_INODE_TRACE
2638 ktrace_free(ip->i_trace);
2639#endif
2640#ifdef XFS_BMAP_TRACE
2641 ktrace_free(ip->i_xtrace);
2642#endif
2643#ifdef XFS_BMBT_TRACE
2644 ktrace_free(ip->i_btrace);
2645#endif
2646#ifdef XFS_RW_TRACE
2647 ktrace_free(ip->i_rwtrace);
2648#endif
2649#ifdef XFS_ILOCK_TRACE
2650 ktrace_free(ip->i_lock_trace);
2651#endif
2652#ifdef XFS_DIR2_TRACE
2653 ktrace_free(ip->i_dir_trace);
2654#endif
2655 if (ip->i_itemp) {
2656 /*
2657 * Only if we are shutting down the fs will we see an
2658 * inode still in the AIL. If it is there, we should remove
2659 * it to prevent a use-after-free from occurring.
2660 */
2661 xfs_mount_t *mp = ip->i_mount;
2662 xfs_log_item_t *lip = &ip->i_itemp->ili_item;
2663
2664 ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) ||
2665 XFS_FORCED_SHUTDOWN(ip->i_mount));
2666 if (lip->li_flags & XFS_LI_IN_AIL) {
2667 spin_lock(&mp->m_ail_lock);
2668 if (lip->li_flags & XFS_LI_IN_AIL)
2669 xfs_trans_delete_ail(mp, lip);
2670 else
2671 spin_unlock(&mp->m_ail_lock);
2672 }
2673 xfs_inode_item_destroy(ip);
2674 }
2675 kmem_zone_free(xfs_inode_zone, ip);
2676}
2677
2678
2679/*
2680 * Increment the pin count of the given buffer. 2453 * Increment the pin count of the given buffer.
2681 * This value is protected by ipinlock spinlock in the mount structure. 2454 * This value is protected by ipinlock spinlock in the mount structure.
2682 */ 2455 */
@@ -2880,7 +2653,7 @@ xfs_iflush_fork(
2880 ASSERT(ifp->if_broot_bytes <= 2653 ASSERT(ifp->if_broot_bytes <=
2881 (XFS_IFORK_SIZE(ip, whichfork) + 2654 (XFS_IFORK_SIZE(ip, whichfork) +
2882 XFS_BROOT_SIZE_ADJ)); 2655 XFS_BROOT_SIZE_ADJ));
2883 xfs_bmbt_to_bmdr(ifp->if_broot, ifp->if_broot_bytes, 2656 xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes,
2884 (xfs_bmdr_block_t *)cp, 2657 (xfs_bmdr_block_t *)cp,
2885 XFS_DFORK_SIZE(dip, mp, whichfork)); 2658 XFS_DFORK_SIZE(dip, mp, whichfork));
2886 } 2659 }
@@ -2889,15 +2662,16 @@ xfs_iflush_fork(
2889 case XFS_DINODE_FMT_DEV: 2662 case XFS_DINODE_FMT_DEV:
2890 if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) { 2663 if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) {
2891 ASSERT(whichfork == XFS_DATA_FORK); 2664 ASSERT(whichfork == XFS_DATA_FORK);
2892 dip->di_u.di_dev = cpu_to_be32(ip->i_df.if_u2.if_rdev); 2665 xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev);
2893 } 2666 }
2894 break; 2667 break;
2895 2668
2896 case XFS_DINODE_FMT_UUID: 2669 case XFS_DINODE_FMT_UUID:
2897 if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) { 2670 if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) {
2898 ASSERT(whichfork == XFS_DATA_FORK); 2671 ASSERT(whichfork == XFS_DATA_FORK);
2899 memcpy(&dip->di_u.di_muuid, &ip->i_df.if_u2.if_uuid, 2672 memcpy(XFS_DFORK_DPTR(dip),
2900 sizeof(uuid_t)); 2673 &ip->i_df.if_u2.if_uuid,
2674 sizeof(uuid_t));
2901 } 2675 }
2902 break; 2676 break;
2903 2677
@@ -3030,7 +2804,6 @@ cluster_corrupt_out:
3030 XFS_BUF_CLR_BDSTRAT_FUNC(bp); 2804 XFS_BUF_CLR_BDSTRAT_FUNC(bp);
3031 XFS_BUF_UNDONE(bp); 2805 XFS_BUF_UNDONE(bp);
3032 XFS_BUF_STALE(bp); 2806 XFS_BUF_STALE(bp);
3033 XFS_BUF_SHUT(bp);
3034 XFS_BUF_ERROR(bp,EIO); 2807 XFS_BUF_ERROR(bp,EIO);
3035 xfs_biodone(bp); 2808 xfs_biodone(bp);
3036 } else { 2809 } else {
@@ -3172,7 +2945,7 @@ xfs_iflush(
3172 /* 2945 /*
3173 * Get the buffer containing the on-disk inode. 2946 * Get the buffer containing the on-disk inode.
3174 */ 2947 */
3175 error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0, 0, 2948 error = xfs_itobp(mp, NULL, ip, &dip, &bp,
3176 noblock ? XFS_BUF_TRYLOCK : XFS_BUF_LOCK); 2949 noblock ? XFS_BUF_TRYLOCK : XFS_BUF_LOCK);
3177 if (error || !bp) { 2950 if (error || !bp) {
3178 xfs_ifunlock(ip); 2951 xfs_ifunlock(ip);
@@ -3253,7 +3026,7 @@ xfs_iflush_int(
3253 } 3026 }
3254 3027
3255 /* set *dip = inode's place in the buffer */ 3028 /* set *dip = inode's place in the buffer */
3256 dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_boffset); 3029 dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
3257 3030
3258 /* 3031 /*
3259 * Clear i_update_core before copying out the data. 3032 * Clear i_update_core before copying out the data.
@@ -3275,11 +3048,11 @@ xfs_iflush_int(
3275 */ 3048 */
3276 xfs_synchronize_atime(ip); 3049 xfs_synchronize_atime(ip);
3277 3050
3278 if (XFS_TEST_ERROR(be16_to_cpu(dip->di_core.di_magic) != XFS_DINODE_MAGIC, 3051 if (XFS_TEST_ERROR(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC,
3279 mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) { 3052 mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
3280 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, 3053 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp,
3281 "xfs_iflush: Bad inode %Lu magic number 0x%x, ptr 0x%p", 3054 "xfs_iflush: Bad inode %Lu magic number 0x%x, ptr 0x%p",
3282 ip->i_ino, be16_to_cpu(dip->di_core.di_magic), dip); 3055 ip->i_ino, be16_to_cpu(dip->di_magic), dip);
3283 goto corrupt_out; 3056 goto corrupt_out;
3284 } 3057 }
3285 if (XFS_TEST_ERROR(ip->i_d.di_magic != XFS_DINODE_MAGIC, 3058 if (XFS_TEST_ERROR(ip->i_d.di_magic != XFS_DINODE_MAGIC,
@@ -3342,7 +3115,7 @@ xfs_iflush_int(
3342 * because if the inode is dirty at all the core must 3115 * because if the inode is dirty at all the core must
3343 * be. 3116 * be.
3344 */ 3117 */
3345 xfs_dinode_to_disk(&dip->di_core, &ip->i_d); 3118 xfs_dinode_to_disk(dip, &ip->i_d);
3346 3119
3347 /* Wrap, we never let the log put out DI_MAX_FLUSH */ 3120 /* Wrap, we never let the log put out DI_MAX_FLUSH */
3348 if (ip->i_d.di_flushiter == DI_MAX_FLUSH) 3121 if (ip->i_d.di_flushiter == DI_MAX_FLUSH)
@@ -3354,28 +3127,27 @@ xfs_iflush_int(
3354 * convert back to the old inode format. If the superblock version 3127 * convert back to the old inode format. If the superblock version
3355 * has been updated, then make the conversion permanent. 3128 * has been updated, then make the conversion permanent.
3356 */ 3129 */
3357 ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1 || 3130 ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb));
3358 xfs_sb_version_hasnlink(&mp->m_sb)); 3131 if (ip->i_d.di_version == 1) {
3359 if (ip->i_d.di_version == XFS_DINODE_VERSION_1) {
3360 if (!xfs_sb_version_hasnlink(&mp->m_sb)) { 3132 if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
3361 /* 3133 /*
3362 * Convert it back. 3134 * Convert it back.
3363 */ 3135 */
3364 ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1); 3136 ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1);
3365 dip->di_core.di_onlink = cpu_to_be16(ip->i_d.di_nlink); 3137 dip->di_onlink = cpu_to_be16(ip->i_d.di_nlink);
3366 } else { 3138 } else {
3367 /* 3139 /*
3368 * The superblock version has already been bumped, 3140 * The superblock version has already been bumped,
3369 * so just make the conversion to the new inode 3141 * so just make the conversion to the new inode
3370 * format permanent. 3142 * format permanent.
3371 */ 3143 */
3372 ip->i_d.di_version = XFS_DINODE_VERSION_2; 3144 ip->i_d.di_version = 2;
3373 dip->di_core.di_version = XFS_DINODE_VERSION_2; 3145 dip->di_version = 2;
3374 ip->i_d.di_onlink = 0; 3146 ip->i_d.di_onlink = 0;
3375 dip->di_core.di_onlink = 0; 3147 dip->di_onlink = 0;
3376 memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); 3148 memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
3377 memset(&(dip->di_core.di_pad[0]), 0, 3149 memset(&(dip->di_pad[0]), 0,
3378 sizeof(dip->di_core.di_pad)); 3150 sizeof(dip->di_pad));
3379 ASSERT(ip->i_d.di_projid == 0); 3151 ASSERT(ip->i_d.di_projid == 0);
3380 } 3152 }
3381 } 3153 }
@@ -3418,10 +3190,8 @@ xfs_iflush_int(
3418 iip->ili_format.ilf_fields = 0; 3190 iip->ili_format.ilf_fields = 0;
3419 iip->ili_logged = 1; 3191 iip->ili_logged = 1;
3420 3192
3421 ASSERT(sizeof(xfs_lsn_t) == 8); /* don't lock if it shrinks */ 3193 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
3422 spin_lock(&mp->m_ail_lock); 3194 &iip->ili_item.li_lsn);
3423 iip->ili_flush_lsn = iip->ili_item.li_lsn;
3424 spin_unlock(&mp->m_ail_lock);
3425 3195
3426 /* 3196 /*
3427 * Attach the function xfs_iflush_done to the inode's 3197 * Attach the function xfs_iflush_done to the inode's
@@ -3459,45 +3229,8 @@ corrupt_out:
3459} 3229}
3460 3230
3461 3231
3462/*
3463 * Flush all inactive inodes in mp.
3464 */
3465void
3466xfs_iflush_all(
3467 xfs_mount_t *mp)
3468{
3469 xfs_inode_t *ip;
3470
3471 again:
3472 XFS_MOUNT_ILOCK(mp);
3473 ip = mp->m_inodes;
3474 if (ip == NULL)
3475 goto out;
3476
3477 do {
3478 /* Make sure we skip markers inserted by sync */
3479 if (ip->i_mount == NULL) {
3480 ip = ip->i_mnext;
3481 continue;
3482 }
3483
3484 if (!VFS_I(ip)) {
3485 XFS_MOUNT_IUNLOCK(mp);
3486 xfs_finish_reclaim(ip, 0, XFS_IFLUSH_ASYNC);
3487 goto again;
3488 }
3489
3490 ASSERT(vn_count(VFS_I(ip)) == 0);
3491
3492 ip = ip->i_mnext;
3493 } while (ip != mp->m_inodes);
3494 out:
3495 XFS_MOUNT_IUNLOCK(mp);
3496}
3497 3232
3498#ifdef XFS_ILOCK_TRACE 3233#ifdef XFS_ILOCK_TRACE
3499ktrace_t *xfs_ilock_trace_buf;
3500
3501void 3234void
3502xfs_ilock_trace(xfs_inode_t *ip, int lock, unsigned int lockflags, inst_t *ra) 3235xfs_ilock_trace(xfs_inode_t *ip, int lock, unsigned int lockflags, inst_t *ra)
3503{ 3236{
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 1420c49674d..1f175fa34b2 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -19,8 +19,7 @@
19#define __XFS_INODE_H__ 19#define __XFS_INODE_H__
20 20
21struct xfs_dinode; 21struct xfs_dinode;
22struct xfs_dinode_core; 22struct xfs_inode;
23
24 23
25/* 24/*
26 * Fork identifiers. 25 * Fork identifiers.
@@ -63,7 +62,7 @@ typedef struct xfs_ext_irec {
63typedef struct xfs_ifork { 62typedef struct xfs_ifork {
64 int if_bytes; /* bytes in if_u1 */ 63 int if_bytes; /* bytes in if_u1 */
65 int if_real_bytes; /* bytes allocated in if_u1 */ 64 int if_real_bytes; /* bytes allocated in if_u1 */
66 xfs_bmbt_block_t *if_broot; /* file's incore btree root */ 65 struct xfs_btree_block *if_broot; /* file's incore btree root */
67 short if_broot_bytes; /* bytes allocated for root */ 66 short if_broot_bytes; /* bytes allocated for root */
68 unsigned char if_flags; /* per-fork flags */ 67 unsigned char if_flags; /* per-fork flags */
69 unsigned char if_ext_max; /* max # of extent records */ 68 unsigned char if_ext_max; /* max # of extent records */
@@ -84,52 +83,14 @@ typedef struct xfs_ifork {
84} xfs_ifork_t; 83} xfs_ifork_t;
85 84
86/* 85/*
87 * Flags for xfs_ichgtime(). 86 * Inode location information. Stored in the inode and passed to
87 * xfs_imap_to_bp() to get a buffer and dinode for a given inode.
88 */ 88 */
89#define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */ 89struct xfs_imap {
90#define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */ 90 xfs_daddr_t im_blkno; /* starting BB of inode chunk */
91 91 ushort im_len; /* length in BBs of inode chunk */
92/* 92 ushort im_boffset; /* inode offset in block in bytes */
93 * Per-fork incore inode flags. 93};
94 */
95#define XFS_IFINLINE 0x01 /* Inline data is read in */
96#define XFS_IFEXTENTS 0x02 /* All extent pointers are read in */
97#define XFS_IFBROOT 0x04 /* i_broot points to the bmap b-tree root */
98#define XFS_IFEXTIREC 0x08 /* Indirection array of extent blocks */
99
100/*
101 * Flags for xfs_itobp(), xfs_imap() and xfs_dilocate().
102 */
103#define XFS_IMAP_LOOKUP 0x1
104#define XFS_IMAP_BULKSTAT 0x2
105
106#ifdef __KERNEL__
107struct bhv_desc;
108struct cred;
109struct ktrace;
110struct xfs_buf;
111struct xfs_bmap_free;
112struct xfs_bmbt_irec;
113struct xfs_bmbt_block;
114struct xfs_inode;
115struct xfs_inode_log_item;
116struct xfs_mount;
117struct xfs_trans;
118struct xfs_dquot;
119
120#if defined(XFS_ILOCK_TRACE)
121#define XFS_ILOCK_KTRACE_SIZE 32
122extern ktrace_t *xfs_ilock_trace_buf;
123extern void xfs_ilock_trace(struct xfs_inode *, int, unsigned int, inst_t *);
124#else
125#define xfs_ilock_trace(i,n,f,ra)
126#endif
127
128typedef struct dm_attrs_s {
129 __uint32_t da_dmevmask; /* DMIG event mask */
130 __uint16_t da_dmstate; /* DMIG state info */
131 __uint16_t da_pad; /* DMIG extra padding */
132} dm_attrs_t;
133 94
134/* 95/*
135 * This is the xfs in-core inode structure. 96 * This is the xfs in-core inode structure.
@@ -160,7 +121,7 @@ typedef struct xfs_ictimestamp {
160} xfs_ictimestamp_t; 121} xfs_ictimestamp_t;
161 122
162/* 123/*
163 * NOTE: This structure must be kept identical to struct xfs_dinode_core 124 * NOTE: This structure must be kept identical to struct xfs_dinode
164 * in xfs_dinode.h except for the endianess annotations. 125 * in xfs_dinode.h except for the endianess annotations.
165 */ 126 */
166typedef struct xfs_icdinode { 127typedef struct xfs_icdinode {
@@ -191,27 +152,97 @@ typedef struct xfs_icdinode {
191 __uint32_t di_gen; /* generation number */ 152 __uint32_t di_gen; /* generation number */
192} xfs_icdinode_t; 153} xfs_icdinode_t;
193 154
194typedef struct { 155/*
195 struct xfs_inode *ip_mnext; /* next inode in mount list */ 156 * Flags for xfs_ichgtime().
196 struct xfs_inode *ip_mprev; /* ptr to prev inode */ 157 */
197 struct xfs_mount *ip_mount; /* fs mount struct ptr */ 158#define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */
198} xfs_iptr_t; 159#define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */
160
161/*
162 * Per-fork incore inode flags.
163 */
164#define XFS_IFINLINE 0x01 /* Inline data is read in */
165#define XFS_IFEXTENTS 0x02 /* All extent pointers are read in */
166#define XFS_IFBROOT 0x04 /* i_broot points to the bmap b-tree root */
167#define XFS_IFEXTIREC 0x08 /* Indirection array of extent blocks */
168
169/*
170 * Fork handling.
171 */
172
173#define XFS_IFORK_Q(ip) ((ip)->i_d.di_forkoff != 0)
174#define XFS_IFORK_BOFF(ip) ((int)((ip)->i_d.di_forkoff << 3))
175
176#define XFS_IFORK_PTR(ip,w) \
177 ((w) == XFS_DATA_FORK ? \
178 &(ip)->i_df : \
179 (ip)->i_afp)
180#define XFS_IFORK_DSIZE(ip) \
181 (XFS_IFORK_Q(ip) ? \
182 XFS_IFORK_BOFF(ip) : \
183 XFS_LITINO((ip)->i_mount))
184#define XFS_IFORK_ASIZE(ip) \
185 (XFS_IFORK_Q(ip) ? \
186 XFS_LITINO((ip)->i_mount) - XFS_IFORK_BOFF(ip) : \
187 0)
188#define XFS_IFORK_SIZE(ip,w) \
189 ((w) == XFS_DATA_FORK ? \
190 XFS_IFORK_DSIZE(ip) : \
191 XFS_IFORK_ASIZE(ip))
192#define XFS_IFORK_FORMAT(ip,w) \
193 ((w) == XFS_DATA_FORK ? \
194 (ip)->i_d.di_format : \
195 (ip)->i_d.di_aformat)
196#define XFS_IFORK_FMT_SET(ip,w,n) \
197 ((w) == XFS_DATA_FORK ? \
198 ((ip)->i_d.di_format = (n)) : \
199 ((ip)->i_d.di_aformat = (n)))
200#define XFS_IFORK_NEXTENTS(ip,w) \
201 ((w) == XFS_DATA_FORK ? \
202 (ip)->i_d.di_nextents : \
203 (ip)->i_d.di_anextents)
204#define XFS_IFORK_NEXT_SET(ip,w,n) \
205 ((w) == XFS_DATA_FORK ? \
206 ((ip)->i_d.di_nextents = (n)) : \
207 ((ip)->i_d.di_anextents = (n)))
208
209
210
211#ifdef __KERNEL__
212
213struct bhv_desc;
214struct cred;
215struct ktrace;
216struct xfs_buf;
217struct xfs_bmap_free;
218struct xfs_bmbt_irec;
219struct xfs_inode_log_item;
220struct xfs_mount;
221struct xfs_trans;
222struct xfs_dquot;
223
224#if defined(XFS_ILOCK_TRACE)
225#define XFS_ILOCK_KTRACE_SIZE 32
226extern void xfs_ilock_trace(struct xfs_inode *, int, unsigned int, inst_t *);
227#else
228#define xfs_ilock_trace(i,n,f,ra)
229#endif
230
231typedef struct dm_attrs_s {
232 __uint32_t da_dmevmask; /* DMIG event mask */
233 __uint16_t da_dmstate; /* DMIG state info */
234 __uint16_t da_pad; /* DMIG extra padding */
235} dm_attrs_t;
199 236
200typedef struct xfs_inode { 237typedef struct xfs_inode {
201 /* Inode linking and identification information. */ 238 /* Inode linking and identification information. */
202 struct xfs_inode *i_mnext; /* next inode in mount list */
203 struct xfs_inode *i_mprev; /* ptr to prev inode */
204 struct xfs_mount *i_mount; /* fs mount struct ptr */ 239 struct xfs_mount *i_mount; /* fs mount struct ptr */
205 struct list_head i_reclaim; /* reclaim list */
206 struct inode *i_vnode; /* vnode backpointer */
207 struct xfs_dquot *i_udquot; /* user dquot */ 240 struct xfs_dquot *i_udquot; /* user dquot */
208 struct xfs_dquot *i_gdquot; /* group dquot */ 241 struct xfs_dquot *i_gdquot; /* group dquot */
209 242
210 /* Inode location stuff */ 243 /* Inode location stuff */
211 xfs_ino_t i_ino; /* inode number (agno/agino)*/ 244 xfs_ino_t i_ino; /* inode number (agno/agino)*/
212 xfs_daddr_t i_blkno; /* blkno of inode buffer */ 245 struct xfs_imap i_imap; /* location for xfs_imap() */
213 ushort i_len; /* len of inode buffer */
214 ushort i_boffset; /* off of inode in buffer */
215 246
216 /* Extent information. */ 247 /* Extent information. */
217 xfs_ifork_t *i_afp; /* attribute fork pointer */ 248 xfs_ifork_t *i_afp; /* attribute fork pointer */
@@ -230,7 +261,6 @@ typedef struct xfs_inode {
230 unsigned short i_flags; /* see defined flags below */ 261 unsigned short i_flags; /* see defined flags below */
231 unsigned char i_update_core; /* timestamps/size is dirty */ 262 unsigned char i_update_core; /* timestamps/size is dirty */
232 unsigned char i_update_size; /* di_size field is dirty */ 263 unsigned char i_update_size; /* di_size field is dirty */
233 unsigned int i_gen; /* generation count */
234 unsigned int i_delayed_blks; /* count of delay alloc blks */ 264 unsigned int i_delayed_blks; /* count of delay alloc blks */
235 265
236 xfs_icdinode_t i_d; /* most of ondisk inode */ 266 xfs_icdinode_t i_d; /* most of ondisk inode */
@@ -238,6 +268,10 @@ typedef struct xfs_inode {
238 xfs_fsize_t i_size; /* in-memory size */ 268 xfs_fsize_t i_size; /* in-memory size */
239 xfs_fsize_t i_new_size; /* size when write completes */ 269 xfs_fsize_t i_new_size; /* size when write completes */
240 atomic_t i_iocount; /* outstanding I/O count */ 270 atomic_t i_iocount; /* outstanding I/O count */
271
272 /* VFS inode */
273 struct inode i_vnode; /* embedded VFS inode */
274
241 /* Trace buffers per inode. */ 275 /* Trace buffers per inode. */
242#ifdef XFS_INODE_TRACE 276#ifdef XFS_INODE_TRACE
243 struct ktrace *i_trace; /* general inode trace */ 277 struct ktrace *i_trace; /* general inode trace */
@@ -245,7 +279,7 @@ typedef struct xfs_inode {
245#ifdef XFS_BMAP_TRACE 279#ifdef XFS_BMAP_TRACE
246 struct ktrace *i_xtrace; /* inode extent list trace */ 280 struct ktrace *i_xtrace; /* inode extent list trace */
247#endif 281#endif
248#ifdef XFS_BMBT_TRACE 282#ifdef XFS_BTREE_TRACE
249 struct ktrace *i_btrace; /* inode bmap btree trace */ 283 struct ktrace *i_btrace; /* inode bmap btree trace */
250#endif 284#endif
251#ifdef XFS_RW_TRACE 285#ifdef XFS_RW_TRACE
@@ -265,13 +299,30 @@ typedef struct xfs_inode {
265/* Convert from vfs inode to xfs inode */ 299/* Convert from vfs inode to xfs inode */
266static inline struct xfs_inode *XFS_I(struct inode *inode) 300static inline struct xfs_inode *XFS_I(struct inode *inode)
267{ 301{
268 return (struct xfs_inode *)inode->i_private; 302 return container_of(inode, struct xfs_inode, i_vnode);
269} 303}
270 304
271/* convert from xfs inode to vfs inode */ 305/* convert from xfs inode to vfs inode */
272static inline struct inode *VFS_I(struct xfs_inode *ip) 306static inline struct inode *VFS_I(struct xfs_inode *ip)
273{ 307{
274 return (struct inode *)ip->i_vnode; 308 return &ip->i_vnode;
309}
310
311/*
312 * Get rid of a partially initialized inode.
313 *
314 * We have to go through destroy_inode to make sure allocations
315 * from init_inode_always like the security data are undone.
316 *
317 * We mark the inode bad so that it takes the short cut in
318 * the reclaim path instead of going through the flush path
319 * which doesn't make sense for an inode that has never seen the
320 * light of day.
321 */
322static inline void xfs_destroy_inode(struct xfs_inode *ip)
323{
324 make_bad_inode(VFS_I(ip));
325 return destroy_inode(VFS_I(ip));
275} 326}
276 327
277/* 328/*
@@ -327,65 +378,36 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
327 spin_unlock(&ip->i_flags_lock); 378 spin_unlock(&ip->i_flags_lock);
328 return ret; 379 return ret;
329} 380}
330#endif /* __KERNEL__ */
331
332 381
333/* 382/*
334 * Fork handling. 383 * Manage the i_flush queue embedded in the inode. This completion
384 * queue synchronizes processes attempting to flush the in-core
385 * inode back to disk.
335 */ 386 */
387static inline void xfs_iflock(xfs_inode_t *ip)
388{
389 wait_for_completion(&ip->i_flush);
390}
336 391
337#define XFS_IFORK_Q(ip) ((ip)->i_d.di_forkoff != 0) 392static inline int xfs_iflock_nowait(xfs_inode_t *ip)
338#define XFS_IFORK_BOFF(ip) ((int)((ip)->i_d.di_forkoff << 3)) 393{
339 394 return try_wait_for_completion(&ip->i_flush);
340#define XFS_IFORK_PTR(ip,w) \ 395}
341 ((w) == XFS_DATA_FORK ? \
342 &(ip)->i_df : \
343 (ip)->i_afp)
344#define XFS_IFORK_DSIZE(ip) \
345 (XFS_IFORK_Q(ip) ? \
346 XFS_IFORK_BOFF(ip) : \
347 XFS_LITINO((ip)->i_mount))
348#define XFS_IFORK_ASIZE(ip) \
349 (XFS_IFORK_Q(ip) ? \
350 XFS_LITINO((ip)->i_mount) - XFS_IFORK_BOFF(ip) : \
351 0)
352#define XFS_IFORK_SIZE(ip,w) \
353 ((w) == XFS_DATA_FORK ? \
354 XFS_IFORK_DSIZE(ip) : \
355 XFS_IFORK_ASIZE(ip))
356#define XFS_IFORK_FORMAT(ip,w) \
357 ((w) == XFS_DATA_FORK ? \
358 (ip)->i_d.di_format : \
359 (ip)->i_d.di_aformat)
360#define XFS_IFORK_FMT_SET(ip,w,n) \
361 ((w) == XFS_DATA_FORK ? \
362 ((ip)->i_d.di_format = (n)) : \
363 ((ip)->i_d.di_aformat = (n)))
364#define XFS_IFORK_NEXTENTS(ip,w) \
365 ((w) == XFS_DATA_FORK ? \
366 (ip)->i_d.di_nextents : \
367 (ip)->i_d.di_anextents)
368#define XFS_IFORK_NEXT_SET(ip,w,n) \
369 ((w) == XFS_DATA_FORK ? \
370 ((ip)->i_d.di_nextents = (n)) : \
371 ((ip)->i_d.di_anextents = (n)))
372 396
373#ifdef __KERNEL__ 397static inline void xfs_ifunlock(xfs_inode_t *ip)
398{
399 complete(&ip->i_flush);
400}
374 401
375/* 402/*
376 * In-core inode flags. 403 * In-core inode flags.
377 */ 404 */
378#define XFS_IGRIO 0x0001 /* inode used for guaranteed rate i/o */ 405#define XFS_IRECLAIM 0x0001 /* we have started reclaiming this inode */
379#define XFS_IUIOSZ 0x0002 /* inode i/o sizes have been explicitly set */ 406#define XFS_ISTALE 0x0002 /* inode has been staled */
380#define XFS_IQUIESCE 0x0004 /* we have started quiescing for this inode */ 407#define XFS_IRECLAIMABLE 0x0004 /* inode can be reclaimed */
381#define XFS_IRECLAIM 0x0008 /* we have started reclaiming this inode */ 408#define XFS_INEW 0x0008 /* inode has just been allocated */
382#define XFS_ISTALE 0x0010 /* inode has been staled */ 409#define XFS_IFILESTREAM 0x0010 /* inode is in a filestream directory */
383#define XFS_IRECLAIMABLE 0x0020 /* inode can be reclaimed */ 410#define XFS_ITRUNCATED 0x0020 /* truncated down so flush-on-close */
384#define XFS_INEW 0x0040
385#define XFS_IFILESTREAM 0x0080 /* inode is in a filestream directory */
386#define XFS_IMODIFIED 0x0100 /* XFS inode state possibly differs */
387 /* to the Linux inode state. */
388#define XFS_ITRUNCATED 0x0200 /* truncated down so flush-on-close */
389 411
390/* 412/*
391 * Flags for inode locking. 413 * Flags for inode locking.
@@ -460,16 +482,8 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
460 ((pip)->i_d.di_mode & S_ISGID)) 482 ((pip)->i_d.di_mode & S_ISGID))
461 483
462/* 484/*
463 * Flags for xfs_iget()
464 */
465#define XFS_IGET_CREATE 0x1
466#define XFS_IGET_BULKSTAT 0x2
467
468/*
469 * xfs_iget.c prototypes. 485 * xfs_iget.c prototypes.
470 */ 486 */
471void xfs_ihash_init(struct xfs_mount *);
472void xfs_ihash_free(struct xfs_mount *);
473xfs_inode_t *xfs_inode_incore(struct xfs_mount *, xfs_ino_t, 487xfs_inode_t *xfs_inode_incore(struct xfs_mount *, xfs_ino_t,
474 struct xfs_trans *); 488 struct xfs_trans *);
475int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, 489int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
@@ -484,25 +498,13 @@ int xfs_isilocked(xfs_inode_t *, uint);
484uint xfs_ilock_map_shared(xfs_inode_t *); 498uint xfs_ilock_map_shared(xfs_inode_t *);
485void xfs_iunlock_map_shared(xfs_inode_t *, uint); 499void xfs_iunlock_map_shared(xfs_inode_t *, uint);
486void xfs_ireclaim(xfs_inode_t *); 500void xfs_ireclaim(xfs_inode_t *);
487int xfs_finish_reclaim(xfs_inode_t *, int, int);
488int xfs_finish_reclaim_all(struct xfs_mount *, int);
489 501
490/* 502/*
491 * xfs_inode.c prototypes. 503 * xfs_inode.c prototypes.
492 */ 504 */
493int xfs_itobp(struct xfs_mount *, struct xfs_trans *,
494 xfs_inode_t *, struct xfs_dinode **, struct xfs_buf **,
495 xfs_daddr_t, uint, uint);
496int xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
497 xfs_inode_t **, xfs_daddr_t, uint);
498int xfs_iread_extents(struct xfs_trans *, xfs_inode_t *, int);
499int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t, 505int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t,
500 xfs_nlink_t, xfs_dev_t, struct cred *, xfs_prid_t, 506 xfs_nlink_t, xfs_dev_t, cred_t *, xfs_prid_t,
501 int, struct xfs_buf **, boolean_t *, xfs_inode_t **); 507 int, struct xfs_buf **, boolean_t *, xfs_inode_t **);
502void xfs_dinode_from_disk(struct xfs_icdinode *,
503 struct xfs_dinode_core *);
504void xfs_dinode_to_disk(struct xfs_dinode_core *,
505 struct xfs_icdinode *);
506 508
507uint xfs_ip2xflags(struct xfs_inode *); 509uint xfs_ip2xflags(struct xfs_inode *);
508uint xfs_dic2xflags(struct xfs_dinode *); 510uint xfs_dic2xflags(struct xfs_dinode *);
@@ -513,17 +515,10 @@ int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *,
513 xfs_fsize_t, int, int); 515 xfs_fsize_t, int, int);
514int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); 516int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
515 517
516void xfs_idestroy_fork(xfs_inode_t *, int);
517void xfs_idestroy(xfs_inode_t *);
518void xfs_idata_realloc(xfs_inode_t *, int, int);
519void xfs_iextract(xfs_inode_t *);
520void xfs_iext_realloc(xfs_inode_t *, int, int); 518void xfs_iext_realloc(xfs_inode_t *, int, int);
521void xfs_iroot_realloc(xfs_inode_t *, int, int);
522void xfs_ipin(xfs_inode_t *); 519void xfs_ipin(xfs_inode_t *);
523void xfs_iunpin(xfs_inode_t *); 520void xfs_iunpin(xfs_inode_t *);
524int xfs_iextents_copy(xfs_inode_t *, xfs_bmbt_rec_t *, int);
525int xfs_iflush(xfs_inode_t *, uint); 521int xfs_iflush(xfs_inode_t *, uint);
526void xfs_iflush_all(struct xfs_mount *);
527void xfs_ichgtime(xfs_inode_t *, int); 522void xfs_ichgtime(xfs_inode_t *, int);
528xfs_fsize_t xfs_file_last_byte(xfs_inode_t *); 523xfs_fsize_t xfs_file_last_byte(xfs_inode_t *);
529void xfs_lock_inodes(xfs_inode_t **, int, uint); 524void xfs_lock_inodes(xfs_inode_t **, int, uint);
@@ -532,6 +527,77 @@ void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
532void xfs_synchronize_atime(xfs_inode_t *); 527void xfs_synchronize_atime(xfs_inode_t *);
533void xfs_mark_inode_dirty_sync(xfs_inode_t *); 528void xfs_mark_inode_dirty_sync(xfs_inode_t *);
534 529
530#if defined(XFS_INODE_TRACE)
531
532#define INODE_TRACE_SIZE 16 /* number of trace entries */
533#define INODE_KTRACE_ENTRY 1
534#define INODE_KTRACE_EXIT 2
535#define INODE_KTRACE_HOLD 3
536#define INODE_KTRACE_REF 4
537#define INODE_KTRACE_RELE 5
538
539extern void _xfs_itrace_entry(struct xfs_inode *, const char *, inst_t *);
540extern void _xfs_itrace_exit(struct xfs_inode *, const char *, inst_t *);
541extern void xfs_itrace_hold(struct xfs_inode *, char *, int, inst_t *);
542extern void _xfs_itrace_ref(struct xfs_inode *, char *, int, inst_t *);
543extern void xfs_itrace_rele(struct xfs_inode *, char *, int, inst_t *);
544#define xfs_itrace_entry(ip) \
545 _xfs_itrace_entry(ip, __func__, (inst_t *)__return_address)
546#define xfs_itrace_exit(ip) \
547 _xfs_itrace_exit(ip, __func__, (inst_t *)__return_address)
548#define xfs_itrace_exit_tag(ip, tag) \
549 _xfs_itrace_exit(ip, tag, (inst_t *)__return_address)
550#define xfs_itrace_ref(ip) \
551 _xfs_itrace_ref(ip, __FILE__, __LINE__, (inst_t *)__return_address)
552
553#else
554#define xfs_itrace_entry(a)
555#define xfs_itrace_exit(a)
556#define xfs_itrace_exit_tag(a, b)
557#define xfs_itrace_hold(a, b, c, d)
558#define xfs_itrace_ref(a)
559#define xfs_itrace_rele(a, b, c, d)
560#endif
561
562#define IHOLD(ip) \
563do { \
564 ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \
565 atomic_inc(&(VFS_I(ip)->i_count)); \
566 xfs_itrace_hold((ip), __FILE__, __LINE__, (inst_t *)__return_address); \
567} while (0)
568
569#define IRELE(ip) \
570do { \
571 xfs_itrace_rele((ip), __FILE__, __LINE__, (inst_t *)__return_address); \
572 iput(VFS_I(ip)); \
573} while (0)
574
575#endif /* __KERNEL__ */
576
577/*
578 * Flags for xfs_iget()
579 */
580#define XFS_IGET_CREATE 0x1
581#define XFS_IGET_BULKSTAT 0x2
582
583int xfs_inotobp(struct xfs_mount *, struct xfs_trans *,
584 xfs_ino_t, struct xfs_dinode **,
585 struct xfs_buf **, int *, uint);
586int xfs_itobp(struct xfs_mount *, struct xfs_trans *,
587 struct xfs_inode *, struct xfs_dinode **,
588 struct xfs_buf **, uint);
589int xfs_iread(struct xfs_mount *, struct xfs_trans *,
590 struct xfs_inode *, xfs_daddr_t, uint);
591void xfs_dinode_from_disk(struct xfs_icdinode *,
592 struct xfs_dinode *);
593void xfs_dinode_to_disk(struct xfs_dinode *,
594 struct xfs_icdinode *);
595void xfs_idestroy_fork(struct xfs_inode *, int);
596void xfs_idata_realloc(struct xfs_inode *, int, int);
597void xfs_iroot_realloc(struct xfs_inode *, int, int);
598int xfs_iread_extents(struct xfs_trans *, struct xfs_inode *, int);
599int xfs_iextents_copy(struct xfs_inode *, xfs_bmbt_rec_t *, int);
600
535xfs_bmbt_rec_host_t *xfs_iext_get_ext(xfs_ifork_t *, xfs_extnum_t); 601xfs_bmbt_rec_host_t *xfs_iext_get_ext(xfs_ifork_t *, xfs_extnum_t);
536void xfs_iext_insert(xfs_ifork_t *, xfs_extnum_t, xfs_extnum_t, 602void xfs_iext_insert(xfs_ifork_t *, xfs_extnum_t, xfs_extnum_t,
537 xfs_bmbt_irec_t *); 603 xfs_bmbt_irec_t *);
@@ -561,7 +627,8 @@ void xfs_iext_irec_update_extoffs(xfs_ifork_t *, int, int);
561#define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount)) 627#define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount))
562 628
563#ifdef DEBUG 629#ifdef DEBUG
564void xfs_isize_check(struct xfs_mount *, xfs_inode_t *, xfs_fsize_t); 630void xfs_isize_check(struct xfs_mount *, struct xfs_inode *,
631 xfs_fsize_t);
565#else /* DEBUG */ 632#else /* DEBUG */
566#define xfs_isize_check(mp, ip, isize) 633#define xfs_isize_check(mp, ip, isize)
567#endif /* DEBUG */ 634#endif /* DEBUG */
@@ -576,26 +643,4 @@ extern struct kmem_zone *xfs_ifork_zone;
576extern struct kmem_zone *xfs_inode_zone; 643extern struct kmem_zone *xfs_inode_zone;
577extern struct kmem_zone *xfs_ili_zone; 644extern struct kmem_zone *xfs_ili_zone;
578 645
579/*
580 * Manage the i_flush queue embedded in the inode. This completion
581 * queue synchronizes processes attempting to flush the in-core
582 * inode back to disk.
583 */
584static inline void xfs_iflock(xfs_inode_t *ip)
585{
586 wait_for_completion(&ip->i_flush);
587}
588
589static inline int xfs_iflock_nowait(xfs_inode_t *ip)
590{
591 return try_wait_for_completion(&ip->i_flush);
592}
593
594static inline void xfs_ifunlock(xfs_inode_t *ip)
595{
596 complete(&ip->i_flush);
597}
598
599#endif /* __KERNEL__ */
600
601#endif /* __XFS_INODE_H__ */ 646#endif /* __XFS_INODE_H__ */
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 97c7452e262..977c4aec587 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -281,7 +281,7 @@ xfs_inode_item_format(
281 xfs_mark_inode_dirty_sync(ip); 281 xfs_mark_inode_dirty_sync(ip);
282 282
283 vecp->i_addr = (xfs_caddr_t)&ip->i_d; 283 vecp->i_addr = (xfs_caddr_t)&ip->i_d;
284 vecp->i_len = sizeof(xfs_dinode_core_t); 284 vecp->i_len = sizeof(struct xfs_icdinode);
285 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE); 285 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE);
286 vecp++; 286 vecp++;
287 nvecs++; 287 nvecs++;
@@ -296,9 +296,8 @@ xfs_inode_item_format(
296 * has a new version number, then we don't bother converting back. 296 * has a new version number, then we don't bother converting back.
297 */ 297 */
298 mp = ip->i_mount; 298 mp = ip->i_mount;
299 ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1 || 299 ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb));
300 xfs_sb_version_hasnlink(&mp->m_sb)); 300 if (ip->i_d.di_version == 1) {
301 if (ip->i_d.di_version == XFS_DINODE_VERSION_1) {
302 if (!xfs_sb_version_hasnlink(&mp->m_sb)) { 301 if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
303 /* 302 /*
304 * Convert it back. 303 * Convert it back.
@@ -311,7 +310,7 @@ xfs_inode_item_format(
311 * so just make the conversion to the new inode 310 * so just make the conversion to the new inode
312 * format permanent. 311 * format permanent.
313 */ 312 */
314 ip->i_d.di_version = XFS_DINODE_VERSION_2; 313 ip->i_d.di_version = 2;
315 ip->i_d.di_onlink = 0; 314 ip->i_d.di_onlink = 0;
316 memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); 315 memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
317 } 316 }
@@ -932,6 +931,7 @@ xfs_inode_item_init(
932 iip->ili_item.li_type = XFS_LI_INODE; 931 iip->ili_item.li_type = XFS_LI_INODE;
933 iip->ili_item.li_ops = &xfs_inode_item_ops; 932 iip->ili_item.li_ops = &xfs_inode_item_ops;
934 iip->ili_item.li_mountp = mp; 933 iip->ili_item.li_mountp = mp;
934 iip->ili_item.li_ailp = mp->m_ail;
935 iip->ili_inode = ip; 935 iip->ili_inode = ip;
936 936
937 /* 937 /*
@@ -942,9 +942,9 @@ xfs_inode_item_init(
942 942
943 iip->ili_format.ilf_type = XFS_LI_INODE; 943 iip->ili_format.ilf_type = XFS_LI_INODE;
944 iip->ili_format.ilf_ino = ip->i_ino; 944 iip->ili_format.ilf_ino = ip->i_ino;
945 iip->ili_format.ilf_blkno = ip->i_blkno; 945 iip->ili_format.ilf_blkno = ip->i_imap.im_blkno;
946 iip->ili_format.ilf_len = ip->i_len; 946 iip->ili_format.ilf_len = ip->i_imap.im_len;
947 iip->ili_format.ilf_boffset = ip->i_boffset; 947 iip->ili_format.ilf_boffset = ip->i_imap.im_boffset;
948} 948}
949 949
950/* 950/*
@@ -976,9 +976,8 @@ xfs_iflush_done(
976 xfs_buf_t *bp, 976 xfs_buf_t *bp,
977 xfs_inode_log_item_t *iip) 977 xfs_inode_log_item_t *iip)
978{ 978{
979 xfs_inode_t *ip; 979 xfs_inode_t *ip = iip->ili_inode;
980 980 struct xfs_ail *ailp = iip->ili_item.li_ailp;
981 ip = iip->ili_inode;
982 981
983 /* 982 /*
984 * We only want to pull the item from the AIL if it is 983 * We only want to pull the item from the AIL if it is
@@ -991,15 +990,12 @@ xfs_iflush_done(
991 */ 990 */
992 if (iip->ili_logged && 991 if (iip->ili_logged &&
993 (iip->ili_item.li_lsn == iip->ili_flush_lsn)) { 992 (iip->ili_item.li_lsn == iip->ili_flush_lsn)) {
994 spin_lock(&ip->i_mount->m_ail_lock); 993 spin_lock(&ailp->xa_lock);
995 if (iip->ili_item.li_lsn == iip->ili_flush_lsn) { 994 if (iip->ili_item.li_lsn == iip->ili_flush_lsn) {
996 /* 995 /* xfs_trans_ail_delete() drops the AIL lock. */
997 * xfs_trans_delete_ail() drops the AIL lock. 996 xfs_trans_ail_delete(ailp, (xfs_log_item_t*)iip);
998 */
999 xfs_trans_delete_ail(ip->i_mount,
1000 (xfs_log_item_t*)iip);
1001 } else { 997 } else {
1002 spin_unlock(&ip->i_mount->m_ail_lock); 998 spin_unlock(&ailp->xa_lock);
1003 } 999 }
1004 } 1000 }
1005 1001
@@ -1031,21 +1027,20 @@ void
1031xfs_iflush_abort( 1027xfs_iflush_abort(
1032 xfs_inode_t *ip) 1028 xfs_inode_t *ip)
1033{ 1029{
1034 xfs_inode_log_item_t *iip; 1030 xfs_inode_log_item_t *iip = ip->i_itemp;
1035 xfs_mount_t *mp; 1031 xfs_mount_t *mp;
1036 1032
1037 iip = ip->i_itemp; 1033 iip = ip->i_itemp;
1038 mp = ip->i_mount; 1034 mp = ip->i_mount;
1039 if (iip) { 1035 if (iip) {
1036 struct xfs_ail *ailp = iip->ili_item.li_ailp;
1040 if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { 1037 if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
1041 spin_lock(&mp->m_ail_lock); 1038 spin_lock(&ailp->xa_lock);
1042 if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { 1039 if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
1043 /* 1040 /* xfs_trans_ail_delete() drops the AIL lock. */
1044 * xfs_trans_delete_ail() drops the AIL lock. 1041 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)iip);
1045 */
1046 xfs_trans_delete_ail(mp, (xfs_log_item_t *)iip);
1047 } else 1042 } else
1048 spin_unlock(&mp->m_ail_lock); 1043 spin_unlock(&ailp->xa_lock);
1049 } 1044 }
1050 iip->ili_logged = 0; 1045 iip->ili_logged = 0;
1051 /* 1046 /*
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index 40513077ab3..1ff04cc323a 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -112,6 +112,24 @@ typedef struct xfs_inode_log_format_64 {
112#define XFS_ILI_IOLOCKED_ANY (XFS_ILI_IOLOCKED_EXCL | XFS_ILI_IOLOCKED_SHARED) 112#define XFS_ILI_IOLOCKED_ANY (XFS_ILI_IOLOCKED_EXCL | XFS_ILI_IOLOCKED_SHARED)
113 113
114 114
115#define XFS_ILOG_FBROOT(w) xfs_ilog_fbroot(w)
116static inline int xfs_ilog_fbroot(int w)
117{
118 return (w == XFS_DATA_FORK ? XFS_ILOG_DBROOT : XFS_ILOG_ABROOT);
119}
120
121#define XFS_ILOG_FEXT(w) xfs_ilog_fext(w)
122static inline int xfs_ilog_fext(int w)
123{
124 return (w == XFS_DATA_FORK ? XFS_ILOG_DEXT : XFS_ILOG_AEXT);
125}
126
127#define XFS_ILOG_FDATA(w) xfs_ilog_fdata(w)
128static inline int xfs_ilog_fdata(int w)
129{
130 return (w == XFS_DATA_FORK ? XFS_ILOG_DDATA : XFS_ILOG_ADATA);
131}
132
115#ifdef __KERNEL__ 133#ifdef __KERNEL__
116 134
117struct xfs_buf; 135struct xfs_buf;
@@ -148,26 +166,6 @@ typedef struct xfs_inode_log_item {
148} xfs_inode_log_item_t; 166} xfs_inode_log_item_t;
149 167
150 168
151#define XFS_ILOG_FDATA(w) xfs_ilog_fdata(w)
152static inline int xfs_ilog_fdata(int w)
153{
154 return (w == XFS_DATA_FORK ? XFS_ILOG_DDATA : XFS_ILOG_ADATA);
155}
156
157#endif /* __KERNEL__ */
158
159#define XFS_ILOG_FBROOT(w) xfs_ilog_fbroot(w)
160static inline int xfs_ilog_fbroot(int w)
161{
162 return (w == XFS_DATA_FORK ? XFS_ILOG_DBROOT : XFS_ILOG_ABROOT);
163}
164
165#define XFS_ILOG_FEXT(w) xfs_ilog_fext(w)
166static inline int xfs_ilog_fext(int w)
167{
168 return (w == XFS_DATA_FORK ? XFS_ILOG_DEXT : XFS_ILOG_AEXT);
169}
170
171static inline int xfs_inode_clean(xfs_inode_t *ip) 169static inline int xfs_inode_clean(xfs_inode_t *ip)
172{ 170{
173 return (!ip->i_itemp || 171 return (!ip->i_itemp ||
@@ -175,9 +173,6 @@ static inline int xfs_inode_clean(xfs_inode_t *ip)
175 !ip->i_update_core; 173 !ip->i_update_core;
176} 174}
177 175
178
179#ifdef __KERNEL__
180
181extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *); 176extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *);
182extern void xfs_inode_item_destroy(struct xfs_inode *); 177extern void xfs_inode_item_destroy(struct xfs_inode *);
183extern void xfs_iflush_done(struct xfs_buf *, xfs_inode_log_item_t *); 178extern void xfs_iflush_done(struct xfs_buf *, xfs_inode_log_item_t *);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 67f22b2b44b..911062cf73a 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -290,7 +290,6 @@ STATIC int
290xfs_iomap_eof_align_last_fsb( 290xfs_iomap_eof_align_last_fsb(
291 xfs_mount_t *mp, 291 xfs_mount_t *mp,
292 xfs_inode_t *ip, 292 xfs_inode_t *ip,
293 xfs_fsize_t isize,
294 xfs_extlen_t extsize, 293 xfs_extlen_t extsize,
295 xfs_fileoff_t *last_fsb) 294 xfs_fileoff_t *last_fsb)
296{ 295{
@@ -306,14 +305,14 @@ xfs_iomap_eof_align_last_fsb(
306 * stripe width and we are allocating past the allocation eof. 305 * stripe width and we are allocating past the allocation eof.
307 */ 306 */
308 else if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC) && 307 else if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC) &&
309 (isize >= XFS_FSB_TO_B(mp, mp->m_swidth))) 308 (ip->i_size >= XFS_FSB_TO_B(mp, mp->m_swidth)))
310 new_last_fsb = roundup_64(*last_fsb, mp->m_swidth); 309 new_last_fsb = roundup_64(*last_fsb, mp->m_swidth);
311 /* 310 /*
312 * Roundup the allocation request to a stripe unit (m_dalign) boundary 311 * Roundup the allocation request to a stripe unit (m_dalign) boundary
313 * if the file size is >= stripe unit size, and we are allocating past 312 * if the file size is >= stripe unit size, and we are allocating past
314 * the allocation eof. 313 * the allocation eof.
315 */ 314 */
316 else if (mp->m_dalign && (isize >= XFS_FSB_TO_B(mp, mp->m_dalign))) 315 else if (mp->m_dalign && (ip->i_size >= XFS_FSB_TO_B(mp, mp->m_dalign)))
317 new_last_fsb = roundup_64(*last_fsb, mp->m_dalign); 316 new_last_fsb = roundup_64(*last_fsb, mp->m_dalign);
318 317
319 /* 318 /*
@@ -403,7 +402,6 @@ xfs_iomap_write_direct(
403 xfs_filblks_t count_fsb, resaligned; 402 xfs_filblks_t count_fsb, resaligned;
404 xfs_fsblock_t firstfsb; 403 xfs_fsblock_t firstfsb;
405 xfs_extlen_t extsz, temp; 404 xfs_extlen_t extsz, temp;
406 xfs_fsize_t isize;
407 int nimaps; 405 int nimaps;
408 int bmapi_flag; 406 int bmapi_flag;
409 int quota_flag; 407 int quota_flag;
@@ -426,15 +424,10 @@ xfs_iomap_write_direct(
426 rt = XFS_IS_REALTIME_INODE(ip); 424 rt = XFS_IS_REALTIME_INODE(ip);
427 extsz = xfs_get_extsz_hint(ip); 425 extsz = xfs_get_extsz_hint(ip);
428 426
429 isize = ip->i_size;
430 if (ip->i_new_size > isize)
431 isize = ip->i_new_size;
432
433 offset_fsb = XFS_B_TO_FSBT(mp, offset); 427 offset_fsb = XFS_B_TO_FSBT(mp, offset);
434 last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); 428 last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
435 if ((offset + count) > isize) { 429 if ((offset + count) > ip->i_size) {
436 error = xfs_iomap_eof_align_last_fsb(mp, ip, isize, extsz, 430 error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
437 &last_fsb);
438 if (error) 431 if (error)
439 goto error_out; 432 goto error_out;
440 } else { 433 } else {
@@ -559,7 +552,6 @@ STATIC int
559xfs_iomap_eof_want_preallocate( 552xfs_iomap_eof_want_preallocate(
560 xfs_mount_t *mp, 553 xfs_mount_t *mp,
561 xfs_inode_t *ip, 554 xfs_inode_t *ip,
562 xfs_fsize_t isize,
563 xfs_off_t offset, 555 xfs_off_t offset,
564 size_t count, 556 size_t count,
565 int ioflag, 557 int ioflag,
@@ -573,7 +565,7 @@ xfs_iomap_eof_want_preallocate(
573 int n, error, imaps; 565 int n, error, imaps;
574 566
575 *prealloc = 0; 567 *prealloc = 0;
576 if ((ioflag & BMAPI_SYNC) || (offset + count) <= isize) 568 if ((ioflag & BMAPI_SYNC) || (offset + count) <= ip->i_size)
577 return 0; 569 return 0;
578 570
579 /* 571 /*
@@ -617,7 +609,6 @@ xfs_iomap_write_delay(
617 xfs_fileoff_t ioalign; 609 xfs_fileoff_t ioalign;
618 xfs_fsblock_t firstblock; 610 xfs_fsblock_t firstblock;
619 xfs_extlen_t extsz; 611 xfs_extlen_t extsz;
620 xfs_fsize_t isize;
621 int nimaps; 612 int nimaps;
622 xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS]; 613 xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS];
623 int prealloc, fsynced = 0; 614 int prealloc, fsynced = 0;
@@ -637,11 +628,7 @@ xfs_iomap_write_delay(
637 offset_fsb = XFS_B_TO_FSBT(mp, offset); 628 offset_fsb = XFS_B_TO_FSBT(mp, offset);
638 629
639retry: 630retry:
640 isize = ip->i_size; 631 error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count,
641 if (ip->i_new_size > isize)
642 isize = ip->i_new_size;
643
644 error = xfs_iomap_eof_want_preallocate(mp, ip, isize, offset, count,
645 ioflag, imap, XFS_WRITE_IMAPS, &prealloc); 632 ioflag, imap, XFS_WRITE_IMAPS, &prealloc);
646 if (error) 633 if (error)
647 return error; 634 return error;
@@ -655,8 +642,7 @@ retry:
655 } 642 }
656 643
657 if (prealloc || extsz) { 644 if (prealloc || extsz) {
658 error = xfs_iomap_eof_align_last_fsb(mp, ip, isize, extsz, 645 error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
659 &last_fsb);
660 if (error) 646 if (error)
661 return error; 647 return error;
662 } 648 }
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index cf6754a3c5b..e19d0a8d561 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -69,7 +69,7 @@ xfs_bulkstat_one_iget(
69 } 69 }
70 70
71 ASSERT(ip != NULL); 71 ASSERT(ip != NULL);
72 ASSERT(ip->i_blkno != (xfs_daddr_t)0); 72 ASSERT(ip->i_imap.im_blkno != 0);
73 73
74 dic = &ip->i_d; 74 dic = &ip->i_d;
75 75
@@ -125,13 +125,9 @@ STATIC void
125xfs_bulkstat_one_dinode( 125xfs_bulkstat_one_dinode(
126 xfs_mount_t *mp, /* mount point for filesystem */ 126 xfs_mount_t *mp, /* mount point for filesystem */
127 xfs_ino_t ino, /* inode number to get data for */ 127 xfs_ino_t ino, /* inode number to get data for */
128 xfs_dinode_t *dip, /* dinode inode pointer */ 128 xfs_dinode_t *dic, /* dinode inode pointer */
129 xfs_bstat_t *buf) /* return buffer */ 129 xfs_bstat_t *buf) /* return buffer */
130{ 130{
131 xfs_dinode_core_t *dic; /* dinode core info pointer */
132
133 dic = &dip->di_core;
134
135 /* 131 /*
136 * The inode format changed when we moved the link count and 132 * The inode format changed when we moved the link count and
137 * made it 32 bits long. If this is an old format inode, 133 * made it 32 bits long. If this is an old format inode,
@@ -143,7 +139,7 @@ xfs_bulkstat_one_dinode(
143 * the new format. We don't change the version number so that we 139 * the new format. We don't change the version number so that we
144 * can distinguish this from a real new format inode. 140 * can distinguish this from a real new format inode.
145 */ 141 */
146 if (dic->di_version == XFS_DINODE_VERSION_1) { 142 if (dic->di_version == 1) {
147 buf->bs_nlink = be16_to_cpu(dic->di_onlink); 143 buf->bs_nlink = be16_to_cpu(dic->di_onlink);
148 buf->bs_projid = 0; 144 buf->bs_projid = 0;
149 } else { 145 } else {
@@ -162,7 +158,7 @@ xfs_bulkstat_one_dinode(
162 buf->bs_mtime.tv_nsec = be32_to_cpu(dic->di_mtime.t_nsec); 158 buf->bs_mtime.tv_nsec = be32_to_cpu(dic->di_mtime.t_nsec);
163 buf->bs_ctime.tv_sec = be32_to_cpu(dic->di_ctime.t_sec); 159 buf->bs_ctime.tv_sec = be32_to_cpu(dic->di_ctime.t_sec);
164 buf->bs_ctime.tv_nsec = be32_to_cpu(dic->di_ctime.t_nsec); 160 buf->bs_ctime.tv_nsec = be32_to_cpu(dic->di_ctime.t_nsec);
165 buf->bs_xflags = xfs_dic2xflags(dip); 161 buf->bs_xflags = xfs_dic2xflags(dic);
166 buf->bs_extsize = be32_to_cpu(dic->di_extsize) << mp->m_sb.sb_blocklog; 162 buf->bs_extsize = be32_to_cpu(dic->di_extsize) << mp->m_sb.sb_blocklog;
167 buf->bs_extents = be32_to_cpu(dic->di_nextents); 163 buf->bs_extents = be32_to_cpu(dic->di_nextents);
168 buf->bs_gen = be32_to_cpu(dic->di_gen); 164 buf->bs_gen = be32_to_cpu(dic->di_gen);
@@ -173,7 +169,7 @@ xfs_bulkstat_one_dinode(
173 169
174 switch (dic->di_format) { 170 switch (dic->di_format) {
175 case XFS_DINODE_FMT_DEV: 171 case XFS_DINODE_FMT_DEV:
176 buf->bs_rdev = be32_to_cpu(dip->di_u.di_dev); 172 buf->bs_rdev = xfs_dinode_get_rdev(dic);
177 buf->bs_blksize = BLKDEV_IOSIZE; 173 buf->bs_blksize = BLKDEV_IOSIZE;
178 buf->bs_blocks = 0; 174 buf->bs_blocks = 0;
179 break; 175 break;
@@ -192,27 +188,34 @@ xfs_bulkstat_one_dinode(
192 } 188 }
193} 189}
194 190
191/* Return 0 on success or positive error */
195STATIC int 192STATIC int
196xfs_bulkstat_one_fmt( 193xfs_bulkstat_one_fmt(
197 void __user *ubuffer, 194 void __user *ubuffer,
195 int ubsize,
196 int *ubused,
198 const xfs_bstat_t *buffer) 197 const xfs_bstat_t *buffer)
199{ 198{
199 if (ubsize < sizeof(*buffer))
200 return XFS_ERROR(ENOMEM);
200 if (copy_to_user(ubuffer, buffer, sizeof(*buffer))) 201 if (copy_to_user(ubuffer, buffer, sizeof(*buffer)))
201 return -EFAULT; 202 return XFS_ERROR(EFAULT);
202 return sizeof(*buffer); 203 if (ubused)
204 *ubused = sizeof(*buffer);
205 return 0;
203} 206}
204 207
205/* 208/*
206 * Return stat information for one inode. 209 * Return stat information for one inode.
207 * Return 0 if ok, else errno. 210 * Return 0 if ok, else errno.
208 */ 211 */
209int /* error status */ 212int /* error status */
210xfs_bulkstat_one( 213xfs_bulkstat_one_int(
211 xfs_mount_t *mp, /* mount point for filesystem */ 214 xfs_mount_t *mp, /* mount point for filesystem */
212 xfs_ino_t ino, /* inode number to get data for */ 215 xfs_ino_t ino, /* inode number to get data for */
213 void __user *buffer, /* buffer to place output in */ 216 void __user *buffer, /* buffer to place output in */
214 int ubsize, /* size of buffer */ 217 int ubsize, /* size of buffer */
215 void *private_data, /* my private data */ 218 bulkstat_one_fmt_pf formatter, /* formatter, copy to user */
216 xfs_daddr_t bno, /* starting bno of inode cluster */ 219 xfs_daddr_t bno, /* starting bno of inode cluster */
217 int *ubused, /* bytes used by me */ 220 int *ubused, /* bytes used by me */
218 void *dibuff, /* on-disk inode buffer */ 221 void *dibuff, /* on-disk inode buffer */
@@ -221,15 +224,12 @@ xfs_bulkstat_one(
221 xfs_bstat_t *buf; /* return buffer */ 224 xfs_bstat_t *buf; /* return buffer */
222 int error = 0; /* error value */ 225 int error = 0; /* error value */
223 xfs_dinode_t *dip; /* dinode inode pointer */ 226 xfs_dinode_t *dip; /* dinode inode pointer */
224 bulkstat_one_fmt_pf formatter = private_data ? : xfs_bulkstat_one_fmt;
225 227
226 dip = (xfs_dinode_t *)dibuff; 228 dip = (xfs_dinode_t *)dibuff;
227 *stat = BULKSTAT_RV_NOTHING; 229 *stat = BULKSTAT_RV_NOTHING;
228 230
229 if (!buffer || xfs_internal_inum(mp, ino)) 231 if (!buffer || xfs_internal_inum(mp, ino))
230 return XFS_ERROR(EINVAL); 232 return XFS_ERROR(EINVAL);
231 if (ubsize < sizeof(*buf))
232 return XFS_ERROR(ENOMEM);
233 233
234 buf = kmem_alloc(sizeof(*buf), KM_SLEEP); 234 buf = kmem_alloc(sizeof(*buf), KM_SLEEP);
235 235
@@ -244,21 +244,34 @@ xfs_bulkstat_one(
244 xfs_bulkstat_one_dinode(mp, ino, dip, buf); 244 xfs_bulkstat_one_dinode(mp, ino, dip, buf);
245 } 245 }
246 246
247 error = formatter(buffer, buf); 247 error = formatter(buffer, ubsize, ubused, buf);
248 if (error < 0) { 248 if (error)
249 error = EFAULT;
250 goto out_free; 249 goto out_free;
251 }
252 250
253 *stat = BULKSTAT_RV_DIDONE; 251 *stat = BULKSTAT_RV_DIDONE;
254 if (ubused)
255 *ubused = error;
256 252
257 out_free: 253 out_free:
258 kmem_free(buf); 254 kmem_free(buf);
259 return error; 255 return error;
260} 256}
261 257
258int
259xfs_bulkstat_one(
260 xfs_mount_t *mp, /* mount point for filesystem */
261 xfs_ino_t ino, /* inode number to get data for */
262 void __user *buffer, /* buffer to place output in */
263 int ubsize, /* size of buffer */
264 void *private_data, /* my private data */
265 xfs_daddr_t bno, /* starting bno of inode cluster */
266 int *ubused, /* bytes used by me */
267 void *dibuff, /* on-disk inode buffer */
268 int *stat) /* BULKSTAT_RV_... */
269{
270 return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
271 xfs_bulkstat_one_fmt, bno,
272 ubused, dibuff, stat);
273}
274
262/* 275/*
263 * Test to see whether we can use the ondisk inode directly, based 276 * Test to see whether we can use the ondisk inode directly, based
264 * on the given bulkstat flags, filling in dipp accordingly. 277 * on the given bulkstat flags, filling in dipp accordingly.
@@ -287,19 +300,19 @@ xfs_bulkstat_use_dinode(
287 * to disk yet. This is a temporary hack that would require a proper 300 * to disk yet. This is a temporary hack that would require a proper
288 * fix in the future. 301 * fix in the future.
289 */ 302 */
290 if (be16_to_cpu(dip->di_core.di_magic) != XFS_DINODE_MAGIC || 303 if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC ||
291 !XFS_DINODE_GOOD_VERSION(dip->di_core.di_version) || 304 !XFS_DINODE_GOOD_VERSION(dip->di_version) ||
292 !dip->di_core.di_mode) 305 !dip->di_mode)
293 return 0; 306 return 0;
294 if (flags & BULKSTAT_FG_QUICK) { 307 if (flags & BULKSTAT_FG_QUICK) {
295 *dipp = dip; 308 *dipp = dip;
296 return 1; 309 return 1;
297 } 310 }
298 /* BULKSTAT_FG_INLINE: if attr fork is local, or not there, use it */ 311 /* BULKSTAT_FG_INLINE: if attr fork is local, or not there, use it */
299 aformat = dip->di_core.di_aformat; 312 aformat = dip->di_aformat;
300 if ((XFS_DFORK_Q(dip) == 0) || 313 if ((XFS_DFORK_Q(dip) == 0) ||
301 (aformat == XFS_DINODE_FMT_LOCAL) || 314 (aformat == XFS_DINODE_FMT_LOCAL) ||
302 (aformat == XFS_DINODE_FMT_EXTENTS && !dip->di_core.di_anextents)) { 315 (aformat == XFS_DINODE_FMT_EXTENTS && !dip->di_anextents)) {
303 *dipp = dip; 316 *dipp = dip;
304 return 1; 317 return 1;
305 } 318 }
@@ -359,7 +372,6 @@ xfs_bulkstat(
359 int ubused; /* bytes used by formatter */ 372 int ubused; /* bytes used by formatter */
360 xfs_buf_t *bp; /* ptr to on-disk inode cluster buf */ 373 xfs_buf_t *bp; /* ptr to on-disk inode cluster buf */
361 xfs_dinode_t *dip; /* ptr into bp for specific inode */ 374 xfs_dinode_t *dip; /* ptr into bp for specific inode */
362 xfs_inode_t *ip; /* ptr to in-core inode struct */
363 375
364 /* 376 /*
365 * Get the last inode value, see if there's nothing to do. 377 * Get the last inode value, see if there's nothing to do.
@@ -416,8 +428,7 @@ xfs_bulkstat(
416 /* 428 /*
417 * Allocate and initialize a btree cursor for ialloc btree. 429 * Allocate and initialize a btree cursor for ialloc btree.
418 */ 430 */
419 cur = xfs_btree_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_INO, 431 cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno);
420 (xfs_inode_t *)0, 0);
421 irbp = irbuf; 432 irbp = irbuf;
422 irbufend = irbuf + nirbuf; 433 irbufend = irbuf + nirbuf;
423 end_of_ag = 0; 434 end_of_ag = 0;
@@ -472,7 +483,7 @@ xfs_bulkstat(
472 * In any case, increment to the next record. 483 * In any case, increment to the next record.
473 */ 484 */
474 if (!error) 485 if (!error)
475 error = xfs_inobt_increment(cur, 0, &tmp); 486 error = xfs_btree_increment(cur, 0, &tmp);
476 } else { 487 } else {
477 /* 488 /*
478 * Start of ag. Lookup the first inode chunk. 489 * Start of ag. Lookup the first inode chunk.
@@ -539,7 +550,7 @@ xfs_bulkstat(
539 * Set agino to after this chunk and bump the cursor. 550 * Set agino to after this chunk and bump the cursor.
540 */ 551 */
541 agino = gino + XFS_INODES_PER_CHUNK; 552 agino = gino + XFS_INODES_PER_CHUNK;
542 error = xfs_inobt_increment(cur, 0, &tmp); 553 error = xfs_btree_increment(cur, 0, &tmp);
543 cond_resched(); 554 cond_resched();
544 } 555 }
545 /* 556 /*
@@ -586,6 +597,8 @@ xfs_bulkstat(
586 597
587 if (flags & (BULKSTAT_FG_QUICK | 598 if (flags & (BULKSTAT_FG_QUICK |
588 BULKSTAT_FG_INLINE)) { 599 BULKSTAT_FG_INLINE)) {
600 int offset;
601
589 ino = XFS_AGINO_TO_INO(mp, agno, 602 ino = XFS_AGINO_TO_INO(mp, agno,
590 agino); 603 agino);
591 bno = XFS_AGB_TO_DADDR(mp, agno, 604 bno = XFS_AGB_TO_DADDR(mp, agno,
@@ -594,21 +607,15 @@ xfs_bulkstat(
594 /* 607 /*
595 * Get the inode cluster buffer 608 * Get the inode cluster buffer
596 */ 609 */
597 ASSERT(xfs_inode_zone != NULL);
598 ip = kmem_zone_zalloc(xfs_inode_zone,
599 KM_SLEEP);
600 ip->i_ino = ino;
601 ip->i_mount = mp;
602 spin_lock_init(&ip->i_flags_lock);
603 if (bp) 610 if (bp)
604 xfs_buf_relse(bp); 611 xfs_buf_relse(bp);
605 error = xfs_itobp(mp, NULL, ip, 612
606 &dip, &bp, bno, 613 error = xfs_inotobp(mp, NULL, ino, &dip,
607 XFS_IMAP_BULKSTAT, 614 &bp, &offset,
608 XFS_BUF_LOCK); 615 XFS_IGET_BULKSTAT);
616
609 if (!error) 617 if (!error)
610 clustidx = ip->i_boffset / mp->m_sb.sb_inodesize; 618 clustidx = offset / mp->m_sb.sb_inodesize;
611 kmem_zone_free(xfs_inode_zone, ip);
612 if (XFS_TEST_ERROR(error != 0, 619 if (XFS_TEST_ERROR(error != 0,
613 mp, XFS_ERRTAG_BULKSTAT_READ_CHUNK, 620 mp, XFS_ERRTAG_BULKSTAT_READ_CHUNK,
614 XFS_RANDOM_BULKSTAT_READ_CHUNK)) { 621 XFS_RANDOM_BULKSTAT_READ_CHUNK)) {
@@ -842,8 +849,7 @@ xfs_inumbers(
842 agino = 0; 849 agino = 0;
843 continue; 850 continue;
844 } 851 }
845 cur = xfs_btree_init_cursor(mp, NULL, agbp, agno, 852 cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno);
846 XFS_BTNUM_INO, (xfs_inode_t *)0, 0);
847 error = xfs_inobt_lookup_ge(cur, agino, 0, 0, &tmp); 853 error = xfs_inobt_lookup_ge(cur, agino, 0, 0, &tmp);
848 if (error) { 854 if (error) {
849 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 855 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
@@ -887,7 +893,7 @@ xfs_inumbers(
887 bufidx = 0; 893 bufidx = 0;
888 } 894 }
889 if (left) { 895 if (left) {
890 error = xfs_inobt_increment(cur, 0, &tmp); 896 error = xfs_btree_increment(cur, 0, &tmp);
891 if (error) { 897 if (error) {
892 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 898 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
893 cur = NULL; 899 cur = NULL;
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index a1f18fce9b7..1fb04e7deb6 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -71,9 +71,23 @@ xfs_bulkstat_single(
71 71
72typedef int (*bulkstat_one_fmt_pf)( /* used size in bytes or negative error */ 72typedef int (*bulkstat_one_fmt_pf)( /* used size in bytes or negative error */
73 void __user *ubuffer, /* buffer to write to */ 73 void __user *ubuffer, /* buffer to write to */
74 int ubsize, /* remaining user buffer sz */
75 int *ubused, /* bytes used by formatter */
74 const xfs_bstat_t *buffer); /* buffer to read from */ 76 const xfs_bstat_t *buffer); /* buffer to read from */
75 77
76int 78int
79xfs_bulkstat_one_int(
80 xfs_mount_t *mp,
81 xfs_ino_t ino,
82 void __user *buffer,
83 int ubsize,
84 bulkstat_one_fmt_pf formatter,
85 xfs_daddr_t bno,
86 int *ubused,
87 void *dibuff,
88 int *stat);
89
90int
77xfs_bulkstat_one( 91xfs_bulkstat_one(
78 xfs_mount_t *mp, 92 xfs_mount_t *mp,
79 xfs_ino_t ino, 93 xfs_ino_t ino,
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 3608a0f0a5f..f4726f702a9 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -100,12 +100,11 @@ STATIC void xlog_ungrant_log_space(xlog_t *log,
100 100
101 101
102/* local ticket functions */ 102/* local ticket functions */
103STATIC xlog_ticket_t *xlog_ticket_get(xlog_t *log, 103STATIC xlog_ticket_t *xlog_ticket_alloc(xlog_t *log,
104 int unit_bytes, 104 int unit_bytes,
105 int count, 105 int count,
106 char clientid, 106 char clientid,
107 uint flags); 107 uint flags);
108STATIC void xlog_ticket_put(xlog_t *log, xlog_ticket_t *ticket);
109 108
110#if defined(DEBUG) 109#if defined(DEBUG)
111STATIC void xlog_verify_dest_ptr(xlog_t *log, __psint_t ptr); 110STATIC void xlog_verify_dest_ptr(xlog_t *log, __psint_t ptr);
@@ -360,7 +359,7 @@ xfs_log_done(xfs_mount_t *mp,
360 */ 359 */
361 xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)"); 360 xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)");
362 xlog_ungrant_log_space(log, ticket); 361 xlog_ungrant_log_space(log, ticket);
363 xlog_ticket_put(log, ticket); 362 xfs_log_ticket_put(ticket);
364 } else { 363 } else {
365 xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)"); 364 xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)");
366 xlog_regrant_reserve_log_space(log, ticket); 365 xlog_regrant_reserve_log_space(log, ticket);
@@ -514,7 +513,7 @@ xfs_log_reserve(xfs_mount_t *mp,
514 retval = xlog_regrant_write_log_space(log, internal_ticket); 513 retval = xlog_regrant_write_log_space(log, internal_ticket);
515 } else { 514 } else {
516 /* may sleep if need to allocate more tickets */ 515 /* may sleep if need to allocate more tickets */
517 internal_ticket = xlog_ticket_get(log, unit_bytes, cnt, 516 internal_ticket = xlog_ticket_alloc(log, unit_bytes, cnt,
518 client, flags); 517 client, flags);
519 if (!internal_ticket) 518 if (!internal_ticket)
520 return XFS_ERROR(ENOMEM); 519 return XFS_ERROR(ENOMEM);
@@ -572,12 +571,12 @@ xfs_log_mount(
572 /* 571 /*
573 * Initialize the AIL now we have a log. 572 * Initialize the AIL now we have a log.
574 */ 573 */
575 spin_lock_init(&mp->m_ail_lock);
576 error = xfs_trans_ail_init(mp); 574 error = xfs_trans_ail_init(mp);
577 if (error) { 575 if (error) {
578 cmn_err(CE_WARN, "XFS: AIL initialisation failed: error %d", error); 576 cmn_err(CE_WARN, "XFS: AIL initialisation failed: error %d", error);
579 goto error; 577 goto error;
580 } 578 }
579 mp->m_log->l_ailp = mp->m_ail;
581 580
582 /* 581 /*
583 * skip log recovery on a norecovery mount. pretend it all 582 * skip log recovery on a norecovery mount. pretend it all
@@ -730,8 +729,8 @@ xfs_log_unmount_write(xfs_mount_t *mp)
730 spin_lock(&log->l_icloglock); 729 spin_lock(&log->l_icloglock);
731 iclog = log->l_iclog; 730 iclog = log->l_iclog;
732 atomic_inc(&iclog->ic_refcnt); 731 atomic_inc(&iclog->ic_refcnt);
733 spin_unlock(&log->l_icloglock);
734 xlog_state_want_sync(log, iclog); 732 xlog_state_want_sync(log, iclog);
733 spin_unlock(&log->l_icloglock);
735 error = xlog_state_release_iclog(log, iclog); 734 error = xlog_state_release_iclog(log, iclog);
736 735
737 spin_lock(&log->l_icloglock); 736 spin_lock(&log->l_icloglock);
@@ -749,7 +748,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
749 if (tic) { 748 if (tic) {
750 xlog_trace_loggrant(log, tic, "unmount rec"); 749 xlog_trace_loggrant(log, tic, "unmount rec");
751 xlog_ungrant_log_space(log, tic); 750 xlog_ungrant_log_space(log, tic);
752 xlog_ticket_put(log, tic); 751 xfs_log_ticket_put(tic);
753 } 752 }
754 } else { 753 } else {
755 /* 754 /*
@@ -768,9 +767,9 @@ xfs_log_unmount_write(xfs_mount_t *mp)
768 spin_lock(&log->l_icloglock); 767 spin_lock(&log->l_icloglock);
769 iclog = log->l_iclog; 768 iclog = log->l_iclog;
770 atomic_inc(&iclog->ic_refcnt); 769 atomic_inc(&iclog->ic_refcnt);
771 spin_unlock(&log->l_icloglock);
772 770
773 xlog_state_want_sync(log, iclog); 771 xlog_state_want_sync(log, iclog);
772 spin_unlock(&log->l_icloglock);
774 error = xlog_state_release_iclog(log, iclog); 773 error = xlog_state_release_iclog(log, iclog);
775 774
776 spin_lock(&log->l_icloglock); 775 spin_lock(&log->l_icloglock);
@@ -906,7 +905,7 @@ xfs_log_move_tail(xfs_mount_t *mp,
906int 905int
907xfs_log_need_covered(xfs_mount_t *mp) 906xfs_log_need_covered(xfs_mount_t *mp)
908{ 907{
909 int needed = 0, gen; 908 int needed = 0;
910 xlog_t *log = mp->m_log; 909 xlog_t *log = mp->m_log;
911 910
912 if (!xfs_fs_writable(mp)) 911 if (!xfs_fs_writable(mp))
@@ -915,7 +914,7 @@ xfs_log_need_covered(xfs_mount_t *mp)
915 spin_lock(&log->l_icloglock); 914 spin_lock(&log->l_icloglock);
916 if (((log->l_covered_state == XLOG_STATE_COVER_NEED) || 915 if (((log->l_covered_state == XLOG_STATE_COVER_NEED) ||
917 (log->l_covered_state == XLOG_STATE_COVER_NEED2)) 916 (log->l_covered_state == XLOG_STATE_COVER_NEED2))
918 && !xfs_trans_first_ail(mp, &gen) 917 && !xfs_trans_ail_tail(log->l_ailp)
919 && xlog_iclogs_empty(log)) { 918 && xlog_iclogs_empty(log)) {
920 if (log->l_covered_state == XLOG_STATE_COVER_NEED) 919 if (log->l_covered_state == XLOG_STATE_COVER_NEED)
921 log->l_covered_state = XLOG_STATE_COVER_DONE; 920 log->l_covered_state = XLOG_STATE_COVER_DONE;
@@ -952,7 +951,7 @@ xlog_assign_tail_lsn(xfs_mount_t *mp)
952 xfs_lsn_t tail_lsn; 951 xfs_lsn_t tail_lsn;
953 xlog_t *log = mp->m_log; 952 xlog_t *log = mp->m_log;
954 953
955 tail_lsn = xfs_trans_tail_ail(mp); 954 tail_lsn = xfs_trans_ail_tail(mp->m_ail);
956 spin_lock(&log->l_grant_lock); 955 spin_lock(&log->l_grant_lock);
957 if (tail_lsn != 0) { 956 if (tail_lsn != 0) {
958 log->l_tail_lsn = tail_lsn; 957 log->l_tail_lsn = tail_lsn;
@@ -1030,12 +1029,6 @@ xlog_iodone(xfs_buf_t *bp)
1030 ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == (unsigned long) 2); 1029 ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == (unsigned long) 2);
1031 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); 1030 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
1032 aborted = 0; 1031 aborted = 0;
1033
1034 /*
1035 * Some versions of cpp barf on the recursive definition of
1036 * ic_log -> hic_fields.ic_log and expand ic_log twice when
1037 * it is passed through two macros. Workaround broken cpp.
1038 */
1039 l = iclog->ic_log; 1032 l = iclog->ic_log;
1040 1033
1041 /* 1034 /*
@@ -1302,7 +1295,7 @@ xlog_alloc_log(xfs_mount_t *mp,
1302 XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb); 1295 XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
1303 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); 1296 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
1304 iclog->ic_bp = bp; 1297 iclog->ic_bp = bp;
1305 iclog->hic_data = bp->b_addr; 1298 iclog->ic_data = bp->b_addr;
1306#ifdef DEBUG 1299#ifdef DEBUG
1307 log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header); 1300 log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header);
1308#endif 1301#endif
@@ -1322,7 +1315,7 @@ xlog_alloc_log(xfs_mount_t *mp,
1322 atomic_set(&iclog->ic_refcnt, 0); 1315 atomic_set(&iclog->ic_refcnt, 0);
1323 spin_lock_init(&iclog->ic_callback_lock); 1316 spin_lock_init(&iclog->ic_callback_lock);
1324 iclog->ic_callback_tail = &(iclog->ic_callback); 1317 iclog->ic_callback_tail = &(iclog->ic_callback);
1325 iclog->ic_datap = (char *)iclog->hic_data + log->l_iclog_hsize; 1318 iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize;
1326 1319
1327 ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp)); 1320 ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp));
1328 ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0); 1321 ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0);
@@ -1446,7 +1439,7 @@ xlog_grant_push_ail(xfs_mount_t *mp,
1446 */ 1439 */
1447 if (threshold_lsn && 1440 if (threshold_lsn &&
1448 !XLOG_FORCED_SHUTDOWN(log)) 1441 !XLOG_FORCED_SHUTDOWN(log))
1449 xfs_trans_push_ail(mp, threshold_lsn); 1442 xfs_trans_ail_push(log->l_ailp, threshold_lsn);
1450} /* xlog_grant_push_ail */ 1443} /* xlog_grant_push_ail */
1451 1444
1452 1445
@@ -1991,7 +1984,9 @@ xlog_write(xfs_mount_t * mp,
1991 if (iclog->ic_size - log_offset <= sizeof(xlog_op_header_t)) { 1984 if (iclog->ic_size - log_offset <= sizeof(xlog_op_header_t)) {
1992 xlog_state_finish_copy(log, iclog, record_cnt, data_cnt); 1985 xlog_state_finish_copy(log, iclog, record_cnt, data_cnt);
1993 record_cnt = data_cnt = 0; 1986 record_cnt = data_cnt = 0;
1987 spin_lock(&log->l_icloglock);
1994 xlog_state_want_sync(log, iclog); 1988 xlog_state_want_sync(log, iclog);
1989 spin_unlock(&log->l_icloglock);
1995 if (commit_iclog) { 1990 if (commit_iclog) {
1996 ASSERT(flags & XLOG_COMMIT_TRANS); 1991 ASSERT(flags & XLOG_COMMIT_TRANS);
1997 *commit_iclog = iclog; 1992 *commit_iclog = iclog;
@@ -3200,7 +3195,7 @@ try_again:
3200STATIC void 3195STATIC void
3201xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog) 3196xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog)
3202{ 3197{
3203 spin_lock(&log->l_icloglock); 3198 ASSERT(spin_is_locked(&log->l_icloglock));
3204 3199
3205 if (iclog->ic_state == XLOG_STATE_ACTIVE) { 3200 if (iclog->ic_state == XLOG_STATE_ACTIVE) {
3206 xlog_state_switch_iclogs(log, iclog, 0); 3201 xlog_state_switch_iclogs(log, iclog, 0);
@@ -3208,10 +3203,7 @@ xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog)
3208 ASSERT(iclog->ic_state & 3203 ASSERT(iclog->ic_state &
3209 (XLOG_STATE_WANT_SYNC|XLOG_STATE_IOERROR)); 3204 (XLOG_STATE_WANT_SYNC|XLOG_STATE_IOERROR));
3210 } 3205 }
3211 3206}
3212 spin_unlock(&log->l_icloglock);
3213} /* xlog_state_want_sync */
3214
3215 3207
3216 3208
3217/***************************************************************************** 3209/*****************************************************************************
@@ -3222,22 +3214,33 @@ xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog)
3222 */ 3214 */
3223 3215
3224/* 3216/*
3225 * Free a used ticket. 3217 * Free a used ticket when it's refcount falls to zero.
3226 */ 3218 */
3227STATIC void 3219void
3228xlog_ticket_put(xlog_t *log, 3220xfs_log_ticket_put(
3229 xlog_ticket_t *ticket) 3221 xlog_ticket_t *ticket)
3230{ 3222{
3231 sv_destroy(&ticket->t_wait); 3223 ASSERT(atomic_read(&ticket->t_ref) > 0);
3232 kmem_zone_free(xfs_log_ticket_zone, ticket); 3224 if (atomic_dec_and_test(&ticket->t_ref)) {
3233} /* xlog_ticket_put */ 3225 sv_destroy(&ticket->t_wait);
3226 kmem_zone_free(xfs_log_ticket_zone, ticket);
3227 }
3228}
3234 3229
3230xlog_ticket_t *
3231xfs_log_ticket_get(
3232 xlog_ticket_t *ticket)
3233{
3234 ASSERT(atomic_read(&ticket->t_ref) > 0);
3235 atomic_inc(&ticket->t_ref);
3236 return ticket;
3237}
3235 3238
3236/* 3239/*
3237 * Allocate and initialise a new log ticket. 3240 * Allocate and initialise a new log ticket.
3238 */ 3241 */
3239STATIC xlog_ticket_t * 3242STATIC xlog_ticket_t *
3240xlog_ticket_get(xlog_t *log, 3243xlog_ticket_alloc(xlog_t *log,
3241 int unit_bytes, 3244 int unit_bytes,
3242 int cnt, 3245 int cnt,
3243 char client, 3246 char client,
@@ -3308,6 +3311,7 @@ xlog_ticket_get(xlog_t *log,
3308 unit_bytes += 2*BBSIZE; 3311 unit_bytes += 2*BBSIZE;
3309 } 3312 }
3310 3313
3314 atomic_set(&tic->t_ref, 1);
3311 tic->t_unit_res = unit_bytes; 3315 tic->t_unit_res = unit_bytes;
3312 tic->t_curr_res = unit_bytes; 3316 tic->t_curr_res = unit_bytes;
3313 tic->t_cnt = cnt; 3317 tic->t_cnt = cnt;
@@ -3323,7 +3327,7 @@ xlog_ticket_get(xlog_t *log,
3323 xlog_tic_reset_res(tic); 3327 xlog_tic_reset_res(tic);
3324 3328
3325 return tic; 3329 return tic;
3326} /* xlog_ticket_get */ 3330}
3327 3331
3328 3332
3329/****************************************************************************** 3333/******************************************************************************
@@ -3452,7 +3456,7 @@ xlog_verify_iclog(xlog_t *log,
3452 ptr = iclog->ic_datap; 3456 ptr = iclog->ic_datap;
3453 base_ptr = ptr; 3457 base_ptr = ptr;
3454 ophead = (xlog_op_header_t *)ptr; 3458 ophead = (xlog_op_header_t *)ptr;
3455 xhdr = (xlog_in_core_2_t *)&iclog->ic_header; 3459 xhdr = iclog->ic_data;
3456 for (i = 0; i < len; i++) { 3460 for (i = 0; i < len; i++) {
3457 ophead = (xlog_op_header_t *)ptr; 3461 ophead = (xlog_op_header_t *)ptr;
3458 3462
@@ -3558,7 +3562,8 @@ xfs_log_force_umount(
3558 if (!log || 3562 if (!log ||
3559 log->l_flags & XLOG_ACTIVE_RECOVERY) { 3563 log->l_flags & XLOG_ACTIVE_RECOVERY) {
3560 mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN; 3564 mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN;
3561 XFS_BUF_DONE(mp->m_sb_bp); 3565 if (mp->m_sb_bp)
3566 XFS_BUF_DONE(mp->m_sb_bp);
3562 return 0; 3567 return 0;
3563 } 3568 }
3564 3569
@@ -3579,7 +3584,9 @@ xfs_log_force_umount(
3579 spin_lock(&log->l_icloglock); 3584 spin_lock(&log->l_icloglock);
3580 spin_lock(&log->l_grant_lock); 3585 spin_lock(&log->l_grant_lock);
3581 mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN; 3586 mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN;
3582 XFS_BUF_DONE(mp->m_sb_bp); 3587 if (mp->m_sb_bp)
3588 XFS_BUF_DONE(mp->m_sb_bp);
3589
3583 /* 3590 /*
3584 * This flag is sort of redundant because of the mount flag, but 3591 * This flag is sort of redundant because of the mount flag, but
3585 * it's good to maintain the separation between the log and the rest 3592 * it's good to maintain the separation between the log and the rest
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index d47b91f1082..8a3e84e900a 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -134,6 +134,7 @@ typedef struct xfs_log_callback {
134#ifdef __KERNEL__ 134#ifdef __KERNEL__
135/* Log manager interfaces */ 135/* Log manager interfaces */
136struct xfs_mount; 136struct xfs_mount;
137struct xlog_ticket;
137xfs_lsn_t xfs_log_done(struct xfs_mount *mp, 138xfs_lsn_t xfs_log_done(struct xfs_mount *mp,
138 xfs_log_ticket_t ticket, 139 xfs_log_ticket_t ticket,
139 void **iclog, 140 void **iclog,
@@ -177,6 +178,9 @@ int xfs_log_need_covered(struct xfs_mount *mp);
177 178
178void xlog_iodone(struct xfs_buf *); 179void xlog_iodone(struct xfs_buf *);
179 180
181struct xlog_ticket * xfs_log_ticket_get(struct xlog_ticket *ticket);
182void xfs_log_ticket_put(struct xlog_ticket *ticket);
183
180#endif 184#endif
181 185
182 186
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index e7d8f84443f..654167be0ef 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -245,6 +245,7 @@ typedef struct xlog_ticket {
245 struct xlog_ticket *t_next; /* :4|8 */ 245 struct xlog_ticket *t_next; /* :4|8 */
246 struct xlog_ticket *t_prev; /* :4|8 */ 246 struct xlog_ticket *t_prev; /* :4|8 */
247 xlog_tid_t t_tid; /* transaction identifier : 4 */ 247 xlog_tid_t t_tid; /* transaction identifier : 4 */
248 atomic_t t_ref; /* ticket reference count : 4 */
248 int t_curr_res; /* current reservation in bytes : 4 */ 249 int t_curr_res; /* current reservation in bytes : 4 */
249 int t_unit_res; /* unit reservation in bytes : 4 */ 250 int t_unit_res; /* unit reservation in bytes : 4 */
250 char t_ocnt; /* original count : 1 */ 251 char t_ocnt; /* original count : 1 */
@@ -309,6 +310,16 @@ typedef struct xlog_rec_ext_header {
309} xlog_rec_ext_header_t; 310} xlog_rec_ext_header_t;
310 311
311#ifdef __KERNEL__ 312#ifdef __KERNEL__
313
314/*
315 * Quite misnamed, because this union lays out the actual on-disk log buffer.
316 */
317typedef union xlog_in_core2 {
318 xlog_rec_header_t hic_header;
319 xlog_rec_ext_header_t hic_xheader;
320 char hic_sector[XLOG_HEADER_SIZE];
321} xlog_in_core_2_t;
322
312/* 323/*
313 * - A log record header is 512 bytes. There is plenty of room to grow the 324 * - A log record header is 512 bytes. There is plenty of room to grow the
314 * xlog_rec_header_t into the reserved space. 325 * xlog_rec_header_t into the reserved space.
@@ -338,7 +349,7 @@ typedef struct xlog_rec_ext_header {
338 * We'll put all the read-only and l_icloglock fields in the first cacheline, 349 * We'll put all the read-only and l_icloglock fields in the first cacheline,
339 * and move everything else out to subsequent cachelines. 350 * and move everything else out to subsequent cachelines.
340 */ 351 */
341typedef struct xlog_iclog_fields { 352typedef struct xlog_in_core {
342 sv_t ic_force_wait; 353 sv_t ic_force_wait;
343 sv_t ic_write_wait; 354 sv_t ic_write_wait;
344 struct xlog_in_core *ic_next; 355 struct xlog_in_core *ic_next;
@@ -361,41 +372,11 @@ typedef struct xlog_iclog_fields {
361 372
362 /* reference counts need their own cacheline */ 373 /* reference counts need their own cacheline */
363 atomic_t ic_refcnt ____cacheline_aligned_in_smp; 374 atomic_t ic_refcnt ____cacheline_aligned_in_smp;
364} xlog_iclog_fields_t; 375 xlog_in_core_2_t *ic_data;
365 376#define ic_header ic_data->hic_header
366typedef union xlog_in_core2 {
367 xlog_rec_header_t hic_header;
368 xlog_rec_ext_header_t hic_xheader;
369 char hic_sector[XLOG_HEADER_SIZE];
370} xlog_in_core_2_t;
371
372typedef struct xlog_in_core {
373 xlog_iclog_fields_t hic_fields;
374 xlog_in_core_2_t *hic_data;
375} xlog_in_core_t; 377} xlog_in_core_t;
376 378
377/* 379/*
378 * Defines to save our code from this glop.
379 */
380#define ic_force_wait hic_fields.ic_force_wait
381#define ic_write_wait hic_fields.ic_write_wait
382#define ic_next hic_fields.ic_next
383#define ic_prev hic_fields.ic_prev
384#define ic_bp hic_fields.ic_bp
385#define ic_log hic_fields.ic_log
386#define ic_callback hic_fields.ic_callback
387#define ic_callback_lock hic_fields.ic_callback_lock
388#define ic_callback_tail hic_fields.ic_callback_tail
389#define ic_trace hic_fields.ic_trace
390#define ic_size hic_fields.ic_size
391#define ic_offset hic_fields.ic_offset
392#define ic_refcnt hic_fields.ic_refcnt
393#define ic_bwritecnt hic_fields.ic_bwritecnt
394#define ic_state hic_fields.ic_state
395#define ic_datap hic_fields.ic_datap
396#define ic_header hic_data->hic_header
397
398/*
399 * The reservation head lsn is not made up of a cycle number and block number. 380 * The reservation head lsn is not made up of a cycle number and block number.
400 * Instead, it uses a cycle number and byte number. Logs don't expect to 381 * Instead, it uses a cycle number and byte number. Logs don't expect to
401 * overflow 31 bits worth of byte offset, so using a byte number will mean 382 * overflow 31 bits worth of byte offset, so using a byte number will mean
@@ -404,6 +385,7 @@ typedef struct xlog_in_core {
404typedef struct log { 385typedef struct log {
405 /* The following fields don't need locking */ 386 /* The following fields don't need locking */
406 struct xfs_mount *l_mp; /* mount point */ 387 struct xfs_mount *l_mp; /* mount point */
388 struct xfs_ail *l_ailp; /* AIL log is working with */
407 struct xfs_buf *l_xbuf; /* extra buffer for log 389 struct xfs_buf *l_xbuf; /* extra buffer for log
408 * wrapping */ 390 * wrapping */
409 struct xfs_buftarg *l_targ; /* buftarg of log */ 391 struct xfs_buftarg *l_targ; /* buftarg of log */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 70e3ba32e6b..35cca98bd94 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -36,7 +36,6 @@
36#include "xfs_dinode.h" 36#include "xfs_dinode.h"
37#include "xfs_inode.h" 37#include "xfs_inode.h"
38#include "xfs_inode_item.h" 38#include "xfs_inode_item.h"
39#include "xfs_imap.h"
40#include "xfs_alloc.h" 39#include "xfs_alloc.h"
41#include "xfs_ialloc.h" 40#include "xfs_ialloc.h"
42#include "xfs_log_priv.h" 41#include "xfs_log_priv.h"
@@ -54,10 +53,8 @@ STATIC void xlog_recover_insert_item_backq(xlog_recover_item_t **q,
54 xlog_recover_item_t *item); 53 xlog_recover_item_t *item);
55#if defined(DEBUG) 54#if defined(DEBUG)
56STATIC void xlog_recover_check_summary(xlog_t *); 55STATIC void xlog_recover_check_summary(xlog_t *);
57STATIC void xlog_recover_check_ail(xfs_mount_t *, xfs_log_item_t *, int);
58#else 56#else
59#define xlog_recover_check_summary(log) 57#define xlog_recover_check_summary(log)
60#define xlog_recover_check_ail(mp, lip, gen)
61#endif 58#endif
62 59
63 60
@@ -270,21 +267,16 @@ STATIC void
270xlog_recover_iodone( 267xlog_recover_iodone(
271 struct xfs_buf *bp) 268 struct xfs_buf *bp)
272{ 269{
273 xfs_mount_t *mp;
274
275 ASSERT(XFS_BUF_FSPRIVATE(bp, void *));
276
277 if (XFS_BUF_GETERROR(bp)) { 270 if (XFS_BUF_GETERROR(bp)) {
278 /* 271 /*
279 * We're not going to bother about retrying 272 * We're not going to bother about retrying
280 * this during recovery. One strike! 273 * this during recovery. One strike!
281 */ 274 */
282 mp = XFS_BUF_FSPRIVATE(bp, xfs_mount_t *);
283 xfs_ioerror_alert("xlog_recover_iodone", 275 xfs_ioerror_alert("xlog_recover_iodone",
284 mp, bp, XFS_BUF_ADDR(bp)); 276 bp->b_mount, bp, XFS_BUF_ADDR(bp));
285 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); 277 xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR);
286 } 278 }
287 XFS_BUF_SET_FSPRIVATE(bp, NULL); 279 bp->b_mount = NULL;
288 XFS_BUF_CLR_IODONE_FUNC(bp); 280 XFS_BUF_CLR_IODONE_FUNC(bp);
289 xfs_biodone(bp); 281 xfs_biodone(bp);
290} 282}
@@ -2228,9 +2220,8 @@ xlog_recover_do_buffer_trans(
2228 XFS_BUF_STALE(bp); 2220 XFS_BUF_STALE(bp);
2229 error = xfs_bwrite(mp, bp); 2221 error = xfs_bwrite(mp, bp);
2230 } else { 2222 } else {
2231 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL || 2223 ASSERT(bp->b_mount == NULL || bp->b_mount == mp);
2232 XFS_BUF_FSPRIVATE(bp, xfs_mount_t *) == mp); 2224 bp->b_mount = mp;
2233 XFS_BUF_SET_FSPRIVATE(bp, mp);
2234 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); 2225 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
2235 xfs_bdwrite(mp, bp); 2226 xfs_bdwrite(mp, bp);
2236 } 2227 }
@@ -2247,7 +2238,6 @@ xlog_recover_do_inode_trans(
2247 xfs_inode_log_format_t *in_f; 2238 xfs_inode_log_format_t *in_f;
2248 xfs_mount_t *mp; 2239 xfs_mount_t *mp;
2249 xfs_buf_t *bp; 2240 xfs_buf_t *bp;
2250 xfs_imap_t imap;
2251 xfs_dinode_t *dip; 2241 xfs_dinode_t *dip;
2252 xfs_ino_t ino; 2242 xfs_ino_t ino;
2253 int len; 2243 int len;
@@ -2275,54 +2265,35 @@ xlog_recover_do_inode_trans(
2275 } 2265 }
2276 ino = in_f->ilf_ino; 2266 ino = in_f->ilf_ino;
2277 mp = log->l_mp; 2267 mp = log->l_mp;
2278 if (ITEM_TYPE(item) == XFS_LI_INODE) {
2279 imap.im_blkno = (xfs_daddr_t)in_f->ilf_blkno;
2280 imap.im_len = in_f->ilf_len;
2281 imap.im_boffset = in_f->ilf_boffset;
2282 } else {
2283 /*
2284 * It's an old inode format record. We don't know where
2285 * its cluster is located on disk, and we can't allow
2286 * xfs_imap() to figure it out because the inode btrees
2287 * are not ready to be used. Therefore do not pass the
2288 * XFS_IMAP_LOOKUP flag to xfs_imap(). This will give
2289 * us only the single block in which the inode lives
2290 * rather than its cluster, so we must make sure to
2291 * invalidate the buffer when we write it out below.
2292 */
2293 imap.im_blkno = 0;
2294 error = xfs_imap(log->l_mp, NULL, ino, &imap, 0);
2295 if (error)
2296 goto error;
2297 }
2298 2268
2299 /* 2269 /*
2300 * Inode buffers can be freed, look out for it, 2270 * Inode buffers can be freed, look out for it,
2301 * and do not replay the inode. 2271 * and do not replay the inode.
2302 */ 2272 */
2303 if (xlog_check_buffer_cancelled(log, imap.im_blkno, imap.im_len, 0)) { 2273 if (xlog_check_buffer_cancelled(log, in_f->ilf_blkno,
2274 in_f->ilf_len, 0)) {
2304 error = 0; 2275 error = 0;
2305 goto error; 2276 goto error;
2306 } 2277 }
2307 2278
2308 bp = xfs_buf_read_flags(mp->m_ddev_targp, imap.im_blkno, imap.im_len, 2279 bp = xfs_buf_read_flags(mp->m_ddev_targp, in_f->ilf_blkno,
2309 XFS_BUF_LOCK); 2280 in_f->ilf_len, XFS_BUF_LOCK);
2310 if (XFS_BUF_ISERROR(bp)) { 2281 if (XFS_BUF_ISERROR(bp)) {
2311 xfs_ioerror_alert("xlog_recover_do..(read#2)", mp, 2282 xfs_ioerror_alert("xlog_recover_do..(read#2)", mp,
2312 bp, imap.im_blkno); 2283 bp, in_f->ilf_blkno);
2313 error = XFS_BUF_GETERROR(bp); 2284 error = XFS_BUF_GETERROR(bp);
2314 xfs_buf_relse(bp); 2285 xfs_buf_relse(bp);
2315 goto error; 2286 goto error;
2316 } 2287 }
2317 error = 0; 2288 error = 0;
2318 ASSERT(in_f->ilf_fields & XFS_ILOG_CORE); 2289 ASSERT(in_f->ilf_fields & XFS_ILOG_CORE);
2319 dip = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); 2290 dip = (xfs_dinode_t *)xfs_buf_offset(bp, in_f->ilf_boffset);
2320 2291
2321 /* 2292 /*
2322 * Make sure the place we're flushing out to really looks 2293 * Make sure the place we're flushing out to really looks
2323 * like an inode! 2294 * like an inode!
2324 */ 2295 */
2325 if (unlikely(be16_to_cpu(dip->di_core.di_magic) != XFS_DINODE_MAGIC)) { 2296 if (unlikely(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC)) {
2326 xfs_buf_relse(bp); 2297 xfs_buf_relse(bp);
2327 xfs_fs_cmn_err(CE_ALERT, mp, 2298 xfs_fs_cmn_err(CE_ALERT, mp,
2328 "xfs_inode_recover: Bad inode magic number, dino ptr = 0x%p, dino bp = 0x%p, ino = %Ld", 2299 "xfs_inode_recover: Bad inode magic number, dino ptr = 0x%p, dino bp = 0x%p, ino = %Ld",
@@ -2345,12 +2316,12 @@ xlog_recover_do_inode_trans(
2345 } 2316 }
2346 2317
2347 /* Skip replay when the on disk inode is newer than the log one */ 2318 /* Skip replay when the on disk inode is newer than the log one */
2348 if (dicp->di_flushiter < be16_to_cpu(dip->di_core.di_flushiter)) { 2319 if (dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
2349 /* 2320 /*
2350 * Deal with the wrap case, DI_MAX_FLUSH is less 2321 * Deal with the wrap case, DI_MAX_FLUSH is less
2351 * than smaller numbers 2322 * than smaller numbers
2352 */ 2323 */
2353 if (be16_to_cpu(dip->di_core.di_flushiter) == DI_MAX_FLUSH && 2324 if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH &&
2354 dicp->di_flushiter < (DI_MAX_FLUSH >> 1)) { 2325 dicp->di_flushiter < (DI_MAX_FLUSH >> 1)) {
2355 /* do nothing */ 2326 /* do nothing */
2356 } else { 2327 } else {
@@ -2410,7 +2381,7 @@ xlog_recover_do_inode_trans(
2410 error = EFSCORRUPTED; 2381 error = EFSCORRUPTED;
2411 goto error; 2382 goto error;
2412 } 2383 }
2413 if (unlikely(item->ri_buf[1].i_len > sizeof(xfs_dinode_core_t))) { 2384 if (unlikely(item->ri_buf[1].i_len > sizeof(struct xfs_icdinode))) {
2414 XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(7)", 2385 XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(7)",
2415 XFS_ERRLEVEL_LOW, mp, dicp); 2386 XFS_ERRLEVEL_LOW, mp, dicp);
2416 xfs_buf_relse(bp); 2387 xfs_buf_relse(bp);
@@ -2422,23 +2393,24 @@ xlog_recover_do_inode_trans(
2422 } 2393 }
2423 2394
2424 /* The core is in in-core format */ 2395 /* The core is in in-core format */
2425 xfs_dinode_to_disk(&dip->di_core, 2396 xfs_dinode_to_disk(dip, (xfs_icdinode_t *)item->ri_buf[1].i_addr);
2426 (xfs_icdinode_t *)item->ri_buf[1].i_addr);
2427 2397
2428 /* the rest is in on-disk format */ 2398 /* the rest is in on-disk format */
2429 if (item->ri_buf[1].i_len > sizeof(xfs_dinode_core_t)) { 2399 if (item->ri_buf[1].i_len > sizeof(struct xfs_icdinode)) {
2430 memcpy((xfs_caddr_t) dip + sizeof(xfs_dinode_core_t), 2400 memcpy((xfs_caddr_t) dip + sizeof(struct xfs_icdinode),
2431 item->ri_buf[1].i_addr + sizeof(xfs_dinode_core_t), 2401 item->ri_buf[1].i_addr + sizeof(struct xfs_icdinode),
2432 item->ri_buf[1].i_len - sizeof(xfs_dinode_core_t)); 2402 item->ri_buf[1].i_len - sizeof(struct xfs_icdinode));
2433 } 2403 }
2434 2404
2435 fields = in_f->ilf_fields; 2405 fields = in_f->ilf_fields;
2436 switch (fields & (XFS_ILOG_DEV | XFS_ILOG_UUID)) { 2406 switch (fields & (XFS_ILOG_DEV | XFS_ILOG_UUID)) {
2437 case XFS_ILOG_DEV: 2407 case XFS_ILOG_DEV:
2438 dip->di_u.di_dev = cpu_to_be32(in_f->ilf_u.ilfu_rdev); 2408 xfs_dinode_put_rdev(dip, in_f->ilf_u.ilfu_rdev);
2439 break; 2409 break;
2440 case XFS_ILOG_UUID: 2410 case XFS_ILOG_UUID:
2441 dip->di_u.di_muuid = in_f->ilf_u.ilfu_uuid; 2411 memcpy(XFS_DFORK_DPTR(dip),
2412 &in_f->ilf_u.ilfu_uuid,
2413 sizeof(uuid_t));
2442 break; 2414 break;
2443 } 2415 }
2444 2416
@@ -2454,12 +2426,12 @@ xlog_recover_do_inode_trans(
2454 switch (fields & XFS_ILOG_DFORK) { 2426 switch (fields & XFS_ILOG_DFORK) {
2455 case XFS_ILOG_DDATA: 2427 case XFS_ILOG_DDATA:
2456 case XFS_ILOG_DEXT: 2428 case XFS_ILOG_DEXT:
2457 memcpy(&dip->di_u, src, len); 2429 memcpy(XFS_DFORK_DPTR(dip), src, len);
2458 break; 2430 break;
2459 2431
2460 case XFS_ILOG_DBROOT: 2432 case XFS_ILOG_DBROOT:
2461 xfs_bmbt_to_bmdr((xfs_bmbt_block_t *)src, len, 2433 xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, len,
2462 &(dip->di_u.di_bmbt), 2434 (xfs_bmdr_block_t *)XFS_DFORK_DPTR(dip),
2463 XFS_DFORK_DSIZE(dip, mp)); 2435 XFS_DFORK_DSIZE(dip, mp));
2464 break; 2436 break;
2465 2437
@@ -2496,8 +2468,8 @@ xlog_recover_do_inode_trans(
2496 2468
2497 case XFS_ILOG_ABROOT: 2469 case XFS_ILOG_ABROOT:
2498 dest = XFS_DFORK_APTR(dip); 2470 dest = XFS_DFORK_APTR(dip);
2499 xfs_bmbt_to_bmdr((xfs_bmbt_block_t *)src, len, 2471 xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src,
2500 (xfs_bmdr_block_t*)dest, 2472 len, (xfs_bmdr_block_t*)dest,
2501 XFS_DFORK_ASIZE(dip, mp)); 2473 XFS_DFORK_ASIZE(dip, mp));
2502 break; 2474 break;
2503 2475
@@ -2512,9 +2484,8 @@ xlog_recover_do_inode_trans(
2512 2484
2513write_inode_buffer: 2485write_inode_buffer:
2514 if (ITEM_TYPE(item) == XFS_LI_INODE) { 2486 if (ITEM_TYPE(item) == XFS_LI_INODE) {
2515 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL || 2487 ASSERT(bp->b_mount == NULL || bp->b_mount == mp);
2516 XFS_BUF_FSPRIVATE(bp, xfs_mount_t *) == mp); 2488 bp->b_mount = mp;
2517 XFS_BUF_SET_FSPRIVATE(bp, mp);
2518 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); 2489 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
2519 xfs_bdwrite(mp, bp); 2490 xfs_bdwrite(mp, bp);
2520 } else { 2491 } else {
@@ -2645,9 +2616,8 @@ xlog_recover_do_dquot_trans(
2645 memcpy(ddq, recddq, item->ri_buf[1].i_len); 2616 memcpy(ddq, recddq, item->ri_buf[1].i_len);
2646 2617
2647 ASSERT(dq_f->qlf_size == 2); 2618 ASSERT(dq_f->qlf_size == 2);
2648 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL || 2619 ASSERT(bp->b_mount == NULL || bp->b_mount == mp);
2649 XFS_BUF_FSPRIVATE(bp, xfs_mount_t *) == mp); 2620 bp->b_mount = mp;
2650 XFS_BUF_SET_FSPRIVATE(bp, mp);
2651 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); 2621 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
2652 xfs_bdwrite(mp, bp); 2622 xfs_bdwrite(mp, bp);
2653 2623
@@ -2689,11 +2659,11 @@ xlog_recover_do_efi_trans(
2689 efip->efi_next_extent = efi_formatp->efi_nextents; 2659 efip->efi_next_extent = efi_formatp->efi_nextents;
2690 efip->efi_flags |= XFS_EFI_COMMITTED; 2660 efip->efi_flags |= XFS_EFI_COMMITTED;
2691 2661
2692 spin_lock(&mp->m_ail_lock); 2662 spin_lock(&log->l_ailp->xa_lock);
2693 /* 2663 /*
2694 * xfs_trans_update_ail() drops the AIL lock. 2664 * xfs_trans_ail_update() drops the AIL lock.
2695 */ 2665 */
2696 xfs_trans_update_ail(mp, (xfs_log_item_t *)efip, lsn); 2666 xfs_trans_ail_update(log->l_ailp, (xfs_log_item_t *)efip, lsn);
2697 return 0; 2667 return 0;
2698} 2668}
2699 2669
@@ -2712,12 +2682,12 @@ xlog_recover_do_efd_trans(
2712 xlog_recover_item_t *item, 2682 xlog_recover_item_t *item,
2713 int pass) 2683 int pass)
2714{ 2684{
2715 xfs_mount_t *mp;
2716 xfs_efd_log_format_t *efd_formatp; 2685 xfs_efd_log_format_t *efd_formatp;
2717 xfs_efi_log_item_t *efip = NULL; 2686 xfs_efi_log_item_t *efip = NULL;
2718 xfs_log_item_t *lip; 2687 xfs_log_item_t *lip;
2719 int gen;
2720 __uint64_t efi_id; 2688 __uint64_t efi_id;
2689 struct xfs_ail_cursor cur;
2690 struct xfs_ail *ailp = log->l_ailp;
2721 2691
2722 if (pass == XLOG_RECOVER_PASS1) { 2692 if (pass == XLOG_RECOVER_PASS1) {
2723 return; 2693 return;
@@ -2734,25 +2704,26 @@ xlog_recover_do_efd_trans(
2734 * Search for the efi with the id in the efd format structure 2704 * Search for the efi with the id in the efd format structure
2735 * in the AIL. 2705 * in the AIL.
2736 */ 2706 */
2737 mp = log->l_mp; 2707 spin_lock(&ailp->xa_lock);
2738 spin_lock(&mp->m_ail_lock); 2708 lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
2739 lip = xfs_trans_first_ail(mp, &gen);
2740 while (lip != NULL) { 2709 while (lip != NULL) {
2741 if (lip->li_type == XFS_LI_EFI) { 2710 if (lip->li_type == XFS_LI_EFI) {
2742 efip = (xfs_efi_log_item_t *)lip; 2711 efip = (xfs_efi_log_item_t *)lip;
2743 if (efip->efi_format.efi_id == efi_id) { 2712 if (efip->efi_format.efi_id == efi_id) {
2744 /* 2713 /*
2745 * xfs_trans_delete_ail() drops the 2714 * xfs_trans_ail_delete() drops the
2746 * AIL lock. 2715 * AIL lock.
2747 */ 2716 */
2748 xfs_trans_delete_ail(mp, lip); 2717 xfs_trans_ail_delete(ailp, lip);
2749 xfs_efi_item_free(efip); 2718 xfs_efi_item_free(efip);
2750 return; 2719 spin_lock(&ailp->xa_lock);
2720 break;
2751 } 2721 }
2752 } 2722 }
2753 lip = xfs_trans_next_ail(mp, lip, &gen, NULL); 2723 lip = xfs_trans_ail_cursor_next(ailp, &cur);
2754 } 2724 }
2755 spin_unlock(&mp->m_ail_lock); 2725 xfs_trans_ail_cursor_done(ailp, &cur);
2726 spin_unlock(&ailp->xa_lock);
2756} 2727}
2757 2728
2758/* 2729/*
@@ -3036,33 +3007,6 @@ abort_error:
3036} 3007}
3037 3008
3038/* 3009/*
3039 * Verify that once we've encountered something other than an EFI
3040 * in the AIL that there are no more EFIs in the AIL.
3041 */
3042#if defined(DEBUG)
3043STATIC void
3044xlog_recover_check_ail(
3045 xfs_mount_t *mp,
3046 xfs_log_item_t *lip,
3047 int gen)
3048{
3049 int orig_gen = gen;
3050
3051 do {
3052 ASSERT(lip->li_type != XFS_LI_EFI);
3053 lip = xfs_trans_next_ail(mp, lip, &gen, NULL);
3054 /*
3055 * The check will be bogus if we restart from the
3056 * beginning of the AIL, so ASSERT that we don't.
3057 * We never should since we're holding the AIL lock
3058 * the entire time.
3059 */
3060 ASSERT(gen == orig_gen);
3061 } while (lip != NULL);
3062}
3063#endif /* DEBUG */
3064
3065/*
3066 * When this is called, all of the EFIs which did not have 3010 * When this is called, all of the EFIs which did not have
3067 * corresponding EFDs should be in the AIL. What we do now 3011 * corresponding EFDs should be in the AIL. What we do now
3068 * is free the extents associated with each one. 3012 * is free the extents associated with each one.
@@ -3086,20 +3030,23 @@ xlog_recover_process_efis(
3086{ 3030{
3087 xfs_log_item_t *lip; 3031 xfs_log_item_t *lip;
3088 xfs_efi_log_item_t *efip; 3032 xfs_efi_log_item_t *efip;
3089 int gen;
3090 xfs_mount_t *mp;
3091 int error = 0; 3033 int error = 0;
3034 struct xfs_ail_cursor cur;
3035 struct xfs_ail *ailp;
3092 3036
3093 mp = log->l_mp; 3037 ailp = log->l_ailp;
3094 spin_lock(&mp->m_ail_lock); 3038 spin_lock(&ailp->xa_lock);
3095 3039 lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
3096 lip = xfs_trans_first_ail(mp, &gen);
3097 while (lip != NULL) { 3040 while (lip != NULL) {
3098 /* 3041 /*
3099 * We're done when we see something other than an EFI. 3042 * We're done when we see something other than an EFI.
3043 * There should be no EFIs left in the AIL now.
3100 */ 3044 */
3101 if (lip->li_type != XFS_LI_EFI) { 3045 if (lip->li_type != XFS_LI_EFI) {
3102 xlog_recover_check_ail(mp, lip, gen); 3046#ifdef DEBUG
3047 for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur))
3048 ASSERT(lip->li_type != XFS_LI_EFI);
3049#endif
3103 break; 3050 break;
3104 } 3051 }
3105 3052
@@ -3108,18 +3055,20 @@ xlog_recover_process_efis(
3108 */ 3055 */
3109 efip = (xfs_efi_log_item_t *)lip; 3056 efip = (xfs_efi_log_item_t *)lip;
3110 if (efip->efi_flags & XFS_EFI_RECOVERED) { 3057 if (efip->efi_flags & XFS_EFI_RECOVERED) {
3111 lip = xfs_trans_next_ail(mp, lip, &gen, NULL); 3058 lip = xfs_trans_ail_cursor_next(ailp, &cur);
3112 continue; 3059 continue;
3113 } 3060 }
3114 3061
3115 spin_unlock(&mp->m_ail_lock); 3062 spin_unlock(&ailp->xa_lock);
3116 error = xlog_recover_process_efi(mp, efip); 3063 error = xlog_recover_process_efi(log->l_mp, efip);
3064 spin_lock(&ailp->xa_lock);
3117 if (error) 3065 if (error)
3118 return error; 3066 goto out;
3119 spin_lock(&mp->m_ail_lock); 3067 lip = xfs_trans_ail_cursor_next(ailp, &cur);
3120 lip = xfs_trans_next_ail(mp, lip, &gen, NULL);
3121 } 3068 }
3122 spin_unlock(&mp->m_ail_lock); 3069out:
3070 xfs_trans_ail_cursor_done(ailp, &cur);
3071 spin_unlock(&ailp->xa_lock);
3123 return error; 3072 return error;
3124} 3073}
3125 3074
@@ -3140,19 +3089,16 @@ xlog_recover_clear_agi_bucket(
3140 int error; 3089 int error;
3141 3090
3142 tp = xfs_trans_alloc(mp, XFS_TRANS_CLEAR_AGI_BUCKET); 3091 tp = xfs_trans_alloc(mp, XFS_TRANS_CLEAR_AGI_BUCKET);
3143 error = xfs_trans_reserve(tp, 0, XFS_CLEAR_AGI_BUCKET_LOG_RES(mp), 0, 0, 0); 3092 error = xfs_trans_reserve(tp, 0, XFS_CLEAR_AGI_BUCKET_LOG_RES(mp),
3144 if (!error) 3093 0, 0, 0);
3145 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
3146 XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
3147 XFS_FSS_TO_BB(mp, 1), 0, &agibp);
3148 if (error) 3094 if (error)
3149 goto out_abort; 3095 goto out_abort;
3150 3096
3151 error = EINVAL; 3097 error = xfs_read_agi(mp, tp, agno, &agibp);
3152 agi = XFS_BUF_TO_AGI(agibp); 3098 if (error)
3153 if (be32_to_cpu(agi->agi_magicnum) != XFS_AGI_MAGIC)
3154 goto out_abort; 3099 goto out_abort;
3155 3100
3101 agi = XFS_BUF_TO_AGI(agibp);
3156 agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); 3102 agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
3157 offset = offsetof(xfs_agi_t, agi_unlinked) + 3103 offset = offsetof(xfs_agi_t, agi_unlinked) +
3158 (sizeof(xfs_agino_t) * bucket); 3104 (sizeof(xfs_agino_t) * bucket);
@@ -3172,6 +3118,62 @@ out_error:
3172 return; 3118 return;
3173} 3119}
3174 3120
3121STATIC xfs_agino_t
3122xlog_recover_process_one_iunlink(
3123 struct xfs_mount *mp,
3124 xfs_agnumber_t agno,
3125 xfs_agino_t agino,
3126 int bucket)
3127{
3128 struct xfs_buf *ibp;
3129 struct xfs_dinode *dip;
3130 struct xfs_inode *ip;
3131 xfs_ino_t ino;
3132 int error;
3133
3134 ino = XFS_AGINO_TO_INO(mp, agno, agino);
3135 error = xfs_iget(mp, NULL, ino, 0, 0, &ip, 0);
3136 if (error)
3137 goto fail;
3138
3139 /*
3140 * Get the on disk inode to find the next inode in the bucket.
3141 */
3142 error = xfs_itobp(mp, NULL, ip, &dip, &ibp, XFS_BUF_LOCK);
3143 if (error)
3144 goto fail_iput;
3145
3146 ASSERT(ip->i_d.di_nlink == 0);
3147 ASSERT(ip->i_d.di_mode != 0);
3148
3149 /* setup for the next pass */
3150 agino = be32_to_cpu(dip->di_next_unlinked);
3151 xfs_buf_relse(ibp);
3152
3153 /*
3154 * Prevent any DMAPI event from being sent when the reference on
3155 * the inode is dropped.
3156 */
3157 ip->i_d.di_dmevmask = 0;
3158
3159 IRELE(ip);
3160 return agino;
3161
3162 fail_iput:
3163 IRELE(ip);
3164 fail:
3165 /*
3166 * We can't read in the inode this bucket points to, or this inode
3167 * is messed up. Just ditch this bucket of inodes. We will lose
3168 * some inodes and space, but at least we won't hang.
3169 *
3170 * Call xlog_recover_clear_agi_bucket() to perform a transaction to
3171 * clear the inode pointer in the bucket.
3172 */
3173 xlog_recover_clear_agi_bucket(mp, agno, bucket);
3174 return NULLAGINO;
3175}
3176
3175/* 3177/*
3176 * xlog_iunlink_recover 3178 * xlog_iunlink_recover
3177 * 3179 *
@@ -3192,11 +3194,7 @@ xlog_recover_process_iunlinks(
3192 xfs_agnumber_t agno; 3194 xfs_agnumber_t agno;
3193 xfs_agi_t *agi; 3195 xfs_agi_t *agi;
3194 xfs_buf_t *agibp; 3196 xfs_buf_t *agibp;
3195 xfs_buf_t *ibp;
3196 xfs_dinode_t *dip;
3197 xfs_inode_t *ip;
3198 xfs_agino_t agino; 3197 xfs_agino_t agino;
3199 xfs_ino_t ino;
3200 int bucket; 3198 int bucket;
3201 int error; 3199 int error;
3202 uint mp_dmevmask; 3200 uint mp_dmevmask;
@@ -3213,22 +3211,21 @@ xlog_recover_process_iunlinks(
3213 /* 3211 /*
3214 * Find the agi for this ag. 3212 * Find the agi for this ag.
3215 */ 3213 */
3216 agibp = xfs_buf_read(mp->m_ddev_targp, 3214 error = xfs_read_agi(mp, NULL, agno, &agibp);
3217 XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), 3215 if (error) {
3218 XFS_FSS_TO_BB(mp, 1), 0); 3216 /*
3219 if (XFS_BUF_ISERROR(agibp)) { 3217 * AGI is b0rked. Don't process it.
3220 xfs_ioerror_alert("xlog_recover_process_iunlinks(#1)", 3218 *
3221 log->l_mp, agibp, 3219 * We should probably mark the filesystem as corrupt
3222 XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp))); 3220 * after we've recovered all the ag's we can....
3221 */
3222 continue;
3223 } 3223 }
3224 agi = XFS_BUF_TO_AGI(agibp); 3224 agi = XFS_BUF_TO_AGI(agibp);
3225 ASSERT(XFS_AGI_MAGIC == be32_to_cpu(agi->agi_magicnum));
3226 3225
3227 for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) { 3226 for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) {
3228
3229 agino = be32_to_cpu(agi->agi_unlinked[bucket]); 3227 agino = be32_to_cpu(agi->agi_unlinked[bucket]);
3230 while (agino != NULLAGINO) { 3228 while (agino != NULLAGINO) {
3231
3232 /* 3229 /*
3233 * Release the agi buffer so that it can 3230 * Release the agi buffer so that it can
3234 * be acquired in the normal course of the 3231 * be acquired in the normal course of the
@@ -3236,87 +3233,17 @@ xlog_recover_process_iunlinks(
3236 */ 3233 */
3237 xfs_buf_relse(agibp); 3234 xfs_buf_relse(agibp);
3238 3235
3239 ino = XFS_AGINO_TO_INO(mp, agno, agino); 3236 agino = xlog_recover_process_one_iunlink(mp,
3240 error = xfs_iget(mp, NULL, ino, 0, 0, &ip, 0); 3237 agno, agino, bucket);
3241 ASSERT(error || (ip != NULL));
3242
3243 if (!error) {
3244 /*
3245 * Get the on disk inode to find the
3246 * next inode in the bucket.
3247 */
3248 error = xfs_itobp(mp, NULL, ip, &dip,
3249 &ibp, 0, 0,
3250 XFS_BUF_LOCK);
3251 ASSERT(error || (dip != NULL));
3252 }
3253
3254 if (!error) {
3255 ASSERT(ip->i_d.di_nlink == 0);
3256
3257 /* setup for the next pass */
3258 agino = be32_to_cpu(
3259 dip->di_next_unlinked);
3260 xfs_buf_relse(ibp);
3261 /*
3262 * Prevent any DMAPI event from
3263 * being sent when the
3264 * reference on the inode is
3265 * dropped.
3266 */
3267 ip->i_d.di_dmevmask = 0;
3268
3269 /*
3270 * If this is a new inode, handle
3271 * it specially. Otherwise,
3272 * just drop our reference to the
3273 * inode. If there are no
3274 * other references, this will
3275 * send the inode to
3276 * xfs_inactive() which will
3277 * truncate the file and free
3278 * the inode.
3279 */
3280 if (ip->i_d.di_mode == 0)
3281 xfs_iput_new(ip, 0);
3282 else
3283 IRELE(ip);
3284 } else {
3285 /*
3286 * We can't read in the inode
3287 * this bucket points to, or
3288 * this inode is messed up. Just
3289 * ditch this bucket of inodes. We
3290 * will lose some inodes and space,
3291 * but at least we won't hang. Call
3292 * xlog_recover_clear_agi_bucket()
3293 * to perform a transaction to clear
3294 * the inode pointer in the bucket.
3295 */
3296 xlog_recover_clear_agi_bucket(mp, agno,
3297 bucket);
3298
3299 agino = NULLAGINO;
3300 }
3301 3238
3302 /* 3239 /*
3303 * Reacquire the agibuffer and continue around 3240 * Reacquire the agibuffer and continue around
3304 * the loop. 3241 * the loop. This should never fail as we know
3242 * the buffer was good earlier on.
3305 */ 3243 */
3306 agibp = xfs_buf_read(mp->m_ddev_targp, 3244 error = xfs_read_agi(mp, NULL, agno, &agibp);
3307 XFS_AG_DADDR(mp, agno, 3245 ASSERT(error == 0);
3308 XFS_AGI_DADDR(mp)),
3309 XFS_FSS_TO_BB(mp, 1), 0);
3310 if (XFS_BUF_ISERROR(agibp)) {
3311 xfs_ioerror_alert(
3312 "xlog_recover_process_iunlinks(#2)",
3313 log->l_mp, agibp,
3314 XFS_AG_DADDR(mp, agno,
3315 XFS_AGI_DADDR(mp)));
3316 }
3317 agi = XFS_BUF_TO_AGI(agibp); 3246 agi = XFS_BUF_TO_AGI(agibp);
3318 ASSERT(XFS_AGI_MAGIC == be32_to_cpu(
3319 agi->agi_magicnum));
3320 } 3247 }
3321 } 3248 }
3322 3249
@@ -3367,7 +3294,6 @@ xlog_pack_data(
3367 int size = iclog->ic_offset + roundoff; 3294 int size = iclog->ic_offset + roundoff;
3368 __be32 cycle_lsn; 3295 __be32 cycle_lsn;
3369 xfs_caddr_t dp; 3296 xfs_caddr_t dp;
3370 xlog_in_core_2_t *xhdr;
3371 3297
3372 xlog_pack_data_checksum(log, iclog, size); 3298 xlog_pack_data_checksum(log, iclog, size);
3373 3299
@@ -3382,7 +3308,8 @@ xlog_pack_data(
3382 } 3308 }
3383 3309
3384 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { 3310 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
3385 xhdr = (xlog_in_core_2_t *)&iclog->ic_header; 3311 xlog_in_core_2_t *xhdr = iclog->ic_data;
3312
3386 for ( ; i < BTOBB(size); i++) { 3313 for ( ; i < BTOBB(size); i++) {
3387 j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); 3314 j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
3388 k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); 3315 k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
@@ -3440,7 +3367,6 @@ xlog_unpack_data(
3440 xlog_t *log) 3367 xlog_t *log)
3441{ 3368{
3442 int i, j, k; 3369 int i, j, k;
3443 xlog_in_core_2_t *xhdr;
3444 3370
3445 for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) && 3371 for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) &&
3446 i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) { 3372 i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) {
@@ -3449,7 +3375,7 @@ xlog_unpack_data(
3449 } 3375 }
3450 3376
3451 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { 3377 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
3452 xhdr = (xlog_in_core_2_t *)rhead; 3378 xlog_in_core_2_t *xhdr = (xlog_in_core_2_t *)rhead;
3453 for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) { 3379 for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) {
3454 j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); 3380 j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
3455 k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); 3381 k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
@@ -4003,11 +3929,8 @@ xlog_recover_check_summary(
4003{ 3929{
4004 xfs_mount_t *mp; 3930 xfs_mount_t *mp;
4005 xfs_agf_t *agfp; 3931 xfs_agf_t *agfp;
4006 xfs_agi_t *agip;
4007 xfs_buf_t *agfbp; 3932 xfs_buf_t *agfbp;
4008 xfs_buf_t *agibp; 3933 xfs_buf_t *agibp;
4009 xfs_daddr_t agfdaddr;
4010 xfs_daddr_t agidaddr;
4011 xfs_buf_t *sbbp; 3934 xfs_buf_t *sbbp;
4012#ifdef XFS_LOUD_RECOVERY 3935#ifdef XFS_LOUD_RECOVERY
4013 xfs_sb_t *sbp; 3936 xfs_sb_t *sbp;
@@ -4016,6 +3939,7 @@ xlog_recover_check_summary(
4016 __uint64_t freeblks; 3939 __uint64_t freeblks;
4017 __uint64_t itotal; 3940 __uint64_t itotal;
4018 __uint64_t ifree; 3941 __uint64_t ifree;
3942 int error;
4019 3943
4020 mp = log->l_mp; 3944 mp = log->l_mp;
4021 3945
@@ -4023,37 +3947,27 @@ xlog_recover_check_summary(
4023 itotal = 0LL; 3947 itotal = 0LL;
4024 ifree = 0LL; 3948 ifree = 0LL;
4025 for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { 3949 for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
4026 agfdaddr = XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)); 3950 error = xfs_read_agf(mp, NULL, agno, 0, &agfbp);
4027 agfbp = xfs_buf_read(mp->m_ddev_targp, agfdaddr, 3951 if (error) {
4028 XFS_FSS_TO_BB(mp, 1), 0); 3952 xfs_fs_cmn_err(CE_ALERT, mp,
4029 if (XFS_BUF_ISERROR(agfbp)) { 3953 "xlog_recover_check_summary(agf)"
4030 xfs_ioerror_alert("xlog_recover_check_summary(agf)", 3954 "agf read failed agno %d error %d",
4031 mp, agfbp, agfdaddr); 3955 agno, error);
4032 } 3956 } else {
4033 agfp = XFS_BUF_TO_AGF(agfbp); 3957 agfp = XFS_BUF_TO_AGF(agfbp);
4034 ASSERT(XFS_AGF_MAGIC == be32_to_cpu(agfp->agf_magicnum)); 3958 freeblks += be32_to_cpu(agfp->agf_freeblks) +
4035 ASSERT(XFS_AGF_GOOD_VERSION(be32_to_cpu(agfp->agf_versionnum))); 3959 be32_to_cpu(agfp->agf_flcount);
4036 ASSERT(be32_to_cpu(agfp->agf_seqno) == agno); 3960 xfs_buf_relse(agfbp);
4037
4038 freeblks += be32_to_cpu(agfp->agf_freeblks) +
4039 be32_to_cpu(agfp->agf_flcount);
4040 xfs_buf_relse(agfbp);
4041
4042 agidaddr = XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp));
4043 agibp = xfs_buf_read(mp->m_ddev_targp, agidaddr,
4044 XFS_FSS_TO_BB(mp, 1), 0);
4045 if (XFS_BUF_ISERROR(agibp)) {
4046 xfs_ioerror_alert("xlog_recover_check_summary(agi)",
4047 mp, agibp, agidaddr);
4048 } 3961 }
4049 agip = XFS_BUF_TO_AGI(agibp);
4050 ASSERT(XFS_AGI_MAGIC == be32_to_cpu(agip->agi_magicnum));
4051 ASSERT(XFS_AGI_GOOD_VERSION(be32_to_cpu(agip->agi_versionnum)));
4052 ASSERT(be32_to_cpu(agip->agi_seqno) == agno);
4053 3962
4054 itotal += be32_to_cpu(agip->agi_count); 3963 error = xfs_read_agi(mp, NULL, agno, &agibp);
4055 ifree += be32_to_cpu(agip->agi_freecount); 3964 if (!error) {
4056 xfs_buf_relse(agibp); 3965 struct xfs_agi *agi = XFS_BUF_TO_AGI(agibp);
3966
3967 itotal += be32_to_cpu(agi->agi_count);
3968 ifree += be32_to_cpu(agi->agi_freecount);
3969 xfs_buf_relse(agibp);
3970 }
4057 } 3971 }
4058 3972
4059 sbbp = xfs_getsb(mp, 0); 3973 sbbp = xfs_getsb(mp, 0);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 15f5dd22fbb..3c97c6463a4 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -567,8 +567,6 @@ xfs_readsb(xfs_mount_t *mp, int flags)
567STATIC void 567STATIC void
568xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp) 568xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp)
569{ 569{
570 int i;
571
572 mp->m_agfrotor = mp->m_agirotor = 0; 570 mp->m_agfrotor = mp->m_agirotor = 0;
573 spin_lock_init(&mp->m_agirotor_lock); 571 spin_lock_init(&mp->m_agirotor_lock);
574 mp->m_maxagi = mp->m_sb.sb_agcount; 572 mp->m_maxagi = mp->m_sb.sb_agcount;
@@ -577,12 +575,10 @@ xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp)
577 mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT; 575 mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT;
578 mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1; 576 mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1;
579 mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog; 577 mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
580 mp->m_litino = sbp->sb_inodesize - 578 mp->m_litino = sbp->sb_inodesize - sizeof(struct xfs_dinode);
581 ((uint)sizeof(xfs_dinode_core_t) + (uint)sizeof(xfs_agino_t));
582 mp->m_blockmask = sbp->sb_blocksize - 1; 579 mp->m_blockmask = sbp->sb_blocksize - 1;
583 mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG; 580 mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG;
584 mp->m_blockwmask = mp->m_blockwsize - 1; 581 mp->m_blockwmask = mp->m_blockwsize - 1;
585 INIT_LIST_HEAD(&mp->m_del_inodes);
586 582
587 /* 583 /*
588 * Setup for attributes, in case they get created. 584 * Setup for attributes, in case they get created.
@@ -605,24 +601,20 @@ xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp)
605 } 601 }
606 ASSERT(mp->m_attroffset < XFS_LITINO(mp)); 602 ASSERT(mp->m_attroffset < XFS_LITINO(mp));
607 603
608 for (i = 0; i < 2; i++) { 604 mp->m_alloc_mxr[0] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 1);
609 mp->m_alloc_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize, 605 mp->m_alloc_mxr[1] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 0);
610 xfs_alloc, i == 0); 606 mp->m_alloc_mnr[0] = mp->m_alloc_mxr[0] / 2;
611 mp->m_alloc_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize, 607 mp->m_alloc_mnr[1] = mp->m_alloc_mxr[1] / 2;
612 xfs_alloc, i == 0); 608
613 } 609 mp->m_inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1);
614 for (i = 0; i < 2; i++) { 610 mp->m_inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 0);
615 mp->m_bmap_dmxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize, 611 mp->m_inobt_mnr[0] = mp->m_inobt_mxr[0] / 2;
616 xfs_bmbt, i == 0); 612 mp->m_inobt_mnr[1] = mp->m_inobt_mxr[1] / 2;
617 mp->m_bmap_dmnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize, 613
618 xfs_bmbt, i == 0); 614 mp->m_bmap_dmxr[0] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 1);
619 } 615 mp->m_bmap_dmxr[1] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 0);
620 for (i = 0; i < 2; i++) { 616 mp->m_bmap_dmnr[0] = mp->m_bmap_dmxr[0] / 2;
621 mp->m_inobt_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize, 617 mp->m_bmap_dmnr[1] = mp->m_bmap_dmxr[1] / 2;
622 xfs_inobt, i == 0);
623 mp->m_inobt_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
624 xfs_inobt, i == 0);
625 }
626 618
627 mp->m_bsize = XFS_FSB_TO_BB(mp, 1); 619 mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
628 mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK, 620 mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
@@ -1228,6 +1220,16 @@ xfs_unmountfs(
1228 __uint64_t resblks; 1220 __uint64_t resblks;
1229 int error; 1221 int error;
1230 1222
1223 /*
1224 * Release dquot that rootinode, rbmino and rsumino might be holding,
1225 * and release the quota inodes.
1226 */
1227 XFS_QM_UNMOUNT(mp);
1228
1229 if (mp->m_rbmip)
1230 IRELE(mp->m_rbmip);
1231 if (mp->m_rsumip)
1232 IRELE(mp->m_rsumip);
1231 IRELE(mp->m_rootip); 1233 IRELE(mp->m_rootip);
1232 1234
1233 /* 1235 /*
@@ -1241,7 +1243,7 @@ xfs_unmountfs(
1241 * need to force the log first. 1243 * need to force the log first.
1242 */ 1244 */
1243 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); 1245 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);
1244 xfs_iflush_all(mp); 1246 xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_ASYNC);
1245 1247
1246 XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING); 1248 XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);
1247 1249
@@ -1288,11 +1290,6 @@ xfs_unmountfs(
1288 xfs_unmountfs_wait(mp); /* wait for async bufs */ 1290 xfs_unmountfs_wait(mp); /* wait for async bufs */
1289 xfs_log_unmount(mp); /* Done! No more fs ops. */ 1291 xfs_log_unmount(mp); /* Done! No more fs ops. */
1290 1292
1291 /*
1292 * All inodes from this mount point should be freed.
1293 */
1294 ASSERT(mp->m_inodes == NULL);
1295
1296 if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0) 1293 if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0)
1297 uuid_table_remove(&mp->m_sb.sb_uuid); 1294 uuid_table_remove(&mp->m_sb.sb_uuid);
1298 1295
@@ -1365,24 +1362,6 @@ xfs_log_sbcount(
1365 return error; 1362 return error;
1366} 1363}
1367 1364
1368STATIC void
1369xfs_mark_shared_ro(
1370 xfs_mount_t *mp,
1371 xfs_buf_t *bp)
1372{
1373 xfs_dsb_t *sb = XFS_BUF_TO_SBP(bp);
1374 __uint16_t version;
1375
1376 if (!(sb->sb_flags & XFS_SBF_READONLY))
1377 sb->sb_flags |= XFS_SBF_READONLY;
1378
1379 version = be16_to_cpu(sb->sb_versionnum);
1380 if ((version & XFS_SB_VERSION_NUMBITS) != XFS_SB_VERSION_4 ||
1381 !(version & XFS_SB_VERSION_SHAREDBIT))
1382 version |= XFS_SB_VERSION_SHAREDBIT;
1383 sb->sb_versionnum = cpu_to_be16(version);
1384}
1385
1386int 1365int
1387xfs_unmountfs_writesb(xfs_mount_t *mp) 1366xfs_unmountfs_writesb(xfs_mount_t *mp)
1388{ 1367{
@@ -1398,12 +1377,6 @@ xfs_unmountfs_writesb(xfs_mount_t *mp)
1398 1377
1399 sbp = xfs_getsb(mp, 0); 1378 sbp = xfs_getsb(mp, 0);
1400 1379
1401 /*
1402 * mark shared-readonly if desired
1403 */
1404 if (mp->m_mk_sharedro)
1405 xfs_mark_shared_ro(mp, sbp);
1406
1407 XFS_BUF_UNDONE(sbp); 1380 XFS_BUF_UNDONE(sbp);
1408 XFS_BUF_UNREAD(sbp); 1381 XFS_BUF_UNREAD(sbp);
1409 XFS_BUF_UNDELAYWRITE(sbp); 1382 XFS_BUF_UNDELAYWRITE(sbp);
@@ -1415,8 +1388,6 @@ xfs_unmountfs_writesb(xfs_mount_t *mp)
1415 if (error) 1388 if (error)
1416 xfs_ioerror_alert("xfs_unmountfs_writesb", 1389 xfs_ioerror_alert("xfs_unmountfs_writesb",
1417 mp, sbp, XFS_BUF_ADDR(sbp)); 1390 mp, sbp, XFS_BUF_ADDR(sbp));
1418 if (error && mp->m_mk_sharedro)
1419 xfs_fs_cmn_err(CE_ALERT, mp, "Superblock write error detected while unmounting. Filesystem may not be marked shared readonly");
1420 xfs_buf_relse(sbp); 1391 xfs_buf_relse(sbp);
1421 } 1392 }
1422 return error; 1393 return error;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index f3c1024b124..c1e02846732 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -18,7 +18,6 @@
18#ifndef __XFS_MOUNT_H__ 18#ifndef __XFS_MOUNT_H__
19#define __XFS_MOUNT_H__ 19#define __XFS_MOUNT_H__
20 20
21
22typedef struct xfs_trans_reservations { 21typedef struct xfs_trans_reservations {
23 uint tr_write; /* extent alloc trans */ 22 uint tr_write; /* extent alloc trans */
24 uint tr_itruncate; /* truncate trans */ 23 uint tr_itruncate; /* truncate trans */
@@ -44,14 +43,16 @@ typedef struct xfs_trans_reservations {
44} xfs_trans_reservations_t; 43} xfs_trans_reservations_t;
45 44
46#ifndef __KERNEL__ 45#ifndef __KERNEL__
47/* 46
48 * Moved here from xfs_ag.h to avoid reordering header files
49 */
50#define XFS_DADDR_TO_AGNO(mp,d) \ 47#define XFS_DADDR_TO_AGNO(mp,d) \
51 ((xfs_agnumber_t)(XFS_BB_TO_FSBT(mp, d) / (mp)->m_sb.sb_agblocks)) 48 ((xfs_agnumber_t)(XFS_BB_TO_FSBT(mp, d) / (mp)->m_sb.sb_agblocks))
52#define XFS_DADDR_TO_AGBNO(mp,d) \ 49#define XFS_DADDR_TO_AGBNO(mp,d) \
53 ((xfs_agblock_t)(XFS_BB_TO_FSBT(mp, d) % (mp)->m_sb.sb_agblocks)) 50 ((xfs_agblock_t)(XFS_BB_TO_FSBT(mp, d) % (mp)->m_sb.sb_agblocks))
54#else 51
52#else /* __KERNEL__ */
53
54#include "xfs_sync.h"
55
55struct cred; 56struct cred;
56struct log; 57struct log;
57struct xfs_mount_args; 58struct xfs_mount_args;
@@ -62,6 +63,7 @@ struct xfs_extdelta;
62struct xfs_swapext; 63struct xfs_swapext;
63struct xfs_mru_cache; 64struct xfs_mru_cache;
64struct xfs_nameops; 65struct xfs_nameops;
66struct xfs_ail;
65 67
66/* 68/*
67 * Prototypes and functions for the Data Migration subsystem. 69 * Prototypes and functions for the Data Migration subsystem.
@@ -115,7 +117,7 @@ struct xfs_quotainfo;
115 117
116typedef int (*xfs_qminit_t)(struct xfs_mount *, uint *, uint *); 118typedef int (*xfs_qminit_t)(struct xfs_mount *, uint *, uint *);
117typedef int (*xfs_qmmount_t)(struct xfs_mount *, uint, uint); 119typedef int (*xfs_qmmount_t)(struct xfs_mount *, uint, uint);
118typedef int (*xfs_qmunmount_t)(struct xfs_mount *); 120typedef void (*xfs_qmunmount_t)(struct xfs_mount *);
119typedef void (*xfs_qmdone_t)(struct xfs_mount *); 121typedef void (*xfs_qmdone_t)(struct xfs_mount *);
120typedef void (*xfs_dqrele_t)(struct xfs_dquot *); 122typedef void (*xfs_dqrele_t)(struct xfs_dquot *);
121typedef int (*xfs_dqattach_t)(struct xfs_inode *, uint); 123typedef int (*xfs_dqattach_t)(struct xfs_inode *, uint);
@@ -132,7 +134,7 @@ typedef struct xfs_dquot * (*xfs_dqvopchown_t)(
132 struct xfs_dquot **, struct xfs_dquot *); 134 struct xfs_dquot **, struct xfs_dquot *);
133typedef int (*xfs_dqvopchownresv_t)(struct xfs_trans *, struct xfs_inode *, 135typedef int (*xfs_dqvopchownresv_t)(struct xfs_trans *, struct xfs_inode *,
134 struct xfs_dquot *, struct xfs_dquot *, uint); 136 struct xfs_dquot *, struct xfs_dquot *, uint);
135typedef void (*xfs_dqstatvfs_t)(struct xfs_inode *, bhv_statvfs_t *); 137typedef void (*xfs_dqstatvfs_t)(struct xfs_inode *, struct kstatfs *);
136typedef int (*xfs_dqsync_t)(struct xfs_mount *, int flags); 138typedef int (*xfs_dqsync_t)(struct xfs_mount *, int flags);
137typedef int (*xfs_quotactl_t)(struct xfs_mount *, int, int, xfs_caddr_t); 139typedef int (*xfs_quotactl_t)(struct xfs_mount *, int, int, xfs_caddr_t);
138 140
@@ -223,18 +225,10 @@ extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int);
223#define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0) 225#define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0)
224#endif 226#endif
225 227
226typedef struct xfs_ail {
227 struct list_head xa_ail;
228 uint xa_gen;
229 struct task_struct *xa_task;
230 xfs_lsn_t xa_target;
231} xfs_ail_t;
232
233typedef struct xfs_mount { 228typedef struct xfs_mount {
234 struct super_block *m_super; 229 struct super_block *m_super;
235 xfs_tid_t m_tid; /* next unused tid for fs */ 230 xfs_tid_t m_tid; /* next unused tid for fs */
236 spinlock_t m_ail_lock; /* fs AIL mutex */ 231 struct xfs_ail *m_ail; /* fs active log item list */
237 xfs_ail_t m_ail; /* fs active log item list */
238 xfs_sb_t m_sb; /* copy of fs superblock */ 232 xfs_sb_t m_sb; /* copy of fs superblock */
239 spinlock_t m_sb_lock; /* sb counter lock */ 233 spinlock_t m_sb_lock; /* sb counter lock */
240 struct xfs_buf *m_sb_bp; /* buffer for superblock */ 234 struct xfs_buf *m_sb_bp; /* buffer for superblock */
@@ -247,10 +241,6 @@ typedef struct xfs_mount {
247 xfs_agnumber_t m_agirotor; /* last ag dir inode alloced */ 241 xfs_agnumber_t m_agirotor; /* last ag dir inode alloced */
248 spinlock_t m_agirotor_lock;/* .. and lock protecting it */ 242 spinlock_t m_agirotor_lock;/* .. and lock protecting it */
249 xfs_agnumber_t m_maxagi; /* highest inode alloc group */ 243 xfs_agnumber_t m_maxagi; /* highest inode alloc group */
250 struct xfs_inode *m_inodes; /* active inode list */
251 struct list_head m_del_inodes; /* inodes to reclaim */
252 mutex_t m_ilock; /* inode list mutex */
253 uint m_ireclaims; /* count of calls to reclaim*/
254 uint m_readio_log; /* min read size log bytes */ 244 uint m_readio_log; /* min read size log bytes */
255 uint m_readio_blocks; /* min read size blocks */ 245 uint m_readio_blocks; /* min read size blocks */
256 uint m_writeio_log; /* min write size log bytes */ 246 uint m_writeio_log; /* min write size log bytes */
@@ -267,7 +257,6 @@ typedef struct xfs_mount {
267 xfs_buftarg_t *m_ddev_targp; /* saves taking the address */ 257 xfs_buftarg_t *m_ddev_targp; /* saves taking the address */
268 xfs_buftarg_t *m_logdev_targp;/* ptr to log device */ 258 xfs_buftarg_t *m_logdev_targp;/* ptr to log device */
269 xfs_buftarg_t *m_rtdev_targp; /* ptr to rt device */ 259 xfs_buftarg_t *m_rtdev_targp; /* ptr to rt device */
270 __uint8_t m_dircook_elog; /* log d-cookie entry bits */
271 __uint8_t m_blkbit_log; /* blocklog + NBBY */ 260 __uint8_t m_blkbit_log; /* blocklog + NBBY */
272 __uint8_t m_blkbb_log; /* blocklog - BBSHIFT */ 261 __uint8_t m_blkbb_log; /* blocklog - BBSHIFT */
273 __uint8_t m_agno_log; /* log #ag's */ 262 __uint8_t m_agno_log; /* log #ag's */
@@ -276,12 +265,12 @@ typedef struct xfs_mount {
276 uint m_blockmask; /* sb_blocksize-1 */ 265 uint m_blockmask; /* sb_blocksize-1 */
277 uint m_blockwsize; /* sb_blocksize in words */ 266 uint m_blockwsize; /* sb_blocksize in words */
278 uint m_blockwmask; /* blockwsize-1 */ 267 uint m_blockwmask; /* blockwsize-1 */
279 uint m_alloc_mxr[2]; /* XFS_ALLOC_BLOCK_MAXRECS */ 268 uint m_alloc_mxr[2]; /* max alloc btree records */
280 uint m_alloc_mnr[2]; /* XFS_ALLOC_BLOCK_MINRECS */ 269 uint m_alloc_mnr[2]; /* min alloc btree records */
281 uint m_bmap_dmxr[2]; /* XFS_BMAP_BLOCK_DMAXRECS */ 270 uint m_bmap_dmxr[2]; /* max bmap btree records */
282 uint m_bmap_dmnr[2]; /* XFS_BMAP_BLOCK_DMINRECS */ 271 uint m_bmap_dmnr[2]; /* min bmap btree records */
283 uint m_inobt_mxr[2]; /* XFS_INOBT_BLOCK_MAXRECS */ 272 uint m_inobt_mxr[2]; /* max inobt btree records */
284 uint m_inobt_mnr[2]; /* XFS_INOBT_BLOCK_MINRECS */ 273 uint m_inobt_mnr[2]; /* min inobt btree records */
285 uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */ 274 uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */
286 uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */ 275 uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */
287 uint m_in_maxlevels; /* XFS_IN_MAXLEVELS */ 276 uint m_in_maxlevels; /* XFS_IN_MAXLEVELS */
@@ -312,9 +301,6 @@ typedef struct xfs_mount {
312 int m_sinoalign; /* stripe unit inode alignment */ 301 int m_sinoalign; /* stripe unit inode alignment */
313 int m_attr_magicpct;/* 37% of the blocksize */ 302 int m_attr_magicpct;/* 37% of the blocksize */
314 int m_dir_magicpct; /* 37% of the dir blocksize */ 303 int m_dir_magicpct; /* 37% of the dir blocksize */
315 __uint8_t m_mk_sharedro; /* mark shared ro on unmount */
316 __uint8_t m_inode_quiesce;/* call quiesce on new inodes.
317 field governed by m_ilock */
318 __uint8_t m_sectbb_log; /* sectlog - BBSHIFT */ 304 __uint8_t m_sectbb_log; /* sectlog - BBSHIFT */
319 const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */ 305 const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */
320 int m_dirblksize; /* directory block sz--bytes */ 306 int m_dirblksize; /* directory block sz--bytes */
@@ -362,7 +348,6 @@ typedef struct xfs_mount {
362#define XFS_MOUNT_ATTR2 (1ULL << 8) /* allow use of attr2 format */ 348#define XFS_MOUNT_ATTR2 (1ULL << 8) /* allow use of attr2 format */
363#define XFS_MOUNT_GRPID (1ULL << 9) /* group-ID assigned from directory */ 349#define XFS_MOUNT_GRPID (1ULL << 9) /* group-ID assigned from directory */
364#define XFS_MOUNT_NORECOVERY (1ULL << 10) /* no recovery - dirty fs */ 350#define XFS_MOUNT_NORECOVERY (1ULL << 10) /* no recovery - dirty fs */
365#define XFS_MOUNT_SHARED (1ULL << 11) /* shared mount */
366#define XFS_MOUNT_DFLT_IOSIZE (1ULL << 12) /* set default i/o size */ 351#define XFS_MOUNT_DFLT_IOSIZE (1ULL << 12) /* set default i/o size */
367#define XFS_MOUNT_OSYNCISOSYNC (1ULL << 13) /* o_sync is REALLY o_sync */ 352#define XFS_MOUNT_OSYNCISOSYNC (1ULL << 13) /* o_sync is REALLY o_sync */
368 /* osyncisdsync is now default*/ 353 /* osyncisdsync is now default*/
@@ -439,6 +424,16 @@ void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname,
439#define xfs_force_shutdown(m,f) \ 424#define xfs_force_shutdown(m,f) \
440 xfs_do_force_shutdown(m, f, __FILE__, __LINE__) 425 xfs_do_force_shutdown(m, f, __FILE__, __LINE__)
441 426
427#define SHUTDOWN_META_IO_ERROR 0x0001 /* write attempt to metadata failed */
428#define SHUTDOWN_LOG_IO_ERROR 0x0002 /* write attempt to the log failed */
429#define SHUTDOWN_FORCE_UMOUNT 0x0004 /* shutdown from a forced unmount */
430#define SHUTDOWN_CORRUPT_INCORE 0x0008 /* corrupt in-memory data structures */
431#define SHUTDOWN_REMOTE_REQ 0x0010 /* shutdown came from remote cell */
432#define SHUTDOWN_DEVICE_REQ 0x0020 /* failed all paths to the device */
433
434#define xfs_test_for_freeze(mp) ((mp)->m_super->s_frozen)
435#define xfs_wait_for_freeze(mp,l) vfs_check_frozen((mp)->m_super, (l))
436
442/* 437/*
443 * Flags for xfs_mountfs 438 * Flags for xfs_mountfs
444 */ 439 */
@@ -508,14 +503,12 @@ typedef struct xfs_mod_sb {
508#define XFS_MOUNT_ILOCK(mp) mutex_lock(&((mp)->m_ilock)) 503#define XFS_MOUNT_ILOCK(mp) mutex_lock(&((mp)->m_ilock))
509#define XFS_MOUNT_IUNLOCK(mp) mutex_unlock(&((mp)->m_ilock)) 504#define XFS_MOUNT_IUNLOCK(mp) mutex_unlock(&((mp)->m_ilock))
510 505
511extern void xfs_mod_sb(xfs_trans_t *, __int64_t);
512extern int xfs_log_sbcount(xfs_mount_t *, uint); 506extern int xfs_log_sbcount(xfs_mount_t *, uint);
513extern int xfs_mountfs(xfs_mount_t *mp); 507extern int xfs_mountfs(xfs_mount_t *mp);
514extern void xfs_mountfs_check_barriers(xfs_mount_t *mp); 508extern void xfs_mountfs_check_barriers(xfs_mount_t *mp);
515 509
516extern void xfs_unmountfs(xfs_mount_t *); 510extern void xfs_unmountfs(xfs_mount_t *);
517extern int xfs_unmountfs_writesb(xfs_mount_t *); 511extern int xfs_unmountfs_writesb(xfs_mount_t *);
518extern int xfs_unmount_flush(xfs_mount_t *, int);
519extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); 512extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int);
520extern int xfs_mod_incore_sb_unlocked(xfs_mount_t *, xfs_sb_field_t, 513extern int xfs_mod_incore_sb_unlocked(xfs_mount_t *, xfs_sb_field_t,
521 int64_t, int); 514 int64_t, int);
@@ -525,20 +518,20 @@ extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
525extern int xfs_readsb(xfs_mount_t *, int); 518extern int xfs_readsb(xfs_mount_t *, int);
526extern void xfs_freesb(xfs_mount_t *); 519extern void xfs_freesb(xfs_mount_t *);
527extern int xfs_fs_writable(xfs_mount_t *); 520extern int xfs_fs_writable(xfs_mount_t *);
528extern int xfs_syncsub(xfs_mount_t *, int, int *);
529extern int xfs_sync_inodes(xfs_mount_t *, int, int *);
530extern xfs_agnumber_t xfs_initialize_perag(xfs_mount_t *, xfs_agnumber_t);
531extern void xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *);
532extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t);
533extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t); 521extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t);
534 522
535extern int xfs_dmops_get(struct xfs_mount *, struct xfs_mount_args *); 523extern int xfs_dmops_get(struct xfs_mount *);
536extern void xfs_dmops_put(struct xfs_mount *); 524extern void xfs_dmops_put(struct xfs_mount *);
537extern int xfs_qmops_get(struct xfs_mount *, struct xfs_mount_args *); 525extern int xfs_qmops_get(struct xfs_mount *);
538extern void xfs_qmops_put(struct xfs_mount *); 526extern void xfs_qmops_put(struct xfs_mount *);
539 527
540extern struct xfs_dmops xfs_dmcore_xfs; 528extern struct xfs_dmops xfs_dmcore_xfs;
541 529
542#endif /* __KERNEL__ */ 530#endif /* __KERNEL__ */
543 531
532extern void xfs_mod_sb(struct xfs_trans *, __int64_t);
533extern xfs_agnumber_t xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t);
534extern void xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *);
535extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t);
536
544#endif /* __XFS_MOUNT_H__ */ 537#endif /* __XFS_MOUNT_H__ */
diff --git a/fs/xfs/xfs_qmops.c b/fs/xfs/xfs_qmops.c
index a294e58db8d..27f80581520 100644
--- a/fs/xfs/xfs_qmops.c
+++ b/fs/xfs/xfs_qmops.c
@@ -28,7 +28,6 @@
28#include "xfs_mount.h" 28#include "xfs_mount.h"
29#include "xfs_quota.h" 29#include "xfs_quota.h"
30#include "xfs_error.h" 30#include "xfs_error.h"
31#include "xfs_clnt.h"
32 31
33 32
34STATIC struct xfs_dquot * 33STATIC struct xfs_dquot *
@@ -131,9 +130,9 @@ static struct xfs_qmops xfs_qmcore_stub = {
131}; 130};
132 131
133int 132int
134xfs_qmops_get(struct xfs_mount *mp, struct xfs_mount_args *args) 133xfs_qmops_get(struct xfs_mount *mp)
135{ 134{
136 if (args->flags & (XFSMNT_UQUOTA | XFSMNT_PQUOTA | XFSMNT_GQUOTA)) { 135 if (XFS_IS_QUOTA_RUNNING(mp)) {
137#ifdef CONFIG_XFS_QUOTA 136#ifdef CONFIG_XFS_QUOTA
138 mp->m_qm_ops = &xfs_qmcore_xfs; 137 mp->m_qm_ops = &xfs_qmcore_xfs;
139#else 138#else
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index 12c4ec775af..48965ecaa15 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -84,11 +84,9 @@ typedef struct xfs_dqblk {
84#define XFS_DQ_USER 0x0001 /* a user quota */ 84#define XFS_DQ_USER 0x0001 /* a user quota */
85#define XFS_DQ_PROJ 0x0002 /* project quota */ 85#define XFS_DQ_PROJ 0x0002 /* project quota */
86#define XFS_DQ_GROUP 0x0004 /* a group quota */ 86#define XFS_DQ_GROUP 0x0004 /* a group quota */
87#define XFS_DQ_FLOCKED 0x0008 /* flush lock taken */ 87#define XFS_DQ_DIRTY 0x0008 /* dquot is dirty */
88#define XFS_DQ_DIRTY 0x0010 /* dquot is dirty */ 88#define XFS_DQ_WANT 0x0010 /* for lookup/reclaim race */
89#define XFS_DQ_WANT 0x0020 /* for lookup/reclaim race */ 89#define XFS_DQ_INACTIVE 0x0020 /* dq off mplist & hashlist */
90#define XFS_DQ_INACTIVE 0x0040 /* dq off mplist & hashlist */
91#define XFS_DQ_MARKER 0x0080 /* sentinel */
92 90
93#define XFS_DQ_ALLTYPES (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP) 91#define XFS_DQ_ALLTYPES (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP)
94 92
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index c903130be7f..86471bb40fd 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -42,31 +42,6 @@
42 42
43 43
44/* 44/*
45 * Given an array of up to 4 inode pointers, unlock the pointed to inodes.
46 * If there are fewer than 4 entries in the array, the empty entries will
47 * be at the end and will have NULL pointers in them.
48 */
49STATIC void
50xfs_rename_unlock4(
51 xfs_inode_t **i_tab,
52 uint lock_mode)
53{
54 int i;
55
56 xfs_iunlock(i_tab[0], lock_mode);
57 for (i = 1; i < 4; i++) {
58 if (i_tab[i] == NULL)
59 break;
60
61 /*
62 * Watch out for duplicate entries in the table.
63 */
64 if (i_tab[i] != i_tab[i-1])
65 xfs_iunlock(i_tab[i], lock_mode);
66 }
67}
68
69/*
70 * Enter all inodes for a rename transaction into a sorted array. 45 * Enter all inodes for a rename transaction into a sorted array.
71 */ 46 */
72STATIC void 47STATIC void
@@ -205,19 +180,6 @@ xfs_rename(
205 xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL); 180 xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL);
206 181
207 /* 182 /*
208 * If we are using project inheritance, we only allow renames
209 * into our tree when the project IDs are the same; else the
210 * tree quota mechanism would be circumvented.
211 */
212 if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
213 (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) {
214 error = XFS_ERROR(EXDEV);
215 xfs_rename_unlock4(inodes, XFS_ILOCK_EXCL);
216 xfs_trans_cancel(tp, cancel_flags);
217 goto std_return;
218 }
219
220 /*
221 * Join all the inodes to the transaction. From this point on, 183 * Join all the inodes to the transaction. From this point on,
222 * we can rely on either trans_commit or trans_cancel to unlock 184 * we can rely on either trans_commit or trans_cancel to unlock
223 * them. Note that we need to add a vnode reference to the 185 * them. Note that we need to add a vnode reference to the
@@ -242,6 +204,17 @@ xfs_rename(
242 } 204 }
243 205
244 /* 206 /*
207 * If we are using project inheritance, we only allow renames
208 * into our tree when the project IDs are the same; else the
209 * tree quota mechanism would be circumvented.
210 */
211 if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
212 (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) {
213 error = XFS_ERROR(EXDEV);
214 goto error_return;
215 }
216
217 /*
245 * Set up the target. 218 * Set up the target.
246 */ 219 */
247 if (target_ip == NULL) { 220 if (target_ip == NULL) {
@@ -367,19 +340,11 @@ xfs_rename(
367 &first_block, &free_list, spaceres); 340 &first_block, &free_list, spaceres);
368 if (error) 341 if (error)
369 goto abort_return; 342 goto abort_return;
370 xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
371 343
372 /* 344 xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
373 * Update the generation counts on all the directory inodes
374 * that we're modifying.
375 */
376 src_dp->i_gen++;
377 xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE); 345 xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
378 346 if (new_parent)
379 if (new_parent) {
380 target_dp->i_gen++;
381 xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); 347 xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
382 }
383 348
384 /* 349 /*
385 * If this is a synchronous mount, make sure that the 350 * If this is a synchronous mount, make sure that the
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index e2f68de1615..edf12c7b834 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -85,7 +85,6 @@ xfs_growfs_rt_alloc(
85{ 85{
86 xfs_fileoff_t bno; /* block number in file */ 86 xfs_fileoff_t bno; /* block number in file */
87 xfs_buf_t *bp; /* temporary buffer for zeroing */ 87 xfs_buf_t *bp; /* temporary buffer for zeroing */
88 int cancelflags; /* flags for xfs_trans_cancel */
89 int committed; /* transaction committed flag */ 88 int committed; /* transaction committed flag */
90 xfs_daddr_t d; /* disk block address */ 89 xfs_daddr_t d; /* disk block address */
91 int error; /* error return value */ 90 int error; /* error return value */
@@ -96,15 +95,16 @@ xfs_growfs_rt_alloc(
96 xfs_bmbt_irec_t map; /* block map output */ 95 xfs_bmbt_irec_t map; /* block map output */
97 int nmap; /* number of block maps */ 96 int nmap; /* number of block maps */
98 int resblks; /* space reservation */ 97 int resblks; /* space reservation */
99 xfs_trans_t *tp; /* transaction pointer */
100 98
101 /* 99 /*
102 * Allocate space to the file, as necessary. 100 * Allocate space to the file, as necessary.
103 */ 101 */
104 while (oblocks < nblocks) { 102 while (oblocks < nblocks) {
103 int cancelflags = 0;
104 xfs_trans_t *tp;
105
105 tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_ALLOC); 106 tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_ALLOC);
106 resblks = XFS_GROWFSRT_SPACE_RES(mp, nblocks - oblocks); 107 resblks = XFS_GROWFSRT_SPACE_RES(mp, nblocks - oblocks);
107 cancelflags = 0;
108 /* 108 /*
109 * Reserve space & log for one extent added to the file. 109 * Reserve space & log for one extent added to the file.
110 */ 110 */
@@ -171,7 +171,9 @@ xfs_growfs_rt_alloc(
171 mp->m_bsize, 0); 171 mp->m_bsize, 0);
172 if (bp == NULL) { 172 if (bp == NULL) {
173 error = XFS_ERROR(EIO); 173 error = XFS_ERROR(EIO);
174 goto error_cancel; 174error_cancel:
175 xfs_trans_cancel(tp, cancelflags);
176 goto error;
175 } 177 }
176 memset(XFS_BUF_PTR(bp), 0, mp->m_sb.sb_blocksize); 178 memset(XFS_BUF_PTR(bp), 0, mp->m_sb.sb_blocksize);
177 xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1); 179 xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
@@ -188,8 +190,6 @@ xfs_growfs_rt_alloc(
188 oblocks = map.br_startoff + map.br_blockcount; 190 oblocks = map.br_startoff + map.br_blockcount;
189 } 191 }
190 return 0; 192 return 0;
191error_cancel:
192 xfs_trans_cancel(tp, cancelflags);
193error: 193error:
194 return error; 194 return error;
195} 195}
@@ -1856,7 +1856,6 @@ xfs_growfs_rt(
1856{ 1856{
1857 xfs_rtblock_t bmbno; /* bitmap block number */ 1857 xfs_rtblock_t bmbno; /* bitmap block number */
1858 xfs_buf_t *bp; /* temporary buffer */ 1858 xfs_buf_t *bp; /* temporary buffer */
1859 int cancelflags; /* flags for xfs_trans_cancel */
1860 int error; /* error return value */ 1859 int error; /* error return value */
1861 xfs_inode_t *ip; /* bitmap inode, used as lock */ 1860 xfs_inode_t *ip; /* bitmap inode, used as lock */
1862 xfs_mount_t *nmp; /* new (fake) mount structure */ 1861 xfs_mount_t *nmp; /* new (fake) mount structure */
@@ -1872,13 +1871,13 @@ xfs_growfs_rt(
1872 xfs_extlen_t rsumblocks; /* current number of rt summary blks */ 1871 xfs_extlen_t rsumblocks; /* current number of rt summary blks */
1873 xfs_sb_t *sbp; /* old superblock */ 1872 xfs_sb_t *sbp; /* old superblock */
1874 xfs_fsblock_t sumbno; /* summary block number */ 1873 xfs_fsblock_t sumbno; /* summary block number */
1875 xfs_trans_t *tp; /* transaction pointer */
1876 1874
1877 sbp = &mp->m_sb; 1875 sbp = &mp->m_sb;
1878 cancelflags = 0;
1879 /* 1876 /*
1880 * Initial error checking. 1877 * Initial error checking.
1881 */ 1878 */
1879 if (!capable(CAP_SYS_ADMIN))
1880 return XFS_ERROR(EPERM);
1882 if (mp->m_rtdev_targp == NULL || mp->m_rbmip == NULL || 1881 if (mp->m_rtdev_targp == NULL || mp->m_rbmip == NULL ||
1883 (nrblocks = in->newblocks) <= sbp->sb_rblocks || 1882 (nrblocks = in->newblocks) <= sbp->sb_rblocks ||
1884 (sbp->sb_rblocks && (in->extsize != sbp->sb_rextsize))) 1883 (sbp->sb_rblocks && (in->extsize != sbp->sb_rextsize)))
@@ -1942,6 +1941,9 @@ xfs_growfs_rt(
1942 ((sbp->sb_rextents & ((1 << mp->m_blkbit_log) - 1)) != 0); 1941 ((sbp->sb_rextents & ((1 << mp->m_blkbit_log) - 1)) != 0);
1943 bmbno < nrbmblocks; 1942 bmbno < nrbmblocks;
1944 bmbno++) { 1943 bmbno++) {
1944 xfs_trans_t *tp;
1945 int cancelflags = 0;
1946
1945 *nmp = *mp; 1947 *nmp = *mp;
1946 nsbp = &nmp->m_sb; 1948 nsbp = &nmp->m_sb;
1947 /* 1949 /*
@@ -1967,16 +1969,15 @@ xfs_growfs_rt(
1967 * Start a transaction, get the log reservation. 1969 * Start a transaction, get the log reservation.
1968 */ 1970 */
1969 tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_FREE); 1971 tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_FREE);
1970 cancelflags = 0;
1971 if ((error = xfs_trans_reserve(tp, 0, 1972 if ((error = xfs_trans_reserve(tp, 0,
1972 XFS_GROWRTFREE_LOG_RES(nmp), 0, 0, 0))) 1973 XFS_GROWRTFREE_LOG_RES(nmp), 0, 0, 0)))
1973 break; 1974 goto error_cancel;
1974 /* 1975 /*
1975 * Lock out other callers by grabbing the bitmap inode lock. 1976 * Lock out other callers by grabbing the bitmap inode lock.
1976 */ 1977 */
1977 if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, 1978 if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0,
1978 XFS_ILOCK_EXCL, &ip))) 1979 XFS_ILOCK_EXCL, &ip)))
1979 break; 1980 goto error_cancel;
1980 ASSERT(ip == mp->m_rbmip); 1981 ASSERT(ip == mp->m_rbmip);
1981 /* 1982 /*
1982 * Update the bitmap inode's size. 1983 * Update the bitmap inode's size.
@@ -1990,7 +1991,7 @@ xfs_growfs_rt(
1990 */ 1991 */
1991 if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rsumino, 0, 1992 if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rsumino, 0,
1992 XFS_ILOCK_EXCL, &ip))) 1993 XFS_ILOCK_EXCL, &ip)))
1993 break; 1994 goto error_cancel;
1994 ASSERT(ip == mp->m_rsumip); 1995 ASSERT(ip == mp->m_rsumip);
1995 /* 1996 /*
1996 * Update the summary inode's size. 1997 * Update the summary inode's size.
@@ -2005,7 +2006,7 @@ xfs_growfs_rt(
2005 mp->m_rsumlevels != nmp->m_rsumlevels) { 2006 mp->m_rsumlevels != nmp->m_rsumlevels) {
2006 error = xfs_rtcopy_summary(mp, nmp, tp); 2007 error = xfs_rtcopy_summary(mp, nmp, tp);
2007 if (error) 2008 if (error)
2008 break; 2009 goto error_cancel;
2009 } 2010 }
2010 /* 2011 /*
2011 * Update superblock fields. 2012 * Update superblock fields.
@@ -2031,8 +2032,11 @@ xfs_growfs_rt(
2031 bp = NULL; 2032 bp = NULL;
2032 error = xfs_rtfree_range(nmp, tp, sbp->sb_rextents, 2033 error = xfs_rtfree_range(nmp, tp, sbp->sb_rextents,
2033 nsbp->sb_rextents - sbp->sb_rextents, &bp, &sumbno); 2034 nsbp->sb_rextents - sbp->sb_rextents, &bp, &sumbno);
2034 if (error) 2035 if (error) {
2036error_cancel:
2037 xfs_trans_cancel(tp, cancelflags);
2035 break; 2038 break;
2039 }
2036 /* 2040 /*
2037 * Mark more blocks free in the superblock. 2041 * Mark more blocks free in the superblock.
2038 */ 2042 */
@@ -2045,15 +2049,10 @@ xfs_growfs_rt(
2045 mp->m_rsumsize = nrsumsize; 2049 mp->m_rsumsize = nrsumsize;
2046 2050
2047 error = xfs_trans_commit(tp, 0); 2051 error = xfs_trans_commit(tp, 0);
2048 if (error) { 2052 if (error)
2049 tp = NULL;
2050 break; 2053 break;
2051 }
2052 } 2054 }
2053 2055
2054 if (error && tp)
2055 xfs_trans_cancel(tp, cancelflags);
2056
2057 /* 2056 /*
2058 * Free the fake mp structure. 2057 * Free the fake mp structure.
2059 */ 2058 */
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index 3a82576dde9..36f3a21c54d 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -406,7 +406,7 @@ xfs_bwrite(
406 * XXXsup how does this work for quotas. 406 * XXXsup how does this work for quotas.
407 */ 407 */
408 XFS_BUF_SET_BDSTRAT_FUNC(bp, xfs_bdstrat_cb); 408 XFS_BUF_SET_BDSTRAT_FUNC(bp, xfs_bdstrat_cb);
409 XFS_BUF_SET_FSPRIVATE3(bp, mp); 409 bp->b_mount = mp;
410 XFS_BUF_WRITE(bp); 410 XFS_BUF_WRITE(bp);
411 411
412 if ((error = XFS_bwrite(bp))) { 412 if ((error = XFS_bwrite(bp))) {
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index 3f8cf1587f4..1ed71916e4c 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -79,6 +79,7 @@ struct xfs_mount;
79#define XFS_SB_VERSION2_LAZYSBCOUNTBIT 0x00000002 /* Superblk counters */ 79#define XFS_SB_VERSION2_LAZYSBCOUNTBIT 0x00000002 /* Superblk counters */
80#define XFS_SB_VERSION2_RESERVED4BIT 0x00000004 80#define XFS_SB_VERSION2_RESERVED4BIT 0x00000004
81#define XFS_SB_VERSION2_ATTR2BIT 0x00000008 /* Inline attr rework */ 81#define XFS_SB_VERSION2_ATTR2BIT 0x00000008 /* Inline attr rework */
82#define XFS_SB_VERSION2_PARENTBIT 0x00000010 /* parent pointers */
82 83
83#define XFS_SB_VERSION2_OKREALFBITS \ 84#define XFS_SB_VERSION2_OKREALFBITS \
84 (XFS_SB_VERSION2_LAZYSBCOUNTBIT | \ 85 (XFS_SB_VERSION2_LAZYSBCOUNTBIT | \
@@ -296,30 +297,34 @@ typedef enum {
296 297
297#define XFS_SB_VERSION_NUM(sbp) ((sbp)->sb_versionnum & XFS_SB_VERSION_NUMBITS) 298#define XFS_SB_VERSION_NUM(sbp) ((sbp)->sb_versionnum & XFS_SB_VERSION_NUMBITS)
298 299
299#ifdef __KERNEL__
300static inline int xfs_sb_good_version(xfs_sb_t *sbp) 300static inline int xfs_sb_good_version(xfs_sb_t *sbp)
301{ 301{
302 return (((sbp->sb_versionnum >= XFS_SB_VERSION_1) && \ 302 /* We always support version 1-3 */
303 (sbp->sb_versionnum <= XFS_SB_VERSION_3)) || \ 303 if (sbp->sb_versionnum >= XFS_SB_VERSION_1 &&
304 ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ 304 sbp->sb_versionnum <= XFS_SB_VERSION_3)
305 !((sbp->sb_versionnum & ~XFS_SB_VERSION_OKREALBITS) || \ 305 return 1;
306 ((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) && \ 306
307 (sbp->sb_features2 & ~XFS_SB_VERSION2_OKREALBITS))) && \ 307 /* We support version 4 if all feature bits are supported */
308 (sbp->sb_shared_vn <= XFS_SB_MAX_SHARED_VN))); 308 if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) {
309} 309 if ((sbp->sb_versionnum & ~XFS_SB_VERSION_OKREALBITS) ||
310 ((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) &&
311 (sbp->sb_features2 & ~XFS_SB_VERSION2_OKREALBITS)))
312 return 0;
313
314#ifdef __KERNEL__
315 if (sbp->sb_shared_vn > XFS_SB_MAX_SHARED_VN)
316 return 0;
310#else 317#else
311static inline int xfs_sb_good_version(xfs_sb_t *sbp) 318 if ((sbp->sb_versionnum & XFS_SB_VERSION_SHAREDBIT) &&
312{ 319 sbp->sb_shared_vn > XFS_SB_MAX_SHARED_VN)
313 return (((sbp->sb_versionnum >= XFS_SB_VERSION_1) && \ 320 return 0;
314 (sbp->sb_versionnum <= XFS_SB_VERSION_3)) || \ 321#endif
315 ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ 322
316 !((sbp->sb_versionnum & ~XFS_SB_VERSION_OKREALBITS) || \ 323 return 1;
317 ((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) && \ 324 }
318 (sbp->sb_features2 & ~XFS_SB_VERSION2_OKREALBITS))) && \ 325
319 (!(sbp->sb_versionnum & XFS_SB_VERSION_SHAREDBIT) || \ 326 return 0;
320 (sbp->sb_shared_vn <= XFS_SB_MAX_SHARED_VN))));
321} 327}
322#endif /* __KERNEL__ */
323 328
324/* 329/*
325 * Detect a mismatched features2 field. Older kernels read/wrote 330 * Detect a mismatched features2 field. Older kernels read/wrote
@@ -332,123 +337,127 @@ static inline int xfs_sb_has_mismatched_features2(xfs_sb_t *sbp)
332 337
333static inline unsigned xfs_sb_version_tonew(unsigned v) 338static inline unsigned xfs_sb_version_tonew(unsigned v)
334{ 339{
335 return ((((v) == XFS_SB_VERSION_1) ? \ 340 if (v == XFS_SB_VERSION_1)
336 0 : \ 341 return XFS_SB_VERSION_4;
337 (((v) == XFS_SB_VERSION_2) ? \ 342
338 XFS_SB_VERSION_ATTRBIT : \ 343 if (v == XFS_SB_VERSION_2)
339 (XFS_SB_VERSION_ATTRBIT | XFS_SB_VERSION_NLINKBIT))) | \ 344 return XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT;
340 XFS_SB_VERSION_4); 345
346 return XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT |
347 XFS_SB_VERSION_NLINKBIT;
341} 348}
342 349
343static inline unsigned xfs_sb_version_toold(unsigned v) 350static inline unsigned xfs_sb_version_toold(unsigned v)
344{ 351{
345 return (((v) & (XFS_SB_VERSION_QUOTABIT | XFS_SB_VERSION_ALIGNBIT)) ? \ 352 if (v & (XFS_SB_VERSION_QUOTABIT | XFS_SB_VERSION_ALIGNBIT))
346 0 : \ 353 return 0;
347 (((v) & XFS_SB_VERSION_NLINKBIT) ? \ 354 if (v & XFS_SB_VERSION_NLINKBIT)
348 XFS_SB_VERSION_3 : \ 355 return XFS_SB_VERSION_3;
349 (((v) & XFS_SB_VERSION_ATTRBIT) ? \ 356 if (v & XFS_SB_VERSION_ATTRBIT)
350 XFS_SB_VERSION_2 : \ 357 return XFS_SB_VERSION_2;
351 XFS_SB_VERSION_1))); 358 return XFS_SB_VERSION_1;
352} 359}
353 360
354static inline int xfs_sb_version_hasattr(xfs_sb_t *sbp) 361static inline int xfs_sb_version_hasattr(xfs_sb_t *sbp)
355{ 362{
356 return ((sbp)->sb_versionnum == XFS_SB_VERSION_2) || \ 363 return sbp->sb_versionnum == XFS_SB_VERSION_2 ||
357 ((sbp)->sb_versionnum == XFS_SB_VERSION_3) || \ 364 sbp->sb_versionnum == XFS_SB_VERSION_3 ||
358 ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ 365 (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
359 ((sbp)->sb_versionnum & XFS_SB_VERSION_ATTRBIT)); 366 (sbp->sb_versionnum & XFS_SB_VERSION_ATTRBIT));
360} 367}
361 368
362static inline void xfs_sb_version_addattr(xfs_sb_t *sbp) 369static inline void xfs_sb_version_addattr(xfs_sb_t *sbp)
363{ 370{
364 (sbp)->sb_versionnum = (((sbp)->sb_versionnum == XFS_SB_VERSION_1) ? \ 371 if (sbp->sb_versionnum == XFS_SB_VERSION_1)
365 XFS_SB_VERSION_2 : \ 372 sbp->sb_versionnum = XFS_SB_VERSION_2;
366 ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) ? \ 373 else if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4)
367 ((sbp)->sb_versionnum | XFS_SB_VERSION_ATTRBIT) : \ 374 sbp->sb_versionnum |= XFS_SB_VERSION_ATTRBIT;
368 (XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT))); 375 else
376 sbp->sb_versionnum = XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT;
369} 377}
370 378
371static inline int xfs_sb_version_hasnlink(xfs_sb_t *sbp) 379static inline int xfs_sb_version_hasnlink(xfs_sb_t *sbp)
372{ 380{
373 return ((sbp)->sb_versionnum == XFS_SB_VERSION_3) || \ 381 return sbp->sb_versionnum == XFS_SB_VERSION_3 ||
374 ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ 382 (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
375 ((sbp)->sb_versionnum & XFS_SB_VERSION_NLINKBIT)); 383 (sbp->sb_versionnum & XFS_SB_VERSION_NLINKBIT));
376} 384}
377 385
378static inline void xfs_sb_version_addnlink(xfs_sb_t *sbp) 386static inline void xfs_sb_version_addnlink(xfs_sb_t *sbp)
379{ 387{
380 (sbp)->sb_versionnum = ((sbp)->sb_versionnum <= XFS_SB_VERSION_2 ? \ 388 if (sbp->sb_versionnum <= XFS_SB_VERSION_2)
381 XFS_SB_VERSION_3 : \ 389 sbp->sb_versionnum = XFS_SB_VERSION_3;
382 ((sbp)->sb_versionnum | XFS_SB_VERSION_NLINKBIT)); 390 else
391 sbp->sb_versionnum |= XFS_SB_VERSION_NLINKBIT;
383} 392}
384 393
385static inline int xfs_sb_version_hasquota(xfs_sb_t *sbp) 394static inline int xfs_sb_version_hasquota(xfs_sb_t *sbp)
386{ 395{
387 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ 396 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
388 ((sbp)->sb_versionnum & XFS_SB_VERSION_QUOTABIT); 397 (sbp->sb_versionnum & XFS_SB_VERSION_QUOTABIT);
389} 398}
390 399
391static inline void xfs_sb_version_addquota(xfs_sb_t *sbp) 400static inline void xfs_sb_version_addquota(xfs_sb_t *sbp)
392{ 401{
393 (sbp)->sb_versionnum = \ 402 if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4)
394 (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 ? \ 403 sbp->sb_versionnum |= XFS_SB_VERSION_QUOTABIT;
395 ((sbp)->sb_versionnum | XFS_SB_VERSION_QUOTABIT) : \ 404 else
396 (xfs_sb_version_tonew((sbp)->sb_versionnum) | \ 405 sbp->sb_versionnum = xfs_sb_version_tonew(sbp->sb_versionnum) |
397 XFS_SB_VERSION_QUOTABIT)); 406 XFS_SB_VERSION_QUOTABIT;
398} 407}
399 408
400static inline int xfs_sb_version_hasalign(xfs_sb_t *sbp) 409static inline int xfs_sb_version_hasalign(xfs_sb_t *sbp)
401{ 410{
402 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ 411 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
403 ((sbp)->sb_versionnum & XFS_SB_VERSION_ALIGNBIT); 412 (sbp->sb_versionnum & XFS_SB_VERSION_ALIGNBIT);
404} 413}
405 414
406static inline int xfs_sb_version_hasdalign(xfs_sb_t *sbp) 415static inline int xfs_sb_version_hasdalign(xfs_sb_t *sbp)
407{ 416{
408 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ 417 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
409 ((sbp)->sb_versionnum & XFS_SB_VERSION_DALIGNBIT); 418 (sbp->sb_versionnum & XFS_SB_VERSION_DALIGNBIT);
410} 419}
411 420
412static inline int xfs_sb_version_hasshared(xfs_sb_t *sbp) 421static inline int xfs_sb_version_hasshared(xfs_sb_t *sbp)
413{ 422{
414 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ 423 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
415 ((sbp)->sb_versionnum & XFS_SB_VERSION_SHAREDBIT); 424 (sbp->sb_versionnum & XFS_SB_VERSION_SHAREDBIT);
416} 425}
417 426
418static inline int xfs_sb_version_hasdirv2(xfs_sb_t *sbp) 427static inline int xfs_sb_version_hasdirv2(xfs_sb_t *sbp)
419{ 428{
420 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ 429 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
421 ((sbp)->sb_versionnum & XFS_SB_VERSION_DIRV2BIT); 430 (sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT);
422} 431}
423 432
424static inline int xfs_sb_version_haslogv2(xfs_sb_t *sbp) 433static inline int xfs_sb_version_haslogv2(xfs_sb_t *sbp)
425{ 434{
426 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ 435 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
427 ((sbp)->sb_versionnum & XFS_SB_VERSION_LOGV2BIT); 436 (sbp->sb_versionnum & XFS_SB_VERSION_LOGV2BIT);
428} 437}
429 438
430static inline int xfs_sb_version_hasextflgbit(xfs_sb_t *sbp) 439static inline int xfs_sb_version_hasextflgbit(xfs_sb_t *sbp)
431{ 440{
432 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ 441 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
433 ((sbp)->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT); 442 (sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT);
434} 443}
435 444
436static inline int xfs_sb_version_hassector(xfs_sb_t *sbp) 445static inline int xfs_sb_version_hassector(xfs_sb_t *sbp)
437{ 446{
438 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ 447 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
439 ((sbp)->sb_versionnum & XFS_SB_VERSION_SECTORBIT); 448 (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT);
440} 449}
441 450
442static inline int xfs_sb_version_hasasciici(xfs_sb_t *sbp) 451static inline int xfs_sb_version_hasasciici(xfs_sb_t *sbp)
443{ 452{
444 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ 453 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
445 (sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT); 454 (sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT);
446} 455}
447 456
448static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp) 457static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp)
449{ 458{
450 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ 459 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
451 ((sbp)->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT); 460 (sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT);
452} 461}
453 462
454/* 463/*
@@ -463,22 +472,20 @@ static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp)
463 472
464static inline int xfs_sb_version_haslazysbcount(xfs_sb_t *sbp) 473static inline int xfs_sb_version_haslazysbcount(xfs_sb_t *sbp)
465{ 474{
466 return (xfs_sb_version_hasmorebits(sbp) && \ 475 return xfs_sb_version_hasmorebits(sbp) &&
467 ((sbp)->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT)); 476 (sbp->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT);
468} 477}
469 478
470static inline int xfs_sb_version_hasattr2(xfs_sb_t *sbp) 479static inline int xfs_sb_version_hasattr2(xfs_sb_t *sbp)
471{ 480{
472 return (xfs_sb_version_hasmorebits(sbp)) && \ 481 return xfs_sb_version_hasmorebits(sbp) &&
473 ((sbp)->sb_features2 & XFS_SB_VERSION2_ATTR2BIT); 482 (sbp->sb_features2 & XFS_SB_VERSION2_ATTR2BIT);
474} 483}
475 484
476static inline void xfs_sb_version_addattr2(xfs_sb_t *sbp) 485static inline void xfs_sb_version_addattr2(xfs_sb_t *sbp)
477{ 486{
478 ((sbp)->sb_versionnum = \ 487 sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT;
479 ((sbp)->sb_versionnum | XFS_SB_VERSION_MOREBITSBIT), \ 488 sbp->sb_features2 |= XFS_SB_VERSION2_ATTR2BIT;
480 ((sbp)->sb_features2 = \
481 ((sbp)->sb_features2 | XFS_SB_VERSION2_ATTR2BIT)));
482} 489}
483 490
484static inline void xfs_sb_version_removeattr2(xfs_sb_t *sbp) 491static inline void xfs_sb_version_removeattr2(xfs_sb_t *sbp)
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 4e1c22a23be..8570b826fed 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -290,7 +290,7 @@ xfs_trans_dup(
290 ASSERT(tp->t_ticket != NULL); 290 ASSERT(tp->t_ticket != NULL);
291 291
292 ntp->t_flags = XFS_TRANS_PERM_LOG_RES | (tp->t_flags & XFS_TRANS_RESERVE); 292 ntp->t_flags = XFS_TRANS_PERM_LOG_RES | (tp->t_flags & XFS_TRANS_RESERVE);
293 ntp->t_ticket = tp->t_ticket; 293 ntp->t_ticket = xfs_log_ticket_get(tp->t_ticket);
294 ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used; 294 ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used;
295 tp->t_blk_res = tp->t_blk_res_used; 295 tp->t_blk_res = tp->t_blk_res_used;
296 ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used; 296 ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used;
@@ -1260,6 +1260,13 @@ xfs_trans_roll(
1260 trans = *tpp; 1260 trans = *tpp;
1261 1261
1262 /* 1262 /*
1263 * transaction commit worked ok so we can drop the extra ticket
1264 * reference that we gained in xfs_trans_dup()
1265 */
1266 xfs_log_ticket_put(trans->t_ticket);
1267
1268
1269 /*
1263 * Reserve space in the log for th next transaction. 1270 * Reserve space in the log for th next transaction.
1264 * This also pushes items in the "AIL", the list of logged items, 1271 * This also pushes items in the "AIL", the list of logged items,
1265 * out to disk if they are taking up space at the tail of the log 1272 * out to disk if they are taking up space at the tail of the log
@@ -1383,11 +1390,12 @@ xfs_trans_chunk_committed(
1383 xfs_log_item_desc_t *lidp; 1390 xfs_log_item_desc_t *lidp;
1384 xfs_log_item_t *lip; 1391 xfs_log_item_t *lip;
1385 xfs_lsn_t item_lsn; 1392 xfs_lsn_t item_lsn;
1386 struct xfs_mount *mp;
1387 int i; 1393 int i;
1388 1394
1389 lidp = licp->lic_descs; 1395 lidp = licp->lic_descs;
1390 for (i = 0; i < licp->lic_unused; i++, lidp++) { 1396 for (i = 0; i < licp->lic_unused; i++, lidp++) {
1397 struct xfs_ail *ailp;
1398
1391 if (xfs_lic_isfree(licp, i)) { 1399 if (xfs_lic_isfree(licp, i)) {
1392 continue; 1400 continue;
1393 } 1401 }
@@ -1424,19 +1432,19 @@ xfs_trans_chunk_committed(
1424 * This would cause the earlier transaction to fail 1432 * This would cause the earlier transaction to fail
1425 * the test below. 1433 * the test below.
1426 */ 1434 */
1427 mp = lip->li_mountp; 1435 ailp = lip->li_ailp;
1428 spin_lock(&mp->m_ail_lock); 1436 spin_lock(&ailp->xa_lock);
1429 if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) { 1437 if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) {
1430 /* 1438 /*
1431 * This will set the item's lsn to item_lsn 1439 * This will set the item's lsn to item_lsn
1432 * and update the position of the item in 1440 * and update the position of the item in
1433 * the AIL. 1441 * the AIL.
1434 * 1442 *
1435 * xfs_trans_update_ail() drops the AIL lock. 1443 * xfs_trans_ail_update() drops the AIL lock.
1436 */ 1444 */
1437 xfs_trans_update_ail(mp, lip, item_lsn); 1445 xfs_trans_ail_update(ailp, lip, item_lsn);
1438 } else { 1446 } else {
1439 spin_unlock(&mp->m_ail_lock); 1447 spin_unlock(&ailp->xa_lock);
1440 } 1448 }
1441 1449
1442 /* 1450 /*
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 74c80bd2b0e..d6fe4a88d79 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -18,6 +18,8 @@
18#ifndef __XFS_TRANS_H__ 18#ifndef __XFS_TRANS_H__
19#define __XFS_TRANS_H__ 19#define __XFS_TRANS_H__
20 20
21struct xfs_log_item;
22
21/* 23/*
22 * This is the structure written in the log at the head of 24 * This is the structure written in the log at the head of
23 * every transaction. It identifies the type and id of the 25 * every transaction. It identifies the type and id of the
@@ -98,76 +100,6 @@ typedef struct xfs_trans_header {
98#define XFS_TRANS_TYPE_MAX 41 100#define XFS_TRANS_TYPE_MAX 41
99/* new transaction types need to be reflected in xfs_logprint(8) */ 101/* new transaction types need to be reflected in xfs_logprint(8) */
100 102
101
102#ifdef __KERNEL__
103struct xfs_buf;
104struct xfs_buftarg;
105struct xfs_efd_log_item;
106struct xfs_efi_log_item;
107struct xfs_inode;
108struct xfs_item_ops;
109struct xfs_log_iovec;
110struct xfs_log_item;
111struct xfs_log_item_desc;
112struct xfs_mount;
113struct xfs_trans;
114struct xfs_dquot_acct;
115
116typedef struct xfs_log_item {
117 struct list_head li_ail; /* AIL pointers */
118 xfs_lsn_t li_lsn; /* last on-disk lsn */
119 struct xfs_log_item_desc *li_desc; /* ptr to current desc*/
120 struct xfs_mount *li_mountp; /* ptr to fs mount */
121 uint li_type; /* item type */
122 uint li_flags; /* misc flags */
123 struct xfs_log_item *li_bio_list; /* buffer item list */
124 void (*li_cb)(struct xfs_buf *,
125 struct xfs_log_item *);
126 /* buffer item iodone */
127 /* callback func */
128 struct xfs_item_ops *li_ops; /* function list */
129} xfs_log_item_t;
130
131#define XFS_LI_IN_AIL 0x1
132#define XFS_LI_ABORTED 0x2
133
134typedef struct xfs_item_ops {
135 uint (*iop_size)(xfs_log_item_t *);
136 void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *);
137 void (*iop_pin)(xfs_log_item_t *);
138 void (*iop_unpin)(xfs_log_item_t *, int);
139 void (*iop_unpin_remove)(xfs_log_item_t *, struct xfs_trans *);
140 uint (*iop_trylock)(xfs_log_item_t *);
141 void (*iop_unlock)(xfs_log_item_t *);
142 xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t);
143 void (*iop_push)(xfs_log_item_t *);
144 void (*iop_pushbuf)(xfs_log_item_t *);
145 void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t);
146} xfs_item_ops_t;
147
148#define IOP_SIZE(ip) (*(ip)->li_ops->iop_size)(ip)
149#define IOP_FORMAT(ip,vp) (*(ip)->li_ops->iop_format)(ip, vp)
150#define IOP_PIN(ip) (*(ip)->li_ops->iop_pin)(ip)
151#define IOP_UNPIN(ip, flags) (*(ip)->li_ops->iop_unpin)(ip, flags)
152#define IOP_UNPIN_REMOVE(ip,tp) (*(ip)->li_ops->iop_unpin_remove)(ip, tp)
153#define IOP_TRYLOCK(ip) (*(ip)->li_ops->iop_trylock)(ip)
154#define IOP_UNLOCK(ip) (*(ip)->li_ops->iop_unlock)(ip)
155#define IOP_COMMITTED(ip, lsn) (*(ip)->li_ops->iop_committed)(ip, lsn)
156#define IOP_PUSH(ip) (*(ip)->li_ops->iop_push)(ip)
157#define IOP_PUSHBUF(ip) (*(ip)->li_ops->iop_pushbuf)(ip)
158#define IOP_COMMITTING(ip, lsn) (*(ip)->li_ops->iop_committing)(ip, lsn)
159
160/*
161 * Return values for the IOP_TRYLOCK() routines.
162 */
163#define XFS_ITEM_SUCCESS 0
164#define XFS_ITEM_PINNED 1
165#define XFS_ITEM_LOCKED 2
166#define XFS_ITEM_FLUSHING 3
167#define XFS_ITEM_PUSHBUF 4
168
169#endif /* __KERNEL__ */
170
171/* 103/*
172 * This structure is used to track log items associated with 104 * This structure is used to track log items associated with
173 * a transaction. It points to the log item and keeps some 105 * a transaction. It points to the log item and keeps some
@@ -176,7 +108,7 @@ typedef struct xfs_item_ops {
176 * once we get to commit processing (see xfs_trans_commit()). 108 * once we get to commit processing (see xfs_trans_commit()).
177 */ 109 */
178typedef struct xfs_log_item_desc { 110typedef struct xfs_log_item_desc {
179 xfs_log_item_t *lid_item; 111 struct xfs_log_item *lid_item;
180 ushort lid_size; 112 ushort lid_size;
181 unsigned char lid_flags; 113 unsigned char lid_flags;
182 unsigned char lid_index; 114 unsigned char lid_index;
@@ -276,94 +208,6 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
276 (xfs_caddr_t)(((xfs_log_item_chunk_t*)0)->lic_descs)); 208 (xfs_caddr_t)(((xfs_log_item_chunk_t*)0)->lic_descs));
277} 209}
278 210
279#ifdef __KERNEL__
280/*
281 * This structure is used to maintain a list of block ranges that have been
282 * freed in the transaction. The ranges are listed in the perag[] busy list
283 * between when they're freed and the transaction is committed to disk.
284 */
285
286typedef struct xfs_log_busy_slot {
287 xfs_agnumber_t lbc_ag;
288 ushort lbc_idx; /* index in perag.busy[] */
289} xfs_log_busy_slot_t;
290
291#define XFS_LBC_NUM_SLOTS 31
292typedef struct xfs_log_busy_chunk {
293 struct xfs_log_busy_chunk *lbc_next;
294 uint lbc_free; /* free slots bitmask */
295 ushort lbc_unused; /* first unused */
296 xfs_log_busy_slot_t lbc_busy[XFS_LBC_NUM_SLOTS];
297} xfs_log_busy_chunk_t;
298
299#define XFS_LBC_MAX_SLOT (XFS_LBC_NUM_SLOTS - 1)
300#define XFS_LBC_FREEMASK ((1U << XFS_LBC_NUM_SLOTS) - 1)
301
302#define XFS_LBC_INIT(cp) ((cp)->lbc_free = XFS_LBC_FREEMASK)
303#define XFS_LBC_CLAIM(cp, slot) ((cp)->lbc_free &= ~(1 << (slot)))
304#define XFS_LBC_SLOT(cp, slot) (&((cp)->lbc_busy[(slot)]))
305#define XFS_LBC_VACANCY(cp) (((cp)->lbc_free) & XFS_LBC_FREEMASK)
306#define XFS_LBC_ISFREE(cp, slot) ((cp)->lbc_free & (1 << (slot)))
307
308/*
309 * This is the type of function which can be given to xfs_trans_callback()
310 * to be called upon the transaction's commit to disk.
311 */
312typedef void (*xfs_trans_callback_t)(struct xfs_trans *, void *);
313
314/*
315 * This is the structure maintained for every active transaction.
316 */
317typedef struct xfs_trans {
318 unsigned int t_magic; /* magic number */
319 xfs_log_callback_t t_logcb; /* log callback struct */
320 unsigned int t_type; /* transaction type */
321 unsigned int t_log_res; /* amt of log space resvd */
322 unsigned int t_log_count; /* count for perm log res */
323 unsigned int t_blk_res; /* # of blocks resvd */
324 unsigned int t_blk_res_used; /* # of resvd blocks used */
325 unsigned int t_rtx_res; /* # of rt extents resvd */
326 unsigned int t_rtx_res_used; /* # of resvd rt extents used */
327 xfs_log_ticket_t t_ticket; /* log mgr ticket */
328 xfs_lsn_t t_lsn; /* log seq num of start of
329 * transaction. */
330 xfs_lsn_t t_commit_lsn; /* log seq num of end of
331 * transaction. */
332 struct xfs_mount *t_mountp; /* ptr to fs mount struct */
333 struct xfs_dquot_acct *t_dqinfo; /* acctg info for dquots */
334 xfs_trans_callback_t t_callback; /* transaction callback */
335 void *t_callarg; /* callback arg */
336 unsigned int t_flags; /* misc flags */
337 int64_t t_icount_delta; /* superblock icount change */
338 int64_t t_ifree_delta; /* superblock ifree change */
339 int64_t t_fdblocks_delta; /* superblock fdblocks chg */
340 int64_t t_res_fdblocks_delta; /* on-disk only chg */
341 int64_t t_frextents_delta;/* superblock freextents chg*/
342 int64_t t_res_frextents_delta; /* on-disk only chg */
343#ifdef DEBUG
344 int64_t t_ag_freeblks_delta; /* debugging counter */
345 int64_t t_ag_flist_delta; /* debugging counter */
346 int64_t t_ag_btree_delta; /* debugging counter */
347#endif
348 int64_t t_dblocks_delta;/* superblock dblocks change */
349 int64_t t_agcount_delta;/* superblock agcount change */
350 int64_t t_imaxpct_delta;/* superblock imaxpct change */
351 int64_t t_rextsize_delta;/* superblock rextsize chg */
352 int64_t t_rbmblocks_delta;/* superblock rbmblocks chg */
353 int64_t t_rblocks_delta;/* superblock rblocks change */
354 int64_t t_rextents_delta;/* superblocks rextents chg */
355 int64_t t_rextslog_delta;/* superblocks rextslog chg */
356 unsigned int t_items_free; /* log item descs free */
357 xfs_log_item_chunk_t t_items; /* first log item desc chunk */
358 xfs_trans_header_t t_header; /* header for in-log trans */
359 unsigned int t_busy_free; /* busy descs free */
360 xfs_log_busy_chunk_t t_busy; /* busy/async free blocks */
361 unsigned long t_pflags; /* saved process flags state */
362} xfs_trans_t;
363
364#endif /* __KERNEL__ */
365
366
367#define XFS_TRANS_MAGIC 0x5452414E /* 'TRAN' */ 211#define XFS_TRANS_MAGIC 0x5452414E /* 'TRAN' */
368/* 212/*
369 * Values for t_flags. 213 * Values for t_flags.
@@ -906,6 +750,157 @@ typedef struct xfs_trans {
906#define XFS_DQUOT_REF 1 750#define XFS_DQUOT_REF 1
907 751
908#ifdef __KERNEL__ 752#ifdef __KERNEL__
753
754struct xfs_buf;
755struct xfs_buftarg;
756struct xfs_efd_log_item;
757struct xfs_efi_log_item;
758struct xfs_inode;
759struct xfs_item_ops;
760struct xfs_log_iovec;
761struct xfs_log_item_desc;
762struct xfs_mount;
763struct xfs_trans;
764struct xfs_dquot_acct;
765
766typedef struct xfs_log_item {
767 struct list_head li_ail; /* AIL pointers */
768 xfs_lsn_t li_lsn; /* last on-disk lsn */
769 struct xfs_log_item_desc *li_desc; /* ptr to current desc*/
770 struct xfs_mount *li_mountp; /* ptr to fs mount */
771 struct xfs_ail *li_ailp; /* ptr to AIL */
772 uint li_type; /* item type */
773 uint li_flags; /* misc flags */
774 struct xfs_log_item *li_bio_list; /* buffer item list */
775 void (*li_cb)(struct xfs_buf *,
776 struct xfs_log_item *);
777 /* buffer item iodone */
778 /* callback func */
779 struct xfs_item_ops *li_ops; /* function list */
780} xfs_log_item_t;
781
782#define XFS_LI_IN_AIL 0x1
783#define XFS_LI_ABORTED 0x2
784
785typedef struct xfs_item_ops {
786 uint (*iop_size)(xfs_log_item_t *);
787 void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *);
788 void (*iop_pin)(xfs_log_item_t *);
789 void (*iop_unpin)(xfs_log_item_t *, int);
790 void (*iop_unpin_remove)(xfs_log_item_t *, struct xfs_trans *);
791 uint (*iop_trylock)(xfs_log_item_t *);
792 void (*iop_unlock)(xfs_log_item_t *);
793 xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t);
794 void (*iop_push)(xfs_log_item_t *);
795 void (*iop_pushbuf)(xfs_log_item_t *);
796 void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t);
797} xfs_item_ops_t;
798
799#define IOP_SIZE(ip) (*(ip)->li_ops->iop_size)(ip)
800#define IOP_FORMAT(ip,vp) (*(ip)->li_ops->iop_format)(ip, vp)
801#define IOP_PIN(ip) (*(ip)->li_ops->iop_pin)(ip)
802#define IOP_UNPIN(ip, flags) (*(ip)->li_ops->iop_unpin)(ip, flags)
803#define IOP_UNPIN_REMOVE(ip,tp) (*(ip)->li_ops->iop_unpin_remove)(ip, tp)
804#define IOP_TRYLOCK(ip) (*(ip)->li_ops->iop_trylock)(ip)
805#define IOP_UNLOCK(ip) (*(ip)->li_ops->iop_unlock)(ip)
806#define IOP_COMMITTED(ip, lsn) (*(ip)->li_ops->iop_committed)(ip, lsn)
807#define IOP_PUSH(ip) (*(ip)->li_ops->iop_push)(ip)
808#define IOP_PUSHBUF(ip) (*(ip)->li_ops->iop_pushbuf)(ip)
809#define IOP_COMMITTING(ip, lsn) (*(ip)->li_ops->iop_committing)(ip, lsn)
810
811/*
812 * Return values for the IOP_TRYLOCK() routines.
813 */
814#define XFS_ITEM_SUCCESS 0
815#define XFS_ITEM_PINNED 1
816#define XFS_ITEM_LOCKED 2
817#define XFS_ITEM_FLUSHING 3
818#define XFS_ITEM_PUSHBUF 4
819
820/*
821 * This structure is used to maintain a list of block ranges that have been
822 * freed in the transaction. The ranges are listed in the perag[] busy list
823 * between when they're freed and the transaction is committed to disk.
824 */
825
826typedef struct xfs_log_busy_slot {
827 xfs_agnumber_t lbc_ag;
828 ushort lbc_idx; /* index in perag.busy[] */
829} xfs_log_busy_slot_t;
830
831#define XFS_LBC_NUM_SLOTS 31
832typedef struct xfs_log_busy_chunk {
833 struct xfs_log_busy_chunk *lbc_next;
834 uint lbc_free; /* free slots bitmask */
835 ushort lbc_unused; /* first unused */
836 xfs_log_busy_slot_t lbc_busy[XFS_LBC_NUM_SLOTS];
837} xfs_log_busy_chunk_t;
838
839#define XFS_LBC_MAX_SLOT (XFS_LBC_NUM_SLOTS - 1)
840#define XFS_LBC_FREEMASK ((1U << XFS_LBC_NUM_SLOTS) - 1)
841
842#define XFS_LBC_INIT(cp) ((cp)->lbc_free = XFS_LBC_FREEMASK)
843#define XFS_LBC_CLAIM(cp, slot) ((cp)->lbc_free &= ~(1 << (slot)))
844#define XFS_LBC_SLOT(cp, slot) (&((cp)->lbc_busy[(slot)]))
845#define XFS_LBC_VACANCY(cp) (((cp)->lbc_free) & XFS_LBC_FREEMASK)
846#define XFS_LBC_ISFREE(cp, slot) ((cp)->lbc_free & (1 << (slot)))
847
848/*
849 * This is the type of function which can be given to xfs_trans_callback()
850 * to be called upon the transaction's commit to disk.
851 */
852typedef void (*xfs_trans_callback_t)(struct xfs_trans *, void *);
853
854/*
855 * This is the structure maintained for every active transaction.
856 */
857typedef struct xfs_trans {
858 unsigned int t_magic; /* magic number */
859 xfs_log_callback_t t_logcb; /* log callback struct */
860 unsigned int t_type; /* transaction type */
861 unsigned int t_log_res; /* amt of log space resvd */
862 unsigned int t_log_count; /* count for perm log res */
863 unsigned int t_blk_res; /* # of blocks resvd */
864 unsigned int t_blk_res_used; /* # of resvd blocks used */
865 unsigned int t_rtx_res; /* # of rt extents resvd */
866 unsigned int t_rtx_res_used; /* # of resvd rt extents used */
867 xfs_log_ticket_t t_ticket; /* log mgr ticket */
868 xfs_lsn_t t_lsn; /* log seq num of start of
869 * transaction. */
870 xfs_lsn_t t_commit_lsn; /* log seq num of end of
871 * transaction. */
872 struct xfs_mount *t_mountp; /* ptr to fs mount struct */
873 struct xfs_dquot_acct *t_dqinfo; /* acctg info for dquots */
874 xfs_trans_callback_t t_callback; /* transaction callback */
875 void *t_callarg; /* callback arg */
876 unsigned int t_flags; /* misc flags */
877 int64_t t_icount_delta; /* superblock icount change */
878 int64_t t_ifree_delta; /* superblock ifree change */
879 int64_t t_fdblocks_delta; /* superblock fdblocks chg */
880 int64_t t_res_fdblocks_delta; /* on-disk only chg */
881 int64_t t_frextents_delta;/* superblock freextents chg*/
882 int64_t t_res_frextents_delta; /* on-disk only chg */
883#ifdef DEBUG
884 int64_t t_ag_freeblks_delta; /* debugging counter */
885 int64_t t_ag_flist_delta; /* debugging counter */
886 int64_t t_ag_btree_delta; /* debugging counter */
887#endif
888 int64_t t_dblocks_delta;/* superblock dblocks change */
889 int64_t t_agcount_delta;/* superblock agcount change */
890 int64_t t_imaxpct_delta;/* superblock imaxpct change */
891 int64_t t_rextsize_delta;/* superblock rextsize chg */
892 int64_t t_rbmblocks_delta;/* superblock rbmblocks chg */
893 int64_t t_rblocks_delta;/* superblock rblocks change */
894 int64_t t_rextents_delta;/* superblocks rextents chg */
895 int64_t t_rextslog_delta;/* superblocks rextslog chg */
896 unsigned int t_items_free; /* log item descs free */
897 xfs_log_item_chunk_t t_items; /* first log item desc chunk */
898 xfs_trans_header_t t_header; /* header for in-log trans */
899 unsigned int t_busy_free; /* busy descs free */
900 xfs_log_busy_chunk_t t_busy; /* busy/async free blocks */
901 unsigned long t_pflags; /* saved process flags state */
902} xfs_trans_t;
903
909/* 904/*
910 * XFS transaction mechanism exported interfaces that are 905 * XFS transaction mechanism exported interfaces that are
911 * actually macros. 906 * actually macros.
@@ -928,7 +923,6 @@ typedef struct xfs_trans {
928/* 923/*
929 * XFS transaction mechanism exported interfaces. 924 * XFS transaction mechanism exported interfaces.
930 */ 925 */
931void xfs_trans_init(struct xfs_mount *);
932xfs_trans_t *xfs_trans_alloc(struct xfs_mount *, uint); 926xfs_trans_t *xfs_trans_alloc(struct xfs_mount *, uint);
933xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint); 927xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint);
934xfs_trans_t *xfs_trans_dup(xfs_trans_t *); 928xfs_trans_t *xfs_trans_dup(xfs_trans_t *);
@@ -975,13 +969,8 @@ int _xfs_trans_commit(xfs_trans_t *,
975 int *); 969 int *);
976#define xfs_trans_commit(tp, flags) _xfs_trans_commit(tp, flags, NULL) 970#define xfs_trans_commit(tp, flags) _xfs_trans_commit(tp, flags, NULL)
977void xfs_trans_cancel(xfs_trans_t *, int); 971void xfs_trans_cancel(xfs_trans_t *, int);
978int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *);
979int xfs_trans_ail_init(struct xfs_mount *); 972int xfs_trans_ail_init(struct xfs_mount *);
980void xfs_trans_ail_destroy(struct xfs_mount *); 973void xfs_trans_ail_destroy(struct xfs_mount *);
981void xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t);
982xfs_lsn_t xfs_trans_tail_ail(struct xfs_mount *);
983void xfs_trans_unlocked_item(struct xfs_mount *,
984 xfs_log_item_t *);
985xfs_log_busy_slot_t *xfs_trans_add_busy(xfs_trans_t *tp, 974xfs_log_busy_slot_t *xfs_trans_add_busy(xfs_trans_t *tp,
986 xfs_agnumber_t ag, 975 xfs_agnumber_t ag,
987 xfs_extlen_t idx); 976 xfs_extlen_t idx);
@@ -990,4 +979,7 @@ extern kmem_zone_t *xfs_trans_zone;
990 979
991#endif /* __KERNEL__ */ 980#endif /* __KERNEL__ */
992 981
982void xfs_trans_init(struct xfs_mount *);
983int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *);
984
993#endif /* __XFS_TRANS_H__ */ 985#endif /* __XFS_TRANS_H__ */
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 1f77c00af56..2d47f10f8be 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * Copyright (c) 2008 Dave Chinner
3 * All Rights Reserved. 4 * All Rights Reserved.
4 * 5 *
5 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -28,13 +29,13 @@
28#include "xfs_trans_priv.h" 29#include "xfs_trans_priv.h"
29#include "xfs_error.h" 30#include "xfs_error.h"
30 31
31STATIC void xfs_ail_insert(xfs_ail_t *, xfs_log_item_t *); 32STATIC void xfs_ail_insert(struct xfs_ail *, xfs_log_item_t *);
32STATIC xfs_log_item_t * xfs_ail_delete(xfs_ail_t *, xfs_log_item_t *); 33STATIC xfs_log_item_t * xfs_ail_delete(struct xfs_ail *, xfs_log_item_t *);
33STATIC xfs_log_item_t * xfs_ail_min(xfs_ail_t *); 34STATIC xfs_log_item_t * xfs_ail_min(struct xfs_ail *);
34STATIC xfs_log_item_t * xfs_ail_next(xfs_ail_t *, xfs_log_item_t *); 35STATIC xfs_log_item_t * xfs_ail_next(struct xfs_ail *, xfs_log_item_t *);
35 36
36#ifdef DEBUG 37#ifdef DEBUG
37STATIC void xfs_ail_check(xfs_ail_t *, xfs_log_item_t *); 38STATIC void xfs_ail_check(struct xfs_ail *, xfs_log_item_t *);
38#else 39#else
39#define xfs_ail_check(a,l) 40#define xfs_ail_check(a,l)
40#endif /* DEBUG */ 41#endif /* DEBUG */
@@ -50,20 +51,20 @@ STATIC void xfs_ail_check(xfs_ail_t *, xfs_log_item_t *);
50 * lsn of the last item in the AIL. 51 * lsn of the last item in the AIL.
51 */ 52 */
52xfs_lsn_t 53xfs_lsn_t
53xfs_trans_tail_ail( 54xfs_trans_ail_tail(
54 xfs_mount_t *mp) 55 struct xfs_ail *ailp)
55{ 56{
56 xfs_lsn_t lsn; 57 xfs_lsn_t lsn;
57 xfs_log_item_t *lip; 58 xfs_log_item_t *lip;
58 59
59 spin_lock(&mp->m_ail_lock); 60 spin_lock(&ailp->xa_lock);
60 lip = xfs_ail_min(&mp->m_ail); 61 lip = xfs_ail_min(ailp);
61 if (lip == NULL) { 62 if (lip == NULL) {
62 lsn = (xfs_lsn_t)0; 63 lsn = (xfs_lsn_t)0;
63 } else { 64 } else {
64 lsn = lip->li_lsn; 65 lsn = lip->li_lsn;
65 } 66 }
66 spin_unlock(&mp->m_ail_lock); 67 spin_unlock(&ailp->xa_lock);
67 68
68 return lsn; 69 return lsn;
69} 70}
@@ -85,16 +86,125 @@ xfs_trans_tail_ail(
85 * any of the objects, so the lock is not needed. 86 * any of the objects, so the lock is not needed.
86 */ 87 */
87void 88void
88xfs_trans_push_ail( 89xfs_trans_ail_push(
89 xfs_mount_t *mp, 90 struct xfs_ail *ailp,
90 xfs_lsn_t threshold_lsn) 91 xfs_lsn_t threshold_lsn)
91{ 92{
92 xfs_log_item_t *lip; 93 xfs_log_item_t *lip;
94
95 lip = xfs_ail_min(ailp);
96 if (lip && !XFS_FORCED_SHUTDOWN(ailp->xa_mount)) {
97 if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0)
98 xfsaild_wakeup(ailp, threshold_lsn);
99 }
100}
101
102/*
103 * AIL traversal cursor initialisation.
104 *
105 * The cursor keeps track of where our current traversal is up
106 * to by tracking the next ƣtem in the list for us. However, for
107 * this to be safe, removing an object from the AIL needs to invalidate
108 * any cursor that points to it. hence the traversal cursor needs to
109 * be linked to the struct xfs_ail so that deletion can search all the
110 * active cursors for invalidation.
111 *
112 * We don't link the push cursor because it is embedded in the struct
113 * xfs_ail and hence easily findable.
114 */
115STATIC void
116xfs_trans_ail_cursor_init(
117 struct xfs_ail *ailp,
118 struct xfs_ail_cursor *cur)
119{
120 cur->item = NULL;
121 if (cur == &ailp->xa_cursors)
122 return;
123
124 cur->next = ailp->xa_cursors.next;
125 ailp->xa_cursors.next = cur;
126}
127
128/*
129 * Set the cursor to the next item, because when we look
130 * up the cursor the current item may have been freed.
131 */
132STATIC void
133xfs_trans_ail_cursor_set(
134 struct xfs_ail *ailp,
135 struct xfs_ail_cursor *cur,
136 struct xfs_log_item *lip)
137{
138 if (lip)
139 cur->item = xfs_ail_next(ailp, lip);
140}
141
142/*
143 * Get the next item in the traversal and advance the cursor.
144 * If the cursor was invalidated (inidicated by a lip of 1),
145 * restart the traversal.
146 */
147struct xfs_log_item *
148xfs_trans_ail_cursor_next(
149 struct xfs_ail *ailp,
150 struct xfs_ail_cursor *cur)
151{
152 struct xfs_log_item *lip = cur->item;
153
154 if ((__psint_t)lip & 1)
155 lip = xfs_ail_min(ailp);
156 xfs_trans_ail_cursor_set(ailp, cur, lip);
157 return lip;
158}
159
160/*
161 * Now that the traversal is complete, we need to remove the cursor
162 * from the list of traversing cursors. Avoid removing the embedded
163 * push cursor, but use the fact it is alway present to make the
164 * list deletion simple.
165 */
166void
167xfs_trans_ail_cursor_done(
168 struct xfs_ail *ailp,
169 struct xfs_ail_cursor *done)
170{
171 struct xfs_ail_cursor *prev = NULL;
172 struct xfs_ail_cursor *cur;
173
174 done->item = NULL;
175 if (done == &ailp->xa_cursors)
176 return;
177 prev = &ailp->xa_cursors;
178 for (cur = prev->next; cur; prev = cur, cur = prev->next) {
179 if (cur == done) {
180 prev->next = cur->next;
181 break;
182 }
183 }
184 ASSERT(cur);
185}
186
187/*
188 * Invalidate any cursor that is pointing to this item. This is
189 * called when an item is removed from the AIL. Any cursor pointing
190 * to this object is now invalid and the traversal needs to be
191 * terminated so it doesn't reference a freed object. We set the
192 * cursor item to a value of 1 so we can distinguish between an
193 * invalidation and the end of the list when getting the next item
194 * from the cursor.
195 */
196STATIC void
197xfs_trans_ail_cursor_clear(
198 struct xfs_ail *ailp,
199 struct xfs_log_item *lip)
200{
201 struct xfs_ail_cursor *cur;
93 202
94 lip = xfs_ail_min(&mp->m_ail); 203 /* need to search all cursors */
95 if (lip && !XFS_FORCED_SHUTDOWN(mp)) { 204 for (cur = &ailp->xa_cursors; cur; cur = cur->next) {
96 if (XFS_LSN_CMP(threshold_lsn, mp->m_ail.xa_target) > 0) 205 if (cur->item == lip)
97 xfsaild_wakeup(mp, threshold_lsn); 206 cur->item = (struct xfs_log_item *)
207 ((__psint_t)cur->item | 1);
98 } 208 }
99} 209}
100 210
@@ -103,25 +213,27 @@ xfs_trans_push_ail(
103 * Return the current tree generation number for use 213 * Return the current tree generation number for use
104 * in calls to xfs_trans_next_ail(). 214 * in calls to xfs_trans_next_ail().
105 */ 215 */
106STATIC xfs_log_item_t * 216xfs_log_item_t *
107xfs_trans_first_push_ail( 217xfs_trans_ail_cursor_first(
108 xfs_mount_t *mp, 218 struct xfs_ail *ailp,
109 int *gen, 219 struct xfs_ail_cursor *cur,
110 xfs_lsn_t lsn) 220 xfs_lsn_t lsn)
111{ 221{
112 xfs_log_item_t *lip; 222 xfs_log_item_t *lip;
113 223
114 lip = xfs_ail_min(&mp->m_ail); 224 xfs_trans_ail_cursor_init(ailp, cur);
115 *gen = (int)mp->m_ail.xa_gen; 225 lip = xfs_ail_min(ailp);
116 if (lsn == 0) 226 if (lsn == 0)
117 return lip; 227 goto out;
118 228
119 list_for_each_entry(lip, &mp->m_ail.xa_ail, li_ail) { 229 list_for_each_entry(lip, &ailp->xa_ail, li_ail) {
120 if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0) 230 if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0)
121 return lip; 231 goto out;
122 } 232 }
123 233 lip = NULL;
124 return NULL; 234out:
235 xfs_trans_ail_cursor_set(ailp, cur, lip);
236 return lip;
125} 237}
126 238
127/* 239/*
@@ -129,29 +241,29 @@ xfs_trans_first_push_ail(
129 */ 241 */
130long 242long
131xfsaild_push( 243xfsaild_push(
132 xfs_mount_t *mp, 244 struct xfs_ail *ailp,
133 xfs_lsn_t *last_lsn) 245 xfs_lsn_t *last_lsn)
134{ 246{
135 long tout = 1000; /* milliseconds */ 247 long tout = 1000; /* milliseconds */
136 xfs_lsn_t last_pushed_lsn = *last_lsn; 248 xfs_lsn_t last_pushed_lsn = *last_lsn;
137 xfs_lsn_t target = mp->m_ail.xa_target; 249 xfs_lsn_t target = ailp->xa_target;
138 xfs_lsn_t lsn; 250 xfs_lsn_t lsn;
139 xfs_log_item_t *lip; 251 xfs_log_item_t *lip;
140 int gen;
141 int restarts;
142 int flush_log, count, stuck; 252 int flush_log, count, stuck;
253 xfs_mount_t *mp = ailp->xa_mount;
254 struct xfs_ail_cursor *cur = &ailp->xa_cursors;
143 255
144#define XFS_TRANS_PUSH_AIL_RESTARTS 10 256 spin_lock(&ailp->xa_lock);
145 257 xfs_trans_ail_cursor_init(ailp, cur);
146 spin_lock(&mp->m_ail_lock); 258 lip = xfs_trans_ail_cursor_first(ailp, cur, *last_lsn);
147 lip = xfs_trans_first_push_ail(mp, &gen, *last_lsn);
148 if (!lip || XFS_FORCED_SHUTDOWN(mp)) { 259 if (!lip || XFS_FORCED_SHUTDOWN(mp)) {
149 /* 260 /*
150 * AIL is empty or our push has reached the end. 261 * AIL is empty or our push has reached the end.
151 */ 262 */
152 spin_unlock(&mp->m_ail_lock); 263 xfs_trans_ail_cursor_done(ailp, cur);
264 spin_unlock(&ailp->xa_lock);
153 last_pushed_lsn = 0; 265 last_pushed_lsn = 0;
154 goto out; 266 return tout;
155 } 267 }
156 268
157 XFS_STATS_INC(xs_push_ail); 269 XFS_STATS_INC(xs_push_ail);
@@ -169,7 +281,7 @@ xfsaild_push(
169 */ 281 */
170 tout = 10; 282 tout = 10;
171 lsn = lip->li_lsn; 283 lsn = lip->li_lsn;
172 flush_log = stuck = count = restarts = 0; 284 flush_log = stuck = count = 0;
173 while ((XFS_LSN_CMP(lip->li_lsn, target) < 0)) { 285 while ((XFS_LSN_CMP(lip->li_lsn, target) < 0)) {
174 int lock_result; 286 int lock_result;
175 /* 287 /*
@@ -184,7 +296,7 @@ xfsaild_push(
184 * skip to the next item in the list. 296 * skip to the next item in the list.
185 */ 297 */
186 lock_result = IOP_TRYLOCK(lip); 298 lock_result = IOP_TRYLOCK(lip);
187 spin_unlock(&mp->m_ail_lock); 299 spin_unlock(&ailp->xa_lock);
188 switch (lock_result) { 300 switch (lock_result) {
189 case XFS_ITEM_SUCCESS: 301 case XFS_ITEM_SUCCESS:
190 XFS_STATS_INC(xs_push_ail_success); 302 XFS_STATS_INC(xs_push_ail_success);
@@ -221,7 +333,7 @@ xfsaild_push(
221 break; 333 break;
222 } 334 }
223 335
224 spin_lock(&mp->m_ail_lock); 336 spin_lock(&ailp->xa_lock);
225 /* should we bother continuing? */ 337 /* should we bother continuing? */
226 if (XFS_FORCED_SHUTDOWN(mp)) 338 if (XFS_FORCED_SHUTDOWN(mp))
227 break; 339 break;
@@ -244,14 +356,13 @@ xfsaild_push(
244 if (stuck > 100) 356 if (stuck > 100)
245 break; 357 break;
246 358
247 lip = xfs_trans_next_ail(mp, lip, &gen, &restarts); 359 lip = xfs_trans_ail_cursor_next(ailp, cur);
248 if (lip == NULL) 360 if (lip == NULL)
249 break; 361 break;
250 if (restarts > XFS_TRANS_PUSH_AIL_RESTARTS)
251 break;
252 lsn = lip->li_lsn; 362 lsn = lip->li_lsn;
253 } 363 }
254 spin_unlock(&mp->m_ail_lock); 364 xfs_trans_ail_cursor_done(ailp, cur);
365 spin_unlock(&ailp->xa_lock);
255 366
256 if (flush_log) { 367 if (flush_log) {
257 /* 368 /*
@@ -274,8 +385,7 @@ xfsaild_push(
274 */ 385 */
275 tout += 20; 386 tout += 20;
276 last_pushed_lsn = 0; 387 last_pushed_lsn = 0;
277 } else if ((restarts > XFS_TRANS_PUSH_AIL_RESTARTS) || 388 } else if ((stuck * 100) / count > 90) {
278 ((stuck * 100) / count > 90)) {
279 /* 389 /*
280 * Either there is a lot of contention on the AIL or we 390 * Either there is a lot of contention on the AIL or we
281 * are stuck due to operations in progress. "Stuck" in this 391 * are stuck due to operations in progress. "Stuck" in this
@@ -287,7 +397,6 @@ xfsaild_push(
287 */ 397 */
288 tout += 10; 398 tout += 10;
289 } 399 }
290out:
291 *last_lsn = last_pushed_lsn; 400 *last_lsn = last_pushed_lsn;
292 return tout; 401 return tout;
293} /* xfsaild_push */ 402} /* xfsaild_push */
@@ -303,7 +412,7 @@ out:
303 */ 412 */
304void 413void
305xfs_trans_unlocked_item( 414xfs_trans_unlocked_item(
306 xfs_mount_t *mp, 415 struct xfs_ail *ailp,
307 xfs_log_item_t *lip) 416 xfs_log_item_t *lip)
308{ 417{
309 xfs_log_item_t *min_lip; 418 xfs_log_item_t *min_lip;
@@ -315,7 +424,7 @@ xfs_trans_unlocked_item(
315 * over some potentially valid data. 424 * over some potentially valid data.
316 */ 425 */
317 if (!(lip->li_flags & XFS_LI_IN_AIL) || 426 if (!(lip->li_flags & XFS_LI_IN_AIL) ||
318 XFS_FORCED_SHUTDOWN(mp)) { 427 XFS_FORCED_SHUTDOWN(ailp->xa_mount)) {
319 return; 428 return;
320 } 429 }
321 430
@@ -331,10 +440,10 @@ xfs_trans_unlocked_item(
331 * the call to xfs_log_move_tail() doesn't do anything if there's 440 * the call to xfs_log_move_tail() doesn't do anything if there's
332 * not enough free space to wake people up so we're safe calling it. 441 * not enough free space to wake people up so we're safe calling it.
333 */ 442 */
334 min_lip = xfs_ail_min(&mp->m_ail); 443 min_lip = xfs_ail_min(ailp);
335 444
336 if (min_lip == lip) 445 if (min_lip == lip)
337 xfs_log_move_tail(mp, 1); 446 xfs_log_move_tail(ailp->xa_mount, 1);
338} /* xfs_trans_unlocked_item */ 447} /* xfs_trans_unlocked_item */
339 448
340 449
@@ -347,41 +456,37 @@ xfs_trans_unlocked_item(
347 * we move in the AIL is the minimum one, update the tail lsn in the 456 * we move in the AIL is the minimum one, update the tail lsn in the
348 * log manager. 457 * log manager.
349 * 458 *
350 * Increment the AIL's generation count to indicate that the tree
351 * has changed.
352 *
353 * This function must be called with the AIL lock held. The lock 459 * This function must be called with the AIL lock held. The lock
354 * is dropped before returning. 460 * is dropped before returning.
355 */ 461 */
356void 462void
357xfs_trans_update_ail( 463xfs_trans_ail_update(
358 xfs_mount_t *mp, 464 struct xfs_ail *ailp,
359 xfs_log_item_t *lip, 465 xfs_log_item_t *lip,
360 xfs_lsn_t lsn) __releases(mp->m_ail_lock) 466 xfs_lsn_t lsn) __releases(ailp->xa_lock)
361{ 467{
362 xfs_log_item_t *dlip=NULL; 468 xfs_log_item_t *dlip = NULL;
363 xfs_log_item_t *mlip; /* ptr to minimum lip */ 469 xfs_log_item_t *mlip; /* ptr to minimum lip */
364 470
365 mlip = xfs_ail_min(&mp->m_ail); 471 mlip = xfs_ail_min(ailp);
366 472
367 if (lip->li_flags & XFS_LI_IN_AIL) { 473 if (lip->li_flags & XFS_LI_IN_AIL) {
368 dlip = xfs_ail_delete(&mp->m_ail, lip); 474 dlip = xfs_ail_delete(ailp, lip);
369 ASSERT(dlip == lip); 475 ASSERT(dlip == lip);
476 xfs_trans_ail_cursor_clear(ailp, dlip);
370 } else { 477 } else {
371 lip->li_flags |= XFS_LI_IN_AIL; 478 lip->li_flags |= XFS_LI_IN_AIL;
372 } 479 }
373 480
374 lip->li_lsn = lsn; 481 lip->li_lsn = lsn;
375 482 xfs_ail_insert(ailp, lip);
376 xfs_ail_insert(&mp->m_ail, lip);
377 mp->m_ail.xa_gen++;
378 483
379 if (mlip == dlip) { 484 if (mlip == dlip) {
380 mlip = xfs_ail_min(&mp->m_ail); 485 mlip = xfs_ail_min(ailp);
381 spin_unlock(&mp->m_ail_lock); 486 spin_unlock(&ailp->xa_lock);
382 xfs_log_move_tail(mp, mlip->li_lsn); 487 xfs_log_move_tail(ailp->xa_mount, mlip->li_lsn);
383 } else { 488 } else {
384 spin_unlock(&mp->m_ail_lock); 489 spin_unlock(&ailp->xa_lock);
385 } 490 }
386 491
387 492
@@ -403,29 +508,30 @@ xfs_trans_update_ail(
403 * is dropped before returning. 508 * is dropped before returning.
404 */ 509 */
405void 510void
406xfs_trans_delete_ail( 511xfs_trans_ail_delete(
407 xfs_mount_t *mp, 512 struct xfs_ail *ailp,
408 xfs_log_item_t *lip) __releases(mp->m_ail_lock) 513 xfs_log_item_t *lip) __releases(ailp->xa_lock)
409{ 514{
410 xfs_log_item_t *dlip; 515 xfs_log_item_t *dlip;
411 xfs_log_item_t *mlip; 516 xfs_log_item_t *mlip;
412 517
413 if (lip->li_flags & XFS_LI_IN_AIL) { 518 if (lip->li_flags & XFS_LI_IN_AIL) {
414 mlip = xfs_ail_min(&mp->m_ail); 519 mlip = xfs_ail_min(ailp);
415 dlip = xfs_ail_delete(&mp->m_ail, lip); 520 dlip = xfs_ail_delete(ailp, lip);
416 ASSERT(dlip == lip); 521 ASSERT(dlip == lip);
522 xfs_trans_ail_cursor_clear(ailp, dlip);
417 523
418 524
419 lip->li_flags &= ~XFS_LI_IN_AIL; 525 lip->li_flags &= ~XFS_LI_IN_AIL;
420 lip->li_lsn = 0; 526 lip->li_lsn = 0;
421 mp->m_ail.xa_gen++;
422 527
423 if (mlip == dlip) { 528 if (mlip == dlip) {
424 mlip = xfs_ail_min(&mp->m_ail); 529 mlip = xfs_ail_min(ailp);
425 spin_unlock(&mp->m_ail_lock); 530 spin_unlock(&ailp->xa_lock);
426 xfs_log_move_tail(mp, (mlip ? mlip->li_lsn : 0)); 531 xfs_log_move_tail(ailp->xa_mount,
532 (mlip ? mlip->li_lsn : 0));
427 } else { 533 } else {
428 spin_unlock(&mp->m_ail_lock); 534 spin_unlock(&ailp->xa_lock);
429 } 535 }
430 } 536 }
431 else { 537 else {
@@ -433,13 +539,13 @@ xfs_trans_delete_ail(
433 * If the file system is not being shutdown, we are in 539 * If the file system is not being shutdown, we are in
434 * serious trouble if we get to this stage. 540 * serious trouble if we get to this stage.
435 */ 541 */
436 if (XFS_FORCED_SHUTDOWN(mp)) 542 struct xfs_mount *mp = ailp->xa_mount;
437 spin_unlock(&mp->m_ail_lock); 543
438 else { 544 spin_unlock(&ailp->xa_lock);
545 if (!XFS_FORCED_SHUTDOWN(mp)) {
439 xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp, 546 xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp,
440 "%s: attempting to delete a log item that is not in the AIL", 547 "%s: attempting to delete a log item that is not in the AIL",
441 __func__); 548 __func__);
442 spin_unlock(&mp->m_ail_lock);
443 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 549 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
444 } 550 }
445 } 551 }
@@ -448,56 +554,6 @@ xfs_trans_delete_ail(
448 554
449 555
450/* 556/*
451 * Return the item in the AIL with the smallest lsn.
452 * Return the current tree generation number for use
453 * in calls to xfs_trans_next_ail().
454 */
455xfs_log_item_t *
456xfs_trans_first_ail(
457 xfs_mount_t *mp,
458 int *gen)
459{
460 xfs_log_item_t *lip;
461
462 lip = xfs_ail_min(&mp->m_ail);
463 *gen = (int)mp->m_ail.xa_gen;
464
465 return lip;
466}
467
468/*
469 * If the generation count of the tree has not changed since the
470 * caller last took something from the AIL, then return the elmt
471 * in the tree which follows the one given. If the count has changed,
472 * then return the minimum elmt of the AIL and bump the restarts counter
473 * if one is given.
474 */
475xfs_log_item_t *
476xfs_trans_next_ail(
477 xfs_mount_t *mp,
478 xfs_log_item_t *lip,
479 int *gen,
480 int *restarts)
481{
482 xfs_log_item_t *nlip;
483
484 ASSERT(mp && lip && gen);
485 if (mp->m_ail.xa_gen == *gen) {
486 nlip = xfs_ail_next(&mp->m_ail, lip);
487 } else {
488 nlip = xfs_ail_min(&mp->m_ail);
489 *gen = (int)mp->m_ail.xa_gen;
490 if (restarts != NULL) {
491 XFS_STATS_INC(xs_push_ail_restarts);
492 (*restarts)++;
493 }
494 }
495
496 return (nlip);
497}
498
499
500/*
501 * The active item list (AIL) is a doubly linked list of log 557 * The active item list (AIL) is a doubly linked list of log
502 * items sorted by ascending lsn. The base of the list is 558 * items sorted by ascending lsn. The base of the list is
503 * a forw/back pointer pair embedded in the xfs mount structure. 559 * a forw/back pointer pair embedded in the xfs mount structure.
@@ -515,15 +571,35 @@ int
515xfs_trans_ail_init( 571xfs_trans_ail_init(
516 xfs_mount_t *mp) 572 xfs_mount_t *mp)
517{ 573{
518 INIT_LIST_HEAD(&mp->m_ail.xa_ail); 574 struct xfs_ail *ailp;
519 return xfsaild_start(mp); 575 int error;
576
577 ailp = kmem_zalloc(sizeof(struct xfs_ail), KM_MAYFAIL);
578 if (!ailp)
579 return ENOMEM;
580
581 ailp->xa_mount = mp;
582 INIT_LIST_HEAD(&ailp->xa_ail);
583 spin_lock_init(&ailp->xa_lock);
584 error = xfsaild_start(ailp);
585 if (error)
586 goto out_free_ailp;
587 mp->m_ail = ailp;
588 return 0;
589
590out_free_ailp:
591 kmem_free(ailp);
592 return error;
520} 593}
521 594
522void 595void
523xfs_trans_ail_destroy( 596xfs_trans_ail_destroy(
524 xfs_mount_t *mp) 597 xfs_mount_t *mp)
525{ 598{
526 xfsaild_stop(mp); 599 struct xfs_ail *ailp = mp->m_ail;
600
601 xfsaild_stop(ailp);
602 kmem_free(ailp);
527} 603}
528 604
529/* 605/*
@@ -534,7 +610,7 @@ xfs_trans_ail_destroy(
534 */ 610 */
535STATIC void 611STATIC void
536xfs_ail_insert( 612xfs_ail_insert(
537 xfs_ail_t *ailp, 613 struct xfs_ail *ailp,
538 xfs_log_item_t *lip) 614 xfs_log_item_t *lip)
539/* ARGSUSED */ 615/* ARGSUSED */
540{ 616{
@@ -568,7 +644,7 @@ xfs_ail_insert(
568/*ARGSUSED*/ 644/*ARGSUSED*/
569STATIC xfs_log_item_t * 645STATIC xfs_log_item_t *
570xfs_ail_delete( 646xfs_ail_delete(
571 xfs_ail_t *ailp, 647 struct xfs_ail *ailp,
572 xfs_log_item_t *lip) 648 xfs_log_item_t *lip)
573/* ARGSUSED */ 649/* ARGSUSED */
574{ 650{
@@ -585,7 +661,7 @@ xfs_ail_delete(
585 */ 661 */
586STATIC xfs_log_item_t * 662STATIC xfs_log_item_t *
587xfs_ail_min( 663xfs_ail_min(
588 xfs_ail_t *ailp) 664 struct xfs_ail *ailp)
589/* ARGSUSED */ 665/* ARGSUSED */
590{ 666{
591 if (list_empty(&ailp->xa_ail)) 667 if (list_empty(&ailp->xa_ail))
@@ -601,7 +677,7 @@ xfs_ail_min(
601 */ 677 */
602STATIC xfs_log_item_t * 678STATIC xfs_log_item_t *
603xfs_ail_next( 679xfs_ail_next(
604 xfs_ail_t *ailp, 680 struct xfs_ail *ailp,
605 xfs_log_item_t *lip) 681 xfs_log_item_t *lip)
606/* ARGSUSED */ 682/* ARGSUSED */
607{ 683{
@@ -617,7 +693,7 @@ xfs_ail_next(
617 */ 693 */
618STATIC void 694STATIC void
619xfs_ail_check( 695xfs_ail_check(
620 xfs_ail_t *ailp, 696 struct xfs_ail *ailp,
621 xfs_log_item_t *lip) 697 xfs_log_item_t *lip)
622{ 698{
623 xfs_log_item_t *prev_lip; 699 xfs_log_item_t *prev_lip;
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 4e855b5ced6..8ee2f8c8b0a 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -527,9 +527,8 @@ xfs_trans_brelse(xfs_trans_t *tp,
527 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); 527 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
528 if (lip->li_type == XFS_LI_BUF) { 528 if (lip->li_type == XFS_LI_BUF) {
529 bip = XFS_BUF_FSPRIVATE(bp,xfs_buf_log_item_t*); 529 bip = XFS_BUF_FSPRIVATE(bp,xfs_buf_log_item_t*);
530 xfs_trans_unlocked_item( 530 xfs_trans_unlocked_item(bip->bli_item.li_ailp,
531 bip->bli_item.li_mountp, 531 lip);
532 lip);
533 } 532 }
534 } 533 }
535 xfs_buf_relse(bp); 534 xfs_buf_relse(bp);
@@ -626,7 +625,7 @@ xfs_trans_brelse(xfs_trans_t *tp,
626 * tell the AIL that the buffer is being unlocked. 625 * tell the AIL that the buffer is being unlocked.
627 */ 626 */
628 if (bip != NULL) { 627 if (bip != NULL) {
629 xfs_trans_unlocked_item(bip->bli_item.li_mountp, 628 xfs_trans_unlocked_item(bip->bli_item.li_ailp,
630 (xfs_log_item_t*)bip); 629 (xfs_log_item_t*)bip);
631 } 630 }
632 631
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 2a1c0f071f9..23d276af2e0 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -85,7 +85,6 @@ xfs_trans_iget(
85{ 85{
86 int error; 86 int error;
87 xfs_inode_t *ip; 87 xfs_inode_t *ip;
88 xfs_inode_log_item_t *iip;
89 88
90 /* 89 /*
91 * If the transaction pointer is NULL, just call the normal 90 * If the transaction pointer is NULL, just call the normal
@@ -138,34 +137,7 @@ xfs_trans_iget(
138 } 137 }
139 ASSERT(ip != NULL); 138 ASSERT(ip != NULL);
140 139
141 /* 140 xfs_trans_ijoin(tp, ip, lock_flags);
142 * Get a log_item_desc to point at the new item.
143 */
144 if (ip->i_itemp == NULL)
145 xfs_inode_item_init(ip, mp);
146 iip = ip->i_itemp;
147 (void) xfs_trans_add_item(tp, (xfs_log_item_t *)(iip));
148
149 xfs_trans_inode_broot_debug(ip);
150
151 /*
152 * If the IO lock has been acquired, mark that in
153 * the inode log item so we'll know to unlock it
154 * when the transaction commits.
155 */
156 ASSERT(iip->ili_flags == 0);
157 if (lock_flags & XFS_IOLOCK_EXCL) {
158 iip->ili_flags |= XFS_ILI_IOLOCKED_EXCL;
159 } else if (lock_flags & XFS_IOLOCK_SHARED) {
160 iip->ili_flags |= XFS_ILI_IOLOCKED_SHARED;
161 }
162
163 /*
164 * Initialize i_transp so we can find it with xfs_inode_incore()
165 * above.
166 */
167 ip->i_transp = tp;
168
169 *ipp = ip; 141 *ipp = ip;
170 return 0; 142 return 0;
171} 143}
diff --git a/fs/xfs/xfs_trans_item.c b/fs/xfs/xfs_trans_item.c
index 3c666e8317f..e110bf57d7f 100644
--- a/fs/xfs/xfs_trans_item.c
+++ b/fs/xfs/xfs_trans_item.c
@@ -22,6 +22,14 @@
22#include "xfs_inum.h" 22#include "xfs_inum.h"
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_trans_priv.h" 24#include "xfs_trans_priv.h"
25/* XXX: from here down needed until struct xfs_trans has it's own ailp */
26#include "xfs_bit.h"
27#include "xfs_buf_item.h"
28#include "xfs_sb.h"
29#include "xfs_ag.h"
30#include "xfs_dir2.h"
31#include "xfs_dmapi.h"
32#include "xfs_mount.h"
25 33
26STATIC int xfs_trans_unlock_chunk(xfs_log_item_chunk_t *, 34STATIC int xfs_trans_unlock_chunk(xfs_log_item_chunk_t *,
27 int, int, xfs_lsn_t); 35 int, int, xfs_lsn_t);
@@ -79,6 +87,7 @@ xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip)
79 lidp->lid_size = 0; 87 lidp->lid_size = 0;
80 lip->li_desc = lidp; 88 lip->li_desc = lidp;
81 lip->li_mountp = tp->t_mountp; 89 lip->li_mountp = tp->t_mountp;
90 lip->li_ailp = tp->t_mountp->m_ail;
82 return lidp; 91 return lidp;
83 } 92 }
84 93
@@ -120,6 +129,7 @@ xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip)
120 lidp->lid_size = 0; 129 lidp->lid_size = 0;
121 lip->li_desc = lidp; 130 lip->li_desc = lidp;
122 lip->li_mountp = tp->t_mountp; 131 lip->li_mountp = tp->t_mountp;
132 lip->li_ailp = tp->t_mountp->m_ail;
123 return lidp; 133 return lidp;
124} 134}
125 135
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 3c748c456ed..73e2ad39743 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -44,25 +44,93 @@ xfs_log_busy_slot_t *xfs_trans_add_busy(xfs_trans_t *tp,
44 xfs_extlen_t idx); 44 xfs_extlen_t idx);
45 45
46/* 46/*
47 * From xfs_trans_ail.c 47 * AIL traversal cursor.
48 *
49 * Rather than using a generation number for detecting changes in the ail, use
50 * a cursor that is protected by the ail lock. The aild cursor exists in the
51 * struct xfs_ail, but other traversals can declare it on the stack and link it
52 * to the ail list.
53 *
54 * When an object is deleted from or moved int the AIL, the cursor list is
55 * searched to see if the object is a designated cursor item. If it is, it is
56 * deleted from the cursor so that the next time the cursor is used traversal
57 * will return to the start.
58 *
59 * This means a traversal colliding with a removal will cause a restart of the
60 * list scan, rather than any insertion or deletion anywhere in the list. The
61 * low bit of the item pointer is set if the cursor has been invalidated so
62 * that we can tell the difference between invalidation and reaching the end
63 * of the list to trigger traversal restarts.
48 */ 64 */
49void xfs_trans_update_ail(struct xfs_mount *mp, 65struct xfs_ail_cursor {
50 struct xfs_log_item *lip, xfs_lsn_t lsn) 66 struct xfs_ail_cursor *next;
51 __releases(mp->m_ail_lock); 67 struct xfs_log_item *item;
52void xfs_trans_delete_ail(struct xfs_mount *mp, 68};
53 struct xfs_log_item *lip)
54 __releases(mp->m_ail_lock);
55struct xfs_log_item *xfs_trans_first_ail(struct xfs_mount *, int *);
56struct xfs_log_item *xfs_trans_next_ail(struct xfs_mount *,
57 struct xfs_log_item *, int *, int *);
58 69
70/*
71 * Private AIL structures.
72 *
73 * Eventually we need to drive the locking in here as well.
74 */
75struct xfs_ail {
76 struct xfs_mount *xa_mount;
77 struct list_head xa_ail;
78 uint xa_gen;
79 struct task_struct *xa_task;
80 xfs_lsn_t xa_target;
81 struct xfs_ail_cursor xa_cursors;
82 spinlock_t xa_lock;
83};
59 84
60/* 85/*
61 * AIL push thread support 86 * From xfs_trans_ail.c
62 */ 87 */
63long xfsaild_push(struct xfs_mount *, xfs_lsn_t *); 88void xfs_trans_ail_update(struct xfs_ail *ailp,
64void xfsaild_wakeup(struct xfs_mount *, xfs_lsn_t); 89 struct xfs_log_item *lip, xfs_lsn_t lsn)
65int xfsaild_start(struct xfs_mount *); 90 __releases(ailp->xa_lock);
66void xfsaild_stop(struct xfs_mount *); 91void xfs_trans_ail_delete(struct xfs_ail *ailp,
92 struct xfs_log_item *lip)
93 __releases(ailp->xa_lock);
94void xfs_trans_ail_push(struct xfs_ail *, xfs_lsn_t);
95void xfs_trans_unlocked_item(struct xfs_ail *,
96 xfs_log_item_t *);
97
98xfs_lsn_t xfs_trans_ail_tail(struct xfs_ail *ailp);
99
100struct xfs_log_item *xfs_trans_ail_cursor_first(struct xfs_ail *ailp,
101 struct xfs_ail_cursor *cur,
102 xfs_lsn_t lsn);
103struct xfs_log_item *xfs_trans_ail_cursor_next(struct xfs_ail *ailp,
104 struct xfs_ail_cursor *cur);
105void xfs_trans_ail_cursor_done(struct xfs_ail *ailp,
106 struct xfs_ail_cursor *cur);
107
108long xfsaild_push(struct xfs_ail *, xfs_lsn_t *);
109void xfsaild_wakeup(struct xfs_ail *, xfs_lsn_t);
110int xfsaild_start(struct xfs_ail *);
111void xfsaild_stop(struct xfs_ail *);
67 112
113#if BITS_PER_LONG != 64
114static inline void
115xfs_trans_ail_copy_lsn(
116 struct xfs_ail *ailp,
117 xfs_lsn_t *dst,
118 xfs_lsn_t *src)
119{
120 ASSERT(sizeof(xfs_lsn_t) == 8); /* don't lock if it shrinks */
121 spin_lock(&ailp->xa_lock);
122 *dst = *src;
123 spin_unlock(&ailp->xa_lock);
124}
125#else
126static inline void
127xfs_trans_ail_copy_lsn(
128 struct xfs_ail *ailp,
129 xfs_lsn_t *dst,
130 xfs_lsn_t *src)
131{
132 ASSERT(sizeof(xfs_lsn_t) == 8);
133 *dst = *src;
134}
135#endif
68#endif /* __XFS_TRANS_PRIV_H__ */ 136#endif /* __XFS_TRANS_PRIV_H__ */
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 35d4d414bcc..fcc2285d03e 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -172,6 +172,12 @@ xfs_dir_ialloc(
172 *ipp = NULL; 172 *ipp = NULL;
173 return code; 173 return code;
174 } 174 }
175
176 /*
177 * transaction commit worked ok so we can drop the extra ticket
178 * reference that we gained in xfs_trans_dup()
179 */
180 xfs_log_ticket_put(tp->t_ticket);
175 code = xfs_trans_reserve(tp, 0, log_res, 0, 181 code = xfs_trans_reserve(tp, 0, log_res, 0,
176 XFS_TRANS_PERM_LOG_RES, log_count); 182 XFS_TRANS_PERM_LOG_RES, log_count);
177 /* 183 /*
@@ -268,9 +274,9 @@ xfs_bump_ino_vers2(
268 xfs_mount_t *mp; 274 xfs_mount_t *mp;
269 275
270 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 276 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
271 ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1); 277 ASSERT(ip->i_d.di_version == 1);
272 278
273 ip->i_d.di_version = XFS_DINODE_VERSION_2; 279 ip->i_d.di_version = 2;
274 ip->i_d.di_onlink = 0; 280 ip->i_d.di_onlink = 0;
275 memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); 281 memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
276 mp = tp->t_mountp; 282 mp = tp->t_mountp;
@@ -302,7 +308,7 @@ xfs_bumplink(
302 ASSERT(ip->i_d.di_nlink > 0); 308 ASSERT(ip->i_d.di_nlink > 0);
303 ip->i_d.di_nlink++; 309 ip->i_d.di_nlink++;
304 inc_nlink(VFS_I(ip)); 310 inc_nlink(VFS_I(ip));
305 if ((ip->i_d.di_version == XFS_DINODE_VERSION_1) && 311 if ((ip->i_d.di_version == 1) &&
306 (ip->i_d.di_nlink > XFS_MAXLINK_1)) { 312 (ip->i_d.di_nlink > XFS_MAXLINK_1)) {
307 /* 313 /*
308 * The inode has increased its number of links beyond 314 * The inode has increased its number of links beyond
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
deleted file mode 100644
index 439dd3939dd..00000000000
--- a/fs/xfs/xfs_vfsops.c
+++ /dev/null
@@ -1,757 +0,0 @@
1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_types.h"
21#include "xfs_bit.h"
22#include "xfs_log.h"
23#include "xfs_inum.h"
24#include "xfs_trans.h"
25#include "xfs_sb.h"
26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h"
30#include "xfs_da_btree.h"
31#include "xfs_bmap_btree.h"
32#include "xfs_ialloc_btree.h"
33#include "xfs_alloc_btree.h"
34#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h"
36#include "xfs_dinode.h"
37#include "xfs_inode.h"
38#include "xfs_inode_item.h"
39#include "xfs_btree.h"
40#include "xfs_alloc.h"
41#include "xfs_ialloc.h"
42#include "xfs_quota.h"
43#include "xfs_error.h"
44#include "xfs_bmap.h"
45#include "xfs_rw.h"
46#include "xfs_buf_item.h"
47#include "xfs_log_priv.h"
48#include "xfs_dir2_trace.h"
49#include "xfs_extfree_item.h"
50#include "xfs_acl.h"
51#include "xfs_attr.h"
52#include "xfs_clnt.h"
53#include "xfs_mru_cache.h"
54#include "xfs_filestream.h"
55#include "xfs_fsops.h"
56#include "xfs_vnodeops.h"
57#include "xfs_vfsops.h"
58#include "xfs_utils.h"
59
60
61STATIC void
62xfs_quiesce_fs(
63 xfs_mount_t *mp)
64{
65 int count = 0, pincount;
66
67 xfs_flush_buftarg(mp->m_ddev_targp, 0);
68 xfs_finish_reclaim_all(mp, 0);
69
70 /* This loop must run at least twice.
71 * The first instance of the loop will flush
72 * most meta data but that will generate more
73 * meta data (typically directory updates).
74 * Which then must be flushed and logged before
75 * we can write the unmount record.
76 */
77 do {
78 xfs_syncsub(mp, SYNC_INODE_QUIESCE, NULL);
79 pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
80 if (!pincount) {
81 delay(50);
82 count++;
83 }
84 } while (count < 2);
85}
86
87/*
88 * Second stage of a quiesce. The data is already synced, now we have to take
89 * care of the metadata. New transactions are already blocked, so we need to
90 * wait for any remaining transactions to drain out before proceding.
91 */
92void
93xfs_attr_quiesce(
94 xfs_mount_t *mp)
95{
96 int error = 0;
97
98 /* wait for all modifications to complete */
99 while (atomic_read(&mp->m_active_trans) > 0)
100 delay(100);
101
102 /* flush inodes and push all remaining buffers out to disk */
103 xfs_quiesce_fs(mp);
104
105 ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0);
106
107 /* Push the superblock and write an unmount record */
108 error = xfs_log_sbcount(mp, 1);
109 if (error)
110 xfs_fs_cmn_err(CE_WARN, mp,
111 "xfs_attr_quiesce: failed to log sb changes. "
112 "Frozen image may not be consistent.");
113 xfs_log_unmount_write(mp);
114 xfs_unmountfs_writesb(mp);
115}
116
117/*
118 * xfs_unmount_flush implements a set of flush operation on special
119 * inodes, which are needed as a separate set of operations so that
120 * they can be called as part of relocation process.
121 */
122int
123xfs_unmount_flush(
124 xfs_mount_t *mp, /* Mount structure we are getting
125 rid of. */
126 int relocation) /* Called from vfs relocation. */
127{
128 xfs_inode_t *rip = mp->m_rootip;
129 xfs_inode_t *rbmip;
130 xfs_inode_t *rsumip = NULL;
131 int error;
132
133 xfs_ilock(rip, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
134 xfs_iflock(rip);
135
136 /*
137 * Flush out the real time inodes.
138 */
139 if ((rbmip = mp->m_rbmip) != NULL) {
140 xfs_ilock(rbmip, XFS_ILOCK_EXCL);
141 xfs_iflock(rbmip);
142 error = xfs_iflush(rbmip, XFS_IFLUSH_SYNC);
143 xfs_iunlock(rbmip, XFS_ILOCK_EXCL);
144
145 if (error == EFSCORRUPTED)
146 goto fscorrupt_out;
147
148 ASSERT(vn_count(VFS_I(rbmip)) == 1);
149
150 rsumip = mp->m_rsumip;
151 xfs_ilock(rsumip, XFS_ILOCK_EXCL);
152 xfs_iflock(rsumip);
153 error = xfs_iflush(rsumip, XFS_IFLUSH_SYNC);
154 xfs_iunlock(rsumip, XFS_ILOCK_EXCL);
155
156 if (error == EFSCORRUPTED)
157 goto fscorrupt_out;
158
159 ASSERT(vn_count(VFS_I(rsumip)) == 1);
160 }
161
162 /*
163 * Synchronously flush root inode to disk
164 */
165 error = xfs_iflush(rip, XFS_IFLUSH_SYNC);
166 if (error == EFSCORRUPTED)
167 goto fscorrupt_out2;
168
169 if (vn_count(VFS_I(rip)) != 1 && !relocation) {
170 xfs_iunlock(rip, XFS_ILOCK_EXCL);
171 return XFS_ERROR(EBUSY);
172 }
173
174 /*
175 * Release dquot that rootinode, rbmino and rsumino might be holding,
176 * flush and purge the quota inodes.
177 */
178 error = XFS_QM_UNMOUNT(mp);
179 if (error == EFSCORRUPTED)
180 goto fscorrupt_out2;
181
182 if (rbmip) {
183 IRELE(rbmip);
184 IRELE(rsumip);
185 }
186
187 xfs_iunlock(rip, XFS_ILOCK_EXCL);
188 return 0;
189
190fscorrupt_out:
191 xfs_ifunlock(rip);
192
193fscorrupt_out2:
194 xfs_iunlock(rip, XFS_ILOCK_EXCL);
195
196 return XFS_ERROR(EFSCORRUPTED);
197}
198
199/*
200 * xfs_sync flushes any pending I/O to file system vfsp.
201 *
202 * This routine is called by vfs_sync() to make sure that things make it
203 * out to disk eventually, on sync() system calls to flush out everything,
204 * and when the file system is unmounted. For the vfs_sync() case, all
205 * we really need to do is sync out the log to make all of our meta-data
206 * updates permanent (except for timestamps). For calls from pflushd(),
207 * dirty pages are kept moving by calling pdflush() on the inodes
208 * containing them. We also flush the inodes that we can lock without
209 * sleeping and the superblock if we can lock it without sleeping from
210 * vfs_sync() so that items at the tail of the log are always moving out.
211 *
212 * Flags:
213 * SYNC_BDFLUSH - We're being called from vfs_sync() so we don't want
214 * to sleep if we can help it. All we really need
215 * to do is ensure that the log is synced at least
216 * periodically. We also push the inodes and
217 * superblock if we can lock them without sleeping
218 * and they are not pinned.
219 * SYNC_ATTR - We need to flush the inodes. If SYNC_BDFLUSH is not
220 * set, then we really want to lock each inode and flush
221 * it.
222 * SYNC_WAIT - All the flushes that take place in this call should
223 * be synchronous.
224 * SYNC_DELWRI - This tells us to push dirty pages associated with
225 * inodes. SYNC_WAIT and SYNC_BDFLUSH are used to
226 * determine if they should be flushed sync, async, or
227 * delwri.
228 * SYNC_CLOSE - This flag is passed when the system is being
229 * unmounted. We should sync and invalidate everything.
230 * SYNC_FSDATA - This indicates that the caller would like to make
231 * sure the superblock is safe on disk. We can ensure
232 * this by simply making sure the log gets flushed
233 * if SYNC_BDFLUSH is set, and by actually writing it
234 * out otherwise.
235 * SYNC_IOWAIT - The caller wants us to wait for all data I/O to complete
236 * before we return (including direct I/O). Forms the drain
237 * side of the write barrier needed to safely quiesce the
238 * filesystem.
239 *
240 */
241int
242xfs_sync(
243 xfs_mount_t *mp,
244 int flags)
245{
246 int error;
247
248 /*
249 * Get the Quota Manager to flush the dquots.
250 *
251 * If XFS quota support is not enabled or this filesystem
252 * instance does not use quotas XFS_QM_DQSYNC will always
253 * return zero.
254 */
255 error = XFS_QM_DQSYNC(mp, flags);
256 if (error) {
257 /*
258 * If we got an IO error, we will be shutting down.
259 * So, there's nothing more for us to do here.
260 */
261 ASSERT(error != EIO || XFS_FORCED_SHUTDOWN(mp));
262 if (XFS_FORCED_SHUTDOWN(mp))
263 return XFS_ERROR(error);
264 }
265
266 if (flags & SYNC_IOWAIT)
267 xfs_filestream_flush(mp);
268
269 return xfs_syncsub(mp, flags, NULL);
270}
271
272/*
273 * xfs sync routine for internal use
274 *
275 * This routine supports all of the flags defined for the generic vfs_sync
276 * interface as explained above under xfs_sync.
277 *
278 */
279int
280xfs_sync_inodes(
281 xfs_mount_t *mp,
282 int flags,
283 int *bypassed)
284{
285 xfs_inode_t *ip = NULL;
286 struct inode *vp = NULL;
287 int error;
288 int last_error;
289 uint64_t fflag;
290 uint lock_flags;
291 uint base_lock_flags;
292 boolean_t mount_locked;
293 boolean_t vnode_refed;
294 int preempt;
295 xfs_iptr_t *ipointer;
296#ifdef DEBUG
297 boolean_t ipointer_in = B_FALSE;
298
299#define IPOINTER_SET ipointer_in = B_TRUE
300#define IPOINTER_CLR ipointer_in = B_FALSE
301#else
302#define IPOINTER_SET
303#define IPOINTER_CLR
304#endif
305
306
307/* Insert a marker record into the inode list after inode ip. The list
308 * must be locked when this is called. After the call the list will no
309 * longer be locked.
310 */
311#define IPOINTER_INSERT(ip, mp) { \
312 ASSERT(ipointer_in == B_FALSE); \
313 ipointer->ip_mnext = ip->i_mnext; \
314 ipointer->ip_mprev = ip; \
315 ip->i_mnext = (xfs_inode_t *)ipointer; \
316 ipointer->ip_mnext->i_mprev = (xfs_inode_t *)ipointer; \
317 preempt = 0; \
318 XFS_MOUNT_IUNLOCK(mp); \
319 mount_locked = B_FALSE; \
320 IPOINTER_SET; \
321 }
322
323/* Remove the marker from the inode list. If the marker was the only item
324 * in the list then there are no remaining inodes and we should zero out
325 * the whole list. If we are the current head of the list then move the head
326 * past us.
327 */
328#define IPOINTER_REMOVE(ip, mp) { \
329 ASSERT(ipointer_in == B_TRUE); \
330 if (ipointer->ip_mnext != (xfs_inode_t *)ipointer) { \
331 ip = ipointer->ip_mnext; \
332 ip->i_mprev = ipointer->ip_mprev; \
333 ipointer->ip_mprev->i_mnext = ip; \
334 if (mp->m_inodes == (xfs_inode_t *)ipointer) { \
335 mp->m_inodes = ip; \
336 } \
337 } else { \
338 ASSERT(mp->m_inodes == (xfs_inode_t *)ipointer); \
339 mp->m_inodes = NULL; \
340 ip = NULL; \
341 } \
342 IPOINTER_CLR; \
343 }
344
345#define XFS_PREEMPT_MASK 0x7f
346
347 ASSERT(!(flags & SYNC_BDFLUSH));
348
349 if (bypassed)
350 *bypassed = 0;
351 if (mp->m_flags & XFS_MOUNT_RDONLY)
352 return 0;
353 error = 0;
354 last_error = 0;
355 preempt = 0;
356
357 /* Allocate a reference marker */
358 ipointer = (xfs_iptr_t *)kmem_zalloc(sizeof(xfs_iptr_t), KM_SLEEP);
359
360 fflag = XFS_B_ASYNC; /* default is don't wait */
361 if (flags & SYNC_DELWRI)
362 fflag = XFS_B_DELWRI;
363 if (flags & SYNC_WAIT)
364 fflag = 0; /* synchronous overrides all */
365
366 base_lock_flags = XFS_ILOCK_SHARED;
367 if (flags & (SYNC_DELWRI | SYNC_CLOSE)) {
368 /*
369 * We need the I/O lock if we're going to call any of
370 * the flush/inval routines.
371 */
372 base_lock_flags |= XFS_IOLOCK_SHARED;
373 }
374
375 XFS_MOUNT_ILOCK(mp);
376
377 ip = mp->m_inodes;
378
379 mount_locked = B_TRUE;
380 vnode_refed = B_FALSE;
381
382 IPOINTER_CLR;
383
384 do {
385 ASSERT(ipointer_in == B_FALSE);
386 ASSERT(vnode_refed == B_FALSE);
387
388 lock_flags = base_lock_flags;
389
390 /*
391 * There were no inodes in the list, just break out
392 * of the loop.
393 */
394 if (ip == NULL) {
395 break;
396 }
397
398 /*
399 * We found another sync thread marker - skip it
400 */
401 if (ip->i_mount == NULL) {
402 ip = ip->i_mnext;
403 continue;
404 }
405
406 vp = VFS_I(ip);
407
408 /*
409 * If the vnode is gone then this is being torn down,
410 * call reclaim if it is flushed, else let regular flush
411 * code deal with it later in the loop.
412 */
413
414 if (vp == NULL) {
415 /* Skip ones already in reclaim */
416 if (ip->i_flags & XFS_IRECLAIM) {
417 ip = ip->i_mnext;
418 continue;
419 }
420 if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) {
421 ip = ip->i_mnext;
422 } else if ((xfs_ipincount(ip) == 0) &&
423 xfs_iflock_nowait(ip)) {
424 IPOINTER_INSERT(ip, mp);
425
426 xfs_finish_reclaim(ip, 1,
427 XFS_IFLUSH_DELWRI_ELSE_ASYNC);
428
429 XFS_MOUNT_ILOCK(mp);
430 mount_locked = B_TRUE;
431 IPOINTER_REMOVE(ip, mp);
432 } else {
433 xfs_iunlock(ip, XFS_ILOCK_EXCL);
434 ip = ip->i_mnext;
435 }
436 continue;
437 }
438
439 if (VN_BAD(vp)) {
440 ip = ip->i_mnext;
441 continue;
442 }
443
444 if (XFS_FORCED_SHUTDOWN(mp) && !(flags & SYNC_CLOSE)) {
445 XFS_MOUNT_IUNLOCK(mp);
446 kmem_free(ipointer);
447 return 0;
448 }
449
450 /*
451 * Try to lock without sleeping. We're out of order with
452 * the inode list lock here, so if we fail we need to drop
453 * the mount lock and try again. If we're called from
454 * bdflush() here, then don't bother.
455 *
456 * The inode lock here actually coordinates with the
457 * almost spurious inode lock in xfs_ireclaim() to prevent
458 * the vnode we handle here without a reference from
459 * being freed while we reference it. If we lock the inode
460 * while it's on the mount list here, then the spurious inode
461 * lock in xfs_ireclaim() after the inode is pulled from
462 * the mount list will sleep until we release it here.
463 * This keeps the vnode from being freed while we reference
464 * it.
465 */
466 if (xfs_ilock_nowait(ip, lock_flags) == 0) {
467 if (vp == NULL) {
468 ip = ip->i_mnext;
469 continue;
470 }
471
472 vp = vn_grab(vp);
473 if (vp == NULL) {
474 ip = ip->i_mnext;
475 continue;
476 }
477
478 IPOINTER_INSERT(ip, mp);
479 xfs_ilock(ip, lock_flags);
480
481 ASSERT(vp == VFS_I(ip));
482 ASSERT(ip->i_mount == mp);
483
484 vnode_refed = B_TRUE;
485 }
486
487 /* From here on in the loop we may have a marker record
488 * in the inode list.
489 */
490
491 /*
492 * If we have to flush data or wait for I/O completion
493 * we need to drop the ilock that we currently hold.
494 * If we need to drop the lock, insert a marker if we
495 * have not already done so.
496 */
497 if ((flags & (SYNC_CLOSE|SYNC_IOWAIT)) ||
498 ((flags & SYNC_DELWRI) && VN_DIRTY(vp))) {
499 if (mount_locked) {
500 IPOINTER_INSERT(ip, mp);
501 }
502 xfs_iunlock(ip, XFS_ILOCK_SHARED);
503
504 if (flags & SYNC_CLOSE) {
505 /* Shutdown case. Flush and invalidate. */
506 if (XFS_FORCED_SHUTDOWN(mp))
507 xfs_tosspages(ip, 0, -1,
508 FI_REMAPF);
509 else
510 error = xfs_flushinval_pages(ip,
511 0, -1, FI_REMAPF);
512 } else if ((flags & SYNC_DELWRI) && VN_DIRTY(vp)) {
513 error = xfs_flush_pages(ip, 0,
514 -1, fflag, FI_NONE);
515 }
516
517		/*
518		 * When freezing, we need to wait to ensure that all I/O
519		 * (including direct I/O) is complete, so that no further data
520		 * modification can take place after this point.
521		 */
522 if (flags & SYNC_IOWAIT)
523 vn_iowait(ip);
524
525 xfs_ilock(ip, XFS_ILOCK_SHARED);
526 }
527
528 if ((flags & SYNC_ATTR) &&
529 (ip->i_update_core ||
530 (ip->i_itemp && ip->i_itemp->ili_format.ilf_fields))) {
531 if (mount_locked)
532 IPOINTER_INSERT(ip, mp);
533
534 if (flags & SYNC_WAIT) {
535 xfs_iflock(ip);
536 error = xfs_iflush(ip, XFS_IFLUSH_SYNC);
537
538 /*
539 * If we can't acquire the flush lock, then the inode
540 * is already being flushed so don't bother waiting.
541 *
542 * If we can lock it then do a delwri flush so we can
543 * combine multiple inode flushes in each disk write.
544 */
545 } else if (xfs_iflock_nowait(ip)) {
546 error = xfs_iflush(ip, XFS_IFLUSH_DELWRI);
547 } else if (bypassed) {
548 (*bypassed)++;
549 }
550 }
551
552 if (lock_flags != 0) {
553 xfs_iunlock(ip, lock_flags);
554 }
555
556 if (vnode_refed) {
557 /*
558 * If we had to take a reference on the vnode
559 * above, then wait until after we've unlocked
560 * the inode to release the reference. This is
561 * because we can be already holding the inode
562 * lock when IRELE() calls xfs_inactive().
563 *
564 * Make sure to drop the mount lock before calling
565 * IRELE() so that we don't trip over ourselves if
566 * we have to go for the mount lock again in the
567 * inactive code.
568 */
569 if (mount_locked) {
570 IPOINTER_INSERT(ip, mp);
571 }
572
573 IRELE(ip);
574
575 vnode_refed = B_FALSE;
576 }
577
578 if (error) {
579 last_error = error;
580 }
581
582 /*
583 * bail out if the filesystem is corrupted.
584 */
585 if (error == EFSCORRUPTED) {
586 if (!mount_locked) {
587 XFS_MOUNT_ILOCK(mp);
588 IPOINTER_REMOVE(ip, mp);
589 }
590 XFS_MOUNT_IUNLOCK(mp);
591 ASSERT(ipointer_in == B_FALSE);
592 kmem_free(ipointer);
593 return XFS_ERROR(error);
594 }
595
596 /* Let other threads have a chance at the mount lock
597 * if we have looped many times without dropping the
598 * lock.
599 */
600 if ((++preempt & XFS_PREEMPT_MASK) == 0) {
601 if (mount_locked) {
602 IPOINTER_INSERT(ip, mp);
603 }
604 }
605
606 if (mount_locked == B_FALSE) {
607 XFS_MOUNT_ILOCK(mp);
608 mount_locked = B_TRUE;
609 IPOINTER_REMOVE(ip, mp);
610 continue;
611 }
612
613 ASSERT(ipointer_in == B_FALSE);
614 ip = ip->i_mnext;
615
616 } while (ip != mp->m_inodes);
617
618 XFS_MOUNT_IUNLOCK(mp);
619
620 ASSERT(ipointer_in == B_FALSE);
621
622 kmem_free(ipointer);
623 return XFS_ERROR(last_error);
624}
625
626/*
627 * xfs sync routine for internal use
628 *
629 * This routine supports all of the flags defined for the generic vfs_sync
630 * interface as explained above under xfs_sync.
631 *
632 */
/*
 * @mp:       mount being synced
 * @flags:    SYNC_* bits selecting what to sync and whether to wait
 * @bypassed: optional counter, bumped by xfs_sync_inodes() for inodes
 *            whose flush had to be skipped
 *
 * Returns the last error recorded, wrapped in XFS_ERROR() (0 if none).
 */
633int
634xfs_syncsub(
635	xfs_mount_t	*mp,
636	int		flags,
637	int		*bypassed)
638{
639	int		error = 0;
640	int		last_error = 0;
641	uint		log_flags = XFS_LOG_FORCE;
642	xfs_buf_t	*bp;
643	xfs_buf_log_item_t	*bip;
644
645	/*
646	 * Sync out the log. This ensures that the log is periodically
647	 * flushed even if there is not enough activity to fill it up.
648	 */
649	if (flags & SYNC_WAIT)
650		log_flags |= XFS_LOG_SYNC;
651
652	xfs_log_force(mp, (xfs_lsn_t)0, log_flags);
653
	/*
	 * Inode pass: for the vfs_sync() case (SYNC_BDFLUSH) just finish
	 * pending inode reclaims; otherwise write back dirty inodes.
	 */
654	if (flags & (SYNC_ATTR|SYNC_DELWRI)) {
655		if (flags & SYNC_BDFLUSH)
656			xfs_finish_reclaim_all(mp, 1);
657		else
658			error = xfs_sync_inodes(mp, flags, bypassed);
659	}
660
	/*
	 * NOTE(review): the "error" from xfs_sync_inodes() above only
	 * reaches last_error through the SYNC_FSDATA block below; when
	 * SYNC_FSDATA is clear it is silently dropped and this function
	 * returns 0 -- confirm this is intentional.
	 */
661	/*
662	 * Flushing out dirty data above probably generated more
663	 * log activity, so if this isn't vfs_sync() then flush
664	 * the log again.
665	 */
666	if (flags & SYNC_DELWRI) {
667		xfs_log_force(mp, (xfs_lsn_t)0, log_flags);
668	}
669
670	if (flags & SYNC_FSDATA) {
671		/*
672		 * If this is vfs_sync() then only sync the superblock
673		 * if we can lock it without sleeping and it is not pinned.
674		 */
675		if (flags & SYNC_BDFLUSH) {
676			bp = xfs_getsb(mp, XFS_BUF_TRYLOCK);
677			if (bp != NULL) {
678				bip = XFS_BUF_FSPRIVATE(bp,xfs_buf_log_item_t*);
679				if ((bip != NULL) &&
680				    xfs_buf_item_dirty(bip)) {
681					if (!(XFS_BUF_ISPINNED(bp))) {
						/*
						 * Dirty and unpinned: start an
						 * async write.  bp ownership
						 * presumably passes to
						 * xfs_bwrite() here -- note
						 * every skip path below
						 * releases it explicitly,
						 * while this one does not.
						 */
682						XFS_BUF_ASYNC(bp);
683						error = xfs_bwrite(mp, bp);
684					} else {
685						xfs_buf_relse(bp);
686					}
687				} else {
688					xfs_buf_relse(bp);
689				}
690			}
691		} else {
			/* Blocking path: always write the superblock,
			 * sync or async depending on SYNC_WAIT. */
692			bp = xfs_getsb(mp, 0);
693			/*
694			 * If the buffer is pinned then push on the log so
695			 * we won't get stuck waiting in the write for
696			 * someone, maybe ourselves, to flush the log.
697			 * Even though we just pushed the log above, we
698			 * did not have the superblock buffer locked at
699			 * that point so it can become pinned in between
700			 * there and here.
701			 */
702			if (XFS_BUF_ISPINNED(bp))
703				xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
704			if (flags & SYNC_WAIT)
705				XFS_BUF_UNASYNC(bp);
706			else
707				XFS_BUF_ASYNC(bp);
708			error = xfs_bwrite(mp, bp);
709		}
710		if (error) {
711			last_error = error;
712		}
713	}
714
715	/*
716	 * Now check to see if the log needs a "dummy" transaction.
717	 */
718	if (!(flags & SYNC_REMOUNT) && xfs_log_need_covered(mp)) {
719		xfs_trans_t *tp;
720		xfs_inode_t *ip;
721
722		/*
723		 * Put a dummy transaction in the log to tell
724		 * recovery that all others are OK.
725		 */
726		tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1);
727		if ((error = xfs_trans_reserve(tp, 0,
728				XFS_ICHANGE_LOG_RES(mp),
729				0, 0, 0))) {
730			xfs_trans_cancel(tp, 0);
			/*
			 * NOTE(review): returns the raw error (not wrapped in
			 * XFS_ERROR() like the normal exit) and discards any
			 * last_error accumulated above -- confirm intended.
			 */
731			return error;
732		}
733
		/* Log a no-op core change against the root inode so that
		 * recovery sees a covered log. */
734		ip = mp->m_rootip;
735		xfs_ilock(ip, XFS_ILOCK_EXCL);
736
737		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
738		xfs_trans_ihold(tp, ip);
739		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
740		error = xfs_trans_commit(tp, 0);
741		xfs_iunlock(ip, XFS_ILOCK_EXCL);
742		xfs_log_force(mp, (xfs_lsn_t)0, log_flags);
743	}
744
745	/*
746	 * When shutting down, we need to ensure that the AIL is pushed
747	 * to disk or the filesystem can appear corrupt from the PROM.
748	 */
749	if ((flags & (SYNC_CLOSE|SYNC_WAIT)) == (SYNC_CLOSE|SYNC_WAIT)) {
750		XFS_bflush(mp->m_ddev_targp);
751		if (mp->m_rtdev_targp) {
752			XFS_bflush(mp->m_rtdev_targp);
753		}
754	}
755
756	return XFS_ERROR(last_error);
757}
diff --git a/fs/xfs/xfs_vfsops.h b/fs/xfs/xfs_vfsops.h
deleted file mode 100644
index a74b05087da..00000000000
--- a/fs/xfs/xfs_vfsops.h
+++ /dev/null
@@ -1,16 +0,0 @@
1#ifndef _XFS_VFSOPS_H
2#define _XFS_VFSOPS_H 1
3
4struct cred;
5struct xfs_fid;
6struct inode;
7struct kstatfs;
8struct xfs_mount;
9struct xfs_mount_args;
10
11int xfs_sync(struct xfs_mount *mp, int flags);
12void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname,
13 int lnnum);
14void xfs_attr_quiesce(struct xfs_mount *mp);
15
16#endif /* _XFS_VFSOPS_H */
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 8b6812f66a1..f07bf8768c3 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -54,33 +54,10 @@
54#include "xfs_vnodeops.h" 54#include "xfs_vnodeops.h"
55 55
56int 56int
57xfs_open(
58 xfs_inode_t *ip)
59{
60 int mode;
61
62 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
63 return XFS_ERROR(EIO);
64
65 /*
66 * If it's a directory with any blocks, read-ahead block 0
67 * as we're almost certain to have the next operation be a read there.
68 */
69 if (S_ISDIR(ip->i_d.di_mode) && ip->i_d.di_nextents > 0) {
70 mode = xfs_ilock_map_shared(ip);
71 if (ip->i_d.di_nextents > 0)
72 (void)xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK);
73 xfs_iunlock(ip, mode);
74 }
75 return 0;
76}
77
78int
79xfs_setattr( 57xfs_setattr(
80 struct xfs_inode *ip, 58 struct xfs_inode *ip,
81 struct iattr *iattr, 59 struct iattr *iattr,
82 int flags, 60 int flags)
83 cred_t *credp)
84{ 61{
85 xfs_mount_t *mp = ip->i_mount; 62 xfs_mount_t *mp = ip->i_mount;
86 struct inode *inode = VFS_I(ip); 63 struct inode *inode = VFS_I(ip);
@@ -93,7 +70,6 @@ xfs_setattr(
93 gid_t gid=0, igid=0; 70 gid_t gid=0, igid=0;
94 int timeflags = 0; 71 int timeflags = 0;
95 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2; 72 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2;
96 int file_owner;
97 int need_iolock = 1; 73 int need_iolock = 1;
98 74
99 xfs_itrace_entry(ip); 75 xfs_itrace_entry(ip);
@@ -104,6 +80,10 @@ xfs_setattr(
104 if (XFS_FORCED_SHUTDOWN(mp)) 80 if (XFS_FORCED_SHUTDOWN(mp))
105 return XFS_ERROR(EIO); 81 return XFS_ERROR(EIO);
106 82
83 code = -inode_change_ok(inode, iattr);
84 if (code)
85 return code;
86
107 olddquot1 = olddquot2 = NULL; 87 olddquot1 = olddquot2 = NULL;
108 udqp = gdqp = NULL; 88 udqp = gdqp = NULL;
109 89
@@ -181,62 +161,8 @@ xfs_setattr(
181 161
182 xfs_ilock(ip, lock_flags); 162 xfs_ilock(ip, lock_flags);
183 163
184 /* boolean: are we the file owner? */
185 file_owner = (current_fsuid() == ip->i_d.di_uid);
186
187 /*
188 * Change various properties of a file.
189 * Only the owner or users with CAP_FOWNER
190 * capability may do these things.
191 */
192 if (mask & (ATTR_MODE|ATTR_UID|ATTR_GID)) {
193 /*
194 * CAP_FOWNER overrides the following restrictions:
195 *
196 * The user ID of the calling process must be equal
197 * to the file owner ID, except in cases where the
198 * CAP_FSETID capability is applicable.
199 */
200 if (!file_owner && !capable(CAP_FOWNER)) {
201 code = XFS_ERROR(EPERM);
202 goto error_return;
203 }
204
205 /*
206 * CAP_FSETID overrides the following restrictions:
207 *
208 * The effective user ID of the calling process shall match
209 * the file owner when setting the set-user-ID and
210 * set-group-ID bits on that file.
211 *
212 * The effective group ID or one of the supplementary group
213 * IDs of the calling process shall match the group owner of
214 * the file when setting the set-group-ID bit on that file
215 */
216 if (mask & ATTR_MODE) {
217 mode_t m = 0;
218
219 if ((iattr->ia_mode & S_ISUID) && !file_owner)
220 m |= S_ISUID;
221 if ((iattr->ia_mode & S_ISGID) &&
222 !in_group_p((gid_t)ip->i_d.di_gid))
223 m |= S_ISGID;
224#if 0
225 /* Linux allows this, Irix doesn't. */
226 if ((iattr->ia_mode & S_ISVTX) && !S_ISDIR(ip->i_d.di_mode))
227 m |= S_ISVTX;
228#endif
229 if (m && !capable(CAP_FSETID))
230 iattr->ia_mode &= ~m;
231 }
232 }
233
234 /* 164 /*
235 * Change file ownership. Must be the owner or privileged. 165 * Change file ownership. Must be the owner or privileged.
236 * If the system was configured with the "restricted_chown"
237 * option, the owner is not permitted to give away the file,
238 * and can change the group id only to a group of which he
239 * or she is a member.
240 */ 166 */
241 if (mask & (ATTR_UID|ATTR_GID)) { 167 if (mask & (ATTR_UID|ATTR_GID)) {
242 /* 168 /*
@@ -251,23 +177,6 @@ xfs_setattr(
251 uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid; 177 uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;
252 178
253 /* 179 /*
254 * CAP_CHOWN overrides the following restrictions:
255 *
256 * If _POSIX_CHOWN_RESTRICTED is defined, this capability
257 * shall override the restriction that a process cannot
258 * change the user ID of a file it owns and the restriction
259 * that the group ID supplied to the chown() function
260 * shall be equal to either the group ID or one of the
261 * supplementary group IDs of the calling process.
262 */
263 if (restricted_chown &&
264 (iuid != uid || (igid != gid &&
265 !in_group_p((gid_t)gid))) &&
266 !capable(CAP_CHOWN)) {
267 code = XFS_ERROR(EPERM);
268 goto error_return;
269 }
270 /*
271 * Do a quota reservation only if uid/gid is actually 180 * Do a quota reservation only if uid/gid is actually
272 * going to change. 181 * going to change.
273 */ 182 */
@@ -304,36 +213,22 @@ xfs_setattr(
304 code = XFS_ERROR(EINVAL); 213 code = XFS_ERROR(EINVAL);
305 goto error_return; 214 goto error_return;
306 } 215 }
216
307 /* 217 /*
308 * Make sure that the dquots are attached to the inode. 218 * Make sure that the dquots are attached to the inode.
309 */ 219 */
310 if ((code = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED))) 220 code = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED);
221 if (code)
311 goto error_return; 222 goto error_return;
312 }
313
314 /*
315 * Change file access or modified times.
316 */
317 if (mask & (ATTR_ATIME|ATTR_MTIME)) {
318 if (!file_owner) {
319 if ((mask & (ATTR_MTIME_SET|ATTR_ATIME_SET)) &&
320 !capable(CAP_FOWNER)) {
321 code = XFS_ERROR(EPERM);
322 goto error_return;
323 }
324 }
325 }
326 223
327 /* 224 /*
328 * Now we can make the changes. Before we join the inode 225 * Now we can make the changes. Before we join the inode
329 * to the transaction, if ATTR_SIZE is set then take care of 226 * to the transaction, if ATTR_SIZE is set then take care of
330 * the part of the truncation that must be done without the 227 * the part of the truncation that must be done without the
331 * inode lock. This needs to be done before joining the inode 228 * inode lock. This needs to be done before joining the inode
332 * to the transaction, because the inode cannot be unlocked 229 * to the transaction, because the inode cannot be unlocked
333 * once it is a part of the transaction. 230 * once it is a part of the transaction.
334 */ 231 */
335 if (mask & ATTR_SIZE) {
336 code = 0;
337 if (iattr->ia_size > ip->i_size) { 232 if (iattr->ia_size > ip->i_size) {
338 /* 233 /*
339 * Do the first part of growing a file: zero any data 234 * Do the first part of growing a file: zero any data
@@ -366,7 +261,7 @@ xfs_setattr(
366 } 261 }
367 262
368 /* wait for all I/O to complete */ 263 /* wait for all I/O to complete */
369 vn_iowait(ip); 264 xfs_ioend_wait(ip);
370 265
371 if (!code) 266 if (!code)
372 code = xfs_itruncate_data(ip, iattr->ia_size); 267 code = xfs_itruncate_data(ip, iattr->ia_size);
@@ -388,17 +283,10 @@ xfs_setattr(
388 } 283 }
389 commit_flags = XFS_TRANS_RELEASE_LOG_RES; 284 commit_flags = XFS_TRANS_RELEASE_LOG_RES;
390 xfs_ilock(ip, XFS_ILOCK_EXCL); 285 xfs_ilock(ip, XFS_ILOCK_EXCL);
391 }
392 286
393 if (tp) {
394 xfs_trans_ijoin(tp, ip, lock_flags); 287 xfs_trans_ijoin(tp, ip, lock_flags);
395 xfs_trans_ihold(tp, ip); 288 xfs_trans_ihold(tp, ip);
396 }
397 289
398 /*
399 * Truncate file. Must have write permission and not be a directory.
400 */
401 if (mask & ATTR_SIZE) {
402 /* 290 /*
403 * Only change the c/mtime if we are changing the size 291 * Only change the c/mtime if we are changing the size
404 * or we are explicitly asked to change it. This handles 292 * or we are explicitly asked to change it. This handles
@@ -438,28 +326,13 @@ xfs_setattr(
438 */ 326 */
439 xfs_iflags_set(ip, XFS_ITRUNCATED); 327 xfs_iflags_set(ip, XFS_ITRUNCATED);
440 } 328 }
441 } 329 } else if (tp) {
442 330 xfs_trans_ijoin(tp, ip, lock_flags);
443 /* 331 xfs_trans_ihold(tp, ip);
444 * Change file access modes.
445 */
446 if (mask & ATTR_MODE) {
447 ip->i_d.di_mode &= S_IFMT;
448 ip->i_d.di_mode |= iattr->ia_mode & ~S_IFMT;
449
450 inode->i_mode &= S_IFMT;
451 inode->i_mode |= iattr->ia_mode & ~S_IFMT;
452
453 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
454 timeflags |= XFS_ICHGTIME_CHG;
455 } 332 }
456 333
457 /* 334 /*
458 * Change file ownership. Must be the owner or privileged. 335 * Change file ownership. Must be the owner or privileged.
459 * If the system was configured with the "restricted_chown"
460 * option, the owner is not permitted to give away the file,
461 * and can change the group id only to a group of which he
462 * or she is a member.
463 */ 336 */
464 if (mask & (ATTR_UID|ATTR_GID)) { 337 if (mask & (ATTR_UID|ATTR_GID)) {
465 /* 338 /*
@@ -503,6 +376,24 @@ xfs_setattr(
503 timeflags |= XFS_ICHGTIME_CHG; 376 timeflags |= XFS_ICHGTIME_CHG;
504 } 377 }
505 378
379 /*
380 * Change file access modes.
381 */
382 if (mask & ATTR_MODE) {
383 umode_t mode = iattr->ia_mode;
384
385 if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
386 mode &= ~S_ISGID;
387
388 ip->i_d.di_mode &= S_IFMT;
389 ip->i_d.di_mode |= mode & ~S_IFMT;
390
391 inode->i_mode &= S_IFMT;
392 inode->i_mode |= mode & ~S_IFMT;
393
394 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
395 timeflags |= XFS_ICHGTIME_CHG;
396 }
506 397
507 /* 398 /*
508 * Change file access or modified times. 399 * Change file access or modified times.
@@ -713,7 +604,7 @@ xfs_fsync(
713 return XFS_ERROR(EIO); 604 return XFS_ERROR(EIO);
714 605
715 /* capture size updates in I/O completion before writing the inode. */ 606 /* capture size updates in I/O completion before writing the inode. */
716 error = filemap_fdatawait(VFS_I(ip)->i_mapping); 607 error = xfs_wait_on_pages(ip, 0, -1);
717 if (error) 608 if (error)
718 return XFS_ERROR(error); 609 return XFS_ERROR(error);
719 610
@@ -1029,6 +920,12 @@ xfs_inactive_symlink_rmt(
1029 goto error0; 920 goto error0;
1030 } 921 }
1031 /* 922 /*
923 * transaction commit worked ok so we can drop the extra ticket
924 * reference that we gained in xfs_trans_dup()
925 */
926 xfs_log_ticket_put(tp->t_ticket);
927
928 /*
1032 * Remove the memory for extent descriptions (just bookkeeping). 929 * Remove the memory for extent descriptions (just bookkeeping).
1033 */ 930 */
1034 if (ip->i_df.if_bytes) 931 if (ip->i_df.if_bytes)
@@ -1625,8 +1522,6 @@ xfs_create(
1625 xfs_trans_set_sync(tp); 1522 xfs_trans_set_sync(tp);
1626 } 1523 }
1627 1524
1628 dp->i_gen++;
1629
1630 /* 1525 /*
1631 * Attach the dquot(s) to the inodes and modify them incore. 1526 * Attach the dquot(s) to the inodes and modify them incore.
1632 * These ids of the inode couldn't have changed since the new 1527 * These ids of the inode couldn't have changed since the new
@@ -1993,13 +1888,6 @@ xfs_remove(
1993 } 1888 }
1994 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 1889 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
1995 1890
1996 /*
1997 * Bump the in memory generation count on the parent
1998 * directory so that other can know that it has changed.
1999 */
2000 dp->i_gen++;
2001 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
2002
2003 if (is_dir) { 1891 if (is_dir) {
2004 /* 1892 /*
2005 * Drop the link from ip's "..". 1893 * Drop the link from ip's "..".
@@ -2009,7 +1897,7 @@ xfs_remove(
2009 goto out_bmap_cancel; 1897 goto out_bmap_cancel;
2010 1898
2011 /* 1899 /*
2012 * Drop the link from dp to ip. 1900 * Drop the "." link from ip to self.
2013 */ 1901 */
2014 error = xfs_droplink(tp, ip); 1902 error = xfs_droplink(tp, ip);
2015 if (error) 1903 if (error)
@@ -2017,14 +1905,14 @@ xfs_remove(
2017 } else { 1905 } else {
2018 /* 1906 /*
2019 * When removing a non-directory we need to log the parent 1907 * When removing a non-directory we need to log the parent
2020 * inode here for the i_gen update. For a directory this is 1908 * inode here. For a directory this is done implicitly
2021 * done implicitly by the xfs_droplink call for the ".." entry. 1909 * by the xfs_droplink call for the ".." entry.
2022 */ 1910 */
2023 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 1911 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
2024 } 1912 }
2025 1913
2026 /* 1914 /*
2027 * Drop the "." link from ip to self. 1915 * Drop the link from dp to ip.
2028 */ 1916 */
2029 error = xfs_droplink(tp, ip); 1917 error = xfs_droplink(tp, ip);
2030 if (error) 1918 if (error)
@@ -2178,7 +2066,6 @@ xfs_link(
2178 if (error) 2066 if (error)
2179 goto abort_return; 2067 goto abort_return;
2180 xfs_ichgtime(tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2068 xfs_ichgtime(tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
2181 tdp->i_gen++;
2182 xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE); 2069 xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);
2183 2070
2184 error = xfs_bumplink(tp, sip); 2071 error = xfs_bumplink(tp, sip);
@@ -2355,18 +2242,10 @@ xfs_mkdir(
2355 } 2242 }
2356 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2243 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
2357 2244
2358 /*
2359 * Bump the in memory version number of the parent directory
2360 * so that other processes accessing it will recognize that
2361 * the directory has changed.
2362 */
2363 dp->i_gen++;
2364
2365 error = xfs_dir_init(tp, cdp, dp); 2245 error = xfs_dir_init(tp, cdp, dp);
2366 if (error) 2246 if (error)
2367 goto error2; 2247 goto error2;
2368 2248
2369 cdp->i_gen = 1;
2370 error = xfs_bumplink(tp, dp); 2249 error = xfs_bumplink(tp, dp);
2371 if (error) 2250 if (error)
2372 goto error2; 2251 goto error2;
@@ -2653,13 +2532,6 @@ xfs_symlink(
2653 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 2532 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
2654 2533
2655 /* 2534 /*
2656 * Bump the in memory version number of the parent directory
2657 * so that other processes accessing it will recognize that
2658 * the directory has changed.
2659 */
2660 dp->i_gen++;
2661
2662 /*
2663 * If this is a synchronous mount, make sure that the 2535 * If this is a synchronous mount, make sure that the
2664 * symlink transaction goes to disk before returning to 2536 * symlink transaction goes to disk before returning to
2665 * the user. 2537 * the user.
@@ -2809,7 +2681,7 @@ xfs_reclaim(
2809 return 0; 2681 return 0;
2810 } 2682 }
2811 2683
2812 vn_iowait(ip); 2684 xfs_ioend_wait(ip);
2813 2685
2814 ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); 2686 ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
2815 2687
@@ -2833,122 +2705,10 @@ xfs_reclaim(
2833 if (!ip->i_update_core && (ip->i_itemp == NULL)) { 2705 if (!ip->i_update_core && (ip->i_itemp == NULL)) {
2834 xfs_ilock(ip, XFS_ILOCK_EXCL); 2706 xfs_ilock(ip, XFS_ILOCK_EXCL);
2835 xfs_iflock(ip); 2707 xfs_iflock(ip);
2836 return xfs_finish_reclaim(ip, 1, XFS_IFLUSH_DELWRI_ELSE_SYNC); 2708 xfs_iflags_set(ip, XFS_IRECLAIMABLE);
2837 } else { 2709 return xfs_reclaim_inode(ip, 1, XFS_IFLUSH_DELWRI_ELSE_SYNC);
2838 xfs_mount_t *mp = ip->i_mount;
2839
2840 /* Protect sync and unpin from us */
2841 XFS_MOUNT_ILOCK(mp);
2842 spin_lock(&ip->i_flags_lock);
2843 __xfs_iflags_set(ip, XFS_IRECLAIMABLE);
2844 VFS_I(ip)->i_private = NULL;
2845 ip->i_vnode = NULL;
2846 spin_unlock(&ip->i_flags_lock);
2847 list_add_tail(&ip->i_reclaim, &mp->m_del_inodes);
2848 XFS_MOUNT_IUNLOCK(mp);
2849 }
2850 return 0;
2851}
2852
2853int
2854xfs_finish_reclaim(
2855 xfs_inode_t *ip,
2856 int locked,
2857 int sync_mode)
2858{
2859 xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino);
2860 struct inode *vp = VFS_I(ip);
2861
2862 if (vp && VN_BAD(vp))
2863 goto reclaim;
2864
2865 /* The hash lock here protects a thread in xfs_iget_core from
2866 * racing with us on linking the inode back with a vnode.
2867 * Once we have the XFS_IRECLAIM flag set it will not touch
2868 * us.
2869 */
2870 write_lock(&pag->pag_ici_lock);
2871 spin_lock(&ip->i_flags_lock);
2872 if (__xfs_iflags_test(ip, XFS_IRECLAIM) ||
2873 (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) && vp == NULL)) {
2874 spin_unlock(&ip->i_flags_lock);
2875 write_unlock(&pag->pag_ici_lock);
2876 if (locked) {
2877 xfs_ifunlock(ip);
2878 xfs_iunlock(ip, XFS_ILOCK_EXCL);
2879 }
2880 return 1;
2881 }
2882 __xfs_iflags_set(ip, XFS_IRECLAIM);
2883 spin_unlock(&ip->i_flags_lock);
2884 write_unlock(&pag->pag_ici_lock);
2885 xfs_put_perag(ip->i_mount, pag);
2886
2887 /*
2888 * If the inode is still dirty, then flush it out. If the inode
2889 * is not in the AIL, then it will be OK to flush it delwri as
2890 * long as xfs_iflush() does not keep any references to the inode.
2891 * We leave that decision up to xfs_iflush() since it has the
2892 * knowledge of whether it's OK to simply do a delwri flush of
2893 * the inode or whether we need to wait until the inode is
2894 * pulled from the AIL.
2895 * We get the flush lock regardless, though, just to make sure
2896 * we don't free it while it is being flushed.
2897 */
2898 if (!locked) {
2899 xfs_ilock(ip, XFS_ILOCK_EXCL);
2900 xfs_iflock(ip);
2901 } 2710 }
2902 2711 xfs_inode_set_reclaim_tag(ip);
2903 /*
2904 * In the case of a forced shutdown we rely on xfs_iflush() to
2905 * wait for the inode to be unpinned before returning an error.
2906 */
2907 if (xfs_iflush(ip, sync_mode) == 0) {
2908 /* synchronize with xfs_iflush_done */
2909 xfs_iflock(ip);
2910 xfs_ifunlock(ip);
2911 }
2912
2913 xfs_iunlock(ip, XFS_ILOCK_EXCL);
2914
2915 reclaim:
2916 xfs_ireclaim(ip);
2917 return 0;
2918}
2919
2920int
2921xfs_finish_reclaim_all(xfs_mount_t *mp, int noblock)
2922{
2923 int purged;
2924 xfs_inode_t *ip, *n;
2925 int done = 0;
2926
2927 while (!done) {
2928 purged = 0;
2929 XFS_MOUNT_ILOCK(mp);
2930 list_for_each_entry_safe(ip, n, &mp->m_del_inodes, i_reclaim) {
2931 if (noblock) {
2932 if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0)
2933 continue;
2934 if (xfs_ipincount(ip) ||
2935 !xfs_iflock_nowait(ip)) {
2936 xfs_iunlock(ip, XFS_ILOCK_EXCL);
2937 continue;
2938 }
2939 }
2940 XFS_MOUNT_IUNLOCK(mp);
2941 if (xfs_finish_reclaim(ip, noblock,
2942 XFS_IFLUSH_DELWRI_ELSE_ASYNC))
2943 delay(1);
2944 purged = 1;
2945 break;
2946 }
2947
2948 done = !purged;
2949 }
2950
2951 XFS_MOUNT_IUNLOCK(mp);
2952 return 0; 2712 return 0;
2953} 2713}
2954 2714
@@ -3197,6 +2957,8 @@ xfs_zero_remaining_bytes(
3197 bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize, 2957 bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize,
3198 XFS_IS_REALTIME_INODE(ip) ? 2958 XFS_IS_REALTIME_INODE(ip) ?
3199 mp->m_rtdev_targp : mp->m_ddev_targp); 2959 mp->m_rtdev_targp : mp->m_ddev_targp);
2960 if (!bp)
2961 return XFS_ERROR(ENOMEM);
3200 2962
3201 for (offset = startoff; offset <= endoff; offset = lastoffset + 1) { 2963 for (offset = startoff; offset <= endoff; offset = lastoffset + 1) {
3202 offset_fsb = XFS_B_TO_FSBT(mp, offset); 2964 offset_fsb = XFS_B_TO_FSBT(mp, offset);
@@ -3312,7 +3074,8 @@ xfs_free_file_space(
3312 need_iolock = 0; 3074 need_iolock = 0;
3313 if (need_iolock) { 3075 if (need_iolock) {
3314 xfs_ilock(ip, XFS_IOLOCK_EXCL); 3076 xfs_ilock(ip, XFS_IOLOCK_EXCL);
3315 vn_iowait(ip); /* wait for the completion of any pending DIOs */ 3077 /* wait for the completion of any pending DIOs */
3078 xfs_ioend_wait(ip);
3316 } 3079 }
3317 3080
3318 rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); 3081 rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
@@ -3474,7 +3237,6 @@ xfs_change_file_space(
3474 int cmd, 3237 int cmd,
3475 xfs_flock64_t *bf, 3238 xfs_flock64_t *bf,
3476 xfs_off_t offset, 3239 xfs_off_t offset,
3477 cred_t *credp,
3478 int attr_flags) 3240 int attr_flags)
3479{ 3241{
3480 xfs_mount_t *mp = ip->i_mount; 3242 xfs_mount_t *mp = ip->i_mount;
@@ -3562,7 +3324,7 @@ xfs_change_file_space(
3562 iattr.ia_valid = ATTR_SIZE; 3324 iattr.ia_valid = ATTR_SIZE;
3563 iattr.ia_size = startoffset; 3325 iattr.ia_size = startoffset;
3564 3326
3565 error = xfs_setattr(ip, &iattr, attr_flags, credp); 3327 error = xfs_setattr(ip, &iattr, attr_flags);
3566 3328
3567 if (error) 3329 if (error)
3568 return error; 3330 return error;
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index e932a96bec5..76df328c61b 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -14,9 +14,7 @@ struct xfs_inode;
14struct xfs_iomap; 14struct xfs_iomap;
15 15
16 16
17int xfs_open(struct xfs_inode *ip); 17int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags);
18int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags,
19 struct cred *credp);
20#define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */ 18#define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */
21#define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */ 19#define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */
22#define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */ 20#define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */
@@ -28,24 +26,23 @@ int xfs_inactive(struct xfs_inode *ip);
28int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, 26int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
29 struct xfs_inode **ipp, struct xfs_name *ci_name); 27 struct xfs_inode **ipp, struct xfs_name *ci_name);
30int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode, 28int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode,
31 xfs_dev_t rdev, struct xfs_inode **ipp, struct cred *credp); 29 xfs_dev_t rdev, struct xfs_inode **ipp, cred_t *credp);
32int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, 30int xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
33 struct xfs_inode *ip); 31 struct xfs_inode *ip);
34int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, 32int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
35 struct xfs_name *target_name); 33 struct xfs_name *target_name);
36int xfs_mkdir(struct xfs_inode *dp, struct xfs_name *dir_name, 34int xfs_mkdir(struct xfs_inode *dp, struct xfs_name *dir_name,
37 mode_t mode, struct xfs_inode **ipp, struct cred *credp); 35 mode_t mode, struct xfs_inode **ipp, cred_t *credp);
38int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize, 36int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize,
39 xfs_off_t *offset, filldir_t filldir); 37 xfs_off_t *offset, filldir_t filldir);
40int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name, 38int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,
41 const char *target_path, mode_t mode, struct xfs_inode **ipp, 39 const char *target_path, mode_t mode, struct xfs_inode **ipp,
42 struct cred *credp); 40 cred_t *credp);
43int xfs_inode_flush(struct xfs_inode *ip, int flags); 41int xfs_inode_flush(struct xfs_inode *ip, int flags);
44int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state); 42int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state);
45int xfs_reclaim(struct xfs_inode *ip); 43int xfs_reclaim(struct xfs_inode *ip);
46int xfs_change_file_space(struct xfs_inode *ip, int cmd, 44int xfs_change_file_space(struct xfs_inode *ip, int cmd,
47 xfs_flock64_t *bf, xfs_off_t offset, 45 xfs_flock64_t *bf, xfs_off_t offset, int attr_flags);
48 struct cred *credp, int attr_flags);
49int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name, 46int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name,
50 struct xfs_inode *src_ip, struct xfs_inode *target_dp, 47 struct xfs_inode *src_ip, struct xfs_inode *target_dp,
51 struct xfs_name *target_name, struct xfs_inode *target_ip); 48 struct xfs_name *target_name, struct xfs_inode *target_ip);
@@ -56,8 +53,6 @@ int xfs_attr_set(struct xfs_inode *dp, const char *name, char *value,
56int xfs_attr_remove(struct xfs_inode *dp, const char *name, int flags); 53int xfs_attr_remove(struct xfs_inode *dp, const char *name, int flags);
57int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize, 54int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize,
58 int flags, struct attrlist_cursor_kern *cursor); 55 int flags, struct attrlist_cursor_kern *cursor);
59int xfs_ioctl(struct xfs_inode *ip, struct file *filp,
60 int ioflags, unsigned int cmd, void __user *arg);
61ssize_t xfs_read(struct xfs_inode *ip, struct kiocb *iocb, 56ssize_t xfs_read(struct xfs_inode *ip, struct kiocb *iocb,
62 const struct iovec *iovp, unsigned int segs, 57 const struct iovec *iovp, unsigned int segs,
63 loff_t *offset, int ioflags); 58 loff_t *offset, int ioflags);
@@ -78,5 +73,6 @@ int xfs_flushinval_pages(struct xfs_inode *ip, xfs_off_t first,
78 xfs_off_t last, int fiopt); 73 xfs_off_t last, int fiopt);
79int xfs_flush_pages(struct xfs_inode *ip, xfs_off_t first, 74int xfs_flush_pages(struct xfs_inode *ip, xfs_off_t first,
80 xfs_off_t last, uint64_t flags, int fiopt); 75 xfs_off_t last, uint64_t flags, int fiopt);
76int xfs_wait_on_pages(struct xfs_inode *ip, xfs_off_t first, xfs_off_t last);
81 77
82#endif /* _XFS_VNODEOPS_H */ 78#endif /* _XFS_VNODEOPS_H */