aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/9p.h375
-rw-r--r--fs/9p/Makefile6
-rw-r--r--fs/9p/conv.c845
-rw-r--r--fs/9p/conv.h50
-rw-r--r--fs/9p/debug.h77
-rw-r--r--fs/9p/error.c93
-rw-r--r--fs/9p/error.h177
-rw-r--r--fs/9p/fcall.c427
-rw-r--r--fs/9p/fcprint.c345
-rw-r--r--fs/9p/fid.c168
-rw-r--r--fs/9p/fid.h43
-rw-r--r--fs/9p/mux.c1033
-rw-r--r--fs/9p/mux.h55
-rw-r--r--fs/9p/trans_fd.c308
-rw-r--r--fs/9p/transport.h45
-rw-r--r--fs/9p/v9fs.c293
-rw-r--r--fs/9p/v9fs.h32
-rw-r--r--fs/9p/v9fs_vfs.h6
-rw-r--r--fs/9p/vfs_addr.c57
-rw-r--r--fs/9p/vfs_dentry.c37
-rw-r--r--fs/9p/vfs_dir.c155
-rw-r--r--fs/9p/vfs_file.c166
-rw-r--r--fs/9p/vfs_inode.c754
-rw-r--r--fs/9p/vfs_super.c93
-rw-r--r--fs/Kconfig2
-rw-r--r--fs/adfs/file.c2
-rw-r--r--fs/affs/file.c2
-rw-r--r--fs/afs/file.c2
-rw-r--r--fs/afs/internal.h2
-rw-r--r--fs/bad_inode.c7
-rw-r--r--fs/bfs/file.c2
-rw-r--r--fs/binfmt_elf.c7
-rw-r--r--fs/binfmt_flat.c2
-rw-r--r--fs/bio.c2
-rw-r--r--fs/block_dev.c1
-rw-r--r--fs/cifs/cifs_debug.c252
-rw-r--r--fs/cifs/cifs_unicode.c12
-rw-r--r--fs/cifs/cifsfs.c10
-rw-r--r--fs/cifs/cifssmb.c4
-rw-r--r--fs/cifs/connect.c16
-rw-r--r--fs/cifs/dir.c184
-rw-r--r--fs/cifs/fcntl.c46
-rw-r--r--fs/cifs/inode.c5
-rw-r--r--fs/cifs/ioctl.c29
-rw-r--r--fs/cifs/rfc1002pdu.h2
-rw-r--r--fs/coda/dir.c2
-rw-r--r--fs/coda/file.c11
-rw-r--r--fs/debugfs/inode.c63
-rw-r--r--fs/direct-io.c2
-rw-r--r--fs/dlm/Kconfig2
-rw-r--r--fs/dlm/Makefile1
-rw-r--r--fs/dlm/config.c25
-rw-r--r--fs/dlm/config.h1
-rw-r--r--fs/dlm/debug_fs.c186
-rw-r--r--fs/dlm/dlm_internal.h17
-rw-r--r--fs/dlm/lock.c470
-rw-r--r--fs/dlm/lock.h13
-rw-r--r--fs/dlm/lockspace.c86
-rw-r--r--fs/dlm/lowcomms.c23
-rw-r--r--fs/dlm/main.c11
-rw-r--r--fs/dlm/member.c11
-rw-r--r--fs/dlm/netlink.c153
-rw-r--r--fs/dlm/rcom.c13
-rw-r--r--fs/dlm/recoverd.c4
-rw-r--r--fs/dlm/user.c129
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h2
-rw-r--r--fs/ecryptfs/file.c15
-rw-r--r--fs/ecryptfs/inode.c66
-rw-r--r--fs/ecryptfs/main.c2
-rw-r--r--fs/ecryptfs/mmap.c55
-rw-r--r--fs/ext2/file.c2
-rw-r--r--fs/ext2/super.c9
-rw-r--r--fs/ext3/file.c1
-rw-r--r--fs/ext3/inode.c4
-rw-r--r--fs/ext4/balloc.c6
-rw-r--r--fs/ext4/extents.c148
-rw-r--r--fs/ext4/file.c1
-rw-r--r--fs/ext4/inode.c8
-rw-r--r--fs/ext4/namei.c4
-rw-r--r--fs/ext4/super.c2
-rw-r--r--fs/fat/file.c2
-rw-r--r--fs/fuse/file.c4
-rw-r--r--fs/fuse/inode.c3
-rw-r--r--fs/gfs2/Makefile2
-rw-r--r--fs/gfs2/bmap.c23
-rw-r--r--fs/gfs2/daemon.c11
-rw-r--r--fs/gfs2/dir.c69
-rw-r--r--fs/gfs2/dir.h9
-rw-r--r--fs/gfs2/eattr.c14
-rw-r--r--fs/gfs2/glock.c123
-rw-r--r--fs/gfs2/glock.h1
-rw-r--r--fs/gfs2/glops.c2
-rw-r--r--fs/gfs2/incore.h81
-rw-r--r--fs/gfs2/inode.c288
-rw-r--r--fs/gfs2/inode.h30
-rw-r--r--fs/gfs2/locking/dlm/lock.c11
-rw-r--r--fs/gfs2/locking/dlm/lock_dlm.h2
-rw-r--r--fs/gfs2/locking/dlm/mount.c2
-rw-r--r--fs/gfs2/locking/dlm/plock.c8
-rw-r--r--fs/gfs2/locking/dlm/thread.c11
-rw-r--r--fs/gfs2/log.c129
-rw-r--r--fs/gfs2/lops.c49
-rw-r--r--fs/gfs2/lops.h23
-rw-r--r--fs/gfs2/meta_io.c8
-rw-r--r--fs/gfs2/meta_io.h2
-rw-r--r--fs/gfs2/mount.c25
-rw-r--r--fs/gfs2/ondisk.c251
-rw-r--r--fs/gfs2/ops_address.c69
-rw-r--r--fs/gfs2/ops_address.h2
-rw-r--r--fs/gfs2/ops_dentry.c24
-rw-r--r--fs/gfs2/ops_export.c65
-rw-r--r--fs/gfs2/ops_export.h22
-rw-r--r--fs/gfs2/ops_file.c5
-rw-r--r--fs/gfs2/ops_fstype.c33
-rw-r--r--fs/gfs2/ops_fstype.h1
-rw-r--r--fs/gfs2/ops_inode.c30
-rw-r--r--fs/gfs2/ops_super.c8
-rw-r--r--fs/gfs2/ops_vm.c2
-rw-r--r--fs/gfs2/quota.c57
-rw-r--r--fs/gfs2/recovery.c22
-rw-r--r--fs/gfs2/rgrp.c377
-rw-r--r--fs/gfs2/rgrp.h1
-rw-r--r--fs/gfs2/super.c79
-rw-r--r--fs/gfs2/super.h2
-rw-r--r--fs/gfs2/util.c6
-rw-r--r--fs/hfs/inode.c2
-rw-r--r--fs/hfsplus/inode.c2
-rw-r--r--fs/hostfs/hostfs_kern.c2
-rw-r--r--fs/hpfs/file.c2
-rw-r--r--fs/hugetlbfs/inode.c7
-rw-r--r--fs/ioctl.c14
-rw-r--r--fs/jffs2/file.c2
-rw-r--r--fs/jffs2/readinode.c45
-rw-r--r--fs/jffs2/super.c194
-rw-r--r--fs/jffs2/xattr.c6
-rw-r--r--fs/jfs/endian24.h2
-rw-r--r--fs/jfs/file.c1
-rw-r--r--fs/jfs/jfs_debug.c28
-rw-r--r--fs/jfs/jfs_debug.h2
-rw-r--r--fs/jfs/jfs_dinode.h42
-rw-r--r--fs/jfs/jfs_dmap.c419
-rw-r--r--fs/jfs/jfs_dmap.h118
-rw-r--r--fs/jfs/jfs_dtree.c105
-rw-r--r--fs/jfs/jfs_dtree.h2
-rw-r--r--fs/jfs/jfs_extent.c102
-rw-r--r--fs/jfs/jfs_filsys.h13
-rw-r--r--fs/jfs/jfs_imap.c296
-rw-r--r--fs/jfs/jfs_imap.h98
-rw-r--r--fs/jfs/jfs_incore.h4
-rw-r--r--fs/jfs/jfs_logmgr.c90
-rw-r--r--fs/jfs/jfs_logmgr.h26
-rw-r--r--fs/jfs/jfs_metapage.c3
-rw-r--r--fs/jfs/jfs_mount.c6
-rw-r--r--fs/jfs/jfs_txnmgr.c302
-rw-r--r--fs/jfs/jfs_txnmgr.h2
-rw-r--r--fs/jfs/jfs_types.h20
-rw-r--r--fs/jfs/jfs_umount.c2
-rw-r--r--fs/jfs/jfs_xtree.c428
-rw-r--r--fs/jfs/jfs_xtree.h48
-rw-r--r--fs/jfs/namei.c26
-rw-r--r--fs/jfs/resize.c48
-rw-r--r--fs/jfs/xattr.c9
-rw-r--r--fs/lockd/host.c39
-rw-r--r--fs/lockd/mon.c2
-rw-r--r--fs/lockd/svc.c6
-rw-r--r--fs/minix/file.c2
-rw-r--r--fs/nfs/Makefile4
-rw-r--r--fs/nfs/client.c28
-rw-r--r--fs/nfs/delegation.c186
-rw-r--r--fs/nfs/delegation.h26
-rw-r--r--fs/nfs/dir.c16
-rw-r--r--fs/nfs/direct.c34
-rw-r--r--fs/nfs/file.c15
-rw-r--r--fs/nfs/inode.c73
-rw-r--r--fs/nfs/internal.h4
-rw-r--r--fs/nfs/mount_clnt.c169
-rw-r--r--fs/nfs/nfs2xdr.c6
-rw-r--r--fs/nfs/nfs3proc.c4
-rw-r--r--fs/nfs/nfs3xdr.c8
-rw-r--r--fs/nfs/nfs4_fs.h40
-rw-r--r--fs/nfs/nfs4proc.c760
-rw-r--r--fs/nfs/nfs4state.c310
-rw-r--r--fs/nfs/nfs4xdr.c126
-rw-r--r--fs/nfs/nfsroot.c5
-rw-r--r--fs/nfs/pagelist.c60
-rw-r--r--fs/nfs/read.c40
-rw-r--r--fs/nfs/super.c1189
-rw-r--r--fs/nfs/write.c149
-rw-r--r--fs/nfsd/nfs4callback.c18
-rw-r--r--fs/nfsd/nfs4state.c1
-rw-r--r--fs/nfsd/vfs.c46
-rw-r--r--fs/ntfs/file.c2
-rw-r--r--fs/ntfs/inode.c2
-rw-r--r--fs/ocfs2/aops.c20
-rw-r--r--fs/ocfs2/cluster/masklog.c4
-rw-r--r--fs/ocfs2/file.c18
-rw-r--r--fs/partitions/check.c1
-rw-r--r--fs/partitions/ibm.c167
-rw-r--r--fs/pipe.c70
-rw-r--r--fs/proc/array.c59
-rw-r--r--fs/proc/base.c71
-rw-r--r--fs/qnx4/file.c2
-rw-r--r--fs/ramfs/file-mmu.c2
-rw-r--r--fs/ramfs/file-nommu.c7
-rw-r--r--fs/read_write.c20
-rw-r--r--fs/reiserfs/file.c1
-rw-r--r--fs/seq_file.c34
-rw-r--r--fs/signalfd.c3
-rw-r--r--fs/smbfs/file.c9
-rw-r--r--fs/splice.c538
-rw-r--r--fs/sync.c8
-rw-r--r--fs/sysfs/bin.c195
-rw-r--r--fs/sysfs/dir.c1297
-rw-r--r--fs/sysfs/file.c379
-rw-r--r--fs/sysfs/group.c55
-rw-r--r--fs/sysfs/inode.c206
-rw-r--r--fs/sysfs/mount.c37
-rw-r--r--fs/sysfs/symlink.c150
-rw-r--r--fs/sysfs/sysfs.h169
-rw-r--r--fs/sysv/file.c2
-rw-r--r--fs/udf/file.c2
-rw-r--r--fs/udf/inode.c23
-rw-r--r--fs/udf/super.c2
-rw-r--r--fs/udf/truncate.c79
-rw-r--r--fs/udf/udfdecl.h1
-rw-r--r--fs/ufs/file.c2
-rw-r--r--fs/utimes.c13
-rw-r--r--fs/xfs/Makefile-linux-2.62
-rw-r--r--fs/xfs/linux-2.6/kmem.h19
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c43
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c59
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c37
-rw-r--r--fs/xfs/linux-2.6/xfs_globals.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.c321
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c46
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.h3
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c5
-rw-r--r--fs/xfs/linux-2.6/xfs_sysctl.c11
-rw-r--r--fs/xfs/linux-2.6/xfs_sysctl.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_vfs.h15
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.h11
-rw-r--r--fs/xfs/quota/xfs_qm.c9
-rw-r--r--fs/xfs/xfs.h1
-rw-r--r--fs/xfs/xfs_ag.h9
-rw-r--r--fs/xfs/xfs_alloc.c101
-rw-r--r--fs/xfs/xfs_alloc.h6
-rw-r--r--fs/xfs/xfs_alloc_btree.c20
-rw-r--r--fs/xfs/xfs_bit.c91
-rw-r--r--fs/xfs/xfs_bit.h4
-rw-r--r--fs/xfs/xfs_bmap.c369
-rw-r--r--fs/xfs/xfs_bmap.h6
-rw-r--r--fs/xfs/xfs_bmap_btree.c88
-rw-r--r--fs/xfs/xfs_btree.h32
-rw-r--r--fs/xfs/xfs_buf_item.c4
-rw-r--r--fs/xfs/xfs_clnt.h2
-rw-r--r--fs/xfs/xfs_dinode.h4
-rw-r--r--fs/xfs/xfs_dir2.c12
-rw-r--r--fs/xfs/xfs_dir2_block.c98
-rw-r--r--fs/xfs/xfs_dir2_block.h2
-rw-r--r--fs/xfs/xfs_dir2_data.c54
-rw-r--r--fs/xfs/xfs_dir2_data.h12
-rw-r--r--fs/xfs/xfs_dir2_leaf.c106
-rw-r--r--fs/xfs/xfs_dir2_leaf.h29
-rw-r--r--fs/xfs/xfs_dir2_node.c66
-rw-r--r--fs/xfs/xfs_dir2_node.h4
-rw-r--r--fs/xfs/xfs_dir2_sf.c204
-rw-r--r--fs/xfs/xfs_dir2_sf.h20
-rw-r--r--fs/xfs/xfs_filestream.c771
-rw-r--r--fs/xfs/xfs_filestream.h136
-rw-r--r--fs/xfs/xfs_fs.h2
-rw-r--r--fs/xfs/xfs_fsops.c17
-rw-r--r--fs/xfs/xfs_ialloc.c28
-rw-r--r--fs/xfs/xfs_ialloc.h10
-rw-r--r--fs/xfs/xfs_inode.c39
-rw-r--r--fs/xfs/xfs_inode.h16
-rw-r--r--fs/xfs/xfs_iomap.c41
-rw-r--r--fs/xfs/xfs_itable.c42
-rw-r--r--fs/xfs/xfs_itable.h20
-rw-r--r--fs/xfs/xfs_log.c41
-rw-r--r--fs/xfs/xfs_log_recover.c8
-rw-r--r--fs/xfs/xfs_mount.c237
-rw-r--r--fs/xfs/xfs_mount.h15
-rw-r--r--fs/xfs/xfs_mru_cache.c608
-rw-r--r--fs/xfs/xfs_mru_cache.h57
-rw-r--r--fs/xfs/xfs_rtalloc.c4
-rw-r--r--fs/xfs/xfs_rw.h36
-rw-r--r--fs/xfs/xfs_sb.h16
-rw-r--r--fs/xfs/xfs_trans.c125
-rw-r--r--fs/xfs/xfs_trans.h3
-rw-r--r--fs/xfs/xfs_vfsops.c159
-rw-r--r--fs/xfs/xfs_vnodeops.c122
294 files changed, 12409 insertions, 11379 deletions
diff --git a/fs/9p/9p.h b/fs/9p/9p.h
deleted file mode 100644
index 94e2f92ab2e8..000000000000
--- a/fs/9p/9p.h
+++ /dev/null
@@ -1,375 +0,0 @@
1/*
2 * linux/fs/9p/9p.h
3 *
4 * 9P protocol definitions.
5 *
6 * Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
7 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
8 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2
12 * as published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to:
21 * Free Software Foundation
22 * 51 Franklin Street, Fifth Floor
23 * Boston, MA 02111-1301 USA
24 *
25 */
26
27/* Message Types */
28enum {
29 TVERSION = 100,
30 RVERSION,
31 TAUTH = 102,
32 RAUTH,
33 TATTACH = 104,
34 RATTACH,
35 TERROR = 106,
36 RERROR,
37 TFLUSH = 108,
38 RFLUSH,
39 TWALK = 110,
40 RWALK,
41 TOPEN = 112,
42 ROPEN,
43 TCREATE = 114,
44 RCREATE,
45 TREAD = 116,
46 RREAD,
47 TWRITE = 118,
48 RWRITE,
49 TCLUNK = 120,
50 RCLUNK,
51 TREMOVE = 122,
52 RREMOVE,
53 TSTAT = 124,
54 RSTAT,
55 TWSTAT = 126,
56 RWSTAT,
57};
58
59/* modes */
60enum {
61 V9FS_OREAD = 0x00,
62 V9FS_OWRITE = 0x01,
63 V9FS_ORDWR = 0x02,
64 V9FS_OEXEC = 0x03,
65 V9FS_OEXCL = 0x04,
66 V9FS_OTRUNC = 0x10,
67 V9FS_OREXEC = 0x20,
68 V9FS_ORCLOSE = 0x40,
69 V9FS_OAPPEND = 0x80,
70};
71
72/* permissions */
73enum {
74 V9FS_DMDIR = 0x80000000,
75 V9FS_DMAPPEND = 0x40000000,
76 V9FS_DMEXCL = 0x20000000,
77 V9FS_DMMOUNT = 0x10000000,
78 V9FS_DMAUTH = 0x08000000,
79 V9FS_DMTMP = 0x04000000,
80 V9FS_DMSYMLINK = 0x02000000,
81 V9FS_DMLINK = 0x01000000,
82 /* 9P2000.u extensions */
83 V9FS_DMDEVICE = 0x00800000,
84 V9FS_DMNAMEDPIPE = 0x00200000,
85 V9FS_DMSOCKET = 0x00100000,
86 V9FS_DMSETUID = 0x00080000,
87 V9FS_DMSETGID = 0x00040000,
88};
89
90/* qid.types */
91enum {
92 V9FS_QTDIR = 0x80,
93 V9FS_QTAPPEND = 0x40,
94 V9FS_QTEXCL = 0x20,
95 V9FS_QTMOUNT = 0x10,
96 V9FS_QTAUTH = 0x08,
97 V9FS_QTTMP = 0x04,
98 V9FS_QTSYMLINK = 0x02,
99 V9FS_QTLINK = 0x01,
100 V9FS_QTFILE = 0x00,
101};
102
103#define V9FS_NOTAG (u16)(~0)
104#define V9FS_NOFID (u32)(~0)
105#define V9FS_MAXWELEM 16
106
107/* ample room for Twrite/Rread header (iounit) */
108#define V9FS_IOHDRSZ 24
109
110struct v9fs_str {
111 u16 len;
112 char *str;
113};
114
115/* qids are the unique ID for a file (like an inode */
116struct v9fs_qid {
117 u8 type;
118 u32 version;
119 u64 path;
120};
121
122/* Plan 9 file metadata (stat) structure */
123struct v9fs_stat {
124 u16 size;
125 u16 type;
126 u32 dev;
127 struct v9fs_qid qid;
128 u32 mode;
129 u32 atime;
130 u32 mtime;
131 u64 length;
132 struct v9fs_str name;
133 struct v9fs_str uid;
134 struct v9fs_str gid;
135 struct v9fs_str muid;
136 struct v9fs_str extension; /* 9p2000.u extensions */
137 u32 n_uid; /* 9p2000.u extensions */
138 u32 n_gid; /* 9p2000.u extensions */
139 u32 n_muid; /* 9p2000.u extensions */
140};
141
142/* file metadata (stat) structure used to create Twstat message
143 The is similar to v9fs_stat, but the strings don't point to
144 the same memory block and should be freed separately
145*/
146struct v9fs_wstat {
147 u16 size;
148 u16 type;
149 u32 dev;
150 struct v9fs_qid qid;
151 u32 mode;
152 u32 atime;
153 u32 mtime;
154 u64 length;
155 char *name;
156 char *uid;
157 char *gid;
158 char *muid;
159 char *extension; /* 9p2000.u extensions */
160 u32 n_uid; /* 9p2000.u extensions */
161 u32 n_gid; /* 9p2000.u extensions */
162 u32 n_muid; /* 9p2000.u extensions */
163};
164
165/* Structures for Protocol Operations */
166
167struct Tversion {
168 u32 msize;
169 struct v9fs_str version;
170};
171
172struct Rversion {
173 u32 msize;
174 struct v9fs_str version;
175};
176
177struct Tauth {
178 u32 afid;
179 struct v9fs_str uname;
180 struct v9fs_str aname;
181};
182
183struct Rauth {
184 struct v9fs_qid qid;
185};
186
187struct Rerror {
188 struct v9fs_str error;
189 u32 errno; /* 9p2000.u extension */
190};
191
192struct Tflush {
193 u16 oldtag;
194};
195
196struct Rflush {
197};
198
199struct Tattach {
200 u32 fid;
201 u32 afid;
202 struct v9fs_str uname;
203 struct v9fs_str aname;
204};
205
206struct Rattach {
207 struct v9fs_qid qid;
208};
209
210struct Twalk {
211 u32 fid;
212 u32 newfid;
213 u16 nwname;
214 struct v9fs_str wnames[16];
215};
216
217struct Rwalk {
218 u16 nwqid;
219 struct v9fs_qid wqids[16];
220};
221
222struct Topen {
223 u32 fid;
224 u8 mode;
225};
226
227struct Ropen {
228 struct v9fs_qid qid;
229 u32 iounit;
230};
231
232struct Tcreate {
233 u32 fid;
234 struct v9fs_str name;
235 u32 perm;
236 u8 mode;
237 struct v9fs_str extension;
238};
239
240struct Rcreate {
241 struct v9fs_qid qid;
242 u32 iounit;
243};
244
245struct Tread {
246 u32 fid;
247 u64 offset;
248 u32 count;
249};
250
251struct Rread {
252 u32 count;
253 u8 *data;
254};
255
256struct Twrite {
257 u32 fid;
258 u64 offset;
259 u32 count;
260 u8 *data;
261};
262
263struct Rwrite {
264 u32 count;
265};
266
267struct Tclunk {
268 u32 fid;
269};
270
271struct Rclunk {
272};
273
274struct Tremove {
275 u32 fid;
276};
277
278struct Rremove {
279};
280
281struct Tstat {
282 u32 fid;
283};
284
285struct Rstat {
286 struct v9fs_stat stat;
287};
288
289struct Twstat {
290 u32 fid;
291 struct v9fs_stat stat;
292};
293
294struct Rwstat {
295};
296
297/*
298 * fcall is the primary packet structure
299 *
300 */
301
302struct v9fs_fcall {
303 u32 size;
304 u8 id;
305 u16 tag;
306 void *sdata;
307
308 union {
309 struct Tversion tversion;
310 struct Rversion rversion;
311 struct Tauth tauth;
312 struct Rauth rauth;
313 struct Rerror rerror;
314 struct Tflush tflush;
315 struct Rflush rflush;
316 struct Tattach tattach;
317 struct Rattach rattach;
318 struct Twalk twalk;
319 struct Rwalk rwalk;
320 struct Topen topen;
321 struct Ropen ropen;
322 struct Tcreate tcreate;
323 struct Rcreate rcreate;
324 struct Tread tread;
325 struct Rread rread;
326 struct Twrite twrite;
327 struct Rwrite rwrite;
328 struct Tclunk tclunk;
329 struct Rclunk rclunk;
330 struct Tremove tremove;
331 struct Rremove rremove;
332 struct Tstat tstat;
333 struct Rstat rstat;
334 struct Twstat twstat;
335 struct Rwstat rwstat;
336 } params;
337};
338
339#define PRINT_FCALL_ERROR(s, fcall) dprintk(DEBUG_ERROR, "%s: %.*s\n", s, \
340 fcall?fcall->params.rerror.error.len:0, \
341 fcall?fcall->params.rerror.error.str:"");
342
343int v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
344 char *version, struct v9fs_fcall **rcall);
345
346int v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname,
347 u32 fid, u32 afid, struct v9fs_fcall **rcall);
348
349int v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid);
350
351int v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid,
352 struct v9fs_fcall **rcall);
353
354int v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid,
355 struct v9fs_wstat *wstat, struct v9fs_fcall **rcall);
356
357int v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid,
358 char *name, struct v9fs_fcall **rcall);
359
360int v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode,
361 struct v9fs_fcall **rcall);
362
363int v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid,
364 struct v9fs_fcall **rcall);
365
366int v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name,
367 u32 perm, u8 mode, char *extension, struct v9fs_fcall **rcall);
368
369int v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid,
370 u64 offset, u32 count, struct v9fs_fcall **rcall);
371
372int v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset,
373 u32 count, const char __user * data,
374 struct v9fs_fcall **rcall);
375int v9fs_printfcall(char *, int, struct v9fs_fcall *, int);
diff --git a/fs/9p/Makefile b/fs/9p/Makefile
index 87897f84dfb6..bc7f0d1551e6 100644
--- a/fs/9p/Makefile
+++ b/fs/9p/Makefile
@@ -1,18 +1,12 @@
1obj-$(CONFIG_9P_FS) := 9p.o 1obj-$(CONFIG_9P_FS) := 9p.o
2 2
39p-objs := \ 39p-objs := \
4 trans_fd.o \
5 mux.o \
6 fcall.o \
7 conv.o \
8 vfs_super.o \ 4 vfs_super.o \
9 vfs_inode.o \ 5 vfs_inode.o \
10 vfs_addr.o \ 6 vfs_addr.o \
11 vfs_file.o \ 7 vfs_file.o \
12 vfs_dir.o \ 8 vfs_dir.o \
13 vfs_dentry.o \ 9 vfs_dentry.o \
14 error.o \
15 v9fs.o \ 10 v9fs.o \
16 fid.o \ 11 fid.o \
17 fcprint.o
18 12
diff --git a/fs/9p/conv.c b/fs/9p/conv.c
deleted file mode 100644
index a3ed571eee31..000000000000
--- a/fs/9p/conv.c
+++ /dev/null
@@ -1,845 +0,0 @@
1/*
2 * linux/fs/9p/conv.c
3 *
4 * 9P protocol conversion functions
5 *
6 * Copyright (C) 2004, 2005 by Latchesar Ionkov <lucho@ionkov.net>
7 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
8 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2
12 * as published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to:
21 * Free Software Foundation
22 * 51 Franklin Street, Fifth Floor
23 * Boston, MA 02111-1301 USA
24 *
25 */
26
27#include <linux/module.h>
28#include <linux/errno.h>
29#include <linux/fs.h>
30#include <linux/sched.h>
31#include <linux/idr.h>
32#include <asm/uaccess.h>
33#include "debug.h"
34#include "v9fs.h"
35#include "9p.h"
36#include "conv.h"
37
38/*
39 * Buffer to help with string parsing
40 */
41struct cbuf {
42 unsigned char *sp;
43 unsigned char *p;
44 unsigned char *ep;
45};
46
47static inline void buf_init(struct cbuf *buf, void *data, int datalen)
48{
49 buf->sp = buf->p = data;
50 buf->ep = data + datalen;
51}
52
53static inline int buf_check_overflow(struct cbuf *buf)
54{
55 return buf->p > buf->ep;
56}
57
58static int buf_check_size(struct cbuf *buf, int len)
59{
60 if (buf->p + len > buf->ep) {
61 if (buf->p < buf->ep) {
62 eprintk(KERN_ERR, "buffer overflow: want %d has %d\n",
63 len, (int)(buf->ep - buf->p));
64 dump_stack();
65 buf->p = buf->ep + 1;
66 }
67
68 return 0;
69 }
70
71 return 1;
72}
73
74static void *buf_alloc(struct cbuf *buf, int len)
75{
76 void *ret = NULL;
77
78 if (buf_check_size(buf, len)) {
79 ret = buf->p;
80 buf->p += len;
81 }
82
83 return ret;
84}
85
86static void buf_put_int8(struct cbuf *buf, u8 val)
87{
88 if (buf_check_size(buf, 1)) {
89 buf->p[0] = val;
90 buf->p++;
91 }
92}
93
94static void buf_put_int16(struct cbuf *buf, u16 val)
95{
96 if (buf_check_size(buf, 2)) {
97 *(__le16 *) buf->p = cpu_to_le16(val);
98 buf->p += 2;
99 }
100}
101
102static void buf_put_int32(struct cbuf *buf, u32 val)
103{
104 if (buf_check_size(buf, 4)) {
105 *(__le32 *)buf->p = cpu_to_le32(val);
106 buf->p += 4;
107 }
108}
109
110static void buf_put_int64(struct cbuf *buf, u64 val)
111{
112 if (buf_check_size(buf, 8)) {
113 *(__le64 *)buf->p = cpu_to_le64(val);
114 buf->p += 8;
115 }
116}
117
118static char *buf_put_stringn(struct cbuf *buf, const char *s, u16 slen)
119{
120 char *ret;
121
122 ret = NULL;
123 if (buf_check_size(buf, slen + 2)) {
124 buf_put_int16(buf, slen);
125 ret = buf->p;
126 memcpy(buf->p, s, slen);
127 buf->p += slen;
128 }
129
130 return ret;
131}
132
133static inline void buf_put_string(struct cbuf *buf, const char *s)
134{
135 buf_put_stringn(buf, s, strlen(s));
136}
137
138static u8 buf_get_int8(struct cbuf *buf)
139{
140 u8 ret = 0;
141
142 if (buf_check_size(buf, 1)) {
143 ret = buf->p[0];
144 buf->p++;
145 }
146
147 return ret;
148}
149
150static u16 buf_get_int16(struct cbuf *buf)
151{
152 u16 ret = 0;
153
154 if (buf_check_size(buf, 2)) {
155 ret = le16_to_cpu(*(__le16 *)buf->p);
156 buf->p += 2;
157 }
158
159 return ret;
160}
161
162static u32 buf_get_int32(struct cbuf *buf)
163{
164 u32 ret = 0;
165
166 if (buf_check_size(buf, 4)) {
167 ret = le32_to_cpu(*(__le32 *)buf->p);
168 buf->p += 4;
169 }
170
171 return ret;
172}
173
174static u64 buf_get_int64(struct cbuf *buf)
175{
176 u64 ret = 0;
177
178 if (buf_check_size(buf, 8)) {
179 ret = le64_to_cpu(*(__le64 *)buf->p);
180 buf->p += 8;
181 }
182
183 return ret;
184}
185
186static void buf_get_str(struct cbuf *buf, struct v9fs_str *vstr)
187{
188 vstr->len = buf_get_int16(buf);
189 if (!buf_check_overflow(buf) && buf_check_size(buf, vstr->len)) {
190 vstr->str = buf->p;
191 buf->p += vstr->len;
192 } else {
193 vstr->len = 0;
194 vstr->str = NULL;
195 }
196}
197
198static void buf_get_qid(struct cbuf *bufp, struct v9fs_qid *qid)
199{
200 qid->type = buf_get_int8(bufp);
201 qid->version = buf_get_int32(bufp);
202 qid->path = buf_get_int64(bufp);
203}
204
205/**
206 * v9fs_size_wstat - calculate the size of a variable length stat struct
207 * @stat: metadata (stat) structure
208 * @extended: non-zero if 9P2000.u
209 *
210 */
211
212static int v9fs_size_wstat(struct v9fs_wstat *wstat, int extended)
213{
214 int size = 0;
215
216 if (wstat == NULL) {
217 eprintk(KERN_ERR, "v9fs_size_stat: got a NULL stat pointer\n");
218 return 0;
219 }
220
221 size = /* 2 + *//* size[2] */
222 2 + /* type[2] */
223 4 + /* dev[4] */
224 1 + /* qid.type[1] */
225 4 + /* qid.vers[4] */
226 8 + /* qid.path[8] */
227 4 + /* mode[4] */
228 4 + /* atime[4] */
229 4 + /* mtime[4] */
230 8 + /* length[8] */
231 8; /* minimum sum of string lengths */
232
233 if (wstat->name)
234 size += strlen(wstat->name);
235 if (wstat->uid)
236 size += strlen(wstat->uid);
237 if (wstat->gid)
238 size += strlen(wstat->gid);
239 if (wstat->muid)
240 size += strlen(wstat->muid);
241
242 if (extended) {
243 size += 4 + /* n_uid[4] */
244 4 + /* n_gid[4] */
245 4 + /* n_muid[4] */
246 2; /* string length of extension[4] */
247 if (wstat->extension)
248 size += strlen(wstat->extension);
249 }
250
251 return size;
252}
253
254/**
255 * buf_get_stat - safely decode a recieved metadata (stat) structure
256 * @bufp: buffer to deserialize
257 * @stat: metadata (stat) structure
258 * @extended: non-zero if 9P2000.u
259 *
260 */
261
262static void
263buf_get_stat(struct cbuf *bufp, struct v9fs_stat *stat, int extended)
264{
265 stat->size = buf_get_int16(bufp);
266 stat->type = buf_get_int16(bufp);
267 stat->dev = buf_get_int32(bufp);
268 stat->qid.type = buf_get_int8(bufp);
269 stat->qid.version = buf_get_int32(bufp);
270 stat->qid.path = buf_get_int64(bufp);
271 stat->mode = buf_get_int32(bufp);
272 stat->atime = buf_get_int32(bufp);
273 stat->mtime = buf_get_int32(bufp);
274 stat->length = buf_get_int64(bufp);
275 buf_get_str(bufp, &stat->name);
276 buf_get_str(bufp, &stat->uid);
277 buf_get_str(bufp, &stat->gid);
278 buf_get_str(bufp, &stat->muid);
279
280 if (extended) {
281 buf_get_str(bufp, &stat->extension);
282 stat->n_uid = buf_get_int32(bufp);
283 stat->n_gid = buf_get_int32(bufp);
284 stat->n_muid = buf_get_int32(bufp);
285 }
286}
287
288/**
289 * v9fs_deserialize_stat - decode a received metadata structure
290 * @buf: buffer to deserialize
291 * @buflen: length of received buffer
292 * @stat: metadata structure to decode into
293 * @extended: non-zero if 9P2000.u
294 *
295 * Note: stat will point to the buf region.
296 */
297
298int
299v9fs_deserialize_stat(void *buf, u32 buflen, struct v9fs_stat *stat,
300 int extended)
301{
302 struct cbuf buffer;
303 struct cbuf *bufp = &buffer;
304 unsigned char *p;
305
306 buf_init(bufp, buf, buflen);
307 p = bufp->p;
308 buf_get_stat(bufp, stat, extended);
309
310 if (buf_check_overflow(bufp))
311 return 0;
312 else
313 return bufp->p - p;
314}
315
316/**
317 * deserialize_fcall - unmarshal a response
318 * @buf: recieved buffer
319 * @buflen: length of received buffer
320 * @rcall: fcall structure to populate
321 * @rcalllen: length of fcall structure to populate
322 * @extended: non-zero if 9P2000.u
323 *
324 */
325
326int
327v9fs_deserialize_fcall(void *buf, u32 buflen, struct v9fs_fcall *rcall,
328 int extended)
329{
330
331 struct cbuf buffer;
332 struct cbuf *bufp = &buffer;
333 int i = 0;
334
335 buf_init(bufp, buf, buflen);
336
337 rcall->size = buf_get_int32(bufp);
338 rcall->id = buf_get_int8(bufp);
339 rcall->tag = buf_get_int16(bufp);
340
341 dprintk(DEBUG_CONV, "size %d id %d tag %d\n", rcall->size, rcall->id,
342 rcall->tag);
343
344 switch (rcall->id) {
345 default:
346 eprintk(KERN_ERR, "unknown message type: %d\n", rcall->id);
347 return -EPROTO;
348 case RVERSION:
349 rcall->params.rversion.msize = buf_get_int32(bufp);
350 buf_get_str(bufp, &rcall->params.rversion.version);
351 break;
352 case RFLUSH:
353 break;
354 case RATTACH:
355 rcall->params.rattach.qid.type = buf_get_int8(bufp);
356 rcall->params.rattach.qid.version = buf_get_int32(bufp);
357 rcall->params.rattach.qid.path = buf_get_int64(bufp);
358 break;
359 case RWALK:
360 rcall->params.rwalk.nwqid = buf_get_int16(bufp);
361 if (rcall->params.rwalk.nwqid > V9FS_MAXWELEM) {
362 eprintk(KERN_ERR, "Rwalk with more than %d qids: %d\n",
363 V9FS_MAXWELEM, rcall->params.rwalk.nwqid);
364 return -EPROTO;
365 }
366
367 for (i = 0; i < rcall->params.rwalk.nwqid; i++)
368 buf_get_qid(bufp, &rcall->params.rwalk.wqids[i]);
369 break;
370 case ROPEN:
371 buf_get_qid(bufp, &rcall->params.ropen.qid);
372 rcall->params.ropen.iounit = buf_get_int32(bufp);
373 break;
374 case RCREATE:
375 buf_get_qid(bufp, &rcall->params.rcreate.qid);
376 rcall->params.rcreate.iounit = buf_get_int32(bufp);
377 break;
378 case RREAD:
379 rcall->params.rread.count = buf_get_int32(bufp);
380 rcall->params.rread.data = bufp->p;
381 buf_check_size(bufp, rcall->params.rread.count);
382 break;
383 case RWRITE:
384 rcall->params.rwrite.count = buf_get_int32(bufp);
385 break;
386 case RCLUNK:
387 break;
388 case RREMOVE:
389 break;
390 case RSTAT:
391 buf_get_int16(bufp);
392 buf_get_stat(bufp, &rcall->params.rstat.stat, extended);
393 break;
394 case RWSTAT:
395 break;
396 case RERROR:
397 buf_get_str(bufp, &rcall->params.rerror.error);
398 if (extended)
399 rcall->params.rerror.errno = buf_get_int16(bufp);
400 break;
401 }
402
403 if (buf_check_overflow(bufp)) {
404 dprintk(DEBUG_ERROR, "buffer overflow\n");
405 return -EIO;
406 }
407
408 return bufp->p - bufp->sp;
409}
410
411static inline void v9fs_put_int8(struct cbuf *bufp, u8 val, u8 * p)
412{
413 *p = val;
414 buf_put_int8(bufp, val);
415}
416
417static inline void v9fs_put_int16(struct cbuf *bufp, u16 val, u16 * p)
418{
419 *p = val;
420 buf_put_int16(bufp, val);
421}
422
423static inline void v9fs_put_int32(struct cbuf *bufp, u32 val, u32 * p)
424{
425 *p = val;
426 buf_put_int32(bufp, val);
427}
428
429static inline void v9fs_put_int64(struct cbuf *bufp, u64 val, u64 * p)
430{
431 *p = val;
432 buf_put_int64(bufp, val);
433}
434
435static void
436v9fs_put_str(struct cbuf *bufp, char *data, struct v9fs_str *str)
437{
438 int len;
439 char *s;
440
441 if (data)
442 len = strlen(data);
443 else
444 len = 0;
445
446 s = buf_put_stringn(bufp, data, len);
447 if (str) {
448 str->len = len;
449 str->str = s;
450 }
451}
452
453static int
454v9fs_put_user_data(struct cbuf *bufp, const char __user * data, int count,
455 unsigned char **pdata)
456{
457 *pdata = buf_alloc(bufp, count);
458 return copy_from_user(*pdata, data, count);
459}
460
461static void
462v9fs_put_wstat(struct cbuf *bufp, struct v9fs_wstat *wstat,
463 struct v9fs_stat *stat, int statsz, int extended)
464{
465 v9fs_put_int16(bufp, statsz, &stat->size);
466 v9fs_put_int16(bufp, wstat->type, &stat->type);
467 v9fs_put_int32(bufp, wstat->dev, &stat->dev);
468 v9fs_put_int8(bufp, wstat->qid.type, &stat->qid.type);
469 v9fs_put_int32(bufp, wstat->qid.version, &stat->qid.version);
470 v9fs_put_int64(bufp, wstat->qid.path, &stat->qid.path);
471 v9fs_put_int32(bufp, wstat->mode, &stat->mode);
472 v9fs_put_int32(bufp, wstat->atime, &stat->atime);
473 v9fs_put_int32(bufp, wstat->mtime, &stat->mtime);
474 v9fs_put_int64(bufp, wstat->length, &stat->length);
475
476 v9fs_put_str(bufp, wstat->name, &stat->name);
477 v9fs_put_str(bufp, wstat->uid, &stat->uid);
478 v9fs_put_str(bufp, wstat->gid, &stat->gid);
479 v9fs_put_str(bufp, wstat->muid, &stat->muid);
480
481 if (extended) {
482 v9fs_put_str(bufp, wstat->extension, &stat->extension);
483 v9fs_put_int32(bufp, wstat->n_uid, &stat->n_uid);
484 v9fs_put_int32(bufp, wstat->n_gid, &stat->n_gid);
485 v9fs_put_int32(bufp, wstat->n_muid, &stat->n_muid);
486 }
487}
488
489static struct v9fs_fcall *
490v9fs_create_common(struct cbuf *bufp, u32 size, u8 id)
491{
492 struct v9fs_fcall *fc;
493
494 size += 4 + 1 + 2; /* size[4] id[1] tag[2] */
495 fc = kmalloc(sizeof(struct v9fs_fcall) + size, GFP_KERNEL);
496 if (!fc)
497 return ERR_PTR(-ENOMEM);
498
499 fc->sdata = (char *)fc + sizeof(*fc);
500
501 buf_init(bufp, (char *)fc->sdata, size);
502 v9fs_put_int32(bufp, size, &fc->size);
503 v9fs_put_int8(bufp, id, &fc->id);
504 v9fs_put_int16(bufp, V9FS_NOTAG, &fc->tag);
505
506 return fc;
507}
508
509void v9fs_set_tag(struct v9fs_fcall *fc, u16 tag)
510{
511 fc->tag = tag;
512 *(__le16 *) (fc->sdata + 5) = cpu_to_le16(tag);
513}
514
515struct v9fs_fcall *v9fs_create_tversion(u32 msize, char *version)
516{
517 int size;
518 struct v9fs_fcall *fc;
519 struct cbuf buffer;
520 struct cbuf *bufp = &buffer;
521
522 size = 4 + 2 + strlen(version); /* msize[4] version[s] */
523 fc = v9fs_create_common(bufp, size, TVERSION);
524 if (IS_ERR(fc))
525 goto error;
526
527 v9fs_put_int32(bufp, msize, &fc->params.tversion.msize);
528 v9fs_put_str(bufp, version, &fc->params.tversion.version);
529
530 if (buf_check_overflow(bufp)) {
531 kfree(fc);
532 fc = ERR_PTR(-ENOMEM);
533 }
534 error:
535 return fc;
536}
537
538#if 0
539struct v9fs_fcall *v9fs_create_tauth(u32 afid, char *uname, char *aname)
540{
541 int size;
542 struct v9fs_fcall *fc;
543 struct cbuf buffer;
544 struct cbuf *bufp = &buffer;
545
546 size = 4 + 2 + strlen(uname) + 2 + strlen(aname); /* afid[4] uname[s] aname[s] */
547 fc = v9fs_create_common(bufp, size, TAUTH);
548 if (IS_ERR(fc))
549 goto error;
550
551 v9fs_put_int32(bufp, afid, &fc->params.tauth.afid);
552 v9fs_put_str(bufp, uname, &fc->params.tauth.uname);
553 v9fs_put_str(bufp, aname, &fc->params.tauth.aname);
554
555 if (buf_check_overflow(bufp)) {
556 kfree(fc);
557 fc = ERR_PTR(-ENOMEM);
558 }
559 error:
560 return fc;
561}
562#endif /* 0 */
563
564struct v9fs_fcall *
565v9fs_create_tattach(u32 fid, u32 afid, char *uname, char *aname)
566{
567 int size;
568 struct v9fs_fcall *fc;
569 struct cbuf buffer;
570 struct cbuf *bufp = &buffer;
571
572 size = 4 + 4 + 2 + strlen(uname) + 2 + strlen(aname); /* fid[4] afid[4] uname[s] aname[s] */
573 fc = v9fs_create_common(bufp, size, TATTACH);
574 if (IS_ERR(fc))
575 goto error;
576
577 v9fs_put_int32(bufp, fid, &fc->params.tattach.fid);
578 v9fs_put_int32(bufp, afid, &fc->params.tattach.afid);
579 v9fs_put_str(bufp, uname, &fc->params.tattach.uname);
580 v9fs_put_str(bufp, aname, &fc->params.tattach.aname);
581
582 error:
583 return fc;
584}
585
586struct v9fs_fcall *v9fs_create_tflush(u16 oldtag)
587{
588 int size;
589 struct v9fs_fcall *fc;
590 struct cbuf buffer;
591 struct cbuf *bufp = &buffer;
592
593 size = 2; /* oldtag[2] */
594 fc = v9fs_create_common(bufp, size, TFLUSH);
595 if (IS_ERR(fc))
596 goto error;
597
598 v9fs_put_int16(bufp, oldtag, &fc->params.tflush.oldtag);
599
600 if (buf_check_overflow(bufp)) {
601 kfree(fc);
602 fc = ERR_PTR(-ENOMEM);
603 }
604 error:
605 return fc;
606}
607
608struct v9fs_fcall *v9fs_create_twalk(u32 fid, u32 newfid, u16 nwname,
609 char **wnames)
610{
611 int i, size;
612 struct v9fs_fcall *fc;
613 struct cbuf buffer;
614 struct cbuf *bufp = &buffer;
615
616 if (nwname > V9FS_MAXWELEM) {
617 dprintk(DEBUG_ERROR, "nwname > %d\n", V9FS_MAXWELEM);
618 return NULL;
619 }
620
621 size = 4 + 4 + 2; /* fid[4] newfid[4] nwname[2] ... */
622 for (i = 0; i < nwname; i++) {
623 size += 2 + strlen(wnames[i]); /* wname[s] */
624 }
625
626 fc = v9fs_create_common(bufp, size, TWALK);
627 if (IS_ERR(fc))
628 goto error;
629
630 v9fs_put_int32(bufp, fid, &fc->params.twalk.fid);
631 v9fs_put_int32(bufp, newfid, &fc->params.twalk.newfid);
632 v9fs_put_int16(bufp, nwname, &fc->params.twalk.nwname);
633 for (i = 0; i < nwname; i++) {
634 v9fs_put_str(bufp, wnames[i], &fc->params.twalk.wnames[i]);
635 }
636
637 if (buf_check_overflow(bufp)) {
638 kfree(fc);
639 fc = ERR_PTR(-ENOMEM);
640 }
641 error:
642 return fc;
643}
644
645struct v9fs_fcall *v9fs_create_topen(u32 fid, u8 mode)
646{
647 int size;
648 struct v9fs_fcall *fc;
649 struct cbuf buffer;
650 struct cbuf *bufp = &buffer;
651
652 size = 4 + 1; /* fid[4] mode[1] */
653 fc = v9fs_create_common(bufp, size, TOPEN);
654 if (IS_ERR(fc))
655 goto error;
656
657 v9fs_put_int32(bufp, fid, &fc->params.topen.fid);
658 v9fs_put_int8(bufp, mode, &fc->params.topen.mode);
659
660 if (buf_check_overflow(bufp)) {
661 kfree(fc);
662 fc = ERR_PTR(-ENOMEM);
663 }
664 error:
665 return fc;
666}
667
668struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode,
669 char *extension, int extended)
670{
671 int size;
672 struct v9fs_fcall *fc;
673 struct cbuf buffer;
674 struct cbuf *bufp = &buffer;
675
676 size = 4 + 2 + strlen(name) + 4 + 1; /* fid[4] name[s] perm[4] mode[1] */
677 if (extended) {
678 size += 2 + /* extension[s] */
679 (extension == NULL ? 0 : strlen(extension));
680 }
681
682 fc = v9fs_create_common(bufp, size, TCREATE);
683 if (IS_ERR(fc))
684 goto error;
685
686 v9fs_put_int32(bufp, fid, &fc->params.tcreate.fid);
687 v9fs_put_str(bufp, name, &fc->params.tcreate.name);
688 v9fs_put_int32(bufp, perm, &fc->params.tcreate.perm);
689 v9fs_put_int8(bufp, mode, &fc->params.tcreate.mode);
690 if (extended)
691 v9fs_put_str(bufp, extension, &fc->params.tcreate.extension);
692
693 if (buf_check_overflow(bufp)) {
694 kfree(fc);
695 fc = ERR_PTR(-ENOMEM);
696 }
697 error:
698 return fc;
699}
700
701struct v9fs_fcall *v9fs_create_tread(u32 fid, u64 offset, u32 count)
702{
703 int size;
704 struct v9fs_fcall *fc;
705 struct cbuf buffer;
706 struct cbuf *bufp = &buffer;
707
708 size = 4 + 8 + 4; /* fid[4] offset[8] count[4] */
709 fc = v9fs_create_common(bufp, size, TREAD);
710 if (IS_ERR(fc))
711 goto error;
712
713 v9fs_put_int32(bufp, fid, &fc->params.tread.fid);
714 v9fs_put_int64(bufp, offset, &fc->params.tread.offset);
715 v9fs_put_int32(bufp, count, &fc->params.tread.count);
716
717 if (buf_check_overflow(bufp)) {
718 kfree(fc);
719 fc = ERR_PTR(-ENOMEM);
720 }
721 error:
722 return fc;
723}
724
725struct v9fs_fcall *v9fs_create_twrite(u32 fid, u64 offset, u32 count,
726 const char __user * data)
727{
728 int size, err;
729 struct v9fs_fcall *fc;
730 struct cbuf buffer;
731 struct cbuf *bufp = &buffer;
732
733 size = 4 + 8 + 4 + count; /* fid[4] offset[8] count[4] data[count] */
734 fc = v9fs_create_common(bufp, size, TWRITE);
735 if (IS_ERR(fc))
736 goto error;
737
738 v9fs_put_int32(bufp, fid, &fc->params.twrite.fid);
739 v9fs_put_int64(bufp, offset, &fc->params.twrite.offset);
740 v9fs_put_int32(bufp, count, &fc->params.twrite.count);
741 err = v9fs_put_user_data(bufp, data, count, &fc->params.twrite.data);
742 if (err) {
743 kfree(fc);
744 fc = ERR_PTR(err);
745 }
746
747 if (buf_check_overflow(bufp)) {
748 kfree(fc);
749 fc = ERR_PTR(-ENOMEM);
750 }
751 error:
752 return fc;
753}
754
755struct v9fs_fcall *v9fs_create_tclunk(u32 fid)
756{
757 int size;
758 struct v9fs_fcall *fc;
759 struct cbuf buffer;
760 struct cbuf *bufp = &buffer;
761
762 size = 4; /* fid[4] */
763 fc = v9fs_create_common(bufp, size, TCLUNK);
764 if (IS_ERR(fc))
765 goto error;
766
767 v9fs_put_int32(bufp, fid, &fc->params.tclunk.fid);
768
769 if (buf_check_overflow(bufp)) {
770 kfree(fc);
771 fc = ERR_PTR(-ENOMEM);
772 }
773 error:
774 return fc;
775}
776
777struct v9fs_fcall *v9fs_create_tremove(u32 fid)
778{
779 int size;
780 struct v9fs_fcall *fc;
781 struct cbuf buffer;
782 struct cbuf *bufp = &buffer;
783
784 size = 4; /* fid[4] */
785 fc = v9fs_create_common(bufp, size, TREMOVE);
786 if (IS_ERR(fc))
787 goto error;
788
789 v9fs_put_int32(bufp, fid, &fc->params.tremove.fid);
790
791 if (buf_check_overflow(bufp)) {
792 kfree(fc);
793 fc = ERR_PTR(-ENOMEM);
794 }
795 error:
796 return fc;
797}
798
799struct v9fs_fcall *v9fs_create_tstat(u32 fid)
800{
801 int size;
802 struct v9fs_fcall *fc;
803 struct cbuf buffer;
804 struct cbuf *bufp = &buffer;
805
806 size = 4; /* fid[4] */
807 fc = v9fs_create_common(bufp, size, TSTAT);
808 if (IS_ERR(fc))
809 goto error;
810
811 v9fs_put_int32(bufp, fid, &fc->params.tstat.fid);
812
813 if (buf_check_overflow(bufp)) {
814 kfree(fc);
815 fc = ERR_PTR(-ENOMEM);
816 }
817 error:
818 return fc;
819}
820
821struct v9fs_fcall *v9fs_create_twstat(u32 fid, struct v9fs_wstat *wstat,
822 int extended)
823{
824 int size, statsz;
825 struct v9fs_fcall *fc;
826 struct cbuf buffer;
827 struct cbuf *bufp = &buffer;
828
829 statsz = v9fs_size_wstat(wstat, extended);
830 size = 4 + 2 + 2 + statsz; /* fid[4] stat[n] */
831 fc = v9fs_create_common(bufp, size, TWSTAT);
832 if (IS_ERR(fc))
833 goto error;
834
835 v9fs_put_int32(bufp, fid, &fc->params.twstat.fid);
836 buf_put_int16(bufp, statsz + 2);
837 v9fs_put_wstat(bufp, wstat, &fc->params.twstat.stat, statsz, extended);
838
839 if (buf_check_overflow(bufp)) {
840 kfree(fc);
841 fc = ERR_PTR(-ENOMEM);
842 }
843 error:
844 return fc;
845}
diff --git a/fs/9p/conv.h b/fs/9p/conv.h
deleted file mode 100644
index dd5b6b1b610f..000000000000
--- a/fs/9p/conv.h
+++ /dev/null
@@ -1,50 +0,0 @@
1/*
2 * linux/fs/9p/conv.h
3 *
4 * 9P protocol conversion definitions.
5 *
6 * Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
7 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
8 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2
12 * as published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to:
21 * Free Software Foundation
22 * 51 Franklin Street, Fifth Floor
23 * Boston, MA 02111-1301 USA
24 *
25 */
26
27int v9fs_deserialize_stat(void *buf, u32 buflen, struct v9fs_stat *stat,
28 int extended);
29int v9fs_deserialize_fcall(void *buf, u32 buflen, struct v9fs_fcall *rcall,
30 int extended);
31
32void v9fs_set_tag(struct v9fs_fcall *fc, u16 tag);
33
34struct v9fs_fcall *v9fs_create_tversion(u32 msize, char *version);
35struct v9fs_fcall *v9fs_create_tattach(u32 fid, u32 afid, char *uname,
36 char *aname);
37struct v9fs_fcall *v9fs_create_tflush(u16 oldtag);
38struct v9fs_fcall *v9fs_create_twalk(u32 fid, u32 newfid, u16 nwname,
39 char **wnames);
40struct v9fs_fcall *v9fs_create_topen(u32 fid, u8 mode);
41struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode,
42 char *extension, int extended);
43struct v9fs_fcall *v9fs_create_tread(u32 fid, u64 offset, u32 count);
44struct v9fs_fcall *v9fs_create_twrite(u32 fid, u64 offset, u32 count,
45 const char __user *data);
46struct v9fs_fcall *v9fs_create_tclunk(u32 fid);
47struct v9fs_fcall *v9fs_create_tremove(u32 fid);
48struct v9fs_fcall *v9fs_create_tstat(u32 fid);
49struct v9fs_fcall *v9fs_create_twstat(u32 fid, struct v9fs_wstat *wstat,
50 int extended);
diff --git a/fs/9p/debug.h b/fs/9p/debug.h
deleted file mode 100644
index 4228c0bb3c32..000000000000
--- a/fs/9p/debug.h
+++ /dev/null
@@ -1,77 +0,0 @@
1/*
2 * linux/fs/9p/debug.h - V9FS Debug Definitions
3 *
4 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
5 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2
9 * as published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to:
18 * Free Software Foundation
19 * 51 Franklin Street, Fifth Floor
20 * Boston, MA 02111-1301 USA
21 *
22 */
23
24#define DEBUG_ERROR (1<<0) /* debug categories: one bit each, tested against v9fs_debug_level by dprintk() */
25#define DEBUG_CURRENT (1<<1)
26#define DEBUG_9P (1<<2)
27#define DEBUG_VFS (1<<3)
28#define DEBUG_CONV (1<<4)
29#define DEBUG_MUX (1<<5)
30#define DEBUG_TRANS (1<<6)
31#define DEBUG_SLABS (1<<7)
32#define DEBUG_FCALL (1<<8)
33
34#define DEBUG_DUMP_PKT 0 /* non-zero compiles in the hex-dumping body of dump_data() */
35
36extern int v9fs_debug_level; /* bitmask of enabled DEBUG_* categories */
37
/*
 * dprintk - print a debug message when its category is enabled
 * @level: DEBUG_* bit (or OR-ed bits); all of them must be set in
 *         v9fs_debug_level for the message to be printed
 * @format: printk format string, followed by its arguments
 *
 * The message is prefixed with the calling function name and the pid
 * of the current task.  @level is parenthesized so that a compound
 * argument such as (A | B) is tested as a whole.
 */
#define dprintk(level, format, arg...) \
do { \
	if ((v9fs_debug_level & (level)) == (level)) \
		printk(KERN_NOTICE "-- %s (%d): " \
		format , __FUNCTION__, current->pid , ## arg); \
} while (0)
44
/*
 * eprintk - unconditionally print a "v9fs: "-prefixed message
 * @level: printk priority string, pasted in front of the format
 * @format: printk format string, followed by its arguments
 *
 * The message also carries the calling function name and the pid of
 * the current task.
 */
#define eprintk(level, format, arg...)					\
	do {								\
		printk(level "v9fs: %s (%d): " format,			\
		       __FUNCTION__, current->pid , ## arg);		\
	} while (0)
50
#if DEBUG_DUMP_PKT
/*
 * dump_data - hex dump a buffer through dprintk(DEBUG_ERROR, ...)
 * @data: bytes to dump
 * @datalen: number of bytes in @data
 *
 * Prints 16 bytes per row, in groups of four.  A row needs 32 hex
 * digits, 4 separators and the terminating NUL, which fits in buf.
 */
static inline void dump_data(const unsigned char *data, unsigned int datalen)
{
	unsigned int i = 0;
	int n = 0;
	char buf[5*8];

	while (i < datalen) {
		n += snprintf(buf+n, sizeof(buf)-n, "%02x", data[i++]);
		if (i%4 == 0)
			n += snprintf(buf+n, sizeof(buf)-n, " ");

		if (i%16 == 0) {
			dprintk(DEBUG_ERROR, "%s\n", buf);
			n = 0;
		}
	}

	/*
	 * Flush a partial last row only.  With nothing pending, buf is
	 * either uninitialized (datalen == 0) or holds a row that has
	 * already been printed.
	 */
	if (n > 0)
		dprintk(DEBUG_ERROR, "%s\n", buf);
}
#else /* DEBUG_DUMP_PKT */
/* Packet dumping is compiled out: dump_data() does nothing. */
static inline void dump_data(const unsigned char *data, unsigned int datalen)
{

}
#endif /* DEBUG_DUMP_PKT */
diff --git a/fs/9p/error.c b/fs/9p/error.c
deleted file mode 100644
index 0d7fa4e08812..000000000000
--- a/fs/9p/error.c
+++ /dev/null
@@ -1,93 +0,0 @@
1/*
2 * linux/fs/9p/error.c
3 *
4 * Error string handling
5 *
6 * Plan 9 uses error strings, Unix uses error numbers. These functions
7 * try to help manage that and provide for dynamically adding error
8 * mappings.
9 *
10 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
11 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License version 2
15 * as published by the Free Software Foundation.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to:
24 * Free Software Foundation
25 * 51 Franklin Street, Fifth Floor
26 * Boston, MA 02111-1301 USA
27 *
28 */
29
30#include <linux/module.h>
31
32#include <linux/list.h>
33#include <linux/jhash.h>
34
35#include "debug.h"
36#include "error.h"
37
38/**
39 * v9fs_error_init - preload
40 * @errstr: error string
41 *
42 */
43
44int v9fs_error_init(void)
45{
46 struct errormap *c;
47 int bucket;
48
49 /* initialize hash table */
50 for (bucket = 0; bucket < ERRHASHSZ; bucket++)
51 INIT_HLIST_HEAD(&hash_errmap[bucket]);
52
53 /* load initial error map into hash table */
54 for (c = errmap; c->name != NULL; c++) {
55 c->namelen = strlen(c->name);
56 bucket = jhash(c->name, c->namelen, 0) % ERRHASHSZ;
57 INIT_HLIST_NODE(&c->list);
58 hlist_add_head(&c->list, &hash_errmap[bucket]);
59 }
60
61 return 1;
62}
63
64/**
65 * errstr2errno - convert error string to error number
66 * @errstr: error string
67 *
68 */
69
70int v9fs_errstr2errno(char *errstr, int len)
71{
72 int errno = 0;
73 struct hlist_node *p = NULL;
74 struct errormap *c = NULL;
75 int bucket = jhash(errstr, len, 0) % ERRHASHSZ;
76
77 hlist_for_each_entry(c, p, &hash_errmap[bucket], list) {
78 if (c->namelen==len && !memcmp(c->name, errstr, len)) {
79 errno = c->val;
80 break;
81 }
82 }
83
84 if (errno == 0) {
85 /* TODO: if error isn't found, add it dynamically */
86 errstr[len] = 0;
87 printk(KERN_ERR "%s: errstr :%s: not found\n", __FUNCTION__,
88 errstr);
89 errno = 1;
90 }
91
92 return -errno;
93}
diff --git a/fs/9p/error.h b/fs/9p/error.h
deleted file mode 100644
index 5f3ca522b316..000000000000
--- a/fs/9p/error.h
+++ /dev/null
@@ -1,177 +0,0 @@
1/*
2 * linux/fs/9p/error.h
3 *
4 * Huge Nasty Error Table
5 *
6 * Plan 9 uses error strings, Unix uses error numbers. This table tries to
7 * match UNIX strings and Plan 9 strings to unix error numbers. It is used
8 * to preload the dynamic error table which can also track user-specific error
9 * strings.
10 *
11 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
12 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
13 *
14 * This program is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License version 2
16 * as published by the Free Software Foundation.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to:
25 * Free Software Foundation
26 * 51 Franklin Street, Fifth Floor
27 * Boston, MA 02111-1301 USA
28 *
29 */
30
31#include <linux/errno.h>
32#include <asm/errno.h>
33
34struct errormap {
35 char *name; /* error string to match */
36 int val; /* error number the string maps to */
37
38 int namelen; /* strlen(name), filled in by v9fs_error_init() */
39 struct hlist_node list; /* link in a hash_errmap bucket */
40};
41
42#define ERRHASHSZ 32
43static struct hlist_head hash_errmap[ERRHASHSZ];
44
45/* FixMe - reduce to a reasonable size */
46static struct errormap errmap[] = {
47 {"Operation not permitted", EPERM},
48 {"wstat prohibited", EPERM},
49 {"No such file or directory", ENOENT},
50 {"directory entry not found", ENOENT},
51 {"file not found", ENOENT},
52 {"Interrupted system call", EINTR},
53 {"Input/output error", EIO},
54 {"No such device or address", ENXIO},
55 {"Argument list too long", E2BIG},
56 {"Bad file descriptor", EBADF},
57 {"Resource temporarily unavailable", EAGAIN},
58 {"Cannot allocate memory", ENOMEM},
59 {"Permission denied", EACCES},
60 {"Bad address", EFAULT},
61 {"Block device required", ENOTBLK},
62 {"Device or resource busy", EBUSY},
63 {"File exists", EEXIST},
64 {"Invalid cross-device link", EXDEV},
65 {"No such device", ENODEV},
66 {"Not a directory", ENOTDIR},
67 {"Is a directory", EISDIR},
68 {"Invalid argument", EINVAL},
69 {"Too many open files in system", ENFILE},
70 {"Too many open files", EMFILE},
71 {"Text file busy", ETXTBSY},
72 {"File too large", EFBIG},
73 {"No space left on device", ENOSPC},
74 {"Illegal seek", ESPIPE},
75 {"Read-only file system", EROFS},
76 {"Too many links", EMLINK},
77 {"Broken pipe", EPIPE},
78 {"Numerical argument out of domain", EDOM},
79 {"Numerical result out of range", ERANGE},
80 {"Resource deadlock avoided", EDEADLK},
81 {"File name too long", ENAMETOOLONG},
82 {"No locks available", ENOLCK},
83 {"Function not implemented", ENOSYS},
84 {"Directory not empty", ENOTEMPTY},
85 {"Too many levels of symbolic links", ELOOP},
86 {"No message of desired type", ENOMSG},
87 {"Identifier removed", EIDRM},
88 {"No data available", ENODATA},
89 {"Machine is not on the network", ENONET},
90 {"Package not installed", ENOPKG},
91 {"Object is remote", EREMOTE},
92 {"Link has been severed", ENOLINK},
93 {"Communication error on send", ECOMM},
94 {"Protocol error", EPROTO},
95 {"Bad message", EBADMSG},
96 {"File descriptor in bad state", EBADFD},
97 {"Streams pipe error", ESTRPIPE},
98 {"Too many users", EUSERS},
99 {"Socket operation on non-socket", ENOTSOCK},
100 {"Message too long", EMSGSIZE},
101 {"Protocol not available", ENOPROTOOPT},
102 {"Protocol not supported", EPROTONOSUPPORT},
103 {"Socket type not supported", ESOCKTNOSUPPORT},
104 {"Operation not supported", EOPNOTSUPP},
105 {"Protocol family not supported", EPFNOSUPPORT},
106 {"Network is down", ENETDOWN},
107 {"Network is unreachable", ENETUNREACH},
108 {"Network dropped connection on reset", ENETRESET},
109 {"Software caused connection abort", ECONNABORTED},
110 {"Connection reset by peer", ECONNRESET},
111 {"No buffer space available", ENOBUFS},
112 {"Transport endpoint is already connected", EISCONN},
113 {"Transport endpoint is not connected", ENOTCONN},
114 {"Cannot send after transport endpoint shutdown", ESHUTDOWN},
115 {"Connection timed out", ETIMEDOUT},
116 {"Connection refused", ECONNREFUSED},
117 {"Host is down", EHOSTDOWN},
118 {"No route to host", EHOSTUNREACH},
119 {"Operation already in progress", EALREADY},
120 {"Operation now in progress", EINPROGRESS},
121 {"Is a named type file", EISNAM},
122 {"Remote I/O error", EREMOTEIO},
123 {"Disk quota exceeded", EDQUOT},
124/* errors from fossil, vacfs, and u9fs */
125 {"fid unknown or out of range", EBADF},
126 {"permission denied", EACCES},
127 {"file does not exist", ENOENT},
128 {"authentication failed", ECONNREFUSED},
129 {"bad offset in directory read", ESPIPE},
130 {"bad use of fid", EBADF},
131 {"wstat can't convert between files and directories", EPERM},
132 {"directory is not empty", ENOTEMPTY},
133 {"file exists", EEXIST},
134 {"file already exists", EEXIST},
135 {"file or directory already exists", EEXIST},
136 {"fid already in use", EBADF},
137 {"file in use", ETXTBSY},
138 {"i/o error", EIO},
139 {"file already open for I/O", ETXTBSY},
140 {"illegal mode", EINVAL},
141 {"illegal name", ENAMETOOLONG},
142 {"not a directory", ENOTDIR},
143 {"not a member of proposed group", EPERM},
144 {"not owner", EACCES},
145 {"only owner can change group in wstat", EACCES},
146 {"read only file system", EROFS},
147 {"no access to special file", EPERM},
148 {"i/o count too large", EIO},
149 {"unknown group", EINVAL},
150 {"unknown user", EINVAL},
151 {"bogus wstat buffer", EPROTO},
152 {"exclusive use file already open", EAGAIN},
153 {"corrupted directory entry", EIO},
154 {"corrupted file entry", EIO},
155 {"corrupted block label", EIO},
156 {"corrupted meta data", EIO},
157 {"illegal offset", EINVAL},
158 {"illegal path element", ENOENT},
159 {"root of file system is corrupted", EIO},
160 {"corrupted super block", EIO},
161 {"protocol botch", EPROTO},
162 {"file system is full", ENOSPC},
163 {"file is in use", EAGAIN},
164 {"directory entry is not allocated", ENOENT},
165 {"file is read only", EROFS},
166 {"file has been removed", EIDRM},
167 {"only support truncation to zero length", EPERM},
168 {"cannot remove root", EPERM},
169 {"file too big", EFBIG},
170 {"venti i/o error", EIO},
171 /* these are not errors */
172 {"u9fs rhostsauth: no authentication required", 0},
173 {"u9fs authnone: no authentication required", 0},
174 {NULL, -1}
175};
176
177extern int v9fs_error_init(void);
diff --git a/fs/9p/fcall.c b/fs/9p/fcall.c
deleted file mode 100644
index dc336a67592f..000000000000
--- a/fs/9p/fcall.c
+++ /dev/null
@@ -1,427 +0,0 @@
1/*
2 * linux/fs/9p/fcall.c
3 *
4 * This file contains functions to perform synchronous 9P calls
5 *
6 * Copyright (C) 2004 by Latchesar Ionkov <lucho@ionkov.net>
7 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
8 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2
12 * as published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to:
21 * Free Software Foundation
22 * 51 Franklin Street, Fifth Floor
23 * Boston, MA 02111-1301 USA
24 *
25 */
26
27#include <linux/module.h>
28#include <linux/errno.h>
29#include <linux/fs.h>
30#include <linux/sched.h>
31#include <linux/idr.h>
32
33#include "debug.h"
34#include "v9fs.h"
35#include "9p.h"
36#include "conv.h"
37#include "mux.h"
38
39/**
40 * v9fs_t_version - negotiate protocol parameters with sever
41 * @v9ses: 9P2000 session information
42 * @msize: requested max size packet
43 * @version: requested version.extension string
44 * @fcall: pointer to response fcall pointer
45 *
46 */
47
48int
49v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
50 char *version, struct v9fs_fcall **rcp)
51{
52 int ret;
53 struct v9fs_fcall *tc;
54
55 dprintk(DEBUG_9P, "msize: %d version: %s\n", msize, version);
56 tc = v9fs_create_tversion(msize, version);
57
58 if (!IS_ERR(tc)) {
59 ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
60 kfree(tc);
61 } else
62 ret = PTR_ERR(tc);
63
64 return ret;
65}
66
67/**
68 * v9fs_t_attach - mount the server
69 * @v9ses: 9P2000 session information
70 * @uname: user name doing the attach
71 * @aname: remote name being attached to
72 * @fid: mount fid to attatch to root node
73 * @afid: authentication fid (in this case result key)
74 * @fcall: pointer to response fcall pointer
75 *
76 */
77
78int
79v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname,
80 u32 fid, u32 afid, struct v9fs_fcall **rcp)
81{
82 int ret;
83 struct v9fs_fcall* tc;
84
85 dprintk(DEBUG_9P, "uname '%s' aname '%s' fid %d afid %d\n", uname,
86 aname, fid, afid);
87
88 tc = v9fs_create_tattach(fid, afid, uname, aname);
89 if (!IS_ERR(tc)) {
90 ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
91 kfree(tc);
92 } else
93 ret = PTR_ERR(tc);
94
95 return ret;
96}
97
98static void v9fs_t_clunk_cb(void *a, struct v9fs_fcall *tc,
99 struct v9fs_fcall *rc, int err)
100{
101 int fid, id;
102 struct v9fs_session_info *v9ses;
103
104 id = 0;
105 fid = tc->params.tclunk.fid;
106 if (rc)
107 id = rc->id;
108
109 kfree(tc);
110 kfree(rc);
111 if (id == RCLUNK) {
112 v9ses = a;
113 v9fs_put_idpool(fid, &v9ses->fidpool);
114 }
115}
116
117/**
118 * v9fs_t_clunk - release a fid (finish a transaction)
119 * @v9ses: 9P2000 session information
120 * @fid: fid to release
121 * @fcall: pointer to response fcall pointer
122 *
123 */
124
125int
126v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid)
127{
128 int ret;
129 struct v9fs_fcall *tc, *rc;
130
131 dprintk(DEBUG_9P, "fid %d\n", fid);
132
133 rc = NULL;
134 tc = v9fs_create_tclunk(fid);
135 if (!IS_ERR(tc))
136 ret = v9fs_mux_rpc(v9ses->mux, tc, &rc);
137 else
138 ret = PTR_ERR(tc);
139
140 if (ret)
141 dprintk(DEBUG_ERROR, "failed fid %d err %d\n", fid, ret);
142
143 v9fs_t_clunk_cb(v9ses, tc, rc, ret);
144 return ret;
145}
146
147#if 0
148/**
149 * v9fs_v9fs_t_flush - flush a pending transaction
150 * @v9ses: 9P2000 session information
151 * @tag: tag to release
152 *
153 */
154int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 oldtag)
155{
156 int ret;
157 struct v9fs_fcall *tc;
158
159 dprintk(DEBUG_9P, "oldtag %d\n", oldtag);
160
161 tc = v9fs_create_tflush(oldtag);
162 if (!IS_ERR(tc)) {
163 ret = v9fs_mux_rpc(v9ses->mux, tc, NULL);
164 kfree(tc);
165 } else
166 ret = PTR_ERR(tc);
167
168 return ret;
169}
170#endif
171
172/**
173 * v9fs_t_stat - read a file's meta-data
174 * @v9ses: 9P2000 session information
175 * @fid: fid pointing to file or directory to get info about
176 * @fcall: pointer to response fcall
177 *
178 */
179
180int
181v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **rcp)
182{
183 int ret;
184 struct v9fs_fcall *tc;
185
186 dprintk(DEBUG_9P, "fid %d\n", fid);
187
188 ret = -ENOMEM;
189 tc = v9fs_create_tstat(fid);
190 if (!IS_ERR(tc)) {
191 ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
192 kfree(tc);
193 } else
194 ret = PTR_ERR(tc);
195
196 return ret;
197}
198
199/**
200 * v9fs_t_wstat - write a file's meta-data
201 * @v9ses: 9P2000 session information
202 * @fid: fid pointing to file or directory to write info about
203 * @stat: metadata
204 * @fcall: pointer to response fcall
205 *
206 */
207
208int
209v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid,
210 struct v9fs_wstat *wstat, struct v9fs_fcall **rcp)
211{
212 int ret;
213 struct v9fs_fcall *tc;
214
215 dprintk(DEBUG_9P, "fid %d\n", fid);
216
217 tc = v9fs_create_twstat(fid, wstat, v9ses->extended);
218 if (!IS_ERR(tc)) {
219 ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
220 kfree(tc);
221 } else
222 ret = PTR_ERR(tc);
223
224 return ret;
225}
226
227/**
228 * v9fs_t_walk - walk a fid to a new file or directory
229 * @v9ses: 9P2000 session information
230 * @fid: fid to walk
231 * @newfid: new fid (for clone operations)
232 * @name: path to walk fid to
233 * @fcall: pointer to response fcall
234 *
235 */
236
237/* TODO: support multiple walk */
238
239int
240v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid,
241 char *name, struct v9fs_fcall **rcp)
242{
243 int ret;
244 struct v9fs_fcall *tc;
245 int nwname;
246
247 dprintk(DEBUG_9P, "fid %d newfid %d wname '%s'\n", fid, newfid, name);
248
249 if (name)
250 nwname = 1;
251 else
252 nwname = 0;
253
254 tc = v9fs_create_twalk(fid, newfid, nwname, &name);
255 if (!IS_ERR(tc)) {
256 ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
257 kfree(tc);
258 } else
259 ret = PTR_ERR(tc);
260
261 return ret;
262}
263
264/**
265 * v9fs_t_open - open a file
266 *
267 * @v9ses - 9P2000 session information
268 * @fid - fid to open
269 * @mode - mode to open file (R, RW, etc)
270 * @fcall - pointer to response fcall
271 *
272 */
273
274int
275v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode,
276 struct v9fs_fcall **rcp)
277{
278 int ret;
279 struct v9fs_fcall *tc;
280
281 dprintk(DEBUG_9P, "fid %d mode %d\n", fid, mode);
282
283 tc = v9fs_create_topen(fid, mode);
284 if (!IS_ERR(tc)) {
285 ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
286 kfree(tc);
287 } else
288 ret = PTR_ERR(tc);
289
290 return ret;
291}
292
293/**
294 * v9fs_t_remove - remove a file or directory
295 * @v9ses: 9P2000 session information
296 * @fid: fid to remove
297 * @fcall: pointer to response fcall
298 *
299 */
300
301int
302v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid,
303 struct v9fs_fcall **rcp)
304{
305 int ret;
306 struct v9fs_fcall *tc;
307
308 dprintk(DEBUG_9P, "fid %d\n", fid);
309
310 tc = v9fs_create_tremove(fid);
311 if (!IS_ERR(tc)) {
312 ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
313 kfree(tc);
314 } else
315 ret = PTR_ERR(tc);
316
317 return ret;
318}
319
320/**
321 * v9fs_t_create - create a file or directory
322 * @v9ses: 9P2000 session information
323 * @fid: fid to create
324 * @name: name of the file or directory to create
325 * @perm: permissions to create with
326 * @mode: mode to open file (R, RW, etc)
327 * @fcall: pointer to response fcall
328 *
329 */
330
331int
332v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name, u32 perm,
333 u8 mode, char *extension, struct v9fs_fcall **rcp)
334{
335 int ret;
336 struct v9fs_fcall *tc;
337
338 dprintk(DEBUG_9P, "fid %d name '%s' perm %x mode %d\n",
339 fid, name, perm, mode);
340
341 tc = v9fs_create_tcreate(fid, name, perm, mode, extension,
342 v9ses->extended);
343
344 if (!IS_ERR(tc)) {
345 ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
346 kfree(tc);
347 } else
348 ret = PTR_ERR(tc);
349
350 return ret;
351}
352
353/**
354 * v9fs_t_read - read data
355 * @v9ses: 9P2000 session information
356 * @fid: fid to read from
357 * @offset: offset to start read at
358 * @count: how many bytes to read
359 * @fcall: pointer to response fcall (with data)
360 *
361 */
362
363int
364v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, u64 offset,
365 u32 count, struct v9fs_fcall **rcp)
366{
367 int ret;
368 struct v9fs_fcall *tc, *rc;
369
370 dprintk(DEBUG_9P, "fid %d offset 0x%llux count 0x%x\n", fid,
371 (long long unsigned) offset, count);
372
373 tc = v9fs_create_tread(fid, offset, count);
374 if (!IS_ERR(tc)) {
375 ret = v9fs_mux_rpc(v9ses->mux, tc, &rc);
376 if (!ret)
377 ret = rc->params.rread.count;
378 if (rcp)
379 *rcp = rc;
380 else
381 kfree(rc);
382
383 kfree(tc);
384 } else
385 ret = PTR_ERR(tc);
386
387 return ret;
388}
389
390/**
391 * v9fs_t_write - write data
392 * @v9ses: 9P2000 session information
393 * @fid: fid to write to
394 * @offset: offset to start write at
395 * @count: how many bytes to write
396 * @fcall: pointer to response fcall
397 *
398 */
399
400int
401v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset, u32 count,
402 const char __user *data, struct v9fs_fcall **rcp)
403{
404 int ret;
405 struct v9fs_fcall *tc, *rc;
406
407 dprintk(DEBUG_9P, "fid %d offset 0x%llux count 0x%x\n", fid,
408 (long long unsigned) offset, count);
409
410 tc = v9fs_create_twrite(fid, offset, count, data);
411 if (!IS_ERR(tc)) {
412 ret = v9fs_mux_rpc(v9ses->mux, tc, &rc);
413
414 if (!ret)
415 ret = rc->params.rwrite.count;
416 if (rcp)
417 *rcp = rc;
418 else
419 kfree(rc);
420
421 kfree(tc);
422 } else
423 ret = PTR_ERR(tc);
424
425 return ret;
426}
427
diff --git a/fs/9p/fcprint.c b/fs/9p/fcprint.c
deleted file mode 100644
index 34b96114a28d..000000000000
--- a/fs/9p/fcprint.c
+++ /dev/null
@@ -1,345 +0,0 @@
1/*
2 * linux/fs/9p/fcprint.c
3 *
4 * Print 9P call.
5 *
6 * Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2
10 * as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to:
19 * Free Software Foundation
20 * 51 Franklin Street, Fifth Floor
21 * Boston, MA 02111-1301 USA
22 *
23 */
24#include <linux/module.h>
25#include <linux/errno.h>
26#include <linux/fs.h>
27#include <linux/idr.h>
28
29#include "debug.h"
30#include "v9fs.h"
31#include "9p.h"
32#include "mux.h"
33
34static int
35v9fs_printqid(char *buf, int buflen, struct v9fs_qid *q)
36{
37 int n;
38 char b[10];
39
40 n = 0;
41 if (q->type & V9FS_QTDIR)
42 b[n++] = 'd';
43 if (q->type & V9FS_QTAPPEND)
44 b[n++] = 'a';
45 if (q->type & V9FS_QTAUTH)
46 b[n++] = 'A';
47 if (q->type & V9FS_QTEXCL)
48 b[n++] = 'l';
49 if (q->type & V9FS_QTTMP)
50 b[n++] = 't';
51 if (q->type & V9FS_QTSYMLINK)
52 b[n++] = 'L';
53 b[n] = '\0';
54
55 return scnprintf(buf, buflen, "(%.16llx %x %s)", (long long int) q->path,
56 q->version, b);
57}
58
59static int
60v9fs_printperm(char *buf, int buflen, int perm)
61{
62 int n;
63 char b[15];
64
65 n = 0;
66 if (perm & V9FS_DMDIR)
67 b[n++] = 'd';
68 if (perm & V9FS_DMAPPEND)
69 b[n++] = 'a';
70 if (perm & V9FS_DMAUTH)
71 b[n++] = 'A';
72 if (perm & V9FS_DMEXCL)
73 b[n++] = 'l';
74 if (perm & V9FS_DMTMP)
75 b[n++] = 't';
76 if (perm & V9FS_DMDEVICE)
77 b[n++] = 'D';
78 if (perm & V9FS_DMSOCKET)
79 b[n++] = 'S';
80 if (perm & V9FS_DMNAMEDPIPE)
81 b[n++] = 'P';
82 if (perm & V9FS_DMSYMLINK)
83 b[n++] = 'L';
84 b[n] = '\0';
85
86 return scnprintf(buf, buflen, "%s%03o", b, perm&077);
87}
88
89static int
90v9fs_printstat(char *buf, int buflen, struct v9fs_stat *st, int extended)
91{
92 int n;
93
94 n = scnprintf(buf, buflen, "'%.*s' '%.*s'", st->name.len,
95 st->name.str, st->uid.len, st->uid.str);
96 if (extended)
97 n += scnprintf(buf+n, buflen-n, "(%d)", st->n_uid);
98
99 n += scnprintf(buf+n, buflen-n, " '%.*s'", st->gid.len, st->gid.str);
100 if (extended)
101 n += scnprintf(buf+n, buflen-n, "(%d)", st->n_gid);
102
103 n += scnprintf(buf+n, buflen-n, " '%.*s'", st->muid.len, st->muid.str);
104 if (extended)
105 n += scnprintf(buf+n, buflen-n, "(%d)", st->n_muid);
106
107 n += scnprintf(buf+n, buflen-n, " q ");
108 n += v9fs_printqid(buf+n, buflen-n, &st->qid);
109 n += scnprintf(buf+n, buflen-n, " m ");
110 n += v9fs_printperm(buf+n, buflen-n, st->mode);
111 n += scnprintf(buf+n, buflen-n, " at %d mt %d l %lld",
112 st->atime, st->mtime, (long long int) st->length);
113
114 if (extended)
115 n += scnprintf(buf+n, buflen-n, " ext '%.*s'",
116 st->extension.len, st->extension.str);
117
118 return n;
119}
120
121static int
122v9fs_dumpdata(char *buf, int buflen, u8 *data, int datalen)
123{
124 int i, n;
125
126 i = n = 0;
127 while (i < datalen) {
128 n += scnprintf(buf + n, buflen - n, "%02x", data[i]);
129 if (i%4 == 3)
130 n += scnprintf(buf + n, buflen - n, " ");
131 if (i%32 == 31)
132 n += scnprintf(buf + n, buflen - n, "\n");
133
134 i++;
135 }
136 n += scnprintf(buf + n, buflen - n, "\n");
137
138 return n;
139}
140
141static int
142v9fs_printdata(char *buf, int buflen, u8 *data, int datalen)
143{
144 return v9fs_dumpdata(buf, buflen, data, datalen<16?datalen:16);
145}
146
147int
148v9fs_printfcall(char *buf, int buflen, struct v9fs_fcall *fc, int extended)
149{
150 int i, ret, type, tag;
151
152 if (!fc)
153 return scnprintf(buf, buflen, "<NULL>");
154
155 type = fc->id;
156 tag = fc->tag;
157
158 ret = 0;
159 switch (type) {
160 case TVERSION:
161 ret += scnprintf(buf+ret, buflen-ret,
162 "Tversion tag %u msize %u version '%.*s'", tag,
163 fc->params.tversion.msize, fc->params.tversion.version.len,
164 fc->params.tversion.version.str);
165 break;
166
167 case RVERSION:
168 ret += scnprintf(buf+ret, buflen-ret,
169 "Rversion tag %u msize %u version '%.*s'", tag,
170 fc->params.rversion.msize, fc->params.rversion.version.len,
171 fc->params.rversion.version.str);
172 break;
173
174 case TAUTH:
175 ret += scnprintf(buf+ret, buflen-ret,
176 "Tauth tag %u afid %d uname '%.*s' aname '%.*s'", tag,
177 fc->params.tauth.afid, fc->params.tauth.uname.len,
178 fc->params.tauth.uname.str, fc->params.tauth.aname.len,
179 fc->params.tauth.aname.str);
180 break;
181
182 case RAUTH:
183 ret += scnprintf(buf+ret, buflen-ret, "Rauth tag %u qid ", tag);
184 v9fs_printqid(buf+ret, buflen-ret, &fc->params.rauth.qid);
185 break;
186
187 case TATTACH:
188 ret += scnprintf(buf+ret, buflen-ret,
189 "Tattach tag %u fid %d afid %d uname '%.*s' aname '%.*s'",
190 tag, fc->params.tattach.fid, fc->params.tattach.afid,
191 fc->params.tattach.uname.len, fc->params.tattach.uname.str,
192 fc->params.tattach.aname.len, fc->params.tattach.aname.str);
193 break;
194
195 case RATTACH:
196 ret += scnprintf(buf+ret, buflen-ret, "Rattach tag %u qid ", tag);
197 v9fs_printqid(buf+ret, buflen-ret, &fc->params.rattach.qid);
198 break;
199
200 case RERROR:
201 ret += scnprintf(buf+ret, buflen-ret, "Rerror tag %u ename '%.*s'",
202 tag, fc->params.rerror.error.len,
203 fc->params.rerror.error.str);
204 if (extended)
205 ret += scnprintf(buf+ret, buflen-ret, " ecode %d\n",
206 fc->params.rerror.errno);
207 break;
208
209 case TFLUSH:
210 ret += scnprintf(buf+ret, buflen-ret, "Tflush tag %u oldtag %u",
211 tag, fc->params.tflush.oldtag);
212 break;
213
214 case RFLUSH:
215 ret += scnprintf(buf+ret, buflen-ret, "Rflush tag %u", tag);
216 break;
217
218 case TWALK:
219 ret += scnprintf(buf+ret, buflen-ret,
220 "Twalk tag %u fid %d newfid %d nwname %d", tag,
221 fc->params.twalk.fid, fc->params.twalk.newfid,
222 fc->params.twalk.nwname);
223 for(i = 0; i < fc->params.twalk.nwname; i++)
224 ret += scnprintf(buf+ret, buflen-ret," '%.*s'",
225 fc->params.twalk.wnames[i].len,
226 fc->params.twalk.wnames[i].str);
227 break;
228
229 case RWALK:
230 ret += scnprintf(buf+ret, buflen-ret, "Rwalk tag %u nwqid %d",
231 tag, fc->params.rwalk.nwqid);
232 for(i = 0; i < fc->params.rwalk.nwqid; i++)
233 ret += v9fs_printqid(buf+ret, buflen-ret,
234 &fc->params.rwalk.wqids[i]);
235 break;
236
237 case TOPEN:
238 ret += scnprintf(buf+ret, buflen-ret,
239 "Topen tag %u fid %d mode %d", tag,
240 fc->params.topen.fid, fc->params.topen.mode);
241 break;
242
243 case ROPEN:
244 ret += scnprintf(buf+ret, buflen-ret, "Ropen tag %u", tag);
245 ret += v9fs_printqid(buf+ret, buflen-ret, &fc->params.ropen.qid);
246 ret += scnprintf(buf+ret, buflen-ret," iounit %d",
247 fc->params.ropen.iounit);
248 break;
249
250 case TCREATE:
251 ret += scnprintf(buf+ret, buflen-ret,
252 "Tcreate tag %u fid %d name '%.*s' perm ", tag,
253 fc->params.tcreate.fid, fc->params.tcreate.name.len,
254 fc->params.tcreate.name.str);
255
256 ret += v9fs_printperm(buf+ret, buflen-ret, fc->params.tcreate.perm);
257 ret += scnprintf(buf+ret, buflen-ret, " mode %d",
258 fc->params.tcreate.mode);
259 break;
260
261 case RCREATE:
262 ret += scnprintf(buf+ret, buflen-ret, "Rcreate tag %u", tag);
263 ret += v9fs_printqid(buf+ret, buflen-ret, &fc->params.rcreate.qid);
264 ret += scnprintf(buf+ret, buflen-ret, " iounit %d",
265 fc->params.rcreate.iounit);
266 break;
267
268 case TREAD:
269 ret += scnprintf(buf+ret, buflen-ret,
270 "Tread tag %u fid %d offset %lld count %u", tag,
271 fc->params.tread.fid,
272 (long long int) fc->params.tread.offset,
273 fc->params.tread.count);
274 break;
275
276 case RREAD:
277 ret += scnprintf(buf+ret, buflen-ret,
278 "Rread tag %u count %u data ", tag,
279 fc->params.rread.count);
280 ret += v9fs_printdata(buf+ret, buflen-ret, fc->params.rread.data,
281 fc->params.rread.count);
282 break;
283
284 case TWRITE:
285 ret += scnprintf(buf+ret, buflen-ret,
286 "Twrite tag %u fid %d offset %lld count %u data ",
287 tag, fc->params.twrite.fid,
288 (long long int) fc->params.twrite.offset,
289 fc->params.twrite.count);
290 ret += v9fs_printdata(buf+ret, buflen-ret, fc->params.twrite.data,
291 fc->params.twrite.count);
292 break;
293
294 case RWRITE:
295 ret += scnprintf(buf+ret, buflen-ret, "Rwrite tag %u count %u",
296 tag, fc->params.rwrite.count);
297 break;
298
299 case TCLUNK:
300 ret += scnprintf(buf+ret, buflen-ret, "Tclunk tag %u fid %d",
301 tag, fc->params.tclunk.fid);
302 break;
303
304 case RCLUNK:
305 ret += scnprintf(buf+ret, buflen-ret, "Rclunk tag %u", tag);
306 break;
307
308 case TREMOVE:
309 ret += scnprintf(buf+ret, buflen-ret, "Tremove tag %u fid %d",
310 tag, fc->params.tremove.fid);
311 break;
312
313 case RREMOVE:
314 ret += scnprintf(buf+ret, buflen-ret, "Rremove tag %u", tag);
315 break;
316
317 case TSTAT:
318 ret += scnprintf(buf+ret, buflen-ret, "Tstat tag %u fid %d",
319 tag, fc->params.tstat.fid);
320 break;
321
322 case RSTAT:
323 ret += scnprintf(buf+ret, buflen-ret, "Rstat tag %u ", tag);
324 ret += v9fs_printstat(buf+ret, buflen-ret, &fc->params.rstat.stat,
325 extended);
326 break;
327
328 case TWSTAT:
329 ret += scnprintf(buf+ret, buflen-ret, "Twstat tag %u fid %d ",
330 tag, fc->params.twstat.fid);
331 ret += v9fs_printstat(buf+ret, buflen-ret, &fc->params.twstat.stat,
332 extended);
333 break;
334
335 case RWSTAT:
336 ret += scnprintf(buf+ret, buflen-ret, "Rwstat tag %u", tag);
337 break;
338
339 default:
340 ret += scnprintf(buf+ret, buflen-ret, "unknown type %d", type);
341 break;
342 }
343
344 return ret;
345}
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 90419715c7e9..08fa320b7e6d 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -26,10 +26,10 @@
26#include <linux/sched.h> 26#include <linux/sched.h>
27#include <linux/idr.h> 27#include <linux/idr.h>
28#include <asm/semaphore.h> 28#include <asm/semaphore.h>
29#include <net/9p/9p.h>
30#include <net/9p/client.h>
29 31
30#include "debug.h"
31#include "v9fs.h" 32#include "v9fs.h"
32#include "9p.h"
33#include "v9fs_vfs.h" 33#include "v9fs_vfs.h"
34#include "fid.h" 34#include "fid.h"
35 35
@@ -40,67 +40,29 @@
40 * 40 *
41 */ 41 */
42 42
43int v9fs_fid_insert(struct v9fs_fid *fid, struct dentry *dentry) 43int v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid)
44{ 44{
45 struct list_head *fid_list = (struct list_head *)dentry->d_fsdata; 45 struct v9fs_dentry *dent;
46 dprintk(DEBUG_9P, "fid %d (%p) dentry %s (%p)\n", fid->fid, fid,
47 dentry->d_iname, dentry);
48 if (dentry->d_fsdata == NULL) {
49 dentry->d_fsdata =
50 kmalloc(sizeof(struct list_head), GFP_KERNEL);
51 if (dentry->d_fsdata == NULL) {
52 dprintk(DEBUG_ERROR, "Out of memory\n");
53 return -ENOMEM;
54 }
55 fid_list = (struct list_head *)dentry->d_fsdata;
56 INIT_LIST_HEAD(fid_list); /* Initialize list head */
57 }
58 46
59 fid->uid = current->uid; 47 P9_DPRINTK(P9_DEBUG_VFS, "fid %d dentry %s\n",
60 list_add(&fid->list, fid_list); 48 fid->fid, dentry->d_iname);
61 return 0;
62}
63 49
64/** 50 dent = dentry->d_fsdata;
65 * v9fs_fid_create - allocate a FID structure 51 if (!dent) {
66 * @dentry - dentry to link newly created fid to 52 dent = kmalloc(sizeof(struct v9fs_dentry), GFP_KERNEL);
67 * 53 if (!dent)
68 */ 54 return -ENOMEM;
69
70struct v9fs_fid *v9fs_fid_create(struct v9fs_session_info *v9ses, int fid)
71{
72 struct v9fs_fid *new;
73 55
74 dprintk(DEBUG_9P, "fid create fid %d\n", fid); 56 spin_lock_init(&dent->lock);
75 new = kmalloc(sizeof(struct v9fs_fid), GFP_KERNEL); 57 INIT_LIST_HEAD(&dent->fidlist);
76 if (new == NULL) { 58 dentry->d_fsdata = dent;
77 dprintk(DEBUG_ERROR, "Out of Memory\n");
78 return ERR_PTR(-ENOMEM);
79 } 59 }
80 60
81 new->fid = fid; 61 spin_lock(&dent->lock);
82 new->v9ses = v9ses; 62 list_add(&fid->dlist, &dent->fidlist);
83 new->fidopen = 0; 63 spin_unlock(&dent->lock);
84 new->fidclunked = 0;
85 new->iounit = 0;
86 new->rdir_pos = 0;
87 new->rdir_fcall = NULL;
88 init_MUTEX(&new->lock);
89 INIT_LIST_HEAD(&new->list);
90
91 return new;
92}
93
94/**
95 * v9fs_fid_destroy - deallocate a FID structure
96 * @fid: fid to destroy
97 *
98 */
99 64
100void v9fs_fid_destroy(struct v9fs_fid *fid) 65 return 0;
101{
102 list_del(&fid->list);
103 kfree(fid);
104} 66}
105 67
106/** 68/**
@@ -114,30 +76,42 @@ void v9fs_fid_destroy(struct v9fs_fid *fid)
114 * 76 *
115 */ 77 */
116 78
117struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry) 79struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
118{ 80{
119 struct list_head *fid_list = (struct list_head *)dentry->d_fsdata; 81 struct v9fs_dentry *dent;
120 struct v9fs_fid *return_fid = NULL; 82 struct p9_fid *fid;
121 83
122 dprintk(DEBUG_9P, " dentry: %s (%p)\n", dentry->d_iname, dentry); 84 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
123 85 dent = dentry->d_fsdata;
124 if (fid_list) 86 if (dent)
125 return_fid = list_entry(fid_list->next, struct v9fs_fid, list); 87 fid = list_entry(dent->fidlist.next, struct p9_fid, dlist);
88 else
89 fid = ERR_PTR(-EBADF);
90
91 P9_DPRINTK(P9_DEBUG_VFS, " fid: %p\n", fid);
92 return fid;
93}
126 94
127 if (!return_fid) { 95struct p9_fid *v9fs_fid_lookup_remove(struct dentry *dentry)
128 dprintk(DEBUG_ERROR, "Couldn't find a fid in dentry\n"); 96{
129 return_fid = ERR_PTR(-EBADF); 97 struct p9_fid *fid;
98 struct v9fs_dentry *dent;
99
100 dent = dentry->d_fsdata;
101 fid = v9fs_fid_lookup(dentry);
102 if (!IS_ERR(fid)) {
103 spin_lock(&dent->lock);
104 list_del(&fid->dlist);
105 spin_unlock(&dent->lock);
130 } 106 }
131 107
132 if(down_interruptible(&return_fid->lock)) 108 return fid;
133 return ERR_PTR(-EINTR);
134
135 return return_fid;
136} 109}
137 110
111
138/** 112/**
139 * v9fs_fid_clone - lookup the fid for a dentry, clone a private copy and 113 * v9fs_fid_clone - lookup the fid for a dentry, clone a private copy and
140 * release it 114 * release it
141 * @dentry: dentry to look for fid in 115 * @dentry: dentry to look for fid in
142 * 116 *
143 * find a fid in the dentry and then clone to a new private fid 117 * find a fid in the dentry and then clone to a new private fid
@@ -146,49 +120,15 @@ struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry)
146 * 120 *
147 */ 121 */
148 122
149struct v9fs_fid *v9fs_fid_clone(struct dentry *dentry) 123struct p9_fid *v9fs_fid_clone(struct dentry *dentry)
150{ 124{
151 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode); 125 struct p9_fid *ofid, *fid;
152 struct v9fs_fid *base_fid, *new_fid = ERR_PTR(-EBADF);
153 struct v9fs_fcall *fcall = NULL;
154 int fid, err;
155
156 base_fid = v9fs_fid_lookup(dentry);
157
158 if(IS_ERR(base_fid))
159 return base_fid;
160
161 if(base_fid) { /* clone fid */
162 fid = v9fs_get_idpool(&v9ses->fidpool);
163 if (fid < 0) {
164 eprintk(KERN_WARNING, "newfid fails!\n");
165 new_fid = ERR_PTR(-ENOSPC);
166 goto Release_Fid;
167 }
168
169 err = v9fs_t_walk(v9ses, base_fid->fid, fid, NULL, &fcall);
170 if (err < 0) {
171 dprintk(DEBUG_ERROR, "clone walk didn't work\n");
172 v9fs_put_idpool(fid, &v9ses->fidpool);
173 new_fid = ERR_PTR(err);
174 goto Free_Fcall;
175 }
176 new_fid = v9fs_fid_create(v9ses, fid);
177 if (new_fid == NULL) {
178 dprintk(DEBUG_ERROR, "out of memory\n");
179 new_fid = ERR_PTR(-ENOMEM);
180 }
181Free_Fcall:
182 kfree(fcall);
183 }
184 126
185Release_Fid: 127 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
186 up(&base_fid->lock); 128 ofid = v9fs_fid_lookup(dentry);
187 return new_fid; 129 if (IS_ERR(ofid))
188} 130 return ofid;
189 131
190void v9fs_fid_clunk(struct v9fs_session_info *v9ses, struct v9fs_fid *fid) 132 fid = p9_client_walk(ofid, 0, NULL, 1);
191{ 133 return fid;
192 v9fs_t_clunk(v9ses, fid->fid);
193 v9fs_fid_destroy(fid);
194} 134}
diff --git a/fs/9p/fid.h b/fs/9p/fid.h
index 48fc170c26c8..47a0ba742872 100644
--- a/fs/9p/fid.h
+++ b/fs/9p/fid.h
@@ -22,41 +22,12 @@
22 22
23#include <linux/list.h> 23#include <linux/list.h>
24 24
25#define FID_OP 0 25struct v9fs_dentry {
26#define FID_WALK 1 26 spinlock_t lock; /* protect fidlist */
27#define FID_CREATE 2 27 struct list_head fidlist;
28
29struct v9fs_fid {
30 struct list_head list; /* list of fids associated with a dentry */
31 struct list_head active; /* XXX - debug */
32
33 struct semaphore lock;
34
35 u32 fid;
36 unsigned char fidopen; /* set when fid is opened */
37 unsigned char fidclunked; /* set when fid has already been clunked */
38
39 struct v9fs_qid qid;
40 u32 iounit;
41
42 /* readdir stuff */
43 int rdir_fpos;
44 loff_t rdir_pos;
45 struct v9fs_fcall *rdir_fcall;
46
47 /* management stuff */
48 uid_t uid; /* user associated with this fid */
49
50 /* private data */
51 struct file *filp; /* backpointer to File struct for open files */
52 struct v9fs_session_info *v9ses; /* session info for this FID */
53}; 28};
54 29
55struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry); 30struct p9_fid *v9fs_fid_lookup(struct dentry *dentry);
56struct v9fs_fid *v9fs_fid_get_created(struct dentry *); 31struct p9_fid *v9fs_fid_lookup_remove(struct dentry *dentry);
57void v9fs_fid_destroy(struct v9fs_fid *fid); 32struct p9_fid *v9fs_fid_clone(struct dentry *dentry);
58struct v9fs_fid *v9fs_fid_create(struct v9fs_session_info *, int fid); 33int v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid);
59int v9fs_fid_insert(struct v9fs_fid *fid, struct dentry *dentry);
60struct v9fs_fid *v9fs_fid_clone(struct dentry *dentry);
61void v9fs_fid_clunk(struct v9fs_session_info *v9ses, struct v9fs_fid *fid);
62
diff --git a/fs/9p/mux.c b/fs/9p/mux.c
deleted file mode 100644
index c783874a9caf..000000000000
--- a/fs/9p/mux.c
+++ /dev/null
@@ -1,1033 +0,0 @@
1/*
2 * linux/fs/9p/mux.c
3 *
4 * Protocol Multiplexer
5 *
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 * Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2
11 * as published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to:
20 * Free Software Foundation
21 * 51 Franklin Street, Fifth Floor
22 * Boston, MA 02111-1301 USA
23 *
24 */
25
26#include <linux/module.h>
27#include <linux/errno.h>
28#include <linux/fs.h>
29#include <linux/poll.h>
30#include <linux/kthread.h>
31#include <linux/idr.h>
32#include <linux/mutex.h>
33
34#include "debug.h"
35#include "v9fs.h"
36#include "9p.h"
37#include "conv.h"
38#include "transport.h"
39#include "mux.h"
40
/* v9fs_req.err value that makes the write worker skip a queued request */
41#define ERREQFLUSH 1
/* sleep interval of the poll thread; multiplied by HZ at the call site */
42#define SCHED_TIMEOUT 10
/* number of entries in v9fs_mux_data.poll_wait[] and .poll_waddr[] */
43#define MAXPOLLWADDR 2
44
/*
 * State flags kept in v9fs_mux_data.wsched.  They are passed to set_bit(),
 * clear_bit() and test_and_set_bit() as bit numbers, so the values select
 * bits 1, 2, 4 and 8 of the word rather than acting as masks.
 */
45enum {
46 Rworksched = 1, /* read work scheduled or running */
47 Rpending = 2, /* can read */
48 Wworksched = 4, /* write work scheduled or running */
49 Wpending = 8, /* can write */
50};
51
/*
 * NOTE(review): presumably the states stored in v9fs_req.flush; no user of
 * these constants is visible in this part of the file -- confirm.
 */
52enum {
53 None,
54 Flushing,
55 Flushed,
56};
57
58struct v9fs_mux_poll_task;
59
/*
 * One outstanding request on a mux.
 *
 * tcall is the message to transmit: the write worker sends tcall->sdata,
 * tcall->size bytes.  rcall is the reply examined by process_request().
 * err is 0, a negative errno, or ERREQFLUSH (request is skipped by the
 * write worker).  req_list links the request on the mux's unsent_req_list
 * and, once picked up for sending, on its req_list.
 *
 * NOTE(review): lock, tag, cb, cba and flush are not used in the visible
 * part of the file; cb/cba look like a completion callback and its
 * argument -- confirm against the rest of mux.c.
 */
60struct v9fs_req {
61 spinlock_t lock;
62 int tag;
63 struct v9fs_fcall *tcall;
64 struct v9fs_fcall *rcall;
65 int err;
66 v9fs_mux_req_callback cb;
67 void *cba;
68 int flush;
69 struct list_head req_list;
70};
71
/*
 * Per-session multiplexer state, allocated by v9fs_mux_init().
 *
 * lock              - held by the write worker while moving requests from
 *                     unsent_req_list to req_list
 * mux_list          - links this mux on its poll task's mux_list
 * poll_task         - poll task serving this mux
 * msize             - maximum message size; also the size of the receive
 *                     buffer that follows rcall
 * extended          - pointer to the session's extended flag (dereferenced
 *                     when parsing and printing messages)
 * trans             - transport used for poll/read/write
 * tagpool           - id pool for request tags
 * err               - negative once the mux has failed; workers and the
 *                     poller bail out when it is < 0
 * equeue            - waited on in v9fs_mux_destroy() until req_list is
 *                     empty
 * req_list          - requests already handed to the write worker
 * unsent_req_list   - requests not yet transmitted
 * rcall, rpos, rbuf - reply being received: fcall, bytes read so far, and
 *                     the raw buffer located right after the fcall
 * wpos, wsize, wbuf - message being sent: bytes written so far, total
 *                     size, and the data pointer
 * poll_wait,
 * poll_waddr        - wait-queue entries and the wait-queue heads they are
 *                     registered on; a poll_waddr slot is NULL when free
 *                     and may hold an ERR_PTR set by v9fs_pollwait()
 * pt                - poll table whose callback is v9fs_pollwait()
 * rq, wq            - read and write work items
 * wsched            - Rworksched/Rpending/Wworksched/Wpending bits
 */
72struct v9fs_mux_data {
73 spinlock_t lock;
74 struct list_head mux_list;
75 struct v9fs_mux_poll_task *poll_task;
76 int msize;
77 unsigned char *extended;
78 struct v9fs_transport *trans;
79 struct v9fs_idpool tagpool;
80 int err;
81 wait_queue_head_t equeue;
82 struct list_head req_list;
83 struct list_head unsent_req_list;
84 struct v9fs_fcall *rcall;
85 int rpos;
86 char *rbuf;
87 int wpos;
88 int wsize;
89 char *wbuf;
90 wait_queue_t poll_wait[MAXPOLLWADDR];
91 wait_queue_head_t *poll_waddr[MAXPOLLWADDR];
92 poll_table pt;
93 struct work_struct rq;
94 struct work_struct wq;
95 unsigned long wsched;
96};
97
/*
 * One slot of v9fs_mux_poll_tasks[]: a polling kernel thread and the muxes
 * it serves.  task is NULL while the slot is free; mux_list chains the
 * muxes via v9fs_mux_data.mux_list; muxnum counts the muxes on that list.
 */
98struct v9fs_mux_poll_task {
99 struct task_struct *task;
100 struct list_head mux_list;
101 int muxnum;
102};
103
/*
 * NOTE(review): looks like the per-call context of a synchronous RPC (mux,
 * result code, request/reply fcalls and a wait queue for the caller); it is
 * not used in the visible part of the file -- confirm against v9fs_mux_rpc().
 */
104struct v9fs_mux_rpc {
105 struct v9fs_mux_data *m;
106 int err;
107 struct v9fs_fcall *tcall;
108 struct v9fs_fcall *rcall;
109 wait_queue_head_t wqueue;
110};
111
112static int v9fs_poll_proc(void *);
113static void v9fs_read_work(struct work_struct *work);
114static void v9fs_write_work(struct work_struct *work);
115static void v9fs_pollwait(struct file *filp, wait_queue_head_t * wait_address,
116 poll_table * p);
117static u16 v9fs_mux_get_tag(struct v9fs_mux_data *);
118static void v9fs_mux_put_tag(struct v9fs_mux_data *, u16);
119
/* taken by v9fs_mux_poll_start()/v9fs_mux_poll_stop() around the counters and slots below */
120static DEFINE_MUTEX(v9fs_mux_task_lock);
/* workqueue on which the read and write work items are queued */
121static struct workqueue_struct *v9fs_mux_wq;
122
/* number of muxes currently attached to a poll task */
123static int v9fs_mux_num;
/* number of slots in v9fs_mux_poll_tasks[] with a running thread */
124static int v9fs_mux_poll_task_num;
/* poll thread slots; a slot is free while its task pointer is NULL */
125static struct v9fs_mux_poll_task v9fs_mux_poll_tasks[100];
126
127int v9fs_mux_global_init(void)
128{
129 int i;
130
131 for (i = 0; i < ARRAY_SIZE(v9fs_mux_poll_tasks); i++)
132 v9fs_mux_poll_tasks[i].task = NULL;
133
134 v9fs_mux_wq = create_workqueue("v9fs");
135 if (!v9fs_mux_wq) {
136 printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n");
137 return -ENOMEM;
138 }
139
140 return 0;
141}
142
143void v9fs_mux_global_exit(void)
144{
145 destroy_workqueue(v9fs_mux_wq);
146}
147
148/**
149 * v9fs_mux_calc_poll_procs - calculates the number of polling procs
150 * based on the number of mounted v9fs filesystems.
151 *
152 * The current implementation returns sqrt of the number of mounts.
153 */
154static int v9fs_mux_calc_poll_procs(int muxnum)
155{
156 int n;
157
158 if (v9fs_mux_poll_task_num)
159 n = muxnum / v9fs_mux_poll_task_num +
160 (muxnum % v9fs_mux_poll_task_num ? 1 : 0);
161 else
162 n = 1;
163
164 if (n > ARRAY_SIZE(v9fs_mux_poll_tasks))
165 n = ARRAY_SIZE(v9fs_mux_poll_tasks);
166
167 return n;
168}
169
170static int v9fs_mux_poll_start(struct v9fs_mux_data *m)
171{
172 int i, n;
173 struct v9fs_mux_poll_task *vpt, *vptlast;
174 struct task_struct *pproc;
175
176 dprintk(DEBUG_MUX, "mux %p muxnum %d procnum %d\n", m, v9fs_mux_num,
177 v9fs_mux_poll_task_num);
178 mutex_lock(&v9fs_mux_task_lock);
179
180 n = v9fs_mux_calc_poll_procs(v9fs_mux_num + 1);
181 if (n > v9fs_mux_poll_task_num) {
182 for (i = 0; i < ARRAY_SIZE(v9fs_mux_poll_tasks); i++) {
183 if (v9fs_mux_poll_tasks[i].task == NULL) {
184 vpt = &v9fs_mux_poll_tasks[i];
185 dprintk(DEBUG_MUX, "create proc %p\n", vpt);
186 pproc = kthread_create(v9fs_poll_proc, vpt,
187 "v9fs-poll");
188
189 if (!IS_ERR(pproc)) {
190 vpt->task = pproc;
191 INIT_LIST_HEAD(&vpt->mux_list);
192 vpt->muxnum = 0;
193 v9fs_mux_poll_task_num++;
194 wake_up_process(vpt->task);
195 }
196 break;
197 }
198 }
199
200 if (i >= ARRAY_SIZE(v9fs_mux_poll_tasks))
201 dprintk(DEBUG_ERROR, "warning: no free poll slots\n");
202 }
203
204 n = (v9fs_mux_num + 1) / v9fs_mux_poll_task_num +
205 ((v9fs_mux_num + 1) % v9fs_mux_poll_task_num ? 1 : 0);
206
207 vptlast = NULL;
208 for (i = 0; i < ARRAY_SIZE(v9fs_mux_poll_tasks); i++) {
209 vpt = &v9fs_mux_poll_tasks[i];
210 if (vpt->task != NULL) {
211 vptlast = vpt;
212 if (vpt->muxnum < n) {
213 dprintk(DEBUG_MUX, "put in proc %d\n", i);
214 list_add(&m->mux_list, &vpt->mux_list);
215 vpt->muxnum++;
216 m->poll_task = vpt;
217 memset(&m->poll_waddr, 0, sizeof(m->poll_waddr));
218 init_poll_funcptr(&m->pt, v9fs_pollwait);
219 break;
220 }
221 }
222 }
223
224 if (i >= ARRAY_SIZE(v9fs_mux_poll_tasks)) {
225 if (vptlast == NULL)
226 return -ENOMEM;
227
228 dprintk(DEBUG_MUX, "put in proc %d\n", i);
229 list_add(&m->mux_list, &vptlast->mux_list);
230 vptlast->muxnum++;
231 m->poll_task = vptlast;
232 memset(&m->poll_waddr, 0, sizeof(m->poll_waddr));
233 init_poll_funcptr(&m->pt, v9fs_pollwait);
234 }
235
236 v9fs_mux_num++;
237 mutex_unlock(&v9fs_mux_task_lock);
238
239 return 0;
240}
241
242static void v9fs_mux_poll_stop(struct v9fs_mux_data *m)
243{
244 int i;
245 struct v9fs_mux_poll_task *vpt;
246
247 mutex_lock(&v9fs_mux_task_lock);
248 vpt = m->poll_task;
249 list_del(&m->mux_list);
250 for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) {
251 if (m->poll_waddr[i] != NULL) {
252 remove_wait_queue(m->poll_waddr[i], &m->poll_wait[i]);
253 m->poll_waddr[i] = NULL;
254 }
255 }
256 vpt->muxnum--;
257 if (!vpt->muxnum) {
258 dprintk(DEBUG_MUX, "destroy proc %p\n", vpt);
259 kthread_stop(vpt->task);
260 vpt->task = NULL;
261 v9fs_mux_poll_task_num--;
262 }
263 v9fs_mux_num--;
264 mutex_unlock(&v9fs_mux_task_lock);
265}
266
267/**
268 * v9fs_mux_init - allocate and initialize the per-session mux data
269 * Creates the polling task if this is the first session.
270 *
271 * @trans - transport structure
272 * @msize - maximum message size
273 * @extended - pointer to the extended flag
274 */
275struct v9fs_mux_data *v9fs_mux_init(struct v9fs_transport *trans, int msize,
276 unsigned char *extended)
277{
278 int i, n;
279 struct v9fs_mux_data *m, *mtmp;
280
281 dprintk(DEBUG_MUX, "transport %p msize %d\n", trans, msize);
282 m = kmalloc(sizeof(struct v9fs_mux_data), GFP_KERNEL);
283 if (!m)
284 return ERR_PTR(-ENOMEM);
285
286 spin_lock_init(&m->lock);
287 INIT_LIST_HEAD(&m->mux_list);
288 m->msize = msize;
289 m->extended = extended;
290 m->trans = trans;
291 idr_init(&m->tagpool.pool);
292 init_MUTEX(&m->tagpool.lock);
293 m->err = 0;
294 init_waitqueue_head(&m->equeue);
295 INIT_LIST_HEAD(&m->req_list);
296 INIT_LIST_HEAD(&m->unsent_req_list);
297 m->rcall = NULL;
298 m->rpos = 0;
299 m->rbuf = NULL;
300 m->wpos = m->wsize = 0;
301 m->wbuf = NULL;
302 INIT_WORK(&m->rq, v9fs_read_work);
303 INIT_WORK(&m->wq, v9fs_write_work);
304 m->wsched = 0;
305 memset(&m->poll_waddr, 0, sizeof(m->poll_waddr));
306 m->poll_task = NULL;
307 n = v9fs_mux_poll_start(m);
308 if (n)
309 return ERR_PTR(n);
310
311 n = trans->poll(trans, &m->pt);
312 if (n & POLLIN) {
313 dprintk(DEBUG_MUX, "mux %p can read\n", m);
314 set_bit(Rpending, &m->wsched);
315 }
316
317 if (n & POLLOUT) {
318 dprintk(DEBUG_MUX, "mux %p can write\n", m);
319 set_bit(Wpending, &m->wsched);
320 }
321
322 for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) {
323 if (IS_ERR(m->poll_waddr[i])) {
324 v9fs_mux_poll_stop(m);
325 mtmp = (void *)m->poll_waddr; /* the error code */
326 kfree(m);
327 m = mtmp;
328 break;
329 }
330 }
331
332 return m;
333}
334
335/**
336 * v9fs_mux_destroy - cancels all pending requests and frees mux resources
337 */
338void v9fs_mux_destroy(struct v9fs_mux_data *m)
339{
340 dprintk(DEBUG_MUX, "mux %p prev %p next %p\n", m,
341 m->mux_list.prev, m->mux_list.next);
342 v9fs_mux_cancel(m, -ECONNRESET);
343
344 if (!list_empty(&m->req_list)) {
345 /* wait until all processes waiting on this session exit */
346 dprintk(DEBUG_MUX, "mux %p waiting for empty request queue\n",
347 m);
348 wait_event_timeout(m->equeue, (list_empty(&m->req_list)), 5000);
349 dprintk(DEBUG_MUX, "mux %p request queue empty: %d\n", m,
350 list_empty(&m->req_list));
351 }
352
353 v9fs_mux_poll_stop(m);
354 m->trans = NULL;
355
356 kfree(m);
357}
358
359/**
360 * v9fs_pollwait - called by files poll operation to add v9fs-poll task
361 * to files wait queue
362 */
363static void
364v9fs_pollwait(struct file *filp, wait_queue_head_t * wait_address,
365 poll_table * p)
366{
367 int i;
368 struct v9fs_mux_data *m;
369
370 m = container_of(p, struct v9fs_mux_data, pt);
371 for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++)
372 if (m->poll_waddr[i] == NULL)
373 break;
374
375 if (i >= ARRAY_SIZE(m->poll_waddr)) {
376 dprintk(DEBUG_ERROR, "not enough wait_address slots\n");
377 return;
378 }
379
380 m->poll_waddr[i] = wait_address;
381
382 if (!wait_address) {
383 dprintk(DEBUG_ERROR, "no wait_address\n");
384 m->poll_waddr[i] = ERR_PTR(-EIO);
385 return;
386 }
387
388 init_waitqueue_entry(&m->poll_wait[i], m->poll_task->task);
389 add_wait_queue(wait_address, &m->poll_wait[i]);
390}
391
392/**
393 * v9fs_poll_mux - polls a mux and schedules read or write works if necessary
394 */
395static void v9fs_poll_mux(struct v9fs_mux_data *m)
396{
397 int n;
398
399 if (m->err < 0)
400 return;
401
402 n = m->trans->poll(m->trans, NULL);
403 if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) {
404 dprintk(DEBUG_MUX, "error mux %p err %d\n", m, n);
405 if (n >= 0)
406 n = -ECONNRESET;
407 v9fs_mux_cancel(m, n);
408 }
409
410 if (n & POLLIN) {
411 set_bit(Rpending, &m->wsched);
412 dprintk(DEBUG_MUX, "mux %p can read\n", m);
413 if (!test_and_set_bit(Rworksched, &m->wsched)) {
414 dprintk(DEBUG_MUX, "schedule read work mux %p\n", m);
415 queue_work(v9fs_mux_wq, &m->rq);
416 }
417 }
418
419 if (n & POLLOUT) {
420 set_bit(Wpending, &m->wsched);
421 dprintk(DEBUG_MUX, "mux %p can write\n", m);
422 if ((m->wsize || !list_empty(&m->unsent_req_list))
423 && !test_and_set_bit(Wworksched, &m->wsched)) {
424 dprintk(DEBUG_MUX, "schedule write work mux %p\n", m);
425 queue_work(v9fs_mux_wq, &m->wq);
426 }
427 }
428}
429
430/**
431 * v9fs_poll_proc - polls all v9fs transports for new events and queues
432 * the appropriate work to the work queue
433 */
434static int v9fs_poll_proc(void *a)
435{
436 struct v9fs_mux_data *m, *mtmp;
437 struct v9fs_mux_poll_task *vpt;
438
439 vpt = a;
440 dprintk(DEBUG_MUX, "start %p %p\n", current, vpt);
441 while (!kthread_should_stop()) {
442 set_current_state(TASK_INTERRUPTIBLE);
443
444 list_for_each_entry_safe(m, mtmp, &vpt->mux_list, mux_list) {
445 v9fs_poll_mux(m);
446 }
447
448 dprintk(DEBUG_MUX, "sleeping...\n");
449 schedule_timeout(SCHED_TIMEOUT * HZ);
450 }
451
452 __set_current_state(TASK_RUNNING);
453 dprintk(DEBUG_MUX, "finish\n");
454 return 0;
455}
456
457/**
458 * v9fs_write_work - called when a transport can send some data
459 */
460static void v9fs_write_work(struct work_struct *work)
461{
462 int n, err;
463 struct v9fs_mux_data *m;
464 struct v9fs_req *req;
465
466 m = container_of(work, struct v9fs_mux_data, wq);
467
468 if (m->err < 0) {
469 clear_bit(Wworksched, &m->wsched);
470 return;
471 }
472
473 if (!m->wsize) {
474 if (list_empty(&m->unsent_req_list)) {
475 clear_bit(Wworksched, &m->wsched);
476 return;
477 }
478
479 spin_lock(&m->lock);
480again:
481 req = list_entry(m->unsent_req_list.next, struct v9fs_req,
482 req_list);
483 list_move_tail(&req->req_list, &m->req_list);
484 if (req->err == ERREQFLUSH)
485 goto again;
486
487 m->wbuf = req->tcall->sdata;
488 m->wsize = req->tcall->size;
489 m->wpos = 0;
490 dump_data(m->wbuf, m->wsize);
491 spin_unlock(&m->lock);
492 }
493
494 dprintk(DEBUG_MUX, "mux %p pos %d size %d\n", m, m->wpos, m->wsize);
495 clear_bit(Wpending, &m->wsched);
496 err = m->trans->write(m->trans, m->wbuf + m->wpos, m->wsize - m->wpos);
497 dprintk(DEBUG_MUX, "mux %p sent %d bytes\n", m, err);
498 if (err == -EAGAIN) {
499 clear_bit(Wworksched, &m->wsched);
500 return;
501 }
502
503 if (err <= 0)
504 goto error;
505
506 m->wpos += err;
507 if (m->wpos == m->wsize)
508 m->wpos = m->wsize = 0;
509
510 if (m->wsize == 0 && !list_empty(&m->unsent_req_list)) {
511 if (test_and_clear_bit(Wpending, &m->wsched))
512 n = POLLOUT;
513 else
514 n = m->trans->poll(m->trans, NULL);
515
516 if (n & POLLOUT) {
517 dprintk(DEBUG_MUX, "schedule write work mux %p\n", m);
518 queue_work(v9fs_mux_wq, &m->wq);
519 } else
520 clear_bit(Wworksched, &m->wsched);
521 } else
522 clear_bit(Wworksched, &m->wsched);
523
524 return;
525
526 error:
527 v9fs_mux_cancel(m, err);
528 clear_bit(Wworksched, &m->wsched);
529}
530
531static void process_request(struct v9fs_mux_data *m, struct v9fs_req *req)
532{
533 int ecode;
534 struct v9fs_str *ename;
535
536 if (!req->err && req->rcall->id == RERROR) {
537 ecode = req->rcall->params.rerror.errno;
538 ename = &req->rcall->params.rerror.error;
539
540 dprintk(DEBUG_MUX, "Rerror %.*s\n", ename->len, ename->str);
541
542 if (*m->extended)
543 req->err = -ecode;
544
545 if (!req->err) {
546 req->err = v9fs_errstr2errno(ename->str, ename->len);
547
548 if (!req->err) { /* string match failed */
549 PRINT_FCALL_ERROR("unknown error", req->rcall);
550 }
551
552 if (!req->err)
553 req->err = -ESERVERFAULT;
554 }
555 } else if (req->tcall && req->rcall->id != req->tcall->id + 1) {
556 dprintk(DEBUG_ERROR, "fcall mismatch: expected %d, got %d\n",
557 req->tcall->id + 1, req->rcall->id);
558 if (!req->err)
559 req->err = -EIO;
560 }
561}
562
563/**
564 * v9fs_read_work - called when there is some data to be read from a transport
565 */
566static void v9fs_read_work(struct work_struct *work)
567{
568 int n, err;
569 struct v9fs_mux_data *m;
570 struct v9fs_req *req, *rptr, *rreq;
571 struct v9fs_fcall *rcall;
572 char *rbuf;
573
574 m = container_of(work, struct v9fs_mux_data, rq);
575
576 if (m->err < 0)
577 return;
578
579 rcall = NULL;
580 dprintk(DEBUG_MUX, "start mux %p pos %d\n", m, m->rpos);
581
582 if (!m->rcall) {
583 m->rcall =
584 kmalloc(sizeof(struct v9fs_fcall) + m->msize, GFP_KERNEL);
585 if (!m->rcall) {
586 err = -ENOMEM;
587 goto error;
588 }
589
590 m->rbuf = (char *)m->rcall + sizeof(struct v9fs_fcall);
591 m->rpos = 0;
592 }
593
594 clear_bit(Rpending, &m->wsched);
595 err = m->trans->read(m->trans, m->rbuf + m->rpos, m->msize - m->rpos);
596 dprintk(DEBUG_MUX, "mux %p got %d bytes\n", m, err);
597 if (err == -EAGAIN) {
598 clear_bit(Rworksched, &m->wsched);
599 return;
600 }
601
602 if (err <= 0)
603 goto error;
604
605 m->rpos += err;
606 while (m->rpos > 4) {
607 n = le32_to_cpu(*(__le32 *) m->rbuf);
608 if (n >= m->msize) {
609 dprintk(DEBUG_ERROR,
610 "requested packet size too big: %d\n", n);
611 err = -EIO;
612 goto error;
613 }
614
615 if (m->rpos < n)
616 break;
617
618 dump_data(m->rbuf, n);
619 err =
620 v9fs_deserialize_fcall(m->rbuf, n, m->rcall, *m->extended);
621 if (err < 0) {
622 goto error;
623 }
624
625 if ((v9fs_debug_level&DEBUG_FCALL) == DEBUG_FCALL) {
626 char buf[150];
627
628 v9fs_printfcall(buf, sizeof(buf), m->rcall,
629 *m->extended);
630 printk(KERN_NOTICE ">>> %p %s\n", m, buf);
631 }
632
633 rcall = m->rcall;
634 rbuf = m->rbuf;
635 if (m->rpos > n) {
636 m->rcall = kmalloc(sizeof(struct v9fs_fcall) + m->msize,
637 GFP_KERNEL);
638 if (!m->rcall) {
639 err = -ENOMEM;
640 goto error;
641 }
642
643 m->rbuf = (char *)m->rcall + sizeof(struct v9fs_fcall);
644 memmove(m->rbuf, rbuf + n, m->rpos - n);
645 m->rpos -= n;
646 } else {
647 m->rcall = NULL;
648 m->rbuf = NULL;
649 m->rpos = 0;
650 }
651
652 dprintk(DEBUG_MUX, "mux %p fcall id %d tag %d\n", m, rcall->id,
653 rcall->tag);
654
655 req = NULL;
656 spin_lock(&m->lock);
657 list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) {
658 if (rreq->tag == rcall->tag) {
659 req = rreq;
660 if (req->flush != Flushing)
661 list_del(&req->req_list);
662 break;
663 }
664 }
665 spin_unlock(&m->lock);
666
667 if (req) {
668 req->rcall = rcall;
669 process_request(m, req);
670
671 if (req->flush != Flushing) {
672 if (req->cb)
673 (*req->cb) (req, req->cba);
674 else
675 kfree(req->rcall);
676
677 wake_up(&m->equeue);
678 }
679 } else {
680 if (err >= 0 && rcall->id != RFLUSH)
681 dprintk(DEBUG_ERROR,
682 "unexpected response mux %p id %d tag %d\n",
683 m, rcall->id, rcall->tag);
684 kfree(rcall);
685 }
686 }
687
688 if (!list_empty(&m->req_list)) {
689 if (test_and_clear_bit(Rpending, &m->wsched))
690 n = POLLIN;
691 else
692 n = m->trans->poll(m->trans, NULL);
693
694 if (n & POLLIN) {
695 dprintk(DEBUG_MUX, "schedule read work mux %p\n", m);
696 queue_work(v9fs_mux_wq, &m->rq);
697 } else
698 clear_bit(Rworksched, &m->wsched);
699 } else
700 clear_bit(Rworksched, &m->wsched);
701
702 return;
703
704 error:
705 v9fs_mux_cancel(m, err);
706 clear_bit(Rworksched, &m->wsched);
707}
708
709/**
710 * v9fs_send_request - send 9P request
711 * The function can sleep until the request is scheduled for sending.
712 * The function can be interrupted. Return from the function is not
713 * a guarantee that the request is sent successfully. Can return errors
714 * that can be retrieved by PTR_ERR macros.
715 *
716 * @m: mux data
717 * @tc: request to be sent
718 * @cb: callback function to call when response is received
719 * @cba: parameter to pass to the callback function
720 */
721static struct v9fs_req *v9fs_send_request(struct v9fs_mux_data *m,
722 struct v9fs_fcall *tc,
723 v9fs_mux_req_callback cb, void *cba)
724{
725 int n;
726 struct v9fs_req *req;
727
728 dprintk(DEBUG_MUX, "mux %p task %p tcall %p id %d\n", m, current,
729 tc, tc->id);
730 if (m->err < 0)
731 return ERR_PTR(m->err);
732
733 req = kmalloc(sizeof(struct v9fs_req), GFP_KERNEL);
734 if (!req)
735 return ERR_PTR(-ENOMEM);
736
737 if (tc->id == TVERSION)
738 n = V9FS_NOTAG;
739 else
740 n = v9fs_mux_get_tag(m);
741
742 if (n < 0)
743 return ERR_PTR(-ENOMEM);
744
745 v9fs_set_tag(tc, n);
746 if ((v9fs_debug_level&DEBUG_FCALL) == DEBUG_FCALL) {
747 char buf[150];
748
749 v9fs_printfcall(buf, sizeof(buf), tc, *m->extended);
750 printk(KERN_NOTICE "<<< %p %s\n", m, buf);
751 }
752
753 spin_lock_init(&req->lock);
754 req->tag = n;
755 req->tcall = tc;
756 req->rcall = NULL;
757 req->err = 0;
758 req->cb = cb;
759 req->cba = cba;
760 req->flush = None;
761
762 spin_lock(&m->lock);
763 list_add_tail(&req->req_list, &m->unsent_req_list);
764 spin_unlock(&m->lock);
765
766 if (test_and_clear_bit(Wpending, &m->wsched))
767 n = POLLOUT;
768 else
769 n = m->trans->poll(m->trans, NULL);
770
771 if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched))
772 queue_work(v9fs_mux_wq, &m->wq);
773
774 return req;
775}
776
777static void v9fs_mux_free_request(struct v9fs_mux_data *m, struct v9fs_req *req)
778{
779 v9fs_mux_put_tag(m, req->tag);
780 kfree(req);
781}
782
783static void v9fs_mux_flush_cb(struct v9fs_req *freq, void *a)
784{
785 v9fs_mux_req_callback cb;
786 int tag;
787 struct v9fs_mux_data *m;
788 struct v9fs_req *req, *rreq, *rptr;
789
790 m = a;
791 dprintk(DEBUG_MUX, "mux %p tc %p rc %p err %d oldtag %d\n", m,
792 freq->tcall, freq->rcall, freq->err,
793 freq->tcall->params.tflush.oldtag);
794
795 spin_lock(&m->lock);
796 cb = NULL;
797 tag = freq->tcall->params.tflush.oldtag;
798 req = NULL;
799 list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) {
800 if (rreq->tag == tag) {
801 req = rreq;
802 list_del(&req->req_list);
803 break;
804 }
805 }
806 spin_unlock(&m->lock);
807
808 if (req) {
809 spin_lock(&req->lock);
810 req->flush = Flushed;
811 spin_unlock(&req->lock);
812
813 if (req->cb)
814 (*req->cb) (req, req->cba);
815 else
816 kfree(req->rcall);
817
818 wake_up(&m->equeue);
819 }
820
821 kfree(freq->tcall);
822 kfree(freq->rcall);
823 v9fs_mux_free_request(m, freq);
824}
825
826static int
827v9fs_mux_flush_request(struct v9fs_mux_data *m, struct v9fs_req *req)
828{
829 struct v9fs_fcall *fc;
830 struct v9fs_req *rreq, *rptr;
831
832 dprintk(DEBUG_MUX, "mux %p req %p tag %d\n", m, req, req->tag);
833
834 /* if a response was received for a request, do nothing */
835 spin_lock(&req->lock);
836 if (req->rcall || req->err) {
837 spin_unlock(&req->lock);
838 dprintk(DEBUG_MUX, "mux %p req %p response already received\n", m, req);
839 return 0;
840 }
841
842 req->flush = Flushing;
843 spin_unlock(&req->lock);
844
845 spin_lock(&m->lock);
846 /* if the request is not sent yet, just remove it from the list */
847 list_for_each_entry_safe(rreq, rptr, &m->unsent_req_list, req_list) {
848 if (rreq->tag == req->tag) {
849 dprintk(DEBUG_MUX, "mux %p req %p request is not sent yet\n", m, req);
850 list_del(&rreq->req_list);
851 req->flush = Flushed;
852 spin_unlock(&m->lock);
853 if (req->cb)
854 (*req->cb) (req, req->cba);
855 return 0;
856 }
857 }
858 spin_unlock(&m->lock);
859
860 clear_thread_flag(TIF_SIGPENDING);
861 fc = v9fs_create_tflush(req->tag);
862 v9fs_send_request(m, fc, v9fs_mux_flush_cb, m);
863 return 1;
864}
865
866static void
867v9fs_mux_rpc_cb(struct v9fs_req *req, void *a)
868{
869 struct v9fs_mux_rpc *r;
870
871 dprintk(DEBUG_MUX, "req %p r %p\n", req, a);
872 r = a;
873 r->rcall = req->rcall;
874 r->err = req->err;
875
876 if (req->flush!=None && !req->err)
877 r->err = -ERESTARTSYS;
878
879 wake_up(&r->wqueue);
880}
881
882/**
883 * v9fs_mux_rpc - sends 9P request and waits until a response is available.
884 * The function can be interrupted.
885 * @m: mux data
886 * @tc: request to be sent
887 * @rc: pointer where a pointer to the response is stored
888 */
889int
890v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc,
891 struct v9fs_fcall **rc)
892{
893 int err, sigpending;
894 unsigned long flags;
895 struct v9fs_req *req;
896 struct v9fs_mux_rpc r;
897
898 r.err = 0;
899 r.tcall = tc;
900 r.rcall = NULL;
901 r.m = m;
902 init_waitqueue_head(&r.wqueue);
903
904 if (rc)
905 *rc = NULL;
906
907 sigpending = 0;
908 if (signal_pending(current)) {
909 sigpending = 1;
910 clear_thread_flag(TIF_SIGPENDING);
911 }
912
913 req = v9fs_send_request(m, tc, v9fs_mux_rpc_cb, &r);
914 if (IS_ERR(req)) {
915 err = PTR_ERR(req);
916 dprintk(DEBUG_MUX, "error %d\n", err);
917 return err;
918 }
919
920 err = wait_event_interruptible(r.wqueue, r.rcall != NULL || r.err < 0);
921 if (r.err < 0)
922 err = r.err;
923
924 if (err == -ERESTARTSYS && m->trans->status == Connected && m->err == 0) {
925 if (v9fs_mux_flush_request(m, req)) {
926 /* wait until we get response of the flush message */
927 do {
928 clear_thread_flag(TIF_SIGPENDING);
929 err = wait_event_interruptible(r.wqueue,
930 r.rcall || r.err);
931 } while (!r.rcall && !r.err && err==-ERESTARTSYS &&
932 m->trans->status==Connected && !m->err);
933
934 err = -ERESTARTSYS;
935 }
936 sigpending = 1;
937 }
938
939 if (sigpending) {
940 spin_lock_irqsave(&current->sighand->siglock, flags);
941 recalc_sigpending();
942 spin_unlock_irqrestore(&current->sighand->siglock, flags);
943 }
944
945 if (rc)
946 *rc = r.rcall;
947 else
948 kfree(r.rcall);
949
950 v9fs_mux_free_request(m, req);
951 if (err > 0)
952 err = -EIO;
953
954 return err;
955}
956
#if 0
/**
 * v9fs_mux_rpcnb - sends 9P request without waiting for response.
 * @m: mux data
 * @tc: request to be sent
 * @cb: callback function to be called when response arrives
 * @a: value to pass to the callback function
 *
 * Returns 0 once the request is queued, or the negative error reported by
 * v9fs_send_request().
 */
int v9fs_mux_rpcnb(struct v9fs_mux_data *m, struct v9fs_fcall *tc,
		   v9fs_mux_req_callback cb, void *a)
{
	struct v9fs_req *req = v9fs_send_request(m, tc, cb, a);

	if (IS_ERR(req)) {
		int err = PTR_ERR(req);

		dprintk(DEBUG_MUX, "error %d\n", err);
		return err;
	}

	dprintk(DEBUG_MUX, "mux %p tc %p tag %d\n", m, tc, req->tag);
	return 0;
}
#endif /* 0 */
982
983/**
984 * v9fs_mux_cancel - cancel all pending requests with error
985 * @m: mux data
986 * @err: error code
987 */
988void v9fs_mux_cancel(struct v9fs_mux_data *m, int err)
989{
990 struct v9fs_req *req, *rtmp;
991 LIST_HEAD(cancel_list);
992
993 dprintk(DEBUG_ERROR, "mux %p err %d\n", m, err);
994 m->err = err;
995 spin_lock(&m->lock);
996 list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) {
997 list_move(&req->req_list, &cancel_list);
998 }
999 list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) {
1000 list_move(&req->req_list, &cancel_list);
1001 }
1002 spin_unlock(&m->lock);
1003
1004 list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
1005 list_del(&req->req_list);
1006 if (!req->err)
1007 req->err = err;
1008
1009 if (req->cb)
1010 (*req->cb) (req, req->cba);
1011 else
1012 kfree(req->rcall);
1013 }
1014
1015 wake_up(&m->equeue);
1016}
1017
1018static u16 v9fs_mux_get_tag(struct v9fs_mux_data *m)
1019{
1020 int tag;
1021
1022 tag = v9fs_get_idpool(&m->tagpool);
1023 if (tag < 0)
1024 return V9FS_NOTAG;
1025 else
1026 return (u16) tag;
1027}
1028
1029static void v9fs_mux_put_tag(struct v9fs_mux_data *m, u16 tag)
1030{
1031 if (tag != V9FS_NOTAG && v9fs_check_idpool(tag, &m->tagpool))
1032 v9fs_put_idpool(tag, &m->tagpool);
1033}
diff --git a/fs/9p/mux.h b/fs/9p/mux.h
deleted file mode 100644
index fb10c50186a1..000000000000
--- a/fs/9p/mux.h
+++ /dev/null
@@ -1,55 +0,0 @@
1/*
2 * linux/fs/9p/mux.h
3 *
4 * Multiplexer Definitions
5 *
6 * Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
7 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2
11 * as published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to:
20 * Free Software Foundation
21 * 51 Franklin Street, Fifth Floor
22 * Boston, MA 02111-1301 USA
23 *
24 */
25
struct v9fs_mux_data;
struct v9fs_req;

/**
 * v9fs_mux_req_callback - callback function that is called when the
 *	response of a request is received. The callback is called from
 *	a workqueue and shouldn't block.
 *
 * @req - the request that completed
 * @a - the pointer that was specified when the request was sent, passed
 *	back to the callback unchanged
 */
typedef void (*v9fs_mux_req_callback)(struct v9fs_req *req, void *a);

/* module-wide setup and teardown */
int v9fs_mux_global_init(void);
void v9fs_mux_global_exit(void);

/* per-connection mux lifetime */
struct v9fs_mux_data *v9fs_mux_init(struct v9fs_transport *trans, int msize,
				    unsigned char *extended);
void v9fs_mux_destroy(struct v9fs_mux_data *);

/* request submission */
int v9fs_mux_send(struct v9fs_mux_data *m, struct v9fs_fcall *tc);
struct v9fs_fcall *v9fs_mux_recv(struct v9fs_mux_data *m);
int v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc,
		 struct v9fs_fcall **rc);

void v9fs_mux_flush(struct v9fs_mux_data *m, int sendflush);
void v9fs_mux_cancel(struct v9fs_mux_data *m, int err);
int v9fs_errstr2errno(char *errstr, int len);
diff --git a/fs/9p/trans_fd.c b/fs/9p/trans_fd.c
deleted file mode 100644
index 34d43355beb7..000000000000
--- a/fs/9p/trans_fd.c
+++ /dev/null
@@ -1,308 +0,0 @@
1/*
2 * linux/fs/9p/trans_fd.c
3 *
4 * Fd transport layer. Includes deprecated socket layer.
5 *
6 * Copyright (C) 2006 by Russ Cox <rsc@swtch.com>
7 * Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
8 * Copyright (C) 2004-2005 by Eric Van Hensbergen <ericvh@gmail.com>
9 * Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com>
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License version 2
13 * as published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to:
22 * Free Software Foundation
23 * 51 Franklin Street, Fifth Floor
24 * Boston, MA 02111-1301 USA
25 *
26 */
27
28#include <linux/in.h>
29#include <linux/module.h>
30#include <linux/net.h>
31#include <linux/ipv6.h>
32#include <linux/errno.h>
33#include <linux/kernel.h>
34#include <linux/un.h>
35#include <asm/uaccess.h>
36#include <linux/inet.h>
37#include <linux/idr.h>
38#include <linux/file.h>
39
40#include "debug.h"
41#include "v9fs.h"
42#include "transport.h"
43
44#define V9FS_PORT 564
45
/* Private state of an fd-based transport, stored in v9fs_transport->priv. */
struct v9fs_trans_fd {
	struct file *rd;	/* file that replies are read from */
	struct file *wr;	/* file that requests are written to */
};
50
51/**
52 * v9fs_fd_read- read from a fd
53 * @v9ses: session information
54 * @v: buffer to receive data into
55 * @len: size of receive buffer
56 *
57 */
58static int v9fs_fd_read(struct v9fs_transport *trans, void *v, int len)
59{
60 int ret;
61 struct v9fs_trans_fd *ts;
62
63 if (!trans || trans->status == Disconnected || !(ts = trans->priv))
64 return -EREMOTEIO;
65
66 if (!(ts->rd->f_flags & O_NONBLOCK))
67 dprintk(DEBUG_ERROR, "blocking read ...\n");
68
69 ret = kernel_read(ts->rd, ts->rd->f_pos, v, len);
70 if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
71 trans->status = Disconnected;
72 return ret;
73}
74
75/**
76 * v9fs_fd_write - write to a socket
77 * @v9ses: session information
78 * @v: buffer to send data from
79 * @len: size of send buffer
80 *
81 */
82static int v9fs_fd_write(struct v9fs_transport *trans, void *v, int len)
83{
84 int ret;
85 mm_segment_t oldfs;
86 struct v9fs_trans_fd *ts;
87
88 if (!trans || trans->status == Disconnected || !(ts = trans->priv))
89 return -EREMOTEIO;
90
91 if (!(ts->wr->f_flags & O_NONBLOCK))
92 dprintk(DEBUG_ERROR, "blocking write ...\n");
93
94 oldfs = get_fs();
95 set_fs(get_ds());
96 /* The cast to a user pointer is valid due to the set_fs() */
97 ret = vfs_write(ts->wr, (void __user *)v, len, &ts->wr->f_pos);
98 set_fs(oldfs);
99
100 if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
101 trans->status = Disconnected;
102 return ret;
103}
104
105static unsigned int
106v9fs_fd_poll(struct v9fs_transport *trans, struct poll_table_struct *pt)
107{
108 int ret, n;
109 struct v9fs_trans_fd *ts;
110 mm_segment_t oldfs;
111
112 if (!trans || trans->status != Connected || !(ts = trans->priv))
113 return -EREMOTEIO;
114
115 if (!ts->rd->f_op || !ts->rd->f_op->poll)
116 return -EIO;
117
118 if (!ts->wr->f_op || !ts->wr->f_op->poll)
119 return -EIO;
120
121 oldfs = get_fs();
122 set_fs(get_ds());
123
124 ret = ts->rd->f_op->poll(ts->rd, pt);
125 if (ret < 0)
126 goto end;
127
128 if (ts->rd != ts->wr) {
129 n = ts->wr->f_op->poll(ts->wr, pt);
130 if (n < 0) {
131 ret = n;
132 goto end;
133 }
134 ret = (ret & ~POLLOUT) | (n & ~POLLIN);
135 }
136
137 end:
138 set_fs(oldfs);
139 return ret;
140}
141
142static int v9fs_fd_open(struct v9fs_session_info *v9ses, int rfd, int wfd)
143{
144 struct v9fs_transport *trans = v9ses->transport;
145 struct v9fs_trans_fd *ts = kmalloc(sizeof(struct v9fs_trans_fd),
146 GFP_KERNEL);
147 if (!ts)
148 return -ENOMEM;
149
150 ts->rd = fget(rfd);
151 ts->wr = fget(wfd);
152 if (!ts->rd || !ts->wr) {
153 if (ts->rd)
154 fput(ts->rd);
155 if (ts->wr)
156 fput(ts->wr);
157 kfree(ts);
158 return -EIO;
159 }
160
161 trans->priv = ts;
162 trans->status = Connected;
163
164 return 0;
165}
166
167static int v9fs_fd_init(struct v9fs_session_info *v9ses, const char *addr,
168 char *data)
169{
170 if (v9ses->rfdno == ~0 || v9ses->wfdno == ~0) {
171 printk(KERN_ERR "v9fs: Insufficient options for proto=fd\n");
172 return -ENOPROTOOPT;
173 }
174
175 return v9fs_fd_open(v9ses, v9ses->rfdno, v9ses->wfdno);
176}
177
178static int v9fs_socket_open(struct v9fs_session_info *v9ses,
179 struct socket *csocket)
180{
181 int fd, ret;
182
183 csocket->sk->sk_allocation = GFP_NOIO;
184 if ((fd = sock_map_fd(csocket)) < 0) {
185 eprintk(KERN_ERR, "v9fs_socket_open: failed to map fd\n");
186 ret = fd;
187 release_csocket:
188 sock_release(csocket);
189 return ret;
190 }
191
192 if ((ret = v9fs_fd_open(v9ses, fd, fd)) < 0) {
193 sockfd_put(csocket);
194 eprintk(KERN_ERR, "v9fs_socket_open: failed to open fd\n");
195 goto release_csocket;
196 }
197
198 ((struct v9fs_trans_fd *)v9ses->transport->priv)->rd->f_flags |=
199 O_NONBLOCK;
200 return 0;
201}
202
203static int v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr,
204 char *data)
205{
206 int ret;
207 struct socket *csocket = NULL;
208 struct sockaddr_in sin_server;
209
210 sin_server.sin_family = AF_INET;
211 sin_server.sin_addr.s_addr = in_aton(addr);
212 sin_server.sin_port = htons(v9ses->port);
213 sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket);
214
215 if (!csocket) {
216 eprintk(KERN_ERR, "v9fs_trans_tcp: problem creating socket\n");
217 return -1;
218 }
219
220 ret = csocket->ops->connect(csocket,
221 (struct sockaddr *)&sin_server,
222 sizeof(struct sockaddr_in), 0);
223 if (ret < 0) {
224 eprintk(KERN_ERR,
225 "v9fs_trans_tcp: problem connecting socket to %s\n",
226 addr);
227 return ret;
228 }
229
230 return v9fs_socket_open(v9ses, csocket);
231}
232
233static int
234v9fs_unix_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
235{
236 int ret;
237 struct socket *csocket;
238 struct sockaddr_un sun_server;
239
240 if (strlen(addr) > UNIX_PATH_MAX) {
241 eprintk(KERN_ERR, "v9fs_trans_unix: address too long: %s\n",
242 addr);
243 return -ENAMETOOLONG;
244 }
245
246 sun_server.sun_family = PF_UNIX;
247 strcpy(sun_server.sun_path, addr);
248 sock_create_kern(PF_UNIX, SOCK_STREAM, 0, &csocket);
249 ret = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server,
250 sizeof(struct sockaddr_un) - 1, 0);
251 if (ret < 0) {
252 eprintk(KERN_ERR,
253 "v9fs_trans_unix: problem connecting socket: %s: %d\n",
254 addr, ret);
255 return ret;
256 }
257
258 return v9fs_socket_open(v9ses, csocket);
259}
260
261/**
262 * v9fs_sock_close - shutdown socket
263 * @trans: private socket structure
264 *
265 */
266static void v9fs_fd_close(struct v9fs_transport *trans)
267{
268 struct v9fs_trans_fd *ts;
269
270 if (!trans)
271 return;
272
273 ts = xchg(&trans->priv, NULL);
274
275 if (!ts)
276 return;
277
278 trans->status = Disconnected;
279 if (ts->rd)
280 fput(ts->rd);
281 if (ts->wr)
282 fput(ts->wr);
283 kfree(ts);
284}
285
286struct v9fs_transport v9fs_trans_fd = {
287 .init = v9fs_fd_init,
288 .write = v9fs_fd_write,
289 .read = v9fs_fd_read,
290 .close = v9fs_fd_close,
291 .poll = v9fs_fd_poll,
292};
293
294struct v9fs_transport v9fs_trans_tcp = {
295 .init = v9fs_tcp_init,
296 .write = v9fs_fd_write,
297 .read = v9fs_fd_read,
298 .close = v9fs_fd_close,
299 .poll = v9fs_fd_poll,
300};
301
302struct v9fs_transport v9fs_trans_unix = {
303 .init = v9fs_unix_init,
304 .write = v9fs_fd_write,
305 .read = v9fs_fd_read,
306 .close = v9fs_fd_close,
307 .poll = v9fs_fd_poll,
308};
diff --git a/fs/9p/transport.h b/fs/9p/transport.h
deleted file mode 100644
index b38a4b8a41ce..000000000000
--- a/fs/9p/transport.h
+++ /dev/null
@@ -1,45 +0,0 @@
1/*
2 * linux/fs/9p/transport.h
3 *
4 * Transport Definition
5 *
6 * Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
7 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2
11 * as published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to:
20 * Free Software Foundation
21 * 51 Franklin Street, Fifth Floor
22 * Boston, MA 02111-1301 USA
23 *
24 */
25
/* Connection state kept in v9fs_transport->status. */
enum v9fs_transport_status {
	Connected,	/* usable for I/O */
	Disconnected,	/* closed, or an I/O error was seen */
	Hung,		/* NOTE(review): no user visible here -- confirm meaning */
};
31
32struct v9fs_transport {
33 enum v9fs_transport_status status;
34 void *priv;
35
36 int (*init) (struct v9fs_session_info *, const char *, char *);
37 int (*write) (struct v9fs_transport *, void *, int);
38 int (*read) (struct v9fs_transport *, void *, int);
39 void (*close) (struct v9fs_transport *);
40 unsigned int (*poll)(struct v9fs_transport *, struct poll_table_struct *);
41};
42
43extern struct v9fs_transport v9fs_trans_tcp;
44extern struct v9fs_transport v9fs_trans_unix;
45extern struct v9fs_transport v9fs_trans_fd;
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 6ad6f192b6e4..45c35986d49f 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -29,16 +29,12 @@
29#include <linux/sched.h> 29#include <linux/sched.h>
30#include <linux/parser.h> 30#include <linux/parser.h>
31#include <linux/idr.h> 31#include <linux/idr.h>
32 32#include <net/9p/9p.h>
33#include "debug.h" 33#include <net/9p/transport.h>
34#include <net/9p/conn.h>
35#include <net/9p/client.h>
34#include "v9fs.h" 36#include "v9fs.h"
35#include "9p.h"
36#include "v9fs_vfs.h" 37#include "v9fs_vfs.h"
37#include "transport.h"
38#include "mux.h"
39
40/* TODO: sysfs or debugfs interface */
41int v9fs_debug_level = 0; /* feature-rific global debug level */
42 38
43/* 39/*
44 * Option Parsing (code inspired by NFS code) 40 * Option Parsing (code inspired by NFS code)
@@ -47,12 +43,12 @@ int v9fs_debug_level = 0; /* feature-rific global debug level */
47 43
48enum { 44enum {
49 /* Options that take integer arguments */ 45 /* Options that take integer arguments */
50 Opt_port, Opt_msize, Opt_uid, Opt_gid, Opt_afid, Opt_debug, 46 Opt_debug, Opt_port, Opt_msize, Opt_uid, Opt_gid, Opt_afid,
51 Opt_rfdno, Opt_wfdno, 47 Opt_rfdno, Opt_wfdno,
52 /* String options */ 48 /* String options */
53 Opt_uname, Opt_remotename, 49 Opt_uname, Opt_remotename,
54 /* Options that take no arguments */ 50 /* Options that take no arguments */
55 Opt_legacy, Opt_nodevmap, Opt_unix, Opt_tcp, Opt_fd, 51 Opt_legacy, Opt_nodevmap, Opt_unix, Opt_tcp, Opt_fd, Opt_pci,
56 /* Cache options */ 52 /* Cache options */
57 Opt_cache_loose, 53 Opt_cache_loose,
58 /* Error token */ 54 /* Error token */
@@ -60,6 +56,7 @@ enum {
60}; 56};
61 57
62static match_table_t tokens = { 58static match_table_t tokens = {
59 {Opt_debug, "debug=%x"},
63 {Opt_port, "port=%u"}, 60 {Opt_port, "port=%u"},
64 {Opt_msize, "msize=%u"}, 61 {Opt_msize, "msize=%u"},
65 {Opt_uid, "uid=%u"}, 62 {Opt_uid, "uid=%u"},
@@ -67,12 +64,14 @@ static match_table_t tokens = {
67 {Opt_afid, "afid=%u"}, 64 {Opt_afid, "afid=%u"},
68 {Opt_rfdno, "rfdno=%u"}, 65 {Opt_rfdno, "rfdno=%u"},
69 {Opt_wfdno, "wfdno=%u"}, 66 {Opt_wfdno, "wfdno=%u"},
70 {Opt_debug, "debug=%x"},
71 {Opt_uname, "uname=%s"}, 67 {Opt_uname, "uname=%s"},
72 {Opt_remotename, "aname=%s"}, 68 {Opt_remotename, "aname=%s"},
73 {Opt_unix, "proto=unix"}, 69 {Opt_unix, "proto=unix"},
74 {Opt_tcp, "proto=tcp"}, 70 {Opt_tcp, "proto=tcp"},
75 {Opt_fd, "proto=fd"}, 71 {Opt_fd, "proto=fd"},
72#ifdef CONFIG_PCI_9P
73 {Opt_pci, "proto=pci"},
74#endif
76 {Opt_tcp, "tcp"}, 75 {Opt_tcp, "tcp"},
77 {Opt_unix, "unix"}, 76 {Opt_unix, "unix"},
78 {Opt_fd, "fd"}, 77 {Opt_fd, "fd"},
@@ -83,6 +82,8 @@ static match_table_t tokens = {
83 {Opt_err, NULL} 82 {Opt_err, NULL}
84}; 83};
85 84
85extern struct p9_transport *p9pci_trans_create(void);
86
86/* 87/*
87 * Parse option string. 88 * Parse option string.
88 */ 89 */
@@ -122,12 +123,16 @@ static void v9fs_parse_options(char *options, struct v9fs_session_info *v9ses)
122 token = match_token(p, tokens, args); 123 token = match_token(p, tokens, args);
123 if (token < Opt_uname) { 124 if (token < Opt_uname) {
124 if ((ret = match_int(&args[0], &option)) < 0) { 125 if ((ret = match_int(&args[0], &option)) < 0) {
125 dprintk(DEBUG_ERROR, 126 P9_DPRINTK(P9_DEBUG_ERROR,
126 "integer field, but no integer?\n"); 127 "integer field, but no integer?\n");
127 continue; 128 continue;
128 } 129 }
129 } 130 }
130 switch (token) { 131 switch (token) {
132 case Opt_debug:
133 v9ses->debug = option;
134 p9_debug_level = option;
135 break;
131 case Opt_port: 136 case Opt_port:
132 v9ses->port = option; 137 v9ses->port = option;
133 break; 138 break;
@@ -149,15 +154,15 @@ static void v9fs_parse_options(char *options, struct v9fs_session_info *v9ses)
149 case Opt_wfdno: 154 case Opt_wfdno:
150 v9ses->wfdno = option; 155 v9ses->wfdno = option;
151 break; 156 break;
152 case Opt_debug:
153 v9ses->debug = option;
154 break;
155 case Opt_tcp: 157 case Opt_tcp:
156 v9ses->proto = PROTO_TCP; 158 v9ses->proto = PROTO_TCP;
157 break; 159 break;
158 case Opt_unix: 160 case Opt_unix:
159 v9ses->proto = PROTO_UNIX; 161 v9ses->proto = PROTO_UNIX;
160 break; 162 break;
163 case Opt_pci:
164 v9ses->proto = PROTO_PCI;
165 break;
161 case Opt_fd: 166 case Opt_fd:
162 v9ses->proto = PROTO_FD; 167 v9ses->proto = PROTO_FD;
163 break; 168 break;
@@ -183,82 +188,6 @@ static void v9fs_parse_options(char *options, struct v9fs_session_info *v9ses)
183} 188}
184 189
185/** 190/**
186 * v9fs_inode2v9ses - safely extract v9fs session info from super block
187 * @inode: inode to extract information from
188 *
189 * Paranoid function to extract v9ses information from superblock,
190 * if anything is missing it will report an error.
191 *
192 */
193
194struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode)
195{
196 return (inode->i_sb->s_fs_info);
197}
198
199/**
200 * v9fs_get_idpool - allocate numeric id from pool
201 * @p - pool to allocate from
202 *
203 * XXX - This seems to be an awful generic function, should it be in idr.c with
204 * the lock included in struct idr?
205 */
206
207int v9fs_get_idpool(struct v9fs_idpool *p)
208{
209 int i = 0;
210 int error;
211
212retry:
213 if (idr_pre_get(&p->pool, GFP_KERNEL) == 0)
214 return 0;
215
216 if (down_interruptible(&p->lock) == -EINTR) {
217 eprintk(KERN_WARNING, "Interrupted while locking\n");
218 return -1;
219 }
220
221 /* no need to store exactly p, we just need something non-null */
222 error = idr_get_new(&p->pool, p, &i);
223 up(&p->lock);
224
225 if (error == -EAGAIN)
226 goto retry;
227 else if (error)
228 return -1;
229
230 return i;
231}
232
233/**
234 * v9fs_put_idpool - release numeric id from pool
235 * @p - pool to allocate from
236 *
237 * XXX - This seems to be an awful generic function, should it be in idr.c with
238 * the lock included in struct idr?
239 */
240
241void v9fs_put_idpool(int id, struct v9fs_idpool *p)
242{
243 if (down_interruptible(&p->lock) == -EINTR) {
244 eprintk(KERN_WARNING, "Interrupted while locking\n");
245 return;
246 }
247 idr_remove(&p->pool, id);
248 up(&p->lock);
249}
250
251/**
252 * v9fs_check_idpool - check if the specified id is available
253 * @id - id to check
254 * @p - pool
255 */
256int v9fs_check_idpool(int id, struct v9fs_idpool *p)
257{
258 return idr_find(&p->pool, id) != NULL;
259}
260
261/**
262 * v9fs_session_init - initialize session 191 * v9fs_session_init - initialize session
263 * @v9ses: session information structure 192 * @v9ses: session information structure
264 * @dev_name: device being mounted 193 * @dev_name: device being mounted
@@ -266,25 +195,21 @@ int v9fs_check_idpool(int id, struct v9fs_idpool *p)
266 * 195 *
267 */ 196 */
268 197
269int 198struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
270v9fs_session_init(struct v9fs_session_info *v9ses,
271 const char *dev_name, char *data) 199 const char *dev_name, char *data)
272{ 200{
273 struct v9fs_fcall *fcall = NULL;
274 struct v9fs_transport *trans_proto;
275 int n = 0;
276 int newfid = -1;
277 int retval = -EINVAL; 201 int retval = -EINVAL;
278 struct v9fs_str *version; 202 struct p9_transport *trans;
203 struct p9_fid *fid;
279 204
280 v9ses->name = __getname(); 205 v9ses->name = __getname();
281 if (!v9ses->name) 206 if (!v9ses->name)
282 return -ENOMEM; 207 return ERR_PTR(-ENOMEM);
283 208
284 v9ses->remotename = __getname(); 209 v9ses->remotename = __getname();
285 if (!v9ses->remotename) { 210 if (!v9ses->remotename) {
286 __putname(v9ses->name); 211 __putname(v9ses->name);
287 return -ENOMEM; 212 return ERR_PTR(-ENOMEM);
288 } 213 }
289 214
290 strcpy(v9ses->name, V9FS_DEFUSER); 215 strcpy(v9ses->name, V9FS_DEFUSER);
@@ -292,130 +217,60 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
292 217
293 v9fs_parse_options(data, v9ses); 218 v9fs_parse_options(data, v9ses);
294 219
295 /* set global debug level */
296 v9fs_debug_level = v9ses->debug;
297
298 /* id pools that are session-dependent: fids and tags */
299 idr_init(&v9ses->fidpool.pool);
300 init_MUTEX(&v9ses->fidpool.lock);
301
302 switch (v9ses->proto) { 220 switch (v9ses->proto) {
303 case PROTO_TCP: 221 case PROTO_TCP:
304 trans_proto = &v9fs_trans_tcp; 222 trans = p9_trans_create_tcp(dev_name, v9ses->port);
305 break; 223 break;
306 case PROTO_UNIX: 224 case PROTO_UNIX:
307 trans_proto = &v9fs_trans_unix; 225 trans = p9_trans_create_unix(dev_name);
308 *v9ses->remotename = 0; 226 *v9ses->remotename = 0;
309 break; 227 break;
310 case PROTO_FD: 228 case PROTO_FD:
311 trans_proto = &v9fs_trans_fd; 229 trans = p9_trans_create_fd(v9ses->rfdno, v9ses->wfdno);
312 *v9ses->remotename = 0; 230 *v9ses->remotename = 0;
313 break; 231 break;
232#ifdef CONFIG_PCI_9P
233 case PROTO_PCI:
234 trans = p9pci_trans_create();
235 *v9ses->remotename = 0;
236 break;
237#endif
314 default: 238 default:
315 printk(KERN_ERR "v9fs: Bad mount protocol %d\n", v9ses->proto); 239 printk(KERN_ERR "v9fs: Bad mount protocol %d\n", v9ses->proto);
316 retval = -ENOPROTOOPT; 240 retval = -ENOPROTOOPT;
317 goto SessCleanUp; 241 goto error;
318 }; 242 };
319 243
320 v9ses->transport = kmalloc(sizeof(*v9ses->transport), GFP_KERNEL); 244 if (IS_ERR(trans)) {
321 if (!v9ses->transport) { 245 retval = PTR_ERR(trans);
322 retval = -ENOMEM; 246 trans = NULL;
323 goto SessCleanUp; 247 goto error;
324 } 248 }
325 249
326 memmove(v9ses->transport, trans_proto, sizeof(*v9ses->transport)); 250 v9ses->clnt = p9_client_create(trans, v9ses->maxdata + P9_IOHDRSZ,
251 v9ses->extended);
327 252
328 if ((retval = v9ses->transport->init(v9ses, dev_name, data)) < 0) { 253 if (IS_ERR(v9ses->clnt)) {
329 eprintk(KERN_ERR, "problem initializing transport\n"); 254 retval = PTR_ERR(v9ses->clnt);
330 goto SessCleanUp; 255 v9ses->clnt = NULL;
256 P9_DPRINTK(P9_DEBUG_ERROR, "problem initializing 9p client\n");
257 goto error;
331 } 258 }
332 259
333 v9ses->inprogress = 0; 260 fid = p9_client_attach(v9ses->clnt, NULL, v9ses->name,
334 v9ses->shutdown = 0; 261 v9ses->remotename);
335 v9ses->session_hung = 0; 262 if (IS_ERR(fid)) {
336 263 retval = PTR_ERR(fid);
337 v9ses->mux = v9fs_mux_init(v9ses->transport, v9ses->maxdata + V9FS_IOHDRSZ, 264 fid = NULL;
338 &v9ses->extended); 265 P9_DPRINTK(P9_DEBUG_ERROR, "cannot attach\n");
339 266 goto error;
340 if (IS_ERR(v9ses->mux)) {
341 retval = PTR_ERR(v9ses->mux);
342 v9ses->mux = NULL;
343 dprintk(DEBUG_ERROR, "problem initializing mux\n");
344 goto SessCleanUp;
345 } 267 }
346 268
347 if (v9ses->afid == ~0) { 269 return fid;
348 if (v9ses->extended)
349 retval =
350 v9fs_t_version(v9ses, v9ses->maxdata, "9P2000.u",
351 &fcall);
352 else
353 retval = v9fs_t_version(v9ses, v9ses->maxdata, "9P2000",
354 &fcall);
355
356 if (retval < 0) {
357 dprintk(DEBUG_ERROR, "v9fs_t_version failed\n");
358 goto FreeFcall;
359 }
360
361 version = &fcall->params.rversion.version;
362 if (version->len==8 && !memcmp(version->str, "9P2000.u", 8)) {
363 dprintk(DEBUG_9P, "9P2000 UNIX extensions enabled\n");
364 v9ses->extended = 1;
365 } else if (version->len==6 && !memcmp(version->str, "9P2000", 6)) {
366 dprintk(DEBUG_9P, "9P2000 legacy mode enabled\n");
367 v9ses->extended = 0;
368 } else {
369 retval = -EREMOTEIO;
370 goto FreeFcall;
371 }
372 270
373 n = fcall->params.rversion.msize; 271error:
374 kfree(fcall);
375
376 if (n < v9ses->maxdata)
377 v9ses->maxdata = n;
378 }
379
380 newfid = v9fs_get_idpool(&v9ses->fidpool);
381 if (newfid < 0) {
382 eprintk(KERN_WARNING, "couldn't allocate FID\n");
383 retval = -ENOMEM;
384 goto SessCleanUp;
385 }
386 /* it is a little bit ugly, but we have to prevent newfid */
387 /* being the same as afid, so if it is, get a new fid */
388 if (v9ses->afid != ~0 && newfid == v9ses->afid) {
389 newfid = v9fs_get_idpool(&v9ses->fidpool);
390 if (newfid < 0) {
391 eprintk(KERN_WARNING, "couldn't allocate FID\n");
392 retval = -ENOMEM;
393 goto SessCleanUp;
394 }
395 }
396
397 if ((retval =
398 v9fs_t_attach(v9ses, v9ses->name, v9ses->remotename, newfid,
399 v9ses->afid, NULL))
400 < 0) {
401 dprintk(DEBUG_ERROR, "cannot attach\n");
402 goto SessCleanUp;
403 }
404
405 if (v9ses->afid != ~0) {
406 dprintk(DEBUG_ERROR, "afid not equal to ~0\n");
407 if (v9fs_t_clunk(v9ses, v9ses->afid))
408 dprintk(DEBUG_ERROR, "clunk failed\n");
409 }
410
411 return newfid;
412
413 FreeFcall:
414 kfree(fcall);
415
416 SessCleanUp:
417 v9fs_session_close(v9ses); 272 v9fs_session_close(v9ses);
418 return retval; 273 return ERR_PTR(retval);
419} 274}
420 275
421/** 276/**
@@ -426,15 +281,9 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
426 281
427void v9fs_session_close(struct v9fs_session_info *v9ses) 282void v9fs_session_close(struct v9fs_session_info *v9ses)
428{ 283{
429 if (v9ses->mux) { 284 if (v9ses->clnt) {
430 v9fs_mux_destroy(v9ses->mux); 285 p9_client_destroy(v9ses->clnt);
431 v9ses->mux = NULL; 286 v9ses->clnt = NULL;
432 }
433
434 if (v9ses->transport) {
435 v9ses->transport->close(v9ses->transport);
436 kfree(v9ses->transport);
437 v9ses->transport = NULL;
438 } 287 }
439 288
440 __putname(v9ses->name); 289 __putname(v9ses->name);
@@ -446,9 +295,8 @@ void v9fs_session_close(struct v9fs_session_info *v9ses)
446 * and cancel all pending requests. 295 * and cancel all pending requests.
447 */ 296 */
448void v9fs_session_cancel(struct v9fs_session_info *v9ses) { 297void v9fs_session_cancel(struct v9fs_session_info *v9ses) {
449 dprintk(DEBUG_ERROR, "cancel session %p\n", v9ses); 298 P9_DPRINTK(P9_DEBUG_ERROR, "cancel session %p\n", v9ses);
450 v9ses->transport->status = Disconnected; 299 p9_client_disconnect(v9ses->clnt);
451 v9fs_mux_cancel(v9ses->mux, -EIO);
452} 300}
453 301
454extern int v9fs_error_init(void); 302extern int v9fs_error_init(void);
@@ -460,24 +308,9 @@ extern int v9fs_error_init(void);
460 308
461static int __init init_v9fs(void) 309static int __init init_v9fs(void)
462{ 310{
463 int ret;
464
465 v9fs_error_init();
466
467 printk(KERN_INFO "Installing v9fs 9p2000 file system support\n"); 311 printk(KERN_INFO "Installing v9fs 9p2000 file system support\n");
468 312
469 ret = v9fs_mux_global_init(); 313 return register_filesystem(&v9fs_fs_type);
470 if (ret) {
471 printk(KERN_WARNING "v9fs: starting mux failed\n");
472 return ret;
473 }
474 ret = register_filesystem(&v9fs_fs_type);
475 if (ret) {
476 printk(KERN_WARNING "v9fs: registering file system failed\n");
477 v9fs_mux_global_exit();
478 }
479
480 return ret;
481} 314}
482 315
483/** 316/**
@@ -487,13 +320,13 @@ static int __init init_v9fs(void)
487 320
488static void __exit exit_v9fs(void) 321static void __exit exit_v9fs(void)
489{ 322{
490 v9fs_mux_global_exit();
491 unregister_filesystem(&v9fs_fs_type); 323 unregister_filesystem(&v9fs_fs_type);
492} 324}
493 325
494module_init(init_v9fs) 326module_init(init_v9fs)
495module_exit(exit_v9fs) 327module_exit(exit_v9fs)
496 328
329MODULE_AUTHOR("Latchesar Ionkov <lucho@ionkov.net>");
497MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>"); 330MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>");
498MODULE_AUTHOR("Ron Minnich <rminnich@lanl.gov>"); 331MODULE_AUTHOR("Ron Minnich <rminnich@lanl.gov>");
499MODULE_LICENSE("GPL"); 332MODULE_LICENSE("GPL");
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 820bf5ca35d8..abc4b1668ace 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -22,16 +22,6 @@
22 */ 22 */
23 23
24/* 24/*
25 * Idpool structure provides lock and id management
26 *
27 */
28
29struct v9fs_idpool {
30 struct semaphore lock;
31 struct idr pool;
32};
33
34/*
35 * Session structure provides information for an opened session 25 * Session structure provides information for an opened session
36 * 26 *
37 */ 27 */
@@ -54,15 +44,7 @@ struct v9fs_session_info {
54 unsigned int uid; /* default uid/muid for legacy support */ 44 unsigned int uid; /* default uid/muid for legacy support */
55 unsigned int gid; /* default gid for legacy support */ 45 unsigned int gid; /* default gid for legacy support */
56 46
57 /* book keeping */ 47 struct p9_client *clnt; /* 9p client */
58 struct v9fs_idpool fidpool; /* The FID pool for file descriptors */
59
60 struct v9fs_transport *transport;
61 struct v9fs_mux_data *mux;
62
63 int inprogress; /* session in progress => true */
64 int shutdown; /* session shutting down. no more attaches. */
65 unsigned char session_hung;
66 struct dentry *debugfs_dir; 48 struct dentry *debugfs_dir;
67}; 49};
68 50
@@ -71,6 +53,7 @@ enum {
71 PROTO_TCP, 53 PROTO_TCP,
72 PROTO_UNIX, 54 PROTO_UNIX,
73 PROTO_FD, 55 PROTO_FD,
56 PROTO_PCI,
74}; 57};
75 58
76/* possible values of ->cache */ 59/* possible values of ->cache */
@@ -82,12 +65,9 @@ enum {
82 65
83extern struct dentry *v9fs_debugfs_root; 66extern struct dentry *v9fs_debugfs_root;
84 67
85int v9fs_session_init(struct v9fs_session_info *, const char *, char *); 68struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *,
86struct v9fs_session_info *v9fs_inode2v9ses(struct inode *); 69 char *);
87void v9fs_session_close(struct v9fs_session_info *v9ses); 70void v9fs_session_close(struct v9fs_session_info *v9ses);
88int v9fs_get_idpool(struct v9fs_idpool *p);
89void v9fs_put_idpool(int id, struct v9fs_idpool *p);
90int v9fs_check_idpool(int id, struct v9fs_idpool *p);
91void v9fs_session_cancel(struct v9fs_session_info *v9ses); 71void v9fs_session_cancel(struct v9fs_session_info *v9ses);
92 72
93#define V9FS_MAGIC 0x01021997 73#define V9FS_MAGIC 0x01021997
@@ -97,3 +77,7 @@ void v9fs_session_cancel(struct v9fs_session_info *v9ses);
97#define V9FS_DEFUSER "nobody" 77#define V9FS_DEFUSER "nobody"
98#define V9FS_DEFANAME "" 78#define V9FS_DEFANAME ""
99 79
80static inline struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode)
81{
82 return (inode->i_sb->s_fs_info);
83}
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 6a82d39dc498..fd01d90cada5 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -45,10 +45,10 @@ extern struct dentry_operations v9fs_dentry_operations;
45extern struct dentry_operations v9fs_cached_dentry_operations; 45extern struct dentry_operations v9fs_cached_dentry_operations;
46 46
47struct inode *v9fs_get_inode(struct super_block *sb, int mode); 47struct inode *v9fs_get_inode(struct super_block *sb, int mode);
48ino_t v9fs_qid2ino(struct v9fs_qid *qid); 48ino_t v9fs_qid2ino(struct p9_qid *qid);
49void v9fs_stat2inode(struct v9fs_stat *, struct inode *, struct super_block *); 49void v9fs_stat2inode(struct p9_stat *, struct inode *, struct super_block *);
50int v9fs_dir_release(struct inode *inode, struct file *filp); 50int v9fs_dir_release(struct inode *inode, struct file *filp);
51int v9fs_file_open(struct inode *inode, struct file *file); 51int v9fs_file_open(struct inode *inode, struct file *file);
52void v9fs_inode2stat(struct inode *inode, struct v9fs_stat *stat); 52void v9fs_inode2stat(struct inode *inode, struct p9_stat *stat);
53void v9fs_dentry_release(struct dentry *); 53void v9fs_dentry_release(struct dentry *);
54int v9fs_uflags2omode(int uflags); 54int v9fs_uflags2omode(int uflags);
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 9ac4ffe9ac7d..6248f0e727a3 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -33,10 +33,10 @@
33#include <linux/pagemap.h> 33#include <linux/pagemap.h>
34#include <linux/idr.h> 34#include <linux/idr.h>
35#include <linux/sched.h> 35#include <linux/sched.h>
36#include <net/9p/9p.h>
37#include <net/9p/client.h>
36 38
37#include "debug.h"
38#include "v9fs.h" 39#include "v9fs.h"
39#include "9p.h"
40#include "v9fs_vfs.h" 40#include "v9fs_vfs.h"
41#include "fid.h" 41#include "fid.h"
42 42
@@ -50,55 +50,26 @@
50 50
51static int v9fs_vfs_readpage(struct file *filp, struct page *page) 51static int v9fs_vfs_readpage(struct file *filp, struct page *page)
52{ 52{
53 char *buffer = NULL; 53 int retval;
54 int retval = -EIO; 54 loff_t offset;
55 loff_t offset = page_offset(page); 55 char *buffer;
56 int count = PAGE_CACHE_SIZE; 56 struct p9_fid *fid;
57 struct inode *inode = filp->f_path.dentry->d_inode;
58 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
59 int rsize = v9ses->maxdata - V9FS_IOHDRSZ;
60 struct v9fs_fid *v9f = filp->private_data;
61 struct v9fs_fcall *fcall = NULL;
62 int fid = v9f->fid;
63 int total = 0;
64 int result = 0;
65
66 dprintk(DEBUG_VFS, "\n");
67 57
58 P9_DPRINTK(P9_DEBUG_VFS, "\n");
59 fid = filp->private_data;
68 buffer = kmap(page); 60 buffer = kmap(page);
69 do { 61 offset = page_offset(page);
70 if (count < rsize)
71 rsize = count;
72
73 result = v9fs_t_read(v9ses, fid, offset, rsize, &fcall);
74
75 if (result < 0) {
76 printk(KERN_ERR "v9fs_t_read returned %d\n",
77 result);
78
79 kfree(fcall);
80 goto UnmapAndUnlock;
81 } else
82 offset += result;
83
84 memcpy(buffer, fcall->params.rread.data, result);
85
86 count -= result;
87 buffer += result;
88 total += result;
89
90 kfree(fcall);
91 62
92 if (result < rsize) 63 retval = p9_client_readn(fid, buffer, offset, PAGE_CACHE_SIZE);
93 break; 64 if (retval < 0)
94 } while (count); 65 goto done;
95 66
96 memset(buffer, 0, count); 67 memset(buffer + retval, 0, PAGE_CACHE_SIZE - retval);
97 flush_dcache_page(page); 68 flush_dcache_page(page);
98 SetPageUptodate(page); 69 SetPageUptodate(page);
99 retval = 0; 70 retval = 0;
100 71
101UnmapAndUnlock: 72done:
102 kunmap(page); 73 kunmap(page);
103 unlock_page(page); 74 unlock_page(page);
104 return retval; 75 return retval;
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index d93960429c09..f9534f18df0a 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -34,10 +34,10 @@
34#include <linux/namei.h> 34#include <linux/namei.h>
35#include <linux/idr.h> 35#include <linux/idr.h>
36#include <linux/sched.h> 36#include <linux/sched.h>
37#include <net/9p/9p.h>
38#include <net/9p/client.h>
37 39
38#include "debug.h"
39#include "v9fs.h" 40#include "v9fs.h"
40#include "9p.h"
41#include "v9fs_vfs.h" 41#include "v9fs_vfs.h"
42#include "fid.h" 42#include "fid.h"
43 43
@@ -52,7 +52,7 @@
52 52
53static int v9fs_dentry_delete(struct dentry *dentry) 53static int v9fs_dentry_delete(struct dentry *dentry)
54{ 54{
55 dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); 55 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
56 56
57 return 1; 57 return 1;
58} 58}
@@ -69,7 +69,7 @@ static int v9fs_dentry_delete(struct dentry *dentry)
69static int v9fs_cached_dentry_delete(struct dentry *dentry) 69static int v9fs_cached_dentry_delete(struct dentry *dentry)
70{ 70{
71 struct inode *inode = dentry->d_inode; 71 struct inode *inode = dentry->d_inode;
72 dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); 72 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
73 73
74 if(!inode) 74 if(!inode)
75 return 1; 75 return 1;
@@ -85,26 +85,19 @@ static int v9fs_cached_dentry_delete(struct dentry *dentry)
85 85
86void v9fs_dentry_release(struct dentry *dentry) 86void v9fs_dentry_release(struct dentry *dentry)
87{ 87{
88 int err; 88 struct v9fs_dentry *dent;
89 89 struct p9_fid *temp, *current_fid;
90 dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); 90
91 91 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
92 if (dentry->d_fsdata != NULL) { 92 dent = dentry->d_fsdata;
93 struct list_head *fid_list = dentry->d_fsdata; 93 if (dent) {
94 struct v9fs_fid *temp = NULL; 94 list_for_each_entry_safe(current_fid, temp, &dent->fidlist,
95 struct v9fs_fid *current_fid = NULL; 95 dlist) {
96 96 p9_client_clunk(current_fid);
97 list_for_each_entry_safe(current_fid, temp, fid_list, list) {
98 err = v9fs_t_clunk(current_fid->v9ses, current_fid->fid);
99
100 if (err < 0)
101 dprintk(DEBUG_ERROR, "clunk failed: %d name %s\n",
102 err, dentry->d_iname);
103
104 v9fs_fid_destroy(current_fid);
105 } 97 }
106 98
107 kfree(dentry->d_fsdata); /* free the list_head */ 99 kfree(dent);
100 dentry->d_fsdata = NULL;
108 } 101 }
109} 102}
110 103
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 1dd86ee90bc5..0924d4477da3 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -32,11 +32,10 @@
32#include <linux/sched.h> 32#include <linux/sched.h>
33#include <linux/inet.h> 33#include <linux/inet.h>
34#include <linux/idr.h> 34#include <linux/idr.h>
35#include <net/9p/9p.h>
36#include <net/9p/client.h>
35 37
36#include "debug.h"
37#include "v9fs.h" 38#include "v9fs.h"
38#include "9p.h"
39#include "conv.h"
40#include "v9fs_vfs.h" 39#include "v9fs_vfs.h"
41#include "fid.h" 40#include "fid.h"
42 41
@@ -46,14 +45,14 @@
46 * 45 *
47 */ 46 */
48 47
49static inline int dt_type(struct v9fs_stat *mistat) 48static inline int dt_type(struct p9_stat *mistat)
50{ 49{
51 unsigned long perm = mistat->mode; 50 unsigned long perm = mistat->mode;
52 int rettype = DT_REG; 51 int rettype = DT_REG;
53 52
54 if (perm & V9FS_DMDIR) 53 if (perm & P9_DMDIR)
55 rettype = DT_DIR; 54 rettype = DT_DIR;
56 if (perm & V9FS_DMSYMLINK) 55 if (perm & P9_DMSYMLINK)
57 rettype = DT_LNK; 56 rettype = DT_LNK;
58 57
59 return rettype; 58 return rettype;
@@ -69,106 +68,36 @@ static inline int dt_type(struct v9fs_stat *mistat)
69 68
70static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) 69static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
71{ 70{
72 struct v9fs_fcall *fcall = NULL; 71 int over;
73 struct inode *inode = filp->f_path.dentry->d_inode; 72 struct p9_fid *fid;
74 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); 73 struct v9fs_session_info *v9ses;
75 struct v9fs_fid *file = filp->private_data; 74 struct inode *inode;
76 unsigned int i, n, s; 75 struct p9_stat *st;
77 int fid = -1; 76
78 int ret = 0; 77 P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name);
79 struct v9fs_stat stat; 78 inode = filp->f_path.dentry->d_inode;
80 int over = 0; 79 v9ses = v9fs_inode2v9ses(inode);
81 80 fid = filp->private_data;
82 dprintk(DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name); 81 while ((st = p9_client_dirread(fid, filp->f_pos)) != NULL) {
83 82 if (IS_ERR(st))
84 fid = file->fid; 83 return PTR_ERR(st);
85 84
86 if (file->rdir_fcall && (filp->f_pos != file->rdir_pos)) { 85 over = filldir(dirent, st->name.str, st->name.len, filp->f_pos,
87 kfree(file->rdir_fcall); 86 v9fs_qid2ino(&st->qid), dt_type(st));
88 file->rdir_fcall = NULL; 87
89 } 88 if (over)
90
91 if (file->rdir_fcall) {
92 n = file->rdir_fcall->params.rread.count;
93 i = file->rdir_fpos;
94 while (i < n) {
95 s = v9fs_deserialize_stat(
96 file->rdir_fcall->params.rread.data + i,
97 n - i, &stat, v9ses->extended);
98
99 if (s == 0) {
100 dprintk(DEBUG_ERROR,
101 "error while deserializing stat\n");
102 ret = -EIO;
103 goto FreeStructs;
104 }
105
106 over = filldir(dirent, stat.name.str, stat.name.len,
107 filp->f_pos, v9fs_qid2ino(&stat.qid),
108 dt_type(&stat));
109
110 if (over) {
111 file->rdir_fpos = i;
112 file->rdir_pos = filp->f_pos;
113 break;
114 }
115
116 i += s;
117 filp->f_pos += s;
118 }
119
120 if (!over) {
121 kfree(file->rdir_fcall);
122 file->rdir_fcall = NULL;
123 }
124 }
125
126 while (!over) {
127 ret = v9fs_t_read(v9ses, fid, filp->f_pos,
128 v9ses->maxdata-V9FS_IOHDRSZ, &fcall);
129 if (ret < 0) {
130 dprintk(DEBUG_ERROR, "error while reading: %d: %p\n",
131 ret, fcall);
132 goto FreeStructs;
133 } else if (ret == 0)
134 break; 89 break;
135 90
136 n = ret; 91 filp->f_pos += st->size;
137 i = 0; 92 kfree(st);
138 while (i < n) { 93 st = NULL;
139 s = v9fs_deserialize_stat(fcall->params.rread.data + i,
140 n - i, &stat, v9ses->extended);
141
142 if (s == 0) {
143 dprintk(DEBUG_ERROR,
144 "error while deserializing stat\n");
145 return -EIO;
146 }
147
148 over = filldir(dirent, stat.name.str, stat.name.len,
149 filp->f_pos, v9fs_qid2ino(&stat.qid),
150 dt_type(&stat));
151
152 if (over) {
153 file->rdir_fcall = fcall;
154 file->rdir_fpos = i;
155 file->rdir_pos = filp->f_pos;
156 fcall = NULL;
157 break;
158 }
159
160 i += s;
161 filp->f_pos += s;
162 }
163
164 kfree(fcall);
165 } 94 }
166 95
167 FreeStructs: 96 kfree(st);
168 kfree(fcall); 97 return 0;
169 return ret;
170} 98}
171 99
100
172/** 101/**
173 * v9fs_dir_release - close a directory 102 * v9fs_dir_release - close a directory
174 * @inode: inode of the directory 103 * @inode: inode of the directory
@@ -178,29 +107,13 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
178 107
179int v9fs_dir_release(struct inode *inode, struct file *filp) 108int v9fs_dir_release(struct inode *inode, struct file *filp)
180{ 109{
181 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); 110 struct p9_fid *fid;
182 struct v9fs_fid *fid = filp->private_data;
183 int fidnum = -1;
184
185 dprintk(DEBUG_VFS, "inode: %p filp: %p fid: %d\n", inode, filp,
186 fid->fid);
187 fidnum = fid->fid;
188 111
112 fid = filp->private_data;
113 P9_DPRINTK(P9_DEBUG_VFS,
114 "inode: %p filp: %p fid: %d\n", inode, filp, fid->fid);
189 filemap_write_and_wait(inode->i_mapping); 115 filemap_write_and_wait(inode->i_mapping);
190 116 p9_client_clunk(fid);
191 if (fidnum >= 0) {
192 dprintk(DEBUG_VFS, "fidopen: %d v9f->fid: %d\n", fid->fidopen,
193 fid->fid);
194
195 if (v9fs_t_clunk(v9ses, fidnum))
196 dprintk(DEBUG_ERROR, "clunk failed\n");
197
198 kfree(fid->rdir_fcall);
199 kfree(fid);
200
201 filp->private_data = NULL;
202 }
203
204 return 0; 117 return 0;
205} 118}
206 119
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 6e7678e4852f..2a40c2946d0a 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -34,10 +34,10 @@
34#include <linux/list.h> 34#include <linux/list.h>
35#include <asm/uaccess.h> 35#include <asm/uaccess.h>
36#include <linux/idr.h> 36#include <linux/idr.h>
37#include <net/9p/9p.h>
38#include <net/9p/client.h>
37 39
38#include "debug.h"
39#include "v9fs.h" 40#include "v9fs.h"
40#include "9p.h"
41#include "v9fs_vfs.h" 41#include "v9fs_vfs.h"
42#include "fid.h" 42#include "fid.h"
43 43
@@ -52,48 +52,40 @@ static const struct file_operations v9fs_cached_file_operations;
52 52
53int v9fs_file_open(struct inode *inode, struct file *file) 53int v9fs_file_open(struct inode *inode, struct file *file)
54{ 54{
55 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
56 struct v9fs_fid *vfid;
57 struct v9fs_fcall *fcall = NULL;
58 int omode;
59 int err; 55 int err;
56 struct v9fs_session_info *v9ses;
57 struct p9_fid *fid;
58 int omode;
60 59
61 dprintk(DEBUG_VFS, "inode: %p file: %p \n", inode, file); 60 P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p \n", inode, file);
62 61 v9ses = v9fs_inode2v9ses(inode);
63 vfid = v9fs_fid_clone(file->f_path.dentry);
64 if (IS_ERR(vfid))
65 return PTR_ERR(vfid);
66
67 omode = v9fs_uflags2omode(file->f_flags); 62 omode = v9fs_uflags2omode(file->f_flags);
68 err = v9fs_t_open(v9ses, vfid->fid, omode, &fcall); 63 fid = file->private_data;
69 if (err < 0) { 64 if (!fid) {
70 PRINT_FCALL_ERROR("open failed", fcall); 65 fid = v9fs_fid_clone(file->f_path.dentry);
71 goto Clunk_Fid; 66 if (IS_ERR(fid))
67 return PTR_ERR(fid);
68
69 err = p9_client_open(fid, omode);
70 if (err < 0) {
71 p9_client_clunk(fid);
72 return err;
73 }
74 if (omode & P9_OTRUNC) {
75 inode->i_size = 0;
76 inode->i_blocks = 0;
77 }
72 } 78 }
73 79
74 file->private_data = vfid; 80 file->private_data = fid;
75 vfid->fidopen = 1; 81 if ((fid->qid.version) && (v9ses->cache)) {
76 vfid->fidclunked = 0; 82 P9_DPRINTK(P9_DEBUG_VFS, "cached");
77 vfid->iounit = fcall->params.ropen.iounit;
78 vfid->rdir_pos = 0;
79 vfid->rdir_fcall = NULL;
80 vfid->filp = file;
81 kfree(fcall);
82
83 if((vfid->qid.version) && (v9ses->cache)) {
84 dprintk(DEBUG_VFS, "cached");
85 /* enable cached file options */ 83 /* enable cached file options */
86 if(file->f_op == &v9fs_file_operations) 84 if(file->f_op == &v9fs_file_operations)
87 file->f_op = &v9fs_cached_file_operations; 85 file->f_op = &v9fs_cached_file_operations;
88 } 86 }
89 87
90 return 0; 88 return 0;
91
92Clunk_Fid:
93 v9fs_fid_clunk(v9ses, vfid);
94 kfree(fcall);
95
96 return err;
97} 89}
98 90
99/** 91/**
@@ -110,7 +102,7 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
110 int res = 0; 102 int res = 0;
111 struct inode *inode = filp->f_path.dentry->d_inode; 103 struct inode *inode = filp->f_path.dentry->d_inode;
112 104
113 dprintk(DEBUG_VFS, "filp: %p lock: %p\n", filp, fl); 105 P9_DPRINTK(P9_DEBUG_VFS, "filp: %p lock: %p\n", filp, fl);
114 106
115 /* No mandatory locks */ 107 /* No mandatory locks */
116 if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) 108 if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
@@ -136,55 +128,16 @@ static ssize_t
136v9fs_file_read(struct file *filp, char __user * data, size_t count, 128v9fs_file_read(struct file *filp, char __user * data, size_t count,
137 loff_t * offset) 129 loff_t * offset)
138{ 130{
139 struct inode *inode = filp->f_path.dentry->d_inode; 131 int ret;
140 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); 132 struct p9_fid *fid;
141 struct v9fs_fid *v9f = filp->private_data;
142 struct v9fs_fcall *fcall = NULL;
143 int fid = v9f->fid;
144 int rsize = 0;
145 int result = 0;
146 int total = 0;
147 int n;
148
149 dprintk(DEBUG_VFS, "\n");
150
151 rsize = v9ses->maxdata - V9FS_IOHDRSZ;
152 if (v9f->iounit != 0 && rsize > v9f->iounit)
153 rsize = v9f->iounit;
154
155 do {
156 if (count < rsize)
157 rsize = count;
158 133
159 result = v9fs_t_read(v9ses, fid, *offset, rsize, &fcall); 134 P9_DPRINTK(P9_DEBUG_VFS, "\n");
135 fid = filp->private_data;
136 ret = p9_client_uread(fid, data, *offset, count);
137 if (ret > 0)
138 *offset += ret;
160 139
161 if (result < 0) { 140 return ret;
162 printk(KERN_ERR "9P2000: v9fs_t_read returned %d\n",
163 result);
164
165 kfree(fcall);
166 return total;
167 } else
168 *offset += result;
169
170 n = copy_to_user(data, fcall->params.rread.data, result);
171 if (n) {
172 dprintk(DEBUG_ERROR, "Problem copying to user %d\n", n);
173 kfree(fcall);
174 return -EFAULT;
175 }
176
177 count -= result;
178 data += result;
179 total += result;
180
181 kfree(fcall);
182
183 if (result < rsize)
184 break;
185 } while (count);
186
187 return total;
188} 141}
189 142
190/** 143/**
@@ -200,50 +153,25 @@ static ssize_t
200v9fs_file_write(struct file *filp, const char __user * data, 153v9fs_file_write(struct file *filp, const char __user * data,
201 size_t count, loff_t * offset) 154 size_t count, loff_t * offset)
202{ 155{
156 int ret;
157 struct p9_fid *fid;
203 struct inode *inode = filp->f_path.dentry->d_inode; 158 struct inode *inode = filp->f_path.dentry->d_inode;
204 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
205 struct v9fs_fid *v9fid = filp->private_data;
206 struct v9fs_fcall *fcall;
207 int fid = v9fid->fid;
208 int result = -EIO;
209 int rsize = 0;
210 int total = 0;
211
212 dprintk(DEBUG_VFS, "data %p count %d offset %x\n", data, (int)count,
213 (int)*offset);
214 rsize = v9ses->maxdata - V9FS_IOHDRSZ;
215 if (v9fid->iounit != 0 && rsize > v9fid->iounit)
216 rsize = v9fid->iounit;
217
218 do {
219 if (count < rsize)
220 rsize = count;
221 159
222 result = v9fs_t_write(v9ses, fid, *offset, rsize, data, &fcall); 160 P9_DPRINTK(P9_DEBUG_VFS, "data %p count %d offset %x\n", data,
223 if (result < 0) { 161 (int)count, (int)*offset);
224 PRINT_FCALL_ERROR("error while writing", fcall);
225 kfree(fcall);
226 return result;
227 } else
228 *offset += result;
229 162
230 kfree(fcall); 163 fid = filp->private_data;
231 fcall = NULL; 164 ret = p9_client_uwrite(fid, data, *offset, count);
165 if (ret > 0)
166 *offset += ret;
232 167
233 if (result != rsize) { 168 if (*offset > inode->i_size) {
234 eprintk(KERN_ERR, 169 inode->i_size = *offset;
235 "short write: v9fs_t_write returned %d\n", 170 inode->i_blocks = (inode->i_size + 512 - 1) >> 9;
236 result); 171 }
237 break;
238 }
239
240 count -= result;
241 data += result;
242 total += result;
243 } while (count);
244 172
245 invalidate_inode_pages2(inode->i_mapping); 173 invalidate_inode_pages2(inode->i_mapping);
246 return total; 174 return ret;
247} 175}
248 176
249static const struct file_operations v9fs_cached_file_operations = { 177static const struct file_operations v9fs_cached_file_operations = {
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index c76cd8fa3f6c..e5c45eed58a9 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -34,10 +34,10 @@
34#include <linux/namei.h> 34#include <linux/namei.h>
35#include <linux/idr.h> 35#include <linux/idr.h>
36#include <linux/sched.h> 36#include <linux/sched.h>
37#include <net/9p/9p.h>
38#include <net/9p/client.h>
37 39
38#include "debug.h"
39#include "v9fs.h" 40#include "v9fs.h"
40#include "9p.h"
41#include "v9fs_vfs.h" 41#include "v9fs_vfs.h"
42#include "fid.h" 42#include "fid.h"
43 43
@@ -58,27 +58,27 @@ static int unixmode2p9mode(struct v9fs_session_info *v9ses, int mode)
58 int res; 58 int res;
59 res = mode & 0777; 59 res = mode & 0777;
60 if (S_ISDIR(mode)) 60 if (S_ISDIR(mode))
61 res |= V9FS_DMDIR; 61 res |= P9_DMDIR;
62 if (v9ses->extended) { 62 if (v9ses->extended) {
63 if (S_ISLNK(mode)) 63 if (S_ISLNK(mode))
64 res |= V9FS_DMSYMLINK; 64 res |= P9_DMSYMLINK;
65 if (v9ses->nodev == 0) { 65 if (v9ses->nodev == 0) {
66 if (S_ISSOCK(mode)) 66 if (S_ISSOCK(mode))
67 res |= V9FS_DMSOCKET; 67 res |= P9_DMSOCKET;
68 if (S_ISFIFO(mode)) 68 if (S_ISFIFO(mode))
69 res |= V9FS_DMNAMEDPIPE; 69 res |= P9_DMNAMEDPIPE;
70 if (S_ISBLK(mode)) 70 if (S_ISBLK(mode))
71 res |= V9FS_DMDEVICE; 71 res |= P9_DMDEVICE;
72 if (S_ISCHR(mode)) 72 if (S_ISCHR(mode))
73 res |= V9FS_DMDEVICE; 73 res |= P9_DMDEVICE;
74 } 74 }
75 75
76 if ((mode & S_ISUID) == S_ISUID) 76 if ((mode & S_ISUID) == S_ISUID)
77 res |= V9FS_DMSETUID; 77 res |= P9_DMSETUID;
78 if ((mode & S_ISGID) == S_ISGID) 78 if ((mode & S_ISGID) == S_ISGID)
79 res |= V9FS_DMSETGID; 79 res |= P9_DMSETGID;
80 if ((mode & V9FS_DMLINK)) 80 if ((mode & P9_DMLINK))
81 res |= V9FS_DMLINK; 81 res |= P9_DMLINK;
82 } 82 }
83 83
84 return res; 84 return res;
@@ -97,27 +97,27 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
97 97
98 res = mode & 0777; 98 res = mode & 0777;
99 99
100 if ((mode & V9FS_DMDIR) == V9FS_DMDIR) 100 if ((mode & P9_DMDIR) == P9_DMDIR)
101 res |= S_IFDIR; 101 res |= S_IFDIR;
102 else if ((mode & V9FS_DMSYMLINK) && (v9ses->extended)) 102 else if ((mode & P9_DMSYMLINK) && (v9ses->extended))
103 res |= S_IFLNK; 103 res |= S_IFLNK;
104 else if ((mode & V9FS_DMSOCKET) && (v9ses->extended) 104 else if ((mode & P9_DMSOCKET) && (v9ses->extended)
105 && (v9ses->nodev == 0)) 105 && (v9ses->nodev == 0))
106 res |= S_IFSOCK; 106 res |= S_IFSOCK;
107 else if ((mode & V9FS_DMNAMEDPIPE) && (v9ses->extended) 107 else if ((mode & P9_DMNAMEDPIPE) && (v9ses->extended)
108 && (v9ses->nodev == 0)) 108 && (v9ses->nodev == 0))
109 res |= S_IFIFO; 109 res |= S_IFIFO;
110 else if ((mode & V9FS_DMDEVICE) && (v9ses->extended) 110 else if ((mode & P9_DMDEVICE) && (v9ses->extended)
111 && (v9ses->nodev == 0)) 111 && (v9ses->nodev == 0))
112 res |= S_IFBLK; 112 res |= S_IFBLK;
113 else 113 else
114 res |= S_IFREG; 114 res |= S_IFREG;
115 115
116 if (v9ses->extended) { 116 if (v9ses->extended) {
117 if ((mode & V9FS_DMSETUID) == V9FS_DMSETUID) 117 if ((mode & P9_DMSETUID) == P9_DMSETUID)
118 res |= S_ISUID; 118 res |= S_ISUID;
119 119
120 if ((mode & V9FS_DMSETGID) == V9FS_DMSETGID) 120 if ((mode & P9_DMSETGID) == P9_DMSETGID)
121 res |= S_ISGID; 121 res |= S_ISGID;
122 } 122 }
123 123
@@ -132,26 +132,26 @@ int v9fs_uflags2omode(int uflags)
132 switch (uflags&3) { 132 switch (uflags&3) {
133 default: 133 default:
134 case O_RDONLY: 134 case O_RDONLY:
135 ret = V9FS_OREAD; 135 ret = P9_OREAD;
136 break; 136 break;
137 137
138 case O_WRONLY: 138 case O_WRONLY:
139 ret = V9FS_OWRITE; 139 ret = P9_OWRITE;
140 break; 140 break;
141 141
142 case O_RDWR: 142 case O_RDWR:
143 ret = V9FS_ORDWR; 143 ret = P9_ORDWR;
144 break; 144 break;
145 } 145 }
146 146
147 if (uflags & O_EXCL) 147 if (uflags & O_EXCL)
148 ret |= V9FS_OEXCL; 148 ret |= P9_OEXCL;
149 149
150 if (uflags & O_TRUNC) 150 if (uflags & O_TRUNC)
151 ret |= V9FS_OTRUNC; 151 ret |= P9_OTRUNC;
152 152
153 if (uflags & O_APPEND) 153 if (uflags & O_APPEND)
154 ret |= V9FS_OAPPEND; 154 ret |= P9_OAPPEND;
155 155
156 return ret; 156 return ret;
157} 157}
@@ -164,7 +164,7 @@ int v9fs_uflags2omode(int uflags)
164 */ 164 */
165 165
166static void 166static void
167v9fs_blank_wstat(struct v9fs_wstat *wstat) 167v9fs_blank_wstat(struct p9_wstat *wstat)
168{ 168{
169 wstat->type = ~0; 169 wstat->type = ~0;
170 wstat->dev = ~0; 170 wstat->dev = ~0;
@@ -197,7 +197,7 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
197 struct inode *inode; 197 struct inode *inode;
198 struct v9fs_session_info *v9ses = sb->s_fs_info; 198 struct v9fs_session_info *v9ses = sb->s_fs_info;
199 199
200 dprintk(DEBUG_VFS, "super block: %p mode: %o\n", sb, mode); 200 P9_DPRINTK(P9_DEBUG_VFS, "super block: %p mode: %o\n", sb, mode);
201 201
202 inode = new_inode(sb); 202 inode = new_inode(sb);
203 if (inode) { 203 if (inode) {
@@ -215,7 +215,8 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
215 case S_IFCHR: 215 case S_IFCHR:
216 case S_IFSOCK: 216 case S_IFSOCK:
217 if(!v9ses->extended) { 217 if(!v9ses->extended) {
218 dprintk(DEBUG_ERROR, "special files without extended mode\n"); 218 P9_DPRINTK(P9_DEBUG_ERROR,
219 "special files without extended mode\n");
219 return ERR_PTR(-EINVAL); 220 return ERR_PTR(-EINVAL);
220 } 221 }
221 init_special_inode(inode, inode->i_mode, 222 init_special_inode(inode, inode->i_mode,
@@ -227,7 +228,8 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
227 break; 228 break;
228 case S_IFLNK: 229 case S_IFLNK:
229 if(!v9ses->extended) { 230 if(!v9ses->extended) {
230 dprintk(DEBUG_ERROR, "extended modes used w/o 9P2000.u\n"); 231 P9_DPRINTK(P9_DEBUG_ERROR,
232 "extended modes used w/o 9P2000.u\n");
231 return ERR_PTR(-EINVAL); 233 return ERR_PTR(-EINVAL);
232 } 234 }
233 inode->i_op = &v9fs_symlink_inode_operations; 235 inode->i_op = &v9fs_symlink_inode_operations;
@@ -241,71 +243,19 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
241 inode->i_fop = &v9fs_dir_operations; 243 inode->i_fop = &v9fs_dir_operations;
242 break; 244 break;
243 default: 245 default:
244 dprintk(DEBUG_ERROR, "BAD mode 0x%x S_IFMT 0x%x\n", 246 P9_DPRINTK(P9_DEBUG_ERROR,
247 "BAD mode 0x%x S_IFMT 0x%x\n",
245 mode, mode & S_IFMT); 248 mode, mode & S_IFMT);
246 return ERR_PTR(-EINVAL); 249 return ERR_PTR(-EINVAL);
247 } 250 }
248 } else { 251 } else {
249 eprintk(KERN_WARNING, "Problem allocating inode\n"); 252 P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n");
250 return ERR_PTR(-ENOMEM); 253 return ERR_PTR(-ENOMEM);
251 } 254 }
252 return inode; 255 return inode;
253} 256}
254 257
255static int 258/*
256v9fs_create(struct v9fs_session_info *v9ses, u32 pfid, char *name, u32 perm,
257 u8 mode, char *extension, u32 *fidp, struct v9fs_qid *qid, u32 *iounit)
258{
259 int fid;
260 int err;
261 struct v9fs_fcall *fcall;
262
263 fid = v9fs_get_idpool(&v9ses->fidpool);
264 if (fid < 0) {
265 eprintk(KERN_WARNING, "no free fids available\n");
266 return -ENOSPC;
267 }
268
269 err = v9fs_t_walk(v9ses, pfid, fid, NULL, &fcall);
270 if (err < 0) {
271 PRINT_FCALL_ERROR("clone error", fcall);
272 if (fcall && fcall->id == RWALK)
273 goto clunk_fid;
274 else
275 goto put_fid;
276 }
277 kfree(fcall);
278
279 err = v9fs_t_create(v9ses, fid, name, perm, mode, extension, &fcall);
280 if (err < 0) {
281 PRINT_FCALL_ERROR("create fails", fcall);
282 goto clunk_fid;
283 }
284
285 if (iounit)
286 *iounit = fcall->params.rcreate.iounit;
287
288 if (qid)
289 *qid = fcall->params.rcreate.qid;
290
291 if (fidp)
292 *fidp = fid;
293
294 kfree(fcall);
295 return 0;
296
297clunk_fid:
298 v9fs_t_clunk(v9ses, fid);
299 fid = V9FS_NOFID;
300
301put_fid:
302 if (fid != V9FS_NOFID)
303 v9fs_put_idpool(fid, &v9ses->fidpool);
304
305 kfree(fcall);
306 return err;
307}
308
309static struct v9fs_fid* 259static struct v9fs_fid*
310v9fs_clone_walk(struct v9fs_session_info *v9ses, u32 fid, struct dentry *dentry) 260v9fs_clone_walk(struct v9fs_session_info *v9ses, u32 fid, struct dentry *dentry)
311{ 261{
@@ -355,23 +305,25 @@ error:
355 kfree(fcall); 305 kfree(fcall);
356 return ERR_PTR(err); 306 return ERR_PTR(err);
357} 307}
308*/
358 309
359static struct inode * 310static struct inode *
360v9fs_inode_from_fid(struct v9fs_session_info *v9ses, u32 fid, 311v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
361 struct super_block *sb) 312 struct super_block *sb)
362{ 313{
363 int err, umode; 314 int err, umode;
364 struct inode *ret; 315 struct inode *ret;
365 struct v9fs_fcall *fcall; 316 struct p9_stat *st;
366 317
367 ret = NULL; 318 ret = NULL;
368 err = v9fs_t_stat(v9ses, fid, &fcall); 319 st = p9_client_stat(fid);
369 if (err) { 320 if (IS_ERR(st)) {
370 PRINT_FCALL_ERROR("stat error", fcall); 321 err = PTR_ERR(st);
322 st = NULL;
371 goto error; 323 goto error;
372 } 324 }
373 325
374 umode = p9mode2unixmode(v9ses, fcall->params.rstat.stat.mode); 326 umode = p9mode2unixmode(v9ses, st->mode);
375 ret = v9fs_get_inode(sb, umode); 327 ret = v9fs_get_inode(sb, umode);
376 if (IS_ERR(ret)) { 328 if (IS_ERR(ret)) {
377 err = PTR_ERR(ret); 329 err = PTR_ERR(ret);
@@ -379,12 +331,13 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, u32 fid,
379 goto error; 331 goto error;
380 } 332 }
381 333
382 v9fs_stat2inode(&fcall->params.rstat.stat, ret, sb); 334 v9fs_stat2inode(st, ret, sb);
383 kfree(fcall); 335 ret->i_ino = v9fs_qid2ino(&st->qid);
336 kfree(st);
384 return ret; 337 return ret;
385 338
386error: 339error:
387 kfree(fcall); 340 kfree(st);
388 if (ret) 341 if (ret)
389 iput(ret); 342 iput(ret);
390 343
@@ -401,43 +354,20 @@ error:
401 354
402static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir) 355static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
403{ 356{
404 struct v9fs_fcall *fcall = NULL; 357 struct inode *file_inode;
405 struct super_block *sb = NULL; 358 struct v9fs_session_info *v9ses;
406 struct v9fs_session_info *v9ses = NULL; 359 struct p9_fid *v9fid;
407 struct v9fs_fid *v9fid = NULL;
408 struct inode *file_inode = NULL;
409 int fid = -1;
410 int result = 0;
411 360
412 dprintk(DEBUG_VFS, "inode: %p dentry: %p rmdir: %d\n", dir, file, 361 P9_DPRINTK(P9_DEBUG_VFS, "inode: %p dentry: %p rmdir: %d\n", dir, file,
413 rmdir); 362 rmdir);
414 363
415 file_inode = file->d_inode; 364 file_inode = file->d_inode;
416 sb = file_inode->i_sb;
417 v9ses = v9fs_inode2v9ses(file_inode); 365 v9ses = v9fs_inode2v9ses(file_inode);
418 v9fid = v9fs_fid_clone(file); 366 v9fid = v9fs_fid_clone(file);
419 if(IS_ERR(v9fid)) 367 if(IS_ERR(v9fid))
420 return PTR_ERR(v9fid); 368 return PTR_ERR(v9fid);
421 369
422 fid = v9fid->fid; 370 return p9_client_remove(v9fid);
423 if (fid < 0) {
424 dprintk(DEBUG_ERROR, "inode #%lu, no fid!\n",
425 file_inode->i_ino);
426 return -EBADF;
427 }
428
429 result = v9fs_t_remove(v9ses, fid, &fcall);
430 if (result < 0) {
431 PRINT_FCALL_ERROR("remove fails", fcall);
432 goto Error;
433 }
434
435 v9fs_put_idpool(fid, &v9ses->fidpool);
436 v9fs_fid_destroy(v9fid);
437
438Error:
439 kfree(fcall);
440 return result;
441} 371}
442 372
443static int 373static int
@@ -446,61 +376,59 @@ v9fs_open_created(struct inode *inode, struct file *file)
446 return 0; 376 return 0;
447} 377}
448 378
379
449/** 380/**
450 * v9fs_vfs_create - VFS hook to create files 381 * v9fs_create - Create a file
451 * @inode: directory inode that is being deleted 382 * @dentry: dentry that is being created
452 * @dentry: dentry that is being deleted 383 * @perm: create permissions
453 * @mode: create permissions 384 * @mode: open mode
454 * @nd: path information
455 * 385 *
456 */ 386 */
457 387static struct p9_fid *
458static int 388v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
459v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode, 389 struct dentry *dentry, char *extension, u32 perm, u8 mode)
460 struct nameidata *nd)
461{ 390{
462 int err; 391 int err;
463 u32 fid, perm, iounit; 392 char *name;
464 int flags; 393 struct p9_fid *dfid, *ofid, *fid;
465 struct v9fs_session_info *v9ses;
466 struct v9fs_fid *dfid, *vfid, *ffid;
467 struct inode *inode; 394 struct inode *inode;
468 struct v9fs_qid qid;
469 struct file *filp;
470 395
471 inode = NULL; 396 err = 0;
472 vfid = NULL; 397 ofid = NULL;
473 v9ses = v9fs_inode2v9ses(dir); 398 fid = NULL;
399 name = (char *) dentry->d_name.name;
474 dfid = v9fs_fid_clone(dentry->d_parent); 400 dfid = v9fs_fid_clone(dentry->d_parent);
475 if(IS_ERR(dfid)) { 401 if(IS_ERR(dfid)) {
476 err = PTR_ERR(dfid); 402 err = PTR_ERR(dfid);
403 dfid = NULL;
477 goto error; 404 goto error;
478 } 405 }
479 406
480 perm = unixmode2p9mode(v9ses, mode); 407 /* clone a fid to use for creation */
481 if (nd && nd->flags & LOOKUP_OPEN) 408 ofid = p9_client_walk(dfid, 0, NULL, 1);
482 flags = nd->intent.open.flags - 1; 409 if (IS_ERR(ofid)) {
483 else 410 err = PTR_ERR(ofid);
484 flags = O_RDWR; 411 ofid = NULL;
485 412 goto error;
486 err = v9fs_create(v9ses, dfid->fid, (char *) dentry->d_name.name, 413 }
487 perm, v9fs_uflags2omode(flags), NULL, &fid, &qid, &iounit);
488 414
489 if (err) 415 err = p9_client_fcreate(ofid, name, perm, mode, extension);
490 goto clunk_dfid; 416 if (err < 0)
417 goto error;
491 418
492 vfid = v9fs_clone_walk(v9ses, dfid->fid, dentry); 419 /* now walk from the parent so we can get unopened fid */
493 v9fs_fid_clunk(v9ses, dfid); 420 fid = p9_client_walk(dfid, 1, &name, 0);
494 if (IS_ERR(vfid)) { 421 if (IS_ERR(fid)) {
495 err = PTR_ERR(vfid); 422 err = PTR_ERR(fid);
496 vfid = NULL; 423 fid = NULL;
497 goto error; 424 goto error;
498 } 425 } else
426 dfid = NULL;
499 427
500 inode = v9fs_inode_from_fid(v9ses, vfid->fid, dir->i_sb); 428 /* instantiate inode and assign the unopened fid to the dentry */
429 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
501 if (IS_ERR(inode)) { 430 if (IS_ERR(inode)) {
502 err = PTR_ERR(inode); 431 err = PTR_ERR(inode);
503 inode = NULL;
504 goto error; 432 goto error;
505 } 433 }
506 434
@@ -508,35 +436,78 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
508 dentry->d_op = &v9fs_cached_dentry_operations; 436 dentry->d_op = &v9fs_cached_dentry_operations;
509 else 437 else
510 dentry->d_op = &v9fs_dentry_operations; 438 dentry->d_op = &v9fs_dentry_operations;
439
511 d_instantiate(dentry, inode); 440 d_instantiate(dentry, inode);
441 v9fs_fid_add(dentry, fid);
442 return ofid;
512 443
513 if (nd && nd->flags & LOOKUP_OPEN) { 444error:
514 ffid = v9fs_fid_create(v9ses, fid); 445 if (dfid)
515 if (!ffid) 446 p9_client_clunk(dfid);
516 return -ENOMEM; 447
448 if (ofid)
449 p9_client_clunk(ofid);
450
451 if (fid)
452 p9_client_clunk(fid);
453
454 return ERR_PTR(err);
455}
456
457/**
458 * v9fs_vfs_create - VFS hook to create files
459 * @inode: directory inode that is being created
460 * @dentry: dentry that is being deleted
461 * @mode: create permissions
462 * @nd: path information
463 *
464 */
517 465
466static int
467v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
468 struct nameidata *nd)
469{
470 int err;
471 u32 perm;
472 int flags;
473 struct v9fs_session_info *v9ses;
474 struct p9_fid *fid;
475 struct file *filp;
476
477 err = 0;
478 fid = NULL;
479 v9ses = v9fs_inode2v9ses(dir);
480 perm = unixmode2p9mode(v9ses, mode);
481 if (nd && nd->flags & LOOKUP_OPEN)
482 flags = nd->intent.open.flags - 1;
483 else
484 flags = O_RDWR;
485
486 fid = v9fs_create(v9ses, dir, dentry, NULL, perm,
487 v9fs_uflags2omode(flags));
488 if (IS_ERR(fid)) {
489 err = PTR_ERR(fid);
490 fid = NULL;
491 goto error;
492 }
493
494 /* if we are opening a file, assign the open fid to the file */
495 if (nd && nd->flags & LOOKUP_OPEN) {
518 filp = lookup_instantiate_filp(nd, dentry, v9fs_open_created); 496 filp = lookup_instantiate_filp(nd, dentry, v9fs_open_created);
519 if (IS_ERR(filp)) { 497 if (IS_ERR(filp)) {
520 v9fs_fid_destroy(ffid); 498 err = PTR_ERR(filp);
521 return PTR_ERR(filp); 499 goto error;
522 } 500 }
523 501
524 ffid->rdir_pos = 0; 502 filp->private_data = fid;
525 ffid->rdir_fcall = NULL; 503 } else
526 ffid->fidopen = 1; 504 p9_client_clunk(fid);
527 ffid->iounit = iounit;
528 ffid->filp = filp;
529 filp->private_data = ffid;
530 }
531 505
532 return 0; 506 return 0;
533 507
534clunk_dfid:
535 v9fs_fid_clunk(v9ses, dfid);
536
537error: 508error:
538 if (vfid) 509 if (fid)
539 v9fs_fid_destroy(vfid); 510 p9_client_clunk(fid);
540 511
541 return err; 512 return err;
542} 513}
@@ -552,57 +523,23 @@ error:
552static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 523static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
553{ 524{
554 int err; 525 int err;
555 u32 fid, perm; 526 u32 perm;
556 struct v9fs_session_info *v9ses; 527 struct v9fs_session_info *v9ses;
557 struct v9fs_fid *dfid, *vfid; 528 struct p9_fid *fid;
558 struct inode *inode;
559 529
560 inode = NULL; 530 P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
561 vfid = NULL; 531 err = 0;
562 v9ses = v9fs_inode2v9ses(dir); 532 v9ses = v9fs_inode2v9ses(dir);
563 dfid = v9fs_fid_clone(dentry->d_parent);
564 if(IS_ERR(dfid)) {
565 err = PTR_ERR(dfid);
566 goto error;
567 }
568
569 perm = unixmode2p9mode(v9ses, mode | S_IFDIR); 533 perm = unixmode2p9mode(v9ses, mode | S_IFDIR);
570 534 fid = v9fs_create(v9ses, dir, dentry, NULL, perm, P9_OREAD);
571 err = v9fs_create(v9ses, dfid->fid, (char *) dentry->d_name.name, 535 if (IS_ERR(fid)) {
572 perm, V9FS_OREAD, NULL, &fid, NULL, NULL); 536 err = PTR_ERR(fid);
573 537 fid = NULL;
574 if (err) {
575 dprintk(DEBUG_ERROR, "create error %d\n", err);
576 goto clean_up_dfid;
577 } 538 }
578 539
579 vfid = v9fs_clone_walk(v9ses, dfid->fid, dentry); 540 if (fid)
580 if (IS_ERR(vfid)) { 541 p9_client_clunk(fid);
581 err = PTR_ERR(vfid);
582 vfid = NULL;
583 goto clean_up_dfid;
584 }
585 542
586 v9fs_fid_clunk(v9ses, dfid);
587 inode = v9fs_inode_from_fid(v9ses, vfid->fid, dir->i_sb);
588 if (IS_ERR(inode)) {
589 err = PTR_ERR(inode);
590 inode = NULL;
591 v9fs_fid_destroy(vfid);
592 goto error;
593 }
594
595 if(v9ses->cache)
596 dentry->d_op = &v9fs_cached_dentry_operations;
597 else
598 dentry->d_op = &v9fs_dentry_operations;
599 d_instantiate(dentry, inode);
600 return 0;
601
602clean_up_dfid:
603 v9fs_fid_clunk(v9ses, dfid);
604
605error:
606 return err; 543 return err;
607} 544}
608 545
@@ -619,104 +556,54 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
619{ 556{
620 struct super_block *sb; 557 struct super_block *sb;
621 struct v9fs_session_info *v9ses; 558 struct v9fs_session_info *v9ses;
622 struct v9fs_fid *dirfid; 559 struct p9_fid *dfid, *fid;
623 struct v9fs_fid *fid;
624 struct inode *inode; 560 struct inode *inode;
625 struct v9fs_fcall *fcall = NULL; 561 char *name;
626 int dirfidnum = -1;
627 int newfid = -1;
628 int result = 0; 562 int result = 0;
629 563
630 dprintk(DEBUG_VFS, "dir: %p dentry: (%s) %p nameidata: %p\n", 564 P9_DPRINTK(P9_DEBUG_VFS, "dir: %p dentry: (%s) %p nameidata: %p\n",
631 dir, dentry->d_name.name, dentry, nameidata); 565 dir, dentry->d_name.name, dentry, nameidata);
632 566
633 sb = dir->i_sb; 567 sb = dir->i_sb;
634 v9ses = v9fs_inode2v9ses(dir); 568 v9ses = v9fs_inode2v9ses(dir);
635 dirfid = v9fs_fid_lookup(dentry->d_parent); 569 dfid = v9fs_fid_lookup(dentry->d_parent);
636 570 if (IS_ERR(dfid))
637 if(IS_ERR(dirfid)) 571 return ERR_PTR(PTR_ERR(dfid));
638 return ERR_PTR(PTR_ERR(dirfid)); 572
639 573 name = (char *) dentry->d_name.name;
640 dirfidnum = dirfid->fid; 574 fid = p9_client_walk(dfid, 1, &name, 1);
641 575 if (IS_ERR(fid)) {
642 newfid = v9fs_get_idpool(&v9ses->fidpool); 576 result = PTR_ERR(fid);
643 if (newfid < 0) {
644 eprintk(KERN_WARNING, "newfid fails!\n");
645 result = -ENOSPC;
646 goto Release_Dirfid;
647 }
648
649 result = v9fs_t_walk(v9ses, dirfidnum, newfid,
650 (char *)dentry->d_name.name, &fcall);
651
652 up(&dirfid->lock);
653
654 if (result < 0) {
655 if (fcall && fcall->id == RWALK)
656 v9fs_t_clunk(v9ses, newfid);
657 else
658 v9fs_put_idpool(newfid, &v9ses->fidpool);
659
660 if (result == -ENOENT) { 577 if (result == -ENOENT) {
661 d_add(dentry, NULL); 578 d_add(dentry, NULL);
662 dprintk(DEBUG_VFS,
663 "Return negative dentry %p count %d\n",
664 dentry, atomic_read(&dentry->d_count));
665 kfree(fcall);
666 return NULL; 579 return NULL;
667 } 580 }
668 dprintk(DEBUG_ERROR, "walk error:%d\n", result);
669 goto FreeFcall;
670 }
671 kfree(fcall);
672
673 result = v9fs_t_stat(v9ses, newfid, &fcall);
674 if (result < 0) {
675 dprintk(DEBUG_ERROR, "stat error\n");
676 goto FreeFcall;
677 }
678
679 inode = v9fs_get_inode(sb, p9mode2unixmode(v9ses,
680 fcall->params.rstat.stat.mode));
681 581
682 if (IS_ERR(inode) && (PTR_ERR(inode) == -ENOSPC)) { 582 return ERR_PTR(result);
683 eprintk(KERN_WARNING, "inode alloc failes, returns %ld\n",
684 PTR_ERR(inode));
685
686 result = -ENOSPC;
687 goto FreeFcall;
688 } 583 }
689 584
690 inode->i_ino = v9fs_qid2ino(&fcall->params.rstat.stat.qid); 585 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
691 586 if (IS_ERR(inode)) {
692 fid = v9fs_fid_create(v9ses, newfid); 587 result = PTR_ERR(inode);
693 if (fid == NULL) { 588 inode = NULL;
694 dprintk(DEBUG_ERROR, "couldn't insert\n"); 589 goto error;
695 result = -ENOMEM;
696 goto FreeFcall;
697 } 590 }
698 591
699 result = v9fs_fid_insert(fid, dentry); 592 result = v9fs_fid_add(dentry, fid);
700 if (result < 0) 593 if (result < 0)
701 goto FreeFcall; 594 goto error;
702 595
703 fid->qid = fcall->params.rstat.stat.qid;
704 v9fs_stat2inode(&fcall->params.rstat.stat, inode, inode->i_sb);
705 if((fid->qid.version)&&(v9ses->cache)) 596 if((fid->qid.version)&&(v9ses->cache))
706 dentry->d_op = &v9fs_cached_dentry_operations; 597 dentry->d_op = &v9fs_cached_dentry_operations;
707 else 598 else
708 dentry->d_op = &v9fs_dentry_operations; 599 dentry->d_op = &v9fs_dentry_operations;
709 600
710 d_add(dentry, inode); 601 d_add(dentry, inode);
711 kfree(fcall);
712
713 return NULL; 602 return NULL;
714 603
715Release_Dirfid: 604error:
716 up(&dirfid->lock); 605 if (fid)
717 606 p9_client_clunk(fid);
718FreeFcall:
719 kfree(fcall);
720 607
721 return ERR_PTR(result); 608 return ERR_PTR(result);
722} 609}
@@ -758,73 +645,54 @@ static int
758v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, 645v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
759 struct inode *new_dir, struct dentry *new_dentry) 646 struct inode *new_dir, struct dentry *new_dentry)
760{ 647{
761 struct inode *old_inode = old_dentry->d_inode; 648 struct inode *old_inode;
762 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(old_inode); 649 struct v9fs_session_info *v9ses;
763 struct v9fs_fid *oldfid = v9fs_fid_lookup(old_dentry); 650 struct p9_fid *oldfid;
764 struct v9fs_fid *olddirfid; 651 struct p9_fid *olddirfid;
765 struct v9fs_fid *newdirfid; 652 struct p9_fid *newdirfid;
766 struct v9fs_wstat wstat; 653 struct p9_wstat wstat;
767 struct v9fs_fcall *fcall = NULL; 654 int retval;
768 int fid = -1;
769 int olddirfidnum = -1;
770 int newdirfidnum = -1;
771 int retval = 0;
772
773 dprintk(DEBUG_VFS, "\n");
774 655
656 P9_DPRINTK(P9_DEBUG_VFS, "\n");
657 retval = 0;
658 old_inode = old_dentry->d_inode;
659 v9ses = v9fs_inode2v9ses(old_inode);
660 oldfid = v9fs_fid_lookup(old_dentry);
775 if(IS_ERR(oldfid)) 661 if(IS_ERR(oldfid))
776 return PTR_ERR(oldfid); 662 return PTR_ERR(oldfid);
777 663
778 olddirfid = v9fs_fid_clone(old_dentry->d_parent); 664 olddirfid = v9fs_fid_clone(old_dentry->d_parent);
779 if(IS_ERR(olddirfid)) { 665 if(IS_ERR(olddirfid)) {
780 retval = PTR_ERR(olddirfid); 666 retval = PTR_ERR(olddirfid);
781 goto Release_lock; 667 goto done;
782 } 668 }
783 669
784 newdirfid = v9fs_fid_clone(new_dentry->d_parent); 670 newdirfid = v9fs_fid_clone(new_dentry->d_parent);
785 if(IS_ERR(newdirfid)) { 671 if(IS_ERR(newdirfid)) {
786 retval = PTR_ERR(newdirfid); 672 retval = PTR_ERR(newdirfid);
787 goto Clunk_olddir; 673 goto clunk_olddir;
788 } 674 }
789 675
790 /* 9P can only handle file rename in the same directory */ 676 /* 9P can only handle file rename in the same directory */
791 if (memcmp(&olddirfid->qid, &newdirfid->qid, sizeof(newdirfid->qid))) { 677 if (memcmp(&olddirfid->qid, &newdirfid->qid, sizeof(newdirfid->qid))) {
792 dprintk(DEBUG_ERROR, "old dir and new dir are different\n"); 678 P9_DPRINTK(P9_DEBUG_ERROR,
679 "old dir and new dir are different\n");
793 retval = -EXDEV; 680 retval = -EXDEV;
794 goto Clunk_newdir; 681 goto clunk_newdir;
795 }
796
797 fid = oldfid->fid;
798 olddirfidnum = olddirfid->fid;
799 newdirfidnum = newdirfid->fid;
800
801 if (fid < 0) {
802 dprintk(DEBUG_ERROR, "no fid for old file #%lu\n",
803 old_inode->i_ino);
804 retval = -EBADF;
805 goto Clunk_newdir;
806 } 682 }
807 683
808 v9fs_blank_wstat(&wstat); 684 v9fs_blank_wstat(&wstat);
809 wstat.muid = v9ses->name; 685 wstat.muid = v9ses->name;
810 wstat.name = (char *) new_dentry->d_name.name; 686 wstat.name = (char *) new_dentry->d_name.name;
687 retval = p9_client_wstat(oldfid, &wstat);
811 688
812 retval = v9fs_t_wstat(v9ses, fid, &wstat, &fcall); 689clunk_newdir:
690 p9_client_clunk(olddirfid);
813 691
814 if (retval < 0) 692clunk_olddir:
815 PRINT_FCALL_ERROR("wstat error", fcall); 693 p9_client_clunk(newdirfid);
816
817 kfree(fcall);
818
819Clunk_newdir:
820 v9fs_fid_clunk(v9ses, newdirfid);
821
822Clunk_olddir:
823 v9fs_fid_clunk(v9ses, olddirfid);
824
825Release_lock:
826 up(&oldfid->lock);
827 694
695done:
828 return retval; 696 return retval;
829} 697}
830 698
@@ -840,28 +708,30 @@ static int
840v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, 708v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
841 struct kstat *stat) 709 struct kstat *stat)
842{ 710{
843 struct v9fs_fcall *fcall = NULL; 711 int err;
844 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode); 712 struct v9fs_session_info *v9ses;
845 struct v9fs_fid *fid = v9fs_fid_clone(dentry); 713 struct p9_fid *fid;
846 int err = -EPERM; 714 struct p9_stat *st;
847 715
848 dprintk(DEBUG_VFS, "dentry: %p\n", dentry); 716 P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
849 if(IS_ERR(fid)) 717 err = -EPERM;
718 v9ses = v9fs_inode2v9ses(dentry->d_inode);
719 if (v9ses->cache == CACHE_LOOSE)
720 return simple_getattr(mnt, dentry, stat);
721
722 fid = v9fs_fid_lookup(dentry);
723 if (IS_ERR(fid))
850 return PTR_ERR(fid); 724 return PTR_ERR(fid);
851 725
852 err = v9fs_t_stat(v9ses, fid->fid, &fcall); 726 st = p9_client_stat(fid);
727 if (IS_ERR(st))
728 return PTR_ERR(st);
853 729
854 if (err < 0) 730 v9fs_stat2inode(st, dentry->d_inode, dentry->d_inode->i_sb);
855 dprintk(DEBUG_ERROR, "stat error\n");
856 else {
857 v9fs_stat2inode(&fcall->params.rstat.stat, dentry->d_inode,
858 dentry->d_inode->i_sb);
859 generic_fillattr(dentry->d_inode, stat); 731 generic_fillattr(dentry->d_inode, stat);
860 }
861 732
862 kfree(fcall); 733 kfree(st);
863 v9fs_fid_clunk(v9ses, fid); 734 return 0;
864 return err;
865} 735}
866 736
867/** 737/**
@@ -873,13 +743,15 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
873 743
874static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr) 744static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
875{ 745{
876 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode); 746 int retval;
877 struct v9fs_fid *fid = v9fs_fid_clone(dentry); 747 struct v9fs_session_info *v9ses;
878 struct v9fs_fcall *fcall = NULL; 748 struct p9_fid *fid;
879 struct v9fs_wstat wstat; 749 struct p9_wstat wstat;
880 int res = -EPERM;
881 750
882 dprintk(DEBUG_VFS, "\n"); 751 P9_DPRINTK(P9_DEBUG_VFS, "\n");
752 retval = -EPERM;
753 v9ses = v9fs_inode2v9ses(dentry->d_inode);
754 fid = v9fs_fid_lookup(dentry);
883 if(IS_ERR(fid)) 755 if(IS_ERR(fid))
884 return PTR_ERR(fid); 756 return PTR_ERR(fid);
885 757
@@ -904,17 +776,11 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
904 wstat.n_gid = iattr->ia_gid; 776 wstat.n_gid = iattr->ia_gid;
905 } 777 }
906 778
907 res = v9fs_t_wstat(v9ses, fid->fid, &wstat, &fcall); 779 retval = p9_client_wstat(fid, &wstat);
780 if (retval >= 0)
781 retval = inode_setattr(dentry->d_inode, iattr);
908 782
909 if (res < 0) 783 return retval;
910 PRINT_FCALL_ERROR("wstat error", fcall);
911
912 kfree(fcall);
913 if (res >= 0)
914 res = inode_setattr(dentry->d_inode, iattr);
915
916 v9fs_fid_clunk(v9ses, fid);
917 return res;
918} 784}
919 785
920/** 786/**
@@ -926,7 +792,7 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
926 */ 792 */
927 793
928void 794void
929v9fs_stat2inode(struct v9fs_stat *stat, struct inode *inode, 795v9fs_stat2inode(struct p9_stat *stat, struct inode *inode,
930 struct super_block *sb) 796 struct super_block *sb)
931{ 797{
932 int n; 798 int n;
@@ -967,8 +833,9 @@ v9fs_stat2inode(struct v9fs_stat *stat, struct inode *inode,
967 case 'b': 833 case 'b':
968 break; 834 break;
969 default: 835 default:
970 dprintk(DEBUG_ERROR, "Unknown special type %c (%.*s)\n", 836 P9_DPRINTK(P9_DEBUG_ERROR,
971 type, stat->extension.len, stat->extension.str); 837 "Unknown special type %c (%.*s)\n", type,
838 stat->extension.len, stat->extension.str);
972 }; 839 };
973 inode->i_rdev = MKDEV(major, minor); 840 inode->i_rdev = MKDEV(major, minor);
974 } else 841 } else
@@ -976,8 +843,8 @@ v9fs_stat2inode(struct v9fs_stat *stat, struct inode *inode,
976 843
977 inode->i_size = stat->length; 844 inode->i_size = stat->length;
978 845
979 inode->i_blocks = 846 /* not real number of blocks, but 512 byte ones ... */
980 (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; 847 inode->i_blocks = (inode->i_size + 512 - 1) >> 9;
981} 848}
982 849
983/** 850/**
@@ -987,7 +854,7 @@ v9fs_stat2inode(struct v9fs_stat *stat, struct inode *inode,
987 * BUG: potential for inode number collisions? 854 * BUG: potential for inode number collisions?
988 */ 855 */
989 856
990ino_t v9fs_qid2ino(struct v9fs_qid *qid) 857ino_t v9fs_qid2ino(struct p9_qid *qid)
991{ 858{
992 u64 path = qid->path + 2; 859 u64 path = qid->path + 2;
993 ino_t i = 0; 860 ino_t i = 0;
@@ -1010,56 +877,46 @@ ino_t v9fs_qid2ino(struct v9fs_qid *qid)
1010 877
1011static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen) 878static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
1012{ 879{
1013 int retval = -EPERM; 880 int retval;
1014 881
1015 struct v9fs_fcall *fcall = NULL; 882 struct v9fs_session_info *v9ses;
1016 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode); 883 struct p9_fid *fid;
1017 struct v9fs_fid *fid = v9fs_fid_clone(dentry); 884 struct p9_stat *st;
1018 885
886 P9_DPRINTK(P9_DEBUG_VFS, " %s\n", dentry->d_name.name);
887 retval = -EPERM;
888 v9ses = v9fs_inode2v9ses(dentry->d_inode);
889 fid = v9fs_fid_lookup(dentry);
1019 if(IS_ERR(fid)) 890 if(IS_ERR(fid))
1020 return PTR_ERR(fid); 891 return PTR_ERR(fid);
1021 892
1022 if (!v9ses->extended) { 893 if (!v9ses->extended)
1023 retval = -EBADF; 894 return -EBADF;
1024 dprintk(DEBUG_ERROR, "not extended\n");
1025 goto ClunkFid;
1026 }
1027
1028 dprintk(DEBUG_VFS, " %s\n", dentry->d_name.name);
1029 retval = v9fs_t_stat(v9ses, fid->fid, &fcall);
1030
1031 if (retval < 0) {
1032 dprintk(DEBUG_ERROR, "stat error\n");
1033 goto FreeFcall;
1034 }
1035 895
1036 if (!fcall) { 896 st = p9_client_stat(fid);
1037 retval = -EIO; 897 if (IS_ERR(st))
1038 goto ClunkFid; 898 return PTR_ERR(st);
1039 }
1040 899
1041 if (!(fcall->params.rstat.stat.mode & V9FS_DMSYMLINK)) { 900 if (!(st->mode & P9_DMSYMLINK)) {
1042 retval = -EINVAL; 901 retval = -EINVAL;
1043 goto FreeFcall; 902 goto done;
1044 } 903 }
1045 904
1046 /* copy extension buffer into buffer */ 905 /* copy extension buffer into buffer */
1047 if (fcall->params.rstat.stat.extension.len < buflen) 906 if (st->extension.len < buflen)
1048 buflen = fcall->params.rstat.stat.extension.len + 1; 907 buflen = st->extension.len + 1;
1049 908
1050 memmove(buffer, fcall->params.rstat.stat.extension.str, buflen - 1); 909 memmove(buffer, st->extension.str, buflen - 1);
1051 buffer[buflen-1] = 0; 910 buffer[buflen-1] = 0;
1052 911
1053 dprintk(DEBUG_ERROR, "%s -> %.*s (%s)\n", dentry->d_name.name, fcall->params.rstat.stat.extension.len, 912 P9_DPRINTK(P9_DEBUG_VFS,
1054 fcall->params.rstat.stat.extension.str, buffer); 913 "%s -> %.*s (%s)\n", dentry->d_name.name, st->extension.len,
1055 retval = buflen; 914 st->extension.str, buffer);
1056 915
1057FreeFcall: 916 retval = buflen;
1058 kfree(fcall);
1059
1060ClunkFid:
1061 v9fs_fid_clunk(v9ses, fid);
1062 917
918done:
919 kfree(st);
1063 return retval; 920 return retval;
1064} 921}
1065 922
@@ -1084,14 +941,14 @@ static int v9fs_vfs_readlink(struct dentry *dentry, char __user * buffer,
1084 if (buflen > PATH_MAX) 941 if (buflen > PATH_MAX)
1085 buflen = PATH_MAX; 942 buflen = PATH_MAX;
1086 943
1087 dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); 944 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
1088 945
1089 retval = v9fs_readlink(dentry, link, buflen); 946 retval = v9fs_readlink(dentry, link, buflen);
1090 947
1091 if (retval > 0) { 948 if (retval > 0) {
1092 if ((ret = copy_to_user(buffer, link, retval)) != 0) { 949 if ((ret = copy_to_user(buffer, link, retval)) != 0) {
1093 dprintk(DEBUG_ERROR, "problem copying to user: %d\n", 950 P9_DPRINTK(P9_DEBUG_ERROR,
1094 ret); 951 "problem copying to user: %d\n", ret);
1095 retval = ret; 952 retval = ret;
1096 } 953 }
1097 } 954 }
@@ -1112,7 +969,7 @@ static void *v9fs_vfs_follow_link(struct dentry *dentry, struct nameidata *nd)
1112 int len = 0; 969 int len = 0;
1113 char *link = __getname(); 970 char *link = __getname();
1114 971
1115 dprintk(DEBUG_VFS, "%s n", dentry->d_name.name); 972 P9_DPRINTK(P9_DEBUG_VFS, "%s n", dentry->d_name.name);
1116 973
1117 if (!link) 974 if (!link)
1118 link = ERR_PTR(-ENOMEM); 975 link = ERR_PTR(-ENOMEM);
@@ -1141,7 +998,7 @@ static void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void
1141{ 998{
1142 char *s = nd_get_link(nd); 999 char *s = nd_get_link(nd);
1143 1000
1144 dprintk(DEBUG_VFS, " %s %s\n", dentry->d_name.name, s); 1001 P9_DPRINTK(P9_DEBUG_VFS, " %s %s\n", dentry->d_name.name, s);
1145 if (!IS_ERR(s)) 1002 if (!IS_ERR(s))
1146 __putname(s); 1003 __putname(s);
1147} 1004}
@@ -1149,66 +1006,24 @@ static void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void
1149static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry, 1006static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
1150 int mode, const char *extension) 1007 int mode, const char *extension)
1151{ 1008{
1152 int err; 1009 u32 perm;
1153 u32 fid, perm;
1154 struct v9fs_session_info *v9ses; 1010 struct v9fs_session_info *v9ses;
1155 struct v9fs_fid *dfid, *vfid = NULL; 1011 struct p9_fid *fid;
1156 struct inode *inode = NULL;
1157 1012
1158 v9ses = v9fs_inode2v9ses(dir); 1013 v9ses = v9fs_inode2v9ses(dir);
1159 if (!v9ses->extended) { 1014 if (!v9ses->extended) {
1160 dprintk(DEBUG_ERROR, "not extended\n"); 1015 P9_DPRINTK(P9_DEBUG_ERROR, "not extended\n");
1161 return -EPERM; 1016 return -EPERM;
1162 } 1017 }
1163 1018
1164 dfid = v9fs_fid_clone(dentry->d_parent);
1165 if(IS_ERR(dfid)) {
1166 err = PTR_ERR(dfid);
1167 goto error;
1168 }
1169
1170 perm = unixmode2p9mode(v9ses, mode); 1019 perm = unixmode2p9mode(v9ses, mode);
1020 fid = v9fs_create(v9ses, dir, dentry, (char *) extension, perm,
1021 P9_OREAD);
1022 if (IS_ERR(fid))
1023 return PTR_ERR(fid);
1171 1024
1172 err = v9fs_create(v9ses, dfid->fid, (char *) dentry->d_name.name, 1025 p9_client_clunk(fid);
1173 perm, V9FS_OREAD, (char *) extension, &fid, NULL, NULL);
1174
1175 if (err)
1176 goto clunk_dfid;
1177
1178 err = v9fs_t_clunk(v9ses, fid);
1179 if (err)
1180 goto clunk_dfid;
1181
1182 vfid = v9fs_clone_walk(v9ses, dfid->fid, dentry);
1183 if (IS_ERR(vfid)) {
1184 err = PTR_ERR(vfid);
1185 vfid = NULL;
1186 goto clunk_dfid;
1187 }
1188
1189 inode = v9fs_inode_from_fid(v9ses, vfid->fid, dir->i_sb);
1190 if (IS_ERR(inode)) {
1191 err = PTR_ERR(inode);
1192 inode = NULL;
1193 goto free_vfid;
1194 }
1195
1196 if(v9ses->cache)
1197 dentry->d_op = &v9fs_cached_dentry_operations;
1198 else
1199 dentry->d_op = &v9fs_dentry_operations;
1200 d_instantiate(dentry, inode);
1201 return 0; 1026 return 0;
1202
1203free_vfid:
1204 v9fs_fid_destroy(vfid);
1205
1206clunk_dfid:
1207 v9fs_fid_clunk(v9ses, dfid);
1208
1209error:
1210 return err;
1211
1212} 1027}
1213 1028
1214/** 1029/**
@@ -1224,8 +1039,8 @@ error:
1224static int 1039static int
1225v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) 1040v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
1226{ 1041{
1227 dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name, 1042 P9_DPRINTK(P9_DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino,
1228 symname); 1043 dentry->d_name.name, symname);
1229 1044
1230 return v9fs_vfs_mkspecial(dir, dentry, S_IFLNK, symname); 1045 return v9fs_vfs_mkspecial(dir, dentry, S_IFLNK, symname);
1231} 1046}
@@ -1247,11 +1062,11 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
1247 struct dentry *dentry) 1062 struct dentry *dentry)
1248{ 1063{
1249 int retval; 1064 int retval;
1250 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir); 1065 struct p9_fid *oldfid;
1251 struct v9fs_fid *oldfid;
1252 char *name; 1066 char *name;
1253 1067
1254 dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name, 1068 P9_DPRINTK(P9_DEBUG_VFS,
1069 " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
1255 old_dentry->d_name.name); 1070 old_dentry->d_name.name);
1256 1071
1257 oldfid = v9fs_fid_clone(old_dentry); 1072 oldfid = v9fs_fid_clone(old_dentry);
@@ -1265,11 +1080,11 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
1265 } 1080 }
1266 1081
1267 sprintf(name, "%d\n", oldfid->fid); 1082 sprintf(name, "%d\n", oldfid->fid);
1268 retval = v9fs_vfs_mkspecial(dir, dentry, V9FS_DMLINK, name); 1083 retval = v9fs_vfs_mkspecial(dir, dentry, P9_DMLINK, name);
1269 __putname(name); 1084 __putname(name);
1270 1085
1271clunk_fid: 1086clunk_fid:
1272 v9fs_fid_clunk(v9ses, oldfid); 1087 p9_client_clunk(oldfid);
1273 return retval; 1088 return retval;
1274} 1089}
1275 1090
@@ -1288,7 +1103,8 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
1288 int retval; 1103 int retval;
1289 char *name; 1104 char *name;
1290 1105
1291 dprintk(DEBUG_VFS, " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino, 1106 P9_DPRINTK(P9_DEBUG_VFS,
1107 " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
1292 dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev)); 1108 dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev));
1293 1109
1294 if (!new_valid_dev(rdev)) 1110 if (!new_valid_dev(rdev))
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 7bdf8b326841..ba904371218b 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -37,10 +37,10 @@
37#include <linux/mount.h> 37#include <linux/mount.h>
38#include <linux/idr.h> 38#include <linux/idr.h>
39#include <linux/sched.h> 39#include <linux/sched.h>
40#include <net/9p/9p.h>
41#include <net/9p/client.h>
40 42
41#include "debug.h"
42#include "v9fs.h" 43#include "v9fs.h"
43#include "9p.h"
44#include "v9fs_vfs.h" 44#include "v9fs_vfs.h"
45#include "fid.h" 45#include "fid.h"
46 46
@@ -107,41 +107,48 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
107 struct vfsmount *mnt) 107 struct vfsmount *mnt)
108{ 108{
109 struct super_block *sb = NULL; 109 struct super_block *sb = NULL;
110 struct v9fs_fcall *fcall = NULL;
111 struct inode *inode = NULL; 110 struct inode *inode = NULL;
112 struct dentry *root = NULL; 111 struct dentry *root = NULL;
113 struct v9fs_session_info *v9ses = NULL; 112 struct v9fs_session_info *v9ses = NULL;
114 struct v9fs_fid *root_fid = NULL; 113 struct p9_stat *st = NULL;
115 int mode = S_IRWXUGO | S_ISVTX; 114 int mode = S_IRWXUGO | S_ISVTX;
116 uid_t uid = current->fsuid; 115 uid_t uid = current->fsuid;
117 gid_t gid = current->fsgid; 116 gid_t gid = current->fsgid;
118 int stat_result = 0; 117 struct p9_fid *fid;
119 int newfid = 0;
120 int retval = 0; 118 int retval = 0;
121 119
122 dprintk(DEBUG_VFS, " \n"); 120 P9_DPRINTK(P9_DEBUG_VFS, " \n");
123 121
124 v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL); 122 v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL);
125 if (!v9ses) 123 if (!v9ses)
126 return -ENOMEM; 124 return -ENOMEM;
127 125
128 if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) { 126 fid = v9fs_session_init(v9ses, dev_name, data);
129 dprintk(DEBUG_ERROR, "problem initiating session\n"); 127 if (IS_ERR(fid)) {
130 retval = newfid; 128 retval = PTR_ERR(fid);
131 goto out_free_session; 129 fid = NULL;
130 kfree(v9ses);
131 v9ses = NULL;
132 goto error;
133 }
134
135 st = p9_client_stat(fid);
136 if (IS_ERR(st)) {
137 retval = PTR_ERR(st);
138 goto error;
132 } 139 }
133 140
134 sb = sget(fs_type, NULL, v9fs_set_super, v9ses); 141 sb = sget(fs_type, NULL, v9fs_set_super, v9ses);
135 if (IS_ERR(sb)) { 142 if (IS_ERR(sb)) {
136 retval = PTR_ERR(sb); 143 retval = PTR_ERR(sb);
137 goto out_close_session; 144 goto error;
138 } 145 }
139 v9fs_fill_super(sb, v9ses, flags); 146 v9fs_fill_super(sb, v9ses, flags);
140 147
141 inode = v9fs_get_inode(sb, S_IFDIR | mode); 148 inode = v9fs_get_inode(sb, S_IFDIR | mode);
142 if (IS_ERR(inode)) { 149 if (IS_ERR(inode)) {
143 retval = PTR_ERR(inode); 150 retval = PTR_ERR(inode);
144 goto put_back_sb; 151 goto error;
145 } 152 }
146 153
147 inode->i_uid = uid; 154 inode->i_uid = uid;
@@ -150,54 +157,30 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
150 root = d_alloc_root(inode); 157 root = d_alloc_root(inode);
151 if (!root) { 158 if (!root) {
152 retval = -ENOMEM; 159 retval = -ENOMEM;
153 goto put_back_sb; 160 goto error;
154 } 161 }
155 162
156 sb->s_root = root; 163 sb->s_root = root;
164 root->d_inode->i_ino = v9fs_qid2ino(&st->qid);
165 v9fs_stat2inode(st, root->d_inode, sb);
166 v9fs_fid_add(root, fid);
157 167
158 stat_result = v9fs_t_stat(v9ses, newfid, &fcall); 168 return simple_set_mnt(mnt, sb);
159 if (stat_result < 0) {
160 dprintk(DEBUG_ERROR, "stat error\n");
161 v9fs_t_clunk(v9ses, newfid);
162 } else {
163 /* Setup the Root Inode */
164 root_fid = v9fs_fid_create(v9ses, newfid);
165 if (root_fid == NULL) {
166 retval = -ENOMEM;
167 goto put_back_sb;
168 }
169
170 retval = v9fs_fid_insert(root_fid, root);
171 if (retval < 0) {
172 kfree(fcall);
173 goto put_back_sb;
174 }
175
176 root_fid->qid = fcall->params.rstat.stat.qid;
177 root->d_inode->i_ino =
178 v9fs_qid2ino(&fcall->params.rstat.stat.qid);
179 v9fs_stat2inode(&fcall->params.rstat.stat, root->d_inode, sb);
180 }
181 169
182 kfree(fcall); 170error:
171 if (fid)
172 p9_client_clunk(fid);
183 173
184 if (stat_result < 0) { 174 if (v9ses) {
185 retval = stat_result; 175 v9fs_session_close(v9ses);
186 goto put_back_sb; 176 kfree(v9ses);
187 } 177 }
188 178
189 return simple_set_mnt(mnt, sb); 179 if (sb) {
190 180 up_write(&sb->s_umount);
191out_close_session: 181 deactivate_super(sb);
192 v9fs_session_close(v9ses); 182 }
193out_free_session:
194 kfree(v9ses);
195 return retval;
196 183
197put_back_sb:
198 /* deactivate_super calls v9fs_kill_super which will frees the rest */
199 up_write(&sb->s_umount);
200 deactivate_super(sb);
201 return retval; 184 return retval;
202} 185}
203 186
@@ -211,7 +194,7 @@ static void v9fs_kill_super(struct super_block *s)
211{ 194{
212 struct v9fs_session_info *v9ses = s->s_fs_info; 195 struct v9fs_session_info *v9ses = s->s_fs_info;
213 196
214 dprintk(DEBUG_VFS, " %p\n", s); 197 P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s);
215 198
216 v9fs_dentry_release(s->s_root); /* clunk root */ 199 v9fs_dentry_release(s->s_root); /* clunk root */
217 200
@@ -219,7 +202,7 @@ static void v9fs_kill_super(struct super_block *s)
219 202
220 v9fs_session_close(v9ses); 203 v9fs_session_close(v9ses);
221 kfree(v9ses); 204 kfree(v9ses);
222 dprintk(DEBUG_VFS, "exiting kill_super\n"); 205 P9_DPRINTK(P9_DEBUG_VFS, "exiting kill_super\n");
223} 206}
224 207
225/** 208/**
@@ -234,7 +217,7 @@ static int v9fs_show_options(struct seq_file *m, struct vfsmount *mnt)
234 struct v9fs_session_info *v9ses = mnt->mnt_sb->s_fs_info; 217 struct v9fs_session_info *v9ses = mnt->mnt_sb->s_fs_info;
235 218
236 if (v9ses->debug != 0) 219 if (v9ses->debug != 0)
237 seq_printf(m, ",debug=%u", v9ses->debug); 220 seq_printf(m, ",debug=%x", v9ses->debug);
238 if (v9ses->port != V9FS_PORT) 221 if (v9ses->port != V9FS_PORT)
239 seq_printf(m, ",port=%u", v9ses->port); 222 seq_printf(m, ",port=%u", v9ses->port);
240 if (v9ses->maxdata != 9000) 223 if (v9ses->maxdata != 9000)
diff --git a/fs/Kconfig b/fs/Kconfig
index 0fa0c1193e81..94b9d861bf9b 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -2048,7 +2048,7 @@ config AFS_DEBUG
2048 2048
2049config 9P_FS 2049config 9P_FS
2050 tristate "Plan 9 Resource Sharing Support (9P2000) (Experimental)" 2050 tristate "Plan 9 Resource Sharing Support (9P2000) (Experimental)"
2051 depends on INET && EXPERIMENTAL 2051 depends on INET && NET_9P && EXPERIMENTAL
2052 help 2052 help
2053 If you say Y here, you will get experimental support for 2053 If you say Y here, you will get experimental support for
2054 Plan 9 resource sharing via the 9P2000 protocol. 2054 Plan 9 resource sharing via the 9P2000 protocol.
diff --git a/fs/adfs/file.c b/fs/adfs/file.c
index f544a2855923..36e381c6a99a 100644
--- a/fs/adfs/file.c
+++ b/fs/adfs/file.c
@@ -33,7 +33,7 @@ const struct file_operations adfs_file_operations = {
33 .fsync = file_fsync, 33 .fsync = file_fsync,
34 .write = do_sync_write, 34 .write = do_sync_write,
35 .aio_write = generic_file_aio_write, 35 .aio_write = generic_file_aio_write,
36 .sendfile = generic_file_sendfile, 36 .splice_read = generic_file_splice_read,
37}; 37};
38 38
39const struct inode_operations adfs_file_inode_operations = { 39const struct inode_operations adfs_file_inode_operations = {
diff --git a/fs/affs/file.c b/fs/affs/file.c
index c8796906f584..c314a35f0918 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -35,7 +35,7 @@ const struct file_operations affs_file_operations = {
35 .open = affs_file_open, 35 .open = affs_file_open,
36 .release = affs_file_release, 36 .release = affs_file_release,
37 .fsync = file_fsync, 37 .fsync = file_fsync,
38 .sendfile = generic_file_sendfile, 38 .splice_read = generic_file_splice_read,
39}; 39};
40 40
41const struct inode_operations affs_file_inode_operations = { 41const struct inode_operations affs_file_inode_operations = {
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 9c0e721d9fc2..aede7eb66dd4 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -32,7 +32,7 @@ const struct file_operations afs_file_operations = {
32 .aio_read = generic_file_aio_read, 32 .aio_read = generic_file_aio_read,
33 .aio_write = afs_file_write, 33 .aio_write = afs_file_write,
34 .mmap = generic_file_readonly_mmap, 34 .mmap = generic_file_readonly_mmap,
35 .sendfile = generic_file_sendfile, 35 .splice_read = generic_file_splice_read,
36 .fsync = afs_fsync, 36 .fsync = afs_fsync,
37}; 37};
38 38
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 2dac3ad2c44b..2c55dd94a1de 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -17,6 +17,8 @@
17#include <linux/rxrpc.h> 17#include <linux/rxrpc.h>
18#include <linux/key.h> 18#include <linux/key.h>
19#include <linux/workqueue.h> 19#include <linux/workqueue.h>
20#include <linux/sched.h>
21
20#include "afs.h" 22#include "afs.h"
21#include "afs_vl.h" 23#include "afs_vl.h"
22 24
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 329ee473eede..521ff7caadbd 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -114,12 +114,6 @@ static int bad_file_lock(struct file *file, int cmd, struct file_lock *fl)
114 return -EIO; 114 return -EIO;
115} 115}
116 116
117static ssize_t bad_file_sendfile(struct file *in_file, loff_t *ppos,
118 size_t count, read_actor_t actor, void *target)
119{
120 return -EIO;
121}
122
123static ssize_t bad_file_sendpage(struct file *file, struct page *page, 117static ssize_t bad_file_sendpage(struct file *file, struct page *page,
124 int off, size_t len, loff_t *pos, int more) 118 int off, size_t len, loff_t *pos, int more)
125{ 119{
@@ -182,7 +176,6 @@ static const struct file_operations bad_file_ops =
182 .aio_fsync = bad_file_aio_fsync, 176 .aio_fsync = bad_file_aio_fsync,
183 .fasync = bad_file_fasync, 177 .fasync = bad_file_fasync,
184 .lock = bad_file_lock, 178 .lock = bad_file_lock,
185 .sendfile = bad_file_sendfile,
186 .sendpage = bad_file_sendpage, 179 .sendpage = bad_file_sendpage,
187 .get_unmapped_area = bad_file_get_unmapped_area, 180 .get_unmapped_area = bad_file_get_unmapped_area,
188 .check_flags = bad_file_check_flags, 181 .check_flags = bad_file_check_flags,
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index ef4d1fa04e65..24310e9ee05a 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -24,7 +24,7 @@ const struct file_operations bfs_file_operations = {
24 .write = do_sync_write, 24 .write = do_sync_write,
25 .aio_write = generic_file_aio_write, 25 .aio_write = generic_file_aio_write,
26 .mmap = generic_file_mmap, 26 .mmap = generic_file_mmap,
27 .sendfile = generic_file_sendfile, 27 .splice_read = generic_file_splice_read,
28}; 28};
29 29
30static int bfs_move_block(unsigned long from, unsigned long to, struct super_block *sb) 30static int bfs_move_block(unsigned long from, unsigned long to, struct super_block *sb)
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index fa8ea33ab0be..08e4414b8374 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1499,6 +1499,9 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
1499#endif 1499#endif
1500 int thread_status_size = 0; 1500 int thread_status_size = 0;
1501 elf_addr_t *auxv; 1501 elf_addr_t *auxv;
1502#ifdef ELF_CORE_WRITE_EXTRA_NOTES
1503 int extra_notes_size;
1504#endif
1502 1505
1503 /* 1506 /*
1504 * We no longer stop all VM operations. 1507 * We no longer stop all VM operations.
@@ -1628,7 +1631,8 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
1628 sz += thread_status_size; 1631 sz += thread_status_size;
1629 1632
1630#ifdef ELF_CORE_WRITE_EXTRA_NOTES 1633#ifdef ELF_CORE_WRITE_EXTRA_NOTES
1631 sz += ELF_CORE_EXTRA_NOTES_SIZE; 1634 extra_notes_size = ELF_CORE_EXTRA_NOTES_SIZE;
1635 sz += extra_notes_size;
1632#endif 1636#endif
1633 1637
1634 fill_elf_note_phdr(&phdr, sz, offset); 1638 fill_elf_note_phdr(&phdr, sz, offset);
@@ -1674,6 +1678,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
1674 1678
1675#ifdef ELF_CORE_WRITE_EXTRA_NOTES 1679#ifdef ELF_CORE_WRITE_EXTRA_NOTES
1676 ELF_CORE_WRITE_EXTRA_NOTES; 1680 ELF_CORE_WRITE_EXTRA_NOTES;
1681 foffset += extra_notes_size;
1677#endif 1682#endif
1678 1683
1679 /* write out the thread status notes section */ 1684 /* write out the thread status notes section */
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 7b0265d7f3a8..861141b4f6d6 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -558,7 +558,7 @@ static int load_flat_file(struct linux_binprm * bprm,
558 if (!realdatastart) 558 if (!realdatastart)
559 realdatastart = (unsigned long) -ENOMEM; 559 realdatastart = (unsigned long) -ENOMEM;
560 printk("Unable to allocate RAM for process data, errno %d\n", 560 printk("Unable to allocate RAM for process data, errno %d\n",
561 (int)-datapos); 561 (int)-realdatastart);
562 do_munmap(current->mm, textpos, text_len); 562 do_munmap(current->mm, textpos, text_len);
563 ret = realdatastart; 563 ret = realdatastart;
564 goto err; 564 goto err;
diff --git a/fs/bio.c b/fs/bio.c
index 093345f00128..33e46340a766 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1223,8 +1223,6 @@ EXPORT_SYMBOL(bio_hw_segments);
1223EXPORT_SYMBOL(bio_add_page); 1223EXPORT_SYMBOL(bio_add_page);
1224EXPORT_SYMBOL(bio_add_pc_page); 1224EXPORT_SYMBOL(bio_add_pc_page);
1225EXPORT_SYMBOL(bio_get_nr_vecs); 1225EXPORT_SYMBOL(bio_get_nr_vecs);
1226EXPORT_SYMBOL(bio_map_user);
1227EXPORT_SYMBOL(bio_unmap_user);
1228EXPORT_SYMBOL(bio_map_kern); 1226EXPORT_SYMBOL(bio_map_kern);
1229EXPORT_SYMBOL(bio_pair_release); 1227EXPORT_SYMBOL(bio_pair_release);
1230EXPORT_SYMBOL(bio_split); 1228EXPORT_SYMBOL(bio_split);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index ea1480a16f51..b3e9bfa748cf 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1346,7 +1346,6 @@ const struct file_operations def_blk_fops = {
1346#ifdef CONFIG_COMPAT 1346#ifdef CONFIG_COMPAT
1347 .compat_ioctl = compat_blkdev_ioctl, 1347 .compat_ioctl = compat_blkdev_ioctl,
1348#endif 1348#endif
1349 .sendfile = generic_file_sendfile,
1350 .splice_read = generic_file_splice_read, 1349 .splice_read = generic_file_splice_read,
1351 .splice_write = generic_file_splice_write, 1350 .splice_write = generic_file_splice_write,
1352}; 1351};
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 6017c465440e..07838b2ac1ce 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -7,16 +7,16 @@
7 * 7 *
8 * This program is free software; you can redistribute it and/or modify 8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by 9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or 10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version. 11 * (at your option) any later version.
12 * 12 *
13 * This program is distributed in the hope that it will be useful, 13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
16 * the GNU General Public License for more details. 16 * the GNU General Public License for more details.
17 * 17 *
18 * You should have received a copy of the GNU General Public License 18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software 19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 */ 21 */
22#include <linux/fs.h> 22#include <linux/fs.h>
@@ -39,7 +39,7 @@ cifs_dump_mem(char *label, void *data, int length)
39 char *charptr = data; 39 char *charptr = data;
40 char buf[10], line[80]; 40 char buf[10], line[80];
41 41
42 printk(KERN_DEBUG "%s: dump of %d bytes of data at 0x%p\n", 42 printk(KERN_DEBUG "%s: dump of %d bytes of data at 0x%p\n",
43 label, length, data); 43 label, length, data);
44 for (i = 0; i < length; i += 16) { 44 for (i = 0; i < length; i += 16) {
45 line[0] = 0; 45 line[0] = 0;
@@ -60,10 +60,10 @@ cifs_dump_mem(char *label, void *data, int length)
60#ifdef CONFIG_CIFS_DEBUG2 60#ifdef CONFIG_CIFS_DEBUG2
61void cifs_dump_detail(struct smb_hdr * smb) 61void cifs_dump_detail(struct smb_hdr * smb)
62{ 62{
63 cERROR(1,("Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d", 63 cERROR(1, ("Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d",
64 smb->Command, smb->Status.CifsError, 64 smb->Command, smb->Status.CifsError,
65 smb->Flags, smb->Flags2, smb->Mid, smb->Pid)); 65 smb->Flags, smb->Flags2, smb->Mid, smb->Pid));
66 cERROR(1,("smb buf %p len %d", smb, smbCalcSize_LE(smb))); 66 cERROR(1, ("smb buf %p len %d", smb, smbCalcSize_LE(smb)));
67} 67}
68 68
69 69
@@ -72,36 +72,35 @@ void cifs_dump_mids(struct TCP_Server_Info * server)
72 struct list_head *tmp; 72 struct list_head *tmp;
73 struct mid_q_entry * mid_entry; 73 struct mid_q_entry * mid_entry;
74 74
75 if(server == NULL) 75 if (server == NULL)
76 return; 76 return;
77 77
78 cERROR(1,("Dump pending requests:")); 78 cERROR(1, ("Dump pending requests:"));
79 spin_lock(&GlobalMid_Lock); 79 spin_lock(&GlobalMid_Lock);
80 list_for_each(tmp, &server->pending_mid_q) { 80 list_for_each(tmp, &server->pending_mid_q) {
81 mid_entry = list_entry(tmp, struct mid_q_entry, qhead); 81 mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
82 if(mid_entry) { 82 if (mid_entry) {
83 cERROR(1,("State: %d Cmd: %d Pid: %d Tsk: %p Mid %d", 83 cERROR(1, ("State: %d Cmd: %d Pid: %d Tsk: %p Mid %d",
84 mid_entry->midState, 84 mid_entry->midState,
85 (int)mid_entry->command, 85 (int)mid_entry->command,
86 mid_entry->pid, 86 mid_entry->pid,
87 mid_entry->tsk, 87 mid_entry->tsk,
88 mid_entry->mid)); 88 mid_entry->mid));
89#ifdef CONFIG_CIFS_STATS2 89#ifdef CONFIG_CIFS_STATS2
90 cERROR(1,("IsLarge: %d buf: %p time rcv: %ld now: %ld", 90 cERROR(1, ("IsLarge: %d buf: %p time rcv: %ld now: %ld",
91 mid_entry->largeBuf, 91 mid_entry->largeBuf,
92 mid_entry->resp_buf, 92 mid_entry->resp_buf,
93 mid_entry->when_received, 93 mid_entry->when_received,
94 jiffies)); 94 jiffies));
95#endif /* STATS2 */ 95#endif /* STATS2 */
96 cERROR(1,("IsMult: %d IsEnd: %d", mid_entry->multiRsp, 96 cERROR(1, ("IsMult: %d IsEnd: %d", mid_entry->multiRsp,
97 mid_entry->multiEnd)); 97 mid_entry->multiEnd));
98 if(mid_entry->resp_buf) { 98 if (mid_entry->resp_buf) {
99 cifs_dump_detail(mid_entry->resp_buf); 99 cifs_dump_detail(mid_entry->resp_buf);
100 cifs_dump_mem("existing buf: ", 100 cifs_dump_mem("existing buf: ",
101 mid_entry->resp_buf, 101 mid_entry->resp_buf,
102 62 /* fixme */); 102 62 /* fixme */);
103 } 103 }
104
105 } 104 }
106 } 105 }
107 spin_unlock(&GlobalMid_Lock); 106 spin_unlock(&GlobalMid_Lock);
@@ -129,9 +128,10 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
129 "Display Internal CIFS Data Structures for Debugging\n" 128 "Display Internal CIFS Data Structures for Debugging\n"
130 "---------------------------------------------------\n"); 129 "---------------------------------------------------\n");
131 buf += length; 130 buf += length;
132 length = sprintf(buf,"CIFS Version %s\n",CIFS_VERSION); 131 length = sprintf(buf, "CIFS Version %s\n", CIFS_VERSION);
133 buf += length; 132 buf += length;
134 length = sprintf(buf,"Active VFS Requests: %d\n", GlobalTotalActiveXid); 133 length = sprintf(buf,
134 "Active VFS Requests: %d\n", GlobalTotalActiveXid);
135 buf += length; 135 buf += length;
136 length = sprintf(buf, "Servers:"); 136 length = sprintf(buf, "Servers:");
137 buf += length; 137 buf += length;
@@ -141,7 +141,7 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
141 list_for_each(tmp, &GlobalSMBSessionList) { 141 list_for_each(tmp, &GlobalSMBSessionList) {
142 i++; 142 i++;
143 ses = list_entry(tmp, struct cifsSesInfo, cifsSessionList); 143 ses = list_entry(tmp, struct cifsSesInfo, cifsSessionList);
144 if((ses->serverDomain == NULL) || (ses->serverOS == NULL) || 144 if ((ses->serverDomain == NULL) || (ses->serverOS == NULL) ||
145 (ses->serverNOS == NULL)) { 145 (ses->serverNOS == NULL)) {
146 buf += sprintf(buf, "\nentry for %s not fully " 146 buf += sprintf(buf, "\nentry for %s not fully "
147 "displayed\n\t", ses->serverName); 147 "displayed\n\t", ses->serverName);
@@ -149,15 +149,18 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
149 } else { 149 } else {
150 length = 150 length =
151 sprintf(buf, 151 sprintf(buf,
152 "\n%d) Name: %s Domain: %s Mounts: %d OS: %s \n\tNOS: %s\tCapability: 0x%x\n\tSMB session status: %d\t", 152 "\n%d) Name: %s Domain: %s Mounts: %d OS:"
153 " %s \n\tNOS: %s\tCapability: 0x%x\n\tSMB"
154 " session status: %d\t",
153 i, ses->serverName, ses->serverDomain, 155 i, ses->serverName, ses->serverDomain,
154 atomic_read(&ses->inUse), 156 atomic_read(&ses->inUse),
155 ses->serverOS, ses->serverNOS, 157 ses->serverOS, ses->serverNOS,
156 ses->capabilities,ses->status); 158 ses->capabilities, ses->status);
157 buf += length; 159 buf += length;
158 } 160 }
159 if(ses->server) { 161 if (ses->server) {
160 buf += sprintf(buf, "TCP status: %d\n\tLocal Users To Server: %d SecMode: 0x%x Req On Wire: %d", 162 buf += sprintf(buf, "TCP status: %d\n\tLocal Users To "
163 "Server: %d SecMode: 0x%x Req On Wire: %d",
161 ses->server->tcpStatus, 164 ses->server->tcpStatus,
162 atomic_read(&ses->server->socketUseCount), 165 atomic_read(&ses->server->socketUseCount),
163 ses->server->secMode, 166 ses->server->secMode,
@@ -165,7 +168,7 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
165 168
166#ifdef CONFIG_CIFS_STATS2 169#ifdef CONFIG_CIFS_STATS2
167 buf += sprintf(buf, " In Send: %d In MaxReq Wait: %d", 170 buf += sprintf(buf, " In Send: %d In MaxReq Wait: %d",
168 atomic_read(&ses->server->inSend), 171 atomic_read(&ses->server->inSend),
169 atomic_read(&ses->server->num_waiters)); 172 atomic_read(&ses->server->num_waiters));
170#endif 173#endif
171 174
@@ -177,17 +180,19 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
177 mid_entry = list_entry(tmp1, struct 180 mid_entry = list_entry(tmp1, struct
178 mid_q_entry, 181 mid_q_entry,
179 qhead); 182 qhead);
180 if(mid_entry) { 183 if (mid_entry) {
181 length = sprintf(buf,"State: %d com: %d pid: %d tsk: %p mid %d\n", 184 length = sprintf(buf,
182 mid_entry->midState, 185 "State: %d com: %d pid:"
183 (int)mid_entry->command, 186 " %d tsk: %p mid %d\n",
184 mid_entry->pid, 187 mid_entry->midState,
185 mid_entry->tsk, 188 (int)mid_entry->command,
186 mid_entry->mid); 189 mid_entry->pid,
190 mid_entry->tsk,
191 mid_entry->mid);
187 buf += length; 192 buf += length;
188 } 193 }
189 } 194 }
190 spin_unlock(&GlobalMid_Lock); 195 spin_unlock(&GlobalMid_Lock);
191 } 196 }
192 197
193 } 198 }
@@ -207,7 +212,8 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
207 dev_type = le32_to_cpu(tcon->fsDevInfo.DeviceType); 212 dev_type = le32_to_cpu(tcon->fsDevInfo.DeviceType);
208 length = 213 length =
209 sprintf(buf, 214 sprintf(buf,
210 "\n%d) %s Uses: %d Type: %s DevInfo: 0x%x Attributes: 0x%x\nPathComponentMax: %d Status: %d", 215 "\n%d) %s Uses: %d Type: %s DevInfo: 0x%x "
216 "Attributes: 0x%x\nPathComponentMax: %d Status: %d",
211 i, tcon->treeName, 217 i, tcon->treeName,
212 atomic_read(&tcon->useCount), 218 atomic_read(&tcon->useCount),
213 tcon->nativeFileSystem, 219 tcon->nativeFileSystem,
@@ -215,7 +221,7 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
215 le32_to_cpu(tcon->fsAttrInfo.Attributes), 221 le32_to_cpu(tcon->fsAttrInfo.Attributes),
216 le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength), 222 le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength),
217 tcon->tidStatus); 223 tcon->tidStatus);
218 buf += length; 224 buf += length;
219 if (dev_type == FILE_DEVICE_DISK) 225 if (dev_type == FILE_DEVICE_DISK)
220 length = sprintf(buf, " type: DISK "); 226 length = sprintf(buf, " type: DISK ");
221 else if (dev_type == FILE_DEVICE_CD_ROM) 227 else if (dev_type == FILE_DEVICE_CD_ROM)
@@ -224,7 +230,7 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
224 length = 230 length =
225 sprintf(buf, " type: %d ", dev_type); 231 sprintf(buf, " type: %d ", dev_type);
226 buf += length; 232 buf += length;
227 if(tcon->tidStatus == CifsNeedReconnect) { 233 if (tcon->tidStatus == CifsNeedReconnect) {
228 buf += sprintf(buf, "\tDISCONNECTED "); 234 buf += sprintf(buf, "\tDISCONNECTED ");
229 length += 14; 235 length += 14;
230 } 236 }
@@ -238,9 +244,9 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
238 /* Now calculate total size of returned data */ 244 /* Now calculate total size of returned data */
239 length = buf - original_buf; 245 length = buf - original_buf;
240 246
241 if(offset + count >= length) 247 if (offset + count >= length)
242 *eof = 1; 248 *eof = 1;
243 if(length < offset) { 249 if (length < offset) {
244 *eof = 1; 250 *eof = 1;
245 return 0; 251 return 0;
246 } else { 252 } else {
@@ -256,18 +262,18 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
256 262
257static int 263static int
258cifs_stats_write(struct file *file, const char __user *buffer, 264cifs_stats_write(struct file *file, const char __user *buffer,
259 unsigned long count, void *data) 265 unsigned long count, void *data)
260{ 266{
261 char c; 267 char c;
262 int rc; 268 int rc;
263 struct list_head *tmp; 269 struct list_head *tmp;
264 struct cifsTconInfo *tcon; 270 struct cifsTconInfo *tcon;
265 271
266 rc = get_user(c, buffer); 272 rc = get_user(c, buffer);
267 if (rc) 273 if (rc)
268 return rc; 274 return rc;
269 275
270 if (c == '1' || c == 'y' || c == 'Y' || c == '0') { 276 if (c == '1' || c == 'y' || c == 'Y' || c == '0') {
271 read_lock(&GlobalSMBSeslock); 277 read_lock(&GlobalSMBSeslock);
272#ifdef CONFIG_CIFS_STATS2 278#ifdef CONFIG_CIFS_STATS2
273 atomic_set(&totBufAllocCount, 0); 279 atomic_set(&totBufAllocCount, 0);
@@ -297,14 +303,14 @@ cifs_stats_write(struct file *file, const char __user *buffer,
297 read_unlock(&GlobalSMBSeslock); 303 read_unlock(&GlobalSMBSeslock);
298 } 304 }
299 305
300 return count; 306 return count;
301} 307}
302 308
303static int 309static int
304cifs_stats_read(char *buf, char **beginBuffer, off_t offset, 310cifs_stats_read(char *buf, char **beginBuffer, off_t offset,
305 int count, int *eof, void *data) 311 int count, int *eof, void *data)
306{ 312{
307 int item_length,i,length; 313 int item_length, i, length;
308 struct list_head *tmp; 314 struct list_head *tmp;
309 struct cifsTconInfo *tcon; 315 struct cifsTconInfo *tcon;
310 316
@@ -314,44 +320,44 @@ cifs_stats_read(char *buf, char **beginBuffer, off_t offset,
314 "Resources in use\nCIFS Session: %d\n", 320 "Resources in use\nCIFS Session: %d\n",
315 sesInfoAllocCount.counter); 321 sesInfoAllocCount.counter);
316 buf += length; 322 buf += length;
317 item_length = 323 item_length =
318 sprintf(buf,"Share (unique mount targets): %d\n", 324 sprintf(buf, "Share (unique mount targets): %d\n",
319 tconInfoAllocCount.counter); 325 tconInfoAllocCount.counter);
320 length += item_length; 326 length += item_length;
321 buf += item_length; 327 buf += item_length;
322 item_length = 328 item_length =
323 sprintf(buf,"SMB Request/Response Buffer: %d Pool size: %d\n", 329 sprintf(buf, "SMB Request/Response Buffer: %d Pool size: %d\n",
324 bufAllocCount.counter, 330 bufAllocCount.counter,
325 cifs_min_rcv + tcpSesAllocCount.counter); 331 cifs_min_rcv + tcpSesAllocCount.counter);
326 length += item_length; 332 length += item_length;
327 buf += item_length; 333 buf += item_length;
328 item_length = 334 item_length =
329 sprintf(buf,"SMB Small Req/Resp Buffer: %d Pool size: %d\n", 335 sprintf(buf, "SMB Small Req/Resp Buffer: %d Pool size: %d\n",
330 smBufAllocCount.counter,cifs_min_small); 336 smBufAllocCount.counter, cifs_min_small);
331 length += item_length; 337 length += item_length;
332 buf += item_length; 338 buf += item_length;
333#ifdef CONFIG_CIFS_STATS2 339#ifdef CONFIG_CIFS_STATS2
334 item_length = sprintf(buf, "Total Large %d Small %d Allocations\n", 340 item_length = sprintf(buf, "Total Large %d Small %d Allocations\n",
335 atomic_read(&totBufAllocCount), 341 atomic_read(&totBufAllocCount),
336 atomic_read(&totSmBufAllocCount)); 342 atomic_read(&totSmBufAllocCount));
337 length += item_length; 343 length += item_length;
338 buf += item_length; 344 buf += item_length;
339#endif /* CONFIG_CIFS_STATS2 */ 345#endif /* CONFIG_CIFS_STATS2 */
340 346
341 item_length = 347 item_length =
342 sprintf(buf,"Operations (MIDs): %d\n", 348 sprintf(buf, "Operations (MIDs): %d\n",
343 midCount.counter); 349 midCount.counter);
344 length += item_length; 350 length += item_length;
345 buf += item_length; 351 buf += item_length;
346 item_length = sprintf(buf, 352 item_length = sprintf(buf,
347 "\n%d session %d share reconnects\n", 353 "\n%d session %d share reconnects\n",
348 tcpSesReconnectCount.counter,tconInfoReconnectCount.counter); 354 tcpSesReconnectCount.counter, tconInfoReconnectCount.counter);
349 length += item_length; 355 length += item_length;
350 buf += item_length; 356 buf += item_length;
351 357
352 item_length = sprintf(buf, 358 item_length = sprintf(buf,
353 "Total vfs operations: %d maximum at one time: %d\n", 359 "Total vfs operations: %d maximum at one time: %d\n",
354 GlobalCurrentXid,GlobalMaxActiveXid); 360 GlobalCurrentXid, GlobalMaxActiveXid);
355 length += item_length; 361 length += item_length;
356 buf += item_length; 362 buf += item_length;
357 363
@@ -360,10 +366,10 @@ cifs_stats_read(char *buf, char **beginBuffer, off_t offset,
360 list_for_each(tmp, &GlobalTreeConnectionList) { 366 list_for_each(tmp, &GlobalTreeConnectionList) {
361 i++; 367 i++;
362 tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList); 368 tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList);
363 item_length = sprintf(buf,"\n%d) %s",i, tcon->treeName); 369 item_length = sprintf(buf, "\n%d) %s", i, tcon->treeName);
364 buf += item_length; 370 buf += item_length;
365 length += item_length; 371 length += item_length;
366 if(tcon->tidStatus == CifsNeedReconnect) { 372 if (tcon->tidStatus == CifsNeedReconnect) {
367 buf += sprintf(buf, "\tDISCONNECTED "); 373 buf += sprintf(buf, "\tDISCONNECTED ");
368 length += 14; 374 length += 14;
369 } 375 }
@@ -380,15 +386,15 @@ cifs_stats_read(char *buf, char **beginBuffer, off_t offset,
380 item_length = sprintf(buf, "\nWrites: %d Bytes: %lld", 386 item_length = sprintf(buf, "\nWrites: %d Bytes: %lld",
381 atomic_read(&tcon->num_writes), 387 atomic_read(&tcon->num_writes),
382 (long long)(tcon->bytes_written)); 388 (long long)(tcon->bytes_written));
383 buf += item_length; 389 buf += item_length;
384 length += item_length; 390 length += item_length;
385 item_length = sprintf(buf, 391 item_length = sprintf(buf,
386 "\nLocks: %d HardLinks: %d Symlinks: %d", 392 "\nLocks: %d HardLinks: %d Symlinks: %d",
387 atomic_read(&tcon->num_locks), 393 atomic_read(&tcon->num_locks),
388 atomic_read(&tcon->num_hardlinks), 394 atomic_read(&tcon->num_hardlinks),
389 atomic_read(&tcon->num_symlinks)); 395 atomic_read(&tcon->num_symlinks));
390 buf += item_length; 396 buf += item_length;
391 length += item_length; 397 length += item_length;
392 398
393 item_length = sprintf(buf, "\nOpens: %d Closes: %d Deletes: %d", 399 item_length = sprintf(buf, "\nOpens: %d Closes: %d Deletes: %d",
394 atomic_read(&tcon->num_opens), 400 atomic_read(&tcon->num_opens),
@@ -415,12 +421,12 @@ cifs_stats_read(char *buf, char **beginBuffer, off_t offset,
415 } 421 }
416 read_unlock(&GlobalSMBSeslock); 422 read_unlock(&GlobalSMBSeslock);
417 423
418 buf += sprintf(buf,"\n"); 424 buf += sprintf(buf, "\n");
419 length++; 425 length++;
420 426
421 if(offset + count >= length) 427 if (offset + count >= length)
422 *eof = 1; 428 *eof = 1;
423 if(length < offset) { 429 if (length < offset) {
424 *eof = 1; 430 *eof = 1;
425 return 0; 431 return 0;
426 } else { 432 } else {
@@ -428,7 +434,7 @@ cifs_stats_read(char *buf, char **beginBuffer, off_t offset,
428 } 434 }
429 if (length > count) 435 if (length > count)
430 length = count; 436 length = count;
431 437
432 return length; 438 return length;
433} 439}
434#endif 440#endif
@@ -547,11 +553,11 @@ cifs_proc_clean(void)
547 remove_proc_entry("MultiuserMount", proc_fs_cifs); 553 remove_proc_entry("MultiuserMount", proc_fs_cifs);
548 remove_proc_entry("OplockEnabled", proc_fs_cifs); 554 remove_proc_entry("OplockEnabled", proc_fs_cifs);
549/* remove_proc_entry("NTLMV2Enabled",proc_fs_cifs); */ 555/* remove_proc_entry("NTLMV2Enabled",proc_fs_cifs); */
550 remove_proc_entry("SecurityFlags",proc_fs_cifs); 556 remove_proc_entry("SecurityFlags", proc_fs_cifs);
551/* remove_proc_entry("PacketSigningEnabled",proc_fs_cifs); */ 557/* remove_proc_entry("PacketSigningEnabled", proc_fs_cifs); */
552 remove_proc_entry("LinuxExtensionsEnabled",proc_fs_cifs); 558 remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs);
553 remove_proc_entry("Experimental",proc_fs_cifs); 559 remove_proc_entry("Experimental", proc_fs_cifs);
554 remove_proc_entry("LookupCacheEnabled",proc_fs_cifs); 560 remove_proc_entry("LookupCacheEnabled", proc_fs_cifs);
555 remove_proc_entry("cifs", proc_root_fs); 561 remove_proc_entry("cifs", proc_root_fs);
556} 562}
557 563
@@ -590,7 +596,7 @@ cifsFYI_write(struct file *file, const char __user *buffer,
590 cifsFYI = 0; 596 cifsFYI = 0;
591 else if (c == '1' || c == 'y' || c == 'Y') 597 else if (c == '1' || c == 'y' || c == 'Y')
592 cifsFYI = 1; 598 cifsFYI = 1;
593 else if((c > '1') && (c <= '9')) 599 else if ((c > '1') && (c <= '9'))
594 cifsFYI = (int) (c - '0'); /* see cifs_debug.h for meanings */ 600 cifsFYI = (int) (c - '0'); /* see cifs_debug.h for meanings */
595 601
596 return count; 602 return count;
@@ -637,28 +643,28 @@ oplockEnabled_write(struct file *file, const char __user *buffer,
637 643
638static int 644static int
639experimEnabled_read(char *page, char **start, off_t off, 645experimEnabled_read(char *page, char **start, off_t off,
640 int count, int *eof, void *data) 646 int count, int *eof, void *data)
641{ 647{
642 int len; 648 int len;
643 649
644 len = sprintf(page, "%d\n", experimEnabled); 650 len = sprintf(page, "%d\n", experimEnabled);
645 651
646 len -= off; 652 len -= off;
647 *start = page + off; 653 *start = page + off;
648 654
649 if (len > count) 655 if (len > count)
650 len = count; 656 len = count;
651 else 657 else
652 *eof = 1; 658 *eof = 1;
653 659
654 if (len < 0) 660 if (len < 0)
655 len = 0; 661 len = 0;
656 662
657 return len; 663 return len;
658} 664}
659static int 665static int
660experimEnabled_write(struct file *file, const char __user *buffer, 666experimEnabled_write(struct file *file, const char __user *buffer,
661 unsigned long count, void *data) 667 unsigned long count, void *data)
662{ 668{
663 char c; 669 char c;
664 int rc; 670 int rc;
@@ -678,46 +684,46 @@ experimEnabled_write(struct file *file, const char __user *buffer,
678 684
679static int 685static int
680linuxExtensionsEnabled_read(char *page, char **start, off_t off, 686linuxExtensionsEnabled_read(char *page, char **start, off_t off,
681 int count, int *eof, void *data) 687 int count, int *eof, void *data)
682{ 688{
683 int len; 689 int len;
684 690
685 len = sprintf(page, "%d\n", linuxExtEnabled); 691 len = sprintf(page, "%d\n", linuxExtEnabled);
686 len -= off; 692 len -= off;
687 *start = page + off; 693 *start = page + off;
688 694
689 if (len > count) 695 if (len > count)
690 len = count; 696 len = count;
691 else 697 else
692 *eof = 1; 698 *eof = 1;
693 699
694 if (len < 0) 700 if (len < 0)
695 len = 0; 701 len = 0;
696 702
697 return len; 703 return len;
698} 704}
699static int 705static int
700linuxExtensionsEnabled_write(struct file *file, const char __user *buffer, 706linuxExtensionsEnabled_write(struct file *file, const char __user *buffer,
701 unsigned long count, void *data) 707 unsigned long count, void *data)
702{ 708{
703 char c; 709 char c;
704 int rc; 710 int rc;
705 711
706 rc = get_user(c, buffer); 712 rc = get_user(c, buffer);
707 if (rc) 713 if (rc)
708 return rc; 714 return rc;
709 if (c == '0' || c == 'n' || c == 'N') 715 if (c == '0' || c == 'n' || c == 'N')
710 linuxExtEnabled = 0; 716 linuxExtEnabled = 0;
711 else if (c == '1' || c == 'y' || c == 'Y') 717 else if (c == '1' || c == 'y' || c == 'Y')
712 linuxExtEnabled = 1; 718 linuxExtEnabled = 1;
713 719
714 return count; 720 return count;
715} 721}
716 722
717 723
718static int 724static int
719lookupFlag_read(char *page, char **start, off_t off, 725lookupFlag_read(char *page, char **start, off_t off,
720 int count, int *eof, void *data) 726 int count, int *eof, void *data)
721{ 727{
722 int len; 728 int len;
723 729
@@ -860,15 +866,15 @@ security_flags_write(struct file *file, const char __user *buffer,
860 char flags_string[12]; 866 char flags_string[12];
861 char c; 867 char c;
862 868
863 if((count < 1) || (count > 11)) 869 if ((count < 1) || (count > 11))
864 return -EINVAL; 870 return -EINVAL;
865 871
866 memset(flags_string, 0, 12); 872 memset(flags_string, 0, 12);
867 873
868 if(copy_from_user(flags_string, buffer, count)) 874 if (copy_from_user(flags_string, buffer, count))
869 return -EFAULT; 875 return -EFAULT;
870 876
871 if(count < 3) { 877 if (count < 3) {
872 /* single char or single char followed by null */ 878 /* single char or single char followed by null */
873 c = flags_string[0]; 879 c = flags_string[0];
874 if (c == '0' || c == 'n' || c == 'N') 880 if (c == '0' || c == 'n' || c == 'N')
@@ -881,15 +887,15 @@ security_flags_write(struct file *file, const char __user *buffer,
881 887
882 flags = simple_strtoul(flags_string, NULL, 0); 888 flags = simple_strtoul(flags_string, NULL, 0);
883 889
884 cFYI(1,("sec flags 0x%x", flags)); 890 cFYI(1, ("sec flags 0x%x", flags));
885 891
886 if(flags <= 0) { 892 if (flags <= 0) {
887 cERROR(1,("invalid security flags %s",flags_string)); 893 cERROR(1, ("invalid security flags %s", flags_string));
888 return -EINVAL; 894 return -EINVAL;
889 } 895 }
890 896
891 if(flags & ~CIFSSEC_MASK) { 897 if (flags & ~CIFSSEC_MASK) {
892 cERROR(1,("attempt to set unsupported security flags 0x%x", 898 cERROR(1, ("attempt to set unsupported security flags 0x%x",
893 flags & ~CIFSSEC_MASK)); 899 flags & ~CIFSSEC_MASK));
894 return -EINVAL; 900 return -EINVAL;
895 } 901 }
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index 793c4b95c164..701e9a9185f2 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -6,16 +6,16 @@
6 * 6 *
7 * This program is free software; you can redistribute it and/or modify 7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by 8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or 9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version. 10 * (at your option) any later version.
11 * 11 *
12 * This program is distributed in the hope that it will be useful, 12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
15 * the GNU General Public License for more details. 15 * the GNU General Public License for more details.
16 * 16 *
17 * You should have received a copy of the GNU General Public License 17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software 18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */ 20 */
21#include <linux/fs.h> 21#include <linux/fs.h>
@@ -32,7 +32,7 @@
32 * 32 *
33 */ 33 */
34int 34int
35cifs_strfromUCS_le(char *to, const __le16 * from, 35cifs_strfromUCS_le(char *to, const __le16 * from,
36 int len, const struct nls_table *codepage) 36 int len, const struct nls_table *codepage)
37{ 37{
38 int i; 38 int i;
@@ -66,7 +66,7 @@ cifs_strtoUCS(__le16 * to, const char *from, int len,
66{ 66{
67 int charlen; 67 int charlen;
68 int i; 68 int i;
69 wchar_t * wchar_to = (wchar_t *)to; /* needed to quiet sparse */ 69 wchar_t * wchar_to = (wchar_t *)to; /* needed to quiet sparse */
70 70
71 for (i = 0; len && *from; i++, from += charlen, len -= charlen) { 71 for (i = 0; len && *from; i++, from += charlen, len -= charlen) {
72 72
@@ -79,7 +79,7 @@ cifs_strtoUCS(__le16 * to, const char *from, int len,
79 /* A question mark */ 79 /* A question mark */
80 to[i] = cpu_to_le16(0x003f); 80 to[i] = cpu_to_le16(0x003f);
81 charlen = 1; 81 charlen = 1;
82 } else 82 } else
83 to[i] = cpu_to_le16(wchar_to[i]); 83 to[i] = cpu_to_le16(wchar_to[i]);
84 84
85 } 85 }
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index d38c69b591cf..8b0cbf4a4ad0 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -616,7 +616,7 @@ const struct file_operations cifs_file_ops = {
616 .fsync = cifs_fsync, 616 .fsync = cifs_fsync,
617 .flush = cifs_flush, 617 .flush = cifs_flush,
618 .mmap = cifs_file_mmap, 618 .mmap = cifs_file_mmap,
619 .sendfile = generic_file_sendfile, 619 .splice_read = generic_file_splice_read,
620 .llseek = cifs_llseek, 620 .llseek = cifs_llseek,
621#ifdef CONFIG_CIFS_POSIX 621#ifdef CONFIG_CIFS_POSIX
622 .ioctl = cifs_ioctl, 622 .ioctl = cifs_ioctl,
@@ -637,7 +637,7 @@ const struct file_operations cifs_file_direct_ops = {
637 .lock = cifs_lock, 637 .lock = cifs_lock,
638 .fsync = cifs_fsync, 638 .fsync = cifs_fsync,
639 .flush = cifs_flush, 639 .flush = cifs_flush,
640 .sendfile = generic_file_sendfile, /* BB removeme BB */ 640 .splice_read = generic_file_splice_read,
641#ifdef CONFIG_CIFS_POSIX 641#ifdef CONFIG_CIFS_POSIX
642 .ioctl = cifs_ioctl, 642 .ioctl = cifs_ioctl,
643#endif /* CONFIG_CIFS_POSIX */ 643#endif /* CONFIG_CIFS_POSIX */
@@ -656,7 +656,7 @@ const struct file_operations cifs_file_nobrl_ops = {
656 .fsync = cifs_fsync, 656 .fsync = cifs_fsync,
657 .flush = cifs_flush, 657 .flush = cifs_flush,
658 .mmap = cifs_file_mmap, 658 .mmap = cifs_file_mmap,
659 .sendfile = generic_file_sendfile, 659 .splice_read = generic_file_splice_read,
660 .llseek = cifs_llseek, 660 .llseek = cifs_llseek,
661#ifdef CONFIG_CIFS_POSIX 661#ifdef CONFIG_CIFS_POSIX
662 .ioctl = cifs_ioctl, 662 .ioctl = cifs_ioctl,
@@ -676,7 +676,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
676 .release = cifs_close, 676 .release = cifs_close,
677 .fsync = cifs_fsync, 677 .fsync = cifs_fsync,
678 .flush = cifs_flush, 678 .flush = cifs_flush,
679 .sendfile = generic_file_sendfile, /* BB removeme BB */ 679 .splice_read = generic_file_splice_read,
680#ifdef CONFIG_CIFS_POSIX 680#ifdef CONFIG_CIFS_POSIX
681 .ioctl = cifs_ioctl, 681 .ioctl = cifs_ioctl,
682#endif /* CONFIG_CIFS_POSIX */ 682#endif /* CONFIG_CIFS_POSIX */
@@ -825,8 +825,8 @@ cifs_init_mids(void)
825 sizeof (struct oplock_q_entry), 0, 825 sizeof (struct oplock_q_entry), 0,
826 SLAB_HWCACHE_ALIGN, NULL, NULL); 826 SLAB_HWCACHE_ALIGN, NULL, NULL);
827 if (cifs_oplock_cachep == NULL) { 827 if (cifs_oplock_cachep == NULL) {
828 kmem_cache_destroy(cifs_mid_cachep);
829 mempool_destroy(cifs_mid_poolp); 828 mempool_destroy(cifs_mid_poolp);
829 kmem_cache_destroy(cifs_mid_cachep);
830 return -ENOMEM; 830 return -ENOMEM;
831 } 831 }
832 832
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 14de58fa1437..57419a176688 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -433,8 +433,8 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
433 cFYI(1,("secFlags 0x%x",secFlags)); 433 cFYI(1,("secFlags 0x%x",secFlags));
434 434
435 pSMB->hdr.Mid = GetNextMid(server); 435 pSMB->hdr.Mid = GetNextMid(server);
436 pSMB->hdr.Flags2 |= SMBFLG2_UNICODE; 436 pSMB->hdr.Flags2 |= (SMBFLG2_UNICODE | SMBFLG2_ERR_STATUS);
437 if((secFlags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5) 437 if ((secFlags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5)
438 pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; 438 pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
439 439
440 count = 0; 440 count = 0;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 216fb625843f..f4e92661b223 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -2069,8 +2069,15 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2069 srvTcp->tcpStatus = CifsExiting; 2069 srvTcp->tcpStatus = CifsExiting;
2070 spin_unlock(&GlobalMid_Lock); 2070 spin_unlock(&GlobalMid_Lock);
2071 if (srvTcp->tsk) { 2071 if (srvTcp->tsk) {
2072 struct task_struct *tsk;
2073 /* If we could verify that kthread_stop would
2074 always wake up processes blocked in
2075 tcp in recv_mesg then we could remove the
2076 send_sig call */
2072 send_sig(SIGKILL,srvTcp->tsk,1); 2077 send_sig(SIGKILL,srvTcp->tsk,1);
2073 kthread_stop(srvTcp->tsk); 2078 tsk = srvTcp->tsk;
2079 if(tsk)
2080 kthread_stop(tsk);
2074 } 2081 }
2075 } 2082 }
2076 /* If find_unc succeeded then rc == 0 so we can not end */ 2083 /* If find_unc succeeded then rc == 0 so we can not end */
@@ -2085,8 +2092,11 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2085 /* if the socketUseCount is now zero */ 2092 /* if the socketUseCount is now zero */
2086 if ((temp_rc == -ESHUTDOWN) && 2093 if ((temp_rc == -ESHUTDOWN) &&
2087 (pSesInfo->server) && (pSesInfo->server->tsk)) { 2094 (pSesInfo->server) && (pSesInfo->server->tsk)) {
2095 struct task_struct *tsk;
2088 send_sig(SIGKILL,pSesInfo->server->tsk,1); 2096 send_sig(SIGKILL,pSesInfo->server->tsk,1);
2089 kthread_stop(pSesInfo->server->tsk); 2097 tsk = pSesInfo->server->tsk;
2098 if (tsk)
2099 kthread_stop(tsk);
2090 } 2100 }
2091 } else 2101 } else
2092 cFYI(1, ("No session or bad tcon")); 2102 cFYI(1, ("No session or bad tcon"));
@@ -3334,7 +3344,7 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
3334 return 0; 3344 return 0;
3335 } else if (rc == -ESHUTDOWN) { 3345 } else if (rc == -ESHUTDOWN) {
3336 cFYI(1,("Waking up socket by sending it signal")); 3346 cFYI(1,("Waking up socket by sending it signal"));
3337 if(cifsd_task) { 3347 if (cifsd_task) {
3338 send_sig(SIGKILL,cifsd_task,1); 3348 send_sig(SIGKILL,cifsd_task,1);
3339 kthread_stop(cifsd_task); 3349 kthread_stop(cifsd_task);
3340 } 3350 }
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index e5210519ac4b..8e86aaceb68a 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -2,7 +2,7 @@
2 * fs/cifs/dir.c 2 * fs/cifs/dir.c
3 * 3 *
4 * vfs operations that deal with dentries 4 * vfs operations that deal with dentries
5 * 5 *
6 * Copyright (C) International Business Machines Corp., 2002,2005 6 * Copyright (C) International Business Machines Corp., 2002,2005
7 * Author(s): Steve French (sfrench@us.ibm.com) 7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * 8 *
@@ -34,11 +34,12 @@
34static void 34static void
35renew_parental_timestamps(struct dentry *direntry) 35renew_parental_timestamps(struct dentry *direntry)
36{ 36{
37 /* BB check if there is a way to get the kernel to do this or if we really need this */ 37 /* BB check if there is a way to get the kernel to do this or if we
38 really need this */
38 do { 39 do {
39 direntry->d_time = jiffies; 40 direntry->d_time = jiffies;
40 direntry = direntry->d_parent; 41 direntry = direntry->d_parent;
41 } while (!IS_ROOT(direntry)); 42 } while (!IS_ROOT(direntry));
42} 43}
43 44
44/* Note: caller must free return buffer */ 45/* Note: caller must free return buffer */
@@ -51,7 +52,7 @@ build_path_from_dentry(struct dentry *direntry)
51 char *full_path; 52 char *full_path;
52 char dirsep; 53 char dirsep;
53 54
54 if(direntry == NULL) 55 if (direntry == NULL)
55 return NULL; /* not much we can do if dentry is freed and 56 return NULL; /* not much we can do if dentry is freed and
56 we need to reopen the file after it was closed implicitly 57 we need to reopen the file after it was closed implicitly
57 when the server crashed */ 58 when the server crashed */
@@ -59,18 +60,18 @@ build_path_from_dentry(struct dentry *direntry)
59 dirsep = CIFS_DIR_SEP(CIFS_SB(direntry->d_sb)); 60 dirsep = CIFS_DIR_SEP(CIFS_SB(direntry->d_sb));
60 pplen = CIFS_SB(direntry->d_sb)->prepathlen; 61 pplen = CIFS_SB(direntry->d_sb)->prepathlen;
61cifs_bp_rename_retry: 62cifs_bp_rename_retry:
62 namelen = pplen; 63 namelen = pplen;
63 for (temp = direntry; !IS_ROOT(temp);) { 64 for (temp = direntry; !IS_ROOT(temp);) {
64 namelen += (1 + temp->d_name.len); 65 namelen += (1 + temp->d_name.len);
65 temp = temp->d_parent; 66 temp = temp->d_parent;
66 if(temp == NULL) { 67 if (temp == NULL) {
67 cERROR(1,("corrupt dentry")); 68 cERROR(1, ("corrupt dentry"));
68 return NULL; 69 return NULL;
69 } 70 }
70 } 71 }
71 72
72 full_path = kmalloc(namelen+1, GFP_KERNEL); 73 full_path = kmalloc(namelen+1, GFP_KERNEL);
73 if(full_path == NULL) 74 if (full_path == NULL)
74 return full_path; 75 return full_path;
75 full_path[namelen] = 0; /* trailing null */ 76 full_path[namelen] = 0; /* trailing null */
76 for (temp = direntry; !IS_ROOT(temp);) { 77 for (temp = direntry; !IS_ROOT(temp);) {
@@ -84,8 +85,8 @@ cifs_bp_rename_retry:
84 cFYI(0, ("name: %s", full_path + namelen)); 85 cFYI(0, ("name: %s", full_path + namelen));
85 } 86 }
86 temp = temp->d_parent; 87 temp = temp->d_parent;
87 if(temp == NULL) { 88 if (temp == NULL) {
88 cERROR(1,("corrupt dentry")); 89 cERROR(1, ("corrupt dentry"));
89 kfree(full_path); 90 kfree(full_path);
90 return NULL; 91 return NULL;
91 } 92 }
@@ -94,7 +95,7 @@ cifs_bp_rename_retry:
94 cERROR(1, 95 cERROR(1,
95 ("did not end path lookup where expected namelen is %d", 96 ("did not end path lookup where expected namelen is %d",
96 namelen)); 97 namelen));
97 /* presumably this is only possible if racing with a rename 98 /* presumably this is only possible if racing with a rename
98 of one of the parent directories (we can not lock the dentries 99 of one of the parent directories (we can not lock the dentries
99 above us to prevent this, but retrying should be harmless) */ 100 above us to prevent this, but retrying should be harmless) */
100 kfree(full_path); 101 kfree(full_path);
@@ -106,7 +107,7 @@ cifs_bp_rename_retry:
106 since the '\' is a valid posix character so we can not switch 107 since the '\' is a valid posix character so we can not switch
107 those safely to '/' if any are found in the middle of the prepath */ 108 those safely to '/' if any are found in the middle of the prepath */
108 /* BB test paths to Windows with '/' in the midst of prepath */ 109 /* BB test paths to Windows with '/' in the midst of prepath */
109 strncpy(full_path,CIFS_SB(direntry->d_sb)->prepath,pplen); 110 strncpy(full_path, CIFS_SB(direntry->d_sb)->prepath, pplen);
110 return full_path; 111 return full_path;
111} 112}
112 113
@@ -147,12 +148,12 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
147 pTcon = cifs_sb->tcon; 148 pTcon = cifs_sb->tcon;
148 149
149 full_path = build_path_from_dentry(direntry); 150 full_path = build_path_from_dentry(direntry);
150 if(full_path == NULL) { 151 if (full_path == NULL) {
151 FreeXid(xid); 152 FreeXid(xid);
152 return -ENOMEM; 153 return -ENOMEM;
153 } 154 }
154 155
155 if(nd && (nd->flags & LOOKUP_OPEN)) { 156 if (nd && (nd->flags & LOOKUP_OPEN)) {
156 int oflags = nd->intent.open.flags; 157 int oflags = nd->intent.open.flags;
157 158
158 desiredAccess = 0; 159 desiredAccess = 0;
@@ -164,28 +165,29 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
164 write_only = TRUE; 165 write_only = TRUE;
165 } 166 }
166 167
167 if((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) 168 if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
168 disposition = FILE_CREATE; 169 disposition = FILE_CREATE;
169 else if((oflags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC)) 170 else if ((oflags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
170 disposition = FILE_OVERWRITE_IF; 171 disposition = FILE_OVERWRITE_IF;
171 else if((oflags & O_CREAT) == O_CREAT) 172 else if ((oflags & O_CREAT) == O_CREAT)
172 disposition = FILE_OPEN_IF; 173 disposition = FILE_OPEN_IF;
173 else { 174 else {
174 cFYI(1,("Create flag not set in create function")); 175 cFYI(1, ("Create flag not set in create function"));
175 } 176 }
176 } 177 }
177 178
178 /* BB add processing to set equivalent of mode - e.g. via CreateX with ACLs */ 179 /* BB add processing to set equivalent of mode - e.g. via CreateX with
180 ACLs */
179 if (oplockEnabled) 181 if (oplockEnabled)
180 oplock = REQ_OPLOCK; 182 oplock = REQ_OPLOCK;
181 183
182 buf = kmalloc(sizeof(FILE_ALL_INFO),GFP_KERNEL); 184 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
183 if(buf == NULL) { 185 if (buf == NULL) {
184 kfree(full_path); 186 kfree(full_path);
185 FreeXid(xid); 187 FreeXid(xid);
186 return -ENOMEM; 188 return -ENOMEM;
187 } 189 }
188 if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS) 190 if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS)
189 rc = CIFSSMBOpen(xid, pTcon, full_path, disposition, 191 rc = CIFSSMBOpen(xid, pTcon, full_path, disposition,
190 desiredAccess, CREATE_NOT_DIR, 192 desiredAccess, CREATE_NOT_DIR,
191 &fileHandle, &oplock, buf, cifs_sb->local_nls, 193 &fileHandle, &oplock, buf, cifs_sb->local_nls,
@@ -193,27 +195,28 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
193 else 195 else
194 rc = -EIO; /* no NT SMB support fall into legacy open below */ 196 rc = -EIO; /* no NT SMB support fall into legacy open below */
195 197
196 if(rc == -EIO) { 198 if (rc == -EIO) {
197 /* old server, retry the open legacy style */ 199 /* old server, retry the open legacy style */
198 rc = SMBLegacyOpen(xid, pTcon, full_path, disposition, 200 rc = SMBLegacyOpen(xid, pTcon, full_path, disposition,
199 desiredAccess, CREATE_NOT_DIR, 201 desiredAccess, CREATE_NOT_DIR,
200 &fileHandle, &oplock, buf, cifs_sb->local_nls, 202 &fileHandle, &oplock, buf, cifs_sb->local_nls,
201 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 203 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
202 } 204 }
203 if (rc) { 205 if (rc) {
204 cFYI(1, ("cifs_create returned 0x%x", rc)); 206 cFYI(1, ("cifs_create returned 0x%x", rc));
205 } else { 207 } else {
206 /* If Open reported that we actually created a file 208 /* If Open reported that we actually created a file
207 then we now have to set the mode if possible */ 209 then we now have to set the mode if possible */
208 if ((cifs_sb->tcon->ses->capabilities & CAP_UNIX) && 210 if ((cifs_sb->tcon->ses->capabilities & CAP_UNIX) &&
209 (oplock & CIFS_CREATE_ACTION)) 211 (oplock & CIFS_CREATE_ACTION)) {
210 if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { 212 mode &= ~current->fs->umask;
213 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
211 CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode, 214 CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode,
212 (__u64)current->fsuid, 215 (__u64)current->fsuid,
213 (__u64)current->fsgid, 216 (__u64)current->fsgid,
214 0 /* dev */, 217 0 /* dev */,
215 cifs_sb->local_nls, 218 cifs_sb->local_nls,
216 cifs_sb->mnt_cifs_flags & 219 cifs_sb->mnt_cifs_flags &
217 CIFS_MOUNT_MAP_SPECIAL_CHR); 220 CIFS_MOUNT_MAP_SPECIAL_CHR);
218 } else { 221 } else {
219 CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode, 222 CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode,
@@ -221,26 +224,28 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
221 (__u64)-1, 224 (__u64)-1,
222 0 /* dev */, 225 0 /* dev */,
223 cifs_sb->local_nls, 226 cifs_sb->local_nls,
224 cifs_sb->mnt_cifs_flags & 227 cifs_sb->mnt_cifs_flags &
225 CIFS_MOUNT_MAP_SPECIAL_CHR); 228 CIFS_MOUNT_MAP_SPECIAL_CHR);
226 } 229 }
227 else { 230 } else {
228 /* BB implement mode setting via Windows security descriptors */ 231 /* BB implement mode setting via Windows security
229 /* eg CIFSSMBWinSetPerms(xid,pTcon,full_path,mode,-1,-1,local_nls);*/ 232 descriptors e.g. */
230 /* could set r/o dos attribute if mode & 0222 == 0 */ 233 /* CIFSSMBWinSetPerms(xid,pTcon,path,mode,-1,-1,nls);*/
234
235 /* Could set r/o dos attribute if mode & 0222 == 0 */
231 } 236 }
232 237
233 /* BB server might mask mode so we have to query for Unix case*/ 238 /* BB server might mask mode so we have to query for Unix case*/
234 if (pTcon->ses->capabilities & CAP_UNIX) 239 if (pTcon->ses->capabilities & CAP_UNIX)
235 rc = cifs_get_inode_info_unix(&newinode, full_path, 240 rc = cifs_get_inode_info_unix(&newinode, full_path,
236 inode->i_sb,xid); 241 inode->i_sb, xid);
237 else { 242 else {
238 rc = cifs_get_inode_info(&newinode, full_path, 243 rc = cifs_get_inode_info(&newinode, full_path,
239 buf, inode->i_sb,xid); 244 buf, inode->i_sb, xid);
240 if(newinode) { 245 if (newinode) {
241 newinode->i_mode = mode; 246 newinode->i_mode = mode;
242 if((oplock & CIFS_CREATE_ACTION) && 247 if ((oplock & CIFS_CREATE_ACTION) &&
243 (cifs_sb->mnt_cifs_flags & 248 (cifs_sb->mnt_cifs_flags &
244 CIFS_MOUNT_SET_UID)) { 249 CIFS_MOUNT_SET_UID)) {
245 newinode->i_uid = current->fsuid; 250 newinode->i_uid = current->fsuid;
246 newinode->i_gid = current->fsgid; 251 newinode->i_gid = current->fsgid;
@@ -259,14 +264,14 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
259 direntry->d_op = &cifs_dentry_ops; 264 direntry->d_op = &cifs_dentry_ops;
260 d_instantiate(direntry, newinode); 265 d_instantiate(direntry, newinode);
261 } 266 }
262 if((nd->flags & LOOKUP_OPEN) == FALSE) { 267 if ((nd->flags & LOOKUP_OPEN) == FALSE) {
263 /* mknod case - do not leave file open */ 268 /* mknod case - do not leave file open */
264 CIFSSMBClose(xid, pTcon, fileHandle); 269 CIFSSMBClose(xid, pTcon, fileHandle);
265 } else if(newinode) { 270 } else if (newinode) {
266 pCifsFile = 271 pCifsFile =
267 kzalloc(sizeof (struct cifsFileInfo), GFP_KERNEL); 272 kzalloc(sizeof (struct cifsFileInfo), GFP_KERNEL);
268 273
269 if(pCifsFile == NULL) 274 if (pCifsFile == NULL)
270 goto cifs_create_out; 275 goto cifs_create_out;
271 pCifsFile->netfid = fileHandle; 276 pCifsFile->netfid = fileHandle;
272 pCifsFile->pid = current->tgid; 277 pCifsFile->pid = current->tgid;
@@ -276,33 +281,33 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
276 init_MUTEX(&pCifsFile->fh_sem); 281 init_MUTEX(&pCifsFile->fh_sem);
277 mutex_init(&pCifsFile->lock_mutex); 282 mutex_init(&pCifsFile->lock_mutex);
278 INIT_LIST_HEAD(&pCifsFile->llist); 283 INIT_LIST_HEAD(&pCifsFile->llist);
279 atomic_set(&pCifsFile->wrtPending,0); 284 atomic_set(&pCifsFile->wrtPending, 0);
280 285
281 /* set the following in open now 286 /* set the following in open now
282 pCifsFile->pfile = file; */ 287 pCifsFile->pfile = file; */
283 write_lock(&GlobalSMBSeslock); 288 write_lock(&GlobalSMBSeslock);
284 list_add(&pCifsFile->tlist,&pTcon->openFileList); 289 list_add(&pCifsFile->tlist, &pTcon->openFileList);
285 pCifsInode = CIFS_I(newinode); 290 pCifsInode = CIFS_I(newinode);
286 if(pCifsInode) { 291 if (pCifsInode) {
287 /* if readable file instance put first in list*/ 292 /* if readable file instance put first in list*/
288 if (write_only == TRUE) { 293 if (write_only == TRUE) {
289 list_add_tail(&pCifsFile->flist, 294 list_add_tail(&pCifsFile->flist,
290 &pCifsInode->openFileList); 295 &pCifsInode->openFileList);
291 } else { 296 } else {
292 list_add(&pCifsFile->flist, 297 list_add(&pCifsFile->flist,
293 &pCifsInode->openFileList); 298 &pCifsInode->openFileList);
294 } 299 }
295 if((oplock & 0xF) == OPLOCK_EXCLUSIVE) { 300 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
296 pCifsInode->clientCanCacheAll = TRUE; 301 pCifsInode->clientCanCacheAll = TRUE;
297 pCifsInode->clientCanCacheRead = TRUE; 302 pCifsInode->clientCanCacheRead = TRUE;
298 cFYI(1,("Exclusive Oplock for inode %p", 303 cFYI(1, ("Exclusive Oplock inode %p",
299 newinode)); 304 newinode));
300 } else if((oplock & 0xF) == OPLOCK_READ) 305 } else if ((oplock & 0xF) == OPLOCK_READ)
301 pCifsInode->clientCanCacheRead = TRUE; 306 pCifsInode->clientCanCacheRead = TRUE;
302 } 307 }
303 write_unlock(&GlobalSMBSeslock); 308 write_unlock(&GlobalSMBSeslock);
304 } 309 }
305 } 310 }
306cifs_create_out: 311cifs_create_out:
307 kfree(buf); 312 kfree(buf);
308 kfree(full_path); 313 kfree(full_path);
@@ -310,8 +315,8 @@ cifs_create_out:
310 return rc; 315 return rc;
311} 316}
312 317
313int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, 318int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
314 dev_t device_number) 319 dev_t device_number)
315{ 320{
316 int rc = -EPERM; 321 int rc = -EPERM;
317 int xid; 322 int xid;
@@ -329,43 +334,45 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
329 pTcon = cifs_sb->tcon; 334 pTcon = cifs_sb->tcon;
330 335
331 full_path = build_path_from_dentry(direntry); 336 full_path = build_path_from_dentry(direntry);
332 if(full_path == NULL) 337 if (full_path == NULL)
333 rc = -ENOMEM; 338 rc = -ENOMEM;
334 else if (pTcon->ses->capabilities & CAP_UNIX) { 339 else if (pTcon->ses->capabilities & CAP_UNIX) {
335 if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { 340 mode &= ~current->fs->umask;
341 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
336 rc = CIFSSMBUnixSetPerms(xid, pTcon, full_path, 342 rc = CIFSSMBUnixSetPerms(xid, pTcon, full_path,
337 mode,(__u64)current->fsuid,(__u64)current->fsgid, 343 mode, (__u64)current->fsuid,
344 (__u64)current->fsgid,
338 device_number, cifs_sb->local_nls, 345 device_number, cifs_sb->local_nls,
339 cifs_sb->mnt_cifs_flags & 346 cifs_sb->mnt_cifs_flags &
340 CIFS_MOUNT_MAP_SPECIAL_CHR); 347 CIFS_MOUNT_MAP_SPECIAL_CHR);
341 } else { 348 } else {
342 rc = CIFSSMBUnixSetPerms(xid, pTcon, 349 rc = CIFSSMBUnixSetPerms(xid, pTcon,
343 full_path, mode, (__u64)-1, (__u64)-1, 350 full_path, mode, (__u64)-1, (__u64)-1,
344 device_number, cifs_sb->local_nls, 351 device_number, cifs_sb->local_nls,
345 cifs_sb->mnt_cifs_flags & 352 cifs_sb->mnt_cifs_flags &
346 CIFS_MOUNT_MAP_SPECIAL_CHR); 353 CIFS_MOUNT_MAP_SPECIAL_CHR);
347 } 354 }
348 355
349 if(!rc) { 356 if (!rc) {
350 rc = cifs_get_inode_info_unix(&newinode, full_path, 357 rc = cifs_get_inode_info_unix(&newinode, full_path,
351 inode->i_sb,xid); 358 inode->i_sb, xid);
352 if (pTcon->nocase) 359 if (pTcon->nocase)
353 direntry->d_op = &cifs_ci_dentry_ops; 360 direntry->d_op = &cifs_ci_dentry_ops;
354 else 361 else
355 direntry->d_op = &cifs_dentry_ops; 362 direntry->d_op = &cifs_dentry_ops;
356 if(rc == 0) 363 if (rc == 0)
357 d_instantiate(direntry, newinode); 364 d_instantiate(direntry, newinode);
358 } 365 }
359 } else { 366 } else {
360 if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { 367 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) {
361 int oplock = 0; 368 int oplock = 0;
362 u16 fileHandle; 369 u16 fileHandle;
363 FILE_ALL_INFO * buf; 370 FILE_ALL_INFO * buf;
364 371
365 cFYI(1,("sfu compat create special file")); 372 cFYI(1, ("sfu compat create special file"));
366 373
367 buf = kmalloc(sizeof(FILE_ALL_INFO),GFP_KERNEL); 374 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
368 if(buf == NULL) { 375 if (buf == NULL) {
369 kfree(full_path); 376 kfree(full_path);
370 FreeXid(xid); 377 FreeXid(xid);
371 return -ENOMEM; 378 return -ENOMEM;
@@ -373,39 +380,38 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
373 380
374 rc = CIFSSMBOpen(xid, pTcon, full_path, 381 rc = CIFSSMBOpen(xid, pTcon, full_path,
375 FILE_CREATE, /* fail if exists */ 382 FILE_CREATE, /* fail if exists */
376 GENERIC_WRITE /* BB would 383 GENERIC_WRITE /* BB would
377 WRITE_OWNER | WRITE_DAC be better? */, 384 WRITE_OWNER | WRITE_DAC be better? */,
378 /* Create a file and set the 385 /* Create a file and set the
379 file attribute to SYSTEM */ 386 file attribute to SYSTEM */
380 CREATE_NOT_DIR | CREATE_OPTION_SPECIAL, 387 CREATE_NOT_DIR | CREATE_OPTION_SPECIAL,
381 &fileHandle, &oplock, buf, 388 &fileHandle, &oplock, buf,
382 cifs_sb->local_nls, 389 cifs_sb->local_nls,
383 cifs_sb->mnt_cifs_flags & 390 cifs_sb->mnt_cifs_flags &
384 CIFS_MOUNT_MAP_SPECIAL_CHR); 391 CIFS_MOUNT_MAP_SPECIAL_CHR);
385 392
386 /* BB FIXME - add handling for backlevel servers 393 /* BB FIXME - add handling for backlevel servers
387 which need legacy open and check for all 394 which need legacy open and check for all
388 calls to SMBOpen for fallback to 395 calls to SMBOpen for fallback to SMBLeagcyOpen */
389 SMBLeagcyOpen */ 396 if (!rc) {
390 if(!rc) {
391 /* BB Do not bother to decode buf since no 397 /* BB Do not bother to decode buf since no
392 local inode yet to put timestamps in, 398 local inode yet to put timestamps in,
393 but we can reuse it safely */ 399 but we can reuse it safely */
394 int bytes_written; 400 int bytes_written;
395 struct win_dev *pdev; 401 struct win_dev *pdev;
396 pdev = (struct win_dev *)buf; 402 pdev = (struct win_dev *)buf;
397 if(S_ISCHR(mode)) { 403 if (S_ISCHR(mode)) {
398 memcpy(pdev->type, "IntxCHR", 8); 404 memcpy(pdev->type, "IntxCHR", 8);
399 pdev->major = 405 pdev->major =
400 cpu_to_le64(MAJOR(device_number)); 406 cpu_to_le64(MAJOR(device_number));
401 pdev->minor = 407 pdev->minor =
402 cpu_to_le64(MINOR(device_number)); 408 cpu_to_le64(MINOR(device_number));
403 rc = CIFSSMBWrite(xid, pTcon, 409 rc = CIFSSMBWrite(xid, pTcon,
404 fileHandle, 410 fileHandle,
405 sizeof(struct win_dev), 411 sizeof(struct win_dev),
406 0, &bytes_written, (char *)pdev, 412 0, &bytes_written, (char *)pdev,
407 NULL, 0); 413 NULL, 0);
408 } else if(S_ISBLK(mode)) { 414 } else if (S_ISBLK(mode)) {
409 memcpy(pdev->type, "IntxBLK", 8); 415 memcpy(pdev->type, "IntxBLK", 8);
410 pdev->major = 416 pdev->major =
411 cpu_to_le64(MAJOR(device_number)); 417 cpu_to_le64(MAJOR(device_number));
@@ -432,7 +438,8 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
432 438
433 439
434struct dentry * 440struct dentry *
435cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, struct nameidata *nd) 441cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
442 struct nameidata *nd)
436{ 443{
437 int xid; 444 int xid;
438 int rc = 0; /* to get around spurious gcc warning, set to zero here */ 445 int rc = 0; /* to get around spurious gcc warning, set to zero here */
@@ -447,8 +454,6 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, struct name
447 (" parent inode = 0x%p name is: %s and dentry = 0x%p", 454 (" parent inode = 0x%p name is: %s and dentry = 0x%p",
448 parent_dir_inode, direntry->d_name.name, direntry)); 455 parent_dir_inode, direntry->d_name.name, direntry));
449 456
450 /* BB Add check of incoming data - e.g. frame not longer than maximum SMB - let server check the namelen BB */
451
452 /* check whether path exists */ 457 /* check whether path exists */
453 458
454 cifs_sb = CIFS_SB(parent_dir_inode->i_sb); 459 cifs_sb = CIFS_SB(parent_dir_inode->i_sb);
@@ -472,7 +477,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, struct name
472 deadlock in the cases (beginning of sys_rename itself) 477 deadlock in the cases (beginning of sys_rename itself)
473 in which we already have the sb rename sem */ 478 in which we already have the sb rename sem */
474 full_path = build_path_from_dentry(direntry); 479 full_path = build_path_from_dentry(direntry);
475 if(full_path == NULL) { 480 if (full_path == NULL) {
476 FreeXid(xid); 481 FreeXid(xid);
477 return ERR_PTR(-ENOMEM); 482 return ERR_PTR(-ENOMEM);
478 } 483 }
@@ -487,10 +492,10 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, struct name
487 492
488 if (pTcon->ses->capabilities & CAP_UNIX) 493 if (pTcon->ses->capabilities & CAP_UNIX)
489 rc = cifs_get_inode_info_unix(&newInode, full_path, 494 rc = cifs_get_inode_info_unix(&newInode, full_path,
490 parent_dir_inode->i_sb,xid); 495 parent_dir_inode->i_sb, xid);
491 else 496 else
492 rc = cifs_get_inode_info(&newInode, full_path, NULL, 497 rc = cifs_get_inode_info(&newInode, full_path, NULL,
493 parent_dir_inode->i_sb,xid); 498 parent_dir_inode->i_sb, xid);
494 499
495 if ((rc == 0) && (newInode != NULL)) { 500 if ((rc == 0) && (newInode != NULL)) {
496 if (pTcon->nocase) 501 if (pTcon->nocase)
@@ -499,7 +504,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, struct name
499 direntry->d_op = &cifs_dentry_ops; 504 direntry->d_op = &cifs_dentry_ops;
500 d_add(direntry, newInode); 505 d_add(direntry, newInode);
501 506
502 /* since paths are not looked up by component - the parent 507 /* since paths are not looked up by component - the parent
503 directories are presumed to be good here */ 508 directories are presumed to be good here */
504 renew_parental_timestamps(direntry); 509 renew_parental_timestamps(direntry);
505 510
@@ -511,13 +516,13 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, struct name
511 else 516 else
512 direntry->d_op = &cifs_dentry_ops; 517 direntry->d_op = &cifs_dentry_ops;
513 d_add(direntry, NULL); 518 d_add(direntry, NULL);
514 /* if it was once a directory (but how can we tell?) we could do 519 /* if it was once a directory (but how can we tell?) we could do
515 shrink_dcache_parent(direntry); */ 520 shrink_dcache_parent(direntry); */
516 } else { 521 } else {
517 cERROR(1,("Error 0x%x on cifs_get_inode_info in lookup of %s", 522 cERROR(1, ("Error 0x%x on cifs_get_inode_info in lookup of %s",
518 rc,full_path)); 523 rc, full_path));
519 /* BB special case check for Access Denied - watch security 524 /* BB special case check for Access Denied - watch security
520 exposure of returning dir info implicitly via different rc 525 exposure of returning dir info implicitly via different rc
521 if file exists or not but no access BB */ 526 if file exists or not but no access BB */
522 } 527 }
523 528
@@ -538,11 +543,11 @@ cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd)
538 } else { 543 } else {
539 cFYI(1, ("neg dentry 0x%p name = %s", 544 cFYI(1, ("neg dentry 0x%p name = %s",
540 direntry, direntry->d_name.name)); 545 direntry, direntry->d_name.name));
541 if(time_after(jiffies, direntry->d_time + HZ) || 546 if (time_after(jiffies, direntry->d_time + HZ) ||
542 !lookupCacheEnabled) { 547 !lookupCacheEnabled) {
543 d_drop(direntry); 548 d_drop(direntry);
544 isValid = 0; 549 isValid = 0;
545 } 550 }
546 } 551 }
547 552
548 return isValid; 553 return isValid;
@@ -559,8 +564,7 @@ cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd)
559 564
560struct dentry_operations cifs_dentry_ops = { 565struct dentry_operations cifs_dentry_ops = {
561 .d_revalidate = cifs_d_revalidate, 566 .d_revalidate = cifs_d_revalidate,
562/* d_delete: cifs_d_delete, *//* not needed except for debugging */ 567/* d_delete: cifs_d_delete, */ /* not needed except for debugging */
563 /* no need for d_hash, d_compare, d_release, d_iput ... yet. BB confirm this BB */
564}; 568};
565 569
566static int cifs_ci_hash(struct dentry *dentry, struct qstr *q) 570static int cifs_ci_hash(struct dentry *dentry, struct qstr *q)
diff --git a/fs/cifs/fcntl.c b/fs/cifs/fcntl.c
index da12b482ebe5..8e375bb4b379 100644
--- a/fs/cifs/fcntl.c
+++ b/fs/cifs/fcntl.c
@@ -2,7 +2,7 @@
2 * fs/cifs/fcntl.c 2 * fs/cifs/fcntl.c
3 * 3 *
4 * vfs operations that deal with the file control API 4 * vfs operations that deal with the file control API
5 * 5 *
6 * Copyright (C) International Business Machines Corp., 2003,2004 6 * Copyright (C) International Business Machines Corp., 2003,2004
7 * Author(s): Steve French (sfrench@us.ibm.com) 7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * 8 *
@@ -35,35 +35,34 @@ static __u32 convert_to_cifs_notify_flags(unsigned long fcntl_notify_flags)
35 35
36 /* No way on Linux VFS to ask to monitor xattr 36 /* No way on Linux VFS to ask to monitor xattr
37 changes (and no stream support either */ 37 changes (and no stream support either */
38 if(fcntl_notify_flags & DN_ACCESS) { 38 if (fcntl_notify_flags & DN_ACCESS) {
39 cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_ACCESS; 39 cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_ACCESS;
40 } 40 }
41 if(fcntl_notify_flags & DN_MODIFY) { 41 if (fcntl_notify_flags & DN_MODIFY) {
42 /* What does this mean on directories? */ 42 /* What does this mean on directories? */
43 cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_WRITE | 43 cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_WRITE |
44 FILE_NOTIFY_CHANGE_SIZE; 44 FILE_NOTIFY_CHANGE_SIZE;
45 } 45 }
46 if(fcntl_notify_flags & DN_CREATE) { 46 if (fcntl_notify_flags & DN_CREATE) {
47 cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_CREATION | 47 cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_CREATION |
48 FILE_NOTIFY_CHANGE_LAST_WRITE; 48 FILE_NOTIFY_CHANGE_LAST_WRITE;
49 } 49 }
50 if(fcntl_notify_flags & DN_DELETE) { 50 if (fcntl_notify_flags & DN_DELETE) {
51 cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_WRITE; 51 cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_WRITE;
52 } 52 }
53 if(fcntl_notify_flags & DN_RENAME) { 53 if (fcntl_notify_flags & DN_RENAME) {
54 /* BB review this - checking various server behaviors */ 54 /* BB review this - checking various server behaviors */
55 cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_DIR_NAME | 55 cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_DIR_NAME |
56 FILE_NOTIFY_CHANGE_FILE_NAME; 56 FILE_NOTIFY_CHANGE_FILE_NAME;
57 } 57 }
58 if(fcntl_notify_flags & DN_ATTRIB) { 58 if (fcntl_notify_flags & DN_ATTRIB) {
59 cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_SECURITY | 59 cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_SECURITY |
60 FILE_NOTIFY_CHANGE_ATTRIBUTES; 60 FILE_NOTIFY_CHANGE_ATTRIBUTES;
61 } 61 }
62/* if(fcntl_notify_flags & DN_MULTISHOT) { 62/* if (fcntl_notify_flags & DN_MULTISHOT) {
63 cifs_ntfy_flags |= ; 63 cifs_ntfy_flags |= ;
64 } */ /* BB fixme - not sure how to handle this with CIFS yet */ 64 } */ /* BB fixme - not sure how to handle this with CIFS yet */
65 65
66
67 return cifs_ntfy_flags; 66 return cifs_ntfy_flags;
68} 67}
69 68
@@ -78,8 +77,7 @@ int cifs_dir_notify(struct file * file, unsigned long arg)
78 __u32 filter = FILE_NOTIFY_CHANGE_NAME | FILE_NOTIFY_CHANGE_ATTRIBUTES; 77 __u32 filter = FILE_NOTIFY_CHANGE_NAME | FILE_NOTIFY_CHANGE_ATTRIBUTES;
79 __u16 netfid; 78 __u16 netfid;
80 79
81 80 if (experimEnabled == 0)
82 if(experimEnabled == 0)
83 return 0; 81 return 0;
84 82
85 xid = GetXid(); 83 xid = GetXid();
@@ -88,21 +86,21 @@ int cifs_dir_notify(struct file * file, unsigned long arg)
88 86
89 full_path = build_path_from_dentry(file->f_path.dentry); 87 full_path = build_path_from_dentry(file->f_path.dentry);
90 88
91 if(full_path == NULL) { 89 if (full_path == NULL) {
92 rc = -ENOMEM; 90 rc = -ENOMEM;
93 } else { 91 } else {
94 cFYI(1,("dir notify on file %s Arg 0x%lx",full_path,arg)); 92 cFYI(1, ("dir notify on file %s Arg 0x%lx", full_path, arg));
95 rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, 93 rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN,
96 GENERIC_READ | SYNCHRONIZE, 0 /* create options */, 94 GENERIC_READ | SYNCHRONIZE, 0 /* create options */,
97 &netfid, &oplock,NULL, cifs_sb->local_nls, 95 &netfid, &oplock, NULL, cifs_sb->local_nls,
98 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 96 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
99 /* BB fixme - add this handle to a notify handle list */ 97 /* BB fixme - add this handle to a notify handle list */
100 if(rc) { 98 if (rc) {
101 cFYI(1,("Could not open directory for notify")); 99 cFYI(1, ("Could not open directory for notify"));
102 } else { 100 } else {
103 filter = convert_to_cifs_notify_flags(arg); 101 filter = convert_to_cifs_notify_flags(arg);
104 if(filter != 0) { 102 if (filter != 0) {
105 rc = CIFSSMBNotify(xid, pTcon, 103 rc = CIFSSMBNotify(xid, pTcon,
106 0 /* no subdirs */, netfid, 104 0 /* no subdirs */, netfid,
107 filter, file, arg & DN_MULTISHOT, 105 filter, file, arg & DN_MULTISHOT,
108 cifs_sb->local_nls); 106 cifs_sb->local_nls);
@@ -113,10 +111,10 @@ int cifs_dir_notify(struct file * file, unsigned long arg)
113 it would close automatically but may be a way 111 it would close automatically but may be a way
114 to do it easily when inode freed or when 112 to do it easily when inode freed or when
115 notify info is cleared/changed */ 113 notify info is cleared/changed */
116 cFYI(1,("notify rc %d",rc)); 114 cFYI(1, ("notify rc %d", rc));
117 } 115 }
118 } 116 }
119 117
120 FreeXid(xid); 118 FreeXid(xid);
121 return rc; 119 return rc;
122} 120}
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 3e87dad3367c..f0ff12b3f398 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -986,7 +986,8 @@ mkdir_get_info:
986 * failed to get it from the server or was set bogus */ 986 * failed to get it from the server or was set bogus */
987 if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2)) 987 if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2))
988 direntry->d_inode->i_nlink = 2; 988 direntry->d_inode->i_nlink = 2;
989 if (cifs_sb->tcon->ses->capabilities & CAP_UNIX) 989 if (cifs_sb->tcon->ses->capabilities & CAP_UNIX) {
990 mode &= ~current->fs->umask;
990 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { 991 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
991 CIFSSMBUnixSetPerms(xid, pTcon, full_path, 992 CIFSSMBUnixSetPerms(xid, pTcon, full_path,
992 mode, 993 mode,
@@ -1004,7 +1005,7 @@ mkdir_get_info:
1004 cifs_sb->mnt_cifs_flags & 1005 cifs_sb->mnt_cifs_flags &
1005 CIFS_MOUNT_MAP_SPECIAL_CHR); 1006 CIFS_MOUNT_MAP_SPECIAL_CHR);
1006 } 1007 }
1007 else { 1008 } else {
1008 /* BB to be implemented via Windows secrty descriptors 1009 /* BB to be implemented via Windows secrty descriptors
1009 eg CIFSSMBWinSetPerms(xid, pTcon, full_path, mode, 1010 eg CIFSSMBWinSetPerms(xid, pTcon, full_path, mode,
1010 -1, -1, local_nls); */ 1011 -1, -1, local_nls); */
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index e34c7db00f6f..a414f1775ae0 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -30,7 +30,7 @@
30 30
31#define CIFS_IOC_CHECKUMOUNT _IO(0xCF, 2) 31#define CIFS_IOC_CHECKUMOUNT _IO(0xCF, 2)
32 32
33int cifs_ioctl (struct inode * inode, struct file * filep, 33int cifs_ioctl (struct inode * inode, struct file * filep,
34 unsigned int command, unsigned long arg) 34 unsigned int command, unsigned long arg)
35{ 35{
36 int rc = -ENOTTY; /* strange error - but the precedent */ 36 int rc = -ENOTTY; /* strange error - but the precedent */
@@ -47,13 +47,13 @@ int cifs_ioctl (struct inode * inode, struct file * filep,
47 47
48 xid = GetXid(); 48 xid = GetXid();
49 49
50 cFYI(1,("ioctl file %p cmd %u arg %lu",filep,command,arg)); 50 cFYI(1, ("ioctl file %p cmd %u arg %lu", filep, command, arg));
51 51
52 cifs_sb = CIFS_SB(inode->i_sb); 52 cifs_sb = CIFS_SB(inode->i_sb);
53 53
54#ifdef CONFIG_CIFS_POSIX 54#ifdef CONFIG_CIFS_POSIX
55 tcon = cifs_sb->tcon; 55 tcon = cifs_sb->tcon;
56 if(tcon) 56 if (tcon)
57 caps = le64_to_cpu(tcon->fsUnixInfo.Capability); 57 caps = le64_to_cpu(tcon->fsUnixInfo.Capability);
58 else { 58 else {
59 rc = -EIO; 59 rc = -EIO;
@@ -62,24 +62,24 @@ int cifs_ioctl (struct inode * inode, struct file * filep,
62 } 62 }
63#endif /* CONFIG_CIFS_POSIX */ 63#endif /* CONFIG_CIFS_POSIX */
64 64
65 switch(command) { 65 switch (command) {
66 case CIFS_IOC_CHECKUMOUNT: 66 case CIFS_IOC_CHECKUMOUNT:
67 cFYI(1,("User unmount attempted")); 67 cFYI(1, ("User unmount attempted"));
68 if(cifs_sb->mnt_uid == current->uid) 68 if (cifs_sb->mnt_uid == current->uid)
69 rc = 0; 69 rc = 0;
70 else { 70 else {
71 rc = -EACCES; 71 rc = -EACCES;
72 cFYI(1,("uids do not match")); 72 cFYI(1, ("uids do not match"));
73 } 73 }
74 break; 74 break;
75#ifdef CONFIG_CIFS_POSIX 75#ifdef CONFIG_CIFS_POSIX
76 case FS_IOC_GETFLAGS: 76 case FS_IOC_GETFLAGS:
77 if(CIFS_UNIX_EXTATTR_CAP & caps) { 77 if (CIFS_UNIX_EXTATTR_CAP & caps) {
78 if (pSMBFile == NULL) 78 if (pSMBFile == NULL)
79 break; 79 break;
80 rc = CIFSGetExtAttr(xid, tcon, pSMBFile->netfid, 80 rc = CIFSGetExtAttr(xid, tcon, pSMBFile->netfid,
81 &ExtAttrBits, &ExtAttrMask); 81 &ExtAttrBits, &ExtAttrMask);
82 if(rc == 0) 82 if (rc == 0)
83 rc = put_user(ExtAttrBits & 83 rc = put_user(ExtAttrBits &
84 FS_FL_USER_VISIBLE, 84 FS_FL_USER_VISIBLE,
85 (int __user *)arg); 85 (int __user *)arg);
@@ -87,8 +87,8 @@ int cifs_ioctl (struct inode * inode, struct file * filep,
87 break; 87 break;
88 88
89 case FS_IOC_SETFLAGS: 89 case FS_IOC_SETFLAGS:
90 if(CIFS_UNIX_EXTATTR_CAP & caps) { 90 if (CIFS_UNIX_EXTATTR_CAP & caps) {
91 if(get_user(ExtAttrBits,(int __user *)arg)) { 91 if (get_user(ExtAttrBits, (int __user *)arg)) {
92 rc = -EFAULT; 92 rc = -EFAULT;
93 break; 93 break;
94 } 94 }
@@ -96,16 +96,15 @@ int cifs_ioctl (struct inode * inode, struct file * filep,
96 break; 96 break;
97 /* rc= CIFSGetExtAttr(xid,tcon,pSMBFile->netfid, 97 /* rc= CIFSGetExtAttr(xid,tcon,pSMBFile->netfid,
98 extAttrBits, &ExtAttrMask);*/ 98 extAttrBits, &ExtAttrMask);*/
99
100 } 99 }
101 cFYI(1,("set flags not implemented yet")); 100 cFYI(1, ("set flags not implemented yet"));
102 break; 101 break;
103#endif /* CONFIG_CIFS_POSIX */ 102#endif /* CONFIG_CIFS_POSIX */
104 default: 103 default:
105 cFYI(1,("unsupported ioctl")); 104 cFYI(1, ("unsupported ioctl"));
106 break; 105 break;
107 } 106 }
108 107
109 FreeXid(xid); 108 FreeXid(xid);
110 return rc; 109 return rc;
111} 110}
diff --git a/fs/cifs/rfc1002pdu.h b/fs/cifs/rfc1002pdu.h
index aede606132aa..8b69fcceb597 100644
--- a/fs/cifs/rfc1002pdu.h
+++ b/fs/cifs/rfc1002pdu.h
@@ -18,7 +18,7 @@
18 * 18 *
19 * You should have received a copy of the GNU Lesser General Public License 19 * You should have received a copy of the GNU Lesser General Public License
20 * along with this library; if not, write to the Free Software 20 * along with this library; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */ 22 */
23 23
24/* NB: unlike smb/cifs packets, the RFC1002 structures are big endian */ 24/* NB: unlike smb/cifs packets, the RFC1002 structures are big endian */
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 9ddf5ed62162..898a86dde8f5 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -470,7 +470,7 @@ int coda_readdir(struct file *coda_file, void *dirent, filldir_t filldir)
470 470
471 ret = -ENOENT; 471 ret = -ENOENT;
472 if (!IS_DEADDIR(host_inode)) { 472 if (!IS_DEADDIR(host_inode)) {
473 ret = host_file->f_op->readdir(host_file, filldir, dirent); 473 ret = host_file->f_op->readdir(host_file, dirent, filldir);
474 file_accessed(host_file); 474 file_accessed(host_file);
475 } 475 }
476 } 476 }
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 5ef2b609ec7d..99dbe866816d 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -47,8 +47,9 @@ coda_file_read(struct file *coda_file, char __user *buf, size_t count, loff_t *p
47} 47}
48 48
49static ssize_t 49static ssize_t
50coda_file_sendfile(struct file *coda_file, loff_t *ppos, size_t count, 50coda_file_splice_read(struct file *coda_file, loff_t *ppos,
51 read_actor_t actor, void *target) 51 struct pipe_inode_info *pipe, size_t count,
52 unsigned int flags)
52{ 53{
53 struct coda_file_info *cfi; 54 struct coda_file_info *cfi;
54 struct file *host_file; 55 struct file *host_file;
@@ -57,10 +58,10 @@ coda_file_sendfile(struct file *coda_file, loff_t *ppos, size_t count,
57 BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); 58 BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
58 host_file = cfi->cfi_container; 59 host_file = cfi->cfi_container;
59 60
60 if (!host_file->f_op || !host_file->f_op->sendfile) 61 if (!host_file->f_op || !host_file->f_op->splice_read)
61 return -EINVAL; 62 return -EINVAL;
62 63
63 return host_file->f_op->sendfile(host_file, ppos, count, actor, target); 64 return host_file->f_op->splice_read(host_file, ppos, pipe, count,flags);
64} 65}
65 66
66static ssize_t 67static ssize_t
@@ -295,6 +296,6 @@ const struct file_operations coda_file_operations = {
295 .flush = coda_flush, 296 .flush = coda_flush,
296 .release = coda_release, 297 .release = coda_release,
297 .fsync = coda_fsync, 298 .fsync = coda_fsync,
298 .sendfile = coda_file_sendfile, 299 .splice_read = coda_file_splice_read,
299}; 300};
300 301
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index ec8896b264de..1d533a2ec3a6 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -368,6 +368,69 @@ void debugfs_remove(struct dentry *dentry)
368} 368}
369EXPORT_SYMBOL_GPL(debugfs_remove); 369EXPORT_SYMBOL_GPL(debugfs_remove);
370 370
371/**
372 * debugfs_rename - rename a file/directory in the debugfs filesystem
373 * @old_dir: a pointer to the parent dentry for the renamed object. This
374 * should be a directory dentry.
375 * @old_dentry: dentry of an object to be renamed.
376 * @new_dir: a pointer to the parent dentry where the object should be
377 * moved. This should be a directory dentry.
378 * @new_name: a pointer to a string containing the target name.
379 *
380 * This function renames a file/directory in debugfs. The target must not
381 * exist for rename to succeed.
382 *
383 * This function will return a pointer to old_dentry (which is updated to
384 * reflect renaming) if it succeeds. If an error occurs, %NULL will be
385 * returned.
386 *
387 * If debugfs is not enabled in the kernel, the value -%ENODEV will be
388 * returned.
389 */
390struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
391 struct dentry *new_dir, const char *new_name)
392{
393 int error;
394 struct dentry *dentry = NULL, *trap;
395 const char *old_name;
396
397 trap = lock_rename(new_dir, old_dir);
398 /* Source or destination directories don't exist? */
399 if (!old_dir->d_inode || !new_dir->d_inode)
400 goto exit;
401 /* Source does not exist, cyclic rename, or mountpoint? */
402 if (!old_dentry->d_inode || old_dentry == trap ||
403 d_mountpoint(old_dentry))
404 goto exit;
405 dentry = lookup_one_len(new_name, new_dir, strlen(new_name));
406 /* Lookup failed, cyclic rename or target exists? */
407 if (IS_ERR(dentry) || dentry == trap || dentry->d_inode)
408 goto exit;
409
410 old_name = fsnotify_oldname_init(old_dentry->d_name.name);
411
412 error = simple_rename(old_dir->d_inode, old_dentry, new_dir->d_inode,
413 dentry);
414 if (error) {
415 fsnotify_oldname_free(old_name);
416 goto exit;
417 }
418 d_move(old_dentry, dentry);
419 fsnotify_move(old_dir->d_inode, new_dir->d_inode, old_name,
420 old_dentry->d_name.name, S_ISDIR(old_dentry->d_inode->i_mode),
421 NULL, old_dentry->d_inode);
422 fsnotify_oldname_free(old_name);
423 unlock_rename(new_dir, old_dir);
424 dput(dentry);
425 return old_dentry;
426exit:
427 if (dentry && !IS_ERR(dentry))
428 dput(dentry);
429 unlock_rename(new_dir, old_dir);
430 return NULL;
431}
432EXPORT_SYMBOL_GPL(debugfs_rename);
433
371static decl_subsys(debug, NULL, NULL); 434static decl_subsys(debug, NULL, NULL);
372 435
373static int __init debugfs_init(void) 436static int __init debugfs_init(void)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 8593f3dfd299..52bb2638f7ab 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1106,7 +1106,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1106 spin_lock_irqsave(&dio->bio_lock, flags); 1106 spin_lock_irqsave(&dio->bio_lock, flags);
1107 ret2 = --dio->refcount; 1107 ret2 = --dio->refcount;
1108 spin_unlock_irqrestore(&dio->bio_lock, flags); 1108 spin_unlock_irqrestore(&dio->bio_lock, flags);
1109 BUG_ON(!dio->is_async && ret2 != 0); 1109
1110 if (ret2 == 0) { 1110 if (ret2 == 0) {
1111 ret = dio_complete(dio, offset, ret); 1111 ret = dio_complete(dio, offset, ret);
1112 kfree(dio); 1112 kfree(dio);
diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig
index 69a94690e493..54bcc00ec8df 100644
--- a/fs/dlm/Kconfig
+++ b/fs/dlm/Kconfig
@@ -3,7 +3,7 @@ menu "Distributed Lock Manager"
3 3
4config DLM 4config DLM
5 tristate "Distributed Lock Manager (DLM)" 5 tristate "Distributed Lock Manager (DLM)"
6 depends on IPV6 || IPV6=n 6 depends on SYSFS && (IPV6 || IPV6=n)
7 select CONFIGFS_FS 7 select CONFIGFS_FS
8 select IP_SCTP 8 select IP_SCTP
9 help 9 help
diff --git a/fs/dlm/Makefile b/fs/dlm/Makefile
index 604cf7dc5f39..d248e60951ba 100644
--- a/fs/dlm/Makefile
+++ b/fs/dlm/Makefile
@@ -8,6 +8,7 @@ dlm-y := ast.o \
8 member.o \ 8 member.o \
9 memory.o \ 9 memory.o \
10 midcomms.o \ 10 midcomms.o \
11 netlink.o \
11 lowcomms.o \ 12 lowcomms.o \
12 rcom.o \ 13 rcom.o \
13 recover.o \ 14 recover.o \
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 822abdcd1434..5069b2cb5a1f 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -90,6 +90,7 @@ struct cluster {
90 unsigned int cl_scan_secs; 90 unsigned int cl_scan_secs;
91 unsigned int cl_log_debug; 91 unsigned int cl_log_debug;
92 unsigned int cl_protocol; 92 unsigned int cl_protocol;
93 unsigned int cl_timewarn_cs;
93}; 94};
94 95
95enum { 96enum {
@@ -103,6 +104,7 @@ enum {
103 CLUSTER_ATTR_SCAN_SECS, 104 CLUSTER_ATTR_SCAN_SECS,
104 CLUSTER_ATTR_LOG_DEBUG, 105 CLUSTER_ATTR_LOG_DEBUG,
105 CLUSTER_ATTR_PROTOCOL, 106 CLUSTER_ATTR_PROTOCOL,
107 CLUSTER_ATTR_TIMEWARN_CS,
106}; 108};
107 109
108struct cluster_attribute { 110struct cluster_attribute {
@@ -162,6 +164,7 @@ CLUSTER_ATTR(toss_secs, 1);
162CLUSTER_ATTR(scan_secs, 1); 164CLUSTER_ATTR(scan_secs, 1);
163CLUSTER_ATTR(log_debug, 0); 165CLUSTER_ATTR(log_debug, 0);
164CLUSTER_ATTR(protocol, 0); 166CLUSTER_ATTR(protocol, 0);
167CLUSTER_ATTR(timewarn_cs, 1);
165 168
166static struct configfs_attribute *cluster_attrs[] = { 169static struct configfs_attribute *cluster_attrs[] = {
167 [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr, 170 [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
@@ -174,6 +177,7 @@ static struct configfs_attribute *cluster_attrs[] = {
174 [CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr, 177 [CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr,
175 [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr, 178 [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr,
176 [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr, 179 [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr,
180 [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr,
177 NULL, 181 NULL,
178}; 182};
179 183
@@ -429,6 +433,8 @@ static struct config_group *make_cluster(struct config_group *g,
429 cl->cl_toss_secs = dlm_config.ci_toss_secs; 433 cl->cl_toss_secs = dlm_config.ci_toss_secs;
430 cl->cl_scan_secs = dlm_config.ci_scan_secs; 434 cl->cl_scan_secs = dlm_config.ci_scan_secs;
431 cl->cl_log_debug = dlm_config.ci_log_debug; 435 cl->cl_log_debug = dlm_config.ci_log_debug;
436 cl->cl_protocol = dlm_config.ci_protocol;
437 cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs;
432 438
433 space_list = &sps->ss_group; 439 space_list = &sps->ss_group;
434 comm_list = &cms->cs_group; 440 comm_list = &cms->cs_group;
@@ -748,9 +754,16 @@ static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len)
748 754
749static struct space *get_space(char *name) 755static struct space *get_space(char *name)
750{ 756{
757 struct config_item *i;
758
751 if (!space_list) 759 if (!space_list)
752 return NULL; 760 return NULL;
753 return to_space(config_group_find_obj(space_list, name)); 761
762 down(&space_list->cg_subsys->su_sem);
763 i = config_group_find_obj(space_list, name);
764 up(&space_list->cg_subsys->su_sem);
765
766 return to_space(i);
754} 767}
755 768
756static void put_space(struct space *sp) 769static void put_space(struct space *sp)
@@ -776,20 +789,20 @@ static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr)
776 if (cm->nodeid != nodeid) 789 if (cm->nodeid != nodeid)
777 continue; 790 continue;
778 found = 1; 791 found = 1;
792 config_item_get(i);
779 break; 793 break;
780 } else { 794 } else {
781 if (!cm->addr_count || 795 if (!cm->addr_count ||
782 memcmp(cm->addr[0], addr, sizeof(*addr))) 796 memcmp(cm->addr[0], addr, sizeof(*addr)))
783 continue; 797 continue;
784 found = 1; 798 found = 1;
799 config_item_get(i);
785 break; 800 break;
786 } 801 }
787 } 802 }
788 up(&clusters_root.subsys.su_sem); 803 up(&clusters_root.subsys.su_sem);
789 804
790 if (found) 805 if (!found)
791 config_item_get(i);
792 else
793 cm = NULL; 806 cm = NULL;
794 return cm; 807 return cm;
795} 808}
@@ -909,6 +922,7 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
909#define DEFAULT_SCAN_SECS 5 922#define DEFAULT_SCAN_SECS 5
910#define DEFAULT_LOG_DEBUG 0 923#define DEFAULT_LOG_DEBUG 0
911#define DEFAULT_PROTOCOL 0 924#define DEFAULT_PROTOCOL 0
925#define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */
912 926
913struct dlm_config_info dlm_config = { 927struct dlm_config_info dlm_config = {
914 .ci_tcp_port = DEFAULT_TCP_PORT, 928 .ci_tcp_port = DEFAULT_TCP_PORT,
@@ -920,6 +934,7 @@ struct dlm_config_info dlm_config = {
920 .ci_toss_secs = DEFAULT_TOSS_SECS, 934 .ci_toss_secs = DEFAULT_TOSS_SECS,
921 .ci_scan_secs = DEFAULT_SCAN_SECS, 935 .ci_scan_secs = DEFAULT_SCAN_SECS,
922 .ci_log_debug = DEFAULT_LOG_DEBUG, 936 .ci_log_debug = DEFAULT_LOG_DEBUG,
923 .ci_protocol = DEFAULT_PROTOCOL 937 .ci_protocol = DEFAULT_PROTOCOL,
938 .ci_timewarn_cs = DEFAULT_TIMEWARN_CS
924}; 939};
925 940
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index 967cc3d72e5e..a3170fe22090 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -27,6 +27,7 @@ struct dlm_config_info {
27 int ci_scan_secs; 27 int ci_scan_secs;
28 int ci_log_debug; 28 int ci_log_debug;
29 int ci_protocol; 29 int ci_protocol;
30 int ci_timewarn_cs;
30}; 31};
31 32
32extern struct dlm_config_info dlm_config; 33extern struct dlm_config_info dlm_config;
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index 61ba670b9e02..12c3bfd5e660 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -17,6 +17,7 @@
17#include <linux/debugfs.h> 17#include <linux/debugfs.h>
18 18
19#include "dlm_internal.h" 19#include "dlm_internal.h"
20#include "lock.h"
20 21
21#define DLM_DEBUG_BUF_LEN 4096 22#define DLM_DEBUG_BUF_LEN 4096
22static char debug_buf[DLM_DEBUG_BUF_LEN]; 23static char debug_buf[DLM_DEBUG_BUF_LEN];
@@ -26,6 +27,8 @@ static struct dentry *dlm_root;
26 27
27struct rsb_iter { 28struct rsb_iter {
28 int entry; 29 int entry;
30 int locks;
31 int header;
29 struct dlm_ls *ls; 32 struct dlm_ls *ls;
30 struct list_head *next; 33 struct list_head *next;
31 struct dlm_rsb *rsb; 34 struct dlm_rsb *rsb;
@@ -57,8 +60,8 @@ static char *print_lockmode(int mode)
57 } 60 }
58} 61}
59 62
60static void print_lock(struct seq_file *s, struct dlm_lkb *lkb, 63static void print_resource_lock(struct seq_file *s, struct dlm_lkb *lkb,
61 struct dlm_rsb *res) 64 struct dlm_rsb *res)
62{ 65{
63 seq_printf(s, "%08x %s", lkb->lkb_id, print_lockmode(lkb->lkb_grmode)); 66 seq_printf(s, "%08x %s", lkb->lkb_id, print_lockmode(lkb->lkb_grmode));
64 67
@@ -85,6 +88,8 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s)
85 struct dlm_lkb *lkb; 88 struct dlm_lkb *lkb;
86 int i, lvblen = res->res_ls->ls_lvblen, recover_list, root_list; 89 int i, lvblen = res->res_ls->ls_lvblen, recover_list, root_list;
87 90
91 lock_rsb(res);
92
88 seq_printf(s, "\nResource %p Name (len=%d) \"", res, res->res_length); 93 seq_printf(s, "\nResource %p Name (len=%d) \"", res, res->res_length);
89 for (i = 0; i < res->res_length; i++) { 94 for (i = 0; i < res->res_length; i++) {
90 if (isprint(res->res_name[i])) 95 if (isprint(res->res_name[i]))
@@ -129,15 +134,15 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s)
129 /* Print the locks attached to this resource */ 134 /* Print the locks attached to this resource */
130 seq_printf(s, "Granted Queue\n"); 135 seq_printf(s, "Granted Queue\n");
131 list_for_each_entry(lkb, &res->res_grantqueue, lkb_statequeue) 136 list_for_each_entry(lkb, &res->res_grantqueue, lkb_statequeue)
132 print_lock(s, lkb, res); 137 print_resource_lock(s, lkb, res);
133 138
134 seq_printf(s, "Conversion Queue\n"); 139 seq_printf(s, "Conversion Queue\n");
135 list_for_each_entry(lkb, &res->res_convertqueue, lkb_statequeue) 140 list_for_each_entry(lkb, &res->res_convertqueue, lkb_statequeue)
136 print_lock(s, lkb, res); 141 print_resource_lock(s, lkb, res);
137 142
138 seq_printf(s, "Waiting Queue\n"); 143 seq_printf(s, "Waiting Queue\n");
139 list_for_each_entry(lkb, &res->res_waitqueue, lkb_statequeue) 144 list_for_each_entry(lkb, &res->res_waitqueue, lkb_statequeue)
140 print_lock(s, lkb, res); 145 print_resource_lock(s, lkb, res);
141 146
142 if (list_empty(&res->res_lookup)) 147 if (list_empty(&res->res_lookup))
143 goto out; 148 goto out;
@@ -151,6 +156,61 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s)
151 seq_printf(s, "\n"); 156 seq_printf(s, "\n");
152 } 157 }
153 out: 158 out:
159 unlock_rsb(res);
160 return 0;
161}
162
163static void print_lock(struct seq_file *s, struct dlm_lkb *lkb, struct dlm_rsb *r)
164{
165 struct dlm_user_args *ua;
166 unsigned int waiting = 0;
167 uint64_t xid = 0;
168
169 if (lkb->lkb_flags & DLM_IFL_USER) {
170 ua = (struct dlm_user_args *) lkb->lkb_astparam;
171 if (ua)
172 xid = ua->xid;
173 }
174
175 if (lkb->lkb_timestamp)
176 waiting = jiffies_to_msecs(jiffies - lkb->lkb_timestamp);
177
178 /* id nodeid remid pid xid exflags flags sts grmode rqmode time_ms
179 r_nodeid r_len r_name */
180
181 seq_printf(s, "%x %d %x %u %llu %x %x %d %d %d %u %u %d \"%s\"\n",
182 lkb->lkb_id,
183 lkb->lkb_nodeid,
184 lkb->lkb_remid,
185 lkb->lkb_ownpid,
186 (unsigned long long)xid,
187 lkb->lkb_exflags,
188 lkb->lkb_flags,
189 lkb->lkb_status,
190 lkb->lkb_grmode,
191 lkb->lkb_rqmode,
192 waiting,
193 r->res_nodeid,
194 r->res_length,
195 r->res_name);
196}
197
198static int print_locks(struct dlm_rsb *r, struct seq_file *s)
199{
200 struct dlm_lkb *lkb;
201
202 lock_rsb(r);
203
204 list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue)
205 print_lock(s, lkb, r);
206
207 list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue)
208 print_lock(s, lkb, r);
209
210 list_for_each_entry(lkb, &r->res_waitqueue, lkb_statequeue)
211 print_lock(s, lkb, r);
212
213 unlock_rsb(r);
154 return 0; 214 return 0;
155} 215}
156 216
@@ -166,6 +226,9 @@ static int rsb_iter_next(struct rsb_iter *ri)
166 read_lock(&ls->ls_rsbtbl[i].lock); 226 read_lock(&ls->ls_rsbtbl[i].lock);
167 if (!list_empty(&ls->ls_rsbtbl[i].list)) { 227 if (!list_empty(&ls->ls_rsbtbl[i].list)) {
168 ri->next = ls->ls_rsbtbl[i].list.next; 228 ri->next = ls->ls_rsbtbl[i].list.next;
229 ri->rsb = list_entry(ri->next, struct dlm_rsb,
230 res_hashchain);
231 dlm_hold_rsb(ri->rsb);
169 read_unlock(&ls->ls_rsbtbl[i].lock); 232 read_unlock(&ls->ls_rsbtbl[i].lock);
170 break; 233 break;
171 } 234 }
@@ -176,6 +239,7 @@ static int rsb_iter_next(struct rsb_iter *ri)
176 if (ri->entry >= ls->ls_rsbtbl_size) 239 if (ri->entry >= ls->ls_rsbtbl_size)
177 return 1; 240 return 1;
178 } else { 241 } else {
242 struct dlm_rsb *old = ri->rsb;
179 i = ri->entry; 243 i = ri->entry;
180 read_lock(&ls->ls_rsbtbl[i].lock); 244 read_lock(&ls->ls_rsbtbl[i].lock);
181 ri->next = ri->next->next; 245 ri->next = ri->next->next;
@@ -184,11 +248,14 @@ static int rsb_iter_next(struct rsb_iter *ri)
184 ri->next = NULL; 248 ri->next = NULL;
185 ri->entry++; 249 ri->entry++;
186 read_unlock(&ls->ls_rsbtbl[i].lock); 250 read_unlock(&ls->ls_rsbtbl[i].lock);
251 dlm_put_rsb(old);
187 goto top; 252 goto top;
188 } 253 }
254 ri->rsb = list_entry(ri->next, struct dlm_rsb, res_hashchain);
255 dlm_hold_rsb(ri->rsb);
189 read_unlock(&ls->ls_rsbtbl[i].lock); 256 read_unlock(&ls->ls_rsbtbl[i].lock);
257 dlm_put_rsb(old);
190 } 258 }
191 ri->rsb = list_entry(ri->next, struct dlm_rsb, res_hashchain);
192 259
193 return 0; 260 return 0;
194} 261}
@@ -202,7 +269,7 @@ static struct rsb_iter *rsb_iter_init(struct dlm_ls *ls)
202{ 269{
203 struct rsb_iter *ri; 270 struct rsb_iter *ri;
204 271
205 ri = kmalloc(sizeof *ri, GFP_KERNEL); 272 ri = kzalloc(sizeof *ri, GFP_KERNEL);
206 if (!ri) 273 if (!ri)
207 return NULL; 274 return NULL;
208 275
@@ -260,7 +327,17 @@ static int rsb_seq_show(struct seq_file *file, void *iter_ptr)
260{ 327{
261 struct rsb_iter *ri = iter_ptr; 328 struct rsb_iter *ri = iter_ptr;
262 329
263 print_resource(ri->rsb, file); 330 if (ri->locks) {
331 if (ri->header) {
332 seq_printf(file, "id nodeid remid pid xid exflags flags "
333 "sts grmode rqmode time_ms r_nodeid "
334 "r_len r_name\n");
335 ri->header = 0;
336 }
337 print_locks(ri->rsb, file);
338 } else {
339 print_resource(ri->rsb, file);
340 }
264 341
265 return 0; 342 return 0;
266} 343}
@@ -296,6 +373,83 @@ static const struct file_operations rsb_fops = {
296}; 373};
297 374
298/* 375/*
376 * Dump state in compact per-lock listing
377 */
378
379static struct rsb_iter *locks_iter_init(struct dlm_ls *ls, loff_t *pos)
380{
381 struct rsb_iter *ri;
382
383 ri = kzalloc(sizeof *ri, GFP_KERNEL);
384 if (!ri)
385 return NULL;
386
387 ri->ls = ls;
388 ri->entry = 0;
389 ri->next = NULL;
390 ri->locks = 1;
391
392 if (*pos == 0)
393 ri->header = 1;
394
395 if (rsb_iter_next(ri)) {
396 rsb_iter_free(ri);
397 return NULL;
398 }
399
400 return ri;
401}
402
403static void *locks_seq_start(struct seq_file *file, loff_t *pos)
404{
405 struct rsb_iter *ri;
406 loff_t n = *pos;
407
408 ri = locks_iter_init(file->private, pos);
409 if (!ri)
410 return NULL;
411
412 while (n--) {
413 if (rsb_iter_next(ri)) {
414 rsb_iter_free(ri);
415 return NULL;
416 }
417 }
418
419 return ri;
420}
421
422static struct seq_operations locks_seq_ops = {
423 .start = locks_seq_start,
424 .next = rsb_seq_next,
425 .stop = rsb_seq_stop,
426 .show = rsb_seq_show,
427};
428
429static int locks_open(struct inode *inode, struct file *file)
430{
431 struct seq_file *seq;
432 int ret;
433
434 ret = seq_open(file, &locks_seq_ops);
435 if (ret)
436 return ret;
437
438 seq = file->private_data;
439 seq->private = inode->i_private;
440
441 return 0;
442}
443
444static const struct file_operations locks_fops = {
445 .owner = THIS_MODULE,
446 .open = locks_open,
447 .read = seq_read,
448 .llseek = seq_lseek,
449 .release = seq_release
450};
451
452/*
299 * dump lkb's on the ls_waiters list 453 * dump lkb's on the ls_waiters list
300 */ 454 */
301 455
@@ -362,6 +516,20 @@ int dlm_create_debug_file(struct dlm_ls *ls)
362 return -ENOMEM; 516 return -ENOMEM;
363 } 517 }
364 518
519 memset(name, 0, sizeof(name));
520 snprintf(name, DLM_LOCKSPACE_LEN+8, "%s_locks", ls->ls_name);
521
522 ls->ls_debug_locks_dentry = debugfs_create_file(name,
523 S_IFREG | S_IRUGO,
524 dlm_root,
525 ls,
526 &locks_fops);
527 if (!ls->ls_debug_locks_dentry) {
528 debugfs_remove(ls->ls_debug_waiters_dentry);
529 debugfs_remove(ls->ls_debug_rsb_dentry);
530 return -ENOMEM;
531 }
532
365 return 0; 533 return 0;
366} 534}
367 535
@@ -371,6 +539,8 @@ void dlm_delete_debug_file(struct dlm_ls *ls)
371 debugfs_remove(ls->ls_debug_rsb_dentry); 539 debugfs_remove(ls->ls_debug_rsb_dentry);
372 if (ls->ls_debug_waiters_dentry) 540 if (ls->ls_debug_waiters_dentry)
373 debugfs_remove(ls->ls_debug_waiters_dentry); 541 debugfs_remove(ls->ls_debug_waiters_dentry);
542 if (ls->ls_debug_locks_dentry)
543 debugfs_remove(ls->ls_debug_locks_dentry);
374} 544}
375 545
376int dlm_register_debugfs(void) 546int dlm_register_debugfs(void)
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 30994d68f6a0..74901e981e10 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -151,6 +151,7 @@ struct dlm_args {
151 void *bastaddr; 151 void *bastaddr;
152 int mode; 152 int mode;
153 struct dlm_lksb *lksb; 153 struct dlm_lksb *lksb;
154 unsigned long timeout;
154}; 155};
155 156
156 157
@@ -213,6 +214,9 @@ struct dlm_args {
213#define DLM_IFL_OVERLAP_UNLOCK 0x00080000 214#define DLM_IFL_OVERLAP_UNLOCK 0x00080000
214#define DLM_IFL_OVERLAP_CANCEL 0x00100000 215#define DLM_IFL_OVERLAP_CANCEL 0x00100000
215#define DLM_IFL_ENDOFLIFE 0x00200000 216#define DLM_IFL_ENDOFLIFE 0x00200000
217#define DLM_IFL_WATCH_TIMEWARN 0x00400000
218#define DLM_IFL_TIMEOUT_CANCEL 0x00800000
219#define DLM_IFL_DEADLOCK_CANCEL 0x01000000
216#define DLM_IFL_USER 0x00000001 220#define DLM_IFL_USER 0x00000001
217#define DLM_IFL_ORPHAN 0x00000002 221#define DLM_IFL_ORPHAN 0x00000002
218 222
@@ -243,6 +247,9 @@ struct dlm_lkb {
243 struct list_head lkb_wait_reply; /* waiting for remote reply */ 247 struct list_head lkb_wait_reply; /* waiting for remote reply */
244 struct list_head lkb_astqueue; /* need ast to be sent */ 248 struct list_head lkb_astqueue; /* need ast to be sent */
245 struct list_head lkb_ownqueue; /* list of locks for a process */ 249 struct list_head lkb_ownqueue; /* list of locks for a process */
250 struct list_head lkb_time_list;
251 unsigned long lkb_timestamp;
252 unsigned long lkb_timeout_cs;
246 253
247 char *lkb_lvbptr; 254 char *lkb_lvbptr;
248 struct dlm_lksb *lkb_lksb; /* caller's status block */ 255 struct dlm_lksb *lkb_lksb; /* caller's status block */
@@ -447,12 +454,16 @@ struct dlm_ls {
447 struct mutex ls_orphans_mutex; 454 struct mutex ls_orphans_mutex;
448 struct list_head ls_orphans; 455 struct list_head ls_orphans;
449 456
457 struct mutex ls_timeout_mutex;
458 struct list_head ls_timeout;
459
450 struct list_head ls_nodes; /* current nodes in ls */ 460 struct list_head ls_nodes; /* current nodes in ls */
451 struct list_head ls_nodes_gone; /* dead node list, recovery */ 461 struct list_head ls_nodes_gone; /* dead node list, recovery */
452 int ls_num_nodes; /* number of nodes in ls */ 462 int ls_num_nodes; /* number of nodes in ls */
453 int ls_low_nodeid; 463 int ls_low_nodeid;
454 int ls_total_weight; 464 int ls_total_weight;
455 int *ls_node_array; 465 int *ls_node_array;
466 gfp_t ls_allocation;
456 467
457 struct dlm_rsb ls_stub_rsb; /* for returning errors */ 468 struct dlm_rsb ls_stub_rsb; /* for returning errors */
458 struct dlm_lkb ls_stub_lkb; /* for returning errors */ 469 struct dlm_lkb ls_stub_lkb; /* for returning errors */
@@ -460,9 +471,12 @@ struct dlm_ls {
460 471
461 struct dentry *ls_debug_rsb_dentry; /* debugfs */ 472 struct dentry *ls_debug_rsb_dentry; /* debugfs */
462 struct dentry *ls_debug_waiters_dentry; /* debugfs */ 473 struct dentry *ls_debug_waiters_dentry; /* debugfs */
474 struct dentry *ls_debug_locks_dentry; /* debugfs */
463 475
464 wait_queue_head_t ls_uevent_wait; /* user part of join/leave */ 476 wait_queue_head_t ls_uevent_wait; /* user part of join/leave */
465 int ls_uevent_result; 477 int ls_uevent_result;
478 struct completion ls_members_done;
479 int ls_members_result;
466 480
467 struct miscdevice ls_device; 481 struct miscdevice ls_device;
468 482
@@ -472,6 +486,7 @@ struct dlm_ls {
472 struct task_struct *ls_recoverd_task; 486 struct task_struct *ls_recoverd_task;
473 struct mutex ls_recoverd_active; 487 struct mutex ls_recoverd_active;
474 spinlock_t ls_recover_lock; 488 spinlock_t ls_recover_lock;
489 unsigned long ls_recover_begin; /* jiffies timestamp */
475 uint32_t ls_recover_status; /* DLM_RS_ */ 490 uint32_t ls_recover_status; /* DLM_RS_ */
476 uint64_t ls_recover_seq; 491 uint64_t ls_recover_seq;
477 struct dlm_recover *ls_recover_args; 492 struct dlm_recover *ls_recover_args;
@@ -501,6 +516,7 @@ struct dlm_ls {
501#define LSFL_RCOM_READY 3 516#define LSFL_RCOM_READY 3
502#define LSFL_RCOM_WAIT 4 517#define LSFL_RCOM_WAIT 4
503#define LSFL_UEVENT_WAIT 5 518#define LSFL_UEVENT_WAIT 5
519#define LSFL_TIMEWARN 6
504 520
505/* much of this is just saving user space pointers associated with the 521/* much of this is just saving user space pointers associated with the
506 lock that we pass back to the user lib with an ast */ 522 lock that we pass back to the user lib with an ast */
@@ -518,6 +534,7 @@ struct dlm_user_args {
518 void __user *castaddr; 534 void __user *castaddr;
519 void __user *bastparam; 535 void __user *bastparam;
520 void __user *bastaddr; 536 void __user *bastaddr;
537 uint64_t xid;
521}; 538};
522 539
523#define DLM_PROC_FLAGS_CLOSING 1 540#define DLM_PROC_FLAGS_CLOSING 1
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index d8d6e729f96b..b455919c1998 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -82,10 +82,13 @@ static int send_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int mode);
82static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb); 82static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb);
83static int send_remove(struct dlm_rsb *r); 83static int send_remove(struct dlm_rsb *r);
84static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb); 84static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
85static int _cancel_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
85static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, 86static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
86 struct dlm_message *ms); 87 struct dlm_message *ms);
87static int receive_extralen(struct dlm_message *ms); 88static int receive_extralen(struct dlm_message *ms);
88static void do_purge(struct dlm_ls *ls, int nodeid, int pid); 89static void do_purge(struct dlm_ls *ls, int nodeid, int pid);
90static void del_timeout(struct dlm_lkb *lkb);
91void dlm_timeout_warn(struct dlm_lkb *lkb);
89 92
90/* 93/*
91 * Lock compatibilty matrix - thanks Steve 94 * Lock compatibilty matrix - thanks Steve
@@ -194,17 +197,17 @@ void dlm_dump_rsb(struct dlm_rsb *r)
194 197
195/* Threads cannot use the lockspace while it's being recovered */ 198/* Threads cannot use the lockspace while it's being recovered */
196 199
197static inline void lock_recovery(struct dlm_ls *ls) 200static inline void dlm_lock_recovery(struct dlm_ls *ls)
198{ 201{
199 down_read(&ls->ls_in_recovery); 202 down_read(&ls->ls_in_recovery);
200} 203}
201 204
202static inline void unlock_recovery(struct dlm_ls *ls) 205void dlm_unlock_recovery(struct dlm_ls *ls)
203{ 206{
204 up_read(&ls->ls_in_recovery); 207 up_read(&ls->ls_in_recovery);
205} 208}
206 209
207static inline int lock_recovery_try(struct dlm_ls *ls) 210int dlm_lock_recovery_try(struct dlm_ls *ls)
208{ 211{
209 return down_read_trylock(&ls->ls_in_recovery); 212 return down_read_trylock(&ls->ls_in_recovery);
210} 213}
@@ -286,8 +289,22 @@ static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
286 if (is_master_copy(lkb)) 289 if (is_master_copy(lkb))
287 return; 290 return;
288 291
292 del_timeout(lkb);
293
289 DLM_ASSERT(lkb->lkb_lksb, dlm_print_lkb(lkb);); 294 DLM_ASSERT(lkb->lkb_lksb, dlm_print_lkb(lkb););
290 295
296 /* if the operation was a cancel, then return -DLM_ECANCEL, if a
297 timeout caused the cancel then return -ETIMEDOUT */
298 if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_TIMEOUT_CANCEL)) {
299 lkb->lkb_flags &= ~DLM_IFL_TIMEOUT_CANCEL;
300 rv = -ETIMEDOUT;
301 }
302
303 if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_DEADLOCK_CANCEL)) {
304 lkb->lkb_flags &= ~DLM_IFL_DEADLOCK_CANCEL;
305 rv = -EDEADLK;
306 }
307
291 lkb->lkb_lksb->sb_status = rv; 308 lkb->lkb_lksb->sb_status = rv;
292 lkb->lkb_lksb->sb_flags = lkb->lkb_sbflags; 309 lkb->lkb_lksb->sb_flags = lkb->lkb_sbflags;
293 310
@@ -581,6 +598,7 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
581 kref_init(&lkb->lkb_ref); 598 kref_init(&lkb->lkb_ref);
582 INIT_LIST_HEAD(&lkb->lkb_ownqueue); 599 INIT_LIST_HEAD(&lkb->lkb_ownqueue);
583 INIT_LIST_HEAD(&lkb->lkb_rsb_lookup); 600 INIT_LIST_HEAD(&lkb->lkb_rsb_lookup);
601 INIT_LIST_HEAD(&lkb->lkb_time_list);
584 602
585 get_random_bytes(&bucket, sizeof(bucket)); 603 get_random_bytes(&bucket, sizeof(bucket));
586 bucket &= (ls->ls_lkbtbl_size - 1); 604 bucket &= (ls->ls_lkbtbl_size - 1);
@@ -985,15 +1003,136 @@ void dlm_scan_rsbs(struct dlm_ls *ls)
985{ 1003{
986 int i; 1004 int i;
987 1005
988 if (dlm_locking_stopped(ls))
989 return;
990
991 for (i = 0; i < ls->ls_rsbtbl_size; i++) { 1006 for (i = 0; i < ls->ls_rsbtbl_size; i++) {
992 shrink_bucket(ls, i); 1007 shrink_bucket(ls, i);
1008 if (dlm_locking_stopped(ls))
1009 break;
993 cond_resched(); 1010 cond_resched();
994 } 1011 }
995} 1012}
996 1013
1014static void add_timeout(struct dlm_lkb *lkb)
1015{
1016 struct dlm_ls *ls = lkb->lkb_resource->res_ls;
1017
1018 if (is_master_copy(lkb)) {
1019 lkb->lkb_timestamp = jiffies;
1020 return;
1021 }
1022
1023 if (test_bit(LSFL_TIMEWARN, &ls->ls_flags) &&
1024 !(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) {
1025 lkb->lkb_flags |= DLM_IFL_WATCH_TIMEWARN;
1026 goto add_it;
1027 }
1028 if (lkb->lkb_exflags & DLM_LKF_TIMEOUT)
1029 goto add_it;
1030 return;
1031
1032 add_it:
1033 DLM_ASSERT(list_empty(&lkb->lkb_time_list), dlm_print_lkb(lkb););
1034 mutex_lock(&ls->ls_timeout_mutex);
1035 hold_lkb(lkb);
1036 lkb->lkb_timestamp = jiffies;
1037 list_add_tail(&lkb->lkb_time_list, &ls->ls_timeout);
1038 mutex_unlock(&ls->ls_timeout_mutex);
1039}
1040
1041static void del_timeout(struct dlm_lkb *lkb)
1042{
1043 struct dlm_ls *ls = lkb->lkb_resource->res_ls;
1044
1045 mutex_lock(&ls->ls_timeout_mutex);
1046 if (!list_empty(&lkb->lkb_time_list)) {
1047 list_del_init(&lkb->lkb_time_list);
1048 unhold_lkb(lkb);
1049 }
1050 mutex_unlock(&ls->ls_timeout_mutex);
1051}
1052
1053/* FIXME: is it safe to look at lkb_exflags, lkb_flags, lkb_timestamp, and
1054 lkb_lksb_timeout without lock_rsb? Note: we can't lock timeout_mutex
1055 and then lock rsb because of lock ordering in add_timeout. We may need
1056 to specify some special timeout-related bits in the lkb that are just to
1057 be accessed under the timeout_mutex. */
1058
1059void dlm_scan_timeout(struct dlm_ls *ls)
1060{
1061 struct dlm_rsb *r;
1062 struct dlm_lkb *lkb;
1063 int do_cancel, do_warn;
1064
1065 for (;;) {
1066 if (dlm_locking_stopped(ls))
1067 break;
1068
1069 do_cancel = 0;
1070 do_warn = 0;
1071 mutex_lock(&ls->ls_timeout_mutex);
1072 list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list) {
1073
1074 if ((lkb->lkb_exflags & DLM_LKF_TIMEOUT) &&
1075 time_after_eq(jiffies, lkb->lkb_timestamp +
1076 lkb->lkb_timeout_cs * HZ/100))
1077 do_cancel = 1;
1078
1079 if ((lkb->lkb_flags & DLM_IFL_WATCH_TIMEWARN) &&
1080 time_after_eq(jiffies, lkb->lkb_timestamp +
1081 dlm_config.ci_timewarn_cs * HZ/100))
1082 do_warn = 1;
1083
1084 if (!do_cancel && !do_warn)
1085 continue;
1086 hold_lkb(lkb);
1087 break;
1088 }
1089 mutex_unlock(&ls->ls_timeout_mutex);
1090
1091 if (!do_cancel && !do_warn)
1092 break;
1093
1094 r = lkb->lkb_resource;
1095 hold_rsb(r);
1096 lock_rsb(r);
1097
1098 if (do_warn) {
1099 /* clear flag so we only warn once */
1100 lkb->lkb_flags &= ~DLM_IFL_WATCH_TIMEWARN;
1101 if (!(lkb->lkb_exflags & DLM_LKF_TIMEOUT))
1102 del_timeout(lkb);
1103 dlm_timeout_warn(lkb);
1104 }
1105
1106 if (do_cancel) {
1107 log_debug(ls, "timeout cancel %x node %d %s",
1108 lkb->lkb_id, lkb->lkb_nodeid, r->res_name);
1109 lkb->lkb_flags &= ~DLM_IFL_WATCH_TIMEWARN;
1110 lkb->lkb_flags |= DLM_IFL_TIMEOUT_CANCEL;
1111 del_timeout(lkb);
1112 _cancel_lock(r, lkb);
1113 }
1114
1115 unlock_rsb(r);
1116 unhold_rsb(r);
1117 dlm_put_lkb(lkb);
1118 }
1119}
1120
1121/* This is only called by dlm_recoverd, and we rely on dlm_ls_stop() stopping
1122 dlm_recoverd before checking/setting ls_recover_begin. */
1123
1124void dlm_adjust_timeouts(struct dlm_ls *ls)
1125{
1126 struct dlm_lkb *lkb;
1127 long adj = jiffies - ls->ls_recover_begin;
1128
1129 ls->ls_recover_begin = 0;
1130 mutex_lock(&ls->ls_timeout_mutex);
1131 list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list)
1132 lkb->lkb_timestamp += adj;
1133 mutex_unlock(&ls->ls_timeout_mutex);
1134}
1135
997/* lkb is master or local copy */ 1136/* lkb is master or local copy */
998 1137
999static void set_lvb_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) 1138static void set_lvb_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
@@ -1275,10 +1414,8 @@ static int queue_conflict(struct list_head *head, struct dlm_lkb *lkb)
1275 * queue for one resource. The granted mode of each lock blocks the requested 1414 * queue for one resource. The granted mode of each lock blocks the requested
1276 * mode of the other lock." 1415 * mode of the other lock."
1277 * 1416 *
1278 * Part 2: if the granted mode of lkb is preventing the first lkb in the 1417 * Part 2: if the granted mode of lkb is preventing an earlier lkb in the
1279 * convert queue from being granted, then demote lkb (set grmode to NL). 1418 * convert queue from being granted, then deadlk/demote lkb.
1280 * This second form requires that we check for conv-deadlk even when
1281 * now == 0 in _can_be_granted().
1282 * 1419 *
1283 * Example: 1420 * Example:
1284 * Granted Queue: empty 1421 * Granted Queue: empty
@@ -1287,41 +1424,52 @@ static int queue_conflict(struct list_head *head, struct dlm_lkb *lkb)
1287 * 1424 *
1288 * The first lock can't be granted because of the granted mode of the second 1425 * The first lock can't be granted because of the granted mode of the second
1289 * lock and the second lock can't be granted because it's not first in the 1426 * lock and the second lock can't be granted because it's not first in the
1290 * list. We demote the granted mode of the second lock (the lkb passed to this 1427 * list. We either cancel lkb's conversion (PR->EX) and return EDEADLK, or we
1291 * function). 1428 * demote the granted mode of lkb (from PR to NL) if it has the CONVDEADLK
1429 * flag set and return DEMOTED in the lksb flags.
1430 *
1431 * Originally, this function detected conv-deadlk in a more limited scope:
1432 * - if !modes_compat(lkb1, lkb2) && !modes_compat(lkb2, lkb1), or
1433 * - if lkb1 was the first entry in the queue (not just earlier), and was
1434 * blocked by the granted mode of lkb2, and there was nothing on the
1435 * granted queue preventing lkb1 from being granted immediately, i.e.
1436 * lkb2 was the only thing preventing lkb1 from being granted.
1437 *
1438 * That second condition meant we'd only say there was conv-deadlk if
1439 * resolving it (by demotion) would lead to the first lock on the convert
1440 * queue being granted right away. It allowed conversion deadlocks to exist
1441 * between locks on the convert queue while they couldn't be granted anyway.
1292 * 1442 *
1293 * After the resolution, the "grant pending" function needs to go back and try 1443 * Now, we detect and take action on conversion deadlocks immediately when
1294 * to grant locks on the convert queue again since the first lock can now be 1444 * they're created, even if they may not be immediately consequential. If
1295 * granted. 1445 * lkb1 exists anywhere in the convert queue and lkb2 comes in with a granted
1446 * mode that would prevent lkb1's conversion from being granted, we do a
1447 * deadlk/demote on lkb2 right away and don't let it onto the convert queue.
1448 * I think this means that the lkb_is_ahead condition below should always
1449 * be zero, i.e. there will never be conv-deadlk between two locks that are
1450 * both already on the convert queue.
1296 */ 1451 */
1297 1452
1298static int conversion_deadlock_detect(struct dlm_rsb *rsb, struct dlm_lkb *lkb) 1453static int conversion_deadlock_detect(struct dlm_rsb *r, struct dlm_lkb *lkb2)
1299{ 1454{
1300 struct dlm_lkb *this, *first = NULL, *self = NULL; 1455 struct dlm_lkb *lkb1;
1456 int lkb_is_ahead = 0;
1301 1457
1302 list_for_each_entry(this, &rsb->res_convertqueue, lkb_statequeue) { 1458 list_for_each_entry(lkb1, &r->res_convertqueue, lkb_statequeue) {
1303 if (!first) 1459 if (lkb1 == lkb2) {
1304 first = this; 1460 lkb_is_ahead = 1;
1305 if (this == lkb) {
1306 self = lkb;
1307 continue; 1461 continue;
1308 } 1462 }
1309 1463
1310 if (!modes_compat(this, lkb) && !modes_compat(lkb, this)) 1464 if (!lkb_is_ahead) {
1311 return 1; 1465 if (!modes_compat(lkb2, lkb1))
1312 } 1466 return 1;
1313 1467 } else {
1314 /* if lkb is on the convert queue and is preventing the first 1468 if (!modes_compat(lkb2, lkb1) &&
1315 from being granted, then there's deadlock and we demote lkb. 1469 !modes_compat(lkb1, lkb2))
1316 multiple converting locks may need to do this before the first 1470 return 1;
1317 converting lock can be granted. */ 1471 }
1318
1319 if (self && self != first) {
1320 if (!modes_compat(lkb, first) &&
1321 !queue_conflict(&rsb->res_grantqueue, first))
1322 return 1;
1323 } 1472 }
1324
1325 return 0; 1473 return 0;
1326} 1474}
1327 1475
@@ -1450,42 +1598,57 @@ static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now)
1450 if (!now && !conv && list_empty(&r->res_convertqueue) && 1598 if (!now && !conv && list_empty(&r->res_convertqueue) &&
1451 first_in_list(lkb, &r->res_waitqueue)) 1599 first_in_list(lkb, &r->res_waitqueue))
1452 return 1; 1600 return 1;
1453
1454 out: 1601 out:
1455 /*
1456 * The following, enabled by CONVDEADLK, departs from VMS.
1457 */
1458
1459 if (conv && (lkb->lkb_exflags & DLM_LKF_CONVDEADLK) &&
1460 conversion_deadlock_detect(r, lkb)) {
1461 lkb->lkb_grmode = DLM_LOCK_NL;
1462 lkb->lkb_sbflags |= DLM_SBF_DEMOTED;
1463 }
1464
1465 return 0; 1602 return 0;
1466} 1603}
1467 1604
1468/* 1605static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now,
1469 * The ALTPR and ALTCW flags aren't traditional lock manager flags, but are a 1606 int *err)
1470 * simple way to provide a big optimization to applications that can use them.
1471 */
1472
1473static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now)
1474{ 1607{
1475 uint32_t flags = lkb->lkb_exflags;
1476 int rv; 1608 int rv;
1477 int8_t alt = 0, rqmode = lkb->lkb_rqmode; 1609 int8_t alt = 0, rqmode = lkb->lkb_rqmode;
1610 int8_t is_convert = (lkb->lkb_grmode != DLM_LOCK_IV);
1611
1612 if (err)
1613 *err = 0;
1478 1614
1479 rv = _can_be_granted(r, lkb, now); 1615 rv = _can_be_granted(r, lkb, now);
1480 if (rv) 1616 if (rv)
1481 goto out; 1617 goto out;
1482 1618
1483 if (lkb->lkb_sbflags & DLM_SBF_DEMOTED) 1619 /*
1620 * The CONVDEADLK flag is non-standard and tells the dlm to resolve
1621 * conversion deadlocks by demoting grmode to NL, otherwise the dlm
1622 * cancels one of the locks.
1623 */
1624
1625 if (is_convert && can_be_queued(lkb) &&
1626 conversion_deadlock_detect(r, lkb)) {
1627 if (lkb->lkb_exflags & DLM_LKF_CONVDEADLK) {
1628 lkb->lkb_grmode = DLM_LOCK_NL;
1629 lkb->lkb_sbflags |= DLM_SBF_DEMOTED;
1630 } else if (!(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) {
1631 if (err)
1632 *err = -EDEADLK;
1633 else {
1634 log_print("can_be_granted deadlock %x now %d",
1635 lkb->lkb_id, now);
1636 dlm_dump_rsb(r);
1637 }
1638 }
1484 goto out; 1639 goto out;
1640 }
1485 1641
1486 if (rqmode != DLM_LOCK_PR && flags & DLM_LKF_ALTPR) 1642 /*
1643 * The ALTPR and ALTCW flags are non-standard and tell the dlm to try
1644 * to grant a request in a mode other than the normal rqmode. It's a
1645 * simple way to provide a big optimization to applications that can
1646 * use them.
1647 */
1648
1649 if (rqmode != DLM_LOCK_PR && (lkb->lkb_exflags & DLM_LKF_ALTPR))
1487 alt = DLM_LOCK_PR; 1650 alt = DLM_LOCK_PR;
1488 else if (rqmode != DLM_LOCK_CW && flags & DLM_LKF_ALTCW) 1651 else if (rqmode != DLM_LOCK_CW && (lkb->lkb_exflags & DLM_LKF_ALTCW))
1489 alt = DLM_LOCK_CW; 1652 alt = DLM_LOCK_CW;
1490 1653
1491 if (alt) { 1654 if (alt) {
@@ -1500,10 +1663,20 @@ static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now)
1500 return rv; 1663 return rv;
1501} 1664}
1502 1665
1666/* FIXME: I don't think that can_be_granted() can/will demote or find deadlock
1667 for locks pending on the convert list. Once verified (watch for these
1668 log_prints), we should be able to just call _can_be_granted() and not
1669 bother with the demote/deadlk cases here (and there's no easy way to deal
1670 with a deadlk here, we'd have to generate something like grant_lock with
1671 the deadlk error.) */
1672
1673/* returns the highest requested mode of all blocked conversions */
1674
1503static int grant_pending_convert(struct dlm_rsb *r, int high) 1675static int grant_pending_convert(struct dlm_rsb *r, int high)
1504{ 1676{
1505 struct dlm_lkb *lkb, *s; 1677 struct dlm_lkb *lkb, *s;
1506 int hi, demoted, quit, grant_restart, demote_restart; 1678 int hi, demoted, quit, grant_restart, demote_restart;
1679 int deadlk;
1507 1680
1508 quit = 0; 1681 quit = 0;
1509 restart: 1682 restart:
@@ -1513,14 +1686,29 @@ static int grant_pending_convert(struct dlm_rsb *r, int high)
1513 1686
1514 list_for_each_entry_safe(lkb, s, &r->res_convertqueue, lkb_statequeue) { 1687 list_for_each_entry_safe(lkb, s, &r->res_convertqueue, lkb_statequeue) {
1515 demoted = is_demoted(lkb); 1688 demoted = is_demoted(lkb);
1516 if (can_be_granted(r, lkb, 0)) { 1689 deadlk = 0;
1690
1691 if (can_be_granted(r, lkb, 0, &deadlk)) {
1517 grant_lock_pending(r, lkb); 1692 grant_lock_pending(r, lkb);
1518 grant_restart = 1; 1693 grant_restart = 1;
1519 } else { 1694 continue;
1520 hi = max_t(int, lkb->lkb_rqmode, hi);
1521 if (!demoted && is_demoted(lkb))
1522 demote_restart = 1;
1523 } 1695 }
1696
1697 if (!demoted && is_demoted(lkb)) {
1698 log_print("WARN: pending demoted %x node %d %s",
1699 lkb->lkb_id, lkb->lkb_nodeid, r->res_name);
1700 demote_restart = 1;
1701 continue;
1702 }
1703
1704 if (deadlk) {
1705 log_print("WARN: pending deadlock %x node %d %s",
1706 lkb->lkb_id, lkb->lkb_nodeid, r->res_name);
1707 dlm_dump_rsb(r);
1708 continue;
1709 }
1710
1711 hi = max_t(int, lkb->lkb_rqmode, hi);
1524 } 1712 }
1525 1713
1526 if (grant_restart) 1714 if (grant_restart)
@@ -1538,7 +1726,7 @@ static int grant_pending_wait(struct dlm_rsb *r, int high)
1538 struct dlm_lkb *lkb, *s; 1726 struct dlm_lkb *lkb, *s;
1539 1727
1540 list_for_each_entry_safe(lkb, s, &r->res_waitqueue, lkb_statequeue) { 1728 list_for_each_entry_safe(lkb, s, &r->res_waitqueue, lkb_statequeue) {
1541 if (can_be_granted(r, lkb, 0)) 1729 if (can_be_granted(r, lkb, 0, NULL))
1542 grant_lock_pending(r, lkb); 1730 grant_lock_pending(r, lkb);
1543 else 1731 else
1544 high = max_t(int, lkb->lkb_rqmode, high); 1732 high = max_t(int, lkb->lkb_rqmode, high);
@@ -1733,7 +1921,7 @@ static void confirm_master(struct dlm_rsb *r, int error)
1733} 1921}
1734 1922
1735static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags, 1923static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
1736 int namelen, uint32_t parent_lkid, void *ast, 1924 int namelen, unsigned long timeout_cs, void *ast,
1737 void *astarg, void *bast, struct dlm_args *args) 1925 void *astarg, void *bast, struct dlm_args *args)
1738{ 1926{
1739 int rv = -EINVAL; 1927 int rv = -EINVAL;
@@ -1776,10 +1964,6 @@ static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
1776 if (flags & DLM_LKF_VALBLK && !lksb->sb_lvbptr) 1964 if (flags & DLM_LKF_VALBLK && !lksb->sb_lvbptr)
1777 goto out; 1965 goto out;
1778 1966
1779 /* parent/child locks not yet supported */
1780 if (parent_lkid)
1781 goto out;
1782
1783 if (flags & DLM_LKF_CONVERT && !lksb->sb_lkid) 1967 if (flags & DLM_LKF_CONVERT && !lksb->sb_lkid)
1784 goto out; 1968 goto out;
1785 1969
@@ -1791,6 +1975,7 @@ static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
1791 args->astaddr = ast; 1975 args->astaddr = ast;
1792 args->astparam = (long) astarg; 1976 args->astparam = (long) astarg;
1793 args->bastaddr = bast; 1977 args->bastaddr = bast;
1978 args->timeout = timeout_cs;
1794 args->mode = mode; 1979 args->mode = mode;
1795 args->lksb = lksb; 1980 args->lksb = lksb;
1796 rv = 0; 1981 rv = 0;
@@ -1845,6 +2030,7 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
1845 lkb->lkb_lksb = args->lksb; 2030 lkb->lkb_lksb = args->lksb;
1846 lkb->lkb_lvbptr = args->lksb->sb_lvbptr; 2031 lkb->lkb_lvbptr = args->lksb->sb_lvbptr;
1847 lkb->lkb_ownpid = (int) current->pid; 2032 lkb->lkb_ownpid = (int) current->pid;
2033 lkb->lkb_timeout_cs = args->timeout;
1848 rv = 0; 2034 rv = 0;
1849 out: 2035 out:
1850 return rv; 2036 return rv;
@@ -1903,6 +2089,9 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
1903 if (is_overlap(lkb)) 2089 if (is_overlap(lkb))
1904 goto out; 2090 goto out;
1905 2091
2092 /* don't let scand try to do a cancel */
2093 del_timeout(lkb);
2094
1906 if (lkb->lkb_flags & DLM_IFL_RESEND) { 2095 if (lkb->lkb_flags & DLM_IFL_RESEND) {
1907 lkb->lkb_flags |= DLM_IFL_OVERLAP_CANCEL; 2096 lkb->lkb_flags |= DLM_IFL_OVERLAP_CANCEL;
1908 rv = -EBUSY; 2097 rv = -EBUSY;
@@ -1934,6 +2123,9 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
1934 if (is_overlap_unlock(lkb)) 2123 if (is_overlap_unlock(lkb))
1935 goto out; 2124 goto out;
1936 2125
2126 /* don't let scand try to do a cancel */
2127 del_timeout(lkb);
2128
1937 if (lkb->lkb_flags & DLM_IFL_RESEND) { 2129 if (lkb->lkb_flags & DLM_IFL_RESEND) {
1938 lkb->lkb_flags |= DLM_IFL_OVERLAP_UNLOCK; 2130 lkb->lkb_flags |= DLM_IFL_OVERLAP_UNLOCK;
1939 rv = -EBUSY; 2131 rv = -EBUSY;
@@ -1984,7 +2176,7 @@ static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb)
1984{ 2176{
1985 int error = 0; 2177 int error = 0;
1986 2178
1987 if (can_be_granted(r, lkb, 1)) { 2179 if (can_be_granted(r, lkb, 1, NULL)) {
1988 grant_lock(r, lkb); 2180 grant_lock(r, lkb);
1989 queue_cast(r, lkb, 0); 2181 queue_cast(r, lkb, 0);
1990 goto out; 2182 goto out;
@@ -1994,6 +2186,7 @@ static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb)
1994 error = -EINPROGRESS; 2186 error = -EINPROGRESS;
1995 add_lkb(r, lkb, DLM_LKSTS_WAITING); 2187 add_lkb(r, lkb, DLM_LKSTS_WAITING);
1996 send_blocking_asts(r, lkb); 2188 send_blocking_asts(r, lkb);
2189 add_timeout(lkb);
1997 goto out; 2190 goto out;
1998 } 2191 }
1999 2192
@@ -2009,16 +2202,32 @@ static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb)
2009static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb) 2202static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
2010{ 2203{
2011 int error = 0; 2204 int error = 0;
2205 int deadlk = 0;
2012 2206
2013 /* changing an existing lock may allow others to be granted */ 2207 /* changing an existing lock may allow others to be granted */
2014 2208
2015 if (can_be_granted(r, lkb, 1)) { 2209 if (can_be_granted(r, lkb, 1, &deadlk)) {
2016 grant_lock(r, lkb); 2210 grant_lock(r, lkb);
2017 queue_cast(r, lkb, 0); 2211 queue_cast(r, lkb, 0);
2018 grant_pending_locks(r); 2212 grant_pending_locks(r);
2019 goto out; 2213 goto out;
2020 } 2214 }
2021 2215
2216 /* can_be_granted() detected that this lock would block in a conversion
2217 deadlock, so we leave it on the granted queue and return EDEADLK in
2218 the ast for the convert. */
2219
2220 if (deadlk) {
2221 /* it's left on the granted queue */
2222 log_debug(r->res_ls, "deadlock %x node %d sts%d g%d r%d %s",
2223 lkb->lkb_id, lkb->lkb_nodeid, lkb->lkb_status,
2224 lkb->lkb_grmode, lkb->lkb_rqmode, r->res_name);
2225 revert_lock(r, lkb);
2226 queue_cast(r, lkb, -EDEADLK);
2227 error = -EDEADLK;
2228 goto out;
2229 }
2230
2022 /* is_demoted() means the can_be_granted() above set the grmode 2231 /* is_demoted() means the can_be_granted() above set the grmode
2023 to NL, and left us on the granted queue. This auto-demotion 2232 to NL, and left us on the granted queue. This auto-demotion
2024 (due to CONVDEADLK) might mean other locks, and/or this lock, are 2233 (due to CONVDEADLK) might mean other locks, and/or this lock, are
@@ -2041,6 +2250,7 @@ static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
2041 del_lkb(r, lkb); 2250 del_lkb(r, lkb);
2042 add_lkb(r, lkb, DLM_LKSTS_CONVERT); 2251 add_lkb(r, lkb, DLM_LKSTS_CONVERT);
2043 send_blocking_asts(r, lkb); 2252 send_blocking_asts(r, lkb);
2253 add_timeout(lkb);
2044 goto out; 2254 goto out;
2045 } 2255 }
2046 2256
@@ -2274,7 +2484,7 @@ int dlm_lock(dlm_lockspace_t *lockspace,
2274 if (!ls) 2484 if (!ls)
2275 return -EINVAL; 2485 return -EINVAL;
2276 2486
2277 lock_recovery(ls); 2487 dlm_lock_recovery(ls);
2278 2488
2279 if (convert) 2489 if (convert)
2280 error = find_lkb(ls, lksb->sb_lkid, &lkb); 2490 error = find_lkb(ls, lksb->sb_lkid, &lkb);
@@ -2284,7 +2494,7 @@ int dlm_lock(dlm_lockspace_t *lockspace,
2284 if (error) 2494 if (error)
2285 goto out; 2495 goto out;
2286 2496
2287 error = set_lock_args(mode, lksb, flags, namelen, parent_lkid, ast, 2497 error = set_lock_args(mode, lksb, flags, namelen, 0, ast,
2288 astarg, bast, &args); 2498 astarg, bast, &args);
2289 if (error) 2499 if (error)
2290 goto out_put; 2500 goto out_put;
@@ -2299,10 +2509,10 @@ int dlm_lock(dlm_lockspace_t *lockspace,
2299 out_put: 2509 out_put:
2300 if (convert || error) 2510 if (convert || error)
2301 __put_lkb(ls, lkb); 2511 __put_lkb(ls, lkb);
2302 if (error == -EAGAIN) 2512 if (error == -EAGAIN || error == -EDEADLK)
2303 error = 0; 2513 error = 0;
2304 out: 2514 out:
2305 unlock_recovery(ls); 2515 dlm_unlock_recovery(ls);
2306 dlm_put_lockspace(ls); 2516 dlm_put_lockspace(ls);
2307 return error; 2517 return error;
2308} 2518}
@@ -2322,7 +2532,7 @@ int dlm_unlock(dlm_lockspace_t *lockspace,
2322 if (!ls) 2532 if (!ls)
2323 return -EINVAL; 2533 return -EINVAL;
2324 2534
2325 lock_recovery(ls); 2535 dlm_lock_recovery(ls);
2326 2536
2327 error = find_lkb(ls, lkid, &lkb); 2537 error = find_lkb(ls, lkid, &lkb);
2328 if (error) 2538 if (error)
@@ -2344,7 +2554,7 @@ int dlm_unlock(dlm_lockspace_t *lockspace,
2344 out_put: 2554 out_put:
2345 dlm_put_lkb(lkb); 2555 dlm_put_lkb(lkb);
2346 out: 2556 out:
2347 unlock_recovery(ls); 2557 dlm_unlock_recovery(ls);
2348 dlm_put_lockspace(ls); 2558 dlm_put_lockspace(ls);
2349 return error; 2559 return error;
2350} 2560}
@@ -2384,7 +2594,7 @@ static int _create_message(struct dlm_ls *ls, int mb_len,
2384 pass into lowcomms_commit and a message buffer (mb) that we 2594 pass into lowcomms_commit and a message buffer (mb) that we
2385 write our data into */ 2595 write our data into */
2386 2596
2387 mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_KERNEL, &mb); 2597 mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, ls->ls_allocation, &mb);
2388 if (!mh) 2598 if (!mh)
2389 return -ENOBUFS; 2599 return -ENOBUFS;
2390 2600
@@ -3111,9 +3321,10 @@ static void receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms)
3111 lkb->lkb_remid = ms->m_lkid; 3321 lkb->lkb_remid = ms->m_lkid;
3112 if (is_altmode(lkb)) 3322 if (is_altmode(lkb))
3113 munge_altmode(lkb, ms); 3323 munge_altmode(lkb, ms);
3114 if (result) 3324 if (result) {
3115 add_lkb(r, lkb, DLM_LKSTS_WAITING); 3325 add_lkb(r, lkb, DLM_LKSTS_WAITING);
3116 else { 3326 add_timeout(lkb);
3327 } else {
3117 grant_lock_pc(r, lkb, ms); 3328 grant_lock_pc(r, lkb, ms);
3118 queue_cast(r, lkb, 0); 3329 queue_cast(r, lkb, 0);
3119 } 3330 }
@@ -3172,6 +3383,12 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
3172 queue_cast(r, lkb, -EAGAIN); 3383 queue_cast(r, lkb, -EAGAIN);
3173 break; 3384 break;
3174 3385
3386 case -EDEADLK:
3387 receive_flags_reply(lkb, ms);
3388 revert_lock_pc(r, lkb);
3389 queue_cast(r, lkb, -EDEADLK);
3390 break;
3391
3175 case -EINPROGRESS: 3392 case -EINPROGRESS:
3176 /* convert was queued on remote master */ 3393 /* convert was queued on remote master */
3177 receive_flags_reply(lkb, ms); 3394 receive_flags_reply(lkb, ms);
@@ -3179,6 +3396,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
3179 munge_demoted(lkb, ms); 3396 munge_demoted(lkb, ms);
3180 del_lkb(r, lkb); 3397 del_lkb(r, lkb);
3181 add_lkb(r, lkb, DLM_LKSTS_CONVERT); 3398 add_lkb(r, lkb, DLM_LKSTS_CONVERT);
3399 add_timeout(lkb);
3182 break; 3400 break;
3183 3401
3184 case 0: 3402 case 0:
@@ -3298,8 +3516,7 @@ static void _receive_cancel_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
3298 case -DLM_ECANCEL: 3516 case -DLM_ECANCEL:
3299 receive_flags_reply(lkb, ms); 3517 receive_flags_reply(lkb, ms);
3300 revert_lock_pc(r, lkb); 3518 revert_lock_pc(r, lkb);
3301 if (ms->m_result) 3519 queue_cast(r, lkb, -DLM_ECANCEL);
3302 queue_cast(r, lkb, -DLM_ECANCEL);
3303 break; 3520 break;
3304 case 0: 3521 case 0:
3305 break; 3522 break;
@@ -3424,7 +3641,7 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
3424 } 3641 }
3425 } 3642 }
3426 3643
3427 if (lock_recovery_try(ls)) 3644 if (dlm_lock_recovery_try(ls))
3428 break; 3645 break;
3429 schedule(); 3646 schedule();
3430 } 3647 }
@@ -3503,7 +3720,7 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
3503 log_error(ls, "unknown message type %d", ms->m_type); 3720 log_error(ls, "unknown message type %d", ms->m_type);
3504 } 3721 }
3505 3722
3506 unlock_recovery(ls); 3723 dlm_unlock_recovery(ls);
3507 out: 3724 out:
3508 dlm_put_lockspace(ls); 3725 dlm_put_lockspace(ls);
3509 dlm_astd_wake(); 3726 dlm_astd_wake();
@@ -4034,13 +4251,13 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
4034 4251
4035int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, 4252int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
4036 int mode, uint32_t flags, void *name, unsigned int namelen, 4253 int mode, uint32_t flags, void *name, unsigned int namelen,
4037 uint32_t parent_lkid) 4254 unsigned long timeout_cs)
4038{ 4255{
4039 struct dlm_lkb *lkb; 4256 struct dlm_lkb *lkb;
4040 struct dlm_args args; 4257 struct dlm_args args;
4041 int error; 4258 int error;
4042 4259
4043 lock_recovery(ls); 4260 dlm_lock_recovery(ls);
4044 4261
4045 error = create_lkb(ls, &lkb); 4262 error = create_lkb(ls, &lkb);
4046 if (error) { 4263 if (error) {
@@ -4062,7 +4279,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
4062 When DLM_IFL_USER is set, the dlm knows that this is a userspace 4279 When DLM_IFL_USER is set, the dlm knows that this is a userspace
4063 lock and that lkb_astparam is the dlm_user_args structure. */ 4280 lock and that lkb_astparam is the dlm_user_args structure. */
4064 4281
4065 error = set_lock_args(mode, &ua->lksb, flags, namelen, parent_lkid, 4282 error = set_lock_args(mode, &ua->lksb, flags, namelen, timeout_cs,
4066 DLM_FAKE_USER_AST, ua, DLM_FAKE_USER_AST, &args); 4283 DLM_FAKE_USER_AST, ua, DLM_FAKE_USER_AST, &args);
4067 lkb->lkb_flags |= DLM_IFL_USER; 4284 lkb->lkb_flags |= DLM_IFL_USER;
4068 ua->old_mode = DLM_LOCK_IV; 4285 ua->old_mode = DLM_LOCK_IV;
@@ -4094,19 +4311,20 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
4094 list_add_tail(&lkb->lkb_ownqueue, &ua->proc->locks); 4311 list_add_tail(&lkb->lkb_ownqueue, &ua->proc->locks);
4095 spin_unlock(&ua->proc->locks_spin); 4312 spin_unlock(&ua->proc->locks_spin);
4096 out: 4313 out:
4097 unlock_recovery(ls); 4314 dlm_unlock_recovery(ls);
4098 return error; 4315 return error;
4099} 4316}
4100 4317
4101int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, 4318int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
4102 int mode, uint32_t flags, uint32_t lkid, char *lvb_in) 4319 int mode, uint32_t flags, uint32_t lkid, char *lvb_in,
4320 unsigned long timeout_cs)
4103{ 4321{
4104 struct dlm_lkb *lkb; 4322 struct dlm_lkb *lkb;
4105 struct dlm_args args; 4323 struct dlm_args args;
4106 struct dlm_user_args *ua; 4324 struct dlm_user_args *ua;
4107 int error; 4325 int error;
4108 4326
4109 lock_recovery(ls); 4327 dlm_lock_recovery(ls);
4110 4328
4111 error = find_lkb(ls, lkid, &lkb); 4329 error = find_lkb(ls, lkid, &lkb);
4112 if (error) 4330 if (error)
@@ -4127,6 +4345,7 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
4127 if (lvb_in && ua->lksb.sb_lvbptr) 4345 if (lvb_in && ua->lksb.sb_lvbptr)
4128 memcpy(ua->lksb.sb_lvbptr, lvb_in, DLM_USER_LVB_LEN); 4346 memcpy(ua->lksb.sb_lvbptr, lvb_in, DLM_USER_LVB_LEN);
4129 4347
4348 ua->xid = ua_tmp->xid;
4130 ua->castparam = ua_tmp->castparam; 4349 ua->castparam = ua_tmp->castparam;
4131 ua->castaddr = ua_tmp->castaddr; 4350 ua->castaddr = ua_tmp->castaddr;
4132 ua->bastparam = ua_tmp->bastparam; 4351 ua->bastparam = ua_tmp->bastparam;
@@ -4134,19 +4353,19 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
4134 ua->user_lksb = ua_tmp->user_lksb; 4353 ua->user_lksb = ua_tmp->user_lksb;
4135 ua->old_mode = lkb->lkb_grmode; 4354 ua->old_mode = lkb->lkb_grmode;
4136 4355
4137 error = set_lock_args(mode, &ua->lksb, flags, 0, 0, DLM_FAKE_USER_AST, 4356 error = set_lock_args(mode, &ua->lksb, flags, 0, timeout_cs,
4138 ua, DLM_FAKE_USER_AST, &args); 4357 DLM_FAKE_USER_AST, ua, DLM_FAKE_USER_AST, &args);
4139 if (error) 4358 if (error)
4140 goto out_put; 4359 goto out_put;
4141 4360
4142 error = convert_lock(ls, lkb, &args); 4361 error = convert_lock(ls, lkb, &args);
4143 4362
4144 if (error == -EINPROGRESS || error == -EAGAIN) 4363 if (error == -EINPROGRESS || error == -EAGAIN || error == -EDEADLK)
4145 error = 0; 4364 error = 0;
4146 out_put: 4365 out_put:
4147 dlm_put_lkb(lkb); 4366 dlm_put_lkb(lkb);
4148 out: 4367 out:
4149 unlock_recovery(ls); 4368 dlm_unlock_recovery(ls);
4150 kfree(ua_tmp); 4369 kfree(ua_tmp);
4151 return error; 4370 return error;
4152} 4371}
@@ -4159,7 +4378,7 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
4159 struct dlm_user_args *ua; 4378 struct dlm_user_args *ua;
4160 int error; 4379 int error;
4161 4380
4162 lock_recovery(ls); 4381 dlm_lock_recovery(ls);
4163 4382
4164 error = find_lkb(ls, lkid, &lkb); 4383 error = find_lkb(ls, lkid, &lkb);
4165 if (error) 4384 if (error)
@@ -4194,7 +4413,7 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
4194 out_put: 4413 out_put:
4195 dlm_put_lkb(lkb); 4414 dlm_put_lkb(lkb);
4196 out: 4415 out:
4197 unlock_recovery(ls); 4416 dlm_unlock_recovery(ls);
4198 kfree(ua_tmp); 4417 kfree(ua_tmp);
4199 return error; 4418 return error;
4200} 4419}
@@ -4207,7 +4426,7 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
4207 struct dlm_user_args *ua; 4426 struct dlm_user_args *ua;
4208 int error; 4427 int error;
4209 4428
4210 lock_recovery(ls); 4429 dlm_lock_recovery(ls);
4211 4430
4212 error = find_lkb(ls, lkid, &lkb); 4431 error = find_lkb(ls, lkid, &lkb);
4213 if (error) 4432 if (error)
@@ -4231,11 +4450,59 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
4231 out_put: 4450 out_put:
4232 dlm_put_lkb(lkb); 4451 dlm_put_lkb(lkb);
4233 out: 4452 out:
4234 unlock_recovery(ls); 4453 dlm_unlock_recovery(ls);
4235 kfree(ua_tmp); 4454 kfree(ua_tmp);
4236 return error; 4455 return error;
4237} 4456}
4238 4457
4458int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid)
4459{
4460 struct dlm_lkb *lkb;
4461 struct dlm_args args;
4462 struct dlm_user_args *ua;
4463 struct dlm_rsb *r;
4464 int error;
4465
4466 dlm_lock_recovery(ls);
4467
4468 error = find_lkb(ls, lkid, &lkb);
4469 if (error)
4470 goto out;
4471
4472 ua = (struct dlm_user_args *)lkb->lkb_astparam;
4473
4474 error = set_unlock_args(flags, ua, &args);
4475 if (error)
4476 goto out_put;
4477
4478 /* same as cancel_lock(), but set DEADLOCK_CANCEL after lock_rsb */
4479
4480 r = lkb->lkb_resource;
4481 hold_rsb(r);
4482 lock_rsb(r);
4483
4484 error = validate_unlock_args(lkb, &args);
4485 if (error)
4486 goto out_r;
4487 lkb->lkb_flags |= DLM_IFL_DEADLOCK_CANCEL;
4488
4489 error = _cancel_lock(r, lkb);
4490 out_r:
4491 unlock_rsb(r);
4492 put_rsb(r);
4493
4494 if (error == -DLM_ECANCEL)
4495 error = 0;
4496 /* from validate_unlock_args() */
4497 if (error == -EBUSY)
4498 error = 0;
4499 out_put:
4500 dlm_put_lkb(lkb);
4501 out:
4502 dlm_unlock_recovery(ls);
4503 return error;
4504}
4505
4239/* lkb's that are removed from the waiters list by revert are just left on the 4506/* lkb's that are removed from the waiters list by revert are just left on the
4240 orphans list with the granted orphan locks, to be freed by purge */ 4507 orphans list with the granted orphan locks, to be freed by purge */
4241 4508
@@ -4314,12 +4581,13 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
4314{ 4581{
4315 struct dlm_lkb *lkb, *safe; 4582 struct dlm_lkb *lkb, *safe;
4316 4583
4317 lock_recovery(ls); 4584 dlm_lock_recovery(ls);
4318 4585
4319 while (1) { 4586 while (1) {
4320 lkb = del_proc_lock(ls, proc); 4587 lkb = del_proc_lock(ls, proc);
4321 if (!lkb) 4588 if (!lkb)
4322 break; 4589 break;
4590 del_timeout(lkb);
4323 if (lkb->lkb_exflags & DLM_LKF_PERSISTENT) 4591 if (lkb->lkb_exflags & DLM_LKF_PERSISTENT)
4324 orphan_proc_lock(ls, lkb); 4592 orphan_proc_lock(ls, lkb);
4325 else 4593 else
@@ -4347,7 +4615,7 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
4347 } 4615 }
4348 4616
4349 mutex_unlock(&ls->ls_clear_proc_locks); 4617 mutex_unlock(&ls->ls_clear_proc_locks);
4350 unlock_recovery(ls); 4618 dlm_unlock_recovery(ls);
4351} 4619}
4352 4620
4353static void purge_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc) 4621static void purge_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
@@ -4429,12 +4697,12 @@ int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc,
4429 if (nodeid != dlm_our_nodeid()) { 4697 if (nodeid != dlm_our_nodeid()) {
4430 error = send_purge(ls, nodeid, pid); 4698 error = send_purge(ls, nodeid, pid);
4431 } else { 4699 } else {
4432 lock_recovery(ls); 4700 dlm_lock_recovery(ls);
4433 if (pid == current->pid) 4701 if (pid == current->pid)
4434 purge_proc_locks(ls, proc); 4702 purge_proc_locks(ls, proc);
4435 else 4703 else
4436 do_purge(ls, nodeid, pid); 4704 do_purge(ls, nodeid, pid);
4437 unlock_recovery(ls); 4705 dlm_unlock_recovery(ls);
4438 } 4706 }
4439 return error; 4707 return error;
4440} 4708}
diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h
index 64fc4ec40668..1720313c22df 100644
--- a/fs/dlm/lock.h
+++ b/fs/dlm/lock.h
@@ -1,7 +1,7 @@
1/****************************************************************************** 1/******************************************************************************
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) 2005 Red Hat, Inc. All rights reserved. 4** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved.
5** 5**
6** This copyrighted material is made available to anyone wishing to use, 6** This copyrighted material is made available to anyone wishing to use,
7** modify, copy, or redistribute it subject to the terms and conditions 7** modify, copy, or redistribute it subject to the terms and conditions
@@ -24,6 +24,10 @@ void dlm_put_rsb(struct dlm_rsb *r);
24void dlm_hold_rsb(struct dlm_rsb *r); 24void dlm_hold_rsb(struct dlm_rsb *r);
25int dlm_put_lkb(struct dlm_lkb *lkb); 25int dlm_put_lkb(struct dlm_lkb *lkb);
26void dlm_scan_rsbs(struct dlm_ls *ls); 26void dlm_scan_rsbs(struct dlm_ls *ls);
27int dlm_lock_recovery_try(struct dlm_ls *ls);
28void dlm_unlock_recovery(struct dlm_ls *ls);
29void dlm_scan_timeout(struct dlm_ls *ls);
30void dlm_adjust_timeouts(struct dlm_ls *ls);
27 31
28int dlm_purge_locks(struct dlm_ls *ls); 32int dlm_purge_locks(struct dlm_ls *ls);
29void dlm_purge_mstcpy_locks(struct dlm_rsb *r); 33void dlm_purge_mstcpy_locks(struct dlm_rsb *r);
@@ -34,15 +38,18 @@ int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc);
34int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc); 38int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc);
35 39
36int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, int mode, 40int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, int mode,
37 uint32_t flags, void *name, unsigned int namelen, uint32_t parent_lkid); 41 uint32_t flags, void *name, unsigned int namelen,
42 unsigned long timeout_cs);
38int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, 43int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
39 int mode, uint32_t flags, uint32_t lkid, char *lvb_in); 44 int mode, uint32_t flags, uint32_t lkid, char *lvb_in,
45 unsigned long timeout_cs);
40int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, 46int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
41 uint32_t flags, uint32_t lkid, char *lvb_in); 47 uint32_t flags, uint32_t lkid, char *lvb_in);
42int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, 48int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
43 uint32_t flags, uint32_t lkid); 49 uint32_t flags, uint32_t lkid);
44int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc, 50int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc,
45 int nodeid, int pid); 51 int nodeid, int pid);
52int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid);
46void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc); 53void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc);
47 54
48static inline int is_master(struct dlm_rsb *r) 55static inline int is_master(struct dlm_rsb *r)
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index a677b2a5eed4..1dc72105ab12 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -197,13 +197,24 @@ static int do_uevent(struct dlm_ls *ls, int in)
197 else 197 else
198 kobject_uevent(&ls->ls_kobj, KOBJ_OFFLINE); 198 kobject_uevent(&ls->ls_kobj, KOBJ_OFFLINE);
199 199
200 log_debug(ls, "%s the lockspace group...", in ? "joining" : "leaving");
201
202 /* dlm_controld will see the uevent, do the necessary group management
203 and then write to sysfs to wake us */
204
200 error = wait_event_interruptible(ls->ls_uevent_wait, 205 error = wait_event_interruptible(ls->ls_uevent_wait,
201 test_and_clear_bit(LSFL_UEVENT_WAIT, &ls->ls_flags)); 206 test_and_clear_bit(LSFL_UEVENT_WAIT, &ls->ls_flags));
207
208 log_debug(ls, "group event done %d %d", error, ls->ls_uevent_result);
209
202 if (error) 210 if (error)
203 goto out; 211 goto out;
204 212
205 error = ls->ls_uevent_result; 213 error = ls->ls_uevent_result;
206 out: 214 out:
215 if (error)
216 log_error(ls, "group %s failed %d %d", in ? "join" : "leave",
217 error, ls->ls_uevent_result);
207 return error; 218 return error;
208} 219}
209 220
@@ -234,8 +245,13 @@ static int dlm_scand(void *data)
234 struct dlm_ls *ls; 245 struct dlm_ls *ls;
235 246
236 while (!kthread_should_stop()) { 247 while (!kthread_should_stop()) {
237 list_for_each_entry(ls, &lslist, ls_list) 248 list_for_each_entry(ls, &lslist, ls_list) {
238 dlm_scan_rsbs(ls); 249 if (dlm_lock_recovery_try(ls)) {
250 dlm_scan_rsbs(ls);
251 dlm_scan_timeout(ls);
252 dlm_unlock_recovery(ls);
253 }
254 }
239 schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ); 255 schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
240 } 256 }
241 return 0; 257 return 0;
@@ -395,6 +411,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
395{ 411{
396 struct dlm_ls *ls; 412 struct dlm_ls *ls;
397 int i, size, error = -ENOMEM; 413 int i, size, error = -ENOMEM;
414 int do_unreg = 0;
398 415
399 if (namelen > DLM_LOCKSPACE_LEN) 416 if (namelen > DLM_LOCKSPACE_LEN)
400 return -EINVAL; 417 return -EINVAL;
@@ -417,11 +434,22 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
417 goto out; 434 goto out;
418 memcpy(ls->ls_name, name, namelen); 435 memcpy(ls->ls_name, name, namelen);
419 ls->ls_namelen = namelen; 436 ls->ls_namelen = namelen;
420 ls->ls_exflags = flags;
421 ls->ls_lvblen = lvblen; 437 ls->ls_lvblen = lvblen;
422 ls->ls_count = 0; 438 ls->ls_count = 0;
423 ls->ls_flags = 0; 439 ls->ls_flags = 0;
424 440
441 if (flags & DLM_LSFL_TIMEWARN)
442 set_bit(LSFL_TIMEWARN, &ls->ls_flags);
443
444 if (flags & DLM_LSFL_FS)
445 ls->ls_allocation = GFP_NOFS;
446 else
447 ls->ls_allocation = GFP_KERNEL;
448
449 /* ls_exflags are forced to match among nodes, and we don't
450 need to require all nodes to have TIMEWARN or FS set */
451 ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS));
452
425 size = dlm_config.ci_rsbtbl_size; 453 size = dlm_config.ci_rsbtbl_size;
426 ls->ls_rsbtbl_size = size; 454 ls->ls_rsbtbl_size = size;
427 455
@@ -461,6 +489,8 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
461 mutex_init(&ls->ls_waiters_mutex); 489 mutex_init(&ls->ls_waiters_mutex);
462 INIT_LIST_HEAD(&ls->ls_orphans); 490 INIT_LIST_HEAD(&ls->ls_orphans);
463 mutex_init(&ls->ls_orphans_mutex); 491 mutex_init(&ls->ls_orphans_mutex);
492 INIT_LIST_HEAD(&ls->ls_timeout);
493 mutex_init(&ls->ls_timeout_mutex);
464 494
465 INIT_LIST_HEAD(&ls->ls_nodes); 495 INIT_LIST_HEAD(&ls->ls_nodes);
466 INIT_LIST_HEAD(&ls->ls_nodes_gone); 496 INIT_LIST_HEAD(&ls->ls_nodes_gone);
@@ -477,6 +507,8 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
477 507
478 init_waitqueue_head(&ls->ls_uevent_wait); 508 init_waitqueue_head(&ls->ls_uevent_wait);
479 ls->ls_uevent_result = 0; 509 ls->ls_uevent_result = 0;
510 init_completion(&ls->ls_members_done);
511 ls->ls_members_result = -1;
480 512
481 ls->ls_recoverd_task = NULL; 513 ls->ls_recoverd_task = NULL;
482 mutex_init(&ls->ls_recoverd_active); 514 mutex_init(&ls->ls_recoverd_active);
@@ -513,32 +545,49 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
513 error = dlm_recoverd_start(ls); 545 error = dlm_recoverd_start(ls);
514 if (error) { 546 if (error) {
515 log_error(ls, "can't start dlm_recoverd %d", error); 547 log_error(ls, "can't start dlm_recoverd %d", error);
516 goto out_rcomfree; 548 goto out_delist;
517 } 549 }
518 550
519 dlm_create_debug_file(ls);
520
521 error = kobject_setup(ls); 551 error = kobject_setup(ls);
522 if (error) 552 if (error)
523 goto out_del; 553 goto out_stop;
524 554
525 error = kobject_register(&ls->ls_kobj); 555 error = kobject_register(&ls->ls_kobj);
526 if (error) 556 if (error)
527 goto out_del; 557 goto out_stop;
558
559 /* let kobject handle freeing of ls if there's an error */
560 do_unreg = 1;
561
562 /* This uevent triggers dlm_controld in userspace to add us to the
563 group of nodes that are members of this lockspace (managed by the
564 cluster infrastructure.) Once it's done that, it tells us who the
565 current lockspace members are (via configfs) and then tells the
566 lockspace to start running (via sysfs) in dlm_ls_start(). */
528 567
529 error = do_uevent(ls, 1); 568 error = do_uevent(ls, 1);
530 if (error) 569 if (error)
531 goto out_unreg; 570 goto out_stop;
571
572 wait_for_completion(&ls->ls_members_done);
573 error = ls->ls_members_result;
574 if (error)
575 goto out_members;
576
577 dlm_create_debug_file(ls);
578
579 log_debug(ls, "join complete");
532 580
533 *lockspace = ls; 581 *lockspace = ls;
534 return 0; 582 return 0;
535 583
536 out_unreg: 584 out_members:
537 kobject_unregister(&ls->ls_kobj); 585 do_uevent(ls, 0);
538 out_del: 586 dlm_clear_members(ls);
539 dlm_delete_debug_file(ls); 587 kfree(ls->ls_node_array);
588 out_stop:
540 dlm_recoverd_stop(ls); 589 dlm_recoverd_stop(ls);
541 out_rcomfree: 590 out_delist:
542 spin_lock(&lslist_lock); 591 spin_lock(&lslist_lock);
543 list_del(&ls->ls_list); 592 list_del(&ls->ls_list);
544 spin_unlock(&lslist_lock); 593 spin_unlock(&lslist_lock);
@@ -550,7 +599,10 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
550 out_rsbfree: 599 out_rsbfree:
551 kfree(ls->ls_rsbtbl); 600 kfree(ls->ls_rsbtbl);
552 out_lsfree: 601 out_lsfree:
553 kfree(ls); 602 if (do_unreg)
603 kobject_unregister(&ls->ls_kobj);
604 else
605 kfree(ls);
554 out: 606 out:
555 module_put(THIS_MODULE); 607 module_put(THIS_MODULE);
556 return error; 608 return error;
@@ -570,6 +622,8 @@ int dlm_new_lockspace(char *name, int namelen, void **lockspace,
570 error = new_lockspace(name, namelen, lockspace, flags, lvblen); 622 error = new_lockspace(name, namelen, lockspace, flags, lvblen);
571 if (!error) 623 if (!error)
572 ls_count++; 624 ls_count++;
625 else if (!ls_count)
626 threads_stop();
573 out: 627 out:
574 mutex_unlock(&ls_lock); 628 mutex_unlock(&ls_lock);
575 return error; 629 return error;
@@ -696,7 +750,7 @@ static int release_lockspace(struct dlm_ls *ls, int force)
696 dlm_clear_members_gone(ls); 750 dlm_clear_members_gone(ls);
697 kfree(ls->ls_node_array); 751 kfree(ls->ls_node_array);
698 kobject_unregister(&ls->ls_kobj); 752 kobject_unregister(&ls->ls_kobj);
699 /* The ls structure will be freed when the kobject is done with */ 753 /* The ls structure will be freed when the kobject is done with */
700 754
701 mutex_lock(&ls_lock); 755 mutex_lock(&ls_lock);
702 ls_count--; 756 ls_count--;
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 27970a58d29b..0553a6158dcb 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -260,7 +260,7 @@ static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr)
260static void lowcomms_data_ready(struct sock *sk, int count_unused) 260static void lowcomms_data_ready(struct sock *sk, int count_unused)
261{ 261{
262 struct connection *con = sock2con(sk); 262 struct connection *con = sock2con(sk);
263 if (!test_and_set_bit(CF_READ_PENDING, &con->flags)) 263 if (con && !test_and_set_bit(CF_READ_PENDING, &con->flags))
264 queue_work(recv_workqueue, &con->rwork); 264 queue_work(recv_workqueue, &con->rwork);
265} 265}
266 266
@@ -268,7 +268,7 @@ static void lowcomms_write_space(struct sock *sk)
268{ 268{
269 struct connection *con = sock2con(sk); 269 struct connection *con = sock2con(sk);
270 270
271 if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) 271 if (con && !test_and_set_bit(CF_WRITE_PENDING, &con->flags))
272 queue_work(send_workqueue, &con->swork); 272 queue_work(send_workqueue, &con->swork);
273} 273}
274 274
@@ -720,11 +720,17 @@ static int tcp_accept_from_sock(struct connection *con)
720 INIT_WORK(&othercon->rwork, process_recv_sockets); 720 INIT_WORK(&othercon->rwork, process_recv_sockets);
721 set_bit(CF_IS_OTHERCON, &othercon->flags); 721 set_bit(CF_IS_OTHERCON, &othercon->flags);
722 newcon->othercon = othercon; 722 newcon->othercon = othercon;
723 othercon->sock = newsock;
724 newsock->sk->sk_user_data = othercon;
725 add_sock(newsock, othercon);
726 addcon = othercon;
727 }
728 else {
729 printk("Extra connection from node %d attempted\n", nodeid);
730 result = -EAGAIN;
731 mutex_unlock(&newcon->sock_mutex);
732 goto accept_err;
723 } 733 }
724 othercon->sock = newsock;
725 newsock->sk->sk_user_data = othercon;
726 add_sock(newsock, othercon);
727 addcon = othercon;
728 } 734 }
729 else { 735 else {
730 newsock->sk->sk_user_data = newcon; 736 newsock->sk->sk_user_data = newcon;
@@ -1400,8 +1406,11 @@ void dlm_lowcomms_stop(void)
1400 down(&connections_lock); 1406 down(&connections_lock);
1401 for (i = 0; i <= max_nodeid; i++) { 1407 for (i = 0; i <= max_nodeid; i++) {
1402 con = __nodeid2con(i, 0); 1408 con = __nodeid2con(i, 0);
1403 if (con) 1409 if (con) {
1404 con->flags |= 0xFF; 1410 con->flags |= 0xFF;
1411 if (con->sock)
1412 con->sock->sk->sk_user_data = NULL;
1413 }
1405 } 1414 }
1406 up(&connections_lock); 1415 up(&connections_lock);
1407 1416
diff --git a/fs/dlm/main.c b/fs/dlm/main.c
index 162fbae58fe5..eca2907f2386 100644
--- a/fs/dlm/main.c
+++ b/fs/dlm/main.c
@@ -2,7 +2,7 @@
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
5** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. 5** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
6** 6**
7** This copyrighted material is made available to anyone wishing to use, 7** This copyrighted material is made available to anyone wishing to use,
8** modify, copy, or redistribute it subject to the terms and conditions 8** modify, copy, or redistribute it subject to the terms and conditions
@@ -25,6 +25,8 @@ void dlm_unregister_debugfs(void);
25static inline int dlm_register_debugfs(void) { return 0; } 25static inline int dlm_register_debugfs(void) { return 0; }
26static inline void dlm_unregister_debugfs(void) { } 26static inline void dlm_unregister_debugfs(void) { }
27#endif 27#endif
28int dlm_netlink_init(void);
29void dlm_netlink_exit(void);
28 30
29static int __init init_dlm(void) 31static int __init init_dlm(void)
30{ 32{
@@ -50,10 +52,16 @@ static int __init init_dlm(void)
50 if (error) 52 if (error)
51 goto out_debug; 53 goto out_debug;
52 54
55 error = dlm_netlink_init();
56 if (error)
57 goto out_user;
58
53 printk("DLM (built %s %s) installed\n", __DATE__, __TIME__); 59 printk("DLM (built %s %s) installed\n", __DATE__, __TIME__);
54 60
55 return 0; 61 return 0;
56 62
63 out_user:
64 dlm_user_exit();
57 out_debug: 65 out_debug:
58 dlm_unregister_debugfs(); 66 dlm_unregister_debugfs();
59 out_config: 67 out_config:
@@ -68,6 +76,7 @@ static int __init init_dlm(void)
68 76
69static void __exit exit_dlm(void) 77static void __exit exit_dlm(void)
70{ 78{
79 dlm_netlink_exit();
71 dlm_user_exit(); 80 dlm_user_exit();
72 dlm_config_exit(); 81 dlm_config_exit();
73 dlm_memory_exit(); 82 dlm_memory_exit();
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index 85e2897bd740..073599dced2a 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -1,7 +1,7 @@
1/****************************************************************************** 1/******************************************************************************
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) 2005 Red Hat, Inc. All rights reserved. 4** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved.
5** 5**
6** This copyrighted material is made available to anyone wishing to use, 6** This copyrighted material is made available to anyone wishing to use,
7** modify, copy, or redistribute it subject to the terms and conditions 7** modify, copy, or redistribute it subject to the terms and conditions
@@ -233,6 +233,12 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
233 *neg_out = neg; 233 *neg_out = neg;
234 234
235 error = ping_members(ls); 235 error = ping_members(ls);
236 if (!error || error == -EPROTO) {
237 /* new_lockspace() may be waiting to know if the config
238 is good or bad */
239 ls->ls_members_result = error;
240 complete(&ls->ls_members_done);
241 }
236 if (error) 242 if (error)
237 goto out; 243 goto out;
238 244
@@ -284,6 +290,9 @@ int dlm_ls_stop(struct dlm_ls *ls)
284 dlm_recoverd_suspend(ls); 290 dlm_recoverd_suspend(ls);
285 ls->ls_recover_status = 0; 291 ls->ls_recover_status = 0;
286 dlm_recoverd_resume(ls); 292 dlm_recoverd_resume(ls);
293
294 if (!ls->ls_recover_begin)
295 ls->ls_recover_begin = jiffies;
287 return 0; 296 return 0;
288} 297}
289 298
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c
new file mode 100644
index 000000000000..863b87d0dc71
--- /dev/null
+++ b/fs/dlm/netlink.c
@@ -0,0 +1,153 @@
1/*
2 * Copyright (C) 2007 Red Hat, Inc. All rights reserved.
3 *
4 * This copyrighted material is made available to anyone wishing to use,
5 * modify, copy, or redistribute it subject to the terms and conditions
6 * of the GNU General Public License v.2.
7 */
8
9#include <net/genetlink.h>
10#include <linux/dlm.h>
11#include <linux/dlm_netlink.h>
12
13#include "dlm_internal.h"
14
15static uint32_t dlm_nl_seqnum;
16static uint32_t listener_nlpid;
17
18static struct genl_family family = {
19 .id = GENL_ID_GENERATE,
20 .name = DLM_GENL_NAME,
21 .version = DLM_GENL_VERSION,
22};
23
24static int prepare_data(u8 cmd, struct sk_buff **skbp, size_t size)
25{
26 struct sk_buff *skb;
27 void *data;
28
29 skb = genlmsg_new(size, GFP_KERNEL);
30 if (!skb)
31 return -ENOMEM;
32
33 /* add the message headers */
34 data = genlmsg_put(skb, 0, dlm_nl_seqnum++, &family, 0, cmd);
35 if (!data) {
36 nlmsg_free(skb);
37 return -EINVAL;
38 }
39
40 *skbp = skb;
41 return 0;
42}
43
44static struct dlm_lock_data *mk_data(struct sk_buff *skb)
45{
46 struct nlattr *ret;
47
48 ret = nla_reserve(skb, DLM_TYPE_LOCK, sizeof(struct dlm_lock_data));
49 if (!ret)
50 return NULL;
51 return nla_data(ret);
52}
53
54static int send_data(struct sk_buff *skb)
55{
56 struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data);
57 void *data = genlmsg_data(genlhdr);
58 int rv;
59
60 rv = genlmsg_end(skb, data);
61 if (rv < 0) {
62 nlmsg_free(skb);
63 return rv;
64 }
65
66 return genlmsg_unicast(skb, listener_nlpid);
67}
68
69static int user_cmd(struct sk_buff *skb, struct genl_info *info)
70{
71 listener_nlpid = info->snd_pid;
72 printk("user_cmd nlpid %u\n", listener_nlpid);
73 return 0;
74}
75
76static struct genl_ops dlm_nl_ops = {
77 .cmd = DLM_CMD_HELLO,
78 .doit = user_cmd,
79};
80
81int dlm_netlink_init(void)
82{
83 int rv;
84
85 rv = genl_register_family(&family);
86 if (rv)
87 return rv;
88
89 rv = genl_register_ops(&family, &dlm_nl_ops);
90 if (rv < 0)
91 goto err;
92 return 0;
93 err:
94 genl_unregister_family(&family);
95 return rv;
96}
97
98void dlm_netlink_exit(void)
99{
100 genl_unregister_ops(&family, &dlm_nl_ops);
101 genl_unregister_family(&family);
102}
103
104static void fill_data(struct dlm_lock_data *data, struct dlm_lkb *lkb)
105{
106 struct dlm_rsb *r = lkb->lkb_resource;
107 struct dlm_user_args *ua = (struct dlm_user_args *) lkb->lkb_astparam;
108
109 memset(data, 0, sizeof(struct dlm_lock_data));
110
111 data->version = DLM_LOCK_DATA_VERSION;
112 data->nodeid = lkb->lkb_nodeid;
113 data->ownpid = lkb->lkb_ownpid;
114 data->id = lkb->lkb_id;
115 data->remid = lkb->lkb_remid;
116 data->status = lkb->lkb_status;
117 data->grmode = lkb->lkb_grmode;
118 data->rqmode = lkb->lkb_rqmode;
119 data->timestamp = lkb->lkb_timestamp;
120 if (ua)
121 data->xid = ua->xid;
122 if (r) {
123 data->lockspace_id = r->res_ls->ls_global_id;
124 data->resource_namelen = r->res_length;
125 memcpy(data->resource_name, r->res_name, r->res_length);
126 }
127}
128
129void dlm_timeout_warn(struct dlm_lkb *lkb)
130{
131 struct dlm_lock_data *data;
132 struct sk_buff *send_skb;
133 size_t size;
134 int rv;
135
136 size = nla_total_size(sizeof(struct dlm_lock_data)) +
137 nla_total_size(0); /* why this? */
138
139 rv = prepare_data(DLM_CMD_TIMEOUT, &send_skb, size);
140 if (rv < 0)
141 return;
142
143 data = mk_data(send_skb);
144 if (!data) {
145 nlmsg_free(send_skb);
146 return;
147 }
148
149 fill_data(data, lkb);
150
151 send_data(send_skb);
152}
153
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 6bfbd6153809..e3a1527cbdbe 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -38,7 +38,7 @@ static int create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len,
38 char *mb; 38 char *mb;
39 int mb_len = sizeof(struct dlm_rcom) + len; 39 int mb_len = sizeof(struct dlm_rcom) + len;
40 40
41 mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_KERNEL, &mb); 41 mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, ls->ls_allocation, &mb);
42 if (!mh) { 42 if (!mh) {
43 log_print("create_rcom to %d type %d len %d ENOBUFS", 43 log_print("create_rcom to %d type %d len %d ENOBUFS",
44 to_nodeid, type, len); 44 to_nodeid, type, len);
@@ -90,7 +90,7 @@ static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
90 log_error(ls, "version mismatch: %x nodeid %d: %x", 90 log_error(ls, "version mismatch: %x nodeid %d: %x",
91 DLM_HEADER_MAJOR | DLM_HEADER_MINOR, nodeid, 91 DLM_HEADER_MAJOR | DLM_HEADER_MINOR, nodeid,
92 rc->rc_header.h_version); 92 rc->rc_header.h_version);
93 return -EINVAL; 93 return -EPROTO;
94 } 94 }
95 95
96 if (rf->rf_lvblen != ls->ls_lvblen || 96 if (rf->rf_lvblen != ls->ls_lvblen ||
@@ -98,7 +98,7 @@ static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
98 log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x", 98 log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x",
99 ls->ls_lvblen, ls->ls_exflags, 99 ls->ls_lvblen, ls->ls_exflags,
100 nodeid, rf->rf_lvblen, rf->rf_lsflags); 100 nodeid, rf->rf_lvblen, rf->rf_lsflags);
101 return -EINVAL; 101 return -EPROTO;
102 } 102 }
103 return 0; 103 return 0;
104} 104}
@@ -386,7 +386,8 @@ static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
386 dlm_recover_process_copy(ls, rc_in); 386 dlm_recover_process_copy(ls, rc_in);
387} 387}
388 388
389static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in) 389static int send_ls_not_ready(struct dlm_ls *ls, int nodeid,
390 struct dlm_rcom *rc_in)
390{ 391{
391 struct dlm_rcom *rc; 392 struct dlm_rcom *rc;
392 struct rcom_config *rf; 393 struct rcom_config *rf;
@@ -394,7 +395,7 @@ static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
394 char *mb; 395 char *mb;
395 int mb_len = sizeof(struct dlm_rcom) + sizeof(struct rcom_config); 396 int mb_len = sizeof(struct dlm_rcom) + sizeof(struct rcom_config);
396 397
397 mh = dlm_lowcomms_get_buffer(nodeid, mb_len, GFP_KERNEL, &mb); 398 mh = dlm_lowcomms_get_buffer(nodeid, mb_len, ls->ls_allocation, &mb);
398 if (!mh) 399 if (!mh)
399 return -ENOBUFS; 400 return -ENOBUFS;
400 memset(mb, 0, mb_len); 401 memset(mb, 0, mb_len);
@@ -464,7 +465,7 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid)
464 log_print("lockspace %x from %d type %x not found", 465 log_print("lockspace %x from %d type %x not found",
465 hd->h_lockspace, nodeid, rc->rc_type); 466 hd->h_lockspace, nodeid, rc->rc_type);
466 if (rc->rc_type == DLM_RCOM_STATUS) 467 if (rc->rc_type == DLM_RCOM_STATUS)
467 send_ls_not_ready(nodeid, rc); 468 send_ls_not_ready(ls, nodeid, rc);
468 return; 469 return;
469 } 470 }
470 471
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index 3cb636d60249..66575997861c 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -2,7 +2,7 @@
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
5** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. 5** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
6** 6**
7** This copyrighted material is made available to anyone wishing to use, 7** This copyrighted material is made available to anyone wishing to use,
8** modify, copy, or redistribute it subject to the terms and conditions 8** modify, copy, or redistribute it subject to the terms and conditions
@@ -190,6 +190,8 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
190 190
191 dlm_clear_members_gone(ls); 191 dlm_clear_members_gone(ls);
192 192
193 dlm_adjust_timeouts(ls);
194
193 error = enable_locking(ls, rv->seq); 195 error = enable_locking(ls, rv->seq);
194 if (error) { 196 if (error) {
195 log_debug(ls, "enable_locking failed %d", error); 197 log_debug(ls, "enable_locking failed %d", error);
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index b0201ec325a7..6438941ab1f8 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -33,16 +33,17 @@ static const struct file_operations device_fops;
33struct dlm_lock_params32 { 33struct dlm_lock_params32 {
34 __u8 mode; 34 __u8 mode;
35 __u8 namelen; 35 __u8 namelen;
36 __u16 flags; 36 __u16 unused;
37 __u32 flags;
37 __u32 lkid; 38 __u32 lkid;
38 __u32 parent; 39 __u32 parent;
39 40 __u64 xid;
41 __u64 timeout;
40 __u32 castparam; 42 __u32 castparam;
41 __u32 castaddr; 43 __u32 castaddr;
42 __u32 bastparam; 44 __u32 bastparam;
43 __u32 bastaddr; 45 __u32 bastaddr;
44 __u32 lksb; 46 __u32 lksb;
45
46 char lvb[DLM_USER_LVB_LEN]; 47 char lvb[DLM_USER_LVB_LEN];
47 char name[0]; 48 char name[0];
48}; 49};
@@ -68,6 +69,7 @@ struct dlm_lksb32 {
68}; 69};
69 70
70struct dlm_lock_result32 { 71struct dlm_lock_result32 {
72 __u32 version[3];
71 __u32 length; 73 __u32 length;
72 __u32 user_astaddr; 74 __u32 user_astaddr;
73 __u32 user_astparam; 75 __u32 user_astparam;
@@ -102,6 +104,8 @@ static void compat_input(struct dlm_write_request *kb,
102 kb->i.lock.flags = kb32->i.lock.flags; 104 kb->i.lock.flags = kb32->i.lock.flags;
103 kb->i.lock.lkid = kb32->i.lock.lkid; 105 kb->i.lock.lkid = kb32->i.lock.lkid;
104 kb->i.lock.parent = kb32->i.lock.parent; 106 kb->i.lock.parent = kb32->i.lock.parent;
107 kb->i.lock.xid = kb32->i.lock.xid;
108 kb->i.lock.timeout = kb32->i.lock.timeout;
105 kb->i.lock.castparam = (void *)(long)kb32->i.lock.castparam; 109 kb->i.lock.castparam = (void *)(long)kb32->i.lock.castparam;
106 kb->i.lock.castaddr = (void *)(long)kb32->i.lock.castaddr; 110 kb->i.lock.castaddr = (void *)(long)kb32->i.lock.castaddr;
107 kb->i.lock.bastparam = (void *)(long)kb32->i.lock.bastparam; 111 kb->i.lock.bastparam = (void *)(long)kb32->i.lock.bastparam;
@@ -115,6 +119,10 @@ static void compat_input(struct dlm_write_request *kb,
115static void compat_output(struct dlm_lock_result *res, 119static void compat_output(struct dlm_lock_result *res,
116 struct dlm_lock_result32 *res32) 120 struct dlm_lock_result32 *res32)
117{ 121{
122 res32->version[0] = res->version[0];
123 res32->version[1] = res->version[1];
124 res32->version[2] = res->version[2];
125
118 res32->user_astaddr = (__u32)(long)res->user_astaddr; 126 res32->user_astaddr = (__u32)(long)res->user_astaddr;
119 res32->user_astparam = (__u32)(long)res->user_astparam; 127 res32->user_astparam = (__u32)(long)res->user_astparam;
120 res32->user_lksb = (__u32)(long)res->user_lksb; 128 res32->user_lksb = (__u32)(long)res->user_lksb;
@@ -130,6 +138,36 @@ static void compat_output(struct dlm_lock_result *res,
130} 138}
131#endif 139#endif
132 140
141/* Figure out if this lock is at the end of its life and no longer
142 available for the application to use. The lkb still exists until
143 the final ast is read. A lock becomes EOL in three situations:
144 1. a noqueue request fails with EAGAIN
145 2. an unlock completes with EUNLOCK
146 3. a cancel of a waiting request completes with ECANCEL/EDEADLK
147 An EOL lock needs to be removed from the process's list of locks.
148 And we can't allow any new operation on an EOL lock. This is
149 not related to the lifetime of the lkb struct which is managed
150 entirely by refcount. */
151
152static int lkb_is_endoflife(struct dlm_lkb *lkb, int sb_status, int type)
153{
154 switch (sb_status) {
155 case -DLM_EUNLOCK:
156 return 1;
157 case -DLM_ECANCEL:
158 case -ETIMEDOUT:
159 case -EDEADLK:
160 if (lkb->lkb_grmode == DLM_LOCK_IV)
161 return 1;
162 break;
163 case -EAGAIN:
164 if (type == AST_COMP && lkb->lkb_grmode == DLM_LOCK_IV)
165 return 1;
166 break;
167 }
168 return 0;
169}
170
133/* we could possibly check if the cancel of an orphan has resulted in the lkb 171/* we could possibly check if the cancel of an orphan has resulted in the lkb
134 being removed and then remove that lkb from the orphans list and free it */ 172 being removed and then remove that lkb from the orphans list and free it */
135 173
@@ -176,25 +214,7 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type)
176 log_debug(ls, "ast overlap %x status %x %x", 214 log_debug(ls, "ast overlap %x status %x %x",
177 lkb->lkb_id, ua->lksb.sb_status, lkb->lkb_flags); 215 lkb->lkb_id, ua->lksb.sb_status, lkb->lkb_flags);
178 216
179 /* Figure out if this lock is at the end of its life and no longer 217 eol = lkb_is_endoflife(lkb, ua->lksb.sb_status, type);
180 available for the application to use. The lkb still exists until
181 the final ast is read. A lock becomes EOL in three situations:
182 1. a noqueue request fails with EAGAIN
183 2. an unlock completes with EUNLOCK
184 3. a cancel of a waiting request completes with ECANCEL
185 An EOL lock needs to be removed from the process's list of locks.
186 And we can't allow any new operation on an EOL lock. This is
187 not related to the lifetime of the lkb struct which is managed
188 entirely by refcount. */
189
190 if (type == AST_COMP &&
191 lkb->lkb_grmode == DLM_LOCK_IV &&
192 ua->lksb.sb_status == -EAGAIN)
193 eol = 1;
194 else if (ua->lksb.sb_status == -DLM_EUNLOCK ||
195 (ua->lksb.sb_status == -DLM_ECANCEL &&
196 lkb->lkb_grmode == DLM_LOCK_IV))
197 eol = 1;
198 if (eol) { 218 if (eol) {
199 lkb->lkb_ast_type &= ~AST_BAST; 219 lkb->lkb_ast_type &= ~AST_BAST;
200 lkb->lkb_flags |= DLM_IFL_ENDOFLIFE; 220 lkb->lkb_flags |= DLM_IFL_ENDOFLIFE;
@@ -252,16 +272,18 @@ static int device_user_lock(struct dlm_user_proc *proc,
252 ua->castaddr = params->castaddr; 272 ua->castaddr = params->castaddr;
253 ua->bastparam = params->bastparam; 273 ua->bastparam = params->bastparam;
254 ua->bastaddr = params->bastaddr; 274 ua->bastaddr = params->bastaddr;
275 ua->xid = params->xid;
255 276
256 if (params->flags & DLM_LKF_CONVERT) 277 if (params->flags & DLM_LKF_CONVERT)
257 error = dlm_user_convert(ls, ua, 278 error = dlm_user_convert(ls, ua,
258 params->mode, params->flags, 279 params->mode, params->flags,
259 params->lkid, params->lvb); 280 params->lkid, params->lvb,
281 (unsigned long) params->timeout);
260 else { 282 else {
261 error = dlm_user_request(ls, ua, 283 error = dlm_user_request(ls, ua,
262 params->mode, params->flags, 284 params->mode, params->flags,
263 params->name, params->namelen, 285 params->name, params->namelen,
264 params->parent); 286 (unsigned long) params->timeout);
265 if (!error) 287 if (!error)
266 error = ua->lksb.sb_lkid; 288 error = ua->lksb.sb_lkid;
267 } 289 }
@@ -299,6 +321,22 @@ static int device_user_unlock(struct dlm_user_proc *proc,
299 return error; 321 return error;
300} 322}
301 323
324static int device_user_deadlock(struct dlm_user_proc *proc,
325 struct dlm_lock_params *params)
326{
327 struct dlm_ls *ls;
328 int error;
329
330 ls = dlm_find_lockspace_local(proc->lockspace);
331 if (!ls)
332 return -ENOENT;
333
334 error = dlm_user_deadlock(ls, params->flags, params->lkid);
335
336 dlm_put_lockspace(ls);
337 return error;
338}
339
302static int create_misc_device(struct dlm_ls *ls, char *name) 340static int create_misc_device(struct dlm_ls *ls, char *name)
303{ 341{
304 int error, len; 342 int error, len;
@@ -348,7 +386,7 @@ static int device_create_lockspace(struct dlm_lspace_params *params)
348 return -EPERM; 386 return -EPERM;
349 387
350 error = dlm_new_lockspace(params->name, strlen(params->name), 388 error = dlm_new_lockspace(params->name, strlen(params->name),
351 &lockspace, 0, DLM_USER_LVB_LEN); 389 &lockspace, params->flags, DLM_USER_LVB_LEN);
352 if (error) 390 if (error)
353 return error; 391 return error;
354 392
@@ -524,6 +562,14 @@ static ssize_t device_write(struct file *file, const char __user *buf,
524 error = device_user_unlock(proc, &kbuf->i.lock); 562 error = device_user_unlock(proc, &kbuf->i.lock);
525 break; 563 break;
526 564
565 case DLM_USER_DEADLOCK:
566 if (!proc) {
567 log_print("no locking on control device");
568 goto out_sig;
569 }
570 error = device_user_deadlock(proc, &kbuf->i.lock);
571 break;
572
527 case DLM_USER_CREATE_LOCKSPACE: 573 case DLM_USER_CREATE_LOCKSPACE:
528 if (proc) { 574 if (proc) {
529 log_print("create/remove only on control device"); 575 log_print("create/remove only on control device");
@@ -641,6 +687,9 @@ static int copy_result_to_user(struct dlm_user_args *ua, int compat, int type,
641 int struct_len; 687 int struct_len;
642 688
643 memset(&result, 0, sizeof(struct dlm_lock_result)); 689 memset(&result, 0, sizeof(struct dlm_lock_result));
690 result.version[0] = DLM_DEVICE_VERSION_MAJOR;
691 result.version[1] = DLM_DEVICE_VERSION_MINOR;
692 result.version[2] = DLM_DEVICE_VERSION_PATCH;
644 memcpy(&result.lksb, &ua->lksb, sizeof(struct dlm_lksb)); 693 memcpy(&result.lksb, &ua->lksb, sizeof(struct dlm_lksb));
645 result.user_lksb = ua->user_lksb; 694 result.user_lksb = ua->user_lksb;
646 695
@@ -699,6 +748,20 @@ static int copy_result_to_user(struct dlm_user_args *ua, int compat, int type,
699 return error; 748 return error;
700} 749}
701 750
751static int copy_version_to_user(char __user *buf, size_t count)
752{
753 struct dlm_device_version ver;
754
755 memset(&ver, 0, sizeof(struct dlm_device_version));
756 ver.version[0] = DLM_DEVICE_VERSION_MAJOR;
757 ver.version[1] = DLM_DEVICE_VERSION_MINOR;
758 ver.version[2] = DLM_DEVICE_VERSION_PATCH;
759
760 if (copy_to_user(buf, &ver, sizeof(struct dlm_device_version)))
761 return -EFAULT;
762 return sizeof(struct dlm_device_version);
763}
764
702/* a read returns a single ast described in a struct dlm_lock_result */ 765/* a read returns a single ast described in a struct dlm_lock_result */
703 766
704static ssize_t device_read(struct file *file, char __user *buf, size_t count, 767static ssize_t device_read(struct file *file, char __user *buf, size_t count,
@@ -710,6 +773,16 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
710 DECLARE_WAITQUEUE(wait, current); 773 DECLARE_WAITQUEUE(wait, current);
711 int error, type=0, bmode=0, removed = 0; 774 int error, type=0, bmode=0, removed = 0;
712 775
776 if (count == sizeof(struct dlm_device_version)) {
777 error = copy_version_to_user(buf, count);
778 return error;
779 }
780
781 if (!proc) {
782 log_print("non-version read from control device %zu", count);
783 return -EINVAL;
784 }
785
713#ifdef CONFIG_COMPAT 786#ifdef CONFIG_COMPAT
714 if (count < sizeof(struct dlm_lock_result32)) 787 if (count < sizeof(struct dlm_lock_result32))
715#else 788#else
@@ -747,11 +820,6 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
747 } 820 }
748 } 821 }
749 822
750 if (list_empty(&proc->asts)) {
751 spin_unlock(&proc->asts_spin);
752 return -EAGAIN;
753 }
754
755 /* there may be both completion and blocking asts to return for 823 /* there may be both completion and blocking asts to return for
756 the lkb, don't remove lkb from asts list unless no asts remain */ 824 the lkb, don't remove lkb from asts list unless no asts remain */
757 825
@@ -823,6 +891,7 @@ static const struct file_operations device_fops = {
823static const struct file_operations ctl_device_fops = { 891static const struct file_operations ctl_device_fops = {
824 .open = ctl_device_open, 892 .open = ctl_device_open,
825 .release = ctl_device_close, 893 .release = ctl_device_close,
894 .read = device_read,
826 .write = device_write, 895 .write = device_write,
827 .owner = THIS_MODULE, 896 .owner = THIS_MODULE,
828}; 897};
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 403e3bad1455..1b9dd9a96f19 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -580,5 +580,7 @@ void
580ecryptfs_write_header_metadata(char *virt, 580ecryptfs_write_header_metadata(char *virt,
581 struct ecryptfs_crypt_stat *crypt_stat, 581 struct ecryptfs_crypt_stat *crypt_stat,
582 size_t *written); 582 size_t *written);
583int ecryptfs_write_zeros(struct file *file, pgoff_t index, int start,
584 int num_zeros);
583 585
584#endif /* #ifndef ECRYPTFS_KERNEL_H */ 586#endif /* #ifndef ECRYPTFS_KERNEL_H */
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 59288d817078..94f456fe4d9b 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -338,16 +338,17 @@ static int ecryptfs_fasync(int fd, struct file *file, int flag)
338 return rc; 338 return rc;
339} 339}
340 340
341static ssize_t ecryptfs_sendfile(struct file *file, loff_t * ppos, 341static ssize_t ecryptfs_splice_read(struct file *file, loff_t * ppos,
342 size_t count, read_actor_t actor, void *target) 342 struct pipe_inode_info *pipe, size_t count,
343 unsigned int flags)
343{ 344{
344 struct file *lower_file = NULL; 345 struct file *lower_file = NULL;
345 int rc = -EINVAL; 346 int rc = -EINVAL;
346 347
347 lower_file = ecryptfs_file_to_lower(file); 348 lower_file = ecryptfs_file_to_lower(file);
348 if (lower_file->f_op && lower_file->f_op->sendfile) 349 if (lower_file->f_op && lower_file->f_op->splice_read)
349 rc = lower_file->f_op->sendfile(lower_file, ppos, count, 350 rc = lower_file->f_op->splice_read(lower_file, ppos, pipe,
350 actor, target); 351 count, flags);
351 352
352 return rc; 353 return rc;
353} 354}
@@ -364,7 +365,7 @@ const struct file_operations ecryptfs_dir_fops = {
364 .release = ecryptfs_release, 365 .release = ecryptfs_release,
365 .fsync = ecryptfs_fsync, 366 .fsync = ecryptfs_fsync,
366 .fasync = ecryptfs_fasync, 367 .fasync = ecryptfs_fasync,
367 .sendfile = ecryptfs_sendfile, 368 .splice_read = ecryptfs_splice_read,
368}; 369};
369 370
370const struct file_operations ecryptfs_main_fops = { 371const struct file_operations ecryptfs_main_fops = {
@@ -381,7 +382,7 @@ const struct file_operations ecryptfs_main_fops = {
381 .release = ecryptfs_release, 382 .release = ecryptfs_release,
382 .fsync = ecryptfs_fsync, 383 .fsync = ecryptfs_fsync,
383 .fasync = ecryptfs_fasync, 384 .fasync = ecryptfs_fasync,
384 .sendfile = ecryptfs_sendfile, 385 .splice_read = ecryptfs_splice_read,
385}; 386};
386 387
387static int 388static int
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 1548be26b5e6..83e94fedd4e9 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -800,6 +800,25 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
800 goto out_fput; 800 goto out_fput;
801 } 801 }
802 } else { /* new_length < i_size_read(inode) */ 802 } else { /* new_length < i_size_read(inode) */
803 pgoff_t index = 0;
804 int end_pos_in_page = -1;
805
806 if (new_length != 0) {
807 index = ((new_length - 1) >> PAGE_CACHE_SHIFT);
808 end_pos_in_page = ((new_length - 1) & ~PAGE_CACHE_MASK);
809 }
810 if (end_pos_in_page != (PAGE_CACHE_SIZE - 1)) {
811 if ((rc = ecryptfs_write_zeros(&fake_ecryptfs_file,
812 index,
813 (end_pos_in_page + 1),
814 ((PAGE_CACHE_SIZE - 1)
815 - end_pos_in_page)))) {
816 printk(KERN_ERR "Error attempting to zero out "
817 "the remainder of the end page on "
818 "reducing truncate; rc = [%d]\n", rc);
819 goto out_fput;
820 }
821 }
803 vmtruncate(inode, new_length); 822 vmtruncate(inode, new_length);
804 rc = ecryptfs_write_inode_size_to_metadata( 823 rc = ecryptfs_write_inode_size_to_metadata(
805 lower_file, lower_dentry->d_inode, inode, dentry, 824 lower_file, lower_dentry->d_inode, inode, dentry,
@@ -875,9 +894,54 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
875 struct ecryptfs_crypt_stat *crypt_stat; 894 struct ecryptfs_crypt_stat *crypt_stat;
876 895
877 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; 896 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
878 lower_dentry = ecryptfs_dentry_to_lower(dentry); 897 if (!(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED))
898 ecryptfs_init_crypt_stat(crypt_stat);
879 inode = dentry->d_inode; 899 inode = dentry->d_inode;
880 lower_inode = ecryptfs_inode_to_lower(inode); 900 lower_inode = ecryptfs_inode_to_lower(inode);
901 lower_dentry = ecryptfs_dentry_to_lower(dentry);
902 mutex_lock(&crypt_stat->cs_mutex);
903 if (S_ISDIR(dentry->d_inode->i_mode))
904 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
905 else if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)
906 || !(crypt_stat->flags & ECRYPTFS_KEY_VALID)) {
907 struct vfsmount *lower_mnt;
908 struct file *lower_file = NULL;
909 struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
910 int lower_flags;
911
912 lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
913 lower_flags = O_RDONLY;
914 if ((rc = ecryptfs_open_lower_file(&lower_file, lower_dentry,
915 lower_mnt, lower_flags))) {
916 printk(KERN_ERR
917 "Error opening lower file; rc = [%d]\n", rc);
918 mutex_unlock(&crypt_stat->cs_mutex);
919 goto out;
920 }
921 mount_crypt_stat = &ecryptfs_superblock_to_private(
922 dentry->d_sb)->mount_crypt_stat;
923 if ((rc = ecryptfs_read_metadata(dentry, lower_file))) {
924 if (!(mount_crypt_stat->flags
925 & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED)) {
926 rc = -EIO;
927 printk(KERN_WARNING "Attempt to read file that "
928 "is not in a valid eCryptfs format, "
929 "and plaintext passthrough mode is not "
930 "enabled; returning -EIO\n");
931
932 mutex_unlock(&crypt_stat->cs_mutex);
933 fput(lower_file);
934 goto out;
935 }
936 rc = 0;
937 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
938 mutex_unlock(&crypt_stat->cs_mutex);
939 fput(lower_file);
940 goto out;
941 }
942 fput(lower_file);
943 }
944 mutex_unlock(&crypt_stat->cs_mutex);
881 if (ia->ia_valid & ATTR_SIZE) { 945 if (ia->ia_valid & ATTR_SIZE) {
882 ecryptfs_printk(KERN_DEBUG, 946 ecryptfs_printk(KERN_DEBUG,
883 "ia->ia_valid = [0x%x] ATTR_SIZE" " = [0x%x]\n", 947 "ia->ia_valid = [0x%x] ATTR_SIZE" " = [0x%x]\n",
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 606128f5c927..02ca6f1e55d7 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -840,8 +840,6 @@ static int __init ecryptfs_init(void)
840 goto out; 840 goto out;
841 } 841 }
842 kobj_set_kset_s(&ecryptfs_subsys, fs_subsys); 842 kobj_set_kset_s(&ecryptfs_subsys, fs_subsys);
843 sysfs_attr_version.attr.owner = THIS_MODULE;
844 sysfs_attr_version_str.attr.owner = THIS_MODULE;
845 rc = do_sysfs_registration(); 843 rc = do_sysfs_registration();
846 if (rc) { 844 if (rc) {
847 printk(KERN_ERR "sysfs registration failed\n"); 845 printk(KERN_ERR "sysfs registration failed\n");
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 55cec98a84e7..7d5a43cb0d5c 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -56,9 +56,6 @@ static struct page *ecryptfs_get1page(struct file *file, int index)
56 return read_mapping_page(mapping, index, (void *)file); 56 return read_mapping_page(mapping, index, (void *)file);
57} 57}
58 58
59static
60int write_zeros(struct file *file, pgoff_t index, int start, int num_zeros);
61
62/** 59/**
63 * ecryptfs_fill_zeros 60 * ecryptfs_fill_zeros
64 * @file: The ecryptfs file 61 * @file: The ecryptfs file
@@ -101,10 +98,13 @@ int ecryptfs_fill_zeros(struct file *file, loff_t new_length)
101 if (old_end_page_index == new_end_page_index) { 98 if (old_end_page_index == new_end_page_index) {
102 /* Start and end are in the same page; we just need to 99 /* Start and end are in the same page; we just need to
103 * set a portion of the existing page to zero's */ 100 * set a portion of the existing page to zero's */
104 rc = write_zeros(file, index, (old_end_pos_in_page + 1), 101 rc = ecryptfs_write_zeros(file, index,
105 (new_end_pos_in_page - old_end_pos_in_page)); 102 (old_end_pos_in_page + 1),
103 (new_end_pos_in_page
104 - old_end_pos_in_page));
106 if (rc) 105 if (rc)
107 ecryptfs_printk(KERN_ERR, "write_zeros(file=[%p], " 106 ecryptfs_printk(KERN_ERR, "ecryptfs_write_zeros("
107 "file=[%p], "
108 "index=[0x%.16x], " 108 "index=[0x%.16x], "
109 "old_end_pos_in_page=[d], " 109 "old_end_pos_in_page=[d], "
110 "(PAGE_CACHE_SIZE - new_end_pos_in_page" 110 "(PAGE_CACHE_SIZE - new_end_pos_in_page"
@@ -117,10 +117,10 @@ int ecryptfs_fill_zeros(struct file *file, loff_t new_length)
117 goto out; 117 goto out;
118 } 118 }
119 /* Fill the remainder of the previous last page with zeros */ 119 /* Fill the remainder of the previous last page with zeros */
120 rc = write_zeros(file, index, (old_end_pos_in_page + 1), 120 rc = ecryptfs_write_zeros(file, index, (old_end_pos_in_page + 1),
121 ((PAGE_CACHE_SIZE - 1) - old_end_pos_in_page)); 121 ((PAGE_CACHE_SIZE - 1) - old_end_pos_in_page));
122 if (rc) { 122 if (rc) {
123 ecryptfs_printk(KERN_ERR, "write_zeros(file=[%p], " 123 ecryptfs_printk(KERN_ERR, "ecryptfs_write_zeros(file=[%p], "
124 "index=[0x%.16x], old_end_pos_in_page=[d], " 124 "index=[0x%.16x], old_end_pos_in_page=[d], "
125 "(PAGE_CACHE_SIZE - old_end_pos_in_page)=[d]) " 125 "(PAGE_CACHE_SIZE - old_end_pos_in_page)=[d]) "
126 "returned [%d]\n", file, index, 126 "returned [%d]\n", file, index,
@@ -131,9 +131,10 @@ int ecryptfs_fill_zeros(struct file *file, loff_t new_length)
131 index++; 131 index++;
132 while (index < new_end_page_index) { 132 while (index < new_end_page_index) {
133 /* Fill all intermediate pages with zeros */ 133 /* Fill all intermediate pages with zeros */
134 rc = write_zeros(file, index, 0, PAGE_CACHE_SIZE); 134 rc = ecryptfs_write_zeros(file, index, 0, PAGE_CACHE_SIZE);
135 if (rc) { 135 if (rc) {
136 ecryptfs_printk(KERN_ERR, "write_zeros(file=[%p], " 136 ecryptfs_printk(KERN_ERR, "ecryptfs_write_zeros("
137 "file=[%p], "
137 "index=[0x%.16x], " 138 "index=[0x%.16x], "
138 "old_end_pos_in_page=[d], " 139 "old_end_pos_in_page=[d], "
139 "(PAGE_CACHE_SIZE - new_end_pos_in_page" 140 "(PAGE_CACHE_SIZE - new_end_pos_in_page"
@@ -149,9 +150,9 @@ int ecryptfs_fill_zeros(struct file *file, loff_t new_length)
149 } 150 }
150 /* Fill the portion at the beginning of the last new page with 151 /* Fill the portion at the beginning of the last new page with
151 * zero's */ 152 * zero's */
152 rc = write_zeros(file, index, 0, (new_end_pos_in_page + 1)); 153 rc = ecryptfs_write_zeros(file, index, 0, (new_end_pos_in_page + 1));
153 if (rc) { 154 if (rc) {
154 ecryptfs_printk(KERN_ERR, "write_zeros(file=" 155 ecryptfs_printk(KERN_ERR, "ecryptfs_write_zeros(file="
155 "[%p], index=[0x%.16x], 0, " 156 "[%p], index=[0x%.16x], 0, "
156 "new_end_pos_in_page=[%d]" 157 "new_end_pos_in_page=[%d]"
157 "returned [%d]\n", file, index, 158 "returned [%d]\n", file, index,
@@ -400,7 +401,6 @@ out:
400static int ecryptfs_prepare_write(struct file *file, struct page *page, 401static int ecryptfs_prepare_write(struct file *file, struct page *page,
401 unsigned from, unsigned to) 402 unsigned from, unsigned to)
402{ 403{
403 loff_t pos;
404 int rc = 0; 404 int rc = 0;
405 405
406 if (from == 0 && to == PAGE_CACHE_SIZE) 406 if (from == 0 && to == PAGE_CACHE_SIZE)
@@ -408,15 +408,22 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page,
408 up to date. */ 408 up to date. */
409 if (!PageUptodate(page)) 409 if (!PageUptodate(page))
410 rc = ecryptfs_do_readpage(file, page, page->index); 410 rc = ecryptfs_do_readpage(file, page, page->index);
411 pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; 411 if (page->index != 0) {
412 if (pos > i_size_read(page->mapping->host)) { 412 loff_t end_of_prev_pg_pos =
413 rc = ecryptfs_truncate(file->f_path.dentry, pos); 413 (((loff_t)page->index << PAGE_CACHE_SHIFT) - 1);
414 if (rc) { 414
415 printk(KERN_ERR "Error on attempt to " 415 if (end_of_prev_pg_pos > i_size_read(page->mapping->host)) {
416 "truncate to (higher) offset [%lld];" 416 rc = ecryptfs_truncate(file->f_path.dentry,
417 " rc = [%d]\n", pos, rc); 417 end_of_prev_pg_pos);
418 goto out; 418 if (rc) {
419 printk(KERN_ERR "Error on attempt to "
420 "truncate to (higher) offset [%lld];"
421 " rc = [%d]\n", end_of_prev_pg_pos, rc);
422 goto out;
423 }
419 } 424 }
425 if (end_of_prev_pg_pos + 1 > i_size_read(page->mapping->host))
426 zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
420 } 427 }
421out: 428out:
422 return rc; 429 return rc;
@@ -753,7 +760,7 @@ out:
753} 760}
754 761
755/** 762/**
756 * write_zeros 763 * ecryptfs_write_zeros
757 * @file: The ecryptfs file 764 * @file: The ecryptfs file
758 * @index: The index in which we are writing 765 * @index: The index in which we are writing
759 * @start: The position after the last block of data 766 * @start: The position after the last block of data
@@ -763,8 +770,8 @@ out:
763 * 770 *
764 * (start + num_zeros) must be less than or equal to PAGE_CACHE_SIZE 771 * (start + num_zeros) must be less than or equal to PAGE_CACHE_SIZE
765 */ 772 */
766static 773int
767int write_zeros(struct file *file, pgoff_t index, int start, int num_zeros) 774ecryptfs_write_zeros(struct file *file, pgoff_t index, int start, int num_zeros)
768{ 775{
769 int rc = 0; 776 int rc = 0;
770 struct page *tmp_page; 777 struct page *tmp_page;
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 566d4e2d3852..04afeecaaef3 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -53,7 +53,6 @@ const struct file_operations ext2_file_operations = {
53 .open = generic_file_open, 53 .open = generic_file_open,
54 .release = ext2_release_file, 54 .release = ext2_release_file,
55 .fsync = ext2_sync_file, 55 .fsync = ext2_sync_file,
56 .sendfile = generic_file_sendfile,
57 .splice_read = generic_file_splice_read, 56 .splice_read = generic_file_splice_read,
58 .splice_write = generic_file_splice_write, 57 .splice_write = generic_file_splice_write,
59}; 58};
@@ -71,7 +70,6 @@ const struct file_operations ext2_xip_file_operations = {
71 .open = generic_file_open, 70 .open = generic_file_open,
72 .release = ext2_release_file, 71 .release = ext2_release_file,
73 .fsync = ext2_sync_file, 72 .fsync = ext2_sync_file,
74 .sendfile = xip_file_sendfile,
75}; 73};
76#endif 74#endif
77 75
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 16337bff0272..5de5061eb331 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1038,6 +1038,15 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
1038 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 1038 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
1039 ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); 1039 ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
1040 1040
1041 ext2_xip_verify_sb(sb); /* see if bdev supports xip, unset
1042 EXT2_MOUNT_XIP if not */
1043
1044 if ((ext2_use_xip(sb)) && (sb->s_blocksize != PAGE_SIZE)) {
1045 printk("XIP: Unsupported blocksize\n");
1046 err = -EINVAL;
1047 goto restore_opts;
1048 }
1049
1041 es = sbi->s_es; 1050 es = sbi->s_es;
1042 if (((sbi->s_mount_opt & EXT2_MOUNT_XIP) != 1051 if (((sbi->s_mount_opt & EXT2_MOUNT_XIP) !=
1043 (old_mount_opt & EXT2_MOUNT_XIP)) && 1052 (old_mount_opt & EXT2_MOUNT_XIP)) &&
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 1e6f13864536..acc4913d3019 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -120,7 +120,6 @@ const struct file_operations ext3_file_operations = {
120 .open = generic_file_open, 120 .open = generic_file_open,
121 .release = ext3_release_file, 121 .release = ext3_release_file,
122 .fsync = ext3_sync_file, 122 .fsync = ext3_sync_file,
123 .sendfile = generic_file_sendfile,
124 .splice_read = generic_file_splice_read, 123 .splice_read = generic_file_splice_read,
125 .splice_write = generic_file_splice_write, 124 .splice_write = generic_file_splice_write,
126}; 125};
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index a6cb6171c3af..2a85ddee4740 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2677,8 +2677,10 @@ void ext3_read_inode(struct inode * inode)
2677 */ 2677 */
2678 ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); 2678 ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
2679 if (EXT3_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > 2679 if (EXT3_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
2680 EXT3_INODE_SIZE(inode->i_sb)) 2680 EXT3_INODE_SIZE(inode->i_sb)) {
2681 brelse (bh);
2681 goto bad_inode; 2682 goto bad_inode;
2683 }
2682 if (ei->i_extra_isize == 0) { 2684 if (ei->i_extra_isize == 0) {
2683 /* The extra space is currently unused. Use it. */ 2685 /* The extra space is currently unused. Use it. */
2684 ei->i_extra_isize = sizeof(struct ext3_inode) - 2686 ei->i_extra_isize = sizeof(struct ext3_inode) -
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 8a23483ca8d0..3b64bb16c727 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -30,15 +30,15 @@
30void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, 30void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
31 unsigned long *blockgrpp, ext4_grpblk_t *offsetp) 31 unsigned long *blockgrpp, ext4_grpblk_t *offsetp)
32{ 32{
33 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 33 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
34 ext4_grpblk_t offset; 34 ext4_grpblk_t offset;
35 35
36 blocknr = blocknr - le32_to_cpu(es->s_first_data_block); 36 blocknr = blocknr - le32_to_cpu(es->s_first_data_block);
37 offset = do_div(blocknr, EXT4_BLOCKS_PER_GROUP(sb)); 37 offset = do_div(blocknr, EXT4_BLOCKS_PER_GROUP(sb));
38 if (offsetp) 38 if (offsetp)
39 *offsetp = offset; 39 *offsetp = offset;
40 if (blockgrpp) 40 if (blockgrpp)
41 *blockgrpp = blocknr; 41 *blockgrpp = blocknr;
42 42
43} 43}
44 44
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index a0f0c04e79b2..b9ce24129070 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -374,7 +374,7 @@ ext4_ext_binsearch_idx(struct inode *inode, struct ext4_ext_path *path, int bloc
374 le32_to_cpu(ix[-1].ei_block)); 374 le32_to_cpu(ix[-1].ei_block));
375 } 375 }
376 BUG_ON(k && le32_to_cpu(ix->ei_block) 376 BUG_ON(k && le32_to_cpu(ix->ei_block)
377 <= le32_to_cpu(ix[-1].ei_block)); 377 <= le32_to_cpu(ix[-1].ei_block));
378 if (block < le32_to_cpu(ix->ei_block)) 378 if (block < le32_to_cpu(ix->ei_block))
379 break; 379 break;
380 chix = ix; 380 chix = ix;
@@ -423,8 +423,8 @@ ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block)
423 423
424 path->p_ext = l - 1; 424 path->p_ext = l - 1;
425 ext_debug(" -> %d:%llu:%d ", 425 ext_debug(" -> %d:%llu:%d ",
426 le32_to_cpu(path->p_ext->ee_block), 426 le32_to_cpu(path->p_ext->ee_block),
427 ext_pblock(path->p_ext), 427 ext_pblock(path->p_ext),
428 le16_to_cpu(path->p_ext->ee_len)); 428 le16_to_cpu(path->p_ext->ee_len));
429 429
430#ifdef CHECK_BINSEARCH 430#ifdef CHECK_BINSEARCH
@@ -435,7 +435,7 @@ ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block)
435 chex = ex = EXT_FIRST_EXTENT(eh); 435 chex = ex = EXT_FIRST_EXTENT(eh);
436 for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ex++) { 436 for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ex++) {
437 BUG_ON(k && le32_to_cpu(ex->ee_block) 437 BUG_ON(k && le32_to_cpu(ex->ee_block)
438 <= le32_to_cpu(ex[-1].ee_block)); 438 <= le32_to_cpu(ex[-1].ee_block));
439 if (block < le32_to_cpu(ex->ee_block)) 439 if (block < le32_to_cpu(ex->ee_block))
440 break; 440 break;
441 chex = ex; 441 chex = ex;
@@ -577,7 +577,7 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
577 curp->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(curp->p_hdr->eh_entries)+1); 577 curp->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(curp->p_hdr->eh_entries)+1);
578 578
579 BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries) 579 BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries)
580 > le16_to_cpu(curp->p_hdr->eh_max)); 580 > le16_to_cpu(curp->p_hdr->eh_max));
581 BUG_ON(ix > EXT_LAST_INDEX(curp->p_hdr)); 581 BUG_ON(ix > EXT_LAST_INDEX(curp->p_hdr));
582 582
583 err = ext4_ext_dirty(handle, inode, curp); 583 err = ext4_ext_dirty(handle, inode, curp);
@@ -621,12 +621,12 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
621 border = path[depth].p_ext[1].ee_block; 621 border = path[depth].p_ext[1].ee_block;
622 ext_debug("leaf will be split." 622 ext_debug("leaf will be split."
623 " next leaf starts at %d\n", 623 " next leaf starts at %d\n",
624 le32_to_cpu(border)); 624 le32_to_cpu(border));
625 } else { 625 } else {
626 border = newext->ee_block; 626 border = newext->ee_block;
627 ext_debug("leaf will be added." 627 ext_debug("leaf will be added."
628 " next leaf starts at %d\n", 628 " next leaf starts at %d\n",
629 le32_to_cpu(border)); 629 le32_to_cpu(border));
630 } 630 }
631 631
632 /* 632 /*
@@ -684,9 +684,9 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
684 while (path[depth].p_ext <= 684 while (path[depth].p_ext <=
685 EXT_MAX_EXTENT(path[depth].p_hdr)) { 685 EXT_MAX_EXTENT(path[depth].p_hdr)) {
686 ext_debug("move %d:%llu:%d in new leaf %llu\n", 686 ext_debug("move %d:%llu:%d in new leaf %llu\n",
687 le32_to_cpu(path[depth].p_ext->ee_block), 687 le32_to_cpu(path[depth].p_ext->ee_block),
688 ext_pblock(path[depth].p_ext), 688 ext_pblock(path[depth].p_ext),
689 le16_to_cpu(path[depth].p_ext->ee_len), 689 le16_to_cpu(path[depth].p_ext->ee_len),
690 newblock); 690 newblock);
691 /*memmove(ex++, path[depth].p_ext++, 691 /*memmove(ex++, path[depth].p_ext++,
692 sizeof(struct ext4_extent)); 692 sizeof(struct ext4_extent));
@@ -765,9 +765,9 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
765 EXT_LAST_INDEX(path[i].p_hdr)); 765 EXT_LAST_INDEX(path[i].p_hdr));
766 while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { 766 while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) {
767 ext_debug("%d: move %d:%d in new index %llu\n", i, 767 ext_debug("%d: move %d:%d in new index %llu\n", i,
768 le32_to_cpu(path[i].p_idx->ei_block), 768 le32_to_cpu(path[i].p_idx->ei_block),
769 idx_pblock(path[i].p_idx), 769 idx_pblock(path[i].p_idx),
770 newblock); 770 newblock);
771 /*memmove(++fidx, path[i].p_idx++, 771 /*memmove(++fidx, path[i].p_idx++,
772 sizeof(struct ext4_extent_idx)); 772 sizeof(struct ext4_extent_idx));
773 neh->eh_entries++; 773 neh->eh_entries++;
@@ -1128,6 +1128,55 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
1128} 1128}
1129 1129
1130/* 1130/*
1131 * check if a portion of the "newext" extent overlaps with an
1132 * existing extent.
1133 *
1134 * If there is an overlap discovered, it updates the length of the newext
1135 * such that there will be no overlap, and then returns 1.
1136 * If there is no overlap found, it returns 0.
1137 */
1138unsigned int ext4_ext_check_overlap(struct inode *inode,
1139 struct ext4_extent *newext,
1140 struct ext4_ext_path *path)
1141{
1142 unsigned long b1, b2;
1143 unsigned int depth, len1;
1144 unsigned int ret = 0;
1145
1146 b1 = le32_to_cpu(newext->ee_block);
1147 len1 = le16_to_cpu(newext->ee_len);
1148 depth = ext_depth(inode);
1149 if (!path[depth].p_ext)
1150 goto out;
1151 b2 = le32_to_cpu(path[depth].p_ext->ee_block);
1152
1153 /*
1154 * get the next allocated block if the extent in the path
1155 * is before the requested block(s)
1156 */
1157 if (b2 < b1) {
1158 b2 = ext4_ext_next_allocated_block(path);
1159 if (b2 == EXT_MAX_BLOCK)
1160 goto out;
1161 }
1162
1163 /* check for wrap through zero */
1164 if (b1 + len1 < b1) {
1165 len1 = EXT_MAX_BLOCK - b1;
1166 newext->ee_len = cpu_to_le16(len1);
1167 ret = 1;
1168 }
1169
1170 /* check for overlap */
1171 if (b1 + len1 > b2) {
1172 newext->ee_len = cpu_to_le16(b2 - b1);
1173 ret = 1;
1174 }
1175out:
1176 return ret;
1177}
1178
1179/*
1131 * ext4_ext_insert_extent: 1180 * ext4_ext_insert_extent:
1132 * tries to merge requsted extent into the existing extent or 1181 * tries to merge requsted extent into the existing extent or
1133 * inserts requested extent as new one into the tree, 1182 * inserts requested extent as new one into the tree,
@@ -1212,12 +1261,12 @@ has_space:
1212 if (!nearex) { 1261 if (!nearex) {
1213 /* there is no extent in this leaf, create first one */ 1262 /* there is no extent in this leaf, create first one */
1214 ext_debug("first extent in the leaf: %d:%llu:%d\n", 1263 ext_debug("first extent in the leaf: %d:%llu:%d\n",
1215 le32_to_cpu(newext->ee_block), 1264 le32_to_cpu(newext->ee_block),
1216 ext_pblock(newext), 1265 ext_pblock(newext),
1217 le16_to_cpu(newext->ee_len)); 1266 le16_to_cpu(newext->ee_len));
1218 path[depth].p_ext = EXT_FIRST_EXTENT(eh); 1267 path[depth].p_ext = EXT_FIRST_EXTENT(eh);
1219 } else if (le32_to_cpu(newext->ee_block) 1268 } else if (le32_to_cpu(newext->ee_block)
1220 > le32_to_cpu(nearex->ee_block)) { 1269 > le32_to_cpu(nearex->ee_block)) {
1221/* BUG_ON(newext->ee_block == nearex->ee_block); */ 1270/* BUG_ON(newext->ee_block == nearex->ee_block); */
1222 if (nearex != EXT_LAST_EXTENT(eh)) { 1271 if (nearex != EXT_LAST_EXTENT(eh)) {
1223 len = EXT_MAX_EXTENT(eh) - nearex; 1272 len = EXT_MAX_EXTENT(eh) - nearex;
@@ -1225,9 +1274,9 @@ has_space:
1225 len = len < 0 ? 0 : len; 1274 len = len < 0 ? 0 : len;
1226 ext_debug("insert %d:%llu:%d after: nearest 0x%p, " 1275 ext_debug("insert %d:%llu:%d after: nearest 0x%p, "
1227 "move %d from 0x%p to 0x%p\n", 1276 "move %d from 0x%p to 0x%p\n",
1228 le32_to_cpu(newext->ee_block), 1277 le32_to_cpu(newext->ee_block),
1229 ext_pblock(newext), 1278 ext_pblock(newext),
1230 le16_to_cpu(newext->ee_len), 1279 le16_to_cpu(newext->ee_len),
1231 nearex, len, nearex + 1, nearex + 2); 1280 nearex, len, nearex + 1, nearex + 2);
1232 memmove(nearex + 2, nearex + 1, len); 1281 memmove(nearex + 2, nearex + 1, len);
1233 } 1282 }
@@ -1358,9 +1407,9 @@ int ext4_ext_walk_space(struct inode *inode, unsigned long block,
1358 cbex.ec_start = 0; 1407 cbex.ec_start = 0;
1359 cbex.ec_type = EXT4_EXT_CACHE_GAP; 1408 cbex.ec_type = EXT4_EXT_CACHE_GAP;
1360 } else { 1409 } else {
1361 cbex.ec_block = le32_to_cpu(ex->ee_block); 1410 cbex.ec_block = le32_to_cpu(ex->ee_block);
1362 cbex.ec_len = le16_to_cpu(ex->ee_len); 1411 cbex.ec_len = le16_to_cpu(ex->ee_len);
1363 cbex.ec_start = ext_pblock(ex); 1412 cbex.ec_start = ext_pblock(ex);
1364 cbex.ec_type = EXT4_EXT_CACHE_EXTENT; 1413 cbex.ec_type = EXT4_EXT_CACHE_EXTENT;
1365 } 1414 }
1366 1415
@@ -1431,16 +1480,16 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
1431 len = le32_to_cpu(ex->ee_block) - block; 1480 len = le32_to_cpu(ex->ee_block) - block;
1432 ext_debug("cache gap(before): %lu [%lu:%lu]", 1481 ext_debug("cache gap(before): %lu [%lu:%lu]",
1433 (unsigned long) block, 1482 (unsigned long) block,
1434 (unsigned long) le32_to_cpu(ex->ee_block), 1483 (unsigned long) le32_to_cpu(ex->ee_block),
1435 (unsigned long) le16_to_cpu(ex->ee_len)); 1484 (unsigned long) le16_to_cpu(ex->ee_len));
1436 } else if (block >= le32_to_cpu(ex->ee_block) 1485 } else if (block >= le32_to_cpu(ex->ee_block)
1437 + le16_to_cpu(ex->ee_len)) { 1486 + le16_to_cpu(ex->ee_len)) {
1438 lblock = le32_to_cpu(ex->ee_block) 1487 lblock = le32_to_cpu(ex->ee_block)
1439 + le16_to_cpu(ex->ee_len); 1488 + le16_to_cpu(ex->ee_len);
1440 len = ext4_ext_next_allocated_block(path); 1489 len = ext4_ext_next_allocated_block(path);
1441 ext_debug("cache gap(after): [%lu:%lu] %lu", 1490 ext_debug("cache gap(after): [%lu:%lu] %lu",
1442 (unsigned long) le32_to_cpu(ex->ee_block), 1491 (unsigned long) le32_to_cpu(ex->ee_block),
1443 (unsigned long) le16_to_cpu(ex->ee_len), 1492 (unsigned long) le16_to_cpu(ex->ee_len),
1444 (unsigned long) block); 1493 (unsigned long) block);
1445 BUG_ON(len == lblock); 1494 BUG_ON(len == lblock);
1446 len = len - lblock; 1495 len = len - lblock;
@@ -1468,9 +1517,9 @@ ext4_ext_in_cache(struct inode *inode, unsigned long block,
1468 BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP && 1517 BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP &&
1469 cex->ec_type != EXT4_EXT_CACHE_EXTENT); 1518 cex->ec_type != EXT4_EXT_CACHE_EXTENT);
1470 if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) { 1519 if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) {
1471 ex->ee_block = cpu_to_le32(cex->ec_block); 1520 ex->ee_block = cpu_to_le32(cex->ec_block);
1472 ext4_ext_store_pblock(ex, cex->ec_start); 1521 ext4_ext_store_pblock(ex, cex->ec_start);
1473 ex->ee_len = cpu_to_le16(cex->ec_len); 1522 ex->ee_len = cpu_to_le16(cex->ec_len);
1474 ext_debug("%lu cached by %lu:%lu:%llu\n", 1523 ext_debug("%lu cached by %lu:%lu:%llu\n",
1475 (unsigned long) block, 1524 (unsigned long) block,
1476 (unsigned long) cex->ec_block, 1525 (unsigned long) cex->ec_block,
@@ -1956,9 +2005,9 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
1956 /* we should allocate requested block */ 2005 /* we should allocate requested block */
1957 } else if (goal == EXT4_EXT_CACHE_EXTENT) { 2006 } else if (goal == EXT4_EXT_CACHE_EXTENT) {
1958 /* block is already allocated */ 2007 /* block is already allocated */
1959 newblock = iblock 2008 newblock = iblock
1960 - le32_to_cpu(newex.ee_block) 2009 - le32_to_cpu(newex.ee_block)
1961 + ext_pblock(&newex); 2010 + ext_pblock(&newex);
1962 /* number of remaining blocks in the extent */ 2011 /* number of remaining blocks in the extent */
1963 allocated = le16_to_cpu(newex.ee_len) - 2012 allocated = le16_to_cpu(newex.ee_len) -
1964 (iblock - le32_to_cpu(newex.ee_block)); 2013 (iblock - le32_to_cpu(newex.ee_block));
@@ -1987,7 +2036,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
1987 2036
1988 ex = path[depth].p_ext; 2037 ex = path[depth].p_ext;
1989 if (ex) { 2038 if (ex) {
1990 unsigned long ee_block = le32_to_cpu(ex->ee_block); 2039 unsigned long ee_block = le32_to_cpu(ex->ee_block);
1991 ext4_fsblk_t ee_start = ext_pblock(ex); 2040 ext4_fsblk_t ee_start = ext_pblock(ex);
1992 unsigned short ee_len = le16_to_cpu(ex->ee_len); 2041 unsigned short ee_len = le16_to_cpu(ex->ee_len);
1993 2042
@@ -2000,7 +2049,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2000 if (ee_len > EXT_MAX_LEN) 2049 if (ee_len > EXT_MAX_LEN)
2001 goto out2; 2050 goto out2;
2002 /* if found extent covers block, simply return it */ 2051 /* if found extent covers block, simply return it */
2003 if (iblock >= ee_block && iblock < ee_block + ee_len) { 2052 if (iblock >= ee_block && iblock < ee_block + ee_len) {
2004 newblock = iblock - ee_block + ee_start; 2053 newblock = iblock - ee_block + ee_start;
2005 /* number of remaining blocks in the extent */ 2054 /* number of remaining blocks in the extent */
2006 allocated = ee_len - (iblock - ee_block); 2055 allocated = ee_len - (iblock - ee_block);
@@ -2031,7 +2080,15 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2031 2080
2032 /* allocate new block */ 2081 /* allocate new block */
2033 goal = ext4_ext_find_goal(inode, path, iblock); 2082 goal = ext4_ext_find_goal(inode, path, iblock);
2034 allocated = max_blocks; 2083
2084 /* Check if we can really insert (iblock)::(iblock+max_blocks) extent */
2085 newex.ee_block = cpu_to_le32(iblock);
2086 newex.ee_len = cpu_to_le16(max_blocks);
2087 err = ext4_ext_check_overlap(inode, &newex, path);
2088 if (err)
2089 allocated = le16_to_cpu(newex.ee_len);
2090 else
2091 allocated = max_blocks;
2035 newblock = ext4_new_blocks(handle, inode, goal, &allocated, &err); 2092 newblock = ext4_new_blocks(handle, inode, goal, &allocated, &err);
2036 if (!newblock) 2093 if (!newblock)
2037 goto out2; 2094 goto out2;
@@ -2039,12 +2096,15 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2039 goal, newblock, allocated); 2096 goal, newblock, allocated);
2040 2097
2041 /* try to insert new extent into found leaf and return */ 2098 /* try to insert new extent into found leaf and return */
2042 newex.ee_block = cpu_to_le32(iblock);
2043 ext4_ext_store_pblock(&newex, newblock); 2099 ext4_ext_store_pblock(&newex, newblock);
2044 newex.ee_len = cpu_to_le16(allocated); 2100 newex.ee_len = cpu_to_le16(allocated);
2045 err = ext4_ext_insert_extent(handle, inode, path, &newex); 2101 err = ext4_ext_insert_extent(handle, inode, path, &newex);
2046 if (err) 2102 if (err) {
2103 /* free data blocks we just allocated */
2104 ext4_free_blocks(handle, inode, ext_pblock(&newex),
2105 le16_to_cpu(newex.ee_len));
2047 goto out2; 2106 goto out2;
2107 }
2048 2108
2049 if (extend_disksize && inode->i_size > EXT4_I(inode)->i_disksize) 2109 if (extend_disksize && inode->i_size > EXT4_I(inode)->i_disksize)
2050 EXT4_I(inode)->i_disksize = inode->i_size; 2110 EXT4_I(inode)->i_disksize = inode->i_size;
@@ -2157,11 +2217,3 @@ int ext4_ext_writepage_trans_blocks(struct inode *inode, int num)
2157 2217
2158 return needed; 2218 return needed;
2159} 2219}
2160
2161EXPORT_SYMBOL(ext4_mark_inode_dirty);
2162EXPORT_SYMBOL(ext4_ext_invalidate_cache);
2163EXPORT_SYMBOL(ext4_ext_insert_extent);
2164EXPORT_SYMBOL(ext4_ext_walk_space);
2165EXPORT_SYMBOL(ext4_ext_find_goal);
2166EXPORT_SYMBOL(ext4_ext_calc_credits_for_insert);
2167
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 3c6c1fd2be90..d4c8186aed64 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -120,7 +120,6 @@ const struct file_operations ext4_file_operations = {
120 .open = generic_file_open, 120 .open = generic_file_open,
121 .release = ext4_release_file, 121 .release = ext4_release_file,
122 .fsync = ext4_sync_file, 122 .fsync = ext4_sync_file,
123 .sendfile = generic_file_sendfile,
124 .splice_read = generic_file_splice_read, 123 .splice_read = generic_file_splice_read,
125 .splice_write = generic_file_splice_write, 124 .splice_write = generic_file_splice_write,
126}; 125};
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index b34182b6ee4d..8416fa28c422 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -255,8 +255,8 @@ static int verify_chain(Indirect *from, Indirect *to)
255 * @inode: inode in question (we are only interested in its superblock) 255 * @inode: inode in question (we are only interested in its superblock)
256 * @i_block: block number to be parsed 256 * @i_block: block number to be parsed
257 * @offsets: array to store the offsets in 257 * @offsets: array to store the offsets in
258 * @boundary: set this non-zero if the referred-to block is likely to be 258 * @boundary: set this non-zero if the referred-to block is likely to be
259 * followed (on disk) by an indirect block. 259 * followed (on disk) by an indirect block.
260 * 260 *
261 * To store the locations of file's data ext4 uses a data structure common 261 * To store the locations of file's data ext4 uses a data structure common
262 * for UNIX filesystems - tree of pointers anchored in the inode, with 262 * for UNIX filesystems - tree of pointers anchored in the inode, with
@@ -2673,8 +2673,10 @@ void ext4_read_inode(struct inode * inode)
2673 */ 2673 */
2674 ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); 2674 ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
2675 if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > 2675 if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
2676 EXT4_INODE_SIZE(inode->i_sb)) 2676 EXT4_INODE_SIZE(inode->i_sb)) {
2677 brelse (bh);
2677 goto bad_inode; 2678 goto bad_inode;
2679 }
2678 if (ei->i_extra_isize == 0) { 2680 if (ei->i_extra_isize == 0) {
2679 /* The extra space is currently unused. Use it. */ 2681 /* The extra space is currently unused. Use it. */
2680 ei->i_extra_isize = sizeof(struct ext4_inode) - 2682 ei->i_extra_isize = sizeof(struct ext4_inode) -
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 4ec57be5baf5..2811e5720ad0 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -46,7 +46,7 @@
46 */ 46 */
47#define NAMEI_RA_CHUNKS 2 47#define NAMEI_RA_CHUNKS 2
48#define NAMEI_RA_BLOCKS 4 48#define NAMEI_RA_BLOCKS 4
49#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS) 49#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
50#define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b)) 50#define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b))
51 51
52static struct buffer_head *ext4_append(handle_t *handle, 52static struct buffer_head *ext4_append(handle_t *handle,
@@ -241,7 +241,7 @@ static inline unsigned dx_node_limit (struct inode *dir)
241static void dx_show_index (char * label, struct dx_entry *entries) 241static void dx_show_index (char * label, struct dx_entry *entries)
242{ 242{
243 int i, n = dx_get_count (entries); 243 int i, n = dx_get_count (entries);
244 printk("%s index ", label); 244 printk("%s index ", label);
245 for (i = 0; i < n; i++) { 245 for (i = 0; i < n; i++) {
246 printk("%x->%u ", i? dx_get_hash(entries + i) : 246 printk("%x->%u ", i? dx_get_hash(entries + i) :
247 0, dx_get_block(entries + i)); 247 0, dx_get_block(entries + i));
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index cb9afdd0e26e..175b68c60968 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1985,7 +1985,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
1985 1985
1986 if (bd_claim(bdev, sb)) { 1986 if (bd_claim(bdev, sb)) {
1987 printk(KERN_ERR 1987 printk(KERN_ERR
1988 "EXT4: failed to claim external journal device.\n"); 1988 "EXT4: failed to claim external journal device.\n");
1989 blkdev_put(bdev); 1989 blkdev_put(bdev);
1990 return NULL; 1990 return NULL;
1991 } 1991 }
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 55d3c7461c5b..69a83b59dce8 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -134,7 +134,7 @@ const struct file_operations fat_file_operations = {
134 .release = fat_file_release, 134 .release = fat_file_release,
135 .ioctl = fat_generic_ioctl, 135 .ioctl = fat_generic_ioctl,
136 .fsync = file_fsync, 136 .fsync = file_fsync,
137 .sendfile = generic_file_sendfile, 137 .splice_read = generic_file_splice_read,
138}; 138};
139 139
140static int fat_cont_expand(struct inode *inode, loff_t size) 140static int fat_cont_expand(struct inode *inode, loff_t size)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index adf7995232b8..f79de7c8cdfa 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -802,7 +802,7 @@ static const struct file_operations fuse_file_operations = {
802 .release = fuse_release, 802 .release = fuse_release,
803 .fsync = fuse_fsync, 803 .fsync = fuse_fsync,
804 .lock = fuse_file_lock, 804 .lock = fuse_file_lock,
805 .sendfile = generic_file_sendfile, 805 .splice_read = generic_file_splice_read,
806}; 806};
807 807
808static const struct file_operations fuse_direct_io_file_operations = { 808static const struct file_operations fuse_direct_io_file_operations = {
@@ -814,7 +814,7 @@ static const struct file_operations fuse_direct_io_file_operations = {
814 .release = fuse_release, 814 .release = fuse_release,
815 .fsync = fuse_fsync, 815 .fsync = fuse_fsync,
816 .lock = fuse_file_lock, 816 .lock = fuse_file_lock,
817 /* no mmap and sendfile */ 817 /* no mmap and splice_read */
818}; 818};
819 819
820static const struct address_space_operations fuse_file_aops = { 820static const struct address_space_operations fuse_file_aops = {
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 9804c0cdcb42..cc5efc13496a 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -655,10 +655,9 @@ static int fuse_get_sb_blk(struct file_system_type *fs_type,
655static struct file_system_type fuseblk_fs_type = { 655static struct file_system_type fuseblk_fs_type = {
656 .owner = THIS_MODULE, 656 .owner = THIS_MODULE,
657 .name = "fuseblk", 657 .name = "fuseblk",
658 .fs_flags = FS_HAS_SUBTYPE,
659 .get_sb = fuse_get_sb_blk, 658 .get_sb = fuse_get_sb_blk,
660 .kill_sb = kill_block_super, 659 .kill_sb = kill_block_super,
661 .fs_flags = FS_REQUIRES_DEV, 660 .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
662}; 661};
663 662
664static inline int register_fuseblk(void) 663static inline int register_fuseblk(void)
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
index e3f1ada643ac..04ad0caebedb 100644
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@@ -1,7 +1,7 @@
1obj-$(CONFIG_GFS2_FS) += gfs2.o 1obj-$(CONFIG_GFS2_FS) += gfs2.o
2gfs2-y := acl.o bmap.o daemon.o dir.o eaops.o eattr.o glock.o \ 2gfs2-y := acl.o bmap.o daemon.o dir.o eaops.o eattr.o glock.o \
3 glops.o inode.o lm.o log.o lops.o locking.o main.o meta_io.o \ 3 glops.o inode.o lm.o log.o lops.o locking.o main.o meta_io.o \
4 mount.o ondisk.o ops_address.o ops_dentry.o ops_export.o ops_file.o \ 4 mount.o ops_address.o ops_dentry.o ops_export.o ops_file.o \
5 ops_fstype.o ops_inode.o ops_super.o ops_vm.o quota.o \ 5 ops_fstype.o ops_inode.o ops_super.o ops_vm.o quota.o \
6 recovery.o rgrp.o super.o sys.o trans.o util.o 6 recovery.o rgrp.o super.o sys.o trans.o util.o
7 7
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index c53a5d2d0590..cd805a66880d 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -718,7 +718,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
718 for (x = 0; x < rlist.rl_rgrps; x++) { 718 for (x = 0; x < rlist.rl_rgrps; x++) {
719 struct gfs2_rgrpd *rgd; 719 struct gfs2_rgrpd *rgd;
720 rgd = rlist.rl_ghs[x].gh_gl->gl_object; 720 rgd = rlist.rl_ghs[x].gh_gl->gl_object;
721 rg_blocks += rgd->rd_ri.ri_length; 721 rg_blocks += rgd->rd_length;
722 } 722 }
723 723
724 error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs); 724 error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
@@ -772,7 +772,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
772 gfs2_free_data(ip, bstart, blen); 772 gfs2_free_data(ip, bstart, blen);
773 } 773 }
774 774
775 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; 775 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
776 776
777 gfs2_dinode_out(ip, dibh->b_data); 777 gfs2_dinode_out(ip, dibh->b_data);
778 778
@@ -824,7 +824,7 @@ static int do_grow(struct gfs2_inode *ip, u64 size)
824 goto out_gunlock_q; 824 goto out_gunlock_q;
825 825
826 error = gfs2_trans_begin(sdp, 826 error = gfs2_trans_begin(sdp,
827 sdp->sd_max_height + al->al_rgd->rd_ri.ri_length + 827 sdp->sd_max_height + al->al_rgd->rd_length +
828 RES_JDATA + RES_DINODE + RES_STATFS + RES_QUOTA, 0); 828 RES_JDATA + RES_DINODE + RES_STATFS + RES_QUOTA, 0);
829 if (error) 829 if (error)
830 goto out_ipres; 830 goto out_ipres;
@@ -847,7 +847,7 @@ static int do_grow(struct gfs2_inode *ip, u64 size)
847 } 847 }
848 848
849 ip->i_di.di_size = size; 849 ip->i_di.di_size = size;
850 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; 850 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
851 851
852 error = gfs2_meta_inode_buffer(ip, &dibh); 852 error = gfs2_meta_inode_buffer(ip, &dibh);
853 if (error) 853 if (error)
@@ -885,7 +885,6 @@ static int gfs2_block_truncate_page(struct address_space *mapping)
885 unsigned blocksize, iblock, length, pos; 885 unsigned blocksize, iblock, length, pos;
886 struct buffer_head *bh; 886 struct buffer_head *bh;
887 struct page *page; 887 struct page *page;
888 void *kaddr;
889 int err; 888 int err;
890 889
891 page = grab_cache_page(mapping, index); 890 page = grab_cache_page(mapping, index);
@@ -928,15 +927,13 @@ static int gfs2_block_truncate_page(struct address_space *mapping)
928 /* Uhhuh. Read error. Complain and punt. */ 927 /* Uhhuh. Read error. Complain and punt. */
929 if (!buffer_uptodate(bh)) 928 if (!buffer_uptodate(bh))
930 goto unlock; 929 goto unlock;
930 err = 0;
931 } 931 }
932 932
933 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) 933 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
934 gfs2_trans_add_bh(ip->i_gl, bh, 0); 934 gfs2_trans_add_bh(ip->i_gl, bh, 0);
935 935
936 kaddr = kmap_atomic(page, KM_USER0); 936 zero_user_page(page, offset, length, KM_USER0);
937 memset(kaddr + offset, 0, length);
938 flush_dcache_page(page);
939 kunmap_atomic(kaddr, KM_USER0);
940 937
941unlock: 938unlock:
942 unlock_page(page); 939 unlock_page(page);
@@ -962,7 +959,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
962 959
963 if (gfs2_is_stuffed(ip)) { 960 if (gfs2_is_stuffed(ip)) {
964 ip->i_di.di_size = size; 961 ip->i_di.di_size = size;
965 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; 962 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
966 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 963 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
967 gfs2_dinode_out(ip, dibh->b_data); 964 gfs2_dinode_out(ip, dibh->b_data);
968 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size); 965 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size);
@@ -974,7 +971,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
974 971
975 if (!error) { 972 if (!error) {
976 ip->i_di.di_size = size; 973 ip->i_di.di_size = size;
977 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; 974 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
978 ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG; 975 ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG;
979 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 976 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
980 gfs2_dinode_out(ip, dibh->b_data); 977 gfs2_dinode_out(ip, dibh->b_data);
@@ -1044,10 +1041,10 @@ static int trunc_end(struct gfs2_inode *ip)
1044 ip->i_di.di_height = 0; 1041 ip->i_di.di_height = 0;
1045 ip->i_di.di_goal_meta = 1042 ip->i_di.di_goal_meta =
1046 ip->i_di.di_goal_data = 1043 ip->i_di.di_goal_data =
1047 ip->i_num.no_addr; 1044 ip->i_no_addr;
1048 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 1045 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
1049 } 1046 }
1050 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; 1047 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
1051 ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG; 1048 ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG;
1052 1049
1053 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1050 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
diff --git a/fs/gfs2/daemon.c b/fs/gfs2/daemon.c
index 683cb5bda870..3548d9f31e0d 100644
--- a/fs/gfs2/daemon.c
+++ b/fs/gfs2/daemon.c
@@ -16,6 +16,7 @@
16#include <linux/delay.h> 16#include <linux/delay.h>
17#include <linux/gfs2_ondisk.h> 17#include <linux/gfs2_ondisk.h>
18#include <linux/lm_interface.h> 18#include <linux/lm_interface.h>
19#include <linux/freezer.h>
19 20
20#include "gfs2.h" 21#include "gfs2.h"
21#include "incore.h" 22#include "incore.h"
@@ -49,6 +50,8 @@ int gfs2_scand(void *data)
49 while (!kthread_should_stop()) { 50 while (!kthread_should_stop()) {
50 gfs2_scand_internal(sdp); 51 gfs2_scand_internal(sdp);
51 t = gfs2_tune_get(sdp, gt_scand_secs) * HZ; 52 t = gfs2_tune_get(sdp, gt_scand_secs) * HZ;
53 if (freezing(current))
54 refrigerator();
52 schedule_timeout_interruptible(t); 55 schedule_timeout_interruptible(t);
53 } 56 }
54 57
@@ -74,6 +77,8 @@ int gfs2_glockd(void *data)
74 wait_event_interruptible(sdp->sd_reclaim_wq, 77 wait_event_interruptible(sdp->sd_reclaim_wq,
75 (atomic_read(&sdp->sd_reclaim_count) || 78 (atomic_read(&sdp->sd_reclaim_count) ||
76 kthread_should_stop())); 79 kthread_should_stop()));
80 if (freezing(current))
81 refrigerator();
77 } 82 }
78 83
79 return 0; 84 return 0;
@@ -93,6 +98,8 @@ int gfs2_recoverd(void *data)
93 while (!kthread_should_stop()) { 98 while (!kthread_should_stop()) {
94 gfs2_check_journals(sdp); 99 gfs2_check_journals(sdp);
95 t = gfs2_tune_get(sdp, gt_recoverd_secs) * HZ; 100 t = gfs2_tune_get(sdp, gt_recoverd_secs) * HZ;
101 if (freezing(current))
102 refrigerator();
96 schedule_timeout_interruptible(t); 103 schedule_timeout_interruptible(t);
97 } 104 }
98 105
@@ -141,6 +148,8 @@ int gfs2_logd(void *data)
141 } 148 }
142 149
143 t = gfs2_tune_get(sdp, gt_logd_secs) * HZ; 150 t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
151 if (freezing(current))
152 refrigerator();
144 schedule_timeout_interruptible(t); 153 schedule_timeout_interruptible(t);
145 } 154 }
146 155
@@ -191,6 +200,8 @@ int gfs2_quotad(void *data)
191 gfs2_quota_scan(sdp); 200 gfs2_quota_scan(sdp);
192 201
193 t = gfs2_tune_get(sdp, gt_quotad_secs) * HZ; 202 t = gfs2_tune_get(sdp, gt_quotad_secs) * HZ;
203 if (freezing(current))
204 refrigerator();
194 schedule_timeout_interruptible(t); 205 schedule_timeout_interruptible(t);
195 } 206 }
196 207
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index a96fa07b3f3b..2beb2f401aa2 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -130,7 +130,7 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf,
130 memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size); 130 memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size);
131 if (ip->i_di.di_size < offset + size) 131 if (ip->i_di.di_size < offset + size)
132 ip->i_di.di_size = offset + size; 132 ip->i_di.di_size = offset + size;
133 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; 133 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
134 gfs2_dinode_out(ip, dibh->b_data); 134 gfs2_dinode_out(ip, dibh->b_data);
135 135
136 brelse(dibh); 136 brelse(dibh);
@@ -228,7 +228,7 @@ out:
228 228
229 if (ip->i_di.di_size < offset + copied) 229 if (ip->i_di.di_size < offset + copied)
230 ip->i_di.di_size = offset + copied; 230 ip->i_di.di_size = offset + copied;
231 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; 231 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
232 232
233 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 233 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
234 gfs2_dinode_out(ip, dibh->b_data); 234 gfs2_dinode_out(ip, dibh->b_data);
@@ -1456,7 +1456,7 @@ int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
1456 if (dip->i_di.di_entries != g.offset) { 1456 if (dip->i_di.di_entries != g.offset) {
1457 fs_warn(sdp, "Number of entries corrupt in dir %llu, " 1457 fs_warn(sdp, "Number of entries corrupt in dir %llu, "
1458 "ip->i_di.di_entries (%u) != g.offset (%u)\n", 1458 "ip->i_di.di_entries (%u) != g.offset (%u)\n",
1459 (unsigned long long)dip->i_num.no_addr, 1459 (unsigned long long)dip->i_no_addr,
1460 dip->i_di.di_entries, 1460 dip->i_di.di_entries,
1461 g.offset); 1461 g.offset);
1462 error = -EIO; 1462 error = -EIO;
@@ -1488,24 +1488,55 @@ out:
1488 * Returns: errno 1488 * Returns: errno
1489 */ 1489 */
1490 1490
1491int gfs2_dir_search(struct inode *dir, const struct qstr *name, 1491struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name)
1492 struct gfs2_inum_host *inum, unsigned int *type)
1493{ 1492{
1494 struct buffer_head *bh; 1493 struct buffer_head *bh;
1495 struct gfs2_dirent *dent; 1494 struct gfs2_dirent *dent;
1495 struct inode *inode;
1496
1497 dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh);
1498 if (dent) {
1499 if (IS_ERR(dent))
1500 return ERR_PTR(PTR_ERR(dent));
1501 inode = gfs2_inode_lookup(dir->i_sb,
1502 be16_to_cpu(dent->de_type),
1503 be64_to_cpu(dent->de_inum.no_addr),
1504 be64_to_cpu(dent->de_inum.no_formal_ino));
1505 brelse(bh);
1506 return inode;
1507 }
1508 return ERR_PTR(-ENOENT);
1509}
1510
1511int gfs2_dir_check(struct inode *dir, const struct qstr *name,
1512 const struct gfs2_inode *ip)
1513{
1514 struct buffer_head *bh;
1515 struct gfs2_dirent *dent;
1516 int ret = -ENOENT;
1496 1517
1497 dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh); 1518 dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh);
1498 if (dent) { 1519 if (dent) {
1499 if (IS_ERR(dent)) 1520 if (IS_ERR(dent))
1500 return PTR_ERR(dent); 1521 return PTR_ERR(dent);
1501 if (inum) 1522 if (ip) {
1502 gfs2_inum_in(inum, (char *)&dent->de_inum); 1523 if (be64_to_cpu(dent->de_inum.no_addr) != ip->i_no_addr)
1503 if (type) 1524 goto out;
1504 *type = be16_to_cpu(dent->de_type); 1525 if (be64_to_cpu(dent->de_inum.no_formal_ino) !=
1526 ip->i_no_formal_ino)
1527 goto out;
1528 if (unlikely(IF2DT(ip->i_inode.i_mode) !=
1529 be16_to_cpu(dent->de_type))) {
1530 gfs2_consist_inode(GFS2_I(dir));
1531 ret = -EIO;
1532 goto out;
1533 }
1534 }
1535 ret = 0;
1536out:
1505 brelse(bh); 1537 brelse(bh);
1506 return 0;
1507 } 1538 }
1508 return -ENOENT; 1539 return ret;
1509} 1540}
1510 1541
1511static int dir_new_leaf(struct inode *inode, const struct qstr *name) 1542static int dir_new_leaf(struct inode *inode, const struct qstr *name)
@@ -1565,7 +1596,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name)
1565 */ 1596 */
1566 1597
1567int gfs2_dir_add(struct inode *inode, const struct qstr *name, 1598int gfs2_dir_add(struct inode *inode, const struct qstr *name,
1568 const struct gfs2_inum_host *inum, unsigned type) 1599 const struct gfs2_inode *nip, unsigned type)
1569{ 1600{
1570 struct gfs2_inode *ip = GFS2_I(inode); 1601 struct gfs2_inode *ip = GFS2_I(inode);
1571 struct buffer_head *bh; 1602 struct buffer_head *bh;
@@ -1580,7 +1611,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
1580 if (IS_ERR(dent)) 1611 if (IS_ERR(dent))
1581 return PTR_ERR(dent); 1612 return PTR_ERR(dent);
1582 dent = gfs2_init_dirent(inode, dent, name, bh); 1613 dent = gfs2_init_dirent(inode, dent, name, bh);
1583 gfs2_inum_out(inum, (char *)&dent->de_inum); 1614 gfs2_inum_out(nip, dent);
1584 dent->de_type = cpu_to_be16(type); 1615 dent->de_type = cpu_to_be16(type);
1585 if (ip->i_di.di_flags & GFS2_DIF_EXHASH) { 1616 if (ip->i_di.di_flags & GFS2_DIF_EXHASH) {
1586 leaf = (struct gfs2_leaf *)bh->b_data; 1617 leaf = (struct gfs2_leaf *)bh->b_data;
@@ -1592,7 +1623,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
1592 break; 1623 break;
1593 gfs2_trans_add_bh(ip->i_gl, bh, 1); 1624 gfs2_trans_add_bh(ip->i_gl, bh, 1);
1594 ip->i_di.di_entries++; 1625 ip->i_di.di_entries++;
1595 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; 1626 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
1596 gfs2_dinode_out(ip, bh->b_data); 1627 gfs2_dinode_out(ip, bh->b_data);
1597 brelse(bh); 1628 brelse(bh);
1598 error = 0; 1629 error = 0;
@@ -1678,7 +1709,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
1678 gfs2_consist_inode(dip); 1709 gfs2_consist_inode(dip);
1679 gfs2_trans_add_bh(dip->i_gl, bh, 1); 1710 gfs2_trans_add_bh(dip->i_gl, bh, 1);
1680 dip->i_di.di_entries--; 1711 dip->i_di.di_entries--;
1681 dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME_SEC; 1712 dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME;
1682 gfs2_dinode_out(dip, bh->b_data); 1713 gfs2_dinode_out(dip, bh->b_data);
1683 brelse(bh); 1714 brelse(bh);
1684 mark_inode_dirty(&dip->i_inode); 1715 mark_inode_dirty(&dip->i_inode);
@@ -1700,7 +1731,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
1700 */ 1731 */
1701 1732
1702int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, 1733int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
1703 struct gfs2_inum_host *inum, unsigned int new_type) 1734 const struct gfs2_inode *nip, unsigned int new_type)
1704{ 1735{
1705 struct buffer_head *bh; 1736 struct buffer_head *bh;
1706 struct gfs2_dirent *dent; 1737 struct gfs2_dirent *dent;
@@ -1715,7 +1746,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
1715 return PTR_ERR(dent); 1746 return PTR_ERR(dent);
1716 1747
1717 gfs2_trans_add_bh(dip->i_gl, bh, 1); 1748 gfs2_trans_add_bh(dip->i_gl, bh, 1);
1718 gfs2_inum_out(inum, (char *)&dent->de_inum); 1749 gfs2_inum_out(nip, dent);
1719 dent->de_type = cpu_to_be16(new_type); 1750 dent->de_type = cpu_to_be16(new_type);
1720 1751
1721 if (dip->i_di.di_flags & GFS2_DIF_EXHASH) { 1752 if (dip->i_di.di_flags & GFS2_DIF_EXHASH) {
@@ -1726,7 +1757,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
1726 gfs2_trans_add_bh(dip->i_gl, bh, 1); 1757 gfs2_trans_add_bh(dip->i_gl, bh, 1);
1727 } 1758 }
1728 1759
1729 dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME_SEC; 1760 dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME;
1730 gfs2_dinode_out(dip, bh->b_data); 1761 gfs2_dinode_out(dip, bh->b_data);
1731 brelse(bh); 1762 brelse(bh);
1732 return 0; 1763 return 0;
@@ -1867,7 +1898,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1867 for (x = 0; x < rlist.rl_rgrps; x++) { 1898 for (x = 0; x < rlist.rl_rgrps; x++) {
1868 struct gfs2_rgrpd *rgd; 1899 struct gfs2_rgrpd *rgd;
1869 rgd = rlist.rl_ghs[x].gh_gl->gl_object; 1900 rgd = rlist.rl_ghs[x].gh_gl->gl_object;
1870 rg_blocks += rgd->rd_ri.ri_length; 1901 rg_blocks += rgd->rd_length;
1871 } 1902 }
1872 1903
1873 error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs); 1904 error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h
index 48fe89046bba..8a468cac9328 100644
--- a/fs/gfs2/dir.h
+++ b/fs/gfs2/dir.h
@@ -16,15 +16,16 @@ struct inode;
16struct gfs2_inode; 16struct gfs2_inode;
17struct gfs2_inum; 17struct gfs2_inum;
18 18
19int gfs2_dir_search(struct inode *dir, const struct qstr *filename, 19struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *filename);
20 struct gfs2_inum_host *inum, unsigned int *type); 20int gfs2_dir_check(struct inode *dir, const struct qstr *filename,
21 const struct gfs2_inode *ip);
21int gfs2_dir_add(struct inode *inode, const struct qstr *filename, 22int gfs2_dir_add(struct inode *inode, const struct qstr *filename,
22 const struct gfs2_inum_host *inum, unsigned int type); 23 const struct gfs2_inode *ip, unsigned int type);
23int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename); 24int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename);
24int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, 25int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
25 filldir_t filldir); 26 filldir_t filldir);
26int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, 27int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
27 struct gfs2_inum_host *new_inum, unsigned int new_type); 28 const struct gfs2_inode *nip, unsigned int new_type);
28 29
29int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip); 30int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip);
30 31
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c
index 5b83ca6acab1..2a7435b5c4dc 100644
--- a/fs/gfs2/eattr.c
+++ b/fs/gfs2/eattr.c
@@ -254,7 +254,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
254 if (error) 254 if (error)
255 return error; 255 return error;
256 256
257 error = gfs2_trans_begin(sdp, rgd->rd_ri.ri_length + RES_DINODE + 257 error = gfs2_trans_begin(sdp, rgd->rd_length + RES_DINODE +
258 RES_EATTR + RES_STATFS + RES_QUOTA, blks); 258 RES_EATTR + RES_STATFS + RES_QUOTA, blks);
259 if (error) 259 if (error)
260 goto out_gunlock; 260 goto out_gunlock;
@@ -300,7 +300,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
300 300
301 error = gfs2_meta_inode_buffer(ip, &dibh); 301 error = gfs2_meta_inode_buffer(ip, &dibh);
302 if (!error) { 302 if (!error) {
303 ip->i_inode.i_ctime = CURRENT_TIME_SEC; 303 ip->i_inode.i_ctime = CURRENT_TIME;
304 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 304 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
305 gfs2_dinode_out(ip, dibh->b_data); 305 gfs2_dinode_out(ip, dibh->b_data);
306 brelse(dibh); 306 brelse(dibh);
@@ -700,7 +700,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
700 goto out_gunlock_q; 700 goto out_gunlock_q;
701 701
702 error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), 702 error = gfs2_trans_begin(GFS2_SB(&ip->i_inode),
703 blks + al->al_rgd->rd_ri.ri_length + 703 blks + al->al_rgd->rd_length +
704 RES_DINODE + RES_STATFS + RES_QUOTA, 0); 704 RES_DINODE + RES_STATFS + RES_QUOTA, 0);
705 if (error) 705 if (error)
706 goto out_ipres; 706 goto out_ipres;
@@ -717,7 +717,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
717 (er->er_mode & S_IFMT)); 717 (er->er_mode & S_IFMT));
718 ip->i_inode.i_mode = er->er_mode; 718 ip->i_inode.i_mode = er->er_mode;
719 } 719 }
720 ip->i_inode.i_ctime = CURRENT_TIME_SEC; 720 ip->i_inode.i_ctime = CURRENT_TIME;
721 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 721 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
722 gfs2_dinode_out(ip, dibh->b_data); 722 gfs2_dinode_out(ip, dibh->b_data);
723 brelse(dibh); 723 brelse(dibh);
@@ -852,7 +852,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
852 (ip->i_inode.i_mode & S_IFMT) == (er->er_mode & S_IFMT)); 852 (ip->i_inode.i_mode & S_IFMT) == (er->er_mode & S_IFMT));
853 ip->i_inode.i_mode = er->er_mode; 853 ip->i_inode.i_mode = er->er_mode;
854 } 854 }
855 ip->i_inode.i_ctime = CURRENT_TIME_SEC; 855 ip->i_inode.i_ctime = CURRENT_TIME;
856 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 856 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
857 gfs2_dinode_out(ip, dibh->b_data); 857 gfs2_dinode_out(ip, dibh->b_data);
858 brelse(dibh); 858 brelse(dibh);
@@ -1133,7 +1133,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
1133 1133
1134 error = gfs2_meta_inode_buffer(ip, &dibh); 1134 error = gfs2_meta_inode_buffer(ip, &dibh);
1135 if (!error) { 1135 if (!error) {
1136 ip->i_inode.i_ctime = CURRENT_TIME_SEC; 1136 ip->i_inode.i_ctime = CURRENT_TIME;
1137 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1137 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1138 gfs2_dinode_out(ip, dibh->b_data); 1138 gfs2_dinode_out(ip, dibh->b_data);
1139 brelse(dibh); 1139 brelse(dibh);
@@ -1352,7 +1352,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
1352 for (x = 0; x < rlist.rl_rgrps; x++) { 1352 for (x = 0; x < rlist.rl_rgrps; x++) {
1353 struct gfs2_rgrpd *rgd; 1353 struct gfs2_rgrpd *rgd;
1354 rgd = rlist.rl_ghs[x].gh_gl->gl_object; 1354 rgd = rlist.rl_ghs[x].gh_gl->gl_object;
1355 rg_blocks += rgd->rd_ri.ri_length; 1355 rg_blocks += rgd->rd_length;
1356 } 1356 }
1357 1357
1358 error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs); 1358 error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 1815429a2978..3f0974e1afef 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -422,11 +422,11 @@ void gfs2_holder_uninit(struct gfs2_holder *gh)
422static void gfs2_holder_wake(struct gfs2_holder *gh) 422static void gfs2_holder_wake(struct gfs2_holder *gh)
423{ 423{
424 clear_bit(HIF_WAIT, &gh->gh_iflags); 424 clear_bit(HIF_WAIT, &gh->gh_iflags);
425 smp_mb(); 425 smp_mb__after_clear_bit();
426 wake_up_bit(&gh->gh_iflags, HIF_WAIT); 426 wake_up_bit(&gh->gh_iflags, HIF_WAIT);
427} 427}
428 428
429static int holder_wait(void *word) 429static int just_schedule(void *word)
430{ 430{
431 schedule(); 431 schedule();
432 return 0; 432 return 0;
@@ -435,7 +435,20 @@ static int holder_wait(void *word)
435static void wait_on_holder(struct gfs2_holder *gh) 435static void wait_on_holder(struct gfs2_holder *gh)
436{ 436{
437 might_sleep(); 437 might_sleep();
438 wait_on_bit(&gh->gh_iflags, HIF_WAIT, holder_wait, TASK_UNINTERRUPTIBLE); 438 wait_on_bit(&gh->gh_iflags, HIF_WAIT, just_schedule, TASK_UNINTERRUPTIBLE);
439}
440
441static void gfs2_demote_wake(struct gfs2_glock *gl)
442{
443 clear_bit(GLF_DEMOTE, &gl->gl_flags);
444 smp_mb__after_clear_bit();
445 wake_up_bit(&gl->gl_flags, GLF_DEMOTE);
446}
447
448static void wait_on_demote(struct gfs2_glock *gl)
449{
450 might_sleep();
451 wait_on_bit(&gl->gl_flags, GLF_DEMOTE, just_schedule, TASK_UNINTERRUPTIBLE);
439} 452}
440 453
441/** 454/**
@@ -528,7 +541,7 @@ static int rq_demote(struct gfs2_glock *gl)
528 541
529 if (gl->gl_state == gl->gl_demote_state || 542 if (gl->gl_state == gl->gl_demote_state ||
530 gl->gl_state == LM_ST_UNLOCKED) { 543 gl->gl_state == LM_ST_UNLOCKED) {
531 clear_bit(GLF_DEMOTE, &gl->gl_flags); 544 gfs2_demote_wake(gl);
532 return 0; 545 return 0;
533 } 546 }
534 set_bit(GLF_LOCK, &gl->gl_flags); 547 set_bit(GLF_LOCK, &gl->gl_flags);
@@ -666,12 +679,22 @@ static void gfs2_glmutex_unlock(struct gfs2_glock *gl)
666 * practise: LM_ST_SHARED and LM_ST_UNLOCKED 679 * practise: LM_ST_SHARED and LM_ST_UNLOCKED
667 */ 680 */
668 681
669static void handle_callback(struct gfs2_glock *gl, unsigned int state) 682static void handle_callback(struct gfs2_glock *gl, unsigned int state, int remote)
670{ 683{
671 spin_lock(&gl->gl_spin); 684 spin_lock(&gl->gl_spin);
672 if (test_and_set_bit(GLF_DEMOTE, &gl->gl_flags) == 0) { 685 if (test_and_set_bit(GLF_DEMOTE, &gl->gl_flags) == 0) {
673 gl->gl_demote_state = state; 686 gl->gl_demote_state = state;
674 gl->gl_demote_time = jiffies; 687 gl->gl_demote_time = jiffies;
688 if (remote && gl->gl_ops->go_type == LM_TYPE_IOPEN &&
689 gl->gl_object) {
690 struct inode *inode = igrab(gl->gl_object);
691 spin_unlock(&gl->gl_spin);
692 if (inode) {
693 d_prune_aliases(inode);
694 iput(inode);
695 }
696 return;
697 }
675 } else if (gl->gl_demote_state != LM_ST_UNLOCKED) { 698 } else if (gl->gl_demote_state != LM_ST_UNLOCKED) {
676 gl->gl_demote_state = state; 699 gl->gl_demote_state = state;
677 } 700 }
@@ -740,7 +763,7 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
740 if (ret & LM_OUT_CANCELED) 763 if (ret & LM_OUT_CANCELED)
741 op_done = 0; 764 op_done = 0;
742 else 765 else
743 clear_bit(GLF_DEMOTE, &gl->gl_flags); 766 gfs2_demote_wake(gl);
744 } else { 767 } else {
745 spin_lock(&gl->gl_spin); 768 spin_lock(&gl->gl_spin);
746 list_del_init(&gh->gh_list); 769 list_del_init(&gh->gh_list);
@@ -848,7 +871,7 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
848 gfs2_assert_warn(sdp, !ret); 871 gfs2_assert_warn(sdp, !ret);
849 872
850 state_change(gl, LM_ST_UNLOCKED); 873 state_change(gl, LM_ST_UNLOCKED);
851 clear_bit(GLF_DEMOTE, &gl->gl_flags); 874 gfs2_demote_wake(gl);
852 875
853 if (glops->go_inval) 876 if (glops->go_inval)
854 glops->go_inval(gl, DIO_METADATA); 877 glops->go_inval(gl, DIO_METADATA);
@@ -1174,7 +1197,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
1174 const struct gfs2_glock_operations *glops = gl->gl_ops; 1197 const struct gfs2_glock_operations *glops = gl->gl_ops;
1175 1198
1176 if (gh->gh_flags & GL_NOCACHE) 1199 if (gh->gh_flags & GL_NOCACHE)
1177 handle_callback(gl, LM_ST_UNLOCKED); 1200 handle_callback(gl, LM_ST_UNLOCKED, 0);
1178 1201
1179 gfs2_glmutex_lock(gl); 1202 gfs2_glmutex_lock(gl);
1180 1203
@@ -1196,6 +1219,13 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
1196 spin_unlock(&gl->gl_spin); 1219 spin_unlock(&gl->gl_spin);
1197} 1220}
1198 1221
1222void gfs2_glock_dq_wait(struct gfs2_holder *gh)
1223{
1224 struct gfs2_glock *gl = gh->gh_gl;
1225 gfs2_glock_dq(gh);
1226 wait_on_demote(gl);
1227}
1228
1199/** 1229/**
1200 * gfs2_glock_dq_uninit - dequeue a holder from a glock and initialize it 1230 * gfs2_glock_dq_uninit - dequeue a holder from a glock and initialize it
1201 * @gh: the holder structure 1231 * @gh: the holder structure
@@ -1297,10 +1327,6 @@ static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs,
1297 * @num_gh: the number of structures 1327 * @num_gh: the number of structures
1298 * @ghs: an array of struct gfs2_holder structures 1328 * @ghs: an array of struct gfs2_holder structures
1299 * 1329 *
1300 * Figure out how big an impact this function has. Either:
1301 * 1) Replace this code with code that calls gfs2_glock_prefetch()
1302 * 2) Forget async stuff and just call nq_m_sync()
1303 * 3) Leave it like it is
1304 * 1330 *
1305 * Returns: 0 on success (all glocks acquired), 1331 * Returns: 0 on success (all glocks acquired),
1306 * errno on failure (no glocks acquired) 1332 * errno on failure (no glocks acquired)
@@ -1308,62 +1334,28 @@ static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs,
1308 1334
1309int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs) 1335int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs)
1310{ 1336{
1311 int *e; 1337 struct gfs2_holder *tmp[4];
1312 unsigned int x; 1338 struct gfs2_holder **pph = tmp;
1313 int borked = 0, serious = 0;
1314 int error = 0; 1339 int error = 0;
1315 1340
1316 if (!num_gh) 1341 switch(num_gh) {
1342 case 0:
1317 return 0; 1343 return 0;
1318 1344 case 1:
1319 if (num_gh == 1) {
1320 ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC); 1345 ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
1321 return gfs2_glock_nq(ghs); 1346 return gfs2_glock_nq(ghs);
1322 } 1347 default:
1323 1348 if (num_gh <= 4)
1324 e = kcalloc(num_gh, sizeof(struct gfs2_holder *), GFP_KERNEL);
1325 if (!e)
1326 return -ENOMEM;
1327
1328 for (x = 0; x < num_gh; x++) {
1329 ghs[x].gh_flags |= LM_FLAG_TRY | GL_ASYNC;
1330 error = gfs2_glock_nq(&ghs[x]);
1331 if (error) {
1332 borked = 1;
1333 serious = error;
1334 num_gh = x;
1335 break; 1349 break;
1336 } 1350 pph = kmalloc(num_gh * sizeof(struct gfs2_holder *), GFP_NOFS);
1337 } 1351 if (!pph)
1338 1352 return -ENOMEM;
1339 for (x = 0; x < num_gh; x++) {
1340 error = e[x] = glock_wait_internal(&ghs[x]);
1341 if (error) {
1342 borked = 1;
1343 if (error != GLR_TRYFAILED && error != GLR_CANCELED)
1344 serious = error;
1345 }
1346 } 1353 }
1347 1354
1348 if (!borked) { 1355 error = nq_m_sync(num_gh, ghs, pph);
1349 kfree(e);
1350 return 0;
1351 }
1352
1353 for (x = 0; x < num_gh; x++)
1354 if (!e[x])
1355 gfs2_glock_dq(&ghs[x]);
1356
1357 if (serious)
1358 error = serious;
1359 else {
1360 for (x = 0; x < num_gh; x++)
1361 gfs2_holder_reinit(ghs[x].gh_state, ghs[x].gh_flags,
1362 &ghs[x]);
1363 error = nq_m_sync(num_gh, ghs, (struct gfs2_holder **)e);
1364 }
1365 1356
1366 kfree(e); 1357 if (pph != tmp)
1358 kfree(pph);
1367 1359
1368 return error; 1360 return error;
1369} 1361}
@@ -1456,7 +1448,7 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name,
1456 if (!gl) 1448 if (!gl)
1457 return; 1449 return;
1458 1450
1459 handle_callback(gl, state); 1451 handle_callback(gl, state, 1);
1460 1452
1461 spin_lock(&gl->gl_spin); 1453 spin_lock(&gl->gl_spin);
1462 run_queue(gl); 1454 run_queue(gl);
@@ -1596,7 +1588,7 @@ void gfs2_reclaim_glock(struct gfs2_sbd *sdp)
1596 if (gfs2_glmutex_trylock(gl)) { 1588 if (gfs2_glmutex_trylock(gl)) {
1597 if (list_empty(&gl->gl_holders) && 1589 if (list_empty(&gl->gl_holders) &&
1598 gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) 1590 gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl))
1599 handle_callback(gl, LM_ST_UNLOCKED); 1591 handle_callback(gl, LM_ST_UNLOCKED, 0);
1600 gfs2_glmutex_unlock(gl); 1592 gfs2_glmutex_unlock(gl);
1601 } 1593 }
1602 1594
@@ -1709,7 +1701,7 @@ static void clear_glock(struct gfs2_glock *gl)
1709 if (gfs2_glmutex_trylock(gl)) { 1701 if (gfs2_glmutex_trylock(gl)) {
1710 if (list_empty(&gl->gl_holders) && 1702 if (list_empty(&gl->gl_holders) &&
1711 gl->gl_state != LM_ST_UNLOCKED) 1703 gl->gl_state != LM_ST_UNLOCKED)
1712 handle_callback(gl, LM_ST_UNLOCKED); 1704 handle_callback(gl, LM_ST_UNLOCKED, 0);
1713 gfs2_glmutex_unlock(gl); 1705 gfs2_glmutex_unlock(gl);
1714 } 1706 }
1715} 1707}
@@ -1823,7 +1815,8 @@ static int dump_inode(struct glock_iter *gi, struct gfs2_inode *ip)
1823 1815
1824 print_dbg(gi, " Inode:\n"); 1816 print_dbg(gi, " Inode:\n");
1825 print_dbg(gi, " num = %llu/%llu\n", 1817 print_dbg(gi, " num = %llu/%llu\n",
1826 ip->i_num.no_formal_ino, ip->i_num.no_addr); 1818 (unsigned long long)ip->i_no_formal_ino,
1819 (unsigned long long)ip->i_no_addr);
1827 print_dbg(gi, " type = %u\n", IF2DT(ip->i_inode.i_mode)); 1820 print_dbg(gi, " type = %u\n", IF2DT(ip->i_inode.i_mode));
1828 print_dbg(gi, " i_flags ="); 1821 print_dbg(gi, " i_flags =");
1829 for (x = 0; x < 32; x++) 1822 for (x = 0; x < 32; x++)
@@ -1909,8 +1902,8 @@ static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl)
1909 } 1902 }
1910 if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { 1903 if (test_bit(GLF_DEMOTE, &gl->gl_flags)) {
1911 print_dbg(gi, " Demotion req to state %u (%llu uS ago)\n", 1904 print_dbg(gi, " Demotion req to state %u (%llu uS ago)\n",
1912 gl->gl_demote_state, 1905 gl->gl_demote_state, (unsigned long long)
1913 (u64)(jiffies - gl->gl_demote_time)*(1000000/HZ)); 1906 (jiffies - gl->gl_demote_time)*(1000000/HZ));
1914 } 1907 }
1915 if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) { 1908 if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) {
1916 if (!test_bit(GLF_LOCK, &gl->gl_flags) && 1909 if (!test_bit(GLF_LOCK, &gl->gl_flags) &&
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index b3e152db70c8..7721ca3fff9e 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -87,6 +87,7 @@ int gfs2_glock_nq(struct gfs2_holder *gh);
87int gfs2_glock_poll(struct gfs2_holder *gh); 87int gfs2_glock_poll(struct gfs2_holder *gh);
88int gfs2_glock_wait(struct gfs2_holder *gh); 88int gfs2_glock_wait(struct gfs2_holder *gh);
89void gfs2_glock_dq(struct gfs2_holder *gh); 89void gfs2_glock_dq(struct gfs2_holder *gh);
90void gfs2_glock_dq_wait(struct gfs2_holder *gh);
90 91
91void gfs2_glock_dq_uninit(struct gfs2_holder *gh); 92void gfs2_glock_dq_uninit(struct gfs2_holder *gh);
92int gfs2_glock_nq_num(struct gfs2_sbd *sdp, 93int gfs2_glock_nq_num(struct gfs2_sbd *sdp,
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 7b82657a9910..777ca46010e8 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -156,9 +156,9 @@ static void inode_go_sync(struct gfs2_glock *gl)
156 ip = NULL; 156 ip = NULL;
157 157
158 if (test_bit(GLF_DIRTY, &gl->gl_flags)) { 158 if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
159 gfs2_log_flush(gl->gl_sbd, gl);
160 if (ip) 159 if (ip)
161 filemap_fdatawrite(ip->i_inode.i_mapping); 160 filemap_fdatawrite(ip->i_inode.i_mapping);
161 gfs2_log_flush(gl->gl_sbd, gl);
162 gfs2_meta_sync(gl); 162 gfs2_meta_sync(gl);
163 if (ip) { 163 if (ip) {
164 struct address_space *mapping = ip->i_inode.i_mapping; 164 struct address_space *mapping = ip->i_inode.i_mapping;
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index d995441373ab..170ba93829c0 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -28,6 +28,14 @@ struct gfs2_sbd;
28 28
29typedef void (*gfs2_glop_bh_t) (struct gfs2_glock *gl, unsigned int ret); 29typedef void (*gfs2_glop_bh_t) (struct gfs2_glock *gl, unsigned int ret);
30 30
31struct gfs2_log_header_host {
32 u64 lh_sequence; /* Sequence number of this transaction */
33 u32 lh_flags; /* GFS2_LOG_HEAD_... */
34 u32 lh_tail; /* Block number of log tail */
35 u32 lh_blkno;
36 u32 lh_hash;
37};
38
31/* 39/*
32 * Structure of operations that are associated with each 40 * Structure of operations that are associated with each
33 * type of element in the log. 41 * type of element in the log.
@@ -60,12 +68,23 @@ struct gfs2_bitmap {
60 u32 bi_len; 68 u32 bi_len;
61}; 69};
62 70
71struct gfs2_rgrp_host {
72 u32 rg_flags;
73 u32 rg_free;
74 u32 rg_dinodes;
75 u64 rg_igeneration;
76};
77
63struct gfs2_rgrpd { 78struct gfs2_rgrpd {
64 struct list_head rd_list; /* Link with superblock */ 79 struct list_head rd_list; /* Link with superblock */
65 struct list_head rd_list_mru; 80 struct list_head rd_list_mru;
66 struct list_head rd_recent; /* Recently used rgrps */ 81 struct list_head rd_recent; /* Recently used rgrps */
67 struct gfs2_glock *rd_gl; /* Glock for this rgrp */ 82 struct gfs2_glock *rd_gl; /* Glock for this rgrp */
68 struct gfs2_rindex_host rd_ri; 83 u64 rd_addr; /* grp block disk address */
84 u64 rd_data0; /* first data location */
85 u32 rd_length; /* length of rgrp header in fs blocks */
86 u32 rd_data; /* num of data blocks in rgrp */
87 u32 rd_bitbytes; /* number of bytes in data bitmaps */
69 struct gfs2_rgrp_host rd_rg; 88 struct gfs2_rgrp_host rd_rg;
70 u64 rd_rg_vn; 89 u64 rd_rg_vn;
71 struct gfs2_bitmap *rd_bits; 90 struct gfs2_bitmap *rd_bits;
@@ -76,6 +95,8 @@ struct gfs2_rgrpd {
76 u32 rd_last_alloc_data; 95 u32 rd_last_alloc_data;
77 u32 rd_last_alloc_meta; 96 u32 rd_last_alloc_meta;
78 struct gfs2_sbd *rd_sbd; 97 struct gfs2_sbd *rd_sbd;
98 unsigned long rd_flags;
99#define GFS2_RDF_CHECK 0x0001 /* Need to check for unlinked inodes */
79}; 100};
80 101
81enum gfs2_state_bits { 102enum gfs2_state_bits {
@@ -211,10 +232,24 @@ enum {
211 GIF_SW_PAGED = 3, 232 GIF_SW_PAGED = 3,
212}; 233};
213 234
235struct gfs2_dinode_host {
236 u64 di_size; /* number of bytes in file */
237 u64 di_blocks; /* number of blocks in file */
238 u64 di_goal_meta; /* rgrp to alloc from next */
239 u64 di_goal_data; /* data block goal */
240 u64 di_generation; /* generation number for NFS */
241 u32 di_flags; /* GFS2_DIF_... */
242 u16 di_height; /* height of metadata */
243 /* These only apply to directories */
244 u16 di_depth; /* Number of bits in the table */
245 u32 di_entries; /* The number of entries in the directory */
246 u64 di_eattr; /* extended attribute block number */
247};
248
214struct gfs2_inode { 249struct gfs2_inode {
215 struct inode i_inode; 250 struct inode i_inode;
216 struct gfs2_inum_host i_num; 251 u64 i_no_addr;
217 252 u64 i_no_formal_ino;
218 unsigned long i_flags; /* GIF_... */ 253 unsigned long i_flags; /* GIF_... */
219 254
220 struct gfs2_dinode_host i_di; /* To be replaced by ref to block */ 255 struct gfs2_dinode_host i_di; /* To be replaced by ref to block */
@@ -275,14 +310,6 @@ enum {
275 QDF_LOCKED = 2, 310 QDF_LOCKED = 2,
276}; 311};
277 312
278struct gfs2_quota_lvb {
279 __be32 qb_magic;
280 u32 __pad;
281 __be64 qb_limit; /* Hard limit of # blocks to alloc */
282 __be64 qb_warn; /* Warn user when alloc is above this # */
283 __be64 qb_value; /* Current # blocks allocated */
284};
285
286struct gfs2_quota_data { 313struct gfs2_quota_data {
287 struct list_head qd_list; 314 struct list_head qd_list;
288 unsigned int qd_count; 315 unsigned int qd_count;
@@ -327,7 +354,9 @@ struct gfs2_trans {
327 354
328 unsigned int tr_num_buf; 355 unsigned int tr_num_buf;
329 unsigned int tr_num_buf_new; 356 unsigned int tr_num_buf_new;
357 unsigned int tr_num_databuf_new;
330 unsigned int tr_num_buf_rm; 358 unsigned int tr_num_buf_rm;
359 unsigned int tr_num_databuf_rm;
331 struct list_head tr_list_buf; 360 struct list_head tr_list_buf;
332 361
333 unsigned int tr_num_revoke; 362 unsigned int tr_num_revoke;
@@ -354,6 +383,12 @@ struct gfs2_jdesc {
354 unsigned int jd_blocks; 383 unsigned int jd_blocks;
355}; 384};
356 385
386struct gfs2_statfs_change_host {
387 s64 sc_total;
388 s64 sc_free;
389 s64 sc_dinodes;
390};
391
357#define GFS2_GLOCKD_DEFAULT 1 392#define GFS2_GLOCKD_DEFAULT 1
358#define GFS2_GLOCKD_MAX 16 393#define GFS2_GLOCKD_MAX 16
359 394
@@ -426,6 +461,28 @@ enum {
426 461
427#define GFS2_FSNAME_LEN 256 462#define GFS2_FSNAME_LEN 256
428 463
464struct gfs2_inum_host {
465 u64 no_formal_ino;
466 u64 no_addr;
467};
468
469struct gfs2_sb_host {
470 u32 sb_magic;
471 u32 sb_type;
472 u32 sb_format;
473
474 u32 sb_fs_format;
475 u32 sb_multihost_format;
476 u32 sb_bsize;
477 u32 sb_bsize_shift;
478
479 struct gfs2_inum_host sb_master_dir;
480 struct gfs2_inum_host sb_root_dir;
481
482 char sb_lockproto[GFS2_LOCKNAME_LEN];
483 char sb_locktable[GFS2_LOCKNAME_LEN];
484};
485
429struct gfs2_sbd { 486struct gfs2_sbd {
430 struct super_block *sd_vfs; 487 struct super_block *sd_vfs;
431 struct super_block *sd_vfs_meta; 488 struct super_block *sd_vfs_meta;
@@ -544,6 +601,7 @@ struct gfs2_sbd {
544 601
545 unsigned int sd_log_blks_reserved; 602 unsigned int sd_log_blks_reserved;
546 unsigned int sd_log_commited_buf; 603 unsigned int sd_log_commited_buf;
604 unsigned int sd_log_commited_databuf;
547 unsigned int sd_log_commited_revoke; 605 unsigned int sd_log_commited_revoke;
548 606
549 unsigned int sd_log_num_gl; 607 unsigned int sd_log_num_gl;
@@ -552,7 +610,6 @@ struct gfs2_sbd {
552 unsigned int sd_log_num_rg; 610 unsigned int sd_log_num_rg;
553 unsigned int sd_log_num_databuf; 611 unsigned int sd_log_num_databuf;
554 unsigned int sd_log_num_jdata; 612 unsigned int sd_log_num_jdata;
555 unsigned int sd_log_num_hdrs;
556 613
557 struct list_head sd_log_le_gl; 614 struct list_head sd_log_le_gl;
558 struct list_head sd_log_le_buf; 615 struct list_head sd_log_le_buf;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index df0b8b3018b9..34f7bcdea1e9 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -38,12 +38,17 @@
38#include "trans.h" 38#include "trans.h"
39#include "util.h" 39#include "util.h"
40 40
41struct gfs2_inum_range_host {
42 u64 ir_start;
43 u64 ir_length;
44};
45
41static int iget_test(struct inode *inode, void *opaque) 46static int iget_test(struct inode *inode, void *opaque)
42{ 47{
43 struct gfs2_inode *ip = GFS2_I(inode); 48 struct gfs2_inode *ip = GFS2_I(inode);
44 struct gfs2_inum_host *inum = opaque; 49 u64 *no_addr = opaque;
45 50
46 if (ip->i_num.no_addr == inum->no_addr && 51 if (ip->i_no_addr == *no_addr &&
47 inode->i_private != NULL) 52 inode->i_private != NULL)
48 return 1; 53 return 1;
49 54
@@ -53,37 +58,70 @@ static int iget_test(struct inode *inode, void *opaque)
53static int iget_set(struct inode *inode, void *opaque) 58static int iget_set(struct inode *inode, void *opaque)
54{ 59{
55 struct gfs2_inode *ip = GFS2_I(inode); 60 struct gfs2_inode *ip = GFS2_I(inode);
56 struct gfs2_inum_host *inum = opaque; 61 u64 *no_addr = opaque;
57 62
58 ip->i_num = *inum; 63 inode->i_ino = (unsigned long)*no_addr;
59 inode->i_ino = inum->no_addr; 64 ip->i_no_addr = *no_addr;
60 return 0; 65 return 0;
61} 66}
62 67
63struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum_host *inum) 68struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr)
69{
70 unsigned long hash = (unsigned long)no_addr;
71 return ilookup5(sb, hash, iget_test, &no_addr);
72}
73
74static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr)
64{ 75{
65 return ilookup5(sb, (unsigned long)inum->no_addr, 76 unsigned long hash = (unsigned long)no_addr;
66 iget_test, inum); 77 return iget5_locked(sb, hash, iget_test, iget_set, &no_addr);
67} 78}
68 79
69static struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum_host *inum) 80/**
81 * GFS2 lookup code fills in vfs inode contents based on info obtained
82 * from directory entry inside gfs2_inode_lookup(). This has caused issues
83 * with NFS code path since its get_dentry routine doesn't have the relevant
84 * directory entry when gfs2_inode_lookup() is invoked. Part of the code
85 * segment inside gfs2_inode_lookup code needs to get moved around.
86 *
87 * Clean up I_LOCK and I_NEW as well.
88 **/
89
90void gfs2_set_iop(struct inode *inode)
70{ 91{
71 return iget5_locked(sb, (unsigned long)inum->no_addr, 92 umode_t mode = inode->i_mode;
72 iget_test, iget_set, inum); 93
94 if (S_ISREG(mode)) {
95 inode->i_op = &gfs2_file_iops;
96 inode->i_fop = &gfs2_file_fops;
97 inode->i_mapping->a_ops = &gfs2_file_aops;
98 } else if (S_ISDIR(mode)) {
99 inode->i_op = &gfs2_dir_iops;
100 inode->i_fop = &gfs2_dir_fops;
101 } else if (S_ISLNK(mode)) {
102 inode->i_op = &gfs2_symlink_iops;
103 } else {
104 inode->i_op = &gfs2_dev_iops;
105 }
106
107 unlock_new_inode(inode);
73} 108}
74 109
75/** 110/**
76 * gfs2_inode_lookup - Lookup an inode 111 * gfs2_inode_lookup - Lookup an inode
77 * @sb: The super block 112 * @sb: The super block
78 * @inum: The inode number 113 * @no_addr: The inode number
79 * @type: The type of the inode 114 * @type: The type of the inode
80 * 115 *
81 * Returns: A VFS inode, or an error 116 * Returns: A VFS inode, or an error
82 */ 117 */
83 118
84struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *inum, unsigned int type) 119struct inode *gfs2_inode_lookup(struct super_block *sb,
120 unsigned int type,
121 u64 no_addr,
122 u64 no_formal_ino)
85{ 123{
86 struct inode *inode = gfs2_iget(sb, inum); 124 struct inode *inode = gfs2_iget(sb, no_addr);
87 struct gfs2_inode *ip = GFS2_I(inode); 125 struct gfs2_inode *ip = GFS2_I(inode);
88 struct gfs2_glock *io_gl; 126 struct gfs2_glock *io_gl;
89 int error; 127 int error;
@@ -93,29 +131,15 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *i
93 131
94 if (inode->i_state & I_NEW) { 132 if (inode->i_state & I_NEW) {
95 struct gfs2_sbd *sdp = GFS2_SB(inode); 133 struct gfs2_sbd *sdp = GFS2_SB(inode);
96 umode_t mode = DT2IF(type);
97 inode->i_private = ip; 134 inode->i_private = ip;
98 inode->i_mode = mode; 135 ip->i_no_formal_ino = no_formal_ino;
99
100 if (S_ISREG(mode)) {
101 inode->i_op = &gfs2_file_iops;
102 inode->i_fop = &gfs2_file_fops;
103 inode->i_mapping->a_ops = &gfs2_file_aops;
104 } else if (S_ISDIR(mode)) {
105 inode->i_op = &gfs2_dir_iops;
106 inode->i_fop = &gfs2_dir_fops;
107 } else if (S_ISLNK(mode)) {
108 inode->i_op = &gfs2_symlink_iops;
109 } else {
110 inode->i_op = &gfs2_dev_iops;
111 }
112 136
113 error = gfs2_glock_get(sdp, inum->no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); 137 error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
114 if (unlikely(error)) 138 if (unlikely(error))
115 goto fail; 139 goto fail;
116 ip->i_gl->gl_object = ip; 140 ip->i_gl->gl_object = ip;
117 141
118 error = gfs2_glock_get(sdp, inum->no_addr, &gfs2_iopen_glops, CREATE, &io_gl); 142 error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
119 if (unlikely(error)) 143 if (unlikely(error))
120 goto fail_put; 144 goto fail_put;
121 145
@@ -123,12 +147,38 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *i
123 error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); 147 error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
124 if (unlikely(error)) 148 if (unlikely(error))
125 goto fail_iopen; 149 goto fail_iopen;
150 ip->i_iopen_gh.gh_gl->gl_object = ip;
126 151
127 gfs2_glock_put(io_gl); 152 gfs2_glock_put(io_gl);
128 unlock_new_inode(inode); 153
154 if ((type == DT_UNKNOWN) && (no_formal_ino == 0))
155 goto gfs2_nfsbypass;
156
157 inode->i_mode = DT2IF(type);
158
159 /*
160 * We must read the inode in order to work out its type in
161 * this case. Note that this doesn't happen often as we normally
162 * know the type beforehand. This code path only occurs during
163 * unlinked inode recovery (where it is safe to do this glock,
164 * which is not true in the general case).
165 */
166 if (type == DT_UNKNOWN) {
167 struct gfs2_holder gh;
168 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
169 if (unlikely(error))
170 goto fail_glock;
171 /* Inode is now uptodate */
172 gfs2_glock_dq_uninit(&gh);
173 }
174
175 gfs2_set_iop(inode);
129 } 176 }
130 177
178gfs2_nfsbypass:
131 return inode; 179 return inode;
180fail_glock:
181 gfs2_glock_dq(&ip->i_iopen_gh);
132fail_iopen: 182fail_iopen:
133 gfs2_glock_put(io_gl); 183 gfs2_glock_put(io_gl);
134fail_put: 184fail_put:
@@ -144,14 +194,12 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
144 struct gfs2_dinode_host *di = &ip->i_di; 194 struct gfs2_dinode_host *di = &ip->i_di;
145 const struct gfs2_dinode *str = buf; 195 const struct gfs2_dinode *str = buf;
146 196
147 if (ip->i_num.no_addr != be64_to_cpu(str->di_num.no_addr)) { 197 if (ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)) {
148 if (gfs2_consist_inode(ip)) 198 if (gfs2_consist_inode(ip))
149 gfs2_dinode_print(ip); 199 gfs2_dinode_print(ip);
150 return -EIO; 200 return -EIO;
151 } 201 }
152 if (ip->i_num.no_formal_ino != be64_to_cpu(str->di_num.no_formal_ino)) 202 ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino);
153 return -ESTALE;
154
155 ip->i_inode.i_mode = be32_to_cpu(str->di_mode); 203 ip->i_inode.i_mode = be32_to_cpu(str->di_mode);
156 ip->i_inode.i_rdev = 0; 204 ip->i_inode.i_rdev = 0;
157 switch (ip->i_inode.i_mode & S_IFMT) { 205 switch (ip->i_inode.i_mode & S_IFMT) {
@@ -175,11 +223,11 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
175 di->di_blocks = be64_to_cpu(str->di_blocks); 223 di->di_blocks = be64_to_cpu(str->di_blocks);
176 gfs2_set_inode_blocks(&ip->i_inode); 224 gfs2_set_inode_blocks(&ip->i_inode);
177 ip->i_inode.i_atime.tv_sec = be64_to_cpu(str->di_atime); 225 ip->i_inode.i_atime.tv_sec = be64_to_cpu(str->di_atime);
178 ip->i_inode.i_atime.tv_nsec = 0; 226 ip->i_inode.i_atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
179 ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime); 227 ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
180 ip->i_inode.i_mtime.tv_nsec = 0; 228 ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
181 ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime); 229 ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
182 ip->i_inode.i_ctime.tv_nsec = 0; 230 ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);
183 231
184 di->di_goal_meta = be64_to_cpu(str->di_goal_meta); 232 di->di_goal_meta = be64_to_cpu(str->di_goal_meta);
185 di->di_goal_data = be64_to_cpu(str->di_goal_data); 233 di->di_goal_data = be64_to_cpu(str->di_goal_data);
@@ -247,7 +295,7 @@ int gfs2_dinode_dealloc(struct gfs2_inode *ip)
247 if (error) 295 if (error)
248 goto out_qs; 296 goto out_qs;
249 297
250 rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr); 298 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
251 if (!rgd) { 299 if (!rgd) {
252 gfs2_consist_inode(ip); 300 gfs2_consist_inode(ip);
253 error = -EIO; 301 error = -EIO;
@@ -314,7 +362,7 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
314 else 362 else
315 drop_nlink(&ip->i_inode); 363 drop_nlink(&ip->i_inode);
316 364
317 ip->i_inode.i_ctime = CURRENT_TIME_SEC; 365 ip->i_inode.i_ctime = CURRENT_TIME;
318 366
319 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 367 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
320 gfs2_dinode_out(ip, dibh->b_data); 368 gfs2_dinode_out(ip, dibh->b_data);
@@ -366,9 +414,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
366 struct super_block *sb = dir->i_sb; 414 struct super_block *sb = dir->i_sb;
367 struct gfs2_inode *dip = GFS2_I(dir); 415 struct gfs2_inode *dip = GFS2_I(dir);
368 struct gfs2_holder d_gh; 416 struct gfs2_holder d_gh;
369 struct gfs2_inum_host inum; 417 int error = 0;
370 unsigned int type;
371 int error;
372 struct inode *inode = NULL; 418 struct inode *inode = NULL;
373 int unlock = 0; 419 int unlock = 0;
374 420
@@ -395,12 +441,9 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
395 goto out; 441 goto out;
396 } 442 }
397 443
398 error = gfs2_dir_search(dir, name, &inum, &type); 444 inode = gfs2_dir_search(dir, name);
399 if (error) 445 if (IS_ERR(inode))
400 goto out; 446 error = PTR_ERR(inode);
401
402 inode = gfs2_inode_lookup(sb, &inum, type);
403
404out: 447out:
405 if (unlock) 448 if (unlock)
406 gfs2_glock_dq_uninit(&d_gh); 449 gfs2_glock_dq_uninit(&d_gh);
@@ -409,6 +452,22 @@ out:
409 return inode ? inode : ERR_PTR(error); 452 return inode ? inode : ERR_PTR(error);
410} 453}
411 454
455static void gfs2_inum_range_in(struct gfs2_inum_range_host *ir, const void *buf)
456{
457 const struct gfs2_inum_range *str = buf;
458
459 ir->ir_start = be64_to_cpu(str->ir_start);
460 ir->ir_length = be64_to_cpu(str->ir_length);
461}
462
463static void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf)
464{
465 struct gfs2_inum_range *str = buf;
466
467 str->ir_start = cpu_to_be64(ir->ir_start);
468 str->ir_length = cpu_to_be64(ir->ir_length);
469}
470
412static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino) 471static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino)
413{ 472{
414 struct gfs2_inode *ip = GFS2_I(sdp->sd_ir_inode); 473 struct gfs2_inode *ip = GFS2_I(sdp->sd_ir_inode);
@@ -548,7 +607,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
548 if (!dip->i_inode.i_nlink) 607 if (!dip->i_inode.i_nlink)
549 return -EPERM; 608 return -EPERM;
550 609
551 error = gfs2_dir_search(&dip->i_inode, name, NULL, NULL); 610 error = gfs2_dir_check(&dip->i_inode, name, NULL);
552 switch (error) { 611 switch (error) {
553 case -ENOENT: 612 case -ENOENT:
554 error = 0; 613 error = 0;
@@ -588,8 +647,7 @@ static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode,
588 *gid = current->fsgid; 647 *gid = current->fsgid;
589} 648}
590 649
591static int alloc_dinode(struct gfs2_inode *dip, struct gfs2_inum_host *inum, 650static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation)
592 u64 *generation)
593{ 651{
594 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 652 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
595 int error; 653 int error;
@@ -605,7 +663,7 @@ static int alloc_dinode(struct gfs2_inode *dip, struct gfs2_inum_host *inum,
605 if (error) 663 if (error)
606 goto out_ipreserv; 664 goto out_ipreserv;
607 665
608 inum->no_addr = gfs2_alloc_di(dip, generation); 666 *no_addr = gfs2_alloc_di(dip, generation);
609 667
610 gfs2_trans_end(sdp); 668 gfs2_trans_end(sdp);
611 669
@@ -635,6 +693,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
635 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 693 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
636 struct gfs2_dinode *di; 694 struct gfs2_dinode *di;
637 struct buffer_head *dibh; 695 struct buffer_head *dibh;
696 struct timespec tv = CURRENT_TIME;
638 697
639 dibh = gfs2_meta_new(gl, inum->no_addr); 698 dibh = gfs2_meta_new(gl, inum->no_addr);
640 gfs2_trans_add_bh(gl, dibh, 1); 699 gfs2_trans_add_bh(gl, dibh, 1);
@@ -650,7 +709,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
650 di->di_nlink = 0; 709 di->di_nlink = 0;
651 di->di_size = 0; 710 di->di_size = 0;
652 di->di_blocks = cpu_to_be64(1); 711 di->di_blocks = cpu_to_be64(1);
653 di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(get_seconds()); 712 di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec);
654 di->di_major = cpu_to_be32(MAJOR(dev)); 713 di->di_major = cpu_to_be32(MAJOR(dev));
655 di->di_minor = cpu_to_be32(MINOR(dev)); 714 di->di_minor = cpu_to_be32(MINOR(dev));
656 di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr); 715 di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr);
@@ -680,6 +739,9 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
680 di->di_entries = 0; 739 di->di_entries = 0;
681 memset(&di->__pad4, 0, sizeof(di->__pad4)); 740 memset(&di->__pad4, 0, sizeof(di->__pad4));
682 di->di_eattr = 0; 741 di->di_eattr = 0;
742 di->di_atime_nsec = cpu_to_be32(tv.tv_nsec);
743 di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec);
744 di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec);
683 memset(&di->di_reserved, 0, sizeof(di->di_reserved)); 745 memset(&di->di_reserved, 0, sizeof(di->di_reserved));
684 746
685 brelse(dibh); 747 brelse(dibh);
@@ -749,7 +811,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
749 goto fail_quota_locks; 811 goto fail_quota_locks;
750 812
751 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + 813 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
752 al->al_rgd->rd_ri.ri_length + 814 al->al_rgd->rd_length +
753 2 * RES_DINODE + 815 2 * RES_DINODE +
754 RES_STATFS + RES_QUOTA, 0); 816 RES_STATFS + RES_QUOTA, 0);
755 if (error) 817 if (error)
@@ -760,7 +822,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
760 goto fail_quota_locks; 822 goto fail_quota_locks;
761 } 823 }
762 824
763 error = gfs2_dir_add(&dip->i_inode, name, &ip->i_num, IF2DT(ip->i_inode.i_mode)); 825 error = gfs2_dir_add(&dip->i_inode, name, ip, IF2DT(ip->i_inode.i_mode));
764 if (error) 826 if (error)
765 goto fail_end_trans; 827 goto fail_end_trans;
766 828
@@ -840,11 +902,11 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip)
840struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, 902struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
841 unsigned int mode, dev_t dev) 903 unsigned int mode, dev_t dev)
842{ 904{
843 struct inode *inode; 905 struct inode *inode = NULL;
844 struct gfs2_inode *dip = ghs->gh_gl->gl_object; 906 struct gfs2_inode *dip = ghs->gh_gl->gl_object;
845 struct inode *dir = &dip->i_inode; 907 struct inode *dir = &dip->i_inode;
846 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 908 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
847 struct gfs2_inum_host inum; 909 struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 };
848 int error; 910 int error;
849 u64 generation; 911 u64 generation;
850 912
@@ -864,7 +926,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
864 if (error) 926 if (error)
865 goto fail_gunlock; 927 goto fail_gunlock;
866 928
867 error = alloc_dinode(dip, &inum, &generation); 929 error = alloc_dinode(dip, &inum.no_addr, &generation);
868 if (error) 930 if (error)
869 goto fail_gunlock; 931 goto fail_gunlock;
870 932
@@ -877,34 +939,36 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
877 if (error) 939 if (error)
878 goto fail_gunlock2; 940 goto fail_gunlock2;
879 941
880 inode = gfs2_inode_lookup(dir->i_sb, &inum, IF2DT(mode)); 942 inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode),
943 inum.no_addr,
944 inum.no_formal_ino);
881 if (IS_ERR(inode)) 945 if (IS_ERR(inode))
882 goto fail_gunlock2; 946 goto fail_gunlock2;
883 947
884 error = gfs2_inode_refresh(GFS2_I(inode)); 948 error = gfs2_inode_refresh(GFS2_I(inode));
885 if (error) 949 if (error)
886 goto fail_iput; 950 goto fail_gunlock2;
887 951
888 error = gfs2_acl_create(dip, GFS2_I(inode)); 952 error = gfs2_acl_create(dip, GFS2_I(inode));
889 if (error) 953 if (error)
890 goto fail_iput; 954 goto fail_gunlock2;
891 955
892 error = gfs2_security_init(dip, GFS2_I(inode)); 956 error = gfs2_security_init(dip, GFS2_I(inode));
893 if (error) 957 if (error)
894 goto fail_iput; 958 goto fail_gunlock2;
895 959
896 error = link_dinode(dip, name, GFS2_I(inode)); 960 error = link_dinode(dip, name, GFS2_I(inode));
897 if (error) 961 if (error)
898 goto fail_iput; 962 goto fail_gunlock2;
899 963
900 if (!inode) 964 if (!inode)
901 return ERR_PTR(-ENOMEM); 965 return ERR_PTR(-ENOMEM);
902 return inode; 966 return inode;
903 967
904fail_iput:
905 iput(inode);
906fail_gunlock2: 968fail_gunlock2:
907 gfs2_glock_dq_uninit(ghs + 1); 969 gfs2_glock_dq_uninit(ghs + 1);
970 if (inode)
971 iput(inode);
908fail_gunlock: 972fail_gunlock:
909 gfs2_glock_dq(ghs); 973 gfs2_glock_dq(ghs);
910fail: 974fail:
@@ -976,10 +1040,8 @@ int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
976 */ 1040 */
977 1041
978int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, 1042int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
979 struct gfs2_inode *ip) 1043 const struct gfs2_inode *ip)
980{ 1044{
981 struct gfs2_inum_host inum;
982 unsigned int type;
983 int error; 1045 int error;
984 1046
985 if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode)) 1047 if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode))
@@ -997,18 +1059,10 @@ int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
997 if (error) 1059 if (error)
998 return error; 1060 return error;
999 1061
1000 error = gfs2_dir_search(&dip->i_inode, name, &inum, &type); 1062 error = gfs2_dir_check(&dip->i_inode, name, ip);
1001 if (error) 1063 if (error)
1002 return error; 1064 return error;
1003 1065
1004 if (!gfs2_inum_equal(&inum, &ip->i_num))
1005 return -ENOENT;
1006
1007 if (IF2DT(ip->i_inode.i_mode) != type) {
1008 gfs2_consist_inode(dip);
1009 return -EIO;
1010 }
1011
1012 return 0; 1066 return 0;
1013} 1067}
1014 1068
@@ -1132,10 +1186,11 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh)
1132 struct gfs2_glock *gl = gh->gh_gl; 1186 struct gfs2_glock *gl = gh->gh_gl;
1133 struct gfs2_sbd *sdp = gl->gl_sbd; 1187 struct gfs2_sbd *sdp = gl->gl_sbd;
1134 struct gfs2_inode *ip = gl->gl_object; 1188 struct gfs2_inode *ip = gl->gl_object;
1135 s64 curtime, quantum = gfs2_tune_get(sdp, gt_atime_quantum); 1189 s64 quantum = gfs2_tune_get(sdp, gt_atime_quantum);
1136 unsigned int state; 1190 unsigned int state;
1137 int flags; 1191 int flags;
1138 int error; 1192 int error;
1193 struct timespec tv = CURRENT_TIME;
1139 1194
1140 if (gfs2_assert_warn(sdp, gh->gh_flags & GL_ATIME) || 1195 if (gfs2_assert_warn(sdp, gh->gh_flags & GL_ATIME) ||
1141 gfs2_assert_warn(sdp, !(gh->gh_flags & GL_ASYNC)) || 1196 gfs2_assert_warn(sdp, !(gh->gh_flags & GL_ASYNC)) ||
@@ -1153,8 +1208,7 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh)
1153 (sdp->sd_vfs->s_flags & MS_RDONLY)) 1208 (sdp->sd_vfs->s_flags & MS_RDONLY))
1154 return 0; 1209 return 0;
1155 1210
1156 curtime = get_seconds(); 1211 if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) {
1157 if (curtime - ip->i_inode.i_atime.tv_sec >= quantum) {
1158 gfs2_glock_dq(gh); 1212 gfs2_glock_dq(gh);
1159 gfs2_holder_reinit(LM_ST_EXCLUSIVE, gh->gh_flags & ~LM_FLAG_ANY, 1213 gfs2_holder_reinit(LM_ST_EXCLUSIVE, gh->gh_flags & ~LM_FLAG_ANY,
1160 gh); 1214 gh);
@@ -1165,8 +1219,8 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh)
1165 /* Verify that atime hasn't been updated while we were 1219 /* Verify that atime hasn't been updated while we were
1166 trying to get exclusive lock. */ 1220 trying to get exclusive lock. */
1167 1221
1168 curtime = get_seconds(); 1222 tv = CURRENT_TIME;
1169 if (curtime - ip->i_inode.i_atime.tv_sec >= quantum) { 1223 if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) {
1170 struct buffer_head *dibh; 1224 struct buffer_head *dibh;
1171 struct gfs2_dinode *di; 1225 struct gfs2_dinode *di;
1172 1226
@@ -1180,11 +1234,12 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh)
1180 if (error) 1234 if (error)
1181 goto fail_end_trans; 1235 goto fail_end_trans;
1182 1236
1183 ip->i_inode.i_atime.tv_sec = curtime; 1237 ip->i_inode.i_atime = tv;
1184 1238
1185 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1239 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1186 di = (struct gfs2_dinode *)dibh->b_data; 1240 di = (struct gfs2_dinode *)dibh->b_data;
1187 di->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); 1241 di->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
1242 di->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
1188 brelse(dibh); 1243 brelse(dibh);
1189 1244
1190 gfs2_trans_end(sdp); 1245 gfs2_trans_end(sdp);
@@ -1252,3 +1307,66 @@ int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
1252 return error; 1307 return error;
1253} 1308}
1254 1309
1310void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
1311{
1312 const struct gfs2_dinode_host *di = &ip->i_di;
1313 struct gfs2_dinode *str = buf;
1314
1315 str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
1316 str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
1317 str->di_header.__pad0 = 0;
1318 str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
1319 str->di_header.__pad1 = 0;
1320 str->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
1321 str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
1322 str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
1323 str->di_uid = cpu_to_be32(ip->i_inode.i_uid);
1324 str->di_gid = cpu_to_be32(ip->i_inode.i_gid);
1325 str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
1326 str->di_size = cpu_to_be64(di->di_size);
1327 str->di_blocks = cpu_to_be64(di->di_blocks);
1328 str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
1329 str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
1330 str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);
1331
1332 str->di_goal_meta = cpu_to_be64(di->di_goal_meta);
1333 str->di_goal_data = cpu_to_be64(di->di_goal_data);
1334 str->di_generation = cpu_to_be64(di->di_generation);
1335
1336 str->di_flags = cpu_to_be32(di->di_flags);
1337 str->di_height = cpu_to_be16(di->di_height);
1338 str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
1339 !(ip->i_di.di_flags & GFS2_DIF_EXHASH) ?
1340 GFS2_FORMAT_DE : 0);
1341 str->di_depth = cpu_to_be16(di->di_depth);
1342 str->di_entries = cpu_to_be32(di->di_entries);
1343
1344 str->di_eattr = cpu_to_be64(di->di_eattr);
1345 str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
1346 str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec);
1347 str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec);
1348}
1349
1350void gfs2_dinode_print(const struct gfs2_inode *ip)
1351{
1352 const struct gfs2_dinode_host *di = &ip->i_di;
1353
1354 printk(KERN_INFO " no_formal_ino = %llu\n",
1355 (unsigned long long)ip->i_no_formal_ino);
1356 printk(KERN_INFO " no_addr = %llu\n",
1357 (unsigned long long)ip->i_no_addr);
1358 printk(KERN_INFO " di_size = %llu\n", (unsigned long long)di->di_size);
1359 printk(KERN_INFO " di_blocks = %llu\n",
1360 (unsigned long long)di->di_blocks);
1361 printk(KERN_INFO " di_goal_meta = %llu\n",
1362 (unsigned long long)di->di_goal_meta);
1363 printk(KERN_INFO " di_goal_data = %llu\n",
1364 (unsigned long long)di->di_goal_data);
1365 printk(KERN_INFO " di_flags = 0x%.8X\n", di->di_flags);
1366 printk(KERN_INFO " di_height = %u\n", di->di_height);
1367 printk(KERN_INFO " di_depth = %u\n", di->di_depth);
1368 printk(KERN_INFO " di_entries = %u\n", di->di_entries);
1369 printk(KERN_INFO " di_eattr = %llu\n",
1370 (unsigned long long)di->di_eattr);
1371}
1372
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index b57f448b15bc..4517ac82c01c 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -10,17 +10,17 @@
10#ifndef __INODE_DOT_H__ 10#ifndef __INODE_DOT_H__
11#define __INODE_DOT_H__ 11#define __INODE_DOT_H__
12 12
13static inline int gfs2_is_stuffed(struct gfs2_inode *ip) 13static inline int gfs2_is_stuffed(const struct gfs2_inode *ip)
14{ 14{
15 return !ip->i_di.di_height; 15 return !ip->i_di.di_height;
16} 16}
17 17
18static inline int gfs2_is_jdata(struct gfs2_inode *ip) 18static inline int gfs2_is_jdata(const struct gfs2_inode *ip)
19{ 19{
20 return ip->i_di.di_flags & GFS2_DIF_JDATA; 20 return ip->i_di.di_flags & GFS2_DIF_JDATA;
21} 21}
22 22
23static inline int gfs2_is_dir(struct gfs2_inode *ip) 23static inline int gfs2_is_dir(const struct gfs2_inode *ip)
24{ 24{
25 return S_ISDIR(ip->i_inode.i_mode); 25 return S_ISDIR(ip->i_inode.i_mode);
26} 26}
@@ -32,9 +32,25 @@ static inline void gfs2_set_inode_blocks(struct inode *inode)
32 (GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT); 32 (GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT);
33} 33}
34 34
35static inline int gfs2_check_inum(const struct gfs2_inode *ip, u64 no_addr,
36 u64 no_formal_ino)
37{
38 return ip->i_no_addr == no_addr && ip->i_no_formal_ino == no_formal_ino;
39}
40
41static inline void gfs2_inum_out(const struct gfs2_inode *ip,
42 struct gfs2_dirent *dent)
43{
44 dent->de_inum.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
45 dent->de_inum.no_addr = cpu_to_be64(ip->i_no_addr);
46}
47
48
35void gfs2_inode_attr_in(struct gfs2_inode *ip); 49void gfs2_inode_attr_in(struct gfs2_inode *ip);
36struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *inum, unsigned type); 50void gfs2_set_iop(struct inode *inode);
37struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum_host *inum); 51struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type,
52 u64 no_addr, u64 no_formal_ino);
53struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr);
38 54
39int gfs2_inode_refresh(struct gfs2_inode *ip); 55int gfs2_inode_refresh(struct gfs2_inode *ip);
40 56
@@ -47,12 +63,14 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
47int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, 63int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
48 struct gfs2_inode *ip); 64 struct gfs2_inode *ip);
49int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, 65int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
50 struct gfs2_inode *ip); 66 const struct gfs2_inode *ip);
51int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to); 67int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to);
52int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len); 68int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len);
53int gfs2_glock_nq_atime(struct gfs2_holder *gh); 69int gfs2_glock_nq_atime(struct gfs2_holder *gh);
54int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); 70int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
55struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); 71struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
72void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
73void gfs2_dinode_print(const struct gfs2_inode *ip);
56 74
57#endif /* __INODE_DOT_H__ */ 75#endif /* __INODE_DOT_H__ */
58 76
diff --git a/fs/gfs2/locking/dlm/lock.c b/fs/gfs2/locking/dlm/lock.c
index c305255bfe8a..542a797ac89a 100644
--- a/fs/gfs2/locking/dlm/lock.c
+++ b/fs/gfs2/locking/dlm/lock.c
@@ -174,7 +174,6 @@ static int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name,
174 lp->cur = DLM_LOCK_IV; 174 lp->cur = DLM_LOCK_IV;
175 lp->lvb = NULL; 175 lp->lvb = NULL;
176 lp->hold_null = NULL; 176 lp->hold_null = NULL;
177 init_completion(&lp->ast_wait);
178 INIT_LIST_HEAD(&lp->clist); 177 INIT_LIST_HEAD(&lp->clist);
179 INIT_LIST_HEAD(&lp->blist); 178 INIT_LIST_HEAD(&lp->blist);
180 INIT_LIST_HEAD(&lp->delay_list); 179 INIT_LIST_HEAD(&lp->delay_list);
@@ -399,6 +398,12 @@ static void gdlm_del_lvb(struct gdlm_lock *lp)
399 lp->lksb.sb_lvbptr = NULL; 398 lp->lksb.sb_lvbptr = NULL;
400} 399}
401 400
401static int gdlm_ast_wait(void *word)
402{
403 schedule();
404 return 0;
405}
406
402/* This can do a synchronous dlm request (requiring a lock_dlm thread to get 407/* This can do a synchronous dlm request (requiring a lock_dlm thread to get
403 the completion) because gfs won't call hold_lvb() during a callback (from 408 the completion) because gfs won't call hold_lvb() during a callback (from
404 the context of a lock_dlm thread). */ 409 the context of a lock_dlm thread). */
@@ -424,10 +429,10 @@ static int hold_null_lock(struct gdlm_lock *lp)
424 lpn->lkf = DLM_LKF_VALBLK | DLM_LKF_EXPEDITE; 429 lpn->lkf = DLM_LKF_VALBLK | DLM_LKF_EXPEDITE;
425 set_bit(LFL_NOBAST, &lpn->flags); 430 set_bit(LFL_NOBAST, &lpn->flags);
426 set_bit(LFL_INLOCK, &lpn->flags); 431 set_bit(LFL_INLOCK, &lpn->flags);
432 set_bit(LFL_AST_WAIT, &lpn->flags);
427 433
428 init_completion(&lpn->ast_wait);
429 gdlm_do_lock(lpn); 434 gdlm_do_lock(lpn);
430 wait_for_completion(&lpn->ast_wait); 435 wait_on_bit(&lpn->flags, LFL_AST_WAIT, gdlm_ast_wait, TASK_UNINTERRUPTIBLE);
431 error = lpn->lksb.sb_status; 436 error = lpn->lksb.sb_status;
432 if (error) { 437 if (error) {
433 printk(KERN_INFO "lock_dlm: hold_null_lock dlm error %d\n", 438 printk(KERN_INFO "lock_dlm: hold_null_lock dlm error %d\n",
diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h
index d074c6e6f9bf..24d70f73b651 100644
--- a/fs/gfs2/locking/dlm/lock_dlm.h
+++ b/fs/gfs2/locking/dlm/lock_dlm.h
@@ -101,6 +101,7 @@ enum {
101 LFL_NOBAST = 10, 101 LFL_NOBAST = 10,
102 LFL_HEADQUE = 11, 102 LFL_HEADQUE = 11,
103 LFL_UNLOCK_DELETE = 12, 103 LFL_UNLOCK_DELETE = 12,
104 LFL_AST_WAIT = 13,
104}; 105};
105 106
106struct gdlm_lock { 107struct gdlm_lock {
@@ -117,7 +118,6 @@ struct gdlm_lock {
117 unsigned long flags; /* lock_dlm flags LFL_ */ 118 unsigned long flags; /* lock_dlm flags LFL_ */
118 119
119 int bast_mode; /* protected by async_lock */ 120 int bast_mode; /* protected by async_lock */
120 struct completion ast_wait;
121 121
122 struct list_head clist; /* complete */ 122 struct list_head clist; /* complete */
123 struct list_head blist; /* blocking */ 123 struct list_head blist; /* blocking */
diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c
index 1d8faa3da8af..41c5b04caaba 100644
--- a/fs/gfs2/locking/dlm/mount.c
+++ b/fs/gfs2/locking/dlm/mount.c
@@ -147,7 +147,7 @@ static int gdlm_mount(char *table_name, char *host_data,
147 147
148 error = dlm_new_lockspace(ls->fsname, strlen(ls->fsname), 148 error = dlm_new_lockspace(ls->fsname, strlen(ls->fsname),
149 &ls->dlm_lockspace, 149 &ls->dlm_lockspace,
150 nodir ? DLM_LSFL_NODIR : 0, 150 DLM_LSFL_FS | (nodir ? DLM_LSFL_NODIR : 0),
151 GDLM_LVB_SIZE); 151 GDLM_LVB_SIZE);
152 if (error) { 152 if (error) {
153 log_error("dlm_new_lockspace error %d", error); 153 log_error("dlm_new_lockspace error %d", error);
diff --git a/fs/gfs2/locking/dlm/plock.c b/fs/gfs2/locking/dlm/plock.c
index f82495e18c2d..fba1f1d87e4f 100644
--- a/fs/gfs2/locking/dlm/plock.c
+++ b/fs/gfs2/locking/dlm/plock.c
@@ -242,7 +242,7 @@ int gdlm_plock_get(void *lockspace, struct lm_lockname *name,
242 op->info.number = name->ln_number; 242 op->info.number = name->ln_number;
243 op->info.start = fl->fl_start; 243 op->info.start = fl->fl_start;
244 op->info.end = fl->fl_end; 244 op->info.end = fl->fl_end;
245 245 op->info.owner = (__u64)(long) fl->fl_owner;
246 246
247 send_op(op); 247 send_op(op);
248 wait_event(recv_wq, (op->done != 0)); 248 wait_event(recv_wq, (op->done != 0));
@@ -254,16 +254,20 @@ int gdlm_plock_get(void *lockspace, struct lm_lockname *name,
254 } 254 }
255 spin_unlock(&ops_lock); 255 spin_unlock(&ops_lock);
256 256
257 /* info.rv from userspace is 1 for conflict, 0 for no-conflict,
258 -ENOENT if there are no locks on the file */
259
257 rv = op->info.rv; 260 rv = op->info.rv;
258 261
259 fl->fl_type = F_UNLCK; 262 fl->fl_type = F_UNLCK;
260 if (rv == -ENOENT) 263 if (rv == -ENOENT)
261 rv = 0; 264 rv = 0;
262 else if (rv == 0 && op->info.pid != fl->fl_pid) { 265 else if (rv > 0) {
263 fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK; 266 fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK;
264 fl->fl_pid = op->info.pid; 267 fl->fl_pid = op->info.pid;
265 fl->fl_start = op->info.start; 268 fl->fl_start = op->info.start;
266 fl->fl_end = op->info.end; 269 fl->fl_end = op->info.end;
270 rv = 0;
267 } 271 }
268 272
269 kfree(op); 273 kfree(op);
diff --git a/fs/gfs2/locking/dlm/thread.c b/fs/gfs2/locking/dlm/thread.c
index 9cf1f168eaf8..1aca51e45092 100644
--- a/fs/gfs2/locking/dlm/thread.c
+++ b/fs/gfs2/locking/dlm/thread.c
@@ -44,6 +44,13 @@ static void process_blocking(struct gdlm_lock *lp, int bast_mode)
44 ls->fscb(ls->sdp, cb, &lp->lockname); 44 ls->fscb(ls->sdp, cb, &lp->lockname);
45} 45}
46 46
47static void wake_up_ast(struct gdlm_lock *lp)
48{
49 clear_bit(LFL_AST_WAIT, &lp->flags);
50 smp_mb__after_clear_bit();
51 wake_up_bit(&lp->flags, LFL_AST_WAIT);
52}
53
47static void process_complete(struct gdlm_lock *lp) 54static void process_complete(struct gdlm_lock *lp)
48{ 55{
49 struct gdlm_ls *ls = lp->ls; 56 struct gdlm_ls *ls = lp->ls;
@@ -136,7 +143,7 @@ static void process_complete(struct gdlm_lock *lp)
136 */ 143 */
137 144
138 if (test_and_clear_bit(LFL_SYNC_LVB, &lp->flags)) { 145 if (test_and_clear_bit(LFL_SYNC_LVB, &lp->flags)) {
139 complete(&lp->ast_wait); 146 wake_up_ast(lp);
140 return; 147 return;
141 } 148 }
142 149
@@ -214,7 +221,7 @@ out:
214 if (test_bit(LFL_INLOCK, &lp->flags)) { 221 if (test_bit(LFL_INLOCK, &lp->flags)) {
215 clear_bit(LFL_NOBLOCK, &lp->flags); 222 clear_bit(LFL_NOBLOCK, &lp->flags);
216 lp->cur = lp->req; 223 lp->cur = lp->req;
217 complete(&lp->ast_wait); 224 wake_up_ast(lp);
218 return; 225 return;
219 } 226 }
220 227
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 291415ddfe51..f49a12e24086 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -83,6 +83,11 @@ static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
83 83
84 gfs2_assert(sdp, bd->bd_ail == ai); 84 gfs2_assert(sdp, bd->bd_ail == ai);
85 85
86 if (!bh){
87 list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
88 continue;
89 }
90
86 if (!buffer_busy(bh)) { 91 if (!buffer_busy(bh)) {
87 if (!buffer_uptodate(bh)) { 92 if (!buffer_uptodate(bh)) {
88 gfs2_log_unlock(sdp); 93 gfs2_log_unlock(sdp);
@@ -125,6 +130,11 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int fl
125 bd_ail_st_list) { 130 bd_ail_st_list) {
126 bh = bd->bd_bh; 131 bh = bd->bd_bh;
127 132
133 if (!bh){
134 list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
135 continue;
136 }
137
128 gfs2_assert(sdp, bd->bd_ail == ai); 138 gfs2_assert(sdp, bd->bd_ail == ai);
129 139
130 if (buffer_busy(bh)) { 140 if (buffer_busy(bh)) {
@@ -262,8 +272,8 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
262 * @sdp: The GFS2 superblock 272 * @sdp: The GFS2 superblock
263 * @blks: The number of blocks to reserve 273 * @blks: The number of blocks to reserve
264 * 274 *
265 * Note that we never give out the last 6 blocks of the journal. Thats 275 * Note that we never give out the last few blocks of the journal. Thats
266 * due to the fact that there is are a small number of header blocks 276 * due to the fact that there is a small number of header blocks
267 * associated with each log flush. The exact number can't be known until 277 * associated with each log flush. The exact number can't be known until
268 * flush time, so we ensure that we have just enough free blocks at all 278 * flush time, so we ensure that we have just enough free blocks at all
269 * times to avoid running out during a log flush. 279 * times to avoid running out during a log flush.
@@ -274,6 +284,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
274int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks) 284int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
275{ 285{
276 unsigned int try = 0; 286 unsigned int try = 0;
287 unsigned reserved_blks = 6 * (4096 / sdp->sd_vfs->s_blocksize);
277 288
278 if (gfs2_assert_warn(sdp, blks) || 289 if (gfs2_assert_warn(sdp, blks) ||
279 gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks)) 290 gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks))
@@ -281,7 +292,7 @@ int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
281 292
282 mutex_lock(&sdp->sd_log_reserve_mutex); 293 mutex_lock(&sdp->sd_log_reserve_mutex);
283 gfs2_log_lock(sdp); 294 gfs2_log_lock(sdp);
284 while(sdp->sd_log_blks_free <= (blks + 6)) { 295 while(sdp->sd_log_blks_free <= (blks + reserved_blks)) {
285 gfs2_log_unlock(sdp); 296 gfs2_log_unlock(sdp);
286 gfs2_ail1_empty(sdp, 0); 297 gfs2_ail1_empty(sdp, 0);
287 gfs2_log_flush(sdp, NULL); 298 gfs2_log_flush(sdp, NULL);
@@ -357,6 +368,58 @@ static inline unsigned int log_distance(struct gfs2_sbd *sdp, unsigned int newer
357 return dist; 368 return dist;
358} 369}
359 370
371/**
372 * calc_reserved - Calculate the number of blocks to reserve when
373 * refunding a transaction's unused buffers.
374 * @sdp: The GFS2 superblock
375 *
376 * This is complex. We need to reserve room for all our currently used
377 * metadata buffers (e.g. normal file I/O rewriting file time stamps) and
378 * all our journaled data buffers for journaled files (e.g. files in the
379 * meta_fs like rindex, or files for which chattr +j was done.)
380 * If we don't reserve enough space, gfs2_log_refund and gfs2_log_flush
381 * will count it as free space (sd_log_blks_free) and corruption will follow.
382 *
383 * We can have metadata bufs and jdata bufs in the same journal. So each
384 * type gets its own log header, for which we need to reserve a block.
385 * In fact, each type has the potential for needing more than one header
386 * in cases where we have more buffers than will fit on a journal page.
387 * Metadata journal entries take up half the space of journaled buffer entries.
388 * Thus, metadata entries have buf_limit (502) and journaled buffers have
389 * databuf_limit (251) before they cause a wrap around.
390 *
391 * Also, we need to reserve blocks for revoke journal entries and one for an
392 * overall header for the lot.
393 *
394 * Returns: the number of blocks reserved
395 */
396static unsigned int calc_reserved(struct gfs2_sbd *sdp)
397{
398 unsigned int reserved = 0;
399 unsigned int mbuf_limit, metabufhdrs_needed;
400 unsigned int dbuf_limit, databufhdrs_needed;
401 unsigned int revokes = 0;
402
403 mbuf_limit = buf_limit(sdp);
404 metabufhdrs_needed = (sdp->sd_log_commited_buf +
405 (mbuf_limit - 1)) / mbuf_limit;
406 dbuf_limit = databuf_limit(sdp);
407 databufhdrs_needed = (sdp->sd_log_commited_databuf +
408 (dbuf_limit - 1)) / dbuf_limit;
409
410 if (sdp->sd_log_commited_revoke)
411 revokes = gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke,
412 sizeof(u64));
413
414 reserved = sdp->sd_log_commited_buf + metabufhdrs_needed +
415 sdp->sd_log_commited_databuf + databufhdrs_needed +
416 revokes;
417 /* One for the overall header */
418 if (reserved)
419 reserved++;
420 return reserved;
421}
422
360static unsigned int current_tail(struct gfs2_sbd *sdp) 423static unsigned int current_tail(struct gfs2_sbd *sdp)
361{ 424{
362 struct gfs2_ail *ai; 425 struct gfs2_ail *ai;
@@ -447,14 +510,14 @@ struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
447 return bh; 510 return bh;
448} 511}
449 512
450static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail, int pull) 513static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
451{ 514{
452 unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail); 515 unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail);
453 516
454 ail2_empty(sdp, new_tail); 517 ail2_empty(sdp, new_tail);
455 518
456 gfs2_log_lock(sdp); 519 gfs2_log_lock(sdp);
457 sdp->sd_log_blks_free += dist - (pull ? 1 : 0); 520 sdp->sd_log_blks_free += dist;
458 gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks); 521 gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks);
459 gfs2_log_unlock(sdp); 522 gfs2_log_unlock(sdp);
460 523
@@ -504,7 +567,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
504 brelse(bh); 567 brelse(bh);
505 568
506 if (sdp->sd_log_tail != tail) 569 if (sdp->sd_log_tail != tail)
507 log_pull_tail(sdp, tail, pull); 570 log_pull_tail(sdp, tail);
508 else 571 else
509 gfs2_assert_withdraw(sdp, !pull); 572 gfs2_assert_withdraw(sdp, !pull);
510 573
@@ -517,6 +580,7 @@ static void log_flush_commit(struct gfs2_sbd *sdp)
517 struct list_head *head = &sdp->sd_log_flush_list; 580 struct list_head *head = &sdp->sd_log_flush_list;
518 struct gfs2_log_buf *lb; 581 struct gfs2_log_buf *lb;
519 struct buffer_head *bh; 582 struct buffer_head *bh;
583 int flushcount = 0;
520 584
521 while (!list_empty(head)) { 585 while (!list_empty(head)) {
522 lb = list_entry(head->next, struct gfs2_log_buf, lb_list); 586 lb = list_entry(head->next, struct gfs2_log_buf, lb_list);
@@ -533,9 +597,20 @@ static void log_flush_commit(struct gfs2_sbd *sdp)
533 } else 597 } else
534 brelse(bh); 598 brelse(bh);
535 kfree(lb); 599 kfree(lb);
600 flushcount++;
536 } 601 }
537 602
538 log_write_header(sdp, 0, 0); 603 /* If nothing was journaled, the header is unplanned and unwanted. */
604 if (flushcount) {
605 log_write_header(sdp, 0, 0);
606 } else {
607 unsigned int tail;
608 tail = current_tail(sdp);
609
610 gfs2_ail1_empty(sdp, 0);
611 if (sdp->sd_log_tail != tail)
612 log_pull_tail(sdp, tail);
613 }
539} 614}
540 615
541/** 616/**
@@ -565,7 +640,10 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
565 INIT_LIST_HEAD(&ai->ai_ail1_list); 640 INIT_LIST_HEAD(&ai->ai_ail1_list);
566 INIT_LIST_HEAD(&ai->ai_ail2_list); 641 INIT_LIST_HEAD(&ai->ai_ail2_list);
567 642
568 gfs2_assert_withdraw(sdp, sdp->sd_log_num_buf == sdp->sd_log_commited_buf); 643 gfs2_assert_withdraw(sdp,
644 sdp->sd_log_num_buf + sdp->sd_log_num_jdata ==
645 sdp->sd_log_commited_buf +
646 sdp->sd_log_commited_databuf);
569 gfs2_assert_withdraw(sdp, 647 gfs2_assert_withdraw(sdp,
570 sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke); 648 sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke);
571 649
@@ -576,16 +654,19 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
576 lops_before_commit(sdp); 654 lops_before_commit(sdp);
577 if (!list_empty(&sdp->sd_log_flush_list)) 655 if (!list_empty(&sdp->sd_log_flush_list))
578 log_flush_commit(sdp); 656 log_flush_commit(sdp);
579 else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle) 657 else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){
658 gfs2_log_lock(sdp);
659 sdp->sd_log_blks_free--; /* Adjust for unreserved buffer */
660 gfs2_log_unlock(sdp);
580 log_write_header(sdp, 0, PULL); 661 log_write_header(sdp, 0, PULL);
662 }
581 lops_after_commit(sdp, ai); 663 lops_after_commit(sdp, ai);
582 664
583 gfs2_log_lock(sdp); 665 gfs2_log_lock(sdp);
584 sdp->sd_log_head = sdp->sd_log_flush_head; 666 sdp->sd_log_head = sdp->sd_log_flush_head;
585 sdp->sd_log_blks_free -= sdp->sd_log_num_hdrs;
586 sdp->sd_log_blks_reserved = 0; 667 sdp->sd_log_blks_reserved = 0;
587 sdp->sd_log_commited_buf = 0; 668 sdp->sd_log_commited_buf = 0;
588 sdp->sd_log_num_hdrs = 0; 669 sdp->sd_log_commited_databuf = 0;
589 sdp->sd_log_commited_revoke = 0; 670 sdp->sd_log_commited_revoke = 0;
590 671
591 if (!list_empty(&ai->ai_ail1_list)) { 672 if (!list_empty(&ai->ai_ail1_list)) {
@@ -602,32 +683,26 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
602 683
603static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) 684static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
604{ 685{
605 unsigned int reserved = 0; 686 unsigned int reserved;
606 unsigned int old; 687 unsigned int old;
607 688
608 gfs2_log_lock(sdp); 689 gfs2_log_lock(sdp);
609 690
610 sdp->sd_log_commited_buf += tr->tr_num_buf_new - tr->tr_num_buf_rm; 691 sdp->sd_log_commited_buf += tr->tr_num_buf_new - tr->tr_num_buf_rm;
611 gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_buf) >= 0); 692 sdp->sd_log_commited_databuf += tr->tr_num_databuf_new -
693 tr->tr_num_databuf_rm;
694 gfs2_assert_withdraw(sdp, (((int)sdp->sd_log_commited_buf) >= 0) ||
695 (((int)sdp->sd_log_commited_databuf) >= 0));
612 sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm; 696 sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm;
613 gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_revoke) >= 0); 697 gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_revoke) >= 0);
614 698 reserved = calc_reserved(sdp);
615 if (sdp->sd_log_commited_buf)
616 reserved += sdp->sd_log_commited_buf;
617 if (sdp->sd_log_commited_revoke)
618 reserved += gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke,
619 sizeof(u64));
620 if (reserved)
621 reserved++;
622
623 old = sdp->sd_log_blks_free; 699 old = sdp->sd_log_blks_free;
624 sdp->sd_log_blks_free += tr->tr_reserved - 700 sdp->sd_log_blks_free += tr->tr_reserved -
625 (reserved - sdp->sd_log_blks_reserved); 701 (reserved - sdp->sd_log_blks_reserved);
626 702
627 gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free >= old); 703 gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free >= old);
628 gfs2_assert_withdraw(sdp, 704 gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free <=
629 sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks + 705 sdp->sd_jdesc->jd_blocks);
630 sdp->sd_log_num_hdrs);
631 706
632 sdp->sd_log_blks_reserved = reserved; 707 sdp->sd_log_blks_reserved = reserved;
633 708
@@ -673,13 +748,13 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp)
673 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); 748 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
674 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg); 749 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg);
675 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf); 750 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf);
676 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_hdrs);
677 gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list)); 751 gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list));
678 752
679 sdp->sd_log_flush_head = sdp->sd_log_head; 753 sdp->sd_log_flush_head = sdp->sd_log_head;
680 sdp->sd_log_flush_wrapped = 0; 754 sdp->sd_log_flush_wrapped = 0;
681 755
682 log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT, 0); 756 log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT,
757 (sdp->sd_log_tail == current_tail(sdp)) ? 0 : PULL);
683 758
684 gfs2_assert_warn(sdp, sdp->sd_log_blks_free == sdp->sd_jdesc->jd_blocks); 759 gfs2_assert_warn(sdp, sdp->sd_log_blks_free == sdp->sd_jdesc->jd_blocks);
685 gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail); 760 gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index f82d84d05d23..aff70f0698fd 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -17,6 +17,7 @@
17 17
18#include "gfs2.h" 18#include "gfs2.h"
19#include "incore.h" 19#include "incore.h"
20#include "inode.h"
20#include "glock.h" 21#include "glock.h"
21#include "log.h" 22#include "log.h"
22#include "lops.h" 23#include "lops.h"
@@ -117,15 +118,13 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
117 struct gfs2_log_descriptor *ld; 118 struct gfs2_log_descriptor *ld;
118 struct gfs2_bufdata *bd1 = NULL, *bd2; 119 struct gfs2_bufdata *bd1 = NULL, *bd2;
119 unsigned int total = sdp->sd_log_num_buf; 120 unsigned int total = sdp->sd_log_num_buf;
120 unsigned int offset = sizeof(struct gfs2_log_descriptor); 121 unsigned int offset = BUF_OFFSET;
121 unsigned int limit; 122 unsigned int limit;
122 unsigned int num; 123 unsigned int num;
123 unsigned n; 124 unsigned n;
124 __be64 *ptr; 125 __be64 *ptr;
125 126
126 offset += sizeof(__be64) - 1; 127 limit = buf_limit(sdp);
127 offset &= ~(sizeof(__be64) - 1);
128 limit = (sdp->sd_sb.sb_bsize - offset)/sizeof(__be64);
129 /* for 4k blocks, limit = 503 */ 128 /* for 4k blocks, limit = 503 */
130 129
131 bd1 = bd2 = list_prepare_entry(bd1, &sdp->sd_log_le_buf, bd_le.le_list); 130 bd1 = bd2 = list_prepare_entry(bd1, &sdp->sd_log_le_buf, bd_le.le_list);
@@ -134,7 +133,6 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
134 if (total > limit) 133 if (total > limit)
135 num = limit; 134 num = limit;
136 bh = gfs2_log_get_buf(sdp); 135 bh = gfs2_log_get_buf(sdp);
137 sdp->sd_log_num_hdrs++;
138 ld = (struct gfs2_log_descriptor *)bh->b_data; 136 ld = (struct gfs2_log_descriptor *)bh->b_data;
139 ptr = (__be64 *)(bh->b_data + offset); 137 ptr = (__be64 *)(bh->b_data + offset);
140 ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC); 138 ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
@@ -469,25 +467,28 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
469 struct gfs2_inode *ip = GFS2_I(mapping->host); 467 struct gfs2_inode *ip = GFS2_I(mapping->host);
470 468
471 gfs2_log_lock(sdp); 469 gfs2_log_lock(sdp);
470 if (!list_empty(&bd->bd_list_tr)) {
471 gfs2_log_unlock(sdp);
472 return;
473 }
472 tr->tr_touched = 1; 474 tr->tr_touched = 1;
473 if (list_empty(&bd->bd_list_tr) && 475 if (gfs2_is_jdata(ip)) {
474 (ip->i_di.di_flags & GFS2_DIF_JDATA)) {
475 tr->tr_num_buf++; 476 tr->tr_num_buf++;
476 list_add(&bd->bd_list_tr, &tr->tr_list_buf); 477 list_add(&bd->bd_list_tr, &tr->tr_list_buf);
477 gfs2_log_unlock(sdp);
478 gfs2_pin(sdp, bd->bd_bh);
479 tr->tr_num_buf_new++;
480 } else {
481 gfs2_log_unlock(sdp);
482 } 478 }
479 gfs2_log_unlock(sdp);
480 if (!list_empty(&le->le_list))
481 return;
482
483 gfs2_trans_add_gl(bd->bd_gl); 483 gfs2_trans_add_gl(bd->bd_gl);
484 gfs2_log_lock(sdp); 484 if (gfs2_is_jdata(ip)) {
485 if (list_empty(&le->le_list)) { 485 sdp->sd_log_num_jdata++;
486 if (ip->i_di.di_flags & GFS2_DIF_JDATA) 486 gfs2_pin(sdp, bd->bd_bh);
487 sdp->sd_log_num_jdata++; 487 tr->tr_num_databuf_new++;
488 sdp->sd_log_num_databuf++;
489 list_add(&le->le_list, &sdp->sd_log_le_databuf);
490 } 488 }
489 sdp->sd_log_num_databuf++;
490 gfs2_log_lock(sdp);
491 list_add(&le->le_list, &sdp->sd_log_le_databuf);
491 gfs2_log_unlock(sdp); 492 gfs2_log_unlock(sdp);
492} 493}
493 494
@@ -520,7 +521,6 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
520 LIST_HEAD(started); 521 LIST_HEAD(started);
521 struct gfs2_bufdata *bd1 = NULL, *bd2, *bdt; 522 struct gfs2_bufdata *bd1 = NULL, *bd2, *bdt;
522 struct buffer_head *bh = NULL,*bh1 = NULL; 523 struct buffer_head *bh = NULL,*bh1 = NULL;
523 unsigned int offset = sizeof(struct gfs2_log_descriptor);
524 struct gfs2_log_descriptor *ld; 524 struct gfs2_log_descriptor *ld;
525 unsigned int limit; 525 unsigned int limit;
526 unsigned int total_dbuf = sdp->sd_log_num_databuf; 526 unsigned int total_dbuf = sdp->sd_log_num_databuf;
@@ -528,9 +528,7 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
528 unsigned int num, n; 528 unsigned int num, n;
529 __be64 *ptr = NULL; 529 __be64 *ptr = NULL;
530 530
531 offset += 2*sizeof(__be64) - 1; 531 limit = databuf_limit(sdp);
532 offset &= ~(2*sizeof(__be64) - 1);
533 limit = (sdp->sd_sb.sb_bsize - offset)/sizeof(__be64);
534 532
535 /* 533 /*
536 * Start writing ordered buffers, write journaled buffers 534 * Start writing ordered buffers, write journaled buffers
@@ -581,10 +579,10 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
581 gfs2_log_unlock(sdp); 579 gfs2_log_unlock(sdp);
582 if (!bh) { 580 if (!bh) {
583 bh = gfs2_log_get_buf(sdp); 581 bh = gfs2_log_get_buf(sdp);
584 sdp->sd_log_num_hdrs++;
585 ld = (struct gfs2_log_descriptor *) 582 ld = (struct gfs2_log_descriptor *)
586 bh->b_data; 583 bh->b_data;
587 ptr = (__be64 *)(bh->b_data + offset); 584 ptr = (__be64 *)(bh->b_data +
585 DATABUF_OFFSET);
588 ld->ld_header.mh_magic = 586 ld->ld_header.mh_magic =
589 cpu_to_be32(GFS2_MAGIC); 587 cpu_to_be32(GFS2_MAGIC);
590 ld->ld_header.mh_type = 588 ld->ld_header.mh_type =
@@ -605,7 +603,7 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
605 if (unlikely(magic != 0)) 603 if (unlikely(magic != 0))
606 set_buffer_escaped(bh1); 604 set_buffer_escaped(bh1);
607 gfs2_log_lock(sdp); 605 gfs2_log_lock(sdp);
608 if (n++ > num) 606 if (++n >= num)
609 break; 607 break;
610 } else if (!bh1) { 608 } else if (!bh1) {
611 total_dbuf--; 609 total_dbuf--;
@@ -622,6 +620,7 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
622 } 620 }
623 gfs2_log_unlock(sdp); 621 gfs2_log_unlock(sdp);
624 if (bh) { 622 if (bh) {
623 set_buffer_mapped(bh);
625 set_buffer_dirty(bh); 624 set_buffer_dirty(bh);
626 ll_rw_block(WRITE, 1, &bh); 625 ll_rw_block(WRITE, 1, &bh);
627 bh = NULL; 626 bh = NULL;
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
index 965bc65c7c64..41a00df75587 100644
--- a/fs/gfs2/lops.h
+++ b/fs/gfs2/lops.h
@@ -13,6 +13,13 @@
13#include <linux/list.h> 13#include <linux/list.h>
14#include "incore.h" 14#include "incore.h"
15 15
16#define BUF_OFFSET \
17 ((sizeof(struct gfs2_log_descriptor) + sizeof(__be64) - 1) & \
18 ~(sizeof(__be64) - 1))
19#define DATABUF_OFFSET \
20 ((sizeof(struct gfs2_log_descriptor) + (2 * sizeof(__be64) - 1)) & \
21 ~(2 * sizeof(__be64) - 1))
22
16extern const struct gfs2_log_operations gfs2_glock_lops; 23extern const struct gfs2_log_operations gfs2_glock_lops;
17extern const struct gfs2_log_operations gfs2_buf_lops; 24extern const struct gfs2_log_operations gfs2_buf_lops;
18extern const struct gfs2_log_operations gfs2_revoke_lops; 25extern const struct gfs2_log_operations gfs2_revoke_lops;
@@ -21,6 +28,22 @@ extern const struct gfs2_log_operations gfs2_databuf_lops;
21 28
22extern const struct gfs2_log_operations *gfs2_log_ops[]; 29extern const struct gfs2_log_operations *gfs2_log_ops[];
23 30
31static inline unsigned int buf_limit(struct gfs2_sbd *sdp)
32{
33 unsigned int limit;
34
35 limit = (sdp->sd_sb.sb_bsize - BUF_OFFSET) / sizeof(__be64);
36 return limit;
37}
38
39static inline unsigned int databuf_limit(struct gfs2_sbd *sdp)
40{
41 unsigned int limit;
42
43 limit = (sdp->sd_sb.sb_bsize - DATABUF_OFFSET) / (2 * sizeof(__be64));
44 return limit;
45}
46
24static inline void lops_init_le(struct gfs2_log_element *le, 47static inline void lops_init_le(struct gfs2_log_element *le,
25 const struct gfs2_log_operations *lops) 48 const struct gfs2_log_operations *lops)
26{ 49{
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index e62d4f620c58..8da343b34ae7 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -387,12 +387,18 @@ void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen)
387 387
388 if (test_clear_buffer_pinned(bh)) { 388 if (test_clear_buffer_pinned(bh)) {
389 struct gfs2_trans *tr = current->journal_info; 389 struct gfs2_trans *tr = current->journal_info;
390 struct gfs2_inode *bh_ip =
391 GFS2_I(bh->b_page->mapping->host);
392
390 gfs2_log_lock(sdp); 393 gfs2_log_lock(sdp);
391 list_del_init(&bd->bd_le.le_list); 394 list_del_init(&bd->bd_le.le_list);
392 gfs2_assert_warn(sdp, sdp->sd_log_num_buf); 395 gfs2_assert_warn(sdp, sdp->sd_log_num_buf);
393 sdp->sd_log_num_buf--; 396 sdp->sd_log_num_buf--;
394 gfs2_log_unlock(sdp); 397 gfs2_log_unlock(sdp);
395 tr->tr_num_buf_rm++; 398 if (bh_ip->i_inode.i_private != NULL)
399 tr->tr_num_databuf_rm++;
400 else
401 tr->tr_num_buf_rm++;
396 brelse(bh); 402 brelse(bh);
397 } 403 }
398 if (bd) { 404 if (bd) {
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index e037425bc042..527bf19d9690 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -63,7 +63,7 @@ int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
63static inline int gfs2_meta_inode_buffer(struct gfs2_inode *ip, 63static inline int gfs2_meta_inode_buffer(struct gfs2_inode *ip,
64 struct buffer_head **bhp) 64 struct buffer_head **bhp)
65{ 65{
66 return gfs2_meta_indirect_buffer(ip, 0, ip->i_num.no_addr, 0, bhp); 66 return gfs2_meta_indirect_buffer(ip, 0, ip->i_no_addr, 0, bhp);
67} 67}
68 68
69struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen); 69struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen);
diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c
index 4864659555d4..6f006a804db3 100644
--- a/fs/gfs2/mount.c
+++ b/fs/gfs2/mount.c
@@ -82,20 +82,19 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
82 char *options, *o, *v; 82 char *options, *o, *v;
83 int error = 0; 83 int error = 0;
84 84
85 if (!remount) { 85 /* If someone preloaded options, use those instead */
86 /* If someone preloaded options, use those instead */ 86 spin_lock(&gfs2_sys_margs_lock);
87 spin_lock(&gfs2_sys_margs_lock); 87 if (!remount && gfs2_sys_margs) {
88 if (gfs2_sys_margs) { 88 data = gfs2_sys_margs;
89 data = gfs2_sys_margs; 89 gfs2_sys_margs = NULL;
90 gfs2_sys_margs = NULL;
91 }
92 spin_unlock(&gfs2_sys_margs_lock);
93
94 /* Set some defaults */
95 args->ar_num_glockd = GFS2_GLOCKD_DEFAULT;
96 args->ar_quota = GFS2_QUOTA_DEFAULT;
97 args->ar_data = GFS2_DATA_DEFAULT;
98 } 90 }
91 spin_unlock(&gfs2_sys_margs_lock);
92
93 /* Set some defaults */
94 memset(args, 0, sizeof(struct gfs2_args));
95 args->ar_num_glockd = GFS2_GLOCKD_DEFAULT;
96 args->ar_quota = GFS2_QUOTA_DEFAULT;
97 args->ar_data = GFS2_DATA_DEFAULT;
99 98
100 /* Split the options into tokens with the "," character and 99 /* Split the options into tokens with the "," character and
101 process them */ 100 process them */
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
deleted file mode 100644
index d9ecfd23a49e..000000000000
--- a/fs/gfs2/ondisk.c
+++ /dev/null
@@ -1,251 +0,0 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/slab.h>
11#include <linux/spinlock.h>
12#include <linux/completion.h>
13#include <linux/buffer_head.h>
14
15#include "gfs2.h"
16#include <linux/gfs2_ondisk.h>
17#include <linux/lm_interface.h>
18#include "incore.h"
19
20#define pv(struct, member, fmt) printk(KERN_INFO " "#member" = "fmt"\n", \
21 struct->member);
22
23/*
24 * gfs2_xxx_in - read in an xxx struct
25 * first arg: the cpu-order structure
26 * buf: the disk-order buffer
27 *
28 * gfs2_xxx_out - write out an xxx struct
29 * first arg: the cpu-order structure
30 * buf: the disk-order buffer
31 *
32 * gfs2_xxx_print - print out an xxx struct
33 * first arg: the cpu-order structure
34 */
35
36void gfs2_inum_in(struct gfs2_inum_host *no, const void *buf)
37{
38 const struct gfs2_inum *str = buf;
39
40 no->no_formal_ino = be64_to_cpu(str->no_formal_ino);
41 no->no_addr = be64_to_cpu(str->no_addr);
42}
43
44void gfs2_inum_out(const struct gfs2_inum_host *no, void *buf)
45{
46 struct gfs2_inum *str = buf;
47
48 str->no_formal_ino = cpu_to_be64(no->no_formal_ino);
49 str->no_addr = cpu_to_be64(no->no_addr);
50}
51
52static void gfs2_inum_print(const struct gfs2_inum_host *no)
53{
54 printk(KERN_INFO " no_formal_ino = %llu\n", (unsigned long long)no->no_formal_ino);
55 printk(KERN_INFO " no_addr = %llu\n", (unsigned long long)no->no_addr);
56}
57
58static void gfs2_meta_header_in(struct gfs2_meta_header_host *mh, const void *buf)
59{
60 const struct gfs2_meta_header *str = buf;
61
62 mh->mh_magic = be32_to_cpu(str->mh_magic);
63 mh->mh_type = be32_to_cpu(str->mh_type);
64 mh->mh_format = be32_to_cpu(str->mh_format);
65}
66
67void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
68{
69 const struct gfs2_sb *str = buf;
70
71 gfs2_meta_header_in(&sb->sb_header, buf);
72
73 sb->sb_fs_format = be32_to_cpu(str->sb_fs_format);
74 sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format);
75 sb->sb_bsize = be32_to_cpu(str->sb_bsize);
76 sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift);
77
78 gfs2_inum_in(&sb->sb_master_dir, (char *)&str->sb_master_dir);
79 gfs2_inum_in(&sb->sb_root_dir, (char *)&str->sb_root_dir);
80
81 memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
82 memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
83}
84
85void gfs2_rindex_in(struct gfs2_rindex_host *ri, const void *buf)
86{
87 const struct gfs2_rindex *str = buf;
88
89 ri->ri_addr = be64_to_cpu(str->ri_addr);
90 ri->ri_length = be32_to_cpu(str->ri_length);
91 ri->ri_data0 = be64_to_cpu(str->ri_data0);
92 ri->ri_data = be32_to_cpu(str->ri_data);
93 ri->ri_bitbytes = be32_to_cpu(str->ri_bitbytes);
94
95}
96
97void gfs2_rindex_print(const struct gfs2_rindex_host *ri)
98{
99 printk(KERN_INFO " ri_addr = %llu\n", (unsigned long long)ri->ri_addr);
100 pv(ri, ri_length, "%u");
101
102 printk(KERN_INFO " ri_data0 = %llu\n", (unsigned long long)ri->ri_data0);
103 pv(ri, ri_data, "%u");
104
105 pv(ri, ri_bitbytes, "%u");
106}
107
108void gfs2_rgrp_in(struct gfs2_rgrp_host *rg, const void *buf)
109{
110 const struct gfs2_rgrp *str = buf;
111
112 rg->rg_flags = be32_to_cpu(str->rg_flags);
113 rg->rg_free = be32_to_cpu(str->rg_free);
114 rg->rg_dinodes = be32_to_cpu(str->rg_dinodes);
115 rg->rg_igeneration = be64_to_cpu(str->rg_igeneration);
116}
117
118void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf)
119{
120 struct gfs2_rgrp *str = buf;
121
122 str->rg_flags = cpu_to_be32(rg->rg_flags);
123 str->rg_free = cpu_to_be32(rg->rg_free);
124 str->rg_dinodes = cpu_to_be32(rg->rg_dinodes);
125 str->__pad = cpu_to_be32(0);
126 str->rg_igeneration = cpu_to_be64(rg->rg_igeneration);
127 memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
128}
129
130void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf)
131{
132 const struct gfs2_quota *str = buf;
133
134 qu->qu_limit = be64_to_cpu(str->qu_limit);
135 qu->qu_warn = be64_to_cpu(str->qu_warn);
136 qu->qu_value = be64_to_cpu(str->qu_value);
137}
138
139void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
140{
141 const struct gfs2_dinode_host *di = &ip->i_di;
142 struct gfs2_dinode *str = buf;
143
144 str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
145 str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
146 str->di_header.__pad0 = 0;
147 str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
148 str->di_header.__pad1 = 0;
149
150 gfs2_inum_out(&ip->i_num, &str->di_num);
151
152 str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
153 str->di_uid = cpu_to_be32(ip->i_inode.i_uid);
154 str->di_gid = cpu_to_be32(ip->i_inode.i_gid);
155 str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
156 str->di_size = cpu_to_be64(di->di_size);
157 str->di_blocks = cpu_to_be64(di->di_blocks);
158 str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
159 str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
160 str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);
161
162 str->di_goal_meta = cpu_to_be64(di->di_goal_meta);
163 str->di_goal_data = cpu_to_be64(di->di_goal_data);
164 str->di_generation = cpu_to_be64(di->di_generation);
165
166 str->di_flags = cpu_to_be32(di->di_flags);
167 str->di_height = cpu_to_be16(di->di_height);
168 str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
169 !(ip->i_di.di_flags & GFS2_DIF_EXHASH) ?
170 GFS2_FORMAT_DE : 0);
171 str->di_depth = cpu_to_be16(di->di_depth);
172 str->di_entries = cpu_to_be32(di->di_entries);
173
174 str->di_eattr = cpu_to_be64(di->di_eattr);
175}
176
177void gfs2_dinode_print(const struct gfs2_inode *ip)
178{
179 const struct gfs2_dinode_host *di = &ip->i_di;
180
181 gfs2_inum_print(&ip->i_num);
182
183 printk(KERN_INFO " di_size = %llu\n", (unsigned long long)di->di_size);
184 printk(KERN_INFO " di_blocks = %llu\n", (unsigned long long)di->di_blocks);
185 printk(KERN_INFO " di_goal_meta = %llu\n", (unsigned long long)di->di_goal_meta);
186 printk(KERN_INFO " di_goal_data = %llu\n", (unsigned long long)di->di_goal_data);
187
188 pv(di, di_flags, "0x%.8X");
189 pv(di, di_height, "%u");
190
191 pv(di, di_depth, "%u");
192 pv(di, di_entries, "%u");
193
194 printk(KERN_INFO " di_eattr = %llu\n", (unsigned long long)di->di_eattr);
195}
196
197void gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf)
198{
199 const struct gfs2_log_header *str = buf;
200
201 gfs2_meta_header_in(&lh->lh_header, buf);
202 lh->lh_sequence = be64_to_cpu(str->lh_sequence);
203 lh->lh_flags = be32_to_cpu(str->lh_flags);
204 lh->lh_tail = be32_to_cpu(str->lh_tail);
205 lh->lh_blkno = be32_to_cpu(str->lh_blkno);
206 lh->lh_hash = be32_to_cpu(str->lh_hash);
207}
208
209void gfs2_inum_range_in(struct gfs2_inum_range_host *ir, const void *buf)
210{
211 const struct gfs2_inum_range *str = buf;
212
213 ir->ir_start = be64_to_cpu(str->ir_start);
214 ir->ir_length = be64_to_cpu(str->ir_length);
215}
216
217void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf)
218{
219 struct gfs2_inum_range *str = buf;
220
221 str->ir_start = cpu_to_be64(ir->ir_start);
222 str->ir_length = cpu_to_be64(ir->ir_length);
223}
224
225void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf)
226{
227 const struct gfs2_statfs_change *str = buf;
228
229 sc->sc_total = be64_to_cpu(str->sc_total);
230 sc->sc_free = be64_to_cpu(str->sc_free);
231 sc->sc_dinodes = be64_to_cpu(str->sc_dinodes);
232}
233
234void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf)
235{
236 struct gfs2_statfs_change *str = buf;
237
238 str->sc_total = cpu_to_be64(sc->sc_total);
239 str->sc_free = cpu_to_be64(sc->sc_free);
240 str->sc_dinodes = cpu_to_be64(sc->sc_dinodes);
241}
242
243void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *buf)
244{
245 const struct gfs2_quota_change *str = buf;
246
247 qc->qc_change = be64_to_cpu(str->qc_change);
248 qc->qc_flags = be32_to_cpu(str->qc_flags);
249 qc->qc_id = be32_to_cpu(str->qc_id);
250}
251
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 30c15622174f..26c888890c24 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. 3 * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
4 * 4 *
5 * This copyrighted material is made available to anyone wishing to use, 5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions 6 * modify, copy, or redistribute it subject to the terms and conditions
@@ -32,6 +32,7 @@
32#include "trans.h" 32#include "trans.h"
33#include "rgrp.h" 33#include "rgrp.h"
34#include "ops_file.h" 34#include "ops_file.h"
35#include "super.h"
35#include "util.h" 36#include "util.h"
36#include "glops.h" 37#include "glops.h"
37 38
@@ -49,6 +50,8 @@ static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
49 end = start + bsize; 50 end = start + bsize;
50 if (end <= from || start >= to) 51 if (end <= from || start >= to)
51 continue; 52 continue;
53 if (gfs2_is_jdata(ip))
54 set_buffer_uptodate(bh);
52 gfs2_trans_add_bh(ip->i_gl, bh, 0); 55 gfs2_trans_add_bh(ip->i_gl, bh, 0);
53 } 56 }
54} 57}
@@ -134,7 +137,9 @@ static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
134 return 0; /* don't care */ 137 return 0; /* don't care */
135 } 138 }
136 139
137 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) { 140 if ((sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) &&
141 PageChecked(page)) {
142 ClearPageChecked(page);
138 error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0); 143 error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0);
139 if (error) 144 if (error)
140 goto out_ignore; 145 goto out_ignore;
@@ -203,11 +208,7 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
203 * so we need to supply one here. It doesn't happen often. 208 * so we need to supply one here. It doesn't happen often.
204 */ 209 */
205 if (unlikely(page->index)) { 210 if (unlikely(page->index)) {
206 kaddr = kmap_atomic(page, KM_USER0); 211 zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
207 memset(kaddr, 0, PAGE_CACHE_SIZE);
208 kunmap_atomic(kaddr, KM_USER0);
209 flush_dcache_page(page);
210 SetPageUptodate(page);
211 return 0; 212 return 0;
212 } 213 }
213 214
@@ -450,6 +451,31 @@ out_uninit:
450} 451}
451 452
452/** 453/**
454 * adjust_fs_space - Adjusts the free space available due to gfs2_grow
455 * @inode: the rindex inode
456 */
457static void adjust_fs_space(struct inode *inode)
458{
459 struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
460 struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
461 struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
462 u64 fs_total, new_free;
463
464 /* Total up the file system space, according to the latest rindex. */
465 fs_total = gfs2_ri_total(sdp);
466
467 spin_lock(&sdp->sd_statfs_spin);
468 if (fs_total > (m_sc->sc_total + l_sc->sc_total))
469 new_free = fs_total - (m_sc->sc_total + l_sc->sc_total);
470 else
471 new_free = 0;
472 spin_unlock(&sdp->sd_statfs_spin);
473 fs_warn(sdp, "File system extended by %llu blocks.\n",
474 (unsigned long long)new_free);
475 gfs2_statfs_change(sdp, new_free, new_free, 0);
476}
477
478/**
453 * gfs2_commit_write - Commit write to a file 479 * gfs2_commit_write - Commit write to a file
454 * @file: The file to write to 480 * @file: The file to write to
455 * @page: The page containing the data 481 * @page: The page containing the data
@@ -511,6 +537,9 @@ static int gfs2_commit_write(struct file *file, struct page *page,
511 di->di_size = cpu_to_be64(inode->i_size); 537 di->di_size = cpu_to_be64(inode->i_size);
512 } 538 }
513 539
540 if (inode == sdp->sd_rindex)
541 adjust_fs_space(inode);
542
514 brelse(dibh); 543 brelse(dibh);
515 gfs2_trans_end(sdp); 544 gfs2_trans_end(sdp);
516 if (al->al_requested) { 545 if (al->al_requested) {
@@ -543,6 +572,23 @@ fail_nounlock:
543} 572}
544 573
545/** 574/**
575 * gfs2_set_page_dirty - Page dirtying function
576 * @page: The page to dirty
577 *
578 * Returns: 1 if it dirtyed the page, or 0 otherwise
579 */
580
581static int gfs2_set_page_dirty(struct page *page)
582{
583 struct gfs2_inode *ip = GFS2_I(page->mapping->host);
584 struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
585
586 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
587 SetPageChecked(page);
588 return __set_page_dirty_buffers(page);
589}
590
591/**
546 * gfs2_bmap - Block map function 592 * gfs2_bmap - Block map function
547 * @mapping: Address space info 593 * @mapping: Address space info
548 * @lblock: The block to map 594 * @lblock: The block to map
@@ -578,6 +624,8 @@ static void discard_buffer(struct gfs2_sbd *sdp, struct buffer_head *bh)
578 if (bd) { 624 if (bd) {
579 bd->bd_bh = NULL; 625 bd->bd_bh = NULL;
580 bh->b_private = NULL; 626 bh->b_private = NULL;
627 if (!bd->bd_ail && list_empty(&bd->bd_le.le_list))
628 kmem_cache_free(gfs2_bufdata_cachep, bd);
581 } 629 }
582 gfs2_log_unlock(sdp); 630 gfs2_log_unlock(sdp);
583 631
@@ -598,6 +646,8 @@ static void gfs2_invalidatepage(struct page *page, unsigned long offset)
598 unsigned int curr_off = 0; 646 unsigned int curr_off = 0;
599 647
600 BUG_ON(!PageLocked(page)); 648 BUG_ON(!PageLocked(page));
649 if (offset == 0)
650 ClearPageChecked(page);
601 if (!page_has_buffers(page)) 651 if (!page_has_buffers(page))
602 return; 652 return;
603 653
@@ -728,8 +778,8 @@ static unsigned limit = 0;
728 return; 778 return;
729 779
730 fs_warn(sdp, "ip = %llu %llu\n", 780 fs_warn(sdp, "ip = %llu %llu\n",
731 (unsigned long long)ip->i_num.no_formal_ino, 781 (unsigned long long)ip->i_no_formal_ino,
732 (unsigned long long)ip->i_num.no_addr); 782 (unsigned long long)ip->i_no_addr);
733 783
734 for (x = 0; x < GFS2_MAX_META_HEIGHT; x++) 784 for (x = 0; x < GFS2_MAX_META_HEIGHT; x++)
735 fs_warn(sdp, "ip->i_cache[%u] = %s\n", 785 fs_warn(sdp, "ip->i_cache[%u] = %s\n",
@@ -810,6 +860,7 @@ const struct address_space_operations gfs2_file_aops = {
810 .sync_page = block_sync_page, 860 .sync_page = block_sync_page,
811 .prepare_write = gfs2_prepare_write, 861 .prepare_write = gfs2_prepare_write,
812 .commit_write = gfs2_commit_write, 862 .commit_write = gfs2_commit_write,
863 .set_page_dirty = gfs2_set_page_dirty,
813 .bmap = gfs2_bmap, 864 .bmap = gfs2_bmap,
814 .invalidatepage = gfs2_invalidatepage, 865 .invalidatepage = gfs2_invalidatepage,
815 .releasepage = gfs2_releasepage, 866 .releasepage = gfs2_releasepage,
diff --git a/fs/gfs2/ops_address.h b/fs/gfs2/ops_address.h
index 35aaee4aa7e1..fa1b5b3d28b9 100644
--- a/fs/gfs2/ops_address.h
+++ b/fs/gfs2/ops_address.h
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. 3 * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
4 * 4 *
5 * This copyrighted material is made available to anyone wishing to use, 5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions 6 * modify, copy, or redistribute it subject to the terms and conditions
diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c
index a6fdc52f554a..793e334d098e 100644
--- a/fs/gfs2/ops_dentry.c
+++ b/fs/gfs2/ops_dentry.c
@@ -21,6 +21,7 @@
21#include "glock.h" 21#include "glock.h"
22#include "ops_dentry.h" 22#include "ops_dentry.h"
23#include "util.h" 23#include "util.h"
24#include "inode.h"
24 25
25/** 26/**
26 * gfs2_drevalidate - Check directory lookup consistency 27 * gfs2_drevalidate - Check directory lookup consistency
@@ -40,14 +41,15 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
40 struct gfs2_inode *dip = GFS2_I(parent->d_inode); 41 struct gfs2_inode *dip = GFS2_I(parent->d_inode);
41 struct inode *inode = dentry->d_inode; 42 struct inode *inode = dentry->d_inode;
42 struct gfs2_holder d_gh; 43 struct gfs2_holder d_gh;
43 struct gfs2_inode *ip; 44 struct gfs2_inode *ip = NULL;
44 struct gfs2_inum_host inum;
45 unsigned int type;
46 int error; 45 int error;
47 int had_lock=0; 46 int had_lock=0;
48 47
49 if (inode && is_bad_inode(inode)) 48 if (inode) {
50 goto invalid; 49 if (is_bad_inode(inode))
50 goto invalid;
51 ip = GFS2_I(inode);
52 }
51 53
52 if (sdp->sd_args.ar_localcaching) 54 if (sdp->sd_args.ar_localcaching)
53 goto valid; 55 goto valid;
@@ -59,7 +61,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
59 goto fail; 61 goto fail;
60 } 62 }
61 63
62 error = gfs2_dir_search(parent->d_inode, &dentry->d_name, &inum, &type); 64 error = gfs2_dir_check(parent->d_inode, &dentry->d_name, ip);
63 switch (error) { 65 switch (error) {
64 case 0: 66 case 0:
65 if (!inode) 67 if (!inode)
@@ -73,16 +75,6 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
73 goto fail_gunlock; 75 goto fail_gunlock;
74 } 76 }
75 77
76 ip = GFS2_I(inode);
77
78 if (!gfs2_inum_equal(&ip->i_num, &inum))
79 goto invalid_gunlock;
80
81 if (IF2DT(ip->i_inode.i_mode) != type) {
82 gfs2_consist_inode(dip);
83 goto fail_gunlock;
84 }
85
86valid_gunlock: 78valid_gunlock:
87 if (!had_lock) 79 if (!had_lock)
88 gfs2_glock_dq_uninit(&d_gh); 80 gfs2_glock_dq_uninit(&d_gh);
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c
index aad918337a46..99ea5659bc2c 100644
--- a/fs/gfs2/ops_export.c
+++ b/fs/gfs2/ops_export.c
@@ -22,10 +22,14 @@
22#include "glops.h" 22#include "glops.h"
23#include "inode.h" 23#include "inode.h"
24#include "ops_dentry.h" 24#include "ops_dentry.h"
25#include "ops_export.h" 25#include "ops_fstype.h"
26#include "rgrp.h" 26#include "rgrp.h"
27#include "util.h" 27#include "util.h"
28 28
29#define GFS2_SMALL_FH_SIZE 4
30#define GFS2_LARGE_FH_SIZE 8
31#define GFS2_OLD_FH_SIZE 10
32
29static struct dentry *gfs2_decode_fh(struct super_block *sb, 33static struct dentry *gfs2_decode_fh(struct super_block *sb,
30 __u32 *p, 34 __u32 *p,
31 int fh_len, 35 int fh_len,
@@ -35,31 +39,28 @@ static struct dentry *gfs2_decode_fh(struct super_block *sb,
35 void *context) 39 void *context)
36{ 40{
37 __be32 *fh = (__force __be32 *)p; 41 __be32 *fh = (__force __be32 *)p;
38 struct gfs2_fh_obj fh_obj; 42 struct gfs2_inum_host inum, parent;
39 struct gfs2_inum_host *this, parent;
40 43
41 this = &fh_obj.this;
42 fh_obj.imode = DT_UNKNOWN;
43 memset(&parent, 0, sizeof(struct gfs2_inum)); 44 memset(&parent, 0, sizeof(struct gfs2_inum));
44 45
45 switch (fh_len) { 46 switch (fh_len) {
46 case GFS2_LARGE_FH_SIZE: 47 case GFS2_LARGE_FH_SIZE:
48 case GFS2_OLD_FH_SIZE:
47 parent.no_formal_ino = ((u64)be32_to_cpu(fh[4])) << 32; 49 parent.no_formal_ino = ((u64)be32_to_cpu(fh[4])) << 32;
48 parent.no_formal_ino |= be32_to_cpu(fh[5]); 50 parent.no_formal_ino |= be32_to_cpu(fh[5]);
49 parent.no_addr = ((u64)be32_to_cpu(fh[6])) << 32; 51 parent.no_addr = ((u64)be32_to_cpu(fh[6])) << 32;
50 parent.no_addr |= be32_to_cpu(fh[7]); 52 parent.no_addr |= be32_to_cpu(fh[7]);
51 fh_obj.imode = be32_to_cpu(fh[8]);
52 case GFS2_SMALL_FH_SIZE: 53 case GFS2_SMALL_FH_SIZE:
53 this->no_formal_ino = ((u64)be32_to_cpu(fh[0])) << 32; 54 inum.no_formal_ino = ((u64)be32_to_cpu(fh[0])) << 32;
54 this->no_formal_ino |= be32_to_cpu(fh[1]); 55 inum.no_formal_ino |= be32_to_cpu(fh[1]);
55 this->no_addr = ((u64)be32_to_cpu(fh[2])) << 32; 56 inum.no_addr = ((u64)be32_to_cpu(fh[2])) << 32;
56 this->no_addr |= be32_to_cpu(fh[3]); 57 inum.no_addr |= be32_to_cpu(fh[3]);
57 break; 58 break;
58 default: 59 default:
59 return NULL; 60 return NULL;
60 } 61 }
61 62
62 return gfs2_export_ops.find_exported_dentry(sb, &fh_obj, &parent, 63 return gfs2_export_ops.find_exported_dentry(sb, &inum, &parent,
63 acceptable, context); 64 acceptable, context);
64} 65}
65 66
@@ -75,10 +76,10 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len,
75 (connectable && *len < GFS2_LARGE_FH_SIZE)) 76 (connectable && *len < GFS2_LARGE_FH_SIZE))
76 return 255; 77 return 255;
77 78
78 fh[0] = cpu_to_be32(ip->i_num.no_formal_ino >> 32); 79 fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32);
79 fh[1] = cpu_to_be32(ip->i_num.no_formal_ino & 0xFFFFFFFF); 80 fh[1] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF);
80 fh[2] = cpu_to_be32(ip->i_num.no_addr >> 32); 81 fh[2] = cpu_to_be32(ip->i_no_addr >> 32);
81 fh[3] = cpu_to_be32(ip->i_num.no_addr & 0xFFFFFFFF); 82 fh[3] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF);
82 *len = GFS2_SMALL_FH_SIZE; 83 *len = GFS2_SMALL_FH_SIZE;
83 84
84 if (!connectable || inode == sb->s_root->d_inode) 85 if (!connectable || inode == sb->s_root->d_inode)
@@ -90,13 +91,10 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len,
90 igrab(inode); 91 igrab(inode);
91 spin_unlock(&dentry->d_lock); 92 spin_unlock(&dentry->d_lock);
92 93
93 fh[4] = cpu_to_be32(ip->i_num.no_formal_ino >> 32); 94 fh[4] = cpu_to_be32(ip->i_no_formal_ino >> 32);
94 fh[5] = cpu_to_be32(ip->i_num.no_formal_ino & 0xFFFFFFFF); 95 fh[5] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF);
95 fh[6] = cpu_to_be32(ip->i_num.no_addr >> 32); 96 fh[6] = cpu_to_be32(ip->i_no_addr >> 32);
96 fh[7] = cpu_to_be32(ip->i_num.no_addr & 0xFFFFFFFF); 97 fh[7] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF);
97
98 fh[8] = cpu_to_be32(inode->i_mode);
99 fh[9] = 0; /* pad to double word */
100 *len = GFS2_LARGE_FH_SIZE; 98 *len = GFS2_LARGE_FH_SIZE;
101 99
102 iput(inode); 100 iput(inode);
@@ -144,7 +142,8 @@ static int gfs2_get_name(struct dentry *parent, char *name,
144 ip = GFS2_I(inode); 142 ip = GFS2_I(inode);
145 143
146 *name = 0; 144 *name = 0;
147 gnfd.inum = ip->i_num; 145 gnfd.inum.no_addr = ip->i_no_addr;
146 gnfd.inum.no_formal_ino = ip->i_no_formal_ino;
148 gnfd.name = name; 147 gnfd.name = name;
149 148
150 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &gh); 149 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &gh);
@@ -192,8 +191,7 @@ static struct dentry *gfs2_get_parent(struct dentry *child)
192static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj) 191static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
193{ 192{
194 struct gfs2_sbd *sdp = sb->s_fs_info; 193 struct gfs2_sbd *sdp = sb->s_fs_info;
195 struct gfs2_fh_obj *fh_obj = (struct gfs2_fh_obj *)inum_obj; 194 struct gfs2_inum_host *inum = inum_obj;
196 struct gfs2_inum_host *inum = &fh_obj->this;
197 struct gfs2_holder i_gh, ri_gh, rgd_gh; 195 struct gfs2_holder i_gh, ri_gh, rgd_gh;
198 struct gfs2_rgrpd *rgd; 196 struct gfs2_rgrpd *rgd;
199 struct inode *inode; 197 struct inode *inode;
@@ -202,9 +200,9 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
202 200
203 /* System files? */ 201 /* System files? */
204 202
205 inode = gfs2_ilookup(sb, inum); 203 inode = gfs2_ilookup(sb, inum->no_addr);
206 if (inode) { 204 if (inode) {
207 if (GFS2_I(inode)->i_num.no_formal_ino != inum->no_formal_ino) { 205 if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
208 iput(inode); 206 iput(inode);
209 return ERR_PTR(-ESTALE); 207 return ERR_PTR(-ESTALE);
210 } 208 }
@@ -236,7 +234,9 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
236 gfs2_glock_dq_uninit(&rgd_gh); 234 gfs2_glock_dq_uninit(&rgd_gh);
237 gfs2_glock_dq_uninit(&ri_gh); 235 gfs2_glock_dq_uninit(&ri_gh);
238 236
239 inode = gfs2_inode_lookup(sb, inum, fh_obj->imode); 237 inode = gfs2_inode_lookup(sb, DT_UNKNOWN,
238 inum->no_addr,
239 0);
240 if (!inode) 240 if (!inode)
241 goto fail; 241 goto fail;
242 if (IS_ERR(inode)) { 242 if (IS_ERR(inode)) {
@@ -250,6 +250,15 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
250 goto fail; 250 goto fail;
251 } 251 }
252 252
253 /* Pick up the works we bypass in gfs2_inode_lookup */
254 if (inode->i_state & I_NEW)
255 gfs2_set_iop(inode);
256
257 if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
258 iput(inode);
259 goto fail;
260 }
261
253 error = -EIO; 262 error = -EIO;
254 if (GFS2_I(inode)->i_di.di_flags & GFS2_DIF_SYSTEM) { 263 if (GFS2_I(inode)->i_di.di_flags & GFS2_DIF_SYSTEM) {
255 iput(inode); 264 iput(inode);
diff --git a/fs/gfs2/ops_export.h b/fs/gfs2/ops_export.h
deleted file mode 100644
index f925a955b3b8..000000000000
--- a/fs/gfs2/ops_export.h
+++ /dev/null
@@ -1,22 +0,0 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __OPS_EXPORT_DOT_H__
11#define __OPS_EXPORT_DOT_H__
12
13#define GFS2_SMALL_FH_SIZE 4
14#define GFS2_LARGE_FH_SIZE 10
15
16extern struct export_operations gfs2_export_ops;
17struct gfs2_fh_obj {
18 struct gfs2_inum_host this;
19 __u32 imode;
20};
21
22#endif /* __OPS_EXPORT_DOT_H__ */
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 064df8804582..196d83266e34 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -502,7 +502,7 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
502 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); 502 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
503 struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host); 503 struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host);
504 struct lm_lockname name = 504 struct lm_lockname name =
505 { .ln_number = ip->i_num.no_addr, 505 { .ln_number = ip->i_no_addr,
506 .ln_type = LM_TYPE_PLOCK }; 506 .ln_type = LM_TYPE_PLOCK };
507 507
508 if (!(fl->fl_flags & FL_POSIX)) 508 if (!(fl->fl_flags & FL_POSIX))
@@ -557,7 +557,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
557 gfs2_glock_dq_uninit(fl_gh); 557 gfs2_glock_dq_uninit(fl_gh);
558 } else { 558 } else {
559 error = gfs2_glock_get(GFS2_SB(&ip->i_inode), 559 error = gfs2_glock_get(GFS2_SB(&ip->i_inode),
560 ip->i_num.no_addr, &gfs2_flock_glops, 560 ip->i_no_addr, &gfs2_flock_glops,
561 CREATE, &gl); 561 CREATE, &gl);
562 if (error) 562 if (error)
563 goto out; 563 goto out;
@@ -635,7 +635,6 @@ const struct file_operations gfs2_file_fops = {
635 .release = gfs2_close, 635 .release = gfs2_close,
636 .fsync = gfs2_fsync, 636 .fsync = gfs2_fsync,
637 .lock = gfs2_lock, 637 .lock = gfs2_lock,
638 .sendfile = generic_file_sendfile,
639 .flock = gfs2_flock, 638 .flock = gfs2_flock,
640 .splice_read = generic_file_splice_read, 639 .splice_read = generic_file_splice_read,
641 .splice_write = generic_file_splice_write, 640 .splice_write = generic_file_splice_write,
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 2c5f8e7def0d..cf5aa5050548 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -27,7 +27,6 @@
27#include "inode.h" 27#include "inode.h"
28#include "lm.h" 28#include "lm.h"
29#include "mount.h" 29#include "mount.h"
30#include "ops_export.h"
31#include "ops_fstype.h" 30#include "ops_fstype.h"
32#include "ops_super.h" 31#include "ops_super.h"
33#include "recovery.h" 32#include "recovery.h"
@@ -105,6 +104,7 @@ static void init_vfs(struct super_block *sb, unsigned noatime)
105 sb->s_magic = GFS2_MAGIC; 104 sb->s_magic = GFS2_MAGIC;
106 sb->s_op = &gfs2_super_ops; 105 sb->s_op = &gfs2_super_ops;
107 sb->s_export_op = &gfs2_export_ops; 106 sb->s_export_op = &gfs2_export_ops;
107 sb->s_time_gran = 1;
108 sb->s_maxbytes = MAX_LFS_FILESIZE; 108 sb->s_maxbytes = MAX_LFS_FILESIZE;
109 109
110 if (sb->s_flags & (MS_NOATIME | MS_NODIRATIME)) 110 if (sb->s_flags & (MS_NOATIME | MS_NODIRATIME))
@@ -116,7 +116,6 @@ static void init_vfs(struct super_block *sb, unsigned noatime)
116 116
117static int init_names(struct gfs2_sbd *sdp, int silent) 117static int init_names(struct gfs2_sbd *sdp, int silent)
118{ 118{
119 struct page *page;
120 char *proto, *table; 119 char *proto, *table;
121 int error = 0; 120 int error = 0;
122 121
@@ -126,14 +125,9 @@ static int init_names(struct gfs2_sbd *sdp, int silent)
126 /* Try to autodetect */ 125 /* Try to autodetect */
127 126
128 if (!proto[0] || !table[0]) { 127 if (!proto[0] || !table[0]) {
129 struct gfs2_sb *sb; 128 error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
130 page = gfs2_read_super(sdp->sd_vfs, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift); 129 if (error)
131 if (!page) 130 return error;
132 return -ENOBUFS;
133 sb = kmap(page);
134 gfs2_sb_in(&sdp->sd_sb, sb);
135 kunmap(page);
136 __free_page(page);
137 131
138 error = gfs2_check_sb(sdp, &sdp->sd_sb, silent); 132 error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
139 if (error) 133 if (error)
@@ -151,6 +145,9 @@ static int init_names(struct gfs2_sbd *sdp, int silent)
151 snprintf(sdp->sd_proto_name, GFS2_FSNAME_LEN, "%s", proto); 145 snprintf(sdp->sd_proto_name, GFS2_FSNAME_LEN, "%s", proto);
152 snprintf(sdp->sd_table_name, GFS2_FSNAME_LEN, "%s", table); 146 snprintf(sdp->sd_table_name, GFS2_FSNAME_LEN, "%s", table);
153 147
148 while ((table = strchr(sdp->sd_table_name, '/')))
149 *table = '_';
150
154out: 151out:
155 return error; 152 return error;
156} 153}
@@ -236,17 +233,17 @@ fail:
236 return error; 233 return error;
237} 234}
238 235
239static struct inode *gfs2_lookup_root(struct super_block *sb, 236static inline struct inode *gfs2_lookup_root(struct super_block *sb,
240 struct gfs2_inum_host *inum) 237 u64 no_addr)
241{ 238{
242 return gfs2_inode_lookup(sb, inum, DT_DIR); 239 return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0);
243} 240}
244 241
245static int init_sb(struct gfs2_sbd *sdp, int silent, int undo) 242static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
246{ 243{
247 struct super_block *sb = sdp->sd_vfs; 244 struct super_block *sb = sdp->sd_vfs;
248 struct gfs2_holder sb_gh; 245 struct gfs2_holder sb_gh;
249 struct gfs2_inum_host *inum; 246 u64 no_addr;
250 struct inode *inode; 247 struct inode *inode;
251 int error = 0; 248 int error = 0;
252 249
@@ -289,10 +286,10 @@ static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
289 sb_set_blocksize(sb, sdp->sd_sb.sb_bsize); 286 sb_set_blocksize(sb, sdp->sd_sb.sb_bsize);
290 287
291 /* Get the root inode */ 288 /* Get the root inode */
292 inum = &sdp->sd_sb.sb_root_dir; 289 no_addr = sdp->sd_sb.sb_root_dir.no_addr;
293 if (sb->s_type == &gfs2meta_fs_type) 290 if (sb->s_type == &gfs2meta_fs_type)
294 inum = &sdp->sd_sb.sb_master_dir; 291 no_addr = sdp->sd_sb.sb_master_dir.no_addr;
295 inode = gfs2_lookup_root(sb, inum); 292 inode = gfs2_lookup_root(sb, no_addr);
296 if (IS_ERR(inode)) { 293 if (IS_ERR(inode)) {
297 error = PTR_ERR(inode); 294 error = PTR_ERR(inode);
298 fs_err(sdp, "can't read in root inode: %d\n", error); 295 fs_err(sdp, "can't read in root inode: %d\n", error);
@@ -449,7 +446,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
449 if (undo) 446 if (undo)
450 goto fail_qinode; 447 goto fail_qinode;
451 448
452 inode = gfs2_lookup_root(sdp->sd_vfs, &sdp->sd_sb.sb_master_dir); 449 inode = gfs2_lookup_root(sdp->sd_vfs, sdp->sd_sb.sb_master_dir.no_addr);
453 if (IS_ERR(inode)) { 450 if (IS_ERR(inode)) {
454 error = PTR_ERR(inode); 451 error = PTR_ERR(inode);
455 fs_err(sdp, "can't read in master directory: %d\n", error); 452 fs_err(sdp, "can't read in master directory: %d\n", error);
diff --git a/fs/gfs2/ops_fstype.h b/fs/gfs2/ops_fstype.h
index 7cc2c296271b..407029b3b2b3 100644
--- a/fs/gfs2/ops_fstype.h
+++ b/fs/gfs2/ops_fstype.h
@@ -14,5 +14,6 @@
14 14
15extern struct file_system_type gfs2_fs_type; 15extern struct file_system_type gfs2_fs_type;
16extern struct file_system_type gfs2meta_fs_type; 16extern struct file_system_type gfs2meta_fs_type;
17extern struct export_operations gfs2_export_ops;
17 18
18#endif /* __OPS_FSTYPE_DOT_H__ */ 19#endif /* __OPS_FSTYPE_DOT_H__ */
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index d85f6e05cb95..911c115b5c6c 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -157,7 +157,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
157 if (error) 157 if (error)
158 goto out_gunlock; 158 goto out_gunlock;
159 159
160 error = gfs2_dir_search(dir, &dentry->d_name, NULL, NULL); 160 error = gfs2_dir_check(dir, &dentry->d_name, NULL);
161 switch (error) { 161 switch (error) {
162 case -ENOENT: 162 case -ENOENT:
163 break; 163 break;
@@ -206,7 +206,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
206 goto out_gunlock_q; 206 goto out_gunlock_q;
207 207
208 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + 208 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
209 al->al_rgd->rd_ri.ri_length + 209 al->al_rgd->rd_length +
210 2 * RES_DINODE + RES_STATFS + 210 2 * RES_DINODE + RES_STATFS +
211 RES_QUOTA, 0); 211 RES_QUOTA, 0);
212 if (error) 212 if (error)
@@ -217,8 +217,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
217 goto out_ipres; 217 goto out_ipres;
218 } 218 }
219 219
220 error = gfs2_dir_add(dir, &dentry->d_name, &ip->i_num, 220 error = gfs2_dir_add(dir, &dentry->d_name, ip, IF2DT(inode->i_mode));
221 IF2DT(inode->i_mode));
222 if (error) 221 if (error)
223 goto out_end_trans; 222 goto out_end_trans;
224 223
@@ -275,7 +274,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
275 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); 274 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
276 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); 275 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
277 276
278 rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr); 277 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
279 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); 278 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
280 279
281 280
@@ -420,7 +419,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
420 dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1)); 419 dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1));
421 gfs2_qstr2dirent(&str, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent); 420 gfs2_qstr2dirent(&str, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent);
422 421
423 gfs2_inum_out(&dip->i_num, &dent->de_inum); 422 gfs2_inum_out(dip, dent);
424 dent->de_type = cpu_to_be16(DT_DIR); 423 dent->de_type = cpu_to_be16(DT_DIR);
425 424
426 gfs2_dinode_out(ip, di); 425 gfs2_dinode_out(ip, di);
@@ -472,7 +471,7 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
472 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); 471 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
473 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); 472 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
474 473
475 rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr); 474 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
476 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); 475 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
477 476
478 error = gfs2_glock_nq_m(3, ghs); 477 error = gfs2_glock_nq_m(3, ghs);
@@ -614,7 +613,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
614 * this is the case of the target file already existing 613 * this is the case of the target file already existing
615 * so we unlink before doing the rename 614 * so we unlink before doing the rename
616 */ 615 */
617 nrgd = gfs2_blk2rgrpd(sdp, nip->i_num.no_addr); 616 nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr);
618 if (nrgd) 617 if (nrgd)
619 gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++); 618 gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
620 } 619 }
@@ -653,7 +652,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
653 if (error) 652 if (error)
654 goto out_gunlock; 653 goto out_gunlock;
655 654
656 error = gfs2_dir_search(ndir, &ndentry->d_name, NULL, NULL); 655 error = gfs2_dir_check(ndir, &ndentry->d_name, NULL);
657 switch (error) { 656 switch (error) {
658 case -ENOENT: 657 case -ENOENT:
659 error = 0; 658 error = 0;
@@ -712,7 +711,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
712 goto out_gunlock_q; 711 goto out_gunlock_q;
713 712
714 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + 713 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
715 al->al_rgd->rd_ri.ri_length + 714 al->al_rgd->rd_length +
716 4 * RES_DINODE + 4 * RES_LEAF + 715 4 * RES_DINODE + 4 * RES_LEAF +
717 RES_STATFS + RES_QUOTA + 4, 0); 716 RES_STATFS + RES_QUOTA + 4, 0);
718 if (error) 717 if (error)
@@ -750,7 +749,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
750 if (error) 749 if (error)
751 goto out_end_trans; 750 goto out_end_trans;
752 751
753 error = gfs2_dir_mvino(ip, &name, &ndip->i_num, DT_DIR); 752 error = gfs2_dir_mvino(ip, &name, ndip, DT_DIR);
754 if (error) 753 if (error)
755 goto out_end_trans; 754 goto out_end_trans;
756 } else { 755 } else {
@@ -758,7 +757,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
758 error = gfs2_meta_inode_buffer(ip, &dibh); 757 error = gfs2_meta_inode_buffer(ip, &dibh);
759 if (error) 758 if (error)
760 goto out_end_trans; 759 goto out_end_trans;
761 ip->i_inode.i_ctime = CURRENT_TIME_SEC; 760 ip->i_inode.i_ctime = CURRENT_TIME;
762 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 761 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
763 gfs2_dinode_out(ip, dibh->b_data); 762 gfs2_dinode_out(ip, dibh->b_data);
764 brelse(dibh); 763 brelse(dibh);
@@ -768,8 +767,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
768 if (error) 767 if (error)
769 goto out_end_trans; 768 goto out_end_trans;
770 769
771 error = gfs2_dir_add(ndir, &ndentry->d_name, &ip->i_num, 770 error = gfs2_dir_add(ndir, &ndentry->d_name, ip, IF2DT(ip->i_inode.i_mode));
772 IF2DT(ip->i_inode.i_mode));
773 if (error) 771 if (error)
774 goto out_end_trans; 772 goto out_end_trans;
775 773
@@ -905,8 +903,8 @@ static int setattr_size(struct inode *inode, struct iattr *attr)
905 } 903 }
906 904
907 error = gfs2_truncatei(ip, attr->ia_size); 905 error = gfs2_truncatei(ip, attr->ia_size);
908 if (error) 906 if (error && (inode->i_size != ip->i_di.di_size))
909 return error; 907 i_size_write(inode, ip->i_di.di_size);
910 908
911 return error; 909 return error;
912} 910}
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index 485ce3d49923..603d940f1159 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -326,8 +326,10 @@ static void gfs2_clear_inode(struct inode *inode)
326 gfs2_glock_schedule_for_reclaim(ip->i_gl); 326 gfs2_glock_schedule_for_reclaim(ip->i_gl);
327 gfs2_glock_put(ip->i_gl); 327 gfs2_glock_put(ip->i_gl);
328 ip->i_gl = NULL; 328 ip->i_gl = NULL;
329 if (ip->i_iopen_gh.gh_gl) 329 if (ip->i_iopen_gh.gh_gl) {
330 ip->i_iopen_gh.gh_gl->gl_object = NULL;
330 gfs2_glock_dq_uninit(&ip->i_iopen_gh); 331 gfs2_glock_dq_uninit(&ip->i_iopen_gh);
332 }
331 } 333 }
332} 334}
333 335
@@ -422,13 +424,13 @@ static void gfs2_delete_inode(struct inode *inode)
422 if (!inode->i_private) 424 if (!inode->i_private)
423 goto out; 425 goto out;
424 426
425 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB, &gh); 427 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
426 if (unlikely(error)) { 428 if (unlikely(error)) {
427 gfs2_glock_dq_uninit(&ip->i_iopen_gh); 429 gfs2_glock_dq_uninit(&ip->i_iopen_gh);
428 goto out; 430 goto out;
429 } 431 }
430 432
431 gfs2_glock_dq(&ip->i_iopen_gh); 433 gfs2_glock_dq_wait(&ip->i_iopen_gh);
432 gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh); 434 gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh);
433 error = gfs2_glock_nq(&ip->i_iopen_gh); 435 error = gfs2_glock_nq(&ip->i_iopen_gh);
434 if (error) 436 if (error)
diff --git a/fs/gfs2/ops_vm.c b/fs/gfs2/ops_vm.c
index aa0dbd2aac1b..404b7cc9f8c4 100644
--- a/fs/gfs2/ops_vm.c
+++ b/fs/gfs2/ops_vm.c
@@ -66,7 +66,7 @@ static int alloc_page_backing(struct gfs2_inode *ip, struct page *page)
66 if (error) 66 if (error)
67 goto out_gunlock_q; 67 goto out_gunlock_q;
68 68
69 error = gfs2_trans_begin(sdp, al->al_rgd->rd_ri.ri_length + 69 error = gfs2_trans_begin(sdp, al->al_rgd->rd_length +
70 ind_blocks + RES_DINODE + 70 ind_blocks + RES_DINODE +
71 RES_STATFS + RES_QUOTA, 0); 71 RES_STATFS + RES_QUOTA, 0);
72 if (error) 72 if (error)
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index c186857e48a8..6e546ee8f3d4 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -66,6 +66,18 @@
66#define QUOTA_USER 1 66#define QUOTA_USER 1
67#define QUOTA_GROUP 0 67#define QUOTA_GROUP 0
68 68
69struct gfs2_quota_host {
70 u64 qu_limit;
71 u64 qu_warn;
72 s64 qu_value;
73};
74
75struct gfs2_quota_change_host {
76 u64 qc_change;
77 u32 qc_flags; /* GFS2_QCF_... */
78 u32 qc_id;
79};
80
69static u64 qd2offset(struct gfs2_quota_data *qd) 81static u64 qd2offset(struct gfs2_quota_data *qd)
70{ 82{
71 u64 offset; 83 u64 offset;
@@ -561,6 +573,25 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change)
561 mutex_unlock(&sdp->sd_quota_mutex); 573 mutex_unlock(&sdp->sd_quota_mutex);
562} 574}
563 575
576static void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf)
577{
578 const struct gfs2_quota *str = buf;
579
580 qu->qu_limit = be64_to_cpu(str->qu_limit);
581 qu->qu_warn = be64_to_cpu(str->qu_warn);
582 qu->qu_value = be64_to_cpu(str->qu_value);
583}
584
585static void gfs2_quota_out(const struct gfs2_quota_host *qu, void *buf)
586{
587 struct gfs2_quota *str = buf;
588
589 str->qu_limit = cpu_to_be64(qu->qu_limit);
590 str->qu_warn = cpu_to_be64(qu->qu_warn);
591 str->qu_value = cpu_to_be64(qu->qu_value);
592 memset(&str->qu_reserved, 0, sizeof(str->qu_reserved));
593}
594
564/** 595/**
565 * gfs2_adjust_quota 596 * gfs2_adjust_quota
566 * 597 *
@@ -573,12 +604,13 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
573 struct inode *inode = &ip->i_inode; 604 struct inode *inode = &ip->i_inode;
574 struct address_space *mapping = inode->i_mapping; 605 struct address_space *mapping = inode->i_mapping;
575 unsigned long index = loc >> PAGE_CACHE_SHIFT; 606 unsigned long index = loc >> PAGE_CACHE_SHIFT;
576 unsigned offset = loc & (PAGE_CACHE_SHIFT - 1); 607 unsigned offset = loc & (PAGE_CACHE_SIZE - 1);
577 unsigned blocksize, iblock, pos; 608 unsigned blocksize, iblock, pos;
578 struct buffer_head *bh; 609 struct buffer_head *bh;
579 struct page *page; 610 struct page *page;
580 void *kaddr; 611 void *kaddr;
581 __be64 *ptr; 612 char *ptr;
613 struct gfs2_quota_host qp;
582 s64 value; 614 s64 value;
583 int err = -EIO; 615 int err = -EIO;
584 616
@@ -620,13 +652,17 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
620 652
621 kaddr = kmap_atomic(page, KM_USER0); 653 kaddr = kmap_atomic(page, KM_USER0);
622 ptr = kaddr + offset; 654 ptr = kaddr + offset;
623 value = (s64)be64_to_cpu(*ptr) + change; 655 gfs2_quota_in(&qp, ptr);
624 *ptr = cpu_to_be64(value); 656 qp.qu_value += change;
657 value = qp.qu_value;
658 gfs2_quota_out(&qp, ptr);
625 flush_dcache_page(page); 659 flush_dcache_page(page);
626 kunmap_atomic(kaddr, KM_USER0); 660 kunmap_atomic(kaddr, KM_USER0);
627 err = 0; 661 err = 0;
628 qd->qd_qb.qb_magic = cpu_to_be32(GFS2_MAGIC); 662 qd->qd_qb.qb_magic = cpu_to_be32(GFS2_MAGIC);
629 qd->qd_qb.qb_value = cpu_to_be64(value); 663 qd->qd_qb.qb_value = cpu_to_be64(value);
664 ((struct gfs2_quota_lvb*)(qd->qd_gl->gl_lvb))->qb_magic = cpu_to_be32(GFS2_MAGIC);
665 ((struct gfs2_quota_lvb*)(qd->qd_gl->gl_lvb))->qb_value = cpu_to_be64(value);
630unlock: 666unlock:
631 unlock_page(page); 667 unlock_page(page);
632 page_cache_release(page); 668 page_cache_release(page);
@@ -689,7 +725,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
689 goto out_alloc; 725 goto out_alloc;
690 726
691 error = gfs2_trans_begin(sdp, 727 error = gfs2_trans_begin(sdp,
692 al->al_rgd->rd_ri.ri_length + 728 al->al_rgd->rd_length +
693 num_qd * data_blocks + 729 num_qd * data_blocks +
694 nalloc * ind_blocks + 730 nalloc * ind_blocks +
695 RES_DINODE + num_qd + 731 RES_DINODE + num_qd +
@@ -709,7 +745,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
709 offset = qd2offset(qd); 745 offset = qd2offset(qd);
710 error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync, 746 error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync,
711 (struct gfs2_quota_data *) 747 (struct gfs2_quota_data *)
712 qd->qd_gl->gl_lvb); 748 qd);
713 if (error) 749 if (error)
714 goto out_end_trans; 750 goto out_end_trans;
715 751
@@ -1050,6 +1086,15 @@ int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id)
1050 return error; 1086 return error;
1051} 1087}
1052 1088
1089static void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *buf)
1090{
1091 const struct gfs2_quota_change *str = buf;
1092
1093 qc->qc_change = be64_to_cpu(str->qc_change);
1094 qc->qc_flags = be32_to_cpu(str->qc_flags);
1095 qc->qc_id = be32_to_cpu(str->qc_id);
1096}
1097
1053int gfs2_quota_init(struct gfs2_sbd *sdp) 1098int gfs2_quota_init(struct gfs2_sbd *sdp)
1054{ 1099{
1055 struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode); 1100 struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode);
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 8bc182c7e2ef..5ada38c99a2c 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -116,6 +116,22 @@ void gfs2_revoke_clean(struct gfs2_sbd *sdp)
116 } 116 }
117} 117}
118 118
119static int gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf)
120{
121 const struct gfs2_log_header *str = buf;
122
123 if (str->lh_header.mh_magic != cpu_to_be32(GFS2_MAGIC) ||
124 str->lh_header.mh_type != cpu_to_be32(GFS2_METATYPE_LH))
125 return 1;
126
127 lh->lh_sequence = be64_to_cpu(str->lh_sequence);
128 lh->lh_flags = be32_to_cpu(str->lh_flags);
129 lh->lh_tail = be32_to_cpu(str->lh_tail);
130 lh->lh_blkno = be32_to_cpu(str->lh_blkno);
131 lh->lh_hash = be32_to_cpu(str->lh_hash);
132 return 0;
133}
134
119/** 135/**
120 * get_log_header - read the log header for a given segment 136 * get_log_header - read the log header for a given segment
121 * @jd: the journal 137 * @jd: the journal
@@ -147,12 +163,10 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
147 sizeof(u32)); 163 sizeof(u32));
148 hash = crc32_le(hash, (unsigned char const *)&nothing, sizeof(nothing)); 164 hash = crc32_le(hash, (unsigned char const *)&nothing, sizeof(nothing));
149 hash ^= (u32)~0; 165 hash ^= (u32)~0;
150 gfs2_log_header_in(&lh, bh->b_data); 166 error = gfs2_log_header_in(&lh, bh->b_data);
151 brelse(bh); 167 brelse(bh);
152 168
153 if (lh.lh_header.mh_magic != GFS2_MAGIC || 169 if (error || lh.lh_blkno != blk || lh.lh_hash != hash)
154 lh.lh_header.mh_type != GFS2_METATYPE_LH ||
155 lh.lh_blkno != blk || lh.lh_hash != hash)
156 return 1; 170 return 1;
157 171
158 *head = lh; 172 *head = lh;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 1727f5012efe..e4e040625153 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. 3 * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
4 * 4 *
5 * This copyrighted material is made available to anyone wishing to use, 5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions 6 * modify, copy, or redistribute it subject to the terms and conditions
@@ -28,6 +28,7 @@
28#include "ops_file.h" 28#include "ops_file.h"
29#include "util.h" 29#include "util.h"
30#include "log.h" 30#include "log.h"
31#include "inode.h"
31 32
32#define BFITNOENT ((u32)~0) 33#define BFITNOENT ((u32)~0)
33 34
@@ -50,6 +51,9 @@ static const char valid_change[16] = {
50 1, 0, 0, 0 51 1, 0, 0, 0
51}; 52};
52 53
54static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
55 unsigned char old_state, unsigned char new_state);
56
53/** 57/**
54 * gfs2_setbit - Set a bit in the bitmaps 58 * gfs2_setbit - Set a bit in the bitmaps
55 * @buffer: the buffer that holds the bitmaps 59 * @buffer: the buffer that holds the bitmaps
@@ -204,7 +208,7 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
204{ 208{
205 struct gfs2_sbd *sdp = rgd->rd_sbd; 209 struct gfs2_sbd *sdp = rgd->rd_sbd;
206 struct gfs2_bitmap *bi = NULL; 210 struct gfs2_bitmap *bi = NULL;
207 u32 length = rgd->rd_ri.ri_length; 211 u32 length = rgd->rd_length;
208 u32 count[4], tmp; 212 u32 count[4], tmp;
209 int buf, x; 213 int buf, x;
210 214
@@ -227,7 +231,7 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
227 return; 231 return;
228 } 232 }
229 233
230 tmp = rgd->rd_ri.ri_data - 234 tmp = rgd->rd_data -
231 rgd->rd_rg.rg_free - 235 rgd->rd_rg.rg_free -
232 rgd->rd_rg.rg_dinodes; 236 rgd->rd_rg.rg_dinodes;
233 if (count[1] + count[2] != tmp) { 237 if (count[1] + count[2] != tmp) {
@@ -253,10 +257,10 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
253 257
254} 258}
255 259
256static inline int rgrp_contains_block(struct gfs2_rindex_host *ri, u64 block) 260static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
257{ 261{
258 u64 first = ri->ri_data0; 262 u64 first = rgd->rd_data0;
259 u64 last = first + ri->ri_data; 263 u64 last = first + rgd->rd_data;
260 return first <= block && block < last; 264 return first <= block && block < last;
261} 265}
262 266
@@ -275,7 +279,7 @@ struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk)
275 spin_lock(&sdp->sd_rindex_spin); 279 spin_lock(&sdp->sd_rindex_spin);
276 280
277 list_for_each_entry(rgd, &sdp->sd_rindex_mru_list, rd_list_mru) { 281 list_for_each_entry(rgd, &sdp->sd_rindex_mru_list, rd_list_mru) {
278 if (rgrp_contains_block(&rgd->rd_ri, blk)) { 282 if (rgrp_contains_block(rgd, blk)) {
279 list_move(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list); 283 list_move(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
280 spin_unlock(&sdp->sd_rindex_spin); 284 spin_unlock(&sdp->sd_rindex_spin);
281 return rgd; 285 return rgd;
@@ -354,6 +358,15 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
354 mutex_unlock(&sdp->sd_rindex_mutex); 358 mutex_unlock(&sdp->sd_rindex_mutex);
355} 359}
356 360
361static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd)
362{
363 printk(KERN_INFO " ri_addr = %llu\n", (unsigned long long)rgd->rd_addr);
364 printk(KERN_INFO " ri_length = %u\n", rgd->rd_length);
365 printk(KERN_INFO " ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0);
366 printk(KERN_INFO " ri_data = %u\n", rgd->rd_data);
367 printk(KERN_INFO " ri_bitbytes = %u\n", rgd->rd_bitbytes);
368}
369
357/** 370/**
358 * gfs2_compute_bitstructs - Compute the bitmap sizes 371 * gfs2_compute_bitstructs - Compute the bitmap sizes
359 * @rgd: The resource group descriptor 372 * @rgd: The resource group descriptor
@@ -367,7 +380,7 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
367{ 380{
368 struct gfs2_sbd *sdp = rgd->rd_sbd; 381 struct gfs2_sbd *sdp = rgd->rd_sbd;
369 struct gfs2_bitmap *bi; 382 struct gfs2_bitmap *bi;
370 u32 length = rgd->rd_ri.ri_length; /* # blocks in hdr & bitmap */ 383 u32 length = rgd->rd_length; /* # blocks in hdr & bitmap */
371 u32 bytes_left, bytes; 384 u32 bytes_left, bytes;
372 int x; 385 int x;
373 386
@@ -378,7 +391,7 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
378 if (!rgd->rd_bits) 391 if (!rgd->rd_bits)
379 return -ENOMEM; 392 return -ENOMEM;
380 393
381 bytes_left = rgd->rd_ri.ri_bitbytes; 394 bytes_left = rgd->rd_bitbytes;
382 395
383 for (x = 0; x < length; x++) { 396 for (x = 0; x < length; x++) {
384 bi = rgd->rd_bits + x; 397 bi = rgd->rd_bits + x;
@@ -399,14 +412,14 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
399 } else if (x + 1 == length) { 412 } else if (x + 1 == length) {
400 bytes = bytes_left; 413 bytes = bytes_left;
401 bi->bi_offset = sizeof(struct gfs2_meta_header); 414 bi->bi_offset = sizeof(struct gfs2_meta_header);
402 bi->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left; 415 bi->bi_start = rgd->rd_bitbytes - bytes_left;
403 bi->bi_len = bytes; 416 bi->bi_len = bytes;
404 /* other blocks */ 417 /* other blocks */
405 } else { 418 } else {
406 bytes = sdp->sd_sb.sb_bsize - 419 bytes = sdp->sd_sb.sb_bsize -
407 sizeof(struct gfs2_meta_header); 420 sizeof(struct gfs2_meta_header);
408 bi->bi_offset = sizeof(struct gfs2_meta_header); 421 bi->bi_offset = sizeof(struct gfs2_meta_header);
409 bi->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left; 422 bi->bi_start = rgd->rd_bitbytes - bytes_left;
410 bi->bi_len = bytes; 423 bi->bi_len = bytes;
411 } 424 }
412 425
@@ -418,9 +431,9 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
418 return -EIO; 431 return -EIO;
419 } 432 }
420 bi = rgd->rd_bits + (length - 1); 433 bi = rgd->rd_bits + (length - 1);
421 if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_ri.ri_data) { 434 if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_data) {
422 if (gfs2_consist_rgrpd(rgd)) { 435 if (gfs2_consist_rgrpd(rgd)) {
423 gfs2_rindex_print(&rgd->rd_ri); 436 gfs2_rindex_print(rgd);
424 fs_err(sdp, "start=%u len=%u offset=%u\n", 437 fs_err(sdp, "start=%u len=%u offset=%u\n",
425 bi->bi_start, bi->bi_len, bi->bi_offset); 438 bi->bi_start, bi->bi_len, bi->bi_offset);
426 } 439 }
@@ -431,9 +444,104 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
431} 444}
432 445
433/** 446/**
434 * gfs2_ri_update - Pull in a new resource index from the disk 447 * gfs2_ri_total - Total up the file system space, according to the rindex.
448 *
449 */
450u64 gfs2_ri_total(struct gfs2_sbd *sdp)
451{
452 u64 total_data = 0;
453 struct inode *inode = sdp->sd_rindex;
454 struct gfs2_inode *ip = GFS2_I(inode);
455 char buf[sizeof(struct gfs2_rindex)];
456 struct file_ra_state ra_state;
457 int error, rgrps;
458
459 mutex_lock(&sdp->sd_rindex_mutex);
460 file_ra_state_init(&ra_state, inode->i_mapping);
461 for (rgrps = 0;; rgrps++) {
462 loff_t pos = rgrps * sizeof(struct gfs2_rindex);
463
464 if (pos + sizeof(struct gfs2_rindex) >= ip->i_di.di_size)
465 break;
466 error = gfs2_internal_read(ip, &ra_state, buf, &pos,
467 sizeof(struct gfs2_rindex));
468 if (error != sizeof(struct gfs2_rindex))
469 break;
470 total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data);
471 }
472 mutex_unlock(&sdp->sd_rindex_mutex);
473 return total_data;
474}
475
476static void gfs2_rindex_in(struct gfs2_rgrpd *rgd, const void *buf)
477{
478 const struct gfs2_rindex *str = buf;
479
480 rgd->rd_addr = be64_to_cpu(str->ri_addr);
481 rgd->rd_length = be32_to_cpu(str->ri_length);
482 rgd->rd_data0 = be64_to_cpu(str->ri_data0);
483 rgd->rd_data = be32_to_cpu(str->ri_data);
484 rgd->rd_bitbytes = be32_to_cpu(str->ri_bitbytes);
485}
486
487/**
488 * read_rindex_entry - Pull in a new resource index entry from the disk
435 * @gl: The glock covering the rindex inode 489 * @gl: The glock covering the rindex inode
436 * 490 *
491 * Returns: 0 on success, error code otherwise
492 */
493
494static int read_rindex_entry(struct gfs2_inode *ip,
495 struct file_ra_state *ra_state)
496{
497 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
498 loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
499 char buf[sizeof(struct gfs2_rindex)];
500 int error;
501 struct gfs2_rgrpd *rgd;
502
503 error = gfs2_internal_read(ip, ra_state, buf, &pos,
504 sizeof(struct gfs2_rindex));
505 if (!error)
506 return 0;
507 if (error != sizeof(struct gfs2_rindex)) {
508 if (error > 0)
509 error = -EIO;
510 return error;
511 }
512
513 rgd = kzalloc(sizeof(struct gfs2_rgrpd), GFP_NOFS);
514 error = -ENOMEM;
515 if (!rgd)
516 return error;
517
518 mutex_init(&rgd->rd_mutex);
519 lops_init_le(&rgd->rd_le, &gfs2_rg_lops);
520 rgd->rd_sbd = sdp;
521
522 list_add_tail(&rgd->rd_list, &sdp->sd_rindex_list);
523 list_add_tail(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
524
525 gfs2_rindex_in(rgd, buf);
526 error = compute_bitstructs(rgd);
527 if (error)
528 return error;
529
530 error = gfs2_glock_get(sdp, rgd->rd_addr,
531 &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
532 if (error)
533 return error;
534
535 rgd->rd_gl->gl_object = rgd;
536 rgd->rd_rg_vn = rgd->rd_gl->gl_vn - 1;
537 rgd->rd_flags |= GFS2_RDF_CHECK;
538 return error;
539}
540
541/**
542 * gfs2_ri_update - Pull in a new resource index from the disk
543 * @ip: pointer to the rindex inode
544 *
437 * Returns: 0 on successful update, error code otherwise 545 * Returns: 0 on successful update, error code otherwise
438 */ 546 */
439 547
@@ -441,13 +549,11 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
441{ 549{
442 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 550 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
443 struct inode *inode = &ip->i_inode; 551 struct inode *inode = &ip->i_inode;
444 struct gfs2_rgrpd *rgd;
445 char buf[sizeof(struct gfs2_rindex)];
446 struct file_ra_state ra_state; 552 struct file_ra_state ra_state;
447 u64 junk = ip->i_di.di_size; 553 u64 rgrp_count = ip->i_di.di_size;
448 int error; 554 int error;
449 555
450 if (do_div(junk, sizeof(struct gfs2_rindex))) { 556 if (do_div(rgrp_count, sizeof(struct gfs2_rindex))) {
451 gfs2_consist_inode(ip); 557 gfs2_consist_inode(ip);
452 return -EIO; 558 return -EIO;
453 } 559 }
@@ -455,50 +561,50 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
455 clear_rgrpdi(sdp); 561 clear_rgrpdi(sdp);
456 562
457 file_ra_state_init(&ra_state, inode->i_mapping); 563 file_ra_state_init(&ra_state, inode->i_mapping);
458 for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) { 564 for (sdp->sd_rgrps = 0; sdp->sd_rgrps < rgrp_count; sdp->sd_rgrps++) {
459 loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex); 565 error = read_rindex_entry(ip, &ra_state);
460 error = gfs2_internal_read(ip, &ra_state, buf, &pos, 566 if (error) {
461 sizeof(struct gfs2_rindex)); 567 clear_rgrpdi(sdp);
462 if (!error) 568 return error;
463 break;
464 if (error != sizeof(struct gfs2_rindex)) {
465 if (error > 0)
466 error = -EIO;
467 goto fail;
468 } 569 }
570 }
469 571
470 rgd = kzalloc(sizeof(struct gfs2_rgrpd), GFP_NOFS); 572 sdp->sd_rindex_vn = ip->i_gl->gl_vn;
471 error = -ENOMEM; 573 return 0;
472 if (!rgd) 574}
473 goto fail;
474
475 mutex_init(&rgd->rd_mutex);
476 lops_init_le(&rgd->rd_le, &gfs2_rg_lops);
477 rgd->rd_sbd = sdp;
478
479 list_add_tail(&rgd->rd_list, &sdp->sd_rindex_list);
480 list_add_tail(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
481
482 gfs2_rindex_in(&rgd->rd_ri, buf);
483 error = compute_bitstructs(rgd);
484 if (error)
485 goto fail;
486 575
487 error = gfs2_glock_get(sdp, rgd->rd_ri.ri_addr, 576/**
488 &gfs2_rgrp_glops, CREATE, &rgd->rd_gl); 577 * gfs2_ri_update_special - Pull in a new resource index from the disk
489 if (error) 578 *
490 goto fail; 579 * This is a special version that's safe to call from gfs2_inplace_reserve_i.
580 * In this case we know that we don't have any resource groups in memory yet.
581 *
582 * @ip: pointer to the rindex inode
583 *
584 * Returns: 0 on successful update, error code otherwise
585 */
586static int gfs2_ri_update_special(struct gfs2_inode *ip)
587{
588 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
589 struct inode *inode = &ip->i_inode;
590 struct file_ra_state ra_state;
591 int error;
491 592
492 rgd->rd_gl->gl_object = rgd; 593 file_ra_state_init(&ra_state, inode->i_mapping);
493 rgd->rd_rg_vn = rgd->rd_gl->gl_vn - 1; 594 for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) {
595 /* Ignore partials */
596 if ((sdp->sd_rgrps + 1) * sizeof(struct gfs2_rindex) >
597 ip->i_di.di_size)
598 break;
599 error = read_rindex_entry(ip, &ra_state);
600 if (error) {
601 clear_rgrpdi(sdp);
602 return error;
603 }
494 } 604 }
495 605
496 sdp->sd_rindex_vn = ip->i_gl->gl_vn; 606 sdp->sd_rindex_vn = ip->i_gl->gl_vn;
497 return 0; 607 return 0;
498
499fail:
500 clear_rgrpdi(sdp);
501 return error;
502} 608}
503 609
504/** 610/**
@@ -543,6 +649,28 @@ int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh)
543 return error; 649 return error;
544} 650}
545 651
652static void gfs2_rgrp_in(struct gfs2_rgrp_host *rg, const void *buf)
653{
654 const struct gfs2_rgrp *str = buf;
655
656 rg->rg_flags = be32_to_cpu(str->rg_flags);
657 rg->rg_free = be32_to_cpu(str->rg_free);
658 rg->rg_dinodes = be32_to_cpu(str->rg_dinodes);
659 rg->rg_igeneration = be64_to_cpu(str->rg_igeneration);
660}
661
662static void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf)
663{
664 struct gfs2_rgrp *str = buf;
665
666 str->rg_flags = cpu_to_be32(rg->rg_flags);
667 str->rg_free = cpu_to_be32(rg->rg_free);
668 str->rg_dinodes = cpu_to_be32(rg->rg_dinodes);
669 str->__pad = cpu_to_be32(0);
670 str->rg_igeneration = cpu_to_be64(rg->rg_igeneration);
671 memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
672}
673
546/** 674/**
547 * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps 675 * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
548 * @rgd: the struct gfs2_rgrpd describing the RG to read in 676 * @rgd: the struct gfs2_rgrpd describing the RG to read in
@@ -557,7 +685,7 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
557{ 685{
558 struct gfs2_sbd *sdp = rgd->rd_sbd; 686 struct gfs2_sbd *sdp = rgd->rd_sbd;
559 struct gfs2_glock *gl = rgd->rd_gl; 687 struct gfs2_glock *gl = rgd->rd_gl;
560 unsigned int length = rgd->rd_ri.ri_length; 688 unsigned int length = rgd->rd_length;
561 struct gfs2_bitmap *bi; 689 struct gfs2_bitmap *bi;
562 unsigned int x, y; 690 unsigned int x, y;
563 int error; 691 int error;
@@ -575,7 +703,7 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
575 703
576 for (x = 0; x < length; x++) { 704 for (x = 0; x < length; x++) {
577 bi = rgd->rd_bits + x; 705 bi = rgd->rd_bits + x;
578 error = gfs2_meta_read(gl, rgd->rd_ri.ri_addr + x, 0, &bi->bi_bh); 706 error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh);
579 if (error) 707 if (error)
580 goto fail; 708 goto fail;
581 } 709 }
@@ -637,7 +765,7 @@ void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd)
637void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd) 765void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd)
638{ 766{
639 struct gfs2_sbd *sdp = rgd->rd_sbd; 767 struct gfs2_sbd *sdp = rgd->rd_sbd;
640 int x, length = rgd->rd_ri.ri_length; 768 int x, length = rgd->rd_length;
641 769
642 spin_lock(&sdp->sd_rindex_spin); 770 spin_lock(&sdp->sd_rindex_spin);
643 gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count); 771 gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count);
@@ -660,7 +788,7 @@ void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd)
660void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd) 788void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd)
661{ 789{
662 struct gfs2_sbd *sdp = rgd->rd_sbd; 790 struct gfs2_sbd *sdp = rgd->rd_sbd;
663 unsigned int length = rgd->rd_ri.ri_length; 791 unsigned int length = rgd->rd_length;
664 unsigned int x; 792 unsigned int x;
665 793
666 for (x = 0; x < length; x++) { 794 for (x = 0; x < length; x++) {
@@ -722,6 +850,38 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al)
722} 850}
723 851
724/** 852/**
853 * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes
854 * @rgd: The rgrp
855 *
856 * Returns: The inode, if one has been found
857 */
858
859static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked)
860{
861 struct inode *inode;
862 u32 goal = 0;
863 u64 no_addr;
864
865 for(;;) {
866 goal = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED,
867 GFS2_BLKST_UNLINKED);
868 if (goal == 0)
869 return 0;
870 no_addr = goal + rgd->rd_data0;
871 if (no_addr <= *last_unlinked)
872 continue;
873 *last_unlinked = no_addr;
874 inode = gfs2_inode_lookup(rgd->rd_sbd->sd_vfs, DT_UNKNOWN,
875 no_addr, -1);
876 if (!IS_ERR(inode))
877 return inode;
878 }
879
880 rgd->rd_flags &= ~GFS2_RDF_CHECK;
881 return NULL;
882}
883
884/**
725 * recent_rgrp_first - get first RG from "recent" list 885 * recent_rgrp_first - get first RG from "recent" list
726 * @sdp: The GFS2 superblock 886 * @sdp: The GFS2 superblock
727 * @rglast: address of the rgrp used last 887 * @rglast: address of the rgrp used last
@@ -743,7 +903,7 @@ static struct gfs2_rgrpd *recent_rgrp_first(struct gfs2_sbd *sdp,
743 goto first; 903 goto first;
744 904
745 list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) { 905 list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) {
746 if (rgd->rd_ri.ri_addr == rglast) 906 if (rgd->rd_addr == rglast)
747 goto out; 907 goto out;
748 } 908 }
749 909
@@ -882,8 +1042,9 @@ static void forward_rgrp_set(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd)
882 * Returns: errno 1042 * Returns: errno
883 */ 1043 */
884 1044
885static int get_local_rgrp(struct gfs2_inode *ip) 1045static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
886{ 1046{
1047 struct inode *inode = NULL;
887 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1048 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
888 struct gfs2_rgrpd *rgd, *begin = NULL; 1049 struct gfs2_rgrpd *rgd, *begin = NULL;
889 struct gfs2_alloc *al = &ip->i_alloc; 1050 struct gfs2_alloc *al = &ip->i_alloc;
@@ -903,7 +1064,11 @@ static int get_local_rgrp(struct gfs2_inode *ip)
903 case 0: 1064 case 0:
904 if (try_rgrp_fit(rgd, al)) 1065 if (try_rgrp_fit(rgd, al))
905 goto out; 1066 goto out;
1067 if (rgd->rd_flags & GFS2_RDF_CHECK)
1068 inode = try_rgrp_unlink(rgd, last_unlinked);
906 gfs2_glock_dq_uninit(&al->al_rgd_gh); 1069 gfs2_glock_dq_uninit(&al->al_rgd_gh);
1070 if (inode)
1071 return inode;
907 rgd = recent_rgrp_next(rgd, 1); 1072 rgd = recent_rgrp_next(rgd, 1);
908 break; 1073 break;
909 1074
@@ -912,7 +1077,7 @@ static int get_local_rgrp(struct gfs2_inode *ip)
912 break; 1077 break;
913 1078
914 default: 1079 default:
915 return error; 1080 return ERR_PTR(error);
916 } 1081 }
917 } 1082 }
918 1083
@@ -927,7 +1092,11 @@ static int get_local_rgrp(struct gfs2_inode *ip)
927 case 0: 1092 case 0:
928 if (try_rgrp_fit(rgd, al)) 1093 if (try_rgrp_fit(rgd, al))
929 goto out; 1094 goto out;
1095 if (rgd->rd_flags & GFS2_RDF_CHECK)
1096 inode = try_rgrp_unlink(rgd, last_unlinked);
930 gfs2_glock_dq_uninit(&al->al_rgd_gh); 1097 gfs2_glock_dq_uninit(&al->al_rgd_gh);
1098 if (inode)
1099 return inode;
931 break; 1100 break;
932 1101
933 case GLR_TRYFAILED: 1102 case GLR_TRYFAILED:
@@ -935,7 +1104,7 @@ static int get_local_rgrp(struct gfs2_inode *ip)
935 break; 1104 break;
936 1105
937 default: 1106 default:
938 return error; 1107 return ERR_PTR(error);
939 } 1108 }
940 1109
941 rgd = gfs2_rgrpd_get_next(rgd); 1110 rgd = gfs2_rgrpd_get_next(rgd);
@@ -944,7 +1113,7 @@ static int get_local_rgrp(struct gfs2_inode *ip)
944 1113
945 if (rgd == begin) { 1114 if (rgd == begin) {
946 if (++loops >= 3) 1115 if (++loops >= 3)
947 return -ENOSPC; 1116 return ERR_PTR(-ENOSPC);
948 if (!skipped) 1117 if (!skipped)
949 loops++; 1118 loops++;
950 flags = 0; 1119 flags = 0;
@@ -954,7 +1123,7 @@ static int get_local_rgrp(struct gfs2_inode *ip)
954 } 1123 }
955 1124
956out: 1125out:
957 ip->i_last_rg_alloc = rgd->rd_ri.ri_addr; 1126 ip->i_last_rg_alloc = rgd->rd_addr;
958 1127
959 if (begin) { 1128 if (begin) {
960 recent_rgrp_add(rgd); 1129 recent_rgrp_add(rgd);
@@ -964,7 +1133,7 @@ out:
964 forward_rgrp_set(sdp, rgd); 1133 forward_rgrp_set(sdp, rgd);
965 } 1134 }
966 1135
967 return 0; 1136 return NULL;
968} 1137}
969 1138
970/** 1139/**
@@ -978,19 +1147,33 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line)
978{ 1147{
979 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1148 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
980 struct gfs2_alloc *al = &ip->i_alloc; 1149 struct gfs2_alloc *al = &ip->i_alloc;
981 int error; 1150 struct inode *inode;
1151 int error = 0;
1152 u64 last_unlinked = 0;
982 1153
983 if (gfs2_assert_warn(sdp, al->al_requested)) 1154 if (gfs2_assert_warn(sdp, al->al_requested))
984 return -EINVAL; 1155 return -EINVAL;
985 1156
986 error = gfs2_rindex_hold(sdp, &al->al_ri_gh); 1157try_again:
1158 /* We need to hold the rindex unless the inode we're using is
1159 the rindex itself, in which case it's already held. */
1160 if (ip != GFS2_I(sdp->sd_rindex))
1161 error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
1162 else if (!sdp->sd_rgrps) /* We may not have the rindex read in, so: */
1163 error = gfs2_ri_update_special(ip);
1164
987 if (error) 1165 if (error)
988 return error; 1166 return error;
989 1167
990 error = get_local_rgrp(ip); 1168 inode = get_local_rgrp(ip, &last_unlinked);
991 if (error) { 1169 if (inode) {
992 gfs2_glock_dq_uninit(&al->al_ri_gh); 1170 if (ip != GFS2_I(sdp->sd_rindex))
993 return error; 1171 gfs2_glock_dq_uninit(&al->al_ri_gh);
1172 if (IS_ERR(inode))
1173 return PTR_ERR(inode);
1174 iput(inode);
1175 gfs2_log_flush(sdp, NULL);
1176 goto try_again;
994 } 1177 }
995 1178
996 al->al_file = file; 1179 al->al_file = file;
@@ -1019,7 +1202,8 @@ void gfs2_inplace_release(struct gfs2_inode *ip)
1019 1202
1020 al->al_rgd = NULL; 1203 al->al_rgd = NULL;
1021 gfs2_glock_dq_uninit(&al->al_rgd_gh); 1204 gfs2_glock_dq_uninit(&al->al_rgd_gh);
1022 gfs2_glock_dq_uninit(&al->al_ri_gh); 1205 if (ip != GFS2_I(sdp->sd_rindex))
1206 gfs2_glock_dq_uninit(&al->al_ri_gh);
1023} 1207}
1024 1208
1025/** 1209/**
@@ -1037,8 +1221,8 @@ unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
1037 unsigned int buf; 1221 unsigned int buf;
1038 unsigned char type; 1222 unsigned char type;
1039 1223
1040 length = rgd->rd_ri.ri_length; 1224 length = rgd->rd_length;
1041 rgrp_block = block - rgd->rd_ri.ri_data0; 1225 rgrp_block = block - rgd->rd_data0;
1042 1226
1043 for (buf = 0; buf < length; buf++) { 1227 for (buf = 0; buf < length; buf++) {
1044 bi = rgd->rd_bits + buf; 1228 bi = rgd->rd_bits + buf;
@@ -1077,10 +1261,10 @@ unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
1077 */ 1261 */
1078 1262
1079static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, 1263static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
1080 unsigned char old_state, unsigned char new_state) 1264 unsigned char old_state, unsigned char new_state)
1081{ 1265{
1082 struct gfs2_bitmap *bi = NULL; 1266 struct gfs2_bitmap *bi = NULL;
1083 u32 length = rgd->rd_ri.ri_length; 1267 u32 length = rgd->rd_length;
1084 u32 blk = 0; 1268 u32 blk = 0;
1085 unsigned int buf, x; 1269 unsigned int buf, x;
1086 1270
@@ -1118,17 +1302,18 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
1118 goal = 0; 1302 goal = 0;
1119 } 1303 }
1120 1304
1121 if (gfs2_assert_withdraw(rgd->rd_sbd, x <= length)) 1305 if (old_state != new_state) {
1122 blk = 0; 1306 gfs2_assert_withdraw(rgd->rd_sbd, blk != BFITNOENT);
1123 1307
1124 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); 1308 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
1125 gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset, 1309 gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
1126 bi->bi_len, blk, new_state);
1127 if (bi->bi_clone)
1128 gfs2_setbit(rgd, bi->bi_clone + bi->bi_offset,
1129 bi->bi_len, blk, new_state); 1310 bi->bi_len, blk, new_state);
1311 if (bi->bi_clone)
1312 gfs2_setbit(rgd, bi->bi_clone + bi->bi_offset,
1313 bi->bi_len, blk, new_state);
1314 }
1130 1315
1131 return bi->bi_start * GFS2_NBBY + blk; 1316 return (blk == BFITNOENT) ? 0 : (bi->bi_start * GFS2_NBBY) + blk;
1132} 1317}
1133 1318
1134/** 1319/**
@@ -1156,9 +1341,9 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
1156 return NULL; 1341 return NULL;
1157 } 1342 }
1158 1343
1159 length = rgd->rd_ri.ri_length; 1344 length = rgd->rd_length;
1160 1345
1161 rgrp_blk = bstart - rgd->rd_ri.ri_data0; 1346 rgrp_blk = bstart - rgd->rd_data0;
1162 1347
1163 while (blen--) { 1348 while (blen--) {
1164 for (buf = 0; buf < length; buf++) { 1349 for (buf = 0; buf < length; buf++) {
@@ -1202,15 +1387,15 @@ u64 gfs2_alloc_data(struct gfs2_inode *ip)
1202 u32 goal, blk; 1387 u32 goal, blk;
1203 u64 block; 1388 u64 block;
1204 1389
1205 if (rgrp_contains_block(&rgd->rd_ri, ip->i_di.di_goal_data)) 1390 if (rgrp_contains_block(rgd, ip->i_di.di_goal_data))
1206 goal = ip->i_di.di_goal_data - rgd->rd_ri.ri_data0; 1391 goal = ip->i_di.di_goal_data - rgd->rd_data0;
1207 else 1392 else
1208 goal = rgd->rd_last_alloc_data; 1393 goal = rgd->rd_last_alloc_data;
1209 1394
1210 blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED); 1395 blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED);
1211 rgd->rd_last_alloc_data = blk; 1396 rgd->rd_last_alloc_data = blk;
1212 1397
1213 block = rgd->rd_ri.ri_data0 + blk; 1398 block = rgd->rd_data0 + blk;
1214 ip->i_di.di_goal_data = block; 1399 ip->i_di.di_goal_data = block;
1215 1400
1216 gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free); 1401 gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
@@ -1246,15 +1431,15 @@ u64 gfs2_alloc_meta(struct gfs2_inode *ip)
1246 u32 goal, blk; 1431 u32 goal, blk;
1247 u64 block; 1432 u64 block;
1248 1433
1249 if (rgrp_contains_block(&rgd->rd_ri, ip->i_di.di_goal_meta)) 1434 if (rgrp_contains_block(rgd, ip->i_di.di_goal_meta))
1250 goal = ip->i_di.di_goal_meta - rgd->rd_ri.ri_data0; 1435 goal = ip->i_di.di_goal_meta - rgd->rd_data0;
1251 else 1436 else
1252 goal = rgd->rd_last_alloc_meta; 1437 goal = rgd->rd_last_alloc_meta;
1253 1438
1254 blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED); 1439 blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED);
1255 rgd->rd_last_alloc_meta = blk; 1440 rgd->rd_last_alloc_meta = blk;
1256 1441
1257 block = rgd->rd_ri.ri_data0 + blk; 1442 block = rgd->rd_data0 + blk;
1258 ip->i_di.di_goal_meta = block; 1443 ip->i_di.di_goal_meta = block;
1259 1444
1260 gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free); 1445 gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
@@ -1296,7 +1481,7 @@ u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation)
1296 1481
1297 rgd->rd_last_alloc_meta = blk; 1482 rgd->rd_last_alloc_meta = blk;
1298 1483
1299 block = rgd->rd_ri.ri_data0 + blk; 1484 block = rgd->rd_data0 + blk;
1300 1485
1301 gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free); 1486 gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
1302 rgd->rd_rg.rg_free--; 1487 rgd->rd_rg.rg_free--;
@@ -1379,7 +1564,7 @@ void gfs2_unlink_di(struct inode *inode)
1379 struct gfs2_inode *ip = GFS2_I(inode); 1564 struct gfs2_inode *ip = GFS2_I(inode);
1380 struct gfs2_sbd *sdp = GFS2_SB(inode); 1565 struct gfs2_sbd *sdp = GFS2_SB(inode);
1381 struct gfs2_rgrpd *rgd; 1566 struct gfs2_rgrpd *rgd;
1382 u64 blkno = ip->i_num.no_addr; 1567 u64 blkno = ip->i_no_addr;
1383 1568
1384 rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED); 1569 rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
1385 if (!rgd) 1570 if (!rgd)
@@ -1414,9 +1599,9 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
1414 1599
1415void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) 1600void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
1416{ 1601{
1417 gfs2_free_uninit_di(rgd, ip->i_num.no_addr); 1602 gfs2_free_uninit_di(rgd, ip->i_no_addr);
1418 gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid); 1603 gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
1419 gfs2_meta_wipe(ip, ip->i_num.no_addr, 1); 1604 gfs2_meta_wipe(ip, ip->i_no_addr, 1);
1420} 1605}
1421 1606
1422/** 1607/**
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index b01e0cfc99b5..b4c6adfc6f2e 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -65,5 +65,6 @@ void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist,
65void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state, 65void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state,
66 int flags); 66 int flags);
67void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); 67void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);
68u64 gfs2_ri_total(struct gfs2_sbd *sdp);
68 69
69#endif /* __RGRP_DOT_H__ */ 70#endif /* __RGRP_DOT_H__ */
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 4fdda974dc83..f916b9740c75 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -95,8 +95,8 @@ int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent)
95{ 95{
96 unsigned int x; 96 unsigned int x;
97 97
98 if (sb->sb_header.mh_magic != GFS2_MAGIC || 98 if (sb->sb_magic != GFS2_MAGIC ||
99 sb->sb_header.mh_type != GFS2_METATYPE_SB) { 99 sb->sb_type != GFS2_METATYPE_SB) {
100 if (!silent) 100 if (!silent)
101 printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n"); 101 printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n");
102 return -EINVAL; 102 return -EINVAL;
@@ -174,10 +174,31 @@ static int end_bio_io_page(struct bio *bio, unsigned int bytes_done, int error)
174 return 0; 174 return 0;
175} 175}
176 176
177static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
178{
179 const struct gfs2_sb *str = buf;
180
181 sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic);
182 sb->sb_type = be32_to_cpu(str->sb_header.mh_type);
183 sb->sb_format = be32_to_cpu(str->sb_header.mh_format);
184 sb->sb_fs_format = be32_to_cpu(str->sb_fs_format);
185 sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format);
186 sb->sb_bsize = be32_to_cpu(str->sb_bsize);
187 sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift);
188 sb->sb_master_dir.no_addr = be64_to_cpu(str->sb_master_dir.no_addr);
189 sb->sb_master_dir.no_formal_ino = be64_to_cpu(str->sb_master_dir.no_formal_ino);
190 sb->sb_root_dir.no_addr = be64_to_cpu(str->sb_root_dir.no_addr);
191 sb->sb_root_dir.no_formal_ino = be64_to_cpu(str->sb_root_dir.no_formal_ino);
192
193 memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
194 memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
195}
196
177/** 197/**
178 * gfs2_read_super - Read the gfs2 super block from disk 198 * gfs2_read_super - Read the gfs2 super block from disk
179 * @sb: The VFS super block 199 * @sdp: The GFS2 super block
180 * @sector: The location of the super block 200 * @sector: The location of the super block
201 * @error: The error code to return
181 * 202 *
182 * This uses the bio functions to read the super block from disk 203 * This uses the bio functions to read the super block from disk
183 * because we want to be 100% sure that we never read cached data. 204 * because we want to be 100% sure that we never read cached data.
@@ -189,17 +210,19 @@ static int end_bio_io_page(struct bio *bio, unsigned int bytes_done, int error)
189 * the master directory (contains pointers to journals etc) and the 210 * the master directory (contains pointers to journals etc) and the
190 * root directory. 211 * root directory.
191 * 212 *
192 * Returns: A page containing the sb or NULL 213 * Returns: 0 on success or error
193 */ 214 */
194 215
195struct page *gfs2_read_super(struct super_block *sb, sector_t sector) 216int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
196{ 217{
218 struct super_block *sb = sdp->sd_vfs;
219 struct gfs2_sb *p;
197 struct page *page; 220 struct page *page;
198 struct bio *bio; 221 struct bio *bio;
199 222
200 page = alloc_page(GFP_KERNEL); 223 page = alloc_page(GFP_KERNEL);
201 if (unlikely(!page)) 224 if (unlikely(!page))
202 return NULL; 225 return -ENOBUFS;
203 226
204 ClearPageUptodate(page); 227 ClearPageUptodate(page);
205 ClearPageDirty(page); 228 ClearPageDirty(page);
@@ -208,7 +231,7 @@ struct page *gfs2_read_super(struct super_block *sb, sector_t sector)
208 bio = bio_alloc(GFP_KERNEL, 1); 231 bio = bio_alloc(GFP_KERNEL, 1);
209 if (unlikely(!bio)) { 232 if (unlikely(!bio)) {
210 __free_page(page); 233 __free_page(page);
211 return NULL; 234 return -ENOBUFS;
212 } 235 }
213 236
214 bio->bi_sector = sector * (sb->s_blocksize >> 9); 237 bio->bi_sector = sector * (sb->s_blocksize >> 9);
@@ -222,9 +245,13 @@ struct page *gfs2_read_super(struct super_block *sb, sector_t sector)
222 bio_put(bio); 245 bio_put(bio);
223 if (!PageUptodate(page)) { 246 if (!PageUptodate(page)) {
224 __free_page(page); 247 __free_page(page);
225 return NULL; 248 return -EIO;
226 } 249 }
227 return page; 250 p = kmap(page);
251 gfs2_sb_in(&sdp->sd_sb, p);
252 kunmap(page);
253 __free_page(page);
254 return 0;
228} 255}
229 256
230/** 257/**
@@ -241,19 +268,13 @@ int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent)
241 u32 tmp_blocks; 268 u32 tmp_blocks;
242 unsigned int x; 269 unsigned int x;
243 int error; 270 int error;
244 struct page *page;
245 char *sb;
246 271
247 page = gfs2_read_super(sdp->sd_vfs, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift); 272 error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
248 if (!page) { 273 if (error) {
249 if (!silent) 274 if (!silent)
250 fs_err(sdp, "can't read superblock\n"); 275 fs_err(sdp, "can't read superblock\n");
251 return -EIO; 276 return error;
252 } 277 }
253 sb = kmap(page);
254 gfs2_sb_in(&sdp->sd_sb, sb);
255 kunmap(page);
256 __free_page(page);
257 278
258 error = gfs2_check_sb(sdp, &sdp->sd_sb, silent); 279 error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
259 if (error) 280 if (error)
@@ -360,7 +381,7 @@ int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
360 name.len = sprintf(buf, "journal%u", sdp->sd_journals); 381 name.len = sprintf(buf, "journal%u", sdp->sd_journals);
361 name.hash = gfs2_disk_hash(name.name, name.len); 382 name.hash = gfs2_disk_hash(name.name, name.len);
362 383
363 error = gfs2_dir_search(sdp->sd_jindex, &name, NULL, NULL); 384 error = gfs2_dir_check(sdp->sd_jindex, &name, NULL);
364 if (error == -ENOENT) { 385 if (error == -ENOENT) {
365 error = 0; 386 error = 0;
366 break; 387 break;
@@ -593,6 +614,24 @@ int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
593 return error; 614 return error;
594} 615}
595 616
617static void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf)
618{
619 const struct gfs2_statfs_change *str = buf;
620
621 sc->sc_total = be64_to_cpu(str->sc_total);
622 sc->sc_free = be64_to_cpu(str->sc_free);
623 sc->sc_dinodes = be64_to_cpu(str->sc_dinodes);
624}
625
626static void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf)
627{
628 struct gfs2_statfs_change *str = buf;
629
630 str->sc_total = cpu_to_be64(sc->sc_total);
631 str->sc_free = cpu_to_be64(sc->sc_free);
632 str->sc_dinodes = cpu_to_be64(sc->sc_dinodes);
633}
634
596int gfs2_statfs_init(struct gfs2_sbd *sdp) 635int gfs2_statfs_init(struct gfs2_sbd *sdp)
597{ 636{
598 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); 637 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
@@ -772,7 +811,7 @@ static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
772 struct gfs2_statfs_change_host *sc) 811 struct gfs2_statfs_change_host *sc)
773{ 812{
774 gfs2_rgrp_verify(rgd); 813 gfs2_rgrp_verify(rgd);
775 sc->sc_total += rgd->rd_ri.ri_data; 814 sc->sc_total += rgd->rd_data;
776 sc->sc_free += rgd->rd_rg.rg_free; 815 sc->sc_free += rgd->rd_rg.rg_free;
777 sc->sc_dinodes += rgd->rd_rg.rg_dinodes; 816 sc->sc_dinodes += rgd->rd_rg.rg_dinodes;
778 return 0; 817 return 0;
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index e590b2df11dc..60a870e430be 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -16,7 +16,7 @@ void gfs2_tune_init(struct gfs2_tune *gt);
16 16
17int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent); 17int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent);
18int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent); 18int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent);
19struct page *gfs2_read_super(struct super_block *sb, sector_t sector); 19int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector);
20 20
21static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp) 21static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
22{ 22{
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 601eaa1b9ed6..424a0774eda8 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -115,8 +115,8 @@ int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide,
115 "GFS2: fsid=%s: inode = %llu %llu\n" 115 "GFS2: fsid=%s: inode = %llu %llu\n"
116 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", 116 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
117 sdp->sd_fsname, 117 sdp->sd_fsname,
118 sdp->sd_fsname, (unsigned long long)ip->i_num.no_formal_ino, 118 sdp->sd_fsname, (unsigned long long)ip->i_no_formal_ino,
119 (unsigned long long)ip->i_num.no_addr, 119 (unsigned long long)ip->i_no_addr,
120 sdp->sd_fsname, function, file, line); 120 sdp->sd_fsname, function, file, line);
121 return rv; 121 return rv;
122} 122}
@@ -137,7 +137,7 @@ int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide,
137 "GFS2: fsid=%s: RG = %llu\n" 137 "GFS2: fsid=%s: RG = %llu\n"
138 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", 138 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
139 sdp->sd_fsname, 139 sdp->sd_fsname,
140 sdp->sd_fsname, (unsigned long long)rgd->rd_ri.ri_addr, 140 sdp->sd_fsname, (unsigned long long)rgd->rd_addr,
141 sdp->sd_fsname, function, file, line); 141 sdp->sd_fsname, function, file, line);
142 return rv; 142 return rv;
143} 143}
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 9a934db0bd8a..bc835f272a6e 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -607,7 +607,7 @@ static const struct file_operations hfs_file_operations = {
607 .write = do_sync_write, 607 .write = do_sync_write,
608 .aio_write = generic_file_aio_write, 608 .aio_write = generic_file_aio_write,
609 .mmap = generic_file_mmap, 609 .mmap = generic_file_mmap,
610 .sendfile = generic_file_sendfile, 610 .splice_read = generic_file_splice_read,
611 .fsync = file_fsync, 611 .fsync = file_fsync,
612 .open = hfs_file_open, 612 .open = hfs_file_open,
613 .release = hfs_file_release, 613 .release = hfs_file_release,
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 45dab5d6cc10..409ce5429c91 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -288,7 +288,7 @@ static const struct file_operations hfsplus_file_operations = {
288 .write = do_sync_write, 288 .write = do_sync_write,
289 .aio_write = generic_file_aio_write, 289 .aio_write = generic_file_aio_write,
290 .mmap = generic_file_mmap, 290 .mmap = generic_file_mmap,
291 .sendfile = generic_file_sendfile, 291 .splice_read = generic_file_splice_read,
292 .fsync = file_fsync, 292 .fsync = file_fsync,
293 .open = hfsplus_file_open, 293 .open = hfsplus_file_open,
294 .release = hfsplus_file_release, 294 .release = hfsplus_file_release,
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 8286491dbf31..c77862032e84 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -390,7 +390,7 @@ int hostfs_fsync(struct file *file, struct dentry *dentry, int datasync)
390static const struct file_operations hostfs_file_fops = { 390static const struct file_operations hostfs_file_fops = {
391 .llseek = generic_file_llseek, 391 .llseek = generic_file_llseek,
392 .read = do_sync_read, 392 .read = do_sync_read,
393 .sendfile = generic_file_sendfile, 393 .splice_read = generic_file_splice_read,
394 .aio_read = generic_file_aio_read, 394 .aio_read = generic_file_aio_read,
395 .aio_write = generic_file_aio_write, 395 .aio_write = generic_file_aio_write,
396 .write = do_sync_write, 396 .write = do_sync_write,
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index b4eafc0f1e54..5b53e5c5d8df 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -129,7 +129,7 @@ const struct file_operations hpfs_file_ops =
129 .mmap = generic_file_mmap, 129 .mmap = generic_file_mmap,
130 .release = hpfs_file_release, 130 .release = hpfs_file_release,
131 .fsync = hpfs_file_fsync, 131 .fsync = hpfs_file_fsync,
132 .sendfile = generic_file_sendfile, 132 .splice_read = generic_file_splice_read,
133}; 133};
134 134
135const struct inode_operations hpfs_file_iops = 135const struct inode_operations hpfs_file_iops =
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index aa083dd34e92..e6b46b3ac2fe 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -736,15 +736,13 @@ static int can_do_hugetlb_shm(void)
736 can_do_mlock()); 736 can_do_mlock());
737} 737}
738 738
739struct file *hugetlb_zero_setup(size_t size) 739struct file *hugetlb_file_setup(const char *name, size_t size)
740{ 740{
741 int error = -ENOMEM; 741 int error = -ENOMEM;
742 struct file *file; 742 struct file *file;
743 struct inode *inode; 743 struct inode *inode;
744 struct dentry *dentry, *root; 744 struct dentry *dentry, *root;
745 struct qstr quick_string; 745 struct qstr quick_string;
746 char buf[16];
747 static atomic_t counter;
748 746
749 if (!hugetlbfs_vfsmount) 747 if (!hugetlbfs_vfsmount)
750 return ERR_PTR(-ENOENT); 748 return ERR_PTR(-ENOENT);
@@ -756,8 +754,7 @@ struct file *hugetlb_zero_setup(size_t size)
756 return ERR_PTR(-ENOMEM); 754 return ERR_PTR(-ENOMEM);
757 755
758 root = hugetlbfs_vfsmount->mnt_root; 756 root = hugetlbfs_vfsmount->mnt_root;
759 snprintf(buf, 16, "%u", atomic_inc_return(&counter)); 757 quick_string.name = name;
760 quick_string.name = buf;
761 quick_string.len = strlen(quick_string.name); 758 quick_string.len = strlen(quick_string.name);
762 quick_string.hash = 0; 759 quick_string.hash = 0;
763 dentry = d_alloc(root, &quick_string); 760 dentry = d_alloc(root, &quick_string);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 479c1038ed4a..8c90cbc903fa 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -12,6 +12,7 @@
12#include <linux/fs.h> 12#include <linux/fs.h>
13#include <linux/security.h> 13#include <linux/security.h>
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/kallsyms.h>
15 16
16#include <asm/uaccess.h> 17#include <asm/uaccess.h>
17#include <asm/ioctls.h> 18#include <asm/ioctls.h>
@@ -20,6 +21,7 @@ static long do_ioctl(struct file *filp, unsigned int cmd,
20 unsigned long arg) 21 unsigned long arg)
21{ 22{
22 int error = -ENOTTY; 23 int error = -ENOTTY;
24 void *f;
23 25
24 if (!filp->f_op) 26 if (!filp->f_op)
25 goto out; 27 goto out;
@@ -29,10 +31,16 @@ static long do_ioctl(struct file *filp, unsigned int cmd,
29 if (error == -ENOIOCTLCMD) 31 if (error == -ENOIOCTLCMD)
30 error = -EINVAL; 32 error = -EINVAL;
31 goto out; 33 goto out;
32 } else if (filp->f_op->ioctl) { 34 } else if ((f = filp->f_op->ioctl)) {
33 lock_kernel(); 35 lock_kernel();
34 error = filp->f_op->ioctl(filp->f_path.dentry->d_inode, 36 if (!filp->f_op->ioctl) {
35 filp, cmd, arg); 37 printk("%s: ioctl %p disappeared\n", __FUNCTION__, f);
38 print_symbol("symbol: %s\n", (unsigned long)f);
39 dump_stack();
40 } else {
41 error = filp->f_op->ioctl(filp->f_path.dentry->d_inode,
42 filp, cmd, arg);
43 }
36 unlock_kernel(); 44 unlock_kernel();
37 } 45 }
38 46
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 99871279a1ed..c2530197be0c 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -47,7 +47,7 @@ const struct file_operations jffs2_file_operations =
47 .ioctl = jffs2_ioctl, 47 .ioctl = jffs2_ioctl,
48 .mmap = generic_file_readonly_mmap, 48 .mmap = generic_file_readonly_mmap,
49 .fsync = jffs2_fsync, 49 .fsync = jffs2_fsync,
50 .sendfile = generic_file_sendfile 50 .splice_read = generic_file_splice_read,
51}; 51};
52 52
53/* jffs2_file_inode_operations */ 53/* jffs2_file_inode_operations */
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 4884d5edfe65..7b363786c2d2 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -210,8 +210,7 @@ static void jffs2_kill_tn(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info *
210 * offset, and the one with the smallest length will come first in the 210 * offset, and the one with the smallest length will come first in the
211 * ordering. 211 * ordering.
212 * 212 *
213 * Returns 0 if the node was inserted 213 * Returns 0 if the node was handled (including marking it obsolete)
214 * 1 if the node is obsolete (because we can't mark it so yet)
215 * < 0 an if error occurred 214 * < 0 an if error occurred
216 */ 215 */
217static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c, 216static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c,
@@ -229,9 +228,16 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c,
229 check anyway. */ 228 check anyway. */
230 if (!tn->fn->size) { 229 if (!tn->fn->size) {
231 if (rii->mdata_tn) { 230 if (rii->mdata_tn) {
232 /* We had a candidate mdata node already */ 231 if (rii->mdata_tn->version < tn->version) {
233 dbg_readinode("kill old mdata with ver %d\n", rii->mdata_tn->version); 232 /* We had a candidate mdata node already */
234 jffs2_kill_tn(c, rii->mdata_tn); 233 dbg_readinode("kill old mdata with ver %d\n", rii->mdata_tn->version);
234 jffs2_kill_tn(c, rii->mdata_tn);
235 } else {
236 dbg_readinode("kill new mdata with ver %d (older than existing %d\n",
237 tn->version, rii->mdata_tn->version);
238 jffs2_kill_tn(c, tn);
239 return 0;
240 }
235 } 241 }
236 rii->mdata_tn = tn; 242 rii->mdata_tn = tn;
237 dbg_readinode("keep new mdata with ver %d\n", tn->version); 243 dbg_readinode("keep new mdata with ver %d\n", tn->version);
@@ -565,8 +571,7 @@ static struct jffs2_raw_node_ref *jffs2_first_valid_node(struct jffs2_raw_node_r
565 * Helper function for jffs2_get_inode_nodes(). 571 * Helper function for jffs2_get_inode_nodes().
566 * It is called every time a directory entry node is found. 572 * It is called every time a directory entry node is found.
567 * 573 *
568 * Returns: 0 on succes; 574 * Returns: 0 on success;
569 * 1 if the node should be marked obsolete;
570 * negative error code on failure. 575 * negative error code on failure.
571 */ 576 */
572static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref, 577static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
@@ -673,8 +678,7 @@ static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_r
673 * Helper function for jffs2_get_inode_nodes(). 678 * Helper function for jffs2_get_inode_nodes().
674 * It is called every time an inode node is found. 679 * It is called every time an inode node is found.
675 * 680 *
676 * Returns: 0 on success; 681 * Returns: 0 on success (possibly after marking a bad node obsolete);
677 * 1 if the node should be marked obsolete;
678 * negative error code on failure. 682 * negative error code on failure.
679 */ 683 */
680static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref, 684static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
@@ -683,7 +687,7 @@ static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
683{ 687{
684 struct jffs2_tmp_dnode_info *tn; 688 struct jffs2_tmp_dnode_info *tn;
685 uint32_t len, csize; 689 uint32_t len, csize;
686 int ret = 1; 690 int ret = 0;
687 uint32_t crc; 691 uint32_t crc;
688 692
689 /* Obsoleted. This cannot happen, surely? dwmw2 20020308 */ 693 /* Obsoleted. This cannot happen, surely? dwmw2 20020308 */
@@ -712,8 +716,9 @@ static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
712 /* Sanity checks */ 716 /* Sanity checks */
713 if (unlikely(je32_to_cpu(rd->offset) > je32_to_cpu(rd->isize)) || 717 if (unlikely(je32_to_cpu(rd->offset) > je32_to_cpu(rd->isize)) ||
714 unlikely(PAD(je32_to_cpu(rd->csize) + sizeof(*rd)) != PAD(je32_to_cpu(rd->totlen)))) { 718 unlikely(PAD(je32_to_cpu(rd->csize) + sizeof(*rd)) != PAD(je32_to_cpu(rd->totlen)))) {
715 JFFS2_WARNING("inode node header CRC is corrupted at %#08x\n", ref_offset(ref)); 719 JFFS2_WARNING("inode node header CRC is corrupted at %#08x\n", ref_offset(ref));
716 jffs2_dbg_dump_node(c, ref_offset(ref)); 720 jffs2_dbg_dump_node(c, ref_offset(ref));
721 jffs2_mark_node_obsolete(c, ref);
717 goto free_out; 722 goto free_out;
718 } 723 }
719 724
@@ -768,6 +773,7 @@ static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
768 if (len >= csize && unlikely(tn->partial_crc != je32_to_cpu(rd->data_crc))) { 773 if (len >= csize && unlikely(tn->partial_crc != je32_to_cpu(rd->data_crc))) {
769 JFFS2_NOTICE("wrong data CRC in data node at 0x%08x: read %#08x, calculated %#08x.\n", 774 JFFS2_NOTICE("wrong data CRC in data node at 0x%08x: read %#08x, calculated %#08x.\n",
770 ref_offset(ref), tn->partial_crc, je32_to_cpu(rd->data_crc)); 775 ref_offset(ref), tn->partial_crc, je32_to_cpu(rd->data_crc));
776 jffs2_mark_node_obsolete(c, ref);
771 goto free_out; 777 goto free_out;
772 } 778 }
773 779
@@ -847,7 +853,6 @@ static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
847 * It is called every time an unknown node is found. 853 * It is called every time an unknown node is found.
848 * 854 *
849 * Returns: 0 on success; 855 * Returns: 0 on success;
850 * 1 if the node should be marked obsolete;
851 * negative error code on failure. 856 * negative error code on failure.
852 */ 857 */
853static inline int read_unknown(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref, struct jffs2_unknown_node *un) 858static inline int read_unknown(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref, struct jffs2_unknown_node *un)
@@ -1044,7 +1049,8 @@ static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_inf
1044 1049
1045 case JFFS2_NODETYPE_DIRENT: 1050 case JFFS2_NODETYPE_DIRENT:
1046 1051
1047 if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_raw_dirent)) { 1052 if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_raw_dirent) &&
1053 len < sizeof(struct jffs2_raw_dirent)) {
1048 err = read_more(c, ref, sizeof(struct jffs2_raw_dirent), &len, buf); 1054 err = read_more(c, ref, sizeof(struct jffs2_raw_dirent), &len, buf);
1049 if (unlikely(err)) 1055 if (unlikely(err))
1050 goto free_out; 1056 goto free_out;
@@ -1058,7 +1064,8 @@ static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_inf
1058 1064
1059 case JFFS2_NODETYPE_INODE: 1065 case JFFS2_NODETYPE_INODE:
1060 1066
1061 if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_raw_inode)) { 1067 if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_raw_inode) &&
1068 len < sizeof(struct jffs2_raw_inode)) {
1062 err = read_more(c, ref, sizeof(struct jffs2_raw_inode), &len, buf); 1069 err = read_more(c, ref, sizeof(struct jffs2_raw_inode), &len, buf);
1063 if (unlikely(err)) 1070 if (unlikely(err))
1064 goto free_out; 1071 goto free_out;
@@ -1071,17 +1078,15 @@ static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_inf
1071 break; 1078 break;
1072 1079
1073 default: 1080 default:
1074 if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_unknown_node)) { 1081 if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_unknown_node) &&
1082 len < sizeof(struct jffs2_unknown_node)) {
1075 err = read_more(c, ref, sizeof(struct jffs2_unknown_node), &len, buf); 1083 err = read_more(c, ref, sizeof(struct jffs2_unknown_node), &len, buf);
1076 if (unlikely(err)) 1084 if (unlikely(err))
1077 goto free_out; 1085 goto free_out;
1078 } 1086 }
1079 1087
1080 err = read_unknown(c, ref, &node->u); 1088 err = read_unknown(c, ref, &node->u);
1081 if (err == 1) { 1089 if (unlikely(err))
1082 jffs2_mark_node_obsolete(c, ref);
1083 break;
1084 } else if (unlikely(err))
1085 goto free_out; 1090 goto free_out;
1086 1091
1087 } 1092 }
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 6488af43bc9b..e220d3bd610d 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -19,7 +19,7 @@
19#include <linux/mount.h> 19#include <linux/mount.h>
20#include <linux/jffs2.h> 20#include <linux/jffs2.h>
21#include <linux/pagemap.h> 21#include <linux/pagemap.h>
22#include <linux/mtd/mtd.h> 22#include <linux/mtd/super.h>
23#include <linux/ctype.h> 23#include <linux/ctype.h>
24#include <linux/namei.h> 24#include <linux/namei.h>
25#include "compr.h" 25#include "compr.h"
@@ -75,69 +75,27 @@ static const struct super_operations jffs2_super_operations =
75 .sync_fs = jffs2_sync_fs, 75 .sync_fs = jffs2_sync_fs,
76}; 76};
77 77
78static int jffs2_sb_compare(struct super_block *sb, void *data) 78/*
79{ 79 * fill in the superblock
80 struct jffs2_sb_info *p = data; 80 */
81 struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); 81static int jffs2_fill_super(struct super_block *sb, void *data, int silent)
82
83 /* The superblocks are considered to be equivalent if the underlying MTD
84 device is the same one */
85 if (c->mtd == p->mtd) {
86 D1(printk(KERN_DEBUG "jffs2_sb_compare: match on device %d (\"%s\")\n", p->mtd->index, p->mtd->name));
87 return 1;
88 } else {
89 D1(printk(KERN_DEBUG "jffs2_sb_compare: No match, device %d (\"%s\"), device %d (\"%s\")\n",
90 c->mtd->index, c->mtd->name, p->mtd->index, p->mtd->name));
91 return 0;
92 }
93}
94
95static int jffs2_sb_set(struct super_block *sb, void *data)
96{
97 struct jffs2_sb_info *p = data;
98
99 /* For persistence of NFS exports etc. we use the same s_dev
100 each time we mount the device, don't just use an anonymous
101 device */
102 sb->s_fs_info = p;
103 p->os_priv = sb;
104 sb->s_dev = MKDEV(MTD_BLOCK_MAJOR, p->mtd->index);
105
106 return 0;
107}
108
109static int jffs2_get_sb_mtd(struct file_system_type *fs_type,
110 int flags, const char *dev_name,
111 void *data, struct mtd_info *mtd,
112 struct vfsmount *mnt)
113{ 82{
114 struct super_block *sb;
115 struct jffs2_sb_info *c; 83 struct jffs2_sb_info *c;
116 int ret; 84
85 D1(printk(KERN_DEBUG "jffs2_get_sb_mtd():"
86 " New superblock for device %d (\"%s\")\n",
87 sb->s_mtd->index, sb->s_mtd->name));
117 88
118 c = kzalloc(sizeof(*c), GFP_KERNEL); 89 c = kzalloc(sizeof(*c), GFP_KERNEL);
119 if (!c) 90 if (!c)
120 return -ENOMEM; 91 return -ENOMEM;
121 c->mtd = mtd;
122
123 sb = sget(fs_type, jffs2_sb_compare, jffs2_sb_set, c);
124
125 if (IS_ERR(sb))
126 goto out_error;
127
128 if (sb->s_root) {
129 /* New mountpoint for JFFS2 which is already mounted */
130 D1(printk(KERN_DEBUG "jffs2_get_sb_mtd(): Device %d (\"%s\") is already mounted\n",
131 mtd->index, mtd->name));
132 ret = simple_set_mnt(mnt, sb);
133 goto out_put;
134 }
135 92
136 D1(printk(KERN_DEBUG "jffs2_get_sb_mtd(): New superblock for device %d (\"%s\")\n", 93 c->mtd = sb->s_mtd;
137 mtd->index, mtd->name)); 94 c->os_priv = sb;
95 sb->s_fs_info = c;
138 96
139 /* Initialize JFFS2 superblock locks, the further initialization will be 97 /* Initialize JFFS2 superblock locks, the further initialization will
140 * done later */ 98 * be done later */
141 init_MUTEX(&c->alloc_sem); 99 init_MUTEX(&c->alloc_sem);
142 init_MUTEX(&c->erase_free_sem); 100 init_MUTEX(&c->erase_free_sem);
143 init_waitqueue_head(&c->erase_wait); 101 init_waitqueue_head(&c->erase_wait);
@@ -146,133 +104,20 @@ static int jffs2_get_sb_mtd(struct file_system_type *fs_type,
146 spin_lock_init(&c->inocache_lock); 104 spin_lock_init(&c->inocache_lock);
147 105
148 sb->s_op = &jffs2_super_operations; 106 sb->s_op = &jffs2_super_operations;
149 sb->s_flags = flags | MS_NOATIME; 107 sb->s_flags = sb->s_flags | MS_NOATIME;
150 sb->s_xattr = jffs2_xattr_handlers; 108 sb->s_xattr = jffs2_xattr_handlers;
151#ifdef CONFIG_JFFS2_FS_POSIX_ACL 109#ifdef CONFIG_JFFS2_FS_POSIX_ACL
152 sb->s_flags |= MS_POSIXACL; 110 sb->s_flags |= MS_POSIXACL;
153#endif 111#endif
154 ret = jffs2_do_fill_super(sb, data, flags & MS_SILENT ? 1 : 0); 112 return jffs2_do_fill_super(sb, data, silent);
155
156 if (ret) {
157 /* Failure case... */
158 up_write(&sb->s_umount);
159 deactivate_super(sb);
160 return ret;
161 }
162
163 sb->s_flags |= MS_ACTIVE;
164 return simple_set_mnt(mnt, sb);
165
166out_error:
167 ret = PTR_ERR(sb);
168 out_put:
169 kfree(c);
170 put_mtd_device(mtd);
171
172 return ret;
173}
174
175static int jffs2_get_sb_mtdnr(struct file_system_type *fs_type,
176 int flags, const char *dev_name,
177 void *data, int mtdnr,
178 struct vfsmount *mnt)
179{
180 struct mtd_info *mtd;
181
182 mtd = get_mtd_device(NULL, mtdnr);
183 if (IS_ERR(mtd)) {
184 D1(printk(KERN_DEBUG "jffs2: MTD device #%u doesn't appear to exist\n", mtdnr));
185 return PTR_ERR(mtd);
186 }
187
188 return jffs2_get_sb_mtd(fs_type, flags, dev_name, data, mtd, mnt);
189} 113}
190 114
191static int jffs2_get_sb(struct file_system_type *fs_type, 115static int jffs2_get_sb(struct file_system_type *fs_type,
192 int flags, const char *dev_name, 116 int flags, const char *dev_name,
193 void *data, struct vfsmount *mnt) 117 void *data, struct vfsmount *mnt)
194{ 118{
195 int err; 119 return get_sb_mtd(fs_type, flags, dev_name, data, jffs2_fill_super,
196 struct nameidata nd; 120 mnt);
197 int mtdnr;
198
199 if (!dev_name)
200 return -EINVAL;
201
202 D1(printk(KERN_DEBUG "jffs2_get_sb(): dev_name \"%s\"\n", dev_name));
203
204 /* The preferred way of mounting in future; especially when
205 CONFIG_BLK_DEV is implemented - we specify the underlying
206 MTD device by number or by name, so that we don't require
207 block device support to be present in the kernel. */
208
209 /* FIXME: How to do the root fs this way? */
210
211 if (dev_name[0] == 'm' && dev_name[1] == 't' && dev_name[2] == 'd') {
212 /* Probably mounting without the blkdev crap */
213 if (dev_name[3] == ':') {
214 struct mtd_info *mtd;
215
216 /* Mount by MTD device name */
217 D1(printk(KERN_DEBUG "jffs2_get_sb(): mtd:%%s, name \"%s\"\n", dev_name+4));
218 for (mtdnr = 0; mtdnr < MAX_MTD_DEVICES; mtdnr++) {
219 mtd = get_mtd_device(NULL, mtdnr);
220 if (!IS_ERR(mtd)) {
221 if (!strcmp(mtd->name, dev_name+4))
222 return jffs2_get_sb_mtd(fs_type, flags, dev_name, data, mtd, mnt);
223 put_mtd_device(mtd);
224 }
225 }
226 printk(KERN_NOTICE "jffs2_get_sb(): MTD device with name \"%s\" not found.\n", dev_name+4);
227 } else if (isdigit(dev_name[3])) {
228 /* Mount by MTD device number name */
229 char *endptr;
230
231 mtdnr = simple_strtoul(dev_name+3, &endptr, 0);
232 if (!*endptr) {
233 /* It was a valid number */
234 D1(printk(KERN_DEBUG "jffs2_get_sb(): mtd%%d, mtdnr %d\n", mtdnr));
235 return jffs2_get_sb_mtdnr(fs_type, flags, dev_name, data, mtdnr, mnt);
236 }
237 }
238 }
239
240 /* Try the old way - the hack where we allowed users to mount
241 /dev/mtdblock$(n) but didn't actually _use_ the blkdev */
242
243 err = path_lookup(dev_name, LOOKUP_FOLLOW, &nd);
244
245 D1(printk(KERN_DEBUG "jffs2_get_sb(): path_lookup() returned %d, inode %p\n",
246 err, nd.dentry->d_inode));
247
248 if (err)
249 return err;
250
251 err = -EINVAL;
252
253 if (!S_ISBLK(nd.dentry->d_inode->i_mode))
254 goto out;
255
256 if (nd.mnt->mnt_flags & MNT_NODEV) {
257 err = -EACCES;
258 goto out;
259 }
260
261 if (imajor(nd.dentry->d_inode) != MTD_BLOCK_MAJOR) {
262 if (!(flags & MS_SILENT))
263 printk(KERN_NOTICE "Attempt to mount non-MTD device \"%s\" as JFFS2\n",
264 dev_name);
265 goto out;
266 }
267
268 mtdnr = iminor(nd.dentry->d_inode);
269 path_release(&nd);
270
271 return jffs2_get_sb_mtdnr(fs_type, flags, dev_name, data, mtdnr, mnt);
272
273out:
274 path_release(&nd);
275 return err;
276} 121}
277 122
278static void jffs2_put_super (struct super_block *sb) 123static void jffs2_put_super (struct super_block *sb)
@@ -307,8 +152,7 @@ static void jffs2_kill_sb(struct super_block *sb)
307 struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); 152 struct jffs2_sb_info *c = JFFS2_SB_INFO(sb);
308 if (!(sb->s_flags & MS_RDONLY)) 153 if (!(sb->s_flags & MS_RDONLY))
309 jffs2_stop_garbage_collect_thread(c); 154 jffs2_stop_garbage_collect_thread(c);
310 generic_shutdown_super(sb); 155 kill_mtd_super(sb);
311 put_mtd_device(c->mtd);
312 kfree(c); 156 kfree(c);
313} 157}
314 158
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index 78fc08893a6c..e48665984cb3 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -754,6 +754,10 @@ void jffs2_clear_xattr_subsystem(struct jffs2_sb_info *c)
754 list_del(&xd->xindex); 754 list_del(&xd->xindex);
755 jffs2_free_xattr_datum(xd); 755 jffs2_free_xattr_datum(xd);
756 } 756 }
757 list_for_each_entry_safe(xd, _xd, &c->xattr_unchecked, xindex) {
758 list_del(&xd->xindex);
759 jffs2_free_xattr_datum(xd);
760 }
757} 761}
758 762
759#define XREF_TMPHASH_SIZE (128) 763#define XREF_TMPHASH_SIZE (128)
@@ -825,7 +829,7 @@ void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c)
825 ref->xd and ref->ic are not valid yet. */ 829 ref->xd and ref->ic are not valid yet. */
826 xd = jffs2_find_xattr_datum(c, ref->xid); 830 xd = jffs2_find_xattr_datum(c, ref->xid);
827 ic = jffs2_get_ino_cache(c, ref->ino); 831 ic = jffs2_get_ino_cache(c, ref->ino);
828 if (!xd || !ic) { 832 if (!xd || !ic || !ic->nlink) {
829 dbg_xattr("xref(ino=%u, xid=%u, xseqno=%u) is orphan.\n", 833 dbg_xattr("xref(ino=%u, xid=%u, xseqno=%u) is orphan.\n",
830 ref->ino, ref->xid, ref->xseqno); 834 ref->ino, ref->xid, ref->xseqno);
831 ref->xseqno |= XREF_DELETE_MARKER; 835 ref->xseqno |= XREF_DELETE_MARKER;
diff --git a/fs/jfs/endian24.h b/fs/jfs/endian24.h
index 79494c4f2b10..fa92f7f1d0d0 100644
--- a/fs/jfs/endian24.h
+++ b/fs/jfs/endian24.h
@@ -29,7 +29,7 @@
29 __u32 __x = (x); \ 29 __u32 __x = (x); \
30 ((__u32)( \ 30 ((__u32)( \
31 ((__x & (__u32)0x000000ffUL) << 16) | \ 31 ((__x & (__u32)0x000000ffUL) << 16) | \
32 (__x & (__u32)0x0000ff00UL) | \ 32 (__x & (__u32)0x0000ff00UL) | \
33 ((__x & (__u32)0x00ff0000UL) >> 16) )); \ 33 ((__x & (__u32)0x00ff0000UL) >> 16) )); \
34}) 34})
35 35
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index f7f8eff19b7b..87eb93694af7 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -108,7 +108,6 @@ const struct file_operations jfs_file_operations = {
108 .aio_read = generic_file_aio_read, 108 .aio_read = generic_file_aio_read,
109 .aio_write = generic_file_aio_write, 109 .aio_write = generic_file_aio_write,
110 .mmap = generic_file_mmap, 110 .mmap = generic_file_mmap,
111 .sendfile = generic_file_sendfile,
112 .splice_read = generic_file_splice_read, 111 .splice_read = generic_file_splice_read,
113 .splice_write = generic_file_splice_write, 112 .splice_write = generic_file_splice_write,
114 .fsync = jfs_fsync, 113 .fsync = jfs_fsync,
diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c
index 9c5d59632aac..887f5759e536 100644
--- a/fs/jfs/jfs_debug.c
+++ b/fs/jfs/jfs_debug.c
@@ -26,34 +26,6 @@
26#include "jfs_filsys.h" 26#include "jfs_filsys.h"
27#include "jfs_debug.h" 27#include "jfs_debug.h"
28 28
29#ifdef CONFIG_JFS_DEBUG
30void dump_mem(char *label, void *data, int length)
31{
32 int i, j;
33 int *intptr = data;
34 char *charptr = data;
35 char buf[10], line[80];
36
37 printk("%s: dump of %d bytes of data at 0x%p\n\n", label, length,
38 data);
39 for (i = 0; i < length; i += 16) {
40 line[0] = 0;
41 for (j = 0; (j < 4) && (i + j * 4 < length); j++) {
42 sprintf(buf, " %08x", intptr[i / 4 + j]);
43 strcat(line, buf);
44 }
45 buf[0] = ' ';
46 buf[2] = 0;
47 for (j = 0; (j < 16) && (i + j < length); j++) {
48 buf[1] =
49 isprint(charptr[i + j]) ? charptr[i + j] : '.';
50 strcat(line, buf);
51 }
52 printk("%s\n", line);
53 }
54}
55#endif
56
57#ifdef PROC_FS_JFS /* see jfs_debug.h */ 29#ifdef PROC_FS_JFS /* see jfs_debug.h */
58 30
59static struct proc_dir_entry *base; 31static struct proc_dir_entry *base;
diff --git a/fs/jfs/jfs_debug.h b/fs/jfs/jfs_debug.h
index 7378798f0b21..044c1e654cc0 100644
--- a/fs/jfs/jfs_debug.h
+++ b/fs/jfs/jfs_debug.h
@@ -62,7 +62,6 @@ extern void jfs_proc_clean(void);
62 62
63extern int jfsloglevel; 63extern int jfsloglevel;
64 64
65extern void dump_mem(char *label, void *data, int length);
66extern int jfs_txanchor_read(char *, char **, off_t, int, int *, void *); 65extern int jfs_txanchor_read(char *, char **, off_t, int, int *, void *);
67 66
68/* information message: e.g., configuration, major event */ 67/* information message: e.g., configuration, major event */
@@ -94,7 +93,6 @@ extern int jfs_txanchor_read(char *, char **, off_t, int, int *, void *);
94 * --------- 93 * ---------
95 */ 94 */
96#else /* CONFIG_JFS_DEBUG */ 95#else /* CONFIG_JFS_DEBUG */
97#define dump_mem(label,data,length) do {} while (0)
98#define ASSERT(p) do {} while (0) 96#define ASSERT(p) do {} while (0)
99#define jfs_info(fmt, arg...) do {} while (0) 97#define jfs_info(fmt, arg...) do {} while (0)
100#define jfs_debug(fmt, arg...) do {} while (0) 98#define jfs_debug(fmt, arg...) do {} while (0)
diff --git a/fs/jfs/jfs_dinode.h b/fs/jfs/jfs_dinode.h
index 40b20111383c..c387540d3425 100644
--- a/fs/jfs/jfs_dinode.h
+++ b/fs/jfs/jfs_dinode.h
@@ -19,23 +19,23 @@
19#define _H_JFS_DINODE 19#define _H_JFS_DINODE
20 20
21/* 21/*
22 * jfs_dinode.h: on-disk inode manager 22 * jfs_dinode.h: on-disk inode manager
23 */ 23 */
24 24
25#define INODESLOTSIZE 128 25#define INODESLOTSIZE 128
26#define L2INODESLOTSIZE 7 26#define L2INODESLOTSIZE 7
27#define log2INODESIZE 9 /* log2(bytes per dinode) */ 27#define log2INODESIZE 9 /* log2(bytes per dinode) */
28 28
29 29
30/* 30/*
31 * on-disk inode : 512 bytes 31 * on-disk inode : 512 bytes
32 * 32 *
33 * note: align 64-bit fields on 8-byte boundary. 33 * note: align 64-bit fields on 8-byte boundary.
34 */ 34 */
35struct dinode { 35struct dinode {
36 /* 36 /*
37 * I. base area (128 bytes) 37 * I. base area (128 bytes)
38 * ------------------------ 38 * ------------------------
39 * 39 *
40 * define generic/POSIX attributes 40 * define generic/POSIX attributes
41 */ 41 */
@@ -70,16 +70,16 @@ struct dinode {
70 __le32 di_acltype; /* 4: Type of ACL */ 70 __le32 di_acltype; /* 4: Type of ACL */
71 71
72 /* 72 /*
73 * Extension Areas. 73 * Extension Areas.
74 * 74 *
75 * Historically, the inode was partitioned into 4 128-byte areas, 75 * Historically, the inode was partitioned into 4 128-byte areas,
76 * the last 3 being defined as unions which could have multiple 76 * the last 3 being defined as unions which could have multiple
77 * uses. The first 96 bytes had been completely unused until 77 * uses. The first 96 bytes had been completely unused until
78 * an index table was added to the directory. It is now more 78 * an index table was added to the directory. It is now more
79 * useful to describe the last 3/4 of the inode as a single 79 * useful to describe the last 3/4 of the inode as a single
80 * union. We would probably be better off redesigning the 80 * union. We would probably be better off redesigning the
81 * entire structure from scratch, but we don't want to break 81 * entire structure from scratch, but we don't want to break
82 * commonality with OS/2's JFS at this time. 82 * commonality with OS/2's JFS at this time.
83 */ 83 */
84 union { 84 union {
85 struct { 85 struct {
@@ -95,7 +95,7 @@ struct dinode {
95 } _dir; /* (384) */ 95 } _dir; /* (384) */
96#define di_dirtable u._dir._table 96#define di_dirtable u._dir._table
97#define di_dtroot u._dir._dtroot 97#define di_dtroot u._dir._dtroot
98#define di_parent di_dtroot.header.idotdot 98#define di_parent di_dtroot.header.idotdot
99#define di_DASD di_dtroot.header.DASD 99#define di_DASD di_dtroot.header.DASD
100 100
101 struct { 101 struct {
@@ -127,14 +127,14 @@ struct dinode {
127#define di_inlinedata u._file._u2._special._u 127#define di_inlinedata u._file._u2._special._u
128#define di_rdev u._file._u2._special._u._rdev 128#define di_rdev u._file._u2._special._u._rdev
129#define di_fastsymlink u._file._u2._special._u._fastsymlink 129#define di_fastsymlink u._file._u2._special._u._fastsymlink
130#define di_inlineea u._file._u2._special._inlineea 130#define di_inlineea u._file._u2._special._inlineea
131 } u; 131 } u;
132}; 132};
133 133
134/* extended mode bits (on-disk inode di_mode) */ 134/* extended mode bits (on-disk inode di_mode) */
135#define IFJOURNAL 0x00010000 /* journalled file */ 135#define IFJOURNAL 0x00010000 /* journalled file */
136#define ISPARSE 0x00020000 /* sparse file enabled */ 136#define ISPARSE 0x00020000 /* sparse file enabled */
137#define INLINEEA 0x00040000 /* inline EA area free */ 137#define INLINEEA 0x00040000 /* inline EA area free */
138#define ISWAPFILE 0x00800000 /* file open for pager swap space */ 138#define ISWAPFILE 0x00800000 /* file open for pager swap space */
139 139
140/* more extended mode bits: attributes for OS/2 */ 140/* more extended mode bits: attributes for OS/2 */
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index f3b1ebb22280..e1985066b1c6 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -154,12 +154,12 @@ static const s8 budtab[256] = {
154 * the in-core descriptor is initialized from disk. 154 * the in-core descriptor is initialized from disk.
155 * 155 *
156 * PARAMETERS: 156 * PARAMETERS:
157 * ipbmap - pointer to in-core inode for the block map. 157 * ipbmap - pointer to in-core inode for the block map.
158 * 158 *
159 * RETURN VALUES: 159 * RETURN VALUES:
160 * 0 - success 160 * 0 - success
161 * -ENOMEM - insufficient memory 161 * -ENOMEM - insufficient memory
162 * -EIO - i/o error 162 * -EIO - i/o error
163 */ 163 */
164int dbMount(struct inode *ipbmap) 164int dbMount(struct inode *ipbmap)
165{ 165{
@@ -232,11 +232,11 @@ int dbMount(struct inode *ipbmap)
232 * the memory for this descriptor is freed. 232 * the memory for this descriptor is freed.
233 * 233 *
234 * PARAMETERS: 234 * PARAMETERS:
235 * ipbmap - pointer to in-core inode for the block map. 235 * ipbmap - pointer to in-core inode for the block map.
236 * 236 *
237 * RETURN VALUES: 237 * RETURN VALUES:
238 * 0 - success 238 * 0 - success
239 * -EIO - i/o error 239 * -EIO - i/o error
240 */ 240 */
241int dbUnmount(struct inode *ipbmap, int mounterror) 241int dbUnmount(struct inode *ipbmap, int mounterror)
242{ 242{
@@ -320,13 +320,13 @@ int dbSync(struct inode *ipbmap)
320 * at a time. 320 * at a time.
321 * 321 *
322 * PARAMETERS: 322 * PARAMETERS:
323 * ip - pointer to in-core inode; 323 * ip - pointer to in-core inode;
324 * blkno - starting block number to be freed. 324 * blkno - starting block number to be freed.
325 * nblocks - number of blocks to be freed. 325 * nblocks - number of blocks to be freed.
326 * 326 *
327 * RETURN VALUES: 327 * RETURN VALUES:
328 * 0 - success 328 * 0 - success
329 * -EIO - i/o error 329 * -EIO - i/o error
330 */ 330 */
331int dbFree(struct inode *ip, s64 blkno, s64 nblocks) 331int dbFree(struct inode *ip, s64 blkno, s64 nblocks)
332{ 332{
@@ -395,23 +395,23 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks)
395/* 395/*
396 * NAME: dbUpdatePMap() 396 * NAME: dbUpdatePMap()
397 * 397 *
398 * FUNCTION: update the allocation state (free or allocate) of the 398 * FUNCTION: update the allocation state (free or allocate) of the
399 * specified block range in the persistent block allocation map. 399 * specified block range in the persistent block allocation map.
400 * 400 *
401 * the blocks will be updated in the persistent map one 401 * the blocks will be updated in the persistent map one
402 * dmap at a time. 402 * dmap at a time.
403 * 403 *
404 * PARAMETERS: 404 * PARAMETERS:
405 * ipbmap - pointer to in-core inode for the block map. 405 * ipbmap - pointer to in-core inode for the block map.
406 * free - 'true' if block range is to be freed from the persistent 406 * free - 'true' if block range is to be freed from the persistent
407 * map; 'false' if it is to be allocated. 407 * map; 'false' if it is to be allocated.
408 * blkno - starting block number of the range. 408 * blkno - starting block number of the range.
409 * nblocks - number of contiguous blocks in the range. 409 * nblocks - number of contiguous blocks in the range.
410 * tblk - transaction block; 410 * tblk - transaction block;
411 * 411 *
412 * RETURN VALUES: 412 * RETURN VALUES:
413 * 0 - success 413 * 0 - success
414 * -EIO - i/o error 414 * -EIO - i/o error
415 */ 415 */
416int 416int
417dbUpdatePMap(struct inode *ipbmap, 417dbUpdatePMap(struct inode *ipbmap,
@@ -573,7 +573,7 @@ dbUpdatePMap(struct inode *ipbmap,
573/* 573/*
574 * NAME: dbNextAG() 574 * NAME: dbNextAG()
575 * 575 *
576 * FUNCTION: find the preferred allocation group for new allocations. 576 * FUNCTION: find the preferred allocation group for new allocations.
577 * 577 *
578 * Within the allocation groups, we maintain a preferred 578 * Within the allocation groups, we maintain a preferred
579 * allocation group which consists of a group with at least 579 * allocation group which consists of a group with at least
@@ -589,10 +589,10 @@ dbUpdatePMap(struct inode *ipbmap,
589 * empty ags around for large allocations. 589 * empty ags around for large allocations.
590 * 590 *
591 * PARAMETERS: 591 * PARAMETERS:
592 * ipbmap - pointer to in-core inode for the block map. 592 * ipbmap - pointer to in-core inode for the block map.
593 * 593 *
594 * RETURN VALUES: 594 * RETURN VALUES:
595 * the preferred allocation group number. 595 * the preferred allocation group number.
596 */ 596 */
597int dbNextAG(struct inode *ipbmap) 597int dbNextAG(struct inode *ipbmap)
598{ 598{
@@ -656,7 +656,7 @@ unlock:
656/* 656/*
657 * NAME: dbAlloc() 657 * NAME: dbAlloc()
658 * 658 *
659 * FUNCTION: attempt to allocate a specified number of contiguous free 659 * FUNCTION: attempt to allocate a specified number of contiguous free
660 * blocks from the working allocation block map. 660 * blocks from the working allocation block map.
661 * 661 *
662 * the block allocation policy uses hints and a multi-step 662 * the block allocation policy uses hints and a multi-step
@@ -680,16 +680,16 @@ unlock:
680 * size or requests that specify no hint value. 680 * size or requests that specify no hint value.
681 * 681 *
682 * PARAMETERS: 682 * PARAMETERS:
683 * ip - pointer to in-core inode; 683 * ip - pointer to in-core inode;
684 * hint - allocation hint. 684 * hint - allocation hint.
685 * nblocks - number of contiguous blocks in the range. 685 * nblocks - number of contiguous blocks in the range.
686 * results - on successful return, set to the starting block number 686 * results - on successful return, set to the starting block number
687 * of the newly allocated contiguous range. 687 * of the newly allocated contiguous range.
688 * 688 *
689 * RETURN VALUES: 689 * RETURN VALUES:
690 * 0 - success 690 * 0 - success
691 * -ENOSPC - insufficient disk resources 691 * -ENOSPC - insufficient disk resources
692 * -EIO - i/o error 692 * -EIO - i/o error
693 */ 693 */
694int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) 694int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
695{ 695{
@@ -706,12 +706,6 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
706 /* assert that nblocks is valid */ 706 /* assert that nblocks is valid */
707 assert(nblocks > 0); 707 assert(nblocks > 0);
708 708
709#ifdef _STILL_TO_PORT
710 /* DASD limit check F226941 */
711 if (OVER_LIMIT(ip, nblocks))
712 return -ENOSPC;
713#endif /* _STILL_TO_PORT */
714
715 /* get the log2 number of blocks to be allocated. 709 /* get the log2 number of blocks to be allocated.
716 * if the number of blocks is not a log2 multiple, 710 * if the number of blocks is not a log2 multiple,
717 * it will be rounded up to the next log2 multiple. 711 * it will be rounded up to the next log2 multiple.
@@ -720,7 +714,6 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
720 714
721 bmp = JFS_SBI(ip->i_sb)->bmap; 715 bmp = JFS_SBI(ip->i_sb)->bmap;
722 716
723//retry: /* serialize w.r.t.extendfs() */
724 mapSize = bmp->db_mapsize; 717 mapSize = bmp->db_mapsize;
725 718
726 /* the hint should be within the map */ 719 /* the hint should be within the map */
@@ -879,17 +872,17 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
879/* 872/*
880 * NAME: dbAllocExact() 873 * NAME: dbAllocExact()
881 * 874 *
882 * FUNCTION: try to allocate the requested extent; 875 * FUNCTION: try to allocate the requested extent;
883 * 876 *
884 * PARAMETERS: 877 * PARAMETERS:
885 * ip - pointer to in-core inode; 878 * ip - pointer to in-core inode;
886 * blkno - extent address; 879 * blkno - extent address;
887 * nblocks - extent length; 880 * nblocks - extent length;
888 * 881 *
889 * RETURN VALUES: 882 * RETURN VALUES:
890 * 0 - success 883 * 0 - success
891 * -ENOSPC - insufficient disk resources 884 * -ENOSPC - insufficient disk resources
892 * -EIO - i/o error 885 * -EIO - i/o error
893 */ 886 */
894int dbAllocExact(struct inode *ip, s64 blkno, int nblocks) 887int dbAllocExact(struct inode *ip, s64 blkno, int nblocks)
895{ 888{
@@ -946,7 +939,7 @@ int dbAllocExact(struct inode *ip, s64 blkno, int nblocks)
946/* 939/*
947 * NAME: dbReAlloc() 940 * NAME: dbReAlloc()
948 * 941 *
949 * FUNCTION: attempt to extend a current allocation by a specified 942 * FUNCTION: attempt to extend a current allocation by a specified
950 * number of blocks. 943 * number of blocks.
951 * 944 *
952 * this routine attempts to satisfy the allocation request 945 * this routine attempts to satisfy the allocation request
@@ -959,21 +952,21 @@ int dbAllocExact(struct inode *ip, s64 blkno, int nblocks)
959 * number of blocks required. 952 * number of blocks required.
960 * 953 *
961 * PARAMETERS: 954 * PARAMETERS:
962 * ip - pointer to in-core inode requiring allocation. 955 * ip - pointer to in-core inode requiring allocation.
963 * blkno - starting block of the current allocation. 956 * blkno - starting block of the current allocation.
964 * nblocks - number of contiguous blocks within the current 957 * nblocks - number of contiguous blocks within the current
965 * allocation. 958 * allocation.
966 * addnblocks - number of blocks to add to the allocation. 959 * addnblocks - number of blocks to add to the allocation.
967 * results - on successful return, set to the starting block number 960 * results - on successful return, set to the starting block number
968 * of the existing allocation if the existing allocation 961 * of the existing allocation if the existing allocation
969 * was extended in place or to a newly allocated contiguous 962 * was extended in place or to a newly allocated contiguous
970 * range if the existing allocation could not be extended 963 * range if the existing allocation could not be extended
971 * in place. 964 * in place.
972 * 965 *
973 * RETURN VALUES: 966 * RETURN VALUES:
974 * 0 - success 967 * 0 - success
975 * -ENOSPC - insufficient disk resources 968 * -ENOSPC - insufficient disk resources
976 * -EIO - i/o error 969 * -EIO - i/o error
977 */ 970 */
978int 971int
979dbReAlloc(struct inode *ip, 972dbReAlloc(struct inode *ip,
@@ -1004,7 +997,7 @@ dbReAlloc(struct inode *ip,
1004/* 997/*
1005 * NAME: dbExtend() 998 * NAME: dbExtend()
1006 * 999 *
1007 * FUNCTION: attempt to extend a current allocation by a specified 1000 * FUNCTION: attempt to extend a current allocation by a specified
1008 * number of blocks. 1001 * number of blocks.
1009 * 1002 *
1010 * this routine attempts to satisfy the allocation request 1003 * this routine attempts to satisfy the allocation request
@@ -1013,16 +1006,16 @@ dbReAlloc(struct inode *ip,
1013 * immediately following the current allocation. 1006 * immediately following the current allocation.
1014 * 1007 *
1015 * PARAMETERS: 1008 * PARAMETERS:
1016 * ip - pointer to in-core inode requiring allocation. 1009 * ip - pointer to in-core inode requiring allocation.
1017 * blkno - starting block of the current allocation. 1010 * blkno - starting block of the current allocation.
1018 * nblocks - number of contiguous blocks within the current 1011 * nblocks - number of contiguous blocks within the current
1019 * allocation. 1012 * allocation.
1020 * addnblocks - number of blocks to add to the allocation. 1013 * addnblocks - number of blocks to add to the allocation.
1021 * 1014 *
1022 * RETURN VALUES: 1015 * RETURN VALUES:
1023 * 0 - success 1016 * 0 - success
1024 * -ENOSPC - insufficient disk resources 1017 * -ENOSPC - insufficient disk resources
1025 * -EIO - i/o error 1018 * -EIO - i/o error
1026 */ 1019 */
1027static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks) 1020static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks)
1028{ 1021{
@@ -1109,19 +1102,19 @@ static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks)
1109/* 1102/*
1110 * NAME: dbAllocNext() 1103 * NAME: dbAllocNext()
1111 * 1104 *
1112 * FUNCTION: attempt to allocate the blocks of the specified block 1105 * FUNCTION: attempt to allocate the blocks of the specified block
1113 * range within a dmap. 1106 * range within a dmap.
1114 * 1107 *
1115 * PARAMETERS: 1108 * PARAMETERS:
1116 * bmp - pointer to bmap descriptor 1109 * bmp - pointer to bmap descriptor
1117 * dp - pointer to dmap. 1110 * dp - pointer to dmap.
1118 * blkno - starting block number of the range. 1111 * blkno - starting block number of the range.
1119 * nblocks - number of contiguous free blocks of the range. 1112 * nblocks - number of contiguous free blocks of the range.
1120 * 1113 *
1121 * RETURN VALUES: 1114 * RETURN VALUES:
1122 * 0 - success 1115 * 0 - success
1123 * -ENOSPC - insufficient disk resources 1116 * -ENOSPC - insufficient disk resources
1124 * -EIO - i/o error 1117 * -EIO - i/o error
1125 * 1118 *
1126 * serialization: IREAD_LOCK(ipbmap) held on entry/exit; 1119 * serialization: IREAD_LOCK(ipbmap) held on entry/exit;
1127 */ 1120 */
@@ -1233,7 +1226,7 @@ static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno,
1233/* 1226/*
1234 * NAME: dbAllocNear() 1227 * NAME: dbAllocNear()
1235 * 1228 *
1236 * FUNCTION: attempt to allocate a number of contiguous free blocks near 1229 * FUNCTION: attempt to allocate a number of contiguous free blocks near
1237 * a specified block (hint) within a dmap. 1230 * a specified block (hint) within a dmap.
1238 * 1231 *
1239 * starting with the dmap leaf that covers the hint, we'll 1232 * starting with the dmap leaf that covers the hint, we'll
@@ -1242,18 +1235,18 @@ static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno,
1242 * the desired free space. 1235 * the desired free space.
1243 * 1236 *
1244 * PARAMETERS: 1237 * PARAMETERS:
1245 * bmp - pointer to bmap descriptor 1238 * bmp - pointer to bmap descriptor
1246 * dp - pointer to dmap. 1239 * dp - pointer to dmap.
1247 * blkno - block number to allocate near. 1240 * blkno - block number to allocate near.
1248 * nblocks - actual number of contiguous free blocks desired. 1241 * nblocks - actual number of contiguous free blocks desired.
1249 * l2nb - log2 number of contiguous free blocks desired. 1242 * l2nb - log2 number of contiguous free blocks desired.
1250 * results - on successful return, set to the starting block number 1243 * results - on successful return, set to the starting block number
1251 * of the newly allocated range. 1244 * of the newly allocated range.
1252 * 1245 *
1253 * RETURN VALUES: 1246 * RETURN VALUES:
1254 * 0 - success 1247 * 0 - success
1255 * -ENOSPC - insufficient disk resources 1248 * -ENOSPC - insufficient disk resources
1256 * -EIO - i/o error 1249 * -EIO - i/o error
1257 * 1250 *
1258 * serialization: IREAD_LOCK(ipbmap) held on entry/exit; 1251 * serialization: IREAD_LOCK(ipbmap) held on entry/exit;
1259 */ 1252 */
@@ -1316,7 +1309,7 @@ dbAllocNear(struct bmap * bmp,
1316/* 1309/*
1317 * NAME: dbAllocAG() 1310 * NAME: dbAllocAG()
1318 * 1311 *
1319 * FUNCTION: attempt to allocate the specified number of contiguous 1312 * FUNCTION: attempt to allocate the specified number of contiguous
1320 * free blocks within the specified allocation group. 1313 * free blocks within the specified allocation group.
1321 * 1314 *
1322 * unless the allocation group size is equal to the number 1315 * unless the allocation group size is equal to the number
@@ -1353,17 +1346,17 @@ dbAllocNear(struct bmap * bmp,
1353 * the allocation group. 1346 * the allocation group.
1354 * 1347 *
1355 * PARAMETERS: 1348 * PARAMETERS:
1356 * bmp - pointer to bmap descriptor 1349 * bmp - pointer to bmap descriptor
1357 * agno - allocation group number. 1350 * agno - allocation group number.
1358 * nblocks - actual number of contiguous free blocks desired. 1351 * nblocks - actual number of contiguous free blocks desired.
1359 * l2nb - log2 number of contiguous free blocks desired. 1352 * l2nb - log2 number of contiguous free blocks desired.
1360 * results - on successful return, set to the starting block number 1353 * results - on successful return, set to the starting block number
1361 * of the newly allocated range. 1354 * of the newly allocated range.
1362 * 1355 *
1363 * RETURN VALUES: 1356 * RETURN VALUES:
1364 * 0 - success 1357 * 0 - success
1365 * -ENOSPC - insufficient disk resources 1358 * -ENOSPC - insufficient disk resources
1366 * -EIO - i/o error 1359 * -EIO - i/o error
1367 * 1360 *
1368 * note: IWRITE_LOCK(ipmap) held on entry/exit; 1361 * note: IWRITE_LOCK(ipmap) held on entry/exit;
1369 */ 1362 */
@@ -1546,7 +1539,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results)
1546/* 1539/*
1547 * NAME: dbAllocAny() 1540 * NAME: dbAllocAny()
1548 * 1541 *
1549 * FUNCTION: attempt to allocate the specified number of contiguous 1542 * FUNCTION: attempt to allocate the specified number of contiguous
1550 * free blocks anywhere in the file system. 1543 * free blocks anywhere in the file system.
1551 * 1544 *
1552 * dbAllocAny() attempts to find the sufficient free space by 1545 * dbAllocAny() attempts to find the sufficient free space by
@@ -1556,16 +1549,16 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results)
1556 * desired free space is allocated. 1549 * desired free space is allocated.
1557 * 1550 *
1558 * PARAMETERS: 1551 * PARAMETERS:
1559 * bmp - pointer to bmap descriptor 1552 * bmp - pointer to bmap descriptor
1560 * nblocks - actual number of contiguous free blocks desired. 1553 * nblocks - actual number of contiguous free blocks desired.
1561 * l2nb - log2 number of contiguous free blocks desired. 1554 * l2nb - log2 number of contiguous free blocks desired.
1562 * results - on successful return, set to the starting block number 1555 * results - on successful return, set to the starting block number
1563 * of the newly allocated range. 1556 * of the newly allocated range.
1564 * 1557 *
1565 * RETURN VALUES: 1558 * RETURN VALUES:
1566 * 0 - success 1559 * 0 - success
1567 * -ENOSPC - insufficient disk resources 1560 * -ENOSPC - insufficient disk resources
1568 * -EIO - i/o error 1561 * -EIO - i/o error
1569 * 1562 *
1570 * serialization: IWRITE_LOCK(ipbmap) held on entry/exit; 1563 * serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
1571 */ 1564 */
@@ -1598,9 +1591,9 @@ static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results)
1598/* 1591/*
1599 * NAME: dbFindCtl() 1592 * NAME: dbFindCtl()
1600 * 1593 *
1601 * FUNCTION: starting at a specified dmap control page level and block 1594 * FUNCTION: starting at a specified dmap control page level and block
1602 * number, search down the dmap control levels for a range of 1595 * number, search down the dmap control levels for a range of
1603 * contiguous free blocks large enough to satisfy an allocation 1596 * contiguous free blocks large enough to satisfy an allocation
1604 * request for the specified number of free blocks. 1597 * request for the specified number of free blocks.
1605 * 1598 *
1606 * if sufficient contiguous free blocks are found, this routine 1599 * if sufficient contiguous free blocks are found, this routine
@@ -1609,17 +1602,17 @@ static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results)
1609 * is sufficient in size. 1602 * is sufficient in size.
1610 * 1603 *
1611 * PARAMETERS: 1604 * PARAMETERS:
1612 * bmp - pointer to bmap descriptor 1605 * bmp - pointer to bmap descriptor
1613 * level - starting dmap control page level. 1606 * level - starting dmap control page level.
1614 * l2nb - log2 number of contiguous free blocks desired. 1607 * l2nb - log2 number of contiguous free blocks desired.
1615 * *blkno - on entry, starting block number for conducting the search. 1608 * *blkno - on entry, starting block number for conducting the search.
1616 * on successful return, the first block within a dmap page 1609 * on successful return, the first block within a dmap page
1617 * that contains or starts a range of contiguous free blocks. 1610 * that contains or starts a range of contiguous free blocks.
1618 * 1611 *
1619 * RETURN VALUES: 1612 * RETURN VALUES:
1620 * 0 - success 1613 * 0 - success
1621 * -ENOSPC - insufficient disk resources 1614 * -ENOSPC - insufficient disk resources
1622 * -EIO - i/o error 1615 * -EIO - i/o error
1623 * 1616 *
1624 * serialization: IWRITE_LOCK(ipbmap) held on entry/exit; 1617 * serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
1625 */ 1618 */
@@ -1699,7 +1692,7 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno)
1699/* 1692/*
1700 * NAME: dbAllocCtl() 1693 * NAME: dbAllocCtl()
1701 * 1694 *
1702 * FUNCTION: attempt to allocate a specified number of contiguous 1695 * FUNCTION: attempt to allocate a specified number of contiguous
1703 * blocks starting within a specific dmap. 1696 * blocks starting within a specific dmap.
1704 * 1697 *
1705 * this routine is called by higher level routines that search 1698 * this routine is called by higher level routines that search
@@ -1726,18 +1719,18 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno)
1726 * first dmap (i.e. blkno). 1719 * first dmap (i.e. blkno).
1727 * 1720 *
1728 * PARAMETERS: 1721 * PARAMETERS:
1729 * bmp - pointer to bmap descriptor 1722 * bmp - pointer to bmap descriptor
1730 * nblocks - actual number of contiguous free blocks to allocate. 1723 * nblocks - actual number of contiguous free blocks to allocate.
1731 * l2nb - log2 number of contiguous free blocks to allocate. 1724 * l2nb - log2 number of contiguous free blocks to allocate.
1732 * blkno - starting block number of the dmap to start the allocation 1725 * blkno - starting block number of the dmap to start the allocation
1733 * from. 1726 * from.
1734 * results - on successful return, set to the starting block number 1727 * results - on successful return, set to the starting block number
1735 * of the newly allocated range. 1728 * of the newly allocated range.
1736 * 1729 *
1737 * RETURN VALUES: 1730 * RETURN VALUES:
1738 * 0 - success 1731 * 0 - success
1739 * -ENOSPC - insufficient disk resources 1732 * -ENOSPC - insufficient disk resources
1740 * -EIO - i/o error 1733 * -EIO - i/o error
1741 * 1734 *
1742 * serialization: IWRITE_LOCK(ipbmap) held on entry/exit; 1735 * serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
1743 */ 1736 */
@@ -1870,7 +1863,7 @@ dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results)
1870/* 1863/*
1871 * NAME: dbAllocDmapLev() 1864 * NAME: dbAllocDmapLev()
1872 * 1865 *
1873 * FUNCTION: attempt to allocate a specified number of contiguous blocks 1866 * FUNCTION: attempt to allocate a specified number of contiguous blocks
1874 * from a specified dmap. 1867 * from a specified dmap.
1875 * 1868 *
1876 * this routine checks if the contiguous blocks are available. 1869 * this routine checks if the contiguous blocks are available.
@@ -1878,17 +1871,17 @@ dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results)
1878 * returned. 1871 * returned.
1879 * 1872 *
1880 * PARAMETERS: 1873 * PARAMETERS:
1881 * mp - pointer to bmap descriptor 1874 * mp - pointer to bmap descriptor
1882 * dp - pointer to dmap to attempt to allocate blocks from. 1875 * dp - pointer to dmap to attempt to allocate blocks from.
1883 * l2nb - log2 number of contiguous block desired. 1876 * l2nb - log2 number of contiguous block desired.
1884 * nblocks - actual number of contiguous block desired. 1877 * nblocks - actual number of contiguous block desired.
1885 * results - on successful return, set to the starting block number 1878 * results - on successful return, set to the starting block number
1886 * of the newly allocated range. 1879 * of the newly allocated range.
1887 * 1880 *
1888 * RETURN VALUES: 1881 * RETURN VALUES:
1889 * 0 - success 1882 * 0 - success
1890 * -ENOSPC - insufficient disk resources 1883 * -ENOSPC - insufficient disk resources
1891 * -EIO - i/o error 1884 * -EIO - i/o error
1892 * 1885 *
1893 * serialization: IREAD_LOCK(ipbmap), e.g., from dbAlloc(), or 1886 * serialization: IREAD_LOCK(ipbmap), e.g., from dbAlloc(), or
1894 * IWRITE_LOCK(ipbmap), e.g., dbAllocCtl(), held on entry/exit; 1887 * IWRITE_LOCK(ipbmap), e.g., dbAllocCtl(), held on entry/exit;
@@ -1933,7 +1926,7 @@ dbAllocDmapLev(struct bmap * bmp,
1933/* 1926/*
1934 * NAME: dbAllocDmap() 1927 * NAME: dbAllocDmap()
1935 * 1928 *
1936 * FUNCTION: adjust the disk allocation map to reflect the allocation 1929 * FUNCTION: adjust the disk allocation map to reflect the allocation
1937 * of a specified block range within a dmap. 1930 * of a specified block range within a dmap.
1938 * 1931 *
1939 * this routine allocates the specified blocks from the dmap 1932 * this routine allocates the specified blocks from the dmap
@@ -1946,14 +1939,14 @@ dbAllocDmapLev(struct bmap * bmp,
1946 * covers this dmap. 1939 * covers this dmap.
1947 * 1940 *
1948 * PARAMETERS: 1941 * PARAMETERS:
1949 * bmp - pointer to bmap descriptor 1942 * bmp - pointer to bmap descriptor
1950 * dp - pointer to dmap to allocate the block range from. 1943 * dp - pointer to dmap to allocate the block range from.
1951 * blkno - starting block number of the block to be allocated. 1944 * blkno - starting block number of the block to be allocated.
1952 * nblocks - number of blocks to be allocated. 1945 * nblocks - number of blocks to be allocated.
1953 * 1946 *
1954 * RETURN VALUES: 1947 * RETURN VALUES:
1955 * 0 - success 1948 * 0 - success
1956 * -EIO - i/o error 1949 * -EIO - i/o error
1957 * 1950 *
1958 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; 1951 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
1959 */ 1952 */
@@ -1989,7 +1982,7 @@ static int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
1989/* 1982/*
1990 * NAME: dbFreeDmap() 1983 * NAME: dbFreeDmap()
1991 * 1984 *
1992 * FUNCTION: adjust the disk allocation map to reflect the allocation 1985 * FUNCTION: adjust the disk allocation map to reflect the allocation
1993 * of a specified block range within a dmap. 1986 * of a specified block range within a dmap.
1994 * 1987 *
1995 * this routine frees the specified blocks from the dmap through 1988 * this routine frees the specified blocks from the dmap through
@@ -1997,18 +1990,18 @@ static int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
1997 * causes the maximum string of free blocks within the dmap to 1990 * causes the maximum string of free blocks within the dmap to
1998 * change (i.e. the value of the root of the dmap's dmtree), this 1991 * change (i.e. the value of the root of the dmap's dmtree), this
1999 * routine will cause this change to be reflected up through the 1992 * routine will cause this change to be reflected up through the
2000 * appropriate levels of the dmap control pages by a call to 1993 * appropriate levels of the dmap control pages by a call to
2001 * dbAdjCtl() for the L0 dmap control page that covers this dmap. 1994 * dbAdjCtl() for the L0 dmap control page that covers this dmap.
2002 * 1995 *
2003 * PARAMETERS: 1996 * PARAMETERS:
2004 * bmp - pointer to bmap descriptor 1997 * bmp - pointer to bmap descriptor
2005 * dp - pointer to dmap to free the block range from. 1998 * dp - pointer to dmap to free the block range from.
2006 * blkno - starting block number of the block to be freed. 1999 * blkno - starting block number of the block to be freed.
2007 * nblocks - number of blocks to be freed. 2000 * nblocks - number of blocks to be freed.
2008 * 2001 *
2009 * RETURN VALUES: 2002 * RETURN VALUES:
2010 * 0 - success 2003 * 0 - success
2011 * -EIO - i/o error 2004 * -EIO - i/o error
2012 * 2005 *
2013 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; 2006 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
2014 */ 2007 */
@@ -2055,7 +2048,7 @@ static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
2055/* 2048/*
2056 * NAME: dbAllocBits() 2049 * NAME: dbAllocBits()
2057 * 2050 *
2058 * FUNCTION: allocate a specified block range from a dmap. 2051 * FUNCTION: allocate a specified block range from a dmap.
2059 * 2052 *
2060 * this routine updates the dmap to reflect the working 2053 * this routine updates the dmap to reflect the working
2061 * state allocation of the specified block range. it directly 2054 * state allocation of the specified block range. it directly
@@ -2065,10 +2058,10 @@ static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
2065 * dmap's dmtree, as a whole, to reflect the allocated range. 2058 * dmap's dmtree, as a whole, to reflect the allocated range.
2066 * 2059 *
2067 * PARAMETERS: 2060 * PARAMETERS:
2068 * bmp - pointer to bmap descriptor 2061 * bmp - pointer to bmap descriptor
2069 * dp - pointer to dmap to allocate bits from. 2062 * dp - pointer to dmap to allocate bits from.
2070 * blkno - starting block number of the bits to be allocated. 2063 * blkno - starting block number of the bits to be allocated.
2071 * nblocks - number of bits to be allocated. 2064 * nblocks - number of bits to be allocated.
2072 * 2065 *
2073 * RETURN VALUES: none 2066 * RETURN VALUES: none
2074 * 2067 *
@@ -2149,7 +2142,7 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
2149 * the allocated words. 2142 * the allocated words.
2150 */ 2143 */
2151 for (; nwords > 0; nwords -= nw) { 2144 for (; nwords > 0; nwords -= nw) {
2152 if (leaf[word] < BUDMIN) { 2145 if (leaf[word] < BUDMIN) {
2153 jfs_error(bmp->db_ipbmap->i_sb, 2146 jfs_error(bmp->db_ipbmap->i_sb,
2154 "dbAllocBits: leaf page " 2147 "dbAllocBits: leaf page "
2155 "corrupt"); 2148 "corrupt");
@@ -2202,7 +2195,7 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
2202/* 2195/*
2203 * NAME: dbFreeBits() 2196 * NAME: dbFreeBits()
2204 * 2197 *
2205 * FUNCTION: free a specified block range from a dmap. 2198 * FUNCTION: free a specified block range from a dmap.
2206 * 2199 *
2207 * this routine updates the dmap to reflect the working 2200 * this routine updates the dmap to reflect the working
2208 * state allocation of the specified block range. it directly 2201 * state allocation of the specified block range. it directly
@@ -2212,10 +2205,10 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
2212 * dmtree, as a whole, to reflect the deallocated range. 2205 * dmtree, as a whole, to reflect the deallocated range.
2213 * 2206 *
2214 * PARAMETERS: 2207 * PARAMETERS:
2215 * bmp - pointer to bmap descriptor 2208 * bmp - pointer to bmap descriptor
2216 * dp - pointer to dmap to free bits from. 2209 * dp - pointer to dmap to free bits from.
2217 * blkno - starting block number of the bits to be freed. 2210 * blkno - starting block number of the bits to be freed.
2218 * nblocks - number of bits to be freed. 2211 * nblocks - number of bits to be freed.
2219 * 2212 *
2220 * RETURN VALUES: 0 for success 2213 * RETURN VALUES: 0 for success
2221 * 2214 *
@@ -2388,19 +2381,19 @@ static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
2388 * the new root value and the next dmap control page level to 2381 * the new root value and the next dmap control page level to
2389 * be adjusted. 2382 * be adjusted.
2390 * PARAMETERS: 2383 * PARAMETERS:
2391 * bmp - pointer to bmap descriptor 2384 * bmp - pointer to bmap descriptor
2392 * blkno - the first block of a block range within a dmap. it is 2385 * blkno - the first block of a block range within a dmap. it is
2393 * the allocation or deallocation of this block range that 2386 * the allocation or deallocation of this block range that
2394 * requires the dmap control page to be adjusted. 2387 * requires the dmap control page to be adjusted.
2395 * newval - the new value of the lower level dmap or dmap control 2388 * newval - the new value of the lower level dmap or dmap control
2396 * page root. 2389 * page root.
2397 * alloc - 'true' if adjustment is due to an allocation. 2390 * alloc - 'true' if adjustment is due to an allocation.
2398 * level - current level of dmap control page (i.e. L0, L1, L2) to 2391 * level - current level of dmap control page (i.e. L0, L1, L2) to
2399 * be adjusted. 2392 * be adjusted.
2400 * 2393 *
2401 * RETURN VALUES: 2394 * RETURN VALUES:
2402 * 0 - success 2395 * 0 - success
2403 * -EIO - i/o error 2396 * -EIO - i/o error
2404 * 2397 *
2405 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; 2398 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
2406 */ 2399 */
@@ -2544,16 +2537,16 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level)
2544/* 2537/*
2545 * NAME: dbSplit() 2538 * NAME: dbSplit()
2546 * 2539 *
2547 * FUNCTION: update the leaf of a dmtree with a new value, splitting 2540 * FUNCTION: update the leaf of a dmtree with a new value, splitting
2548 * the leaf from the binary buddy system of the dmtree's 2541 * the leaf from the binary buddy system of the dmtree's
2549 * leaves, as required. 2542 * leaves, as required.
2550 * 2543 *
2551 * PARAMETERS: 2544 * PARAMETERS:
2552 * tp - pointer to the tree containing the leaf. 2545 * tp - pointer to the tree containing the leaf.
2553 * leafno - the number of the leaf to be updated. 2546 * leafno - the number of the leaf to be updated.
2554 * splitsz - the size the binary buddy system starting at the leaf 2547 * splitsz - the size the binary buddy system starting at the leaf
2555 * must be split to, specified as the log2 number of blocks. 2548 * must be split to, specified as the log2 number of blocks.
2556 * newval - the new value for the leaf. 2549 * newval - the new value for the leaf.
2557 * 2550 *
2558 * RETURN VALUES: none 2551 * RETURN VALUES: none
2559 * 2552 *
@@ -2600,7 +2593,7 @@ static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval)
2600/* 2593/*
2601 * NAME: dbBackSplit() 2594 * NAME: dbBackSplit()
2602 * 2595 *
2603 * FUNCTION: back split the binary buddy system of dmtree leaves 2596 * FUNCTION: back split the binary buddy system of dmtree leaves
2604 * that hold a specified leaf until the specified leaf 2597 * that hold a specified leaf until the specified leaf
2605 * starts its own binary buddy system. 2598 * starts its own binary buddy system.
2606 * 2599 *
@@ -2617,8 +2610,8 @@ static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval)
2617 * in which a previous join operation must be backed out. 2610 * in which a previous join operation must be backed out.
2618 * 2611 *
2619 * PARAMETERS: 2612 * PARAMETERS:
2620 * tp - pointer to the tree containing the leaf. 2613 * tp - pointer to the tree containing the leaf.
2621 * leafno - the number of the leaf to be updated. 2614 * leafno - the number of the leaf to be updated.
2622 * 2615 *
2623 * RETURN VALUES: none 2616 * RETURN VALUES: none
2624 * 2617 *
@@ -2692,14 +2685,14 @@ static int dbBackSplit(dmtree_t * tp, int leafno)
2692/* 2685/*
2693 * NAME: dbJoin() 2686 * NAME: dbJoin()
2694 * 2687 *
2695 * FUNCTION: update the leaf of a dmtree with a new value, joining 2688 * FUNCTION: update the leaf of a dmtree with a new value, joining
2696 * the leaf with other leaves of the dmtree into a multi-leaf 2689 * the leaf with other leaves of the dmtree into a multi-leaf
2697 * binary buddy system, as required. 2690 * binary buddy system, as required.
2698 * 2691 *
2699 * PARAMETERS: 2692 * PARAMETERS:
2700 * tp - pointer to the tree containing the leaf. 2693 * tp - pointer to the tree containing the leaf.
2701 * leafno - the number of the leaf to be updated. 2694 * leafno - the number of the leaf to be updated.
2702 * newval - the new value for the leaf. 2695 * newval - the new value for the leaf.
2703 * 2696 *
2704 * RETURN VALUES: none 2697 * RETURN VALUES: none
2705 */ 2698 */
@@ -2785,15 +2778,15 @@ static int dbJoin(dmtree_t * tp, int leafno, int newval)
2785/* 2778/*
2786 * NAME: dbAdjTree() 2779 * NAME: dbAdjTree()
2787 * 2780 *
2788 * FUNCTION: update a leaf of a dmtree with a new value, adjusting 2781 * FUNCTION: update a leaf of a dmtree with a new value, adjusting
2789 * the dmtree, as required, to reflect the new leaf value. 2782 * the dmtree, as required, to reflect the new leaf value.
2790 * the combination of any buddies must already be done before 2783 * the combination of any buddies must already be done before
2791 * this is called. 2784 * this is called.
2792 * 2785 *
2793 * PARAMETERS: 2786 * PARAMETERS:
2794 * tp - pointer to the tree to be adjusted. 2787 * tp - pointer to the tree to be adjusted.
2795 * leafno - the number of the leaf to be updated. 2788 * leafno - the number of the leaf to be updated.
2796 * newval - the new value for the leaf. 2789 * newval - the new value for the leaf.
2797 * 2790 *
2798 * RETURN VALUES: none 2791 * RETURN VALUES: none
2799 */ 2792 */
@@ -2852,7 +2845,7 @@ static void dbAdjTree(dmtree_t * tp, int leafno, int newval)
2852/* 2845/*
2853 * NAME: dbFindLeaf() 2846 * NAME: dbFindLeaf()
2854 * 2847 *
2855 * FUNCTION: search a dmtree_t for sufficient free blocks, returning 2848 * FUNCTION: search a dmtree_t for sufficient free blocks, returning
2856 * the index of a leaf describing the free blocks if 2849 * the index of a leaf describing the free blocks if
2857 * sufficient free blocks are found. 2850 * sufficient free blocks are found.
2858 * 2851 *
@@ -2861,15 +2854,15 @@ static void dbAdjTree(dmtree_t * tp, int leafno, int newval)
2861 * free space. 2854 * free space.
2862 * 2855 *
2863 * PARAMETERS: 2856 * PARAMETERS:
2864 * tp - pointer to the tree to be searched. 2857 * tp - pointer to the tree to be searched.
2865 * l2nb - log2 number of free blocks to search for. 2858 * l2nb - log2 number of free blocks to search for.
2866 * leafidx - return pointer to be set to the index of the leaf 2859 * leafidx - return pointer to be set to the index of the leaf
2867 * describing at least l2nb free blocks if sufficient 2860 * describing at least l2nb free blocks if sufficient
2868 * free blocks are found. 2861 * free blocks are found.
2869 * 2862 *
2870 * RETURN VALUES: 2863 * RETURN VALUES:
2871 * 0 - success 2864 * 0 - success
2872 * -ENOSPC - insufficient free blocks. 2865 * -ENOSPC - insufficient free blocks.
2873 */ 2866 */
2874static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx) 2867static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx)
2875{ 2868{
@@ -2916,18 +2909,18 @@ static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx)
2916/* 2909/*
2917 * NAME: dbFindBits() 2910 * NAME: dbFindBits()
2918 * 2911 *
2919 * FUNCTION: find a specified number of binary buddy free bits within a 2912 * FUNCTION: find a specified number of binary buddy free bits within a
2920 * dmap bitmap word value. 2913 * dmap bitmap word value.
2921 * 2914 *
2922 * this routine searches the bitmap value for (1 << l2nb) free 2915 * this routine searches the bitmap value for (1 << l2nb) free
2923 * bits at (1 << l2nb) alignments within the value. 2916 * bits at (1 << l2nb) alignments within the value.
2924 * 2917 *
2925 * PARAMETERS: 2918 * PARAMETERS:
2926 * word - dmap bitmap word value. 2919 * word - dmap bitmap word value.
2927 * l2nb - number of free bits specified as a log2 number. 2920 * l2nb - number of free bits specified as a log2 number.
2928 * 2921 *
2929 * RETURN VALUES: 2922 * RETURN VALUES:
2930 * starting bit number of free bits. 2923 * starting bit number of free bits.
2931 */ 2924 */
2932static int dbFindBits(u32 word, int l2nb) 2925static int dbFindBits(u32 word, int l2nb)
2933{ 2926{
@@ -2963,14 +2956,14 @@ static int dbFindBits(u32 word, int l2nb)
2963/* 2956/*
2964 * NAME: dbMaxBud(u8 *cp) 2957 * NAME: dbMaxBud(u8 *cp)
2965 * 2958 *
2966 * FUNCTION: determine the largest binary buddy string of free 2959 * FUNCTION: determine the largest binary buddy string of free
2967 * bits within 32-bits of the map. 2960 * bits within 32-bits of the map.
2968 * 2961 *
2969 * PARAMETERS: 2962 * PARAMETERS:
2970 * cp - pointer to the 32-bit value. 2963 * cp - pointer to the 32-bit value.
2971 * 2964 *
2972 * RETURN VALUES: 2965 * RETURN VALUES:
2973 * largest binary buddy of free bits within a dmap word. 2966 * largest binary buddy of free bits within a dmap word.
2974 */ 2967 */
2975static int dbMaxBud(u8 * cp) 2968static int dbMaxBud(u8 * cp)
2976{ 2969{
@@ -3000,14 +2993,14 @@ static int dbMaxBud(u8 * cp)
3000/* 2993/*
3001 * NAME: cnttz(uint word) 2994 * NAME: cnttz(uint word)
3002 * 2995 *
3003 * FUNCTION: determine the number of trailing zeros within a 32-bit 2996 * FUNCTION: determine the number of trailing zeros within a 32-bit
3004 * value. 2997 * value.
3005 * 2998 *
3006 * PARAMETERS: 2999 * PARAMETERS:
3007 * value - 32-bit value to be examined. 3000 * value - 32-bit value to be examined.
3008 * 3001 *
3009 * RETURN VALUES: 3002 * RETURN VALUES:
3010 * count of trailing zeros 3003 * count of trailing zeros
3011 */ 3004 */
3012static int cnttz(u32 word) 3005static int cnttz(u32 word)
3013{ 3006{
@@ -3025,14 +3018,14 @@ static int cnttz(u32 word)
3025/* 3018/*
3026 * NAME: cntlz(u32 value) 3019 * NAME: cntlz(u32 value)
3027 * 3020 *
3028 * FUNCTION: determine the number of leading zeros within a 32-bit 3021 * FUNCTION: determine the number of leading zeros within a 32-bit
3029 * value. 3022 * value.
3030 * 3023 *
3031 * PARAMETERS: 3024 * PARAMETERS:
3032 * value - 32-bit value to be examined. 3025 * value - 32-bit value to be examined.
3033 * 3026 *
3034 * RETURN VALUES: 3027 * RETURN VALUES:
3035 * count of leading zeros 3028 * count of leading zeros
3036 */ 3029 */
3037static int cntlz(u32 value) 3030static int cntlz(u32 value)
3038{ 3031{
@@ -3050,14 +3043,14 @@ static int cntlz(u32 value)
3050 * NAME: blkstol2(s64 nb) 3043 * NAME: blkstol2(s64 nb)
3051 * 3044 *
3052 * FUNCTION: convert a block count to its log2 value. if the block 3045 * FUNCTION: convert a block count to its log2 value. if the block
3053 * count is not a l2 multiple, it is rounded up to the next 3046 * count is not a l2 multiple, it is rounded up to the next
3054 * larger l2 multiple. 3047 * larger l2 multiple.
3055 * 3048 *
3056 * PARAMETERS: 3049 * PARAMETERS:
3057 * nb - number of blocks 3050 * nb - number of blocks
3058 * 3051 *
3059 * RETURN VALUES: 3052 * RETURN VALUES:
3060 * log2 number of blocks 3053 * log2 number of blocks
3061 */ 3054 */
3062static int blkstol2(s64 nb) 3055static int blkstol2(s64 nb)
3063{ 3056{
@@ -3099,13 +3092,13 @@ static int blkstol2(s64 nb)
3099 * at a time. 3092 * at a time.
3100 * 3093 *
3101 * PARAMETERS: 3094 * PARAMETERS:
3102 * ip - pointer to in-core inode; 3095 * ip - pointer to in-core inode;
3103 * blkno - starting block number to be freed. 3096 * blkno - starting block number to be freed.
3104 * nblocks - number of blocks to be freed. 3097 * nblocks - number of blocks to be freed.
3105 * 3098 *
3106 * RETURN VALUES: 3099 * RETURN VALUES:
3107 * 0 - success 3100 * 0 - success
3108 * -EIO - i/o error 3101 * -EIO - i/o error
3109 */ 3102 */
3110int dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks) 3103int dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks)
3111{ 3104{
@@ -3278,10 +3271,10 @@ static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno,
3278 * L2 3271 * L2
3279 * | 3272 * |
3280 * L1---------------------------------L1 3273 * L1---------------------------------L1
3281 * | | 3274 * | |
3282 * L0---------L0---------L0 L0---------L0---------L0 3275 * L0---------L0---------L0 L0---------L0---------L0
3283 * | | | | | | 3276 * | | | | | |
3284 * d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,.,dm; 3277 * d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,.,dm;
3285 * L2L1L0d0,...,dnL0d0,...,dnL0d0,...,dnL1L0d0,...,dnL0d0,...,dnL0d0,..dm 3278 * L2L1L0d0,...,dnL0d0,...,dnL0d0,...,dnL1L0d0,...,dnL0d0,...,dnL0d0,..dm
3286 * 3279 *
3287 * <---old---><----------------------------extend-----------------------> 3280 * <---old---><----------------------------extend----------------------->
@@ -3307,7 +3300,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks)
3307 (long long) blkno, (long long) nblocks, (long long) newsize); 3300 (long long) blkno, (long long) nblocks, (long long) newsize);
3308 3301
3309 /* 3302 /*
3310 * initialize bmap control page. 3303 * initialize bmap control page.
3311 * 3304 *
3312 * all the data in bmap control page should exclude 3305 * all the data in bmap control page should exclude
3313 * the mkfs hidden dmap page. 3306 * the mkfs hidden dmap page.
@@ -3330,7 +3323,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks)
3330 bmp->db_numag += ((u32) newsize % (u32) bmp->db_agsize) ? 1 : 0; 3323 bmp->db_numag += ((u32) newsize % (u32) bmp->db_agsize) ? 1 : 0;
3331 3324
3332 /* 3325 /*
3333 * reconfigure db_agfree[] 3326 * reconfigure db_agfree[]
3334 * from old AG configuration to new AG configuration; 3327 * from old AG configuration to new AG configuration;
3335 * 3328 *
3336 * coalesce contiguous k (newAGSize/oldAGSize) AGs; 3329 * coalesce contiguous k (newAGSize/oldAGSize) AGs;
@@ -3362,7 +3355,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks)
3362 bmp->db_maxag = bmp->db_maxag / k; 3355 bmp->db_maxag = bmp->db_maxag / k;
3363 3356
3364 /* 3357 /*
3365 * extend bmap 3358 * extend bmap
3366 * 3359 *
3367 * update bit maps and corresponding level control pages; 3360 * update bit maps and corresponding level control pages;
3368 * global control page db_nfree, db_agfree[agno], db_maxfreebud; 3361 * global control page db_nfree, db_agfree[agno], db_maxfreebud;
@@ -3410,7 +3403,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks)
3410 /* compute start L0 */ 3403 /* compute start L0 */
3411 j = 0; 3404 j = 0;
3412 l1leaf = l1dcp->stree + CTLLEAFIND; 3405 l1leaf = l1dcp->stree + CTLLEAFIND;
3413 p += nbperpage; /* 1st L0 of L1.k */ 3406 p += nbperpage; /* 1st L0 of L1.k */
3414 } 3407 }
3415 3408
3416 /* 3409 /*
@@ -3548,7 +3541,7 @@ errout:
3548 return -EIO; 3541 return -EIO;
3549 3542
3550 /* 3543 /*
3551 * finalize bmap control page 3544 * finalize bmap control page
3552 */ 3545 */
3553finalize: 3546finalize:
3554 3547
@@ -3567,7 +3560,7 @@ void dbFinalizeBmap(struct inode *ipbmap)
3567 int i, n; 3560 int i, n;
3568 3561
3569 /* 3562 /*
3570 * finalize bmap control page 3563 * finalize bmap control page
3571 */ 3564 */
3572//finalize: 3565//finalize:
3573 /* 3566 /*
@@ -3953,8 +3946,8 @@ static int dbGetL2AGSize(s64 nblocks)
3953 * convert number of map pages to the zero origin top dmapctl level 3946 * convert number of map pages to the zero origin top dmapctl level
3954 */ 3947 */
3955#define BMAPPGTOLEV(npages) \ 3948#define BMAPPGTOLEV(npages) \
3956 (((npages) <= 3 + MAXL0PAGES) ? 0 \ 3949 (((npages) <= 3 + MAXL0PAGES) ? 0 : \
3957 : ((npages) <= 2 + MAXL1PAGES) ? 1 : 2) 3950 ((npages) <= 2 + MAXL1PAGES) ? 1 : 2)
3958 3951
3959s64 dbMapFileSizeToMapSize(struct inode * ipbmap) 3952s64 dbMapFileSizeToMapSize(struct inode * ipbmap)
3960{ 3953{
@@ -3981,8 +3974,8 @@ s64 dbMapFileSizeToMapSize(struct inode * ipbmap)
3981 factor = 3974 factor =
3982 (i == 2) ? MAXL1PAGES : ((i == 1) ? MAXL0PAGES : 1); 3975 (i == 2) ? MAXL1PAGES : ((i == 1) ? MAXL0PAGES : 1);
3983 complete = (u32) npages / factor; 3976 complete = (u32) npages / factor;
3984 ndmaps += complete * ((i == 2) ? LPERCTL * LPERCTL 3977 ndmaps += complete * ((i == 2) ? LPERCTL * LPERCTL :
3985 : ((i == 1) ? LPERCTL : 1)); 3978 ((i == 1) ? LPERCTL : 1));
3986 3979
3987 /* pages in last/incomplete child */ 3980 /* pages in last/incomplete child */
3988 npages = (u32) npages % factor; 3981 npages = (u32) npages % factor;
diff --git a/fs/jfs/jfs_dmap.h b/fs/jfs/jfs_dmap.h
index 45ea454c74bd..11e6d471b364 100644
--- a/fs/jfs/jfs_dmap.h
+++ b/fs/jfs/jfs_dmap.h
@@ -83,7 +83,7 @@ static __inline signed char TREEMAX(signed char *cp)
83 * - 1 is added to account for the control page of the map. 83 * - 1 is added to account for the control page of the map.
84 */ 84 */
85#define BLKTODMAP(b,s) \ 85#define BLKTODMAP(b,s) \
86 ((((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1) << (s)) 86 ((((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1) << (s))
87 87
88/* 88/*
89 * convert disk block number to the logical block number of the LEVEL 0 89 * convert disk block number to the logical block number of the LEVEL 0
@@ -98,7 +98,7 @@ static __inline signed char TREEMAX(signed char *cp)
98 * - 1 is added to account for the control page of the map. 98 * - 1 is added to account for the control page of the map.
99 */ 99 */
100#define BLKTOL0(b,s) \ 100#define BLKTOL0(b,s) \
101 (((((b) >> 23) << 10) + ((b) >> 23) + ((b) >> 33) + 2 + 1) << (s)) 101 (((((b) >> 23) << 10) + ((b) >> 23) + ((b) >> 33) + 2 + 1) << (s))
102 102
103/* 103/*
104 * convert disk block number to the logical block number of the LEVEL 1 104 * convert disk block number to the logical block number of the LEVEL 1
@@ -120,7 +120,7 @@ static __inline signed char TREEMAX(signed char *cp)
120 * at the specified level which describes the disk block. 120 * at the specified level which describes the disk block.
121 */ 121 */
122#define BLKTOCTL(b,s,l) \ 122#define BLKTOCTL(b,s,l) \
123 (((l) == 2) ? 1 : ((l) == 1) ? BLKTOL1((b),(s)) : BLKTOL0((b),(s))) 123 (((l) == 2) ? 1 : ((l) == 1) ? BLKTOL1((b),(s)) : BLKTOL0((b),(s)))
124 124
125/* 125/*
126 * convert aggregate map size to the zero origin dmapctl level of the 126 * convert aggregate map size to the zero origin dmapctl level of the
@@ -145,27 +145,27 @@ static __inline signed char TREEMAX(signed char *cp)
145 * dmaptree must be consistent with dmapctl. 145 * dmaptree must be consistent with dmapctl.
146 */ 146 */
147struct dmaptree { 147struct dmaptree {
148 __le32 nleafs; /* 4: number of tree leafs */ 148 __le32 nleafs; /* 4: number of tree leafs */
149 __le32 l2nleafs; /* 4: l2 number of tree leafs */ 149 __le32 l2nleafs; /* 4: l2 number of tree leafs */
150 __le32 leafidx; /* 4: index of first tree leaf */ 150 __le32 leafidx; /* 4: index of first tree leaf */
151 __le32 height; /* 4: height of the tree */ 151 __le32 height; /* 4: height of the tree */
152 s8 budmin; /* 1: min l2 tree leaf value to combine */ 152 s8 budmin; /* 1: min l2 tree leaf value to combine */
153 s8 stree[TREESIZE]; /* TREESIZE: tree */ 153 s8 stree[TREESIZE]; /* TREESIZE: tree */
154 u8 pad[2]; /* 2: pad to word boundary */ 154 u8 pad[2]; /* 2: pad to word boundary */
155}; /* - 360 - */ 155}; /* - 360 - */
156 156
157/* 157/*
158 * dmap page per 8K blocks bitmap 158 * dmap page per 8K blocks bitmap
159 */ 159 */
160struct dmap { 160struct dmap {
161 __le32 nblocks; /* 4: num blks covered by this dmap */ 161 __le32 nblocks; /* 4: num blks covered by this dmap */
162 __le32 nfree; /* 4: num of free blks in this dmap */ 162 __le32 nfree; /* 4: num of free blks in this dmap */
163 __le64 start; /* 8: starting blkno for this dmap */ 163 __le64 start; /* 8: starting blkno for this dmap */
164 struct dmaptree tree; /* 360: dmap tree */ 164 struct dmaptree tree; /* 360: dmap tree */
165 u8 pad[1672]; /* 1672: pad to 2048 bytes */ 165 u8 pad[1672]; /* 1672: pad to 2048 bytes */
166 __le32 wmap[LPERDMAP]; /* 1024: bits of the working map */ 166 __le32 wmap[LPERDMAP]; /* 1024: bits of the working map */
167 __le32 pmap[LPERDMAP]; /* 1024: bits of the persistent map */ 167 __le32 pmap[LPERDMAP]; /* 1024: bits of the persistent map */
168}; /* - 4096 - */ 168}; /* - 4096 - */
169 169
170/* 170/*
171 * disk map control page per level. 171 * disk map control page per level.
@@ -173,14 +173,14 @@ struct dmap {
173 * dmapctl must be consistent with dmaptree. 173 * dmapctl must be consistent with dmaptree.
174 */ 174 */
175struct dmapctl { 175struct dmapctl {
176 __le32 nleafs; /* 4: number of tree leafs */ 176 __le32 nleafs; /* 4: number of tree leafs */
177 __le32 l2nleafs; /* 4: l2 number of tree leafs */ 177 __le32 l2nleafs; /* 4: l2 number of tree leafs */
178 __le32 leafidx; /* 4: index of the first tree leaf */ 178 __le32 leafidx; /* 4: index of the first tree leaf */
179 __le32 height; /* 4: height of tree */ 179 __le32 height; /* 4: height of tree */
180 s8 budmin; /* 1: minimum l2 tree leaf value */ 180 s8 budmin; /* 1: minimum l2 tree leaf value */
181 s8 stree[CTLTREESIZE]; /* CTLTREESIZE: dmapctl tree */ 181 s8 stree[CTLTREESIZE]; /* CTLTREESIZE: dmapctl tree */
182 u8 pad[2714]; /* 2714: pad to 4096 */ 182 u8 pad[2714]; /* 2714: pad to 4096 */
183}; /* - 4096 - */ 183}; /* - 4096 - */
184 184
185/* 185/*
186 * common definition for dmaptree within dmap and dmapctl 186 * common definition for dmaptree within dmap and dmapctl
@@ -202,41 +202,41 @@ typedef union dmtree {
202 * on-disk aggregate disk allocation map descriptor. 202 * on-disk aggregate disk allocation map descriptor.
203 */ 203 */
204struct dbmap_disk { 204struct dbmap_disk {
205 __le64 dn_mapsize; /* 8: number of blocks in aggregate */ 205 __le64 dn_mapsize; /* 8: number of blocks in aggregate */
206 __le64 dn_nfree; /* 8: num free blks in aggregate map */ 206 __le64 dn_nfree; /* 8: num free blks in aggregate map */
207 __le32 dn_l2nbperpage; /* 4: number of blks per page */ 207 __le32 dn_l2nbperpage; /* 4: number of blks per page */
208 __le32 dn_numag; /* 4: total number of ags */ 208 __le32 dn_numag; /* 4: total number of ags */
209 __le32 dn_maxlevel; /* 4: number of active ags */ 209 __le32 dn_maxlevel; /* 4: number of active ags */
210 __le32 dn_maxag; /* 4: max active alloc group number */ 210 __le32 dn_maxag; /* 4: max active alloc group number */
211 __le32 dn_agpref; /* 4: preferred alloc group (hint) */ 211 __le32 dn_agpref; /* 4: preferred alloc group (hint) */
212 __le32 dn_aglevel; /* 4: dmapctl level holding the AG */ 212 __le32 dn_aglevel; /* 4: dmapctl level holding the AG */
213 __le32 dn_agheigth; /* 4: height in dmapctl of the AG */ 213 __le32 dn_agheigth; /* 4: height in dmapctl of the AG */
214 __le32 dn_agwidth; /* 4: width in dmapctl of the AG */ 214 __le32 dn_agwidth; /* 4: width in dmapctl of the AG */
215 __le32 dn_agstart; /* 4: start tree index at AG height */ 215 __le32 dn_agstart; /* 4: start tree index at AG height */
216 __le32 dn_agl2size; /* 4: l2 num of blks per alloc group */ 216 __le32 dn_agl2size; /* 4: l2 num of blks per alloc group */
217 __le64 dn_agfree[MAXAG];/* 8*MAXAG: per AG free count */ 217 __le64 dn_agfree[MAXAG];/* 8*MAXAG: per AG free count */
218 __le64 dn_agsize; /* 8: num of blks per alloc group */ 218 __le64 dn_agsize; /* 8: num of blks per alloc group */
219 s8 dn_maxfreebud; /* 1: max free buddy system */ 219 s8 dn_maxfreebud; /* 1: max free buddy system */
220 u8 pad[3007]; /* 3007: pad to 4096 */ 220 u8 pad[3007]; /* 3007: pad to 4096 */
221}; /* - 4096 - */ 221}; /* - 4096 - */
222 222
223struct dbmap { 223struct dbmap {
224 s64 dn_mapsize; /* number of blocks in aggregate */ 224 s64 dn_mapsize; /* number of blocks in aggregate */
225 s64 dn_nfree; /* num free blks in aggregate map */ 225 s64 dn_nfree; /* num free blks in aggregate map */
226 int dn_l2nbperpage; /* number of blks per page */ 226 int dn_l2nbperpage; /* number of blks per page */
227 int dn_numag; /* total number of ags */ 227 int dn_numag; /* total number of ags */
228 int dn_maxlevel; /* number of active ags */ 228 int dn_maxlevel; /* number of active ags */
229 int dn_maxag; /* max active alloc group number */ 229 int dn_maxag; /* max active alloc group number */
230 int dn_agpref; /* preferred alloc group (hint) */ 230 int dn_agpref; /* preferred alloc group (hint) */
231 int dn_aglevel; /* dmapctl level holding the AG */ 231 int dn_aglevel; /* dmapctl level holding the AG */
232 int dn_agheigth; /* height in dmapctl of the AG */ 232 int dn_agheigth; /* height in dmapctl of the AG */
233 int dn_agwidth; /* width in dmapctl of the AG */ 233 int dn_agwidth; /* width in dmapctl of the AG */
234 int dn_agstart; /* start tree index at AG height */ 234 int dn_agstart; /* start tree index at AG height */
235 int dn_agl2size; /* l2 num of blks per alloc group */ 235 int dn_agl2size; /* l2 num of blks per alloc group */
236 s64 dn_agfree[MAXAG]; /* per AG free count */ 236 s64 dn_agfree[MAXAG]; /* per AG free count */
237 s64 dn_agsize; /* num of blks per alloc group */ 237 s64 dn_agsize; /* num of blks per alloc group */
238 signed char dn_maxfreebud; /* max free buddy system */ 238 signed char dn_maxfreebud; /* max free buddy system */
239}; /* - 4096 - */ 239}; /* - 4096 - */
240/* 240/*
241 * in-memory aggregate disk allocation map descriptor. 241 * in-memory aggregate disk allocation map descriptor.
242 */ 242 */
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
index 6d62f3222892..c14ba3cfa818 100644
--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -315,8 +315,8 @@ static inline void lock_index(tid_t tid, struct inode *ip, struct metapage * mp,
315 lv = &llck->lv[llck->index]; 315 lv = &llck->lv[llck->index];
316 316
317 /* 317 /*
318 * Linelock slot size is twice the size of directory table 318 * Linelock slot size is twice the size of directory table
319 * slot size. 512 entries per page. 319 * slot size. 512 entries per page.
320 */ 320 */
321 lv->offset = ((index - 2) & 511) >> 1; 321 lv->offset = ((index - 2) & 511) >> 1;
322 lv->length = 1; 322 lv->length = 1;
@@ -615,7 +615,7 @@ int dtSearch(struct inode *ip, struct component_name * key, ino_t * data,
615 btstack->nsplit = 1; 615 btstack->nsplit = 1;
616 616
617 /* 617 /*
618 * search down tree from root: 618 * search down tree from root:
619 * 619 *
620 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of 620 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
621 * internal page, child page Pi contains entry with k, Ki <= K < Kj. 621 * internal page, child page Pi contains entry with k, Ki <= K < Kj.
@@ -659,7 +659,7 @@ int dtSearch(struct inode *ip, struct component_name * key, ino_t * data,
659 } 659 }
660 if (cmp == 0) { 660 if (cmp == 0) {
661 /* 661 /*
662 * search hit 662 * search hit
663 */ 663 */
664 /* search hit - leaf page: 664 /* search hit - leaf page:
665 * return the entry found 665 * return the entry found
@@ -723,7 +723,7 @@ int dtSearch(struct inode *ip, struct component_name * key, ino_t * data,
723 } 723 }
724 724
725 /* 725 /*
726 * search miss 726 * search miss
727 * 727 *
728 * base is the smallest index with key (Kj) greater than 728 * base is the smallest index with key (Kj) greater than
729 * search key (K) and may be zero or (maxindex + 1) index. 729 * search key (K) and may be zero or (maxindex + 1) index.
@@ -834,7 +834,7 @@ int dtInsert(tid_t tid, struct inode *ip,
834 struct lv *lv; 834 struct lv *lv;
835 835
836 /* 836 /*
837 * retrieve search result 837 * retrieve search result
838 * 838 *
839 * dtSearch() returns (leaf page pinned, index at which to insert). 839 * dtSearch() returns (leaf page pinned, index at which to insert).
840 * n.b. dtSearch() may return index of (maxindex + 1) of 840 * n.b. dtSearch() may return index of (maxindex + 1) of
@@ -843,7 +843,7 @@ int dtInsert(tid_t tid, struct inode *ip,
843 DT_GETSEARCH(ip, btstack->top, bn, mp, p, index); 843 DT_GETSEARCH(ip, btstack->top, bn, mp, p, index);
844 844
845 /* 845 /*
846 * insert entry for new key 846 * insert entry for new key
847 */ 847 */
848 if (DO_INDEX(ip)) { 848 if (DO_INDEX(ip)) {
849 if (JFS_IP(ip)->next_index == DIREND) { 849 if (JFS_IP(ip)->next_index == DIREND) {
@@ -860,9 +860,9 @@ int dtInsert(tid_t tid, struct inode *ip,
860 data.leaf.ino = *fsn; 860 data.leaf.ino = *fsn;
861 861
862 /* 862 /*
863 * leaf page does not have enough room for new entry: 863 * leaf page does not have enough room for new entry:
864 * 864 *
865 * extend/split the leaf page; 865 * extend/split the leaf page;
866 * 866 *
867 * dtSplitUp() will insert the entry and unpin the leaf page. 867 * dtSplitUp() will insert the entry and unpin the leaf page.
868 */ 868 */
@@ -877,9 +877,9 @@ int dtInsert(tid_t tid, struct inode *ip,
877 } 877 }
878 878
879 /* 879 /*
880 * leaf page does have enough room for new entry: 880 * leaf page does have enough room for new entry:
881 * 881 *
882 * insert the new data entry into the leaf page; 882 * insert the new data entry into the leaf page;
883 */ 883 */
884 BT_MARK_DIRTY(mp, ip); 884 BT_MARK_DIRTY(mp, ip);
885 /* 885 /*
@@ -967,13 +967,13 @@ static int dtSplitUp(tid_t tid,
967 } 967 }
968 968
969 /* 969 /*
970 * split leaf page 970 * split leaf page
971 * 971 *
972 * The split routines insert the new entry, and 972 * The split routines insert the new entry, and
973 * acquire txLock as appropriate. 973 * acquire txLock as appropriate.
974 */ 974 */
975 /* 975 /*
976 * split root leaf page: 976 * split root leaf page:
977 */ 977 */
978 if (sp->header.flag & BT_ROOT) { 978 if (sp->header.flag & BT_ROOT) {
979 /* 979 /*
@@ -1012,7 +1012,7 @@ static int dtSplitUp(tid_t tid,
1012 } 1012 }
1013 1013
1014 /* 1014 /*
1015 * extend first leaf page 1015 * extend first leaf page
1016 * 1016 *
1017 * extend the 1st extent if less than buffer page size 1017 * extend the 1st extent if less than buffer page size
1018 * (dtExtendPage() reurns leaf page unpinned) 1018 * (dtExtendPage() reurns leaf page unpinned)
@@ -1068,7 +1068,7 @@ static int dtSplitUp(tid_t tid,
1068 } 1068 }
1069 1069
1070 /* 1070 /*
1071 * split leaf page <sp> into <sp> and a new right page <rp>. 1071 * split leaf page <sp> into <sp> and a new right page <rp>.
1072 * 1072 *
1073 * return <rp> pinned and its extent descriptor <rpxd> 1073 * return <rp> pinned and its extent descriptor <rpxd>
1074 */ 1074 */
@@ -1433,7 +1433,7 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
1433 rp->header.freecnt = rp->header.maxslot - fsi; 1433 rp->header.freecnt = rp->header.maxslot - fsi;
1434 1434
1435 /* 1435 /*
1436 * sequential append at tail: append without split 1436 * sequential append at tail: append without split
1437 * 1437 *
1438 * If splitting the last page on a level because of appending 1438 * If splitting the last page on a level because of appending
1439 * a entry to it (skip is maxentry), it's likely that the access is 1439 * a entry to it (skip is maxentry), it's likely that the access is
@@ -1467,7 +1467,7 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
1467 } 1467 }
1468 1468
1469 /* 1469 /*
1470 * non-sequential insert (at possibly middle page) 1470 * non-sequential insert (at possibly middle page)
1471 */ 1471 */
1472 1472
1473 /* 1473 /*
@@ -1508,7 +1508,7 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
1508 left = 0; 1508 left = 0;
1509 1509
1510 /* 1510 /*
1511 * compute fill factor for split pages 1511 * compute fill factor for split pages
1512 * 1512 *
1513 * <nxt> traces the next entry to move to rp 1513 * <nxt> traces the next entry to move to rp
1514 * <off> traces the next entry to stay in sp 1514 * <off> traces the next entry to stay in sp
@@ -1551,7 +1551,7 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
1551 /* <nxt> poins to the 1st entry to move */ 1551 /* <nxt> poins to the 1st entry to move */
1552 1552
1553 /* 1553 /*
1554 * move entries to right page 1554 * move entries to right page
1555 * 1555 *
1556 * dtMoveEntry() initializes rp and reserves entry for insertion 1556 * dtMoveEntry() initializes rp and reserves entry for insertion
1557 * 1557 *
@@ -1677,7 +1677,7 @@ static int dtExtendPage(tid_t tid,
1677 return (rc); 1677 return (rc);
1678 1678
1679 /* 1679 /*
1680 * extend the extent 1680 * extend the extent
1681 */ 1681 */
1682 pxdlist = split->pxdlist; 1682 pxdlist = split->pxdlist;
1683 pxd = &pxdlist->pxd[pxdlist->npxd]; 1683 pxd = &pxdlist->pxd[pxdlist->npxd];
@@ -1722,7 +1722,7 @@ static int dtExtendPage(tid_t tid,
1722 } 1722 }
1723 1723
1724 /* 1724 /*
1725 * extend the page 1725 * extend the page
1726 */ 1726 */
1727 sp->header.self = *pxd; 1727 sp->header.self = *pxd;
1728 1728
@@ -1739,9 +1739,6 @@ static int dtExtendPage(tid_t tid,
1739 /* update buffer extent descriptor of extended page */ 1739 /* update buffer extent descriptor of extended page */
1740 xlen = lengthPXD(pxd); 1740 xlen = lengthPXD(pxd);
1741 xsize = xlen << JFS_SBI(sb)->l2bsize; 1741 xsize = xlen << JFS_SBI(sb)->l2bsize;
1742#ifdef _STILL_TO_PORT
1743 bmSetXD(smp, xaddr, xsize);
1744#endif /* _STILL_TO_PORT */
1745 1742
1746 /* 1743 /*
1747 * copy old stbl to new stbl at start of extended area 1744 * copy old stbl to new stbl at start of extended area
@@ -1836,7 +1833,7 @@ static int dtExtendPage(tid_t tid,
1836 } 1833 }
1837 1834
1838 /* 1835 /*
1839 * update parent entry on the parent/root page 1836 * update parent entry on the parent/root page
1840 */ 1837 */
1841 /* 1838 /*
1842 * acquire a transaction lock on the parent/root page 1839 * acquire a transaction lock on the parent/root page
@@ -1904,7 +1901,7 @@ static int dtSplitRoot(tid_t tid,
1904 sp = &JFS_IP(ip)->i_dtroot; 1901 sp = &JFS_IP(ip)->i_dtroot;
1905 1902
1906 /* 1903 /*
1907 * allocate/initialize a single (right) child page 1904 * allocate/initialize a single (right) child page
1908 * 1905 *
1909 * N.B. at first split, a one (or two) block to fit new entry 1906 * N.B. at first split, a one (or two) block to fit new entry
1910 * is allocated; at subsequent split, a full page is allocated; 1907 * is allocated; at subsequent split, a full page is allocated;
@@ -1943,7 +1940,7 @@ static int dtSplitRoot(tid_t tid,
1943 rp->header.prev = 0; 1940 rp->header.prev = 0;
1944 1941
1945 /* 1942 /*
1946 * move in-line root page into new right page extent 1943 * move in-line root page into new right page extent
1947 */ 1944 */
1948 /* linelock header + copied entries + new stbl (1st slot) in new page */ 1945 /* linelock header + copied entries + new stbl (1st slot) in new page */
1949 ASSERT(dtlck->index == 0); 1946 ASSERT(dtlck->index == 0);
@@ -2016,7 +2013,7 @@ static int dtSplitRoot(tid_t tid,
2016 dtInsertEntry(rp, split->index, split->key, split->data, &dtlck); 2013 dtInsertEntry(rp, split->index, split->key, split->data, &dtlck);
2017 2014
2018 /* 2015 /*
2019 * reset parent/root page 2016 * reset parent/root page
2020 * 2017 *
2021 * set the 1st entry offset to 0, which force the left-most key 2018 * set the 1st entry offset to 0, which force the left-most key
2022 * at any level of the tree to be less than any search key. 2019 * at any level of the tree to be less than any search key.
@@ -2102,7 +2099,7 @@ int dtDelete(tid_t tid,
2102 dtpage_t *np; 2099 dtpage_t *np;
2103 2100
2104 /* 2101 /*
2105 * search for the entry to delete: 2102 * search for the entry to delete:
2106 * 2103 *
2107 * dtSearch() returns (leaf page pinned, index at which to delete). 2104 * dtSearch() returns (leaf page pinned, index at which to delete).
2108 */ 2105 */
@@ -2253,7 +2250,7 @@ static int dtDeleteUp(tid_t tid, struct inode *ip,
2253 int i; 2250 int i;
2254 2251
2255 /* 2252 /*
2256 * keep the root leaf page which has become empty 2253 * keep the root leaf page which has become empty
2257 */ 2254 */
2258 if (BT_IS_ROOT(fmp)) { 2255 if (BT_IS_ROOT(fmp)) {
2259 /* 2256 /*
@@ -2269,7 +2266,7 @@ static int dtDeleteUp(tid_t tid, struct inode *ip,
2269 } 2266 }
2270 2267
2271 /* 2268 /*
2272 * free the non-root leaf page 2269 * free the non-root leaf page
2273 */ 2270 */
2274 /* 2271 /*
2275 * acquire a transaction lock on the page 2272 * acquire a transaction lock on the page
@@ -2299,7 +2296,7 @@ static int dtDeleteUp(tid_t tid, struct inode *ip,
2299 discard_metapage(fmp); 2296 discard_metapage(fmp);
2300 2297
2301 /* 2298 /*
2302 * propagate page deletion up the directory tree 2299 * propagate page deletion up the directory tree
2303 * 2300 *
2304 * If the delete from the parent page makes it empty, 2301 * If the delete from the parent page makes it empty,
2305 * continue all the way up the tree. 2302 * continue all the way up the tree.
@@ -2440,10 +2437,10 @@ static int dtDeleteUp(tid_t tid, struct inode *ip,
2440 2437
2441#ifdef _NOTYET 2438#ifdef _NOTYET
2442/* 2439/*
2443 * NAME: dtRelocate() 2440 * NAME: dtRelocate()
2444 * 2441 *
2445 * FUNCTION: relocate dtpage (internal or leaf) of directory; 2442 * FUNCTION: relocate dtpage (internal or leaf) of directory;
2446 * This function is mainly used by defragfs utility. 2443 * This function is mainly used by defragfs utility.
2447 */ 2444 */
2448int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd, 2445int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
2449 s64 nxaddr) 2446 s64 nxaddr)
@@ -2471,8 +2468,8 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
2471 xlen); 2468 xlen);
2472 2469
2473 /* 2470 /*
2474 * 1. get the internal parent dtpage covering 2471 * 1. get the internal parent dtpage covering
2475 * router entry for the tartget page to be relocated; 2472 * router entry for the tartget page to be relocated;
2476 */ 2473 */
2477 rc = dtSearchNode(ip, lmxaddr, opxd, &btstack); 2474 rc = dtSearchNode(ip, lmxaddr, opxd, &btstack);
2478 if (rc) 2475 if (rc)
@@ -2483,7 +2480,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
2483 jfs_info("dtRelocate: parent router entry validated."); 2480 jfs_info("dtRelocate: parent router entry validated.");
2484 2481
2485 /* 2482 /*
2486 * 2. relocate the target dtpage 2483 * 2. relocate the target dtpage
2487 */ 2484 */
2488 /* read in the target page from src extent */ 2485 /* read in the target page from src extent */
2489 DT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc); 2486 DT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc);
@@ -2581,9 +2578,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
2581 2578
2582 /* update the buffer extent descriptor of the dtpage */ 2579 /* update the buffer extent descriptor of the dtpage */
2583 xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize; 2580 xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize;
2584#ifdef _STILL_TO_PORT 2581
2585 bmSetXD(mp, nxaddr, xsize);
2586#endif /* _STILL_TO_PORT */
2587 /* unpin the relocated page */ 2582 /* unpin the relocated page */
2588 DT_PUTPAGE(mp); 2583 DT_PUTPAGE(mp);
2589 jfs_info("dtRelocate: target dtpage relocated."); 2584 jfs_info("dtRelocate: target dtpage relocated.");
@@ -2594,7 +2589,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
2594 */ 2589 */
2595 2590
2596 /* 2591 /*
2597 * 3. acquire maplock for the source extent to be freed; 2592 * 3. acquire maplock for the source extent to be freed;
2598 */ 2593 */
2599 /* for dtpage relocation, write a LOG_NOREDOPAGE record 2594 /* for dtpage relocation, write a LOG_NOREDOPAGE record
2600 * for the source dtpage (logredo() will init NoRedoPage 2595 * for the source dtpage (logredo() will init NoRedoPage
@@ -2609,7 +2604,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
2609 pxdlock->index = 1; 2604 pxdlock->index = 1;
2610 2605
2611 /* 2606 /*
2612 * 4. update the parent router entry for relocation; 2607 * 4. update the parent router entry for relocation;
2613 * 2608 *
2614 * acquire tlck for the parent entry covering the target dtpage; 2609 * acquire tlck for the parent entry covering the target dtpage;
2615 * write LOG_REDOPAGE to apply after image only; 2610 * write LOG_REDOPAGE to apply after image only;
@@ -2637,7 +2632,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
2637 * NAME: dtSearchNode() 2632 * NAME: dtSearchNode()
2638 * 2633 *
2639 * FUNCTION: Search for an dtpage containing a specified address 2634 * FUNCTION: Search for an dtpage containing a specified address
2640 * This function is mainly used by defragfs utility. 2635 * This function is mainly used by defragfs utility.
2641 * 2636 *
2642 * NOTE: Search result on stack, the found page is pinned at exit. 2637 * NOTE: Search result on stack, the found page is pinned at exit.
2643 * The result page must be an internal dtpage. 2638 * The result page must be an internal dtpage.
@@ -2660,7 +2655,7 @@ static int dtSearchNode(struct inode *ip, s64 lmxaddr, pxd_t * kpxd,
2660 BT_CLR(btstack); /* reset stack */ 2655 BT_CLR(btstack); /* reset stack */
2661 2656
2662 /* 2657 /*
2663 * descend tree to the level with specified leftmost page 2658 * descend tree to the level with specified leftmost page
2664 * 2659 *
2665 * by convention, root bn = 0. 2660 * by convention, root bn = 0.
2666 */ 2661 */
@@ -2699,7 +2694,7 @@ static int dtSearchNode(struct inode *ip, s64 lmxaddr, pxd_t * kpxd,
2699 } 2694 }
2700 2695
2701 /* 2696 /*
2702 * search each page at the current level 2697 * search each page at the current level
2703 */ 2698 */
2704 loop: 2699 loop:
2705 stbl = DT_GETSTBL(p); 2700 stbl = DT_GETSTBL(p);
@@ -3044,9 +3039,9 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
3044 if (DO_INDEX(ip)) { 3039 if (DO_INDEX(ip)) {
3045 /* 3040 /*
3046 * persistent index is stored in directory entries. 3041 * persistent index is stored in directory entries.
3047 * Special cases: 0 = . 3042 * Special cases: 0 = .
3048 * 1 = .. 3043 * 1 = ..
3049 * -1 = End of directory 3044 * -1 = End of directory
3050 */ 3045 */
3051 do_index = 1; 3046 do_index = 1;
3052 3047
@@ -3128,10 +3123,10 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
3128 /* 3123 /*
3129 * Legacy filesystem - OS/2 & Linux JFS < 0.3.6 3124 * Legacy filesystem - OS/2 & Linux JFS < 0.3.6
3130 * 3125 *
3131 * pn = index = 0: First entry "." 3126 * pn = index = 0: First entry "."
3132 * pn = 0; index = 1: Second entry ".." 3127 * pn = 0; index = 1: Second entry ".."
3133 * pn > 0: Real entries, pn=1 -> leftmost page 3128 * pn > 0: Real entries, pn=1 -> leftmost page
3134 * pn = index = -1: No more entries 3129 * pn = index = -1: No more entries
3135 */ 3130 */
3136 dtpos = filp->f_pos; 3131 dtpos = filp->f_pos;
3137 if (dtpos == 0) { 3132 if (dtpos == 0) {
@@ -3351,7 +3346,7 @@ static int dtReadFirst(struct inode *ip, struct btstack * btstack)
3351 BT_CLR(btstack); /* reset stack */ 3346 BT_CLR(btstack); /* reset stack */
3352 3347
3353 /* 3348 /*
3354 * descend leftmost path of the tree 3349 * descend leftmost path of the tree
3355 * 3350 *
3356 * by convention, root bn = 0. 3351 * by convention, root bn = 0.
3357 */ 3352 */
@@ -4531,7 +4526,7 @@ int dtModify(tid_t tid, struct inode *ip,
4531 struct ldtentry *entry; 4526 struct ldtentry *entry;
4532 4527
4533 /* 4528 /*
4534 * search for the entry to modify: 4529 * search for the entry to modify:
4535 * 4530 *
4536 * dtSearch() returns (leaf page pinned, index at which to modify). 4531 * dtSearch() returns (leaf page pinned, index at which to modify).
4537 */ 4532 */
diff --git a/fs/jfs/jfs_dtree.h b/fs/jfs/jfs_dtree.h
index af8513f78648..8561c6ecece0 100644
--- a/fs/jfs/jfs_dtree.h
+++ b/fs/jfs/jfs_dtree.h
@@ -35,7 +35,7 @@ typedef union {
35 35
36 36
37/* 37/*
38 * entry segment/slot 38 * entry segment/slot
39 * 39 *
40 * an entry consists of type dependent head/only segment/slot and 40 * an entry consists of type dependent head/only segment/slot and
41 * additional segments/slots linked via next field; 41 * additional segments/slots linked via next field;
diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c
index a35bdca6a805..7ae1e3281de9 100644
--- a/fs/jfs/jfs_extent.c
+++ b/fs/jfs/jfs_extent.c
@@ -34,8 +34,8 @@ static int extBrealloc(struct inode *, s64, s64, s64 *, s64 *);
34#endif 34#endif
35static s64 extRoundDown(s64 nb); 35static s64 extRoundDown(s64 nb);
36 36
37#define DPD(a) (printk("(a): %d\n",(a))) 37#define DPD(a) (printk("(a): %d\n",(a)))
38#define DPC(a) (printk("(a): %c\n",(a))) 38#define DPC(a) (printk("(a): %c\n",(a)))
39#define DPL1(a) \ 39#define DPL1(a) \
40{ \ 40{ \
41 if ((a) >> 32) \ 41 if ((a) >> 32) \
@@ -51,19 +51,19 @@ static s64 extRoundDown(s64 nb);
51 printk("(a): %x\n",(a) << 32); \ 51 printk("(a): %x\n",(a) << 32); \
52} 52}
53 53
54#define DPD1(a) (printk("(a): %d ",(a))) 54#define DPD1(a) (printk("(a): %d ",(a)))
55#define DPX(a) (printk("(a): %08x\n",(a))) 55#define DPX(a) (printk("(a): %08x\n",(a)))
56#define DPX1(a) (printk("(a): %08x ",(a))) 56#define DPX1(a) (printk("(a): %08x ",(a)))
57#define DPS(a) (printk("%s\n",(a))) 57#define DPS(a) (printk("%s\n",(a)))
58#define DPE(a) (printk("\nENTERING: %s\n",(a))) 58#define DPE(a) (printk("\nENTERING: %s\n",(a)))
59#define DPE1(a) (printk("\nENTERING: %s",(a))) 59#define DPE1(a) (printk("\nENTERING: %s",(a)))
60#define DPS1(a) (printk(" %s ",(a))) 60#define DPS1(a) (printk(" %s ",(a)))
61 61
62 62
63/* 63/*
64 * NAME: extAlloc() 64 * NAME: extAlloc()
65 * 65 *
66 * FUNCTION: allocate an extent for a specified page range within a 66 * FUNCTION: allocate an extent for a specified page range within a
67 * file. 67 * file.
68 * 68 *
69 * PARAMETERS: 69 * PARAMETERS:
@@ -78,9 +78,9 @@ static s64 extRoundDown(s64 nb);
78 * should be marked as allocated but not recorded. 78 * should be marked as allocated but not recorded.
79 * 79 *
80 * RETURN VALUES: 80 * RETURN VALUES:
81 * 0 - success 81 * 0 - success
82 * -EIO - i/o error. 82 * -EIO - i/o error.
83 * -ENOSPC - insufficient disk resources. 83 * -ENOSPC - insufficient disk resources.
84 */ 84 */
85int 85int
86extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr) 86extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr)
@@ -192,9 +192,9 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr)
192 192
193#ifdef _NOTYET 193#ifdef _NOTYET
194/* 194/*
195 * NAME: extRealloc() 195 * NAME: extRealloc()
196 * 196 *
197 * FUNCTION: extend the allocation of a file extent containing a 197 * FUNCTION: extend the allocation of a file extent containing a
198 * partial back last page. 198 * partial back last page.
199 * 199 *
200 * PARAMETERS: 200 * PARAMETERS:
@@ -207,9 +207,9 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr)
207 * should be marked as allocated but not recorded. 207 * should be marked as allocated but not recorded.
208 * 208 *
209 * RETURN VALUES: 209 * RETURN VALUES:
210 * 0 - success 210 * 0 - success
211 * -EIO - i/o error. 211 * -EIO - i/o error.
212 * -ENOSPC - insufficient disk resources. 212 * -ENOSPC - insufficient disk resources.
213 */ 213 */
214int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, bool abnr) 214int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, bool abnr)
215{ 215{
@@ -345,9 +345,9 @@ exit:
345 345
346 346
347/* 347/*
348 * NAME: extHint() 348 * NAME: extHint()
349 * 349 *
350 * FUNCTION: produce an extent allocation hint for a file offset. 350 * FUNCTION: produce an extent allocation hint for a file offset.
351 * 351 *
352 * PARAMETERS: 352 * PARAMETERS:
353 * ip - the inode of the file. 353 * ip - the inode of the file.
@@ -356,8 +356,8 @@ exit:
356 * the hint. 356 * the hint.
357 * 357 *
358 * RETURN VALUES: 358 * RETURN VALUES:
359 * 0 - success 359 * 0 - success
360 * -EIO - i/o error. 360 * -EIO - i/o error.
361 */ 361 */
362int extHint(struct inode *ip, s64 offset, xad_t * xp) 362int extHint(struct inode *ip, s64 offset, xad_t * xp)
363{ 363{
@@ -387,7 +387,7 @@ int extHint(struct inode *ip, s64 offset, xad_t * xp)
387 lxdl.nlxd = 1; 387 lxdl.nlxd = 1;
388 lxdl.lxd = &lxd; 388 lxdl.lxd = &lxd;
389 LXDoffset(&lxd, prev) 389 LXDoffset(&lxd, prev)
390 LXDlength(&lxd, nbperpage); 390 LXDlength(&lxd, nbperpage);
391 391
392 xadl.maxnxad = 1; 392 xadl.maxnxad = 1;
393 xadl.nxad = 0; 393 xadl.nxad = 0;
@@ -397,11 +397,11 @@ int extHint(struct inode *ip, s64 offset, xad_t * xp)
397 if ((rc = xtLookupList(ip, &lxdl, &xadl, 0))) 397 if ((rc = xtLookupList(ip, &lxdl, &xadl, 0)))
398 return (rc); 398 return (rc);
399 399
400 /* check if not extent exists for the previous page. 400 /* check if no extent exists for the previous page.
401 * this is possible for sparse files. 401 * this is possible for sparse files.
402 */ 402 */
403 if (xadl.nxad == 0) { 403 if (xadl.nxad == 0) {
404// assert(ISSPARSE(ip)); 404// assert(ISSPARSE(ip));
405 return (0); 405 return (0);
406 } 406 }
407 407
@@ -410,28 +410,28 @@ int extHint(struct inode *ip, s64 offset, xad_t * xp)
410 */ 410 */
411 xp->flag &= XAD_NOTRECORDED; 411 xp->flag &= XAD_NOTRECORDED;
412 412
413 if(xadl.nxad != 1 || lengthXAD(xp) != nbperpage) { 413 if(xadl.nxad != 1 || lengthXAD(xp) != nbperpage) {
414 jfs_error(ip->i_sb, "extHint: corrupt xtree"); 414 jfs_error(ip->i_sb, "extHint: corrupt xtree");
415 return -EIO; 415 return -EIO;
416 } 416 }
417 417
418 return (0); 418 return (0);
419} 419}
420 420
421 421
422/* 422/*
423 * NAME: extRecord() 423 * NAME: extRecord()
424 * 424 *
425 * FUNCTION: change a page with a file from not recorded to recorded. 425 * FUNCTION: change a page with a file from not recorded to recorded.
426 * 426 *
427 * PARAMETERS: 427 * PARAMETERS:
428 * ip - inode of the file. 428 * ip - inode of the file.
429 * cp - cbuf of the file page. 429 * cp - cbuf of the file page.
430 * 430 *
431 * RETURN VALUES: 431 * RETURN VALUES:
432 * 0 - success 432 * 0 - success
433 * -EIO - i/o error. 433 * -EIO - i/o error.
434 * -ENOSPC - insufficient disk resources. 434 * -ENOSPC - insufficient disk resources.
435 */ 435 */
436int extRecord(struct inode *ip, xad_t * xp) 436int extRecord(struct inode *ip, xad_t * xp)
437{ 437{
@@ -451,9 +451,9 @@ int extRecord(struct inode *ip, xad_t * xp)
451 451
452#ifdef _NOTYET 452#ifdef _NOTYET
453/* 453/*
454 * NAME: extFill() 454 * NAME: extFill()
455 * 455 *
456 * FUNCTION: allocate disk space for a file page that represents 456 * FUNCTION: allocate disk space for a file page that represents
457 * a file hole. 457 * a file hole.
458 * 458 *
459 * PARAMETERS: 459 * PARAMETERS:
@@ -461,16 +461,16 @@ int extRecord(struct inode *ip, xad_t * xp)
461 * cp - cbuf of the file page represent the hole. 461 * cp - cbuf of the file page represent the hole.
462 * 462 *
463 * RETURN VALUES: 463 * RETURN VALUES:
464 * 0 - success 464 * 0 - success
465 * -EIO - i/o error. 465 * -EIO - i/o error.
466 * -ENOSPC - insufficient disk resources. 466 * -ENOSPC - insufficient disk resources.
467 */ 467 */
468int extFill(struct inode *ip, xad_t * xp) 468int extFill(struct inode *ip, xad_t * xp)
469{ 469{
470 int rc, nbperpage = JFS_SBI(ip->i_sb)->nbperpage; 470 int rc, nbperpage = JFS_SBI(ip->i_sb)->nbperpage;
471 s64 blkno = offsetXAD(xp) >> ip->i_blkbits; 471 s64 blkno = offsetXAD(xp) >> ip->i_blkbits;
472 472
473// assert(ISSPARSE(ip)); 473// assert(ISSPARSE(ip));
474 474
475 /* initialize the extent allocation hint */ 475 /* initialize the extent allocation hint */
476 XADaddress(xp, 0); 476 XADaddress(xp, 0);
@@ -489,7 +489,7 @@ int extFill(struct inode *ip, xad_t * xp)
489/* 489/*
490 * NAME: extBalloc() 490 * NAME: extBalloc()
491 * 491 *
492 * FUNCTION: allocate disk blocks to form an extent. 492 * FUNCTION: allocate disk blocks to form an extent.
493 * 493 *
494 * initially, we will try to allocate disk blocks for the 494 * initially, we will try to allocate disk blocks for the
495 * requested size (nblocks). if this fails (nblocks 495 * requested size (nblocks). if this fails (nblocks
@@ -513,9 +513,9 @@ int extFill(struct inode *ip, xad_t * xp)
513 * allocated block range. 513 * allocated block range.
514 * 514 *
515 * RETURN VALUES: 515 * RETURN VALUES:
516 * 0 - success 516 * 0 - success
517 * -EIO - i/o error. 517 * -EIO - i/o error.
518 * -ENOSPC - insufficient disk resources. 518 * -ENOSPC - insufficient disk resources.
519 */ 519 */
520static int 520static int
521extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno) 521extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
@@ -580,7 +580,7 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
580/* 580/*
581 * NAME: extBrealloc() 581 * NAME: extBrealloc()
582 * 582 *
583 * FUNCTION: attempt to extend an extent's allocation. 583 * FUNCTION: attempt to extend an extent's allocation.
584 * 584 *
585 * Initially, we will try to extend the extent's allocation 585 * Initially, we will try to extend the extent's allocation
586 * in place. If this fails, we'll try to move the extent 586 * in place. If this fails, we'll try to move the extent
@@ -597,8 +597,8 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
597 * 597 *
598 * PARAMETERS: 598 * PARAMETERS:
599 * ip - the inode of the file. 599 * ip - the inode of the file.
600 * blkno - starting block number of the extents current allocation. 600 * blkno - starting block number of the extents current allocation.
601 * nblks - number of blocks within the extents current allocation. 601 * nblks - number of blocks within the extents current allocation.
602 * newnblks - pointer to a s64 value. on entry, this value is the 602 * newnblks - pointer to a s64 value. on entry, this value is the
603 * the new desired extent size (number of blocks). on 603 * the new desired extent size (number of blocks). on
604 * successful exit, this value is set to the extent's actual 604 * successful exit, this value is set to the extent's actual
@@ -606,9 +606,9 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
606 * newblkno - the starting block number of the extents new allocation. 606 * newblkno - the starting block number of the extents new allocation.
607 * 607 *
608 * RETURN VALUES: 608 * RETURN VALUES:
609 * 0 - success 609 * 0 - success
610 * -EIO - i/o error. 610 * -EIO - i/o error.
611 * -ENOSPC - insufficient disk resources. 611 * -ENOSPC - insufficient disk resources.
612 */ 612 */
613static int 613static int
614extBrealloc(struct inode *ip, 614extBrealloc(struct inode *ip,
@@ -634,16 +634,16 @@ extBrealloc(struct inode *ip,
634 634
635 635
636/* 636/*
637 * NAME: extRoundDown() 637 * NAME: extRoundDown()
638 * 638 *
639 * FUNCTION: round down a specified number of blocks to the next 639 * FUNCTION: round down a specified number of blocks to the next
640 * smallest power of 2 number. 640 * smallest power of 2 number.
641 * 641 *
642 * PARAMETERS: 642 * PARAMETERS:
643 * nb - the number of blocks to round down. 643 * nb - the number of blocks to round down.
644 * 644 *
645 * RETURN VALUES: 645 * RETURN VALUES:
646 * next smallest power of 2 number. 646 * next smallest power of 2 number.
647 */ 647 */
648static s64 extRoundDown(s64 nb) 648static s64 extRoundDown(s64 nb)
649{ 649{
diff --git a/fs/jfs/jfs_filsys.h b/fs/jfs/jfs_filsys.h
index 38f70ac03bec..b3f5463fbe52 100644
--- a/fs/jfs/jfs_filsys.h
+++ b/fs/jfs/jfs_filsys.h
@@ -34,9 +34,9 @@
34#define JFS_UNICODE 0x00000001 /* unicode name */ 34#define JFS_UNICODE 0x00000001 /* unicode name */
35 35
36/* mount time flags for error handling */ 36/* mount time flags for error handling */
37#define JFS_ERR_REMOUNT_RO 0x00000002 /* remount read-only */ 37#define JFS_ERR_REMOUNT_RO 0x00000002 /* remount read-only */
38#define JFS_ERR_CONTINUE 0x00000004 /* continue */ 38#define JFS_ERR_CONTINUE 0x00000004 /* continue */
39#define JFS_ERR_PANIC 0x00000008 /* panic */ 39#define JFS_ERR_PANIC 0x00000008 /* panic */
40 40
41/* Quota support */ 41/* Quota support */
42#define JFS_USRQUOTA 0x00000010 42#define JFS_USRQUOTA 0x00000010
@@ -83,7 +83,6 @@
83/* case-insensitive name/directory support */ 83/* case-insensitive name/directory support */
84 84
85#define JFS_AIX 0x80000000 /* AIX support */ 85#define JFS_AIX 0x80000000 /* AIX support */
86/* POSIX name/directory support - Never implemented*/
87 86
88/* 87/*
89 * buffer cache configuration 88 * buffer cache configuration
@@ -113,10 +112,10 @@
113#define IDATASIZE 256 /* inode inline data size */ 112#define IDATASIZE 256 /* inode inline data size */
114#define IXATTRSIZE 128 /* inode inline extended attribute size */ 113#define IXATTRSIZE 128 /* inode inline extended attribute size */
115 114
116#define XTPAGE_SIZE 4096 115#define XTPAGE_SIZE 4096
117#define log2_PAGESIZE 12 116#define log2_PAGESIZE 12
118 117
119#define IAG_SIZE 4096 118#define IAG_SIZE 4096
120#define IAG_EXTENT_SIZE 4096 119#define IAG_EXTENT_SIZE 4096
121#define INOSPERIAG 4096 /* number of disk inodes per iag */ 120#define INOSPERIAG 4096 /* number of disk inodes per iag */
122#define L2INOSPERIAG 12 /* l2 number of disk inodes per iag */ 121#define L2INOSPERIAG 12 /* l2 number of disk inodes per iag */
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index c6530227cda6..3870ba8b9086 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -93,21 +93,21 @@ static int copy_from_dinode(struct dinode *, struct inode *);
93static void copy_to_dinode(struct dinode *, struct inode *); 93static void copy_to_dinode(struct dinode *, struct inode *);
94 94
95/* 95/*
96 * NAME: diMount() 96 * NAME: diMount()
97 * 97 *
98 * FUNCTION: initialize the incore inode map control structures for 98 * FUNCTION: initialize the incore inode map control structures for
99 * a fileset or aggregate init time. 99 * a fileset or aggregate init time.
100 * 100 *
101 * the inode map's control structure (dinomap) is 101 * the inode map's control structure (dinomap) is
102 * brought in from disk and placed in virtual memory. 102 * brought in from disk and placed in virtual memory.
103 * 103 *
104 * PARAMETERS: 104 * PARAMETERS:
105 * ipimap - pointer to inode map inode for the aggregate or fileset. 105 * ipimap - pointer to inode map inode for the aggregate or fileset.
106 * 106 *
107 * RETURN VALUES: 107 * RETURN VALUES:
108 * 0 - success 108 * 0 - success
109 * -ENOMEM - insufficient free virtual memory. 109 * -ENOMEM - insufficient free virtual memory.
110 * -EIO - i/o error. 110 * -EIO - i/o error.
111 */ 111 */
112int diMount(struct inode *ipimap) 112int diMount(struct inode *ipimap)
113{ 113{
@@ -180,18 +180,18 @@ int diMount(struct inode *ipimap)
180 180
181 181
182/* 182/*
183 * NAME: diUnmount() 183 * NAME: diUnmount()
184 * 184 *
185 * FUNCTION: write to disk the incore inode map control structures for 185 * FUNCTION: write to disk the incore inode map control structures for
186 * a fileset or aggregate at unmount time. 186 * a fileset or aggregate at unmount time.
187 * 187 *
188 * PARAMETERS: 188 * PARAMETERS:
189 * ipimap - pointer to inode map inode for the aggregate or fileset. 189 * ipimap - pointer to inode map inode for the aggregate or fileset.
190 * 190 *
191 * RETURN VALUES: 191 * RETURN VALUES:
192 * 0 - success 192 * 0 - success
193 * -ENOMEM - insufficient free virtual memory. 193 * -ENOMEM - insufficient free virtual memory.
194 * -EIO - i/o error. 194 * -EIO - i/o error.
195 */ 195 */
196int diUnmount(struct inode *ipimap, int mounterror) 196int diUnmount(struct inode *ipimap, int mounterror)
197{ 197{
@@ -274,9 +274,9 @@ int diSync(struct inode *ipimap)
274 274
275 275
276/* 276/*
277 * NAME: diRead() 277 * NAME: diRead()
278 * 278 *
279 * FUNCTION: initialize an incore inode from disk. 279 * FUNCTION: initialize an incore inode from disk.
280 * 280 *
281 * on entry, the specified incore inode should itself 281 * on entry, the specified incore inode should itself
282 * specify the disk inode number corresponding to the 282 * specify the disk inode number corresponding to the
@@ -285,7 +285,7 @@ int diSync(struct inode *ipimap)
285 * this routine handles incore inode initialization for 285 * this routine handles incore inode initialization for
286 * both "special" and "regular" inodes. special inodes 286 * both "special" and "regular" inodes. special inodes
287 * are those required early in the mount process and 287 * are those required early in the mount process and
288 * require special handling since much of the file system 288 * require special handling since much of the file system
289 * is not yet initialized. these "special" inodes are 289 * is not yet initialized. these "special" inodes are
290 * identified by a NULL inode map inode pointer and are 290 * identified by a NULL inode map inode pointer and are
291 * actually initialized by a call to diReadSpecial(). 291 * actually initialized by a call to diReadSpecial().
@@ -298,12 +298,12 @@ int diSync(struct inode *ipimap)
298 * incore inode. 298 * incore inode.
299 * 299 *
300 * PARAMETERS: 300 * PARAMETERS:
301 * ip - pointer to incore inode to be initialized from disk. 301 * ip - pointer to incore inode to be initialized from disk.
302 * 302 *
303 * RETURN VALUES: 303 * RETURN VALUES:
304 * 0 - success 304 * 0 - success
305 * -EIO - i/o error. 305 * -EIO - i/o error.
306 * -ENOMEM - insufficient memory 306 * -ENOMEM - insufficient memory
307 * 307 *
308 */ 308 */
309int diRead(struct inode *ip) 309int diRead(struct inode *ip)
@@ -410,26 +410,26 @@ int diRead(struct inode *ip)
410 410
411 411
412/* 412/*
413 * NAME: diReadSpecial() 413 * NAME: diReadSpecial()
414 * 414 *
415 * FUNCTION: initialize a 'special' inode from disk. 415 * FUNCTION: initialize a 'special' inode from disk.
416 * 416 *
417 * this routine handles aggregate level inodes. The 417 * this routine handles aggregate level inodes. The
418 * inode cache cannot differentiate between the 418 * inode cache cannot differentiate between the
419 * aggregate inodes and the filesystem inodes, so we 419 * aggregate inodes and the filesystem inodes, so we
420 * handle these here. We don't actually use the aggregate 420 * handle these here. We don't actually use the aggregate
421 * inode map, since these inodes are at a fixed location 421 * inode map, since these inodes are at a fixed location
422 * and in some cases the aggregate inode map isn't initialized 422 * and in some cases the aggregate inode map isn't initialized
423 * yet. 423 * yet.
424 * 424 *
425 * PARAMETERS: 425 * PARAMETERS:
426 * sb - filesystem superblock 426 * sb - filesystem superblock
427 * inum - aggregate inode number 427 * inum - aggregate inode number
428 * secondary - 1 if secondary aggregate inode table 428 * secondary - 1 if secondary aggregate inode table
429 * 429 *
430 * RETURN VALUES: 430 * RETURN VALUES:
431 * new inode - success 431 * new inode - success
432 * NULL - i/o error. 432 * NULL - i/o error.
433 */ 433 */
434struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary) 434struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
435{ 435{
@@ -502,12 +502,12 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
502} 502}
503 503
504/* 504/*
505 * NAME: diWriteSpecial() 505 * NAME: diWriteSpecial()
506 * 506 *
507 * FUNCTION: Write the special inode to disk 507 * FUNCTION: Write the special inode to disk
508 * 508 *
509 * PARAMETERS: 509 * PARAMETERS:
510 * ip - special inode 510 * ip - special inode
511 * secondary - 1 if secondary aggregate inode table 511 * secondary - 1 if secondary aggregate inode table
512 * 512 *
513 * RETURN VALUES: none 513 * RETURN VALUES: none
@@ -554,9 +554,9 @@ void diWriteSpecial(struct inode *ip, int secondary)
554} 554}
555 555
556/* 556/*
557 * NAME: diFreeSpecial() 557 * NAME: diFreeSpecial()
558 * 558 *
559 * FUNCTION: Free allocated space for special inode 559 * FUNCTION: Free allocated space for special inode
560 */ 560 */
561void diFreeSpecial(struct inode *ip) 561void diFreeSpecial(struct inode *ip)
562{ 562{
@@ -572,9 +572,9 @@ void diFreeSpecial(struct inode *ip)
572 572
573 573
574/* 574/*
575 * NAME: diWrite() 575 * NAME: diWrite()
576 * 576 *
577 * FUNCTION: write the on-disk inode portion of the in-memory inode 577 * FUNCTION: write the on-disk inode portion of the in-memory inode
578 * to its corresponding on-disk inode. 578 * to its corresponding on-disk inode.
579 * 579 *
580 * on entry, the specified incore inode should itself 580 * on entry, the specified incore inode should itself
@@ -589,11 +589,11 @@ void diFreeSpecial(struct inode *ip)
589 * 589 *
590 * PARAMETERS: 590 * PARAMETERS:
591 * tid - transaction id 591 * tid - transaction id
592 * ip - pointer to incore inode to be written to the inode extent. 592 * ip - pointer to incore inode to be written to the inode extent.
593 * 593 *
594 * RETURN VALUES: 594 * RETURN VALUES:
595 * 0 - success 595 * 0 - success
596 * -EIO - i/o error. 596 * -EIO - i/o error.
597 */ 597 */
598int diWrite(tid_t tid, struct inode *ip) 598int diWrite(tid_t tid, struct inode *ip)
599{ 599{
@@ -730,7 +730,7 @@ int diWrite(tid_t tid, struct inode *ip)
730 ilinelock = (struct linelock *) & tlck->lock; 730 ilinelock = (struct linelock *) & tlck->lock;
731 731
732 /* 732 /*
733 * regular file: 16 byte (XAD slot) granularity 733 * regular file: 16 byte (XAD slot) granularity
734 */ 734 */
735 if (type & tlckXTREE) { 735 if (type & tlckXTREE) {
736 xtpage_t *p, *xp; 736 xtpage_t *p, *xp;
@@ -755,7 +755,7 @@ int diWrite(tid_t tid, struct inode *ip)
755 xad->flag &= ~(XAD_NEW | XAD_EXTENDED); 755 xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
756 } 756 }
757 /* 757 /*
758 * directory: 32 byte (directory entry slot) granularity 758 * directory: 32 byte (directory entry slot) granularity
759 */ 759 */
760 else if (type & tlckDTREE) { 760 else if (type & tlckDTREE) {
761 dtpage_t *p, *xp; 761 dtpage_t *p, *xp;
@@ -800,9 +800,8 @@ int diWrite(tid_t tid, struct inode *ip)
800 } 800 }
801 801
802 /* 802 /*
803 * lock/copy inode base: 128 byte slot granularity 803 * lock/copy inode base: 128 byte slot granularity
804 */ 804 */
805// baseDinode:
806 lv = & dilinelock->lv[dilinelock->index]; 805 lv = & dilinelock->lv[dilinelock->index];
807 lv->offset = dioffset >> L2INODESLOTSIZE; 806 lv->offset = dioffset >> L2INODESLOTSIZE;
808 copy_to_dinode(dp, ip); 807 copy_to_dinode(dp, ip);
@@ -813,17 +812,6 @@ int diWrite(tid_t tid, struct inode *ip)
813 lv->length = 1; 812 lv->length = 1;
814 dilinelock->index++; 813 dilinelock->index++;
815 814
816#ifdef _JFS_FASTDASD
817 /*
818 * We aren't logging changes to the DASD used in directory inodes,
819 * but we need to write them to disk. If we don't unmount cleanly,
820 * mount will recalculate the DASD used.
821 */
822 if (S_ISDIR(ip->i_mode)
823 && (ip->i_ipmnt->i_mntflag & JFS_DASD_ENABLED))
824 memcpy(&dp->di_DASD, &ip->i_DASD, sizeof(struct dasd));
825#endif /* _JFS_FASTDASD */
826
827 /* release the buffer holding the updated on-disk inode. 815 /* release the buffer holding the updated on-disk inode.
828 * the buffer will be later written by commit processing. 816 * the buffer will be later written by commit processing.
829 */ 817 */
@@ -834,9 +822,9 @@ int diWrite(tid_t tid, struct inode *ip)
834 822
835 823
836/* 824/*
837 * NAME: diFree(ip) 825 * NAME: diFree(ip)
838 * 826 *
839 * FUNCTION: free a specified inode from the inode working map 827 * FUNCTION: free a specified inode from the inode working map
840 * for a fileset or aggregate. 828 * for a fileset or aggregate.
841 * 829 *
842 * if the inode to be freed represents the first (only) 830 * if the inode to be freed represents the first (only)
@@ -865,11 +853,11 @@ int diWrite(tid_t tid, struct inode *ip)
865 * any updates and are held until all updates are complete. 853 * any updates and are held until all updates are complete.
866 * 854 *
867 * PARAMETERS: 855 * PARAMETERS:
868 * ip - inode to be freed. 856 * ip - inode to be freed.
869 * 857 *
870 * RETURN VALUES: 858 * RETURN VALUES:
871 * 0 - success 859 * 0 - success
872 * -EIO - i/o error. 860 * -EIO - i/o error.
873 */ 861 */
874int diFree(struct inode *ip) 862int diFree(struct inode *ip)
875{ 863{
@@ -902,7 +890,8 @@ int diFree(struct inode *ip)
902 * the map. 890 * the map.
903 */ 891 */
904 if (iagno >= imap->im_nextiag) { 892 if (iagno >= imap->im_nextiag) {
905 dump_mem("imap", imap, 32); 893 print_hex_dump(KERN_ERR, "imap: ", DUMP_PREFIX_ADDRESS, 16, 4,
894 imap, 32, 0);
906 jfs_error(ip->i_sb, 895 jfs_error(ip->i_sb,
907 "diFree: inum = %d, iagno = %d, nextiag = %d", 896 "diFree: inum = %d, iagno = %d, nextiag = %d",
908 (uint) inum, iagno, imap->im_nextiag); 897 (uint) inum, iagno, imap->im_nextiag);
@@ -964,8 +953,8 @@ int diFree(struct inode *ip)
964 return -EIO; 953 return -EIO;
965 } 954 }
966 /* 955 /*
967 * inode extent still has some inodes or below low water mark: 956 * inode extent still has some inodes or below low water mark:
968 * keep the inode extent; 957 * keep the inode extent;
969 */ 958 */
970 if (bitmap || 959 if (bitmap ||
971 imap->im_agctl[agno].numfree < 96 || 960 imap->im_agctl[agno].numfree < 96 ||
@@ -1047,12 +1036,12 @@ int diFree(struct inode *ip)
1047 1036
1048 1037
1049 /* 1038 /*
1050 * inode extent has become free and above low water mark: 1039 * inode extent has become free and above low water mark:
1051 * free the inode extent; 1040 * free the inode extent;
1052 */ 1041 */
1053 1042
1054 /* 1043 /*
1055 * prepare to update iag list(s) (careful update step 1) 1044 * prepare to update iag list(s) (careful update step 1)
1056 */ 1045 */
1057 amp = bmp = cmp = dmp = NULL; 1046 amp = bmp = cmp = dmp = NULL;
1058 fwd = back = -1; 1047 fwd = back = -1;
@@ -1152,7 +1141,7 @@ int diFree(struct inode *ip)
1152 invalidate_pxd_metapages(ip, freepxd); 1141 invalidate_pxd_metapages(ip, freepxd);
1153 1142
1154 /* 1143 /*
1155 * update iag list(s) (careful update step 2) 1144 * update iag list(s) (careful update step 2)
1156 */ 1145 */
1157 /* add the iag to the ag extent free list if this is the 1146 /* add the iag to the ag extent free list if this is the
1158 * first free extent for the iag. 1147 * first free extent for the iag.
@@ -1338,20 +1327,20 @@ diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp)
1338 1327
1339 1328
1340/* 1329/*
1341 * NAME: diAlloc(pip,dir,ip) 1330 * NAME: diAlloc(pip,dir,ip)
1342 * 1331 *
1343 * FUNCTION: allocate a disk inode from the inode working map 1332 * FUNCTION: allocate a disk inode from the inode working map
1344 * for a fileset or aggregate. 1333 * for a fileset or aggregate.
1345 * 1334 *
1346 * PARAMETERS: 1335 * PARAMETERS:
1347 * pip - pointer to incore inode for the parent inode. 1336 * pip - pointer to incore inode for the parent inode.
1348 * dir - 'true' if the new disk inode is for a directory. 1337 * dir - 'true' if the new disk inode is for a directory.
1349 * ip - pointer to a new inode 1338 * ip - pointer to a new inode
1350 * 1339 *
1351 * RETURN VALUES: 1340 * RETURN VALUES:
1352 * 0 - success. 1341 * 0 - success.
1353 * -ENOSPC - insufficient disk resources. 1342 * -ENOSPC - insufficient disk resources.
1354 * -EIO - i/o error. 1343 * -EIO - i/o error.
1355 */ 1344 */
1356int diAlloc(struct inode *pip, bool dir, struct inode *ip) 1345int diAlloc(struct inode *pip, bool dir, struct inode *ip)
1357{ 1346{
@@ -1433,7 +1422,7 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip)
1433 addext = (imap->im_agctl[agno].numfree < 32 && iagp->nfreeexts); 1422 addext = (imap->im_agctl[agno].numfree < 32 && iagp->nfreeexts);
1434 1423
1435 /* 1424 /*
1436 * try to allocate from the IAG 1425 * try to allocate from the IAG
1437 */ 1426 */
1438 /* check if the inode may be allocated from the iag 1427 /* check if the inode may be allocated from the iag
1439 * (i.e. the iag has free inodes or new extent can be added). 1428 * (i.e. the iag has free inodes or new extent can be added).
@@ -1633,9 +1622,9 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip)
1633 1622
1634 1623
1635/* 1624/*
1636 * NAME: diAllocAG(imap,agno,dir,ip) 1625 * NAME: diAllocAG(imap,agno,dir,ip)
1637 * 1626 *
1638 * FUNCTION: allocate a disk inode from the allocation group. 1627 * FUNCTION: allocate a disk inode from the allocation group.
1639 * 1628 *
1640 * this routine first determines if a new extent of free 1629 * this routine first determines if a new extent of free
1641 * inodes should be added for the allocation group, with 1630 * inodes should be added for the allocation group, with
@@ -1649,17 +1638,17 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip)
1649 * PRE CONDITION: Already have the AG lock for this AG. 1638 * PRE CONDITION: Already have the AG lock for this AG.
1650 * 1639 *
1651 * PARAMETERS: 1640 * PARAMETERS:
1652 * imap - pointer to inode map control structure. 1641 * imap - pointer to inode map control structure.
1653 * agno - allocation group to allocate from. 1642 * agno - allocation group to allocate from.
1654 * dir - 'true' if the new disk inode is for a directory. 1643 * dir - 'true' if the new disk inode is for a directory.
1655 * ip - pointer to the new inode to be filled in on successful return 1644 * ip - pointer to the new inode to be filled in on successful return
1656 * with the disk inode number allocated, its extent address 1645 * with the disk inode number allocated, its extent address
1657 * and the start of the ag. 1646 * and the start of the ag.
1658 * 1647 *
1659 * RETURN VALUES: 1648 * RETURN VALUES:
1660 * 0 - success. 1649 * 0 - success.
1661 * -ENOSPC - insufficient disk resources. 1650 * -ENOSPC - insufficient disk resources.
1662 * -EIO - i/o error. 1651 * -EIO - i/o error.
1663 */ 1652 */
1664static int 1653static int
1665diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip) 1654diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip)
@@ -1709,9 +1698,9 @@ diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip)
1709 1698
1710 1699
1711/* 1700/*
1712 * NAME: diAllocAny(imap,agno,dir,iap) 1701 * NAME: diAllocAny(imap,agno,dir,iap)
1713 * 1702 *
1714 * FUNCTION: allocate a disk inode from any other allocation group. 1703 * FUNCTION: allocate a disk inode from any other allocation group.
1715 * 1704 *
1716 * this routine is called when an allocation attempt within 1705 * this routine is called when an allocation attempt within
1717 * the primary allocation group has failed. if attempts to 1706 * the primary allocation group has failed. if attempts to
@@ -1719,17 +1708,17 @@ diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip)
1719 * specified primary group. 1708 * specified primary group.
1720 * 1709 *
1721 * PARAMETERS: 1710 * PARAMETERS:
1722 * imap - pointer to inode map control structure. 1711 * imap - pointer to inode map control structure.
1723 * agno - primary allocation group (to avoid). 1712 * agno - primary allocation group (to avoid).
1724 * dir - 'true' if the new disk inode is for a directory. 1713 * dir - 'true' if the new disk inode is for a directory.
1725 * ip - pointer to a new inode to be filled in on successful return 1714 * ip - pointer to a new inode to be filled in on successful return
1726 * with the disk inode number allocated, its extent address 1715 * with the disk inode number allocated, its extent address
1727 * and the start of the ag. 1716 * and the start of the ag.
1728 * 1717 *
1729 * RETURN VALUES: 1718 * RETURN VALUES:
1730 * 0 - success. 1719 * 0 - success.
1731 * -ENOSPC - insufficient disk resources. 1720 * -ENOSPC - insufficient disk resources.
1732 * -EIO - i/o error. 1721 * -EIO - i/o error.
1733 */ 1722 */
1734static int 1723static int
1735diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip) 1724diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip)
@@ -1772,9 +1761,9 @@ diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip)
1772 1761
1773 1762
1774/* 1763/*
1775 * NAME: diAllocIno(imap,agno,ip) 1764 * NAME: diAllocIno(imap,agno,ip)
1776 * 1765 *
1777 * FUNCTION: allocate a disk inode from the allocation group's free 1766 * FUNCTION: allocate a disk inode from the allocation group's free
1778 * inode list, returning an error if this free list is 1767 * inode list, returning an error if this free list is
1779 * empty (i.e. no iags on the list). 1768 * empty (i.e. no iags on the list).
1780 * 1769 *
@@ -1785,16 +1774,16 @@ diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip)
1785 * PRE CONDITION: Already have AG lock for this AG. 1774 * PRE CONDITION: Already have AG lock for this AG.
1786 * 1775 *
1787 * PARAMETERS: 1776 * PARAMETERS:
1788 * imap - pointer to inode map control structure. 1777 * imap - pointer to inode map control structure.
1789 * agno - allocation group. 1778 * agno - allocation group.
1790 * ip - pointer to new inode to be filled in on successful return 1779 * ip - pointer to new inode to be filled in on successful return
1791 * with the disk inode number allocated, its extent address 1780 * with the disk inode number allocated, its extent address
1792 * and the start of the ag. 1781 * and the start of the ag.
1793 * 1782 *
1794 * RETURN VALUES: 1783 * RETURN VALUES:
1795 * 0 - success. 1784 * 0 - success.
1796 * -ENOSPC - insufficient disk resources. 1785 * -ENOSPC - insufficient disk resources.
1797 * -EIO - i/o error. 1786 * -EIO - i/o error.
1798 */ 1787 */
1799static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) 1788static int diAllocIno(struct inomap * imap, int agno, struct inode *ip)
1800{ 1789{
@@ -1890,7 +1879,7 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip)
1890 1879
1891 1880
1892/* 1881/*
1893 * NAME: diAllocExt(imap,agno,ip) 1882 * NAME: diAllocExt(imap,agno,ip)
1894 * 1883 *
1895 * FUNCTION: add a new extent of free inodes to an iag, allocating 1884 * FUNCTION: add a new extent of free inodes to an iag, allocating
1896 * an inode from this extent to satisfy the current allocation 1885 * an inode from this extent to satisfy the current allocation
@@ -1910,16 +1899,16 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip)
1910 * for the purpose of satisfying this request. 1899 * for the purpose of satisfying this request.
1911 * 1900 *
1912 * PARAMETERS: 1901 * PARAMETERS:
1913 * imap - pointer to inode map control structure. 1902 * imap - pointer to inode map control structure.
1914 * agno - allocation group number. 1903 * agno - allocation group number.
1915 * ip - pointer to new inode to be filled in on successful return 1904 * ip - pointer to new inode to be filled in on successful return
1916 * with the disk inode number allocated, its extent address 1905 * with the disk inode number allocated, its extent address
1917 * and the start of the ag. 1906 * and the start of the ag.
1918 * 1907 *
1919 * RETURN VALUES: 1908 * RETURN VALUES:
1920 * 0 - success. 1909 * 0 - success.
1921 * -ENOSPC - insufficient disk resources. 1910 * -ENOSPC - insufficient disk resources.
1922 * -EIO - i/o error. 1911 * -EIO - i/o error.
1923 */ 1912 */
1924static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) 1913static int diAllocExt(struct inomap * imap, int agno, struct inode *ip)
1925{ 1914{
@@ -2010,7 +1999,7 @@ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip)
2010 1999
2011 2000
2012/* 2001/*
2013 * NAME: diAllocBit(imap,iagp,ino) 2002 * NAME: diAllocBit(imap,iagp,ino)
2014 * 2003 *
2015 * FUNCTION: allocate a backed inode from an iag. 2004 * FUNCTION: allocate a backed inode from an iag.
2016 * 2005 *
@@ -2030,14 +2019,14 @@ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip)
2030 * this AG. Must have read lock on imap inode. 2019 * this AG. Must have read lock on imap inode.
2031 * 2020 *
2032 * PARAMETERS: 2021 * PARAMETERS:
2033 * imap - pointer to inode map control structure. 2022 * imap - pointer to inode map control structure.
2034 * iagp - pointer to iag. 2023 * iagp - pointer to iag.
2035 * ino - inode number to be allocated within the iag. 2024 * ino - inode number to be allocated within the iag.
2036 * 2025 *
2037 * RETURN VALUES: 2026 * RETURN VALUES:
2038 * 0 - success. 2027 * 0 - success.
2039 * -ENOSPC - insufficient disk resources. 2028 * -ENOSPC - insufficient disk resources.
2040 * -EIO - i/o error. 2029 * -EIO - i/o error.
2041 */ 2030 */
2042static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino) 2031static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino)
2043{ 2032{
@@ -2144,11 +2133,11 @@ static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino)
2144 2133
2145 2134
2146/* 2135/*
2147 * NAME: diNewExt(imap,iagp,extno) 2136 * NAME: diNewExt(imap,iagp,extno)
2148 * 2137 *
2149 * FUNCTION: initialize a new extent of inodes for an iag, allocating 2138 * FUNCTION: initialize a new extent of inodes for an iag, allocating
2150 * the first inode of the extent for use for the current 2139 * the first inode of the extent for use for the current
2151 * allocation request. 2140 * allocation request.
2152 * 2141 *
2153 * disk resources are allocated for the new extent of inodes 2142 * disk resources are allocated for the new extent of inodes
2154 * and the inodes themselves are initialized to reflect their 2143 * and the inodes themselves are initialized to reflect their
@@ -2177,14 +2166,14 @@ static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino)
2177 * this AG. Must have read lock on imap inode. 2166 * this AG. Must have read lock on imap inode.
2178 * 2167 *
2179 * PARAMETERS: 2168 * PARAMETERS:
2180 * imap - pointer to inode map control structure. 2169 * imap - pointer to inode map control structure.
2181 * iagp - pointer to iag. 2170 * iagp - pointer to iag.
2182 * extno - extent number. 2171 * extno - extent number.
2183 * 2172 *
2184 * RETURN VALUES: 2173 * RETURN VALUES:
2185 * 0 - success. 2174 * 0 - success.
2186 * -ENOSPC - insufficient disk resources. 2175 * -ENOSPC - insufficient disk resources.
2187 * -EIO - i/o error. 2176 * -EIO - i/o error.
2188 */ 2177 */
2189static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) 2178static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
2190{ 2179{
@@ -2430,7 +2419,7 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
2430 2419
2431 2420
2432/* 2421/*
2433 * NAME: diNewIAG(imap,iagnop,agno) 2422 * NAME: diNewIAG(imap,iagnop,agno)
2434 * 2423 *
2435 * FUNCTION: allocate a new iag for an allocation group. 2424 * FUNCTION: allocate a new iag for an allocation group.
2436 * 2425 *
@@ -2443,16 +2432,16 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
2443 * and returned to satisfy the request. 2432 * and returned to satisfy the request.
2444 * 2433 *
2445 * PARAMETERS: 2434 * PARAMETERS:
2446 * imap - pointer to inode map control structure. 2435 * imap - pointer to inode map control structure.
2447 * iagnop - pointer to an iag number set with the number of the 2436 * iagnop - pointer to an iag number set with the number of the
2448 * newly allocated iag upon successful return. 2437 * newly allocated iag upon successful return.
2449 * agno - allocation group number. 2438 * agno - allocation group number.
2450 * bpp - Buffer pointer to be filled in with new IAG's buffer 2439 * bpp - Buffer pointer to be filled in with new IAG's buffer
2451 * 2440 *
2452 * RETURN VALUES: 2441 * RETURN VALUES:
2453 * 0 - success. 2442 * 0 - success.
2454 * -ENOSPC - insufficient disk resources. 2443 * -ENOSPC - insufficient disk resources.
2455 * -EIO - i/o error. 2444 * -EIO - i/o error.
2456 * 2445 *
2457 * serialization: 2446 * serialization:
2458 * AG lock held on entry/exit; 2447 * AG lock held on entry/exit;
@@ -2461,7 +2450,7 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
2461 * 2450 *
2462 * note: new iag transaction: 2451 * note: new iag transaction:
2463 * . synchronously write iag; 2452 * . synchronously write iag;
2464 * . write log of xtree and inode of imap; 2453 * . write log of xtree and inode of imap;
2465 * . commit; 2454 * . commit;
2466 * . synchronous write of xtree (right to left, bottom to top); 2455 * . synchronous write of xtree (right to left, bottom to top);
2467 * . at start of logredo(): init in-memory imap with one additional iag page; 2456 * . at start of logredo(): init in-memory imap with one additional iag page;
@@ -2481,9 +2470,6 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
2481 s64 xaddr = 0; 2470 s64 xaddr = 0;
2482 s64 blkno; 2471 s64 blkno;
2483 tid_t tid; 2472 tid_t tid;
2484#ifdef _STILL_TO_PORT
2485 xad_t xad;
2486#endif /* _STILL_TO_PORT */
2487 struct inode *iplist[1]; 2473 struct inode *iplist[1];
2488 2474
2489 /* pick up pointers to the inode map and mount inodes */ 2475 /* pick up pointers to the inode map and mount inodes */
@@ -2674,15 +2660,15 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
2674} 2660}
2675 2661
2676/* 2662/*
2677 * NAME: diIAGRead() 2663 * NAME: diIAGRead()
2678 * 2664 *
2679 * FUNCTION: get the buffer for the specified iag within a fileset 2665 * FUNCTION: get the buffer for the specified iag within a fileset
2680 * or aggregate inode map. 2666 * or aggregate inode map.
2681 * 2667 *
2682 * PARAMETERS: 2668 * PARAMETERS:
2683 * imap - pointer to inode map control structure. 2669 * imap - pointer to inode map control structure.
2684 * iagno - iag number. 2670 * iagno - iag number.
2685 * bpp - point to buffer pointer to be filled in on successful 2671 * bpp - point to buffer pointer to be filled in on successful
2686 * exit. 2672 * exit.
2687 * 2673 *
2688 * SERIALIZATION: 2674 * SERIALIZATION:
@@ -2691,8 +2677,8 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
2691 * the read lock is unnecessary.) 2677 * the read lock is unnecessary.)
2692 * 2678 *
2693 * RETURN VALUES: 2679 * RETURN VALUES:
2694 * 0 - success. 2680 * 0 - success.
2695 * -EIO - i/o error. 2681 * -EIO - i/o error.
2696 */ 2682 */
2697static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp) 2683static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp)
2698{ 2684{
@@ -2712,17 +2698,17 @@ static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp)
2712} 2698}
2713 2699
2714/* 2700/*
2715 * NAME: diFindFree() 2701 * NAME: diFindFree()
2716 * 2702 *
2717 * FUNCTION: find the first free bit in a word starting at 2703 * FUNCTION: find the first free bit in a word starting at
2718 * the specified bit position. 2704 * the specified bit position.
2719 * 2705 *
2720 * PARAMETERS: 2706 * PARAMETERS:
2721 * word - word to be examined. 2707 * word - word to be examined.
2722 * start - starting bit position. 2708 * start - starting bit position.
2723 * 2709 *
2724 * RETURN VALUES: 2710 * RETURN VALUES:
2725 * bit position of first free bit in the word or 32 if 2711 * bit position of first free bit in the word or 32 if
2726 * no free bits were found. 2712 * no free bits were found.
2727 */ 2713 */
2728static int diFindFree(u32 word, int start) 2714static int diFindFree(u32 word, int start)
@@ -2897,7 +2883,7 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap)
2897 atomic_read(&imap->im_numfree)); 2883 atomic_read(&imap->im_numfree));
2898 2884
2899 /* 2885 /*
2900 * reconstruct imap 2886 * reconstruct imap
2901 * 2887 *
2902 * coalesce contiguous k (newAGSize/oldAGSize) AGs; 2888 * coalesce contiguous k (newAGSize/oldAGSize) AGs;
2903 * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn; 2889 * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn;
@@ -2913,7 +2899,7 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap)
2913 } 2899 }
2914 2900
2915 /* 2901 /*
2916 * process each iag page of the map. 2902 * process each iag page of the map.
2917 * 2903 *
2918 * rebuild AG Free Inode List, AG Free Inode Extent List; 2904 * rebuild AG Free Inode List, AG Free Inode Extent List;
2919 */ 2905 */
@@ -2932,7 +2918,7 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap)
2932 2918
2933 /* leave free iag in the free iag list */ 2919 /* leave free iag in the free iag list */
2934 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { 2920 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
2935 release_metapage(bp); 2921 release_metapage(bp);
2936 continue; 2922 continue;
2937 } 2923 }
2938 2924
@@ -3063,13 +3049,13 @@ static void duplicateIXtree(struct super_block *sb, s64 blkno,
3063} 3049}
3064 3050
3065/* 3051/*
3066 * NAME: copy_from_dinode() 3052 * NAME: copy_from_dinode()
3067 * 3053 *
3068 * FUNCTION: Copies inode info from disk inode to in-memory inode 3054 * FUNCTION: Copies inode info from disk inode to in-memory inode
3069 * 3055 *
3070 * RETURN VALUES: 3056 * RETURN VALUES:
3071 * 0 - success 3057 * 0 - success
3072 * -ENOMEM - insufficient memory 3058 * -ENOMEM - insufficient memory
3073 */ 3059 */
3074static int copy_from_dinode(struct dinode * dip, struct inode *ip) 3060static int copy_from_dinode(struct dinode * dip, struct inode *ip)
3075{ 3061{
@@ -3151,9 +3137,9 @@ static int copy_from_dinode(struct dinode * dip, struct inode *ip)
3151} 3137}
3152 3138
3153/* 3139/*
3154 * NAME: copy_to_dinode() 3140 * NAME: copy_to_dinode()
3155 * 3141 *
3156 * FUNCTION: Copies inode info from in-memory inode to disk inode 3142 * FUNCTION: Copies inode info from in-memory inode to disk inode
3157 */ 3143 */
3158static void copy_to_dinode(struct dinode * dip, struct inode *ip) 3144static void copy_to_dinode(struct dinode * dip, struct inode *ip)
3159{ 3145{
diff --git a/fs/jfs/jfs_imap.h b/fs/jfs/jfs_imap.h
index 4f9c346ed498..610a0e9d8941 100644
--- a/fs/jfs/jfs_imap.h
+++ b/fs/jfs/jfs_imap.h
@@ -24,17 +24,17 @@
24 * jfs_imap.h: disk inode manager 24 * jfs_imap.h: disk inode manager
25 */ 25 */
26 26
27#define EXTSPERIAG 128 /* number of disk inode extent per iag */ 27#define EXTSPERIAG 128 /* number of disk inode extent per iag */
28#define IMAPBLKNO 0 /* lblkno of dinomap within inode map */ 28#define IMAPBLKNO 0 /* lblkno of dinomap within inode map */
29#define SMAPSZ 4 /* number of words per summary map */ 29#define SMAPSZ 4 /* number of words per summary map */
30#define EXTSPERSUM 32 /* number of extents per summary map entry */ 30#define EXTSPERSUM 32 /* number of extents per summary map entry */
31#define L2EXTSPERSUM 5 /* l2 number of extents per summary map */ 31#define L2EXTSPERSUM 5 /* l2 number of extents per summary map */
32#define PGSPERIEXT 4 /* number of 4K pages per dinode extent */ 32#define PGSPERIEXT 4 /* number of 4K pages per dinode extent */
33#define MAXIAGS ((1<<20)-1) /* maximum number of iags */ 33#define MAXIAGS ((1<<20)-1) /* maximum number of iags */
34#define MAXAG 128 /* maximum number of allocation groups */ 34#define MAXAG 128 /* maximum number of allocation groups */
35 35
36#define AMAPSIZE 512 /* bytes in the IAG allocation maps */ 36#define AMAPSIZE 512 /* bytes in the IAG allocation maps */
37#define SMAPSIZE 16 /* bytes in the IAG summary maps */ 37#define SMAPSIZE 16 /* bytes in the IAG summary maps */
38 38
39/* convert inode number to iag number */ 39/* convert inode number to iag number */
40#define INOTOIAG(ino) ((ino) >> L2INOSPERIAG) 40#define INOTOIAG(ino) ((ino) >> L2INOSPERIAG)
@@ -60,31 +60,31 @@
60 * inode allocation group page (per 4096 inodes of an AG) 60 * inode allocation group page (per 4096 inodes of an AG)
61 */ 61 */
62struct iag { 62struct iag {
63 __le64 agstart; /* 8: starting block of ag */ 63 __le64 agstart; /* 8: starting block of ag */
64 __le32 iagnum; /* 4: inode allocation group number */ 64 __le32 iagnum; /* 4: inode allocation group number */
65 __le32 inofreefwd; /* 4: ag inode free list forward */ 65 __le32 inofreefwd; /* 4: ag inode free list forward */
66 __le32 inofreeback; /* 4: ag inode free list back */ 66 __le32 inofreeback; /* 4: ag inode free list back */
67 __le32 extfreefwd; /* 4: ag inode extent free list forward */ 67 __le32 extfreefwd; /* 4: ag inode extent free list forward */
68 __le32 extfreeback; /* 4: ag inode extent free list back */ 68 __le32 extfreeback; /* 4: ag inode extent free list back */
69 __le32 iagfree; /* 4: iag free list */ 69 __le32 iagfree; /* 4: iag free list */
70 70
71 /* summary map: 1 bit per inode extent */ 71 /* summary map: 1 bit per inode extent */
72 __le32 inosmap[SMAPSZ]; /* 16: sum map of mapwords w/ free inodes; 72 __le32 inosmap[SMAPSZ]; /* 16: sum map of mapwords w/ free inodes;
73 * note: this indicates free and backed 73 * note: this indicates free and backed
74 * inodes, if the extent is not backed the 74 * inodes, if the extent is not backed the
75 * value will be 1. if the extent is 75 * value will be 1. if the extent is
76 * backed but all inodes are being used the 76 * backed but all inodes are being used the
77 * value will be 1. if the extent is 77 * value will be 1. if the extent is
78 * backed but at least one of the inodes is 78 * backed but at least one of the inodes is
79 * free the value will be 0. 79 * free the value will be 0.
80 */ 80 */
81 __le32 extsmap[SMAPSZ]; /* 16: sum map of mapwords w/ free extents */ 81 __le32 extsmap[SMAPSZ]; /* 16: sum map of mapwords w/ free extents */
82 __le32 nfreeinos; /* 4: number of free inodes */ 82 __le32 nfreeinos; /* 4: number of free inodes */
83 __le32 nfreeexts; /* 4: number of free extents */ 83 __le32 nfreeexts; /* 4: number of free extents */
84 /* (72) */ 84 /* (72) */
85 u8 pad[1976]; /* 1976: pad to 2048 bytes */ 85 u8 pad[1976]; /* 1976: pad to 2048 bytes */
86 /* allocation bit map: 1 bit per inode (0 - free, 1 - allocated) */ 86 /* allocation bit map: 1 bit per inode (0 - free, 1 - allocated) */
87 __le32 wmap[EXTSPERIAG]; /* 512: working allocation map */ 87 __le32 wmap[EXTSPERIAG]; /* 512: working allocation map */
88 __le32 pmap[EXTSPERIAG]; /* 512: persistent allocation map */ 88 __le32 pmap[EXTSPERIAG]; /* 512: persistent allocation map */
89 pxd_t inoext[EXTSPERIAG]; /* 1024: inode extent addresses */ 89 pxd_t inoext[EXTSPERIAG]; /* 1024: inode extent addresses */
90}; /* (4096) */ 90}; /* (4096) */
@@ -93,44 +93,44 @@ struct iag {
93 * per AG control information (in inode map control page) 93 * per AG control information (in inode map control page)
94 */ 94 */
95struct iagctl_disk { 95struct iagctl_disk {
96 __le32 inofree; /* 4: free inode list anchor */ 96 __le32 inofree; /* 4: free inode list anchor */
97 __le32 extfree; /* 4: free extent list anchor */ 97 __le32 extfree; /* 4: free extent list anchor */
98 __le32 numinos; /* 4: number of backed inodes */ 98 __le32 numinos; /* 4: number of backed inodes */
99 __le32 numfree; /* 4: number of free inodes */ 99 __le32 numfree; /* 4: number of free inodes */
100}; /* (16) */ 100}; /* (16) */
101 101
102struct iagctl { 102struct iagctl {
103 int inofree; /* free inode list anchor */ 103 int inofree; /* free inode list anchor */
104 int extfree; /* free extent list anchor */ 104 int extfree; /* free extent list anchor */
105 int numinos; /* number of backed inodes */ 105 int numinos; /* number of backed inodes */
106 int numfree; /* number of free inodes */ 106 int numfree; /* number of free inodes */
107}; 107};
108 108
109/* 109/*
110 * per fileset/aggregate inode map control page 110 * per fileset/aggregate inode map control page
111 */ 111 */
112struct dinomap_disk { 112struct dinomap_disk {
113 __le32 in_freeiag; /* 4: free iag list anchor */ 113 __le32 in_freeiag; /* 4: free iag list anchor */
114 __le32 in_nextiag; /* 4: next free iag number */ 114 __le32 in_nextiag; /* 4: next free iag number */
115 __le32 in_numinos; /* 4: num of backed inodes */ 115 __le32 in_numinos; /* 4: num of backed inodes */
116 __le32 in_numfree; /* 4: num of free backed inodes */ 116 __le32 in_numfree; /* 4: num of free backed inodes */
117 __le32 in_nbperiext; /* 4: num of blocks per inode extent */ 117 __le32 in_nbperiext; /* 4: num of blocks per inode extent */
118 __le32 in_l2nbperiext; /* 4: l2 of in_nbperiext */ 118 __le32 in_l2nbperiext; /* 4: l2 of in_nbperiext */
119 __le32 in_diskblock; /* 4: for standalone test driver */ 119 __le32 in_diskblock; /* 4: for standalone test driver */
120 __le32 in_maxag; /* 4: for standalone test driver */ 120 __le32 in_maxag; /* 4: for standalone test driver */
121 u8 pad[2016]; /* 2016: pad to 2048 */ 121 u8 pad[2016]; /* 2016: pad to 2048 */
122 struct iagctl_disk in_agctl[MAXAG]; /* 2048: AG control information */ 122 struct iagctl_disk in_agctl[MAXAG]; /* 2048: AG control information */
123}; /* (4096) */ 123}; /* (4096) */
124 124
125struct dinomap { 125struct dinomap {
126 int in_freeiag; /* free iag list anchor */ 126 int in_freeiag; /* free iag list anchor */
127 int in_nextiag; /* next free iag number */ 127 int in_nextiag; /* next free iag number */
128 int in_numinos; /* num of backed inodes */ 128 int in_numinos; /* num of backed inodes */
129 int in_numfree; /* num of free backed inodes */ 129 int in_numfree; /* num of free backed inodes */
130 int in_nbperiext; /* num of blocks per inode extent */ 130 int in_nbperiext; /* num of blocks per inode extent */
131 int in_l2nbperiext; /* l2 of in_nbperiext */ 131 int in_l2nbperiext; /* l2 of in_nbperiext */
132 int in_diskblock; /* for standalone test driver */ 132 int in_diskblock; /* for standalone test driver */
133 int in_maxag; /* for standalone test driver */ 133 int in_maxag; /* for standalone test driver */
134 struct iagctl in_agctl[MAXAG]; /* AG control information */ 134 struct iagctl in_agctl[MAXAG]; /* AG control information */
135}; 135};
136 136
@@ -139,9 +139,9 @@ struct dinomap {
139 */ 139 */
140struct inomap { 140struct inomap {
141 struct dinomap im_imap; /* 4096: inode allocation control */ 141 struct dinomap im_imap; /* 4096: inode allocation control */
142 struct inode *im_ipimap; /* 4: ptr to inode for imap */ 142 struct inode *im_ipimap; /* 4: ptr to inode for imap */
143 struct mutex im_freelock; /* 4: iag free list lock */ 143 struct mutex im_freelock; /* 4: iag free list lock */
144 struct mutex im_aglock[MAXAG]; /* 512: per AG locks */ 144 struct mutex im_aglock[MAXAG]; /* 512: per AG locks */
145 u32 *im_DBGdimap; 145 u32 *im_DBGdimap;
146 atomic_t im_numinos; /* num of backed inodes */ 146 atomic_t im_numinos; /* num of backed inodes */
147 atomic_t im_numfree; /* num of free backed inodes */ 147 atomic_t im_numfree; /* num of free backed inodes */
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h
index 8f453eff3c83..cb8f30985ad1 100644
--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -40,7 +40,7 @@ struct jfs_inode_info {
40 uint mode2; /* jfs-specific mode */ 40 uint mode2; /* jfs-specific mode */
41 uint saved_uid; /* saved for uid mount option */ 41 uint saved_uid; /* saved for uid mount option */
42 uint saved_gid; /* saved for gid mount option */ 42 uint saved_gid; /* saved for gid mount option */
43 pxd_t ixpxd; /* inode extent descriptor */ 43 pxd_t ixpxd; /* inode extent descriptor */
44 dxd_t acl; /* dxd describing acl */ 44 dxd_t acl; /* dxd describing acl */
45 dxd_t ea; /* dxd describing ea */ 45 dxd_t ea; /* dxd describing ea */
46 time_t otime; /* time created */ 46 time_t otime; /* time created */
@@ -190,7 +190,7 @@ struct jfs_sb_info {
190 uint gengen; /* inode generation generator*/ 190 uint gengen; /* inode generation generator*/
191 uint inostamp; /* shows inode belongs to fileset*/ 191 uint inostamp; /* shows inode belongs to fileset*/
192 192
193 /* Formerly in ipbmap */ 193 /* Formerly in ipbmap */
194 struct bmap *bmap; /* incore bmap descriptor */ 194 struct bmap *bmap; /* incore bmap descriptor */
195 struct nls_table *nls_tab; /* current codepage */ 195 struct nls_table *nls_tab; /* current codepage */
196 struct inode *direct_inode; /* metadata inode */ 196 struct inode *direct_inode; /* metadata inode */
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index 44a2f33cb98d..de3e4a506dbc 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -244,7 +244,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
244 goto writeRecord; 244 goto writeRecord;
245 245
246 /* 246 /*
247 * initialize/update page/transaction recovery lsn 247 * initialize/update page/transaction recovery lsn
248 */ 248 */
249 lsn = log->lsn; 249 lsn = log->lsn;
250 250
@@ -263,7 +263,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
263 } 263 }
264 264
265 /* 265 /*
266 * initialize/update lsn of tblock of the page 266 * initialize/update lsn of tblock of the page
267 * 267 *
268 * transaction inherits oldest lsn of pages associated 268 * transaction inherits oldest lsn of pages associated
269 * with allocation/deallocation of resources (their 269 * with allocation/deallocation of resources (their
@@ -307,7 +307,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
307 LOGSYNC_UNLOCK(log, flags); 307 LOGSYNC_UNLOCK(log, flags);
308 308
309 /* 309 /*
310 * write the log record 310 * write the log record
311 */ 311 */
312 writeRecord: 312 writeRecord:
313 lsn = lmWriteRecord(log, tblk, lrd, tlck); 313 lsn = lmWriteRecord(log, tblk, lrd, tlck);
@@ -372,7 +372,7 @@ lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
372 goto moveLrd; 372 goto moveLrd;
373 373
374 /* 374 /*
375 * move log record data 375 * move log record data
376 */ 376 */
377 /* retrieve source meta-data page to log */ 377 /* retrieve source meta-data page to log */
378 if (tlck->flag & tlckPAGELOCK) { 378 if (tlck->flag & tlckPAGELOCK) {
@@ -465,7 +465,7 @@ lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
465 } 465 }
466 466
467 /* 467 /*
468 * move log record descriptor 468 * move log record descriptor
469 */ 469 */
470 moveLrd: 470 moveLrd:
471 lrd->length = cpu_to_le16(len); 471 lrd->length = cpu_to_le16(len);
@@ -574,7 +574,7 @@ static int lmNextPage(struct jfs_log * log)
574 LOGGC_LOCK(log); 574 LOGGC_LOCK(log);
575 575
576 /* 576 /*
577 * write or queue the full page at the tail of write queue 577 * write or queue the full page at the tail of write queue
578 */ 578 */
579 /* get the tail tblk on commit queue */ 579 /* get the tail tblk on commit queue */
580 if (list_empty(&log->cqueue)) 580 if (list_empty(&log->cqueue))
@@ -625,7 +625,7 @@ static int lmNextPage(struct jfs_log * log)
625 LOGGC_UNLOCK(log); 625 LOGGC_UNLOCK(log);
626 626
627 /* 627 /*
628 * allocate/initialize next page 628 * allocate/initialize next page
629 */ 629 */
630 /* if log wraps, the first data page of log is 2 630 /* if log wraps, the first data page of log is 2
631 * (0 never used, 1 is superblock). 631 * (0 never used, 1 is superblock).
@@ -953,7 +953,7 @@ static int lmLogSync(struct jfs_log * log, int hard_sync)
953 } 953 }
954 954
955 /* 955 /*
956 * forward syncpt 956 * forward syncpt
957 */ 957 */
958 /* if last sync is same as last syncpt, 958 /* if last sync is same as last syncpt,
959 * invoke sync point forward processing to update sync. 959 * invoke sync point forward processing to update sync.
@@ -989,7 +989,7 @@ static int lmLogSync(struct jfs_log * log, int hard_sync)
989 lsn = log->lsn; 989 lsn = log->lsn;
990 990
991 /* 991 /*
992 * setup next syncpt trigger (SWAG) 992 * setup next syncpt trigger (SWAG)
993 */ 993 */
994 logsize = log->logsize; 994 logsize = log->logsize;
995 995
@@ -1000,11 +1000,11 @@ static int lmLogSync(struct jfs_log * log, int hard_sync)
1000 if (more < 2 * LOGPSIZE) { 1000 if (more < 2 * LOGPSIZE) {
1001 jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n"); 1001 jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n");
1002 /* 1002 /*
1003 * log wrapping 1003 * log wrapping
1004 * 1004 *
1005 * option 1 - panic ? No.! 1005 * option 1 - panic ? No.!
1006 * option 2 - shutdown file systems 1006 * option 2 - shutdown file systems
1007 * associated with log ? 1007 * associated with log ?
1008 * option 3 - extend log ? 1008 * option 3 - extend log ?
1009 */ 1009 */
1010 /* 1010 /*
@@ -1062,7 +1062,7 @@ void jfs_syncpt(struct jfs_log *log, int hard_sync)
1062/* 1062/*
1063 * NAME: lmLogOpen() 1063 * NAME: lmLogOpen()
1064 * 1064 *
1065 * FUNCTION: open the log on first open; 1065 * FUNCTION: open the log on first open;
1066 * insert filesystem in the active list of the log. 1066 * insert filesystem in the active list of the log.
1067 * 1067 *
1068 * PARAMETER: ipmnt - file system mount inode 1068 * PARAMETER: ipmnt - file system mount inode
@@ -1113,7 +1113,7 @@ int lmLogOpen(struct super_block *sb)
1113 init_waitqueue_head(&log->syncwait); 1113 init_waitqueue_head(&log->syncwait);
1114 1114
1115 /* 1115 /*
1116 * external log as separate logical volume 1116 * external log as separate logical volume
1117 * 1117 *
1118 * file systems to log may have n-to-1 relationship; 1118 * file systems to log may have n-to-1 relationship;
1119 */ 1119 */
@@ -1155,7 +1155,7 @@ journal_found:
1155 return 0; 1155 return 0;
1156 1156
1157 /* 1157 /*
1158 * unwind on error 1158 * unwind on error
1159 */ 1159 */
1160 shutdown: /* unwind lbmLogInit() */ 1160 shutdown: /* unwind lbmLogInit() */
1161 list_del(&log->journal_list); 1161 list_del(&log->journal_list);
@@ -1427,7 +1427,7 @@ int lmLogInit(struct jfs_log * log)
1427 return 0; 1427 return 0;
1428 1428
1429 /* 1429 /*
1430 * unwind on error 1430 * unwind on error
1431 */ 1431 */
1432 errout30: /* release log page */ 1432 errout30: /* release log page */
1433 log->wqueue = NULL; 1433 log->wqueue = NULL;
@@ -1480,7 +1480,7 @@ int lmLogClose(struct super_block *sb)
1480 1480
1481 if (test_bit(log_INLINELOG, &log->flag)) { 1481 if (test_bit(log_INLINELOG, &log->flag)) {
1482 /* 1482 /*
1483 * in-line log in host file system 1483 * in-line log in host file system
1484 */ 1484 */
1485 rc = lmLogShutdown(log); 1485 rc = lmLogShutdown(log);
1486 kfree(log); 1486 kfree(log);
@@ -1504,7 +1504,7 @@ int lmLogClose(struct super_block *sb)
1504 goto out; 1504 goto out;
1505 1505
1506 /* 1506 /*
1507 * external log as separate logical volume 1507 * external log as separate logical volume
1508 */ 1508 */
1509 list_del(&log->journal_list); 1509 list_del(&log->journal_list);
1510 bdev = log->bdev; 1510 bdev = log->bdev;
@@ -1622,20 +1622,26 @@ void jfs_flush_journal(struct jfs_log *log, int wait)
1622 if (!list_empty(&log->synclist)) { 1622 if (!list_empty(&log->synclist)) {
1623 struct logsyncblk *lp; 1623 struct logsyncblk *lp;
1624 1624
1625 printk(KERN_ERR "jfs_flush_journal: synclist not empty\n");
1625 list_for_each_entry(lp, &log->synclist, synclist) { 1626 list_for_each_entry(lp, &log->synclist, synclist) {
1626 if (lp->xflag & COMMIT_PAGE) { 1627 if (lp->xflag & COMMIT_PAGE) {
1627 struct metapage *mp = (struct metapage *)lp; 1628 struct metapage *mp = (struct metapage *)lp;
1628 dump_mem("orphan metapage", lp, 1629 print_hex_dump(KERN_ERR, "metapage: ",
1629 sizeof(struct metapage)); 1630 DUMP_PREFIX_ADDRESS, 16, 4,
1630 dump_mem("page", mp->page, sizeof(struct page)); 1631 mp, sizeof(struct metapage), 0);
1631 } 1632 print_hex_dump(KERN_ERR, "page: ",
1632 else 1633 DUMP_PREFIX_ADDRESS, 16,
1633 dump_mem("orphan tblock", lp, 1634 sizeof(long), mp->page,
1634 sizeof(struct tblock)); 1635 sizeof(struct page), 0);
1636 } else
1637 print_hex_dump(KERN_ERR, "tblock:",
1638 DUMP_PREFIX_ADDRESS, 16, 4,
1639 lp, sizeof(struct tblock), 0);
1635 } 1640 }
1636 } 1641 }
1642#else
1643 WARN_ON(!list_empty(&log->synclist));
1637#endif 1644#endif
1638 //assert(list_empty(&log->synclist));
1639 clear_bit(log_FLUSH, &log->flag); 1645 clear_bit(log_FLUSH, &log->flag);
1640} 1646}
1641 1647
@@ -1723,7 +1729,7 @@ int lmLogShutdown(struct jfs_log * log)
1723 * 1729 *
1724 * PARAMETE: log - pointer to logs inode. 1730 * PARAMETE: log - pointer to logs inode.
1725 * fsdev - kdev_t of filesystem. 1731 * fsdev - kdev_t of filesystem.
1726 * serial - pointer to returned log serial number 1732 * serial - pointer to returned log serial number
1727 * activate - insert/remove device from active list. 1733 * activate - insert/remove device from active list.
1728 * 1734 *
1729 * RETURN: 0 - success 1735 * RETURN: 0 - success
@@ -1963,7 +1969,7 @@ static void lbmfree(struct lbuf * bp)
1963 * FUNCTION: add a log buffer to the log redrive list 1969 * FUNCTION: add a log buffer to the log redrive list
1964 * 1970 *
1965 * PARAMETER: 1971 * PARAMETER:
1966 * bp - log buffer 1972 * bp - log buffer
1967 * 1973 *
1968 * NOTES: 1974 * NOTES:
1969 * Takes log_redrive_lock. 1975 * Takes log_redrive_lock.
@@ -2054,7 +2060,7 @@ static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag,
2054 bp->l_flag = flag; 2060 bp->l_flag = flag;
2055 2061
2056 /* 2062 /*
2057 * insert bp at tail of write queue associated with log 2063 * insert bp at tail of write queue associated with log
2058 * 2064 *
2059 * (request is either for bp already/currently at head of queue 2065 * (request is either for bp already/currently at head of queue
2060 * or new bp to be inserted at tail) 2066 * or new bp to be inserted at tail)
@@ -2117,7 +2123,7 @@ static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag)
2117 log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize)); 2123 log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
2118 2124
2119 /* 2125 /*
2120 * initiate pageout of the page 2126 * initiate pageout of the page
2121 */ 2127 */
2122 lbmStartIO(bp); 2128 lbmStartIO(bp);
2123} 2129}
@@ -2128,7 +2134,7 @@ static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag)
2128 * 2134 *
2129 * FUNCTION: Interface to DD strategy routine 2135 * FUNCTION: Interface to DD strategy routine
2130 * 2136 *
2131 * RETURN: none 2137 * RETURN: none
2132 * 2138 *
2133 * serialization: LCACHE_LOCK() is NOT held during log i/o; 2139 * serialization: LCACHE_LOCK() is NOT held during log i/o;
2134 */ 2140 */
@@ -2222,7 +2228,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
2222 bio_put(bio); 2228 bio_put(bio);
2223 2229
2224 /* 2230 /*
2225 * pagein completion 2231 * pagein completion
2226 */ 2232 */
2227 if (bp->l_flag & lbmREAD) { 2233 if (bp->l_flag & lbmREAD) {
2228 bp->l_flag &= ~lbmREAD; 2234 bp->l_flag &= ~lbmREAD;
@@ -2236,7 +2242,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
2236 } 2242 }
2237 2243
2238 /* 2244 /*
2239 * pageout completion 2245 * pageout completion
2240 * 2246 *
2241 * the bp at the head of write queue has completed pageout. 2247 * the bp at the head of write queue has completed pageout.
2242 * 2248 *
@@ -2302,7 +2308,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
2302 } 2308 }
2303 2309
2304 /* 2310 /*
2305 * synchronous pageout: 2311 * synchronous pageout:
2306 * 2312 *
2307 * buffer has not necessarily been removed from write queue 2313 * buffer has not necessarily been removed from write queue
2308 * (e.g., synchronous write of partial-page with COMMIT): 2314 * (e.g., synchronous write of partial-page with COMMIT):
@@ -2316,7 +2322,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
2316 } 2322 }
2317 2323
2318 /* 2324 /*
2319 * Group Commit pageout: 2325 * Group Commit pageout:
2320 */ 2326 */
2321 else if (bp->l_flag & lbmGC) { 2327 else if (bp->l_flag & lbmGC) {
2322 LCACHE_UNLOCK(flags); 2328 LCACHE_UNLOCK(flags);
@@ -2324,7 +2330,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
2324 } 2330 }
2325 2331
2326 /* 2332 /*
2327 * asynchronous pageout: 2333 * asynchronous pageout:
2328 * 2334 *
2329 * buffer must have been removed from write queue: 2335 * buffer must have been removed from write queue:
2330 * insert buffer at head of freelist where it can be recycled 2336 * insert buffer at head of freelist where it can be recycled
@@ -2375,7 +2381,7 @@ int jfsIOWait(void *arg)
2375 * FUNCTION: format file system log 2381 * FUNCTION: format file system log
2376 * 2382 *
2377 * PARAMETERS: 2383 * PARAMETERS:
2378 * log - volume log 2384 * log - volume log
2379 * logAddress - start address of log space in FS block 2385 * logAddress - start address of log space in FS block
2380 * logSize - length of log space in FS block; 2386 * logSize - length of log space in FS block;
2381 * 2387 *
@@ -2407,16 +2413,16 @@ int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
2407 npages = logSize >> sbi->l2nbperpage; 2413 npages = logSize >> sbi->l2nbperpage;
2408 2414
2409 /* 2415 /*
2410 * log space: 2416 * log space:
2411 * 2417 *
2412 * page 0 - reserved; 2418 * page 0 - reserved;
2413 * page 1 - log superblock; 2419 * page 1 - log superblock;
2414 * page 2 - log data page: A SYNC log record is written 2420 * page 2 - log data page: A SYNC log record is written
2415 * into this page at logform time; 2421 * into this page at logform time;
2416 * pages 3-N - log data page: set to empty log data pages; 2422 * pages 3-N - log data page: set to empty log data pages;
2417 */ 2423 */
2418 /* 2424 /*
2419 * init log superblock: log page 1 2425 * init log superblock: log page 1
2420 */ 2426 */
2421 logsuper = (struct logsuper *) bp->l_ldata; 2427 logsuper = (struct logsuper *) bp->l_ldata;
2422 2428
@@ -2436,7 +2442,7 @@ int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
2436 goto exit; 2442 goto exit;
2437 2443
2438 /* 2444 /*
2439 * init pages 2 to npages-1 as log data pages: 2445 * init pages 2 to npages-1 as log data pages:
2440 * 2446 *
2441 * log page sequence number (lpsn) initialization: 2447 * log page sequence number (lpsn) initialization:
2442 * 2448 *
@@ -2479,7 +2485,7 @@ int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
2479 goto exit; 2485 goto exit;
2480 2486
2481 /* 2487 /*
2482 * initialize succeeding log pages: lpsn = 0, 1, ..., (N-2) 2488 * initialize succeeding log pages: lpsn = 0, 1, ..., (N-2)
2483 */ 2489 */
2484 for (lspn = 0; lspn < npages - 3; lspn++) { 2490 for (lspn = 0; lspn < npages - 3; lspn++) {
2485 lp->h.page = lp->t.page = cpu_to_le32(lspn); 2491 lp->h.page = lp->t.page = cpu_to_le32(lspn);
@@ -2495,7 +2501,7 @@ int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
2495 rc = 0; 2501 rc = 0;
2496exit: 2502exit:
2497 /* 2503 /*
2498 * finalize log 2504 * finalize log
2499 */ 2505 */
2500 /* release the buffer */ 2506 /* release the buffer */
2501 lbmFree(bp); 2507 lbmFree(bp);
diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h
index a53fb17ea219..1f85ef0ec045 100644
--- a/fs/jfs/jfs_logmgr.h
+++ b/fs/jfs/jfs_logmgr.h
@@ -144,7 +144,7 @@ struct logpage {
144 * 144 *
145 * (this comment should be rewritten !) 145 * (this comment should be rewritten !)
146 * jfs uses only "after" log records (only a single writer is allowed 146 * jfs uses only "after" log records (only a single writer is allowed
147 * in a page, pages are written to temporary paging space if 147 * in a page, pages are written to temporary paging space if
148 * if they must be written to disk before commit, and i/o is 148 * if they must be written to disk before commit, and i/o is
149 * scheduled for modified pages to their home location after 149 * scheduled for modified pages to their home location after
150 * the log records containing the after values and the commit 150 * the log records containing the after values and the commit
@@ -153,7 +153,7 @@ struct logpage {
153 * 153 *
154 * a log record consists of a data area of variable length followed by 154 * a log record consists of a data area of variable length followed by
155 * a descriptor of fixed size LOGRDSIZE bytes. 155 * a descriptor of fixed size LOGRDSIZE bytes.
156 * the data area is rounded up to an integral number of 4-bytes and 156 * the data area is rounded up to an integral number of 4-bytes and
157 * must be no longer than LOGPSIZE. 157 * must be no longer than LOGPSIZE.
158 * the descriptor is of size of multiple of 4-bytes and aligned on a 158 * the descriptor is of size of multiple of 4-bytes and aligned on a
159 * 4-byte boundary. 159 * 4-byte boundary.
@@ -215,13 +215,13 @@ struct lrd {
215 union { 215 union {
216 216
217 /* 217 /*
218 * COMMIT: commit 218 * COMMIT: commit
219 * 219 *
220 * transaction commit: no type-dependent information; 220 * transaction commit: no type-dependent information;
221 */ 221 */
222 222
223 /* 223 /*
224 * REDOPAGE: after-image 224 * REDOPAGE: after-image
225 * 225 *
226 * apply after-image; 226 * apply after-image;
227 * 227 *
@@ -236,7 +236,7 @@ struct lrd {
236 } redopage; /* (20) */ 236 } redopage; /* (20) */
237 237
238 /* 238 /*
239 * NOREDOPAGE: the page is freed 239 * NOREDOPAGE: the page is freed
240 * 240 *
241 * do not apply after-image records which precede this record 241 * do not apply after-image records which precede this record
242 * in the log with the same page block number to this page. 242 * in the log with the same page block number to this page.
@@ -252,7 +252,7 @@ struct lrd {
252 } noredopage; /* (20) */ 252 } noredopage; /* (20) */
253 253
254 /* 254 /*
255 * UPDATEMAP: update block allocation map 255 * UPDATEMAP: update block allocation map
256 * 256 *
257 * either in-line PXD, 257 * either in-line PXD,
258 * or out-of-line XADLIST; 258 * or out-of-line XADLIST;
@@ -268,7 +268,7 @@ struct lrd {
268 } updatemap; /* (20) */ 268 } updatemap; /* (20) */
269 269
270 /* 270 /*
271 * NOREDOINOEXT: the inode extent is freed 271 * NOREDOINOEXT: the inode extent is freed
272 * 272 *
273 * do not apply after-image records which precede this 273 * do not apply after-image records which precede this
274 * record in the log with the any of the 4 page block 274 * record in the log with the any of the 4 page block
@@ -286,7 +286,7 @@ struct lrd {
286 } noredoinoext; /* (20) */ 286 } noredoinoext; /* (20) */
287 287
288 /* 288 /*
289 * SYNCPT: log sync point 289 * SYNCPT: log sync point
290 * 290 *
291 * replay log upto syncpt address specified; 291 * replay log upto syncpt address specified;
292 */ 292 */
@@ -295,13 +295,13 @@ struct lrd {
295 } syncpt; 295 } syncpt;
296 296
297 /* 297 /*
298 * MOUNT: file system mount 298 * MOUNT: file system mount
299 * 299 *
300 * file system mount: no type-dependent information; 300 * file system mount: no type-dependent information;
301 */ 301 */
302 302
303 /* 303 /*
304 * ? FREEXTENT: free specified extent(s) 304 * ? FREEXTENT: free specified extent(s)
305 * 305 *
306 * free specified extent(s) from block allocation map 306 * free specified extent(s) from block allocation map
307 * N.B.: nextents should be length of data/sizeof(xad_t) 307 * N.B.: nextents should be length of data/sizeof(xad_t)
@@ -314,7 +314,7 @@ struct lrd {
314 } freextent; 314 } freextent;
315 315
316 /* 316 /*
317 * ? NOREDOFILE: this file is freed 317 * ? NOREDOFILE: this file is freed
318 * 318 *
319 * do not apply records which precede this record in the log 319 * do not apply records which precede this record in the log
320 * with the same inode number. 320 * with the same inode number.
@@ -330,7 +330,7 @@ struct lrd {
330 } noredofile; 330 } noredofile;
331 331
332 /* 332 /*
333 * ? NEWPAGE: 333 * ? NEWPAGE:
334 * 334 *
335 * metadata type dependent 335 * metadata type dependent
336 */ 336 */
@@ -342,7 +342,7 @@ struct lrd {
342 } newpage; 342 } newpage;
343 343
344 /* 344 /*
345 * ? DUMMY: filler 345 * ? DUMMY: filler
346 * 346 *
347 * no type-dependent information 347 * no type-dependent information
348 */ 348 */
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 43d4f69afbec..77c7f1129dde 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -472,7 +472,8 @@ add_failed:
472 printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n"); 472 printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n");
473 goto skip; 473 goto skip;
474dump_bio: 474dump_bio:
475 dump_mem("bio", bio, sizeof(*bio)); 475 print_hex_dump(KERN_ERR, "JFS: dump of bio: ", DUMP_PREFIX_ADDRESS, 16,
476 4, bio, sizeof(*bio), 0);
476skip: 477skip:
477 bio_put(bio); 478 bio_put(bio);
478 unlock_page(page); 479 unlock_page(page);
diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c
index 4dd479834897..644429acb8c0 100644
--- a/fs/jfs/jfs_mount.c
+++ b/fs/jfs/jfs_mount.c
@@ -80,7 +80,7 @@ static int logMOUNT(struct super_block *sb);
80 */ 80 */
81int jfs_mount(struct super_block *sb) 81int jfs_mount(struct super_block *sb)
82{ 82{
83 int rc = 0; /* Return code */ 83 int rc = 0; /* Return code */
84 struct jfs_sb_info *sbi = JFS_SBI(sb); 84 struct jfs_sb_info *sbi = JFS_SBI(sb);
85 struct inode *ipaimap = NULL; 85 struct inode *ipaimap = NULL;
86 struct inode *ipaimap2 = NULL; 86 struct inode *ipaimap2 = NULL;
@@ -169,7 +169,7 @@ int jfs_mount(struct super_block *sb)
169 sbi->ipaimap2 = NULL; 169 sbi->ipaimap2 = NULL;
170 170
171 /* 171 /*
172 * mount (the only/single) fileset 172 * mount (the only/single) fileset
173 */ 173 */
174 /* 174 /*
175 * open fileset inode allocation map (aka fileset inode) 175 * open fileset inode allocation map (aka fileset inode)
@@ -195,7 +195,7 @@ int jfs_mount(struct super_block *sb)
195 goto out; 195 goto out;
196 196
197 /* 197 /*
198 * unwind on error 198 * unwind on error
199 */ 199 */
200 errout41: /* close fileset inode allocation map inode */ 200 errout41: /* close fileset inode allocation map inode */
201 diFreeSpecial(ipimap); 201 diFreeSpecial(ipimap);
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index 25430d0b0d59..7aa1f7004eaf 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -18,7 +18,7 @@
18 */ 18 */
19 19
20/* 20/*
21 * jfs_txnmgr.c: transaction manager 21 * jfs_txnmgr.c: transaction manager
22 * 22 *
23 * notes: 23 * notes:
24 * transaction starts with txBegin() and ends with txCommit() 24 * transaction starts with txBegin() and ends with txCommit()
@@ -60,7 +60,7 @@
60#include "jfs_debug.h" 60#include "jfs_debug.h"
61 61
62/* 62/*
63 * transaction management structures 63 * transaction management structures
64 */ 64 */
65static struct { 65static struct {
66 int freetid; /* index of a free tid structure */ 66 int freetid; /* index of a free tid structure */
@@ -103,19 +103,19 @@ module_param(nTxLock, int, 0);
103MODULE_PARM_DESC(nTxLock, 103MODULE_PARM_DESC(nTxLock,
104 "Number of transaction locks (max:65536)"); 104 "Number of transaction locks (max:65536)");
105 105
106struct tblock *TxBlock; /* transaction block table */ 106struct tblock *TxBlock; /* transaction block table */
107static int TxLockLWM; /* Low water mark for number of txLocks used */ 107static int TxLockLWM; /* Low water mark for number of txLocks used */
108static int TxLockHWM; /* High water mark for number of txLocks used */ 108static int TxLockHWM; /* High water mark for number of txLocks used */
109static int TxLockVHWM; /* Very High water mark */ 109static int TxLockVHWM; /* Very High water mark */
110struct tlock *TxLock; /* transaction lock table */ 110struct tlock *TxLock; /* transaction lock table */
111 111
112/* 112/*
113 * transaction management lock 113 * transaction management lock
114 */ 114 */
115static DEFINE_SPINLOCK(jfsTxnLock); 115static DEFINE_SPINLOCK(jfsTxnLock);
116 116
117#define TXN_LOCK() spin_lock(&jfsTxnLock) 117#define TXN_LOCK() spin_lock(&jfsTxnLock)
118#define TXN_UNLOCK() spin_unlock(&jfsTxnLock) 118#define TXN_UNLOCK() spin_unlock(&jfsTxnLock)
119 119
120#define LAZY_LOCK_INIT() spin_lock_init(&TxAnchor.LazyLock); 120#define LAZY_LOCK_INIT() spin_lock_init(&TxAnchor.LazyLock);
121#define LAZY_LOCK(flags) spin_lock_irqsave(&TxAnchor.LazyLock, flags) 121#define LAZY_LOCK(flags) spin_lock_irqsave(&TxAnchor.LazyLock, flags)
@@ -148,7 +148,7 @@ static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event)
148#define TXN_WAKEUP(event) wake_up_all(event) 148#define TXN_WAKEUP(event) wake_up_all(event)
149 149
150/* 150/*
151 * statistics 151 * statistics
152 */ 152 */
153static struct { 153static struct {
154 tid_t maxtid; /* 4: biggest tid ever used */ 154 tid_t maxtid; /* 4: biggest tid ever used */
@@ -181,8 +181,8 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
181static void LogSyncRelease(struct metapage * mp); 181static void LogSyncRelease(struct metapage * mp);
182 182
183/* 183/*
184 * transaction block/lock management 184 * transaction block/lock management
185 * --------------------------------- 185 * ---------------------------------
186 */ 186 */
187 187
188/* 188/*
@@ -227,9 +227,9 @@ static void txLockFree(lid_t lid)
227} 227}
228 228
229/* 229/*
230 * NAME: txInit() 230 * NAME: txInit()
231 * 231 *
232 * FUNCTION: initialize transaction management structures 232 * FUNCTION: initialize transaction management structures
233 * 233 *
234 * RETURN: 234 * RETURN:
235 * 235 *
@@ -333,9 +333,9 @@ int txInit(void)
333} 333}
334 334
335/* 335/*
336 * NAME: txExit() 336 * NAME: txExit()
337 * 337 *
338 * FUNCTION: clean up when module is unloaded 338 * FUNCTION: clean up when module is unloaded
339 */ 339 */
340void txExit(void) 340void txExit(void)
341{ 341{
@@ -346,12 +346,12 @@ void txExit(void)
346} 346}
347 347
348/* 348/*
349 * NAME: txBegin() 349 * NAME: txBegin()
350 * 350 *
351 * FUNCTION: start a transaction. 351 * FUNCTION: start a transaction.
352 * 352 *
353 * PARAMETER: sb - superblock 353 * PARAMETER: sb - superblock
354 * flag - force for nested tx; 354 * flag - force for nested tx;
355 * 355 *
356 * RETURN: tid - transaction id 356 * RETURN: tid - transaction id
357 * 357 *
@@ -447,13 +447,13 @@ tid_t txBegin(struct super_block *sb, int flag)
447} 447}
448 448
449/* 449/*
450 * NAME: txBeginAnon() 450 * NAME: txBeginAnon()
451 * 451 *
452 * FUNCTION: start an anonymous transaction. 452 * FUNCTION: start an anonymous transaction.
453 * Blocks if logsync or available tlocks are low to prevent 453 * Blocks if logsync or available tlocks are low to prevent
454 * anonymous tlocks from depleting supply. 454 * anonymous tlocks from depleting supply.
455 * 455 *
456 * PARAMETER: sb - superblock 456 * PARAMETER: sb - superblock
457 * 457 *
458 * RETURN: none 458 * RETURN: none
459 */ 459 */
@@ -489,11 +489,11 @@ void txBeginAnon(struct super_block *sb)
489} 489}
490 490
491/* 491/*
492 * txEnd() 492 * txEnd()
493 * 493 *
494 * function: free specified transaction block. 494 * function: free specified transaction block.
495 * 495 *
496 * logsync barrier processing: 496 * logsync barrier processing:
497 * 497 *
498 * serialization: 498 * serialization:
499 */ 499 */
@@ -577,13 +577,13 @@ wakeup:
577} 577}
578 578
579/* 579/*
580 * txLock() 580 * txLock()
581 * 581 *
582 * function: acquire a transaction lock on the specified <mp> 582 * function: acquire a transaction lock on the specified <mp>
583 * 583 *
584 * parameter: 584 * parameter:
585 * 585 *
586 * return: transaction lock id 586 * return: transaction lock id
587 * 587 *
588 * serialization: 588 * serialization:
589 */ 589 */
@@ -829,12 +829,16 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
829 /* Only locks on ipimap or ipaimap should reach here */ 829 /* Only locks on ipimap or ipaimap should reach here */
830 /* assert(jfs_ip->fileset == AGGREGATE_I); */ 830 /* assert(jfs_ip->fileset == AGGREGATE_I); */
831 if (jfs_ip->fileset != AGGREGATE_I) { 831 if (jfs_ip->fileset != AGGREGATE_I) {
832 jfs_err("txLock: trying to lock locked page!"); 832 printk(KERN_ERR "txLock: trying to lock locked page!");
833 dump_mem("ip", ip, sizeof(struct inode)); 833 print_hex_dump(KERN_ERR, "ip: ", DUMP_PREFIX_ADDRESS, 16, 4,
834 dump_mem("mp", mp, sizeof(struct metapage)); 834 ip, sizeof(*ip), 0);
835 dump_mem("Locker's tblk", tid_to_tblock(tid), 835 print_hex_dump(KERN_ERR, "mp: ", DUMP_PREFIX_ADDRESS, 16, 4,
836 sizeof(struct tblock)); 836 mp, sizeof(*mp), 0);
837 dump_mem("Tlock", tlck, sizeof(struct tlock)); 837 print_hex_dump(KERN_ERR, "Locker's tblock: ",
838 DUMP_PREFIX_ADDRESS, 16, 4, tid_to_tblock(tid),
839 sizeof(struct tblock), 0);
840 print_hex_dump(KERN_ERR, "Tlock: ", DUMP_PREFIX_ADDRESS, 16, 4,
841 tlck, sizeof(*tlck), 0);
838 BUG(); 842 BUG();
839 } 843 }
840 INCREMENT(stattx.waitlock); /* statistics */ 844 INCREMENT(stattx.waitlock); /* statistics */
@@ -857,17 +861,17 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
857} 861}
858 862
859/* 863/*
860 * NAME: txRelease() 864 * NAME: txRelease()
861 * 865 *
862 * FUNCTION: Release buffers associated with transaction locks, but don't 866 * FUNCTION: Release buffers associated with transaction locks, but don't
863 * mark homeok yet. The allows other transactions to modify 867 * mark homeok yet. The allows other transactions to modify
864 * buffers, but won't let them go to disk until commit record 868 * buffers, but won't let them go to disk until commit record
865 * actually gets written. 869 * actually gets written.
866 * 870 *
867 * PARAMETER: 871 * PARAMETER:
868 * tblk - 872 * tblk -
869 * 873 *
870 * RETURN: Errors from subroutines. 874 * RETURN: Errors from subroutines.
871 */ 875 */
872static void txRelease(struct tblock * tblk) 876static void txRelease(struct tblock * tblk)
873{ 877{
@@ -896,10 +900,10 @@ static void txRelease(struct tblock * tblk)
896} 900}
897 901
898/* 902/*
899 * NAME: txUnlock() 903 * NAME: txUnlock()
900 * 904 *
901 * FUNCTION: Initiates pageout of pages modified by tid in journalled 905 * FUNCTION: Initiates pageout of pages modified by tid in journalled
902 * objects and frees their lockwords. 906 * objects and frees their lockwords.
903 */ 907 */
904static void txUnlock(struct tblock * tblk) 908static void txUnlock(struct tblock * tblk)
905{ 909{
@@ -983,10 +987,10 @@ static void txUnlock(struct tblock * tblk)
983} 987}
984 988
985/* 989/*
986 * txMaplock() 990 * txMaplock()
987 * 991 *
988 * function: allocate a transaction lock for freed page/entry; 992 * function: allocate a transaction lock for freed page/entry;
989 * for freed page, maplock is used as xtlock/dtlock type; 993 * for freed page, maplock is used as xtlock/dtlock type;
990 */ 994 */
991struct tlock *txMaplock(tid_t tid, struct inode *ip, int type) 995struct tlock *txMaplock(tid_t tid, struct inode *ip, int type)
992{ 996{
@@ -1057,7 +1061,7 @@ struct tlock *txMaplock(tid_t tid, struct inode *ip, int type)
1057} 1061}
1058 1062
1059/* 1063/*
1060 * txLinelock() 1064 * txLinelock()
1061 * 1065 *
1062 * function: allocate a transaction lock for log vector list 1066 * function: allocate a transaction lock for log vector list
1063 */ 1067 */
@@ -1092,39 +1096,39 @@ struct linelock *txLinelock(struct linelock * tlock)
1092} 1096}
1093 1097
1094/* 1098/*
1095 * transaction commit management 1099 * transaction commit management
1096 * ----------------------------- 1100 * -----------------------------
1097 */ 1101 */
1098 1102
1099/* 1103/*
1100 * NAME: txCommit() 1104 * NAME: txCommit()
1101 * 1105 *
1102 * FUNCTION: commit the changes to the objects specified in 1106 * FUNCTION: commit the changes to the objects specified in
1103 * clist. For journalled segments only the 1107 * clist. For journalled segments only the
1104 * changes of the caller are committed, ie by tid. 1108 * changes of the caller are committed, ie by tid.
1105 * for non-journalled segments the data are flushed to 1109 * for non-journalled segments the data are flushed to
1106 * disk and then the change to the disk inode and indirect 1110 * disk and then the change to the disk inode and indirect
1107 * blocks committed (so blocks newly allocated to the 1111 * blocks committed (so blocks newly allocated to the
1108 * segment will be made a part of the segment atomically). 1112 * segment will be made a part of the segment atomically).
1109 * 1113 *
1110 * all of the segments specified in clist must be in 1114 * all of the segments specified in clist must be in
1111 * one file system. no more than 6 segments are needed 1115 * one file system. no more than 6 segments are needed
1112 * to handle all unix svcs. 1116 * to handle all unix svcs.
1113 * 1117 *
1114 * if the i_nlink field (i.e. disk inode link count) 1118 * if the i_nlink field (i.e. disk inode link count)
1115 * is zero, and the type of inode is a regular file or 1119 * is zero, and the type of inode is a regular file or
1116 * directory, or symbolic link , the inode is truncated 1120 * directory, or symbolic link , the inode is truncated
1117 * to zero length. the truncation is committed but the 1121 * to zero length. the truncation is committed but the
1118 * VM resources are unaffected until it is closed (see 1122 * VM resources are unaffected until it is closed (see
1119 * iput and iclose). 1123 * iput and iclose).
1120 * 1124 *
1121 * PARAMETER: 1125 * PARAMETER:
1122 * 1126 *
1123 * RETURN: 1127 * RETURN:
1124 * 1128 *
1125 * serialization: 1129 * serialization:
1126 * on entry the inode lock on each segment is assumed 1130 * on entry the inode lock on each segment is assumed
1127 * to be held. 1131 * to be held.
1128 * 1132 *
1129 * i/o error: 1133 * i/o error:
1130 */ 1134 */
@@ -1175,7 +1179,7 @@ int txCommit(tid_t tid, /* transaction identifier */
1175 if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0) 1179 if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0)
1176 tblk->xflag |= COMMIT_LAZY; 1180 tblk->xflag |= COMMIT_LAZY;
1177 /* 1181 /*
1178 * prepare non-journaled objects for commit 1182 * prepare non-journaled objects for commit
1179 * 1183 *
1180 * flush data pages of non-journaled file 1184 * flush data pages of non-journaled file
1181 * to prevent the file getting non-initialized disk blocks 1185 * to prevent the file getting non-initialized disk blocks
@@ -1186,7 +1190,7 @@ int txCommit(tid_t tid, /* transaction identifier */
1186 cd.nip = nip; 1190 cd.nip = nip;
1187 1191
1188 /* 1192 /*
1189 * acquire transaction lock on (on-disk) inodes 1193 * acquire transaction lock on (on-disk) inodes
1190 * 1194 *
1191 * update on-disk inode from in-memory inode 1195 * update on-disk inode from in-memory inode
1192 * acquiring transaction locks for AFTER records 1196 * acquiring transaction locks for AFTER records
@@ -1262,7 +1266,7 @@ int txCommit(tid_t tid, /* transaction identifier */
1262 } 1266 }
1263 1267
1264 /* 1268 /*
1265 * write log records from transaction locks 1269 * write log records from transaction locks
1266 * 1270 *
1267 * txUpdateMap() resets XAD_NEW in XAD. 1271 * txUpdateMap() resets XAD_NEW in XAD.
1268 */ 1272 */
@@ -1294,7 +1298,7 @@ int txCommit(tid_t tid, /* transaction identifier */
1294 !test_cflag(COMMIT_Nolink, tblk->u.ip))); 1298 !test_cflag(COMMIT_Nolink, tblk->u.ip)));
1295 1299
1296 /* 1300 /*
1297 * write COMMIT log record 1301 * write COMMIT log record
1298 */ 1302 */
1299 lrd->type = cpu_to_le16(LOG_COMMIT); 1303 lrd->type = cpu_to_le16(LOG_COMMIT);
1300 lrd->length = 0; 1304 lrd->length = 0;
@@ -1303,7 +1307,7 @@ int txCommit(tid_t tid, /* transaction identifier */
1303 lmGroupCommit(log, tblk); 1307 lmGroupCommit(log, tblk);
1304 1308
1305 /* 1309 /*
1306 * - transaction is now committed - 1310 * - transaction is now committed -
1307 */ 1311 */
1308 1312
1309 /* 1313 /*
@@ -1314,11 +1318,11 @@ int txCommit(tid_t tid, /* transaction identifier */
1314 txForce(tblk); 1318 txForce(tblk);
1315 1319
1316 /* 1320 /*
1317 * update allocation map. 1321 * update allocation map.
1318 * 1322 *
1319 * update inode allocation map and inode: 1323 * update inode allocation map and inode:
1320 * free pager lock on memory object of inode if any. 1324 * free pager lock on memory object of inode if any.
1321 * update block allocation map. 1325 * update block allocation map.
1322 * 1326 *
1323 * txUpdateMap() resets XAD_NEW in XAD. 1327 * txUpdateMap() resets XAD_NEW in XAD.
1324 */ 1328 */
@@ -1326,7 +1330,7 @@ int txCommit(tid_t tid, /* transaction identifier */
1326 txUpdateMap(tblk); 1330 txUpdateMap(tblk);
1327 1331
1328 /* 1332 /*
1329 * free transaction locks and pageout/free pages 1333 * free transaction locks and pageout/free pages
1330 */ 1334 */
1331 txRelease(tblk); 1335 txRelease(tblk);
1332 1336
@@ -1335,7 +1339,7 @@ int txCommit(tid_t tid, /* transaction identifier */
1335 1339
1336 1340
1337 /* 1341 /*
1338 * reset in-memory object state 1342 * reset in-memory object state
1339 */ 1343 */
1340 for (k = 0; k < cd.nip; k++) { 1344 for (k = 0; k < cd.nip; k++) {
1341 ip = cd.iplist[k]; 1345 ip = cd.iplist[k];
@@ -1358,11 +1362,11 @@ int txCommit(tid_t tid, /* transaction identifier */
1358} 1362}
1359 1363
1360/* 1364/*
1361 * NAME: txLog() 1365 * NAME: txLog()
1362 * 1366 *
1363 * FUNCTION: Writes AFTER log records for all lines modified 1367 * FUNCTION: Writes AFTER log records for all lines modified
1364 * by tid for segments specified by inodes in comdata. 1368 * by tid for segments specified by inodes in comdata.
1365 * Code assumes only WRITELOCKS are recorded in lockwords. 1369 * Code assumes only WRITELOCKS are recorded in lockwords.
1366 * 1370 *
1367 * PARAMETERS: 1371 * PARAMETERS:
1368 * 1372 *
@@ -1421,12 +1425,12 @@ static int txLog(struct jfs_log * log, struct tblock * tblk, struct commit * cd)
1421} 1425}
1422 1426
1423/* 1427/*
1424 * diLog() 1428 * diLog()
1425 * 1429 *
1426 * function: log inode tlock and format maplock to update bmap; 1430 * function: log inode tlock and format maplock to update bmap;
1427 */ 1431 */
1428static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, 1432static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1429 struct tlock * tlck, struct commit * cd) 1433 struct tlock * tlck, struct commit * cd)
1430{ 1434{
1431 int rc = 0; 1435 int rc = 0;
1432 struct metapage *mp; 1436 struct metapage *mp;
@@ -1442,7 +1446,7 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1442 pxd = &lrd->log.redopage.pxd; 1446 pxd = &lrd->log.redopage.pxd;
1443 1447
1444 /* 1448 /*
1445 * inode after image 1449 * inode after image
1446 */ 1450 */
1447 if (tlck->type & tlckENTRY) { 1451 if (tlck->type & tlckENTRY) {
1448 /* log after-image for logredo(): */ 1452 /* log after-image for logredo(): */
@@ -1456,7 +1460,7 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1456 tlck->flag |= tlckWRITEPAGE; 1460 tlck->flag |= tlckWRITEPAGE;
1457 } else if (tlck->type & tlckFREE) { 1461 } else if (tlck->type & tlckFREE) {
1458 /* 1462 /*
1459 * free inode extent 1463 * free inode extent
1460 * 1464 *
1461 * (pages of the freed inode extent have been invalidated and 1465 * (pages of the freed inode extent have been invalidated and
1462 * a maplock for free of the extent has been formatted at 1466 * a maplock for free of the extent has been formatted at
@@ -1498,7 +1502,7 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1498 jfs_err("diLog: UFO type tlck:0x%p", tlck); 1502 jfs_err("diLog: UFO type tlck:0x%p", tlck);
1499#ifdef _JFS_WIP 1503#ifdef _JFS_WIP
1500 /* 1504 /*
1501 * alloc/free external EA extent 1505 * alloc/free external EA extent
1502 * 1506 *
1503 * a maplock for txUpdateMap() to update bPWMAP for alloc/free 1507 * a maplock for txUpdateMap() to update bPWMAP for alloc/free
1504 * of the extent has been formatted at txLock() time; 1508 * of the extent has been formatted at txLock() time;
@@ -1534,9 +1538,9 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1534} 1538}
1535 1539
1536/* 1540/*
1537 * dataLog() 1541 * dataLog()
1538 * 1542 *
1539 * function: log data tlock 1543 * function: log data tlock
1540 */ 1544 */
1541static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, 1545static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1542 struct tlock * tlck) 1546 struct tlock * tlck)
@@ -1580,9 +1584,9 @@ static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1580} 1584}
1581 1585
1582/* 1586/*
1583 * dtLog() 1587 * dtLog()
1584 * 1588 *
1585 * function: log dtree tlock and format maplock to update bmap; 1589 * function: log dtree tlock and format maplock to update bmap;
1586 */ 1590 */
1587static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, 1591static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1588 struct tlock * tlck) 1592 struct tlock * tlck)
@@ -1603,10 +1607,10 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1603 lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT); 1607 lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);
1604 1608
1605 /* 1609 /*
1606 * page extension via relocation: entry insertion; 1610 * page extension via relocation: entry insertion;
1607 * page extension in-place: entry insertion; 1611 * page extension in-place: entry insertion;
1608 * new right page from page split, reinitialized in-line 1612 * new right page from page split, reinitialized in-line
1609 * root from root page split: entry insertion; 1613 * root from root page split: entry insertion;
1610 */ 1614 */
1611 if (tlck->type & (tlckNEW | tlckEXTEND)) { 1615 if (tlck->type & (tlckNEW | tlckEXTEND)) {
1612 /* log after-image of the new page for logredo(): 1616 /* log after-image of the new page for logredo():
@@ -1641,8 +1645,8 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1641 } 1645 }
1642 1646
1643 /* 1647 /*
1644 * entry insertion/deletion, 1648 * entry insertion/deletion,
1645 * sibling page link update (old right page before split); 1649 * sibling page link update (old right page before split);
1646 */ 1650 */
1647 if (tlck->type & (tlckENTRY | tlckRELINK)) { 1651 if (tlck->type & (tlckENTRY | tlckRELINK)) {
1648 /* log after-image for logredo(): */ 1652 /* log after-image for logredo(): */
@@ -1658,11 +1662,11 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1658 } 1662 }
1659 1663
1660 /* 1664 /*
1661 * page deletion: page has been invalidated 1665 * page deletion: page has been invalidated
1662 * page relocation: source extent 1666 * page relocation: source extent
1663 * 1667 *
1664 * a maplock for free of the page has been formatted 1668 * a maplock for free of the page has been formatted
1665 * at txLock() time); 1669 * at txLock() time);
1666 */ 1670 */
1667 if (tlck->type & (tlckFREE | tlckRELOCATE)) { 1671 if (tlck->type & (tlckFREE | tlckRELOCATE)) {
1668 /* log LOG_NOREDOPAGE of the deleted page for logredo() 1672 /* log LOG_NOREDOPAGE of the deleted page for logredo()
@@ -1683,9 +1687,9 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1683} 1687}
1684 1688
1685/* 1689/*
1686 * xtLog() 1690 * xtLog()
1687 * 1691 *
1688 * function: log xtree tlock and format maplock to update bmap; 1692 * function: log xtree tlock and format maplock to update bmap;
1689 */ 1693 */
1690static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, 1694static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1691 struct tlock * tlck) 1695 struct tlock * tlck)
@@ -1725,8 +1729,8 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1725 xadlock = (struct xdlistlock *) maplock; 1729 xadlock = (struct xdlistlock *) maplock;
1726 1730
1727 /* 1731 /*
1728 * entry insertion/extension; 1732 * entry insertion/extension;
1729 * sibling page link update (old right page before split); 1733 * sibling page link update (old right page before split);
1730 */ 1734 */
1731 if (tlck->type & (tlckNEW | tlckGROW | tlckRELINK)) { 1735 if (tlck->type & (tlckNEW | tlckGROW | tlckRELINK)) {
1732 /* log after-image for logredo(): 1736 /* log after-image for logredo():
@@ -1801,7 +1805,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1801 } 1805 }
1802 1806
1803 /* 1807 /*
1804 * page deletion: file deletion/truncation (ref. xtTruncate()) 1808 * page deletion: file deletion/truncation (ref. xtTruncate())
1805 * 1809 *
1806 * (page will be invalidated after log is written and bmap 1810 * (page will be invalidated after log is written and bmap
1807 * is updated from the page); 1811 * is updated from the page);
@@ -1908,13 +1912,13 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1908 } 1912 }
1909 1913
1910 /* 1914 /*
1911 * page/entry truncation: file truncation (ref. xtTruncate()) 1915 * page/entry truncation: file truncation (ref. xtTruncate())
1912 * 1916 *
1913 * |----------+------+------+---------------| 1917 * |----------+------+------+---------------|
1914 * | | | 1918 * | | |
1915 * | | hwm - hwm before truncation 1919 * | | hwm - hwm before truncation
1916 * | next - truncation point 1920 * | next - truncation point
1917 * lwm - lwm before truncation 1921 * lwm - lwm before truncation
1918 * header ? 1922 * header ?
1919 */ 1923 */
1920 if (tlck->type & tlckTRUNCATE) { 1924 if (tlck->type & tlckTRUNCATE) {
@@ -1937,7 +1941,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1937 twm = xtlck->twm.offset; 1941 twm = xtlck->twm.offset;
1938 1942
1939 /* 1943 /*
1940 * write log records 1944 * write log records
1941 */ 1945 */
1942 /* log after-image for logredo(): 1946 /* log after-image for logredo():
1943 * 1947 *
@@ -1997,7 +2001,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1997 } 2001 }
1998 2002
1999 /* 2003 /*
2000 * format maplock(s) for txUpdateMap() to update bmap 2004 * format maplock(s) for txUpdateMap() to update bmap
2001 */ 2005 */
2002 maplock->index = 0; 2006 maplock->index = 0;
2003 2007
@@ -2069,9 +2073,9 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
2069} 2073}
2070 2074
2071/* 2075/*
2072 * mapLog() 2076 * mapLog()
2073 * 2077 *
2074 * function: log from maplock of freed data extents; 2078 * function: log from maplock of freed data extents;
2075 */ 2079 */
2076static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, 2080static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
2077 struct tlock * tlck) 2081 struct tlock * tlck)
@@ -2081,7 +2085,7 @@ static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
2081 pxd_t *pxd; 2085 pxd_t *pxd;
2082 2086
2083 /* 2087 /*
2084 * page relocation: free the source page extent 2088 * page relocation: free the source page extent
2085 * 2089 *
2086 * a maplock for txUpdateMap() for free of the page 2090 * a maplock for txUpdateMap() for free of the page
2087 * has been formatted at txLock() time saving the src 2091 * has been formatted at txLock() time saving the src
@@ -2155,10 +2159,10 @@ static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
2155} 2159}
2156 2160
2157/* 2161/*
2158 * txEA() 2162 * txEA()
2159 * 2163 *
2160 * function: acquire maplock for EA/ACL extents or 2164 * function: acquire maplock for EA/ACL extents or
2161 * set COMMIT_INLINE flag; 2165 * set COMMIT_INLINE flag;
2162 */ 2166 */
2163void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea) 2167void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea)
2164{ 2168{
@@ -2207,10 +2211,10 @@ void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea)
2207} 2211}
2208 2212
2209/* 2213/*
2210 * txForce() 2214 * txForce()
2211 * 2215 *
2212 * function: synchronously write pages locked by transaction 2216 * function: synchronously write pages locked by transaction
2213 * after txLog() but before txUpdateMap(); 2217 * after txLog() but before txUpdateMap();
2214 */ 2218 */
2215static void txForce(struct tblock * tblk) 2219static void txForce(struct tblock * tblk)
2216{ 2220{
@@ -2273,10 +2277,10 @@ static void txForce(struct tblock * tblk)
2273} 2277}
2274 2278
2275/* 2279/*
2276 * txUpdateMap() 2280 * txUpdateMap()
2277 * 2281 *
2278 * function: update persistent allocation map (and working map 2282 * function: update persistent allocation map (and working map
2279 * if appropriate); 2283 * if appropriate);
2280 * 2284 *
2281 * parameter: 2285 * parameter:
2282 */ 2286 */
@@ -2298,7 +2302,7 @@ static void txUpdateMap(struct tblock * tblk)
2298 2302
2299 2303
2300 /* 2304 /*
2301 * update block allocation map 2305 * update block allocation map
2302 * 2306 *
2303 * update allocation state in pmap (and wmap) and 2307 * update allocation state in pmap (and wmap) and
2304 * update lsn of the pmap page; 2308 * update lsn of the pmap page;
@@ -2382,7 +2386,7 @@ static void txUpdateMap(struct tblock * tblk)
2382 } 2386 }
2383 } 2387 }
2384 /* 2388 /*
2385 * update inode allocation map 2389 * update inode allocation map
2386 * 2390 *
2387 * update allocation state in pmap and 2391 * update allocation state in pmap and
2388 * update lsn of the pmap page; 2392 * update lsn of the pmap page;
@@ -2407,24 +2411,24 @@ static void txUpdateMap(struct tblock * tblk)
2407} 2411}
2408 2412
2409/* 2413/*
2410 * txAllocPMap() 2414 * txAllocPMap()
2411 * 2415 *
2412 * function: allocate from persistent map; 2416 * function: allocate from persistent map;
2413 * 2417 *
2414 * parameter: 2418 * parameter:
2415 * ipbmap - 2419 * ipbmap -
2416 * malock - 2420 * malock -
2417 * xad list: 2421 * xad list:
2418 * pxd: 2422 * pxd:
2419 * 2423 *
2420 * maptype - 2424 * maptype -
2421 * allocate from persistent map; 2425 * allocate from persistent map;
2422 * free from persistent map; 2426 * free from persistent map;
2423 * (e.g., tmp file - free from working map at release 2427 * (e.g., tmp file - free from working map at release
2424 * of last reference); 2428 * of last reference);
2425 * free from persistent and working map; 2429 * free from persistent and working map;
2426 * 2430 *
2427 * lsn - log sequence number; 2431 * lsn - log sequence number;
2428 */ 2432 */
2429static void txAllocPMap(struct inode *ip, struct maplock * maplock, 2433static void txAllocPMap(struct inode *ip, struct maplock * maplock,
2430 struct tblock * tblk) 2434 struct tblock * tblk)
@@ -2478,9 +2482,9 @@ static void txAllocPMap(struct inode *ip, struct maplock * maplock,
2478} 2482}
2479 2483
2480/* 2484/*
2481 * txFreeMap() 2485 * txFreeMap()
2482 * 2486 *
2483 * function: free from persistent and/or working map; 2487 * function: free from persistent and/or working map;
2484 * 2488 *
2485 * todo: optimization 2489 * todo: optimization
2486 */ 2490 */
@@ -2579,9 +2583,9 @@ void txFreeMap(struct inode *ip,
2579} 2583}
2580 2584
2581/* 2585/*
2582 * txFreelock() 2586 * txFreelock()
2583 * 2587 *
2584 * function: remove tlock from inode anonymous locklist 2588 * function: remove tlock from inode anonymous locklist
2585 */ 2589 */
2586void txFreelock(struct inode *ip) 2590void txFreelock(struct inode *ip)
2587{ 2591{
@@ -2619,7 +2623,7 @@ void txFreelock(struct inode *ip)
2619} 2623}
2620 2624
2621/* 2625/*
2622 * txAbort() 2626 * txAbort()
2623 * 2627 *
2624 * function: abort tx before commit; 2628 * function: abort tx before commit;
2625 * 2629 *
@@ -2679,7 +2683,7 @@ void txAbort(tid_t tid, int dirty)
2679} 2683}
2680 2684
2681/* 2685/*
2682 * txLazyCommit(void) 2686 * txLazyCommit(void)
2683 * 2687 *
2684 * All transactions except those changing ipimap (COMMIT_FORCE) are 2688 * All transactions except those changing ipimap (COMMIT_FORCE) are
2685 * processed by this routine. This ensures that the inode and block 2689 * processed by this routine. This ensures that the inode and block
@@ -2728,7 +2732,7 @@ static void txLazyCommit(struct tblock * tblk)
2728} 2732}
2729 2733
2730/* 2734/*
2731 * jfs_lazycommit(void) 2735 * jfs_lazycommit(void)
2732 * 2736 *
2733 * To be run as a kernel daemon. If lbmIODone is called in an interrupt 2737 * To be run as a kernel daemon. If lbmIODone is called in an interrupt
2734 * context, or where blocking is not wanted, this routine will process 2738 * context, or where blocking is not wanted, this routine will process
@@ -2913,7 +2917,7 @@ void txResume(struct super_block *sb)
2913} 2917}
2914 2918
2915/* 2919/*
2916 * jfs_sync(void) 2920 * jfs_sync(void)
2917 * 2921 *
2918 * To be run as a kernel daemon. This is awakened when tlocks run low. 2922 * To be run as a kernel daemon. This is awakened when tlocks run low.
2919 * We write any inodes that have anonymous tlocks so they will become 2923 * We write any inodes that have anonymous tlocks so they will become
diff --git a/fs/jfs/jfs_txnmgr.h b/fs/jfs/jfs_txnmgr.h
index 7863cf21afca..ab7288937019 100644
--- a/fs/jfs/jfs_txnmgr.h
+++ b/fs/jfs/jfs_txnmgr.h
@@ -94,7 +94,7 @@ extern struct tblock *TxBlock; /* transaction block table */
94 */ 94 */
95struct tlock { 95struct tlock {
96 lid_t next; /* 2: index next lockword on tid locklist 96 lid_t next; /* 2: index next lockword on tid locklist
97 * next lockword on freelist 97 * next lockword on freelist
98 */ 98 */
99 tid_t tid; /* 2: transaction id holding lock */ 99 tid_t tid; /* 2: transaction id holding lock */
100 100
diff --git a/fs/jfs/jfs_types.h b/fs/jfs/jfs_types.h
index 09b252958687..649f9817accd 100644
--- a/fs/jfs/jfs_types.h
+++ b/fs/jfs/jfs_types.h
@@ -21,7 +21,7 @@
21/* 21/*
22 * jfs_types.h: 22 * jfs_types.h:
23 * 23 *
24 * basic type/utility definitions 24 * basic type/utility definitions
25 * 25 *
26 * note: this header file must be the 1st include file 26 * note: this header file must be the 1st include file
27 * of JFS include list in all JFS .c files. 27 * of JFS include list in all JFS .c files.
@@ -54,8 +54,8 @@ struct timestruc_t {
54 */ 54 */
55 55
56#define LEFTMOSTONE 0x80000000 56#define LEFTMOSTONE 0x80000000
57#define HIGHORDER 0x80000000u /* high order bit on */ 57#define HIGHORDER 0x80000000u /* high order bit on */
58#define ONES 0xffffffffu /* all bit on */ 58#define ONES 0xffffffffu /* all bit on */
59 59
60/* 60/*
61 * logical xd (lxd) 61 * logical xd (lxd)
@@ -148,7 +148,7 @@ typedef struct {
148#define sizeDXD(dxd) le32_to_cpu((dxd)->size) 148#define sizeDXD(dxd) le32_to_cpu((dxd)->size)
149 149
150/* 150/*
151 * directory entry argument 151 * directory entry argument
152 */ 152 */
153struct component_name { 153struct component_name {
154 int namlen; 154 int namlen;
@@ -160,14 +160,14 @@ struct component_name {
160 * DASD limit information - stored in directory inode 160 * DASD limit information - stored in directory inode
161 */ 161 */
162struct dasd { 162struct dasd {
163 u8 thresh; /* Alert Threshold (in percent) */ 163 u8 thresh; /* Alert Threshold (in percent) */
164 u8 delta; /* Alert Threshold delta (in percent) */ 164 u8 delta; /* Alert Threshold delta (in percent) */
165 u8 rsrvd1; 165 u8 rsrvd1;
166 u8 limit_hi; /* DASD limit (in logical blocks) */ 166 u8 limit_hi; /* DASD limit (in logical blocks) */
167 __le32 limit_lo; /* DASD limit (in logical blocks) */ 167 __le32 limit_lo; /* DASD limit (in logical blocks) */
168 u8 rsrvd2[3]; 168 u8 rsrvd2[3];
169 u8 used_hi; /* DASD usage (in logical blocks) */ 169 u8 used_hi; /* DASD usage (in logical blocks) */
170 __le32 used_lo; /* DASD usage (in logical blocks) */ 170 __le32 used_lo; /* DASD usage (in logical blocks) */
171}; 171};
172 172
173#define DASDLIMIT(dasdp) \ 173#define DASDLIMIT(dasdp) \
diff --git a/fs/jfs/jfs_umount.c b/fs/jfs/jfs_umount.c
index a386f48c73fc..7971f37534a3 100644
--- a/fs/jfs/jfs_umount.c
+++ b/fs/jfs/jfs_umount.c
@@ -60,7 +60,7 @@ int jfs_umount(struct super_block *sb)
60 jfs_info("UnMount JFS: sb:0x%p", sb); 60 jfs_info("UnMount JFS: sb:0x%p", sb);
61 61
62 /* 62 /*
63 * update superblock and close log 63 * update superblock and close log
64 * 64 *
65 * if mounted read-write and log based recovery was enabled 65 * if mounted read-write and log based recovery was enabled
66 */ 66 */
diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c
index acc97c46d8a4..1543906a2e0d 100644
--- a/fs/jfs/jfs_xtree.c
+++ b/fs/jfs/jfs_xtree.c
@@ -16,7 +16,7 @@
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */ 17 */
18/* 18/*
19 * jfs_xtree.c: extent allocation descriptor B+-tree manager 19 * jfs_xtree.c: extent allocation descriptor B+-tree manager
20 */ 20 */
21 21
22#include <linux/fs.h> 22#include <linux/fs.h>
@@ -32,30 +32,30 @@
32/* 32/*
33 * xtree local flag 33 * xtree local flag
34 */ 34 */
35#define XT_INSERT 0x00000001 35#define XT_INSERT 0x00000001
36 36
37/* 37/*
38 * xtree key/entry comparison: extent offset 38 * xtree key/entry comparison: extent offset
39 * 39 *
40 * return: 40 * return:
41 * -1: k < start of extent 41 * -1: k < start of extent
42 * 0: start_of_extent <= k <= end_of_extent 42 * 0: start_of_extent <= k <= end_of_extent
43 * 1: k > end_of_extent 43 * 1: k > end_of_extent
44 */ 44 */
45#define XT_CMP(CMP, K, X, OFFSET64)\ 45#define XT_CMP(CMP, K, X, OFFSET64)\
46{\ 46{\
47 OFFSET64 = offsetXAD(X);\ 47 OFFSET64 = offsetXAD(X);\
48 (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\ 48 (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\
49 ((K) < OFFSET64) ? -1 : 0;\ 49 ((K) < OFFSET64) ? -1 : 0;\
50} 50}
51 51
52/* write a xad entry */ 52/* write a xad entry */
53#define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\ 53#define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\
54{\ 54{\
55 (XAD)->flag = (FLAG);\ 55 (XAD)->flag = (FLAG);\
56 XADoffset((XAD), (OFF));\ 56 XADoffset((XAD), (OFF));\
57 XADlength((XAD), (LEN));\ 57 XADlength((XAD), (LEN));\
58 XADaddress((XAD), (ADDR));\ 58 XADaddress((XAD), (ADDR));\
59} 59}
60 60
61#define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot) 61#define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot)
@@ -76,13 +76,13 @@
76 MP = NULL;\ 76 MP = NULL;\
77 RC = -EIO;\ 77 RC = -EIO;\
78 }\ 78 }\
79 }\ 79 }\
80} 80}
81 81
82/* for consistency */ 82/* for consistency */
83#define XT_PUTPAGE(MP) BT_PUTPAGE(MP) 83#define XT_PUTPAGE(MP) BT_PUTPAGE(MP)
84 84
85#define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \ 85#define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
86 BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot) 86 BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot)
87/* xtree entry parameter descriptor */ 87/* xtree entry parameter descriptor */
88struct xtsplit { 88struct xtsplit {
@@ -97,7 +97,7 @@ struct xtsplit {
97 97
98 98
99/* 99/*
100 * statistics 100 * statistics
101 */ 101 */
102#ifdef CONFIG_JFS_STATISTICS 102#ifdef CONFIG_JFS_STATISTICS
103static struct { 103static struct {
@@ -136,7 +136,7 @@ static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp);
136#endif /* _STILL_TO_PORT */ 136#endif /* _STILL_TO_PORT */
137 137
138/* 138/*
139 * xtLookup() 139 * xtLookup()
140 * 140 *
141 * function: map a single page into a physical extent; 141 * function: map a single page into a physical extent;
142 */ 142 */
@@ -179,7 +179,7 @@ int xtLookup(struct inode *ip, s64 lstart,
179 } 179 }
180 180
181 /* 181 /*
182 * compute the physical extent covering logical extent 182 * compute the physical extent covering logical extent
183 * 183 *
184 * N.B. search may have failed (e.g., hole in sparse file), 184 * N.B. search may have failed (e.g., hole in sparse file),
185 * and returned the index of the next entry. 185 * and returned the index of the next entry.
@@ -220,27 +220,27 @@ int xtLookup(struct inode *ip, s64 lstart,
220 220
221 221
222/* 222/*
223 * xtLookupList() 223 * xtLookupList()
224 * 224 *
225 * function: map a single logical extent into a list of physical extents; 225 * function: map a single logical extent into a list of physical extents;
226 * 226 *
227 * parameter: 227 * parameter:
228 * struct inode *ip, 228 * struct inode *ip,
229 * struct lxdlist *lxdlist, lxd list (in) 229 * struct lxdlist *lxdlist, lxd list (in)
230 * struct xadlist *xadlist, xad list (in/out) 230 * struct xadlist *xadlist, xad list (in/out)
231 * int flag) 231 * int flag)
232 * 232 *
233 * coverage of lxd by xad under assumption of 233 * coverage of lxd by xad under assumption of
234 * . lxd's are ordered and disjoint. 234 * . lxd's are ordered and disjoint.
235 * . xad's are ordered and disjoint. 235 * . xad's are ordered and disjoint.
236 * 236 *
237 * return: 237 * return:
238 * 0: success 238 * 0: success
239 * 239 *
240 * note: a page being written (even a single byte) is backed fully, 240 * note: a page being written (even a single byte) is backed fully,
241 * except the last page which is only backed with blocks 241 * except the last page which is only backed with blocks
242 * required to cover the last byte; 242 * required to cover the last byte;
243 * the extent backing a page is fully contained within an xad; 243 * the extent backing a page is fully contained within an xad;
244 */ 244 */
245int xtLookupList(struct inode *ip, struct lxdlist * lxdlist, 245int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
246 struct xadlist * xadlist, int flag) 246 struct xadlist * xadlist, int flag)
@@ -284,7 +284,7 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
284 return rc; 284 return rc;
285 285
286 /* 286 /*
287 * compute the physical extent covering logical extent 287 * compute the physical extent covering logical extent
288 * 288 *
289 * N.B. search may have failed (e.g., hole in sparse file), 289 * N.B. search may have failed (e.g., hole in sparse file),
290 * and returned the index of the next entry. 290 * and returned the index of the next entry.
@@ -343,7 +343,7 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
343 if (lstart >= size) 343 if (lstart >= size)
344 goto mapend; 344 goto mapend;
345 345
346 /* compare with the current xad */ 346 /* compare with the current xad */
347 goto compare1; 347 goto compare1;
348 } 348 }
349 /* lxd is covered by xad */ 349 /* lxd is covered by xad */
@@ -430,7 +430,7 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
430 /* 430 /*
431 * lxd is partially covered by xad 431 * lxd is partially covered by xad
432 */ 432 */
433 else { /* (xend < lend) */ 433 else { /* (xend < lend) */
434 434
435 /* 435 /*
436 * get next xad 436 * get next xad
@@ -477,22 +477,22 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
477 477
478 478
479/* 479/*
480 * xtSearch() 480 * xtSearch()
481 * 481 *
482 * function: search for the xad entry covering specified offset. 482 * function: search for the xad entry covering specified offset.
483 * 483 *
484 * parameters: 484 * parameters:
485 * ip - file object; 485 * ip - file object;
486 * xoff - extent offset; 486 * xoff - extent offset;
487 * nextp - address of next extent (if any) for search miss 487 * nextp - address of next extent (if any) for search miss
488 * cmpp - comparison result: 488 * cmpp - comparison result:
489 * btstack - traverse stack; 489 * btstack - traverse stack;
490 * flag - search process flag (XT_INSERT); 490 * flag - search process flag (XT_INSERT);
491 * 491 *
492 * returns: 492 * returns:
493 * btstack contains (bn, index) of search path traversed to the entry. 493 * btstack contains (bn, index) of search path traversed to the entry.
494 * *cmpp is set to result of comparison with the entry returned. 494 * *cmpp is set to result of comparison with the entry returned.
495 * the page containing the entry is pinned at exit. 495 * the page containing the entry is pinned at exit.
496 */ 496 */
497static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp, 497static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp,
498 int *cmpp, struct btstack * btstack, int flag) 498 int *cmpp, struct btstack * btstack, int flag)
@@ -517,7 +517,7 @@ static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp,
517 btstack->nsplit = 0; 517 btstack->nsplit = 0;
518 518
519 /* 519 /*
520 * search down tree from root: 520 * search down tree from root:
521 * 521 *
522 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of 522 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
523 * internal page, child page Pi contains entry with k, Ki <= K < Kj. 523 * internal page, child page Pi contains entry with k, Ki <= K < Kj.
@@ -642,7 +642,7 @@ static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp,
642 XT_CMP(cmp, xoff, &p->xad[index], t64); 642 XT_CMP(cmp, xoff, &p->xad[index], t64);
643 if (cmp == 0) { 643 if (cmp == 0) {
644 /* 644 /*
645 * search hit 645 * search hit
646 */ 646 */
647 /* search hit - leaf page: 647 /* search hit - leaf page:
648 * return the entry found 648 * return the entry found
@@ -692,7 +692,7 @@ static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp,
692 } 692 }
693 693
694 /* 694 /*
695 * search miss 695 * search miss
696 * 696 *
697 * base is the smallest index with key (Kj) greater than 697 * base is the smallest index with key (Kj) greater than
698 * search key (K) and may be zero or maxentry index. 698 * search key (K) and may be zero or maxentry index.
@@ -773,22 +773,22 @@ static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp,
773} 773}
774 774
775/* 775/*
776 * xtInsert() 776 * xtInsert()
777 * 777 *
778 * function: 778 * function:
779 * 779 *
780 * parameter: 780 * parameter:
781 * tid - transaction id; 781 * tid - transaction id;
782 * ip - file object; 782 * ip - file object;
783 * xflag - extent flag (XAD_NOTRECORDED): 783 * xflag - extent flag (XAD_NOTRECORDED):
784 * xoff - extent offset; 784 * xoff - extent offset;
785 * xlen - extent length; 785 * xlen - extent length;
786 * xaddrp - extent address pointer (in/out): 786 * xaddrp - extent address pointer (in/out):
787 * if (*xaddrp) 787 * if (*xaddrp)
788 * caller allocated data extent at *xaddrp; 788 * caller allocated data extent at *xaddrp;
789 * else 789 * else
790 * allocate data extent and return its xaddr; 790 * allocate data extent and return its xaddr;
791 * flag - 791 * flag -
792 * 792 *
793 * return: 793 * return:
794 */ 794 */
@@ -813,7 +813,7 @@ int xtInsert(tid_t tid, /* transaction id */
813 jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen); 813 jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);
814 814
815 /* 815 /*
816 * search for the entry location at which to insert: 816 * search for the entry location at which to insert:
817 * 817 *
818 * xtFastSearch() and xtSearch() both return (leaf page 818 * xtFastSearch() and xtSearch() both return (leaf page
819 * pinned, index at which to insert). 819 * pinned, index at which to insert).
@@ -853,13 +853,13 @@ int xtInsert(tid_t tid, /* transaction id */
853 } 853 }
854 854
855 /* 855 /*
856 * insert entry for new extent 856 * insert entry for new extent
857 */ 857 */
858 xflag |= XAD_NEW; 858 xflag |= XAD_NEW;
859 859
860 /* 860 /*
861 * if the leaf page is full, split the page and 861 * if the leaf page is full, split the page and
862 * propagate up the router entry for the new page from split 862 * propagate up the router entry for the new page from split
863 * 863 *
864 * The xtSplitUp() will insert the entry and unpin the leaf page. 864 * The xtSplitUp() will insert the entry and unpin the leaf page.
865 */ 865 */
@@ -886,7 +886,7 @@ int xtInsert(tid_t tid, /* transaction id */
886 } 886 }
887 887
888 /* 888 /*
889 * insert the new entry into the leaf page 889 * insert the new entry into the leaf page
890 */ 890 */
891 /* 891 /*
892 * acquire a transaction lock on the leaf page; 892 * acquire a transaction lock on the leaf page;
@@ -930,16 +930,16 @@ int xtInsert(tid_t tid, /* transaction id */
930 930
931 931
932/* 932/*
933 * xtSplitUp() 933 * xtSplitUp()
934 * 934 *
935 * function: 935 * function:
936 * split full pages as propagating insertion up the tree 936 * split full pages as propagating insertion up the tree
937 * 937 *
938 * parameter: 938 * parameter:
939 * tid - transaction id; 939 * tid - transaction id;
940 * ip - file object; 940 * ip - file object;
941 * split - entry parameter descriptor; 941 * split - entry parameter descriptor;
942 * btstack - traverse stack from xtSearch() 942 * btstack - traverse stack from xtSearch()
943 * 943 *
944 * return: 944 * return:
945 */ 945 */
@@ -1199,22 +1199,22 @@ xtSplitUp(tid_t tid,
1199 1199
1200 1200
1201/* 1201/*
1202 * xtSplitPage() 1202 * xtSplitPage()
1203 * 1203 *
1204 * function: 1204 * function:
1205 * split a full non-root page into 1205 * split a full non-root page into
1206 * original/split/left page and new right page 1206 * original/split/left page and new right page
1207 * i.e., the original/split page remains as left page. 1207 * i.e., the original/split page remains as left page.
1208 * 1208 *
1209 * parameter: 1209 * parameter:
1210 * int tid, 1210 * int tid,
1211 * struct inode *ip, 1211 * struct inode *ip,
1212 * struct xtsplit *split, 1212 * struct xtsplit *split,
1213 * struct metapage **rmpp, 1213 * struct metapage **rmpp,
1214 * u64 *rbnp, 1214 * u64 *rbnp,
1215 * 1215 *
1216 * return: 1216 * return:
1217 * Pointer to page in which to insert or NULL on error. 1217 * Pointer to page in which to insert or NULL on error.
1218 */ 1218 */
1219static int 1219static int
1220xtSplitPage(tid_t tid, struct inode *ip, 1220xtSplitPage(tid_t tid, struct inode *ip,
@@ -1248,9 +1248,9 @@ xtSplitPage(tid_t tid, struct inode *ip,
1248 rbn = addressPXD(pxd); 1248 rbn = addressPXD(pxd);
1249 1249
1250 /* Allocate blocks to quota. */ 1250 /* Allocate blocks to quota. */
1251 if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { 1251 if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
1252 rc = -EDQUOT; 1252 rc = -EDQUOT;
1253 goto clean_up; 1253 goto clean_up;
1254 } 1254 }
1255 1255
1256 quota_allocation += lengthPXD(pxd); 1256 quota_allocation += lengthPXD(pxd);
@@ -1304,7 +1304,7 @@ xtSplitPage(tid_t tid, struct inode *ip,
1304 skip = split->index; 1304 skip = split->index;
1305 1305
1306 /* 1306 /*
1307 * sequential append at tail (after last entry of last page) 1307 * sequential append at tail (after last entry of last page)
1308 * 1308 *
1309 * if splitting the last page on a level because of appending 1309 * if splitting the last page on a level because of appending
1310 * an entry to it (skip is maxentry), it's likely that the access is 1310 * an entry to it (skip is maxentry), it's likely that the access is
@@ -1342,7 +1342,7 @@ xtSplitPage(tid_t tid, struct inode *ip,
1342 } 1342 }
1343 1343
1344 /* 1344 /*
1345 * non-sequential insert (at possibly middle page) 1345 * non-sequential insert (at possibly middle page)
1346 */ 1346 */
1347 1347
1348 /* 1348 /*
@@ -1465,25 +1465,24 @@ xtSplitPage(tid_t tid, struct inode *ip,
1465 1465
1466 1466
1467/* 1467/*
1468 * xtSplitRoot() 1468 * xtSplitRoot()
1469 * 1469 *
1470 * function: 1470 * function:
1471 * split the full root page into 1471 * split the full root page into original/root/split page and new
1472 * original/root/split page and new right page 1472 * right page
1473 * i.e., root remains fixed in tree anchor (inode) and 1473 * i.e., root remains fixed in tree anchor (inode) and the root is
1474 * the root is copied to a single new right child page 1474 * copied to a single new right child page since root page <<
1475 * since root page << non-root page, and 1475 * non-root page, and the split root page contains a single entry
1476 * the split root page contains a single entry for the 1476 * for the new right child page.
1477 * new right child page.
1478 * 1477 *
1479 * parameter: 1478 * parameter:
1480 * int tid, 1479 * int tid,
1481 * struct inode *ip, 1480 * struct inode *ip,
1482 * struct xtsplit *split, 1481 * struct xtsplit *split,
1483 * struct metapage **rmpp) 1482 * struct metapage **rmpp)
1484 * 1483 *
1485 * return: 1484 * return:
1486 * Pointer to page in which to insert or NULL on error. 1485 * Pointer to page in which to insert or NULL on error.
1487 */ 1486 */
1488static int 1487static int
1489xtSplitRoot(tid_t tid, 1488xtSplitRoot(tid_t tid,
@@ -1505,7 +1504,7 @@ xtSplitRoot(tid_t tid,
1505 INCREMENT(xtStat.split); 1504 INCREMENT(xtStat.split);
1506 1505
1507 /* 1506 /*
1508 * allocate a single (right) child page 1507 * allocate a single (right) child page
1509 */ 1508 */
1510 pxdlist = split->pxdlist; 1509 pxdlist = split->pxdlist;
1511 pxd = &pxdlist->pxd[pxdlist->npxd]; 1510 pxd = &pxdlist->pxd[pxdlist->npxd];
@@ -1573,7 +1572,7 @@ xtSplitRoot(tid_t tid,
1573 } 1572 }
1574 1573
1575 /* 1574 /*
1576 * reset the root 1575 * reset the root
1577 * 1576 *
1578 * init root with the single entry for the new right page 1577 * init root with the single entry for the new right page
1578 * set the 1st entry offset to 0, which forces the left-most key 1577 * set the 1st entry offset to 0, which forces the left-most key
@@ -1610,7 +1609,7 @@ xtSplitRoot(tid_t tid,
1610 1609
1611 1610
1612/* 1611/*
1613 * xtExtend() 1612 * xtExtend()
1614 * 1613 *
1615 * function: extend in-place; 1614 * function: extend in-place;
1616 * 1615 *
@@ -1677,7 +1676,7 @@ int xtExtend(tid_t tid, /* transaction id */
1677 goto extendOld; 1676 goto extendOld;
1678 1677
1679 /* 1678 /*
1680 * extent overflow: insert entry for new extent 1679 * extent overflow: insert entry for new extent
1681 */ 1680 */
1682//insertNew: 1681//insertNew:
1683 xoff = offsetXAD(xad) + MAXXLEN; 1682 xoff = offsetXAD(xad) + MAXXLEN;
@@ -1685,8 +1684,8 @@ int xtExtend(tid_t tid, /* transaction id */
1685 nextindex = le16_to_cpu(p->header.nextindex); 1684 nextindex = le16_to_cpu(p->header.nextindex);
1686 1685
1687 /* 1686 /*
1688 * if the leaf page is full, insert the new entry and 1687 * if the leaf page is full, insert the new entry and
1689 * propagate up the router entry for the new page from split 1688 * propagate up the router entry for the new page from split
1690 * 1689 *
1691 * The xtSplitUp() will insert the entry and unpin the leaf page. 1690 * The xtSplitUp() will insert the entry and unpin the leaf page.
1692 */ 1691 */
@@ -1731,7 +1730,7 @@ int xtExtend(tid_t tid, /* transaction id */
1731 } 1730 }
1732 } 1731 }
1733 /* 1732 /*
1734 * insert the new entry into the leaf page 1733 * insert the new entry into the leaf page
1735 */ 1734 */
1736 else { 1735 else {
1737 /* insert the new entry: mark the entry NEW */ 1736 /* insert the new entry: mark the entry NEW */
@@ -1771,11 +1770,11 @@ int xtExtend(tid_t tid, /* transaction id */
1771 1770
1772#ifdef _NOTYET 1771#ifdef _NOTYET
1773/* 1772/*
1774 * xtTailgate() 1773 * xtTailgate()
1775 * 1774 *
1776 * function: split existing 'tail' extent 1775 * function: split existing 'tail' extent
1777 * (split offset >= start offset of tail extent), and 1776 * (split offset >= start offset of tail extent), and
1778 * relocate and extend the split tail half; 1777 * relocate and extend the split tail half;
1779 * 1778 *
1780 * note: existing extent may or may not have been committed. 1779 * note: existing extent may or may not have been committed.
1781 * caller is responsible for pager buffer cache update, and 1780 * caller is responsible for pager buffer cache update, and
@@ -1804,7 +1803,7 @@ int xtTailgate(tid_t tid, /* transaction id */
1804 1803
1805/* 1804/*
1806printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n", 1805printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n",
1807 (ulong)xoff, xlen, (ulong)xaddr); 1806 (ulong)xoff, xlen, (ulong)xaddr);
1808*/ 1807*/
1809 1808
1810 /* there must exist extent to be tailgated */ 1809 /* there must exist extent to be tailgated */
@@ -1842,18 +1841,18 @@ printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n",
1842 xad = &p->xad[index]; 1841 xad = &p->xad[index];
1843/* 1842/*
1844printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n", 1843printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n",
1845 (ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad)); 1844 (ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad));
1846*/ 1845*/
1847 if ((llen = xoff - offsetXAD(xad)) == 0) 1846 if ((llen = xoff - offsetXAD(xad)) == 0)
1848 goto updateOld; 1847 goto updateOld;
1849 1848
1850 /* 1849 /*
1851 * partially replace extent: insert entry for new extent 1850 * partially replace extent: insert entry for new extent
1852 */ 1851 */
1853//insertNew: 1852//insertNew:
1854 /* 1853 /*
1855 * if the leaf page is full, insert the new entry and 1854 * if the leaf page is full, insert the new entry and
1856 * propagate up the router entry for the new page from split 1855 * propagate up the router entry for the new page from split
1857 * 1856 *
1858 * The xtSplitUp() will insert the entry and unpin the leaf page. 1857 * The xtSplitUp() will insert the entry and unpin the leaf page.
1859 */ 1858 */
@@ -1898,7 +1897,7 @@ printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n",
1898 } 1897 }
1899 } 1898 }
1900 /* 1899 /*
1901 * insert the new entry into the leaf page 1900 * insert the new entry into the leaf page
1902 */ 1901 */
1903 else { 1902 else {
1904 /* insert the new entry: mark the entry NEW */ 1903 /* insert the new entry: mark the entry NEW */
@@ -1955,17 +1954,17 @@ printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n",
1955#endif /* _NOTYET */ 1954#endif /* _NOTYET */
1956 1955
1957/* 1956/*
1958 * xtUpdate() 1957 * xtUpdate()
1959 * 1958 *
1960 * function: update XAD; 1959 * function: update XAD;
1961 * 1960 *
1962 * update extent for allocated_but_not_recorded or 1961 * update extent for allocated_but_not_recorded or
1963 * compressed extent; 1962 * compressed extent;
1964 * 1963 *
1965 * parameter: 1964 * parameter:
1966 * nxad - new XAD; 1965 * nxad - new XAD;
1967 * logical extent of the specified XAD must be completely 1966 * logical extent of the specified XAD must be completely
1968 * contained by an existing XAD; 1967 * contained by an existing XAD;
1969 */ 1968 */
1970int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) 1969int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad)
1971{ /* new XAD */ 1970{ /* new XAD */
@@ -2416,19 +2415,19 @@ printf("xtUpdate.updateLeft.split p:0x%p\n", p);
2416 2415
2417 2416
2418/* 2417/*
2419 * xtAppend() 2418 * xtAppend()
2420 * 2419 *
2421 * function: grow in append mode from contiguous region specified ; 2420 * function: grow in append mode from contiguous region specified ;
2422 * 2421 *
2423 * parameter: 2422 * parameter:
2424 * tid - transaction id; 2423 * tid - transaction id;
2425 * ip - file object; 2424 * ip - file object;
2426 * xflag - extent flag: 2425 * xflag - extent flag:
2427 * xoff - extent offset; 2426 * xoff - extent offset;
2428 * maxblocks - max extent length; 2427 * maxblocks - max extent length;
2429 * xlen - extent length (in/out); 2428 * xlen - extent length (in/out);
2430 * xaddrp - extent address pointer (in/out): 2429 * xaddrp - extent address pointer (in/out):
2431 * flag - 2430 * flag -
2432 * 2431 *
2433 * return: 2432 * return:
2434 */ 2433 */
@@ -2460,7 +2459,7 @@ int xtAppend(tid_t tid, /* transaction id */
2460 (ulong) xoff, maxblocks, xlen, (ulong) xaddr); 2459 (ulong) xoff, maxblocks, xlen, (ulong) xaddr);
2461 2460
2462 /* 2461 /*
2463 * search for the entry location at which to insert: 2462 * search for the entry location at which to insert:
2464 * 2463 *
2465 * xtFastSearch() and xtSearch() both return (leaf page 2464 * xtFastSearch() and xtSearch() both return (leaf page
2466 * pinned, index at which to insert). 2465 * pinned, index at which to insert).
@@ -2482,13 +2481,13 @@ int xtAppend(tid_t tid, /* transaction id */
2482 xlen = min(xlen, (int)(next - xoff)); 2481 xlen = min(xlen, (int)(next - xoff));
2483//insert: 2482//insert:
2484 /* 2483 /*
2485 * insert entry for new extent 2484 * insert entry for new extent
2486 */ 2485 */
2487 xflag |= XAD_NEW; 2486 xflag |= XAD_NEW;
2488 2487
2489 /* 2488 /*
2490 * if the leaf page is full, split the page and 2489 * if the leaf page is full, split the page and
2491 * propagate up the router entry for the new page from split 2490 * propagate up the router entry for the new page from split
2492 * 2491 *
2493 * The xtSplitUp() will insert the entry and unpin the leaf page. 2492 * The xtSplitUp() will insert the entry and unpin the leaf page.
2494 */ 2493 */
@@ -2545,7 +2544,7 @@ int xtAppend(tid_t tid, /* transaction id */
2545 return 0; 2544 return 0;
2546 2545
2547 /* 2546 /*
2548 * insert the new entry into the leaf page 2547 * insert the new entry into the leaf page
2549 */ 2548 */
2550 insertLeaf: 2549 insertLeaf:
2551 /* 2550 /*
@@ -2589,17 +2588,17 @@ int xtAppend(tid_t tid, /* transaction id */
2589 2588
2590/* - TBD for defragmentation/reorganization - 2589/* - TBD for defragmentation/reorganization -
2591 * 2590 *
2592 * xtDelete() 2591 * xtDelete()
2593 * 2592 *
2594 * function: 2593 * function:
2595 * delete the entry with the specified key. 2594 * delete the entry with the specified key.
2596 * 2595 *
2597 * N.B.: whole extent of the entry is assumed to be deleted. 2596 * N.B.: whole extent of the entry is assumed to be deleted.
2598 * 2597 *
2599 * parameter: 2598 * parameter:
2600 * 2599 *
2601 * return: 2600 * return:
2602 * ENOENT: if the entry is not found. 2601 * ENOENT: if the entry is not found.
2603 * 2602 *
2604 * exception: 2603 * exception:
2605 */ 2604 */
@@ -2665,10 +2664,10 @@ int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag)
2665 2664
2666/* - TBD for defragmentation/reorganization - 2665/* - TBD for defragmentation/reorganization -
2667 * 2666 *
2668 * xtDeleteUp() 2667 * xtDeleteUp()
2669 * 2668 *
2670 * function: 2669 * function:
2671 * free empty pages as propagating deletion up the tree 2670 * free empty pages as propagating deletion up the tree
2672 * 2671 *
2673 * parameter: 2672 * parameter:
2674 * 2673 *
@@ -2815,15 +2814,15 @@ xtDeleteUp(tid_t tid, struct inode *ip,
2815 2814
2816 2815
2817/* 2816/*
2818 * NAME: xtRelocate() 2817 * NAME: xtRelocate()
2819 * 2818 *
2820 * FUNCTION: relocate xtpage or data extent of regular file; 2819 * FUNCTION: relocate xtpage or data extent of regular file;
2821 * This function is mainly used by defragfs utility. 2820 * This function is mainly used by defragfs utility.
2822 * 2821 *
2823 * NOTE: This routine does not have the logic to handle 2822 * NOTE: This routine does not have the logic to handle
2824 * uncommitted allocated extent. The caller should call 2823 * uncommitted allocated extent. The caller should call
2825 * txCommit() to commit all the allocation before call 2824 * txCommit() to commit all the allocation before call
2826 * this routine. 2825 * this routine.
2827 */ 2826 */
2828int 2827int
2829xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ 2828xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
@@ -2865,8 +2864,8 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
2865 xtype, (ulong) xoff, xlen, (ulong) oxaddr, (ulong) nxaddr); 2864 xtype, (ulong) xoff, xlen, (ulong) oxaddr, (ulong) nxaddr);
2866 2865
2867 /* 2866 /*
2868 * 1. get and validate the parent xtpage/xad entry 2867 * 1. get and validate the parent xtpage/xad entry
2869 * covering the source extent to be relocated; 2868 * covering the source extent to be relocated;
2870 */ 2869 */
2871 if (xtype == DATAEXT) { 2870 if (xtype == DATAEXT) {
2872 /* search in leaf entry */ 2871 /* search in leaf entry */
@@ -2910,7 +2909,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
2910 jfs_info("xtRelocate: parent xad entry validated."); 2909 jfs_info("xtRelocate: parent xad entry validated.");
2911 2910
2912 /* 2911 /*
2913 * 2. relocate the extent 2912 * 2. relocate the extent
2914 */ 2913 */
2915 if (xtype == DATAEXT) { 2914 if (xtype == DATAEXT) {
2916 /* if the extent is allocated-but-not-recorded 2915 /* if the extent is allocated-but-not-recorded
@@ -2923,7 +2922,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
2923 XT_PUTPAGE(pmp); 2922 XT_PUTPAGE(pmp);
2924 2923
2925 /* 2924 /*
2926 * cmRelocate() 2925 * cmRelocate()
2927 * 2926 *
2928 * copy target data pages to be relocated; 2927 * copy target data pages to be relocated;
2929 * 2928 *
@@ -2945,8 +2944,8 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
2945 pno = offset >> CM_L2BSIZE; 2944 pno = offset >> CM_L2BSIZE;
2946 npages = (nbytes + (CM_BSIZE - 1)) >> CM_L2BSIZE; 2945 npages = (nbytes + (CM_BSIZE - 1)) >> CM_L2BSIZE;
2947/* 2946/*
2948 npages = ((offset + nbytes - 1) >> CM_L2BSIZE) - 2947 npages = ((offset + nbytes - 1) >> CM_L2BSIZE) -
2949 (offset >> CM_L2BSIZE) + 1; 2948 (offset >> CM_L2BSIZE) + 1;
2950*/ 2949*/
2951 sxaddr = oxaddr; 2950 sxaddr = oxaddr;
2952 dxaddr = nxaddr; 2951 dxaddr = nxaddr;
@@ -2981,7 +2980,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
2981 2980
2982 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); 2981 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
2983 jfs_info("xtRelocate: target data extent relocated."); 2982 jfs_info("xtRelocate: target data extent relocated.");
2984 } else { /* (xtype == XTPAGE) */ 2983 } else { /* (xtype == XTPAGE) */
2985 2984
2986 /* 2985 /*
2987 * read in the target xtpage from the source extent; 2986 * read in the target xtpage from the source extent;
@@ -3026,16 +3025,14 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
3026 */ 3025 */
3027 if (lmp) { 3026 if (lmp) {
3028 BT_MARK_DIRTY(lmp, ip); 3027 BT_MARK_DIRTY(lmp, ip);
3029 tlck = 3028 tlck = txLock(tid, ip, lmp, tlckXTREE | tlckRELINK);
3030 txLock(tid, ip, lmp, tlckXTREE | tlckRELINK);
3031 lp->header.next = cpu_to_le64(nxaddr); 3029 lp->header.next = cpu_to_le64(nxaddr);
3032 XT_PUTPAGE(lmp); 3030 XT_PUTPAGE(lmp);
3033 } 3031 }
3034 3032
3035 if (rmp) { 3033 if (rmp) {
3036 BT_MARK_DIRTY(rmp, ip); 3034 BT_MARK_DIRTY(rmp, ip);
3037 tlck = 3035 tlck = txLock(tid, ip, rmp, tlckXTREE | tlckRELINK);
3038 txLock(tid, ip, rmp, tlckXTREE | tlckRELINK);
3039 rp->header.prev = cpu_to_le64(nxaddr); 3036 rp->header.prev = cpu_to_le64(nxaddr);
3040 XT_PUTPAGE(rmp); 3037 XT_PUTPAGE(rmp);
3041 } 3038 }
@@ -3062,7 +3059,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
3062 * scan may be skipped by commit() and logredo(); 3059 * scan may be skipped by commit() and logredo();
3063 */ 3060 */
3064 BT_MARK_DIRTY(mp, ip); 3061 BT_MARK_DIRTY(mp, ip);
3065 /* tlckNEW init xtlck->lwm.offset = XTENTRYSTART; */ 3062 /* tlckNEW init xtlck->lwm.offset = XTENTRYSTART; */
3066 tlck = txLock(tid, ip, mp, tlckXTREE | tlckNEW); 3063 tlck = txLock(tid, ip, mp, tlckXTREE | tlckNEW);
3067 xtlck = (struct xtlock *) & tlck->lock; 3064 xtlck = (struct xtlock *) & tlck->lock;
3068 3065
@@ -3084,7 +3081,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
3084 } 3081 }
3085 3082
3086 /* 3083 /*
3087 * 3. acquire maplock for the source extent to be freed; 3084 * 3. acquire maplock for the source extent to be freed;
3088 * 3085 *
3089 * acquire a maplock saving the src relocated extent address; 3086 * acquire a maplock saving the src relocated extent address;
3090 * to free of the extent at commit time; 3087 * to free of the extent at commit time;
@@ -3105,7 +3102,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
3105 * is no buffer associated with this lock since the buffer 3102 * is no buffer associated with this lock since the buffer
3106 * has been redirected to the target location. 3103 * has been redirected to the target location.
3107 */ 3104 */
3108 else /* (xtype == XTPAGE) */ 3105 else /* (xtype == XTPAGE) */
3109 tlck = txMaplock(tid, ip, tlckMAP | tlckRELOCATE); 3106 tlck = txMaplock(tid, ip, tlckMAP | tlckRELOCATE);
3110 3107
3111 pxdlock = (struct pxd_lock *) & tlck->lock; 3108 pxdlock = (struct pxd_lock *) & tlck->lock;
@@ -3115,7 +3112,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
3115 pxdlock->index = 1; 3112 pxdlock->index = 1;
3116 3113
3117 /* 3114 /*
3118 * 4. update the parent xad entry for relocation; 3115 * 4. update the parent xad entry for relocation;
3119 * 3116 *
3120 * acquire tlck for the parent entry with XAD_NEW as entry 3117 * acquire tlck for the parent entry with XAD_NEW as entry
3121 * update which will write LOG_REDOPAGE and update bmap for 3118 * update which will write LOG_REDOPAGE and update bmap for
@@ -3143,22 +3140,22 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
3143 3140
3144 3141
3145/* 3142/*
3146 * xtSearchNode() 3143 * xtSearchNode()
3147 * 3144 *
3148 * function: search for the internal xad entry covering specified extent. 3145 * function: search for the internal xad entry covering specified extent.
3149 * This function is mainly used by defragfs utility. 3146 * This function is mainly used by defragfs utility.
3150 * 3147 *
3151 * parameters: 3148 * parameters:
3152 * ip - file object; 3149 * ip - file object;
3153 * xad - extent to find; 3150 * xad - extent to find;
3154 * cmpp - comparison result: 3151 * cmpp - comparison result:
3155 * btstack - traverse stack; 3152 * btstack - traverse stack;
3156 * flag - search process flag; 3153 * flag - search process flag;
3157 * 3154 *
3158 * returns: 3155 * returns:
3159 * btstack contains (bn, index) of search path traversed to the entry. 3156 * btstack contains (bn, index) of search path traversed to the entry.
3160 * *cmpp is set to result of comparison with the entry returned. 3157 * *cmpp is set to result of comparison with the entry returned.
3161 * the page containing the entry is pinned at exit. 3158 * the page containing the entry is pinned at exit.
3162 */ 3159 */
3163static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */ 3160static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */
3164 int *cmpp, struct btstack * btstack, int flag) 3161 int *cmpp, struct btstack * btstack, int flag)
@@ -3181,7 +3178,7 @@ static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */
3181 xaddr = addressXAD(xad); 3178 xaddr = addressXAD(xad);
3182 3179
3183 /* 3180 /*
3184 * search down tree from root: 3181 * search down tree from root:
3185 * 3182 *
3186 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of 3183 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
3187 * internal page, child page Pi contains entry with k, Ki <= K < Kj. 3184 * internal page, child page Pi contains entry with k, Ki <= K < Kj.
@@ -3217,7 +3214,7 @@ static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */
3217 XT_CMP(cmp, xoff, &p->xad[index], t64); 3214 XT_CMP(cmp, xoff, &p->xad[index], t64);
3218 if (cmp == 0) { 3215 if (cmp == 0) {
3219 /* 3216 /*
3220 * search hit 3217 * search hit
3221 * 3218 *
3222 * verify for exact match; 3219 * verify for exact match;
3223 */ 3220 */
@@ -3245,7 +3242,7 @@ static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */
3245 } 3242 }
3246 3243
3247 /* 3244 /*
3248 * search miss - non-leaf page: 3245 * search miss - non-leaf page:
3249 * 3246 *
3250 * base is the smallest index with key (Kj) greater than 3247 * base is the smallest index with key (Kj) greater than
3251 * search key (K) and may be zero or maxentry index. 3248 * search key (K) and may be zero or maxentry index.
@@ -3268,15 +3265,15 @@ static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */
3268 3265
3269 3266
3270/* 3267/*
3271 * xtRelink() 3268 * xtRelink()
3272 * 3269 *
3273 * function: 3270 * function:
3274 * link around a freed page. 3271 * link around a freed page.
3275 * 3272 *
3276 * Parameter: 3273 * Parameter:
3277 * int tid, 3274 * int tid,
3278 * struct inode *ip, 3275 * struct inode *ip,
3279 * xtpage_t *p) 3276 * xtpage_t *p)
3280 * 3277 *
3281 * returns: 3278 * returns:
3282 */ 3279 */
@@ -3338,7 +3335,7 @@ static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * p)
3338 3335
3339 3336
3340/* 3337/*
3341 * xtInitRoot() 3338 * xtInitRoot()
3342 * 3339 *
3343 * initialize file root (inline in inode) 3340 * initialize file root (inline in inode)
3344 */ 3341 */
@@ -3385,42 +3382,42 @@ void xtInitRoot(tid_t tid, struct inode *ip)
3385#define MAX_TRUNCATE_LEAVES 50 3382#define MAX_TRUNCATE_LEAVES 50
3386 3383
3387/* 3384/*
3388 * xtTruncate() 3385 * xtTruncate()
3389 * 3386 *
3390 * function: 3387 * function:
3391 * traverse for truncation logging backward bottom up; 3388 * traverse for truncation logging backward bottom up;
3392 * terminate at the last extent entry at the current subtree 3389 * terminate at the last extent entry at the current subtree
3393 * root page covering new down size. 3390 * root page covering new down size.
3394 * truncation may occur within the last extent entry. 3391 * truncation may occur within the last extent entry.
3395 * 3392 *
3396 * parameter: 3393 * parameter:
3397 * int tid, 3394 * int tid,
3398 * struct inode *ip, 3395 * struct inode *ip,
3399 * s64 newsize, 3396 * s64 newsize,
3400 * int type) {PWMAP, PMAP, WMAP; DELETE, TRUNCATE} 3397 * int type) {PWMAP, PMAP, WMAP; DELETE, TRUNCATE}
3401 * 3398 *
3402 * return: 3399 * return:
3403 * 3400 *
3404 * note: 3401 * note:
3405 * PWMAP: 3402 * PWMAP:
3406 * 1. truncate (non-COMMIT_NOLINK file) 3403 * 1. truncate (non-COMMIT_NOLINK file)
3407 * by jfs_truncate() or jfs_open(O_TRUNC): 3404 * by jfs_truncate() or jfs_open(O_TRUNC):
3408 * xtree is updated; 3405 * xtree is updated;
3409 * 2. truncate index table of directory when last entry removed 3406 * 2. truncate index table of directory when last entry removed
3410 * map update via tlock at commit time; 3407 * map update via tlock at commit time;
3411 * PMAP: 3408 * PMAP:
3412 * Call xtTruncate_pmap instead 3409 * Call xtTruncate_pmap instead
3413 * WMAP: 3410 * WMAP:
3414 * 1. remove (free zero link count) on last reference release 3411 * 1. remove (free zero link count) on last reference release
3415 * (pmap has been freed at commit zero link count); 3412 * (pmap has been freed at commit zero link count);
3416 * 2. truncate (COMMIT_NOLINK file, i.e., tmp file): 3413 * 2. truncate (COMMIT_NOLINK file, i.e., tmp file):
3417 * xtree is updated; 3414 * xtree is updated;
3418 * map update directly at truncation time; 3415 * map update directly at truncation time;
3419 * 3416 *
3420 * if (DELETE) 3417 * if (DELETE)
3421 * no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient); 3418 * no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient);
3422 * else if (TRUNCATE) 3419 * else if (TRUNCATE)
3423 * must write LOG_NOREDOPAGE for deleted index page; 3420 * must write LOG_NOREDOPAGE for deleted index page;
3424 * 3421 *
3425 * pages may already have been tlocked by anonymous transactions 3422 * pages may already have been tlocked by anonymous transactions
3426 * during file growth (i.e., write) before truncation; 3423 * during file growth (i.e., write) before truncation;
@@ -3493,7 +3490,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
3493 * retained in the new sized file. 3490 * retained in the new sized file.
3494 * if type is PMAP, the data and index pages are NOT 3491 * if type is PMAP, the data and index pages are NOT
3495 * freed, and the data and index blocks are NOT freed 3492 * freed, and the data and index blocks are NOT freed
3496 * from working map. 3493 * from working map.
3497 * (this will allow continued access of data/index of 3494 * (this will allow continued access of data/index of
3498 * temporary file (zerolink count file truncated to zero-length)). 3495 * temporary file (zerolink count file truncated to zero-length)).
3499 */ 3496 */
@@ -3542,7 +3539,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
3542 goto getChild; 3539 goto getChild;
3543 3540
3544 /* 3541 /*
3545 * leaf page 3542 * leaf page
3546 */ 3543 */
3547 freed = 0; 3544 freed = 0;
3548 3545
@@ -3916,7 +3913,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
3916 } 3913 }
3917 3914
3918 /* 3915 /*
3919 * internal page: go down to child page of current entry 3916 * internal page: go down to child page of current entry
3920 */ 3917 */
3921 getChild: 3918 getChild:
3922 /* save current parent entry for the child page */ 3919 /* save current parent entry for the child page */
@@ -3965,7 +3962,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
3965 3962
3966 3963
3967/* 3964/*
3968 * xtTruncate_pmap() 3965 * xtTruncate_pmap()
3969 * 3966 *
3970 * function: 3967 * function:
3971 * Perform truncate to zero length for deleted file, leaving the 3968 * Perform truncate to zero length for deleted file, leaving the
@@ -3974,9 +3971,9 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
3974 * is committed to disk. 3971 * is committed to disk.
3975 * 3972 *
3976 * parameter: 3973 * parameter:
3977 * tid_t tid, 3974 * tid_t tid,
3978 * struct inode *ip, 3975 * struct inode *ip,
3979 * s64 committed_size) 3976 * s64 committed_size)
3980 * 3977 *
3981 * return: new committed size 3978 * return: new committed size
3982 * 3979 *
@@ -4050,7 +4047,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
4050 } 4047 }
4051 4048
4052 /* 4049 /*
4053 * leaf page 4050 * leaf page
4054 */ 4051 */
4055 4052
4056 if (++locked_leaves > MAX_TRUNCATE_LEAVES) { 4053 if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
@@ -4062,7 +4059,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
4062 xoff = offsetXAD(xad); 4059 xoff = offsetXAD(xad);
4063 xlen = lengthXAD(xad); 4060 xlen = lengthXAD(xad);
4064 XT_PUTPAGE(mp); 4061 XT_PUTPAGE(mp);
4065 return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; 4062 return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
4066 } 4063 }
4067 tlck = txLock(tid, ip, mp, tlckXTREE); 4064 tlck = txLock(tid, ip, mp, tlckXTREE);
4068 tlck->type = tlckXTREE | tlckFREE; 4065 tlck->type = tlckXTREE | tlckFREE;
@@ -4099,8 +4096,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
4099 */ 4096 */
4100 tlck = txLock(tid, ip, mp, tlckXTREE); 4097 tlck = txLock(tid, ip, mp, tlckXTREE);
4101 xtlck = (struct xtlock *) & tlck->lock; 4098 xtlck = (struct xtlock *) & tlck->lock;
4102 xtlck->hwm.offset = 4099 xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
4103 le16_to_cpu(p->header.nextindex) - 1;
4104 tlck->type = tlckXTREE | tlckFREE; 4100 tlck->type = tlckXTREE | tlckFREE;
4105 4101
4106 XT_PUTPAGE(mp); 4102 XT_PUTPAGE(mp);
@@ -4118,7 +4114,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
4118 else 4114 else
4119 index--; 4115 index--;
4120 /* 4116 /*
4121 * internal page: go down to child page of current entry 4117 * internal page: go down to child page of current entry
4122 */ 4118 */
4123 getChild: 4119 getChild:
4124 /* save current parent entry for the child page */ 4120 /* save current parent entry for the child page */
diff --git a/fs/jfs/jfs_xtree.h b/fs/jfs/jfs_xtree.h
index 164f6f2b1019..70815c8a3d6a 100644
--- a/fs/jfs/jfs_xtree.h
+++ b/fs/jfs/jfs_xtree.h
@@ -19,14 +19,14 @@
19#define _H_JFS_XTREE 19#define _H_JFS_XTREE
20 20
21/* 21/*
22 * jfs_xtree.h: extent allocation descriptor B+-tree manager 22 * jfs_xtree.h: extent allocation descriptor B+-tree manager
23 */ 23 */
24 24
25#include "jfs_btree.h" 25#include "jfs_btree.h"
26 26
27 27
28/* 28/*
29 * extent allocation descriptor (xad) 29 * extent allocation descriptor (xad)
30 */ 30 */
31typedef struct xad { 31typedef struct xad {
32 unsigned flag:8; /* 1: flag */ 32 unsigned flag:8; /* 1: flag */
@@ -38,30 +38,30 @@ typedef struct xad {
38 __le32 addr2; /* 4: address in unit of fsblksize */ 38 __le32 addr2; /* 4: address in unit of fsblksize */
39} xad_t; /* (16) */ 39} xad_t; /* (16) */
40 40
41#define MAXXLEN ((1 << 24) - 1) 41#define MAXXLEN ((1 << 24) - 1)
42 42
43#define XTSLOTSIZE 16 43#define XTSLOTSIZE 16
44#define L2XTSLOTSIZE 4 44#define L2XTSLOTSIZE 4
45 45
46/* xad_t field construction */ 46/* xad_t field construction */
47#define XADoffset(xad, offset64)\ 47#define XADoffset(xad, offset64)\
48{\ 48{\
49 (xad)->off1 = ((u64)offset64) >> 32;\ 49 (xad)->off1 = ((u64)offset64) >> 32;\
50 (xad)->off2 = __cpu_to_le32((offset64) & 0xffffffff);\ 50 (xad)->off2 = __cpu_to_le32((offset64) & 0xffffffff);\
51} 51}
52#define XADaddress(xad, address64)\ 52#define XADaddress(xad, address64)\
53{\ 53{\
54 (xad)->addr1 = ((u64)address64) >> 32;\ 54 (xad)->addr1 = ((u64)address64) >> 32;\
55 (xad)->addr2 = __cpu_to_le32((address64) & 0xffffffff);\ 55 (xad)->addr2 = __cpu_to_le32((address64) & 0xffffffff);\
56} 56}
57#define XADlength(xad, length32) (xad)->len = __cpu_to_le24(length32) 57#define XADlength(xad, length32) (xad)->len = __cpu_to_le24(length32)
58 58
59/* xad_t field extraction */ 59/* xad_t field extraction */
60#define offsetXAD(xad)\ 60#define offsetXAD(xad)\
61 ( ((s64)((xad)->off1)) << 32 | __le32_to_cpu((xad)->off2)) 61 ( ((s64)((xad)->off1)) << 32 | __le32_to_cpu((xad)->off2))
62#define addressXAD(xad)\ 62#define addressXAD(xad)\
63 ( ((s64)((xad)->addr1)) << 32 | __le32_to_cpu((xad)->addr2)) 63 ( ((s64)((xad)->addr1)) << 32 | __le32_to_cpu((xad)->addr2))
64#define lengthXAD(xad) __le24_to_cpu((xad)->len) 64#define lengthXAD(xad) __le24_to_cpu((xad)->len)
65 65
66/* xad list */ 66/* xad list */
67struct xadlist { 67struct xadlist {
@@ -71,22 +71,22 @@ struct xadlist {
71}; 71};
72 72
73/* xad_t flags */ 73/* xad_t flags */
74#define XAD_NEW 0x01 /* new */ 74#define XAD_NEW 0x01 /* new */
75#define XAD_EXTENDED 0x02 /* extended */ 75#define XAD_EXTENDED 0x02 /* extended */
76#define XAD_COMPRESSED 0x04 /* compressed with recorded length */ 76#define XAD_COMPRESSED 0x04 /* compressed with recorded length */
77#define XAD_NOTRECORDED 0x08 /* allocated but not recorded */ 77#define XAD_NOTRECORDED 0x08 /* allocated but not recorded */
78#define XAD_COW 0x10 /* copy-on-write */ 78#define XAD_COW 0x10 /* copy-on-write */
79 79
80 80
81/* possible values for maxentry */ 81/* possible values for maxentry */
82#define XTROOTINITSLOT_DIR 6 82#define XTROOTINITSLOT_DIR 6
83#define XTROOTINITSLOT 10 83#define XTROOTINITSLOT 10
84#define XTROOTMAXSLOT 18 84#define XTROOTMAXSLOT 18
85#define XTPAGEMAXSLOT 256 85#define XTPAGEMAXSLOT 256
86#define XTENTRYSTART 2 86#define XTENTRYSTART 2
87 87
88/* 88/*
89 * xtree page: 89 * xtree page:
90 */ 90 */
91typedef union { 91typedef union {
92 struct xtheader { 92 struct xtheader {
@@ -106,7 +106,7 @@ typedef union {
106} xtpage_t; 106} xtpage_t;
107 107
108/* 108/*
109 * external declaration 109 * external declaration
110 */ 110 */
111extern int xtLookup(struct inode *ip, s64 lstart, s64 llen, 111extern int xtLookup(struct inode *ip, s64 lstart, s64 llen,
112 int *pflag, s64 * paddr, int *plen, int flag); 112 int *pflag, s64 * paddr, int *plen, int flag);
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 41c204771262..25161c4121e4 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -328,7 +328,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
328 * dentry - child directory dentry 328 * dentry - child directory dentry
329 * 329 *
330 * RETURN: -EINVAL - if name is . or .. 330 * RETURN: -EINVAL - if name is . or ..
331 * -EINVAL - if . or .. exist but are invalid. 331 * -EINVAL - if . or .. exist but are invalid.
332 * errors from subroutines 332 * errors from subroutines
333 * 333 *
334 * note: 334 * note:
@@ -517,7 +517,7 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry)
517 inode_dec_link_count(ip); 517 inode_dec_link_count(ip);
518 518
519 /* 519 /*
520 * commit zero link count object 520 * commit zero link count object
521 */ 521 */
522 if (ip->i_nlink == 0) { 522 if (ip->i_nlink == 0) {
523 assert(!test_cflag(COMMIT_Nolink, ip)); 523 assert(!test_cflag(COMMIT_Nolink, ip));
@@ -596,7 +596,7 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry)
596/* 596/*
597 * NAME: commitZeroLink() 597 * NAME: commitZeroLink()
598 * 598 *
599 * FUNCTION: for non-directory, called by jfs_remove(), 599 * FUNCTION: for non-directory, called by jfs_remove(),
600 * truncate a regular file, directory or symbolic 600 * truncate a regular file, directory or symbolic
601 * link to zero length. return 0 if type is not 601 * link to zero length. return 0 if type is not
602 * one of these. 602 * one of these.
@@ -676,7 +676,7 @@ static s64 commitZeroLink(tid_t tid, struct inode *ip)
676/* 676/*
677 * NAME: jfs_free_zero_link() 677 * NAME: jfs_free_zero_link()
678 * 678 *
679 * FUNCTION: for non-directory, called by iClose(), 679 * FUNCTION: for non-directory, called by iClose(),
680 * free resources of a file from cache and WORKING map 680 * free resources of a file from cache and WORKING map
681 * for a file previously committed with zero link count 681 * for a file previously committed with zero link count
682 * while associated with a pager object, 682 * while associated with a pager object,
@@ -855,12 +855,12 @@ static int jfs_link(struct dentry *old_dentry,
855 * NAME: jfs_symlink(dip, dentry, name) 855 * NAME: jfs_symlink(dip, dentry, name)
856 * 856 *
857 * FUNCTION: creates a symbolic link to <symlink> by name <name> 857 * FUNCTION: creates a symbolic link to <symlink> by name <name>
858 * in directory <dip> 858 * in directory <dip>
859 * 859 *
860 * PARAMETER: dip - parent directory vnode 860 * PARAMETER: dip - parent directory vnode
861 * dentry - dentry of symbolic link 861 * dentry - dentry of symbolic link
862 * name - the path name of the existing object 862 * name - the path name of the existing object
863 * that will be the source of the link 863 * that will be the source of the link
864 * 864 *
865 * RETURN: errors from subroutines 865 * RETURN: errors from subroutines
866 * 866 *
@@ -1052,9 +1052,9 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
1052 1052
1053 1053
1054/* 1054/*
1055 * NAME: jfs_rename 1055 * NAME: jfs_rename
1056 * 1056 *
1057 * FUNCTION: rename a file or directory 1057 * FUNCTION: rename a file or directory
1058 */ 1058 */
1059static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, 1059static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1060 struct inode *new_dir, struct dentry *new_dentry) 1060 struct inode *new_dir, struct dentry *new_dentry)
@@ -1331,9 +1331,9 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1331 1331
1332 1332
1333/* 1333/*
1334 * NAME: jfs_mknod 1334 * NAME: jfs_mknod
1335 * 1335 *
1336 * FUNCTION: Create a special file (device) 1336 * FUNCTION: Create a special file (device)
1337 */ 1337 */
1338static int jfs_mknod(struct inode *dir, struct dentry *dentry, 1338static int jfs_mknod(struct inode *dir, struct dentry *dentry,
1339 int mode, dev_t rdev) 1339 int mode, dev_t rdev)
diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c
index 79d625f3f733..71984ee95346 100644
--- a/fs/jfs/resize.c
+++ b/fs/jfs/resize.c
@@ -29,17 +29,17 @@
29#include "jfs_txnmgr.h" 29#include "jfs_txnmgr.h"
30#include "jfs_debug.h" 30#include "jfs_debug.h"
31 31
32#define BITSPERPAGE (PSIZE << 3) 32#define BITSPERPAGE (PSIZE << 3)
33#define L2MEGABYTE 20 33#define L2MEGABYTE 20
34#define MEGABYTE (1 << L2MEGABYTE) 34#define MEGABYTE (1 << L2MEGABYTE)
35#define MEGABYTE32 (MEGABYTE << 5) 35#define MEGABYTE32 (MEGABYTE << 5)
36 36
37/* convert block number to bmap file page number */ 37/* convert block number to bmap file page number */
38#define BLKTODMAPN(b)\ 38#define BLKTODMAPN(b)\
39 (((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1) 39 (((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1)
40 40
41/* 41/*
42 * jfs_extendfs() 42 * jfs_extendfs()
43 * 43 *
44 * function: extend file system; 44 * function: extend file system;
45 * 45 *
@@ -48,9 +48,9 @@
48 * workspace space 48 * workspace space
49 * 49 *
50 * input: 50 * input:
51 * new LVSize: in LV blocks (required) 51 * new LVSize: in LV blocks (required)
52 * new LogSize: in LV blocks (optional) 52 * new LogSize: in LV blocks (optional)
53 * new FSSize: in LV blocks (optional) 53 * new FSSize: in LV blocks (optional)
54 * 54 *
55 * new configuration: 55 * new configuration:
56 * 1. set new LogSize as specified or default from new LVSize; 56 * 1. set new LogSize as specified or default from new LVSize;
@@ -125,8 +125,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
125 } 125 }
126 126
127 /* 127 /*
128 * reconfigure LV spaces 128 * reconfigure LV spaces
129 * --------------------- 129 * ---------------------
130 * 130 *
131 * validate new size, or, if not specified, determine new size 131 * validate new size, or, if not specified, determine new size
132 */ 132 */
@@ -198,7 +198,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
198 log_formatted = 1; 198 log_formatted = 1;
199 } 199 }
200 /* 200 /*
201 * quiesce file system 201 * quiesce file system
202 * 202 *
203 * (prepare to move the inline log and to prevent map update) 203 * (prepare to move the inline log and to prevent map update)
204 * 204 *
@@ -270,8 +270,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
270 } 270 }
271 271
272 /* 272 /*
273 * extend block allocation map 273 * extend block allocation map
274 * --------------------------- 274 * ---------------------------
275 * 275 *
276 * extendfs() for new extension, retry after crash recovery; 276 * extendfs() for new extension, retry after crash recovery;
277 * 277 *
@@ -283,7 +283,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
283 * s_size: aggregate size in physical blocks; 283 * s_size: aggregate size in physical blocks;
284 */ 284 */
285 /* 285 /*
286 * compute the new block allocation map configuration 286 * compute the new block allocation map configuration
287 * 287 *
288 * map dinode: 288 * map dinode:
289 * di_size: map file size in byte; 289 * di_size: map file size in byte;
@@ -301,7 +301,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
301 newNpages = BLKTODMAPN(t64) + 1; 301 newNpages = BLKTODMAPN(t64) + 1;
302 302
303 /* 303 /*
304 * extend map from current map (WITHOUT growing mapfile) 304 * extend map from current map (WITHOUT growing mapfile)
305 * 305 *
306 * map new extension with unmapped part of the last partial 306 * map new extension with unmapped part of the last partial
307 * dmap page, if applicable, and extra page(s) allocated 307 * dmap page, if applicable, and extra page(s) allocated
@@ -341,8 +341,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
341 XSize -= nblocks; 341 XSize -= nblocks;
342 342
343 /* 343 /*
344 * grow map file to cover remaining extension 344 * grow map file to cover remaining extension
345 * and/or one extra dmap page for next extendfs(); 345 * and/or one extra dmap page for next extendfs();
346 * 346 *
347 * allocate new map pages and its backing blocks, and 347 * allocate new map pages and its backing blocks, and
348 * update map file xtree 348 * update map file xtree
@@ -422,8 +422,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
422 dbFinalizeBmap(ipbmap); 422 dbFinalizeBmap(ipbmap);
423 423
424 /* 424 /*
425 * update inode allocation map 425 * update inode allocation map
426 * --------------------------- 426 * ---------------------------
427 * 427 *
428 * move iag lists from old to new iag; 428 * move iag lists from old to new iag;
429 * agstart field is not updated for logredo() to reconstruct 429 * agstart field is not updated for logredo() to reconstruct
@@ -442,8 +442,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
442 } 442 }
443 443
444 /* 444 /*
445 * finalize 445 * finalize
446 * -------- 446 * --------
447 * 447 *
448 * extension is committed when on-disk super block is 448 * extension is committed when on-disk super block is
449 * updated with new descriptors: logredo will recover 449 * updated with new descriptors: logredo will recover
@@ -480,7 +480,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
480 diFreeSpecial(ipbmap2); 480 diFreeSpecial(ipbmap2);
481 481
482 /* 482 /*
483 * update superblock 483 * update superblock
484 */ 484 */
485 if ((rc = readSuper(sb, &bh))) 485 if ((rc = readSuper(sb, &bh)))
486 goto error_out; 486 goto error_out;
@@ -530,7 +530,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
530 530
531 resume: 531 resume:
532 /* 532 /*
533 * resume file system transactions 533 * resume file system transactions
534 */ 534 */
535 txResume(sb); 535 txResume(sb);
536 536
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index b753ba216450..b2375f0774b7 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -63,9 +63,9 @@
63 * 63 *
64 * On-disk: 64 * On-disk:
65 * 65 *
66 * FEALISTs are stored on disk using blocks allocated by dbAlloc() and 66 * FEALISTs are stored on disk using blocks allocated by dbAlloc() and
67 * written directly. An EA list may be in-lined in the inode if there is 67 * written directly. An EA list may be in-lined in the inode if there is
68 * sufficient room available. 68 * sufficient room available.
69 */ 69 */
70 70
71struct ea_buffer { 71struct ea_buffer {
@@ -590,7 +590,8 @@ static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size)
590 size_check: 590 size_check:
591 if (EALIST_SIZE(ea_buf->xattr) != ea_size) { 591 if (EALIST_SIZE(ea_buf->xattr) != ea_size) {
592 printk(KERN_ERR "ea_get: invalid extended attribute\n"); 592 printk(KERN_ERR "ea_get: invalid extended attribute\n");
593 dump_mem("xattr", ea_buf->xattr, ea_size); 593 print_hex_dump(KERN_ERR, "", DUMP_PREFIX_ADDRESS, 16, 1,
594 ea_buf->xattr, ea_size, 1);
594 ea_release(inode, ea_buf); 595 ea_release(inode, ea_buf);
595 rc = -EIO; 596 rc = -EIO;
596 goto clean_up; 597 goto clean_up;
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 96070bff93fc..572601e98dcd 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -44,9 +44,8 @@ static struct nsm_handle * nsm_find(const struct sockaddr_in *sin,
44 */ 44 */
45static struct nlm_host * 45static struct nlm_host *
46nlm_lookup_host(int server, const struct sockaddr_in *sin, 46nlm_lookup_host(int server, const struct sockaddr_in *sin,
47 int proto, int version, 47 int proto, int version, const char *hostname,
48 const char *hostname, 48 int hostname_len, const struct sockaddr_in *ssin)
49 int hostname_len)
50{ 49{
51 struct hlist_head *chain; 50 struct hlist_head *chain;
52 struct hlist_node *pos; 51 struct hlist_node *pos;
@@ -54,7 +53,9 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin,
54 struct nsm_handle *nsm = NULL; 53 struct nsm_handle *nsm = NULL;
55 int hash; 54 int hash;
56 55
57 dprintk("lockd: nlm_lookup_host(%u.%u.%u.%u, p=%d, v=%d, my role=%s, name=%.*s)\n", 56 dprintk("lockd: nlm_lookup_host("NIPQUAD_FMT"->"NIPQUAD_FMT
57 ", p=%d, v=%d, my role=%s, name=%.*s)\n",
58 NIPQUAD(ssin->sin_addr.s_addr),
58 NIPQUAD(sin->sin_addr.s_addr), proto, version, 59 NIPQUAD(sin->sin_addr.s_addr), proto, version,
59 server? "server" : "client", 60 server? "server" : "client",
60 hostname_len, 61 hostname_len,
@@ -91,6 +92,8 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin,
91 continue; 92 continue;
92 if (host->h_server != server) 93 if (host->h_server != server)
93 continue; 94 continue;
95 if (!nlm_cmp_addr(&host->h_saddr, ssin))
96 continue;
94 97
95 /* Move to head of hash chain. */ 98 /* Move to head of hash chain. */
96 hlist_del(&host->h_hash); 99 hlist_del(&host->h_hash);
@@ -118,6 +121,7 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin,
118 host->h_name = nsm->sm_name; 121 host->h_name = nsm->sm_name;
119 host->h_addr = *sin; 122 host->h_addr = *sin;
120 host->h_addr.sin_port = 0; /* ouch! */ 123 host->h_addr.sin_port = 0; /* ouch! */
124 host->h_saddr = *ssin;
121 host->h_version = version; 125 host->h_version = version;
122 host->h_proto = proto; 126 host->h_proto = proto;
123 host->h_rpcclnt = NULL; 127 host->h_rpcclnt = NULL;
@@ -161,15 +165,9 @@ nlm_destroy_host(struct nlm_host *host)
161 */ 165 */
162 nsm_unmonitor(host); 166 nsm_unmonitor(host);
163 167
164 if ((clnt = host->h_rpcclnt) != NULL) { 168 clnt = host->h_rpcclnt;
165 if (atomic_read(&clnt->cl_users)) { 169 if (clnt != NULL)
166 printk(KERN_WARNING 170 rpc_shutdown_client(clnt);
167 "lockd: active RPC handle\n");
168 clnt->cl_dead = 1;
169 } else {
170 rpc_destroy_client(host->h_rpcclnt);
171 }
172 }
173 kfree(host); 171 kfree(host);
174} 172}
175 173
@@ -180,8 +178,10 @@ struct nlm_host *
180nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version, 178nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version,
181 const char *hostname, int hostname_len) 179 const char *hostname, int hostname_len)
182{ 180{
181 struct sockaddr_in ssin = {0};
182
183 return nlm_lookup_host(0, sin, proto, version, 183 return nlm_lookup_host(0, sin, proto, version,
184 hostname, hostname_len); 184 hostname, hostname_len, &ssin);
185} 185}
186 186
187/* 187/*
@@ -191,9 +191,12 @@ struct nlm_host *
191nlmsvc_lookup_host(struct svc_rqst *rqstp, 191nlmsvc_lookup_host(struct svc_rqst *rqstp,
192 const char *hostname, int hostname_len) 192 const char *hostname, int hostname_len)
193{ 193{
194 struct sockaddr_in ssin = {0};
195
196 ssin.sin_addr = rqstp->rq_daddr.addr;
194 return nlm_lookup_host(1, svc_addr_in(rqstp), 197 return nlm_lookup_host(1, svc_addr_in(rqstp),
195 rqstp->rq_prot, rqstp->rq_vers, 198 rqstp->rq_prot, rqstp->rq_vers,
196 hostname, hostname_len); 199 hostname, hostname_len, &ssin);
197} 200}
198 201
199/* 202/*
@@ -204,8 +207,9 @@ nlm_bind_host(struct nlm_host *host)
204{ 207{
205 struct rpc_clnt *clnt; 208 struct rpc_clnt *clnt;
206 209
207 dprintk("lockd: nlm_bind_host(%08x)\n", 210 dprintk("lockd: nlm_bind_host("NIPQUAD_FMT"->"NIPQUAD_FMT")\n",
208 (unsigned)ntohl(host->h_addr.sin_addr.s_addr)); 211 NIPQUAD(host->h_saddr.sin_addr),
212 NIPQUAD(host->h_addr.sin_addr));
209 213
210 /* Lock host handle */ 214 /* Lock host handle */
211 mutex_lock(&host->h_mutex); 215 mutex_lock(&host->h_mutex);
@@ -232,6 +236,7 @@ nlm_bind_host(struct nlm_host *host)
232 .protocol = host->h_proto, 236 .protocol = host->h_proto,
233 .address = (struct sockaddr *)&host->h_addr, 237 .address = (struct sockaddr *)&host->h_addr,
234 .addrsize = sizeof(host->h_addr), 238 .addrsize = sizeof(host->h_addr),
239 .saddress = (struct sockaddr *)&host->h_saddr,
235 .timeout = &timeparms, 240 .timeout = &timeparms,
236 .servername = host->h_name, 241 .servername = host->h_name,
237 .program = &nlm_program, 242 .program = &nlm_program,
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 2102e2d0134d..3353ed8421a7 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -61,6 +61,7 @@ nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res)
61 status); 61 status);
62 else 62 else
63 status = 0; 63 status = 0;
64 rpc_shutdown_client(clnt);
64 out: 65 out:
65 return status; 66 return status;
66} 67}
@@ -138,7 +139,6 @@ nsm_create(void)
138 .program = &nsm_program, 139 .program = &nsm_program,
139 .version = SM_VERSION, 140 .version = SM_VERSION,
140 .authflavor = RPC_AUTH_NULL, 141 .authflavor = RPC_AUTH_NULL,
141 .flags = (RPC_CLNT_CREATE_ONESHOT),
142 }; 142 };
143 143
144 return rpc_create(&args); 144 return rpc_create(&args);
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 126b1bf02c0e..26809325469c 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -123,9 +123,6 @@ lockd(struct svc_rqst *rqstp)
123 /* Process request with signals blocked, but allow SIGKILL. */ 123 /* Process request with signals blocked, but allow SIGKILL. */
124 allow_signal(SIGKILL); 124 allow_signal(SIGKILL);
125 125
126 /* kick rpciod */
127 rpciod_up();
128
129 dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n"); 126 dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n");
130 127
131 if (!nlm_timeout) 128 if (!nlm_timeout)
@@ -202,9 +199,6 @@ lockd(struct svc_rqst *rqstp)
202 /* Exit the RPC thread */ 199 /* Exit the RPC thread */
203 svc_exit_thread(rqstp); 200 svc_exit_thread(rqstp);
204 201
205 /* release rpciod */
206 rpciod_down();
207
208 /* Release module */ 202 /* Release module */
209 unlock_kernel(); 203 unlock_kernel();
210 module_put_and_exit(0); 204 module_put_and_exit(0);
diff --git a/fs/minix/file.c b/fs/minix/file.c
index f92baa1d7570..17765f697e50 100644
--- a/fs/minix/file.c
+++ b/fs/minix/file.c
@@ -23,7 +23,7 @@ const struct file_operations minix_file_operations = {
23 .aio_write = generic_file_aio_write, 23 .aio_write = generic_file_aio_write,
24 .mmap = generic_file_mmap, 24 .mmap = generic_file_mmap,
25 .fsync = minix_sync_file, 25 .fsync = minix_sync_file,
26 .sendfile = generic_file_sendfile, 26 .splice_read = generic_file_splice_read,
27}; 27};
28 28
29const struct inode_operations minix_file_inode_operations = { 29const struct inode_operations minix_file_inode_operations = {
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index f4580b44eef4..b55cb236cf74 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -6,8 +6,8 @@ obj-$(CONFIG_NFS_FS) += nfs.o
6 6
7nfs-y := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \ 7nfs-y := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \
8 pagelist.o proc.o read.o symlink.o unlink.o \ 8 pagelist.o proc.o read.o symlink.o unlink.o \
9 write.o namespace.o 9 write.o namespace.o mount_clnt.o
10nfs-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o 10nfs-$(CONFIG_ROOT_NFS) += nfsroot.o
11nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o 11nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o
12nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o 12nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o
13nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ 13nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 881fa4900923..ccb455053ee4 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -102,19 +102,10 @@ static struct nfs_client *nfs_alloc_client(const char *hostname,
102 int nfsversion) 102 int nfsversion)
103{ 103{
104 struct nfs_client *clp; 104 struct nfs_client *clp;
105 int error;
106 105
107 if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) 106 if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL)
108 goto error_0; 107 goto error_0;
109 108
110 error = rpciod_up();
111 if (error < 0) {
112 dprintk("%s: couldn't start rpciod! Error = %d\n",
113 __FUNCTION__, error);
114 goto error_1;
115 }
116 __set_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
117
118 if (nfsversion == 4) { 109 if (nfsversion == 4) {
119 if (nfs_callback_up() < 0) 110 if (nfs_callback_up() < 0)
120 goto error_2; 111 goto error_2;
@@ -139,8 +130,6 @@ static struct nfs_client *nfs_alloc_client(const char *hostname,
139#ifdef CONFIG_NFS_V4 130#ifdef CONFIG_NFS_V4
140 init_rwsem(&clp->cl_sem); 131 init_rwsem(&clp->cl_sem);
141 INIT_LIST_HEAD(&clp->cl_delegations); 132 INIT_LIST_HEAD(&clp->cl_delegations);
142 INIT_LIST_HEAD(&clp->cl_state_owners);
143 INIT_LIST_HEAD(&clp->cl_unused);
144 spin_lock_init(&clp->cl_lock); 133 spin_lock_init(&clp->cl_lock);
145 INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state); 134 INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state);
146 rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); 135 rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
@@ -154,9 +143,6 @@ error_3:
154 if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) 143 if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
155 nfs_callback_down(); 144 nfs_callback_down();
156error_2: 145error_2:
157 rpciod_down();
158 __clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
159error_1:
160 kfree(clp); 146 kfree(clp);
161error_0: 147error_0:
162 return NULL; 148 return NULL;
@@ -167,16 +153,7 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
167#ifdef CONFIG_NFS_V4 153#ifdef CONFIG_NFS_V4
168 if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state)) 154 if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state))
169 nfs4_kill_renewd(clp); 155 nfs4_kill_renewd(clp);
170 while (!list_empty(&clp->cl_unused)) { 156 BUG_ON(!RB_EMPTY_ROOT(&clp->cl_state_owners));
171 struct nfs4_state_owner *sp;
172
173 sp = list_entry(clp->cl_unused.next,
174 struct nfs4_state_owner,
175 so_list);
176 list_del(&sp->so_list);
177 kfree(sp);
178 }
179 BUG_ON(!list_empty(&clp->cl_state_owners));
180 if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state)) 157 if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state))
181 nfs_idmap_delete(clp); 158 nfs_idmap_delete(clp);
182#endif 159#endif
@@ -198,9 +175,6 @@ static void nfs_free_client(struct nfs_client *clp)
198 if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) 175 if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
199 nfs_callback_down(); 176 nfs_callback_down();
200 177
201 if (__test_and_clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state))
202 rpciod_down();
203
204 kfree(clp->cl_hostname); 178 kfree(clp->cl_hostname);
205 kfree(clp); 179 kfree(clp);
206 180
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 7f37d1bea83f..20ac403469a0 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -27,6 +27,13 @@ static void nfs_free_delegation(struct nfs_delegation *delegation)
27 kfree(delegation); 27 kfree(delegation);
28} 28}
29 29
30static void nfs_free_delegation_callback(struct rcu_head *head)
31{
32 struct nfs_delegation *delegation = container_of(head, struct nfs_delegation, rcu);
33
34 nfs_free_delegation(delegation);
35}
36
30static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state) 37static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state)
31{ 38{
32 struct inode *inode = state->inode; 39 struct inode *inode = state->inode;
@@ -57,7 +64,7 @@ out_err:
57 return status; 64 return status;
58} 65}
59 66
60static void nfs_delegation_claim_opens(struct inode *inode) 67static void nfs_delegation_claim_opens(struct inode *inode, const nfs4_stateid *stateid)
61{ 68{
62 struct nfs_inode *nfsi = NFS_I(inode); 69 struct nfs_inode *nfsi = NFS_I(inode);
63 struct nfs_open_context *ctx; 70 struct nfs_open_context *ctx;
@@ -72,9 +79,11 @@ again:
72 continue; 79 continue;
73 if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) 80 if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
74 continue; 81 continue;
82 if (memcmp(state->stateid.data, stateid->data, sizeof(state->stateid.data)) != 0)
83 continue;
75 get_nfs_open_context(ctx); 84 get_nfs_open_context(ctx);
76 spin_unlock(&inode->i_lock); 85 spin_unlock(&inode->i_lock);
77 err = nfs4_open_delegation_recall(ctx->dentry, state); 86 err = nfs4_open_delegation_recall(ctx, state, stateid);
78 if (err >= 0) 87 if (err >= 0)
79 err = nfs_delegation_claim_locks(ctx, state); 88 err = nfs_delegation_claim_locks(ctx, state);
80 put_nfs_open_context(ctx); 89 put_nfs_open_context(ctx);
@@ -115,10 +124,6 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
115 struct nfs_delegation *delegation; 124 struct nfs_delegation *delegation;
116 int status = 0; 125 int status = 0;
117 126
118 /* Ensure we first revalidate the attributes and page cache! */
119 if ((nfsi->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_ATTR)))
120 __nfs_revalidate_inode(NFS_SERVER(inode), inode);
121
122 delegation = kmalloc(sizeof(*delegation), GFP_KERNEL); 127 delegation = kmalloc(sizeof(*delegation), GFP_KERNEL);
123 if (delegation == NULL) 128 if (delegation == NULL)
124 return -ENOMEM; 129 return -ENOMEM;
@@ -131,10 +136,10 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
131 delegation->inode = inode; 136 delegation->inode = inode;
132 137
133 spin_lock(&clp->cl_lock); 138 spin_lock(&clp->cl_lock);
134 if (nfsi->delegation == NULL) { 139 if (rcu_dereference(nfsi->delegation) == NULL) {
135 list_add(&delegation->super_list, &clp->cl_delegations); 140 list_add_rcu(&delegation->super_list, &clp->cl_delegations);
136 nfsi->delegation = delegation;
137 nfsi->delegation_state = delegation->type; 141 nfsi->delegation_state = delegation->type;
142 rcu_assign_pointer(nfsi->delegation, delegation);
138 delegation = NULL; 143 delegation = NULL;
139 } else { 144 } else {
140 if (memcmp(&delegation->stateid, &nfsi->delegation->stateid, 145 if (memcmp(&delegation->stateid, &nfsi->delegation->stateid,
@@ -145,6 +150,12 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
145 status = -EIO; 150 status = -EIO;
146 } 151 }
147 } 152 }
153
154 /* Ensure we revalidate the attributes and page cache! */
155 spin_lock(&inode->i_lock);
156 nfsi->cache_validity |= NFS_INO_REVAL_FORCED;
157 spin_unlock(&inode->i_lock);
158
148 spin_unlock(&clp->cl_lock); 159 spin_unlock(&clp->cl_lock);
149 kfree(delegation); 160 kfree(delegation);
150 return status; 161 return status;
@@ -155,7 +166,7 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *
155 int res = 0; 166 int res = 0;
156 167
157 res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid); 168 res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid);
158 nfs_free_delegation(delegation); 169 call_rcu(&delegation->rcu, nfs_free_delegation_callback);
159 return res; 170 return res;
160} 171}
161 172
@@ -170,33 +181,55 @@ static void nfs_msync_inode(struct inode *inode)
170/* 181/*
171 * Basic procedure for returning a delegation to the server 182 * Basic procedure for returning a delegation to the server
172 */ 183 */
173int __nfs_inode_return_delegation(struct inode *inode) 184static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegation *delegation)
174{ 185{
175 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; 186 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
176 struct nfs_inode *nfsi = NFS_I(inode); 187 struct nfs_inode *nfsi = NFS_I(inode);
177 struct nfs_delegation *delegation;
178 int res = 0;
179 188
180 nfs_msync_inode(inode); 189 nfs_msync_inode(inode);
181 down_read(&clp->cl_sem); 190 down_read(&clp->cl_sem);
182 /* Guard against new delegated open calls */ 191 /* Guard against new delegated open calls */
183 down_write(&nfsi->rwsem); 192 down_write(&nfsi->rwsem);
184 spin_lock(&clp->cl_lock); 193 nfs_delegation_claim_opens(inode, &delegation->stateid);
185 delegation = nfsi->delegation;
186 if (delegation != NULL) {
187 list_del_init(&delegation->super_list);
188 nfsi->delegation = NULL;
189 nfsi->delegation_state = 0;
190 }
191 spin_unlock(&clp->cl_lock);
192 nfs_delegation_claim_opens(inode);
193 up_write(&nfsi->rwsem); 194 up_write(&nfsi->rwsem);
194 up_read(&clp->cl_sem); 195 up_read(&clp->cl_sem);
195 nfs_msync_inode(inode); 196 nfs_msync_inode(inode);
196 197
197 if (delegation != NULL) 198 return nfs_do_return_delegation(inode, delegation);
198 res = nfs_do_return_delegation(inode, delegation); 199}
199 return res; 200
201static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, const nfs4_stateid *stateid)
202{
203 struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation);
204
205 if (delegation == NULL)
206 goto nomatch;
207 if (stateid != NULL && memcmp(delegation->stateid.data, stateid->data,
208 sizeof(delegation->stateid.data)) != 0)
209 goto nomatch;
210 list_del_rcu(&delegation->super_list);
211 nfsi->delegation_state = 0;
212 rcu_assign_pointer(nfsi->delegation, NULL);
213 return delegation;
214nomatch:
215 return NULL;
216}
217
218int nfs_inode_return_delegation(struct inode *inode)
219{
220 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
221 struct nfs_inode *nfsi = NFS_I(inode);
222 struct nfs_delegation *delegation;
223 int err = 0;
224
225 if (rcu_dereference(nfsi->delegation) != NULL) {
226 spin_lock(&clp->cl_lock);
227 delegation = nfs_detach_delegation_locked(nfsi, NULL);
228 spin_unlock(&clp->cl_lock);
229 if (delegation != NULL)
230 err = __nfs_inode_return_delegation(inode, delegation);
231 }
232 return err;
200} 233}
201 234
202/* 235/*
@@ -211,19 +244,23 @@ void nfs_return_all_delegations(struct super_block *sb)
211 if (clp == NULL) 244 if (clp == NULL)
212 return; 245 return;
213restart: 246restart:
214 spin_lock(&clp->cl_lock); 247 rcu_read_lock();
215 list_for_each_entry(delegation, &clp->cl_delegations, super_list) { 248 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
216 if (delegation->inode->i_sb != sb) 249 if (delegation->inode->i_sb != sb)
217 continue; 250 continue;
218 inode = igrab(delegation->inode); 251 inode = igrab(delegation->inode);
219 if (inode == NULL) 252 if (inode == NULL)
220 continue; 253 continue;
254 spin_lock(&clp->cl_lock);
255 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
221 spin_unlock(&clp->cl_lock); 256 spin_unlock(&clp->cl_lock);
222 nfs_inode_return_delegation(inode); 257 rcu_read_unlock();
258 if (delegation != NULL)
259 __nfs_inode_return_delegation(inode, delegation);
223 iput(inode); 260 iput(inode);
224 goto restart; 261 goto restart;
225 } 262 }
226 spin_unlock(&clp->cl_lock); 263 rcu_read_unlock();
227} 264}
228 265
229static int nfs_do_expire_all_delegations(void *ptr) 266static int nfs_do_expire_all_delegations(void *ptr)
@@ -234,22 +271,26 @@ static int nfs_do_expire_all_delegations(void *ptr)
234 271
235 allow_signal(SIGKILL); 272 allow_signal(SIGKILL);
236restart: 273restart:
237 spin_lock(&clp->cl_lock);
238 if (test_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) != 0) 274 if (test_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) != 0)
239 goto out; 275 goto out;
240 if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) 276 if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0)
241 goto out; 277 goto out;
242 list_for_each_entry(delegation, &clp->cl_delegations, super_list) { 278 rcu_read_lock();
279 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
243 inode = igrab(delegation->inode); 280 inode = igrab(delegation->inode);
244 if (inode == NULL) 281 if (inode == NULL)
245 continue; 282 continue;
283 spin_lock(&clp->cl_lock);
284 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
246 spin_unlock(&clp->cl_lock); 285 spin_unlock(&clp->cl_lock);
247 nfs_inode_return_delegation(inode); 286 rcu_read_unlock();
287 if (delegation)
288 __nfs_inode_return_delegation(inode, delegation);
248 iput(inode); 289 iput(inode);
249 goto restart; 290 goto restart;
250 } 291 }
292 rcu_read_unlock();
251out: 293out:
252 spin_unlock(&clp->cl_lock);
253 nfs_put_client(clp); 294 nfs_put_client(clp);
254 module_put_and_exit(0); 295 module_put_and_exit(0);
255} 296}
@@ -280,17 +321,21 @@ void nfs_handle_cb_pathdown(struct nfs_client *clp)
280 if (clp == NULL) 321 if (clp == NULL)
281 return; 322 return;
282restart: 323restart:
283 spin_lock(&clp->cl_lock); 324 rcu_read_lock();
284 list_for_each_entry(delegation, &clp->cl_delegations, super_list) { 325 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
285 inode = igrab(delegation->inode); 326 inode = igrab(delegation->inode);
286 if (inode == NULL) 327 if (inode == NULL)
287 continue; 328 continue;
329 spin_lock(&clp->cl_lock);
330 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
288 spin_unlock(&clp->cl_lock); 331 spin_unlock(&clp->cl_lock);
289 nfs_inode_return_delegation(inode); 332 rcu_read_unlock();
333 if (delegation != NULL)
334 __nfs_inode_return_delegation(inode, delegation);
290 iput(inode); 335 iput(inode);
291 goto restart; 336 goto restart;
292 } 337 }
293 spin_unlock(&clp->cl_lock); 338 rcu_read_unlock();
294} 339}
295 340
296struct recall_threadargs { 341struct recall_threadargs {
@@ -316,21 +361,14 @@ static int recall_thread(void *data)
316 down_read(&clp->cl_sem); 361 down_read(&clp->cl_sem);
317 down_write(&nfsi->rwsem); 362 down_write(&nfsi->rwsem);
318 spin_lock(&clp->cl_lock); 363 spin_lock(&clp->cl_lock);
319 delegation = nfsi->delegation; 364 delegation = nfs_detach_delegation_locked(nfsi, args->stateid);
320 if (delegation != NULL && memcmp(delegation->stateid.data, 365 if (delegation != NULL)
321 args->stateid->data,
322 sizeof(delegation->stateid.data)) == 0) {
323 list_del_init(&delegation->super_list);
324 nfsi->delegation = NULL;
325 nfsi->delegation_state = 0;
326 args->result = 0; 366 args->result = 0;
327 } else { 367 else
328 delegation = NULL;
329 args->result = -ENOENT; 368 args->result = -ENOENT;
330 }
331 spin_unlock(&clp->cl_lock); 369 spin_unlock(&clp->cl_lock);
332 complete(&args->started); 370 complete(&args->started);
333 nfs_delegation_claim_opens(inode); 371 nfs_delegation_claim_opens(inode, args->stateid);
334 up_write(&nfsi->rwsem); 372 up_write(&nfsi->rwsem);
335 up_read(&clp->cl_sem); 373 up_read(&clp->cl_sem);
336 nfs_msync_inode(inode); 374 nfs_msync_inode(inode);
@@ -371,14 +409,14 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs
371{ 409{
372 struct nfs_delegation *delegation; 410 struct nfs_delegation *delegation;
373 struct inode *res = NULL; 411 struct inode *res = NULL;
374 spin_lock(&clp->cl_lock); 412 rcu_read_lock();
375 list_for_each_entry(delegation, &clp->cl_delegations, super_list) { 413 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
376 if (nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) { 414 if (nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) {
377 res = igrab(delegation->inode); 415 res = igrab(delegation->inode);
378 break; 416 break;
379 } 417 }
380 } 418 }
381 spin_unlock(&clp->cl_lock); 419 rcu_read_unlock();
382 return res; 420 return res;
383} 421}
384 422
@@ -388,10 +426,10 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs
388void nfs_delegation_mark_reclaim(struct nfs_client *clp) 426void nfs_delegation_mark_reclaim(struct nfs_client *clp)
389{ 427{
390 struct nfs_delegation *delegation; 428 struct nfs_delegation *delegation;
391 spin_lock(&clp->cl_lock); 429 rcu_read_lock();
392 list_for_each_entry(delegation, &clp->cl_delegations, super_list) 430 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list)
393 delegation->flags |= NFS_DELEGATION_NEED_RECLAIM; 431 delegation->flags |= NFS_DELEGATION_NEED_RECLAIM;
394 spin_unlock(&clp->cl_lock); 432 rcu_read_unlock();
395} 433}
396 434
397/* 435/*
@@ -399,39 +437,35 @@ void nfs_delegation_mark_reclaim(struct nfs_client *clp)
399 */ 437 */
400void nfs_delegation_reap_unclaimed(struct nfs_client *clp) 438void nfs_delegation_reap_unclaimed(struct nfs_client *clp)
401{ 439{
402 struct nfs_delegation *delegation, *n; 440 struct nfs_delegation *delegation;
403 LIST_HEAD(head); 441restart:
404 spin_lock(&clp->cl_lock); 442 rcu_read_lock();
405 list_for_each_entry_safe(delegation, n, &clp->cl_delegations, super_list) { 443 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
406 if ((delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0) 444 if ((delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0)
407 continue; 445 continue;
408 list_move(&delegation->super_list, &head); 446 spin_lock(&clp->cl_lock);
409 NFS_I(delegation->inode)->delegation = NULL; 447 delegation = nfs_detach_delegation_locked(NFS_I(delegation->inode), NULL);
410 NFS_I(delegation->inode)->delegation_state = 0; 448 spin_unlock(&clp->cl_lock);
411 } 449 rcu_read_unlock();
412 spin_unlock(&clp->cl_lock); 450 if (delegation != NULL)
413 while(!list_empty(&head)) { 451 call_rcu(&delegation->rcu, nfs_free_delegation_callback);
414 delegation = list_entry(head.next, struct nfs_delegation, super_list); 452 goto restart;
415 list_del(&delegation->super_list);
416 nfs_free_delegation(delegation);
417 } 453 }
454 rcu_read_unlock();
418} 455}
419 456
420int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode) 457int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
421{ 458{
422 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
423 struct nfs_inode *nfsi = NFS_I(inode); 459 struct nfs_inode *nfsi = NFS_I(inode);
424 struct nfs_delegation *delegation; 460 struct nfs_delegation *delegation;
425 int res = 0; 461 int ret = 0;
426 462
427 if (nfsi->delegation_state == 0) 463 rcu_read_lock();
428 return 0; 464 delegation = rcu_dereference(nfsi->delegation);
429 spin_lock(&clp->cl_lock);
430 delegation = nfsi->delegation;
431 if (delegation != NULL) { 465 if (delegation != NULL) {
432 memcpy(dst->data, delegation->stateid.data, sizeof(dst->data)); 466 memcpy(dst->data, delegation->stateid.data, sizeof(dst->data));
433 res = 1; 467 ret = 1;
434 } 468 }
435 spin_unlock(&clp->cl_lock); 469 rcu_read_unlock();
436 return res; 470 return ret;
437} 471}
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 2cfd4b24c7fe..5874ce7fdbae 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -22,11 +22,12 @@ struct nfs_delegation {
22 long flags; 22 long flags;
23 loff_t maxsize; 23 loff_t maxsize;
24 __u64 change_attr; 24 __u64 change_attr;
25 struct rcu_head rcu;
25}; 26};
26 27
27int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); 28int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
28void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); 29void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
29int __nfs_inode_return_delegation(struct inode *inode); 30int nfs_inode_return_delegation(struct inode *inode);
30int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); 31int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
31 32
32struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle); 33struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
@@ -39,27 +40,24 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
39 40
40/* NFSv4 delegation-related procedures */ 41/* NFSv4 delegation-related procedures */
41int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid); 42int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid);
42int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state); 43int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid);
43int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl); 44int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl);
44int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode); 45int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode);
45 46
46static inline int nfs_have_delegation(struct inode *inode, int flags) 47static inline int nfs_have_delegation(struct inode *inode, int flags)
47{ 48{
49 struct nfs_delegation *delegation;
50 int ret = 0;
51
48 flags &= FMODE_READ|FMODE_WRITE; 52 flags &= FMODE_READ|FMODE_WRITE;
49 smp_rmb(); 53 rcu_read_lock();
50 if ((NFS_I(inode)->delegation_state & flags) == flags) 54 delegation = rcu_dereference(NFS_I(inode)->delegation);
51 return 1; 55 if (delegation != NULL && (delegation->type & flags) == flags)
52 return 0; 56 ret = 1;
57 rcu_read_unlock();
58 return ret;
53} 59}
54 60
55static inline int nfs_inode_return_delegation(struct inode *inode)
56{
57 int err = 0;
58
59 if (NFS_I(inode)->delegation != NULL)
60 err = __nfs_inode_return_delegation(inode);
61 return err;
62}
63#else 61#else
64static inline int nfs_have_delegation(struct inode *inode, int flags) 62static inline int nfs_have_delegation(struct inode *inode, int flags)
65{ 63{
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index c27258b5d3e1..322141f4ab48 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -897,14 +897,13 @@ int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
897 return (nd->intent.open.flags & O_EXCL) != 0; 897 return (nd->intent.open.flags & O_EXCL) != 0;
898} 898}
899 899
900static inline int nfs_reval_fsid(struct vfsmount *mnt, struct inode *dir, 900static inline int nfs_reval_fsid(struct inode *dir, const struct nfs_fattr *fattr)
901 struct nfs_fh *fh, struct nfs_fattr *fattr)
902{ 901{
903 struct nfs_server *server = NFS_SERVER(dir); 902 struct nfs_server *server = NFS_SERVER(dir);
904 903
905 if (!nfs_fsid_equal(&server->fsid, &fattr->fsid)) 904 if (!nfs_fsid_equal(&server->fsid, &fattr->fsid))
906 /* Revalidate fsid on root dir */ 905 /* Revalidate fsid using the parent directory */
907 return __nfs_revalidate_inode(server, mnt->mnt_root->d_inode); 906 return __nfs_revalidate_inode(server, dir);
908 return 0; 907 return 0;
909} 908}
910 909
@@ -946,7 +945,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
946 res = ERR_PTR(error); 945 res = ERR_PTR(error);
947 goto out_unlock; 946 goto out_unlock;
948 } 947 }
949 error = nfs_reval_fsid(nd->mnt, dir, &fhandle, &fattr); 948 error = nfs_reval_fsid(dir, &fattr);
950 if (error < 0) { 949 if (error < 0) {
951 res = ERR_PTR(error); 950 res = ERR_PTR(error);
952 goto out_unlock; 951 goto out_unlock;
@@ -1244,7 +1243,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
1244 attr.ia_mode = mode; 1243 attr.ia_mode = mode;
1245 attr.ia_valid = ATTR_MODE; 1244 attr.ia_valid = ATTR_MODE;
1246 1245
1247 if (nd && (nd->flags & LOOKUP_CREATE)) 1246 if ((nd->flags & LOOKUP_CREATE) != 0)
1248 open_flags = nd->intent.open.flags; 1247 open_flags = nd->intent.open.flags;
1249 1248
1250 lock_kernel(); 1249 lock_kernel();
@@ -1535,7 +1534,7 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym
1535 1534
1536 lock_kernel(); 1535 lock_kernel();
1537 1536
1538 page = alloc_page(GFP_KERNEL); 1537 page = alloc_page(GFP_HIGHUSER);
1539 if (!page) { 1538 if (!page) {
1540 unlock_kernel(); 1539 unlock_kernel();
1541 return -ENOMEM; 1540 return -ENOMEM;
@@ -1744,8 +1743,8 @@ int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
1744 struct nfs_inode *nfsi; 1743 struct nfs_inode *nfsi;
1745 struct nfs_access_entry *cache; 1744 struct nfs_access_entry *cache;
1746 1745
1747 spin_lock(&nfs_access_lru_lock);
1748restart: 1746restart:
1747 spin_lock(&nfs_access_lru_lock);
1749 list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) { 1748 list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) {
1750 struct inode *inode; 1749 struct inode *inode;
1751 1750
@@ -1770,6 +1769,7 @@ remove_lru_entry:
1770 clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags); 1769 clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags);
1771 } 1770 }
1772 spin_unlock(&inode->i_lock); 1771 spin_unlock(&inode->i_lock);
1772 spin_unlock(&nfs_access_lru_lock);
1773 iput(inode); 1773 iput(inode);
1774 goto restart; 1774 goto restart;
1775 } 1775 }
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 00eee87510fe..a5c82b6f3b45 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -266,7 +266,7 @@ static const struct rpc_call_ops nfs_read_direct_ops = {
266static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos) 266static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos)
267{ 267{
268 struct nfs_open_context *ctx = dreq->ctx; 268 struct nfs_open_context *ctx = dreq->ctx;
269 struct inode *inode = ctx->dentry->d_inode; 269 struct inode *inode = ctx->path.dentry->d_inode;
270 size_t rsize = NFS_SERVER(inode)->rsize; 270 size_t rsize = NFS_SERVER(inode)->rsize;
271 unsigned int pgbase; 271 unsigned int pgbase;
272 int result; 272 int result;
@@ -295,9 +295,14 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo
295 break; 295 break;
296 } 296 }
297 if ((unsigned)result < data->npages) { 297 if ((unsigned)result < data->npages) {
298 nfs_direct_release_pages(data->pagevec, result); 298 bytes = result * PAGE_SIZE;
299 nfs_readdata_release(data); 299 if (bytes <= pgbase) {
300 break; 300 nfs_direct_release_pages(data->pagevec, result);
301 nfs_readdata_release(data);
302 break;
303 }
304 bytes -= pgbase;
305 data->npages = result;
301 } 306 }
302 307
303 get_dreq(dreq); 308 get_dreq(dreq);
@@ -601,7 +606,7 @@ static const struct rpc_call_ops nfs_write_direct_ops = {
601static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos, int sync) 606static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos, int sync)
602{ 607{
603 struct nfs_open_context *ctx = dreq->ctx; 608 struct nfs_open_context *ctx = dreq->ctx;
604 struct inode *inode = ctx->dentry->d_inode; 609 struct inode *inode = ctx->path.dentry->d_inode;
605 size_t wsize = NFS_SERVER(inode)->wsize; 610 size_t wsize = NFS_SERVER(inode)->wsize;
606 unsigned int pgbase; 611 unsigned int pgbase;
607 int result; 612 int result;
@@ -630,9 +635,14 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l
630 break; 635 break;
631 } 636 }
632 if ((unsigned)result < data->npages) { 637 if ((unsigned)result < data->npages) {
633 nfs_direct_release_pages(data->pagevec, result); 638 bytes = result * PAGE_SIZE;
634 nfs_writedata_release(data); 639 if (bytes <= pgbase) {
635 break; 640 nfs_direct_release_pages(data->pagevec, result);
641 nfs_writedata_release(data);
642 break;
643 }
644 bytes -= pgbase;
645 data->npages = result;
636 } 646 }
637 647
638 get_dreq(dreq); 648 get_dreq(dreq);
@@ -763,10 +773,8 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
763 (unsigned long) count, (long long) pos); 773 (unsigned long) count, (long long) pos);
764 774
765 if (nr_segs != 1) 775 if (nr_segs != 1)
766 return -EINVAL;
767
768 if (count < 0)
769 goto out; 776 goto out;
777
770 retval = -EFAULT; 778 retval = -EFAULT;
771 if (!access_ok(VERIFY_WRITE, buf, count)) 779 if (!access_ok(VERIFY_WRITE, buf, count))
772 goto out; 780 goto out;
@@ -814,7 +822,7 @@ out:
814ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, 822ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
815 unsigned long nr_segs, loff_t pos) 823 unsigned long nr_segs, loff_t pos)
816{ 824{
817 ssize_t retval; 825 ssize_t retval = -EINVAL;
818 struct file *file = iocb->ki_filp; 826 struct file *file = iocb->ki_filp;
819 struct address_space *mapping = file->f_mapping; 827 struct address_space *mapping = file->f_mapping;
820 /* XXX: temporary */ 828 /* XXX: temporary */
@@ -827,7 +835,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
827 (unsigned long) count, (long long) pos); 835 (unsigned long) count, (long long) pos);
828 836
829 if (nr_segs != 1) 837 if (nr_segs != 1)
830 return -EINVAL; 838 goto out;
831 839
832 retval = generic_write_checks(file, &pos, &count, 0); 840 retval = generic_write_checks(file, &pos, &count, 0);
833 if (retval) 841 if (retval)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 9eb8eb4e4a08..8689b736fdd9 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -41,7 +41,9 @@ static int nfs_file_open(struct inode *, struct file *);
41static int nfs_file_release(struct inode *, struct file *); 41static int nfs_file_release(struct inode *, struct file *);
42static loff_t nfs_file_llseek(struct file *file, loff_t offset, int origin); 42static loff_t nfs_file_llseek(struct file *file, loff_t offset, int origin);
43static int nfs_file_mmap(struct file *, struct vm_area_struct *); 43static int nfs_file_mmap(struct file *, struct vm_area_struct *);
44static ssize_t nfs_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *); 44static ssize_t nfs_file_splice_read(struct file *filp, loff_t *ppos,
45 struct pipe_inode_info *pipe,
46 size_t count, unsigned int flags);
45static ssize_t nfs_file_read(struct kiocb *, const struct iovec *iov, 47static ssize_t nfs_file_read(struct kiocb *, const struct iovec *iov,
46 unsigned long nr_segs, loff_t pos); 48 unsigned long nr_segs, loff_t pos);
47static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov, 49static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov,
@@ -65,7 +67,7 @@ const struct file_operations nfs_file_operations = {
65 .fsync = nfs_fsync, 67 .fsync = nfs_fsync,
66 .lock = nfs_lock, 68 .lock = nfs_lock,
67 .flock = nfs_flock, 69 .flock = nfs_flock,
68 .sendfile = nfs_file_sendfile, 70 .splice_read = nfs_file_splice_read,
69 .check_flags = nfs_check_flags, 71 .check_flags = nfs_check_flags,
70}; 72};
71 73
@@ -224,20 +226,21 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
224} 226}
225 227
226static ssize_t 228static ssize_t
227nfs_file_sendfile(struct file *filp, loff_t *ppos, size_t count, 229nfs_file_splice_read(struct file *filp, loff_t *ppos,
228 read_actor_t actor, void *target) 230 struct pipe_inode_info *pipe, size_t count,
231 unsigned int flags)
229{ 232{
230 struct dentry *dentry = filp->f_path.dentry; 233 struct dentry *dentry = filp->f_path.dentry;
231 struct inode *inode = dentry->d_inode; 234 struct inode *inode = dentry->d_inode;
232 ssize_t res; 235 ssize_t res;
233 236
234 dfprintk(VFS, "nfs: sendfile(%s/%s, %lu@%Lu)\n", 237 dfprintk(VFS, "nfs: splice_read(%s/%s, %lu@%Lu)\n",
235 dentry->d_parent->d_name.name, dentry->d_name.name, 238 dentry->d_parent->d_name.name, dentry->d_name.name,
236 (unsigned long) count, (unsigned long long) *ppos); 239 (unsigned long) count, (unsigned long long) *ppos);
237 240
238 res = nfs_revalidate_mapping(inode, filp->f_mapping); 241 res = nfs_revalidate_mapping(inode, filp->f_mapping);
239 if (!res) 242 if (!res)
240 res = generic_file_sendfile(filp, ppos, count, actor, target); 243 res = generic_file_splice_read(filp, ppos, pipe, count, flags);
241 return res; 244 return res;
242} 245}
243 246
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index bd9f5a836592..3d9fccf4ef93 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -461,14 +461,14 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str
461 461
462 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); 462 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
463 if (ctx != NULL) { 463 if (ctx != NULL) {
464 atomic_set(&ctx->count, 1); 464 ctx->path.dentry = dget(dentry);
465 ctx->dentry = dget(dentry); 465 ctx->path.mnt = mntget(mnt);
466 ctx->vfsmnt = mntget(mnt);
467 ctx->cred = get_rpccred(cred); 466 ctx->cred = get_rpccred(cred);
468 ctx->state = NULL; 467 ctx->state = NULL;
469 ctx->lockowner = current->files; 468 ctx->lockowner = current->files;
470 ctx->error = 0; 469 ctx->error = 0;
471 ctx->dir_cookie = 0; 470 ctx->dir_cookie = 0;
471 kref_init(&ctx->kref);
472 } 472 }
473 return ctx; 473 return ctx;
474} 474}
@@ -476,27 +476,33 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str
476struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) 476struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
477{ 477{
478 if (ctx != NULL) 478 if (ctx != NULL)
479 atomic_inc(&ctx->count); 479 kref_get(&ctx->kref);
480 return ctx; 480 return ctx;
481} 481}
482 482
483void put_nfs_open_context(struct nfs_open_context *ctx) 483static void nfs_free_open_context(struct kref *kref)
484{ 484{
485 if (atomic_dec_and_test(&ctx->count)) { 485 struct nfs_open_context *ctx = container_of(kref,
486 if (!list_empty(&ctx->list)) { 486 struct nfs_open_context, kref);
487 struct inode *inode = ctx->dentry->d_inode; 487
488 spin_lock(&inode->i_lock); 488 if (!list_empty(&ctx->list)) {
489 list_del(&ctx->list); 489 struct inode *inode = ctx->path.dentry->d_inode;
490 spin_unlock(&inode->i_lock); 490 spin_lock(&inode->i_lock);
491 } 491 list_del(&ctx->list);
492 if (ctx->state != NULL) 492 spin_unlock(&inode->i_lock);
493 nfs4_close_state(ctx->state, ctx->mode);
494 if (ctx->cred != NULL)
495 put_rpccred(ctx->cred);
496 dput(ctx->dentry);
497 mntput(ctx->vfsmnt);
498 kfree(ctx);
499 } 493 }
494 if (ctx->state != NULL)
495 nfs4_close_state(&ctx->path, ctx->state, ctx->mode);
496 if (ctx->cred != NULL)
497 put_rpccred(ctx->cred);
498 dput(ctx->path.dentry);
499 mntput(ctx->path.mnt);
500 kfree(ctx);
501}
502
503void put_nfs_open_context(struct nfs_open_context *ctx)
504{
505 kref_put(&ctx->kref, nfs_free_open_context);
500} 506}
501 507
502/* 508/*
@@ -961,8 +967,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
961 goto out_changed; 967 goto out_changed;
962 968
963 server = NFS_SERVER(inode); 969 server = NFS_SERVER(inode);
964 /* Update the fsid if and only if this is the root directory */ 970 /* Update the fsid? */
965 if (inode == inode->i_sb->s_root->d_inode 971 if (S_ISDIR(inode->i_mode)
966 && !nfs_fsid_equal(&server->fsid, &fattr->fsid)) 972 && !nfs_fsid_equal(&server->fsid, &fattr->fsid))
967 server->fsid = fattr->fsid; 973 server->fsid = fattr->fsid;
968 974
@@ -1066,8 +1072,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1066 invalid &= ~NFS_INO_INVALID_DATA; 1072 invalid &= ~NFS_INO_INVALID_DATA;
1067 if (data_stable) 1073 if (data_stable)
1068 invalid &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME|NFS_INO_REVAL_PAGECACHE); 1074 invalid &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME|NFS_INO_REVAL_PAGECACHE);
1069 if (!nfs_have_delegation(inode, FMODE_READ)) 1075 if (!nfs_have_delegation(inode, FMODE_READ) ||
1076 (nfsi->cache_validity & NFS_INO_REVAL_FORCED))
1070 nfsi->cache_validity |= invalid; 1077 nfsi->cache_validity |= invalid;
1078 nfsi->cache_validity &= ~NFS_INO_REVAL_FORCED;
1071 1079
1072 return 0; 1080 return 0;
1073 out_changed: 1081 out_changed:
@@ -1103,27 +1111,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1103 */ 1111 */
1104void nfs4_clear_inode(struct inode *inode) 1112void nfs4_clear_inode(struct inode *inode)
1105{ 1113{
1106 struct nfs_inode *nfsi = NFS_I(inode);
1107
1108 /* If we are holding a delegation, return it! */ 1114 /* If we are holding a delegation, return it! */
1109 nfs_inode_return_delegation(inode); 1115 nfs_inode_return_delegation(inode);
1110 /* First call standard NFS clear_inode() code */ 1116 /* First call standard NFS clear_inode() code */
1111 nfs_clear_inode(inode); 1117 nfs_clear_inode(inode);
1112 /* Now clear out any remaining state */
1113 while (!list_empty(&nfsi->open_states)) {
1114 struct nfs4_state *state;
1115
1116 state = list_entry(nfsi->open_states.next,
1117 struct nfs4_state,
1118 inode_states);
1119 dprintk("%s(%s/%Ld): found unclaimed NFSv4 state %p\n",
1120 __FUNCTION__,
1121 inode->i_sb->s_id,
1122 (long long)NFS_FILEID(inode),
1123 state);
1124 BUG_ON(atomic_read(&state->count) != 1);
1125 nfs4_close_state(state, state->state);
1126 }
1127} 1118}
1128#endif 1119#endif
1129 1120
@@ -1165,15 +1156,11 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
1165 struct nfs_inode *nfsi = (struct nfs_inode *) foo; 1156 struct nfs_inode *nfsi = (struct nfs_inode *) foo;
1166 1157
1167 inode_init_once(&nfsi->vfs_inode); 1158 inode_init_once(&nfsi->vfs_inode);
1168 spin_lock_init(&nfsi->req_lock);
1169 INIT_LIST_HEAD(&nfsi->dirty);
1170 INIT_LIST_HEAD(&nfsi->commit);
1171 INIT_LIST_HEAD(&nfsi->open_files); 1159 INIT_LIST_HEAD(&nfsi->open_files);
1172 INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); 1160 INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
1173 INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); 1161 INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
1174 INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); 1162 INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
1175 atomic_set(&nfsi->data_updates, 0); 1163 atomic_set(&nfsi->data_updates, 0);
1176 nfsi->ndirty = 0;
1177 nfsi->ncommit = 0; 1164 nfsi->ncommit = 0;
1178 nfsi->npages = 0; 1165 nfsi->npages = 0;
1179 nfs4_init_once(nfsi); 1166 nfs4_init_once(nfsi);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index ad2b40db1e65..76cf55d57101 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -183,9 +183,9 @@ unsigned long nfs_block_bits(unsigned long bsize, unsigned char *nrbitsp)
183/* 183/*
184 * Calculate the number of 512byte blocks used. 184 * Calculate the number of 512byte blocks used.
185 */ 185 */
186static inline unsigned long nfs_calc_block_size(u64 tsize) 186static inline blkcnt_t nfs_calc_block_size(u64 tsize)
187{ 187{
188 loff_t used = (tsize + 511) >> 9; 188 blkcnt_t used = (tsize + 511) >> 9;
189 return (used > ULONG_MAX) ? ULONG_MAX : used; 189 return (used > ULONG_MAX) ? ULONG_MAX : used;
190} 190}
191 191
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index ca5a266a3140..8afd9f7e7a97 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -1,7 +1,5 @@
1/* 1/*
2 * linux/fs/nfs/mount_clnt.c 2 * In-kernel MOUNT protocol client
3 *
4 * MOUNT client to support NFSroot.
5 * 3 *
6 * Copyright (C) 1997, Olaf Kirch <okir@monad.swb.de> 4 * Copyright (C) 1997, Olaf Kirch <okir@monad.swb.de>
7 */ 5 */
@@ -18,33 +16,31 @@
18#include <linux/nfs_fs.h> 16#include <linux/nfs_fs.h>
19 17
20#ifdef RPC_DEBUG 18#ifdef RPC_DEBUG
21# define NFSDBG_FACILITY NFSDBG_ROOT 19# define NFSDBG_FACILITY NFSDBG_MOUNT
22#endif 20#endif
23 21
24/*
25#define MOUNT_PROGRAM 100005
26#define MOUNT_VERSION 1
27#define MOUNT_MNT 1
28#define MOUNT_UMNT 3
29 */
30
31static struct rpc_clnt * mnt_create(char *, struct sockaddr_in *,
32 int, int);
33static struct rpc_program mnt_program; 22static struct rpc_program mnt_program;
34 23
35struct mnt_fhstatus { 24struct mnt_fhstatus {
36 unsigned int status; 25 u32 status;
37 struct nfs_fh * fh; 26 struct nfs_fh *fh;
38}; 27};
39 28
40/* 29/**
41 * Obtain an NFS file handle for the given host and path 30 * nfs_mount - Obtain an NFS file handle for the given host and path
31 * @addr: pointer to server's address
32 * @len: size of server's address
33 * @hostname: name of server host, or NULL
34 * @path: pointer to string containing export path to mount
35 * @version: mount version to use for this request
36 * @protocol: transport protocol to use for this request
37 * @fh: pointer to location to place returned file handle
38 *
39 * Uses default timeout parameters specified by underlying transport.
42 */ 40 */
43int 41int nfs_mount(struct sockaddr *addr, size_t len, char *hostname, char *path,
44nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh, 42 int version, int protocol, struct nfs_fh *fh)
45 int version, int protocol)
46{ 43{
47 struct rpc_clnt *mnt_clnt;
48 struct mnt_fhstatus result = { 44 struct mnt_fhstatus result = {
49 .fh = fh 45 .fh = fh
50 }; 46 };
@@ -52,16 +48,25 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
52 .rpc_argp = path, 48 .rpc_argp = path,
53 .rpc_resp = &result, 49 .rpc_resp = &result,
54 }; 50 };
55 char hostname[32]; 51 struct rpc_create_args args = {
52 .protocol = protocol,
53 .address = addr,
54 .addrsize = len,
55 .servername = hostname,
56 .program = &mnt_program,
57 .version = version,
58 .authflavor = RPC_AUTH_UNIX,
59 .flags = RPC_CLNT_CREATE_INTR,
60 };
61 struct rpc_clnt *mnt_clnt;
56 int status; 62 int status;
57 63
58 dprintk("NFS: nfs_mount(%08x:%s)\n", 64 dprintk("NFS: sending MNT request for %s:%s\n",
59 (unsigned)ntohl(addr->sin_addr.s_addr), path); 65 (hostname ? hostname : "server"), path);
60 66
61 sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr->sin_addr.s_addr)); 67 mnt_clnt = rpc_create(&args);
62 mnt_clnt = mnt_create(hostname, addr, version, protocol);
63 if (IS_ERR(mnt_clnt)) 68 if (IS_ERR(mnt_clnt))
64 return PTR_ERR(mnt_clnt); 69 goto out_clnt_err;
65 70
66 if (version == NFS_MNT3_VERSION) 71 if (version == NFS_MNT3_VERSION)
67 msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT]; 72 msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT];
@@ -69,33 +74,39 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
69 msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT]; 74 msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT];
70 75
71 status = rpc_call_sync(mnt_clnt, &msg, 0); 76 status = rpc_call_sync(mnt_clnt, &msg, 0);
72 return status < 0? status : (result.status? -EACCES : 0); 77 rpc_shutdown_client(mnt_clnt);
73}
74 78
75static struct rpc_clnt * 79 if (status < 0)
76mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version, 80 goto out_call_err;
77 int protocol) 81 if (result.status != 0)
78{ 82 goto out_mnt_err;
79 struct rpc_create_args args = { 83
80 .protocol = protocol, 84 dprintk("NFS: MNT request succeeded\n");
81 .address = (struct sockaddr *)srvaddr, 85 status = 0;
82 .addrsize = sizeof(*srvaddr), 86
83 .servername = hostname, 87out:
84 .program = &mnt_program, 88 return status;
85 .version = version, 89
86 .authflavor = RPC_AUTH_UNIX, 90out_clnt_err:
87 .flags = (RPC_CLNT_CREATE_ONESHOT | 91 status = PTR_ERR(mnt_clnt);
88 RPC_CLNT_CREATE_INTR), 92 dprintk("NFS: failed to create RPC client, status=%d\n", status);
89 }; 93 goto out;
94
95out_call_err:
96 dprintk("NFS: failed to start MNT request, status=%d\n", status);
97 goto out;
90 98
91 return rpc_create(&args); 99out_mnt_err:
100 dprintk("NFS: MNT server returned result %d\n", result.status);
101 status = -EACCES;
102 goto out;
92} 103}
93 104
94/* 105/*
95 * XDR encode/decode functions for MOUNT 106 * XDR encode/decode functions for MOUNT
96 */ 107 */
97static int 108static int xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p,
98xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p, const char *path) 109 const char *path)
99{ 110{
100 p = xdr_encode_string(p, path); 111 p = xdr_encode_string(p, path);
101 112
@@ -103,8 +114,8 @@ xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p, const char *path)
103 return 0; 114 return 0;
104} 115}
105 116
106static int 117static int xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p,
107xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res) 118 struct mnt_fhstatus *res)
108{ 119{
109 struct nfs_fh *fh = res->fh; 120 struct nfs_fh *fh = res->fh;
110 121
@@ -115,8 +126,8 @@ xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res)
115 return 0; 126 return 0;
116} 127}
117 128
118static int 129static int xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p,
119xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res) 130 struct mnt_fhstatus *res)
120{ 131{
121 struct nfs_fh *fh = res->fh; 132 struct nfs_fh *fh = res->fh;
122 133
@@ -135,53 +146,53 @@ xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res)
135#define MNT_fhstatus_sz (1 + 8) 146#define MNT_fhstatus_sz (1 + 8)
136#define MNT_fhstatus3_sz (1 + 16) 147#define MNT_fhstatus3_sz (1 + 16)
137 148
138static struct rpc_procinfo mnt_procedures[] = { 149static struct rpc_procinfo mnt_procedures[] = {
139[MNTPROC_MNT] = { 150 [MNTPROC_MNT] = {
140 .p_proc = MNTPROC_MNT, 151 .p_proc = MNTPROC_MNT,
141 .p_encode = (kxdrproc_t) xdr_encode_dirpath, 152 .p_encode = (kxdrproc_t) xdr_encode_dirpath,
142 .p_decode = (kxdrproc_t) xdr_decode_fhstatus, 153 .p_decode = (kxdrproc_t) xdr_decode_fhstatus,
143 .p_arglen = MNT_dirpath_sz, 154 .p_arglen = MNT_dirpath_sz,
144 .p_replen = MNT_fhstatus_sz, 155 .p_replen = MNT_fhstatus_sz,
145 .p_statidx = MNTPROC_MNT, 156 .p_statidx = MNTPROC_MNT,
146 .p_name = "MOUNT", 157 .p_name = "MOUNT",
147 }, 158 },
148}; 159};
149 160
150static struct rpc_procinfo mnt3_procedures[] = { 161static struct rpc_procinfo mnt3_procedures[] = {
151[MOUNTPROC3_MNT] = { 162 [MOUNTPROC3_MNT] = {
152 .p_proc = MOUNTPROC3_MNT, 163 .p_proc = MOUNTPROC3_MNT,
153 .p_encode = (kxdrproc_t) xdr_encode_dirpath, 164 .p_encode = (kxdrproc_t) xdr_encode_dirpath,
154 .p_decode = (kxdrproc_t) xdr_decode_fhstatus3, 165 .p_decode = (kxdrproc_t) xdr_decode_fhstatus3,
155 .p_arglen = MNT_dirpath_sz, 166 .p_arglen = MNT_dirpath_sz,
156 .p_replen = MNT_fhstatus3_sz, 167 .p_replen = MNT_fhstatus3_sz,
157 .p_statidx = MOUNTPROC3_MNT, 168 .p_statidx = MOUNTPROC3_MNT,
158 .p_name = "MOUNT", 169 .p_name = "MOUNT",
159 }, 170 },
160}; 171};
161 172
162 173
163static struct rpc_version mnt_version1 = { 174static struct rpc_version mnt_version1 = {
164 .number = 1, 175 .number = 1,
165 .nrprocs = 2, 176 .nrprocs = 2,
166 .procs = mnt_procedures 177 .procs = mnt_procedures,
167}; 178};
168 179
169static struct rpc_version mnt_version3 = { 180static struct rpc_version mnt_version3 = {
170 .number = 3, 181 .number = 3,
171 .nrprocs = 2, 182 .nrprocs = 2,
172 .procs = mnt3_procedures 183 .procs = mnt3_procedures,
173}; 184};
174 185
175static struct rpc_version * mnt_version[] = { 186static struct rpc_version *mnt_version[] = {
176 NULL, 187 NULL,
177 &mnt_version1, 188 &mnt_version1,
178 NULL, 189 NULL,
179 &mnt_version3, 190 &mnt_version3,
180}; 191};
181 192
182static struct rpc_stat mnt_stats; 193static struct rpc_stat mnt_stats;
183 194
184static struct rpc_program mnt_program = { 195static struct rpc_program mnt_program = {
185 .name = "mount", 196 .name = "mount",
186 .number = NFS_MNT_PROGRAM, 197 .number = NFS_MNT_PROGRAM,
187 .nrvers = ARRAY_SIZE(mnt_version), 198 .nrvers = ARRAY_SIZE(mnt_version),
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index cd3ca7b5d3db..7fcc78f2aa71 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -223,7 +223,7 @@ nfs_xdr_diropargs(struct rpc_rqst *req, __be32 *p, struct nfs_diropargs *args)
223static int 223static int
224nfs_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) 224nfs_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
225{ 225{
226 struct rpc_auth *auth = req->rq_task->tk_auth; 226 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
227 unsigned int replen; 227 unsigned int replen;
228 u32 offset = (u32)args->offset; 228 u32 offset = (u32)args->offset;
229 u32 count = args->count; 229 u32 count = args->count;
@@ -380,7 +380,7 @@ static int
380nfs_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs_readdirargs *args) 380nfs_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs_readdirargs *args)
381{ 381{
382 struct rpc_task *task = req->rq_task; 382 struct rpc_task *task = req->rq_task;
383 struct rpc_auth *auth = task->tk_auth; 383 struct rpc_auth *auth = task->tk_msg.rpc_cred->cr_auth;
384 unsigned int replen; 384 unsigned int replen;
385 u32 count = args->count; 385 u32 count = args->count;
386 386
@@ -541,7 +541,7 @@ nfs_xdr_diropres(struct rpc_rqst *req, __be32 *p, struct nfs_diropok *res)
541static int 541static int
542nfs_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_readlinkargs *args) 542nfs_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_readlinkargs *args)
543{ 543{
544 struct rpc_auth *auth = req->rq_task->tk_auth; 544 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
545 unsigned int replen; 545 unsigned int replen;
546 546
547 p = xdr_encode_fhandle(p, args->fh); 547 p = xdr_encode_fhandle(p, args->fh);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 45268d6def2e..814d886b6aa4 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -335,9 +335,7 @@ again:
335 * not sure this buys us anything (and I'd have 335 * not sure this buys us anything (and I'd have
336 * to revamp the NFSv3 XDR code) */ 336 * to revamp the NFSv3 XDR code) */
337 status = nfs3_proc_setattr(dentry, &fattr, sattr); 337 status = nfs3_proc_setattr(dentry, &fattr, sattr);
338 if (status == 0) 338 nfs_post_op_update_inode(dentry->d_inode, &fattr);
339 nfs_setattr_update_inode(dentry->d_inode, sattr);
340 nfs_refresh_inode(dentry->d_inode, &fattr);
341 dprintk("NFS reply setattr (post-create): %d\n", status); 339 dprintk("NFS reply setattr (post-create): %d\n", status);
342 } 340 }
343 if (status != 0) 341 if (status != 0)
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index b51df8eb9f01..b4647a22f349 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -319,7 +319,7 @@ nfs3_xdr_accessargs(struct rpc_rqst *req, __be32 *p, struct nfs3_accessargs *arg
319static int 319static int
320nfs3_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) 320nfs3_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
321{ 321{
322 struct rpc_auth *auth = req->rq_task->tk_auth; 322 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
323 unsigned int replen; 323 unsigned int replen;
324 u32 count = args->count; 324 u32 count = args->count;
325 325
@@ -458,7 +458,7 @@ nfs3_xdr_linkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_linkargs *args)
458static int 458static int
459nfs3_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirargs *args) 459nfs3_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirargs *args)
460{ 460{
461 struct rpc_auth *auth = req->rq_task->tk_auth; 461 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
462 unsigned int replen; 462 unsigned int replen;
463 u32 count = args->count; 463 u32 count = args->count;
464 464
@@ -643,7 +643,7 @@ static int
643nfs3_xdr_getaclargs(struct rpc_rqst *req, __be32 *p, 643nfs3_xdr_getaclargs(struct rpc_rqst *req, __be32 *p,
644 struct nfs3_getaclargs *args) 644 struct nfs3_getaclargs *args)
645{ 645{
646 struct rpc_auth *auth = req->rq_task->tk_auth; 646 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
647 unsigned int replen; 647 unsigned int replen;
648 648
649 p = xdr_encode_fhandle(p, args->fh); 649 p = xdr_encode_fhandle(p, args->fh);
@@ -773,7 +773,7 @@ nfs3_xdr_accessres(struct rpc_rqst *req, __be32 *p, struct nfs3_accessres *res)
773static int 773static int
774nfs3_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readlinkargs *args) 774nfs3_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readlinkargs *args)
775{ 775{
776 struct rpc_auth *auth = req->rq_task->tk_auth; 776 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
777 unsigned int replen; 777 unsigned int replen;
778 778
779 p = xdr_encode_fhandle(p, args->fh); 779 p = xdr_encode_fhandle(p, args->fh);
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index cf3a17eb5c09..6c028e734fe6 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -70,19 +70,26 @@ static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status
70 seqid->flags |= NFS_SEQID_CONFIRMED; 70 seqid->flags |= NFS_SEQID_CONFIRMED;
71} 71}
72 72
73struct nfs_unique_id {
74 struct rb_node rb_node;
75 __u64 id;
76};
77
73/* 78/*
74 * NFS4 state_owners and lock_owners are simply labels for ordered 79 * NFS4 state_owners and lock_owners are simply labels for ordered
75 * sequences of RPC calls. Their sole purpose is to provide once-only 80 * sequences of RPC calls. Their sole purpose is to provide once-only
76 * semantics by allowing the server to identify replayed requests. 81 * semantics by allowing the server to identify replayed requests.
77 */ 82 */
78struct nfs4_state_owner { 83struct nfs4_state_owner {
79 spinlock_t so_lock; 84 struct nfs_unique_id so_owner_id;
80 struct list_head so_list; /* per-clientid list of state_owners */
81 struct nfs_client *so_client; 85 struct nfs_client *so_client;
82 u32 so_id; /* 32-bit identifier, unique */ 86 struct nfs_server *so_server;
83 atomic_t so_count; 87 struct rb_node so_client_node;
84 88
85 struct rpc_cred *so_cred; /* Associated cred */ 89 struct rpc_cred *so_cred; /* Associated cred */
90
91 spinlock_t so_lock;
92 atomic_t so_count;
86 struct list_head so_states; 93 struct list_head so_states;
87 struct list_head so_delegations; 94 struct list_head so_delegations;
88 struct nfs_seqid_counter so_seqid; 95 struct nfs_seqid_counter so_seqid;
@@ -108,7 +115,7 @@ struct nfs4_lock_state {
108#define NFS_LOCK_INITIALIZED 1 115#define NFS_LOCK_INITIALIZED 1
109 int ls_flags; 116 int ls_flags;
110 struct nfs_seqid_counter ls_seqid; 117 struct nfs_seqid_counter ls_seqid;
111 u32 ls_id; 118 struct nfs_unique_id ls_id;
112 nfs4_stateid ls_stateid; 119 nfs4_stateid ls_stateid;
113 atomic_t ls_count; 120 atomic_t ls_count;
114}; 121};
@@ -116,7 +123,10 @@ struct nfs4_lock_state {
116/* bits for nfs4_state->flags */ 123/* bits for nfs4_state->flags */
117enum { 124enum {
118 LK_STATE_IN_USE, 125 LK_STATE_IN_USE,
119 NFS_DELEGATED_STATE, 126 NFS_DELEGATED_STATE, /* Current stateid is delegation */
127 NFS_O_RDONLY_STATE, /* OPEN stateid has read-only state */
128 NFS_O_WRONLY_STATE, /* OPEN stateid has write-only state */
129 NFS_O_RDWR_STATE, /* OPEN stateid has read/write state */
120}; 130};
121 131
122struct nfs4_state { 132struct nfs4_state {
@@ -130,11 +140,14 @@ struct nfs4_state {
130 unsigned long flags; /* Do we hold any locks? */ 140 unsigned long flags; /* Do we hold any locks? */
131 spinlock_t state_lock; /* Protects the lock_states list */ 141 spinlock_t state_lock; /* Protects the lock_states list */
132 142
133 nfs4_stateid stateid; 143 seqlock_t seqlock; /* Protects the stateid/open_stateid */
144 nfs4_stateid stateid; /* Current stateid: may be delegation */
145 nfs4_stateid open_stateid; /* OPEN stateid */
134 146
135 unsigned int n_rdonly; 147 /* The following 3 fields are protected by owner->so_lock */
136 unsigned int n_wronly; 148 unsigned int n_rdonly; /* Number of read-only references */
137 unsigned int n_rdwr; 149 unsigned int n_wronly; /* Number of write-only references */
150 unsigned int n_rdwr; /* Number of read/write references */
138 int state; /* State on the server (R,W, or RW) */ 151 int state; /* State on the server (R,W, or RW) */
139 atomic_t count; 152 atomic_t count;
140}; 153};
@@ -165,7 +178,7 @@ extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struc
165extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *); 178extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *);
166extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *); 179extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
167extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *); 180extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
168extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state); 181extern int nfs4_do_close(struct path *path, struct nfs4_state *state);
169extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); 182extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
170extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); 183extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
171extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); 184extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
@@ -189,14 +202,13 @@ extern void nfs4_renew_state(struct work_struct *);
189 202
190/* nfs4state.c */ 203/* nfs4state.c */
191struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp); 204struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp);
192extern u32 nfs4_alloc_lockowner_id(struct nfs_client *);
193 205
194extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); 206extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
195extern void nfs4_put_state_owner(struct nfs4_state_owner *); 207extern void nfs4_put_state_owner(struct nfs4_state_owner *);
196extern void nfs4_drop_state_owner(struct nfs4_state_owner *); 208extern void nfs4_drop_state_owner(struct nfs4_state_owner *);
197extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); 209extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
198extern void nfs4_put_open_state(struct nfs4_state *); 210extern void nfs4_put_open_state(struct nfs4_state *);
199extern void nfs4_close_state(struct nfs4_state *, mode_t); 211extern void nfs4_close_state(struct path *, struct nfs4_state *, mode_t);
200extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t); 212extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t);
201extern void nfs4_schedule_state_recovery(struct nfs_client *); 213extern void nfs4_schedule_state_recovery(struct nfs_client *);
202extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); 214extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
@@ -222,7 +234,7 @@ extern struct svc_version nfs4_callback_version1;
222 234
223#else 235#else
224 236
225#define nfs4_close_state(a, b) do { } while (0) 237#define nfs4_close_state(a, b, c) do { } while (0)
226 238
227#endif /* CONFIG_NFS_V4 */ 239#endif /* CONFIG_NFS_V4 */
228#endif /* __LINUX_FS_NFS_NFS4_FS.H */ 240#endif /* __LINUX_FS_NFS_NFS4_FS.H */
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 648e0ac0f90e..fee2da856c95 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -65,6 +65,7 @@ static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *)
65static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry); 65static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry);
66static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception); 66static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception);
67static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp); 67static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp);
68static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openflags);
68 69
69/* Prevent leaks of NFSv4 errors into userland */ 70/* Prevent leaks of NFSv4 errors into userland */
70int nfs4_map_errors(int err) 71int nfs4_map_errors(int err)
@@ -214,27 +215,39 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
214} 215}
215 216
216struct nfs4_opendata { 217struct nfs4_opendata {
217 atomic_t count; 218 struct kref kref;
218 struct nfs_openargs o_arg; 219 struct nfs_openargs o_arg;
219 struct nfs_openres o_res; 220 struct nfs_openres o_res;
220 struct nfs_open_confirmargs c_arg; 221 struct nfs_open_confirmargs c_arg;
221 struct nfs_open_confirmres c_res; 222 struct nfs_open_confirmres c_res;
222 struct nfs_fattr f_attr; 223 struct nfs_fattr f_attr;
223 struct nfs_fattr dir_attr; 224 struct nfs_fattr dir_attr;
224 struct dentry *dentry; 225 struct path path;
225 struct dentry *dir; 226 struct dentry *dir;
226 struct nfs4_state_owner *owner; 227 struct nfs4_state_owner *owner;
228 struct nfs4_state *state;
227 struct iattr attrs; 229 struct iattr attrs;
228 unsigned long timestamp; 230 unsigned long timestamp;
231 unsigned int rpc_done : 1;
229 int rpc_status; 232 int rpc_status;
230 int cancelled; 233 int cancelled;
231}; 234};
232 235
233static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, 236
237static void nfs4_init_opendata_res(struct nfs4_opendata *p)
238{
239 p->o_res.f_attr = &p->f_attr;
240 p->o_res.dir_attr = &p->dir_attr;
241 p->o_res.server = p->o_arg.server;
242 nfs_fattr_init(&p->f_attr);
243 nfs_fattr_init(&p->dir_attr);
244}
245
246static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
234 struct nfs4_state_owner *sp, int flags, 247 struct nfs4_state_owner *sp, int flags,
235 const struct iattr *attrs) 248 const struct iattr *attrs)
236{ 249{
237 struct dentry *parent = dget_parent(dentry); 250 struct dentry *parent = dget_parent(path->dentry);
238 struct inode *dir = parent->d_inode; 251 struct inode *dir = parent->d_inode;
239 struct nfs_server *server = NFS_SERVER(dir); 252 struct nfs_server *server = NFS_SERVER(dir);
240 struct nfs4_opendata *p; 253 struct nfs4_opendata *p;
@@ -245,24 +258,19 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
245 p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid); 258 p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid);
246 if (p->o_arg.seqid == NULL) 259 if (p->o_arg.seqid == NULL)
247 goto err_free; 260 goto err_free;
248 atomic_set(&p->count, 1); 261 p->path.mnt = mntget(path->mnt);
249 p->dentry = dget(dentry); 262 p->path.dentry = dget(path->dentry);
250 p->dir = parent; 263 p->dir = parent;
251 p->owner = sp; 264 p->owner = sp;
252 atomic_inc(&sp->so_count); 265 atomic_inc(&sp->so_count);
253 p->o_arg.fh = NFS_FH(dir); 266 p->o_arg.fh = NFS_FH(dir);
254 p->o_arg.open_flags = flags, 267 p->o_arg.open_flags = flags,
255 p->o_arg.clientid = server->nfs_client->cl_clientid; 268 p->o_arg.clientid = server->nfs_client->cl_clientid;
256 p->o_arg.id = sp->so_id; 269 p->o_arg.id = sp->so_owner_id.id;
257 p->o_arg.name = &dentry->d_name; 270 p->o_arg.name = &p->path.dentry->d_name;
258 p->o_arg.server = server; 271 p->o_arg.server = server;
259 p->o_arg.bitmask = server->attr_bitmask; 272 p->o_arg.bitmask = server->attr_bitmask;
260 p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; 273 p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
261 p->o_res.f_attr = &p->f_attr;
262 p->o_res.dir_attr = &p->dir_attr;
263 p->o_res.server = server;
264 nfs_fattr_init(&p->f_attr);
265 nfs_fattr_init(&p->dir_attr);
266 if (flags & O_EXCL) { 274 if (flags & O_EXCL) {
267 u32 *s = (u32 *) p->o_arg.u.verifier.data; 275 u32 *s = (u32 *) p->o_arg.u.verifier.data;
268 s[0] = jiffies; 276 s[0] = jiffies;
@@ -274,6 +282,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
274 p->c_arg.fh = &p->o_res.fh; 282 p->c_arg.fh = &p->o_res.fh;
275 p->c_arg.stateid = &p->o_res.stateid; 283 p->c_arg.stateid = &p->o_res.stateid;
276 p->c_arg.seqid = p->o_arg.seqid; 284 p->c_arg.seqid = p->o_arg.seqid;
285 nfs4_init_opendata_res(p);
286 kref_init(&p->kref);
277 return p; 287 return p;
278err_free: 288err_free:
279 kfree(p); 289 kfree(p);
@@ -282,27 +292,25 @@ err:
282 return NULL; 292 return NULL;
283} 293}
284 294
285static void nfs4_opendata_free(struct nfs4_opendata *p) 295static void nfs4_opendata_free(struct kref *kref)
286{ 296{
287 if (p != NULL && atomic_dec_and_test(&p->count)) { 297 struct nfs4_opendata *p = container_of(kref,
288 nfs_free_seqid(p->o_arg.seqid); 298 struct nfs4_opendata, kref);
289 nfs4_put_state_owner(p->owner); 299
290 dput(p->dir); 300 nfs_free_seqid(p->o_arg.seqid);
291 dput(p->dentry); 301 if (p->state != NULL)
292 kfree(p); 302 nfs4_put_open_state(p->state);
293 } 303 nfs4_put_state_owner(p->owner);
304 dput(p->dir);
305 dput(p->path.dentry);
306 mntput(p->path.mnt);
307 kfree(p);
294} 308}
295 309
296/* Helper for asynchronous RPC calls */ 310static void nfs4_opendata_put(struct nfs4_opendata *p)
297static int nfs4_call_async(struct rpc_clnt *clnt,
298 const struct rpc_call_ops *tk_ops, void *calldata)
299{ 311{
300 struct rpc_task *task; 312 if (p != NULL)
301 313 kref_put(&p->kref, nfs4_opendata_free);
302 if (!(task = rpc_new_task(clnt, RPC_TASK_ASYNC, tk_ops, calldata)))
303 return -ENOMEM;
304 rpc_execute(task);
305 return 0;
306} 314}
307 315
308static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task) 316static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task)
@@ -316,7 +324,34 @@ static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task)
316 return ret; 324 return ret;
317} 325}
318 326
319static inline void update_open_stateflags(struct nfs4_state *state, mode_t open_flags) 327static int can_open_cached(struct nfs4_state *state, int mode)
328{
329 int ret = 0;
330 switch (mode & (FMODE_READ|FMODE_WRITE|O_EXCL)) {
331 case FMODE_READ:
332 ret |= test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0;
333 ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0;
334 break;
335 case FMODE_WRITE:
336 ret |= test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0;
337 ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0;
338 break;
339 case FMODE_READ|FMODE_WRITE:
340 ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0;
341 }
342 return ret;
343}
344
345static int can_open_delegated(struct nfs_delegation *delegation, mode_t open_flags)
346{
347 if ((delegation->type & open_flags) != open_flags)
348 return 0;
349 if (delegation->flags & NFS_DELEGATION_NEED_RECLAIM)
350 return 0;
351 return 1;
352}
353
354static void update_open_stateflags(struct nfs4_state *state, mode_t open_flags)
320{ 355{
321 switch (open_flags) { 356 switch (open_flags) {
322 case FMODE_WRITE: 357 case FMODE_WRITE:
@@ -328,41 +363,176 @@ static inline void update_open_stateflags(struct nfs4_state *state, mode_t open_
328 case FMODE_READ|FMODE_WRITE: 363 case FMODE_READ|FMODE_WRITE:
329 state->n_rdwr++; 364 state->n_rdwr++;
330 } 365 }
366 nfs4_state_set_mode_locked(state, state->state | open_flags);
331} 367}
332 368
333static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags) 369static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags)
334{ 370{
335 struct inode *inode = state->inode; 371 if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
372 memcpy(state->stateid.data, stateid->data, sizeof(state->stateid.data));
373 memcpy(state->open_stateid.data, stateid->data, sizeof(state->open_stateid.data));
374 switch (open_flags) {
375 case FMODE_READ:
376 set_bit(NFS_O_RDONLY_STATE, &state->flags);
377 break;
378 case FMODE_WRITE:
379 set_bit(NFS_O_WRONLY_STATE, &state->flags);
380 break;
381 case FMODE_READ|FMODE_WRITE:
382 set_bit(NFS_O_RDWR_STATE, &state->flags);
383 }
384}
385
386static void nfs_set_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags)
387{
388 write_seqlock(&state->seqlock);
389 nfs_set_open_stateid_locked(state, stateid, open_flags);
390 write_sequnlock(&state->seqlock);
391}
336 392
393static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, nfs4_stateid *deleg_stateid, int open_flags)
394{
337 open_flags &= (FMODE_READ|FMODE_WRITE); 395 open_flags &= (FMODE_READ|FMODE_WRITE);
338 /* Protect against nfs4_find_state_byowner() */ 396 /*
397 * Protect the call to nfs4_state_set_mode_locked and
398 * serialise the stateid update
399 */
400 write_seqlock(&state->seqlock);
401 if (deleg_stateid != NULL) {
402 memcpy(state->stateid.data, deleg_stateid->data, sizeof(state->stateid.data));
403 set_bit(NFS_DELEGATED_STATE, &state->flags);
404 }
405 if (open_stateid != NULL)
406 nfs_set_open_stateid_locked(state, open_stateid, open_flags);
407 write_sequnlock(&state->seqlock);
339 spin_lock(&state->owner->so_lock); 408 spin_lock(&state->owner->so_lock);
340 spin_lock(&inode->i_lock);
341 memcpy(&state->stateid, stateid, sizeof(state->stateid));
342 update_open_stateflags(state, open_flags); 409 update_open_stateflags(state, open_flags);
343 nfs4_state_set_mode_locked(state, state->state | open_flags);
344 spin_unlock(&inode->i_lock);
345 spin_unlock(&state->owner->so_lock); 410 spin_unlock(&state->owner->so_lock);
346} 411}
347 412
413static void nfs4_return_incompatible_delegation(struct inode *inode, mode_t open_flags)
414{
415 struct nfs_delegation *delegation;
416
417 rcu_read_lock();
418 delegation = rcu_dereference(NFS_I(inode)->delegation);
419 if (delegation == NULL || (delegation->type & open_flags) == open_flags) {
420 rcu_read_unlock();
421 return;
422 }
423 rcu_read_unlock();
424 nfs_inode_return_delegation(inode);
425}
426
427static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
428{
429 struct nfs4_state *state = opendata->state;
430 struct nfs_inode *nfsi = NFS_I(state->inode);
431 struct nfs_delegation *delegation;
432 int open_mode = opendata->o_arg.open_flags & (FMODE_READ|FMODE_WRITE|O_EXCL);
433 nfs4_stateid stateid;
434 int ret = -EAGAIN;
435
436 rcu_read_lock();
437 delegation = rcu_dereference(nfsi->delegation);
438 for (;;) {
439 if (can_open_cached(state, open_mode)) {
440 spin_lock(&state->owner->so_lock);
441 if (can_open_cached(state, open_mode)) {
442 update_open_stateflags(state, open_mode);
443 spin_unlock(&state->owner->so_lock);
444 rcu_read_unlock();
445 goto out_return_state;
446 }
447 spin_unlock(&state->owner->so_lock);
448 }
449 if (delegation == NULL)
450 break;
451 if (!can_open_delegated(delegation, open_mode))
452 break;
453 /* Save the delegation */
454 memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data));
455 rcu_read_unlock();
456 lock_kernel();
457 ret = _nfs4_do_access(state->inode, state->owner->so_cred, open_mode);
458 unlock_kernel();
459 if (ret != 0)
460 goto out;
461 ret = -EAGAIN;
462 rcu_read_lock();
463 delegation = rcu_dereference(nfsi->delegation);
464 /* If no delegation, try a cached open */
465 if (delegation == NULL)
466 continue;
467 /* Is the delegation still valid? */
468 if (memcmp(stateid.data, delegation->stateid.data, sizeof(stateid.data)) != 0)
469 continue;
470 rcu_read_unlock();
471 update_open_stateid(state, NULL, &stateid, open_mode);
472 goto out_return_state;
473 }
474 rcu_read_unlock();
475out:
476 return ERR_PTR(ret);
477out_return_state:
478 atomic_inc(&state->count);
479 return state;
480}
481
348static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) 482static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data)
349{ 483{
350 struct inode *inode; 484 struct inode *inode;
351 struct nfs4_state *state = NULL; 485 struct nfs4_state *state = NULL;
486 struct nfs_delegation *delegation;
487 nfs4_stateid *deleg_stateid = NULL;
488 int ret;
352 489
353 if (!(data->f_attr.valid & NFS_ATTR_FATTR)) 490 if (!data->rpc_done) {
491 state = nfs4_try_open_cached(data);
354 goto out; 492 goto out;
493 }
494
495 ret = -EAGAIN;
496 if (!(data->f_attr.valid & NFS_ATTR_FATTR))
497 goto err;
355 inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr); 498 inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr);
499 ret = PTR_ERR(inode);
356 if (IS_ERR(inode)) 500 if (IS_ERR(inode))
357 goto out; 501 goto err;
502 ret = -ENOMEM;
358 state = nfs4_get_open_state(inode, data->owner); 503 state = nfs4_get_open_state(inode, data->owner);
359 if (state == NULL) 504 if (state == NULL)
360 goto put_inode; 505 goto err_put_inode;
361 update_open_stateid(state, &data->o_res.stateid, data->o_arg.open_flags); 506 if (data->o_res.delegation_type != 0) {
362put_inode: 507 int delegation_flags = 0;
508
509 rcu_read_lock();
510 delegation = rcu_dereference(NFS_I(inode)->delegation);
511 if (delegation)
512 delegation_flags = delegation->flags;
513 rcu_read_unlock();
514 if (!(delegation_flags & NFS_DELEGATION_NEED_RECLAIM))
515 nfs_inode_set_delegation(state->inode,
516 data->owner->so_cred,
517 &data->o_res);
518 else
519 nfs_inode_reclaim_delegation(state->inode,
520 data->owner->so_cred,
521 &data->o_res);
522 }
523 rcu_read_lock();
524 delegation = rcu_dereference(NFS_I(inode)->delegation);
525 if (delegation != NULL)
526 deleg_stateid = &delegation->stateid;
527 update_open_stateid(state, &data->o_res.stateid, deleg_stateid, data->o_arg.open_flags);
528 rcu_read_unlock();
363 iput(inode); 529 iput(inode);
364out: 530out:
365 return state; 531 return state;
532err_put_inode:
533 iput(inode);
534err:
535 return ERR_PTR(ret);
366} 536}
367 537
368static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state) 538static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state)
@@ -382,79 +552,66 @@ static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *
382 return ERR_PTR(-ENOENT); 552 return ERR_PTR(-ENOENT);
383} 553}
384 554
385static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, mode_t openflags, nfs4_stateid *stateid) 555static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, mode_t openflags, struct nfs4_state **res)
386{ 556{
557 struct nfs4_state *newstate;
387 int ret; 558 int ret;
388 559
389 opendata->o_arg.open_flags = openflags; 560 opendata->o_arg.open_flags = openflags;
561 memset(&opendata->o_res, 0, sizeof(opendata->o_res));
562 memset(&opendata->c_res, 0, sizeof(opendata->c_res));
563 nfs4_init_opendata_res(opendata);
390 ret = _nfs4_proc_open(opendata); 564 ret = _nfs4_proc_open(opendata);
391 if (ret != 0) 565 if (ret != 0)
392 return ret; 566 return ret;
393 memcpy(stateid->data, opendata->o_res.stateid.data, 567 newstate = nfs4_opendata_to_nfs4_state(opendata);
394 sizeof(stateid->data)); 568 if (IS_ERR(newstate))
569 return PTR_ERR(newstate);
570 nfs4_close_state(&opendata->path, newstate, openflags);
571 *res = newstate;
395 return 0; 572 return 0;
396} 573}
397 574
398static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *state) 575static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *state)
399{ 576{
400 nfs4_stateid stateid;
401 struct nfs4_state *newstate; 577 struct nfs4_state *newstate;
402 int mode = 0;
403 int delegation = 0;
404 int ret; 578 int ret;
405 579
406 /* memory barrier prior to reading state->n_* */ 580 /* memory barrier prior to reading state->n_* */
581 clear_bit(NFS_DELEGATED_STATE, &state->flags);
407 smp_rmb(); 582 smp_rmb();
408 if (state->n_rdwr != 0) { 583 if (state->n_rdwr != 0) {
409 ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &stateid); 584 ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate);
410 if (ret != 0) 585 if (ret != 0)
411 return ret; 586 return ret;
412 mode |= FMODE_READ|FMODE_WRITE; 587 if (newstate != state)
413 if (opendata->o_res.delegation_type != 0) 588 return -ESTALE;
414 delegation = opendata->o_res.delegation_type;
415 smp_rmb();
416 } 589 }
417 if (state->n_wronly != 0) { 590 if (state->n_wronly != 0) {
418 ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &stateid); 591 ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate);
419 if (ret != 0) 592 if (ret != 0)
420 return ret; 593 return ret;
421 mode |= FMODE_WRITE; 594 if (newstate != state)
422 if (opendata->o_res.delegation_type != 0) 595 return -ESTALE;
423 delegation = opendata->o_res.delegation_type;
424 smp_rmb();
425 } 596 }
426 if (state->n_rdonly != 0) { 597 if (state->n_rdonly != 0) {
427 ret = nfs4_open_recover_helper(opendata, FMODE_READ, &stateid); 598 ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate);
428 if (ret != 0) 599 if (ret != 0)
429 return ret; 600 return ret;
430 mode |= FMODE_READ; 601 if (newstate != state)
602 return -ESTALE;
431 } 603 }
432 clear_bit(NFS_DELEGATED_STATE, &state->flags); 604 /*
433 if (mode == 0) 605 * We may have performed cached opens for all three recoveries.
434 return 0; 606 * Check if we need to update the current stateid.
435 if (opendata->o_res.delegation_type == 0) 607 */
436 opendata->o_res.delegation_type = delegation; 608 if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0 &&
437 opendata->o_arg.open_flags |= mode; 609 memcmp(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)) != 0) {
438 newstate = nfs4_opendata_to_nfs4_state(opendata); 610 write_seqlock(&state->seqlock);
439 if (newstate != NULL) { 611 if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
440 if (opendata->o_res.delegation_type != 0) { 612 memcpy(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data));
441 struct nfs_inode *nfsi = NFS_I(newstate->inode); 613 write_sequnlock(&state->seqlock);
442 int delegation_flags = 0;
443 if (nfsi->delegation)
444 delegation_flags = nfsi->delegation->flags;
445 if (!(delegation_flags & NFS_DELEGATION_NEED_RECLAIM))
446 nfs_inode_set_delegation(newstate->inode,
447 opendata->owner->so_cred,
448 &opendata->o_res);
449 else
450 nfs_inode_reclaim_delegation(newstate->inode,
451 opendata->owner->so_cred,
452 &opendata->o_res);
453 }
454 nfs4_close_state(newstate, opendata->o_arg.open_flags);
455 } 614 }
456 if (newstate != state)
457 return -ESTALE;
458 return 0; 615 return 0;
459} 616}
460 617
@@ -462,41 +619,37 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
462 * OPEN_RECLAIM: 619 * OPEN_RECLAIM:
463 * reclaim state on the server after a reboot. 620 * reclaim state on the server after a reboot.
464 */ 621 */
465static int _nfs4_do_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) 622static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state *state)
466{ 623{
467 struct nfs_delegation *delegation = NFS_I(state->inode)->delegation; 624 struct nfs_delegation *delegation;
468 struct nfs4_opendata *opendata; 625 struct nfs4_opendata *opendata;
469 int delegation_type = 0; 626 int delegation_type = 0;
470 int status; 627 int status;
471 628
472 if (delegation != NULL) { 629 opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, NULL);
473 if (!(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) {
474 memcpy(&state->stateid, &delegation->stateid,
475 sizeof(state->stateid));
476 set_bit(NFS_DELEGATED_STATE, &state->flags);
477 return 0;
478 }
479 delegation_type = delegation->type;
480 }
481 opendata = nfs4_opendata_alloc(dentry, sp, 0, NULL);
482 if (opendata == NULL) 630 if (opendata == NULL)
483 return -ENOMEM; 631 return -ENOMEM;
484 opendata->o_arg.claim = NFS4_OPEN_CLAIM_PREVIOUS; 632 opendata->o_arg.claim = NFS4_OPEN_CLAIM_PREVIOUS;
485 opendata->o_arg.fh = NFS_FH(state->inode); 633 opendata->o_arg.fh = NFS_FH(state->inode);
486 nfs_copy_fh(&opendata->o_res.fh, opendata->o_arg.fh); 634 nfs_copy_fh(&opendata->o_res.fh, opendata->o_arg.fh);
635 rcu_read_lock();
636 delegation = rcu_dereference(NFS_I(state->inode)->delegation);
637 if (delegation != NULL && (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) != 0)
638 delegation_type = delegation->flags;
639 rcu_read_unlock();
487 opendata->o_arg.u.delegation_type = delegation_type; 640 opendata->o_arg.u.delegation_type = delegation_type;
488 status = nfs4_open_recover(opendata, state); 641 status = nfs4_open_recover(opendata, state);
489 nfs4_opendata_free(opendata); 642 nfs4_opendata_put(opendata);
490 return status; 643 return status;
491} 644}
492 645
493static int nfs4_do_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) 646static int nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state *state)
494{ 647{
495 struct nfs_server *server = NFS_SERVER(state->inode); 648 struct nfs_server *server = NFS_SERVER(state->inode);
496 struct nfs4_exception exception = { }; 649 struct nfs4_exception exception = { };
497 int err; 650 int err;
498 do { 651 do {
499 err = _nfs4_do_open_reclaim(sp, state, dentry); 652 err = _nfs4_do_open_reclaim(ctx, state);
500 if (err != -NFS4ERR_DELAY) 653 if (err != -NFS4ERR_DELAY)
501 break; 654 break;
502 nfs4_handle_exception(server, err, &exception); 655 nfs4_handle_exception(server, err, &exception);
@@ -512,37 +665,35 @@ static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *sta
512 ctx = nfs4_state_find_open_context(state); 665 ctx = nfs4_state_find_open_context(state);
513 if (IS_ERR(ctx)) 666 if (IS_ERR(ctx))
514 return PTR_ERR(ctx); 667 return PTR_ERR(ctx);
515 ret = nfs4_do_open_reclaim(sp, state, ctx->dentry); 668 ret = nfs4_do_open_reclaim(ctx, state);
516 put_nfs_open_context(ctx); 669 put_nfs_open_context(ctx);
517 return ret; 670 return ret;
518} 671}
519 672
520static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state) 673static int _nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid)
521{ 674{
522 struct nfs4_state_owner *sp = state->owner; 675 struct nfs4_state_owner *sp = state->owner;
523 struct nfs4_opendata *opendata; 676 struct nfs4_opendata *opendata;
524 int ret; 677 int ret;
525 678
526 if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) 679 opendata = nfs4_opendata_alloc(&ctx->path, sp, 0, NULL);
527 return 0;
528 opendata = nfs4_opendata_alloc(dentry, sp, 0, NULL);
529 if (opendata == NULL) 680 if (opendata == NULL)
530 return -ENOMEM; 681 return -ENOMEM;
531 opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR; 682 opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR;
532 memcpy(opendata->o_arg.u.delegation.data, state->stateid.data, 683 memcpy(opendata->o_arg.u.delegation.data, stateid->data,
533 sizeof(opendata->o_arg.u.delegation.data)); 684 sizeof(opendata->o_arg.u.delegation.data));
534 ret = nfs4_open_recover(opendata, state); 685 ret = nfs4_open_recover(opendata, state);
535 nfs4_opendata_free(opendata); 686 nfs4_opendata_put(opendata);
536 return ret; 687 return ret;
537} 688}
538 689
539int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state) 690int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid)
540{ 691{
541 struct nfs4_exception exception = { }; 692 struct nfs4_exception exception = { };
542 struct nfs_server *server = NFS_SERVER(dentry->d_inode); 693 struct nfs_server *server = NFS_SERVER(state->inode);
543 int err; 694 int err;
544 do { 695 do {
545 err = _nfs4_open_delegation_recall(dentry, state); 696 err = _nfs4_open_delegation_recall(ctx, state, stateid);
546 switch (err) { 697 switch (err) {
547 case 0: 698 case 0:
548 return err; 699 return err;
@@ -582,9 +733,10 @@ static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata)
582 memcpy(data->o_res.stateid.data, data->c_res.stateid.data, 733 memcpy(data->o_res.stateid.data, data->c_res.stateid.data,
583 sizeof(data->o_res.stateid.data)); 734 sizeof(data->o_res.stateid.data));
584 renew_lease(data->o_res.server, data->timestamp); 735 renew_lease(data->o_res.server, data->timestamp);
736 data->rpc_done = 1;
585 } 737 }
586 nfs_increment_open_seqid(data->rpc_status, data->c_arg.seqid);
587 nfs_confirm_seqid(&data->owner->so_seqid, data->rpc_status); 738 nfs_confirm_seqid(&data->owner->so_seqid, data->rpc_status);
739 nfs_increment_open_seqid(data->rpc_status, data->c_arg.seqid);
588} 740}
589 741
590static void nfs4_open_confirm_release(void *calldata) 742static void nfs4_open_confirm_release(void *calldata)
@@ -596,14 +748,14 @@ static void nfs4_open_confirm_release(void *calldata)
596 if (data->cancelled == 0) 748 if (data->cancelled == 0)
597 goto out_free; 749 goto out_free;
598 /* In case of error, no cleanup! */ 750 /* In case of error, no cleanup! */
599 if (data->rpc_status != 0) 751 if (!data->rpc_done)
600 goto out_free; 752 goto out_free;
601 nfs_confirm_seqid(&data->owner->so_seqid, 0); 753 nfs_confirm_seqid(&data->owner->so_seqid, 0);
602 state = nfs4_opendata_to_nfs4_state(data); 754 state = nfs4_opendata_to_nfs4_state(data);
603 if (state != NULL) 755 if (!IS_ERR(state))
604 nfs4_close_state(state, data->o_arg.open_flags); 756 nfs4_close_state(&data->path, state, data->o_arg.open_flags);
605out_free: 757out_free:
606 nfs4_opendata_free(data); 758 nfs4_opendata_put(data);
607} 759}
608 760
609static const struct rpc_call_ops nfs4_open_confirm_ops = { 761static const struct rpc_call_ops nfs4_open_confirm_ops = {
@@ -621,12 +773,9 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
621 struct rpc_task *task; 773 struct rpc_task *task;
622 int status; 774 int status;
623 775
624 atomic_inc(&data->count); 776 kref_get(&data->kref);
625 /* 777 data->rpc_done = 0;
626 * If rpc_run_task() ends up calling ->rpc_release(), we 778 data->rpc_status = 0;
627 * want to ensure that it takes the 'error' code path.
628 */
629 data->rpc_status = -ENOMEM;
630 task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_confirm_ops, data); 779 task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_confirm_ops, data);
631 if (IS_ERR(task)) 780 if (IS_ERR(task))
632 return PTR_ERR(task); 781 return PTR_ERR(task);
@@ -653,13 +802,35 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
653 802
654 if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0) 803 if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0)
655 return; 804 return;
805 /*
806 * Check if we still need to send an OPEN call, or if we can use
807 * a delegation instead.
808 */
809 if (data->state != NULL) {
810 struct nfs_delegation *delegation;
811
812 if (can_open_cached(data->state, data->o_arg.open_flags & (FMODE_READ|FMODE_WRITE|O_EXCL)))
813 goto out_no_action;
814 rcu_read_lock();
815 delegation = rcu_dereference(NFS_I(data->state->inode)->delegation);
816 if (delegation != NULL &&
817 (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0) {
818 rcu_read_unlock();
819 goto out_no_action;
820 }
821 rcu_read_unlock();
822 }
656 /* Update sequence id. */ 823 /* Update sequence id. */
657 data->o_arg.id = sp->so_id; 824 data->o_arg.id = sp->so_owner_id.id;
658 data->o_arg.clientid = sp->so_client->cl_clientid; 825 data->o_arg.clientid = sp->so_client->cl_clientid;
659 if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) 826 if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS)
660 msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; 827 msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
661 data->timestamp = jiffies; 828 data->timestamp = jiffies;
662 rpc_call_setup(task, &msg, 0); 829 rpc_call_setup(task, &msg, 0);
830 return;
831out_no_action:
832 task->tk_action = NULL;
833
663} 834}
664 835
665static void nfs4_open_done(struct rpc_task *task, void *calldata) 836static void nfs4_open_done(struct rpc_task *task, void *calldata)
@@ -683,8 +854,11 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata)
683 data->rpc_status = -ENOTDIR; 854 data->rpc_status = -ENOTDIR;
684 } 855 }
685 renew_lease(data->o_res.server, data->timestamp); 856 renew_lease(data->o_res.server, data->timestamp);
857 if (!(data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM))
858 nfs_confirm_seqid(&data->owner->so_seqid, 0);
686 } 859 }
687 nfs_increment_open_seqid(data->rpc_status, data->o_arg.seqid); 860 nfs_increment_open_seqid(data->rpc_status, data->o_arg.seqid);
861 data->rpc_done = 1;
688} 862}
689 863
690static void nfs4_open_release(void *calldata) 864static void nfs4_open_release(void *calldata)
@@ -696,17 +870,17 @@ static void nfs4_open_release(void *calldata)
696 if (data->cancelled == 0) 870 if (data->cancelled == 0)
697 goto out_free; 871 goto out_free;
698 /* In case of error, no cleanup! */ 872 /* In case of error, no cleanup! */
699 if (data->rpc_status != 0) 873 if (data->rpc_status != 0 || !data->rpc_done)
700 goto out_free; 874 goto out_free;
701 /* In case we need an open_confirm, no cleanup! */ 875 /* In case we need an open_confirm, no cleanup! */
702 if (data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM) 876 if (data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM)
703 goto out_free; 877 goto out_free;
704 nfs_confirm_seqid(&data->owner->so_seqid, 0); 878 nfs_confirm_seqid(&data->owner->so_seqid, 0);
705 state = nfs4_opendata_to_nfs4_state(data); 879 state = nfs4_opendata_to_nfs4_state(data);
706 if (state != NULL) 880 if (!IS_ERR(state))
707 nfs4_close_state(state, data->o_arg.open_flags); 881 nfs4_close_state(&data->path, state, data->o_arg.open_flags);
708out_free: 882out_free:
709 nfs4_opendata_free(data); 883 nfs4_opendata_put(data);
710} 884}
711 885
712static const struct rpc_call_ops nfs4_open_ops = { 886static const struct rpc_call_ops nfs4_open_ops = {
@@ -727,12 +901,10 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
727 struct rpc_task *task; 901 struct rpc_task *task;
728 int status; 902 int status;
729 903
730 atomic_inc(&data->count); 904 kref_get(&data->kref);
731 /* 905 data->rpc_done = 0;
732 * If rpc_run_task() ends up calling ->rpc_release(), we 906 data->rpc_status = 0;
733 * want to ensure that it takes the 'error' code path. 907 data->cancelled = 0;
734 */
735 data->rpc_status = -ENOMEM;
736 task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_ops, data); 908 task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_ops, data);
737 if (IS_ERR(task)) 909 if (IS_ERR(task))
738 return PTR_ERR(task); 910 return PTR_ERR(task);
@@ -743,7 +915,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
743 } else 915 } else
744 status = data->rpc_status; 916 status = data->rpc_status;
745 rpc_put_task(task); 917 rpc_put_task(task);
746 if (status != 0) 918 if (status != 0 || !data->rpc_done)
747 return status; 919 return status;
748 920
749 if (o_arg->open_flags & O_CREAT) { 921 if (o_arg->open_flags & O_CREAT) {
@@ -756,7 +928,6 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
756 if (status != 0) 928 if (status != 0)
757 return status; 929 return status;
758 } 930 }
759 nfs_confirm_seqid(&data->owner->so_seqid, 0);
760 if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) 931 if (!(o_res->f_attr->valid & NFS_ATTR_FATTR))
761 return server->nfs_client->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr); 932 return server->nfs_client->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr);
762 return 0; 933 return 0;
@@ -772,6 +943,8 @@ static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openf
772 mask |= MAY_READ; 943 mask |= MAY_READ;
773 if (openflags & FMODE_WRITE) 944 if (openflags & FMODE_WRITE)
774 mask |= MAY_WRITE; 945 mask |= MAY_WRITE;
946 if (openflags & FMODE_EXEC)
947 mask |= MAY_EXEC;
775 status = nfs_access_get_cached(inode, cred, &cache); 948 status = nfs_access_get_cached(inode, cred, &cache);
776 if (status == 0) 949 if (status == 0)
777 goto out; 950 goto out;
@@ -811,43 +984,32 @@ static int nfs4_recover_expired_lease(struct nfs_server *server)
811 * reclaim state on the server after a network partition. 984 * reclaim state on the server after a network partition.
812 * Assumes caller holds the appropriate lock 985 * Assumes caller holds the appropriate lock
813 */ 986 */
814static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) 987static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state)
815{ 988{
816 struct inode *inode = state->inode;
817 struct nfs_delegation *delegation = NFS_I(inode)->delegation;
818 struct nfs4_opendata *opendata; 989 struct nfs4_opendata *opendata;
819 int openflags = state->state & (FMODE_READ|FMODE_WRITE);
820 int ret; 990 int ret;
821 991
822 if (delegation != NULL && !(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) { 992 opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, NULL);
823 ret = _nfs4_do_access(inode, sp->so_cred, openflags);
824 if (ret < 0)
825 return ret;
826 memcpy(&state->stateid, &delegation->stateid, sizeof(state->stateid));
827 set_bit(NFS_DELEGATED_STATE, &state->flags);
828 return 0;
829 }
830 opendata = nfs4_opendata_alloc(dentry, sp, openflags, NULL);
831 if (opendata == NULL) 993 if (opendata == NULL)
832 return -ENOMEM; 994 return -ENOMEM;
833 ret = nfs4_open_recover(opendata, state); 995 ret = nfs4_open_recover(opendata, state);
834 if (ret == -ESTALE) { 996 if (ret == -ESTALE) {
835 /* Invalidate the state owner so we don't ever use it again */ 997 /* Invalidate the state owner so we don't ever use it again */
836 nfs4_drop_state_owner(sp); 998 nfs4_drop_state_owner(state->owner);
837 d_drop(dentry); 999 d_drop(ctx->path.dentry);
838 } 1000 }
839 nfs4_opendata_free(opendata); 1001 nfs4_opendata_put(opendata);
840 return ret; 1002 return ret;
841} 1003}
842 1004
843static inline int nfs4_do_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) 1005static inline int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state)
844{ 1006{
845 struct nfs_server *server = NFS_SERVER(dentry->d_inode); 1007 struct nfs_server *server = NFS_SERVER(state->inode);
846 struct nfs4_exception exception = { }; 1008 struct nfs4_exception exception = { };
847 int err; 1009 int err;
848 1010
849 do { 1011 do {
850 err = _nfs4_open_expired(sp, state, dentry); 1012 err = _nfs4_open_expired(ctx, state);
851 if (err == -NFS4ERR_DELAY) 1013 if (err == -NFS4ERR_DELAY)
852 nfs4_handle_exception(server, err, &exception); 1014 nfs4_handle_exception(server, err, &exception);
853 } while (exception.retry); 1015 } while (exception.retry);
@@ -862,107 +1024,38 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta
862 ctx = nfs4_state_find_open_context(state); 1024 ctx = nfs4_state_find_open_context(state);
863 if (IS_ERR(ctx)) 1025 if (IS_ERR(ctx))
864 return PTR_ERR(ctx); 1026 return PTR_ERR(ctx);
865 ret = nfs4_do_open_expired(sp, state, ctx->dentry); 1027 ret = nfs4_do_open_expired(ctx, state);
866 put_nfs_open_context(ctx); 1028 put_nfs_open_context(ctx);
867 return ret; 1029 return ret;
868} 1030}
869 1031
870/* 1032/*
871 * Returns a referenced nfs4_state if there is an open delegation on the file 1033 * on an EXCLUSIVE create, the server should send back a bitmask with FATTR4-*
1034 * fields corresponding to attributes that were used to store the verifier.
1035 * Make sure we clobber those fields in the later setattr call
872 */ 1036 */
873static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred, struct nfs4_state **res) 1037static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct iattr *sattr)
874{
875 struct nfs_delegation *delegation;
876 struct nfs_server *server = NFS_SERVER(inode);
877 struct nfs_client *clp = server->nfs_client;
878 struct nfs_inode *nfsi = NFS_I(inode);
879 struct nfs4_state_owner *sp = NULL;
880 struct nfs4_state *state = NULL;
881 int open_flags = flags & (FMODE_READ|FMODE_WRITE);
882 int err;
883
884 err = -ENOMEM;
885 if (!(sp = nfs4_get_state_owner(server, cred))) {
886 dprintk("%s: nfs4_get_state_owner failed!\n", __FUNCTION__);
887 return err;
888 }
889 err = nfs4_recover_expired_lease(server);
890 if (err != 0)
891 goto out_put_state_owner;
892 /* Protect against reboot recovery - NOTE ORDER! */
893 down_read(&clp->cl_sem);
894 /* Protect against delegation recall */
895 down_read(&nfsi->rwsem);
896 delegation = NFS_I(inode)->delegation;
897 err = -ENOENT;
898 if (delegation == NULL || (delegation->type & open_flags) != open_flags)
899 goto out_err;
900 err = -ENOMEM;
901 state = nfs4_get_open_state(inode, sp);
902 if (state == NULL)
903 goto out_err;
904
905 err = -ENOENT;
906 if ((state->state & open_flags) == open_flags) {
907 spin_lock(&inode->i_lock);
908 update_open_stateflags(state, open_flags);
909 spin_unlock(&inode->i_lock);
910 goto out_ok;
911 } else if (state->state != 0)
912 goto out_put_open_state;
913
914 lock_kernel();
915 err = _nfs4_do_access(inode, cred, open_flags);
916 unlock_kernel();
917 if (err != 0)
918 goto out_put_open_state;
919 set_bit(NFS_DELEGATED_STATE, &state->flags);
920 update_open_stateid(state, &delegation->stateid, open_flags);
921out_ok:
922 nfs4_put_state_owner(sp);
923 up_read(&nfsi->rwsem);
924 up_read(&clp->cl_sem);
925 *res = state;
926 return 0;
927out_put_open_state:
928 nfs4_put_open_state(state);
929out_err:
930 up_read(&nfsi->rwsem);
931 up_read(&clp->cl_sem);
932 if (err != -EACCES)
933 nfs_inode_return_delegation(inode);
934out_put_state_owner:
935 nfs4_put_state_owner(sp);
936 return err;
937}
938
939static struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred)
940{ 1038{
941 struct nfs4_exception exception = { }; 1039 if ((opendata->o_res.attrset[1] & FATTR4_WORD1_TIME_ACCESS) &&
942 struct nfs4_state *res = ERR_PTR(-EIO); 1040 !(sattr->ia_valid & ATTR_ATIME_SET))
943 int err; 1041 sattr->ia_valid |= ATTR_ATIME;
944 1042
945 do { 1043 if ((opendata->o_res.attrset[1] & FATTR4_WORD1_TIME_MODIFY) &&
946 err = _nfs4_open_delegated(inode, flags, cred, &res); 1044 !(sattr->ia_valid & ATTR_MTIME_SET))
947 if (err == 0) 1045 sattr->ia_valid |= ATTR_MTIME;
948 break;
949 res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(inode),
950 err, &exception));
951 } while (exception.retry);
952 return res;
953} 1046}
954 1047
955/* 1048/*
956 * Returns a referenced nfs4_state 1049 * Returns a referenced nfs4_state
957 */ 1050 */
958static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res) 1051static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
959{ 1052{
960 struct nfs4_state_owner *sp; 1053 struct nfs4_state_owner *sp;
961 struct nfs4_state *state = NULL; 1054 struct nfs4_state *state = NULL;
962 struct nfs_server *server = NFS_SERVER(dir); 1055 struct nfs_server *server = NFS_SERVER(dir);
963 struct nfs_client *clp = server->nfs_client; 1056 struct nfs_client *clp = server->nfs_client;
964 struct nfs4_opendata *opendata; 1057 struct nfs4_opendata *opendata;
965 int status; 1058 int status;
966 1059
967 /* Protect against reboot recovery conflicts */ 1060 /* Protect against reboot recovery conflicts */
968 status = -ENOMEM; 1061 status = -ENOMEM;
@@ -973,29 +1066,35 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
973 status = nfs4_recover_expired_lease(server); 1066 status = nfs4_recover_expired_lease(server);
974 if (status != 0) 1067 if (status != 0)
975 goto err_put_state_owner; 1068 goto err_put_state_owner;
1069 if (path->dentry->d_inode != NULL)
1070 nfs4_return_incompatible_delegation(path->dentry->d_inode, flags & (FMODE_READ|FMODE_WRITE));
976 down_read(&clp->cl_sem); 1071 down_read(&clp->cl_sem);
977 status = -ENOMEM; 1072 status = -ENOMEM;
978 opendata = nfs4_opendata_alloc(dentry, sp, flags, sattr); 1073 opendata = nfs4_opendata_alloc(path, sp, flags, sattr);
979 if (opendata == NULL) 1074 if (opendata == NULL)
980 goto err_release_rwsem; 1075 goto err_release_rwsem;
981 1076
1077 if (path->dentry->d_inode != NULL)
1078 opendata->state = nfs4_get_open_state(path->dentry->d_inode, sp);
1079
982 status = _nfs4_proc_open(opendata); 1080 status = _nfs4_proc_open(opendata);
983 if (status != 0) 1081 if (status != 0)
984 goto err_opendata_free; 1082 goto err_opendata_put;
1083
1084 if (opendata->o_arg.open_flags & O_EXCL)
1085 nfs4_exclusive_attrset(opendata, sattr);
985 1086
986 status = -ENOMEM;
987 state = nfs4_opendata_to_nfs4_state(opendata); 1087 state = nfs4_opendata_to_nfs4_state(opendata);
988 if (state == NULL) 1088 status = PTR_ERR(state);
989 goto err_opendata_free; 1089 if (IS_ERR(state))
990 if (opendata->o_res.delegation_type != 0) 1090 goto err_opendata_put;
991 nfs_inode_set_delegation(state->inode, cred, &opendata->o_res); 1091 nfs4_opendata_put(opendata);
992 nfs4_opendata_free(opendata);
993 nfs4_put_state_owner(sp); 1092 nfs4_put_state_owner(sp);
994 up_read(&clp->cl_sem); 1093 up_read(&clp->cl_sem);
995 *res = state; 1094 *res = state;
996 return 0; 1095 return 0;
997err_opendata_free: 1096err_opendata_put:
998 nfs4_opendata_free(opendata); 1097 nfs4_opendata_put(opendata);
999err_release_rwsem: 1098err_release_rwsem:
1000 up_read(&clp->cl_sem); 1099 up_read(&clp->cl_sem);
1001err_put_state_owner: 1100err_put_state_owner:
@@ -1006,14 +1105,14 @@ out_err:
1006} 1105}
1007 1106
1008 1107
1009static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred) 1108static struct nfs4_state *nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred)
1010{ 1109{
1011 struct nfs4_exception exception = { }; 1110 struct nfs4_exception exception = { };
1012 struct nfs4_state *res; 1111 struct nfs4_state *res;
1013 int status; 1112 int status;
1014 1113
1015 do { 1114 do {
1016 status = _nfs4_do_open(dir, dentry, flags, sattr, cred, &res); 1115 status = _nfs4_do_open(dir, path, flags, sattr, cred, &res);
1017 if (status == 0) 1116 if (status == 0)
1018 break; 1117 break;
1019 /* NOTE: BAD_SEQID means the server and client disagree about the 1118 /* NOTE: BAD_SEQID means the server and client disagree about the
@@ -1028,7 +1127,9 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry,
1028 * the user though... 1127 * the user though...
1029 */ 1128 */
1030 if (status == -NFS4ERR_BAD_SEQID) { 1129 if (status == -NFS4ERR_BAD_SEQID) {
1031 printk(KERN_WARNING "NFS: v4 server returned a bad sequence-id error!\n"); 1130 printk(KERN_WARNING "NFS: v4 server %s "
1131 " returned a bad sequence-id error!\n",
1132 NFS_SERVER(dir)->nfs_client->cl_hostname);
1032 exception.retry = 1; 1133 exception.retry = 1;
1033 continue; 1134 continue;
1034 } 1135 }
@@ -1042,6 +1143,11 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry,
1042 exception.retry = 1; 1143 exception.retry = 1;
1043 continue; 1144 continue;
1044 } 1145 }
1146 if (status == -EAGAIN) {
1147 /* We must have found a delegation */
1148 exception.retry = 1;
1149 continue;
1150 }
1045 res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir), 1151 res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir),
1046 status, &exception)); 1152 status, &exception));
1047 } while (exception.retry); 1153 } while (exception.retry);
@@ -1101,6 +1207,7 @@ static int nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr,
1101} 1207}
1102 1208
1103struct nfs4_closedata { 1209struct nfs4_closedata {
1210 struct path path;
1104 struct inode *inode; 1211 struct inode *inode;
1105 struct nfs4_state *state; 1212 struct nfs4_state *state;
1106 struct nfs_closeargs arg; 1213 struct nfs_closeargs arg;
@@ -1117,6 +1224,8 @@ static void nfs4_free_closedata(void *data)
1117 nfs4_put_open_state(calldata->state); 1224 nfs4_put_open_state(calldata->state);
1118 nfs_free_seqid(calldata->arg.seqid); 1225 nfs_free_seqid(calldata->arg.seqid);
1119 nfs4_put_state_owner(sp); 1226 nfs4_put_state_owner(sp);
1227 dput(calldata->path.dentry);
1228 mntput(calldata->path.mnt);
1120 kfree(calldata); 1229 kfree(calldata);
1121} 1230}
1122 1231
@@ -1134,8 +1243,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
1134 nfs_increment_open_seqid(task->tk_status, calldata->arg.seqid); 1243 nfs_increment_open_seqid(task->tk_status, calldata->arg.seqid);
1135 switch (task->tk_status) { 1244 switch (task->tk_status) {
1136 case 0: 1245 case 0:
1137 memcpy(&state->stateid, &calldata->res.stateid, 1246 nfs_set_open_stateid(state, &calldata->res.stateid, calldata->arg.open_flags);
1138 sizeof(state->stateid));
1139 renew_lease(server, calldata->timestamp); 1247 renew_lease(server, calldata->timestamp);
1140 break; 1248 break;
1141 case -NFS4ERR_STALE_STATEID: 1249 case -NFS4ERR_STALE_STATEID:
@@ -1160,26 +1268,30 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
1160 .rpc_resp = &calldata->res, 1268 .rpc_resp = &calldata->res,
1161 .rpc_cred = state->owner->so_cred, 1269 .rpc_cred = state->owner->so_cred,
1162 }; 1270 };
1163 int mode = 0, old_mode; 1271 int clear_rd, clear_wr, clear_rdwr;
1272 int mode;
1164 1273
1165 if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) 1274 if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
1166 return; 1275 return;
1167 /* Recalculate the new open mode in case someone reopened the file 1276
1168 * while we were waiting in line to be scheduled. 1277 mode = FMODE_READ|FMODE_WRITE;
1169 */ 1278 clear_rd = clear_wr = clear_rdwr = 0;
1170 spin_lock(&state->owner->so_lock); 1279 spin_lock(&state->owner->so_lock);
1171 spin_lock(&calldata->inode->i_lock); 1280 /* Calculate the change in open mode */
1172 mode = old_mode = state->state;
1173 if (state->n_rdwr == 0) { 1281 if (state->n_rdwr == 0) {
1174 if (state->n_rdonly == 0) 1282 if (state->n_rdonly == 0) {
1175 mode &= ~FMODE_READ; 1283 mode &= ~FMODE_READ;
1176 if (state->n_wronly == 0) 1284 clear_rd |= test_and_clear_bit(NFS_O_RDONLY_STATE, &state->flags);
1285 clear_rdwr |= test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags);
1286 }
1287 if (state->n_wronly == 0) {
1177 mode &= ~FMODE_WRITE; 1288 mode &= ~FMODE_WRITE;
1289 clear_wr |= test_and_clear_bit(NFS_O_WRONLY_STATE, &state->flags);
1290 clear_rdwr |= test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags);
1291 }
1178 } 1292 }
1179 nfs4_state_set_mode_locked(state, mode);
1180 spin_unlock(&calldata->inode->i_lock);
1181 spin_unlock(&state->owner->so_lock); 1293 spin_unlock(&state->owner->so_lock);
1182 if (mode == old_mode || test_bit(NFS_DELEGATED_STATE, &state->flags)) { 1294 if (!clear_rd && !clear_wr && !clear_rdwr) {
1183 /* Note: exit _without_ calling nfs4_close_done */ 1295 /* Note: exit _without_ calling nfs4_close_done */
1184 task->tk_action = NULL; 1296 task->tk_action = NULL;
1185 return; 1297 return;
@@ -1209,19 +1321,21 @@ static const struct rpc_call_ops nfs4_close_ops = {
1209 * 1321 *
1210 * NOTE: Caller must be holding the sp->so_owner semaphore! 1322 * NOTE: Caller must be holding the sp->so_owner semaphore!
1211 */ 1323 */
1212int nfs4_do_close(struct inode *inode, struct nfs4_state *state) 1324int nfs4_do_close(struct path *path, struct nfs4_state *state)
1213{ 1325{
1214 struct nfs_server *server = NFS_SERVER(inode); 1326 struct nfs_server *server = NFS_SERVER(state->inode);
1215 struct nfs4_closedata *calldata; 1327 struct nfs4_closedata *calldata;
1328 struct nfs4_state_owner *sp = state->owner;
1329 struct rpc_task *task;
1216 int status = -ENOMEM; 1330 int status = -ENOMEM;
1217 1331
1218 calldata = kmalloc(sizeof(*calldata), GFP_KERNEL); 1332 calldata = kmalloc(sizeof(*calldata), GFP_KERNEL);
1219 if (calldata == NULL) 1333 if (calldata == NULL)
1220 goto out; 1334 goto out;
1221 calldata->inode = inode; 1335 calldata->inode = state->inode;
1222 calldata->state = state; 1336 calldata->state = state;
1223 calldata->arg.fh = NFS_FH(inode); 1337 calldata->arg.fh = NFS_FH(state->inode);
1224 calldata->arg.stateid = &state->stateid; 1338 calldata->arg.stateid = &state->open_stateid;
1225 /* Serialization for the sequence id */ 1339 /* Serialization for the sequence id */
1226 calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid); 1340 calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid);
1227 if (calldata->arg.seqid == NULL) 1341 if (calldata->arg.seqid == NULL)
@@ -1229,36 +1343,55 @@ int nfs4_do_close(struct inode *inode, struct nfs4_state *state)
1229 calldata->arg.bitmask = server->attr_bitmask; 1343 calldata->arg.bitmask = server->attr_bitmask;
1230 calldata->res.fattr = &calldata->fattr; 1344 calldata->res.fattr = &calldata->fattr;
1231 calldata->res.server = server; 1345 calldata->res.server = server;
1346 calldata->path.mnt = mntget(path->mnt);
1347 calldata->path.dentry = dget(path->dentry);
1232 1348
1233 status = nfs4_call_async(server->client, &nfs4_close_ops, calldata); 1349 task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_close_ops, calldata);
1234 if (status == 0) 1350 if (IS_ERR(task))
1235 goto out; 1351 return PTR_ERR(task);
1236 1352 rpc_put_task(task);
1237 nfs_free_seqid(calldata->arg.seqid); 1353 return 0;
1238out_free_calldata: 1354out_free_calldata:
1239 kfree(calldata); 1355 kfree(calldata);
1240out: 1356out:
1357 nfs4_put_open_state(state);
1358 nfs4_put_state_owner(sp);
1241 return status; 1359 return status;
1242} 1360}
1243 1361
1244static int nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, struct nfs4_state *state) 1362static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct nfs4_state *state)
1245{ 1363{
1246 struct file *filp; 1364 struct file *filp;
1365 int ret;
1247 1366
1248 filp = lookup_instantiate_filp(nd, dentry, NULL); 1367 /* If the open_intent is for execute, we have an extra check to make */
1368 if (nd->intent.open.flags & FMODE_EXEC) {
1369 ret = _nfs4_do_access(state->inode,
1370 state->owner->so_cred,
1371 nd->intent.open.flags);
1372 if (ret < 0)
1373 goto out_close;
1374 }
1375 filp = lookup_instantiate_filp(nd, path->dentry, NULL);
1249 if (!IS_ERR(filp)) { 1376 if (!IS_ERR(filp)) {
1250 struct nfs_open_context *ctx; 1377 struct nfs_open_context *ctx;
1251 ctx = (struct nfs_open_context *)filp->private_data; 1378 ctx = (struct nfs_open_context *)filp->private_data;
1252 ctx->state = state; 1379 ctx->state = state;
1253 return 0; 1380 return 0;
1254 } 1381 }
1255 nfs4_close_state(state, nd->intent.open.flags); 1382 ret = PTR_ERR(filp);
1256 return PTR_ERR(filp); 1383out_close:
1384 nfs4_close_state(path, state, nd->intent.open.flags);
1385 return ret;
1257} 1386}
1258 1387
1259struct dentry * 1388struct dentry *
1260nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) 1389nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
1261{ 1390{
1391 struct path path = {
1392 .mnt = nd->mnt,
1393 .dentry = dentry,
1394 };
1262 struct iattr attr; 1395 struct iattr attr;
1263 struct rpc_cred *cred; 1396 struct rpc_cred *cred;
1264 struct nfs4_state *state; 1397 struct nfs4_state *state;
@@ -1277,7 +1410,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
1277 cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0); 1410 cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
1278 if (IS_ERR(cred)) 1411 if (IS_ERR(cred))
1279 return (struct dentry *)cred; 1412 return (struct dentry *)cred;
1280 state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred); 1413 state = nfs4_do_open(dir, &path, nd->intent.open.flags, &attr, cred);
1281 put_rpccred(cred); 1414 put_rpccred(cred);
1282 if (IS_ERR(state)) { 1415 if (IS_ERR(state)) {
1283 if (PTR_ERR(state) == -ENOENT) 1416 if (PTR_ERR(state) == -ENOENT)
@@ -1287,22 +1420,24 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
1287 res = d_add_unique(dentry, igrab(state->inode)); 1420 res = d_add_unique(dentry, igrab(state->inode));
1288 if (res != NULL) 1421 if (res != NULL)
1289 dentry = res; 1422 dentry = res;
1290 nfs4_intent_set_file(nd, dentry, state); 1423 nfs4_intent_set_file(nd, &path, state);
1291 return res; 1424 return res;
1292} 1425}
1293 1426
1294int 1427int
1295nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, struct nameidata *nd) 1428nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, struct nameidata *nd)
1296{ 1429{
1430 struct path path = {
1431 .mnt = nd->mnt,
1432 .dentry = dentry,
1433 };
1297 struct rpc_cred *cred; 1434 struct rpc_cred *cred;
1298 struct nfs4_state *state; 1435 struct nfs4_state *state;
1299 1436
1300 cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0); 1437 cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
1301 if (IS_ERR(cred)) 1438 if (IS_ERR(cred))
1302 return PTR_ERR(cred); 1439 return PTR_ERR(cred);
1303 state = nfs4_open_delegated(dentry->d_inode, openflags, cred); 1440 state = nfs4_do_open(dir, &path, openflags, NULL, cred);
1304 if (IS_ERR(state))
1305 state = nfs4_do_open(dir, dentry, openflags, NULL, cred);
1306 put_rpccred(cred); 1441 put_rpccred(cred);
1307 if (IS_ERR(state)) { 1442 if (IS_ERR(state)) {
1308 switch (PTR_ERR(state)) { 1443 switch (PTR_ERR(state)) {
@@ -1318,10 +1453,10 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st
1318 } 1453 }
1319 } 1454 }
1320 if (state->inode == dentry->d_inode) { 1455 if (state->inode == dentry->d_inode) {
1321 nfs4_intent_set_file(nd, dentry, state); 1456 nfs4_intent_set_file(nd, &path, state);
1322 return 1; 1457 return 1;
1323 } 1458 }
1324 nfs4_close_state(state, openflags); 1459 nfs4_close_state(&path, state, openflags);
1325out_drop: 1460out_drop:
1326 d_drop(dentry); 1461 d_drop(dentry);
1327 return 0; 1462 return 0;
@@ -1559,8 +1694,6 @@ static int _nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
1559 dprintk("NFS call lookupfh %s\n", name->name); 1694 dprintk("NFS call lookupfh %s\n", name->name);
1560 status = rpc_call_sync(server->client, &msg, 0); 1695 status = rpc_call_sync(server->client, &msg, 0);
1561 dprintk("NFS reply lookupfh: %d\n", status); 1696 dprintk("NFS reply lookupfh: %d\n", status);
1562 if (status == -NFS4ERR_MOVED)
1563 status = -EREMOTE;
1564 return status; 1697 return status;
1565} 1698}
1566 1699
@@ -1571,10 +1704,13 @@ static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
1571 struct nfs4_exception exception = { }; 1704 struct nfs4_exception exception = { };
1572 int err; 1705 int err;
1573 do { 1706 do {
1574 err = nfs4_handle_exception(server, 1707 err = _nfs4_proc_lookupfh(server, dirfh, name, fhandle, fattr);
1575 _nfs4_proc_lookupfh(server, dirfh, name, 1708 /* FIXME: !!!! */
1576 fhandle, fattr), 1709 if (err == -NFS4ERR_MOVED) {
1577 &exception); 1710 err = -EREMOTE;
1711 break;
1712 }
1713 err = nfs4_handle_exception(server, err, &exception);
1578 } while (exception.retry); 1714 } while (exception.retry);
1579 return err; 1715 return err;
1580} 1716}
@@ -1582,28 +1718,10 @@ static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
1582static int _nfs4_proc_lookup(struct inode *dir, struct qstr *name, 1718static int _nfs4_proc_lookup(struct inode *dir, struct qstr *name,
1583 struct nfs_fh *fhandle, struct nfs_fattr *fattr) 1719 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
1584{ 1720{
1585 int status; 1721 int status;
1586 struct nfs_server *server = NFS_SERVER(dir);
1587 struct nfs4_lookup_arg args = {
1588 .bitmask = server->attr_bitmask,
1589 .dir_fh = NFS_FH(dir),
1590 .name = name,
1591 };
1592 struct nfs4_lookup_res res = {
1593 .server = server,
1594 .fattr = fattr,
1595 .fh = fhandle,
1596 };
1597 struct rpc_message msg = {
1598 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP],
1599 .rpc_argp = &args,
1600 .rpc_resp = &res,
1601 };
1602
1603 nfs_fattr_init(fattr);
1604 1722
1605 dprintk("NFS call lookup %s\n", name->name); 1723 dprintk("NFS call lookup %s\n", name->name);
1606 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 1724 status = _nfs4_proc_lookupfh(NFS_SERVER(dir), NFS_FH(dir), name, fhandle, fattr);
1607 if (status == -NFS4ERR_MOVED) 1725 if (status == -NFS4ERR_MOVED)
1608 status = nfs4_get_referral(dir, name, fattr, fhandle); 1726 status = nfs4_get_referral(dir, name, fattr, fhandle);
1609 dprintk("NFS reply lookup: %d\n", status); 1727 dprintk("NFS reply lookup: %d\n", status);
@@ -1752,6 +1870,10 @@ static int
1752nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, 1870nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
1753 int flags, struct nameidata *nd) 1871 int flags, struct nameidata *nd)
1754{ 1872{
1873 struct path path = {
1874 .mnt = nd->mnt,
1875 .dentry = dentry,
1876 };
1755 struct nfs4_state *state; 1877 struct nfs4_state *state;
1756 struct rpc_cred *cred; 1878 struct rpc_cred *cred;
1757 int status = 0; 1879 int status = 0;
@@ -1761,7 +1883,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
1761 status = PTR_ERR(cred); 1883 status = PTR_ERR(cred);
1762 goto out; 1884 goto out;
1763 } 1885 }
1764 state = nfs4_do_open(dir, dentry, flags, sattr, cred); 1886 state = nfs4_do_open(dir, &path, flags, sattr, cred);
1765 put_rpccred(cred); 1887 put_rpccred(cred);
1766 if (IS_ERR(state)) { 1888 if (IS_ERR(state)) {
1767 status = PTR_ERR(state); 1889 status = PTR_ERR(state);
@@ -1773,11 +1895,12 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
1773 status = nfs4_do_setattr(state->inode, &fattr, sattr, state); 1895 status = nfs4_do_setattr(state->inode, &fattr, sattr, state);
1774 if (status == 0) 1896 if (status == 0)
1775 nfs_setattr_update_inode(state->inode, sattr); 1897 nfs_setattr_update_inode(state->inode, sattr);
1898 nfs_post_op_update_inode(state->inode, &fattr);
1776 } 1899 }
1777 if (status == 0 && nd != NULL && (nd->flags & LOOKUP_OPEN)) 1900 if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0)
1778 status = nfs4_intent_set_file(nd, dentry, state); 1901 status = nfs4_intent_set_file(nd, &path, state);
1779 else 1902 else
1780 nfs4_close_state(state, flags); 1903 nfs4_close_state(&path, state, flags);
1781out: 1904out:
1782 return status; 1905 return status;
1783} 1906}
@@ -3008,7 +3131,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
3008 if (status != 0) 3131 if (status != 0)
3009 goto out; 3132 goto out;
3010 lsp = request->fl_u.nfs4_fl.owner; 3133 lsp = request->fl_u.nfs4_fl.owner;
3011 arg.lock_owner.id = lsp->ls_id; 3134 arg.lock_owner.id = lsp->ls_id.id;
3012 status = rpc_call_sync(server->client, &msg, 0); 3135 status = rpc_call_sync(server->client, &msg, 0);
3013 switch (status) { 3136 switch (status) {
3014 case 0: 3137 case 0:
@@ -3152,6 +3275,11 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
3152{ 3275{
3153 struct nfs4_unlockdata *data; 3276 struct nfs4_unlockdata *data;
3154 3277
3278 /* Ensure this is an unlock - when canceling a lock, the
3279 * canceled lock is passed in, and it won't be an unlock.
3280 */
3281 fl->fl_type = F_UNLCK;
3282
3155 data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid); 3283 data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid);
3156 if (data == NULL) { 3284 if (data == NULL) {
3157 nfs_free_seqid(seqid); 3285 nfs_free_seqid(seqid);
@@ -3222,7 +3350,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
3222 goto out_free; 3350 goto out_free;
3223 p->arg.lock_stateid = &lsp->ls_stateid; 3351 p->arg.lock_stateid = &lsp->ls_stateid;
3224 p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; 3352 p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
3225 p->arg.lock_owner.id = lsp->ls_id; 3353 p->arg.lock_owner.id = lsp->ls_id.id;
3226 p->lsp = lsp; 3354 p->lsp = lsp;
3227 atomic_inc(&lsp->ls_count); 3355 atomic_inc(&lsp->ls_count);
3228 p->ctx = get_nfs_open_context(ctx); 3356 p->ctx = get_nfs_open_context(ctx);
@@ -3285,7 +3413,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
3285 memcpy(data->lsp->ls_stateid.data, data->res.stateid.data, 3413 memcpy(data->lsp->ls_stateid.data, data->res.stateid.data,
3286 sizeof(data->lsp->ls_stateid.data)); 3414 sizeof(data->lsp->ls_stateid.data));
3287 data->lsp->ls_flags |= NFS_LOCK_INITIALIZED; 3415 data->lsp->ls_flags |= NFS_LOCK_INITIALIZED;
3288 renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp); 3416 renew_lease(NFS_SERVER(data->ctx->path.dentry->d_inode), data->timestamp);
3289 } 3417 }
3290 nfs_increment_lock_seqid(data->rpc_status, data->arg.lock_seqid); 3418 nfs_increment_lock_seqid(data->rpc_status, data->arg.lock_seqid);
3291out: 3419out:
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 8ed79d5c54f9..e9662ba81d86 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -38,12 +38,14 @@
38 * subsequent patch. 38 * subsequent patch.
39 */ 39 */
40 40
41#include <linux/kernel.h>
41#include <linux/slab.h> 42#include <linux/slab.h>
42#include <linux/smp_lock.h> 43#include <linux/smp_lock.h>
43#include <linux/nfs_fs.h> 44#include <linux/nfs_fs.h>
44#include <linux/nfs_idmap.h> 45#include <linux/nfs_idmap.h>
45#include <linux/kthread.h> 46#include <linux/kthread.h>
46#include <linux/module.h> 47#include <linux/module.h>
48#include <linux/random.h>
47#include <linux/workqueue.h> 49#include <linux/workqueue.h>
48#include <linux/bitops.h> 50#include <linux/bitops.h>
49 51
@@ -69,33 +71,14 @@ static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred)
69 return status; 71 return status;
70} 72}
71 73
72u32
73nfs4_alloc_lockowner_id(struct nfs_client *clp)
74{
75 return clp->cl_lockowner_id ++;
76}
77
78static struct nfs4_state_owner *
79nfs4_client_grab_unused(struct nfs_client *clp, struct rpc_cred *cred)
80{
81 struct nfs4_state_owner *sp = NULL;
82
83 if (!list_empty(&clp->cl_unused)) {
84 sp = list_entry(clp->cl_unused.next, struct nfs4_state_owner, so_list);
85 atomic_inc(&sp->so_count);
86 sp->so_cred = cred;
87 list_move(&sp->so_list, &clp->cl_state_owners);
88 clp->cl_nunused--;
89 }
90 return sp;
91}
92
93struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp) 74struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp)
94{ 75{
95 struct nfs4_state_owner *sp; 76 struct nfs4_state_owner *sp;
77 struct rb_node *pos;
96 struct rpc_cred *cred = NULL; 78 struct rpc_cred *cred = NULL;
97 79
98 list_for_each_entry(sp, &clp->cl_state_owners, so_list) { 80 for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
81 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
99 if (list_empty(&sp->so_states)) 82 if (list_empty(&sp->so_states))
100 continue; 83 continue;
101 cred = get_rpccred(sp->so_cred); 84 cred = get_rpccred(sp->so_cred);
@@ -107,32 +90,146 @@ struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp)
107static struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp) 90static struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp)
108{ 91{
109 struct nfs4_state_owner *sp; 92 struct nfs4_state_owner *sp;
93 struct rb_node *pos;
110 94
111 if (!list_empty(&clp->cl_state_owners)) { 95 pos = rb_first(&clp->cl_state_owners);
112 sp = list_entry(clp->cl_state_owners.next, 96 if (pos != NULL) {
113 struct nfs4_state_owner, so_list); 97 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
114 return get_rpccred(sp->so_cred); 98 return get_rpccred(sp->so_cred);
115 } 99 }
116 return NULL; 100 return NULL;
117} 101}
118 102
103static void nfs_alloc_unique_id(struct rb_root *root, struct nfs_unique_id *new,
104 __u64 minval, int maxbits)
105{
106 struct rb_node **p, *parent;
107 struct nfs_unique_id *pos;
108 __u64 mask = ~0ULL;
109
110 if (maxbits < 64)
111 mask = (1ULL << maxbits) - 1ULL;
112
113 /* Ensure distribution is more or less flat */
114 get_random_bytes(&new->id, sizeof(new->id));
115 new->id &= mask;
116 if (new->id < minval)
117 new->id += minval;
118retry:
119 p = &root->rb_node;
120 parent = NULL;
121
122 while (*p != NULL) {
123 parent = *p;
124 pos = rb_entry(parent, struct nfs_unique_id, rb_node);
125
126 if (new->id < pos->id)
127 p = &(*p)->rb_left;
128 else if (new->id > pos->id)
129 p = &(*p)->rb_right;
130 else
131 goto id_exists;
132 }
133 rb_link_node(&new->rb_node, parent, p);
134 rb_insert_color(&new->rb_node, root);
135 return;
136id_exists:
137 for (;;) {
138 new->id++;
139 if (new->id < minval || (new->id & mask) != new->id) {
140 new->id = minval;
141 break;
142 }
143 parent = rb_next(parent);
144 if (parent == NULL)
145 break;
146 pos = rb_entry(parent, struct nfs_unique_id, rb_node);
147 if (new->id < pos->id)
148 break;
149 }
150 goto retry;
151}
152
153static void nfs_free_unique_id(struct rb_root *root, struct nfs_unique_id *id)
154{
155 rb_erase(&id->rb_node, root);
156}
157
119static struct nfs4_state_owner * 158static struct nfs4_state_owner *
120nfs4_find_state_owner(struct nfs_client *clp, struct rpc_cred *cred) 159nfs4_find_state_owner(struct nfs_server *server, struct rpc_cred *cred)
121{ 160{
161 struct nfs_client *clp = server->nfs_client;
162 struct rb_node **p = &clp->cl_state_owners.rb_node,
163 *parent = NULL;
122 struct nfs4_state_owner *sp, *res = NULL; 164 struct nfs4_state_owner *sp, *res = NULL;
123 165
124 list_for_each_entry(sp, &clp->cl_state_owners, so_list) { 166 while (*p != NULL) {
125 if (sp->so_cred != cred) 167 parent = *p;
168 sp = rb_entry(parent, struct nfs4_state_owner, so_client_node);
169
170 if (server < sp->so_server) {
171 p = &parent->rb_left;
126 continue; 172 continue;
127 atomic_inc(&sp->so_count); 173 }
128 /* Move to the head of the list */ 174 if (server > sp->so_server) {
129 list_move(&sp->so_list, &clp->cl_state_owners); 175 p = &parent->rb_right;
130 res = sp; 176 continue;
131 break; 177 }
178 if (cred < sp->so_cred)
179 p = &parent->rb_left;
180 else if (cred > sp->so_cred)
181 p = &parent->rb_right;
182 else {
183 atomic_inc(&sp->so_count);
184 res = sp;
185 break;
186 }
132 } 187 }
133 return res; 188 return res;
134} 189}
135 190
191static struct nfs4_state_owner *
192nfs4_insert_state_owner(struct nfs_client *clp, struct nfs4_state_owner *new)
193{
194 struct rb_node **p = &clp->cl_state_owners.rb_node,
195 *parent = NULL;
196 struct nfs4_state_owner *sp;
197
198 while (*p != NULL) {
199 parent = *p;
200 sp = rb_entry(parent, struct nfs4_state_owner, so_client_node);
201
202 if (new->so_server < sp->so_server) {
203 p = &parent->rb_left;
204 continue;
205 }
206 if (new->so_server > sp->so_server) {
207 p = &parent->rb_right;
208 continue;
209 }
210 if (new->so_cred < sp->so_cred)
211 p = &parent->rb_left;
212 else if (new->so_cred > sp->so_cred)
213 p = &parent->rb_right;
214 else {
215 atomic_inc(&sp->so_count);
216 return sp;
217 }
218 }
219 nfs_alloc_unique_id(&clp->cl_openowner_id, &new->so_owner_id, 1, 64);
220 rb_link_node(&new->so_client_node, parent, p);
221 rb_insert_color(&new->so_client_node, &clp->cl_state_owners);
222 return new;
223}
224
225static void
226nfs4_remove_state_owner(struct nfs_client *clp, struct nfs4_state_owner *sp)
227{
228 if (!RB_EMPTY_NODE(&sp->so_client_node))
229 rb_erase(&sp->so_client_node, &clp->cl_state_owners);
230 nfs_free_unique_id(&clp->cl_openowner_id, &sp->so_owner_id);
231}
232
136/* 233/*
137 * nfs4_alloc_state_owner(): this is called on the OPEN or CREATE path to 234 * nfs4_alloc_state_owner(): this is called on the OPEN or CREATE path to
138 * create a new state_owner. 235 * create a new state_owner.
@@ -160,10 +257,14 @@ nfs4_alloc_state_owner(void)
160void 257void
161nfs4_drop_state_owner(struct nfs4_state_owner *sp) 258nfs4_drop_state_owner(struct nfs4_state_owner *sp)
162{ 259{
163 struct nfs_client *clp = sp->so_client; 260 if (!RB_EMPTY_NODE(&sp->so_client_node)) {
164 spin_lock(&clp->cl_lock); 261 struct nfs_client *clp = sp->so_client;
165 list_del_init(&sp->so_list); 262
166 spin_unlock(&clp->cl_lock); 263 spin_lock(&clp->cl_lock);
264 rb_erase(&sp->so_client_node, &clp->cl_state_owners);
265 RB_CLEAR_NODE(&sp->so_client_node);
266 spin_unlock(&clp->cl_lock);
267 }
167} 268}
168 269
169/* 270/*
@@ -175,26 +276,25 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct
175 struct nfs_client *clp = server->nfs_client; 276 struct nfs_client *clp = server->nfs_client;
176 struct nfs4_state_owner *sp, *new; 277 struct nfs4_state_owner *sp, *new;
177 278
178 get_rpccred(cred);
179 new = nfs4_alloc_state_owner();
180 spin_lock(&clp->cl_lock); 279 spin_lock(&clp->cl_lock);
181 sp = nfs4_find_state_owner(clp, cred); 280 sp = nfs4_find_state_owner(server, cred);
182 if (sp == NULL)
183 sp = nfs4_client_grab_unused(clp, cred);
184 if (sp == NULL && new != NULL) {
185 list_add(&new->so_list, &clp->cl_state_owners);
186 new->so_client = clp;
187 new->so_id = nfs4_alloc_lockowner_id(clp);
188 new->so_cred = cred;
189 sp = new;
190 new = NULL;
191 }
192 spin_unlock(&clp->cl_lock); 281 spin_unlock(&clp->cl_lock);
193 kfree(new);
194 if (sp != NULL) 282 if (sp != NULL)
195 return sp; 283 return sp;
196 put_rpccred(cred); 284 new = nfs4_alloc_state_owner();
197 return NULL; 285 if (new == NULL)
286 return NULL;
287 new->so_client = clp;
288 new->so_server = server;
289 new->so_cred = cred;
290 spin_lock(&clp->cl_lock);
291 sp = nfs4_insert_state_owner(clp, new);
292 spin_unlock(&clp->cl_lock);
293 if (sp == new)
294 get_rpccred(cred);
295 else
296 kfree(new);
297 return sp;
198} 298}
199 299
200/* 300/*
@@ -208,18 +308,7 @@ void nfs4_put_state_owner(struct nfs4_state_owner *sp)
208 308
209 if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock)) 309 if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
210 return; 310 return;
211 if (clp->cl_nunused >= OPENOWNER_POOL_SIZE) 311 nfs4_remove_state_owner(clp, sp);
212 goto out_free;
213 if (list_empty(&sp->so_list))
214 goto out_free;
215 list_move(&sp->so_list, &clp->cl_unused);
216 clp->cl_nunused++;
217 spin_unlock(&clp->cl_lock);
218 put_rpccred(cred);
219 cred = NULL;
220 return;
221out_free:
222 list_del(&sp->so_list);
223 spin_unlock(&clp->cl_lock); 312 spin_unlock(&clp->cl_lock);
224 put_rpccred(cred); 313 put_rpccred(cred);
225 kfree(sp); 314 kfree(sp);
@@ -236,6 +325,7 @@ nfs4_alloc_open_state(void)
236 atomic_set(&state->count, 1); 325 atomic_set(&state->count, 1);
237 INIT_LIST_HEAD(&state->lock_states); 326 INIT_LIST_HEAD(&state->lock_states);
238 spin_lock_init(&state->state_lock); 327 spin_lock_init(&state->state_lock);
328 seqlock_init(&state->seqlock);
239 return state; 329 return state;
240} 330}
241 331
@@ -263,13 +353,10 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner)
263 struct nfs4_state *state; 353 struct nfs4_state *state;
264 354
265 list_for_each_entry(state, &nfsi->open_states, inode_states) { 355 list_for_each_entry(state, &nfsi->open_states, inode_states) {
266 /* Is this in the process of being freed? */ 356 if (state->owner != owner)
267 if (state->state == 0)
268 continue; 357 continue;
269 if (state->owner == owner) { 358 if (atomic_inc_not_zero(&state->count))
270 atomic_inc(&state->count);
271 return state; 359 return state;
272 }
273 } 360 }
274 return NULL; 361 return NULL;
275} 362}
@@ -341,16 +428,15 @@ void nfs4_put_open_state(struct nfs4_state *state)
341/* 428/*
342 * Close the current file. 429 * Close the current file.
343 */ 430 */
344void nfs4_close_state(struct nfs4_state *state, mode_t mode) 431void nfs4_close_state(struct path *path, struct nfs4_state *state, mode_t mode)
345{ 432{
346 struct inode *inode = state->inode;
347 struct nfs4_state_owner *owner = state->owner; 433 struct nfs4_state_owner *owner = state->owner;
348 int oldstate, newstate = 0; 434 int call_close = 0;
435 int newstate;
349 436
350 atomic_inc(&owner->so_count); 437 atomic_inc(&owner->so_count);
351 /* Protect against nfs4_find_state() */ 438 /* Protect against nfs4_find_state() */
352 spin_lock(&owner->so_lock); 439 spin_lock(&owner->so_lock);
353 spin_lock(&inode->i_lock);
354 switch (mode & (FMODE_READ | FMODE_WRITE)) { 440 switch (mode & (FMODE_READ | FMODE_WRITE)) {
355 case FMODE_READ: 441 case FMODE_READ:
356 state->n_rdonly--; 442 state->n_rdonly--;
@@ -361,24 +447,29 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode)
361 case FMODE_READ|FMODE_WRITE: 447 case FMODE_READ|FMODE_WRITE:
362 state->n_rdwr--; 448 state->n_rdwr--;
363 } 449 }
364 oldstate = newstate = state->state; 450 newstate = FMODE_READ|FMODE_WRITE;
365 if (state->n_rdwr == 0) { 451 if (state->n_rdwr == 0) {
366 if (state->n_rdonly == 0) 452 if (state->n_rdonly == 0) {
367 newstate &= ~FMODE_READ; 453 newstate &= ~FMODE_READ;
368 if (state->n_wronly == 0) 454 call_close |= test_bit(NFS_O_RDONLY_STATE, &state->flags);
455 call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
456 }
457 if (state->n_wronly == 0) {
369 newstate &= ~FMODE_WRITE; 458 newstate &= ~FMODE_WRITE;
459 call_close |= test_bit(NFS_O_WRONLY_STATE, &state->flags);
460 call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
461 }
462 if (newstate == 0)
463 clear_bit(NFS_DELEGATED_STATE, &state->flags);
370 } 464 }
371 if (test_bit(NFS_DELEGATED_STATE, &state->flags)) { 465 nfs4_state_set_mode_locked(state, newstate);
372 nfs4_state_set_mode_locked(state, newstate);
373 oldstate = newstate;
374 }
375 spin_unlock(&inode->i_lock);
376 spin_unlock(&owner->so_lock); 466 spin_unlock(&owner->so_lock);
377 467
378 if (oldstate != newstate && nfs4_do_close(inode, state) == 0) 468 if (!call_close) {
379 return; 469 nfs4_put_open_state(state);
380 nfs4_put_open_state(state); 470 nfs4_put_state_owner(owner);
381 nfs4_put_state_owner(owner); 471 } else
472 nfs4_do_close(path, state);
382} 473}
383 474
384/* 475/*
@@ -415,12 +506,22 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
415 atomic_set(&lsp->ls_count, 1); 506 atomic_set(&lsp->ls_count, 1);
416 lsp->ls_owner = fl_owner; 507 lsp->ls_owner = fl_owner;
417 spin_lock(&clp->cl_lock); 508 spin_lock(&clp->cl_lock);
418 lsp->ls_id = nfs4_alloc_lockowner_id(clp); 509 nfs_alloc_unique_id(&clp->cl_lockowner_id, &lsp->ls_id, 1, 64);
419 spin_unlock(&clp->cl_lock); 510 spin_unlock(&clp->cl_lock);
420 INIT_LIST_HEAD(&lsp->ls_locks); 511 INIT_LIST_HEAD(&lsp->ls_locks);
421 return lsp; 512 return lsp;
422} 513}
423 514
515static void nfs4_free_lock_state(struct nfs4_lock_state *lsp)
516{
517 struct nfs_client *clp = lsp->ls_state->owner->so_client;
518
519 spin_lock(&clp->cl_lock);
520 nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id);
521 spin_unlock(&clp->cl_lock);
522 kfree(lsp);
523}
524
424/* 525/*
425 * Return a compatible lock_state. If no initialized lock_state structure 526 * Return a compatible lock_state. If no initialized lock_state structure
426 * exists, return an uninitialized one. 527 * exists, return an uninitialized one.
@@ -450,7 +551,8 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_
450 return NULL; 551 return NULL;
451 } 552 }
452 spin_unlock(&state->state_lock); 553 spin_unlock(&state->state_lock);
453 kfree(new); 554 if (new != NULL)
555 nfs4_free_lock_state(new);
454 return lsp; 556 return lsp;
455} 557}
456 558
@@ -471,7 +573,7 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
471 if (list_empty(&state->lock_states)) 573 if (list_empty(&state->lock_states))
472 clear_bit(LK_STATE_IN_USE, &state->flags); 574 clear_bit(LK_STATE_IN_USE, &state->flags);
473 spin_unlock(&state->state_lock); 575 spin_unlock(&state->state_lock);
474 kfree(lsp); 576 nfs4_free_lock_state(lsp);
475} 577}
476 578
477static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src) 579static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
@@ -513,8 +615,12 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
513void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner) 615void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner)
514{ 616{
515 struct nfs4_lock_state *lsp; 617 struct nfs4_lock_state *lsp;
618 int seq;
516 619
517 memcpy(dst, &state->stateid, sizeof(*dst)); 620 do {
621 seq = read_seqbegin(&state->seqlock);
622 memcpy(dst, &state->stateid, sizeof(*dst));
623 } while (read_seqretry(&state->seqlock, seq));
518 if (test_bit(LK_STATE_IN_USE, &state->flags) == 0) 624 if (test_bit(LK_STATE_IN_USE, &state->flags) == 0)
519 return; 625 return;
520 626
@@ -557,12 +663,18 @@ void nfs_free_seqid(struct nfs_seqid *seqid)
557 * failed with a seqid incrementing error - 663 * failed with a seqid incrementing error -
558 * see comments nfs_fs.h:seqid_mutating_error() 664 * see comments nfs_fs.h:seqid_mutating_error()
559 */ 665 */
560static inline void nfs_increment_seqid(int status, struct nfs_seqid *seqid) 666static void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
561{ 667{
562 switch (status) { 668 switch (status) {
563 case 0: 669 case 0:
564 break; 670 break;
565 case -NFS4ERR_BAD_SEQID: 671 case -NFS4ERR_BAD_SEQID:
672 if (seqid->sequence->flags & NFS_SEQID_CONFIRMED)
673 return;
674 printk(KERN_WARNING "NFS: v4 server returned a bad"
675 "sequence-id error on an"
676 "unconfirmed sequence %p!\n",
677 seqid->sequence);
566 case -NFS4ERR_STALE_CLIENTID: 678 case -NFS4ERR_STALE_CLIENTID:
567 case -NFS4ERR_STALE_STATEID: 679 case -NFS4ERR_STALE_STATEID:
568 case -NFS4ERR_BAD_STATEID: 680 case -NFS4ERR_BAD_STATEID:
@@ -586,7 +698,7 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid)
586 struct nfs4_state_owner, so_seqid); 698 struct nfs4_state_owner, so_seqid);
587 nfs4_drop_state_owner(sp); 699 nfs4_drop_state_owner(sp);
588 } 700 }
589 return nfs_increment_seqid(status, seqid); 701 nfs_increment_seqid(status, seqid);
590} 702}
591 703
592/* 704/*
@@ -596,7 +708,7 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid)
596 */ 708 */
597void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid) 709void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid)
598{ 710{
599 return nfs_increment_seqid(status, seqid); 711 nfs_increment_seqid(status, seqid);
600} 712}
601 713
602int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task) 714int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task)
@@ -748,15 +860,21 @@ out_err:
748static void nfs4_state_mark_reclaim(struct nfs_client *clp) 860static void nfs4_state_mark_reclaim(struct nfs_client *clp)
749{ 861{
750 struct nfs4_state_owner *sp; 862 struct nfs4_state_owner *sp;
863 struct rb_node *pos;
751 struct nfs4_state *state; 864 struct nfs4_state *state;
752 struct nfs4_lock_state *lock; 865 struct nfs4_lock_state *lock;
753 866
754 /* Reset all sequence ids to zero */ 867 /* Reset all sequence ids to zero */
755 list_for_each_entry(sp, &clp->cl_state_owners, so_list) { 868 for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
869 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
756 sp->so_seqid.counter = 0; 870 sp->so_seqid.counter = 0;
757 sp->so_seqid.flags = 0; 871 sp->so_seqid.flags = 0;
758 spin_lock(&sp->so_lock); 872 spin_lock(&sp->so_lock);
759 list_for_each_entry(state, &sp->so_states, open_states) { 873 list_for_each_entry(state, &sp->so_states, open_states) {
874 clear_bit(NFS_DELEGATED_STATE, &state->flags);
875 clear_bit(NFS_O_RDONLY_STATE, &state->flags);
876 clear_bit(NFS_O_WRONLY_STATE, &state->flags);
877 clear_bit(NFS_O_RDWR_STATE, &state->flags);
760 list_for_each_entry(lock, &state->lock_states, ls_locks) { 878 list_for_each_entry(lock, &state->lock_states, ls_locks) {
761 lock->ls_seqid.counter = 0; 879 lock->ls_seqid.counter = 0;
762 lock->ls_seqid.flags = 0; 880 lock->ls_seqid.flags = 0;
@@ -771,6 +889,7 @@ static int reclaimer(void *ptr)
771{ 889{
772 struct nfs_client *clp = ptr; 890 struct nfs_client *clp = ptr;
773 struct nfs4_state_owner *sp; 891 struct nfs4_state_owner *sp;
892 struct rb_node *pos;
774 struct nfs4_state_recovery_ops *ops; 893 struct nfs4_state_recovery_ops *ops;
775 struct rpc_cred *cred; 894 struct rpc_cred *cred;
776 int status = 0; 895 int status = 0;
@@ -816,7 +935,8 @@ restart_loop:
816 /* Mark all delegations for reclaim */ 935 /* Mark all delegations for reclaim */
817 nfs_delegation_mark_reclaim(clp); 936 nfs_delegation_mark_reclaim(clp);
818 /* Note: list is protected by exclusive lock on cl->cl_sem */ 937 /* Note: list is protected by exclusive lock on cl->cl_sem */
819 list_for_each_entry(sp, &clp->cl_state_owners, so_list) { 938 for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
939 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
820 status = nfs4_reclaim_open_state(ops, sp); 940 status = nfs4_reclaim_open_state(ops, sp);
821 if (status < 0) { 941 if (status < 0) {
822 if (status == -NFS4ERR_NO_GRACE) { 942 if (status == -NFS4ERR_NO_GRACE) {
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 8003c91ccb9a..c08738441f73 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -68,9 +68,10 @@ static int nfs4_stat_to_errno(int);
68#endif 68#endif
69 69
70/* lock,open owner id: 70/* lock,open owner id:
71 * we currently use size 1 (u32) out of (NFS4_OPAQUE_LIMIT >> 2) 71 * we currently use size 2 (u64) out of (NFS4_OPAQUE_LIMIT >> 2)
72 */ 72 */
73#define owner_id_maxsz (1 + 1) 73#define open_owner_id_maxsz (1 + 4)
74#define lock_owner_id_maxsz (1 + 4)
74#define compound_encode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2)) 75#define compound_encode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2))
75#define compound_decode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2)) 76#define compound_decode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2))
76#define op_encode_hdr_maxsz (1) 77#define op_encode_hdr_maxsz (1)
@@ -87,9 +88,11 @@ static int nfs4_stat_to_errno(int);
87#define encode_getattr_maxsz (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz) 88#define encode_getattr_maxsz (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz)
88#define nfs4_name_maxsz (1 + ((3 + NFS4_MAXNAMLEN) >> 2)) 89#define nfs4_name_maxsz (1 + ((3 + NFS4_MAXNAMLEN) >> 2))
89#define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2)) 90#define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2))
91#define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
92#define nfs4_group_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
90/* This is based on getfattr, which uses the most attributes: */ 93/* This is based on getfattr, which uses the most attributes: */
91#define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \ 94#define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \
92 3 + 3 + 3 + 2 * nfs4_name_maxsz)) 95 3 + 3 + 3 + nfs4_owner_maxsz + nfs4_group_maxsz))
93#define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \ 96#define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \
94 nfs4_fattr_value_maxsz) 97 nfs4_fattr_value_maxsz)
95#define decode_getattr_maxsz (op_decode_hdr_maxsz + nfs4_fattr_maxsz) 98#define decode_getattr_maxsz (op_decode_hdr_maxsz + nfs4_fattr_maxsz)
@@ -116,8 +119,27 @@ static int nfs4_stat_to_errno(int);
116 3 + (NFS4_VERIFIER_SIZE >> 2)) 119 3 + (NFS4_VERIFIER_SIZE >> 2))
117#define decode_setclientid_confirm_maxsz \ 120#define decode_setclientid_confirm_maxsz \
118 (op_decode_hdr_maxsz) 121 (op_decode_hdr_maxsz)
119#define encode_lookup_maxsz (op_encode_hdr_maxsz + \ 122#define encode_lookup_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz)
120 1 + ((3 + NFS4_FHSIZE) >> 2)) 123#define decode_lookup_maxsz (op_decode_hdr_maxsz)
124#define encode_share_access_maxsz \
125 (2)
126#define encode_createmode_maxsz (1 + nfs4_fattr_maxsz)
127#define encode_opentype_maxsz (1 + encode_createmode_maxsz)
128#define encode_claim_null_maxsz (1 + nfs4_name_maxsz)
129#define encode_open_maxsz (op_encode_hdr_maxsz + \
130 2 + encode_share_access_maxsz + 2 + \
131 open_owner_id_maxsz + \
132 encode_opentype_maxsz + \
133 encode_claim_null_maxsz)
134#define decode_ace_maxsz (3 + nfs4_owner_maxsz)
135#define decode_delegation_maxsz (1 + XDR_QUADLEN(NFS4_STATEID_SIZE) + 1 + \
136 decode_ace_maxsz)
137#define decode_change_info_maxsz (5)
138#define decode_open_maxsz (op_decode_hdr_maxsz + \
139 XDR_QUADLEN(NFS4_STATEID_SIZE) + \
140 decode_change_info_maxsz + 1 + \
141 nfs4_fattr_bitmap_maxsz + \
142 decode_delegation_maxsz)
121#define encode_remove_maxsz (op_encode_hdr_maxsz + \ 143#define encode_remove_maxsz (op_encode_hdr_maxsz + \
122 nfs4_name_maxsz) 144 nfs4_name_maxsz)
123#define encode_rename_maxsz (op_encode_hdr_maxsz + \ 145#define encode_rename_maxsz (op_encode_hdr_maxsz + \
@@ -134,9 +156,15 @@ static int nfs4_stat_to_errno(int);
134#define encode_create_maxsz (op_encode_hdr_maxsz + \ 156#define encode_create_maxsz (op_encode_hdr_maxsz + \
135 2 + nfs4_name_maxsz + \ 157 2 + nfs4_name_maxsz + \
136 nfs4_fattr_maxsz) 158 nfs4_fattr_maxsz)
137#define decode_create_maxsz (op_decode_hdr_maxsz + 8) 159#define decode_create_maxsz (op_decode_hdr_maxsz + \
160 decode_change_info_maxsz + \
161 nfs4_fattr_bitmap_maxsz)
138#define encode_delegreturn_maxsz (op_encode_hdr_maxsz + 4) 162#define encode_delegreturn_maxsz (op_encode_hdr_maxsz + 4)
139#define decode_delegreturn_maxsz (op_decode_hdr_maxsz) 163#define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
164#define encode_fs_locations_maxsz \
165 (encode_getattr_maxsz)
166#define decode_fs_locations_maxsz \
167 (0)
140#define NFS4_enc_compound_sz (1024) /* XXX: large enough? */ 168#define NFS4_enc_compound_sz (1024) /* XXX: large enough? */
141#define NFS4_dec_compound_sz (1024) /* XXX: large enough? */ 169#define NFS4_dec_compound_sz (1024) /* XXX: large enough? */
142#define NFS4_enc_read_sz (compound_encode_hdr_maxsz + \ 170#define NFS4_enc_read_sz (compound_encode_hdr_maxsz + \
@@ -174,16 +202,21 @@ static int nfs4_stat_to_errno(int);
174 op_decode_hdr_maxsz + 2 + \ 202 op_decode_hdr_maxsz + 2 + \
175 decode_getattr_maxsz) 203 decode_getattr_maxsz)
176#define NFS4_enc_open_sz (compound_encode_hdr_maxsz + \ 204#define NFS4_enc_open_sz (compound_encode_hdr_maxsz + \
177 encode_putfh_maxsz + \ 205 encode_putfh_maxsz + \
178 op_encode_hdr_maxsz + \ 206 encode_savefh_maxsz + \
179 13 + 3 + 2 + 64 + \ 207 encode_open_maxsz + \
180 encode_getattr_maxsz + \ 208 encode_getfh_maxsz + \
181 encode_getfh_maxsz) 209 encode_getattr_maxsz + \
210 encode_restorefh_maxsz + \
211 encode_getattr_maxsz)
182#define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \ 212#define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \
183 decode_putfh_maxsz + \ 213 decode_putfh_maxsz + \
184 op_decode_hdr_maxsz + 4 + 5 + 2 + 3 + \ 214 decode_savefh_maxsz + \
185 decode_getattr_maxsz + \ 215 decode_open_maxsz + \
186 decode_getfh_maxsz) 216 decode_getfh_maxsz + \
217 decode_getattr_maxsz + \
218 decode_restorefh_maxsz + \
219 decode_getattr_maxsz)
187#define NFS4_enc_open_confirm_sz \ 220#define NFS4_enc_open_confirm_sz \
188 (compound_encode_hdr_maxsz + \ 221 (compound_encode_hdr_maxsz + \
189 encode_putfh_maxsz + \ 222 encode_putfh_maxsz + \
@@ -193,12 +226,12 @@ static int nfs4_stat_to_errno(int);
193 op_decode_hdr_maxsz + 4) 226 op_decode_hdr_maxsz + 4)
194#define NFS4_enc_open_noattr_sz (compound_encode_hdr_maxsz + \ 227#define NFS4_enc_open_noattr_sz (compound_encode_hdr_maxsz + \
195 encode_putfh_maxsz + \ 228 encode_putfh_maxsz + \
196 op_encode_hdr_maxsz + \ 229 encode_open_maxsz + \
197 11) 230 encode_getattr_maxsz)
198#define NFS4_dec_open_noattr_sz (compound_decode_hdr_maxsz + \ 231#define NFS4_dec_open_noattr_sz (compound_decode_hdr_maxsz + \
199 decode_putfh_maxsz + \ 232 decode_putfh_maxsz + \
200 op_decode_hdr_maxsz + \ 233 decode_open_maxsz + \
201 4 + 5 + 2 + 3) 234 decode_getattr_maxsz)
202#define NFS4_enc_open_downgrade_sz \ 235#define NFS4_enc_open_downgrade_sz \
203 (compound_encode_hdr_maxsz + \ 236 (compound_encode_hdr_maxsz + \
204 encode_putfh_maxsz + \ 237 encode_putfh_maxsz + \
@@ -256,19 +289,19 @@ static int nfs4_stat_to_errno(int);
256 op_encode_hdr_maxsz + \ 289 op_encode_hdr_maxsz + \
257 1 + 1 + 2 + 2 + \ 290 1 + 1 + 2 + 2 + \
258 1 + 4 + 1 + 2 + \ 291 1 + 4 + 1 + 2 + \
259 owner_id_maxsz) 292 lock_owner_id_maxsz)
260#define NFS4_dec_lock_sz (compound_decode_hdr_maxsz + \ 293#define NFS4_dec_lock_sz (compound_decode_hdr_maxsz + \
261 decode_putfh_maxsz + \ 294 decode_putfh_maxsz + \
262 decode_getattr_maxsz + \ 295 decode_getattr_maxsz + \
263 op_decode_hdr_maxsz + \ 296 op_decode_hdr_maxsz + \
264 2 + 2 + 1 + 2 + \ 297 2 + 2 + 1 + 2 + \
265 owner_id_maxsz) 298 lock_owner_id_maxsz)
266#define NFS4_enc_lockt_sz (compound_encode_hdr_maxsz + \ 299#define NFS4_enc_lockt_sz (compound_encode_hdr_maxsz + \
267 encode_putfh_maxsz + \ 300 encode_putfh_maxsz + \
268 encode_getattr_maxsz + \ 301 encode_getattr_maxsz + \
269 op_encode_hdr_maxsz + \ 302 op_encode_hdr_maxsz + \
270 1 + 2 + 2 + 2 + \ 303 1 + 2 + 2 + 2 + \
271 owner_id_maxsz) 304 lock_owner_id_maxsz)
272#define NFS4_dec_lockt_sz (NFS4_dec_lock_sz) 305#define NFS4_dec_lockt_sz (NFS4_dec_lock_sz)
273#define NFS4_enc_locku_sz (compound_encode_hdr_maxsz + \ 306#define NFS4_enc_locku_sz (compound_encode_hdr_maxsz + \
274 encode_putfh_maxsz + \ 307 encode_putfh_maxsz + \
@@ -298,7 +331,7 @@ static int nfs4_stat_to_errno(int);
298 encode_getfh_maxsz) 331 encode_getfh_maxsz)
299#define NFS4_dec_lookup_sz (compound_decode_hdr_maxsz + \ 332#define NFS4_dec_lookup_sz (compound_decode_hdr_maxsz + \
300 decode_putfh_maxsz + \ 333 decode_putfh_maxsz + \
301 op_decode_hdr_maxsz + \ 334 decode_lookup_maxsz + \
302 decode_getattr_maxsz + \ 335 decode_getattr_maxsz + \
303 decode_getfh_maxsz) 336 decode_getfh_maxsz)
304#define NFS4_enc_lookup_root_sz (compound_encode_hdr_maxsz + \ 337#define NFS4_enc_lookup_root_sz (compound_encode_hdr_maxsz + \
@@ -417,12 +450,13 @@ static int nfs4_stat_to_errno(int);
417#define NFS4_enc_fs_locations_sz \ 450#define NFS4_enc_fs_locations_sz \
418 (compound_encode_hdr_maxsz + \ 451 (compound_encode_hdr_maxsz + \
419 encode_putfh_maxsz + \ 452 encode_putfh_maxsz + \
420 encode_getattr_maxsz) 453 encode_lookup_maxsz + \
454 encode_fs_locations_maxsz)
421#define NFS4_dec_fs_locations_sz \ 455#define NFS4_dec_fs_locations_sz \
422 (compound_decode_hdr_maxsz + \ 456 (compound_decode_hdr_maxsz + \
423 decode_putfh_maxsz + \ 457 decode_putfh_maxsz + \
424 op_decode_hdr_maxsz + \ 458 decode_lookup_maxsz + \
425 nfs4_fattr_bitmap_maxsz) 459 decode_fs_locations_maxsz)
426 460
427static struct { 461static struct {
428 unsigned int mode; 462 unsigned int mode;
@@ -793,13 +827,14 @@ static int encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args)
793 WRITE64(nfs4_lock_length(args->fl)); 827 WRITE64(nfs4_lock_length(args->fl));
794 WRITE32(args->new_lock_owner); 828 WRITE32(args->new_lock_owner);
795 if (args->new_lock_owner){ 829 if (args->new_lock_owner){
796 RESERVE_SPACE(4+NFS4_STATEID_SIZE+20); 830 RESERVE_SPACE(4+NFS4_STATEID_SIZE+32);
797 WRITE32(args->open_seqid->sequence->counter); 831 WRITE32(args->open_seqid->sequence->counter);
798 WRITEMEM(args->open_stateid->data, NFS4_STATEID_SIZE); 832 WRITEMEM(args->open_stateid->data, NFS4_STATEID_SIZE);
799 WRITE32(args->lock_seqid->sequence->counter); 833 WRITE32(args->lock_seqid->sequence->counter);
800 WRITE64(args->lock_owner.clientid); 834 WRITE64(args->lock_owner.clientid);
801 WRITE32(4); 835 WRITE32(16);
802 WRITE32(args->lock_owner.id); 836 WRITEMEM("lock id:", 8);
837 WRITE64(args->lock_owner.id);
803 } 838 }
804 else { 839 else {
805 RESERVE_SPACE(NFS4_STATEID_SIZE+4); 840 RESERVE_SPACE(NFS4_STATEID_SIZE+4);
@@ -814,14 +849,15 @@ static int encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *arg
814{ 849{
815 __be32 *p; 850 __be32 *p;
816 851
817 RESERVE_SPACE(40); 852 RESERVE_SPACE(52);
818 WRITE32(OP_LOCKT); 853 WRITE32(OP_LOCKT);
819 WRITE32(nfs4_lock_type(args->fl, 0)); 854 WRITE32(nfs4_lock_type(args->fl, 0));
820 WRITE64(args->fl->fl_start); 855 WRITE64(args->fl->fl_start);
821 WRITE64(nfs4_lock_length(args->fl)); 856 WRITE64(nfs4_lock_length(args->fl));
822 WRITE64(args->lock_owner.clientid); 857 WRITE64(args->lock_owner.clientid);
823 WRITE32(4); 858 WRITE32(16);
824 WRITE32(args->lock_owner.id); 859 WRITEMEM("lock id:", 8);
860 WRITE64(args->lock_owner.id);
825 861
826 return 0; 862 return 0;
827} 863}
@@ -886,10 +922,11 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena
886 WRITE32(OP_OPEN); 922 WRITE32(OP_OPEN);
887 WRITE32(arg->seqid->sequence->counter); 923 WRITE32(arg->seqid->sequence->counter);
888 encode_share_access(xdr, arg->open_flags); 924 encode_share_access(xdr, arg->open_flags);
889 RESERVE_SPACE(16); 925 RESERVE_SPACE(28);
890 WRITE64(arg->clientid); 926 WRITE64(arg->clientid);
891 WRITE32(4); 927 WRITE32(16);
892 WRITE32(arg->id); 928 WRITEMEM("open id:", 8);
929 WRITE64(arg->id);
893} 930}
894 931
895static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg) 932static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg)
@@ -1071,7 +1108,7 @@ static int encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args)
1071 1108
1072static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req) 1109static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req)
1073{ 1110{
1074 struct rpc_auth *auth = req->rq_task->tk_auth; 1111 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
1075 uint32_t attrs[2] = { 1112 uint32_t attrs[2] = {
1076 FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID, 1113 FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID,
1077 FATTR4_WORD1_MOUNTED_ON_FILEID, 1114 FATTR4_WORD1_MOUNTED_ON_FILEID,
@@ -1117,7 +1154,7 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
1117 1154
1118static int encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req) 1155static int encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req)
1119{ 1156{
1120 struct rpc_auth *auth = req->rq_task->tk_auth; 1157 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
1121 unsigned int replen; 1158 unsigned int replen;
1122 __be32 *p; 1159 __be32 *p;
1123 1160
@@ -1735,7 +1772,7 @@ out:
1735 */ 1772 */
1736static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) 1773static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
1737{ 1774{
1738 struct rpc_auth *auth = req->rq_task->tk_auth; 1775 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
1739 struct xdr_stream xdr; 1776 struct xdr_stream xdr;
1740 struct compound_hdr hdr = { 1777 struct compound_hdr hdr = {
1741 .nops = 2, 1778 .nops = 2,
@@ -1795,7 +1832,7 @@ nfs4_xdr_enc_getacl(struct rpc_rqst *req, __be32 *p,
1795 struct nfs_getaclargs *args) 1832 struct nfs_getaclargs *args)
1796{ 1833{
1797 struct xdr_stream xdr; 1834 struct xdr_stream xdr;
1798 struct rpc_auth *auth = req->rq_task->tk_auth; 1835 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
1799 struct compound_hdr hdr = { 1836 struct compound_hdr hdr = {
1800 .nops = 2, 1837 .nops = 2,
1801 }; 1838 };
@@ -2030,7 +2067,7 @@ static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs
2030 struct compound_hdr hdr = { 2067 struct compound_hdr hdr = {
2031 .nops = 3, 2068 .nops = 3,
2032 }; 2069 };
2033 struct rpc_auth *auth = req->rq_task->tk_auth; 2070 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
2034 int replen; 2071 int replen;
2035 int status; 2072 int status;
2036 2073
@@ -3269,7 +3306,7 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
3269static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) 3306static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
3270{ 3307{
3271 __be32 *p; 3308 __be32 *p;
3272 uint32_t bmlen; 3309 uint32_t savewords, bmlen, i;
3273 int status; 3310 int status;
3274 3311
3275 status = decode_op_hdr(xdr, OP_OPEN); 3312 status = decode_op_hdr(xdr, OP_OPEN);
@@ -3287,7 +3324,12 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
3287 goto xdr_error; 3324 goto xdr_error;
3288 3325
3289 READ_BUF(bmlen << 2); 3326 READ_BUF(bmlen << 2);
3290 p += bmlen; 3327 savewords = min_t(uint32_t, bmlen, NFS4_BITMAP_SIZE);
3328 for (i = 0; i < savewords; ++i)
3329 READ32(res->attrset[i]);
3330 for (; i < NFS4_BITMAP_SIZE; i++)
3331 res->attrset[i] = 0;
3332
3291 return decode_delegation(xdr, res); 3333 return decode_delegation(xdr, res);
3292xdr_error: 3334xdr_error:
3293 dprintk("%s: Bitmap too large! Length = %u\n", __FUNCTION__, bmlen); 3335 dprintk("%s: Bitmap too large! Length = %u\n", __FUNCTION__, bmlen);
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 49d1008ce1d7..3490322d1145 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -428,7 +428,7 @@ static int __init root_nfs_getport(int program, int version, int proto)
428 printk(KERN_NOTICE "Looking up port of RPC %d/%d on %u.%u.%u.%u\n", 428 printk(KERN_NOTICE "Looking up port of RPC %d/%d on %u.%u.%u.%u\n",
429 program, version, NIPQUAD(servaddr)); 429 program, version, NIPQUAD(servaddr));
430 set_sockaddr(&sin, servaddr, 0); 430 set_sockaddr(&sin, servaddr, 0);
431 return rpcb_getport_external(&sin, program, version, proto); 431 return rpcb_getport_sync(&sin, program, version, proto);
432} 432}
433 433
434 434
@@ -496,7 +496,8 @@ static int __init root_nfs_get_handle(void)
496 NFS_MNT3_VERSION : NFS_MNT_VERSION; 496 NFS_MNT3_VERSION : NFS_MNT_VERSION;
497 497
498 set_sockaddr(&sin, servaddr, htons(mount_port)); 498 set_sockaddr(&sin, servaddr, htons(mount_port));
499 status = nfsroot_mount(&sin, nfs_path, &fh, version, protocol); 499 status = nfs_mount((struct sockaddr *) &sin, sizeof(sin), NULL,
500 nfs_path, version, protocol, &fh);
500 if (status < 0) 501 if (status < 0)
501 printk(KERN_ERR "Root-NFS: Server returned error %d " 502 printk(KERN_ERR "Root-NFS: Server returned error %d "
502 "while mounting %s\n", status, nfs_path); 503 "while mounting %s\n", status, nfs_path);
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index c5bb51a29e80..f56dae5216f4 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -85,9 +85,8 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
85 req->wb_offset = offset; 85 req->wb_offset = offset;
86 req->wb_pgbase = offset; 86 req->wb_pgbase = offset;
87 req->wb_bytes = count; 87 req->wb_bytes = count;
88 atomic_set(&req->wb_count, 1);
89 req->wb_context = get_nfs_open_context(ctx); 88 req->wb_context = get_nfs_open_context(ctx);
90 89 kref_init(&req->wb_kref);
91 return req; 90 return req;
92} 91}
93 92
@@ -109,30 +108,31 @@ void nfs_unlock_request(struct nfs_page *req)
109} 108}
110 109
111/** 110/**
112 * nfs_set_page_writeback_locked - Lock a request for writeback 111 * nfs_set_page_tag_locked - Tag a request as locked
113 * @req: 112 * @req:
114 */ 113 */
115int nfs_set_page_writeback_locked(struct nfs_page *req) 114static int nfs_set_page_tag_locked(struct nfs_page *req)
116{ 115{
117 struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode); 116 struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode);
118 117
119 if (!nfs_lock_request(req)) 118 if (!nfs_lock_request(req))
120 return 0; 119 return 0;
121 radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK); 120 radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
122 return 1; 121 return 1;
123} 122}
124 123
125/** 124/**
126 * nfs_clear_page_writeback - Unlock request and wake up sleepers 125 * nfs_clear_page_tag_locked - Clear request tag and wake up sleepers
127 */ 126 */
128void nfs_clear_page_writeback(struct nfs_page *req) 127void nfs_clear_page_tag_locked(struct nfs_page *req)
129{ 128{
130 struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode); 129 struct inode *inode = req->wb_context->path.dentry->d_inode;
130 struct nfs_inode *nfsi = NFS_I(inode);
131 131
132 if (req->wb_page != NULL) { 132 if (req->wb_page != NULL) {
133 spin_lock(&nfsi->req_lock); 133 spin_lock(&inode->i_lock);
134 radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK); 134 radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
135 spin_unlock(&nfsi->req_lock); 135 spin_unlock(&inode->i_lock);
136 } 136 }
137 nfs_unlock_request(req); 137 nfs_unlock_request(req);
138} 138}
@@ -160,11 +160,9 @@ void nfs_clear_request(struct nfs_page *req)
160 * 160 *
161 * Note: Should never be called with the spinlock held! 161 * Note: Should never be called with the spinlock held!
162 */ 162 */
163void 163static void nfs_free_request(struct kref *kref)
164nfs_release_request(struct nfs_page *req)
165{ 164{
166 if (!atomic_dec_and_test(&req->wb_count)) 165 struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
167 return;
168 166
169 /* Release struct file or cached credential */ 167 /* Release struct file or cached credential */
170 nfs_clear_request(req); 168 nfs_clear_request(req);
@@ -172,6 +170,11 @@ nfs_release_request(struct nfs_page *req)
172 nfs_page_free(req); 170 nfs_page_free(req);
173} 171}
174 172
173void nfs_release_request(struct nfs_page *req)
174{
175 kref_put(&req->wb_kref, nfs_free_request);
176}
177
175static int nfs_wait_bit_interruptible(void *word) 178static int nfs_wait_bit_interruptible(void *word)
176{ 179{
177 int ret = 0; 180 int ret = 0;
@@ -193,7 +196,7 @@ static int nfs_wait_bit_interruptible(void *word)
193int 196int
194nfs_wait_on_request(struct nfs_page *req) 197nfs_wait_on_request(struct nfs_page *req)
195{ 198{
196 struct rpc_clnt *clnt = NFS_CLIENT(req->wb_context->dentry->d_inode); 199 struct rpc_clnt *clnt = NFS_CLIENT(req->wb_context->path.dentry->d_inode);
197 sigset_t oldmask; 200 sigset_t oldmask;
198 int ret = 0; 201 int ret = 0;
199 202
@@ -379,20 +382,20 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
379/** 382/**
380 * nfs_scan_list - Scan a list for matching requests 383 * nfs_scan_list - Scan a list for matching requests
381 * @nfsi: NFS inode 384 * @nfsi: NFS inode
382 * @head: One of the NFS inode request lists
383 * @dst: Destination list 385 * @dst: Destination list
384 * @idx_start: lower bound of page->index to scan 386 * @idx_start: lower bound of page->index to scan
385 * @npages: idx_start + npages sets the upper bound to scan. 387 * @npages: idx_start + npages sets the upper bound to scan.
388 * @tag: tag to scan for
386 * 389 *
387 * Moves elements from one of the inode request lists. 390 * Moves elements from one of the inode request lists.
388 * If the number of requests is set to 0, the entire address_space 391 * If the number of requests is set to 0, the entire address_space
389 * starting at index idx_start, is scanned. 392 * starting at index idx_start, is scanned.
390 * The requests are *not* checked to ensure that they form a contiguous set. 393 * The requests are *not* checked to ensure that they form a contiguous set.
391 * You must be holding the inode's req_lock when calling this function 394 * You must be holding the inode's i_lock when calling this function
392 */ 395 */
393int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, 396int nfs_scan_list(struct nfs_inode *nfsi,
394 struct list_head *dst, pgoff_t idx_start, 397 struct list_head *dst, pgoff_t idx_start,
395 unsigned int npages) 398 unsigned int npages, int tag)
396{ 399{
397 struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES]; 400 struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
398 struct nfs_page *req; 401 struct nfs_page *req;
@@ -407,9 +410,9 @@ int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head,
407 idx_end = idx_start + npages - 1; 410 idx_end = idx_start + npages - 1;
408 411
409 for (;;) { 412 for (;;) {
410 found = radix_tree_gang_lookup(&nfsi->nfs_page_tree, 413 found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree,
411 (void **)&pgvec[0], idx_start, 414 (void **)&pgvec[0], idx_start,
412 NFS_SCAN_MAXENTRIES); 415 NFS_SCAN_MAXENTRIES, tag);
413 if (found <= 0) 416 if (found <= 0)
414 break; 417 break;
415 for (i = 0; i < found; i++) { 418 for (i = 0; i < found; i++) {
@@ -417,15 +420,18 @@ int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head,
417 if (req->wb_index > idx_end) 420 if (req->wb_index > idx_end)
418 goto out; 421 goto out;
419 idx_start = req->wb_index + 1; 422 idx_start = req->wb_index + 1;
420 if (req->wb_list_head != head) 423 if (nfs_set_page_tag_locked(req)) {
421 continue;
422 if (nfs_set_page_writeback_locked(req)) {
423 nfs_list_remove_request(req); 424 nfs_list_remove_request(req);
425 radix_tree_tag_clear(&nfsi->nfs_page_tree,
426 req->wb_index, tag);
424 nfs_list_add_request(req, dst); 427 nfs_list_add_request(req, dst);
425 res++; 428 res++;
429 if (res == INT_MAX)
430 goto out;
426 } 431 }
427 } 432 }
428 433 /* for latency reduction */
434 cond_resched_lock(&nfsi->vfs_inode.i_lock);
429 } 435 }
430out: 436out:
431 return res; 437 return res;
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 7bd7cb95c034..6ae2e58ed05a 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -145,8 +145,8 @@ static void nfs_readpage_release(struct nfs_page *req)
145 unlock_page(req->wb_page); 145 unlock_page(req->wb_page);
146 146
147 dprintk("NFS: read done (%s/%Ld %d@%Ld)\n", 147 dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
148 req->wb_context->dentry->d_inode->i_sb->s_id, 148 req->wb_context->path.dentry->d_inode->i_sb->s_id,
149 (long long)NFS_FILEID(req->wb_context->dentry->d_inode), 149 (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
150 req->wb_bytes, 150 req->wb_bytes,
151 (long long)req_offset(req)); 151 (long long)req_offset(req));
152 nfs_clear_request(req); 152 nfs_clear_request(req);
@@ -164,7 +164,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
164 int flags; 164 int flags;
165 165
166 data->req = req; 166 data->req = req;
167 data->inode = inode = req->wb_context->dentry->d_inode; 167 data->inode = inode = req->wb_context->path.dentry->d_inode;
168 data->cred = req->wb_context->cred; 168 data->cred = req->wb_context->cred;
169 169
170 data->args.fh = NFS_FH(inode); 170 data->args.fh = NFS_FH(inode);
@@ -483,17 +483,19 @@ int nfs_readpage(struct file *file, struct page *page)
483 */ 483 */
484 error = nfs_wb_page(inode, page); 484 error = nfs_wb_page(inode, page);
485 if (error) 485 if (error)
486 goto out_error; 486 goto out_unlock;
487 if (PageUptodate(page))
488 goto out_unlock;
487 489
488 error = -ESTALE; 490 error = -ESTALE;
489 if (NFS_STALE(inode)) 491 if (NFS_STALE(inode))
490 goto out_error; 492 goto out_unlock;
491 493
492 if (file == NULL) { 494 if (file == NULL) {
493 error = -EBADF; 495 error = -EBADF;
494 ctx = nfs_find_open_context(inode, NULL, FMODE_READ); 496 ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
495 if (ctx == NULL) 497 if (ctx == NULL)
496 goto out_error; 498 goto out_unlock;
497 } else 499 } else
498 ctx = get_nfs_open_context((struct nfs_open_context *) 500 ctx = get_nfs_open_context((struct nfs_open_context *)
499 file->private_data); 501 file->private_data);
@@ -502,8 +504,7 @@ int nfs_readpage(struct file *file, struct page *page)
502 504
503 put_nfs_open_context(ctx); 505 put_nfs_open_context(ctx);
504 return error; 506 return error;
505 507out_unlock:
506out_error:
507 unlock_page(page); 508 unlock_page(page);
508 return error; 509 return error;
509} 510}
@@ -520,21 +521,32 @@ readpage_async_filler(void *data, struct page *page)
520 struct inode *inode = page->mapping->host; 521 struct inode *inode = page->mapping->host;
521 struct nfs_page *new; 522 struct nfs_page *new;
522 unsigned int len; 523 unsigned int len;
524 int error;
525
526 error = nfs_wb_page(inode, page);
527 if (error)
528 goto out_unlock;
529 if (PageUptodate(page))
530 goto out_unlock;
523 531
524 nfs_wb_page(inode, page);
525 len = nfs_page_length(page); 532 len = nfs_page_length(page);
526 if (len == 0) 533 if (len == 0)
527 return nfs_return_empty_page(page); 534 return nfs_return_empty_page(page);
535
528 new = nfs_create_request(desc->ctx, inode, page, 0, len); 536 new = nfs_create_request(desc->ctx, inode, page, 0, len);
529 if (IS_ERR(new)) { 537 if (IS_ERR(new))
530 SetPageError(page); 538 goto out_error;
531 unlock_page(page); 539
532 return PTR_ERR(new);
533 }
534 if (len < PAGE_CACHE_SIZE) 540 if (len < PAGE_CACHE_SIZE)
535 zero_user_page(page, len, PAGE_CACHE_SIZE - len, KM_USER0); 541 zero_user_page(page, len, PAGE_CACHE_SIZE - len, KM_USER0);
536 nfs_pageio_add_request(desc->pgio, new); 542 nfs_pageio_add_request(desc->pgio, new);
537 return 0; 543 return 0;
544out_error:
545 error = PTR_ERR(new);
546 SetPageError(page);
547out_unlock:
548 unlock_page(page);
549 return error;
538} 550}
539 551
540int nfs_readpages(struct file *filp, struct address_space *mapping, 552int nfs_readpages(struct file *filp, struct address_space *mapping,
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ca20d3cc2609..a2b1af89ca1a 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -45,6 +45,7 @@
45#include <linux/inet.h> 45#include <linux/inet.h>
46#include <linux/nfs_xdr.h> 46#include <linux/nfs_xdr.h>
47#include <linux/magic.h> 47#include <linux/magic.h>
48#include <linux/parser.h>
48 49
49#include <asm/system.h> 50#include <asm/system.h>
50#include <asm/uaccess.h> 51#include <asm/uaccess.h>
@@ -57,6 +58,167 @@
57 58
58#define NFSDBG_FACILITY NFSDBG_VFS 59#define NFSDBG_FACILITY NFSDBG_VFS
59 60
61
62struct nfs_parsed_mount_data {
63 int flags;
64 int rsize, wsize;
65 int timeo, retrans;
66 int acregmin, acregmax,
67 acdirmin, acdirmax;
68 int namlen;
69 unsigned int bsize;
70 unsigned int auth_flavor_len;
71 rpc_authflavor_t auth_flavors[1];
72 char *client_address;
73
74 struct {
75 struct sockaddr_in address;
76 unsigned int program;
77 unsigned int version;
78 unsigned short port;
79 int protocol;
80 } mount_server;
81
82 struct {
83 struct sockaddr_in address;
84 char *hostname;
85 char *export_path;
86 unsigned int program;
87 int protocol;
88 } nfs_server;
89};
90
91enum {
92 /* Mount options that take no arguments */
93 Opt_soft, Opt_hard,
94 Opt_intr, Opt_nointr,
95 Opt_posix, Opt_noposix,
96 Opt_cto, Opt_nocto,
97 Opt_ac, Opt_noac,
98 Opt_lock, Opt_nolock,
99 Opt_v2, Opt_v3,
100 Opt_udp, Opt_tcp,
101 Opt_acl, Opt_noacl,
102 Opt_rdirplus, Opt_nordirplus,
103 Opt_sharecache, Opt_nosharecache,
104
105 /* Mount options that take integer arguments */
106 Opt_port,
107 Opt_rsize, Opt_wsize, Opt_bsize,
108 Opt_timeo, Opt_retrans,
109 Opt_acregmin, Opt_acregmax,
110 Opt_acdirmin, Opt_acdirmax,
111 Opt_actimeo,
112 Opt_namelen,
113 Opt_mountport,
114 Opt_mountprog, Opt_mountvers,
115 Opt_nfsprog, Opt_nfsvers,
116
117 /* Mount options that take string arguments */
118 Opt_sec, Opt_proto, Opt_mountproto,
119 Opt_addr, Opt_mounthost, Opt_clientaddr,
120
121 /* Mount options that are ignored */
122 Opt_userspace, Opt_deprecated,
123
124 Opt_err
125};
126
127static match_table_t nfs_mount_option_tokens = {
128 { Opt_userspace, "bg" },
129 { Opt_userspace, "fg" },
130 { Opt_soft, "soft" },
131 { Opt_hard, "hard" },
132 { Opt_intr, "intr" },
133 { Opt_nointr, "nointr" },
134 { Opt_posix, "posix" },
135 { Opt_noposix, "noposix" },
136 { Opt_cto, "cto" },
137 { Opt_nocto, "nocto" },
138 { Opt_ac, "ac" },
139 { Opt_noac, "noac" },
140 { Opt_lock, "lock" },
141 { Opt_nolock, "nolock" },
142 { Opt_v2, "v2" },
143 { Opt_v3, "v3" },
144 { Opt_udp, "udp" },
145 { Opt_tcp, "tcp" },
146 { Opt_acl, "acl" },
147 { Opt_noacl, "noacl" },
148 { Opt_rdirplus, "rdirplus" },
149 { Opt_nordirplus, "nordirplus" },
150 { Opt_sharecache, "sharecache" },
151 { Opt_nosharecache, "nosharecache" },
152
153 { Opt_port, "port=%u" },
154 { Opt_rsize, "rsize=%u" },
155 { Opt_wsize, "wsize=%u" },
156 { Opt_bsize, "bsize=%u" },
157 { Opt_timeo, "timeo=%u" },
158 { Opt_retrans, "retrans=%u" },
159 { Opt_acregmin, "acregmin=%u" },
160 { Opt_acregmax, "acregmax=%u" },
161 { Opt_acdirmin, "acdirmin=%u" },
162 { Opt_acdirmax, "acdirmax=%u" },
163 { Opt_actimeo, "actimeo=%u" },
164 { Opt_userspace, "retry=%u" },
165 { Opt_namelen, "namlen=%u" },
166 { Opt_mountport, "mountport=%u" },
167 { Opt_mountprog, "mountprog=%u" },
168 { Opt_mountvers, "mountvers=%u" },
169 { Opt_nfsprog, "nfsprog=%u" },
170 { Opt_nfsvers, "nfsvers=%u" },
171 { Opt_nfsvers, "vers=%u" },
172
173 { Opt_sec, "sec=%s" },
174 { Opt_proto, "proto=%s" },
175 { Opt_mountproto, "mountproto=%s" },
176 { Opt_addr, "addr=%s" },
177 { Opt_clientaddr, "clientaddr=%s" },
178 { Opt_mounthost, "mounthost=%s" },
179
180 { Opt_err, NULL }
181};
182
183enum {
184 Opt_xprt_udp, Opt_xprt_tcp,
185
186 Opt_xprt_err
187};
188
189static match_table_t nfs_xprt_protocol_tokens = {
190 { Opt_xprt_udp, "udp" },
191 { Opt_xprt_tcp, "tcp" },
192
193 { Opt_xprt_err, NULL }
194};
195
196enum {
197 Opt_sec_none, Opt_sec_sys,
198 Opt_sec_krb5, Opt_sec_krb5i, Opt_sec_krb5p,
199 Opt_sec_lkey, Opt_sec_lkeyi, Opt_sec_lkeyp,
200 Opt_sec_spkm, Opt_sec_spkmi, Opt_sec_spkmp,
201
202 Opt_sec_err
203};
204
205static match_table_t nfs_secflavor_tokens = {
206 { Opt_sec_none, "none" },
207 { Opt_sec_none, "null" },
208 { Opt_sec_sys, "sys" },
209
210 { Opt_sec_krb5, "krb5" },
211 { Opt_sec_krb5i, "krb5i" },
212 { Opt_sec_krb5p, "krb5p" },
213
214 { Opt_sec_lkey, "lkey" },
215 { Opt_sec_lkeyi, "lkeyi" },
216 { Opt_sec_lkeyp, "lkeyp" },
217
218 { Opt_sec_err, NULL }
219};
220
221
60static void nfs_umount_begin(struct vfsmount *, int); 222static void nfs_umount_begin(struct vfsmount *, int);
61static int nfs_statfs(struct dentry *, struct kstatfs *); 223static int nfs_statfs(struct dentry *, struct kstatfs *);
62static int nfs_show_options(struct seq_file *, struct vfsmount *); 224static int nfs_show_options(struct seq_file *, struct vfsmount *);
@@ -263,11 +425,11 @@ static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour)
263 { RPC_AUTH_GSS_SPKM, "spkm" }, 425 { RPC_AUTH_GSS_SPKM, "spkm" },
264 { RPC_AUTH_GSS_SPKMI, "spkmi" }, 426 { RPC_AUTH_GSS_SPKMI, "spkmi" },
265 { RPC_AUTH_GSS_SPKMP, "spkmp" }, 427 { RPC_AUTH_GSS_SPKMP, "spkmp" },
266 { -1, "unknown" } 428 { UINT_MAX, "unknown" }
267 }; 429 };
268 int i; 430 int i;
269 431
270 for (i=0; sec_flavours[i].flavour != -1; i++) { 432 for (i = 0; sec_flavours[i].flavour != UINT_MAX; i++) {
271 if (sec_flavours[i].flavour == flavour) 433 if (sec_flavours[i].flavour == flavour)
272 break; 434 break;
273 } 435 }
@@ -291,6 +453,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
291 { NFS_MOUNT_NONLM, ",nolock", "" }, 453 { NFS_MOUNT_NONLM, ",nolock", "" },
292 { NFS_MOUNT_NOACL, ",noacl", "" }, 454 { NFS_MOUNT_NOACL, ",noacl", "" },
293 { NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" }, 455 { NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" },
456 { NFS_MOUNT_UNSHARED, ",nosharecache", ""},
294 { 0, NULL, NULL } 457 { 0, NULL, NULL }
295 }; 458 };
296 const struct proc_nfs_info *nfs_infop; 459 const struct proc_nfs_info *nfs_infop;
@@ -430,87 +593,641 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
430 */ 593 */
431static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags) 594static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags)
432{ 595{
596 struct nfs_server *server = NFS_SB(vfsmnt->mnt_sb);
597 struct rpc_clnt *rpc;
598
433 shrink_submounts(vfsmnt, &nfs_automount_list); 599 shrink_submounts(vfsmnt, &nfs_automount_list);
600
601 if (!(flags & MNT_FORCE))
602 return;
603 /* -EIO all pending I/O */
604 rpc = server->client_acl;
605 if (!IS_ERR(rpc))
606 rpc_killall_tasks(rpc);
607 rpc = server->client;
608 if (!IS_ERR(rpc))
609 rpc_killall_tasks(rpc);
434} 610}
435 611
436/* 612/*
437 * Validate the NFS2/NFS3 mount data 613 * Sanity-check a server address provided by the mount command
438 * - fills in the mount root filehandle
439 */ 614 */
static int nfs_verify_server_address(struct sockaddr *addr)
{
	const struct sockaddr_in *sin;

	/* Only AF_INET addresses are accepted; any other family fails. */
	if (addr->sa_family != AF_INET)
		return 0;

	/* The wildcard address INADDR_ANY does not name a server. */
	sin = (const struct sockaddr_in *) addr;
	return sin->sin_addr.s_addr != INADDR_ANY;
}
450 return -EINVAL; 628
629/*
630 * Error-check and convert a string of mount options from user space into
631 * a data structure
632 */
633static int nfs_parse_mount_options(char *raw,
634 struct nfs_parsed_mount_data *mnt)
635{
636 char *p, *string;
637
638 if (!raw) {
639 dfprintk(MOUNT, "NFS: mount options string was NULL.\n");
640 return 1;
451 } 641 }
642 dfprintk(MOUNT, "NFS: nfs mount opts='%s'\n", raw);
452 643
453 switch (data->version) { 644 while ((p = strsep(&raw, ",")) != NULL) {
454 case 1: 645 substring_t args[MAX_OPT_ARGS];
455 data->namlen = 0; 646 int option, token;
456 case 2: 647
457 data->bsize = 0; 648 if (!*p)
458 case 3: 649 continue;
459 if (data->flags & NFS_MOUNT_VER3) { 650
460 dprintk("%s: mount structure version %d does not support NFSv3\n", 651 dfprintk(MOUNT, "NFS: parsing nfs mount option '%s'\n", p);
461 __FUNCTION__, 652
462 data->version); 653 token = match_token(p, nfs_mount_option_tokens, args);
463 return -EINVAL; 654 switch (token) {
655 case Opt_soft:
656 mnt->flags |= NFS_MOUNT_SOFT;
657 break;
658 case Opt_hard:
659 mnt->flags &= ~NFS_MOUNT_SOFT;
660 break;
661 case Opt_intr:
662 mnt->flags |= NFS_MOUNT_INTR;
663 break;
664 case Opt_nointr:
665 mnt->flags &= ~NFS_MOUNT_INTR;
666 break;
667 case Opt_posix:
668 mnt->flags |= NFS_MOUNT_POSIX;
669 break;
670 case Opt_noposix:
671 mnt->flags &= ~NFS_MOUNT_POSIX;
672 break;
673 case Opt_cto:
674 mnt->flags &= ~NFS_MOUNT_NOCTO;
675 break;
676 case Opt_nocto:
677 mnt->flags |= NFS_MOUNT_NOCTO;
678 break;
679 case Opt_ac:
680 mnt->flags &= ~NFS_MOUNT_NOAC;
681 break;
682 case Opt_noac:
683 mnt->flags |= NFS_MOUNT_NOAC;
684 break;
685 case Opt_lock:
686 mnt->flags &= ~NFS_MOUNT_NONLM;
687 break;
688 case Opt_nolock:
689 mnt->flags |= NFS_MOUNT_NONLM;
690 break;
691 case Opt_v2:
692 mnt->flags &= ~NFS_MOUNT_VER3;
693 break;
694 case Opt_v3:
695 mnt->flags |= NFS_MOUNT_VER3;
696 break;
697 case Opt_udp:
698 mnt->flags &= ~NFS_MOUNT_TCP;
699 mnt->nfs_server.protocol = IPPROTO_UDP;
700 mnt->timeo = 7;
701 mnt->retrans = 5;
702 break;
703 case Opt_tcp:
704 mnt->flags |= NFS_MOUNT_TCP;
705 mnt->nfs_server.protocol = IPPROTO_TCP;
706 mnt->timeo = 600;
707 mnt->retrans = 2;
708 break;
709 case Opt_acl:
710 mnt->flags &= ~NFS_MOUNT_NOACL;
711 break;
712 case Opt_noacl:
713 mnt->flags |= NFS_MOUNT_NOACL;
714 break;
715 case Opt_rdirplus:
716 mnt->flags &= ~NFS_MOUNT_NORDIRPLUS;
717 break;
718 case Opt_nordirplus:
719 mnt->flags |= NFS_MOUNT_NORDIRPLUS;
720 break;
721 case Opt_sharecache:
722 mnt->flags &= ~NFS_MOUNT_UNSHARED;
723 break;
724 case Opt_nosharecache:
725 mnt->flags |= NFS_MOUNT_UNSHARED;
726 break;
727
728 case Opt_port:
729 if (match_int(args, &option))
730 return 0;
731 if (option < 0 || option > 65535)
732 return 0;
733 mnt->nfs_server.address.sin_port = htonl(option);
734 break;
735 case Opt_rsize:
736 if (match_int(args, &mnt->rsize))
737 return 0;
738 break;
739 case Opt_wsize:
740 if (match_int(args, &mnt->wsize))
741 return 0;
742 break;
743 case Opt_bsize:
744 if (match_int(args, &option))
745 return 0;
746 if (option < 0)
747 return 0;
748 mnt->bsize = option;
749 break;
750 case Opt_timeo:
751 if (match_int(args, &mnt->timeo))
752 return 0;
753 break;
754 case Opt_retrans:
755 if (match_int(args, &mnt->retrans))
756 return 0;
757 break;
758 case Opt_acregmin:
759 if (match_int(args, &mnt->acregmin))
760 return 0;
761 break;
762 case Opt_acregmax:
763 if (match_int(args, &mnt->acregmax))
764 return 0;
765 break;
766 case Opt_acdirmin:
767 if (match_int(args, &mnt->acdirmin))
768 return 0;
769 break;
770 case Opt_acdirmax:
771 if (match_int(args, &mnt->acdirmax))
772 return 0;
773 break;
774 case Opt_actimeo:
775 if (match_int(args, &option))
776 return 0;
777 if (option < 0)
778 return 0;
779 mnt->acregmin =
780 mnt->acregmax =
781 mnt->acdirmin =
782 mnt->acdirmax = option;
783 break;
784 case Opt_namelen:
785 if (match_int(args, &mnt->namlen))
786 return 0;
787 break;
788 case Opt_mountport:
789 if (match_int(args, &option))
790 return 0;
791 if (option < 0 || option > 65535)
792 return 0;
793 mnt->mount_server.port = option;
794 break;
795 case Opt_mountprog:
796 if (match_int(args, &option))
797 return 0;
798 if (option < 0)
799 return 0;
800 mnt->mount_server.program = option;
801 break;
802 case Opt_mountvers:
803 if (match_int(args, &option))
804 return 0;
805 if (option < 0)
806 return 0;
807 mnt->mount_server.version = option;
808 break;
809 case Opt_nfsprog:
810 if (match_int(args, &option))
811 return 0;
812 if (option < 0)
813 return 0;
814 mnt->nfs_server.program = option;
815 break;
816 case Opt_nfsvers:
817 if (match_int(args, &option))
818 return 0;
819 switch (option) {
820 case 2:
821 mnt->flags &= ~NFS_MOUNT_VER3;
822 break;
823 case 3:
824 mnt->flags |= NFS_MOUNT_VER3;
825 break;
826 default:
827 goto out_unrec_vers;
464 } 828 }
465 data->root.size = NFS2_FHSIZE; 829 break;
466 memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); 830
467 case 4: 831 case Opt_sec:
468 if (data->flags & NFS_MOUNT_SECFLAVOUR) { 832 string = match_strdup(args);
469 dprintk("%s: mount structure version %d does not support strong security\n", 833 if (string == NULL)
470 __FUNCTION__, 834 goto out_nomem;
471 data->version); 835 token = match_token(string, nfs_secflavor_tokens, args);
472 return -EINVAL; 836 kfree(string);
837
838 /*
839 * The flags setting is for v2/v3. The flavor_len
840 * setting is for v4. v2/v3 also need to know the
841 * difference between NULL and UNIX.
842 */
843 switch (token) {
844 case Opt_sec_none:
845 mnt->flags &= ~NFS_MOUNT_SECFLAVOUR;
846 mnt->auth_flavor_len = 0;
847 mnt->auth_flavors[0] = RPC_AUTH_NULL;
848 break;
849 case Opt_sec_sys:
850 mnt->flags &= ~NFS_MOUNT_SECFLAVOUR;
851 mnt->auth_flavor_len = 0;
852 mnt->auth_flavors[0] = RPC_AUTH_UNIX;
853 break;
854 case Opt_sec_krb5:
855 mnt->flags |= NFS_MOUNT_SECFLAVOUR;
856 mnt->auth_flavor_len = 1;
857 mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5;
858 break;
859 case Opt_sec_krb5i:
860 mnt->flags |= NFS_MOUNT_SECFLAVOUR;
861 mnt->auth_flavor_len = 1;
862 mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5I;
863 break;
864 case Opt_sec_krb5p:
865 mnt->flags |= NFS_MOUNT_SECFLAVOUR;
866 mnt->auth_flavor_len = 1;
867 mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5P;
868 break;
869 case Opt_sec_lkey:
870 mnt->flags |= NFS_MOUNT_SECFLAVOUR;
871 mnt->auth_flavor_len = 1;
872 mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEY;
873 break;
874 case Opt_sec_lkeyi:
875 mnt->flags |= NFS_MOUNT_SECFLAVOUR;
876 mnt->auth_flavor_len = 1;
877 mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYI;
878 break;
879 case Opt_sec_lkeyp:
880 mnt->flags |= NFS_MOUNT_SECFLAVOUR;
881 mnt->auth_flavor_len = 1;
882 mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYP;
883 break;
884 case Opt_sec_spkm:
885 mnt->flags |= NFS_MOUNT_SECFLAVOUR;
886 mnt->auth_flavor_len = 1;
887 mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKM;
888 break;
889 case Opt_sec_spkmi:
890 mnt->flags |= NFS_MOUNT_SECFLAVOUR;
891 mnt->auth_flavor_len = 1;
892 mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMI;
893 break;
894 case Opt_sec_spkmp:
895 mnt->flags |= NFS_MOUNT_SECFLAVOUR;
896 mnt->auth_flavor_len = 1;
897 mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMP;
898 break;
899 default:
900 goto out_unrec_sec;
473 } 901 }
474 case 5: 902 break;
475 memset(data->context, 0, sizeof(data->context)); 903 case Opt_proto:
476 } 904 string = match_strdup(args);
905 if (string == NULL)
906 goto out_nomem;
907 token = match_token(string,
908 nfs_xprt_protocol_tokens, args);
909 kfree(string);
910
911 switch (token) {
912 case Opt_udp:
913 mnt->flags &= ~NFS_MOUNT_TCP;
914 mnt->nfs_server.protocol = IPPROTO_UDP;
915 mnt->timeo = 7;
916 mnt->retrans = 5;
917 break;
918 case Opt_tcp:
919 mnt->flags |= NFS_MOUNT_TCP;
920 mnt->nfs_server.protocol = IPPROTO_TCP;
921 mnt->timeo = 600;
922 mnt->retrans = 2;
923 break;
924 default:
925 goto out_unrec_xprt;
926 }
927 break;
928 case Opt_mountproto:
929 string = match_strdup(args);
930 if (string == NULL)
931 goto out_nomem;
932 token = match_token(string,
933 nfs_xprt_protocol_tokens, args);
934 kfree(string);
935
936 switch (token) {
937 case Opt_udp:
938 mnt->mount_server.protocol = IPPROTO_UDP;
939 break;
940 case Opt_tcp:
941 mnt->mount_server.protocol = IPPROTO_TCP;
942 break;
943 default:
944 goto out_unrec_xprt;
945 }
946 break;
947 case Opt_addr:
948 string = match_strdup(args);
949 if (string == NULL)
950 goto out_nomem;
951 mnt->nfs_server.address.sin_family = AF_INET;
952 mnt->nfs_server.address.sin_addr.s_addr =
953 in_aton(string);
954 kfree(string);
955 break;
956 case Opt_clientaddr:
957 string = match_strdup(args);
958 if (string == NULL)
959 goto out_nomem;
960 mnt->client_address = string;
961 break;
962 case Opt_mounthost:
963 string = match_strdup(args);
964 if (string == NULL)
965 goto out_nomem;
966 mnt->mount_server.address.sin_family = AF_INET;
967 mnt->mount_server.address.sin_addr.s_addr =
968 in_aton(string);
969 kfree(string);
970 break;
477 971
478 /* Set the pseudoflavor */ 972 case Opt_userspace:
479 if (!(data->flags & NFS_MOUNT_SECFLAVOUR)) 973 case Opt_deprecated:
480 data->pseudoflavor = RPC_AUTH_UNIX; 974 break;
481 975
482#ifndef CONFIG_NFS_V3 976 default:
483 /* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */ 977 goto out_unknown;
484 if (data->flags & NFS_MOUNT_VER3) { 978 }
485 dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__);
486 return -EPROTONOSUPPORT;
487 } 979 }
488#endif /* CONFIG_NFS_V3 */
489 980
490 /* We now require that the mount process passes the remote address */ 981 return 1;
491 if (data->addr.sin_addr.s_addr == INADDR_ANY) { 982
492 dprintk("%s: mount program didn't pass remote address!\n", 983out_nomem:
493 __FUNCTION__); 984 printk(KERN_INFO "NFS: not enough memory to parse option\n");
494 return -EINVAL; 985 return 0;
986
987out_unrec_vers:
988 printk(KERN_INFO "NFS: unrecognized NFS version number\n");
989 return 0;
990
991out_unrec_xprt:
992 printk(KERN_INFO "NFS: unrecognized transport protocol\n");
993 return 0;
994
995out_unrec_sec:
996 printk(KERN_INFO "NFS: unrecognized security flavor\n");
997 return 0;
998
999out_unknown:
1000 printk(KERN_INFO "NFS: unknown mount option: %s\n", p);
1001 return 0;
1002}
1003
1004/*
1005 * Use the remote server's MOUNT service to request the NFS file handle
1006 * corresponding to the provided path.
1007 */
1008static int nfs_try_mount(struct nfs_parsed_mount_data *args,
1009 struct nfs_fh *root_fh)
1010{
1011 struct sockaddr_in sin;
1012 int status;
1013
1014 if (args->mount_server.version == 0) {
1015 if (args->flags & NFS_MOUNT_VER3)
1016 args->mount_server.version = NFS_MNT3_VERSION;
1017 else
1018 args->mount_server.version = NFS_MNT_VERSION;
495 } 1019 }
496 1020
497 /* Prepare the root filehandle */ 1021 /*
498 if (data->flags & NFS_MOUNT_VER3) 1022 * Construct the mount server's address.
499 mntfh->size = data->root.size; 1023 */
1024 if (args->mount_server.address.sin_addr.s_addr != INADDR_ANY)
1025 sin = args->mount_server.address;
500 else 1026 else
501 mntfh->size = NFS2_FHSIZE; 1027 sin = args->nfs_server.address;
1028 if (args->mount_server.port == 0) {
1029 status = rpcb_getport_sync(&sin,
1030 args->mount_server.program,
1031 args->mount_server.version,
1032 args->mount_server.protocol);
1033 if (status < 0)
1034 goto out_err;
1035 sin.sin_port = htons(status);
1036 } else
1037 sin.sin_port = htons(args->mount_server.port);
1038
1039 /*
1040 * Now ask the mount server to map our export path
1041 * to a file handle.
1042 */
1043 status = nfs_mount((struct sockaddr *) &sin,
1044 sizeof(sin),
1045 args->nfs_server.hostname,
1046 args->nfs_server.export_path,
1047 args->mount_server.version,
1048 args->mount_server.protocol,
1049 root_fh);
1050 if (status < 0)
1051 goto out_err;
1052
1053 return status;
502 1054
503 if (mntfh->size > sizeof(mntfh->data)) { 1055out_err:
504 dprintk("%s: invalid root filehandle\n", __FUNCTION__); 1056 dfprintk(MOUNT, "NFS: unable to contact server on host "
505 return -EINVAL; 1057 NIPQUAD_FMT "\n", NIPQUAD(sin.sin_addr.s_addr));
1058 return status;
1059}
1060
1061/*
1062 * Validate the NFS2/NFS3 mount data
1063 * - fills in the mount root filehandle
1064 *
1065 * For option strings, user space handles the following behaviors:
1066 *
1067 * + DNS: mapping server host name to IP address ("addr=" option)
1068 *
1069 * + failure mode: how to behave if a mount request can't be handled
1070 * immediately ("fg/bg" option)
1071 *
1072 * + retry: how often to retry a mount request ("retry=" option)
1073 *
1074 * + breaking back: trying proto=udp after proto=tcp, v2 after v3,
1075 * mountproto=tcp after mountproto=udp, and so on
1076 *
1077 * XXX: as far as I can tell, changing the NFS program number is not
1078 * supported in the NFS client.
1079 */
1080static int nfs_validate_mount_data(struct nfs_mount_data **options,
1081 struct nfs_fh *mntfh,
1082 const char *dev_name)
1083{
1084 struct nfs_mount_data *data = *options;
1085
1086 if (data == NULL)
1087 goto out_no_data;
1088
1089 switch (data->version) {
1090 case 1:
1091 data->namlen = 0;
1092 case 2:
1093 data->bsize = 0;
1094 case 3:
1095 if (data->flags & NFS_MOUNT_VER3)
1096 goto out_no_v3;
1097 data->root.size = NFS2_FHSIZE;
1098 memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
1099 case 4:
1100 if (data->flags & NFS_MOUNT_SECFLAVOUR)
1101 goto out_no_sec;
1102 case 5:
1103 memset(data->context, 0, sizeof(data->context));
1104 case 6:
1105 if (data->flags & NFS_MOUNT_VER3)
1106 mntfh->size = data->root.size;
1107 else
1108 mntfh->size = NFS2_FHSIZE;
1109
1110 if (mntfh->size > sizeof(mntfh->data))
1111 goto out_invalid_fh;
1112
1113 memcpy(mntfh->data, data->root.data, mntfh->size);
1114 if (mntfh->size < sizeof(mntfh->data))
1115 memset(mntfh->data + mntfh->size, 0,
1116 sizeof(mntfh->data) - mntfh->size);
1117 break;
1118 default: {
1119 unsigned int len;
1120 char *c;
1121 int status;
1122 struct nfs_parsed_mount_data args = {
1123 .flags = (NFS_MOUNT_VER3 | NFS_MOUNT_TCP),
1124 .rsize = NFS_MAX_FILE_IO_SIZE,
1125 .wsize = NFS_MAX_FILE_IO_SIZE,
1126 .timeo = 600,
1127 .retrans = 2,
1128 .acregmin = 3,
1129 .acregmax = 60,
1130 .acdirmin = 30,
1131 .acdirmax = 60,
1132 .mount_server.protocol = IPPROTO_UDP,
1133 .mount_server.program = NFS_MNT_PROGRAM,
1134 .nfs_server.protocol = IPPROTO_TCP,
1135 .nfs_server.program = NFS_PROGRAM,
1136 };
1137
1138 if (nfs_parse_mount_options((char *) *options, &args) == 0)
1139 return -EINVAL;
1140
1141 data = kzalloc(sizeof(*data), GFP_KERNEL);
1142 if (data == NULL)
1143 return -ENOMEM;
1144
1145 /*
1146 * NB: after this point, caller will free "data"
1147 * if we return an error
1148 */
1149 *options = data;
1150
1151 c = strchr(dev_name, ':');
1152 if (c == NULL)
1153 return -EINVAL;
1154 len = c - dev_name - 1;
1155 if (len > sizeof(data->hostname))
1156 return -EINVAL;
1157 strncpy(data->hostname, dev_name, len);
1158 args.nfs_server.hostname = data->hostname;
1159
1160 c++;
1161 if (strlen(c) > NFS_MAXPATHLEN)
1162 return -EINVAL;
1163 args.nfs_server.export_path = c;
1164
1165 status = nfs_try_mount(&args, mntfh);
1166 if (status)
1167 return -EINVAL;
1168
1169 /*
1170 * Translate to nfs_mount_data, which nfs_fill_super
1171 * can deal with.
1172 */
1173 data->version = 6;
1174 data->flags = args.flags;
1175 data->rsize = args.rsize;
1176 data->wsize = args.wsize;
1177 data->timeo = args.timeo;
1178 data->retrans = args.retrans;
1179 data->acregmin = args.acregmin;
1180 data->acregmax = args.acregmax;
1181 data->acdirmin = args.acdirmin;
1182 data->acdirmax = args.acdirmax;
1183 data->addr = args.nfs_server.address;
1184 data->namlen = args.namlen;
1185 data->bsize = args.bsize;
1186 data->pseudoflavor = args.auth_flavors[0];
1187
1188 break;
1189 }
506 } 1190 }
507 1191
508 memcpy(mntfh->data, data->root.data, mntfh->size); 1192 if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
509 if (mntfh->size < sizeof(mntfh->data)) 1193 data->pseudoflavor = RPC_AUTH_UNIX;
510 memset(mntfh->data + mntfh->size, 0, 1194
511 sizeof(mntfh->data) - mntfh->size); 1195#ifndef CONFIG_NFS_V3
1196 if (data->flags & NFS_MOUNT_VER3)
1197 goto out_v3_not_compiled;
1198#endif /* !CONFIG_NFS_V3 */
1199
1200 if (!nfs_verify_server_address((struct sockaddr *) &data->addr))
1201 goto out_no_address;
512 1202
513 return 0; 1203 return 0;
1204
1205out_no_data:
1206 dfprintk(MOUNT, "NFS: mount program didn't pass any mount data\n");
1207 return -EINVAL;
1208
1209out_no_v3:
1210 dfprintk(MOUNT, "NFS: nfs_mount_data version %d does not support v3\n",
1211 data->version);
1212 return -EINVAL;
1213
1214out_no_sec:
1215 dfprintk(MOUNT, "NFS: nfs_mount_data version supports only AUTH_SYS\n");
1216 return -EINVAL;
1217
1218#ifndef CONFIG_NFS_V3
1219out_v3_not_compiled:
1220 dfprintk(MOUNT, "NFS: NFSv3 is not compiled into kernel\n");
1221 return -EPROTONOSUPPORT;
1222#endif /* !CONFIG_NFS_V3 */
1223
1224out_no_address:
1225 dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n");
1226 return -EINVAL;
1227
1228out_invalid_fh:
1229 dfprintk(MOUNT, "NFS: invalid root filehandle\n");
1230 return -EINVAL;
514} 1231}
515 1232
516/* 1233/*
@@ -600,13 +1317,51 @@ static int nfs_compare_super(struct super_block *sb, void *data)
600{ 1317{
601 struct nfs_server *server = data, *old = NFS_SB(sb); 1318 struct nfs_server *server = data, *old = NFS_SB(sb);
602 1319
603 if (old->nfs_client != server->nfs_client) 1320 if (memcmp(&old->nfs_client->cl_addr,
1321 &server->nfs_client->cl_addr,
1322 sizeof(old->nfs_client->cl_addr)) != 0)
1323 return 0;
1324 /* Note: NFS_MOUNT_UNSHARED == NFS4_MOUNT_UNSHARED */
1325 if (old->flags & NFS_MOUNT_UNSHARED)
604 return 0; 1326 return 0;
605 if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0) 1327 if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0)
606 return 0; 1328 return 0;
607 return 1; 1329 return 1;
608} 1330}
609 1331
1332#define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS)
1333
1334static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags)
1335{
1336 const struct nfs_server *a = s->s_fs_info;
1337 const struct rpc_clnt *clnt_a = a->client;
1338 const struct rpc_clnt *clnt_b = b->client;
1339
1340 if ((s->s_flags & NFS_MS_MASK) != (flags & NFS_MS_MASK))
1341 goto Ebusy;
1342 if (a->nfs_client != b->nfs_client)
1343 goto Ebusy;
1344 if (a->flags != b->flags)
1345 goto Ebusy;
1346 if (a->wsize != b->wsize)
1347 goto Ebusy;
1348 if (a->rsize != b->rsize)
1349 goto Ebusy;
1350 if (a->acregmin != b->acregmin)
1351 goto Ebusy;
1352 if (a->acregmax != b->acregmax)
1353 goto Ebusy;
1354 if (a->acdirmin != b->acdirmin)
1355 goto Ebusy;
1356 if (a->acdirmax != b->acdirmax)
1357 goto Ebusy;
1358 if (clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor)
1359 goto Ebusy;
1360 return 0;
1361Ebusy:
1362 return -EBUSY;
1363}
1364
610static int nfs_get_sb(struct file_system_type *fs_type, 1365static int nfs_get_sb(struct file_system_type *fs_type,
611 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) 1366 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
612{ 1367{
@@ -615,30 +1370,37 @@ static int nfs_get_sb(struct file_system_type *fs_type,
615 struct nfs_fh mntfh; 1370 struct nfs_fh mntfh;
616 struct nfs_mount_data *data = raw_data; 1371 struct nfs_mount_data *data = raw_data;
617 struct dentry *mntroot; 1372 struct dentry *mntroot;
1373 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
618 int error; 1374 int error;
619 1375
620 /* Validate the mount data */ 1376 /* Validate the mount data */
621 error = nfs_validate_mount_data(data, &mntfh); 1377 error = nfs_validate_mount_data(&data, &mntfh, dev_name);
622 if (error < 0) 1378 if (error < 0)
623 return error; 1379 goto out;
624 1380
625 /* Get a volume representation */ 1381 /* Get a volume representation */
626 server = nfs_create_server(data, &mntfh); 1382 server = nfs_create_server(data, &mntfh);
627 if (IS_ERR(server)) { 1383 if (IS_ERR(server)) {
628 error = PTR_ERR(server); 1384 error = PTR_ERR(server);
629 goto out_err_noserver; 1385 goto out;
630 } 1386 }
631 1387
1388 if (server->flags & NFS_MOUNT_UNSHARED)
1389 compare_super = NULL;
1390
632 /* Get a superblock - note that we may end up sharing one that already exists */ 1391 /* Get a superblock - note that we may end up sharing one that already exists */
633 s = sget(fs_type, nfs_compare_super, nfs_set_super, server); 1392 s = sget(fs_type, compare_super, nfs_set_super, server);
634 if (IS_ERR(s)) { 1393 if (IS_ERR(s)) {
635 error = PTR_ERR(s); 1394 error = PTR_ERR(s);
636 goto out_err_nosb; 1395 goto out_err_nosb;
637 } 1396 }
638 1397
639 if (s->s_fs_info != server) { 1398 if (s->s_fs_info != server) {
1399 error = nfs_compare_mount_options(s, server, flags);
640 nfs_free_server(server); 1400 nfs_free_server(server);
641 server = NULL; 1401 server = NULL;
1402 if (error < 0)
1403 goto error_splat_super;
642 } 1404 }
643 1405
644 if (!s->s_root) { 1406 if (!s->s_root) {
@@ -656,17 +1418,21 @@ static int nfs_get_sb(struct file_system_type *fs_type,
656 s->s_flags |= MS_ACTIVE; 1418 s->s_flags |= MS_ACTIVE;
657 mnt->mnt_sb = s; 1419 mnt->mnt_sb = s;
658 mnt->mnt_root = mntroot; 1420 mnt->mnt_root = mntroot;
659 return 0; 1421 error = 0;
1422
1423out:
1424 if (data != raw_data)
1425 kfree(data);
1426 return error;
660 1427
661out_err_nosb: 1428out_err_nosb:
662 nfs_free_server(server); 1429 nfs_free_server(server);
663out_err_noserver: 1430 goto out;
664 return error;
665 1431
666error_splat_super: 1432error_splat_super:
667 up_write(&s->s_umount); 1433 up_write(&s->s_umount);
668 deactivate_super(s); 1434 deactivate_super(s);
669 return error; 1435 goto out;
670} 1436}
671 1437
672/* 1438/*
@@ -691,6 +1457,7 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
691 struct super_block *s; 1457 struct super_block *s;
692 struct nfs_server *server; 1458 struct nfs_server *server;
693 struct dentry *mntroot; 1459 struct dentry *mntroot;
1460 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
694 int error; 1461 int error;
695 1462
696 dprintk("--> nfs_xdev_get_sb()\n"); 1463 dprintk("--> nfs_xdev_get_sb()\n");
@@ -702,16 +1469,22 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
702 goto out_err_noserver; 1469 goto out_err_noserver;
703 } 1470 }
704 1471
1472 if (server->flags & NFS_MOUNT_UNSHARED)
1473 compare_super = NULL;
1474
705 /* Get a superblock - note that we may end up sharing one that already exists */ 1475 /* Get a superblock - note that we may end up sharing one that already exists */
706 s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); 1476 s = sget(&nfs_fs_type, compare_super, nfs_set_super, server);
707 if (IS_ERR(s)) { 1477 if (IS_ERR(s)) {
708 error = PTR_ERR(s); 1478 error = PTR_ERR(s);
709 goto out_err_nosb; 1479 goto out_err_nosb;
710 } 1480 }
711 1481
712 if (s->s_fs_info != server) { 1482 if (s->s_fs_info != server) {
1483 error = nfs_compare_mount_options(s, server, flags);
713 nfs_free_server(server); 1484 nfs_free_server(server);
714 server = NULL; 1485 server = NULL;
1486 if (error < 0)
1487 goto error_splat_super;
715 } 1488 }
716 1489
717 if (!s->s_root) { 1490 if (!s->s_root) {
@@ -772,25 +1545,164 @@ static void nfs4_fill_super(struct super_block *sb)
772 nfs_initialise_sb(sb); 1545 nfs_initialise_sb(sb);
773} 1546}
774 1547
775static void *nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen) 1548/*
1549 * Validate NFSv4 mount options
1550 */
1551static int nfs4_validate_mount_data(struct nfs4_mount_data **options,
1552 const char *dev_name,
1553 struct sockaddr_in *addr,
1554 rpc_authflavor_t *authflavour,
1555 char **hostname,
1556 char **mntpath,
1557 char **ip_addr)
776{ 1558{
777 void *p = NULL; 1559 struct nfs4_mount_data *data = *options;
778 1560 char *c;
779 if (!src->len) 1561
780 return ERR_PTR(-EINVAL); 1562 if (data == NULL)
781 if (src->len < maxlen) 1563 goto out_no_data;
782 maxlen = src->len; 1564
783 if (dst == NULL) { 1565 switch (data->version) {
784 p = dst = kmalloc(maxlen + 1, GFP_KERNEL); 1566 case 1:
785 if (p == NULL) 1567 if (data->host_addrlen != sizeof(*addr))
786 return ERR_PTR(-ENOMEM); 1568 goto out_no_address;
787 } 1569 if (copy_from_user(addr, data->host_addr, sizeof(*addr)))
788 if (copy_from_user(dst, src->data, maxlen)) { 1570 return -EFAULT;
789 kfree(p); 1571 if (addr->sin_port == 0)
790 return ERR_PTR(-EFAULT); 1572 addr->sin_port = htons(NFS_PORT);
1573 if (!nfs_verify_server_address((struct sockaddr *) addr))
1574 goto out_no_address;
1575
1576 switch (data->auth_flavourlen) {
1577 case 0:
1578 *authflavour = RPC_AUTH_UNIX;
1579 break;
1580 case 1:
1581 if (copy_from_user(authflavour, data->auth_flavours,
1582 sizeof(*authflavour)))
1583 return -EFAULT;
1584 break;
1585 default:
1586 goto out_inval_auth;
1587 }
1588
1589 c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN);
1590 if (IS_ERR(c))
1591 return PTR_ERR(c);
1592 *hostname = c;
1593
1594 c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN);
1595 if (IS_ERR(c))
1596 return PTR_ERR(c);
1597 *mntpath = c;
1598 dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *mntpath);
1599
1600 c = strndup_user(data->client_addr.data, 16);
1601 if (IS_ERR(c))
1602 return PTR_ERR(c);
1603 *ip_addr = c;
1604
1605 break;
1606 default: {
1607 unsigned int len;
1608 struct nfs_parsed_mount_data args = {
1609 .rsize = NFS_MAX_FILE_IO_SIZE,
1610 .wsize = NFS_MAX_FILE_IO_SIZE,
1611 .timeo = 600,
1612 .retrans = 2,
1613 .acregmin = 3,
1614 .acregmax = 60,
1615 .acdirmin = 30,
1616 .acdirmax = 60,
1617 .nfs_server.protocol = IPPROTO_TCP,
1618 };
1619
1620 if (nfs_parse_mount_options((char *) *options, &args) == 0)
1621 return -EINVAL;
1622
1623 if (!nfs_verify_server_address((struct sockaddr *)
1624 &args.nfs_server.address))
1625 return -EINVAL;
1626 *addr = args.nfs_server.address;
1627
1628 switch (args.auth_flavor_len) {
1629 case 0:
1630 *authflavour = RPC_AUTH_UNIX;
1631 break;
1632 case 1:
1633 *authflavour = (rpc_authflavor_t) args.auth_flavors[0];
1634 break;
1635 default:
1636 goto out_inval_auth;
1637 }
1638
1639 /*
1640 * Translate to nfs4_mount_data, which nfs4_fill_super
1641 * can deal with.
1642 */
1643 data = kzalloc(sizeof(*data), GFP_KERNEL);
1644 if (data == NULL)
1645 return -ENOMEM;
1646 *options = data;
1647
1648 data->version = 1;
1649 data->flags = args.flags & NFS4_MOUNT_FLAGMASK;
1650 data->rsize = args.rsize;
1651 data->wsize = args.wsize;
1652 data->timeo = args.timeo;
1653 data->retrans = args.retrans;
1654 data->acregmin = args.acregmin;
1655 data->acregmax = args.acregmax;
1656 data->acdirmin = args.acdirmin;
1657 data->acdirmax = args.acdirmax;
1658 data->proto = args.nfs_server.protocol;
1659
1660 /*
1661 * Split "dev_name" into "hostname:mntpath".
1662 */
1663 c = strchr(dev_name, ':');
1664 if (c == NULL)
1665 return -EINVAL;
1666 /* while calculating len, pretend ':' is '\0' */
1667 len = c - dev_name;
1668 if (len > NFS4_MAXNAMLEN)
1669 return -EINVAL;
1670 *hostname = kzalloc(len, GFP_KERNEL);
1671 if (*hostname == NULL)
1672 return -ENOMEM;
1673 strncpy(*hostname, dev_name, len - 1);
1674
1675 c++; /* step over the ':' */
1676 len = strlen(c);
1677 if (len > NFS4_MAXPATHLEN)
1678 return -EINVAL;
1679 *mntpath = kzalloc(len + 1, GFP_KERNEL);
1680 if (*mntpath == NULL)
1681 return -ENOMEM;
1682 strncpy(*mntpath, c, len);
1683
1684 dprintk("MNTPATH: %s\n", *mntpath);
1685
1686 *ip_addr = args.client_address;
1687
1688 break;
1689 }
791 } 1690 }
792 dst[maxlen] = '\0'; 1691
793 return dst; 1692 return 0;
1693
1694out_no_data:
1695 dfprintk(MOUNT, "NFS4: mount program didn't pass any mount data\n");
1696 return -EINVAL;
1697
1698out_inval_auth:
1699 dfprintk(MOUNT, "NFS4: Invalid number of RPC auth flavours %d\n",
1700 data->auth_flavourlen);
1701 return -EINVAL;
1702
1703out_no_address:
1704 dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n");
1705 return -EINVAL;
794} 1706}
795 1707
796/* 1708/*
@@ -806,81 +1718,29 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
806 rpc_authflavor_t authflavour; 1718 rpc_authflavor_t authflavour;
807 struct nfs_fh mntfh; 1719 struct nfs_fh mntfh;
808 struct dentry *mntroot; 1720 struct dentry *mntroot;
809 char *mntpath = NULL, *hostname = NULL, ip_addr[16]; 1721 char *mntpath = NULL, *hostname = NULL, *ip_addr = NULL;
810 void *p; 1722 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
811 int error; 1723 int error;
812 1724
813 if (data == NULL) { 1725 /* Validate the mount data */
814 dprintk("%s: missing data argument\n", __FUNCTION__); 1726 error = nfs4_validate_mount_data(&data, dev_name, &addr, &authflavour,
815 return -EINVAL; 1727 &hostname, &mntpath, &ip_addr);
816 } 1728 if (error < 0)
817 if (data->version <= 0 || data->version > NFS4_MOUNT_VERSION) { 1729 goto out;
818 dprintk("%s: bad mount version\n", __FUNCTION__);
819 return -EINVAL;
820 }
821
822 /* We now require that the mount process passes the remote address */
823 if (data->host_addrlen != sizeof(addr))
824 return -EINVAL;
825
826 if (copy_from_user(&addr, data->host_addr, sizeof(addr)))
827 return -EFAULT;
828
829 if (addr.sin_family != AF_INET ||
830 addr.sin_addr.s_addr == INADDR_ANY
831 ) {
832 dprintk("%s: mount program didn't pass remote IP address!\n",
833 __FUNCTION__);
834 return -EINVAL;
835 }
836 /* RFC3530: The default port for NFS is 2049 */
837 if (addr.sin_port == 0)
838 addr.sin_port = htons(NFS_PORT);
839
840 /* Grab the authentication type */
841 authflavour = RPC_AUTH_UNIX;
842 if (data->auth_flavourlen != 0) {
843 if (data->auth_flavourlen != 1) {
844 dprintk("%s: Invalid number of RPC auth flavours %d.\n",
845 __FUNCTION__, data->auth_flavourlen);
846 error = -EINVAL;
847 goto out_err_noserver;
848 }
849
850 if (copy_from_user(&authflavour, data->auth_flavours,
851 sizeof(authflavour))) {
852 error = -EFAULT;
853 goto out_err_noserver;
854 }
855 }
856
857 p = nfs_copy_user_string(NULL, &data->hostname, 256);
858 if (IS_ERR(p))
859 goto out_err;
860 hostname = p;
861
862 p = nfs_copy_user_string(NULL, &data->mnt_path, 1024);
863 if (IS_ERR(p))
864 goto out_err;
865 mntpath = p;
866
867 dprintk("MNTPATH: %s\n", mntpath);
868
869 p = nfs_copy_user_string(ip_addr, &data->client_addr,
870 sizeof(ip_addr) - 1);
871 if (IS_ERR(p))
872 goto out_err;
873 1730
874 /* Get a volume representation */ 1731 /* Get a volume representation */
875 server = nfs4_create_server(data, hostname, &addr, mntpath, ip_addr, 1732 server = nfs4_create_server(data, hostname, &addr, mntpath, ip_addr,
876 authflavour, &mntfh); 1733 authflavour, &mntfh);
877 if (IS_ERR(server)) { 1734 if (IS_ERR(server)) {
878 error = PTR_ERR(server); 1735 error = PTR_ERR(server);
879 goto out_err_noserver; 1736 goto out;
880 } 1737 }
881 1738
1739 if (server->flags & NFS4_MOUNT_UNSHARED)
1740 compare_super = NULL;
1741
882 /* Get a superblock - note that we may end up sharing one that already exists */ 1742 /* Get a superblock - note that we may end up sharing one that already exists */
883 s = sget(fs_type, nfs_compare_super, nfs_set_super, server); 1743 s = sget(fs_type, compare_super, nfs_set_super, server);
884 if (IS_ERR(s)) { 1744 if (IS_ERR(s)) {
885 error = PTR_ERR(s); 1745 error = PTR_ERR(s);
886 goto out_free; 1746 goto out_free;
@@ -906,25 +1766,22 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
906 s->s_flags |= MS_ACTIVE; 1766 s->s_flags |= MS_ACTIVE;
907 mnt->mnt_sb = s; 1767 mnt->mnt_sb = s;
908 mnt->mnt_root = mntroot; 1768 mnt->mnt_root = mntroot;
1769 error = 0;
1770
1771out:
1772 kfree(ip_addr);
909 kfree(mntpath); 1773 kfree(mntpath);
910 kfree(hostname); 1774 kfree(hostname);
911 return 0; 1775 return error;
912
913out_err:
914 error = PTR_ERR(p);
915 goto out_err_noserver;
916 1776
917out_free: 1777out_free:
918 nfs_free_server(server); 1778 nfs_free_server(server);
919out_err_noserver: 1779 goto out;
920 kfree(mntpath);
921 kfree(hostname);
922 return error;
923 1780
924error_splat_super: 1781error_splat_super:
925 up_write(&s->s_umount); 1782 up_write(&s->s_umount);
926 deactivate_super(s); 1783 deactivate_super(s);
927 goto out_err_noserver; 1784 goto out;
928} 1785}
929 1786
930static void nfs4_kill_super(struct super_block *sb) 1787static void nfs4_kill_super(struct super_block *sb)
@@ -949,6 +1806,7 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
949 struct super_block *s; 1806 struct super_block *s;
950 struct nfs_server *server; 1807 struct nfs_server *server;
951 struct dentry *mntroot; 1808 struct dentry *mntroot;
1809 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
952 int error; 1810 int error;
953 1811
954 dprintk("--> nfs4_xdev_get_sb()\n"); 1812 dprintk("--> nfs4_xdev_get_sb()\n");
@@ -960,8 +1818,11 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
960 goto out_err_noserver; 1818 goto out_err_noserver;
961 } 1819 }
962 1820
1821 if (server->flags & NFS4_MOUNT_UNSHARED)
1822 compare_super = NULL;
1823
963 /* Get a superblock - note that we may end up sharing one that already exists */ 1824 /* Get a superblock - note that we may end up sharing one that already exists */
964 s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); 1825 s = sget(&nfs_fs_type, compare_super, nfs_set_super, server);
965 if (IS_ERR(s)) { 1826 if (IS_ERR(s)) {
966 error = PTR_ERR(s); 1827 error = PTR_ERR(s);
967 goto out_err_nosb; 1828 goto out_err_nosb;
@@ -1016,6 +1877,7 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
1016 struct nfs_server *server; 1877 struct nfs_server *server;
1017 struct dentry *mntroot; 1878 struct dentry *mntroot;
1018 struct nfs_fh mntfh; 1879 struct nfs_fh mntfh;
1880 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
1019 int error; 1881 int error;
1020 1882
1021 dprintk("--> nfs4_referral_get_sb()\n"); 1883 dprintk("--> nfs4_referral_get_sb()\n");
@@ -1027,8 +1889,11 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
1027 goto out_err_noserver; 1889 goto out_err_noserver;
1028 } 1890 }
1029 1891
1892 if (server->flags & NFS4_MOUNT_UNSHARED)
1893 compare_super = NULL;
1894
1030 /* Get a superblock - note that we may end up sharing one that already exists */ 1895 /* Get a superblock - note that we may end up sharing one that already exists */
1031 s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); 1896 s = sget(&nfs_fs_type, compare_super, nfs_set_super, server);
1032 if (IS_ERR(s)) { 1897 if (IS_ERR(s)) {
1033 error = PTR_ERR(s); 1898 error = PTR_ERR(s);
1034 goto out_err_nosb; 1899 goto out_err_nosb;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index af344a158e01..73ac992ece85 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -117,19 +117,19 @@ static struct nfs_page *nfs_page_find_request_locked(struct page *page)
117 if (PagePrivate(page)) { 117 if (PagePrivate(page)) {
118 req = (struct nfs_page *)page_private(page); 118 req = (struct nfs_page *)page_private(page);
119 if (req != NULL) 119 if (req != NULL)
120 atomic_inc(&req->wb_count); 120 kref_get(&req->wb_kref);
121 } 121 }
122 return req; 122 return req;
123} 123}
124 124
125static struct nfs_page *nfs_page_find_request(struct page *page) 125static struct nfs_page *nfs_page_find_request(struct page *page)
126{ 126{
127 struct inode *inode = page->mapping->host;
127 struct nfs_page *req = NULL; 128 struct nfs_page *req = NULL;
128 spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock;
129 129
130 spin_lock(req_lock); 130 spin_lock(&inode->i_lock);
131 req = nfs_page_find_request_locked(page); 131 req = nfs_page_find_request_locked(page);
132 spin_unlock(req_lock); 132 spin_unlock(&inode->i_lock);
133 return req; 133 return req;
134} 134}
135 135
@@ -191,8 +191,6 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
191 } 191 }
192 /* Update file length */ 192 /* Update file length */
193 nfs_grow_file(page, offset, count); 193 nfs_grow_file(page, offset, count);
194 /* Set the PG_uptodate flag? */
195 nfs_mark_uptodate(page, offset, count);
196 nfs_unlock_request(req); 194 nfs_unlock_request(req);
197 return 0; 195 return 0;
198} 196}
@@ -253,16 +251,16 @@ static void nfs_end_page_writeback(struct page *page)
253static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, 251static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
254 struct page *page) 252 struct page *page)
255{ 253{
254 struct inode *inode = page->mapping->host;
255 struct nfs_inode *nfsi = NFS_I(inode);
256 struct nfs_page *req; 256 struct nfs_page *req;
257 struct nfs_inode *nfsi = NFS_I(page->mapping->host);
258 spinlock_t *req_lock = &nfsi->req_lock;
259 int ret; 257 int ret;
260 258
261 spin_lock(req_lock); 259 spin_lock(&inode->i_lock);
262 for(;;) { 260 for(;;) {
263 req = nfs_page_find_request_locked(page); 261 req = nfs_page_find_request_locked(page);
264 if (req == NULL) { 262 if (req == NULL) {
265 spin_unlock(req_lock); 263 spin_unlock(&inode->i_lock);
266 return 1; 264 return 1;
267 } 265 }
268 if (nfs_lock_request_dontget(req)) 266 if (nfs_lock_request_dontget(req))
@@ -272,28 +270,28 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
272 * succeed provided that someone hasn't already marked the 270 * succeed provided that someone hasn't already marked the
273 * request as dirty (in which case we don't care). 271 * request as dirty (in which case we don't care).
274 */ 272 */
275 spin_unlock(req_lock); 273 spin_unlock(&inode->i_lock);
276 ret = nfs_wait_on_request(req); 274 ret = nfs_wait_on_request(req);
277 nfs_release_request(req); 275 nfs_release_request(req);
278 if (ret != 0) 276 if (ret != 0)
279 return ret; 277 return ret;
280 spin_lock(req_lock); 278 spin_lock(&inode->i_lock);
281 } 279 }
282 if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) { 280 if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) {
283 /* This request is marked for commit */ 281 /* This request is marked for commit */
284 spin_unlock(req_lock); 282 spin_unlock(&inode->i_lock);
285 nfs_unlock_request(req); 283 nfs_unlock_request(req);
286 nfs_pageio_complete(pgio); 284 nfs_pageio_complete(pgio);
287 return 1; 285 return 1;
288 } 286 }
289 if (nfs_set_page_writeback(page) != 0) { 287 if (nfs_set_page_writeback(page) != 0) {
290 spin_unlock(req_lock); 288 spin_unlock(&inode->i_lock);
291 BUG(); 289 BUG();
292 } 290 }
293 radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, 291 radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index,
294 NFS_PAGE_TAG_WRITEBACK); 292 NFS_PAGE_TAG_LOCKED);
295 ret = test_bit(PG_NEED_FLUSH, &req->wb_flags); 293 ret = test_bit(PG_NEED_FLUSH, &req->wb_flags);
296 spin_unlock(req_lock); 294 spin_unlock(&inode->i_lock);
297 nfs_pageio_add_request(pgio, req); 295 nfs_pageio_add_request(pgio, req);
298 return ret; 296 return ret;
299} 297}
@@ -400,7 +398,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
400 if (PageDirty(req->wb_page)) 398 if (PageDirty(req->wb_page))
401 set_bit(PG_NEED_FLUSH, &req->wb_flags); 399 set_bit(PG_NEED_FLUSH, &req->wb_flags);
402 nfsi->npages++; 400 nfsi->npages++;
403 atomic_inc(&req->wb_count); 401 kref_get(&req->wb_kref);
404 return 0; 402 return 0;
405} 403}
406 404
@@ -409,12 +407,12 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
409 */ 407 */
410static void nfs_inode_remove_request(struct nfs_page *req) 408static void nfs_inode_remove_request(struct nfs_page *req)
411{ 409{
412 struct inode *inode = req->wb_context->dentry->d_inode; 410 struct inode *inode = req->wb_context->path.dentry->d_inode;
413 struct nfs_inode *nfsi = NFS_I(inode); 411 struct nfs_inode *nfsi = NFS_I(inode);
414 412
415 BUG_ON (!NFS_WBACK_BUSY(req)); 413 BUG_ON (!NFS_WBACK_BUSY(req));
416 414
417 spin_lock(&nfsi->req_lock); 415 spin_lock(&inode->i_lock);
418 set_page_private(req->wb_page, 0); 416 set_page_private(req->wb_page, 0);
419 ClearPagePrivate(req->wb_page); 417 ClearPagePrivate(req->wb_page);
420 radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); 418 radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
@@ -422,11 +420,11 @@ static void nfs_inode_remove_request(struct nfs_page *req)
422 __set_page_dirty_nobuffers(req->wb_page); 420 __set_page_dirty_nobuffers(req->wb_page);
423 nfsi->npages--; 421 nfsi->npages--;
424 if (!nfsi->npages) { 422 if (!nfsi->npages) {
425 spin_unlock(&nfsi->req_lock); 423 spin_unlock(&inode->i_lock);
426 nfs_end_data_update(inode); 424 nfs_end_data_update(inode);
427 iput(inode); 425 iput(inode);
428 } else 426 } else
429 spin_unlock(&nfsi->req_lock); 427 spin_unlock(&inode->i_lock);
430 nfs_clear_request(req); 428 nfs_clear_request(req);
431 nfs_release_request(req); 429 nfs_release_request(req);
432} 430}
@@ -457,14 +455,16 @@ nfs_dirty_request(struct nfs_page *req)
457static void 455static void
458nfs_mark_request_commit(struct nfs_page *req) 456nfs_mark_request_commit(struct nfs_page *req)
459{ 457{
460 struct inode *inode = req->wb_context->dentry->d_inode; 458 struct inode *inode = req->wb_context->path.dentry->d_inode;
461 struct nfs_inode *nfsi = NFS_I(inode); 459 struct nfs_inode *nfsi = NFS_I(inode);
462 460
463 spin_lock(&nfsi->req_lock); 461 spin_lock(&inode->i_lock);
464 nfs_list_add_request(req, &nfsi->commit);
465 nfsi->ncommit++; 462 nfsi->ncommit++;
466 set_bit(PG_NEED_COMMIT, &(req)->wb_flags); 463 set_bit(PG_NEED_COMMIT, &(req)->wb_flags);
467 spin_unlock(&nfsi->req_lock); 464 radix_tree_tag_set(&nfsi->nfs_page_tree,
465 req->wb_index,
466 NFS_PAGE_TAG_COMMIT);
467 spin_unlock(&inode->i_lock);
468 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); 468 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
469 __mark_inode_dirty(inode, I_DIRTY_DATASYNC); 469 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
470} 470}
@@ -526,18 +526,18 @@ static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, u
526 idx_end = idx_start + npages - 1; 526 idx_end = idx_start + npages - 1;
527 527
528 next = idx_start; 528 next = idx_start;
529 while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) { 529 while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_LOCKED)) {
530 if (req->wb_index > idx_end) 530 if (req->wb_index > idx_end)
531 break; 531 break;
532 532
533 next = req->wb_index + 1; 533 next = req->wb_index + 1;
534 BUG_ON(!NFS_WBACK_BUSY(req)); 534 BUG_ON(!NFS_WBACK_BUSY(req));
535 535
536 atomic_inc(&req->wb_count); 536 kref_get(&req->wb_kref);
537 spin_unlock(&nfsi->req_lock); 537 spin_unlock(&inode->i_lock);
538 error = nfs_wait_on_request(req); 538 error = nfs_wait_on_request(req);
539 nfs_release_request(req); 539 nfs_release_request(req);
540 spin_lock(&nfsi->req_lock); 540 spin_lock(&inode->i_lock);
541 if (error < 0) 541 if (error < 0)
542 return error; 542 return error;
543 res++; 543 res++;
@@ -577,10 +577,9 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, u
577 int res = 0; 577 int res = 0;
578 578
579 if (nfsi->ncommit != 0) { 579 if (nfsi->ncommit != 0) {
580 res = nfs_scan_list(nfsi, &nfsi->commit, dst, idx_start, npages); 580 res = nfs_scan_list(nfsi, dst, idx_start, npages,
581 NFS_PAGE_TAG_COMMIT);
581 nfsi->ncommit -= res; 582 nfsi->ncommit -= res;
582 if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit))
583 printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n");
584 } 583 }
585 return res; 584 return res;
586} 585}
@@ -603,7 +602,6 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
603{ 602{
604 struct address_space *mapping = page->mapping; 603 struct address_space *mapping = page->mapping;
605 struct inode *inode = mapping->host; 604 struct inode *inode = mapping->host;
606 struct nfs_inode *nfsi = NFS_I(inode);
607 struct nfs_page *req, *new = NULL; 605 struct nfs_page *req, *new = NULL;
608 pgoff_t rqend, end; 606 pgoff_t rqend, end;
609 607
@@ -613,13 +611,13 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
613 /* Loop over all inode entries and see if we find 611 /* Loop over all inode entries and see if we find
614 * A request for the page we wish to update 612 * A request for the page we wish to update
615 */ 613 */
616 spin_lock(&nfsi->req_lock); 614 spin_lock(&inode->i_lock);
617 req = nfs_page_find_request_locked(page); 615 req = nfs_page_find_request_locked(page);
618 if (req) { 616 if (req) {
619 if (!nfs_lock_request_dontget(req)) { 617 if (!nfs_lock_request_dontget(req)) {
620 int error; 618 int error;
621 619
622 spin_unlock(&nfsi->req_lock); 620 spin_unlock(&inode->i_lock);
623 error = nfs_wait_on_request(req); 621 error = nfs_wait_on_request(req);
624 nfs_release_request(req); 622 nfs_release_request(req);
625 if (error < 0) { 623 if (error < 0) {
@@ -629,7 +627,7 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
629 } 627 }
630 continue; 628 continue;
631 } 629 }
632 spin_unlock(&nfsi->req_lock); 630 spin_unlock(&inode->i_lock);
633 if (new) 631 if (new)
634 nfs_release_request(new); 632 nfs_release_request(new);
635 break; 633 break;
@@ -640,14 +638,14 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
640 nfs_lock_request_dontget(new); 638 nfs_lock_request_dontget(new);
641 error = nfs_inode_add_request(inode, new); 639 error = nfs_inode_add_request(inode, new);
642 if (error) { 640 if (error) {
643 spin_unlock(&nfsi->req_lock); 641 spin_unlock(&inode->i_lock);
644 nfs_unlock_request(new); 642 nfs_unlock_request(new);
645 return ERR_PTR(error); 643 return ERR_PTR(error);
646 } 644 }
647 spin_unlock(&nfsi->req_lock); 645 spin_unlock(&inode->i_lock);
648 return new; 646 return new;
649 } 647 }
650 spin_unlock(&nfsi->req_lock); 648 spin_unlock(&inode->i_lock);
651 649
652 new = nfs_create_request(ctx, inode, page, offset, bytes); 650 new = nfs_create_request(ctx, inode, page, offset, bytes);
653 if (IS_ERR(new)) 651 if (IS_ERR(new))
@@ -751,12 +749,17 @@ int nfs_updatepage(struct file *file, struct page *page,
751static void nfs_writepage_release(struct nfs_page *req) 749static void nfs_writepage_release(struct nfs_page *req)
752{ 750{
753 751
754 if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) { 752 if (PageError(req->wb_page)) {
753 nfs_end_page_writeback(req->wb_page);
754 nfs_inode_remove_request(req);
755 } else if (!nfs_reschedule_unstable_write(req)) {
756 /* Set the PG_uptodate flag */
757 nfs_mark_uptodate(req->wb_page, req->wb_pgbase, req->wb_bytes);
755 nfs_end_page_writeback(req->wb_page); 758 nfs_end_page_writeback(req->wb_page);
756 nfs_inode_remove_request(req); 759 nfs_inode_remove_request(req);
757 } else 760 } else
758 nfs_end_page_writeback(req->wb_page); 761 nfs_end_page_writeback(req->wb_page);
759 nfs_clear_page_writeback(req); 762 nfs_clear_page_tag_locked(req);
760} 763}
761 764
762static inline int flush_task_priority(int how) 765static inline int flush_task_priority(int how)
@@ -786,7 +789,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
786 * NB: take care not to mess about with data->commit et al. */ 789 * NB: take care not to mess about with data->commit et al. */
787 790
788 data->req = req; 791 data->req = req;
789 data->inode = inode = req->wb_context->dentry->d_inode; 792 data->inode = inode = req->wb_context->path.dentry->d_inode;
790 data->cred = req->wb_context->cred; 793 data->cred = req->wb_context->cred;
791 794
792 data->args.fh = NFS_FH(inode); 795 data->args.fh = NFS_FH(inode);
@@ -885,7 +888,7 @@ out_bad:
885 } 888 }
886 nfs_redirty_request(req); 889 nfs_redirty_request(req);
887 nfs_end_page_writeback(req->wb_page); 890 nfs_end_page_writeback(req->wb_page);
888 nfs_clear_page_writeback(req); 891 nfs_clear_page_tag_locked(req);
889 return -ENOMEM; 892 return -ENOMEM;
890} 893}
891 894
@@ -928,7 +931,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i
928 nfs_list_remove_request(req); 931 nfs_list_remove_request(req);
929 nfs_redirty_request(req); 932 nfs_redirty_request(req);
930 nfs_end_page_writeback(req->wb_page); 933 nfs_end_page_writeback(req->wb_page);
931 nfs_clear_page_writeback(req); 934 nfs_clear_page_tag_locked(req);
932 } 935 }
933 return -ENOMEM; 936 return -ENOMEM;
934} 937}
@@ -954,8 +957,8 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
954 struct page *page = req->wb_page; 957 struct page *page = req->wb_page;
955 958
956 dprintk("NFS: write (%s/%Ld %d@%Ld)", 959 dprintk("NFS: write (%s/%Ld %d@%Ld)",
957 req->wb_context->dentry->d_inode->i_sb->s_id, 960 req->wb_context->path.dentry->d_inode->i_sb->s_id,
958 (long long)NFS_FILEID(req->wb_context->dentry->d_inode), 961 (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
959 req->wb_bytes, 962 req->wb_bytes,
960 (long long)req_offset(req)); 963 (long long)req_offset(req));
961 964
@@ -970,9 +973,9 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
970 } 973 }
971 974
972 if (nfs_write_need_commit(data)) { 975 if (nfs_write_need_commit(data)) {
973 spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock; 976 struct inode *inode = page->mapping->host;
974 977
975 spin_lock(req_lock); 978 spin_lock(&inode->i_lock);
976 if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) { 979 if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) {
977 /* Do nothing we need to resend the writes */ 980 /* Do nothing we need to resend the writes */
978 } else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) { 981 } else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) {
@@ -983,7 +986,7 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
983 clear_bit(PG_NEED_COMMIT, &req->wb_flags); 986 clear_bit(PG_NEED_COMMIT, &req->wb_flags);
984 dprintk(" server reboot detected\n"); 987 dprintk(" server reboot detected\n");
985 } 988 }
986 spin_unlock(req_lock); 989 spin_unlock(&inode->i_lock);
987 } else 990 } else
988 dprintk(" OK\n"); 991 dprintk(" OK\n");
989 992
@@ -1020,8 +1023,8 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
1020 page = req->wb_page; 1023 page = req->wb_page;
1021 1024
1022 dprintk("NFS: write (%s/%Ld %d@%Ld)", 1025 dprintk("NFS: write (%s/%Ld %d@%Ld)",
1023 req->wb_context->dentry->d_inode->i_sb->s_id, 1026 req->wb_context->path.dentry->d_inode->i_sb->s_id,
1024 (long long)NFS_FILEID(req->wb_context->dentry->d_inode), 1027 (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
1025 req->wb_bytes, 1028 req->wb_bytes,
1026 (long long)req_offset(req)); 1029 (long long)req_offset(req));
1027 1030
@@ -1039,12 +1042,14 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
1039 dprintk(" marked for commit\n"); 1042 dprintk(" marked for commit\n");
1040 goto next; 1043 goto next;
1041 } 1044 }
1045 /* Set the PG_uptodate flag? */
1046 nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
1042 dprintk(" OK\n"); 1047 dprintk(" OK\n");
1043remove_request: 1048remove_request:
1044 nfs_end_page_writeback(page); 1049 nfs_end_page_writeback(page);
1045 nfs_inode_remove_request(req); 1050 nfs_inode_remove_request(req);
1046 next: 1051 next:
1047 nfs_clear_page_writeback(req); 1052 nfs_clear_page_tag_locked(req);
1048 } 1053 }
1049} 1054}
1050 1055
@@ -1157,7 +1162,7 @@ static void nfs_commit_rpcsetup(struct list_head *head,
1157 1162
1158 list_splice_init(head, &data->pages); 1163 list_splice_init(head, &data->pages);
1159 first = nfs_list_entry(data->pages.next); 1164 first = nfs_list_entry(data->pages.next);
1160 inode = first->wb_context->dentry->d_inode; 1165 inode = first->wb_context->path.dentry->d_inode;
1161 1166
1162 data->inode = inode; 1167 data->inode = inode;
1163 data->cred = first->wb_context->cred; 1168 data->cred = first->wb_context->cred;
@@ -1207,7 +1212,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1207 nfs_list_remove_request(req); 1212 nfs_list_remove_request(req);
1208 nfs_mark_request_commit(req); 1213 nfs_mark_request_commit(req);
1209 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); 1214 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
1210 nfs_clear_page_writeback(req); 1215 nfs_clear_page_tag_locked(req);
1211 } 1216 }
1212 return -ENOMEM; 1217 return -ENOMEM;
1213} 1218}
@@ -1234,8 +1239,8 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
1234 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); 1239 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
1235 1240
1236 dprintk("NFS: commit (%s/%Ld %d@%Ld)", 1241 dprintk("NFS: commit (%s/%Ld %d@%Ld)",
1237 req->wb_context->dentry->d_inode->i_sb->s_id, 1242 req->wb_context->path.dentry->d_inode->i_sb->s_id,
1238 (long long)NFS_FILEID(req->wb_context->dentry->d_inode), 1243 (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
1239 req->wb_bytes, 1244 req->wb_bytes,
1240 (long long)req_offset(req)); 1245 (long long)req_offset(req));
1241 if (task->tk_status < 0) { 1246 if (task->tk_status < 0) {
@@ -1249,6 +1254,9 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
1249 * returned by the server against all stored verfs. */ 1254 * returned by the server against all stored verfs. */
1250 if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) { 1255 if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) {
1251 /* We have a match */ 1256 /* We have a match */
1257 /* Set the PG_uptodate flag */
1258 nfs_mark_uptodate(req->wb_page, req->wb_pgbase,
1259 req->wb_bytes);
1252 nfs_inode_remove_request(req); 1260 nfs_inode_remove_request(req);
1253 dprintk(" OK\n"); 1261 dprintk(" OK\n");
1254 goto next; 1262 goto next;
@@ -1257,7 +1265,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
1257 dprintk(" mismatch\n"); 1265 dprintk(" mismatch\n");
1258 nfs_redirty_request(req); 1266 nfs_redirty_request(req);
1259 next: 1267 next:
1260 nfs_clear_page_writeback(req); 1268 nfs_clear_page_tag_locked(req);
1261 } 1269 }
1262} 1270}
1263 1271
@@ -1268,13 +1276,12 @@ static const struct rpc_call_ops nfs_commit_ops = {
1268 1276
1269int nfs_commit_inode(struct inode *inode, int how) 1277int nfs_commit_inode(struct inode *inode, int how)
1270{ 1278{
1271 struct nfs_inode *nfsi = NFS_I(inode);
1272 LIST_HEAD(head); 1279 LIST_HEAD(head);
1273 int res; 1280 int res;
1274 1281
1275 spin_lock(&nfsi->req_lock); 1282 spin_lock(&inode->i_lock);
1276 res = nfs_scan_commit(inode, &head, 0, 0); 1283 res = nfs_scan_commit(inode, &head, 0, 0);
1277 spin_unlock(&nfsi->req_lock); 1284 spin_unlock(&inode->i_lock);
1278 if (res) { 1285 if (res) {
1279 int error = nfs_commit_list(inode, &head, how); 1286 int error = nfs_commit_list(inode, &head, how);
1280 if (error < 0) 1287 if (error < 0)
@@ -1292,7 +1299,6 @@ static inline int nfs_commit_list(struct inode *inode, struct list_head *head, i
1292long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how) 1299long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how)
1293{ 1300{
1294 struct inode *inode = mapping->host; 1301 struct inode *inode = mapping->host;
1295 struct nfs_inode *nfsi = NFS_I(inode);
1296 pgoff_t idx_start, idx_end; 1302 pgoff_t idx_start, idx_end;
1297 unsigned int npages = 0; 1303 unsigned int npages = 0;
1298 LIST_HEAD(head); 1304 LIST_HEAD(head);
@@ -1314,7 +1320,7 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
1314 } 1320 }
1315 } 1321 }
1316 how &= ~FLUSH_NOCOMMIT; 1322 how &= ~FLUSH_NOCOMMIT;
1317 spin_lock(&nfsi->req_lock); 1323 spin_lock(&inode->i_lock);
1318 do { 1324 do {
1319 ret = nfs_wait_on_requests_locked(inode, idx_start, npages); 1325 ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
1320 if (ret != 0) 1326 if (ret != 0)
@@ -1325,18 +1331,19 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
1325 if (pages == 0) 1331 if (pages == 0)
1326 break; 1332 break;
1327 if (how & FLUSH_INVALIDATE) { 1333 if (how & FLUSH_INVALIDATE) {
1328 spin_unlock(&nfsi->req_lock); 1334 spin_unlock(&inode->i_lock);
1329 nfs_cancel_commit_list(&head); 1335 nfs_cancel_commit_list(&head);
1330 ret = pages; 1336 ret = pages;
1331 spin_lock(&nfsi->req_lock); 1337 spin_lock(&inode->i_lock);
1332 continue; 1338 continue;
1333 } 1339 }
1334 pages += nfs_scan_commit(inode, &head, 0, 0); 1340 pages += nfs_scan_commit(inode, &head, 0, 0);
1335 spin_unlock(&nfsi->req_lock); 1341 spin_unlock(&inode->i_lock);
1336 ret = nfs_commit_list(inode, &head, how); 1342 ret = nfs_commit_list(inode, &head, how);
1337 spin_lock(&nfsi->req_lock); 1343 spin_lock(&inode->i_lock);
1344
1338 } while (ret >= 0); 1345 } while (ret >= 0);
1339 spin_unlock(&nfsi->req_lock); 1346 spin_unlock(&inode->i_lock);
1340 return ret; 1347 return ret;
1341} 1348}
1342 1349
@@ -1430,7 +1437,6 @@ int nfs_set_page_dirty(struct page *page)
1430{ 1437{
1431 struct address_space *mapping = page->mapping; 1438 struct address_space *mapping = page->mapping;
1432 struct inode *inode; 1439 struct inode *inode;
1433 spinlock_t *req_lock;
1434 struct nfs_page *req; 1440 struct nfs_page *req;
1435 int ret; 1441 int ret;
1436 1442
@@ -1439,18 +1445,17 @@ int nfs_set_page_dirty(struct page *page)
1439 inode = mapping->host; 1445 inode = mapping->host;
1440 if (!inode) 1446 if (!inode)
1441 goto out_raced; 1447 goto out_raced;
1442 req_lock = &NFS_I(inode)->req_lock; 1448 spin_lock(&inode->i_lock);
1443 spin_lock(req_lock);
1444 req = nfs_page_find_request_locked(page); 1449 req = nfs_page_find_request_locked(page);
1445 if (req != NULL) { 1450 if (req != NULL) {
1446 /* Mark any existing write requests for flushing */ 1451 /* Mark any existing write requests for flushing */
1447 ret = !test_and_set_bit(PG_NEED_FLUSH, &req->wb_flags); 1452 ret = !test_and_set_bit(PG_NEED_FLUSH, &req->wb_flags);
1448 spin_unlock(req_lock); 1453 spin_unlock(&inode->i_lock);
1449 nfs_release_request(req); 1454 nfs_release_request(req);
1450 return ret; 1455 return ret;
1451 } 1456 }
1452 ret = __set_page_dirty_nobuffers(page); 1457 ret = __set_page_dirty_nobuffers(page);
1453 spin_unlock(req_lock); 1458 spin_unlock(&inode->i_lock);
1454 return ret; 1459 return ret;
1455out_raced: 1460out_raced:
1456 return !TestSetPageDirty(page); 1461 return !TestSetPageDirty(page);
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 864090edc28b..5443c52b57aa 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -394,7 +394,6 @@ nfsd4_probe_callback(struct nfs4_client *clp)
394 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], 394 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
395 .rpc_argp = clp, 395 .rpc_argp = clp,
396 }; 396 };
397 char clientname[16];
398 int status; 397 int status;
399 398
400 if (atomic_read(&cb->cb_set)) 399 if (atomic_read(&cb->cb_set))
@@ -417,11 +416,6 @@ nfsd4_probe_callback(struct nfs4_client *clp)
417 memset(program->stats, 0, sizeof(cb->cb_stat)); 416 memset(program->stats, 0, sizeof(cb->cb_stat));
418 program->stats->program = program; 417 program->stats->program = program;
419 418
420 /* Just here to make some printk's more useful: */
421 snprintf(clientname, sizeof(clientname),
422 "%u.%u.%u.%u", NIPQUAD(addr.sin_addr));
423 args.servername = clientname;
424
425 /* Create RPC client */ 419 /* Create RPC client */
426 cb->cb_client = rpc_create(&args); 420 cb->cb_client = rpc_create(&args);
427 if (IS_ERR(cb->cb_client)) { 421 if (IS_ERR(cb->cb_client)) {
@@ -429,29 +423,23 @@ nfsd4_probe_callback(struct nfs4_client *clp)
429 goto out_err; 423 goto out_err;
430 } 424 }
431 425
432 /* Kick rpciod, put the call on the wire. */
433 if (rpciod_up() != 0)
434 goto out_clnt;
435
436 /* the task holds a reference to the nfs4_client struct */ 426 /* the task holds a reference to the nfs4_client struct */
437 atomic_inc(&clp->cl_count); 427 atomic_inc(&clp->cl_count);
438 428
439 msg.rpc_cred = nfsd4_lookupcred(clp,0); 429 msg.rpc_cred = nfsd4_lookupcred(clp,0);
440 if (IS_ERR(msg.rpc_cred)) 430 if (IS_ERR(msg.rpc_cred))
441 goto out_rpciod; 431 goto out_release_clp;
442 status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_ASYNC, &nfs4_cb_null_ops, NULL); 432 status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_ASYNC, &nfs4_cb_null_ops, NULL);
443 put_rpccred(msg.rpc_cred); 433 put_rpccred(msg.rpc_cred);
444 434
445 if (status != 0) { 435 if (status != 0) {
446 dprintk("NFSD: asynchronous NFSPROC4_CB_NULL failed!\n"); 436 dprintk("NFSD: asynchronous NFSPROC4_CB_NULL failed!\n");
447 goto out_rpciod; 437 goto out_release_clp;
448 } 438 }
449 return; 439 return;
450 440
451out_rpciod: 441out_release_clp:
452 atomic_dec(&clp->cl_count); 442 atomic_dec(&clp->cl_count);
453 rpciod_down();
454out_clnt:
455 rpc_shutdown_client(cb->cb_client); 443 rpc_shutdown_client(cb->cb_client);
456out_err: 444out_err:
457 cb->cb_client = NULL; 445 cb->cb_client = NULL;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 3cc8ce422ab1..8c52913d7cb6 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -378,7 +378,6 @@ shutdown_callback_client(struct nfs4_client *clp)
378 if (clnt) { 378 if (clnt) {
379 clp->cl_callback.cb_client = NULL; 379 clp->cl_callback.cb_client = NULL;
380 rpc_shutdown_client(clnt); 380 rpc_shutdown_client(clnt);
381 rpciod_down();
382 } 381 }
383} 382}
384 383
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 7e6aa245b5d5..945b1cedde2b 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -23,7 +23,7 @@
23#include <linux/file.h> 23#include <linux/file.h>
24#include <linux/mount.h> 24#include <linux/mount.h>
25#include <linux/major.h> 25#include <linux/major.h>
26#include <linux/ext2_fs.h> 26#include <linux/splice.h>
27#include <linux/proc_fs.h> 27#include <linux/proc_fs.h>
28#include <linux/stat.h> 28#include <linux/stat.h>
29#include <linux/fcntl.h> 29#include <linux/fcntl.h>
@@ -801,26 +801,32 @@ found:
801} 801}
802 802
803/* 803/*
804 * Grab and keep cached pages assosiated with a file in the svc_rqst 804 * Grab and keep cached pages associated with a file in the svc_rqst
805 * so that they can be passed to the netowork sendmsg/sendpage routines 805 * so that they can be passed to the network sendmsg/sendpage routines
806 * directrly. They will be released after the sending has completed. 806 * directly. They will be released after the sending has completed.
807 */ 807 */
808static int 808static int
809nfsd_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset , unsigned long size) 809nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
810 struct splice_desc *sd)
810{ 811{
811 unsigned long count = desc->count; 812 struct svc_rqst *rqstp = sd->u.data;
812 struct svc_rqst *rqstp = desc->arg.data;
813 struct page **pp = rqstp->rq_respages + rqstp->rq_resused; 813 struct page **pp = rqstp->rq_respages + rqstp->rq_resused;
814 struct page *page = buf->page;
815 size_t size;
816 int ret;
817
818 ret = buf->ops->confirm(pipe, buf);
819 if (unlikely(ret))
820 return ret;
814 821
815 if (size > count) 822 size = sd->len;
816 size = count;
817 823
818 if (rqstp->rq_res.page_len == 0) { 824 if (rqstp->rq_res.page_len == 0) {
819 get_page(page); 825 get_page(page);
820 put_page(*pp); 826 put_page(*pp);
821 *pp = page; 827 *pp = page;
822 rqstp->rq_resused++; 828 rqstp->rq_resused++;
823 rqstp->rq_res.page_base = offset; 829 rqstp->rq_res.page_base = buf->offset;
824 rqstp->rq_res.page_len = size; 830 rqstp->rq_res.page_len = size;
825 } else if (page != pp[-1]) { 831 } else if (page != pp[-1]) {
826 get_page(page); 832 get_page(page);
@@ -832,11 +838,15 @@ nfsd_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset
832 } else 838 } else
833 rqstp->rq_res.page_len += size; 839 rqstp->rq_res.page_len += size;
834 840
835 desc->count = count - size;
836 desc->written += size;
837 return size; 841 return size;
838} 842}
839 843
844static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
845 struct splice_desc *sd)
846{
847 return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
848}
849
840static __be32 850static __be32
841nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, 851nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
842 loff_t offset, struct kvec *vec, int vlen, unsigned long *count) 852 loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
@@ -861,10 +871,16 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
861 if (ra && ra->p_set) 871 if (ra && ra->p_set)
862 file->f_ra = ra->p_ra; 872 file->f_ra = ra->p_ra;
863 873
864 if (file->f_op->sendfile && rqstp->rq_sendfile_ok) { 874 if (file->f_op->splice_read && rqstp->rq_splice_ok) {
875 struct splice_desc sd = {
876 .len = 0,
877 .total_len = *count,
878 .pos = offset,
879 .u.data = rqstp,
880 };
881
865 rqstp->rq_resused = 1; 882 rqstp->rq_resused = 1;
866 host_err = file->f_op->sendfile(file, &offset, *count, 883 host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
867 nfsd_read_actor, rqstp);
868 } else { 884 } else {
869 oldfs = get_fs(); 885 oldfs = get_fs();
870 set_fs(KERNEL_DS); 886 set_fs(KERNEL_DS);
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 7ed56390b582..ffcc504a1667 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2276,7 +2276,7 @@ const struct file_operations ntfs_file_ops = {
2276 mounted filesystem. */ 2276 mounted filesystem. */
2277 .mmap = generic_file_mmap, /* Mmap file. */ 2277 .mmap = generic_file_mmap, /* Mmap file. */
2278 .open = ntfs_file_open, /* Open file. */ 2278 .open = ntfs_file_open, /* Open file. */
2279 .sendfile = generic_file_sendfile, /* Zero-copy data send with 2279 .splice_read = generic_file_splice_read /* Zero-copy data send with
2280 the data source being on 2280 the data source being on
2281 the ntfs partition. We do 2281 the ntfs partition. We do
2282 not need to care about the 2282 not need to care about the
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 074791ce4ab2..b532a730cec2 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -140,7 +140,7 @@ static int ntfs_init_locked_inode(struct inode *vi, ntfs_attr *na)
140 if (!ni->name) 140 if (!ni->name)
141 return -ENOMEM; 141 return -ENOMEM;
142 memcpy(ni->name, na->name, i); 142 memcpy(ni->name, na->name, i);
143 ni->name[i] = 0; 143 ni->name[na->name_len] = 0;
144 } 144 }
145 return 0; 145 return 0;
146} 146}
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 0023b31e48a8..a480b09c79b9 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -798,6 +798,11 @@ int ocfs2_map_and_write_splice_data(struct inode *inode,
798 } 798 }
799 to = from + bytes; 799 to = from + bytes;
800 800
801 BUG_ON(from > PAGE_CACHE_SIZE);
802 BUG_ON(to > PAGE_CACHE_SIZE);
803 BUG_ON(from < cluster_start);
804 BUG_ON(to > cluster_end);
805
801 if (wc->w_this_page_new) 806 if (wc->w_this_page_new)
802 ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode, 807 ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode,
803 cluster_start, cluster_end, 1); 808 cluster_start, cluster_end, 1);
@@ -809,11 +814,6 @@ int ocfs2_map_and_write_splice_data(struct inode *inode,
809 goto out; 814 goto out;
810 } 815 }
811 816
812 BUG_ON(from > PAGE_CACHE_SIZE);
813 BUG_ON(to > PAGE_CACHE_SIZE);
814 BUG_ON(from > osb->s_clustersize);
815 BUG_ON(to > osb->s_clustersize);
816
817 src = buf->ops->map(sp->s_pipe, buf, 1); 817 src = buf->ops->map(sp->s_pipe, buf, 1);
818 dst = kmap_atomic(wc->w_this_page, KM_USER1); 818 dst = kmap_atomic(wc->w_this_page, KM_USER1);
819 memcpy(dst + from, src + src_from, bytes); 819 memcpy(dst + from, src + src_from, bytes);
@@ -890,6 +890,11 @@ int ocfs2_map_and_write_user_data(struct inode *inode,
890 890
891 to = from + bytes; 891 to = from + bytes;
892 892
893 BUG_ON(from > PAGE_CACHE_SIZE);
894 BUG_ON(to > PAGE_CACHE_SIZE);
895 BUG_ON(from < cluster_start);
896 BUG_ON(to > cluster_end);
897
893 if (wc->w_this_page_new) 898 if (wc->w_this_page_new)
894 ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode, 899 ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode,
895 cluster_start, cluster_end, 1); 900 cluster_start, cluster_end, 1);
@@ -901,11 +906,6 @@ int ocfs2_map_and_write_user_data(struct inode *inode,
901 goto out; 906 goto out;
902 } 907 }
903 908
904 BUG_ON(from > PAGE_CACHE_SIZE);
905 BUG_ON(to > PAGE_CACHE_SIZE);
906 BUG_ON(from > osb->s_clustersize);
907 BUG_ON(to > osb->s_clustersize);
908
909 dst = kmap(wc->w_this_page); 909 dst = kmap(wc->w_this_page);
910 memcpy(dst + from, bp->b_src_buf + src_from, bytes); 910 memcpy(dst + from, bp->b_src_buf + src_from, bytes);
911 kunmap(wc->w_this_page); 911 kunmap(wc->w_this_page);
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
index a93620ce4aca..e9e042b93dbf 100644
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -74,7 +74,6 @@ struct mlog_attribute {
74#define define_mask(_name) { \ 74#define define_mask(_name) { \
75 .attr = { \ 75 .attr = { \
76 .name = #_name, \ 76 .name = #_name, \
77 .owner = THIS_MODULE, \
78 .mode = S_IRUGO | S_IWUSR, \ 77 .mode = S_IRUGO | S_IWUSR, \
79 }, \ 78 }, \
80 .mask = ML_##_name, \ 79 .mask = ML_##_name, \
@@ -144,8 +143,7 @@ static struct kobj_type mlog_ktype = {
144}; 143};
145 144
146static struct kset mlog_kset = { 145static struct kset mlog_kset = {
147 .kobj = {.name = "logmask"}, 146 .kobj = {.name = "logmask", .ktype = &mlog_ktype},
148 .ktype = &mlog_ktype
149}; 147};
150 148
151int mlog_sys_init(struct kset *o2cb_subsys) 149int mlog_sys_init(struct kset *o2cb_subsys)
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index ac6c96431bbc..4979b6675717 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -31,7 +31,7 @@
31#include <linux/pagemap.h> 31#include <linux/pagemap.h>
32#include <linux/uio.h> 32#include <linux/uio.h>
33#include <linux/sched.h> 33#include <linux/sched.h>
34#include <linux/pipe_fs_i.h> 34#include <linux/splice.h>
35#include <linux/mount.h> 35#include <linux/mount.h>
36#include <linux/writeback.h> 36#include <linux/writeback.h>
37 37
@@ -1583,7 +1583,7 @@ static int ocfs2_splice_write_actor(struct pipe_inode_info *pipe,
1583 ssize_t copied = 0; 1583 ssize_t copied = 0;
1584 struct ocfs2_splice_write_priv sp; 1584 struct ocfs2_splice_write_priv sp;
1585 1585
1586 ret = buf->ops->pin(pipe, buf); 1586 ret = buf->ops->confirm(pipe, buf);
1587 if (ret) 1587 if (ret)
1588 goto out; 1588 goto out;
1589 1589
@@ -1604,7 +1604,7 @@ static int ocfs2_splice_write_actor(struct pipe_inode_info *pipe,
1604 * might enter ocfs2_buffered_write_cluster() more 1604 * might enter ocfs2_buffered_write_cluster() more
1605 * than once, so keep track of our progress here. 1605 * than once, so keep track of our progress here.
1606 */ 1606 */
1607 copied = ocfs2_buffered_write_cluster(sd->file, 1607 copied = ocfs2_buffered_write_cluster(sd->u.file,
1608 (loff_t)sd->pos + total, 1608 (loff_t)sd->pos + total,
1609 count, 1609 count,
1610 ocfs2_map_and_write_splice_data, 1610 ocfs2_map_and_write_splice_data,
@@ -1636,9 +1636,14 @@ static ssize_t __ocfs2_file_splice_write(struct pipe_inode_info *pipe,
1636 int ret, err; 1636 int ret, err;
1637 struct address_space *mapping = out->f_mapping; 1637 struct address_space *mapping = out->f_mapping;
1638 struct inode *inode = mapping->host; 1638 struct inode *inode = mapping->host;
1639 1639 struct splice_desc sd = {
1640 ret = __splice_from_pipe(pipe, out, ppos, len, flags, 1640 .total_len = len,
1641 ocfs2_splice_write_actor); 1641 .flags = flags,
1642 .pos = *ppos,
1643 .u.file = out,
1644 };
1645
1646 ret = __splice_from_pipe(pipe, &sd, ocfs2_splice_write_actor);
1642 if (ret > 0) { 1647 if (ret > 0) {
1643 *ppos += ret; 1648 *ppos += ret;
1644 1649
@@ -1817,7 +1822,6 @@ const struct inode_operations ocfs2_special_file_iops = {
1817const struct file_operations ocfs2_fops = { 1822const struct file_operations ocfs2_fops = {
1818 .read = do_sync_read, 1823 .read = do_sync_read,
1819 .write = do_sync_write, 1824 .write = do_sync_write,
1820 .sendfile = generic_file_sendfile,
1821 .mmap = ocfs2_mmap, 1825 .mmap = ocfs2_mmap,
1822 .fsync = ocfs2_sync_file, 1826 .fsync = ocfs2_sync_file,
1823 .release = ocfs2_file_release, 1827 .release = ocfs2_file_release,
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 9a3a058f3553..98e0b85a9bb2 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -397,7 +397,6 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len,
397 static struct attribute addpartattr = { 397 static struct attribute addpartattr = {
398 .name = "whole_disk", 398 .name = "whole_disk",
399 .mode = S_IRUSR | S_IRGRP | S_IROTH, 399 .mode = S_IRUSR | S_IRGRP | S_IROTH,
400 .owner = THIS_MODULE,
401 }; 400 };
402 401
403 sysfs_create_file(&p->kobj, &addpartattr); 402 sysfs_create_file(&p->kobj, &addpartattr);
diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c
index 9f7ad4244f63..1e064c4a4f86 100644
--- a/fs/partitions/ibm.c
+++ b/fs/partitions/ibm.c
@@ -45,7 +45,7 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
45{ 45{
46 int blocksize, offset, size,res; 46 int blocksize, offset, size,res;
47 loff_t i_size; 47 loff_t i_size;
48 dasd_information_t *info; 48 dasd_information2_t *info;
49 struct hd_geometry *geo; 49 struct hd_geometry *geo;
50 char type[5] = {0,}; 50 char type[5] = {0,};
51 char name[7] = {0,}; 51 char name[7] = {0,};
@@ -64,14 +64,17 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
64 if (i_size == 0) 64 if (i_size == 0)
65 goto out_exit; 65 goto out_exit;
66 66
67 if ((info = kmalloc(sizeof(dasd_information_t), GFP_KERNEL)) == NULL) 67 info = kmalloc(sizeof(dasd_information2_t), GFP_KERNEL);
68 if (info == NULL)
68 goto out_exit; 69 goto out_exit;
69 if ((geo = kmalloc(sizeof(struct hd_geometry), GFP_KERNEL)) == NULL) 70 geo = kmalloc(sizeof(struct hd_geometry), GFP_KERNEL);
71 if (geo == NULL)
70 goto out_nogeo; 72 goto out_nogeo;
71 if ((label = kmalloc(sizeof(union label_t), GFP_KERNEL)) == NULL) 73 label = kmalloc(sizeof(union label_t), GFP_KERNEL);
74 if (label == NULL)
72 goto out_nolab; 75 goto out_nolab;
73 76
74 if (ioctl_by_bdev(bdev, BIODASDINFO, (unsigned long)info) != 0 || 77 if (ioctl_by_bdev(bdev, BIODASDINFO2, (unsigned long)info) != 0 ||
75 ioctl_by_bdev(bdev, HDIO_GETGEO, (unsigned long)geo) != 0) 78 ioctl_by_bdev(bdev, HDIO_GETGEO, (unsigned long)geo) != 0)
76 goto out_freeall; 79 goto out_freeall;
77 80
@@ -96,84 +99,108 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
96 res = 1; 99 res = 1;
97 100
98 /* 101 /*
99 * Three different types: CMS1, VOL1 and LNX1/unlabeled 102 * Three different formats: LDL, CDL and unformated disk
103 *
104 * identified by info->format
105 *
106 * unformated disks we do not have to care about
100 */ 107 */
101 if (strncmp(type, "CMS1", 4) == 0) { 108 if (info->format == DASD_FORMAT_LDL) {
102 /* 109 if (strncmp(type, "CMS1", 4) == 0) {
103 * VM style CMS1 labeled disk 110 /*
104 */ 111 * VM style CMS1 labeled disk
105 if (label->cms.disk_offset != 0) { 112 */
106 printk("CMS1/%8s(MDSK):", name); 113 if (label->cms.disk_offset != 0) {
107 /* disk is reserved minidisk */ 114 printk("CMS1/%8s(MDSK):", name);
108 blocksize = label->cms.block_size; 115 /* disk is reserved minidisk */
109 offset = label->cms.disk_offset; 116 blocksize = label->cms.block_size;
110 size = (label->cms.block_count - 1) * (blocksize >> 9); 117 offset = label->cms.disk_offset;
118 size = (label->cms.block_count - 1)
119 * (blocksize >> 9);
120 } else {
121 printk("CMS1/%8s:", name);
122 offset = (info->label_block + 1);
123 size = i_size >> 9;
124 }
111 } else { 125 } else {
112 printk("CMS1/%8s:", name); 126 /*
127 * Old style LNX1 or unlabeled disk
128 */
129 if (strncmp(type, "LNX1", 4) == 0)
130 printk ("LNX1/%8s:", name);
131 else
132 printk("(nonl)");
113 offset = (info->label_block + 1); 133 offset = (info->label_block + 1);
114 size = i_size >> 9; 134 size = i_size >> 9;
115 } 135 }
116 put_partition(state, 1, offset*(blocksize >> 9), 136 put_partition(state, 1, offset*(blocksize >> 9),
117 size-offset*(blocksize >> 9)); 137 size-offset*(blocksize >> 9));
118 } else if ((strncmp(type, "VOL1", 4) == 0) && 138 } else if (info->format == DASD_FORMAT_CDL) {
119 (!info->FBA_layout) && (!strcmp(info->type, "ECKD"))) {
120 /* 139 /*
121 * New style VOL1 labeled disk 140 * New style CDL formatted disk
122 */ 141 */
123 unsigned int blk; 142 unsigned int blk;
124 int counter; 143 int counter;
125 144
126 printk("VOL1/%8s:", name);
127
128 /* get block number and read then go through format1 labels */
129 blk = cchhb2blk(&label->vol.vtoc, geo) + 1;
130 counter = 0;
131 while ((data = read_dev_sector(bdev, blk*(blocksize/512),
132 &sect)) != NULL) {
133 struct vtoc_format1_label f1;
134
135 memcpy(&f1, data, sizeof(struct vtoc_format1_label));
136 put_dev_sector(sect);
137
138 /* skip FMT4 / FMT5 / FMT7 labels */
139 if (f1.DS1FMTID == _ascebc['4']
140 || f1.DS1FMTID == _ascebc['5']
141 || f1.DS1FMTID == _ascebc['7']) {
142 blk++;
143 continue;
144 }
145
146 /* only FMT1 valid at this point */
147 if (f1.DS1FMTID != _ascebc['1'])
148 break;
149
150 /* OK, we got valid partition data */
151 offset = cchh2blk(&f1.DS1EXT1.llimit, geo);
152 size = cchh2blk(&f1.DS1EXT1.ulimit, geo) -
153 offset + geo->sectors;
154 if (counter >= state->limit)
155 break;
156 put_partition(state, counter + 1,
157 offset * (blocksize >> 9),
158 size * (blocksize >> 9));
159 counter++;
160 blk++;
161 }
162 if (!data)
163 /* Are we not supposed to report this ? */
164 goto out_readerr;
165 } else {
166 /* 145 /*
167 * Old style LNX1 or unlabeled disk 146 * check if VOL1 label is available
147 * if not, something is wrong, skipping partition detection
168 */ 148 */
169 if (strncmp(type, "LNX1", 4) == 0) 149 if (strncmp(type, "VOL1", 4) == 0) {
170 printk ("LNX1/%8s:", name); 150 printk("VOL1/%8s:", name);
171 else 151 /*
172 printk("(nonl)/%8s:", name); 152 * get block number and read then go through format1
173 offset = (info->label_block + 1); 153 * labels
174 size = i_size >> 9; 154 */
175 put_partition(state, 1, offset*(blocksize >> 9), 155 blk = cchhb2blk(&label->vol.vtoc, geo) + 1;
176 size-offset*(blocksize >> 9)); 156 counter = 0;
157 data = read_dev_sector(bdev, blk * (blocksize/512),
158 &sect);
159 while (data != NULL) {
160 struct vtoc_format1_label f1;
161
162 memcpy(&f1, data,
163 sizeof(struct vtoc_format1_label));
164 put_dev_sector(sect);
165
166 /* skip FMT4 / FMT5 / FMT7 labels */
167 if (f1.DS1FMTID == _ascebc['4']
168 || f1.DS1FMTID == _ascebc['5']
169 || f1.DS1FMTID == _ascebc['7']) {
170 blk++;
171 data = read_dev_sector(bdev, blk *
172 (blocksize/512),
173 &sect);
174 continue;
175 }
176
177 /* only FMT1 valid at this point */
178 if (f1.DS1FMTID != _ascebc['1'])
179 break;
180
181 /* OK, we got valid partition data */
182 offset = cchh2blk(&f1.DS1EXT1.llimit, geo);
183 size = cchh2blk(&f1.DS1EXT1.ulimit, geo) -
184 offset + geo->sectors;
185 if (counter >= state->limit)
186 break;
187 put_partition(state, counter + 1,
188 offset * (blocksize >> 9),
189 size * (blocksize >> 9));
190 counter++;
191 blk++;
192 data = read_dev_sector(bdev,
193 blk * (blocksize/512),
194 &sect);
195 }
196
197 if (!data)
198 /* Are we not supposed to report this ? */
199 goto out_readerr;
200 } else
201 printk(KERN_WARNING "Warning, expected Label VOL1 not "
202 "found, treating as CDL formated Disk");
203
177 } 204 }
178 205
179 printk("\n"); 206 printk("\n");
diff --git a/fs/pipe.c b/fs/pipe.c
index 3a89592bdf57..d007830d9c87 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -164,6 +164,20 @@ static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
164 page_cache_release(page); 164 page_cache_release(page);
165} 165}
166 166
167/**
168 * generic_pipe_buf_map - virtually map a pipe buffer
169 * @pipe: the pipe that the buffer belongs to
170 * @buf: the buffer that should be mapped
171 * @atomic: whether to use an atomic map
172 *
173 * Description:
174 * This function returns a kernel virtual address mapping for the
175 * passed in @pipe_buffer. If @atomic is set, an atomic map is provided
176 * and the caller has to be careful not to fault before calling
177 * the unmap function.
178 *
179 * Note that this function occupies KM_USER0 if @atomic != 0.
180 */
167void *generic_pipe_buf_map(struct pipe_inode_info *pipe, 181void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
168 struct pipe_buffer *buf, int atomic) 182 struct pipe_buffer *buf, int atomic)
169{ 183{
@@ -175,6 +189,15 @@ void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
175 return kmap(buf->page); 189 return kmap(buf->page);
176} 190}
177 191
192/**
193 * generic_pipe_buf_unmap - unmap a previously mapped pipe buffer
194 * @pipe: the pipe that the buffer belongs to
195 * @buf: the buffer that should be unmapped
196 * @map_data: the data that the mapping function returned
197 *
198 * Description:
199 * This function undoes the mapping that ->map() provided.
200 */
178void generic_pipe_buf_unmap(struct pipe_inode_info *pipe, 201void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
179 struct pipe_buffer *buf, void *map_data) 202 struct pipe_buffer *buf, void *map_data)
180{ 203{
@@ -185,11 +208,28 @@ void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
185 kunmap(buf->page); 208 kunmap(buf->page);
186} 209}
187 210
211/**
212 * generic_pipe_buf_steal - attempt to take ownership of a @pipe_buffer
213 * @pipe: the pipe that the buffer belongs to
214 * @buf: the buffer to attempt to steal
215 *
216 * Description:
217 * This function attempts to steal the @struct page attached to
218 * @buf. If successful, this function returns 0 and returns with
219 * the page locked. The caller may then reuse the page for whatever
220 * he wishes, the typical use is insertion into a different file
221 * page cache.
222 */
188int generic_pipe_buf_steal(struct pipe_inode_info *pipe, 223int generic_pipe_buf_steal(struct pipe_inode_info *pipe,
189 struct pipe_buffer *buf) 224 struct pipe_buffer *buf)
190{ 225{
191 struct page *page = buf->page; 226 struct page *page = buf->page;
192 227
228 /*
229 * A reference of one is golden, that means that the owner of this
230 * page is the only one holding a reference to it. lock the page
231 * and return OK.
232 */
193 if (page_count(page) == 1) { 233 if (page_count(page) == 1) {
194 lock_page(page); 234 lock_page(page);
195 return 0; 235 return 0;
@@ -198,12 +238,32 @@ int generic_pipe_buf_steal(struct pipe_inode_info *pipe,
198 return 1; 238 return 1;
199} 239}
200 240
201void generic_pipe_buf_get(struct pipe_inode_info *info, struct pipe_buffer *buf) 241/**
242 * generic_pipe_buf_get - get a reference to a @struct pipe_buffer
243 * @pipe: the pipe that the buffer belongs to
244 * @buf: the buffer to get a reference to
245 *
246 * Description:
247 * This function grabs an extra reference to @buf. It's used in
248 * in the tee() system call, when we duplicate the buffers in one
249 * pipe into another.
250 */
251void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
202{ 252{
203 page_cache_get(buf->page); 253 page_cache_get(buf->page);
204} 254}
205 255
206int generic_pipe_buf_pin(struct pipe_inode_info *info, struct pipe_buffer *buf) 256/**
257 * generic_pipe_buf_confirm - verify contents of the pipe buffer
258 * @pipe: the pipe that the buffer belongs to
259 * @buf: the buffer to confirm
260 *
261 * Description:
262 * This function does nothing, because the generic pipe code uses
263 * pages that are always good when inserted into the pipe.
264 */
265int generic_pipe_buf_confirm(struct pipe_inode_info *info,
266 struct pipe_buffer *buf)
207{ 267{
208 return 0; 268 return 0;
209} 269}
@@ -212,7 +272,7 @@ static const struct pipe_buf_operations anon_pipe_buf_ops = {
212 .can_merge = 1, 272 .can_merge = 1,
213 .map = generic_pipe_buf_map, 273 .map = generic_pipe_buf_map,
214 .unmap = generic_pipe_buf_unmap, 274 .unmap = generic_pipe_buf_unmap,
215 .pin = generic_pipe_buf_pin, 275 .confirm = generic_pipe_buf_confirm,
216 .release = anon_pipe_buf_release, 276 .release = anon_pipe_buf_release,
217 .steal = generic_pipe_buf_steal, 277 .steal = generic_pipe_buf_steal,
218 .get = generic_pipe_buf_get, 278 .get = generic_pipe_buf_get,
@@ -252,7 +312,7 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,
252 if (chars > total_len) 312 if (chars > total_len)
253 chars = total_len; 313 chars = total_len;
254 314
255 error = ops->pin(pipe, buf); 315 error = ops->confirm(pipe, buf);
256 if (error) { 316 if (error) {
257 if (!ret) 317 if (!ret)
258 error = ret; 318 error = ret;
@@ -373,7 +433,7 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov,
373 int error, atomic = 1; 433 int error, atomic = 1;
374 void *addr; 434 void *addr;
375 435
376 error = ops->pin(pipe, buf); 436 error = ops->confirm(pipe, buf);
377 if (error) 437 if (error)
378 goto out; 438 goto out;
379 439
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 74f30e0c0381..98e78e2f18d6 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -165,7 +165,6 @@ static inline char * task_state(struct task_struct *p, char *buffer)
165 rcu_read_lock(); 165 rcu_read_lock();
166 buffer += sprintf(buffer, 166 buffer += sprintf(buffer,
167 "State:\t%s\n" 167 "State:\t%s\n"
168 "SleepAVG:\t%lu%%\n"
169 "Tgid:\t%d\n" 168 "Tgid:\t%d\n"
170 "Pid:\t%d\n" 169 "Pid:\t%d\n"
171 "PPid:\t%d\n" 170 "PPid:\t%d\n"
@@ -173,7 +172,6 @@ static inline char * task_state(struct task_struct *p, char *buffer)
173 "Uid:\t%d\t%d\t%d\t%d\n" 172 "Uid:\t%d\t%d\t%d\t%d\n"
174 "Gid:\t%d\t%d\t%d\t%d\n", 173 "Gid:\t%d\t%d\t%d\t%d\n",
175 get_task_state(p), 174 get_task_state(p),
176 (p->sleep_avg/1024)*100/(1020000000/1024),
177 p->tgid, p->pid, 175 p->tgid, p->pid,
178 pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0, 176 pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0,
179 pid_alive(p) && p->ptrace ? rcu_dereference(p->parent)->pid : 0, 177 pid_alive(p) && p->ptrace ? rcu_dereference(p->parent)->pid : 0,
@@ -312,6 +310,41 @@ int proc_pid_status(struct task_struct *task, char * buffer)
312 return buffer - orig; 310 return buffer - orig;
313} 311}
314 312
313static clock_t task_utime(struct task_struct *p)
314{
315 clock_t utime = cputime_to_clock_t(p->utime),
316 total = utime + cputime_to_clock_t(p->stime);
317 u64 temp;
318
319 /*
320 * Use CFS's precise accounting:
321 */
322 temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime);
323
324 if (total) {
325 temp *= utime;
326 do_div(temp, total);
327 }
328 utime = (clock_t)temp;
329
330 return utime;
331}
332
333static clock_t task_stime(struct task_struct *p)
334{
335 clock_t stime = cputime_to_clock_t(p->stime);
336
337 /*
338 * Use CFS's precise accounting. (we subtract utime from
339 * the total, to make sure the total observed by userspace
340 * grows monotonically - apps rely on that):
341 */
342 stime = nsec_to_clock_t(p->se.sum_exec_runtime) - task_utime(p);
343
344 return stime;
345}
346
347
315static int do_task_stat(struct task_struct *task, char * buffer, int whole) 348static int do_task_stat(struct task_struct *task, char * buffer, int whole)
316{ 349{
317 unsigned long vsize, eip, esp, wchan = ~0UL; 350 unsigned long vsize, eip, esp, wchan = ~0UL;
@@ -326,7 +359,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
326 unsigned long long start_time; 359 unsigned long long start_time;
327 unsigned long cmin_flt = 0, cmaj_flt = 0; 360 unsigned long cmin_flt = 0, cmaj_flt = 0;
328 unsigned long min_flt = 0, maj_flt = 0; 361 unsigned long min_flt = 0, maj_flt = 0;
329 cputime_t cutime, cstime, utime, stime; 362 cputime_t cutime, cstime;
363 clock_t utime, stime;
330 unsigned long rsslim = 0; 364 unsigned long rsslim = 0;
331 char tcomm[sizeof(task->comm)]; 365 char tcomm[sizeof(task->comm)];
332 unsigned long flags; 366 unsigned long flags;
@@ -344,7 +378,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
344 378
345 sigemptyset(&sigign); 379 sigemptyset(&sigign);
346 sigemptyset(&sigcatch); 380 sigemptyset(&sigcatch);
347 cutime = cstime = utime = stime = cputime_zero; 381 cutime = cstime = cputime_zero;
382 utime = stime = 0;
348 383
349 rcu_read_lock(); 384 rcu_read_lock();
350 if (lock_task_sighand(task, &flags)) { 385 if (lock_task_sighand(task, &flags)) {
@@ -370,15 +405,15 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
370 do { 405 do {
371 min_flt += t->min_flt; 406 min_flt += t->min_flt;
372 maj_flt += t->maj_flt; 407 maj_flt += t->maj_flt;
373 utime = cputime_add(utime, t->utime); 408 utime += task_utime(t);
374 stime = cputime_add(stime, t->stime); 409 stime += task_stime(t);
375 t = next_thread(t); 410 t = next_thread(t);
376 } while (t != task); 411 } while (t != task);
377 412
378 min_flt += sig->min_flt; 413 min_flt += sig->min_flt;
379 maj_flt += sig->maj_flt; 414 maj_flt += sig->maj_flt;
380 utime = cputime_add(utime, sig->utime); 415 utime += cputime_to_clock_t(sig->utime);
381 stime = cputime_add(stime, sig->stime); 416 stime += cputime_to_clock_t(sig->stime);
382 } 417 }
383 418
384 sid = signal_session(sig); 419 sid = signal_session(sig);
@@ -394,8 +429,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
394 if (!whole) { 429 if (!whole) {
395 min_flt = task->min_flt; 430 min_flt = task->min_flt;
396 maj_flt = task->maj_flt; 431 maj_flt = task->maj_flt;
397 utime = task->utime; 432 utime = task_utime(task);
398 stime = task->stime; 433 stime = task_stime(task);
399 } 434 }
400 435
401 /* scale priority and nice values from timeslices to -20..20 */ 436 /* scale priority and nice values from timeslices to -20..20 */
@@ -426,8 +461,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
426 cmin_flt, 461 cmin_flt,
427 maj_flt, 462 maj_flt,
428 cmaj_flt, 463 cmaj_flt,
429 cputime_to_clock_t(utime), 464 utime,
430 cputime_to_clock_t(stime), 465 stime,
431 cputime_to_clock_t(cutime), 466 cputime_to_clock_t(cutime),
432 cputime_to_clock_t(cstime), 467 cputime_to_clock_t(cstime),
433 priority, 468 priority,
diff --git a/fs/proc/base.c b/fs/proc/base.c
index a5fa1fdafc4e..46ea5d56e1bb 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -296,7 +296,7 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer)
296 */ 296 */
297static int proc_pid_schedstat(struct task_struct *task, char *buffer) 297static int proc_pid_schedstat(struct task_struct *task, char *buffer)
298{ 298{
299 return sprintf(buffer, "%lu %lu %lu\n", 299 return sprintf(buffer, "%llu %llu %lu\n",
300 task->sched_info.cpu_time, 300 task->sched_info.cpu_time,
301 task->sched_info.run_delay, 301 task->sched_info.run_delay,
302 task->sched_info.pcnt); 302 task->sched_info.pcnt);
@@ -929,6 +929,69 @@ static const struct file_operations proc_fault_inject_operations = {
929}; 929};
930#endif 930#endif
931 931
932#ifdef CONFIG_SCHED_DEBUG
933/*
934 * Print out various scheduling related per-task fields:
935 */
936static int sched_show(struct seq_file *m, void *v)
937{
938 struct inode *inode = m->private;
939 struct task_struct *p;
940
941 WARN_ON(!inode);
942
943 p = get_proc_task(inode);
944 if (!p)
945 return -ESRCH;
946 proc_sched_show_task(p, m);
947
948 put_task_struct(p);
949
950 return 0;
951}
952
953static ssize_t
954sched_write(struct file *file, const char __user *buf,
955 size_t count, loff_t *offset)
956{
957 struct inode *inode = file->f_path.dentry->d_inode;
958 struct task_struct *p;
959
960 WARN_ON(!inode);
961
962 p = get_proc_task(inode);
963 if (!p)
964 return -ESRCH;
965 proc_sched_set_task(p);
966
967 put_task_struct(p);
968
969 return count;
970}
971
972static int sched_open(struct inode *inode, struct file *filp)
973{
974 int ret;
975
976 ret = single_open(filp, sched_show, NULL);
977 if (!ret) {
978 struct seq_file *m = filp->private_data;
979
980 m->private = inode;
981 }
982 return ret;
983}
984
985static const struct file_operations proc_pid_sched_operations = {
986 .open = sched_open,
987 .read = seq_read,
988 .write = sched_write,
989 .llseek = seq_lseek,
990 .release = seq_release,
991};
992
993#endif
994
932static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) 995static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
933{ 996{
934 struct inode *inode = dentry->d_inode; 997 struct inode *inode = dentry->d_inode;
@@ -1963,6 +2026,9 @@ static const struct pid_entry tgid_base_stuff[] = {
1963 INF("environ", S_IRUSR, pid_environ), 2026 INF("environ", S_IRUSR, pid_environ),
1964 INF("auxv", S_IRUSR, pid_auxv), 2027 INF("auxv", S_IRUSR, pid_auxv),
1965 INF("status", S_IRUGO, pid_status), 2028 INF("status", S_IRUGO, pid_status),
2029#ifdef CONFIG_SCHED_DEBUG
2030 REG("sched", S_IRUGO|S_IWUSR, pid_sched),
2031#endif
1966 INF("cmdline", S_IRUGO, pid_cmdline), 2032 INF("cmdline", S_IRUGO, pid_cmdline),
1967 INF("stat", S_IRUGO, tgid_stat), 2033 INF("stat", S_IRUGO, tgid_stat),
1968 INF("statm", S_IRUGO, pid_statm), 2034 INF("statm", S_IRUGO, pid_statm),
@@ -2247,6 +2313,9 @@ static const struct pid_entry tid_base_stuff[] = {
2247 INF("environ", S_IRUSR, pid_environ), 2313 INF("environ", S_IRUSR, pid_environ),
2248 INF("auxv", S_IRUSR, pid_auxv), 2314 INF("auxv", S_IRUSR, pid_auxv),
2249 INF("status", S_IRUGO, pid_status), 2315 INF("status", S_IRUGO, pid_status),
2316#ifdef CONFIG_SCHED_DEBUG
2317 REG("sched", S_IRUGO|S_IWUSR, pid_sched),
2318#endif
2250 INF("cmdline", S_IRUGO, pid_cmdline), 2319 INF("cmdline", S_IRUGO, pid_cmdline),
2251 INF("stat", S_IRUGO, tid_stat), 2320 INF("stat", S_IRUGO, tid_stat),
2252 INF("statm", S_IRUGO, pid_statm), 2321 INF("statm", S_IRUGO, pid_statm),
diff --git a/fs/qnx4/file.c b/fs/qnx4/file.c
index 44649981bbc8..867f42b02035 100644
--- a/fs/qnx4/file.c
+++ b/fs/qnx4/file.c
@@ -25,7 +25,7 @@ const struct file_operations qnx4_file_operations =
25 .read = do_sync_read, 25 .read = do_sync_read,
26 .aio_read = generic_file_aio_read, 26 .aio_read = generic_file_aio_read,
27 .mmap = generic_file_mmap, 27 .mmap = generic_file_mmap,
28 .sendfile = generic_file_sendfile, 28 .splice_read = generic_file_splice_read,
29#ifdef CONFIG_QNX4FS_RW 29#ifdef CONFIG_QNX4FS_RW
30 .write = do_sync_write, 30 .write = do_sync_write,
31 .aio_write = generic_file_aio_write, 31 .aio_write = generic_file_aio_write,
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 2f14774a124f..97bdc0b2f9d2 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -41,7 +41,7 @@ const struct file_operations ramfs_file_operations = {
41 .aio_write = generic_file_aio_write, 41 .aio_write = generic_file_aio_write,
42 .mmap = generic_file_mmap, 42 .mmap = generic_file_mmap,
43 .fsync = simple_sync_file, 43 .fsync = simple_sync_file,
44 .sendfile = generic_file_sendfile, 44 .splice_read = generic_file_splice_read,
45 .llseek = generic_file_llseek, 45 .llseek = generic_file_llseek,
46}; 46};
47 47
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 9345a46ffb32..cad2b7ace630 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -42,7 +42,7 @@ const struct file_operations ramfs_file_operations = {
42 .write = do_sync_write, 42 .write = do_sync_write,
43 .aio_write = generic_file_aio_write, 43 .aio_write = generic_file_aio_write,
44 .fsync = simple_sync_file, 44 .fsync = simple_sync_file,
45 .sendfile = generic_file_sendfile, 45 .splice_read = generic_file_splice_read,
46 .llseek = generic_file_llseek, 46 .llseek = generic_file_llseek,
47}; 47};
48 48
@@ -195,6 +195,11 @@ static int ramfs_nommu_setattr(struct dentry *dentry, struct iattr *ia)
195 unsigned int old_ia_valid = ia->ia_valid; 195 unsigned int old_ia_valid = ia->ia_valid;
196 int ret = 0; 196 int ret = 0;
197 197
198 /* POSIX UID/GID verification for setting inode attributes */
199 ret = inode_change_ok(inode, ia);
200 if (ret)
201 return ret;
202
198 /* by providing our own setattr() method, we skip this quotaism */ 203 /* by providing our own setattr() method, we skip this quotaism */
199 if ((old_ia_valid & ATTR_UID && ia->ia_uid != inode->i_uid) || 204 if ((old_ia_valid & ATTR_UID && ia->ia_uid != inode->i_uid) ||
200 (old_ia_valid & ATTR_GID && ia->ia_gid != inode->i_gid)) 205 (old_ia_valid & ATTR_GID && ia->ia_gid != inode->i_gid))
diff --git a/fs/read_write.c b/fs/read_write.c
index 4d03008f015b..507ddff48a9a 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -15,6 +15,7 @@
15#include <linux/module.h> 15#include <linux/module.h>
16#include <linux/syscalls.h> 16#include <linux/syscalls.h>
17#include <linux/pagemap.h> 17#include <linux/pagemap.h>
18#include <linux/splice.h>
18#include "read_write.h" 19#include "read_write.h"
19 20
20#include <asm/uaccess.h> 21#include <asm/uaccess.h>
@@ -25,7 +26,7 @@ const struct file_operations generic_ro_fops = {
25 .read = do_sync_read, 26 .read = do_sync_read,
26 .aio_read = generic_file_aio_read, 27 .aio_read = generic_file_aio_read,
27 .mmap = generic_file_readonly_mmap, 28 .mmap = generic_file_readonly_mmap,
28 .sendfile = generic_file_sendfile, 29 .splice_read = generic_file_splice_read,
29}; 30};
30 31
31EXPORT_SYMBOL(generic_ro_fops); 32EXPORT_SYMBOL(generic_ro_fops);
@@ -708,7 +709,7 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
708 struct inode * in_inode, * out_inode; 709 struct inode * in_inode, * out_inode;
709 loff_t pos; 710 loff_t pos;
710 ssize_t retval; 711 ssize_t retval;
711 int fput_needed_in, fput_needed_out; 712 int fput_needed_in, fput_needed_out, fl;
712 713
713 /* 714 /*
714 * Get input file, and verify that it is ok.. 715 * Get input file, and verify that it is ok..
@@ -723,7 +724,7 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
723 in_inode = in_file->f_path.dentry->d_inode; 724 in_inode = in_file->f_path.dentry->d_inode;
724 if (!in_inode) 725 if (!in_inode)
725 goto fput_in; 726 goto fput_in;
726 if (!in_file->f_op || !in_file->f_op->sendfile) 727 if (!in_file->f_op || !in_file->f_op->splice_read)
727 goto fput_in; 728 goto fput_in;
728 retval = -ESPIPE; 729 retval = -ESPIPE;
729 if (!ppos) 730 if (!ppos)
@@ -776,7 +777,18 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
776 count = max - pos; 777 count = max - pos;
777 } 778 }
778 779
779 retval = in_file->f_op->sendfile(in_file, ppos, count, file_send_actor, out_file); 780 fl = 0;
781#if 0
782 /*
783 * We need to debate whether we can enable this or not. The
784 * man page documents EAGAIN return for the output at least,
785 * and the application is arguably buggy if it doesn't expect
786 * EAGAIN on a non-blocking file descriptor.
787 */
788 if (in_file->f_flags & O_NONBLOCK)
789 fl = SPLICE_F_NONBLOCK;
790#endif
791 retval = do_splice_direct(in_file, ppos, out_file, count, fl);
780 792
781 if (retval > 0) { 793 if (retval > 0) {
782 add_rchar(current, retval); 794 add_rchar(current, retval);
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 9e451a68580f..30eebfb1b2d8 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -1531,7 +1531,6 @@ const struct file_operations reiserfs_file_operations = {
1531 .open = generic_file_open, 1531 .open = generic_file_open,
1532 .release = reiserfs_file_release, 1532 .release = reiserfs_file_release,
1533 .fsync = reiserfs_sync_file, 1533 .fsync = reiserfs_sync_file,
1534 .sendfile = generic_file_sendfile,
1535 .aio_read = generic_file_aio_read, 1534 .aio_read = generic_file_aio_read,
1536 .aio_write = generic_file_aio_write, 1535 .aio_write = generic_file_aio_write,
1537 .splice_read = generic_file_splice_read, 1536 .splice_read = generic_file_splice_read,
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 0ac22af7afe5..49194a4e6b91 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -447,3 +447,37 @@ int seq_puts(struct seq_file *m, const char *s)
447 return -1; 447 return -1;
448} 448}
449EXPORT_SYMBOL(seq_puts); 449EXPORT_SYMBOL(seq_puts);
450
451struct list_head *seq_list_start(struct list_head *head, loff_t pos)
452{
453 struct list_head *lh;
454
455 list_for_each(lh, head)
456 if (pos-- == 0)
457 return lh;
458
459 return NULL;
460}
461
462EXPORT_SYMBOL(seq_list_start);
463
464struct list_head *seq_list_start_head(struct list_head *head, loff_t pos)
465{
466 if (!pos)
467 return head;
468
469 return seq_list_start(head, pos - 1);
470}
471
472EXPORT_SYMBOL(seq_list_start_head);
473
474struct list_head *seq_list_next(void *v, struct list_head *head, loff_t *ppos)
475{
476 struct list_head *lh;
477
478 lh = ((struct list_head *)v)->next;
479 ++*ppos;
480 return lh == head ? NULL : lh;
481}
482
483EXPORT_SYMBOL(seq_list_next);
diff --git a/fs/signalfd.c b/fs/signalfd.c
index f1da89203a9a..3b07f26d984d 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -133,7 +133,8 @@ static unsigned int signalfd_poll(struct file *file, poll_table *wait)
133 * the peer disconnects. 133 * the peer disconnects.
134 */ 134 */
135 if (signalfd_lock(ctx, &lk)) { 135 if (signalfd_lock(ctx, &lk)) {
136 if (next_signal(&lk.tsk->pending, &ctx->sigmask) > 0 || 136 if ((lk.tsk == current &&
137 next_signal(&lk.tsk->pending, &ctx->sigmask) > 0) ||
137 next_signal(&lk.tsk->signal->shared_pending, 138 next_signal(&lk.tsk->signal->shared_pending,
138 &ctx->sigmask) > 0) 139 &ctx->sigmask) > 0)
139 events |= POLLIN; 140 events |= POLLIN;
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
index aea3f8aa54c0..c5d78a7e492b 100644
--- a/fs/smbfs/file.c
+++ b/fs/smbfs/file.c
@@ -262,8 +262,9 @@ out:
262} 262}
263 263
264static ssize_t 264static ssize_t
265smb_file_sendfile(struct file *file, loff_t *ppos, 265smb_file_splice_read(struct file *file, loff_t *ppos,
266 size_t count, read_actor_t actor, void *target) 266 struct pipe_inode_info *pipe, size_t count,
267 unsigned int flags)
267{ 268{
268 struct dentry *dentry = file->f_path.dentry; 269 struct dentry *dentry = file->f_path.dentry;
269 ssize_t status; 270 ssize_t status;
@@ -277,7 +278,7 @@ smb_file_sendfile(struct file *file, loff_t *ppos,
277 DENTRY_PATH(dentry), status); 278 DENTRY_PATH(dentry), status);
278 goto out; 279 goto out;
279 } 280 }
280 status = generic_file_sendfile(file, ppos, count, actor, target); 281 status = generic_file_splice_read(file, ppos, pipe, count, flags);
281out: 282out:
282 return status; 283 return status;
283} 284}
@@ -416,7 +417,7 @@ const struct file_operations smb_file_operations =
416 .open = smb_file_open, 417 .open = smb_file_open,
417 .release = smb_file_release, 418 .release = smb_file_release,
418 .fsync = smb_fsync, 419 .fsync = smb_fsync,
419 .sendfile = smb_file_sendfile, 420 .splice_read = smb_file_splice_read,
420}; 421};
421 422
422const struct inode_operations smb_file_inode_operations = 423const struct inode_operations smb_file_inode_operations =
diff --git a/fs/splice.c b/fs/splice.c
index 12f28281d2b1..6c9828651e6f 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -20,7 +20,7 @@
20#include <linux/fs.h> 20#include <linux/fs.h>
21#include <linux/file.h> 21#include <linux/file.h>
22#include <linux/pagemap.h> 22#include <linux/pagemap.h>
23#include <linux/pipe_fs_i.h> 23#include <linux/splice.h>
24#include <linux/mm_inline.h> 24#include <linux/mm_inline.h>
25#include <linux/swap.h> 25#include <linux/swap.h>
26#include <linux/writeback.h> 26#include <linux/writeback.h>
@@ -28,22 +28,7 @@
28#include <linux/module.h> 28#include <linux/module.h>
29#include <linux/syscalls.h> 29#include <linux/syscalls.h>
30#include <linux/uio.h> 30#include <linux/uio.h>
31 31#include <linux/security.h>
32struct partial_page {
33 unsigned int offset;
34 unsigned int len;
35};
36
37/*
38 * Passed to splice_to_pipe
39 */
40struct splice_pipe_desc {
41 struct page **pages; /* page map */
42 struct partial_page *partial; /* pages[] may not be contig */
43 int nr_pages; /* number of pages in map */
44 unsigned int flags; /* splice flags */
45 const struct pipe_buf_operations *ops;/* ops associated with output pipe */
46};
47 32
48/* 33/*
49 * Attempt to steal a page from a pipe buffer. This should perhaps go into 34 * Attempt to steal a page from a pipe buffer. This should perhaps go into
@@ -101,8 +86,12 @@ static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe,
101 buf->flags &= ~PIPE_BUF_FLAG_LRU; 86 buf->flags &= ~PIPE_BUF_FLAG_LRU;
102} 87}
103 88
104static int page_cache_pipe_buf_pin(struct pipe_inode_info *pipe, 89/*
105 struct pipe_buffer *buf) 90 * Check whether the contents of buf is OK to access. Since the content
91 * is a page cache page, IO may be in flight.
92 */
93static int page_cache_pipe_buf_confirm(struct pipe_inode_info *pipe,
94 struct pipe_buffer *buf)
106{ 95{
107 struct page *page = buf->page; 96 struct page *page = buf->page;
108 int err; 97 int err;
@@ -143,7 +132,7 @@ static const struct pipe_buf_operations page_cache_pipe_buf_ops = {
143 .can_merge = 0, 132 .can_merge = 0,
144 .map = generic_pipe_buf_map, 133 .map = generic_pipe_buf_map,
145 .unmap = generic_pipe_buf_unmap, 134 .unmap = generic_pipe_buf_unmap,
146 .pin = page_cache_pipe_buf_pin, 135 .confirm = page_cache_pipe_buf_confirm,
147 .release = page_cache_pipe_buf_release, 136 .release = page_cache_pipe_buf_release,
148 .steal = page_cache_pipe_buf_steal, 137 .steal = page_cache_pipe_buf_steal,
149 .get = generic_pipe_buf_get, 138 .get = generic_pipe_buf_get,
@@ -163,19 +152,27 @@ static const struct pipe_buf_operations user_page_pipe_buf_ops = {
163 .can_merge = 0, 152 .can_merge = 0,
164 .map = generic_pipe_buf_map, 153 .map = generic_pipe_buf_map,
165 .unmap = generic_pipe_buf_unmap, 154 .unmap = generic_pipe_buf_unmap,
166 .pin = generic_pipe_buf_pin, 155 .confirm = generic_pipe_buf_confirm,
167 .release = page_cache_pipe_buf_release, 156 .release = page_cache_pipe_buf_release,
168 .steal = user_page_pipe_buf_steal, 157 .steal = user_page_pipe_buf_steal,
169 .get = generic_pipe_buf_get, 158 .get = generic_pipe_buf_get,
170}; 159};
171 160
172/* 161/**
173 * Pipe output worker. This sets up our pipe format with the page cache 162 * splice_to_pipe - fill passed data into a pipe
174 * pipe buffer operations. Otherwise very similar to the regular pipe_writev(). 163 * @pipe: pipe to fill
164 * @spd: data to fill
165 *
166 * Description:
167 * @spd contains a map of pages and len/offset tupples, a long with
168 * the struct pipe_buf_operations associated with these pages. This
169 * function will link that data to the pipe.
170 *
175 */ 171 */
176static ssize_t splice_to_pipe(struct pipe_inode_info *pipe, 172ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
177 struct splice_pipe_desc *spd) 173 struct splice_pipe_desc *spd)
178{ 174{
175 unsigned int spd_pages = spd->nr_pages;
179 int ret, do_wakeup, page_nr; 176 int ret, do_wakeup, page_nr;
180 177
181 ret = 0; 178 ret = 0;
@@ -200,6 +197,7 @@ static ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
200 buf->page = spd->pages[page_nr]; 197 buf->page = spd->pages[page_nr];
201 buf->offset = spd->partial[page_nr].offset; 198 buf->offset = spd->partial[page_nr].offset;
202 buf->len = spd->partial[page_nr].len; 199 buf->len = spd->partial[page_nr].len;
200 buf->private = spd->partial[page_nr].private;
203 buf->ops = spd->ops; 201 buf->ops = spd->ops;
204 if (spd->flags & SPLICE_F_GIFT) 202 if (spd->flags & SPLICE_F_GIFT)
205 buf->flags |= PIPE_BUF_FLAG_GIFT; 203 buf->flags |= PIPE_BUF_FLAG_GIFT;
@@ -244,17 +242,18 @@ static ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
244 pipe->waiting_writers--; 242 pipe->waiting_writers--;
245 } 243 }
246 244
247 if (pipe->inode) 245 if (pipe->inode) {
248 mutex_unlock(&pipe->inode->i_mutex); 246 mutex_unlock(&pipe->inode->i_mutex);
249 247
250 if (do_wakeup) { 248 if (do_wakeup) {
251 smp_mb(); 249 smp_mb();
252 if (waitqueue_active(&pipe->wait)) 250 if (waitqueue_active(&pipe->wait))
253 wake_up_interruptible(&pipe->wait); 251 wake_up_interruptible(&pipe->wait);
254 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 252 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
253 }
255 } 254 }
256 255
257 while (page_nr < spd->nr_pages) 256 while (page_nr < spd_pages)
258 page_cache_release(spd->pages[page_nr++]); 257 page_cache_release(spd->pages[page_nr++]);
259 258
260 return ret; 259 return ret;
@@ -272,7 +271,6 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
272 struct page *page; 271 struct page *page;
273 pgoff_t index, end_index; 272 pgoff_t index, end_index;
274 loff_t isize; 273 loff_t isize;
275 size_t total_len;
276 int error, page_nr; 274 int error, page_nr;
277 struct splice_pipe_desc spd = { 275 struct splice_pipe_desc spd = {
278 .pages = pages, 276 .pages = pages,
@@ -295,20 +293,15 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
295 page_cache_readahead(mapping, &in->f_ra, in, index, nr_pages); 293 page_cache_readahead(mapping, &in->f_ra, in, index, nr_pages);
296 294
297 /* 295 /*
298 * Now fill in the holes:
299 */
300 error = 0;
301 total_len = 0;
302
303 /*
304 * Lookup the (hopefully) full range of pages we need. 296 * Lookup the (hopefully) full range of pages we need.
305 */ 297 */
306 spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, pages); 298 spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, pages);
307 299
308 /* 300 /*
309 * If find_get_pages_contig() returned fewer pages than we needed, 301 * If find_get_pages_contig() returned fewer pages than we needed,
310 * allocate the rest. 302 * allocate the rest and fill in the holes.
311 */ 303 */
304 error = 0;
312 index += spd.nr_pages; 305 index += spd.nr_pages;
313 while (spd.nr_pages < nr_pages) { 306 while (spd.nr_pages < nr_pages) {
314 /* 307 /*
@@ -415,43 +408,47 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
415 408
416 break; 409 break;
417 } 410 }
411 }
412fill_it:
413 /*
414 * i_size must be checked after PageUptodate.
415 */
416 isize = i_size_read(mapping->host);
417 end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
418 if (unlikely(!isize || index > end_index))
419 break;
420
421 /*
422 * if this is the last page, see if we need to shrink
423 * the length and stop
424 */
425 if (end_index == index) {
426 unsigned int plen;
418 427
419 /* 428 /*
420 * i_size must be checked after ->readpage(). 429 * max good bytes in this page
421 */ 430 */
422 isize = i_size_read(mapping->host); 431 plen = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
423 end_index = (isize - 1) >> PAGE_CACHE_SHIFT; 432 if (plen <= loff)
424 if (unlikely(!isize || index > end_index))
425 break; 433 break;
426 434
427 /* 435 /*
428 * if this is the last page, see if we need to shrink 436 * force quit after adding this page
429 * the length and stop
430 */ 437 */
431 if (end_index == index) { 438 this_len = min(this_len, plen - loff);
432 loff = PAGE_CACHE_SIZE - (isize & ~PAGE_CACHE_MASK); 439 len = this_len;
433 if (total_len + loff > isize)
434 break;
435 /*
436 * force quit after adding this page
437 */
438 len = this_len;
439 this_len = min(this_len, loff);
440 loff = 0;
441 }
442 } 440 }
443fill_it: 441
444 partial[page_nr].offset = loff; 442 partial[page_nr].offset = loff;
445 partial[page_nr].len = this_len; 443 partial[page_nr].len = this_len;
446 len -= this_len; 444 len -= this_len;
447 total_len += this_len;
448 loff = 0; 445 loff = 0;
449 spd.nr_pages++; 446 spd.nr_pages++;
450 index++; 447 index++;
451 } 448 }
452 449
453 /* 450 /*
454 * Release any pages at the end, if we quit early. 'i' is how far 451 * Release any pages at the end, if we quit early. 'page_nr' is how far
455 * we got, 'nr_pages' is how many pages are in the map. 452 * we got, 'nr_pages' is how many pages are in the map.
456 */ 453 */
457 while (page_nr < nr_pages) 454 while (page_nr < nr_pages)
@@ -466,11 +463,16 @@ fill_it:
466/** 463/**
467 * generic_file_splice_read - splice data from file to a pipe 464 * generic_file_splice_read - splice data from file to a pipe
468 * @in: file to splice from 465 * @in: file to splice from
466 * @ppos: position in @in
469 * @pipe: pipe to splice to 467 * @pipe: pipe to splice to
470 * @len: number of bytes to splice 468 * @len: number of bytes to splice
471 * @flags: splice modifier flags 469 * @flags: splice modifier flags
472 * 470 *
473 * Will read pages from given file and fill them into a pipe. 471 * Description:
472 * Will read pages from given file and fill them into a pipe. Can be
473 * used as long as the address_space operations for the source implement
474 * a readpage() hook.
475 *
474 */ 476 */
475ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, 477ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
476 struct pipe_inode_info *pipe, size_t len, 478 struct pipe_inode_info *pipe, size_t len,
@@ -478,11 +480,19 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
478{ 480{
479 ssize_t spliced; 481 ssize_t spliced;
480 int ret; 482 int ret;
483 loff_t isize, left;
484
485 isize = i_size_read(in->f_mapping->host);
486 if (unlikely(*ppos >= isize))
487 return 0;
488
489 left = isize - *ppos;
490 if (unlikely(left < len))
491 len = left;
481 492
482 ret = 0; 493 ret = 0;
483 spliced = 0; 494 spliced = 0;
484 495 while (len && !spliced) {
485 while (len) {
486 ret = __generic_file_splice_read(in, ppos, pipe, len, flags); 496 ret = __generic_file_splice_read(in, ppos, pipe, len, flags);
487 497
488 if (ret < 0) 498 if (ret < 0)
@@ -516,11 +526,11 @@ EXPORT_SYMBOL(generic_file_splice_read);
516static int pipe_to_sendpage(struct pipe_inode_info *pipe, 526static int pipe_to_sendpage(struct pipe_inode_info *pipe,
517 struct pipe_buffer *buf, struct splice_desc *sd) 527 struct pipe_buffer *buf, struct splice_desc *sd)
518{ 528{
519 struct file *file = sd->file; 529 struct file *file = sd->u.file;
520 loff_t pos = sd->pos; 530 loff_t pos = sd->pos;
521 int ret, more; 531 int ret, more;
522 532
523 ret = buf->ops->pin(pipe, buf); 533 ret = buf->ops->confirm(pipe, buf);
524 if (!ret) { 534 if (!ret) {
525 more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len; 535 more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
526 536
@@ -554,7 +564,7 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
554static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, 564static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
555 struct splice_desc *sd) 565 struct splice_desc *sd)
556{ 566{
557 struct file *file = sd->file; 567 struct file *file = sd->u.file;
558 struct address_space *mapping = file->f_mapping; 568 struct address_space *mapping = file->f_mapping;
559 unsigned int offset, this_len; 569 unsigned int offset, this_len;
560 struct page *page; 570 struct page *page;
@@ -564,7 +574,7 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
564 /* 574 /*
565 * make sure the data in this buffer is uptodate 575 * make sure the data in this buffer is uptodate
566 */ 576 */
567 ret = buf->ops->pin(pipe, buf); 577 ret = buf->ops->confirm(pipe, buf);
568 if (unlikely(ret)) 578 if (unlikely(ret))
569 return ret; 579 return ret;
570 580
@@ -644,7 +654,6 @@ find_page:
644 * accessed, we are now done! 654 * accessed, we are now done!
645 */ 655 */
646 mark_page_accessed(page); 656 mark_page_accessed(page);
647 balance_dirty_pages_ratelimited(mapping);
648out: 657out:
649 page_cache_release(page); 658 page_cache_release(page);
650 unlock_page(page); 659 unlock_page(page);
@@ -652,36 +661,37 @@ out_ret:
652 return ret; 661 return ret;
653} 662}
654 663
655/* 664/**
656 * Pipe input worker. Most of this logic works like a regular pipe, the 665 * __splice_from_pipe - splice data from a pipe to given actor
657 * key here is the 'actor' worker passed in that actually moves the data 666 * @pipe: pipe to splice from
658 * to the wanted destination. See pipe_to_file/pipe_to_sendpage above. 667 * @sd: information to @actor
668 * @actor: handler that splices the data
669 *
670 * Description:
671 * This function does little more than loop over the pipe and call
672 * @actor to do the actual moving of a single struct pipe_buffer to
673 * the desired destination. See pipe_to_file, pipe_to_sendpage, or
674 * pipe_to_user.
675 *
659 */ 676 */
660ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, 677ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd,
661 struct file *out, loff_t *ppos, size_t len, 678 splice_actor *actor)
662 unsigned int flags, splice_actor *actor)
663{ 679{
664 int ret, do_wakeup, err; 680 int ret, do_wakeup, err;
665 struct splice_desc sd;
666 681
667 ret = 0; 682 ret = 0;
668 do_wakeup = 0; 683 do_wakeup = 0;
669 684
670 sd.total_len = len;
671 sd.flags = flags;
672 sd.file = out;
673 sd.pos = *ppos;
674
675 for (;;) { 685 for (;;) {
676 if (pipe->nrbufs) { 686 if (pipe->nrbufs) {
677 struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; 687 struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
678 const struct pipe_buf_operations *ops = buf->ops; 688 const struct pipe_buf_operations *ops = buf->ops;
679 689
680 sd.len = buf->len; 690 sd->len = buf->len;
681 if (sd.len > sd.total_len) 691 if (sd->len > sd->total_len)
682 sd.len = sd.total_len; 692 sd->len = sd->total_len;
683 693
684 err = actor(pipe, buf, &sd); 694 err = actor(pipe, buf, sd);
685 if (err <= 0) { 695 if (err <= 0) {
686 if (!ret && err != -ENODATA) 696 if (!ret && err != -ENODATA)
687 ret = err; 697 ret = err;
@@ -693,10 +703,10 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
693 buf->offset += err; 703 buf->offset += err;
694 buf->len -= err; 704 buf->len -= err;
695 705
696 sd.len -= err; 706 sd->len -= err;
697 sd.pos += err; 707 sd->pos += err;
698 sd.total_len -= err; 708 sd->total_len -= err;
699 if (sd.len) 709 if (sd->len)
700 continue; 710 continue;
701 711
702 if (!buf->len) { 712 if (!buf->len) {
@@ -708,7 +718,7 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
708 do_wakeup = 1; 718 do_wakeup = 1;
709 } 719 }
710 720
711 if (!sd.total_len) 721 if (!sd->total_len)
712 break; 722 break;
713 } 723 }
714 724
@@ -721,7 +731,7 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
721 break; 731 break;
722 } 732 }
723 733
724 if (flags & SPLICE_F_NONBLOCK) { 734 if (sd->flags & SPLICE_F_NONBLOCK) {
725 if (!ret) 735 if (!ret)
726 ret = -EAGAIN; 736 ret = -EAGAIN;
727 break; 737 break;
@@ -755,12 +765,32 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
755} 765}
756EXPORT_SYMBOL(__splice_from_pipe); 766EXPORT_SYMBOL(__splice_from_pipe);
757 767
768/**
769 * splice_from_pipe - splice data from a pipe to a file
770 * @pipe: pipe to splice from
771 * @out: file to splice to
772 * @ppos: position in @out
773 * @len: how many bytes to splice
774 * @flags: splice modifier flags
775 * @actor: handler that splices the data
776 *
777 * Description:
778 * See __splice_from_pipe. This function locks the input and output inodes,
779 * otherwise it's identical to __splice_from_pipe().
780 *
781 */
758ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, 782ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
759 loff_t *ppos, size_t len, unsigned int flags, 783 loff_t *ppos, size_t len, unsigned int flags,
760 splice_actor *actor) 784 splice_actor *actor)
761{ 785{
762 ssize_t ret; 786 ssize_t ret;
763 struct inode *inode = out->f_mapping->host; 787 struct inode *inode = out->f_mapping->host;
788 struct splice_desc sd = {
789 .total_len = len,
790 .flags = flags,
791 .pos = *ppos,
792 .u.file = out,
793 };
764 794
765 /* 795 /*
766 * The actor worker might be calling ->prepare_write and 796 * The actor worker might be calling ->prepare_write and
@@ -769,7 +799,7 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
769 * pipe->inode, we have to order lock acquiry here. 799 * pipe->inode, we have to order lock acquiry here.
770 */ 800 */
771 inode_double_lock(inode, pipe->inode); 801 inode_double_lock(inode, pipe->inode);
772 ret = __splice_from_pipe(pipe, out, ppos, len, flags, actor); 802 ret = __splice_from_pipe(pipe, &sd, actor);
773 inode_double_unlock(inode, pipe->inode); 803 inode_double_unlock(inode, pipe->inode);
774 804
775 return ret; 805 return ret;
@@ -779,12 +809,14 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
779 * generic_file_splice_write_nolock - generic_file_splice_write without mutexes 809 * generic_file_splice_write_nolock - generic_file_splice_write without mutexes
780 * @pipe: pipe info 810 * @pipe: pipe info
781 * @out: file to write to 811 * @out: file to write to
812 * @ppos: position in @out
782 * @len: number of bytes to splice 813 * @len: number of bytes to splice
783 * @flags: splice modifier flags 814 * @flags: splice modifier flags
784 * 815 *
785 * Will either move or copy pages (determined by @flags options) from 816 * Description:
786 * the given pipe inode to the given file. The caller is responsible 817 * Will either move or copy pages (determined by @flags options) from
787 * for acquiring i_mutex on both inodes. 818 * the given pipe inode to the given file. The caller is responsible
819 * for acquiring i_mutex on both inodes.
788 * 820 *
789 */ 821 */
790ssize_t 822ssize_t
@@ -793,6 +825,12 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out,
793{ 825{
794 struct address_space *mapping = out->f_mapping; 826 struct address_space *mapping = out->f_mapping;
795 struct inode *inode = mapping->host; 827 struct inode *inode = mapping->host;
828 struct splice_desc sd = {
829 .total_len = len,
830 .flags = flags,
831 .pos = *ppos,
832 .u.file = out,
833 };
796 ssize_t ret; 834 ssize_t ret;
797 int err; 835 int err;
798 836
@@ -800,9 +838,12 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out,
800 if (unlikely(err)) 838 if (unlikely(err))
801 return err; 839 return err;
802 840
803 ret = __splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); 841 ret = __splice_from_pipe(pipe, &sd, pipe_to_file);
804 if (ret > 0) { 842 if (ret > 0) {
843 unsigned long nr_pages;
844
805 *ppos += ret; 845 *ppos += ret;
846 nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
806 847
807 /* 848 /*
808 * If file or inode is SYNC and we actually wrote some data, 849 * If file or inode is SYNC and we actually wrote some data,
@@ -815,6 +856,7 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out,
815 if (err) 856 if (err)
816 ret = err; 857 ret = err;
817 } 858 }
859 balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
818 } 860 }
819 861
820 return ret; 862 return ret;
@@ -826,11 +868,13 @@ EXPORT_SYMBOL(generic_file_splice_write_nolock);
826 * generic_file_splice_write - splice data from a pipe to a file 868 * generic_file_splice_write - splice data from a pipe to a file
827 * @pipe: pipe info 869 * @pipe: pipe info
828 * @out: file to write to 870 * @out: file to write to
871 * @ppos: position in @out
829 * @len: number of bytes to splice 872 * @len: number of bytes to splice
830 * @flags: splice modifier flags 873 * @flags: splice modifier flags
831 * 874 *
832 * Will either move or copy pages (determined by @flags options) from 875 * Description:
833 * the given pipe inode to the given file. 876 * Will either move or copy pages (determined by @flags options) from
877 * the given pipe inode to the given file.
834 * 878 *
835 */ 879 */
836ssize_t 880ssize_t
@@ -853,7 +897,10 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
853 897
854 ret = splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); 898 ret = splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file);
855 if (ret > 0) { 899 if (ret > 0) {
900 unsigned long nr_pages;
901
856 *ppos += ret; 902 *ppos += ret;
903 nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
857 904
858 /* 905 /*
859 * If file or inode is SYNC and we actually wrote some data, 906 * If file or inode is SYNC and we actually wrote some data,
@@ -868,6 +915,7 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
868 if (err) 915 if (err)
869 ret = err; 916 ret = err;
870 } 917 }
918 balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
871 } 919 }
872 920
873 return ret; 921 return ret;
@@ -877,13 +925,15 @@ EXPORT_SYMBOL(generic_file_splice_write);
877 925
878/** 926/**
879 * generic_splice_sendpage - splice data from a pipe to a socket 927 * generic_splice_sendpage - splice data from a pipe to a socket
880 * @inode: pipe inode 928 * @pipe: pipe to splice from
881 * @out: socket to write to 929 * @out: socket to write to
930 * @ppos: position in @out
882 * @len: number of bytes to splice 931 * @len: number of bytes to splice
883 * @flags: splice modifier flags 932 * @flags: splice modifier flags
884 * 933 *
885 * Will send @len bytes from the pipe to a network socket. No data copying 934 * Description:
886 * is involved. 935 * Will send @len bytes from the pipe to a network socket. No data copying
936 * is involved.
887 * 937 *
888 */ 938 */
889ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out, 939ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out,
@@ -912,6 +962,10 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
912 if (unlikely(ret < 0)) 962 if (unlikely(ret < 0))
913 return ret; 963 return ret;
914 964
965 ret = security_file_permission(out, MAY_WRITE);
966 if (unlikely(ret < 0))
967 return ret;
968
915 return out->f_op->splice_write(pipe, out, ppos, len, flags); 969 return out->f_op->splice_write(pipe, out, ppos, len, flags);
916} 970}
917 971
@@ -922,7 +976,6 @@ static long do_splice_to(struct file *in, loff_t *ppos,
922 struct pipe_inode_info *pipe, size_t len, 976 struct pipe_inode_info *pipe, size_t len,
923 unsigned int flags) 977 unsigned int flags)
924{ 978{
925 loff_t isize, left;
926 int ret; 979 int ret;
927 980
928 if (unlikely(!in->f_op || !in->f_op->splice_read)) 981 if (unlikely(!in->f_op || !in->f_op->splice_read))
@@ -935,25 +988,34 @@ static long do_splice_to(struct file *in, loff_t *ppos,
935 if (unlikely(ret < 0)) 988 if (unlikely(ret < 0))
936 return ret; 989 return ret;
937 990
938 isize = i_size_read(in->f_mapping->host); 991 ret = security_file_permission(in, MAY_READ);
939 if (unlikely(*ppos >= isize)) 992 if (unlikely(ret < 0))
940 return 0; 993 return ret;
941
942 left = isize - *ppos;
943 if (unlikely(left < len))
944 len = left;
945 994
946 return in->f_op->splice_read(in, ppos, pipe, len, flags); 995 return in->f_op->splice_read(in, ppos, pipe, len, flags);
947} 996}
948 997
949long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, 998/**
950 size_t len, unsigned int flags) 999 * splice_direct_to_actor - splices data directly between two non-pipes
1000 * @in: file to splice from
1001 * @sd: actor information on where to splice to
1002 * @actor: handles the data splicing
1003 *
1004 * Description:
1005 * This is a special case helper to splice directly between two
1006 * points, without requiring an explicit pipe. Internally an allocated
1007 * pipe is cached in the process, and reused during the life time of
1008 * that process.
1009 *
1010 */
1011ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
1012 splice_direct_actor *actor)
951{ 1013{
952 struct pipe_inode_info *pipe; 1014 struct pipe_inode_info *pipe;
953 long ret, bytes; 1015 long ret, bytes;
954 loff_t out_off;
955 umode_t i_mode; 1016 umode_t i_mode;
956 int i; 1017 size_t len;
1018 int i, flags;
957 1019
958 /* 1020 /*
959 * We require the input being a regular file, as we don't want to 1021 * We require the input being a regular file, as we don't want to
@@ -989,49 +1051,41 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
989 */ 1051 */
990 ret = 0; 1052 ret = 0;
991 bytes = 0; 1053 bytes = 0;
992 out_off = 0; 1054 len = sd->total_len;
1055 flags = sd->flags;
993 1056
994 while (len) { 1057 /*
995 size_t read_len, max_read_len; 1058 * Don't block on output, we have to drain the direct pipe.
1059 */
1060 sd->flags &= ~SPLICE_F_NONBLOCK;
996 1061
997 /* 1062 while (len) {
998 * Do at most PIPE_BUFFERS pages worth of transfer: 1063 size_t read_len;
999 */
1000 max_read_len = min(len, (size_t)(PIPE_BUFFERS*PAGE_SIZE));
1001 1064
1002 ret = do_splice_to(in, ppos, pipe, max_read_len, flags); 1065 ret = do_splice_to(in, &sd->pos, pipe, len, flags);
1003 if (unlikely(ret < 0)) 1066 if (unlikely(ret <= 0))
1004 goto out_release; 1067 goto out_release;
1005 1068
1006 read_len = ret; 1069 read_len = ret;
1070 sd->total_len = read_len;
1007 1071
1008 /* 1072 /*
1009 * NOTE: nonblocking mode only applies to the input. We 1073 * NOTE: nonblocking mode only applies to the input. We
1010 * must not do the output in nonblocking mode as then we 1074 * must not do the output in nonblocking mode as then we
1011 * could get stuck data in the internal pipe: 1075 * could get stuck data in the internal pipe:
1012 */ 1076 */
1013 ret = do_splice_from(pipe, out, &out_off, read_len, 1077 ret = actor(pipe, sd);
1014 flags & ~SPLICE_F_NONBLOCK); 1078 if (unlikely(ret <= 0))
1015 if (unlikely(ret < 0))
1016 goto out_release; 1079 goto out_release;
1017 1080
1018 bytes += ret; 1081 bytes += ret;
1019 len -= ret; 1082 len -= ret;
1020 1083
1021 /* 1084 if (ret < read_len)
1022 * In nonblocking mode, if we got back a short read then 1085 goto out_release;
1023 * that was due to either an IO error or due to the
1024 * pagecache entry not being there. In the IO error case
1025 * the _next_ splice attempt will produce a clean IO error
1026 * return value (not a short read), so in both cases it's
1027 * correct to break out of the loop here:
1028 */
1029 if ((flags & SPLICE_F_NONBLOCK) && (read_len < max_read_len))
1030 break;
1031 } 1086 }
1032 1087
1033 pipe->nrbufs = pipe->curbuf = 0; 1088 pipe->nrbufs = pipe->curbuf = 0;
1034
1035 return bytes; 1089 return bytes;
1036 1090
1037out_release: 1091out_release:
@@ -1056,9 +1110,51 @@ out_release:
1056 return bytes; 1110 return bytes;
1057 1111
1058 return ret; 1112 return ret;
1113
1059} 1114}
1115EXPORT_SYMBOL(splice_direct_to_actor);
1060 1116
1061EXPORT_SYMBOL(do_splice_direct); 1117static int direct_splice_actor(struct pipe_inode_info *pipe,
1118 struct splice_desc *sd)
1119{
1120 struct file *file = sd->u.file;
1121
1122 return do_splice_from(pipe, file, &sd->pos, sd->total_len, sd->flags);
1123}
1124
1125/**
1126 * do_splice_direct - splices data directly between two files
1127 * @in: file to splice from
1128 * @ppos: input file offset
1129 * @out: file to splice to
1130 * @len: number of bytes to splice
1131 * @flags: splice modifier flags
1132 *
1133 * Description:
1134 * For use by do_sendfile(). splice can easily emulate sendfile, but
1135 * doing it in the application would incur an extra system call
1136 * (splice in + splice out, as compared to just sendfile()). So this helper
1137 * can splice directly through a process-private pipe.
1138 *
1139 */
1140long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
1141 size_t len, unsigned int flags)
1142{
1143 struct splice_desc sd = {
1144 .len = len,
1145 .total_len = len,
1146 .flags = flags,
1147 .pos = *ppos,
1148 .u.file = out,
1149 };
1150 long ret;
1151
1152 ret = splice_direct_to_actor(in, &sd, direct_splice_actor);
1153 if (ret > 0)
1154 *ppos += ret;
1155
1156 return ret;
1157}
1062 1158
1063/* 1159/*
1064 * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same 1160 * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same
@@ -1240,28 +1336,131 @@ static int get_iovec_page_array(const struct iovec __user *iov,
1240 return error; 1336 return error;
1241} 1337}
1242 1338
1339static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
1340 struct splice_desc *sd)
1341{
1342 char *src;
1343 int ret;
1344
1345 ret = buf->ops->confirm(pipe, buf);
1346 if (unlikely(ret))
1347 return ret;
1348
1349 /*
1350 * See if we can use the atomic maps, by prefaulting in the
1351 * pages and doing an atomic copy
1352 */
1353 if (!fault_in_pages_writeable(sd->u.userptr, sd->len)) {
1354 src = buf->ops->map(pipe, buf, 1);
1355 ret = __copy_to_user_inatomic(sd->u.userptr, src + buf->offset,
1356 sd->len);
1357 buf->ops->unmap(pipe, buf, src);
1358 if (!ret) {
1359 ret = sd->len;
1360 goto out;
1361 }
1362 }
1363
1364 /*
1365 * No dice, use slow non-atomic map and copy
1366 */
1367 src = buf->ops->map(pipe, buf, 0);
1368
1369 ret = sd->len;
1370 if (copy_to_user(sd->u.userptr, src + buf->offset, sd->len))
1371 ret = -EFAULT;
1372
1373out:
1374 if (ret > 0)
1375 sd->u.userptr += ret;
1376 buf->ops->unmap(pipe, buf, src);
1377 return ret;
1378}
1379
1380/*
1381 * For lack of a better implementation, implement vmsplice() to userspace
1382 * as a simple copy of the pipes pages to the user iov.
1383 */
1384static long vmsplice_to_user(struct file *file, const struct iovec __user *iov,
1385 unsigned long nr_segs, unsigned int flags)
1386{
1387 struct pipe_inode_info *pipe;
1388 struct splice_desc sd;
1389 ssize_t size;
1390 int error;
1391 long ret;
1392
1393 pipe = pipe_info(file->f_path.dentry->d_inode);
1394 if (!pipe)
1395 return -EBADF;
1396
1397 if (pipe->inode)
1398 mutex_lock(&pipe->inode->i_mutex);
1399
1400 error = ret = 0;
1401 while (nr_segs) {
1402 void __user *base;
1403 size_t len;
1404
1405 /*
1406 * Get user address base and length for this iovec.
1407 */
1408 error = get_user(base, &iov->iov_base);
1409 if (unlikely(error))
1410 break;
1411 error = get_user(len, &iov->iov_len);
1412 if (unlikely(error))
1413 break;
1414
1415 /*
1416 * Sanity check this iovec. 0 read succeeds.
1417 */
1418 if (unlikely(!len))
1419 break;
1420 if (unlikely(!base)) {
1421 error = -EFAULT;
1422 break;
1423 }
1424
1425 sd.len = 0;
1426 sd.total_len = len;
1427 sd.flags = flags;
1428 sd.u.userptr = base;
1429 sd.pos = 0;
1430
1431 size = __splice_from_pipe(pipe, &sd, pipe_to_user);
1432 if (size < 0) {
1433 if (!ret)
1434 ret = size;
1435
1436 break;
1437 }
1438
1439 ret += size;
1440
1441 if (size < len)
1442 break;
1443
1444 nr_segs--;
1445 iov++;
1446 }
1447
1448 if (pipe->inode)
1449 mutex_unlock(&pipe->inode->i_mutex);
1450
1451 if (!ret)
1452 ret = error;
1453
1454 return ret;
1455}
1456
1243/* 1457/*
1244 * vmsplice splices a user address range into a pipe. It can be thought of 1458 * vmsplice splices a user address range into a pipe. It can be thought of
1245 * as splice-from-memory, where the regular splice is splice-from-file (or 1459 * as splice-from-memory, where the regular splice is splice-from-file (or
1246 * to file). In both cases the output is a pipe, naturally. 1460 * to file). In both cases the output is a pipe, naturally.
1247 *
1248 * Note that vmsplice only supports splicing _from_ user memory to a pipe,
1249 * not the other way around. Splicing from user memory is a simple operation
1250 * that can be supported without any funky alignment restrictions or nasty
1251 * vm tricks. We simply map in the user memory and fill them into a pipe.
1252 * The reverse isn't quite as easy, though. There are two possible solutions
1253 * for that:
1254 *
1255 * - memcpy() the data internally, at which point we might as well just
1256 * do a regular read() on the buffer anyway.
1257 * - Lots of nasty vm tricks, that are neither fast nor flexible (it
1258 * has restriction limitations on both ends of the pipe).
1259 *
1260 * Alas, it isn't here.
1261 *
1262 */ 1461 */
1263static long do_vmsplice(struct file *file, const struct iovec __user *iov, 1462static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
1264 unsigned long nr_segs, unsigned int flags) 1463 unsigned long nr_segs, unsigned int flags)
1265{ 1464{
1266 struct pipe_inode_info *pipe; 1465 struct pipe_inode_info *pipe;
1267 struct page *pages[PIPE_BUFFERS]; 1466 struct page *pages[PIPE_BUFFERS];
@@ -1276,10 +1475,6 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov,
1276 pipe = pipe_info(file->f_path.dentry->d_inode); 1475 pipe = pipe_info(file->f_path.dentry->d_inode);
1277 if (!pipe) 1476 if (!pipe)
1278 return -EBADF; 1477 return -EBADF;
1279 if (unlikely(nr_segs > UIO_MAXIOV))
1280 return -EINVAL;
1281 else if (unlikely(!nr_segs))
1282 return 0;
1283 1478
1284 spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial, 1479 spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial,
1285 flags & SPLICE_F_GIFT); 1480 flags & SPLICE_F_GIFT);
@@ -1289,6 +1484,22 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov,
1289 return splice_to_pipe(pipe, &spd); 1484 return splice_to_pipe(pipe, &spd);
1290} 1485}
1291 1486
1487/*
1488 * Note that vmsplice only really supports true splicing _from_ user memory
1489 * to a pipe, not the other way around. Splicing from user memory is a simple
1490 * operation that can be supported without any funky alignment restrictions
1491 * or nasty vm tricks. We simply map in the user memory and fill them into
1492 * a pipe. The reverse isn't quite as easy, though. There are two possible
1493 * solutions for that:
1494 *
1495 * - memcpy() the data internally, at which point we might as well just
1496 * do a regular read() on the buffer anyway.
1497 * - Lots of nasty vm tricks, that are neither fast nor flexible (it
1498 * has restriction limitations on both ends of the pipe).
1499 *
1500 * Currently we punt and implement it as a normal copy, see pipe_to_user().
1501 *
1502 */
1292asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov, 1503asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov,
1293 unsigned long nr_segs, unsigned int flags) 1504 unsigned long nr_segs, unsigned int flags)
1294{ 1505{
@@ -1296,11 +1507,18 @@ asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov,
1296 long error; 1507 long error;
1297 int fput; 1508 int fput;
1298 1509
1510 if (unlikely(nr_segs > UIO_MAXIOV))
1511 return -EINVAL;
1512 else if (unlikely(!nr_segs))
1513 return 0;
1514
1299 error = -EBADF; 1515 error = -EBADF;
1300 file = fget_light(fd, &fput); 1516 file = fget_light(fd, &fput);
1301 if (file) { 1517 if (file) {
1302 if (file->f_mode & FMODE_WRITE) 1518 if (file->f_mode & FMODE_WRITE)
1303 error = do_vmsplice(file, iov, nr_segs, flags); 1519 error = vmsplice_to_pipe(file, iov, nr_segs, flags);
1520 else if (file->f_mode & FMODE_READ)
1521 error = vmsplice_to_user(file, iov, nr_segs, flags);
1304 1522
1305 fput_light(file, fput); 1523 fput_light(file, fput);
1306 } 1524 }
diff --git a/fs/sync.c b/fs/sync.c
index 2f97576355b8..7cd005ea7639 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -236,6 +236,14 @@ out:
236 return ret; 236 return ret;
237} 237}
238 238
239/* It would be nice if people remember that not all the world's an i386
240 when they introduce new system calls */
241asmlinkage long sys_sync_file_range2(int fd, unsigned int flags,
242 loff_t offset, loff_t nbytes)
243{
244 return sys_sync_file_range(fd, offset, nbytes, flags);
245}
246
239/* 247/*
240 * `endbyte' is inclusive 248 * `endbyte' is inclusive
241 */ 249 */
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index d3b9f5f07db1..135353f8a296 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -20,29 +20,41 @@
20 20
21#include "sysfs.h" 21#include "sysfs.h"
22 22
23struct bin_buffer {
24 struct mutex mutex;
25 void *buffer;
26 int mmapped;
27};
28
23static int 29static int
24fill_read(struct dentry *dentry, char *buffer, loff_t off, size_t count) 30fill_read(struct dentry *dentry, char *buffer, loff_t off, size_t count)
25{ 31{
26 struct bin_attribute * attr = to_bin_attr(dentry); 32 struct sysfs_dirent *attr_sd = dentry->d_fsdata;
27 struct kobject * kobj = to_kobj(dentry->d_parent); 33 struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
34 struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
35 int rc;
36
37 /* need attr_sd for attr, its parent for kobj */
38 if (!sysfs_get_active_two(attr_sd))
39 return -ENODEV;
28 40
29 if (!attr->read) 41 rc = -EIO;
30 return -EIO; 42 if (attr->read)
43 rc = attr->read(kobj, attr, buffer, off, count);
31 44
32 return attr->read(kobj, buffer, off, count); 45 sysfs_put_active_two(attr_sd);
46
47 return rc;
33} 48}
34 49
35static ssize_t 50static ssize_t
36read(struct file * file, char __user * userbuf, size_t count, loff_t * off) 51read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off)
37{ 52{
38 char *buffer = file->private_data; 53 struct bin_buffer *bb = file->private_data;
39 struct dentry *dentry = file->f_path.dentry; 54 struct dentry *dentry = file->f_path.dentry;
40 int size = dentry->d_inode->i_size; 55 int size = dentry->d_inode->i_size;
41 loff_t offs = *off; 56 loff_t offs = *off;
42 int ret; 57 int count = min_t(size_t, bytes, PAGE_SIZE);
43
44 if (count > PAGE_SIZE)
45 count = PAGE_SIZE;
46 58
47 if (size) { 59 if (size) {
48 if (offs > size) 60 if (offs > size)
@@ -51,43 +63,56 @@ read(struct file * file, char __user * userbuf, size_t count, loff_t * off)
51 count = size - offs; 63 count = size - offs;
52 } 64 }
53 65
54 ret = fill_read(dentry, buffer, offs, count); 66 mutex_lock(&bb->mutex);
55 if (ret < 0) 67
56 return ret; 68 count = fill_read(dentry, bb->buffer, offs, count);
57 count = ret; 69 if (count < 0)
70 goto out_unlock;
58 71
59 if (copy_to_user(userbuf, buffer, count)) 72 if (copy_to_user(userbuf, bb->buffer, count)) {
60 return -EFAULT; 73 count = -EFAULT;
74 goto out_unlock;
75 }
61 76
62 pr_debug("offs = %lld, *off = %lld, count = %zd\n", offs, *off, count); 77 pr_debug("offs = %lld, *off = %lld, count = %d\n", offs, *off, count);
63 78
64 *off = offs + count; 79 *off = offs + count;
65 80
81 out_unlock:
82 mutex_unlock(&bb->mutex);
66 return count; 83 return count;
67} 84}
68 85
69static int 86static int
70flush_write(struct dentry *dentry, char *buffer, loff_t offset, size_t count) 87flush_write(struct dentry *dentry, char *buffer, loff_t offset, size_t count)
71{ 88{
72 struct bin_attribute *attr = to_bin_attr(dentry); 89 struct sysfs_dirent *attr_sd = dentry->d_fsdata;
73 struct kobject *kobj = to_kobj(dentry->d_parent); 90 struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
91 struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
92 int rc;
93
94 /* need attr_sd for attr, its parent for kobj */
95 if (!sysfs_get_active_two(attr_sd))
96 return -ENODEV;
97
98 rc = -EIO;
99 if (attr->write)
100 rc = attr->write(kobj, attr, buffer, offset, count);
74 101
75 if (!attr->write) 102 sysfs_put_active_two(attr_sd);
76 return -EIO;
77 103
78 return attr->write(kobj, buffer, offset, count); 104 return rc;
79} 105}
80 106
81static ssize_t write(struct file * file, const char __user * userbuf, 107static ssize_t write(struct file *file, const char __user *userbuf,
82 size_t count, loff_t * off) 108 size_t bytes, loff_t *off)
83{ 109{
84 char *buffer = file->private_data; 110 struct bin_buffer *bb = file->private_data;
85 struct dentry *dentry = file->f_path.dentry; 111 struct dentry *dentry = file->f_path.dentry;
86 int size = dentry->d_inode->i_size; 112 int size = dentry->d_inode->i_size;
87 loff_t offs = *off; 113 loff_t offs = *off;
114 int count = min_t(size_t, bytes, PAGE_SIZE);
88 115
89 if (count > PAGE_SIZE)
90 count = PAGE_SIZE;
91 if (size) { 116 if (size) {
92 if (offs > size) 117 if (offs > size)
93 return 0; 118 return 0;
@@ -95,72 +120,100 @@ static ssize_t write(struct file * file, const char __user * userbuf,
95 count = size - offs; 120 count = size - offs;
96 } 121 }
97 122
98 if (copy_from_user(buffer, userbuf, count)) 123 mutex_lock(&bb->mutex);
99 return -EFAULT;
100 124
101 count = flush_write(dentry, buffer, offs, count); 125 if (copy_from_user(bb->buffer, userbuf, count)) {
126 count = -EFAULT;
127 goto out_unlock;
128 }
129
130 count = flush_write(dentry, bb->buffer, offs, count);
102 if (count > 0) 131 if (count > 0)
103 *off = offs + count; 132 *off = offs + count;
133
134 out_unlock:
135 mutex_unlock(&bb->mutex);
104 return count; 136 return count;
105} 137}
106 138
107static int mmap(struct file *file, struct vm_area_struct *vma) 139static int mmap(struct file *file, struct vm_area_struct *vma)
108{ 140{
109 struct dentry *dentry = file->f_path.dentry; 141 struct bin_buffer *bb = file->private_data;
110 struct bin_attribute *attr = to_bin_attr(dentry); 142 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
111 struct kobject *kobj = to_kobj(dentry->d_parent); 143 struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
144 struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
145 int rc;
146
147 mutex_lock(&bb->mutex);
148
149 /* need attr_sd for attr, its parent for kobj */
150 if (!sysfs_get_active_two(attr_sd))
151 return -ENODEV;
112 152
113 if (!attr->mmap) 153 rc = -EINVAL;
114 return -EINVAL; 154 if (attr->mmap)
155 rc = attr->mmap(kobj, attr, vma);
115 156
116 return attr->mmap(kobj, attr, vma); 157 if (rc == 0 && !bb->mmapped)
158 bb->mmapped = 1;
159 else
160 sysfs_put_active_two(attr_sd);
161
162 mutex_unlock(&bb->mutex);
163
164 return rc;
117} 165}
118 166
119static int open(struct inode * inode, struct file * file) 167static int open(struct inode * inode, struct file * file)
120{ 168{
121 struct kobject *kobj = sysfs_get_kobject(file->f_path.dentry->d_parent); 169 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
122 struct bin_attribute * attr = to_bin_attr(file->f_path.dentry); 170 struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
123 int error = -EINVAL; 171 struct bin_buffer *bb = NULL;
124 172 int error;
125 if (!kobj || !attr)
126 goto Done;
127 173
128 /* Grab the module reference for this attribute if we have one */ 174 /* need attr_sd for attr */
129 error = -ENODEV; 175 if (!sysfs_get_active(attr_sd))
130 if (!try_module_get(attr->attr.owner)) 176 return -ENODEV;
131 goto Done;
132 177
133 error = -EACCES; 178 error = -EACCES;
134 if ((file->f_mode & FMODE_WRITE) && !(attr->write || attr->mmap)) 179 if ((file->f_mode & FMODE_WRITE) && !(attr->write || attr->mmap))
135 goto Error; 180 goto err_out;
136 if ((file->f_mode & FMODE_READ) && !(attr->read || attr->mmap)) 181 if ((file->f_mode & FMODE_READ) && !(attr->read || attr->mmap))
137 goto Error; 182 goto err_out;
138 183
139 error = -ENOMEM; 184 error = -ENOMEM;
140 file->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL); 185 bb = kzalloc(sizeof(*bb), GFP_KERNEL);
141 if (!file->private_data) 186 if (!bb)
142 goto Error; 187 goto err_out;
143 188
144 error = 0; 189 bb->buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
145 goto Done; 190 if (!bb->buffer)
146 191 goto err_out;
147 Error: 192
148 module_put(attr->attr.owner); 193 mutex_init(&bb->mutex);
149 Done: 194 file->private_data = bb;
150 if (error) 195
151 kobject_put(kobj); 196 /* open succeeded, put active reference and pin attr_sd */
197 sysfs_put_active(attr_sd);
198 sysfs_get(attr_sd);
199 return 0;
200
201 err_out:
202 sysfs_put_active(attr_sd);
203 kfree(bb);
152 return error; 204 return error;
153} 205}
154 206
155static int release(struct inode * inode, struct file * file) 207static int release(struct inode * inode, struct file * file)
156{ 208{
157 struct kobject * kobj = to_kobj(file->f_path.dentry->d_parent); 209 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
158 struct bin_attribute * attr = to_bin_attr(file->f_path.dentry); 210 struct bin_buffer *bb = file->private_data;
159 u8 * buffer = file->private_data; 211
160 212 if (bb->mmapped)
161 kobject_put(kobj); 213 sysfs_put_active_two(attr_sd);
162 module_put(attr->attr.owner); 214 sysfs_put(attr_sd);
163 kfree(buffer); 215 kfree(bb->buffer);
216 kfree(bb);
164 return 0; 217 return 0;
165} 218}
166 219
@@ -181,9 +234,9 @@ const struct file_operations bin_fops = {
181 234
182int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr) 235int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr)
183{ 236{
184 BUG_ON(!kobj || !kobj->dentry || !attr); 237 BUG_ON(!kobj || !kobj->sd || !attr);
185 238
186 return sysfs_add_file(kobj->dentry, &attr->attr, SYSFS_KOBJ_BIN_ATTR); 239 return sysfs_add_file(kobj->sd, &attr->attr, SYSFS_KOBJ_BIN_ATTR);
187} 240}
188 241
189 242
@@ -195,7 +248,7 @@ int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr)
195 248
196void sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr) 249void sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr)
197{ 250{
198 if (sysfs_hash_and_remove(kobj->dentry, attr->attr.name) < 0) { 251 if (sysfs_hash_and_remove(kobj->sd, attr->attr.name) < 0) {
199 printk(KERN_ERR "%s: " 252 printk(KERN_ERR "%s: "
200 "bad dentry or inode or no such file: \"%s\"\n", 253 "bad dentry or inode or no such file: \"%s\"\n",
201 __FUNCTION__, attr->attr.name); 254 __FUNCTION__, attr->attr.name);
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 85a668680f82..aee966c44aac 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -9,18 +9,346 @@
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/kobject.h> 10#include <linux/kobject.h>
11#include <linux/namei.h> 11#include <linux/namei.h>
12#include <linux/idr.h>
13#include <linux/completion.h>
12#include <asm/semaphore.h> 14#include <asm/semaphore.h>
13#include "sysfs.h" 15#include "sysfs.h"
14 16
15DECLARE_RWSEM(sysfs_rename_sem); 17DEFINE_MUTEX(sysfs_mutex);
18spinlock_t sysfs_assoc_lock = SPIN_LOCK_UNLOCKED;
19
20static spinlock_t sysfs_ino_lock = SPIN_LOCK_UNLOCKED;
21static DEFINE_IDA(sysfs_ino_ida);
22
23/**
24 * sysfs_link_sibling - link sysfs_dirent into sibling list
25 * @sd: sysfs_dirent of interest
26 *
27 * Link @sd into its sibling list which starts from
28 * sd->s_parent->s_children.
29 *
30 * Locking:
31 * mutex_lock(sysfs_mutex)
32 */
33void sysfs_link_sibling(struct sysfs_dirent *sd)
34{
35 struct sysfs_dirent *parent_sd = sd->s_parent;
36
37 BUG_ON(sd->s_sibling);
38 sd->s_sibling = parent_sd->s_children;
39 parent_sd->s_children = sd;
40}
41
42/**
43 * sysfs_unlink_sibling - unlink sysfs_dirent from sibling list
44 * @sd: sysfs_dirent of interest
45 *
46 * Unlink @sd from its sibling list which starts from
47 * sd->s_parent->s_children.
48 *
49 * Locking:
50 * mutex_lock(sysfs_mutex)
51 */
52void sysfs_unlink_sibling(struct sysfs_dirent *sd)
53{
54 struct sysfs_dirent **pos;
55
56 for (pos = &sd->s_parent->s_children; *pos; pos = &(*pos)->s_sibling) {
57 if (*pos == sd) {
58 *pos = sd->s_sibling;
59 sd->s_sibling = NULL;
60 break;
61 }
62 }
63}
64
65/**
66 * sysfs_get_dentry - get dentry for the given sysfs_dirent
67 * @sd: sysfs_dirent of interest
68 *
69 * Get dentry for @sd. Dentry is looked up if currently not
70 * present. This function climbs sysfs_dirent tree till it
71 * reaches a sysfs_dirent with valid dentry attached and descends
72 * down from there looking up dentry for each step.
73 *
74 * LOCKING:
75 * Kernel thread context (may sleep)
76 *
77 * RETURNS:
78 * Pointer to found dentry on success, ERR_PTR() value on error.
79 */
80struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
81{
82 struct sysfs_dirent *cur;
83 struct dentry *parent_dentry, *dentry;
84 int i, depth;
85
86 /* Find the first parent which has valid s_dentry and get the
87 * dentry.
88 */
89 mutex_lock(&sysfs_mutex);
90 restart0:
91 spin_lock(&sysfs_assoc_lock);
92 restart1:
93 spin_lock(&dcache_lock);
94
95 dentry = NULL;
96 depth = 0;
97 cur = sd;
98 while (!cur->s_dentry || !cur->s_dentry->d_inode) {
99 if (cur->s_flags & SYSFS_FLAG_REMOVED) {
100 dentry = ERR_PTR(-ENOENT);
101 depth = 0;
102 break;
103 }
104 cur = cur->s_parent;
105 depth++;
106 }
107 if (!IS_ERR(dentry))
108 dentry = dget_locked(cur->s_dentry);
109
110 spin_unlock(&dcache_lock);
111 spin_unlock(&sysfs_assoc_lock);
112
113 /* from the found dentry, look up depth times */
114 while (depth--) {
115 /* find and get depth'th ancestor */
116 for (cur = sd, i = 0; cur && i < depth; i++)
117 cur = cur->s_parent;
118
119 /* This can happen if tree structure was modified due
120 * to move/rename. Restart.
121 */
122 if (i != depth) {
123 dput(dentry);
124 goto restart0;
125 }
126
127 sysfs_get(cur);
128
129 mutex_unlock(&sysfs_mutex);
130
131 /* look it up */
132 parent_dentry = dentry;
133 dentry = lookup_one_len_kern(cur->s_name, parent_dentry,
134 strlen(cur->s_name));
135 dput(parent_dentry);
136
137 if (IS_ERR(dentry)) {
138 sysfs_put(cur);
139 return dentry;
140 }
141
142 mutex_lock(&sysfs_mutex);
143 spin_lock(&sysfs_assoc_lock);
144
145 /* This, again, can happen if tree structure has
146 * changed and we looked up the wrong thing. Restart.
147 */
148 if (cur->s_dentry != dentry) {
149 dput(dentry);
150 sysfs_put(cur);
151 goto restart1;
152 }
153
154 spin_unlock(&sysfs_assoc_lock);
155
156 sysfs_put(cur);
157 }
158
159 mutex_unlock(&sysfs_mutex);
160 return dentry;
161}
162
163/**
164 * sysfs_get_active - get an active reference to sysfs_dirent
165 * @sd: sysfs_dirent to get an active reference to
166 *
167 * Get an active reference of @sd. This function is noop if @sd
168 * is NULL.
169 *
170 * RETURNS:
171 * Pointer to @sd on success, NULL on failure.
172 */
173struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd)
174{
175 if (unlikely(!sd))
176 return NULL;
177
178 while (1) {
179 int v, t;
180
181 v = atomic_read(&sd->s_active);
182 if (unlikely(v < 0))
183 return NULL;
184
185 t = atomic_cmpxchg(&sd->s_active, v, v + 1);
186 if (likely(t == v))
187 return sd;
188 if (t < 0)
189 return NULL;
190
191 cpu_relax();
192 }
193}
194
195/**
196 * sysfs_put_active - put an active reference to sysfs_dirent
197 * @sd: sysfs_dirent to put an active reference to
198 *
199 * Put an active reference to @sd. This function is noop if @sd
200 * is NULL.
201 */
202void sysfs_put_active(struct sysfs_dirent *sd)
203{
204 struct completion *cmpl;
205 int v;
206
207 if (unlikely(!sd))
208 return;
209
210 v = atomic_dec_return(&sd->s_active);
211 if (likely(v != SD_DEACTIVATED_BIAS))
212 return;
213
214 /* atomic_dec_return() is a mb(), we'll always see the updated
215 * sd->s_sibling.
216 */
217 cmpl = (void *)sd->s_sibling;
218 complete(cmpl);
219}
220
221/**
222 * sysfs_get_active_two - get active references to sysfs_dirent and parent
223 * @sd: sysfs_dirent of interest
224 *
225 * Get active reference to @sd and its parent. Parent's active
226 * reference is grabbed first. This function is noop if @sd is
227 * NULL.
228 *
229 * RETURNS:
230 * Pointer to @sd on success, NULL on failure.
231 */
232struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd)
233{
234 if (sd) {
235 if (sd->s_parent && unlikely(!sysfs_get_active(sd->s_parent)))
236 return NULL;
237 if (unlikely(!sysfs_get_active(sd))) {
238 sysfs_put_active(sd->s_parent);
239 return NULL;
240 }
241 }
242 return sd;
243}
244
245/**
246 * sysfs_put_active_two - put active references to sysfs_dirent and parent
247 * @sd: sysfs_dirent of interest
248 *
249 * Put active references to @sd and its parent. This function is
250 * noop if @sd is NULL.
251 */
252void sysfs_put_active_two(struct sysfs_dirent *sd)
253{
254 if (sd) {
255 sysfs_put_active(sd);
256 sysfs_put_active(sd->s_parent);
257 }
258}
259
260/**
261 * sysfs_deactivate - deactivate sysfs_dirent
262 * @sd: sysfs_dirent to deactivate
263 *
264 * Deny new active references and drain existing ones.
265 */
266static void sysfs_deactivate(struct sysfs_dirent *sd)
267{
268 DECLARE_COMPLETION_ONSTACK(wait);
269 int v;
270
271 BUG_ON(sd->s_sibling || !(sd->s_flags & SYSFS_FLAG_REMOVED));
272 sd->s_sibling = (void *)&wait;
273
274 /* atomic_add_return() is a mb(), put_active() will always see
275 * the updated sd->s_sibling.
276 */
277 v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active);
278
279 if (v != SD_DEACTIVATED_BIAS)
280 wait_for_completion(&wait);
281
282 sd->s_sibling = NULL;
283}
284
285static int sysfs_alloc_ino(ino_t *pino)
286{
287 int ino, rc;
288
289 retry:
290 spin_lock(&sysfs_ino_lock);
291 rc = ida_get_new_above(&sysfs_ino_ida, 2, &ino);
292 spin_unlock(&sysfs_ino_lock);
293
294 if (rc == -EAGAIN) {
295 if (ida_pre_get(&sysfs_ino_ida, GFP_KERNEL))
296 goto retry;
297 rc = -ENOMEM;
298 }
299
300 *pino = ino;
301 return rc;
302}
303
304static void sysfs_free_ino(ino_t ino)
305{
306 spin_lock(&sysfs_ino_lock);
307 ida_remove(&sysfs_ino_ida, ino);
308 spin_unlock(&sysfs_ino_lock);
309}
310
311void release_sysfs_dirent(struct sysfs_dirent * sd)
312{
313 struct sysfs_dirent *parent_sd;
314
315 repeat:
316 /* Moving/renaming is always done while holding reference.
317 * sd->s_parent won't change beneath us.
318 */
319 parent_sd = sd->s_parent;
320
321 if (sysfs_type(sd) == SYSFS_KOBJ_LINK)
322 sysfs_put(sd->s_elem.symlink.target_sd);
323 if (sysfs_type(sd) & SYSFS_COPY_NAME)
324 kfree(sd->s_name);
325 kfree(sd->s_iattr);
326 sysfs_free_ino(sd->s_ino);
327 kmem_cache_free(sysfs_dir_cachep, sd);
328
329 sd = parent_sd;
330 if (sd && atomic_dec_and_test(&sd->s_count))
331 goto repeat;
332}
16 333
17static void sysfs_d_iput(struct dentry * dentry, struct inode * inode) 334static void sysfs_d_iput(struct dentry * dentry, struct inode * inode)
18{ 335{
19 struct sysfs_dirent * sd = dentry->d_fsdata; 336 struct sysfs_dirent * sd = dentry->d_fsdata;
20 337
21 if (sd) { 338 if (sd) {
22 BUG_ON(sd->s_dentry != dentry); 339 /* sd->s_dentry is protected with sysfs_assoc_lock.
23 sd->s_dentry = NULL; 340 * This allows sysfs_drop_dentry() to dereference it.
341 */
342 spin_lock(&sysfs_assoc_lock);
343
344 /* The dentry might have been deleted or another
345 * lookup could have happened updating sd->s_dentry to
346 * point the new dentry. Ignore if it isn't pointing
347 * to this dentry.
348 */
349 if (sd->s_dentry == dentry)
350 sd->s_dentry = NULL;
351 spin_unlock(&sysfs_assoc_lock);
24 sysfs_put(sd); 352 sysfs_put(sd);
25 } 353 }
26 iput(inode); 354 iput(inode);
@@ -30,245 +358,402 @@ static struct dentry_operations sysfs_dentry_ops = {
30 .d_iput = sysfs_d_iput, 358 .d_iput = sysfs_d_iput,
31}; 359};
32 360
33/* 361struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
34 * Allocates a new sysfs_dirent and links it to the parent sysfs_dirent
35 */
36static struct sysfs_dirent * __sysfs_new_dirent(void * element)
37{ 362{
38 struct sysfs_dirent * sd; 363 char *dup_name = NULL;
364 struct sysfs_dirent *sd = NULL;
365
366 if (type & SYSFS_COPY_NAME) {
367 name = dup_name = kstrdup(name, GFP_KERNEL);
368 if (!name)
369 goto err_out;
370 }
39 371
40 sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL); 372 sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL);
41 if (!sd) 373 if (!sd)
42 return NULL; 374 goto err_out;
375
376 if (sysfs_alloc_ino(&sd->s_ino))
377 goto err_out;
43 378
44 atomic_set(&sd->s_count, 1); 379 atomic_set(&sd->s_count, 1);
380 atomic_set(&sd->s_active, 0);
45 atomic_set(&sd->s_event, 1); 381 atomic_set(&sd->s_event, 1);
46 INIT_LIST_HEAD(&sd->s_children); 382
47 INIT_LIST_HEAD(&sd->s_sibling); 383 sd->s_name = name;
48 sd->s_element = element; 384 sd->s_mode = mode;
385 sd->s_flags = type;
49 386
50 return sd; 387 return sd;
388
389 err_out:
390 kfree(dup_name);
391 kmem_cache_free(sysfs_dir_cachep, sd);
392 return NULL;
51} 393}
52 394
53static void __sysfs_list_dirent(struct sysfs_dirent *parent_sd, 395/**
54 struct sysfs_dirent *sd) 396 * sysfs_attach_dentry - associate sysfs_dirent with dentry
397 * @sd: target sysfs_dirent
398 * @dentry: dentry to associate
399 *
400 * Associate @sd with @dentry. This is protected by
401 * sysfs_assoc_lock to avoid race with sysfs_d_iput().
402 *
403 * LOCKING:
404 * mutex_lock(sysfs_mutex)
405 */
406static void sysfs_attach_dentry(struct sysfs_dirent *sd, struct dentry *dentry)
55{ 407{
56 if (sd) 408 dentry->d_op = &sysfs_dentry_ops;
57 list_add(&sd->s_sibling, &parent_sd->s_children); 409 dentry->d_fsdata = sysfs_get(sd);
410
411 /* protect sd->s_dentry against sysfs_d_iput */
412 spin_lock(&sysfs_assoc_lock);
413 sd->s_dentry = dentry;
414 spin_unlock(&sysfs_assoc_lock);
415
416 d_rehash(dentry);
58} 417}
59 418
60static struct sysfs_dirent * sysfs_new_dirent(struct sysfs_dirent *parent_sd, 419static int sysfs_ilookup_test(struct inode *inode, void *arg)
61 void * element)
62{ 420{
63 struct sysfs_dirent *sd; 421 struct sysfs_dirent *sd = arg;
64 sd = __sysfs_new_dirent(element); 422 return inode->i_ino == sd->s_ino;
65 __sysfs_list_dirent(parent_sd, sd);
66 return sd;
67} 423}
68 424
69/* 425/**
426 * sysfs_addrm_start - prepare for sysfs_dirent add/remove
427 * @acxt: pointer to sysfs_addrm_cxt to be used
428 * @parent_sd: parent sysfs_dirent
70 * 429 *
71 * Return -EEXIST if there is already a sysfs element with the same name for 430 * This function is called when the caller is about to add or
72 * the same parent. 431 * remove sysfs_dirent under @parent_sd. This function acquires
432 * sysfs_mutex, grabs inode for @parent_sd if available and locks
433 * i_mutex of it. @acxt is used to keep and pass context to
434 * other addrm functions.
73 * 435 *
74 * called with parent inode's i_mutex held 436 * LOCKING:
437 * Kernel thread context (may sleep). sysfs_mutex is locked on
438 * return. i_mutex of parent inode is locked on return if
439 * available.
75 */ 440 */
76int sysfs_dirent_exist(struct sysfs_dirent *parent_sd, 441void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
77 const unsigned char *new) 442 struct sysfs_dirent *parent_sd)
78{ 443{
79 struct sysfs_dirent * sd; 444 struct inode *inode;
80 445
81 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { 446 memset(acxt, 0, sizeof(*acxt));
82 if (sd->s_element) { 447 acxt->parent_sd = parent_sd;
83 const unsigned char *existing = sysfs_get_name(sd);
84 if (strcmp(existing, new))
85 continue;
86 else
87 return -EEXIST;
88 }
89 }
90 448
91 return 0; 449 /* Lookup parent inode. inode initialization and I_NEW
450 * clearing are protected by sysfs_mutex. By grabbing it and
451 * looking up with _nowait variant, inode state can be
452 * determined reliably.
453 */
454 mutex_lock(&sysfs_mutex);
455
456 inode = ilookup5_nowait(sysfs_sb, parent_sd->s_ino, sysfs_ilookup_test,
457 parent_sd);
458
459 if (inode && !(inode->i_state & I_NEW)) {
460 /* parent inode available */
461 acxt->parent_inode = inode;
462
463 /* sysfs_mutex is below i_mutex in lock hierarchy.
464 * First, trylock i_mutex. If fails, unlock
465 * sysfs_mutex and lock them in order.
466 */
467 if (!mutex_trylock(&inode->i_mutex)) {
468 mutex_unlock(&sysfs_mutex);
469 mutex_lock(&inode->i_mutex);
470 mutex_lock(&sysfs_mutex);
471 }
472 } else
473 iput(inode);
92} 474}
93 475
476/**
477 * sysfs_add_one - add sysfs_dirent to parent
478 * @acxt: addrm context to use
479 * @sd: sysfs_dirent to be added
480 *
481 * Get @acxt->parent_sd and set sd->s_parent to it and increment
482 * nlink of parent inode if @sd is a directory. @sd is NOT
483 * linked into the children list of the parent. The caller
484 * should invoke sysfs_link_sibling() after this function
485 * completes if @sd needs to be on the children list.
486 *
487 * This function should be called between calls to
488 * sysfs_addrm_start() and sysfs_addrm_finish() and should be
489 * passed the same @acxt as passed to sysfs_addrm_start().
490 *
491 * LOCKING:
492 * Determined by sysfs_addrm_start().
493 */
494void sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
495{
496 sd->s_parent = sysfs_get(acxt->parent_sd);
497
498 if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
499 inc_nlink(acxt->parent_inode);
500
501 acxt->cnt++;
502}
94 503
95static struct sysfs_dirent * 504/**
96__sysfs_make_dirent(struct dentry *dentry, void *element, mode_t mode, int type) 505 * sysfs_remove_one - remove sysfs_dirent from parent
506 * @acxt: addrm context to use
507 * @sd: sysfs_dirent to be removed
508 *
509 * Mark @sd removed and drop nlink of parent inode if @sd is a
510 * directory. @sd is NOT unlinked from the children list of the
511 * parent. The caller is repsonsible for removing @sd from the
512 * children list before calling this function.
513 *
514 * This function should be called between calls to
515 * sysfs_addrm_start() and sysfs_addrm_finish() and should be
516 * passed the same @acxt as passed to sysfs_addrm_start().
517 *
518 * LOCKING:
519 * Determined by sysfs_addrm_start().
520 */
521void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
97{ 522{
98 struct sysfs_dirent * sd; 523 BUG_ON(sd->s_sibling || (sd->s_flags & SYSFS_FLAG_REMOVED));
99 524
100 sd = __sysfs_new_dirent(element); 525 sd->s_flags |= SYSFS_FLAG_REMOVED;
101 if (!sd) 526 sd->s_sibling = acxt->removed;
102 goto out; 527 acxt->removed = sd;
103 528
104 sd->s_mode = mode; 529 if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
105 sd->s_type = type; 530 drop_nlink(acxt->parent_inode);
106 sd->s_dentry = dentry;
107 if (dentry) {
108 dentry->d_fsdata = sysfs_get(sd);
109 dentry->d_op = &sysfs_dentry_ops;
110 }
111 531
112out: 532 acxt->cnt++;
113 return sd;
114} 533}
115 534
116int sysfs_make_dirent(struct sysfs_dirent * parent_sd, struct dentry * dentry, 535/**
117 void * element, umode_t mode, int type) 536 * sysfs_drop_dentry - drop dentry for the specified sysfs_dirent
537 * @sd: target sysfs_dirent
538 *
539 * Drop dentry for @sd. @sd must have been unlinked from its
540 * parent on entry to this function such that it can't be looked
541 * up anymore.
542 *
543 * @sd->s_dentry which is protected with sysfs_assoc_lock points
544 * to the currently associated dentry but we're not holding a
545 * reference to it and racing with dput(). Grab dcache_lock and
546 * verify dentry before dropping it. If @sd->s_dentry is NULL or
547 * dput() beats us, no need to bother.
548 */
549static void sysfs_drop_dentry(struct sysfs_dirent *sd)
118{ 550{
119 struct sysfs_dirent *sd; 551 struct dentry *dentry = NULL;
552 struct inode *inode;
553
554 /* We're not holding a reference to ->s_dentry dentry but the
555 * field will stay valid as long as sysfs_assoc_lock is held.
556 */
557 spin_lock(&sysfs_assoc_lock);
558 spin_lock(&dcache_lock);
559
560 /* drop dentry if it's there and dput() didn't kill it yet */
561 if (sd->s_dentry && sd->s_dentry->d_inode) {
562 dentry = dget_locked(sd->s_dentry);
563 spin_lock(&dentry->d_lock);
564 __d_drop(dentry);
565 spin_unlock(&dentry->d_lock);
566 }
120 567
121 sd = __sysfs_make_dirent(dentry, element, mode, type); 568 spin_unlock(&dcache_lock);
122 __sysfs_list_dirent(parent_sd, sd); 569 spin_unlock(&sysfs_assoc_lock);
123 570
124 return sd ? 0 : -ENOMEM; 571 /* dentries for shadowed inodes are pinned, unpin */
572 if (dentry && sysfs_is_shadowed_inode(dentry->d_inode))
573 dput(dentry);
574 dput(dentry);
575
576 /* adjust nlink and update timestamp */
577 inode = ilookup(sysfs_sb, sd->s_ino);
578 if (inode) {
579 mutex_lock(&inode->i_mutex);
580
581 inode->i_ctime = CURRENT_TIME;
582 drop_nlink(inode);
583 if (sysfs_type(sd) == SYSFS_DIR)
584 drop_nlink(inode);
585
586 mutex_unlock(&inode->i_mutex);
587 iput(inode);
588 }
125} 589}
126 590
127static int init_dir(struct inode * inode) 591/**
592 * sysfs_addrm_finish - finish up sysfs_dirent add/remove
593 * @acxt: addrm context to finish up
594 *
595 * Finish up sysfs_dirent add/remove. Resources acquired by
596 * sysfs_addrm_start() are released and removed sysfs_dirents are
597 * cleaned up. Timestamps on the parent inode are updated.
598 *
599 * LOCKING:
600 * All mutexes acquired by sysfs_addrm_start() are released.
601 *
602 * RETURNS:
603 * Number of added/removed sysfs_dirents since sysfs_addrm_start().
604 */
605int sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
128{ 606{
129 inode->i_op = &sysfs_dir_inode_operations; 607 /* release resources acquired by sysfs_addrm_start() */
130 inode->i_fop = &sysfs_dir_operations; 608 mutex_unlock(&sysfs_mutex);
609 if (acxt->parent_inode) {
610 struct inode *inode = acxt->parent_inode;
131 611
132 /* directory inodes start off with i_nlink == 2 (for "." entry) */ 612 /* if added/removed, update timestamps on the parent */
133 inc_nlink(inode); 613 if (acxt->cnt)
134 return 0; 614 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
615
616 mutex_unlock(&inode->i_mutex);
617 iput(inode);
618 }
619
620 /* kill removed sysfs_dirents */
621 while (acxt->removed) {
622 struct sysfs_dirent *sd = acxt->removed;
623
624 acxt->removed = sd->s_sibling;
625 sd->s_sibling = NULL;
626
627 sysfs_drop_dentry(sd);
628 sysfs_deactivate(sd);
629 sysfs_put(sd);
630 }
631
632 return acxt->cnt;
135} 633}
136 634
137static int init_file(struct inode * inode) 635/**
636 * sysfs_find_dirent - find sysfs_dirent with the given name
637 * @parent_sd: sysfs_dirent to search under
638 * @name: name to look for
639 *
640 * Look for sysfs_dirent with name @name under @parent_sd.
641 *
642 * LOCKING:
643 * mutex_lock(sysfs_mutex)
644 *
645 * RETURNS:
646 * Pointer to sysfs_dirent if found, NULL if not.
647 */
648struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
649 const unsigned char *name)
138{ 650{
139 inode->i_size = PAGE_SIZE; 651 struct sysfs_dirent *sd;
140 inode->i_fop = &sysfs_file_operations; 652
141 return 0; 653 for (sd = parent_sd->s_children; sd; sd = sd->s_sibling)
654 if (sysfs_type(sd) && !strcmp(sd->s_name, name))
655 return sd;
656 return NULL;
142} 657}
143 658
144static int init_symlink(struct inode * inode) 659/**
660 * sysfs_get_dirent - find and get sysfs_dirent with the given name
661 * @parent_sd: sysfs_dirent to search under
662 * @name: name to look for
663 *
664 * Look for sysfs_dirent with name @name under @parent_sd and get
665 * it if found.
666 *
667 * LOCKING:
668 * Kernel thread context (may sleep). Grabs sysfs_mutex.
669 *
670 * RETURNS:
671 * Pointer to sysfs_dirent if found, NULL if not.
672 */
673struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
674 const unsigned char *name)
145{ 675{
146 inode->i_op = &sysfs_symlink_inode_operations; 676 struct sysfs_dirent *sd;
147 return 0; 677
678 mutex_lock(&sysfs_mutex);
679 sd = sysfs_find_dirent(parent_sd, name);
680 sysfs_get(sd);
681 mutex_unlock(&sysfs_mutex);
682
683 return sd;
148} 684}
149 685
150static int create_dir(struct kobject * k, struct dentry * p, 686static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
151 const char * n, struct dentry ** d) 687 const char *name, struct sysfs_dirent **p_sd)
152{ 688{
153 int error;
154 umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO; 689 umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
690 struct sysfs_addrm_cxt acxt;
691 struct sysfs_dirent *sd;
155 692
156 mutex_lock(&p->d_inode->i_mutex); 693 /* allocate */
157 *d = lookup_one_len(n, p, strlen(n)); 694 sd = sysfs_new_dirent(name, mode, SYSFS_DIR);
158 if (!IS_ERR(*d)) { 695 if (!sd)
159 if (sysfs_dirent_exist(p->d_fsdata, n)) 696 return -ENOMEM;
160 error = -EEXIST; 697 sd->s_elem.dir.kobj = kobj;
161 else
162 error = sysfs_make_dirent(p->d_fsdata, *d, k, mode,
163 SYSFS_DIR);
164 if (!error) {
165 error = sysfs_create(*d, mode, init_dir);
166 if (!error) {
167 inc_nlink(p->d_inode);
168 (*d)->d_op = &sysfs_dentry_ops;
169 d_rehash(*d);
170 }
171 }
172 if (error && (error != -EEXIST)) {
173 struct sysfs_dirent *sd = (*d)->d_fsdata;
174 if (sd) {
175 list_del_init(&sd->s_sibling);
176 sysfs_put(sd);
177 }
178 d_drop(*d);
179 }
180 dput(*d);
181 } else
182 error = PTR_ERR(*d);
183 mutex_unlock(&p->d_inode->i_mutex);
184 return error;
185}
186 698
699 /* link in */
700 sysfs_addrm_start(&acxt, parent_sd);
701 if (!sysfs_find_dirent(parent_sd, name)) {
702 sysfs_add_one(&acxt, sd);
703 sysfs_link_sibling(sd);
704 }
705 if (sysfs_addrm_finish(&acxt)) {
706 *p_sd = sd;
707 return 0;
708 }
187 709
188int sysfs_create_subdir(struct kobject * k, const char * n, struct dentry ** d) 710 sysfs_put(sd);
711 return -EEXIST;
712}
713
714int sysfs_create_subdir(struct kobject *kobj, const char *name,
715 struct sysfs_dirent **p_sd)
189{ 716{
190 return create_dir(k,k->dentry,n,d); 717 return create_dir(kobj, kobj->sd, name, p_sd);
191} 718}
192 719
193/** 720/**
194 * sysfs_create_dir - create a directory for an object. 721 * sysfs_create_dir - create a directory for an object.
195 * @kobj: object we're creating directory for. 722 * @kobj: object we're creating directory for.
196 * @shadow_parent: parent parent object. 723 * @shadow_parent_sd: parent object.
197 */ 724 */
198 725int sysfs_create_dir(struct kobject *kobj,
199int sysfs_create_dir(struct kobject * kobj, struct dentry *shadow_parent) 726 struct sysfs_dirent *shadow_parent_sd)
200{ 727{
201 struct dentry * dentry = NULL; 728 struct sysfs_dirent *parent_sd, *sd;
202 struct dentry * parent;
203 int error = 0; 729 int error = 0;
204 730
205 BUG_ON(!kobj); 731 BUG_ON(!kobj);
206 732
207 if (shadow_parent) 733 if (shadow_parent_sd)
208 parent = shadow_parent; 734 parent_sd = shadow_parent_sd;
209 else if (kobj->parent) 735 else if (kobj->parent)
210 parent = kobj->parent->dentry; 736 parent_sd = kobj->parent->sd;
211 else if (sysfs_mount && sysfs_mount->mnt_sb) 737 else if (sysfs_mount && sysfs_mount->mnt_sb)
212 parent = sysfs_mount->mnt_sb->s_root; 738 parent_sd = sysfs_mount->mnt_sb->s_root->d_fsdata;
213 else 739 else
214 return -EFAULT; 740 return -EFAULT;
215 741
216 error = create_dir(kobj,parent,kobject_name(kobj),&dentry); 742 error = create_dir(kobj, parent_sd, kobject_name(kobj), &sd);
217 if (!error) 743 if (!error)
218 kobj->dentry = dentry; 744 kobj->sd = sd;
219 return error; 745 return error;
220} 746}
221 747
222/* attaches attribute's sysfs_dirent to the dentry corresponding to the 748static int sysfs_count_nlink(struct sysfs_dirent *sd)
223 * attribute file
224 */
225static int sysfs_attach_attr(struct sysfs_dirent * sd, struct dentry * dentry)
226{ 749{
227 struct attribute * attr = NULL; 750 struct sysfs_dirent *child;
228 struct bin_attribute * bin_attr = NULL; 751 int nr = 0;
229 int (* init) (struct inode *) = NULL;
230 int error = 0;
231 752
232 if (sd->s_type & SYSFS_KOBJ_BIN_ATTR) { 753 for (child = sd->s_children; child; child = child->s_sibling)
233 bin_attr = sd->s_element; 754 if (sysfs_type(child) == SYSFS_DIR)
234 attr = &bin_attr->attr; 755 nr++;
235 } else { 756 return nr + 2;
236 attr = sd->s_element;
237 init = init_file;
238 }
239
240 dentry->d_fsdata = sysfs_get(sd);
241 sd->s_dentry = dentry;
242 error = sysfs_create(dentry, (attr->mode & S_IALLUGO) | S_IFREG, init);
243 if (error) {
244 sysfs_put(sd);
245 return error;
246 }
247
248 if (bin_attr) {
249 dentry->d_inode->i_size = bin_attr->size;
250 dentry->d_inode->i_fop = &bin_fops;
251 }
252 dentry->d_op = &sysfs_dentry_ops;
253 d_rehash(dentry);
254
255 return 0;
256}
257
258static int sysfs_attach_link(struct sysfs_dirent * sd, struct dentry * dentry)
259{
260 int err = 0;
261
262 dentry->d_fsdata = sysfs_get(sd);
263 sd->s_dentry = dentry;
264 err = sysfs_create(dentry, S_IFLNK|S_IRWXUGO, init_symlink);
265 if (!err) {
266 dentry->d_op = &sysfs_dentry_ops;
267 d_rehash(dentry);
268 } else
269 sysfs_put(sd);
270
271 return err;
272} 757}
273 758
274static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, 759static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
@@ -276,24 +761,60 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
276{ 761{
277 struct sysfs_dirent * parent_sd = dentry->d_parent->d_fsdata; 762 struct sysfs_dirent * parent_sd = dentry->d_parent->d_fsdata;
278 struct sysfs_dirent * sd; 763 struct sysfs_dirent * sd;
279 int err = 0; 764 struct bin_attribute *bin_attr;
765 struct inode *inode;
766 int found = 0;
280 767
281 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { 768 for (sd = parent_sd->s_children; sd; sd = sd->s_sibling) {
282 if (sd->s_type & SYSFS_NOT_PINNED) { 769 if (sysfs_type(sd) &&
283 const unsigned char * name = sysfs_get_name(sd); 770 !strcmp(sd->s_name, dentry->d_name.name)) {
771 found = 1;
772 break;
773 }
774 }
775
776 /* no such entry */
777 if (!found)
778 return NULL;
284 779
285 if (strcmp(name, dentry->d_name.name)) 780 /* attach dentry and inode */
286 continue; 781 inode = sysfs_get_inode(sd);
782 if (!inode)
783 return ERR_PTR(-ENOMEM);
287 784
288 if (sd->s_type & SYSFS_KOBJ_LINK) 785 mutex_lock(&sysfs_mutex);
289 err = sysfs_attach_link(sd, dentry); 786
290 else 787 if (inode->i_state & I_NEW) {
291 err = sysfs_attach_attr(sd, dentry); 788 /* initialize inode according to type */
789 switch (sysfs_type(sd)) {
790 case SYSFS_DIR:
791 inode->i_op = &sysfs_dir_inode_operations;
792 inode->i_fop = &sysfs_dir_operations;
793 inode->i_nlink = sysfs_count_nlink(sd);
292 break; 794 break;
795 case SYSFS_KOBJ_ATTR:
796 inode->i_size = PAGE_SIZE;
797 inode->i_fop = &sysfs_file_operations;
798 break;
799 case SYSFS_KOBJ_BIN_ATTR:
800 bin_attr = sd->s_elem.bin_attr.bin_attr;
801 inode->i_size = bin_attr->size;
802 inode->i_fop = &bin_fops;
803 break;
804 case SYSFS_KOBJ_LINK:
805 inode->i_op = &sysfs_symlink_inode_operations;
806 break;
807 default:
808 BUG();
293 } 809 }
294 } 810 }
295 811
296 return ERR_PTR(err); 812 sysfs_instantiate(dentry, inode);
813 sysfs_attach_dentry(sd, dentry);
814
815 mutex_unlock(&sysfs_mutex);
816
817 return NULL;
297} 818}
298 819
299const struct inode_operations sysfs_dir_inode_operations = { 820const struct inode_operations sysfs_dir_inode_operations = {
@@ -301,58 +822,46 @@ const struct inode_operations sysfs_dir_inode_operations = {
301 .setattr = sysfs_setattr, 822 .setattr = sysfs_setattr,
302}; 823};
303 824
304static void remove_dir(struct dentry * d) 825static void remove_dir(struct sysfs_dirent *sd)
305{ 826{
306 struct dentry * parent = dget(d->d_parent); 827 struct sysfs_addrm_cxt acxt;
307 struct sysfs_dirent * sd;
308
309 mutex_lock(&parent->d_inode->i_mutex);
310 d_delete(d);
311 sd = d->d_fsdata;
312 list_del_init(&sd->s_sibling);
313 sysfs_put(sd);
314 if (d->d_inode)
315 simple_rmdir(parent->d_inode,d);
316 828
317 pr_debug(" o %s removing done (%d)\n",d->d_name.name, 829 sysfs_addrm_start(&acxt, sd->s_parent);
318 atomic_read(&d->d_count)); 830 sysfs_unlink_sibling(sd);
319 831 sysfs_remove_one(&acxt, sd);
320 mutex_unlock(&parent->d_inode->i_mutex); 832 sysfs_addrm_finish(&acxt);
321 dput(parent);
322} 833}
323 834
324void sysfs_remove_subdir(struct dentry * d) 835void sysfs_remove_subdir(struct sysfs_dirent *sd)
325{ 836{
326 remove_dir(d); 837 remove_dir(sd);
327} 838}
328 839
329 840
330static void __sysfs_remove_dir(struct dentry *dentry) 841static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd)
331{ 842{
332 struct sysfs_dirent * parent_sd; 843 struct sysfs_addrm_cxt acxt;
333 struct sysfs_dirent * sd, * tmp; 844 struct sysfs_dirent **pos;
334 845
335 dget(dentry); 846 if (!dir_sd)
336 if (!dentry)
337 return; 847 return;
338 848
339 pr_debug("sysfs %s: removing dir\n",dentry->d_name.name); 849 pr_debug("sysfs %s: removing dir\n", dir_sd->s_name);
340 mutex_lock(&dentry->d_inode->i_mutex); 850 sysfs_addrm_start(&acxt, dir_sd);
341 parent_sd = dentry->d_fsdata; 851 pos = &dir_sd->s_children;
342 list_for_each_entry_safe(sd, tmp, &parent_sd->s_children, s_sibling) { 852 while (*pos) {
343 if (!sd->s_element || !(sd->s_type & SYSFS_NOT_PINNED)) 853 struct sysfs_dirent *sd = *pos;
344 continue; 854
345 list_del_init(&sd->s_sibling); 855 if (sysfs_type(sd) && sysfs_type(sd) != SYSFS_DIR) {
346 sysfs_drop_dentry(sd, dentry); 856 *pos = sd->s_sibling;
347 sysfs_put(sd); 857 sd->s_sibling = NULL;
858 sysfs_remove_one(&acxt, sd);
859 } else
860 pos = &(*pos)->s_sibling;
348 } 861 }
349 mutex_unlock(&dentry->d_inode->i_mutex); 862 sysfs_addrm_finish(&acxt);
350 863
351 remove_dir(dentry); 864 remove_dir(dir_sd);
352 /**
353 * Drop reference from dget() on entrance.
354 */
355 dput(dentry);
356} 865}
357 866
358/** 867/**
@@ -366,102 +875,166 @@ static void __sysfs_remove_dir(struct dentry *dentry)
366 875
367void sysfs_remove_dir(struct kobject * kobj) 876void sysfs_remove_dir(struct kobject * kobj)
368{ 877{
369 __sysfs_remove_dir(kobj->dentry); 878 struct sysfs_dirent *sd = kobj->sd;
370 kobj->dentry = NULL; 879
880 spin_lock(&sysfs_assoc_lock);
881 kobj->sd = NULL;
882 spin_unlock(&sysfs_assoc_lock);
883
884 __sysfs_remove_dir(sd);
371} 885}
372 886
373int sysfs_rename_dir(struct kobject * kobj, struct dentry *new_parent, 887int sysfs_rename_dir(struct kobject *kobj, struct sysfs_dirent *new_parent_sd,
374 const char *new_name) 888 const char *new_name)
375{ 889{
376 int error = 0; 890 struct sysfs_dirent *sd = kobj->sd;
377 struct dentry * new_dentry; 891 struct dentry *new_parent = NULL;
892 struct dentry *old_dentry = NULL, *new_dentry = NULL;
893 const char *dup_name = NULL;
894 int error;
378 895
379 if (!new_parent) 896 /* get dentries */
380 return -EFAULT; 897 old_dentry = sysfs_get_dentry(sd);
898 if (IS_ERR(old_dentry)) {
899 error = PTR_ERR(old_dentry);
900 goto out_dput;
901 }
902
903 new_parent = sysfs_get_dentry(new_parent_sd);
904 if (IS_ERR(new_parent)) {
905 error = PTR_ERR(new_parent);
906 goto out_dput;
907 }
381 908
382 down_write(&sysfs_rename_sem); 909 /* lock new_parent and get dentry for new name */
383 mutex_lock(&new_parent->d_inode->i_mutex); 910 mutex_lock(&new_parent->d_inode->i_mutex);
384 911
385 new_dentry = lookup_one_len(new_name, new_parent, strlen(new_name)); 912 new_dentry = lookup_one_len(new_name, new_parent, strlen(new_name));
386 if (!IS_ERR(new_dentry)) { 913 if (IS_ERR(new_dentry)) {
387 /* By allowing two different directories with the 914 error = PTR_ERR(new_dentry);
388 * same d_parent we allow this routine to move 915 goto out_unlock;
389 * between different shadows of the same directory
390 */
391 if (kobj->dentry->d_parent->d_inode != new_parent->d_inode)
392 return -EINVAL;
393 else if (new_dentry->d_parent->d_inode != new_parent->d_inode)
394 error = -EINVAL;
395 else if (new_dentry == kobj->dentry)
396 error = -EINVAL;
397 else if (!new_dentry->d_inode) {
398 error = kobject_set_name(kobj, "%s", new_name);
399 if (!error) {
400 struct sysfs_dirent *sd, *parent_sd;
401
402 d_add(new_dentry, NULL);
403 d_move(kobj->dentry, new_dentry);
404
405 sd = kobj->dentry->d_fsdata;
406 parent_sd = new_parent->d_fsdata;
407
408 list_del_init(&sd->s_sibling);
409 list_add(&sd->s_sibling, &parent_sd->s_children);
410 }
411 else
412 d_drop(new_dentry);
413 } else
414 error = -EEXIST;
415 dput(new_dentry);
416 } 916 }
417 mutex_unlock(&new_parent->d_inode->i_mutex);
418 up_write(&sysfs_rename_sem);
419 917
918 /* By allowing two different directories with the same
919 * d_parent we allow this routine to move between different
920 * shadows of the same directory
921 */
922 error = -EINVAL;
923 if (old_dentry->d_parent->d_inode != new_parent->d_inode ||
924 new_dentry->d_parent->d_inode != new_parent->d_inode ||
925 old_dentry == new_dentry)
926 goto out_unlock;
927
928 error = -EEXIST;
929 if (new_dentry->d_inode)
930 goto out_unlock;
931
932 /* rename kobject and sysfs_dirent */
933 error = -ENOMEM;
934 new_name = dup_name = kstrdup(new_name, GFP_KERNEL);
935 if (!new_name)
936 goto out_drop;
937
938 error = kobject_set_name(kobj, "%s", new_name);
939 if (error)
940 goto out_drop;
941
942 dup_name = sd->s_name;
943 sd->s_name = new_name;
944
945 /* move under the new parent */
946 d_add(new_dentry, NULL);
947 d_move(sd->s_dentry, new_dentry);
948
949 mutex_lock(&sysfs_mutex);
950
951 sysfs_unlink_sibling(sd);
952 sysfs_get(new_parent_sd);
953 sysfs_put(sd->s_parent);
954 sd->s_parent = new_parent_sd;
955 sysfs_link_sibling(sd);
956
957 mutex_unlock(&sysfs_mutex);
958
959 error = 0;
960 goto out_unlock;
961
962 out_drop:
963 d_drop(new_dentry);
964 out_unlock:
965 mutex_unlock(&new_parent->d_inode->i_mutex);
966 out_dput:
967 kfree(dup_name);
968 dput(new_parent);
969 dput(old_dentry);
970 dput(new_dentry);
420 return error; 971 return error;
421} 972}
422 973
423int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent) 974int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj)
424{ 975{
425 struct dentry *old_parent_dentry, *new_parent_dentry, *new_dentry; 976 struct sysfs_dirent *sd = kobj->sd;
426 struct sysfs_dirent *new_parent_sd, *sd; 977 struct sysfs_dirent *new_parent_sd;
978 struct dentry *old_parent, *new_parent = NULL;
979 struct dentry *old_dentry = NULL, *new_dentry = NULL;
427 int error; 980 int error;
428 981
429 old_parent_dentry = kobj->parent ? 982 BUG_ON(!sd->s_parent);
430 kobj->parent->dentry : sysfs_mount->mnt_sb->s_root; 983 new_parent_sd = new_parent_kobj->sd ? new_parent_kobj->sd : &sysfs_root;
431 new_parent_dentry = new_parent ? 984
432 new_parent->dentry : sysfs_mount->mnt_sb->s_root; 985 /* get dentries */
986 old_dentry = sysfs_get_dentry(sd);
987 if (IS_ERR(old_dentry)) {
988 error = PTR_ERR(old_dentry);
989 goto out_dput;
990 }
991 old_parent = sd->s_parent->s_dentry;
992
993 new_parent = sysfs_get_dentry(new_parent_sd);
994 if (IS_ERR(new_parent)) {
995 error = PTR_ERR(new_parent);
996 goto out_dput;
997 }
433 998
434 if (old_parent_dentry->d_inode == new_parent_dentry->d_inode) 999 if (old_parent->d_inode == new_parent->d_inode) {
435 return 0; /* nothing to move */ 1000 error = 0;
1001 goto out_dput; /* nothing to move */
1002 }
436again: 1003again:
437 mutex_lock(&old_parent_dentry->d_inode->i_mutex); 1004 mutex_lock(&old_parent->d_inode->i_mutex);
438 if (!mutex_trylock(&new_parent_dentry->d_inode->i_mutex)) { 1005 if (!mutex_trylock(&new_parent->d_inode->i_mutex)) {
439 mutex_unlock(&old_parent_dentry->d_inode->i_mutex); 1006 mutex_unlock(&old_parent->d_inode->i_mutex);
440 goto again; 1007 goto again;
441 } 1008 }
442 1009
443 new_parent_sd = new_parent_dentry->d_fsdata; 1010 new_dentry = lookup_one_len(kobj->name, new_parent, strlen(kobj->name));
444 sd = kobj->dentry->d_fsdata;
445
446 new_dentry = lookup_one_len(kobj->name, new_parent_dentry,
447 strlen(kobj->name));
448 if (IS_ERR(new_dentry)) { 1011 if (IS_ERR(new_dentry)) {
449 error = PTR_ERR(new_dentry); 1012 error = PTR_ERR(new_dentry);
450 goto out; 1013 goto out_unlock;
451 } else 1014 } else
452 error = 0; 1015 error = 0;
453 d_add(new_dentry, NULL); 1016 d_add(new_dentry, NULL);
454 d_move(kobj->dentry, new_dentry); 1017 d_move(sd->s_dentry, new_dentry);
455 dput(new_dentry); 1018 dput(new_dentry);
456 1019
457 /* Remove from old parent's list and insert into new parent's list. */ 1020 /* Remove from old parent's list and insert into new parent's list. */
458 list_del_init(&sd->s_sibling); 1021 mutex_lock(&sysfs_mutex);
459 list_add(&sd->s_sibling, &new_parent_sd->s_children);
460 1022
461out: 1023 sysfs_unlink_sibling(sd);
462 mutex_unlock(&new_parent_dentry->d_inode->i_mutex); 1024 sysfs_get(new_parent_sd);
463 mutex_unlock(&old_parent_dentry->d_inode->i_mutex); 1025 sysfs_put(sd->s_parent);
1026 sd->s_parent = new_parent_sd;
1027 sysfs_link_sibling(sd);
464 1028
1029 mutex_unlock(&sysfs_mutex);
1030
1031 out_unlock:
1032 mutex_unlock(&new_parent->d_inode->i_mutex);
1033 mutex_unlock(&old_parent->d_inode->i_mutex);
1034 out_dput:
1035 dput(new_parent);
1036 dput(old_dentry);
1037 dput(new_dentry);
465 return error; 1038 return error;
466} 1039}
467 1040
@@ -469,23 +1042,27 @@ static int sysfs_dir_open(struct inode *inode, struct file *file)
469{ 1042{
470 struct dentry * dentry = file->f_path.dentry; 1043 struct dentry * dentry = file->f_path.dentry;
471 struct sysfs_dirent * parent_sd = dentry->d_fsdata; 1044 struct sysfs_dirent * parent_sd = dentry->d_fsdata;
1045 struct sysfs_dirent * sd;
472 1046
473 mutex_lock(&dentry->d_inode->i_mutex); 1047 sd = sysfs_new_dirent("_DIR_", 0, 0);
474 file->private_data = sysfs_new_dirent(parent_sd, NULL); 1048 if (sd) {
475 mutex_unlock(&dentry->d_inode->i_mutex); 1049 mutex_lock(&sysfs_mutex);
476 1050 sd->s_parent = sysfs_get(parent_sd);
477 return file->private_data ? 0 : -ENOMEM; 1051 sysfs_link_sibling(sd);
1052 mutex_unlock(&sysfs_mutex);
1053 }
478 1054
1055 file->private_data = sd;
1056 return sd ? 0 : -ENOMEM;
479} 1057}
480 1058
481static int sysfs_dir_close(struct inode *inode, struct file *file) 1059static int sysfs_dir_close(struct inode *inode, struct file *file)
482{ 1060{
483 struct dentry * dentry = file->f_path.dentry;
484 struct sysfs_dirent * cursor = file->private_data; 1061 struct sysfs_dirent * cursor = file->private_data;
485 1062
486 mutex_lock(&dentry->d_inode->i_mutex); 1063 mutex_lock(&sysfs_mutex);
487 list_del_init(&cursor->s_sibling); 1064 sysfs_unlink_sibling(cursor);
488 mutex_unlock(&dentry->d_inode->i_mutex); 1065 mutex_unlock(&sysfs_mutex);
489 1066
490 release_sysfs_dirent(cursor); 1067 release_sysfs_dirent(cursor);
491 1068
@@ -503,54 +1080,65 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
503 struct dentry *dentry = filp->f_path.dentry; 1080 struct dentry *dentry = filp->f_path.dentry;
504 struct sysfs_dirent * parent_sd = dentry->d_fsdata; 1081 struct sysfs_dirent * parent_sd = dentry->d_fsdata;
505 struct sysfs_dirent *cursor = filp->private_data; 1082 struct sysfs_dirent *cursor = filp->private_data;
506 struct list_head *p, *q = &cursor->s_sibling; 1083 struct sysfs_dirent **pos;
507 ino_t ino; 1084 ino_t ino;
508 int i = filp->f_pos; 1085 int i = filp->f_pos;
509 1086
510 switch (i) { 1087 switch (i) {
511 case 0: 1088 case 0:
512 ino = dentry->d_inode->i_ino; 1089 ino = parent_sd->s_ino;
513 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) 1090 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
514 break; 1091 break;
515 filp->f_pos++; 1092 filp->f_pos++;
516 i++; 1093 i++;
517 /* fallthrough */ 1094 /* fallthrough */
518 case 1: 1095 case 1:
519 ino = parent_ino(dentry); 1096 if (parent_sd->s_parent)
1097 ino = parent_sd->s_parent->s_ino;
1098 else
1099 ino = parent_sd->s_ino;
520 if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) 1100 if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
521 break; 1101 break;
522 filp->f_pos++; 1102 filp->f_pos++;
523 i++; 1103 i++;
524 /* fallthrough */ 1104 /* fallthrough */
525 default: 1105 default:
1106 mutex_lock(&sysfs_mutex);
1107
1108 pos = &parent_sd->s_children;
1109 while (*pos != cursor)
1110 pos = &(*pos)->s_sibling;
1111
1112 /* unlink cursor */
1113 *pos = cursor->s_sibling;
1114
526 if (filp->f_pos == 2) 1115 if (filp->f_pos == 2)
527 list_move(q, &parent_sd->s_children); 1116 pos = &parent_sd->s_children;
528 1117
529 for (p=q->next; p!= &parent_sd->s_children; p=p->next) { 1118 for ( ; *pos; pos = &(*pos)->s_sibling) {
530 struct sysfs_dirent *next; 1119 struct sysfs_dirent *next = *pos;
531 const char * name; 1120 const char * name;
532 int len; 1121 int len;
533 1122
534 next = list_entry(p, struct sysfs_dirent, 1123 if (!sysfs_type(next))
535 s_sibling);
536 if (!next->s_element)
537 continue; 1124 continue;
538 1125
539 name = sysfs_get_name(next); 1126 name = next->s_name;
540 len = strlen(name); 1127 len = strlen(name);
541 if (next->s_dentry) 1128 ino = next->s_ino;
542 ino = next->s_dentry->d_inode->i_ino;
543 else
544 ino = iunique(sysfs_sb, 2);
545 1129
546 if (filldir(dirent, name, len, filp->f_pos, ino, 1130 if (filldir(dirent, name, len, filp->f_pos, ino,
547 dt_type(next)) < 0) 1131 dt_type(next)) < 0)
548 return 0; 1132 break;
549 1133
550 list_move(q, p);
551 p = q;
552 filp->f_pos++; 1134 filp->f_pos++;
553 } 1135 }
1136
1137 /* put cursor back in */
1138 cursor->s_sibling = *pos;
1139 *pos = cursor;
1140
1141 mutex_unlock(&sysfs_mutex);
554 } 1142 }
555 return 0; 1143 return 0;
556} 1144}
@@ -559,7 +1147,6 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin)
559{ 1147{
560 struct dentry * dentry = file->f_path.dentry; 1148 struct dentry * dentry = file->f_path.dentry;
561 1149
562 mutex_lock(&dentry->d_inode->i_mutex);
563 switch (origin) { 1150 switch (origin) {
564 case 1: 1151 case 1:
565 offset += file->f_pos; 1152 offset += file->f_pos;
@@ -567,31 +1154,35 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin)
567 if (offset >= 0) 1154 if (offset >= 0)
568 break; 1155 break;
569 default: 1156 default:
570 mutex_unlock(&file->f_path.dentry->d_inode->i_mutex);
571 return -EINVAL; 1157 return -EINVAL;
572 } 1158 }
573 if (offset != file->f_pos) { 1159 if (offset != file->f_pos) {
1160 mutex_lock(&sysfs_mutex);
1161
574 file->f_pos = offset; 1162 file->f_pos = offset;
575 if (file->f_pos >= 2) { 1163 if (file->f_pos >= 2) {
576 struct sysfs_dirent *sd = dentry->d_fsdata; 1164 struct sysfs_dirent *sd = dentry->d_fsdata;
577 struct sysfs_dirent *cursor = file->private_data; 1165 struct sysfs_dirent *cursor = file->private_data;
578 struct list_head *p; 1166 struct sysfs_dirent **pos;
579 loff_t n = file->f_pos - 2; 1167 loff_t n = file->f_pos - 2;
580 1168
581 list_del(&cursor->s_sibling); 1169 sysfs_unlink_sibling(cursor);
582 p = sd->s_children.next; 1170
583 while (n && p != &sd->s_children) { 1171 pos = &sd->s_children;
584 struct sysfs_dirent *next; 1172 while (n && *pos) {
585 next = list_entry(p, struct sysfs_dirent, 1173 struct sysfs_dirent *next = *pos;
586 s_sibling); 1174 if (sysfs_type(next))
587 if (next->s_element)
588 n--; 1175 n--;
589 p = p->next; 1176 pos = &(*pos)->s_sibling;
590 } 1177 }
591 list_add_tail(&cursor->s_sibling, p); 1178
1179 cursor->s_sibling = *pos;
1180 *pos = cursor;
592 } 1181 }
1182
1183 mutex_unlock(&sysfs_mutex);
593 } 1184 }
594 mutex_unlock(&dentry->d_inode->i_mutex); 1185
595 return offset; 1186 return offset;
596} 1187}
597 1188
@@ -604,12 +1195,20 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin)
604int sysfs_make_shadowed_dir(struct kobject *kobj, 1195int sysfs_make_shadowed_dir(struct kobject *kobj,
605 void * (*follow_link)(struct dentry *, struct nameidata *)) 1196 void * (*follow_link)(struct dentry *, struct nameidata *))
606{ 1197{
1198 struct dentry *dentry;
607 struct inode *inode; 1199 struct inode *inode;
608 struct inode_operations *i_op; 1200 struct inode_operations *i_op;
609 1201
610 inode = kobj->dentry->d_inode; 1202 /* get dentry for @kobj->sd, dentry of a shadowed dir is pinned */
611 if (inode->i_op != &sysfs_dir_inode_operations) 1203 dentry = sysfs_get_dentry(kobj->sd);
1204 if (IS_ERR(dentry))
1205 return PTR_ERR(dentry);
1206
1207 inode = dentry->d_inode;
1208 if (inode->i_op != &sysfs_dir_inode_operations) {
1209 dput(dentry);
612 return -EINVAL; 1210 return -EINVAL;
1211 }
613 1212
614 i_op = kmalloc(sizeof(*i_op), GFP_KERNEL); 1213 i_op = kmalloc(sizeof(*i_op), GFP_KERNEL);
615 if (!i_op) 1214 if (!i_op)
@@ -634,54 +1233,72 @@ int sysfs_make_shadowed_dir(struct kobject *kobj,
634 * directory. 1233 * directory.
635 */ 1234 */
636 1235
637struct dentry *sysfs_create_shadow_dir(struct kobject *kobj) 1236struct sysfs_dirent *sysfs_create_shadow_dir(struct kobject *kobj)
638{ 1237{
639 struct sysfs_dirent *sd; 1238 struct sysfs_dirent *parent_sd = kobj->sd->s_parent;
640 struct dentry *parent, *dir, *shadow; 1239 struct dentry *dir, *parent, *shadow;
641 struct inode *inode; 1240 struct inode *inode;
1241 struct sysfs_dirent *sd;
1242 struct sysfs_addrm_cxt acxt;
642 1243
643 dir = kobj->dentry; 1244 dir = sysfs_get_dentry(kobj->sd);
644 inode = dir->d_inode; 1245 if (IS_ERR(dir)) {
1246 sd = (void *)dir;
1247 goto out;
1248 }
645 parent = dir->d_parent; 1249 parent = dir->d_parent;
646 shadow = ERR_PTR(-EINVAL); 1250
1251 inode = dir->d_inode;
1252 sd = ERR_PTR(-EINVAL);
647 if (!sysfs_is_shadowed_inode(inode)) 1253 if (!sysfs_is_shadowed_inode(inode))
648 goto out; 1254 goto out_dput;
649 1255
650 shadow = d_alloc(parent, &dir->d_name); 1256 shadow = d_alloc(parent, &dir->d_name);
651 if (!shadow) 1257 if (!shadow)
652 goto nomem; 1258 goto nomem;
653 1259
654 sd = __sysfs_make_dirent(shadow, kobj, inode->i_mode, SYSFS_DIR); 1260 sd = sysfs_new_dirent("_SHADOW_", inode->i_mode, SYSFS_DIR);
655 if (!sd) 1261 if (!sd)
656 goto nomem; 1262 goto nomem;
1263 sd->s_elem.dir.kobj = kobj;
657 1264
1265 sysfs_addrm_start(&acxt, parent_sd);
1266
1267 /* add but don't link into children list */
1268 sysfs_add_one(&acxt, sd);
1269
1270 /* attach and instantiate dentry */
1271 sysfs_attach_dentry(sd, shadow);
658 d_instantiate(shadow, igrab(inode)); 1272 d_instantiate(shadow, igrab(inode));
659 inc_nlink(inode); 1273 inc_nlink(inode); /* tj: synchronization? */
660 inc_nlink(parent->d_inode); 1274
661 shadow->d_op = &sysfs_dentry_ops; 1275 sysfs_addrm_finish(&acxt);
662 1276
663 dget(shadow); /* Extra count - pin the dentry in core */ 1277 dget(shadow); /* Extra count - pin the dentry in core */
664 1278
665out: 1279 goto out_dput;
666 return shadow; 1280
667nomem: 1281 nomem:
668 dput(shadow); 1282 dput(shadow);
669 shadow = ERR_PTR(-ENOMEM); 1283 sd = ERR_PTR(-ENOMEM);
670 goto out; 1284 out_dput:
1285 dput(dir);
1286 out:
1287 return sd;
671} 1288}
672 1289
673/** 1290/**
674 * sysfs_remove_shadow_dir - remove an object's directory. 1291 * sysfs_remove_shadow_dir - remove an object's directory.
675 * @shadow: dentry of shadow directory 1292 * @shadow_sd: sysfs_dirent of shadow directory
676 * 1293 *
677 * The only thing special about this is that we remove any files in 1294 * The only thing special about this is that we remove any files in
678 * the directory before we remove the directory, and we've inlined 1295 * the directory before we remove the directory, and we've inlined
679 * what used to be sysfs_rmdir() below, instead of calling separately. 1296 * what used to be sysfs_rmdir() below, instead of calling separately.
680 */ 1297 */
681 1298
682void sysfs_remove_shadow_dir(struct dentry *shadow) 1299void sysfs_remove_shadow_dir(struct sysfs_dirent *shadow_sd)
683{ 1300{
684 __sysfs_remove_dir(shadow); 1301 __sysfs_remove_dir(shadow_sd);
685} 1302}
686 1303
687const struct file_operations sysfs_dir_operations = { 1304const struct file_operations sysfs_dir_operations = {
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index b502c7197ec0..cc497994b2a8 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -50,29 +50,15 @@ static struct sysfs_ops subsys_sysfs_ops = {
50 .store = subsys_attr_store, 50 .store = subsys_attr_store,
51}; 51};
52 52
53/** 53struct sysfs_buffer {
54 * add_to_collection - add buffer to a collection 54 size_t count;
55 * @buffer: buffer to be added 55 loff_t pos;
56 * @node: inode of set to add to 56 char * page;
57 */ 57 struct sysfs_ops * ops;
58 58 struct semaphore sem;
59static inline void 59 int needs_read_fill;
60add_to_collection(struct sysfs_buffer *buffer, struct inode *node) 60 int event;
61{ 61};
62 struct sysfs_buffer_collection *set = node->i_private;
63
64 mutex_lock(&node->i_mutex);
65 list_add(&buffer->associates, &set->associates);
66 mutex_unlock(&node->i_mutex);
67}
68
69static inline void
70remove_from_collection(struct sysfs_buffer *buffer, struct inode *node)
71{
72 mutex_lock(&node->i_mutex);
73 list_del(&buffer->associates);
74 mutex_unlock(&node->i_mutex);
75}
76 62
77/** 63/**
78 * fill_read_buffer - allocate and fill buffer from object. 64 * fill_read_buffer - allocate and fill buffer from object.
@@ -87,9 +73,8 @@ remove_from_collection(struct sysfs_buffer *buffer, struct inode *node)
87 */ 73 */
88static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer) 74static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer)
89{ 75{
90 struct sysfs_dirent * sd = dentry->d_fsdata; 76 struct sysfs_dirent *attr_sd = dentry->d_fsdata;
91 struct attribute * attr = to_attr(dentry); 77 struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
92 struct kobject * kobj = to_kobj(dentry->d_parent);
93 struct sysfs_ops * ops = buffer->ops; 78 struct sysfs_ops * ops = buffer->ops;
94 int ret = 0; 79 int ret = 0;
95 ssize_t count; 80 ssize_t count;
@@ -99,8 +84,15 @@ static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer
99 if (!buffer->page) 84 if (!buffer->page)
100 return -ENOMEM; 85 return -ENOMEM;
101 86
102 buffer->event = atomic_read(&sd->s_event); 87 /* need attr_sd for attr and ops, its parent for kobj */
103 count = ops->show(kobj,attr,buffer->page); 88 if (!sysfs_get_active_two(attr_sd))
89 return -ENODEV;
90
91 buffer->event = atomic_read(&attr_sd->s_event);
92 count = ops->show(kobj, attr_sd->s_elem.attr.attr, buffer->page);
93
94 sysfs_put_active_two(attr_sd);
95
104 BUG_ON(count > (ssize_t)PAGE_SIZE); 96 BUG_ON(count > (ssize_t)PAGE_SIZE);
105 if (count >= 0) { 97 if (count >= 0) {
106 buffer->needs_read_fill = 0; 98 buffer->needs_read_fill = 0;
@@ -138,10 +130,7 @@ sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos)
138 130
139 down(&buffer->sem); 131 down(&buffer->sem);
140 if (buffer->needs_read_fill) { 132 if (buffer->needs_read_fill) {
141 if (buffer->orphaned) 133 retval = fill_read_buffer(file->f_path.dentry,buffer);
142 retval = -ENODEV;
143 else
144 retval = fill_read_buffer(file->f_path.dentry,buffer);
145 if (retval) 134 if (retval)
146 goto out; 135 goto out;
147 } 136 }
@@ -196,14 +185,23 @@ fill_write_buffer(struct sysfs_buffer * buffer, const char __user * buf, size_t
196 * passing the buffer that we acquired in fill_write_buffer(). 185 * passing the buffer that we acquired in fill_write_buffer().
197 */ 186 */
198 187
199static int 188static int
200flush_write_buffer(struct dentry * dentry, struct sysfs_buffer * buffer, size_t count) 189flush_write_buffer(struct dentry * dentry, struct sysfs_buffer * buffer, size_t count)
201{ 190{
202 struct attribute * attr = to_attr(dentry); 191 struct sysfs_dirent *attr_sd = dentry->d_fsdata;
203 struct kobject * kobj = to_kobj(dentry->d_parent); 192 struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
204 struct sysfs_ops * ops = buffer->ops; 193 struct sysfs_ops * ops = buffer->ops;
194 int rc;
195
196 /* need attr_sd for attr and ops, its parent for kobj */
197 if (!sysfs_get_active_two(attr_sd))
198 return -ENODEV;
199
200 rc = ops->store(kobj, attr_sd->s_elem.attr.attr, buffer->page, count);
205 201
206 return ops->store(kobj,attr,buffer->page,count); 202 sysfs_put_active_two(attr_sd);
203
204 return rc;
207} 205}
208 206
209 207
@@ -231,37 +229,26 @@ sysfs_write_file(struct file *file, const char __user *buf, size_t count, loff_t
231 ssize_t len; 229 ssize_t len;
232 230
233 down(&buffer->sem); 231 down(&buffer->sem);
234 if (buffer->orphaned) {
235 len = -ENODEV;
236 goto out;
237 }
238 len = fill_write_buffer(buffer, buf, count); 232 len = fill_write_buffer(buffer, buf, count);
239 if (len > 0) 233 if (len > 0)
240 len = flush_write_buffer(file->f_path.dentry, buffer, len); 234 len = flush_write_buffer(file->f_path.dentry, buffer, len);
241 if (len > 0) 235 if (len > 0)
242 *ppos += len; 236 *ppos += len;
243out:
244 up(&buffer->sem); 237 up(&buffer->sem);
245 return len; 238 return len;
246} 239}
247 240
248static int sysfs_open_file(struct inode *inode, struct file *file) 241static int sysfs_open_file(struct inode *inode, struct file *file)
249{ 242{
250 struct kobject *kobj = sysfs_get_kobject(file->f_path.dentry->d_parent); 243 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
251 struct attribute * attr = to_attr(file->f_path.dentry); 244 struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
252 struct sysfs_buffer_collection *set;
253 struct sysfs_buffer * buffer; 245 struct sysfs_buffer * buffer;
254 struct sysfs_ops * ops = NULL; 246 struct sysfs_ops * ops = NULL;
255 int error = 0; 247 int error;
256
257 if (!kobj || !attr)
258 goto Einval;
259 248
260 /* Grab the module reference for this attribute if we have one */ 249 /* need attr_sd for attr and ops, its parent for kobj */
261 if (!try_module_get(attr->owner)) { 250 if (!sysfs_get_active_two(attr_sd))
262 error = -ENODEV; 251 return -ENODEV;
263 goto Done;
264 }
265 252
266 /* if the kobject has no ktype, then we assume that it is a subsystem 253 /* if the kobject has no ktype, then we assume that it is a subsystem
267 * itself, and use ops for it. 254 * itself, and use ops for it.
@@ -273,33 +260,21 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
273 else 260 else
274 ops = &subsys_sysfs_ops; 261 ops = &subsys_sysfs_ops;
275 262
263 error = -EACCES;
264
276 /* No sysfs operations, either from having no subsystem, 265 /* No sysfs operations, either from having no subsystem,
277 * or the subsystem have no operations. 266 * or the subsystem have no operations.
278 */ 267 */
279 if (!ops) 268 if (!ops)
280 goto Eaccess; 269 goto err_out;
281
282 /* make sure we have a collection to add our buffers to */
283 mutex_lock(&inode->i_mutex);
284 if (!(set = inode->i_private)) {
285 if (!(set = inode->i_private = kmalloc(sizeof(struct sysfs_buffer_collection), GFP_KERNEL))) {
286 error = -ENOMEM;
287 goto Done;
288 } else {
289 INIT_LIST_HEAD(&set->associates);
290 }
291 }
292 mutex_unlock(&inode->i_mutex);
293 270
294 /* File needs write support. 271 /* File needs write support.
295 * The inode's perms must say it's ok, 272 * The inode's perms must say it's ok,
296 * and we must have a store method. 273 * and we must have a store method.
297 */ 274 */
298 if (file->f_mode & FMODE_WRITE) { 275 if (file->f_mode & FMODE_WRITE) {
299
300 if (!(inode->i_mode & S_IWUGO) || !ops->store) 276 if (!(inode->i_mode & S_IWUGO) || !ops->store)
301 goto Eaccess; 277 goto err_out;
302
303 } 278 }
304 279
305 /* File needs read support. 280 /* File needs read support.
@@ -308,48 +283,38 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
308 */ 283 */
309 if (file->f_mode & FMODE_READ) { 284 if (file->f_mode & FMODE_READ) {
310 if (!(inode->i_mode & S_IRUGO) || !ops->show) 285 if (!(inode->i_mode & S_IRUGO) || !ops->show)
311 goto Eaccess; 286 goto err_out;
312 } 287 }
313 288
314 /* No error? Great, allocate a buffer for the file, and store it 289 /* No error? Great, allocate a buffer for the file, and store it
315 * it in file->private_data for easy access. 290 * it in file->private_data for easy access.
316 */ 291 */
292 error = -ENOMEM;
317 buffer = kzalloc(sizeof(struct sysfs_buffer), GFP_KERNEL); 293 buffer = kzalloc(sizeof(struct sysfs_buffer), GFP_KERNEL);
318 if (buffer) { 294 if (!buffer)
319 INIT_LIST_HEAD(&buffer->associates); 295 goto err_out;
320 init_MUTEX(&buffer->sem); 296
321 buffer->needs_read_fill = 1; 297 init_MUTEX(&buffer->sem);
322 buffer->ops = ops; 298 buffer->needs_read_fill = 1;
323 add_to_collection(buffer, inode); 299 buffer->ops = ops;
324 file->private_data = buffer; 300 file->private_data = buffer;
325 } else 301
326 error = -ENOMEM; 302 /* open succeeded, put active references and pin attr_sd */
327 goto Done; 303 sysfs_put_active_two(attr_sd);
328 304 sysfs_get(attr_sd);
329 Einval: 305 return 0;
330 error = -EINVAL; 306
331 goto Done; 307 err_out:
332 Eaccess: 308 sysfs_put_active_two(attr_sd);
333 error = -EACCES;
334 module_put(attr->owner);
335 Done:
336 if (error)
337 kobject_put(kobj);
338 return error; 309 return error;
339} 310}
340 311
341static int sysfs_release(struct inode * inode, struct file * filp) 312static int sysfs_release(struct inode * inode, struct file * filp)
342{ 313{
343 struct kobject * kobj = to_kobj(filp->f_path.dentry->d_parent); 314 struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata;
344 struct attribute * attr = to_attr(filp->f_path.dentry); 315 struct sysfs_buffer *buffer = filp->private_data;
345 struct module * owner = attr->owner;
346 struct sysfs_buffer * buffer = filp->private_data;
347 316
348 if (buffer) 317 sysfs_put(attr_sd);
349 remove_from_collection(buffer, inode);
350 kobject_put(kobj);
351 /* After this point, attr should not be accessed. */
352 module_put(owner);
353 318
354 if (buffer) { 319 if (buffer) {
355 if (buffer->page) 320 if (buffer->page)
@@ -376,57 +341,43 @@ static int sysfs_release(struct inode * inode, struct file * filp)
376static unsigned int sysfs_poll(struct file *filp, poll_table *wait) 341static unsigned int sysfs_poll(struct file *filp, poll_table *wait)
377{ 342{
378 struct sysfs_buffer * buffer = filp->private_data; 343 struct sysfs_buffer * buffer = filp->private_data;
379 struct kobject * kobj = to_kobj(filp->f_path.dentry->d_parent); 344 struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata;
380 struct sysfs_dirent * sd = filp->f_path.dentry->d_fsdata; 345 struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
381 int res = 0; 346
347 /* need parent for the kobj, grab both */
348 if (!sysfs_get_active_two(attr_sd))
349 goto trigger;
382 350
383 poll_wait(filp, &kobj->poll, wait); 351 poll_wait(filp, &kobj->poll, wait);
384 352
385 if (buffer->event != atomic_read(&sd->s_event)) { 353 sysfs_put_active_two(attr_sd);
386 res = POLLERR|POLLPRI;
387 buffer->needs_read_fill = 1;
388 }
389 354
390 return res; 355 if (buffer->event != atomic_read(&attr_sd->s_event))
391} 356 goto trigger;
392 357
358 return 0;
393 359
394static struct dentry *step_down(struct dentry *dir, const char * name) 360 trigger:
395{ 361 buffer->needs_read_fill = 1;
396 struct dentry * de; 362 return POLLERR|POLLPRI;
397
398 if (dir == NULL || dir->d_inode == NULL)
399 return NULL;
400
401 mutex_lock(&dir->d_inode->i_mutex);
402 de = lookup_one_len(name, dir, strlen(name));
403 mutex_unlock(&dir->d_inode->i_mutex);
404 dput(dir);
405 if (IS_ERR(de))
406 return NULL;
407 if (de->d_inode == NULL) {
408 dput(de);
409 return NULL;
410 }
411 return de;
412} 363}
413 364
414void sysfs_notify(struct kobject * k, char *dir, char *attr) 365void sysfs_notify(struct kobject *k, char *dir, char *attr)
415{ 366{
416 struct dentry *de = k->dentry; 367 struct sysfs_dirent *sd = k->sd;
417 if (de) 368
418 dget(de); 369 mutex_lock(&sysfs_mutex);
419 if (de && dir) 370
420 de = step_down(de, dir); 371 if (sd && dir)
421 if (de && attr) 372 sd = sysfs_find_dirent(sd, dir);
422 de = step_down(de, attr); 373 if (sd && attr)
423 if (de) { 374 sd = sysfs_find_dirent(sd, attr);
424 struct sysfs_dirent * sd = de->d_fsdata; 375 if (sd) {
425 if (sd) 376 atomic_inc(&sd->s_event);
426 atomic_inc(&sd->s_event);
427 wake_up_interruptible(&k->poll); 377 wake_up_interruptible(&k->poll);
428 dput(de);
429 } 378 }
379
380 mutex_unlock(&sysfs_mutex);
430} 381}
431EXPORT_SYMBOL_GPL(sysfs_notify); 382EXPORT_SYMBOL_GPL(sysfs_notify);
432 383
@@ -440,19 +391,30 @@ const struct file_operations sysfs_file_operations = {
440}; 391};
441 392
442 393
443int sysfs_add_file(struct dentry * dir, const struct attribute * attr, int type) 394int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr,
395 int type)
444{ 396{
445 struct sysfs_dirent * parent_sd = dir->d_fsdata;
446 umode_t mode = (attr->mode & S_IALLUGO) | S_IFREG; 397 umode_t mode = (attr->mode & S_IALLUGO) | S_IFREG;
447 int error = -EEXIST; 398 struct sysfs_addrm_cxt acxt;
399 struct sysfs_dirent *sd;
448 400
449 mutex_lock(&dir->d_inode->i_mutex); 401 sd = sysfs_new_dirent(attr->name, mode, type);
450 if (!sysfs_dirent_exist(parent_sd, attr->name)) 402 if (!sd)
451 error = sysfs_make_dirent(parent_sd, NULL, (void *)attr, 403 return -ENOMEM;
452 mode, type); 404 sd->s_elem.attr.attr = (void *)attr;
453 mutex_unlock(&dir->d_inode->i_mutex);
454 405
455 return error; 406 sysfs_addrm_start(&acxt, dir_sd);
407
408 if (!sysfs_find_dirent(dir_sd, attr->name)) {
409 sysfs_add_one(&acxt, sd);
410 sysfs_link_sibling(sd);
411 }
412
413 if (sysfs_addrm_finish(&acxt))
414 return 0;
415
416 sysfs_put(sd);
417 return -EEXIST;
456} 418}
457 419
458 420
@@ -464,9 +426,9 @@ int sysfs_add_file(struct dentry * dir, const struct attribute * attr, int type)
464 426
465int sysfs_create_file(struct kobject * kobj, const struct attribute * attr) 427int sysfs_create_file(struct kobject * kobj, const struct attribute * attr)
466{ 428{
467 BUG_ON(!kobj || !kobj->dentry || !attr); 429 BUG_ON(!kobj || !kobj->sd || !attr);
468 430
469 return sysfs_add_file(kobj->dentry, attr, SYSFS_KOBJ_ATTR); 431 return sysfs_add_file(kobj->sd, attr, SYSFS_KOBJ_ATTR);
470 432
471} 433}
472 434
@@ -480,16 +442,16 @@ int sysfs_create_file(struct kobject * kobj, const struct attribute * attr)
480int sysfs_add_file_to_group(struct kobject *kobj, 442int sysfs_add_file_to_group(struct kobject *kobj,
481 const struct attribute *attr, const char *group) 443 const struct attribute *attr, const char *group)
482{ 444{
483 struct dentry *dir; 445 struct sysfs_dirent *dir_sd;
484 int error; 446 int error;
485 447
486 dir = lookup_one_len(group, kobj->dentry, strlen(group)); 448 dir_sd = sysfs_get_dirent(kobj->sd, group);
487 if (IS_ERR(dir)) 449 if (!dir_sd)
488 error = PTR_ERR(dir); 450 return -ENOENT;
489 else { 451
490 error = sysfs_add_file(dir, attr, SYSFS_KOBJ_ATTR); 452 error = sysfs_add_file(dir_sd, attr, SYSFS_KOBJ_ATTR);
491 dput(dir); 453 sysfs_put(dir_sd);
492 } 454
493 return error; 455 return error;
494} 456}
495EXPORT_SYMBOL_GPL(sysfs_add_file_to_group); 457EXPORT_SYMBOL_GPL(sysfs_add_file_to_group);
@@ -502,30 +464,31 @@ EXPORT_SYMBOL_GPL(sysfs_add_file_to_group);
502 */ 464 */
503int sysfs_update_file(struct kobject * kobj, const struct attribute * attr) 465int sysfs_update_file(struct kobject * kobj, const struct attribute * attr)
504{ 466{
505 struct dentry * dir = kobj->dentry; 467 struct sysfs_dirent *victim_sd = NULL;
506 struct dentry * victim; 468 struct dentry *victim = NULL;
507 int res = -ENOENT; 469 int rc;
508 470
509 mutex_lock(&dir->d_inode->i_mutex); 471 rc = -ENOENT;
510 victim = lookup_one_len(attr->name, dir, strlen(attr->name)); 472 victim_sd = sysfs_get_dirent(kobj->sd, attr->name);
511 if (!IS_ERR(victim)) { 473 if (!victim_sd)
512 /* make sure dentry is really there */ 474 goto out;
513 if (victim->d_inode && 475
514 (victim->d_parent->d_inode == dir->d_inode)) { 476 victim = sysfs_get_dentry(victim_sd);
515 victim->d_inode->i_mtime = CURRENT_TIME; 477 if (IS_ERR(victim)) {
516 fsnotify_modify(victim); 478 rc = PTR_ERR(victim);
517 res = 0; 479 victim = NULL;
518 } else 480 goto out;
519 d_drop(victim);
520
521 /**
522 * Drop the reference acquired from lookup_one_len() above.
523 */
524 dput(victim);
525 } 481 }
526 mutex_unlock(&dir->d_inode->i_mutex);
527 482
528 return res; 483 mutex_lock(&victim->d_inode->i_mutex);
484 victim->d_inode->i_mtime = CURRENT_TIME;
485 fsnotify_modify(victim);
486 mutex_unlock(&victim->d_inode->i_mutex);
487 rc = 0;
488 out:
489 dput(victim);
490 sysfs_put(victim_sd);
491 return rc;
529} 492}
530 493
531 494
@@ -538,30 +501,34 @@ int sysfs_update_file(struct kobject * kobj, const struct attribute * attr)
538 */ 501 */
539int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode) 502int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode)
540{ 503{
541 struct dentry *dir = kobj->dentry; 504 struct sysfs_dirent *victim_sd = NULL;
542 struct dentry *victim; 505 struct dentry *victim = NULL;
543 struct inode * inode; 506 struct inode * inode;
544 struct iattr newattrs; 507 struct iattr newattrs;
545 int res = -ENOENT; 508 int rc;
546 509
547 mutex_lock(&dir->d_inode->i_mutex); 510 rc = -ENOENT;
548 victim = lookup_one_len(attr->name, dir, strlen(attr->name)); 511 victim_sd = sysfs_get_dirent(kobj->sd, attr->name);
549 if (!IS_ERR(victim)) { 512 if (!victim_sd)
550 if (victim->d_inode && 513 goto out;
551 (victim->d_parent->d_inode == dir->d_inode)) { 514
552 inode = victim->d_inode; 515 victim = sysfs_get_dentry(victim_sd);
553 mutex_lock(&inode->i_mutex); 516 if (IS_ERR(victim)) {
554 newattrs.ia_mode = (mode & S_IALLUGO) | 517 rc = PTR_ERR(victim);
555 (inode->i_mode & ~S_IALLUGO); 518 victim = NULL;
556 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; 519 goto out;
557 res = notify_change(victim, &newattrs);
558 mutex_unlock(&inode->i_mutex);
559 }
560 dput(victim);
561 } 520 }
562 mutex_unlock(&dir->d_inode->i_mutex);
563 521
564 return res; 522 inode = victim->d_inode;
523 mutex_lock(&inode->i_mutex);
524 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
525 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
526 rc = notify_change(victim, &newattrs);
527 mutex_unlock(&inode->i_mutex);
528 out:
529 dput(victim);
530 sysfs_put(victim_sd);
531 return rc;
565} 532}
566EXPORT_SYMBOL_GPL(sysfs_chmod_file); 533EXPORT_SYMBOL_GPL(sysfs_chmod_file);
567 534
@@ -576,7 +543,7 @@ EXPORT_SYMBOL_GPL(sysfs_chmod_file);
576 543
577void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr) 544void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr)
578{ 545{
579 sysfs_hash_and_remove(kobj->dentry, attr->name); 546 sysfs_hash_and_remove(kobj->sd, attr->name);
580} 547}
581 548
582 549
@@ -589,12 +556,12 @@ void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr)
589void sysfs_remove_file_from_group(struct kobject *kobj, 556void sysfs_remove_file_from_group(struct kobject *kobj,
590 const struct attribute *attr, const char *group) 557 const struct attribute *attr, const char *group)
591{ 558{
592 struct dentry *dir; 559 struct sysfs_dirent *dir_sd;
593 560
594 dir = lookup_one_len(group, kobj->dentry, strlen(group)); 561 dir_sd = sysfs_get_dirent(kobj->sd, group);
595 if (!IS_ERR(dir)) { 562 if (dir_sd) {
596 sysfs_hash_and_remove(dir, attr->name); 563 sysfs_hash_and_remove(dir_sd, attr->name);
597 dput(dir); 564 sysfs_put(dir_sd);
598 } 565 }
599} 566}
600EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group); 567EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group);
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 52eed2a7a5ef..f318b73c790c 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -18,26 +18,25 @@
18#include "sysfs.h" 18#include "sysfs.h"
19 19
20 20
21static void remove_files(struct dentry * dir, 21static void remove_files(struct sysfs_dirent *dir_sd,
22 const struct attribute_group * grp) 22 const struct attribute_group *grp)
23{ 23{
24 struct attribute *const* attr; 24 struct attribute *const* attr;
25 25
26 for (attr = grp->attrs; *attr; attr++) 26 for (attr = grp->attrs; *attr; attr++)
27 sysfs_hash_and_remove(dir,(*attr)->name); 27 sysfs_hash_and_remove(dir_sd, (*attr)->name);
28} 28}
29 29
30static int create_files(struct dentry * dir, 30static int create_files(struct sysfs_dirent *dir_sd,
31 const struct attribute_group * grp) 31 const struct attribute_group *grp)
32{ 32{
33 struct attribute *const* attr; 33 struct attribute *const* attr;
34 int error = 0; 34 int error = 0;
35 35
36 for (attr = grp->attrs; *attr && !error; attr++) { 36 for (attr = grp->attrs; *attr && !error; attr++)
37 error = sysfs_add_file(dir, *attr, SYSFS_KOBJ_ATTR); 37 error = sysfs_add_file(dir_sd, *attr, SYSFS_KOBJ_ATTR);
38 }
39 if (error) 38 if (error)
40 remove_files(dir,grp); 39 remove_files(dir_sd, grp);
41 return error; 40 return error;
42} 41}
43 42
@@ -45,44 +44,44 @@ static int create_files(struct dentry * dir,
45int sysfs_create_group(struct kobject * kobj, 44int sysfs_create_group(struct kobject * kobj,
46 const struct attribute_group * grp) 45 const struct attribute_group * grp)
47{ 46{
48 struct dentry * dir; 47 struct sysfs_dirent *sd;
49 int error; 48 int error;
50 49
51 BUG_ON(!kobj || !kobj->dentry); 50 BUG_ON(!kobj || !kobj->sd);
52 51
53 if (grp->name) { 52 if (grp->name) {
54 error = sysfs_create_subdir(kobj,grp->name,&dir); 53 error = sysfs_create_subdir(kobj, grp->name, &sd);
55 if (error) 54 if (error)
56 return error; 55 return error;
57 } else 56 } else
58 dir = kobj->dentry; 57 sd = kobj->sd;
59 dir = dget(dir); 58 sysfs_get(sd);
60 if ((error = create_files(dir,grp))) { 59 error = create_files(sd, grp);
60 if (error) {
61 if (grp->name) 61 if (grp->name)
62 sysfs_remove_subdir(dir); 62 sysfs_remove_subdir(sd);
63 } 63 }
64 dput(dir); 64 sysfs_put(sd);
65 return error; 65 return error;
66} 66}
67 67
68void sysfs_remove_group(struct kobject * kobj, 68void sysfs_remove_group(struct kobject * kobj,
69 const struct attribute_group * grp) 69 const struct attribute_group * grp)
70{ 70{
71 struct dentry * dir; 71 struct sysfs_dirent *dir_sd = kobj->sd;
72 struct sysfs_dirent *sd;
72 73
73 if (grp->name) { 74 if (grp->name) {
74 dir = lookup_one_len_kern(grp->name, kobj->dentry, 75 sd = sysfs_get_dirent(dir_sd, grp->name);
75 strlen(grp->name)); 76 BUG_ON(!sd);
76 BUG_ON(IS_ERR(dir)); 77 } else
77 } 78 sd = sysfs_get(dir_sd);
78 else
79 dir = dget(kobj->dentry);
80 79
81 remove_files(dir,grp); 80 remove_files(sd, grp);
82 if (grp->name) 81 if (grp->name)
83 sysfs_remove_subdir(dir); 82 sysfs_remove_subdir(sd);
84 /* release the ref. taken in this routine */ 83
85 dput(dir); 84 sysfs_put(sd);
86} 85}
87 86
88 87
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index bdd30e74de6b..3756e152285a 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -133,170 +133,94 @@ static inline void set_inode_attr(struct inode * inode, struct iattr * iattr)
133 */ 133 */
134static struct lock_class_key sysfs_inode_imutex_key; 134static struct lock_class_key sysfs_inode_imutex_key;
135 135
136struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent * sd) 136void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
137{ 137{
138 struct inode * inode = new_inode(sysfs_sb); 138 inode->i_blocks = 0;
139 if (inode) { 139 inode->i_mapping->a_ops = &sysfs_aops;
140 inode->i_blocks = 0; 140 inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info;
141 inode->i_mapping->a_ops = &sysfs_aops; 141 inode->i_op = &sysfs_inode_operations;
142 inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info; 142 inode->i_ino = sd->s_ino;
143 inode->i_op = &sysfs_inode_operations; 143 lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key);
144 lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key); 144
145 145 if (sd->s_iattr) {
146 if (sd->s_iattr) { 146 /* sysfs_dirent has non-default attributes
147 /* sysfs_dirent has non-default attributes 147 * get them for the new inode from persistent copy
148 * get them for the new inode from persistent copy 148 * in sysfs_dirent
149 * in sysfs_dirent 149 */
150 */ 150 set_inode_attr(inode, sd->s_iattr);
151 set_inode_attr(inode, sd->s_iattr);
152 } else
153 set_default_inode_attr(inode, mode);
154 }
155 return inode;
156}
157
158int sysfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *))
159{
160 int error = 0;
161 struct inode * inode = NULL;
162 if (dentry) {
163 if (!dentry->d_inode) {
164 struct sysfs_dirent * sd = dentry->d_fsdata;
165 if ((inode = sysfs_new_inode(mode, sd))) {
166 if (dentry->d_parent && dentry->d_parent->d_inode) {
167 struct inode *p_inode = dentry->d_parent->d_inode;
168 p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME;
169 }
170 goto Proceed;
171 }
172 else
173 error = -ENOMEM;
174 } else
175 error = -EEXIST;
176 } else
177 error = -ENOENT;
178 goto Done;
179
180 Proceed:
181 if (init)
182 error = init(inode);
183 if (!error) {
184 d_instantiate(dentry, inode);
185 if (S_ISDIR(mode))
186 dget(dentry); /* pin only directory dentry in core */
187 } else 151 } else
188 iput(inode); 152 set_default_inode_attr(inode, sd->s_mode);
189 Done:
190 return error;
191} 153}
192 154
193/* 155/**
194 * Get the name for corresponding element represented by the given sysfs_dirent 156 * sysfs_get_inode - get inode for sysfs_dirent
157 * @sd: sysfs_dirent to allocate inode for
158 *
159 * Get inode for @sd. If such inode doesn't exist, a new inode
160 * is allocated and basics are initialized. New inode is
161 * returned locked.
162 *
163 * LOCKING:
164 * Kernel thread context (may sleep).
165 *
166 * RETURNS:
167 * Pointer to allocated inode on success, NULL on failure.
195 */ 168 */
196const unsigned char * sysfs_get_name(struct sysfs_dirent *sd) 169struct inode * sysfs_get_inode(struct sysfs_dirent *sd)
197{ 170{
198 struct attribute * attr; 171 struct inode *inode;
199 struct bin_attribute * bin_attr;
200 struct sysfs_symlink * sl;
201
202 BUG_ON(!sd || !sd->s_element);
203
204 switch (sd->s_type) {
205 case SYSFS_DIR:
206 /* Always have a dentry so use that */
207 return sd->s_dentry->d_name.name;
208
209 case SYSFS_KOBJ_ATTR:
210 attr = sd->s_element;
211 return attr->name;
212 172
213 case SYSFS_KOBJ_BIN_ATTR: 173 inode = iget_locked(sysfs_sb, sd->s_ino);
214 bin_attr = sd->s_element; 174 if (inode && (inode->i_state & I_NEW))
215 return bin_attr->attr.name; 175 sysfs_init_inode(sd, inode);
216 176
217 case SYSFS_KOBJ_LINK: 177 return inode;
218 sl = sd->s_element;
219 return sl->link_name;
220 }
221 return NULL;
222}
223
224static inline void orphan_all_buffers(struct inode *node)
225{
226 struct sysfs_buffer_collection *set;
227 struct sysfs_buffer *buf;
228
229 mutex_lock_nested(&node->i_mutex, I_MUTEX_CHILD);
230 set = node->i_private;
231 if (set) {
232 list_for_each_entry(buf, &set->associates, associates) {
233 down(&buf->sem);
234 buf->orphaned = 1;
235 up(&buf->sem);
236 }
237 }
238 mutex_unlock(&node->i_mutex);
239} 178}
240 179
241 180/**
242/* 181 * sysfs_instantiate - instantiate dentry
243 * Unhashes the dentry corresponding to given sysfs_dirent 182 * @dentry: dentry to be instantiated
244 * Called with parent inode's i_mutex held. 183 * @inode: inode associated with @sd
184 *
185 * Unlock @inode if locked and instantiate @dentry with @inode.
186 *
187 * LOCKING:
188 * None.
245 */ 189 */
246void sysfs_drop_dentry(struct sysfs_dirent * sd, struct dentry * parent) 190void sysfs_instantiate(struct dentry *dentry, struct inode *inode)
247{ 191{
248 struct dentry * dentry = sd->s_dentry; 192 BUG_ON(!dentry || dentry->d_inode);
249 struct inode *inode;
250 193
251 if (dentry) { 194 if (inode->i_state & I_NEW)
252 spin_lock(&dcache_lock); 195 unlock_new_inode(inode);
253 spin_lock(&dentry->d_lock); 196
254 if (!(d_unhashed(dentry) && dentry->d_inode)) { 197 d_instantiate(dentry, inode);
255 inode = dentry->d_inode;
256 spin_lock(&inode->i_lock);
257 __iget(inode);
258 spin_unlock(&inode->i_lock);
259 dget_locked(dentry);
260 __d_drop(dentry);
261 spin_unlock(&dentry->d_lock);
262 spin_unlock(&dcache_lock);
263 simple_unlink(parent->d_inode, dentry);
264 orphan_all_buffers(inode);
265 iput(inode);
266 } else {
267 spin_unlock(&dentry->d_lock);
268 spin_unlock(&dcache_lock);
269 }
270 }
271} 198}
272 199
273int sysfs_hash_and_remove(struct dentry * dir, const char * name) 200int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name)
274{ 201{
275 struct sysfs_dirent * sd; 202 struct sysfs_addrm_cxt acxt;
276 struct sysfs_dirent * parent_sd; 203 struct sysfs_dirent **pos, *sd;
277 int found = 0;
278 204
279 if (!dir) 205 if (!dir_sd)
280 return -ENOENT; 206 return -ENOENT;
281 207
282 if (dir->d_inode == NULL) 208 sysfs_addrm_start(&acxt, dir_sd);
283 /* no inode means this hasn't been made visible yet */ 209
284 return -ENOENT; 210 for (pos = &dir_sd->s_children; *pos; pos = &(*pos)->s_sibling) {
211 sd = *pos;
285 212
286 parent_sd = dir->d_fsdata; 213 if (!sysfs_type(sd))
287 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
288 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
289 if (!sd->s_element)
290 continue; 214 continue;
291 if (!strcmp(sysfs_get_name(sd), name)) { 215 if (!strcmp(sd->s_name, name)) {
292 list_del_init(&sd->s_sibling); 216 *pos = sd->s_sibling;
293 sysfs_drop_dentry(sd, dir); 217 sd->s_sibling = NULL;
294 sysfs_put(sd); 218 sysfs_remove_one(&acxt, sd);
295 found = 1;
296 break; 219 break;
297 } 220 }
298 } 221 }
299 mutex_unlock(&dir->d_inode->i_mutex);
300 222
301 return found ? 0 : -ENOENT; 223 if (sysfs_addrm_finish(&acxt))
224 return 0;
225 return -ENOENT;
302} 226}
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 23a48a38e6af..402cc356203c 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -19,27 +19,18 @@ struct vfsmount *sysfs_mount;
19struct super_block * sysfs_sb = NULL; 19struct super_block * sysfs_sb = NULL;
20struct kmem_cache *sysfs_dir_cachep; 20struct kmem_cache *sysfs_dir_cachep;
21 21
22static void sysfs_clear_inode(struct inode *inode);
23
24static const struct super_operations sysfs_ops = { 22static const struct super_operations sysfs_ops = {
25 .statfs = simple_statfs, 23 .statfs = simple_statfs,
26 .drop_inode = sysfs_delete_inode, 24 .drop_inode = sysfs_delete_inode,
27 .clear_inode = sysfs_clear_inode,
28}; 25};
29 26
30static struct sysfs_dirent sysfs_root = { 27struct sysfs_dirent sysfs_root = {
31 .s_sibling = LIST_HEAD_INIT(sysfs_root.s_sibling), 28 .s_count = ATOMIC_INIT(1),
32 .s_children = LIST_HEAD_INIT(sysfs_root.s_children), 29 .s_flags = SYSFS_ROOT,
33 .s_element = NULL, 30 .s_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO,
34 .s_type = SYSFS_ROOT, 31 .s_ino = 1,
35 .s_iattr = NULL,
36}; 32};
37 33
38static void sysfs_clear_inode(struct inode *inode)
39{
40 kfree(inode->i_private);
41}
42
43static int sysfs_fill_super(struct super_block *sb, void *data, int silent) 34static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
44{ 35{
45 struct inode *inode; 36 struct inode *inode;
@@ -52,24 +43,26 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
52 sb->s_time_gran = 1; 43 sb->s_time_gran = 1;
53 sysfs_sb = sb; 44 sysfs_sb = sb;
54 45
55 inode = sysfs_new_inode(S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, 46 inode = new_inode(sysfs_sb);
56 &sysfs_root); 47 if (!inode) {
57 if (inode) {
58 inode->i_op = &sysfs_dir_inode_operations;
59 inode->i_fop = &sysfs_dir_operations;
60 /* directory inodes start off with i_nlink == 2 (for "." entry) */
61 inc_nlink(inode);
62 } else {
63 pr_debug("sysfs: could not get root inode\n"); 48 pr_debug("sysfs: could not get root inode\n");
64 return -ENOMEM; 49 return -ENOMEM;
65 } 50 }
66 51
52 sysfs_init_inode(&sysfs_root, inode);
53
54 inode->i_op = &sysfs_dir_inode_operations;
55 inode->i_fop = &sysfs_dir_operations;
56 /* directory inodes start off with i_nlink == 2 (for "." entry) */
57 inc_nlink(inode);
58
67 root = d_alloc_root(inode); 59 root = d_alloc_root(inode);
68 if (!root) { 60 if (!root) {
69 pr_debug("%s: could not get root dentry!\n",__FUNCTION__); 61 pr_debug("%s: could not get root dentry!\n",__FUNCTION__);
70 iput(inode); 62 iput(inode);
71 return -ENOMEM; 63 return -ENOMEM;
72 } 64 }
65 sysfs_root.s_dentry = root;
73 root->d_fsdata = &sysfs_root; 66 root->d_fsdata = &sysfs_root;
74 sb->s_root = root; 67 sb->s_root = root;
75 return 0; 68 return 0;
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 7b9c5bfde920..2f86e0422290 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -11,71 +11,39 @@
11 11
12#include "sysfs.h" 12#include "sysfs.h"
13 13
14static int object_depth(struct kobject * kobj) 14static int object_depth(struct sysfs_dirent *sd)
15{ 15{
16 struct kobject * p = kobj;
17 int depth = 0; 16 int depth = 0;
18 do { depth++; } while ((p = p->parent)); 17
18 for (; sd->s_parent; sd = sd->s_parent)
19 depth++;
20
19 return depth; 21 return depth;
20} 22}
21 23
22static int object_path_length(struct kobject * kobj) 24static int object_path_length(struct sysfs_dirent * sd)
23{ 25{
24 struct kobject * p = kobj;
25 int length = 1; 26 int length = 1;
26 do { 27
27 length += strlen(kobject_name(p)) + 1; 28 for (; sd->s_parent; sd = sd->s_parent)
28 p = p->parent; 29 length += strlen(sd->s_name) + 1;
29 } while (p); 30
30 return length; 31 return length;
31} 32}
32 33
33static void fill_object_path(struct kobject * kobj, char * buffer, int length) 34static void fill_object_path(struct sysfs_dirent *sd, char *buffer, int length)
34{ 35{
35 struct kobject * p;
36
37 --length; 36 --length;
38 for (p = kobj; p; p = p->parent) { 37 for (; sd->s_parent; sd = sd->s_parent) {
39 int cur = strlen(kobject_name(p)); 38 int cur = strlen(sd->s_name);
40 39
41 /* back up enough to print this bus id with '/' */ 40 /* back up enough to print this bus id with '/' */
42 length -= cur; 41 length -= cur;
43 strncpy(buffer + length,kobject_name(p),cur); 42 strncpy(buffer + length, sd->s_name, cur);
44 *(buffer + --length) = '/'; 43 *(buffer + --length) = '/';
45 } 44 }
46} 45}
47 46
48static int sysfs_add_link(struct dentry * parent, const char * name, struct kobject * target)
49{
50 struct sysfs_dirent * parent_sd = parent->d_fsdata;
51 struct sysfs_symlink * sl;
52 int error = 0;
53
54 error = -ENOMEM;
55 sl = kmalloc(sizeof(*sl), GFP_KERNEL);
56 if (!sl)
57 goto exit1;
58
59 sl->link_name = kmalloc(strlen(name) + 1, GFP_KERNEL);
60 if (!sl->link_name)
61 goto exit2;
62
63 strcpy(sl->link_name, name);
64 sl->target_kobj = kobject_get(target);
65
66 error = sysfs_make_dirent(parent_sd, NULL, sl, S_IFLNK|S_IRWXUGO,
67 SYSFS_KOBJ_LINK);
68 if (!error)
69 return 0;
70
71 kobject_put(target);
72 kfree(sl->link_name);
73exit2:
74 kfree(sl);
75exit1:
76 return error;
77}
78
79/** 47/**
80 * sysfs_create_link - create symlink between two objects. 48 * sysfs_create_link - create symlink between two objects.
81 * @kobj: object whose directory we're creating the link in. 49 * @kobj: object whose directory we're creating the link in.
@@ -84,24 +52,57 @@ exit1:
84 */ 52 */
85int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char * name) 53int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char * name)
86{ 54{
87 struct dentry *dentry = NULL; 55 struct sysfs_dirent *parent_sd = NULL;
88 int error = -EEXIST; 56 struct sysfs_dirent *target_sd = NULL;
57 struct sysfs_dirent *sd = NULL;
58 struct sysfs_addrm_cxt acxt;
59 int error;
89 60
90 BUG_ON(!name); 61 BUG_ON(!name);
91 62
92 if (!kobj) { 63 if (!kobj) {
93 if (sysfs_mount && sysfs_mount->mnt_sb) 64 if (sysfs_mount && sysfs_mount->mnt_sb)
94 dentry = sysfs_mount->mnt_sb->s_root; 65 parent_sd = sysfs_mount->mnt_sb->s_root->d_fsdata;
95 } else 66 } else
96 dentry = kobj->dentry; 67 parent_sd = kobj->sd;
68
69 error = -EFAULT;
70 if (!parent_sd)
71 goto out_put;
72
73 /* target->sd can go away beneath us but is protected with
74 * sysfs_assoc_lock. Fetch target_sd from it.
75 */
76 spin_lock(&sysfs_assoc_lock);
77 if (target->sd)
78 target_sd = sysfs_get(target->sd);
79 spin_unlock(&sysfs_assoc_lock);
80
81 error = -ENOENT;
82 if (!target_sd)
83 goto out_put;
84
85 error = -ENOMEM;
86 sd = sysfs_new_dirent(name, S_IFLNK|S_IRWXUGO, SYSFS_KOBJ_LINK);
87 if (!sd)
88 goto out_put;
89 sd->s_elem.symlink.target_sd = target_sd;
97 90
98 if (!dentry) 91 sysfs_addrm_start(&acxt, parent_sd);
99 return -EFAULT;
100 92
101 mutex_lock(&dentry->d_inode->i_mutex); 93 if (!sysfs_find_dirent(parent_sd, name)) {
102 if (!sysfs_dirent_exist(dentry->d_fsdata, name)) 94 sysfs_add_one(&acxt, sd);
103 error = sysfs_add_link(dentry, name, target); 95 sysfs_link_sibling(sd);
104 mutex_unlock(&dentry->d_inode->i_mutex); 96 }
97
98 if (sysfs_addrm_finish(&acxt))
99 return 0;
100
101 error = -EEXIST;
102 /* fall through */
103 out_put:
104 sysfs_put(target_sd);
105 sysfs_put(sd);
105 return error; 106 return error;
106} 107}
107 108
@@ -114,17 +115,17 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char
114 115
115void sysfs_remove_link(struct kobject * kobj, const char * name) 116void sysfs_remove_link(struct kobject * kobj, const char * name)
116{ 117{
117 sysfs_hash_and_remove(kobj->dentry,name); 118 sysfs_hash_and_remove(kobj->sd, name);
118} 119}
119 120
120static int sysfs_get_target_path(struct kobject * kobj, struct kobject * target, 121static int sysfs_get_target_path(struct sysfs_dirent * parent_sd,
121 char *path) 122 struct sysfs_dirent * target_sd, char *path)
122{ 123{
123 char * s; 124 char * s;
124 int depth, size; 125 int depth, size;
125 126
126 depth = object_depth(kobj); 127 depth = object_depth(parent_sd);
127 size = object_path_length(target) + depth * 3 - 1; 128 size = object_path_length(target_sd) + depth * 3 - 1;
128 if (size > PATH_MAX) 129 if (size > PATH_MAX)
129 return -ENAMETOOLONG; 130 return -ENAMETOOLONG;
130 131
@@ -133,7 +134,7 @@ static int sysfs_get_target_path(struct kobject * kobj, struct kobject * target,
133 for (s = path; depth--; s += 3) 134 for (s = path; depth--; s += 3)
134 strcpy(s,"../"); 135 strcpy(s,"../");
135 136
136 fill_object_path(target, path, size); 137 fill_object_path(target_sd, path, size);
137 pr_debug("%s: path = '%s'\n", __FUNCTION__, path); 138 pr_debug("%s: path = '%s'\n", __FUNCTION__, path);
138 139
139 return 0; 140 return 0;
@@ -141,27 +142,16 @@ static int sysfs_get_target_path(struct kobject * kobj, struct kobject * target,
141 142
142static int sysfs_getlink(struct dentry *dentry, char * path) 143static int sysfs_getlink(struct dentry *dentry, char * path)
143{ 144{
144 struct kobject *kobj, *target_kobj; 145 struct sysfs_dirent *sd = dentry->d_fsdata;
145 int error = 0; 146 struct sysfs_dirent *parent_sd = sd->s_parent;
147 struct sysfs_dirent *target_sd = sd->s_elem.symlink.target_sd;
148 int error;
146 149
147 kobj = sysfs_get_kobject(dentry->d_parent); 150 mutex_lock(&sysfs_mutex);
148 if (!kobj) 151 error = sysfs_get_target_path(parent_sd, target_sd, path);
149 return -EINVAL; 152 mutex_unlock(&sysfs_mutex);
150 153
151 target_kobj = sysfs_get_kobject(dentry);
152 if (!target_kobj) {
153 kobject_put(kobj);
154 return -EINVAL;
155 }
156
157 down_read(&sysfs_rename_sem);
158 error = sysfs_get_target_path(kobj, target_kobj, path);
159 up_read(&sysfs_rename_sem);
160
161 kobject_put(kobj);
162 kobject_put(target_kobj);
163 return error; 154 return error;
164
165} 155}
166 156
167static void *sysfs_follow_link(struct dentry *dentry, struct nameidata *nd) 157static void *sysfs_follow_link(struct dentry *dentry, struct nameidata *nd)
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index a77c57e5a6d5..6a37f2386a8d 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -1,38 +1,101 @@
1struct sysfs_elem_dir {
2 struct kobject * kobj;
3};
4
5struct sysfs_elem_symlink {
6 struct sysfs_dirent * target_sd;
7};
8
9struct sysfs_elem_attr {
10 struct attribute * attr;
11};
12
13struct sysfs_elem_bin_attr {
14 struct bin_attribute * bin_attr;
15};
16
17/*
18 * As long as s_count reference is held, the sysfs_dirent itself is
19 * accessible. Dereferencing s_elem or any other outer entity
20 * requires s_active reference.
21 */
1struct sysfs_dirent { 22struct sysfs_dirent {
2 atomic_t s_count; 23 atomic_t s_count;
3 struct list_head s_sibling; 24 atomic_t s_active;
4 struct list_head s_children; 25 struct sysfs_dirent * s_parent;
5 void * s_element; 26 struct sysfs_dirent * s_sibling;
6 int s_type; 27 struct sysfs_dirent * s_children;
28 const char * s_name;
29
30 union {
31 struct sysfs_elem_dir dir;
32 struct sysfs_elem_symlink symlink;
33 struct sysfs_elem_attr attr;
34 struct sysfs_elem_bin_attr bin_attr;
35 } s_elem;
36
37 unsigned int s_flags;
7 umode_t s_mode; 38 umode_t s_mode;
39 ino_t s_ino;
8 struct dentry * s_dentry; 40 struct dentry * s_dentry;
9 struct iattr * s_iattr; 41 struct iattr * s_iattr;
10 atomic_t s_event; 42 atomic_t s_event;
11}; 43};
12 44
45#define SD_DEACTIVATED_BIAS INT_MIN
46
47struct sysfs_addrm_cxt {
48 struct sysfs_dirent *parent_sd;
49 struct inode *parent_inode;
50 struct sysfs_dirent *removed;
51 int cnt;
52};
53
13extern struct vfsmount * sysfs_mount; 54extern struct vfsmount * sysfs_mount;
55extern struct sysfs_dirent sysfs_root;
14extern struct kmem_cache *sysfs_dir_cachep; 56extern struct kmem_cache *sysfs_dir_cachep;
15 57
16extern void sysfs_delete_inode(struct inode *inode); 58extern struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd);
17extern struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent *); 59extern void sysfs_link_sibling(struct sysfs_dirent *sd);
18extern int sysfs_create(struct dentry *, int mode, int (*init)(struct inode *)); 60extern void sysfs_unlink_sibling(struct sysfs_dirent *sd);
61extern struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd);
62extern void sysfs_put_active(struct sysfs_dirent *sd);
63extern struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd);
64extern void sysfs_put_active_two(struct sysfs_dirent *sd);
65extern void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
66 struct sysfs_dirent *parent_sd);
67extern void sysfs_add_one(struct sysfs_addrm_cxt *acxt,
68 struct sysfs_dirent *sd);
69extern void sysfs_remove_one(struct sysfs_addrm_cxt *acxt,
70 struct sysfs_dirent *sd);
71extern int sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt);
19 72
20extern int sysfs_dirent_exist(struct sysfs_dirent *, const unsigned char *); 73extern void sysfs_delete_inode(struct inode *inode);
21extern int sysfs_make_dirent(struct sysfs_dirent *, struct dentry *, void *, 74extern void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode);
22 umode_t, int); 75extern struct inode * sysfs_get_inode(struct sysfs_dirent *sd);
23 76extern void sysfs_instantiate(struct dentry *dentry, struct inode *inode);
24extern int sysfs_add_file(struct dentry *, const struct attribute *, int); 77
25extern int sysfs_hash_and_remove(struct dentry * dir, const char * name); 78extern void release_sysfs_dirent(struct sysfs_dirent * sd);
79extern struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
80 const unsigned char *name);
81extern struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
82 const unsigned char *name);
83extern struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode,
84 int type);
85
86extern int sysfs_add_file(struct sysfs_dirent *dir_sd,
87 const struct attribute *attr, int type);
88extern int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name);
26extern struct sysfs_dirent *sysfs_find(struct sysfs_dirent *dir, const char * name); 89extern struct sysfs_dirent *sysfs_find(struct sysfs_dirent *dir, const char * name);
27 90
28extern int sysfs_create_subdir(struct kobject *, const char *, struct dentry **); 91extern int sysfs_create_subdir(struct kobject *kobj, const char *name,
29extern void sysfs_remove_subdir(struct dentry *); 92 struct sysfs_dirent **p_sd);
93extern void sysfs_remove_subdir(struct sysfs_dirent *sd);
30 94
31extern const unsigned char * sysfs_get_name(struct sysfs_dirent *sd);
32extern void sysfs_drop_dentry(struct sysfs_dirent *sd, struct dentry *parent);
33extern int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); 95extern int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
34 96
35extern struct rw_semaphore sysfs_rename_sem; 97extern spinlock_t sysfs_assoc_lock;
98extern struct mutex sysfs_mutex;
36extern struct super_block * sysfs_sb; 99extern struct super_block * sysfs_sb;
37extern const struct file_operations sysfs_dir_operations; 100extern const struct file_operations sysfs_dir_operations;
38extern const struct file_operations sysfs_file_operations; 101extern const struct file_operations sysfs_file_operations;
@@ -40,73 +103,9 @@ extern const struct file_operations bin_fops;
40extern const struct inode_operations sysfs_dir_inode_operations; 103extern const struct inode_operations sysfs_dir_inode_operations;
41extern const struct inode_operations sysfs_symlink_inode_operations; 104extern const struct inode_operations sysfs_symlink_inode_operations;
42 105
43struct sysfs_symlink { 106static inline unsigned int sysfs_type(struct sysfs_dirent *sd)
44 char * link_name;
45 struct kobject * target_kobj;
46};
47
48struct sysfs_buffer {
49 struct list_head associates;
50 size_t count;
51 loff_t pos;
52 char * page;
53 struct sysfs_ops * ops;
54 struct semaphore sem;
55 int orphaned;
56 int needs_read_fill;
57 int event;
58};
59
60struct sysfs_buffer_collection {
61 struct list_head associates;
62};
63
64static inline struct kobject * to_kobj(struct dentry * dentry)
65{
66 struct sysfs_dirent * sd = dentry->d_fsdata;
67 return ((struct kobject *) sd->s_element);
68}
69
70static inline struct attribute * to_attr(struct dentry * dentry)
71{ 107{
72 struct sysfs_dirent * sd = dentry->d_fsdata; 108 return sd->s_flags & SYSFS_TYPE_MASK;
73 return ((struct attribute *) sd->s_element);
74}
75
76static inline struct bin_attribute * to_bin_attr(struct dentry * dentry)
77{
78 struct sysfs_dirent * sd = dentry->d_fsdata;
79 return ((struct bin_attribute *) sd->s_element);
80}
81
82static inline struct kobject *sysfs_get_kobject(struct dentry *dentry)
83{
84 struct kobject * kobj = NULL;
85
86 spin_lock(&dcache_lock);
87 if (!d_unhashed(dentry)) {
88 struct sysfs_dirent * sd = dentry->d_fsdata;
89 if (sd->s_type & SYSFS_KOBJ_LINK) {
90 struct sysfs_symlink * sl = sd->s_element;
91 kobj = kobject_get(sl->target_kobj);
92 } else
93 kobj = kobject_get(sd->s_element);
94 }
95 spin_unlock(&dcache_lock);
96
97 return kobj;
98}
99
100static inline void release_sysfs_dirent(struct sysfs_dirent * sd)
101{
102 if (sd->s_type & SYSFS_KOBJ_LINK) {
103 struct sysfs_symlink * sl = sd->s_element;
104 kfree(sl->link_name);
105 kobject_put(sl->target_kobj);
106 kfree(sl);
107 }
108 kfree(sd->s_iattr);
109 kmem_cache_free(sysfs_dir_cachep, sd);
110} 109}
111 110
112static inline struct sysfs_dirent * sysfs_get(struct sysfs_dirent * sd) 111static inline struct sysfs_dirent * sysfs_get(struct sysfs_dirent * sd)
@@ -120,7 +119,7 @@ static inline struct sysfs_dirent * sysfs_get(struct sysfs_dirent * sd)
120 119
121static inline void sysfs_put(struct sysfs_dirent * sd) 120static inline void sysfs_put(struct sysfs_dirent * sd)
122{ 121{
123 if (atomic_dec_and_test(&sd->s_count)) 122 if (sd && atomic_dec_and_test(&sd->s_count))
124 release_sysfs_dirent(sd); 123 release_sysfs_dirent(sd);
125} 124}
126 125
diff --git a/fs/sysv/file.c b/fs/sysv/file.c
index 0732ddb9020b..589be21d884e 100644
--- a/fs/sysv/file.c
+++ b/fs/sysv/file.c
@@ -27,7 +27,7 @@ const struct file_operations sysv_file_operations = {
27 .aio_write = generic_file_aio_write, 27 .aio_write = generic_file_aio_write,
28 .mmap = generic_file_mmap, 28 .mmap = generic_file_mmap,
29 .fsync = sysv_sync_file, 29 .fsync = sysv_sync_file,
30 .sendfile = generic_file_sendfile, 30 .splice_read = generic_file_splice_read,
31}; 31};
32 32
33const struct inode_operations sysv_file_inode_operations = { 33const struct inode_operations sysv_file_inode_operations = {
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 51b5764685e7..df070bee8d4f 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -261,7 +261,7 @@ const struct file_operations udf_file_operations = {
261 .aio_write = udf_file_aio_write, 261 .aio_write = udf_file_aio_write,
262 .release = udf_release_file, 262 .release = udf_release_file,
263 .fsync = udf_fsync_file, 263 .fsync = udf_fsync_file,
264 .sendfile = generic_file_sendfile, 264 .splice_read = generic_file_splice_read,
265}; 265};
266 266
267const struct inode_operations udf_file_inode_operations = { 267const struct inode_operations udf_file_inode_operations = {
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index c8461551e108..bf7de0bdbab3 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -100,14 +100,23 @@ no_delete:
100 clear_inode(inode); 100 clear_inode(inode);
101} 101}
102 102
103/*
104 * If we are going to release inode from memory, we discard preallocation and
105 * truncate last inode extent to proper length. We could use drop_inode() but
106 * it's called under inode_lock and thus we cannot mark inode dirty there. We
107 * use clear_inode() but we have to make sure to write inode as it's not written
108 * automatically.
109 */
103void udf_clear_inode(struct inode *inode) 110void udf_clear_inode(struct inode *inode)
104{ 111{
105 if (!(inode->i_sb->s_flags & MS_RDONLY)) { 112 if (!(inode->i_sb->s_flags & MS_RDONLY)) {
106 lock_kernel(); 113 lock_kernel();
114 /* Discard preallocation for directories, symlinks, etc. */
107 udf_discard_prealloc(inode); 115 udf_discard_prealloc(inode);
116 udf_truncate_tail_extent(inode);
108 unlock_kernel(); 117 unlock_kernel();
118 write_inode_now(inode, 1);
109 } 119 }
110
111 kfree(UDF_I_DATA(inode)); 120 kfree(UDF_I_DATA(inode));
112 UDF_I_DATA(inode) = NULL; 121 UDF_I_DATA(inode) = NULL;
113} 122}
@@ -460,8 +469,8 @@ static struct buffer_head * inode_getblk(struct inode * inode, sector_t block,
460 kernel_long_ad laarr[EXTENT_MERGE_SIZE]; 469 kernel_long_ad laarr[EXTENT_MERGE_SIZE];
461 struct extent_position prev_epos, cur_epos, next_epos; 470 struct extent_position prev_epos, cur_epos, next_epos;
462 int count = 0, startnum = 0, endnum = 0; 471 int count = 0, startnum = 0, endnum = 0;
463 uint32_t elen = 0; 472 uint32_t elen = 0, tmpelen;
464 kernel_lb_addr eloc; 473 kernel_lb_addr eloc, tmpeloc;
465 int c = 1; 474 int c = 1;
466 loff_t lbcount = 0, b_off = 0; 475 loff_t lbcount = 0, b_off = 0;
467 uint32_t newblocknum, newblock; 476 uint32_t newblocknum, newblock;
@@ -520,8 +529,12 @@ static struct buffer_head * inode_getblk(struct inode * inode, sector_t block,
520 529
521 b_off -= lbcount; 530 b_off -= lbcount;
522 offset = b_off >> inode->i_sb->s_blocksize_bits; 531 offset = b_off >> inode->i_sb->s_blocksize_bits;
523 /* Move into indirect extent if we are at a pointer to it */ 532 /*
524 udf_next_aext(inode, &prev_epos, &eloc, &elen, 0); 533 * Move prev_epos and cur_epos into indirect extent if we are at
534 * the pointer to it
535 */
536 udf_next_aext(inode, &prev_epos, &tmpeloc, &tmpelen, 0);
537 udf_next_aext(inode, &cur_epos, &tmpeloc, &tmpelen, 0);
525 538
526 /* if the extent is allocated and recorded, return the block 539 /* if the extent is allocated and recorded, return the block
527 if the extent is not a multiple of the blocksize, round up */ 540 if the extent is not a multiple of the blocksize, round up */
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 3a743d854c17..6658afb41cc7 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -1351,7 +1351,7 @@ udf_load_partition(struct super_block *sb, kernel_lb_addr *fileset)
1351 1351
1352 for (i=0; i<UDF_SB_NUMPARTS(sb); i++) 1352 for (i=0; i<UDF_SB_NUMPARTS(sb); i++)
1353 { 1353 {
1354 switch UDF_SB_PARTTYPE(sb, i) 1354 switch (UDF_SB_PARTTYPE(sb, i))
1355 { 1355 {
1356 case UDF_VIRTUAL_MAP15: 1356 case UDF_VIRTUAL_MAP15:
1357 case UDF_VIRTUAL_MAP20: 1357 case UDF_VIRTUAL_MAP20:
diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c
index 77975ae291a5..60d277644248 100644
--- a/fs/udf/truncate.c
+++ b/fs/udf/truncate.c
@@ -61,7 +61,11 @@ static void extent_trunc(struct inode * inode, struct extent_position *epos,
61 } 61 }
62} 62}
63 63
64void udf_discard_prealloc(struct inode * inode) 64/*
65 * Truncate the last extent to match i_size. This function assumes
66 * that preallocation extent is already truncated.
67 */
68void udf_truncate_tail_extent(struct inode *inode)
65{ 69{
66 struct extent_position epos = { NULL, 0, {0, 0}}; 70 struct extent_position epos = { NULL, 0, {0, 0}};
67 kernel_lb_addr eloc; 71 kernel_lb_addr eloc;
@@ -71,7 +75,10 @@ void udf_discard_prealloc(struct inode * inode)
71 int adsize; 75 int adsize;
72 76
73 if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB || 77 if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB ||
74 inode->i_size == UDF_I_LENEXTENTS(inode)) 78 inode->i_size == UDF_I_LENEXTENTS(inode))
79 return;
80 /* Are we going to delete the file anyway? */
81 if (inode->i_nlink == 0)
75 return; 82 return;
76 83
77 if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_SHORT) 84 if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_SHORT)
@@ -79,36 +86,76 @@ void udf_discard_prealloc(struct inode * inode)
79 else if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_LONG) 86 else if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_LONG)
80 adsize = sizeof(long_ad); 87 adsize = sizeof(long_ad);
81 else 88 else
82 adsize = 0; 89 BUG();
83
84 epos.block = UDF_I_LOCATION(inode);
85 90
86 /* Find the last extent in the file */ 91 /* Find the last extent in the file */
87 while ((netype = udf_next_aext(inode, &epos, &eloc, &elen, 1)) != -1) 92 while ((netype = udf_next_aext(inode, &epos, &eloc, &elen, 1)) != -1)
88 { 93 {
89 etype = netype; 94 etype = netype;
90 lbcount += elen; 95 lbcount += elen;
91 if (lbcount > inode->i_size && lbcount - elen < inode->i_size) 96 if (lbcount > inode->i_size) {
92 { 97 if (lbcount - inode->i_size >= inode->i_sb->s_blocksize)
93 WARN_ON(lbcount - inode->i_size >= inode->i_sb->s_blocksize); 98 printk(KERN_WARNING
99 "udf_truncate_tail_extent(): Too long "
100 "extent after EOF in inode %u: i_size: "
101 "%Ld lbcount: %Ld extent %u+%u\n",
102 (unsigned)inode->i_ino,
103 (long long)inode->i_size,
104 (long long)lbcount,
105 (unsigned)eloc.logicalBlockNum,
106 (unsigned)elen);
94 nelen = elen - (lbcount - inode->i_size); 107 nelen = elen - (lbcount - inode->i_size);
95 epos.offset -= adsize; 108 epos.offset -= adsize;
96 extent_trunc(inode, &epos, eloc, etype, elen, nelen); 109 extent_trunc(inode, &epos, eloc, etype, elen, nelen);
97 epos.offset += adsize; 110 epos.offset += adsize;
98 lbcount = inode->i_size; 111 if (udf_next_aext(inode, &epos, &eloc, &elen, 1) != -1)
112 printk(KERN_ERR "udf_truncate_tail_extent(): "
113 "Extent after EOF in inode %u.\n",
114 (unsigned)inode->i_ino);
115 break;
99 } 116 }
100 } 117 }
118 /* This inode entry is in-memory only and thus we don't have to mark
119 * the inode dirty */
120 UDF_I_LENEXTENTS(inode) = inode->i_size;
121 brelse(epos.bh);
122}
123
124void udf_discard_prealloc(struct inode *inode)
125{
126 struct extent_position epos = { NULL, 0, {0, 0}};
127 kernel_lb_addr eloc;
128 uint32_t elen;
129 uint64_t lbcount = 0;
130 int8_t etype = -1, netype;
131 int adsize;
132
133 if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB ||
134 inode->i_size == UDF_I_LENEXTENTS(inode))
135 return;
136
137 if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_SHORT)
138 adsize = sizeof(short_ad);
139 else if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_LONG)
140 adsize = sizeof(long_ad);
141 else
142 adsize = 0;
143
144 epos.block = UDF_I_LOCATION(inode);
145
146 /* Find the last extent in the file */
147 while ((netype = udf_next_aext(inode, &epos, &eloc, &elen, 1)) != -1) {
148 etype = netype;
149 lbcount += elen;
150 }
101 if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) { 151 if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) {
102 epos.offset -= adsize; 152 epos.offset -= adsize;
103 lbcount -= elen; 153 lbcount -= elen;
104 extent_trunc(inode, &epos, eloc, etype, elen, 0); 154 extent_trunc(inode, &epos, eloc, etype, elen, 0);
105 if (!epos.bh) 155 if (!epos.bh) {
106 {
107 UDF_I_LENALLOC(inode) = epos.offset - udf_file_entry_alloc_offset(inode); 156 UDF_I_LENALLOC(inode) = epos.offset - udf_file_entry_alloc_offset(inode);
108 mark_inode_dirty(inode); 157 mark_inode_dirty(inode);
109 } 158 } else {
110 else
111 {
112 struct allocExtDesc *aed = (struct allocExtDesc *)(epos.bh->b_data); 159 struct allocExtDesc *aed = (struct allocExtDesc *)(epos.bh->b_data);
113 aed->lengthAllocDescs = cpu_to_le32(epos.offset - sizeof(struct allocExtDesc)); 160 aed->lengthAllocDescs = cpu_to_le32(epos.offset - sizeof(struct allocExtDesc));
114 if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(inode->i_sb) >= 0x0201) 161 if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(inode->i_sb) >= 0x0201)
@@ -118,9 +165,9 @@ void udf_discard_prealloc(struct inode * inode)
118 mark_buffer_dirty_inode(epos.bh, inode); 165 mark_buffer_dirty_inode(epos.bh, inode);
119 } 166 }
120 } 167 }
168 /* This inode entry is in-memory only and thus we don't have to mark
169 * the inode dirty */
121 UDF_I_LENEXTENTS(inode) = lbcount; 170 UDF_I_LENEXTENTS(inode) = lbcount;
122
123 WARN_ON(lbcount != inode->i_size);
124 brelse(epos.bh); 171 brelse(epos.bh);
125} 172}
126 173
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index 67ded289497c..f581f2f69c0f 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -146,6 +146,7 @@ extern void udf_free_inode(struct inode *);
146extern struct inode * udf_new_inode (struct inode *, int, int *); 146extern struct inode * udf_new_inode (struct inode *, int, int *);
147 147
148/* truncate.c */ 148/* truncate.c */
149extern void udf_truncate_tail_extent(struct inode *);
149extern void udf_discard_prealloc(struct inode *); 150extern void udf_discard_prealloc(struct inode *);
150extern void udf_truncate_extents(struct inode *); 151extern void udf_truncate_extents(struct inode *);
151 152
diff --git a/fs/ufs/file.c b/fs/ufs/file.c
index 1e096323bad4..6705d74c6d2d 100644
--- a/fs/ufs/file.c
+++ b/fs/ufs/file.c
@@ -60,5 +60,5 @@ const struct file_operations ufs_file_operations = {
60 .mmap = generic_file_mmap, 60 .mmap = generic_file_mmap,
61 .open = generic_file_open, 61 .open = generic_file_open,
62 .fsync = ufs_sync_file, 62 .fsync = ufs_sync_file,
63 .sendfile = generic_file_sendfile, 63 .splice_read = generic_file_splice_read,
64}; 64};
diff --git a/fs/utimes.c b/fs/utimes.c
index 480f7c8c29da..b3c88952465f 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -106,9 +106,16 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
106 if (IS_IMMUTABLE(inode)) 106 if (IS_IMMUTABLE(inode))
107 goto dput_and_out; 107 goto dput_and_out;
108 108
109 if (current->fsuid != inode->i_uid && 109 if (current->fsuid != inode->i_uid) {
110 (error = vfs_permission(&nd, MAY_WRITE)) != 0) 110 if (f) {
111 goto dput_and_out; 111 if (!(f->f_mode & FMODE_WRITE))
112 goto dput_and_out;
113 } else {
114 error = vfs_permission(&nd, MAY_WRITE);
115 if (error)
116 goto dput_and_out;
117 }
118 }
112 } 119 }
113 mutex_lock(&inode->i_mutex); 120 mutex_lock(&inode->i_mutex);
114 error = notify_change(dentry, &newattrs); 121 error = notify_change(dentry, &newattrs);
diff --git a/fs/xfs/Makefile-linux-2.6 b/fs/xfs/Makefile-linux-2.6
index b49989bb89ad..e7a9a83f0087 100644
--- a/fs/xfs/Makefile-linux-2.6
+++ b/fs/xfs/Makefile-linux-2.6
@@ -64,6 +64,7 @@ xfs-y += xfs_alloc.o \
64 xfs_dir2_sf.o \ 64 xfs_dir2_sf.o \
65 xfs_error.o \ 65 xfs_error.o \
66 xfs_extfree_item.o \ 66 xfs_extfree_item.o \
67 xfs_filestream.o \
67 xfs_fsops.o \ 68 xfs_fsops.o \
68 xfs_ialloc.o \ 69 xfs_ialloc.o \
69 xfs_ialloc_btree.o \ 70 xfs_ialloc_btree.o \
@@ -77,6 +78,7 @@ xfs-y += xfs_alloc.o \
77 xfs_log.o \ 78 xfs_log.o \
78 xfs_log_recover.o \ 79 xfs_log_recover.o \
79 xfs_mount.o \ 80 xfs_mount.o \
81 xfs_mru_cache.o \
80 xfs_rename.o \ 82 xfs_rename.o \
81 xfs_trans.o \ 83 xfs_trans.o \
82 xfs_trans_ail.o \ 84 xfs_trans_ail.o \
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
index 9ebabdf7829c..4b6470cf87f0 100644
--- a/fs/xfs/linux-2.6/kmem.h
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -100,25 +100,6 @@ kmem_zone_destroy(kmem_zone_t *zone)
100extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast); 100extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast);
101extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast); 101extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast);
102 102
103/*
104 * Low memory cache shrinkers
105 */
106
107typedef struct shrinker *kmem_shaker_t;
108typedef int (*kmem_shake_func_t)(int, gfp_t);
109
110static inline kmem_shaker_t
111kmem_shake_register(kmem_shake_func_t sfunc)
112{
113 return set_shrinker(DEFAULT_SEEKS, sfunc);
114}
115
116static inline void
117kmem_shake_deregister(kmem_shaker_t shrinker)
118{
119 remove_shrinker(shrinker);
120}
121
122static inline int 103static inline int
123kmem_shake_allow(gfp_t gfp_mask) 104kmem_shake_allow(gfp_t gfp_mask)
124{ 105{
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 7361861e3aac..fd4105d662e0 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -108,14 +108,19 @@ xfs_page_trace(
108 108
109/* 109/*
110 * Schedule IO completion handling on a xfsdatad if this was 110 * Schedule IO completion handling on a xfsdatad if this was
111 * the final hold on this ioend. 111 * the final hold on this ioend. If we are asked to wait,
112 * flush the workqueue.
112 */ 113 */
113STATIC void 114STATIC void
114xfs_finish_ioend( 115xfs_finish_ioend(
115 xfs_ioend_t *ioend) 116 xfs_ioend_t *ioend,
117 int wait)
116{ 118{
117 if (atomic_dec_and_test(&ioend->io_remaining)) 119 if (atomic_dec_and_test(&ioend->io_remaining)) {
118 queue_work(xfsdatad_workqueue, &ioend->io_work); 120 queue_work(xfsdatad_workqueue, &ioend->io_work);
121 if (wait)
122 flush_workqueue(xfsdatad_workqueue);
123 }
119} 124}
120 125
121/* 126/*
@@ -156,6 +161,8 @@ xfs_setfilesize(
156 xfs_fsize_t bsize; 161 xfs_fsize_t bsize;
157 162
158 ip = xfs_vtoi(ioend->io_vnode); 163 ip = xfs_vtoi(ioend->io_vnode);
164 if (!ip)
165 return;
159 166
160 ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG); 167 ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
161 ASSERT(ioend->io_type != IOMAP_READ); 168 ASSERT(ioend->io_type != IOMAP_READ);
@@ -334,7 +341,7 @@ xfs_end_bio(
334 bio->bi_end_io = NULL; 341 bio->bi_end_io = NULL;
335 bio_put(bio); 342 bio_put(bio);
336 343
337 xfs_finish_ioend(ioend); 344 xfs_finish_ioend(ioend, 0);
338 return 0; 345 return 0;
339} 346}
340 347
@@ -470,7 +477,7 @@ xfs_submit_ioend(
470 } 477 }
471 if (bio) 478 if (bio)
472 xfs_submit_ioend_bio(ioend, bio); 479 xfs_submit_ioend_bio(ioend, bio);
473 xfs_finish_ioend(ioend); 480 xfs_finish_ioend(ioend, 0);
474 } while ((ioend = next) != NULL); 481 } while ((ioend = next) != NULL);
475} 482}
476 483
@@ -1003,6 +1010,8 @@ xfs_page_state_convert(
1003 if (buffer_unwritten(bh) || buffer_delay(bh) || 1010 if (buffer_unwritten(bh) || buffer_delay(bh) ||
1004 ((buffer_uptodate(bh) || PageUptodate(page)) && 1011 ((buffer_uptodate(bh) || PageUptodate(page)) &&
1005 !buffer_mapped(bh) && (unmapped || startio))) { 1012 !buffer_mapped(bh) && (unmapped || startio))) {
1013 int new_ioend = 0;
1014
1006 /* 1015 /*
1007 * Make sure we don't use a read-only iomap 1016 * Make sure we don't use a read-only iomap
1008 */ 1017 */
@@ -1021,6 +1030,15 @@ xfs_page_state_convert(
1021 } 1030 }
1022 1031
1023 if (!iomap_valid) { 1032 if (!iomap_valid) {
1033 /*
1034 * if we didn't have a valid mapping then we
1035 * need to ensure that we put the new mapping
1036 * in a new ioend structure. This needs to be
1037 * done to ensure that the ioends correctly
1038 * reflect the block mappings at io completion
1039 * for unwritten extent conversion.
1040 */
1041 new_ioend = 1;
1024 if (type == IOMAP_NEW) { 1042 if (type == IOMAP_NEW) {
1025 size = xfs_probe_cluster(inode, 1043 size = xfs_probe_cluster(inode,
1026 page, bh, head, 0); 1044 page, bh, head, 0);
@@ -1040,7 +1058,7 @@ xfs_page_state_convert(
1040 if (startio) { 1058 if (startio) {
1041 xfs_add_to_ioend(inode, bh, offset, 1059 xfs_add_to_ioend(inode, bh, offset,
1042 type, &ioend, 1060 type, &ioend,
1043 !iomap_valid); 1061 new_ioend);
1044 } else { 1062 } else {
1045 set_buffer_dirty(bh); 1063 set_buffer_dirty(bh);
1046 unlock_buffer(bh); 1064 unlock_buffer(bh);
@@ -1416,6 +1434,13 @@ xfs_end_io_direct(
1416 * This is not necessary for synchronous direct I/O, but we do 1434 * This is not necessary for synchronous direct I/O, but we do
1417 * it anyway to keep the code uniform and simpler. 1435 * it anyway to keep the code uniform and simpler.
1418 * 1436 *
1437 * Well, if only it were that simple. Because synchronous direct I/O
1438 * requires extent conversion to occur *before* we return to userspace,
1439 * we have to wait for extent conversion to complete. Look at the
1440 * iocb that has been passed to us to determine if this is AIO or
1441 * not. If it is synchronous, tell xfs_finish_ioend() to kick the
1442 * workqueue and wait for it to complete.
1443 *
1419 * The core direct I/O code might be changed to always call the 1444 * The core direct I/O code might be changed to always call the
1420 * completion handler in the future, in which case all this can 1445 * completion handler in the future, in which case all this can
1421 * go away. 1446 * go away.
@@ -1423,9 +1448,9 @@ xfs_end_io_direct(
1423 ioend->io_offset = offset; 1448 ioend->io_offset = offset;
1424 ioend->io_size = size; 1449 ioend->io_size = size;
1425 if (ioend->io_type == IOMAP_READ) { 1450 if (ioend->io_type == IOMAP_READ) {
1426 xfs_finish_ioend(ioend); 1451 xfs_finish_ioend(ioend, 0);
1427 } else if (private && size > 0) { 1452 } else if (private && size > 0) {
1428 xfs_finish_ioend(ioend); 1453 xfs_finish_ioend(ioend, is_sync_kiocb(iocb));
1429 } else { 1454 } else {
1430 /* 1455 /*
1431 * A direct I/O write ioend starts it's life in unwritten 1456 * A direct I/O write ioend starts it's life in unwritten
@@ -1434,7 +1459,7 @@ xfs_end_io_direct(
1434 * handler. 1459 * handler.
1435 */ 1460 */
1436 INIT_WORK(&ioend->io_work, xfs_end_bio_written); 1461 INIT_WORK(&ioend->io_work, xfs_end_bio_written);
1437 xfs_finish_ioend(ioend); 1462 xfs_finish_ioend(ioend, 0);
1438 } 1463 }
1439 1464
1440 /* 1465 /*
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index fe4f66a5af14..2df63622354e 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -35,7 +35,7 @@
35#include <linux/freezer.h> 35#include <linux/freezer.h>
36 36
37static kmem_zone_t *xfs_buf_zone; 37static kmem_zone_t *xfs_buf_zone;
38static kmem_shaker_t xfs_buf_shake; 38static struct shrinker *xfs_buf_shake;
39STATIC int xfsbufd(void *); 39STATIC int xfsbufd(void *);
40STATIC int xfsbufd_wakeup(int, gfp_t); 40STATIC int xfsbufd_wakeup(int, gfp_t);
41STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int); 41STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
@@ -314,7 +314,7 @@ xfs_buf_free(
314 314
315 ASSERT(list_empty(&bp->b_hash_list)); 315 ASSERT(list_empty(&bp->b_hash_list));
316 316
317 if (bp->b_flags & _XBF_PAGE_CACHE) { 317 if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
318 uint i; 318 uint i;
319 319
320 if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1)) 320 if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1))
@@ -323,18 +323,11 @@ xfs_buf_free(
323 for (i = 0; i < bp->b_page_count; i++) { 323 for (i = 0; i < bp->b_page_count; i++) {
324 struct page *page = bp->b_pages[i]; 324 struct page *page = bp->b_pages[i];
325 325
326 ASSERT(!PagePrivate(page)); 326 if (bp->b_flags & _XBF_PAGE_CACHE)
327 ASSERT(!PagePrivate(page));
327 page_cache_release(page); 328 page_cache_release(page);
328 } 329 }
329 _xfs_buf_free_pages(bp); 330 _xfs_buf_free_pages(bp);
330 } else if (bp->b_flags & _XBF_KMEM_ALLOC) {
331 /*
332 * XXX(hch): bp->b_count_desired might be incorrect (see
333 * xfs_buf_associate_memory for details), but fortunately
334 * the Linux version of kmem_free ignores the len argument..
335 */
336 kmem_free(bp->b_addr, bp->b_count_desired);
337 _xfs_buf_free_pages(bp);
338 } 331 }
339 332
340 xfs_buf_deallocate(bp); 333 xfs_buf_deallocate(bp);
@@ -764,43 +757,44 @@ xfs_buf_get_noaddr(
764 size_t len, 757 size_t len,
765 xfs_buftarg_t *target) 758 xfs_buftarg_t *target)
766{ 759{
767 size_t malloc_len = len; 760 unsigned long page_count = PAGE_ALIGN(len) >> PAGE_SHIFT;
761 int error, i;
768 xfs_buf_t *bp; 762 xfs_buf_t *bp;
769 void *data;
770 int error;
771 763
772 bp = xfs_buf_allocate(0); 764 bp = xfs_buf_allocate(0);
773 if (unlikely(bp == NULL)) 765 if (unlikely(bp == NULL))
774 goto fail; 766 goto fail;
775 _xfs_buf_initialize(bp, target, 0, len, 0); 767 _xfs_buf_initialize(bp, target, 0, len, 0);
776 768
777 try_again: 769 error = _xfs_buf_get_pages(bp, page_count, 0);
778 data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL | KM_LARGE); 770 if (error)
779 if (unlikely(data == NULL))
780 goto fail_free_buf; 771 goto fail_free_buf;
781 772
782 /* check whether alignment matches.. */ 773 for (i = 0; i < page_count; i++) {
783 if ((__psunsigned_t)data != 774 bp->b_pages[i] = alloc_page(GFP_KERNEL);
784 ((__psunsigned_t)data & ~target->bt_smask)) { 775 if (!bp->b_pages[i])
785 /* .. else double the size and try again */ 776 goto fail_free_mem;
786 kmem_free(data, malloc_len);
787 malloc_len <<= 1;
788 goto try_again;
789 } 777 }
778 bp->b_flags |= _XBF_PAGES;
790 779
791 error = xfs_buf_associate_memory(bp, data, len); 780 error = _xfs_buf_map_pages(bp, XBF_MAPPED);
792 if (error) 781 if (unlikely(error)) {
782 printk(KERN_WARNING "%s: failed to map pages\n",
783 __FUNCTION__);
793 goto fail_free_mem; 784 goto fail_free_mem;
794 bp->b_flags |= _XBF_KMEM_ALLOC; 785 }
795 786
796 xfs_buf_unlock(bp); 787 xfs_buf_unlock(bp);
797 788
798 XB_TRACE(bp, "no_daddr", data); 789 XB_TRACE(bp, "no_daddr", len);
799 return bp; 790 return bp;
791
800 fail_free_mem: 792 fail_free_mem:
801 kmem_free(data, malloc_len); 793 while (--i >= 0)
794 __free_page(bp->b_pages[i]);
795 _xfs_buf_free_pages(bp);
802 fail_free_buf: 796 fail_free_buf:
803 xfs_buf_free(bp); 797 xfs_buf_deallocate(bp);
804 fail: 798 fail:
805 return NULL; 799 return NULL;
806} 800}
@@ -1453,6 +1447,7 @@ xfs_free_buftarg(
1453 int external) 1447 int external)
1454{ 1448{
1455 xfs_flush_buftarg(btp, 1); 1449 xfs_flush_buftarg(btp, 1);
1450 xfs_blkdev_issue_flush(btp);
1456 if (external) 1451 if (external)
1457 xfs_blkdev_put(btp->bt_bdev); 1452 xfs_blkdev_put(btp->bt_bdev);
1458 xfs_free_bufhash(btp); 1453 xfs_free_bufhash(btp);
@@ -1837,7 +1832,7 @@ xfs_buf_init(void)
1837 if (!xfsdatad_workqueue) 1832 if (!xfsdatad_workqueue)
1838 goto out_destroy_xfslogd_workqueue; 1833 goto out_destroy_xfslogd_workqueue;
1839 1834
1840 xfs_buf_shake = kmem_shake_register(xfsbufd_wakeup); 1835 xfs_buf_shake = set_shrinker(DEFAULT_SEEKS, xfsbufd_wakeup);
1841 if (!xfs_buf_shake) 1836 if (!xfs_buf_shake)
1842 goto out_destroy_xfsdatad_workqueue; 1837 goto out_destroy_xfsdatad_workqueue;
1843 1838
@@ -1859,7 +1854,7 @@ xfs_buf_init(void)
1859void 1854void
1860xfs_buf_terminate(void) 1855xfs_buf_terminate(void)
1861{ 1856{
1862 kmem_shake_deregister(xfs_buf_shake); 1857 remove_shrinker(xfs_buf_shake);
1863 destroy_workqueue(xfsdatad_workqueue); 1858 destroy_workqueue(xfsdatad_workqueue);
1864 destroy_workqueue(xfslogd_workqueue); 1859 destroy_workqueue(xfslogd_workqueue);
1865 kmem_zone_destroy(xfs_buf_zone); 1860 kmem_zone_destroy(xfs_buf_zone);
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index b6241f6201a5..b5908a34b15d 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -63,7 +63,7 @@ typedef enum {
63 63
64 /* flags used only internally */ 64 /* flags used only internally */
65 _XBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */ 65 _XBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */
66 _XBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc() */ 66 _XBF_PAGES = (1 << 18), /* backed by refcounted pages */
67 _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */ 67 _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */
68 _XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */ 68 _XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */
69} xfs_buf_flags_t; 69} xfs_buf_flags_t;
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index cb51dc961355..cbcd40c8c2a0 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -124,30 +124,6 @@ xfs_file_aio_write_invis(
124} 124}
125 125
126STATIC ssize_t 126STATIC ssize_t
127xfs_file_sendfile(
128 struct file *filp,
129 loff_t *pos,
130 size_t count,
131 read_actor_t actor,
132 void *target)
133{
134 return bhv_vop_sendfile(vn_from_inode(filp->f_path.dentry->d_inode),
135 filp, pos, 0, count, actor, target, NULL);
136}
137
138STATIC ssize_t
139xfs_file_sendfile_invis(
140 struct file *filp,
141 loff_t *pos,
142 size_t count,
143 read_actor_t actor,
144 void *target)
145{
146 return bhv_vop_sendfile(vn_from_inode(filp->f_path.dentry->d_inode),
147 filp, pos, IO_INVIS, count, actor, target, NULL);
148}
149
150STATIC ssize_t
151xfs_file_splice_read( 127xfs_file_splice_read(
152 struct file *infilp, 128 struct file *infilp,
153 loff_t *ppos, 129 loff_t *ppos,
@@ -208,15 +184,6 @@ xfs_file_open(
208} 184}
209 185
210STATIC int 186STATIC int
211xfs_file_close(
212 struct file *filp,
213 fl_owner_t id)
214{
215 return -bhv_vop_close(vn_from_inode(filp->f_path.dentry->d_inode), 0,
216 file_count(filp) > 1 ? L_FALSE : L_TRUE, NULL);
217}
218
219STATIC int
220xfs_file_release( 187xfs_file_release(
221 struct inode *inode, 188 struct inode *inode,
222 struct file *filp) 189 struct file *filp)
@@ -452,7 +419,6 @@ const struct file_operations xfs_file_operations = {
452 .write = do_sync_write, 419 .write = do_sync_write,
453 .aio_read = xfs_file_aio_read, 420 .aio_read = xfs_file_aio_read,
454 .aio_write = xfs_file_aio_write, 421 .aio_write = xfs_file_aio_write,
455 .sendfile = xfs_file_sendfile,
456 .splice_read = xfs_file_splice_read, 422 .splice_read = xfs_file_splice_read,
457 .splice_write = xfs_file_splice_write, 423 .splice_write = xfs_file_splice_write,
458 .unlocked_ioctl = xfs_file_ioctl, 424 .unlocked_ioctl = xfs_file_ioctl,
@@ -461,7 +427,6 @@ const struct file_operations xfs_file_operations = {
461#endif 427#endif
462 .mmap = xfs_file_mmap, 428 .mmap = xfs_file_mmap,
463 .open = xfs_file_open, 429 .open = xfs_file_open,
464 .flush = xfs_file_close,
465 .release = xfs_file_release, 430 .release = xfs_file_release,
466 .fsync = xfs_file_fsync, 431 .fsync = xfs_file_fsync,
467#ifdef HAVE_FOP_OPEN_EXEC 432#ifdef HAVE_FOP_OPEN_EXEC
@@ -475,7 +440,6 @@ const struct file_operations xfs_invis_file_operations = {
475 .write = do_sync_write, 440 .write = do_sync_write,
476 .aio_read = xfs_file_aio_read_invis, 441 .aio_read = xfs_file_aio_read_invis,
477 .aio_write = xfs_file_aio_write_invis, 442 .aio_write = xfs_file_aio_write_invis,
478 .sendfile = xfs_file_sendfile_invis,
479 .splice_read = xfs_file_splice_read_invis, 443 .splice_read = xfs_file_splice_read_invis,
480 .splice_write = xfs_file_splice_write_invis, 444 .splice_write = xfs_file_splice_write_invis,
481 .unlocked_ioctl = xfs_file_ioctl_invis, 445 .unlocked_ioctl = xfs_file_ioctl_invis,
@@ -484,7 +448,6 @@ const struct file_operations xfs_invis_file_operations = {
484#endif 448#endif
485 .mmap = xfs_file_mmap, 449 .mmap = xfs_file_mmap,
486 .open = xfs_file_open, 450 .open = xfs_file_open,
487 .flush = xfs_file_close,
488 .release = xfs_file_release, 451 .release = xfs_file_release,
489 .fsync = xfs_file_fsync, 452 .fsync = xfs_file_fsync,
490}; 453};
diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c
index ed3a5e1b4b67..bb72c3d4141f 100644
--- a/fs/xfs/linux-2.6/xfs_globals.c
+++ b/fs/xfs/linux-2.6/xfs_globals.c
@@ -46,6 +46,7 @@ xfs_param_t xfs_params = {
46 .inherit_nosym = { 0, 0, 1 }, 46 .inherit_nosym = { 0, 0, 1 },
47 .rotorstep = { 1, 1, 255 }, 47 .rotorstep = { 1, 1, 255 },
48 .inherit_nodfrg = { 0, 1, 1 }, 48 .inherit_nodfrg = { 0, 1, 1 },
49 .fstrm_timer = { 1, 50, 3600*100},
49}; 50};
50 51
51/* 52/*
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index ff5c41ff8d40..5917808abbd6 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -1019,7 +1019,7 @@ xfs_ioc_bulkstat(
1019 1019
1020 if (cmd == XFS_IOC_FSINUMBERS) 1020 if (cmd == XFS_IOC_FSINUMBERS)
1021 error = xfs_inumbers(mp, &inlast, &count, 1021 error = xfs_inumbers(mp, &inlast, &count,
1022 bulkreq.ubuffer); 1022 bulkreq.ubuffer, xfs_inumbers_fmt);
1023 else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) 1023 else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE)
1024 error = xfs_bulkstat_single(mp, &inlast, 1024 error = xfs_bulkstat_single(mp, &inlast,
1025 bulkreq.ubuffer, &done); 1025 bulkreq.ubuffer, &done);
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index b83cebc165f1..141cf15067c2 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -23,10 +23,25 @@
23#include <linux/fs.h> 23#include <linux/fs.h>
24#include <asm/uaccess.h> 24#include <asm/uaccess.h>
25#include "xfs.h" 25#include "xfs.h"
26#include "xfs_types.h"
27#include "xfs_fs.h" 26#include "xfs_fs.h"
27#include "xfs_bit.h"
28#include "xfs_log.h"
29#include "xfs_inum.h"
30#include "xfs_trans.h"
31#include "xfs_sb.h"
32#include "xfs_ag.h"
33#include "xfs_dir2.h"
34#include "xfs_dmapi.h"
35#include "xfs_mount.h"
36#include "xfs_bmap_btree.h"
37#include "xfs_attr_sf.h"
38#include "xfs_dir2_sf.h"
28#include "xfs_vfs.h" 39#include "xfs_vfs.h"
29#include "xfs_vnode.h" 40#include "xfs_vnode.h"
41#include "xfs_dinode.h"
42#include "xfs_inode.h"
43#include "xfs_itable.h"
44#include "xfs_error.h"
30#include "xfs_dfrag.h" 45#include "xfs_dfrag.h"
31 46
32#define _NATIVE_IOC(cmd, type) \ 47#define _NATIVE_IOC(cmd, type) \
@@ -34,6 +49,7 @@
34 49
35#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) 50#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
36#define BROKEN_X86_ALIGNMENT 51#define BROKEN_X86_ALIGNMENT
52#define _PACKED __attribute__((packed))
37/* on ia32 l_start is on a 32-bit boundary */ 53/* on ia32 l_start is on a 32-bit boundary */
38typedef struct xfs_flock64_32 { 54typedef struct xfs_flock64_32 {
39 __s16 l_type; 55 __s16 l_type;
@@ -75,35 +91,276 @@ xfs_ioctl32_flock(
75 return (unsigned long)p; 91 return (unsigned long)p;
76} 92}
77 93
94typedef struct compat_xfs_fsop_geom_v1 {
95 __u32 blocksize; /* filesystem (data) block size */
96 __u32 rtextsize; /* realtime extent size */
97 __u32 agblocks; /* fsblocks in an AG */
98 __u32 agcount; /* number of allocation groups */
99 __u32 logblocks; /* fsblocks in the log */
100 __u32 sectsize; /* (data) sector size, bytes */
101 __u32 inodesize; /* inode size in bytes */
102 __u32 imaxpct; /* max allowed inode space(%) */
103 __u64 datablocks; /* fsblocks in data subvolume */
104 __u64 rtblocks; /* fsblocks in realtime subvol */
105 __u64 rtextents; /* rt extents in realtime subvol*/
106 __u64 logstart; /* starting fsblock of the log */
107 unsigned char uuid[16]; /* unique id of the filesystem */
108 __u32 sunit; /* stripe unit, fsblocks */
109 __u32 swidth; /* stripe width, fsblocks */
110 __s32 version; /* structure version */
111 __u32 flags; /* superblock version flags */
112 __u32 logsectsize; /* log sector size, bytes */
113 __u32 rtsectsize; /* realtime sector size, bytes */
114 __u32 dirblocksize; /* directory block size, bytes */
115} __attribute__((packed)) compat_xfs_fsop_geom_v1_t;
116
117#define XFS_IOC_FSGEOMETRY_V1_32 \
118 _IOR ('X', 100, struct compat_xfs_fsop_geom_v1)
119
120STATIC unsigned long xfs_ioctl32_geom_v1(unsigned long arg)
121{
122 compat_xfs_fsop_geom_v1_t __user *p32 = (void __user *)arg;
123 xfs_fsop_geom_v1_t __user *p = compat_alloc_user_space(sizeof(*p));
124
125 if (copy_in_user(p, p32, sizeof(*p32)))
126 return -EFAULT;
127 return (unsigned long)p;
128}
129
130typedef struct compat_xfs_inogrp {
131 __u64 xi_startino; /* starting inode number */
132 __s32 xi_alloccount; /* # bits set in allocmask */
133 __u64 xi_allocmask; /* mask of allocated inodes */
134} __attribute__((packed)) compat_xfs_inogrp_t;
135
136STATIC int xfs_inumbers_fmt_compat(
137 void __user *ubuffer,
138 const xfs_inogrp_t *buffer,
139 long count,
140 long *written)
141{
142 compat_xfs_inogrp_t *p32 = ubuffer;
143 long i;
144
145 for (i = 0; i < count; i++) {
146 if (put_user(buffer[i].xi_startino, &p32[i].xi_startino) ||
147 put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) ||
148 put_user(buffer[i].xi_allocmask, &p32[i].xi_allocmask))
149 return -EFAULT;
150 }
151 *written = count * sizeof(*p32);
152 return 0;
153}
154
78#else 155#else
79 156
80typedef struct xfs_fsop_bulkreq32 { 157#define xfs_inumbers_fmt_compat xfs_inumbers_fmt
158#define _PACKED
159
160#endif
161
162/* XFS_IOC_FSBULKSTAT and friends */
163
164typedef struct compat_xfs_bstime {
165 __s32 tv_sec; /* seconds */
166 __s32 tv_nsec; /* and nanoseconds */
167} compat_xfs_bstime_t;
168
169STATIC int xfs_bstime_store_compat(
170 compat_xfs_bstime_t __user *p32,
171 const xfs_bstime_t *p)
172{
173 __s32 sec32;
174
175 sec32 = p->tv_sec;
176 if (put_user(sec32, &p32->tv_sec) ||
177 put_user(p->tv_nsec, &p32->tv_nsec))
178 return -EFAULT;
179 return 0;
180}
181
182typedef struct compat_xfs_bstat {
183 __u64 bs_ino; /* inode number */
184 __u16 bs_mode; /* type and mode */
185 __u16 bs_nlink; /* number of links */
186 __u32 bs_uid; /* user id */
187 __u32 bs_gid; /* group id */
188 __u32 bs_rdev; /* device value */
189 __s32 bs_blksize; /* block size */
190 __s64 bs_size; /* file size */
191 compat_xfs_bstime_t bs_atime; /* access time */
192 compat_xfs_bstime_t bs_mtime; /* modify time */
193 compat_xfs_bstime_t bs_ctime; /* inode change time */
194 int64_t bs_blocks; /* number of blocks */
195 __u32 bs_xflags; /* extended flags */
196 __s32 bs_extsize; /* extent size */
197 __s32 bs_extents; /* number of extents */
198 __u32 bs_gen; /* generation count */
199 __u16 bs_projid; /* project id */
200 unsigned char bs_pad[14]; /* pad space, unused */
201 __u32 bs_dmevmask; /* DMIG event mask */
202 __u16 bs_dmstate; /* DMIG state info */
203 __u16 bs_aextents; /* attribute number of extents */
204} _PACKED compat_xfs_bstat_t;
205
206STATIC int xfs_bulkstat_one_fmt_compat(
207 void __user *ubuffer,
208 const xfs_bstat_t *buffer)
209{
210 compat_xfs_bstat_t __user *p32 = ubuffer;
211
212 if (put_user(buffer->bs_ino, &p32->bs_ino) ||
213 put_user(buffer->bs_mode, &p32->bs_mode) ||
214 put_user(buffer->bs_nlink, &p32->bs_nlink) ||
215 put_user(buffer->bs_uid, &p32->bs_uid) ||
216 put_user(buffer->bs_gid, &p32->bs_gid) ||
217 put_user(buffer->bs_rdev, &p32->bs_rdev) ||
218 put_user(buffer->bs_blksize, &p32->bs_blksize) ||
219 put_user(buffer->bs_size, &p32->bs_size) ||
220 xfs_bstime_store_compat(&p32->bs_atime, &buffer->bs_atime) ||
221 xfs_bstime_store_compat(&p32->bs_mtime, &buffer->bs_mtime) ||
222 xfs_bstime_store_compat(&p32->bs_ctime, &buffer->bs_ctime) ||
223 put_user(buffer->bs_blocks, &p32->bs_blocks) ||
224 put_user(buffer->bs_xflags, &p32->bs_xflags) ||
225 put_user(buffer->bs_extsize, &p32->bs_extsize) ||
226 put_user(buffer->bs_extents, &p32->bs_extents) ||
227 put_user(buffer->bs_gen, &p32->bs_gen) ||
228 put_user(buffer->bs_projid, &p32->bs_projid) ||
229 put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) ||
230 put_user(buffer->bs_dmstate, &p32->bs_dmstate) ||
231 put_user(buffer->bs_aextents, &p32->bs_aextents))
232 return -EFAULT;
233 return sizeof(*p32);
234}
235
236
237
238typedef struct compat_xfs_fsop_bulkreq {
81 compat_uptr_t lastip; /* last inode # pointer */ 239 compat_uptr_t lastip; /* last inode # pointer */
82 __s32 icount; /* count of entries in buffer */ 240 __s32 icount; /* count of entries in buffer */
83 compat_uptr_t ubuffer; /* user buffer for inode desc. */ 241 compat_uptr_t ubuffer; /* user buffer for inode desc. */
84 __s32 ocount; /* output count pointer */ 242 compat_uptr_t ocount; /* output count pointer */
85} xfs_fsop_bulkreq32_t; 243} compat_xfs_fsop_bulkreq_t;
86 244
87STATIC unsigned long 245#define XFS_IOC_FSBULKSTAT_32 \
88xfs_ioctl32_bulkstat( 246 _IOWR('X', 101, struct compat_xfs_fsop_bulkreq)
89 unsigned long arg) 247#define XFS_IOC_FSBULKSTAT_SINGLE_32 \
248 _IOWR('X', 102, struct compat_xfs_fsop_bulkreq)
249#define XFS_IOC_FSINUMBERS_32 \
250 _IOWR('X', 103, struct compat_xfs_fsop_bulkreq)
251
252/* copied from xfs_ioctl.c */
253STATIC int
254xfs_ioc_bulkstat_compat(
255 xfs_mount_t *mp,
256 unsigned int cmd,
257 void __user *arg)
90{ 258{
91 xfs_fsop_bulkreq32_t __user *p32 = (void __user *)arg; 259 compat_xfs_fsop_bulkreq_t __user *p32 = (void __user *)arg;
92 xfs_fsop_bulkreq_t __user *p = compat_alloc_user_space(sizeof(*p));
93 u32 addr; 260 u32 addr;
261 xfs_fsop_bulkreq_t bulkreq;
262 int count; /* # of records returned */
263 xfs_ino_t inlast; /* last inode number */
264 int done;
265 int error;
266
267 /* done = 1 if there are more stats to get and if bulkstat */
268 /* should be called again (unused here, but used in dmapi) */
269
270 if (!capable(CAP_SYS_ADMIN))
271 return -EPERM;
272
273 if (XFS_FORCED_SHUTDOWN(mp))
274 return -XFS_ERROR(EIO);
275
276 if (get_user(addr, &p32->lastip))
277 return -EFAULT;
278 bulkreq.lastip = compat_ptr(addr);
279 if (get_user(bulkreq.icount, &p32->icount) ||
280 get_user(addr, &p32->ubuffer))
281 return -EFAULT;
282 bulkreq.ubuffer = compat_ptr(addr);
283 if (get_user(addr, &p32->ocount))
284 return -EFAULT;
285 bulkreq.ocount = compat_ptr(addr);
286
287 if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
288 return -XFS_ERROR(EFAULT);
289
290 if ((count = bulkreq.icount) <= 0)
291 return -XFS_ERROR(EINVAL);
292
293 if (cmd == XFS_IOC_FSINUMBERS)
294 error = xfs_inumbers(mp, &inlast, &count,
295 bulkreq.ubuffer, xfs_inumbers_fmt_compat);
296 else {
297 /* declare a var to get a warning in case the type changes */
298 bulkstat_one_fmt_pf formatter = xfs_bulkstat_one_fmt_compat;
299 error = xfs_bulkstat(mp, &inlast, &count,
300 xfs_bulkstat_one, formatter,
301 sizeof(compat_xfs_bstat_t), bulkreq.ubuffer,
302 BULKSTAT_FG_QUICK, &done);
303 }
304 if (error)
305 return -error;
306
307 if (bulkreq.ocount != NULL) {
308 if (copy_to_user(bulkreq.lastip, &inlast,
309 sizeof(xfs_ino_t)))
310 return -XFS_ERROR(EFAULT);
311
312 if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
313 return -XFS_ERROR(EFAULT);
314 }
315
316 return 0;
317}
318
319
320
321typedef struct compat_xfs_fsop_handlereq {
322 __u32 fd; /* fd for FD_TO_HANDLE */
323 compat_uptr_t path; /* user pathname */
324 __u32 oflags; /* open flags */
325 compat_uptr_t ihandle; /* user supplied handle */
326 __u32 ihandlen; /* user supplied length */
327 compat_uptr_t ohandle; /* user buffer for handle */
328 compat_uptr_t ohandlen; /* user buffer length */
329} compat_xfs_fsop_handlereq_t;
330
331#define XFS_IOC_PATH_TO_FSHANDLE_32 \
332 _IOWR('X', 104, struct compat_xfs_fsop_handlereq)
333#define XFS_IOC_PATH_TO_HANDLE_32 \
334 _IOWR('X', 105, struct compat_xfs_fsop_handlereq)
335#define XFS_IOC_FD_TO_HANDLE_32 \
336 _IOWR('X', 106, struct compat_xfs_fsop_handlereq)
337#define XFS_IOC_OPEN_BY_HANDLE_32 \
338 _IOWR('X', 107, struct compat_xfs_fsop_handlereq)
339#define XFS_IOC_READLINK_BY_HANDLE_32 \
340 _IOWR('X', 108, struct compat_xfs_fsop_handlereq)
341
342STATIC unsigned long xfs_ioctl32_fshandle(unsigned long arg)
343{
344 compat_xfs_fsop_handlereq_t __user *p32 = (void __user *)arg;
345 xfs_fsop_handlereq_t __user *p = compat_alloc_user_space(sizeof(*p));
346 u32 addr;
94 347
95 if (get_user(addr, &p32->lastip) || 348 if (copy_in_user(&p->fd, &p32->fd, sizeof(__u32)) ||
96 put_user(compat_ptr(addr), &p->lastip) || 349 get_user(addr, &p32->path) ||
97 copy_in_user(&p->icount, &p32->icount, sizeof(s32)) || 350 put_user(compat_ptr(addr), &p->path) ||
98 get_user(addr, &p32->ubuffer) || 351 copy_in_user(&p->oflags, &p32->oflags, sizeof(__u32)) ||
99 put_user(compat_ptr(addr), &p->ubuffer) || 352 get_user(addr, &p32->ihandle) ||
100 get_user(addr, &p32->ocount) || 353 put_user(compat_ptr(addr), &p->ihandle) ||
101 put_user(compat_ptr(addr), &p->ocount)) 354 copy_in_user(&p->ihandlen, &p32->ihandlen, sizeof(__u32)) ||
355 get_user(addr, &p32->ohandle) ||
356 put_user(compat_ptr(addr), &p->ohandle) ||
357 get_user(addr, &p32->ohandlen) ||
358 put_user(compat_ptr(addr), &p->ohandlen))
102 return -EFAULT; 359 return -EFAULT;
103 360
104 return (unsigned long)p; 361 return (unsigned long)p;
105} 362}
106#endif 363
107 364
108STATIC long 365STATIC long
109xfs_compat_ioctl( 366xfs_compat_ioctl(
@@ -118,7 +375,6 @@ xfs_compat_ioctl(
118 375
119 switch (cmd) { 376 switch (cmd) {
120 case XFS_IOC_DIOINFO: 377 case XFS_IOC_DIOINFO:
121 case XFS_IOC_FSGEOMETRY_V1:
122 case XFS_IOC_FSGEOMETRY: 378 case XFS_IOC_FSGEOMETRY:
123 case XFS_IOC_GETVERSION: 379 case XFS_IOC_GETVERSION:
124 case XFS_IOC_GETXFLAGS: 380 case XFS_IOC_GETXFLAGS:
@@ -131,12 +387,7 @@ xfs_compat_ioctl(
131 case XFS_IOC_GETBMAPA: 387 case XFS_IOC_GETBMAPA:
132 case XFS_IOC_GETBMAPX: 388 case XFS_IOC_GETBMAPX:
133/* not handled 389/* not handled
134 case XFS_IOC_FD_TO_HANDLE:
135 case XFS_IOC_PATH_TO_HANDLE:
136 case XFS_IOC_PATH_TO_FSHANDLE:
137 case XFS_IOC_OPEN_BY_HANDLE:
138 case XFS_IOC_FSSETDM_BY_HANDLE: 390 case XFS_IOC_FSSETDM_BY_HANDLE:
139 case XFS_IOC_READLINK_BY_HANDLE:
140 case XFS_IOC_ATTRLIST_BY_HANDLE: 391 case XFS_IOC_ATTRLIST_BY_HANDLE:
141 case XFS_IOC_ATTRMULTI_BY_HANDLE: 392 case XFS_IOC_ATTRMULTI_BY_HANDLE:
142*/ 393*/
@@ -166,6 +417,10 @@ xfs_compat_ioctl(
166 arg = xfs_ioctl32_flock(arg); 417 arg = xfs_ioctl32_flock(arg);
167 cmd = _NATIVE_IOC(cmd, struct xfs_flock64); 418 cmd = _NATIVE_IOC(cmd, struct xfs_flock64);
168 break; 419 break;
420 case XFS_IOC_FSGEOMETRY_V1_32:
421 arg = xfs_ioctl32_geom_v1(arg);
422 cmd = _NATIVE_IOC(cmd, struct xfs_fsop_geom_v1);
423 break;
169 424
170#else /* These are handled fine if no alignment issues */ 425#else /* These are handled fine if no alignment issues */
171 case XFS_IOC_ALLOCSP: 426 case XFS_IOC_ALLOCSP:
@@ -176,18 +431,28 @@ xfs_compat_ioctl(
176 case XFS_IOC_FREESP64: 431 case XFS_IOC_FREESP64:
177 case XFS_IOC_RESVSP64: 432 case XFS_IOC_RESVSP64:
178 case XFS_IOC_UNRESVSP64: 433 case XFS_IOC_UNRESVSP64:
434 case XFS_IOC_FSGEOMETRY_V1:
179 break; 435 break;
180 436
181 /* xfs_bstat_t still has wrong u32 vs u64 alignment */ 437 /* xfs_bstat_t still has wrong u32 vs u64 alignment */
182 case XFS_IOC_SWAPEXT: 438 case XFS_IOC_SWAPEXT:
183 break; 439 break;
184 440
185 case XFS_IOC_FSBULKSTAT_SINGLE:
186 case XFS_IOC_FSBULKSTAT:
187 case XFS_IOC_FSINUMBERS:
188 arg = xfs_ioctl32_bulkstat(arg);
189 break;
190#endif 441#endif
442 case XFS_IOC_FSBULKSTAT_32:
443 case XFS_IOC_FSBULKSTAT_SINGLE_32:
444 case XFS_IOC_FSINUMBERS_32:
445 cmd = _NATIVE_IOC(cmd, struct xfs_fsop_bulkreq);
446 return xfs_ioc_bulkstat_compat(XFS_BHVTOI(VNHEAD(vp))->i_mount,
447 cmd, (void*)arg);
448 case XFS_IOC_FD_TO_HANDLE_32:
449 case XFS_IOC_PATH_TO_HANDLE_32:
450 case XFS_IOC_PATH_TO_FSHANDLE_32:
451 case XFS_IOC_OPEN_BY_HANDLE_32:
452 case XFS_IOC_READLINK_BY_HANDLE_32:
453 arg = xfs_ioctl32_fshandle(arg);
454 cmd = _NATIVE_IOC(cmd, struct xfs_fsop_handlereq);
455 break;
191 default: 456 default:
192 return -ENOIOCTLCMD; 457 return -ENOIOCTLCMD;
193 } 458 }
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 715adad7dd4d..330c4ba9d404 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -101,7 +101,6 @@
101 * Feature macros (disable/enable) 101 * Feature macros (disable/enable)
102 */ 102 */
103#undef HAVE_REFCACHE /* reference cache not needed for NFS in 2.6 */ 103#undef HAVE_REFCACHE /* reference cache not needed for NFS in 2.6 */
104#define HAVE_SENDFILE /* sendfile(2) exists in 2.6, but not in 2.4 */
105#define HAVE_SPLICE /* a splice(2) exists in 2.6, but not in 2.4 */ 104#define HAVE_SPLICE /* a splice(2) exists in 2.6, but not in 2.4 */
106#ifdef CONFIG_SMP 105#ifdef CONFIG_SMP
107#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ 106#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
@@ -124,6 +123,7 @@
124#define xfs_inherit_nosymlinks xfs_params.inherit_nosym.val 123#define xfs_inherit_nosymlinks xfs_params.inherit_nosym.val
125#define xfs_rotorstep xfs_params.rotorstep.val 124#define xfs_rotorstep xfs_params.rotorstep.val
126#define xfs_inherit_nodefrag xfs_params.inherit_nodfrg.val 125#define xfs_inherit_nodefrag xfs_params.inherit_nodfrg.val
126#define xfs_fstrm_centisecs xfs_params.fstrm_timer.val
127 127
128#define current_cpu() (raw_smp_processor_id()) 128#define current_cpu() (raw_smp_processor_id())
129#define current_pid() (current->pid) 129#define current_pid() (current->pid)
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 86fb671a8bcc..765ec16a6e39 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -159,7 +159,7 @@ xfs_iozero(
159 if (status) 159 if (status)
160 goto unlock; 160 goto unlock;
161 161
162 memclear_highpage_flush(page, offset, bytes); 162 zero_user_page(page, offset, bytes, KM_USER0);
163 163
164 status = mapping->a_ops->commit_write(NULL, page, offset, 164 status = mapping->a_ops->commit_write(NULL, page, offset,
165 offset + bytes); 165 offset + bytes);
@@ -287,50 +287,6 @@ xfs_read(
287} 287}
288 288
289ssize_t 289ssize_t
290xfs_sendfile(
291 bhv_desc_t *bdp,
292 struct file *filp,
293 loff_t *offset,
294 int ioflags,
295 size_t count,
296 read_actor_t actor,
297 void *target,
298 cred_t *credp)
299{
300 xfs_inode_t *ip = XFS_BHVTOI(bdp);
301 xfs_mount_t *mp = ip->i_mount;
302 ssize_t ret;
303
304 XFS_STATS_INC(xs_read_calls);
305 if (XFS_FORCED_SHUTDOWN(mp))
306 return -EIO;
307
308 xfs_ilock(ip, XFS_IOLOCK_SHARED);
309
310 if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_READ) &&
311 (!(ioflags & IO_INVIS))) {
312 bhv_vrwlock_t locktype = VRWLOCK_READ;
313 int error;
314
315 error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp),
316 *offset, count,
317 FILP_DELAY_FLAG(filp), &locktype);
318 if (error) {
319 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
320 return -error;
321 }
322 }
323 xfs_rw_enter_trace(XFS_SENDFILE_ENTER, &ip->i_iocore,
324 (void *)(unsigned long)target, count, *offset, ioflags);
325 ret = generic_file_sendfile(filp, offset, count, actor, target);
326 if (ret > 0)
327 XFS_STATS_ADD(xs_read_bytes, ret);
328
329 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
330 return ret;
331}
332
333ssize_t
334xfs_splice_read( 290xfs_splice_read(
335 bhv_desc_t *bdp, 291 bhv_desc_t *bdp,
336 struct file *infilp, 292 struct file *infilp,
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
index 7ac51b1d2161..7c60a1eed88b 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ b/fs/xfs/linux-2.6/xfs_lrw.h
@@ -90,9 +90,6 @@ extern ssize_t xfs_read(struct bhv_desc *, struct kiocb *,
90extern ssize_t xfs_write(struct bhv_desc *, struct kiocb *, 90extern ssize_t xfs_write(struct bhv_desc *, struct kiocb *,
91 const struct iovec *, unsigned int, 91 const struct iovec *, unsigned int,
92 loff_t *, int, struct cred *); 92 loff_t *, int, struct cred *);
93extern ssize_t xfs_sendfile(struct bhv_desc *, struct file *,
94 loff_t *, int, size_t, read_actor_t,
95 void *, struct cred *);
96extern ssize_t xfs_splice_read(struct bhv_desc *, struct file *, loff_t *, 93extern ssize_t xfs_splice_read(struct bhv_desc *, struct file *, loff_t *,
97 struct pipe_inode_info *, size_t, int, int, 94 struct pipe_inode_info *, size_t, int, int,
98 struct cred *); 95 struct cred *);
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index bf9a9d5909be..06894cf00b12 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -547,7 +547,8 @@ vfs_sync_worker(
547 547
548 if (!(vfsp->vfs_flag & VFS_RDONLY)) 548 if (!(vfsp->vfs_flag & VFS_RDONLY))
549 error = bhv_vfs_sync(vfsp, SYNC_FSDATA | SYNC_BDFLUSH | \ 549 error = bhv_vfs_sync(vfsp, SYNC_FSDATA | SYNC_BDFLUSH | \
550 SYNC_ATTR | SYNC_REFCACHE, NULL); 550 SYNC_ATTR | SYNC_REFCACHE | SYNC_SUPER,
551 NULL);
551 vfsp->vfs_sync_seq++; 552 vfsp->vfs_sync_seq++;
552 wake_up(&vfsp->vfs_wait_single_sync_task); 553 wake_up(&vfsp->vfs_wait_single_sync_task);
553} 554}
@@ -663,7 +664,7 @@ xfs_fs_sync_super(
663 * occur here so don't bother flushing the buftarg (i.e 664 * occur here so don't bother flushing the buftarg (i.e
664 * SYNC_QUIESCE) because it'll just get dirty again. 665 * SYNC_QUIESCE) because it'll just get dirty again.
665 */ 666 */
666 flags = SYNC_FSDATA | SYNC_DELWRI | SYNC_WAIT | SYNC_IOWAIT; 667 flags = SYNC_DATA_QUIESCE;
667 } else 668 } else
668 flags = SYNC_FSDATA | (wait ? SYNC_WAIT : 0); 669 flags = SYNC_FSDATA | (wait ? SYNC_WAIT : 0);
669 670
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
index cd6eaa44aa2b..bb997d75c05c 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ b/fs/xfs/linux-2.6/xfs_sysctl.c
@@ -210,6 +210,17 @@ static ctl_table xfs_table[] = {
210 .extra1 = &xfs_params.inherit_nodfrg.min, 210 .extra1 = &xfs_params.inherit_nodfrg.min,
211 .extra2 = &xfs_params.inherit_nodfrg.max 211 .extra2 = &xfs_params.inherit_nodfrg.max
212 }, 212 },
213 {
214 .ctl_name = XFS_FILESTREAM_TIMER,
215 .procname = "filestream_centisecs",
216 .data = &xfs_params.fstrm_timer.val,
217 .maxlen = sizeof(int),
218 .mode = 0644,
219 .proc_handler = &proc_dointvec_minmax,
220 .strategy = &sysctl_intvec,
221 .extra1 = &xfs_params.fstrm_timer.min,
222 .extra2 = &xfs_params.fstrm_timer.max,
223 },
213 /* please keep this the last entry */ 224 /* please keep this the last entry */
214#ifdef CONFIG_PROC_FS 225#ifdef CONFIG_PROC_FS
215 { 226 {
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/linux-2.6/xfs_sysctl.h
index a631fb8cc5ac..98b97e399d6f 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.h
+++ b/fs/xfs/linux-2.6/xfs_sysctl.h
@@ -47,6 +47,7 @@ typedef struct xfs_param {
47 xfs_sysctl_val_t inherit_nosym; /* Inherit the "nosymlinks" flag. */ 47 xfs_sysctl_val_t inherit_nosym; /* Inherit the "nosymlinks" flag. */
48 xfs_sysctl_val_t rotorstep; /* inode32 AG rotoring control knob */ 48 xfs_sysctl_val_t rotorstep; /* inode32 AG rotoring control knob */
49 xfs_sysctl_val_t inherit_nodfrg;/* Inherit the "nodefrag" inode flag. */ 49 xfs_sysctl_val_t inherit_nodfrg;/* Inherit the "nodefrag" inode flag. */
50 xfs_sysctl_val_t fstrm_timer; /* Filestream dir-AG assoc'n timeout. */
50} xfs_param_t; 51} xfs_param_t;
51 52
52/* 53/*
@@ -86,6 +87,7 @@ enum {
86 XFS_INHERIT_NOSYM = 19, 87 XFS_INHERIT_NOSYM = 19,
87 XFS_ROTORSTEP = 20, 88 XFS_ROTORSTEP = 20,
88 XFS_INHERIT_NODFRG = 21, 89 XFS_INHERIT_NODFRG = 21,
90 XFS_FILESTREAM_TIMER = 22,
89}; 91};
90 92
91extern xfs_param_t xfs_params; 93extern xfs_param_t xfs_params;
diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h
index e2c2ce98ab5b..dca3481aaafa 100644
--- a/fs/xfs/linux-2.6/xfs_vfs.h
+++ b/fs/xfs/linux-2.6/xfs_vfs.h
@@ -92,6 +92,21 @@ typedef enum {
92#define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */ 92#define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */
93#define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */ 93#define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */
94#define SYNC_IOWAIT 0x0100 /* wait for all I/O to complete */ 94#define SYNC_IOWAIT 0x0100 /* wait for all I/O to complete */
95#define SYNC_SUPER 0x0200 /* flush superblock to disk */
96
97/*
98 * When remounting a filesystem read-only or freezing the filesystem,
99 * we have two phases to execute. This first phase is syncing the data
100 * before we quiesce the filesystem, and the second is flushing all the
101 * inodes out after we've waited for all the transactions created by
102 * the first phase to complete. The second phase uses SYNC_INODE_QUIESCE
103 * to ensure that the inodes are written to their location on disk
104 * rather than just existing in transactions in the log. This means
105 * after a quiesce there is no log replay required to write the inodes
106 * to disk (this is the main difference between a sync and a quiesce).
107 */
108#define SYNC_DATA_QUIESCE (SYNC_DELWRI|SYNC_FSDATA|SYNC_WAIT|SYNC_IOWAIT)
109#define SYNC_INODE_QUIESCE (SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT)
95 110
96#define SHUTDOWN_META_IO_ERROR 0x0001 /* write attempt to metadata failed */ 111#define SHUTDOWN_META_IO_ERROR 0x0001 /* write attempt to metadata failed */
97#define SHUTDOWN_LOG_IO_ERROR 0x0002 /* write attempt to the log failed */ 112#define SHUTDOWN_LOG_IO_ERROR 0x0002 /* write attempt to the log failed */
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index d1b2d01843d1..5742d65f0785 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -129,19 +129,13 @@ typedef enum bhv_vchange {
129 VCHANGE_FLAGS_IOEXCL_COUNT = 4 129 VCHANGE_FLAGS_IOEXCL_COUNT = 4
130} bhv_vchange_t; 130} bhv_vchange_t;
131 131
132typedef enum { L_FALSE, L_TRUE } lastclose_t;
133
134typedef int (*vop_open_t)(bhv_desc_t *, struct cred *); 132typedef int (*vop_open_t)(bhv_desc_t *, struct cred *);
135typedef int (*vop_close_t)(bhv_desc_t *, int, lastclose_t, struct cred *);
136typedef ssize_t (*vop_read_t)(bhv_desc_t *, struct kiocb *, 133typedef ssize_t (*vop_read_t)(bhv_desc_t *, struct kiocb *,
137 const struct iovec *, unsigned int, 134 const struct iovec *, unsigned int,
138 loff_t *, int, struct cred *); 135 loff_t *, int, struct cred *);
139typedef ssize_t (*vop_write_t)(bhv_desc_t *, struct kiocb *, 136typedef ssize_t (*vop_write_t)(bhv_desc_t *, struct kiocb *,
140 const struct iovec *, unsigned int, 137 const struct iovec *, unsigned int,
141 loff_t *, int, struct cred *); 138 loff_t *, int, struct cred *);
142typedef ssize_t (*vop_sendfile_t)(bhv_desc_t *, struct file *,
143 loff_t *, int, size_t, read_actor_t,
144 void *, struct cred *);
145typedef ssize_t (*vop_splice_read_t)(bhv_desc_t *, struct file *, loff_t *, 139typedef ssize_t (*vop_splice_read_t)(bhv_desc_t *, struct file *, loff_t *,
146 struct pipe_inode_info *, size_t, int, int, 140 struct pipe_inode_info *, size_t, int, int,
147 struct cred *); 141 struct cred *);
@@ -203,10 +197,8 @@ typedef int (*vop_iflush_t)(bhv_desc_t *, int);
203typedef struct bhv_vnodeops { 197typedef struct bhv_vnodeops {
204 bhv_position_t vn_position; /* position within behavior chain */ 198 bhv_position_t vn_position; /* position within behavior chain */
205 vop_open_t vop_open; 199 vop_open_t vop_open;
206 vop_close_t vop_close;
207 vop_read_t vop_read; 200 vop_read_t vop_read;
208 vop_write_t vop_write; 201 vop_write_t vop_write;
209 vop_sendfile_t vop_sendfile;
210 vop_splice_read_t vop_splice_read; 202 vop_splice_read_t vop_splice_read;
211 vop_splice_write_t vop_splice_write; 203 vop_splice_write_t vop_splice_write;
212 vop_ioctl_t vop_ioctl; 204 vop_ioctl_t vop_ioctl;
@@ -249,13 +241,10 @@ typedef struct bhv_vnodeops {
249#define VNHEAD(vp) ((vp)->v_bh.bh_first) 241#define VNHEAD(vp) ((vp)->v_bh.bh_first)
250#define VOP(op, vp) (*((bhv_vnodeops_t *)VNHEAD(vp)->bd_ops)->op) 242#define VOP(op, vp) (*((bhv_vnodeops_t *)VNHEAD(vp)->bd_ops)->op)
251#define bhv_vop_open(vp, cr) VOP(vop_open, vp)(VNHEAD(vp),cr) 243#define bhv_vop_open(vp, cr) VOP(vop_open, vp)(VNHEAD(vp),cr)
252#define bhv_vop_close(vp, f,last,cr) VOP(vop_close, vp)(VNHEAD(vp),f,last,cr)
253#define bhv_vop_read(vp,file,iov,segs,offset,ioflags,cr) \ 244#define bhv_vop_read(vp,file,iov,segs,offset,ioflags,cr) \
254 VOP(vop_read, vp)(VNHEAD(vp),file,iov,segs,offset,ioflags,cr) 245 VOP(vop_read, vp)(VNHEAD(vp),file,iov,segs,offset,ioflags,cr)
255#define bhv_vop_write(vp,file,iov,segs,offset,ioflags,cr) \ 246#define bhv_vop_write(vp,file,iov,segs,offset,ioflags,cr) \
256 VOP(vop_write, vp)(VNHEAD(vp),file,iov,segs,offset,ioflags,cr) 247 VOP(vop_write, vp)(VNHEAD(vp),file,iov,segs,offset,ioflags,cr)
257#define bhv_vop_sendfile(vp,f,off,ioflags,cnt,act,targ,cr) \
258 VOP(vop_sendfile, vp)(VNHEAD(vp),f,off,ioflags,cnt,act,targ,cr)
259#define bhv_vop_splice_read(vp,f,o,pipe,cnt,fl,iofl,cr) \ 248#define bhv_vop_splice_read(vp,f,o,pipe,cnt,fl,iofl,cr) \
260 VOP(vop_splice_read, vp)(VNHEAD(vp),f,o,pipe,cnt,fl,iofl,cr) 249 VOP(vop_splice_read, vp)(VNHEAD(vp),f,o,pipe,cnt,fl,iofl,cr)
261#define bhv_vop_splice_write(vp,f,o,pipe,cnt,fl,iofl,cr) \ 250#define bhv_vop_splice_write(vp,f,o,pipe,cnt,fl,iofl,cr) \
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 3e4a8ad8a34c..7def4c699343 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -62,10 +62,9 @@ uint ndquot;
62 62
63kmem_zone_t *qm_dqzone; 63kmem_zone_t *qm_dqzone;
64kmem_zone_t *qm_dqtrxzone; 64kmem_zone_t *qm_dqtrxzone;
65static kmem_shaker_t xfs_qm_shaker; 65static struct shrinker *xfs_qm_shaker;
66 66
67static cred_t xfs_zerocr; 67static cred_t xfs_zerocr;
68static xfs_inode_t xfs_zeroino;
69 68
70STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int); 69STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int);
71STATIC void xfs_qm_list_destroy(xfs_dqlist_t *); 70STATIC void xfs_qm_list_destroy(xfs_dqlist_t *);
@@ -150,7 +149,7 @@ xfs_Gqm_init(void)
150 } else 149 } else
151 xqm->qm_dqzone = qm_dqzone; 150 xqm->qm_dqzone = qm_dqzone;
152 151
153 xfs_qm_shaker = kmem_shake_register(xfs_qm_shake); 152 xfs_qm_shaker = set_shrinker(DEFAULT_SEEKS, xfs_qm_shake);
154 153
155 /* 154 /*
156 * The t_dqinfo portion of transactions. 155 * The t_dqinfo portion of transactions.
@@ -182,7 +181,7 @@ xfs_qm_destroy(
182 181
183 ASSERT(xqm != NULL); 182 ASSERT(xqm != NULL);
184 ASSERT(xqm->qm_nrefs == 0); 183 ASSERT(xqm->qm_nrefs == 0);
185 kmem_shake_deregister(xfs_qm_shaker); 184 remove_shrinker(xfs_qm_shaker);
186 hsize = xqm->qm_dqhashmask + 1; 185 hsize = xqm->qm_dqhashmask + 1;
187 for (i = 0; i < hsize; i++) { 186 for (i = 0; i < hsize; i++) {
188 xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i])); 187 xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
@@ -1415,7 +1414,7 @@ xfs_qm_qino_alloc(
1415 return error; 1414 return error;
1416 } 1415 }
1417 1416
1418 if ((error = xfs_dir_ialloc(&tp, &xfs_zeroino, S_IFREG, 1, 0, 1417 if ((error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0,
1419 &xfs_zerocr, 0, 1, ip, &committed))) { 1418 &xfs_zerocr, 0, 1, ip, &committed))) {
1420 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | 1419 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
1421 XFS_TRANS_ABORT); 1420 XFS_TRANS_ABORT);
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index bf0a12040b13..b5a7d92c6843 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -38,6 +38,7 @@
38#define XFS_RW_TRACE 1 38#define XFS_RW_TRACE 1
39#define XFS_BUF_TRACE 1 39#define XFS_BUF_TRACE 1
40#define XFS_VNODE_TRACE 1 40#define XFS_VNODE_TRACE 1
41#define XFS_FILESTREAMS_TRACE 1
41#endif 42#endif
42 43
43#include <linux-2.6/xfs_linux.h> 44#include <linux-2.6/xfs_linux.h>
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 9ece7f87ec5b..51c09c114a20 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -68,6 +68,7 @@ typedef struct xfs_agf {
68 __be32 agf_flcount; /* count of blocks in freelist */ 68 __be32 agf_flcount; /* count of blocks in freelist */
69 __be32 agf_freeblks; /* total free blocks */ 69 __be32 agf_freeblks; /* total free blocks */
70 __be32 agf_longest; /* longest free space */ 70 __be32 agf_longest; /* longest free space */
71 __be32 agf_btreeblks; /* # of blocks held in AGF btrees */
71} xfs_agf_t; 72} xfs_agf_t;
72 73
73#define XFS_AGF_MAGICNUM 0x00000001 74#define XFS_AGF_MAGICNUM 0x00000001
@@ -81,7 +82,8 @@ typedef struct xfs_agf {
81#define XFS_AGF_FLCOUNT 0x00000100 82#define XFS_AGF_FLCOUNT 0x00000100
82#define XFS_AGF_FREEBLKS 0x00000200 83#define XFS_AGF_FREEBLKS 0x00000200
83#define XFS_AGF_LONGEST 0x00000400 84#define XFS_AGF_LONGEST 0x00000400
84#define XFS_AGF_NUM_BITS 11 85#define XFS_AGF_BTREEBLKS 0x00000800
86#define XFS_AGF_NUM_BITS 12
85#define XFS_AGF_ALL_BITS ((1 << XFS_AGF_NUM_BITS) - 1) 87#define XFS_AGF_ALL_BITS ((1 << XFS_AGF_NUM_BITS) - 1)
86 88
87/* disk block (xfs_daddr_t) in the AG */ 89/* disk block (xfs_daddr_t) in the AG */
@@ -186,12 +188,15 @@ typedef struct xfs_perag
186 __uint32_t pagf_flcount; /* count of blocks in freelist */ 188 __uint32_t pagf_flcount; /* count of blocks in freelist */
187 xfs_extlen_t pagf_freeblks; /* total free blocks */ 189 xfs_extlen_t pagf_freeblks; /* total free blocks */
188 xfs_extlen_t pagf_longest; /* longest free space */ 190 xfs_extlen_t pagf_longest; /* longest free space */
191 __uint32_t pagf_btreeblks; /* # of blocks held in AGF btrees */
189 xfs_agino_t pagi_freecount; /* number of free inodes */ 192 xfs_agino_t pagi_freecount; /* number of free inodes */
193 xfs_agino_t pagi_count; /* number of allocated inodes */
194 int pagb_count; /* pagb slots in use */
190#ifdef __KERNEL__ 195#ifdef __KERNEL__
191 lock_t pagb_lock; /* lock for pagb_list */ 196 lock_t pagb_lock; /* lock for pagb_list */
192#endif 197#endif
193 int pagb_count; /* pagb slots in use */
194 xfs_perag_busy_t *pagb_list; /* unstable blocks */ 198 xfs_perag_busy_t *pagb_list; /* unstable blocks */
199 atomic_t pagf_fstrms; /* # of filestreams active in this AG */
195} xfs_perag_t; 200} xfs_perag_t;
196 201
197#define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels) 202#define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels)
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 8e9a40aa0cd3..012a649a19c3 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -55,17 +55,17 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
55ktrace_t *xfs_alloc_trace_buf; 55ktrace_t *xfs_alloc_trace_buf;
56 56
57#define TRACE_ALLOC(s,a) \ 57#define TRACE_ALLOC(s,a) \
58 xfs_alloc_trace_alloc(fname, s, a, __LINE__) 58 xfs_alloc_trace_alloc(__FUNCTION__, s, a, __LINE__)
59#define TRACE_FREE(s,a,b,x,f) \ 59#define TRACE_FREE(s,a,b,x,f) \
60 xfs_alloc_trace_free(fname, s, mp, a, b, x, f, __LINE__) 60 xfs_alloc_trace_free(__FUNCTION__, s, mp, a, b, x, f, __LINE__)
61#define TRACE_MODAGF(s,a,f) \ 61#define TRACE_MODAGF(s,a,f) \
62 xfs_alloc_trace_modagf(fname, s, mp, a, f, __LINE__) 62 xfs_alloc_trace_modagf(__FUNCTION__, s, mp, a, f, __LINE__)
63#define TRACE_BUSY(fname,s,ag,agb,l,sl,tp) \ 63#define TRACE_BUSY(__FUNCTION__,s,ag,agb,l,sl,tp) \
64 xfs_alloc_trace_busy(fname, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSY, __LINE__) 64 xfs_alloc_trace_busy(__FUNCTION__, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSY, __LINE__)
65#define TRACE_UNBUSY(fname,s,ag,sl,tp) \ 65#define TRACE_UNBUSY(__FUNCTION__,s,ag,sl,tp) \
66 xfs_alloc_trace_busy(fname, s, mp, ag, -1, -1, sl, tp, XFS_ALLOC_KTRACE_UNBUSY, __LINE__) 66 xfs_alloc_trace_busy(__FUNCTION__, s, mp, ag, -1, -1, sl, tp, XFS_ALLOC_KTRACE_UNBUSY, __LINE__)
67#define TRACE_BUSYSEARCH(fname,s,ag,agb,l,sl,tp) \ 67#define TRACE_BUSYSEARCH(__FUNCTION__,s,ag,agb,l,sl,tp) \
68 xfs_alloc_trace_busy(fname, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSYSEARCH, __LINE__) 68 xfs_alloc_trace_busy(__FUNCTION__, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSYSEARCH, __LINE__)
69#else 69#else
70#define TRACE_ALLOC(s,a) 70#define TRACE_ALLOC(s,a)
71#define TRACE_FREE(s,a,b,x,f) 71#define TRACE_FREE(s,a,b,x,f)
@@ -420,7 +420,7 @@ xfs_alloc_read_agfl(
420 */ 420 */
421STATIC void 421STATIC void
422xfs_alloc_trace_alloc( 422xfs_alloc_trace_alloc(
423 char *name, /* function tag string */ 423 const char *name, /* function tag string */
424 char *str, /* additional string */ 424 char *str, /* additional string */
425 xfs_alloc_arg_t *args, /* allocation argument structure */ 425 xfs_alloc_arg_t *args, /* allocation argument structure */
426 int line) /* source line number */ 426 int line) /* source line number */
@@ -453,7 +453,7 @@ xfs_alloc_trace_alloc(
453 */ 453 */
454STATIC void 454STATIC void
455xfs_alloc_trace_free( 455xfs_alloc_trace_free(
456 char *name, /* function tag string */ 456 const char *name, /* function tag string */
457 char *str, /* additional string */ 457 char *str, /* additional string */
458 xfs_mount_t *mp, /* file system mount point */ 458 xfs_mount_t *mp, /* file system mount point */
459 xfs_agnumber_t agno, /* allocation group number */ 459 xfs_agnumber_t agno, /* allocation group number */
@@ -479,7 +479,7 @@ xfs_alloc_trace_free(
479 */ 479 */
480STATIC void 480STATIC void
481xfs_alloc_trace_modagf( 481xfs_alloc_trace_modagf(
482 char *name, /* function tag string */ 482 const char *name, /* function tag string */
483 char *str, /* additional string */ 483 char *str, /* additional string */
484 xfs_mount_t *mp, /* file system mount point */ 484 xfs_mount_t *mp, /* file system mount point */
485 xfs_agf_t *agf, /* new agf value */ 485 xfs_agf_t *agf, /* new agf value */
@@ -507,7 +507,7 @@ xfs_alloc_trace_modagf(
507 507
508STATIC void 508STATIC void
509xfs_alloc_trace_busy( 509xfs_alloc_trace_busy(
510 char *name, /* function tag string */ 510 const char *name, /* function tag string */
511 char *str, /* additional string */ 511 char *str, /* additional string */
512 xfs_mount_t *mp, /* file system mount point */ 512 xfs_mount_t *mp, /* file system mount point */
513 xfs_agnumber_t agno, /* allocation group number */ 513 xfs_agnumber_t agno, /* allocation group number */
@@ -549,9 +549,6 @@ xfs_alloc_ag_vextent(
549 xfs_alloc_arg_t *args) /* argument structure for allocation */ 549 xfs_alloc_arg_t *args) /* argument structure for allocation */
550{ 550{
551 int error=0; 551 int error=0;
552#ifdef XFS_ALLOC_TRACE
553 static char fname[] = "xfs_alloc_ag_vextent";
554#endif
555 552
556 ASSERT(args->minlen > 0); 553 ASSERT(args->minlen > 0);
557 ASSERT(args->maxlen > 0); 554 ASSERT(args->maxlen > 0);
@@ -635,9 +632,6 @@ xfs_alloc_ag_vextent_exact(
635 xfs_agblock_t fbno; /* start block of found extent */ 632 xfs_agblock_t fbno; /* start block of found extent */
636 xfs_agblock_t fend; /* end block of found extent */ 633 xfs_agblock_t fend; /* end block of found extent */
637 xfs_extlen_t flen; /* length of found extent */ 634 xfs_extlen_t flen; /* length of found extent */
638#ifdef XFS_ALLOC_TRACE
639 static char fname[] = "xfs_alloc_ag_vextent_exact";
640#endif
641 int i; /* success/failure of operation */ 635 int i; /* success/failure of operation */
642 xfs_agblock_t maxend; /* end of maximal extent */ 636 xfs_agblock_t maxend; /* end of maximal extent */
643 xfs_agblock_t minend; /* end of minimal extent */ 637 xfs_agblock_t minend; /* end of minimal extent */
@@ -737,9 +731,6 @@ xfs_alloc_ag_vextent_near(
737 xfs_btree_cur_t *bno_cur_gt; /* cursor for bno btree, right side */ 731 xfs_btree_cur_t *bno_cur_gt; /* cursor for bno btree, right side */
738 xfs_btree_cur_t *bno_cur_lt; /* cursor for bno btree, left side */ 732 xfs_btree_cur_t *bno_cur_lt; /* cursor for bno btree, left side */
739 xfs_btree_cur_t *cnt_cur; /* cursor for count btree */ 733 xfs_btree_cur_t *cnt_cur; /* cursor for count btree */
740#ifdef XFS_ALLOC_TRACE
741 static char fname[] = "xfs_alloc_ag_vextent_near";
742#endif
743 xfs_agblock_t gtbno; /* start bno of right side entry */ 734 xfs_agblock_t gtbno; /* start bno of right side entry */
744 xfs_agblock_t gtbnoa; /* aligned ... */ 735 xfs_agblock_t gtbnoa; /* aligned ... */
745 xfs_extlen_t gtdiff; /* difference to right side entry */ 736 xfs_extlen_t gtdiff; /* difference to right side entry */
@@ -1270,9 +1261,6 @@ xfs_alloc_ag_vextent_size(
1270 int error; /* error result */ 1261 int error; /* error result */
1271 xfs_agblock_t fbno; /* start of found freespace */ 1262 xfs_agblock_t fbno; /* start of found freespace */
1272 xfs_extlen_t flen; /* length of found freespace */ 1263 xfs_extlen_t flen; /* length of found freespace */
1273#ifdef XFS_ALLOC_TRACE
1274 static char fname[] = "xfs_alloc_ag_vextent_size";
1275#endif
1276 int i; /* temp status variable */ 1264 int i; /* temp status variable */
1277 xfs_agblock_t rbno; /* returned block number */ 1265 xfs_agblock_t rbno; /* returned block number */
1278 xfs_extlen_t rlen; /* length of returned extent */ 1266 xfs_extlen_t rlen; /* length of returned extent */
@@ -1427,9 +1415,6 @@ xfs_alloc_ag_vextent_small(
1427 int error; 1415 int error;
1428 xfs_agblock_t fbno; 1416 xfs_agblock_t fbno;
1429 xfs_extlen_t flen; 1417 xfs_extlen_t flen;
1430#ifdef XFS_ALLOC_TRACE
1431 static char fname[] = "xfs_alloc_ag_vextent_small";
1432#endif
1433 int i; 1418 int i;
1434 1419
1435 if ((error = xfs_alloc_decrement(ccur, 0, &i))) 1420 if ((error = xfs_alloc_decrement(ccur, 0, &i)))
@@ -1447,7 +1432,8 @@ xfs_alloc_ag_vextent_small(
1447 else if (args->minlen == 1 && args->alignment == 1 && !args->isfl && 1432 else if (args->minlen == 1 && args->alignment == 1 && !args->isfl &&
1448 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount) 1433 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
1449 > args->minleft)) { 1434 > args->minleft)) {
1450 if ((error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno))) 1435 error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
1436 if (error)
1451 goto error0; 1437 goto error0;
1452 if (fbno != NULLAGBLOCK) { 1438 if (fbno != NULLAGBLOCK) {
1453 if (args->userdata) { 1439 if (args->userdata) {
@@ -1515,9 +1501,6 @@ xfs_free_ag_extent(
1515 xfs_btree_cur_t *bno_cur; /* cursor for by-block btree */ 1501 xfs_btree_cur_t *bno_cur; /* cursor for by-block btree */
1516 xfs_btree_cur_t *cnt_cur; /* cursor for by-size btree */ 1502 xfs_btree_cur_t *cnt_cur; /* cursor for by-size btree */
1517 int error; /* error return value */ 1503 int error; /* error return value */
1518#ifdef XFS_ALLOC_TRACE
1519 static char fname[] = "xfs_free_ag_extent";
1520#endif
1521 xfs_agblock_t gtbno; /* start of right neighbor block */ 1504 xfs_agblock_t gtbno; /* start of right neighbor block */
1522 xfs_extlen_t gtlen; /* length of right neighbor block */ 1505 xfs_extlen_t gtlen; /* length of right neighbor block */
1523 int haveleft; /* have a left neighbor block */ 1506 int haveleft; /* have a left neighbor block */
@@ -1923,7 +1906,8 @@ xfs_alloc_fix_freelist(
1923 while (be32_to_cpu(agf->agf_flcount) > need) { 1906 while (be32_to_cpu(agf->agf_flcount) > need) {
1924 xfs_buf_t *bp; 1907 xfs_buf_t *bp;
1925 1908
1926 if ((error = xfs_alloc_get_freelist(tp, agbp, &bno))) 1909 error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
1910 if (error)
1927 return error; 1911 return error;
1928 if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1))) 1912 if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1)))
1929 return error; 1913 return error;
@@ -1973,8 +1957,9 @@ xfs_alloc_fix_freelist(
1973 * Put each allocated block on the list. 1957 * Put each allocated block on the list.
1974 */ 1958 */
1975 for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) { 1959 for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
1976 if ((error = xfs_alloc_put_freelist(tp, agbp, agflbp, 1960 error = xfs_alloc_put_freelist(tp, agbp,
1977 bno))) 1961 agflbp, bno, 0);
1962 if (error)
1978 return error; 1963 return error;
1979 } 1964 }
1980 } 1965 }
@@ -1991,16 +1976,15 @@ int /* error */
1991xfs_alloc_get_freelist( 1976xfs_alloc_get_freelist(
1992 xfs_trans_t *tp, /* transaction pointer */ 1977 xfs_trans_t *tp, /* transaction pointer */
1993 xfs_buf_t *agbp, /* buffer containing the agf structure */ 1978 xfs_buf_t *agbp, /* buffer containing the agf structure */
1994 xfs_agblock_t *bnop) /* block address retrieved from freelist */ 1979 xfs_agblock_t *bnop, /* block address retrieved from freelist */
1980 int btreeblk) /* destination is a AGF btree */
1995{ 1981{
1996 xfs_agf_t *agf; /* a.g. freespace structure */ 1982 xfs_agf_t *agf; /* a.g. freespace structure */
1997 xfs_agfl_t *agfl; /* a.g. freelist structure */ 1983 xfs_agfl_t *agfl; /* a.g. freelist structure */
1998 xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */ 1984 xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */
1999 xfs_agblock_t bno; /* block number returned */ 1985 xfs_agblock_t bno; /* block number returned */
2000 int error; 1986 int error;
2001#ifdef XFS_ALLOC_TRACE 1987 int logflags;
2002 static char fname[] = "xfs_alloc_get_freelist";
2003#endif
2004 xfs_mount_t *mp; /* mount structure */ 1988 xfs_mount_t *mp; /* mount structure */
2005 xfs_perag_t *pag; /* per allocation group data */ 1989 xfs_perag_t *pag; /* per allocation group data */
2006 1990
@@ -2032,8 +2016,16 @@ xfs_alloc_get_freelist(
2032 be32_add(&agf->agf_flcount, -1); 2016 be32_add(&agf->agf_flcount, -1);
2033 xfs_trans_agflist_delta(tp, -1); 2017 xfs_trans_agflist_delta(tp, -1);
2034 pag->pagf_flcount--; 2018 pag->pagf_flcount--;
2035 TRACE_MODAGF(NULL, agf, XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT); 2019
2036 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT); 2020 logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
2021 if (btreeblk) {
2022 be32_add(&agf->agf_btreeblks, 1);
2023 pag->pagf_btreeblks++;
2024 logflags |= XFS_AGF_BTREEBLKS;
2025 }
2026
2027 TRACE_MODAGF(NULL, agf, logflags);
2028 xfs_alloc_log_agf(tp, agbp, logflags);
2037 *bnop = bno; 2029 *bnop = bno;
2038 2030
2039 /* 2031 /*
@@ -2071,6 +2063,7 @@ xfs_alloc_log_agf(
2071 offsetof(xfs_agf_t, agf_flcount), 2063 offsetof(xfs_agf_t, agf_flcount),
2072 offsetof(xfs_agf_t, agf_freeblks), 2064 offsetof(xfs_agf_t, agf_freeblks),
2073 offsetof(xfs_agf_t, agf_longest), 2065 offsetof(xfs_agf_t, agf_longest),
2066 offsetof(xfs_agf_t, agf_btreeblks),
2074 sizeof(xfs_agf_t) 2067 sizeof(xfs_agf_t)
2075 }; 2068 };
2076 2069
@@ -2106,15 +2099,14 @@ xfs_alloc_put_freelist(
2106 xfs_trans_t *tp, /* transaction pointer */ 2099 xfs_trans_t *tp, /* transaction pointer */
2107 xfs_buf_t *agbp, /* buffer for a.g. freelist header */ 2100 xfs_buf_t *agbp, /* buffer for a.g. freelist header */
2108 xfs_buf_t *agflbp,/* buffer for a.g. free block array */ 2101 xfs_buf_t *agflbp,/* buffer for a.g. free block array */
2109 xfs_agblock_t bno) /* block being freed */ 2102 xfs_agblock_t bno, /* block being freed */
2103 int btreeblk) /* block came from a AGF btree */
2110{ 2104{
2111 xfs_agf_t *agf; /* a.g. freespace structure */ 2105 xfs_agf_t *agf; /* a.g. freespace structure */
2112 xfs_agfl_t *agfl; /* a.g. free block array */ 2106 xfs_agfl_t *agfl; /* a.g. free block array */
2113 __be32 *blockp;/* pointer to array entry */ 2107 __be32 *blockp;/* pointer to array entry */
2114 int error; 2108 int error;
2115#ifdef XFS_ALLOC_TRACE 2109 int logflags;
2116 static char fname[] = "xfs_alloc_put_freelist";
2117#endif
2118 xfs_mount_t *mp; /* mount structure */ 2110 xfs_mount_t *mp; /* mount structure */
2119 xfs_perag_t *pag; /* per allocation group data */ 2111 xfs_perag_t *pag; /* per allocation group data */
2120 2112
@@ -2132,11 +2124,22 @@ xfs_alloc_put_freelist(
2132 be32_add(&agf->agf_flcount, 1); 2124 be32_add(&agf->agf_flcount, 1);
2133 xfs_trans_agflist_delta(tp, 1); 2125 xfs_trans_agflist_delta(tp, 1);
2134 pag->pagf_flcount++; 2126 pag->pagf_flcount++;
2127
2128 logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
2129 if (btreeblk) {
2130 be32_add(&agf->agf_btreeblks, -1);
2131 pag->pagf_btreeblks--;
2132 logflags |= XFS_AGF_BTREEBLKS;
2133 }
2134
2135 TRACE_MODAGF(NULL, agf, logflags);
2136 xfs_alloc_log_agf(tp, agbp, logflags);
2137
2135 ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)); 2138 ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
2136 blockp = &agfl->agfl_bno[be32_to_cpu(agf->agf_fllast)]; 2139 blockp = &agfl->agfl_bno[be32_to_cpu(agf->agf_fllast)];
2137 *blockp = cpu_to_be32(bno); 2140 *blockp = cpu_to_be32(bno);
2138 TRACE_MODAGF(NULL, agf, XFS_AGF_FLLAST | XFS_AGF_FLCOUNT); 2141 TRACE_MODAGF(NULL, agf, logflags);
2139 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLLAST | XFS_AGF_FLCOUNT); 2142 xfs_alloc_log_agf(tp, agbp, logflags);
2140 xfs_trans_log_buf(tp, agflbp, 2143 xfs_trans_log_buf(tp, agflbp,
2141 (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl), 2144 (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl),
2142 (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl + 2145 (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl +
@@ -2196,6 +2199,7 @@ xfs_alloc_read_agf(
2196 pag = &mp->m_perag[agno]; 2199 pag = &mp->m_perag[agno];
2197 if (!pag->pagf_init) { 2200 if (!pag->pagf_init) {
2198 pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks); 2201 pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
2202 pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
2199 pag->pagf_flcount = be32_to_cpu(agf->agf_flcount); 2203 pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
2200 pag->pagf_longest = be32_to_cpu(agf->agf_longest); 2204 pag->pagf_longest = be32_to_cpu(agf->agf_longest);
2201 pag->pagf_levels[XFS_BTNUM_BNOi] = 2205 pag->pagf_levels[XFS_BTNUM_BNOi] =
@@ -2235,9 +2239,6 @@ xfs_alloc_vextent(
2235 xfs_agblock_t agsize; /* allocation group size */ 2239 xfs_agblock_t agsize; /* allocation group size */
2236 int error; 2240 int error;
2237 int flags; /* XFS_ALLOC_FLAG_... locking flags */ 2241 int flags; /* XFS_ALLOC_FLAG_... locking flags */
2238#ifdef XFS_ALLOC_TRACE
2239 static char fname[] = "xfs_alloc_vextent";
2240#endif
2241 xfs_extlen_t minleft;/* minimum left value, temp copy */ 2242 xfs_extlen_t minleft;/* minimum left value, temp copy */
2242 xfs_mount_t *mp; /* mount structure pointer */ 2243 xfs_mount_t *mp; /* mount structure pointer */
2243 xfs_agnumber_t sagno; /* starting allocation group number */ 2244 xfs_agnumber_t sagno; /* starting allocation group number */
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 5a4256120ccc..5aec15d0651e 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -136,7 +136,8 @@ int /* error */
136xfs_alloc_get_freelist( 136xfs_alloc_get_freelist(
137 struct xfs_trans *tp, /* transaction pointer */ 137 struct xfs_trans *tp, /* transaction pointer */
138 struct xfs_buf *agbp, /* buffer containing the agf structure */ 138 struct xfs_buf *agbp, /* buffer containing the agf structure */
139 xfs_agblock_t *bnop); /* block address retrieved from freelist */ 139 xfs_agblock_t *bnop, /* block address retrieved from freelist */
140 int btreeblk); /* destination is a AGF btree */
140 141
141/* 142/*
142 * Log the given fields from the agf structure. 143 * Log the given fields from the agf structure.
@@ -165,7 +166,8 @@ xfs_alloc_put_freelist(
165 struct xfs_trans *tp, /* transaction pointer */ 166 struct xfs_trans *tp, /* transaction pointer */
166 struct xfs_buf *agbp, /* buffer for a.g. freelist header */ 167 struct xfs_buf *agbp, /* buffer for a.g. freelist header */
167 struct xfs_buf *agflbp,/* buffer for a.g. free block array */ 168 struct xfs_buf *agflbp,/* buffer for a.g. free block array */
168 xfs_agblock_t bno); /* block being freed */ 169 xfs_agblock_t bno, /* block being freed */
170 int btreeblk); /* owner was a AGF btree */
169 171
170/* 172/*
171 * Read in the allocation group header (free/alloc section). 173 * Read in the allocation group header (free/alloc section).
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index 74cadf95d4e8..1603ce595853 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -226,8 +226,9 @@ xfs_alloc_delrec(
226 /* 226 /*
227 * Put this buffer/block on the ag's freelist. 227 * Put this buffer/block on the ag's freelist.
228 */ 228 */
229 if ((error = xfs_alloc_put_freelist(cur->bc_tp, 229 error = xfs_alloc_put_freelist(cur->bc_tp,
230 cur->bc_private.a.agbp, NULL, bno))) 230 cur->bc_private.a.agbp, NULL, bno, 1);
231 if (error)
231 return error; 232 return error;
232 /* 233 /*
233 * Since blocks move to the free list without the 234 * Since blocks move to the free list without the
@@ -549,8 +550,9 @@ xfs_alloc_delrec(
549 /* 550 /*
550 * Free the deleting block by putting it on the freelist. 551 * Free the deleting block by putting it on the freelist.
551 */ 552 */
552 if ((error = xfs_alloc_put_freelist(cur->bc_tp, cur->bc_private.a.agbp, 553 error = xfs_alloc_put_freelist(cur->bc_tp,
553 NULL, rbno))) 554 cur->bc_private.a.agbp, NULL, rbno, 1);
555 if (error)
554 return error; 556 return error;
555 /* 557 /*
556 * Since blocks move to the free list without the coordination 558 * Since blocks move to the free list without the coordination
@@ -1320,8 +1322,9 @@ xfs_alloc_newroot(
1320 /* 1322 /*
1321 * Get a buffer from the freelist blocks, for the new root. 1323 * Get a buffer from the freelist blocks, for the new root.
1322 */ 1324 */
1323 if ((error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp, 1325 error = xfs_alloc_get_freelist(cur->bc_tp,
1324 &nbno))) 1326 cur->bc_private.a.agbp, &nbno, 1);
1327 if (error)
1325 return error; 1328 return error;
1326 /* 1329 /*
1327 * None available, we fail. 1330 * None available, we fail.
@@ -1604,8 +1607,9 @@ xfs_alloc_split(
1604 * Allocate the new block from the freelist. 1607 * Allocate the new block from the freelist.
1605 * If we can't do it, we're toast. Give up. 1608 * If we can't do it, we're toast. Give up.
1606 */ 1609 */
1607 if ((error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp, 1610 error = xfs_alloc_get_freelist(cur->bc_tp,
1608 &rbno))) 1611 cur->bc_private.a.agbp, &rbno, 1);
1612 if (error)
1609 return error; 1613 return error;
1610 if (rbno == NULLAGBLOCK) { 1614 if (rbno == NULLAGBLOCK) {
1611 *stat = 0; 1615 *stat = 0;
diff --git a/fs/xfs/xfs_bit.c b/fs/xfs/xfs_bit.c
index 1afe07f67e3b..fab0b6d5a41b 100644
--- a/fs/xfs/xfs_bit.c
+++ b/fs/xfs/xfs_bit.c
@@ -66,44 +66,6 @@ static const char xfs_highbit[256] = {
66#endif 66#endif
67 67
68/* 68/*
69 * Count of bits set in byte, 0..8.
70 */
71static const char xfs_countbit[256] = {
72 0, 1, 1, 2, 1, 2, 2, 3, /* 00 .. 07 */
73 1, 2, 2, 3, 2, 3, 3, 4, /* 08 .. 0f */
74 1, 2, 2, 3, 2, 3, 3, 4, /* 10 .. 17 */
75 2, 3, 3, 4, 3, 4, 4, 5, /* 18 .. 1f */
76 1, 2, 2, 3, 2, 3, 3, 4, /* 20 .. 27 */
77 2, 3, 3, 4, 3, 4, 4, 5, /* 28 .. 2f */
78 2, 3, 3, 4, 3, 4, 4, 5, /* 30 .. 37 */
79 3, 4, 4, 5, 4, 5, 5, 6, /* 38 .. 3f */
80 1, 2, 2, 3, 2, 3, 3, 4, /* 40 .. 47 */
81 2, 3, 3, 4, 3, 4, 4, 5, /* 48 .. 4f */
82 2, 3, 3, 4, 3, 4, 4, 5, /* 50 .. 57 */
83 3, 4, 4, 5, 4, 5, 5, 6, /* 58 .. 5f */
84 2, 3, 3, 4, 3, 4, 4, 5, /* 60 .. 67 */
85 3, 4, 4, 5, 4, 5, 5, 6, /* 68 .. 6f */
86 3, 4, 4, 5, 4, 5, 5, 6, /* 70 .. 77 */
87 4, 5, 5, 6, 5, 6, 6, 7, /* 78 .. 7f */
88 1, 2, 2, 3, 2, 3, 3, 4, /* 80 .. 87 */
89 2, 3, 3, 4, 3, 4, 4, 5, /* 88 .. 8f */
90 2, 3, 3, 4, 3, 4, 4, 5, /* 90 .. 97 */
91 3, 4, 4, 5, 4, 5, 5, 6, /* 98 .. 9f */
92 2, 3, 3, 4, 3, 4, 4, 5, /* a0 .. a7 */
93 3, 4, 4, 5, 4, 5, 5, 6, /* a8 .. af */
94 3, 4, 4, 5, 4, 5, 5, 6, /* b0 .. b7 */
95 4, 5, 5, 6, 5, 6, 6, 7, /* b8 .. bf */
96 2, 3, 3, 4, 3, 4, 4, 5, /* c0 .. c7 */
97 3, 4, 4, 5, 4, 5, 5, 6, /* c8 .. cf */
98 3, 4, 4, 5, 4, 5, 5, 6, /* d0 .. d7 */
99 4, 5, 5, 6, 5, 6, 6, 7, /* d8 .. df */
100 3, 4, 4, 5, 4, 5, 5, 6, /* e0 .. e7 */
101 4, 5, 5, 6, 5, 6, 6, 7, /* e8 .. ef */
102 4, 5, 5, 6, 5, 6, 6, 7, /* f0 .. f7 */
103 5, 6, 6, 7, 6, 7, 7, 8, /* f8 .. ff */
104};
105
106/*
107 * xfs_highbit32: get high bit set out of 32-bit argument, -1 if none set. 69 * xfs_highbit32: get high bit set out of 32-bit argument, -1 if none set.
108 */ 70 */
109inline int 71inline int
@@ -167,56 +129,21 @@ xfs_highbit64(
167 129
168 130
169/* 131/*
170 * Count the number of bits set in the bitmap starting with bit 132 * Return whether bitmap is empty.
171 * start_bit. Size is the size of the bitmap in words. 133 * Size is number of words in the bitmap, which is padded to word boundary
172 * 134 * Returns 1 for empty, 0 for non-empty.
173 * Do the counting by mapping a byte value to the number of set
174 * bits for that value using the xfs_countbit array, i.e.
175 * xfs_countbit[0] == 0, xfs_countbit[1] == 1, xfs_countbit[2] == 1,
176 * xfs_countbit[3] == 2, etc.
177 */ 135 */
178int 136int
179xfs_count_bits(uint *map, uint size, uint start_bit) 137xfs_bitmap_empty(uint *map, uint size)
180{ 138{
181 register int bits; 139 uint i;
182 register unsigned char *bytep; 140 uint ret = 0;
183 register unsigned char *end_map;
184 int byte_bit;
185
186 bits = 0;
187 end_map = (char*)(map + size);
188 bytep = (char*)(map + (start_bit & ~0x7));
189 byte_bit = start_bit & 0x7;
190
191 /*
192 * If the caller fell off the end of the map, return 0.
193 */
194 if (bytep >= end_map) {
195 return (0);
196 }
197
198 /*
199 * If start_bit is not byte aligned, then process the
200 * first byte separately.
201 */
202 if (byte_bit != 0) {
203 /*
204 * Shift off the bits we don't want to look at,
205 * before indexing into xfs_countbit.
206 */
207 bits += xfs_countbit[(*bytep >> byte_bit)];
208 bytep++;
209 }
210 141
211 /* 142 for (i = 0; i < size; i++) {
212 * Count the bits in each byte until the end of the bitmap. 143 ret |= map[i];
213 */
214 while (bytep < end_map) {
215 bits += xfs_countbit[*bytep];
216 bytep++;
217 } 144 }
218 145
219 return (bits); 146 return (ret == 0);
220} 147}
221 148
222/* 149/*
diff --git a/fs/xfs/xfs_bit.h b/fs/xfs/xfs_bit.h
index 0bbe56817542..082641a9782c 100644
--- a/fs/xfs/xfs_bit.h
+++ b/fs/xfs/xfs_bit.h
@@ -55,8 +55,8 @@ extern int xfs_lowbit64(__uint64_t v);
55/* Get high bit set out of 64-bit argument, -1 if none set */ 55/* Get high bit set out of 64-bit argument, -1 if none set */
56extern int xfs_highbit64(__uint64_t); 56extern int xfs_highbit64(__uint64_t);
57 57
58/* Count set bits in map starting with start_bit */ 58/* Return whether bitmap is empty (1 == empty) */
59extern int xfs_count_bits(uint *map, uint size, uint start_bit); 59extern int xfs_bitmap_empty(uint *map, uint size);
60 60
61/* Count continuous one bits in map starting with start_bit */ 61/* Count continuous one bits in map starting with start_bit */
62extern int xfs_contig_bits(uint *map, uint size, uint start_bit); 62extern int xfs_contig_bits(uint *map, uint size, uint start_bit);
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index b1ea26e40aaf..94b5c5fe2681 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -52,6 +52,7 @@
52#include "xfs_quota.h" 52#include "xfs_quota.h"
53#include "xfs_trans_space.h" 53#include "xfs_trans_space.h"
54#include "xfs_buf_item.h" 54#include "xfs_buf_item.h"
55#include "xfs_filestream.h"
55 56
56 57
57#ifdef DEBUG 58#ifdef DEBUG
@@ -277,7 +278,7 @@ xfs_bmap_isaeof(
277STATIC void 278STATIC void
278xfs_bmap_trace_addentry( 279xfs_bmap_trace_addentry(
279 int opcode, /* operation */ 280 int opcode, /* operation */
280 char *fname, /* function name */ 281 const char *fname, /* function name */
281 char *desc, /* operation description */ 282 char *desc, /* operation description */
282 xfs_inode_t *ip, /* incore inode pointer */ 283 xfs_inode_t *ip, /* incore inode pointer */
283 xfs_extnum_t idx, /* index of entry(ies) */ 284 xfs_extnum_t idx, /* index of entry(ies) */
@@ -291,7 +292,7 @@ xfs_bmap_trace_addentry(
291 */ 292 */
292STATIC void 293STATIC void
293xfs_bmap_trace_delete( 294xfs_bmap_trace_delete(
294 char *fname, /* function name */ 295 const char *fname, /* function name */
295 char *desc, /* operation description */ 296 char *desc, /* operation description */
296 xfs_inode_t *ip, /* incore inode pointer */ 297 xfs_inode_t *ip, /* incore inode pointer */
297 xfs_extnum_t idx, /* index of entry(entries) deleted */ 298 xfs_extnum_t idx, /* index of entry(entries) deleted */
@@ -304,7 +305,7 @@ xfs_bmap_trace_delete(
304 */ 305 */
305STATIC void 306STATIC void
306xfs_bmap_trace_insert( 307xfs_bmap_trace_insert(
307 char *fname, /* function name */ 308 const char *fname, /* function name */
308 char *desc, /* operation description */ 309 char *desc, /* operation description */
309 xfs_inode_t *ip, /* incore inode pointer */ 310 xfs_inode_t *ip, /* incore inode pointer */
310 xfs_extnum_t idx, /* index of entry(entries) inserted */ 311 xfs_extnum_t idx, /* index of entry(entries) inserted */
@@ -318,7 +319,7 @@ xfs_bmap_trace_insert(
318 */ 319 */
319STATIC void 320STATIC void
320xfs_bmap_trace_post_update( 321xfs_bmap_trace_post_update(
321 char *fname, /* function name */ 322 const char *fname, /* function name */
322 char *desc, /* operation description */ 323 char *desc, /* operation description */
323 xfs_inode_t *ip, /* incore inode pointer */ 324 xfs_inode_t *ip, /* incore inode pointer */
324 xfs_extnum_t idx, /* index of entry updated */ 325 xfs_extnum_t idx, /* index of entry updated */
@@ -329,17 +330,25 @@ xfs_bmap_trace_post_update(
329 */ 330 */
330STATIC void 331STATIC void
331xfs_bmap_trace_pre_update( 332xfs_bmap_trace_pre_update(
332 char *fname, /* function name */ 333 const char *fname, /* function name */
333 char *desc, /* operation description */ 334 char *desc, /* operation description */
334 xfs_inode_t *ip, /* incore inode pointer */ 335 xfs_inode_t *ip, /* incore inode pointer */
335 xfs_extnum_t idx, /* index of entry to be updated */ 336 xfs_extnum_t idx, /* index of entry to be updated */
336 int whichfork); /* data or attr fork */ 337 int whichfork); /* data or attr fork */
337 338
339#define XFS_BMAP_TRACE_DELETE(d,ip,i,c,w) \
340 xfs_bmap_trace_delete(__FUNCTION__,d,ip,i,c,w)
341#define XFS_BMAP_TRACE_INSERT(d,ip,i,c,r1,r2,w) \
342 xfs_bmap_trace_insert(__FUNCTION__,d,ip,i,c,r1,r2,w)
343#define XFS_BMAP_TRACE_POST_UPDATE(d,ip,i,w) \
344 xfs_bmap_trace_post_update(__FUNCTION__,d,ip,i,w)
345#define XFS_BMAP_TRACE_PRE_UPDATE(d,ip,i,w) \
346 xfs_bmap_trace_pre_update(__FUNCTION__,d,ip,i,w)
338#else 347#else
339#define xfs_bmap_trace_delete(f,d,ip,i,c,w) 348#define XFS_BMAP_TRACE_DELETE(d,ip,i,c,w)
340#define xfs_bmap_trace_insert(f,d,ip,i,c,r1,r2,w) 349#define XFS_BMAP_TRACE_INSERT(d,ip,i,c,r1,r2,w)
341#define xfs_bmap_trace_post_update(f,d,ip,i,w) 350#define XFS_BMAP_TRACE_POST_UPDATE(d,ip,i,w)
342#define xfs_bmap_trace_pre_update(f,d,ip,i,w) 351#define XFS_BMAP_TRACE_PRE_UPDATE(d,ip,i,w)
343#endif /* XFS_BMAP_TRACE */ 352#endif /* XFS_BMAP_TRACE */
344 353
345/* 354/*
@@ -531,9 +540,6 @@ xfs_bmap_add_extent(
531 xfs_filblks_t da_new; /* new count del alloc blocks used */ 540 xfs_filblks_t da_new; /* new count del alloc blocks used */
532 xfs_filblks_t da_old; /* old count del alloc blocks used */ 541 xfs_filblks_t da_old; /* old count del alloc blocks used */
533 int error; /* error return value */ 542 int error; /* error return value */
534#ifdef XFS_BMAP_TRACE
535 static char fname[] = "xfs_bmap_add_extent";
536#endif
537 xfs_ifork_t *ifp; /* inode fork ptr */ 543 xfs_ifork_t *ifp; /* inode fork ptr */
538 int logflags; /* returned value */ 544 int logflags; /* returned value */
539 xfs_extnum_t nextents; /* number of extents in file now */ 545 xfs_extnum_t nextents; /* number of extents in file now */
@@ -551,8 +557,8 @@ xfs_bmap_add_extent(
551 * already extents in the list. 557 * already extents in the list.
552 */ 558 */
553 if (nextents == 0) { 559 if (nextents == 0) {
554 xfs_bmap_trace_insert(fname, "insert empty", ip, 0, 1, new, 560 XFS_BMAP_TRACE_INSERT("insert empty", ip, 0, 1, new, NULL,
555 NULL, whichfork); 561 whichfork);
556 xfs_iext_insert(ifp, 0, 1, new); 562 xfs_iext_insert(ifp, 0, 1, new);
557 ASSERT(cur == NULL); 563 ASSERT(cur == NULL);
558 ifp->if_lastex = 0; 564 ifp->if_lastex = 0;
@@ -710,9 +716,6 @@ xfs_bmap_add_extent_delay_real(
710 int diff; /* temp value */ 716 int diff; /* temp value */
711 xfs_bmbt_rec_t *ep; /* extent entry for idx */ 717 xfs_bmbt_rec_t *ep; /* extent entry for idx */
712 int error; /* error return value */ 718 int error; /* error return value */
713#ifdef XFS_BMAP_TRACE
714 static char fname[] = "xfs_bmap_add_extent_delay_real";
715#endif
716 int i; /* temp state */ 719 int i; /* temp state */
717 xfs_ifork_t *ifp; /* inode fork pointer */ 720 xfs_ifork_t *ifp; /* inode fork pointer */
718 xfs_fileoff_t new_endoff; /* end offset of new entry */ 721 xfs_fileoff_t new_endoff; /* end offset of new entry */
@@ -808,15 +811,14 @@ xfs_bmap_add_extent_delay_real(
808 * Filling in all of a previously delayed allocation extent. 811 * Filling in all of a previously delayed allocation extent.
809 * The left and right neighbors are both contiguous with new. 812 * The left and right neighbors are both contiguous with new.
810 */ 813 */
811 xfs_bmap_trace_pre_update(fname, "LF|RF|LC|RC", ip, idx - 1, 814 XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|LC|RC", ip, idx - 1,
812 XFS_DATA_FORK); 815 XFS_DATA_FORK);
813 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), 816 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
814 LEFT.br_blockcount + PREV.br_blockcount + 817 LEFT.br_blockcount + PREV.br_blockcount +
815 RIGHT.br_blockcount); 818 RIGHT.br_blockcount);
816 xfs_bmap_trace_post_update(fname, "LF|RF|LC|RC", ip, idx - 1, 819 XFS_BMAP_TRACE_POST_UPDATE("LF|RF|LC|RC", ip, idx - 1,
817 XFS_DATA_FORK);
818 xfs_bmap_trace_delete(fname, "LF|RF|LC|RC", ip, idx, 2,
819 XFS_DATA_FORK); 820 XFS_DATA_FORK);
821 XFS_BMAP_TRACE_DELETE("LF|RF|LC|RC", ip, idx, 2, XFS_DATA_FORK);
820 xfs_iext_remove(ifp, idx, 2); 822 xfs_iext_remove(ifp, idx, 2);
821 ip->i_df.if_lastex = idx - 1; 823 ip->i_df.if_lastex = idx - 1;
822 ip->i_d.di_nextents--; 824 ip->i_d.di_nextents--;
@@ -855,15 +857,14 @@ xfs_bmap_add_extent_delay_real(
855 * Filling in all of a previously delayed allocation extent. 857 * Filling in all of a previously delayed allocation extent.
856 * The left neighbor is contiguous, the right is not. 858 * The left neighbor is contiguous, the right is not.
857 */ 859 */
858 xfs_bmap_trace_pre_update(fname, "LF|RF|LC", ip, idx - 1, 860 XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|LC", ip, idx - 1,
859 XFS_DATA_FORK); 861 XFS_DATA_FORK);
860 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), 862 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
861 LEFT.br_blockcount + PREV.br_blockcount); 863 LEFT.br_blockcount + PREV.br_blockcount);
862 xfs_bmap_trace_post_update(fname, "LF|RF|LC", ip, idx - 1, 864 XFS_BMAP_TRACE_POST_UPDATE("LF|RF|LC", ip, idx - 1,
863 XFS_DATA_FORK); 865 XFS_DATA_FORK);
864 ip->i_df.if_lastex = idx - 1; 866 ip->i_df.if_lastex = idx - 1;
865 xfs_bmap_trace_delete(fname, "LF|RF|LC", ip, idx, 1, 867 XFS_BMAP_TRACE_DELETE("LF|RF|LC", ip, idx, 1, XFS_DATA_FORK);
866 XFS_DATA_FORK);
867 xfs_iext_remove(ifp, idx, 1); 868 xfs_iext_remove(ifp, idx, 1);
868 if (cur == NULL) 869 if (cur == NULL)
869 rval = XFS_ILOG_DEXT; 870 rval = XFS_ILOG_DEXT;
@@ -892,16 +893,13 @@ xfs_bmap_add_extent_delay_real(
892 * Filling in all of a previously delayed allocation extent. 893 * Filling in all of a previously delayed allocation extent.
893 * The right neighbor is contiguous, the left is not. 894 * The right neighbor is contiguous, the left is not.
894 */ 895 */
895 xfs_bmap_trace_pre_update(fname, "LF|RF|RC", ip, idx, 896 XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|RC", ip, idx, XFS_DATA_FORK);
896 XFS_DATA_FORK);
897 xfs_bmbt_set_startblock(ep, new->br_startblock); 897 xfs_bmbt_set_startblock(ep, new->br_startblock);
898 xfs_bmbt_set_blockcount(ep, 898 xfs_bmbt_set_blockcount(ep,
899 PREV.br_blockcount + RIGHT.br_blockcount); 899 PREV.br_blockcount + RIGHT.br_blockcount);
900 xfs_bmap_trace_post_update(fname, "LF|RF|RC", ip, idx, 900 XFS_BMAP_TRACE_POST_UPDATE("LF|RF|RC", ip, idx, XFS_DATA_FORK);
901 XFS_DATA_FORK);
902 ip->i_df.if_lastex = idx; 901 ip->i_df.if_lastex = idx;
903 xfs_bmap_trace_delete(fname, "LF|RF|RC", ip, idx + 1, 1, 902 XFS_BMAP_TRACE_DELETE("LF|RF|RC", ip, idx + 1, 1, XFS_DATA_FORK);
904 XFS_DATA_FORK);
905 xfs_iext_remove(ifp, idx + 1, 1); 903 xfs_iext_remove(ifp, idx + 1, 1);
906 if (cur == NULL) 904 if (cur == NULL)
907 rval = XFS_ILOG_DEXT; 905 rval = XFS_ILOG_DEXT;
@@ -931,11 +929,9 @@ xfs_bmap_add_extent_delay_real(
931 * Neither the left nor right neighbors are contiguous with 929 * Neither the left nor right neighbors are contiguous with
932 * the new one. 930 * the new one.
933 */ 931 */
934 xfs_bmap_trace_pre_update(fname, "LF|RF", ip, idx, 932 XFS_BMAP_TRACE_PRE_UPDATE("LF|RF", ip, idx, XFS_DATA_FORK);
935 XFS_DATA_FORK);
936 xfs_bmbt_set_startblock(ep, new->br_startblock); 933 xfs_bmbt_set_startblock(ep, new->br_startblock);
937 xfs_bmap_trace_post_update(fname, "LF|RF", ip, idx, 934 XFS_BMAP_TRACE_POST_UPDATE("LF|RF", ip, idx, XFS_DATA_FORK);
938 XFS_DATA_FORK);
939 ip->i_df.if_lastex = idx; 935 ip->i_df.if_lastex = idx;
940 ip->i_d.di_nextents++; 936 ip->i_d.di_nextents++;
941 if (cur == NULL) 937 if (cur == NULL)
@@ -963,17 +959,14 @@ xfs_bmap_add_extent_delay_real(
963 * Filling in the first part of a previous delayed allocation. 959 * Filling in the first part of a previous delayed allocation.
964 * The left neighbor is contiguous. 960 * The left neighbor is contiguous.
965 */ 961 */
966 xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx - 1, 962 XFS_BMAP_TRACE_PRE_UPDATE("LF|LC", ip, idx - 1, XFS_DATA_FORK);
967 XFS_DATA_FORK);
968 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), 963 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
969 LEFT.br_blockcount + new->br_blockcount); 964 LEFT.br_blockcount + new->br_blockcount);
970 xfs_bmbt_set_startoff(ep, 965 xfs_bmbt_set_startoff(ep,
971 PREV.br_startoff + new->br_blockcount); 966 PREV.br_startoff + new->br_blockcount);
972 xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx - 1, 967 XFS_BMAP_TRACE_POST_UPDATE("LF|LC", ip, idx - 1, XFS_DATA_FORK);
973 XFS_DATA_FORK);
974 temp = PREV.br_blockcount - new->br_blockcount; 968 temp = PREV.br_blockcount - new->br_blockcount;
975 xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx, 969 XFS_BMAP_TRACE_PRE_UPDATE("LF|LC", ip, idx, XFS_DATA_FORK);
976 XFS_DATA_FORK);
977 xfs_bmbt_set_blockcount(ep, temp); 970 xfs_bmbt_set_blockcount(ep, temp);
978 ip->i_df.if_lastex = idx - 1; 971 ip->i_df.if_lastex = idx - 1;
979 if (cur == NULL) 972 if (cur == NULL)
@@ -995,8 +988,7 @@ xfs_bmap_add_extent_delay_real(
995 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), 988 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
996 STARTBLOCKVAL(PREV.br_startblock)); 989 STARTBLOCKVAL(PREV.br_startblock));
997 xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp)); 990 xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
998 xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx, 991 XFS_BMAP_TRACE_POST_UPDATE("LF|LC", ip, idx, XFS_DATA_FORK);
999 XFS_DATA_FORK);
1000 *dnew = temp; 992 *dnew = temp;
1001 /* DELTA: The boundary between two in-core extents moved. */ 993 /* DELTA: The boundary between two in-core extents moved. */
1002 temp = LEFT.br_startoff; 994 temp = LEFT.br_startoff;
@@ -1009,11 +1001,11 @@ xfs_bmap_add_extent_delay_real(
1009 * Filling in the first part of a previous delayed allocation. 1001 * Filling in the first part of a previous delayed allocation.
1010 * The left neighbor is not contiguous. 1002 * The left neighbor is not contiguous.
1011 */ 1003 */
1012 xfs_bmap_trace_pre_update(fname, "LF", ip, idx, XFS_DATA_FORK); 1004 XFS_BMAP_TRACE_PRE_UPDATE("LF", ip, idx, XFS_DATA_FORK);
1013 xfs_bmbt_set_startoff(ep, new_endoff); 1005 xfs_bmbt_set_startoff(ep, new_endoff);
1014 temp = PREV.br_blockcount - new->br_blockcount; 1006 temp = PREV.br_blockcount - new->br_blockcount;
1015 xfs_bmbt_set_blockcount(ep, temp); 1007 xfs_bmbt_set_blockcount(ep, temp);
1016 xfs_bmap_trace_insert(fname, "LF", ip, idx, 1, new, NULL, 1008 XFS_BMAP_TRACE_INSERT("LF", ip, idx, 1, new, NULL,
1017 XFS_DATA_FORK); 1009 XFS_DATA_FORK);
1018 xfs_iext_insert(ifp, idx, 1, new); 1010 xfs_iext_insert(ifp, idx, 1, new);
1019 ip->i_df.if_lastex = idx; 1011 ip->i_df.if_lastex = idx;
@@ -1046,8 +1038,7 @@ xfs_bmap_add_extent_delay_real(
1046 (cur ? cur->bc_private.b.allocated : 0)); 1038 (cur ? cur->bc_private.b.allocated : 0));
1047 ep = xfs_iext_get_ext(ifp, idx + 1); 1039 ep = xfs_iext_get_ext(ifp, idx + 1);
1048 xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp)); 1040 xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
1049 xfs_bmap_trace_post_update(fname, "LF", ip, idx + 1, 1041 XFS_BMAP_TRACE_POST_UPDATE("LF", ip, idx + 1, XFS_DATA_FORK);
1050 XFS_DATA_FORK);
1051 *dnew = temp; 1042 *dnew = temp;
1052 /* DELTA: One in-core extent is split in two. */ 1043 /* DELTA: One in-core extent is split in two. */
1053 temp = PREV.br_startoff; 1044 temp = PREV.br_startoff;
@@ -1060,17 +1051,14 @@ xfs_bmap_add_extent_delay_real(
1060 * The right neighbor is contiguous with the new allocation. 1051 * The right neighbor is contiguous with the new allocation.
1061 */ 1052 */
1062 temp = PREV.br_blockcount - new->br_blockcount; 1053 temp = PREV.br_blockcount - new->br_blockcount;
1063 xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx, 1054 XFS_BMAP_TRACE_PRE_UPDATE("RF|RC", ip, idx, XFS_DATA_FORK);
1064 XFS_DATA_FORK); 1055 XFS_BMAP_TRACE_PRE_UPDATE("RF|RC", ip, idx + 1, XFS_DATA_FORK);
1065 xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx + 1,
1066 XFS_DATA_FORK);
1067 xfs_bmbt_set_blockcount(ep, temp); 1056 xfs_bmbt_set_blockcount(ep, temp);
1068 xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1), 1057 xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1),
1069 new->br_startoff, new->br_startblock, 1058 new->br_startoff, new->br_startblock,
1070 new->br_blockcount + RIGHT.br_blockcount, 1059 new->br_blockcount + RIGHT.br_blockcount,
1071 RIGHT.br_state); 1060 RIGHT.br_state);
1072 xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx + 1, 1061 XFS_BMAP_TRACE_POST_UPDATE("RF|RC", ip, idx + 1, XFS_DATA_FORK);
1073 XFS_DATA_FORK);
1074 ip->i_df.if_lastex = idx + 1; 1062 ip->i_df.if_lastex = idx + 1;
1075 if (cur == NULL) 1063 if (cur == NULL)
1076 rval = XFS_ILOG_DEXT; 1064 rval = XFS_ILOG_DEXT;
@@ -1091,8 +1079,7 @@ xfs_bmap_add_extent_delay_real(
1091 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), 1079 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
1092 STARTBLOCKVAL(PREV.br_startblock)); 1080 STARTBLOCKVAL(PREV.br_startblock));
1093 xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp)); 1081 xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
1094 xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx, 1082 XFS_BMAP_TRACE_POST_UPDATE("RF|RC", ip, idx, XFS_DATA_FORK);
1095 XFS_DATA_FORK);
1096 *dnew = temp; 1083 *dnew = temp;
1097 /* DELTA: The boundary between two in-core extents moved. */ 1084 /* DELTA: The boundary between two in-core extents moved. */
1098 temp = PREV.br_startoff; 1085 temp = PREV.br_startoff;
@@ -1106,10 +1093,10 @@ xfs_bmap_add_extent_delay_real(
1106 * The right neighbor is not contiguous. 1093 * The right neighbor is not contiguous.
1107 */ 1094 */
1108 temp = PREV.br_blockcount - new->br_blockcount; 1095 temp = PREV.br_blockcount - new->br_blockcount;
1109 xfs_bmap_trace_pre_update(fname, "RF", ip, idx, XFS_DATA_FORK); 1096 XFS_BMAP_TRACE_PRE_UPDATE("RF", ip, idx, XFS_DATA_FORK);
1110 xfs_bmbt_set_blockcount(ep, temp); 1097 xfs_bmbt_set_blockcount(ep, temp);
1111 xfs_bmap_trace_insert(fname, "RF", ip, idx + 1, 1, 1098 XFS_BMAP_TRACE_INSERT("RF", ip, idx + 1, 1, new, NULL,
1112 new, NULL, XFS_DATA_FORK); 1099 XFS_DATA_FORK);
1113 xfs_iext_insert(ifp, idx + 1, 1, new); 1100 xfs_iext_insert(ifp, idx + 1, 1, new);
1114 ip->i_df.if_lastex = idx + 1; 1101 ip->i_df.if_lastex = idx + 1;
1115 ip->i_d.di_nextents++; 1102 ip->i_d.di_nextents++;
@@ -1141,7 +1128,7 @@ xfs_bmap_add_extent_delay_real(
1141 (cur ? cur->bc_private.b.allocated : 0)); 1128 (cur ? cur->bc_private.b.allocated : 0));
1142 ep = xfs_iext_get_ext(ifp, idx); 1129 ep = xfs_iext_get_ext(ifp, idx);
1143 xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp)); 1130 xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
1144 xfs_bmap_trace_post_update(fname, "RF", ip, idx, XFS_DATA_FORK); 1131 XFS_BMAP_TRACE_POST_UPDATE("RF", ip, idx, XFS_DATA_FORK);
1145 *dnew = temp; 1132 *dnew = temp;
1146 /* DELTA: One in-core extent is split in two. */ 1133 /* DELTA: One in-core extent is split in two. */
1147 temp = PREV.br_startoff; 1134 temp = PREV.br_startoff;
@@ -1155,7 +1142,7 @@ xfs_bmap_add_extent_delay_real(
1155 * This case is avoided almost all the time. 1142 * This case is avoided almost all the time.
1156 */ 1143 */
1157 temp = new->br_startoff - PREV.br_startoff; 1144 temp = new->br_startoff - PREV.br_startoff;
1158 xfs_bmap_trace_pre_update(fname, "0", ip, idx, XFS_DATA_FORK); 1145 XFS_BMAP_TRACE_PRE_UPDATE("0", ip, idx, XFS_DATA_FORK);
1159 xfs_bmbt_set_blockcount(ep, temp); 1146 xfs_bmbt_set_blockcount(ep, temp);
1160 r[0] = *new; 1147 r[0] = *new;
1161 r[1].br_state = PREV.br_state; 1148 r[1].br_state = PREV.br_state;
@@ -1163,7 +1150,7 @@ xfs_bmap_add_extent_delay_real(
1163 r[1].br_startoff = new_endoff; 1150 r[1].br_startoff = new_endoff;
1164 temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff; 1151 temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff;
1165 r[1].br_blockcount = temp2; 1152 r[1].br_blockcount = temp2;
1166 xfs_bmap_trace_insert(fname, "0", ip, idx + 1, 2, &r[0], &r[1], 1153 XFS_BMAP_TRACE_INSERT("0", ip, idx + 1, 2, &r[0], &r[1],
1167 XFS_DATA_FORK); 1154 XFS_DATA_FORK);
1168 xfs_iext_insert(ifp, idx + 1, 2, &r[0]); 1155 xfs_iext_insert(ifp, idx + 1, 2, &r[0]);
1169 ip->i_df.if_lastex = idx + 1; 1156 ip->i_df.if_lastex = idx + 1;
@@ -1222,13 +1209,11 @@ xfs_bmap_add_extent_delay_real(
1222 } 1209 }
1223 ep = xfs_iext_get_ext(ifp, idx); 1210 ep = xfs_iext_get_ext(ifp, idx);
1224 xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp)); 1211 xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
1225 xfs_bmap_trace_post_update(fname, "0", ip, idx, XFS_DATA_FORK); 1212 XFS_BMAP_TRACE_POST_UPDATE("0", ip, idx, XFS_DATA_FORK);
1226 xfs_bmap_trace_pre_update(fname, "0", ip, idx + 2, 1213 XFS_BMAP_TRACE_PRE_UPDATE("0", ip, idx + 2, XFS_DATA_FORK);
1227 XFS_DATA_FORK);
1228 xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx + 2), 1214 xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx + 2),
1229 NULLSTARTBLOCK((int)temp2)); 1215 NULLSTARTBLOCK((int)temp2));
1230 xfs_bmap_trace_post_update(fname, "0", ip, idx + 2, 1216 XFS_BMAP_TRACE_POST_UPDATE("0", ip, idx + 2, XFS_DATA_FORK);
1231 XFS_DATA_FORK);
1232 *dnew = temp + temp2; 1217 *dnew = temp + temp2;
1233 /* DELTA: One in-core extent is split in three. */ 1218 /* DELTA: One in-core extent is split in three. */
1234 temp = PREV.br_startoff; 1219 temp = PREV.br_startoff;
@@ -1287,9 +1272,6 @@ xfs_bmap_add_extent_unwritten_real(
1287 xfs_btree_cur_t *cur; /* btree cursor */ 1272 xfs_btree_cur_t *cur; /* btree cursor */
1288 xfs_bmbt_rec_t *ep; /* extent entry for idx */ 1273 xfs_bmbt_rec_t *ep; /* extent entry for idx */
1289 int error; /* error return value */ 1274 int error; /* error return value */
1290#ifdef XFS_BMAP_TRACE
1291 static char fname[] = "xfs_bmap_add_extent_unwritten_real";
1292#endif
1293 int i; /* temp state */ 1275 int i; /* temp state */
1294 xfs_ifork_t *ifp; /* inode fork pointer */ 1276 xfs_ifork_t *ifp; /* inode fork pointer */
1295 xfs_fileoff_t new_endoff; /* end offset of new entry */ 1277 xfs_fileoff_t new_endoff; /* end offset of new entry */
@@ -1390,15 +1372,14 @@ xfs_bmap_add_extent_unwritten_real(
1390 * Setting all of a previous oldext extent to newext. 1372 * Setting all of a previous oldext extent to newext.
1391 * The left and right neighbors are both contiguous with new. 1373 * The left and right neighbors are both contiguous with new.
1392 */ 1374 */
1393 xfs_bmap_trace_pre_update(fname, "LF|RF|LC|RC", ip, idx - 1, 1375 XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|LC|RC", ip, idx - 1,
1394 XFS_DATA_FORK); 1376 XFS_DATA_FORK);
1395 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), 1377 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
1396 LEFT.br_blockcount + PREV.br_blockcount + 1378 LEFT.br_blockcount + PREV.br_blockcount +
1397 RIGHT.br_blockcount); 1379 RIGHT.br_blockcount);
1398 xfs_bmap_trace_post_update(fname, "LF|RF|LC|RC", ip, idx - 1, 1380 XFS_BMAP_TRACE_POST_UPDATE("LF|RF|LC|RC", ip, idx - 1,
1399 XFS_DATA_FORK);
1400 xfs_bmap_trace_delete(fname, "LF|RF|LC|RC", ip, idx, 2,
1401 XFS_DATA_FORK); 1381 XFS_DATA_FORK);
1382 XFS_BMAP_TRACE_DELETE("LF|RF|LC|RC", ip, idx, 2, XFS_DATA_FORK);
1402 xfs_iext_remove(ifp, idx, 2); 1383 xfs_iext_remove(ifp, idx, 2);
1403 ip->i_df.if_lastex = idx - 1; 1384 ip->i_df.if_lastex = idx - 1;
1404 ip->i_d.di_nextents -= 2; 1385 ip->i_d.di_nextents -= 2;
@@ -1441,15 +1422,14 @@ xfs_bmap_add_extent_unwritten_real(
1441 * Setting all of a previous oldext extent to newext. 1422 * Setting all of a previous oldext extent to newext.
1442 * The left neighbor is contiguous, the right is not. 1423 * The left neighbor is contiguous, the right is not.
1443 */ 1424 */
1444 xfs_bmap_trace_pre_update(fname, "LF|RF|LC", ip, idx - 1, 1425 XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|LC", ip, idx - 1,
1445 XFS_DATA_FORK); 1426 XFS_DATA_FORK);
1446 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), 1427 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
1447 LEFT.br_blockcount + PREV.br_blockcount); 1428 LEFT.br_blockcount + PREV.br_blockcount);
1448 xfs_bmap_trace_post_update(fname, "LF|RF|LC", ip, idx - 1, 1429 XFS_BMAP_TRACE_POST_UPDATE("LF|RF|LC", ip, idx - 1,
1449 XFS_DATA_FORK); 1430 XFS_DATA_FORK);
1450 ip->i_df.if_lastex = idx - 1; 1431 ip->i_df.if_lastex = idx - 1;
1451 xfs_bmap_trace_delete(fname, "LF|RF|LC", ip, idx, 1, 1432 XFS_BMAP_TRACE_DELETE("LF|RF|LC", ip, idx, 1, XFS_DATA_FORK);
1452 XFS_DATA_FORK);
1453 xfs_iext_remove(ifp, idx, 1); 1433 xfs_iext_remove(ifp, idx, 1);
1454 ip->i_d.di_nextents--; 1434 ip->i_d.di_nextents--;
1455 if (cur == NULL) 1435 if (cur == NULL)
@@ -1484,16 +1464,15 @@ xfs_bmap_add_extent_unwritten_real(
1484 * Setting all of a previous oldext extent to newext. 1464 * Setting all of a previous oldext extent to newext.
1485 * The right neighbor is contiguous, the left is not. 1465 * The right neighbor is contiguous, the left is not.
1486 */ 1466 */
1487 xfs_bmap_trace_pre_update(fname, "LF|RF|RC", ip, idx, 1467 XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|RC", ip, idx,
1488 XFS_DATA_FORK); 1468 XFS_DATA_FORK);
1489 xfs_bmbt_set_blockcount(ep, 1469 xfs_bmbt_set_blockcount(ep,
1490 PREV.br_blockcount + RIGHT.br_blockcount); 1470 PREV.br_blockcount + RIGHT.br_blockcount);
1491 xfs_bmbt_set_state(ep, newext); 1471 xfs_bmbt_set_state(ep, newext);
1492 xfs_bmap_trace_post_update(fname, "LF|RF|RC", ip, idx, 1472 XFS_BMAP_TRACE_POST_UPDATE("LF|RF|RC", ip, idx,
1493 XFS_DATA_FORK); 1473 XFS_DATA_FORK);
1494 ip->i_df.if_lastex = idx; 1474 ip->i_df.if_lastex = idx;
1495 xfs_bmap_trace_delete(fname, "LF|RF|RC", ip, idx + 1, 1, 1475 XFS_BMAP_TRACE_DELETE("LF|RF|RC", ip, idx + 1, 1, XFS_DATA_FORK);
1496 XFS_DATA_FORK);
1497 xfs_iext_remove(ifp, idx + 1, 1); 1476 xfs_iext_remove(ifp, idx + 1, 1);
1498 ip->i_d.di_nextents--; 1477 ip->i_d.di_nextents--;
1499 if (cur == NULL) 1478 if (cur == NULL)
@@ -1529,10 +1508,10 @@ xfs_bmap_add_extent_unwritten_real(
1529 * Neither the left nor right neighbors are contiguous with 1508 * Neither the left nor right neighbors are contiguous with
1530 * the new one. 1509 * the new one.
1531 */ 1510 */
1532 xfs_bmap_trace_pre_update(fname, "LF|RF", ip, idx, 1511 XFS_BMAP_TRACE_PRE_UPDATE("LF|RF", ip, idx,
1533 XFS_DATA_FORK); 1512 XFS_DATA_FORK);
1534 xfs_bmbt_set_state(ep, newext); 1513 xfs_bmbt_set_state(ep, newext);
1535 xfs_bmap_trace_post_update(fname, "LF|RF", ip, idx, 1514 XFS_BMAP_TRACE_POST_UPDATE("LF|RF", ip, idx,
1536 XFS_DATA_FORK); 1515 XFS_DATA_FORK);
1537 ip->i_df.if_lastex = idx; 1516 ip->i_df.if_lastex = idx;
1538 if (cur == NULL) 1517 if (cur == NULL)
@@ -1559,21 +1538,21 @@ xfs_bmap_add_extent_unwritten_real(
1559 * Setting the first part of a previous oldext extent to newext. 1538 * Setting the first part of a previous oldext extent to newext.
1560 * The left neighbor is contiguous. 1539 * The left neighbor is contiguous.
1561 */ 1540 */
1562 xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx - 1, 1541 XFS_BMAP_TRACE_PRE_UPDATE("LF|LC", ip, idx - 1,
1563 XFS_DATA_FORK); 1542 XFS_DATA_FORK);
1564 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), 1543 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
1565 LEFT.br_blockcount + new->br_blockcount); 1544 LEFT.br_blockcount + new->br_blockcount);
1566 xfs_bmbt_set_startoff(ep, 1545 xfs_bmbt_set_startoff(ep,
1567 PREV.br_startoff + new->br_blockcount); 1546 PREV.br_startoff + new->br_blockcount);
1568 xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx - 1, 1547 XFS_BMAP_TRACE_POST_UPDATE("LF|LC", ip, idx - 1,
1569 XFS_DATA_FORK); 1548 XFS_DATA_FORK);
1570 xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx, 1549 XFS_BMAP_TRACE_PRE_UPDATE("LF|LC", ip, idx,
1571 XFS_DATA_FORK); 1550 XFS_DATA_FORK);
1572 xfs_bmbt_set_startblock(ep, 1551 xfs_bmbt_set_startblock(ep,
1573 new->br_startblock + new->br_blockcount); 1552 new->br_startblock + new->br_blockcount);
1574 xfs_bmbt_set_blockcount(ep, 1553 xfs_bmbt_set_blockcount(ep,
1575 PREV.br_blockcount - new->br_blockcount); 1554 PREV.br_blockcount - new->br_blockcount);
1576 xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx, 1555 XFS_BMAP_TRACE_POST_UPDATE("LF|LC", ip, idx,
1577 XFS_DATA_FORK); 1556 XFS_DATA_FORK);
1578 ip->i_df.if_lastex = idx - 1; 1557 ip->i_df.if_lastex = idx - 1;
1579 if (cur == NULL) 1558 if (cur == NULL)
@@ -1610,15 +1589,15 @@ xfs_bmap_add_extent_unwritten_real(
1610 * Setting the first part of a previous oldext extent to newext. 1589 * Setting the first part of a previous oldext extent to newext.
1611 * The left neighbor is not contiguous. 1590 * The left neighbor is not contiguous.
1612 */ 1591 */
1613 xfs_bmap_trace_pre_update(fname, "LF", ip, idx, XFS_DATA_FORK); 1592 XFS_BMAP_TRACE_PRE_UPDATE("LF", ip, idx, XFS_DATA_FORK);
1614 ASSERT(ep && xfs_bmbt_get_state(ep) == oldext); 1593 ASSERT(ep && xfs_bmbt_get_state(ep) == oldext);
1615 xfs_bmbt_set_startoff(ep, new_endoff); 1594 xfs_bmbt_set_startoff(ep, new_endoff);
1616 xfs_bmbt_set_blockcount(ep, 1595 xfs_bmbt_set_blockcount(ep,
1617 PREV.br_blockcount - new->br_blockcount); 1596 PREV.br_blockcount - new->br_blockcount);
1618 xfs_bmbt_set_startblock(ep, 1597 xfs_bmbt_set_startblock(ep,
1619 new->br_startblock + new->br_blockcount); 1598 new->br_startblock + new->br_blockcount);
1620 xfs_bmap_trace_post_update(fname, "LF", ip, idx, XFS_DATA_FORK); 1599 XFS_BMAP_TRACE_POST_UPDATE("LF", ip, idx, XFS_DATA_FORK);
1621 xfs_bmap_trace_insert(fname, "LF", ip, idx, 1, new, NULL, 1600 XFS_BMAP_TRACE_INSERT("LF", ip, idx, 1, new, NULL,
1622 XFS_DATA_FORK); 1601 XFS_DATA_FORK);
1623 xfs_iext_insert(ifp, idx, 1, new); 1602 xfs_iext_insert(ifp, idx, 1, new);
1624 ip->i_df.if_lastex = idx; 1603 ip->i_df.if_lastex = idx;
@@ -1653,18 +1632,18 @@ xfs_bmap_add_extent_unwritten_real(
1653 * Setting the last part of a previous oldext extent to newext. 1632 * Setting the last part of a previous oldext extent to newext.
1654 * The right neighbor is contiguous with the new allocation. 1633 * The right neighbor is contiguous with the new allocation.
1655 */ 1634 */
1656 xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx, 1635 XFS_BMAP_TRACE_PRE_UPDATE("RF|RC", ip, idx,
1657 XFS_DATA_FORK); 1636 XFS_DATA_FORK);
1658 xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx + 1, 1637 XFS_BMAP_TRACE_PRE_UPDATE("RF|RC", ip, idx + 1,
1659 XFS_DATA_FORK); 1638 XFS_DATA_FORK);
1660 xfs_bmbt_set_blockcount(ep, 1639 xfs_bmbt_set_blockcount(ep,
1661 PREV.br_blockcount - new->br_blockcount); 1640 PREV.br_blockcount - new->br_blockcount);
1662 xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx, 1641 XFS_BMAP_TRACE_POST_UPDATE("RF|RC", ip, idx,
1663 XFS_DATA_FORK); 1642 XFS_DATA_FORK);
1664 xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1), 1643 xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1),
1665 new->br_startoff, new->br_startblock, 1644 new->br_startoff, new->br_startblock,
1666 new->br_blockcount + RIGHT.br_blockcount, newext); 1645 new->br_blockcount + RIGHT.br_blockcount, newext);
1667 xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx + 1, 1646 XFS_BMAP_TRACE_POST_UPDATE("RF|RC", ip, idx + 1,
1668 XFS_DATA_FORK); 1647 XFS_DATA_FORK);
1669 ip->i_df.if_lastex = idx + 1; 1648 ip->i_df.if_lastex = idx + 1;
1670 if (cur == NULL) 1649 if (cur == NULL)
@@ -1700,12 +1679,12 @@ xfs_bmap_add_extent_unwritten_real(
1700 * Setting the last part of a previous oldext extent to newext. 1679 * Setting the last part of a previous oldext extent to newext.
1701 * The right neighbor is not contiguous. 1680 * The right neighbor is not contiguous.
1702 */ 1681 */
1703 xfs_bmap_trace_pre_update(fname, "RF", ip, idx, XFS_DATA_FORK); 1682 XFS_BMAP_TRACE_PRE_UPDATE("RF", ip, idx, XFS_DATA_FORK);
1704 xfs_bmbt_set_blockcount(ep, 1683 xfs_bmbt_set_blockcount(ep,
1705 PREV.br_blockcount - new->br_blockcount); 1684 PREV.br_blockcount - new->br_blockcount);
1706 xfs_bmap_trace_post_update(fname, "RF", ip, idx, XFS_DATA_FORK); 1685 XFS_BMAP_TRACE_POST_UPDATE("RF", ip, idx, XFS_DATA_FORK);
1707 xfs_bmap_trace_insert(fname, "RF", ip, idx + 1, 1, 1686 XFS_BMAP_TRACE_INSERT("RF", ip, idx + 1, 1, new, NULL,
1708 new, NULL, XFS_DATA_FORK); 1687 XFS_DATA_FORK);
1709 xfs_iext_insert(ifp, idx + 1, 1, new); 1688 xfs_iext_insert(ifp, idx + 1, 1, new);
1710 ip->i_df.if_lastex = idx + 1; 1689 ip->i_df.if_lastex = idx + 1;
1711 ip->i_d.di_nextents++; 1690 ip->i_d.di_nextents++;
@@ -1744,17 +1723,17 @@ xfs_bmap_add_extent_unwritten_real(
1744 * newext. Contiguity is impossible here. 1723 * newext. Contiguity is impossible here.
1745 * One extent becomes three extents. 1724 * One extent becomes three extents.
1746 */ 1725 */
1747 xfs_bmap_trace_pre_update(fname, "0", ip, idx, XFS_DATA_FORK); 1726 XFS_BMAP_TRACE_PRE_UPDATE("0", ip, idx, XFS_DATA_FORK);
1748 xfs_bmbt_set_blockcount(ep, 1727 xfs_bmbt_set_blockcount(ep,
1749 new->br_startoff - PREV.br_startoff); 1728 new->br_startoff - PREV.br_startoff);
1750 xfs_bmap_trace_post_update(fname, "0", ip, idx, XFS_DATA_FORK); 1729 XFS_BMAP_TRACE_POST_UPDATE("0", ip, idx, XFS_DATA_FORK);
1751 r[0] = *new; 1730 r[0] = *new;
1752 r[1].br_startoff = new_endoff; 1731 r[1].br_startoff = new_endoff;
1753 r[1].br_blockcount = 1732 r[1].br_blockcount =
1754 PREV.br_startoff + PREV.br_blockcount - new_endoff; 1733 PREV.br_startoff + PREV.br_blockcount - new_endoff;
1755 r[1].br_startblock = new->br_startblock + new->br_blockcount; 1734 r[1].br_startblock = new->br_startblock + new->br_blockcount;
1756 r[1].br_state = oldext; 1735 r[1].br_state = oldext;
1757 xfs_bmap_trace_insert(fname, "0", ip, idx + 1, 2, &r[0], &r[1], 1736 XFS_BMAP_TRACE_INSERT("0", ip, idx + 1, 2, &r[0], &r[1],
1758 XFS_DATA_FORK); 1737 XFS_DATA_FORK);
1759 xfs_iext_insert(ifp, idx + 1, 2, &r[0]); 1738 xfs_iext_insert(ifp, idx + 1, 2, &r[0]);
1760 ip->i_df.if_lastex = idx + 1; 1739 ip->i_df.if_lastex = idx + 1;
@@ -1845,9 +1824,6 @@ xfs_bmap_add_extent_hole_delay(
1845 int rsvd) /* OK to allocate reserved blocks */ 1824 int rsvd) /* OK to allocate reserved blocks */
1846{ 1825{
1847 xfs_bmbt_rec_t *ep; /* extent record for idx */ 1826 xfs_bmbt_rec_t *ep; /* extent record for idx */
1848#ifdef XFS_BMAP_TRACE
1849 static char fname[] = "xfs_bmap_add_extent_hole_delay";
1850#endif
1851 xfs_ifork_t *ifp; /* inode fork pointer */ 1827 xfs_ifork_t *ifp; /* inode fork pointer */
1852 xfs_bmbt_irec_t left; /* left neighbor extent entry */ 1828 xfs_bmbt_irec_t left; /* left neighbor extent entry */
1853 xfs_filblks_t newlen=0; /* new indirect size */ 1829 xfs_filblks_t newlen=0; /* new indirect size */
@@ -1919,7 +1895,7 @@ xfs_bmap_add_extent_hole_delay(
1919 */ 1895 */
1920 temp = left.br_blockcount + new->br_blockcount + 1896 temp = left.br_blockcount + new->br_blockcount +
1921 right.br_blockcount; 1897 right.br_blockcount;
1922 xfs_bmap_trace_pre_update(fname, "LC|RC", ip, idx - 1, 1898 XFS_BMAP_TRACE_PRE_UPDATE("LC|RC", ip, idx - 1,
1923 XFS_DATA_FORK); 1899 XFS_DATA_FORK);
1924 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp); 1900 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp);
1925 oldlen = STARTBLOCKVAL(left.br_startblock) + 1901 oldlen = STARTBLOCKVAL(left.br_startblock) +
@@ -1928,10 +1904,9 @@ xfs_bmap_add_extent_hole_delay(
1928 newlen = xfs_bmap_worst_indlen(ip, temp); 1904 newlen = xfs_bmap_worst_indlen(ip, temp);
1929 xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1), 1905 xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1),
1930 NULLSTARTBLOCK((int)newlen)); 1906 NULLSTARTBLOCK((int)newlen));
1931 xfs_bmap_trace_post_update(fname, "LC|RC", ip, idx - 1, 1907 XFS_BMAP_TRACE_POST_UPDATE("LC|RC", ip, idx - 1,
1932 XFS_DATA_FORK);
1933 xfs_bmap_trace_delete(fname, "LC|RC", ip, idx, 1,
1934 XFS_DATA_FORK); 1908 XFS_DATA_FORK);
1909 XFS_BMAP_TRACE_DELETE("LC|RC", ip, idx, 1, XFS_DATA_FORK);
1935 xfs_iext_remove(ifp, idx, 1); 1910 xfs_iext_remove(ifp, idx, 1);
1936 ip->i_df.if_lastex = idx - 1; 1911 ip->i_df.if_lastex = idx - 1;
1937 /* DELTA: Two in-core extents were replaced by one. */ 1912 /* DELTA: Two in-core extents were replaced by one. */
@@ -1946,7 +1921,7 @@ xfs_bmap_add_extent_hole_delay(
1946 * Merge the new allocation with the left neighbor. 1921 * Merge the new allocation with the left neighbor.
1947 */ 1922 */
1948 temp = left.br_blockcount + new->br_blockcount; 1923 temp = left.br_blockcount + new->br_blockcount;
1949 xfs_bmap_trace_pre_update(fname, "LC", ip, idx - 1, 1924 XFS_BMAP_TRACE_PRE_UPDATE("LC", ip, idx - 1,
1950 XFS_DATA_FORK); 1925 XFS_DATA_FORK);
1951 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp); 1926 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp);
1952 oldlen = STARTBLOCKVAL(left.br_startblock) + 1927 oldlen = STARTBLOCKVAL(left.br_startblock) +
@@ -1954,7 +1929,7 @@ xfs_bmap_add_extent_hole_delay(
1954 newlen = xfs_bmap_worst_indlen(ip, temp); 1929 newlen = xfs_bmap_worst_indlen(ip, temp);
1955 xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1), 1930 xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1),
1956 NULLSTARTBLOCK((int)newlen)); 1931 NULLSTARTBLOCK((int)newlen));
1957 xfs_bmap_trace_post_update(fname, "LC", ip, idx - 1, 1932 XFS_BMAP_TRACE_POST_UPDATE("LC", ip, idx - 1,
1958 XFS_DATA_FORK); 1933 XFS_DATA_FORK);
1959 ip->i_df.if_lastex = idx - 1; 1934 ip->i_df.if_lastex = idx - 1;
1960 /* DELTA: One in-core extent grew into a hole. */ 1935 /* DELTA: One in-core extent grew into a hole. */
@@ -1968,14 +1943,14 @@ xfs_bmap_add_extent_hole_delay(
1968 * on the right. 1943 * on the right.
1969 * Merge the new allocation with the right neighbor. 1944 * Merge the new allocation with the right neighbor.
1970 */ 1945 */
1971 xfs_bmap_trace_pre_update(fname, "RC", ip, idx, XFS_DATA_FORK); 1946 XFS_BMAP_TRACE_PRE_UPDATE("RC", ip, idx, XFS_DATA_FORK);
1972 temp = new->br_blockcount + right.br_blockcount; 1947 temp = new->br_blockcount + right.br_blockcount;
1973 oldlen = STARTBLOCKVAL(new->br_startblock) + 1948 oldlen = STARTBLOCKVAL(new->br_startblock) +
1974 STARTBLOCKVAL(right.br_startblock); 1949 STARTBLOCKVAL(right.br_startblock);
1975 newlen = xfs_bmap_worst_indlen(ip, temp); 1950 newlen = xfs_bmap_worst_indlen(ip, temp);
1976 xfs_bmbt_set_allf(ep, new->br_startoff, 1951 xfs_bmbt_set_allf(ep, new->br_startoff,
1977 NULLSTARTBLOCK((int)newlen), temp, right.br_state); 1952 NULLSTARTBLOCK((int)newlen), temp, right.br_state);
1978 xfs_bmap_trace_post_update(fname, "RC", ip, idx, XFS_DATA_FORK); 1953 XFS_BMAP_TRACE_POST_UPDATE("RC", ip, idx, XFS_DATA_FORK);
1979 ip->i_df.if_lastex = idx; 1954 ip->i_df.if_lastex = idx;
1980 /* DELTA: One in-core extent grew into a hole. */ 1955 /* DELTA: One in-core extent grew into a hole. */
1981 temp2 = temp; 1956 temp2 = temp;
@@ -1989,7 +1964,7 @@ xfs_bmap_add_extent_hole_delay(
1989 * Insert a new entry. 1964 * Insert a new entry.
1990 */ 1965 */
1991 oldlen = newlen = 0; 1966 oldlen = newlen = 0;
1992 xfs_bmap_trace_insert(fname, "0", ip, idx, 1, new, NULL, 1967 XFS_BMAP_TRACE_INSERT("0", ip, idx, 1, new, NULL,
1993 XFS_DATA_FORK); 1968 XFS_DATA_FORK);
1994 xfs_iext_insert(ifp, idx, 1, new); 1969 xfs_iext_insert(ifp, idx, 1, new);
1995 ip->i_df.if_lastex = idx; 1970 ip->i_df.if_lastex = idx;
@@ -2039,9 +2014,6 @@ xfs_bmap_add_extent_hole_real(
2039{ 2014{
2040 xfs_bmbt_rec_t *ep; /* pointer to extent entry ins. point */ 2015 xfs_bmbt_rec_t *ep; /* pointer to extent entry ins. point */
2041 int error; /* error return value */ 2016 int error; /* error return value */
2042#ifdef XFS_BMAP_TRACE
2043 static char fname[] = "xfs_bmap_add_extent_hole_real";
2044#endif
2045 int i; /* temp state */ 2017 int i; /* temp state */
2046 xfs_ifork_t *ifp; /* inode fork pointer */ 2018 xfs_ifork_t *ifp; /* inode fork pointer */
2047 xfs_bmbt_irec_t left; /* left neighbor extent entry */ 2019 xfs_bmbt_irec_t left; /* left neighbor extent entry */
@@ -2118,15 +2090,14 @@ xfs_bmap_add_extent_hole_real(
2118 * left and on the right. 2090 * left and on the right.
2119 * Merge all three into a single extent record. 2091 * Merge all three into a single extent record.
2120 */ 2092 */
2121 xfs_bmap_trace_pre_update(fname, "LC|RC", ip, idx - 1, 2093 XFS_BMAP_TRACE_PRE_UPDATE("LC|RC", ip, idx - 1,
2122 whichfork); 2094 whichfork);
2123 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), 2095 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
2124 left.br_blockcount + new->br_blockcount + 2096 left.br_blockcount + new->br_blockcount +
2125 right.br_blockcount); 2097 right.br_blockcount);
2126 xfs_bmap_trace_post_update(fname, "LC|RC", ip, idx - 1, 2098 XFS_BMAP_TRACE_POST_UPDATE("LC|RC", ip, idx - 1,
2127 whichfork); 2099 whichfork);
2128 xfs_bmap_trace_delete(fname, "LC|RC", ip, 2100 XFS_BMAP_TRACE_DELETE("LC|RC", ip, idx, 1, whichfork);
2129 idx, 1, whichfork);
2130 xfs_iext_remove(ifp, idx, 1); 2101 xfs_iext_remove(ifp, idx, 1);
2131 ifp->if_lastex = idx - 1; 2102 ifp->if_lastex = idx - 1;
2132 XFS_IFORK_NEXT_SET(ip, whichfork, 2103 XFS_IFORK_NEXT_SET(ip, whichfork,
@@ -2168,10 +2139,10 @@ xfs_bmap_add_extent_hole_real(
2168 * on the left. 2139 * on the left.
2169 * Merge the new allocation with the left neighbor. 2140 * Merge the new allocation with the left neighbor.
2170 */ 2141 */
2171 xfs_bmap_trace_pre_update(fname, "LC", ip, idx - 1, whichfork); 2142 XFS_BMAP_TRACE_PRE_UPDATE("LC", ip, idx - 1, whichfork);
2172 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), 2143 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
2173 left.br_blockcount + new->br_blockcount); 2144 left.br_blockcount + new->br_blockcount);
2174 xfs_bmap_trace_post_update(fname, "LC", ip, idx - 1, whichfork); 2145 XFS_BMAP_TRACE_POST_UPDATE("LC", ip, idx - 1, whichfork);
2175 ifp->if_lastex = idx - 1; 2146 ifp->if_lastex = idx - 1;
2176 if (cur == NULL) { 2147 if (cur == NULL) {
2177 rval = XFS_ILOG_FEXT(whichfork); 2148 rval = XFS_ILOG_FEXT(whichfork);
@@ -2202,11 +2173,11 @@ xfs_bmap_add_extent_hole_real(
2202 * on the right. 2173 * on the right.
2203 * Merge the new allocation with the right neighbor. 2174 * Merge the new allocation with the right neighbor.
2204 */ 2175 */
2205 xfs_bmap_trace_pre_update(fname, "RC", ip, idx, whichfork); 2176 XFS_BMAP_TRACE_PRE_UPDATE("RC", ip, idx, whichfork);
2206 xfs_bmbt_set_allf(ep, new->br_startoff, new->br_startblock, 2177 xfs_bmbt_set_allf(ep, new->br_startoff, new->br_startblock,
2207 new->br_blockcount + right.br_blockcount, 2178 new->br_blockcount + right.br_blockcount,
2208 right.br_state); 2179 right.br_state);
2209 xfs_bmap_trace_post_update(fname, "RC", ip, idx, whichfork); 2180 XFS_BMAP_TRACE_POST_UPDATE("RC", ip, idx, whichfork);
2210 ifp->if_lastex = idx; 2181 ifp->if_lastex = idx;
2211 if (cur == NULL) { 2182 if (cur == NULL) {
2212 rval = XFS_ILOG_FEXT(whichfork); 2183 rval = XFS_ILOG_FEXT(whichfork);
@@ -2237,8 +2208,7 @@ xfs_bmap_add_extent_hole_real(
2237 * real allocation. 2208 * real allocation.
2238 * Insert a new entry. 2209 * Insert a new entry.
2239 */ 2210 */
2240 xfs_bmap_trace_insert(fname, "0", ip, idx, 1, new, NULL, 2211 XFS_BMAP_TRACE_INSERT("0", ip, idx, 1, new, NULL, whichfork);
2241 whichfork);
2242 xfs_iext_insert(ifp, idx, 1, new); 2212 xfs_iext_insert(ifp, idx, 1, new);
2243 ifp->if_lastex = idx; 2213 ifp->if_lastex = idx;
2244 XFS_IFORK_NEXT_SET(ip, whichfork, 2214 XFS_IFORK_NEXT_SET(ip, whichfork,
@@ -2605,12 +2575,10 @@ xfs_bmap_rtalloc(
2605 xfs_extlen_t prod = 0; /* product factor for allocators */ 2575 xfs_extlen_t prod = 0; /* product factor for allocators */
2606 xfs_extlen_t ralen = 0; /* realtime allocation length */ 2576 xfs_extlen_t ralen = 0; /* realtime allocation length */
2607 xfs_extlen_t align; /* minimum allocation alignment */ 2577 xfs_extlen_t align; /* minimum allocation alignment */
2608 xfs_rtblock_t rtx; /* realtime extent number */
2609 xfs_rtblock_t rtb; 2578 xfs_rtblock_t rtb;
2610 2579
2611 mp = ap->ip->i_mount; 2580 mp = ap->ip->i_mount;
2612 align = ap->ip->i_d.di_extsize ? 2581 align = xfs_get_extsz_hint(ap->ip);
2613 ap->ip->i_d.di_extsize : mp->m_sb.sb_rextsize;
2614 prod = align / mp->m_sb.sb_rextsize; 2582 prod = align / mp->m_sb.sb_rextsize;
2615 error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp, 2583 error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,
2616 align, 1, ap->eof, 0, 2584 align, 1, ap->eof, 0,
@@ -2644,6 +2612,8 @@ xfs_bmap_rtalloc(
2644 * pick an extent that will space things out in the rt area. 2612 * pick an extent that will space things out in the rt area.
2645 */ 2613 */
2646 if (ap->eof && ap->off == 0) { 2614 if (ap->eof && ap->off == 0) {
2615 xfs_rtblock_t uninitialized_var(rtx); /* realtime extent no */
2616
2647 error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx); 2617 error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx);
2648 if (error) 2618 if (error)
2649 return error; 2619 return error;
@@ -2715,9 +2685,7 @@ xfs_bmap_btalloc(
2715 int error; 2685 int error;
2716 2686
2717 mp = ap->ip->i_mount; 2687 mp = ap->ip->i_mount;
2718 align = (ap->userdata && ap->ip->i_d.di_extsize && 2688 align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0;
2719 (ap->ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)) ?
2720 ap->ip->i_d.di_extsize : 0;
2721 if (unlikely(align)) { 2689 if (unlikely(align)) {
2722 error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp, 2690 error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,
2723 align, 0, ap->eof, 0, ap->conv, 2691 align, 0, ap->eof, 0, ap->conv,
@@ -2727,9 +2695,15 @@ xfs_bmap_btalloc(
2727 } 2695 }
2728 nullfb = ap->firstblock == NULLFSBLOCK; 2696 nullfb = ap->firstblock == NULLFSBLOCK;
2729 fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock); 2697 fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock);
2730 if (nullfb) 2698 if (nullfb) {
2731 ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino); 2699 if (ap->userdata && xfs_inode_is_filestream(ap->ip)) {
2732 else 2700 ag = xfs_filestream_lookup_ag(ap->ip);
2701 ag = (ag != NULLAGNUMBER) ? ag : 0;
2702 ap->rval = XFS_AGB_TO_FSB(mp, ag, 0);
2703 } else {
2704 ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
2705 }
2706 } else
2733 ap->rval = ap->firstblock; 2707 ap->rval = ap->firstblock;
2734 2708
2735 xfs_bmap_adjacent(ap); 2709 xfs_bmap_adjacent(ap);
@@ -2753,13 +2727,22 @@ xfs_bmap_btalloc(
2753 args.firstblock = ap->firstblock; 2727 args.firstblock = ap->firstblock;
2754 blen = 0; 2728 blen = 0;
2755 if (nullfb) { 2729 if (nullfb) {
2756 args.type = XFS_ALLOCTYPE_START_BNO; 2730 if (ap->userdata && xfs_inode_is_filestream(ap->ip))
2731 args.type = XFS_ALLOCTYPE_NEAR_BNO;
2732 else
2733 args.type = XFS_ALLOCTYPE_START_BNO;
2757 args.total = ap->total; 2734 args.total = ap->total;
2735
2758 /* 2736 /*
2759 * Find the longest available space. 2737 * Search for an allocation group with a single extent
2760 * We're going to try for the whole allocation at once. 2738 * large enough for the request.
2739 *
2740 * If one isn't found, then adjust the minimum allocation
2741 * size to the largest space found.
2761 */ 2742 */
2762 startag = ag = XFS_FSB_TO_AGNO(mp, args.fsbno); 2743 startag = ag = XFS_FSB_TO_AGNO(mp, args.fsbno);
2744 if (startag == NULLAGNUMBER)
2745 startag = ag = 0;
2763 notinit = 0; 2746 notinit = 0;
2764 down_read(&mp->m_peraglock); 2747 down_read(&mp->m_peraglock);
2765 while (blen < ap->alen) { 2748 while (blen < ap->alen) {
@@ -2785,6 +2768,35 @@ xfs_bmap_btalloc(
2785 blen = longest; 2768 blen = longest;
2786 } else 2769 } else
2787 notinit = 1; 2770 notinit = 1;
2771
2772 if (xfs_inode_is_filestream(ap->ip)) {
2773 if (blen >= ap->alen)
2774 break;
2775
2776 if (ap->userdata) {
2777 /*
2778 * If startag is an invalid AG, we've
2779 * come here once before and
2780 * xfs_filestream_new_ag picked the
2781 * best currently available.
2782 *
2783 * Don't continue looping, since we
2784 * could loop forever.
2785 */
2786 if (startag == NULLAGNUMBER)
2787 break;
2788
2789 error = xfs_filestream_new_ag(ap, &ag);
2790 if (error) {
2791 up_read(&mp->m_peraglock);
2792 return error;
2793 }
2794
2795 /* loop again to set 'blen'*/
2796 startag = NULLAGNUMBER;
2797 continue;
2798 }
2799 }
2788 if (++ag == mp->m_sb.sb_agcount) 2800 if (++ag == mp->m_sb.sb_agcount)
2789 ag = 0; 2801 ag = 0;
2790 if (ag == startag) 2802 if (ag == startag)
@@ -2809,17 +2821,27 @@ xfs_bmap_btalloc(
2809 */ 2821 */
2810 else 2822 else
2811 args.minlen = ap->alen; 2823 args.minlen = ap->alen;
2824
2825 /*
2826 * set the failure fallback case to look in the selected
2827 * AG as the stream may have moved.
2828 */
2829 if (xfs_inode_is_filestream(ap->ip))
2830 ap->rval = args.fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
2812 } else if (ap->low) { 2831 } else if (ap->low) {
2813 args.type = XFS_ALLOCTYPE_START_BNO; 2832 if (xfs_inode_is_filestream(ap->ip))
2833 args.type = XFS_ALLOCTYPE_FIRST_AG;
2834 else
2835 args.type = XFS_ALLOCTYPE_START_BNO;
2814 args.total = args.minlen = ap->minlen; 2836 args.total = args.minlen = ap->minlen;
2815 } else { 2837 } else {
2816 args.type = XFS_ALLOCTYPE_NEAR_BNO; 2838 args.type = XFS_ALLOCTYPE_NEAR_BNO;
2817 args.total = ap->total; 2839 args.total = ap->total;
2818 args.minlen = ap->minlen; 2840 args.minlen = ap->minlen;
2819 } 2841 }
2820 if (unlikely(ap->userdata && ap->ip->i_d.di_extsize && 2842 /* apply extent size hints if obtained earlier */
2821 (ap->ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE))) { 2843 if (unlikely(align)) {
2822 args.prod = ap->ip->i_d.di_extsize; 2844 args.prod = align;
2823 if ((args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod))) 2845 if ((args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod)))
2824 args.mod = (xfs_extlen_t)(args.prod - args.mod); 2846 args.mod = (xfs_extlen_t)(args.prod - args.mod);
2825 } else if (mp->m_sb.sb_blocksize >= NBPP) { 2847 } else if (mp->m_sb.sb_blocksize >= NBPP) {
@@ -3051,9 +3073,6 @@ xfs_bmap_del_extent(
3051 xfs_bmbt_rec_t *ep; /* current extent entry pointer */ 3073 xfs_bmbt_rec_t *ep; /* current extent entry pointer */
3052 int error; /* error return value */ 3074 int error; /* error return value */
3053 int flags; /* inode logging flags */ 3075 int flags; /* inode logging flags */
3054#ifdef XFS_BMAP_TRACE
3055 static char fname[] = "xfs_bmap_del_extent";
3056#endif
3057 xfs_bmbt_irec_t got; /* current extent entry */ 3076 xfs_bmbt_irec_t got; /* current extent entry */
3058 xfs_fileoff_t got_endoff; /* first offset past got */ 3077 xfs_fileoff_t got_endoff; /* first offset past got */
3059 int i; /* temp state */ 3078 int i; /* temp state */
@@ -3147,7 +3166,7 @@ xfs_bmap_del_extent(
3147 /* 3166 /*
3148 * Matches the whole extent. Delete the entry. 3167 * Matches the whole extent. Delete the entry.
3149 */ 3168 */
3150 xfs_bmap_trace_delete(fname, "3", ip, idx, 1, whichfork); 3169 XFS_BMAP_TRACE_DELETE("3", ip, idx, 1, whichfork);
3151 xfs_iext_remove(ifp, idx, 1); 3170 xfs_iext_remove(ifp, idx, 1);
3152 ifp->if_lastex = idx; 3171 ifp->if_lastex = idx;
3153 if (delay) 3172 if (delay)
@@ -3168,7 +3187,7 @@ xfs_bmap_del_extent(
3168 /* 3187 /*
3169 * Deleting the first part of the extent. 3188 * Deleting the first part of the extent.
3170 */ 3189 */
3171 xfs_bmap_trace_pre_update(fname, "2", ip, idx, whichfork); 3190 XFS_BMAP_TRACE_PRE_UPDATE("2", ip, idx, whichfork);
3172 xfs_bmbt_set_startoff(ep, del_endoff); 3191 xfs_bmbt_set_startoff(ep, del_endoff);
3173 temp = got.br_blockcount - del->br_blockcount; 3192 temp = got.br_blockcount - del->br_blockcount;
3174 xfs_bmbt_set_blockcount(ep, temp); 3193 xfs_bmbt_set_blockcount(ep, temp);
@@ -3177,13 +3196,13 @@ xfs_bmap_del_extent(
3177 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), 3196 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
3178 da_old); 3197 da_old);
3179 xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp)); 3198 xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
3180 xfs_bmap_trace_post_update(fname, "2", ip, idx, 3199 XFS_BMAP_TRACE_POST_UPDATE("2", ip, idx,
3181 whichfork); 3200 whichfork);
3182 da_new = temp; 3201 da_new = temp;
3183 break; 3202 break;
3184 } 3203 }
3185 xfs_bmbt_set_startblock(ep, del_endblock); 3204 xfs_bmbt_set_startblock(ep, del_endblock);
3186 xfs_bmap_trace_post_update(fname, "2", ip, idx, whichfork); 3205 XFS_BMAP_TRACE_POST_UPDATE("2", ip, idx, whichfork);
3187 if (!cur) { 3206 if (!cur) {
3188 flags |= XFS_ILOG_FEXT(whichfork); 3207 flags |= XFS_ILOG_FEXT(whichfork);
3189 break; 3208 break;
@@ -3199,19 +3218,19 @@ xfs_bmap_del_extent(
3199 * Deleting the last part of the extent. 3218 * Deleting the last part of the extent.
3200 */ 3219 */
3201 temp = got.br_blockcount - del->br_blockcount; 3220 temp = got.br_blockcount - del->br_blockcount;
3202 xfs_bmap_trace_pre_update(fname, "1", ip, idx, whichfork); 3221 XFS_BMAP_TRACE_PRE_UPDATE("1", ip, idx, whichfork);
3203 xfs_bmbt_set_blockcount(ep, temp); 3222 xfs_bmbt_set_blockcount(ep, temp);
3204 ifp->if_lastex = idx; 3223 ifp->if_lastex = idx;
3205 if (delay) { 3224 if (delay) {
3206 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), 3225 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
3207 da_old); 3226 da_old);
3208 xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp)); 3227 xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
3209 xfs_bmap_trace_post_update(fname, "1", ip, idx, 3228 XFS_BMAP_TRACE_POST_UPDATE("1", ip, idx,
3210 whichfork); 3229 whichfork);
3211 da_new = temp; 3230 da_new = temp;
3212 break; 3231 break;
3213 } 3232 }
3214 xfs_bmap_trace_post_update(fname, "1", ip, idx, whichfork); 3233 XFS_BMAP_TRACE_POST_UPDATE("1", ip, idx, whichfork);
3215 if (!cur) { 3234 if (!cur) {
3216 flags |= XFS_ILOG_FEXT(whichfork); 3235 flags |= XFS_ILOG_FEXT(whichfork);
3217 break; 3236 break;
@@ -3228,7 +3247,7 @@ xfs_bmap_del_extent(
3228 * Deleting the middle of the extent. 3247 * Deleting the middle of the extent.
3229 */ 3248 */
3230 temp = del->br_startoff - got.br_startoff; 3249 temp = del->br_startoff - got.br_startoff;
3231 xfs_bmap_trace_pre_update(fname, "0", ip, idx, whichfork); 3250 XFS_BMAP_TRACE_PRE_UPDATE("0", ip, idx, whichfork);
3232 xfs_bmbt_set_blockcount(ep, temp); 3251 xfs_bmbt_set_blockcount(ep, temp);
3233 new.br_startoff = del_endoff; 3252 new.br_startoff = del_endoff;
3234 temp2 = got_endoff - del_endoff; 3253 temp2 = got_endoff - del_endoff;
@@ -3315,8 +3334,8 @@ xfs_bmap_del_extent(
3315 } 3334 }
3316 } 3335 }
3317 } 3336 }
3318 xfs_bmap_trace_post_update(fname, "0", ip, idx, whichfork); 3337 XFS_BMAP_TRACE_POST_UPDATE("0", ip, idx, whichfork);
3319 xfs_bmap_trace_insert(fname, "0", ip, idx + 1, 1, &new, NULL, 3338 XFS_BMAP_TRACE_INSERT("0", ip, idx + 1, 1, &new, NULL,
3320 whichfork); 3339 whichfork);
3321 xfs_iext_insert(ifp, idx + 1, 1, &new); 3340 xfs_iext_insert(ifp, idx + 1, 1, &new);
3322 ifp->if_lastex = idx + 1; 3341 ifp->if_lastex = idx + 1;
@@ -3556,9 +3575,6 @@ xfs_bmap_local_to_extents(
3556{ 3575{
3557 int error; /* error return value */ 3576 int error; /* error return value */
3558 int flags; /* logging flags returned */ 3577 int flags; /* logging flags returned */
3559#ifdef XFS_BMAP_TRACE
3560 static char fname[] = "xfs_bmap_local_to_extents";
3561#endif
3562 xfs_ifork_t *ifp; /* inode fork pointer */ 3578 xfs_ifork_t *ifp; /* inode fork pointer */
3563 3579
3564 /* 3580 /*
@@ -3613,7 +3629,7 @@ xfs_bmap_local_to_extents(
3613 xfs_iext_add(ifp, 0, 1); 3629 xfs_iext_add(ifp, 0, 1);
3614 ep = xfs_iext_get_ext(ifp, 0); 3630 ep = xfs_iext_get_ext(ifp, 0);
3615 xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM); 3631 xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM);
3616 xfs_bmap_trace_post_update(fname, "new", ip, 0, whichfork); 3632 XFS_BMAP_TRACE_POST_UPDATE("new", ip, 0, whichfork);
3617 XFS_IFORK_NEXT_SET(ip, whichfork, 1); 3633 XFS_IFORK_NEXT_SET(ip, whichfork, 1);
3618 ip->i_d.di_nblocks = 1; 3634 ip->i_d.di_nblocks = 1;
3619 XFS_TRANS_MOD_DQUOT_BYINO(args.mp, tp, ip, 3635 XFS_TRANS_MOD_DQUOT_BYINO(args.mp, tp, ip,
@@ -3736,7 +3752,7 @@ ktrace_t *xfs_bmap_trace_buf;
3736STATIC void 3752STATIC void
3737xfs_bmap_trace_addentry( 3753xfs_bmap_trace_addentry(
3738 int opcode, /* operation */ 3754 int opcode, /* operation */
3739 char *fname, /* function name */ 3755 const char *fname, /* function name */
3740 char *desc, /* operation description */ 3756 char *desc, /* operation description */
3741 xfs_inode_t *ip, /* incore inode pointer */ 3757 xfs_inode_t *ip, /* incore inode pointer */
3742 xfs_extnum_t idx, /* index of entry(ies) */ 3758 xfs_extnum_t idx, /* index of entry(ies) */
@@ -3795,7 +3811,7 @@ xfs_bmap_trace_addentry(
3795 */ 3811 */
3796STATIC void 3812STATIC void
3797xfs_bmap_trace_delete( 3813xfs_bmap_trace_delete(
3798 char *fname, /* function name */ 3814 const char *fname, /* function name */
3799 char *desc, /* operation description */ 3815 char *desc, /* operation description */
3800 xfs_inode_t *ip, /* incore inode pointer */ 3816 xfs_inode_t *ip, /* incore inode pointer */
3801 xfs_extnum_t idx, /* index of entry(entries) deleted */ 3817 xfs_extnum_t idx, /* index of entry(entries) deleted */
@@ -3817,7 +3833,7 @@ xfs_bmap_trace_delete(
3817 */ 3833 */
3818STATIC void 3834STATIC void
3819xfs_bmap_trace_insert( 3835xfs_bmap_trace_insert(
3820 char *fname, /* function name */ 3836 const char *fname, /* function name */
3821 char *desc, /* operation description */ 3837 char *desc, /* operation description */
3822 xfs_inode_t *ip, /* incore inode pointer */ 3838 xfs_inode_t *ip, /* incore inode pointer */
3823 xfs_extnum_t idx, /* index of entry(entries) inserted */ 3839 xfs_extnum_t idx, /* index of entry(entries) inserted */
@@ -3846,7 +3862,7 @@ xfs_bmap_trace_insert(
3846 */ 3862 */
3847STATIC void 3863STATIC void
3848xfs_bmap_trace_post_update( 3864xfs_bmap_trace_post_update(
3849 char *fname, /* function name */ 3865 const char *fname, /* function name */
3850 char *desc, /* operation description */ 3866 char *desc, /* operation description */
3851 xfs_inode_t *ip, /* incore inode pointer */ 3867 xfs_inode_t *ip, /* incore inode pointer */
3852 xfs_extnum_t idx, /* index of entry updated */ 3868 xfs_extnum_t idx, /* index of entry updated */
@@ -3864,7 +3880,7 @@ xfs_bmap_trace_post_update(
3864 */ 3880 */
3865STATIC void 3881STATIC void
3866xfs_bmap_trace_pre_update( 3882xfs_bmap_trace_pre_update(
3867 char *fname, /* function name */ 3883 const char *fname, /* function name */
3868 char *desc, /* operation description */ 3884 char *desc, /* operation description */
3869 xfs_inode_t *ip, /* incore inode pointer */ 3885 xfs_inode_t *ip, /* incore inode pointer */
3870 xfs_extnum_t idx, /* index of entry to be updated */ 3886 xfs_extnum_t idx, /* index of entry to be updated */
@@ -4481,9 +4497,6 @@ xfs_bmap_read_extents(
4481 xfs_buf_t *bp; /* buffer for "block" */ 4497 xfs_buf_t *bp; /* buffer for "block" */
4482 int error; /* error return value */ 4498 int error; /* error return value */
4483 xfs_exntfmt_t exntf; /* XFS_EXTFMT_NOSTATE, if checking */ 4499 xfs_exntfmt_t exntf; /* XFS_EXTFMT_NOSTATE, if checking */
4484#ifdef XFS_BMAP_TRACE
4485 static char fname[] = "xfs_bmap_read_extents";
4486#endif
4487 xfs_extnum_t i, j; /* index into the extents list */ 4500 xfs_extnum_t i, j; /* index into the extents list */
4488 xfs_ifork_t *ifp; /* fork structure */ 4501 xfs_ifork_t *ifp; /* fork structure */
4489 int level; /* btree level, for checking */ 4502 int level; /* btree level, for checking */
@@ -4600,7 +4613,7 @@ xfs_bmap_read_extents(
4600 } 4613 }
4601 ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); 4614 ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
4602 ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork)); 4615 ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork));
4603 xfs_bmap_trace_exlist(fname, ip, i, whichfork); 4616 XFS_BMAP_TRACE_EXLIST(ip, i, whichfork);
4604 return 0; 4617 return 0;
4605error0: 4618error0:
4606 xfs_trans_brelse(tp, bp); 4619 xfs_trans_brelse(tp, bp);
@@ -4613,7 +4626,7 @@ error0:
4613 */ 4626 */
4614void 4627void
4615xfs_bmap_trace_exlist( 4628xfs_bmap_trace_exlist(
4616 char *fname, /* function name */ 4629 const char *fname, /* function name */
4617 xfs_inode_t *ip, /* incore inode pointer */ 4630 xfs_inode_t *ip, /* incore inode pointer */
4618 xfs_extnum_t cnt, /* count of entries in the list */ 4631 xfs_extnum_t cnt, /* count of entries in the list */
4619 int whichfork) /* data or attr fork */ 4632 int whichfork) /* data or attr fork */
@@ -4628,7 +4641,7 @@ xfs_bmap_trace_exlist(
4628 for (idx = 0; idx < cnt; idx++) { 4641 for (idx = 0; idx < cnt; idx++) {
4629 ep = xfs_iext_get_ext(ifp, idx); 4642 ep = xfs_iext_get_ext(ifp, idx);
4630 xfs_bmbt_get_all(ep, &s); 4643 xfs_bmbt_get_all(ep, &s);
4631 xfs_bmap_trace_insert(fname, "exlist", ip, idx, 1, &s, NULL, 4644 XFS_BMAP_TRACE_INSERT("exlist", ip, idx, 1, &s, NULL,
4632 whichfork); 4645 whichfork);
4633 } 4646 }
4634} 4647}
@@ -4868,12 +4881,7 @@ xfs_bmapi(
4868 xfs_extlen_t extsz; 4881 xfs_extlen_t extsz;
4869 4882
4870 /* Figure out the extent size, adjust alen */ 4883 /* Figure out the extent size, adjust alen */
4871 if (rt) { 4884 extsz = xfs_get_extsz_hint(ip);
4872 if (!(extsz = ip->i_d.di_extsize))
4873 extsz = mp->m_sb.sb_rextsize;
4874 } else {
4875 extsz = ip->i_d.di_extsize;
4876 }
4877 if (extsz) { 4885 if (extsz) {
4878 error = xfs_bmap_extsize_align(mp, 4886 error = xfs_bmap_extsize_align(mp,
4879 &got, &prev, extsz, 4887 &got, &prev, extsz,
@@ -5219,10 +5227,10 @@ xfs_bmapi(
5219 * Else go on to the next record. 5227 * Else go on to the next record.
5220 */ 5228 */
5221 ep = xfs_iext_get_ext(ifp, ++lastx); 5229 ep = xfs_iext_get_ext(ifp, ++lastx);
5222 if (lastx >= nextents) { 5230 prev = got;
5231 if (lastx >= nextents)
5223 eof = 1; 5232 eof = 1;
5224 prev = got; 5233 else
5225 } else
5226 xfs_bmbt_get_all(ep, &got); 5234 xfs_bmbt_get_all(ep, &got);
5227 } 5235 }
5228 ifp->if_lastex = lastx; 5236 ifp->if_lastex = lastx;
@@ -5813,8 +5821,7 @@ xfs_getbmap(
5813 ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) 5821 ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
5814 return XFS_ERROR(EINVAL); 5822 return XFS_ERROR(EINVAL);
5815 if (whichfork == XFS_DATA_FORK) { 5823 if (whichfork == XFS_DATA_FORK) {
5816 if ((ip->i_d.di_extsize && (ip->i_d.di_flags & 5824 if (xfs_get_extsz_hint(ip) ||
5817 (XFS_DIFLAG_REALTIME|XFS_DIFLAG_EXTSIZE))) ||
5818 ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){ 5825 ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
5819 prealloced = 1; 5826 prealloced = 1;
5820 fixlen = XFS_MAXIOFFSET(mp); 5827 fixlen = XFS_MAXIOFFSET(mp);
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 4f24c7e39b31..524b1c9d5246 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -144,12 +144,14 @@ extern ktrace_t *xfs_bmap_trace_buf;
144 */ 144 */
145void 145void
146xfs_bmap_trace_exlist( 146xfs_bmap_trace_exlist(
147 char *fname, /* function name */ 147 const char *fname, /* function name */
148 struct xfs_inode *ip, /* incore inode pointer */ 148 struct xfs_inode *ip, /* incore inode pointer */
149 xfs_extnum_t cnt, /* count of entries in list */ 149 xfs_extnum_t cnt, /* count of entries in list */
150 int whichfork); /* data or attr fork */ 150 int whichfork); /* data or attr fork */
151#define XFS_BMAP_TRACE_EXLIST(ip,c,w) \
152 xfs_bmap_trace_exlist(__FUNCTION__,ip,c,w)
151#else 153#else
152#define xfs_bmap_trace_exlist(f,ip,c,w) 154#define XFS_BMAP_TRACE_EXLIST(ip,c,w)
153#endif 155#endif
154 156
155/* 157/*
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 0bf192fea3eb..89b891f51cfb 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -76,7 +76,7 @@ static char EXIT[] = "exit";
76 */ 76 */
77STATIC void 77STATIC void
78xfs_bmbt_trace_enter( 78xfs_bmbt_trace_enter(
79 char *func, 79 const char *func,
80 xfs_btree_cur_t *cur, 80 xfs_btree_cur_t *cur,
81 char *s, 81 char *s,
82 int type, 82 int type,
@@ -117,7 +117,7 @@ xfs_bmbt_trace_enter(
117 */ 117 */
118STATIC void 118STATIC void
119xfs_bmbt_trace_argbi( 119xfs_bmbt_trace_argbi(
120 char *func, 120 const char *func,
121 xfs_btree_cur_t *cur, 121 xfs_btree_cur_t *cur,
122 xfs_buf_t *b, 122 xfs_buf_t *b,
123 int i, 123 int i,
@@ -134,7 +134,7 @@ xfs_bmbt_trace_argbi(
134 */ 134 */
135STATIC void 135STATIC void
136xfs_bmbt_trace_argbii( 136xfs_bmbt_trace_argbii(
137 char *func, 137 const char *func,
138 xfs_btree_cur_t *cur, 138 xfs_btree_cur_t *cur,
139 xfs_buf_t *b, 139 xfs_buf_t *b,
140 int i0, 140 int i0,
@@ -153,7 +153,7 @@ xfs_bmbt_trace_argbii(
153 */ 153 */
154STATIC void 154STATIC void
155xfs_bmbt_trace_argfffi( 155xfs_bmbt_trace_argfffi(
156 char *func, 156 const char *func,
157 xfs_btree_cur_t *cur, 157 xfs_btree_cur_t *cur,
158 xfs_dfiloff_t o, 158 xfs_dfiloff_t o,
159 xfs_dfsbno_t b, 159 xfs_dfsbno_t b,
@@ -172,7 +172,7 @@ xfs_bmbt_trace_argfffi(
172 */ 172 */
173STATIC void 173STATIC void
174xfs_bmbt_trace_argi( 174xfs_bmbt_trace_argi(
175 char *func, 175 const char *func,
176 xfs_btree_cur_t *cur, 176 xfs_btree_cur_t *cur,
177 int i, 177 int i,
178 int line) 178 int line)
@@ -188,7 +188,7 @@ xfs_bmbt_trace_argi(
188 */ 188 */
189STATIC void 189STATIC void
190xfs_bmbt_trace_argifk( 190xfs_bmbt_trace_argifk(
191 char *func, 191 const char *func,
192 xfs_btree_cur_t *cur, 192 xfs_btree_cur_t *cur,
193 int i, 193 int i,
194 xfs_fsblock_t f, 194 xfs_fsblock_t f,
@@ -206,7 +206,7 @@ xfs_bmbt_trace_argifk(
206 */ 206 */
207STATIC void 207STATIC void
208xfs_bmbt_trace_argifr( 208xfs_bmbt_trace_argifr(
209 char *func, 209 const char *func,
210 xfs_btree_cur_t *cur, 210 xfs_btree_cur_t *cur,
211 int i, 211 int i,
212 xfs_fsblock_t f, 212 xfs_fsblock_t f,
@@ -235,7 +235,7 @@ xfs_bmbt_trace_argifr(
235 */ 235 */
236STATIC void 236STATIC void
237xfs_bmbt_trace_argik( 237xfs_bmbt_trace_argik(
238 char *func, 238 const char *func,
239 xfs_btree_cur_t *cur, 239 xfs_btree_cur_t *cur,
240 int i, 240 int i,
241 xfs_bmbt_key_t *k, 241 xfs_bmbt_key_t *k,
@@ -255,7 +255,7 @@ xfs_bmbt_trace_argik(
255 */ 255 */
256STATIC void 256STATIC void
257xfs_bmbt_trace_cursor( 257xfs_bmbt_trace_cursor(
258 char *func, 258 const char *func,
259 xfs_btree_cur_t *cur, 259 xfs_btree_cur_t *cur,
260 char *s, 260 char *s,
261 int line) 261 int line)
@@ -274,21 +274,21 @@ xfs_bmbt_trace_cursor(
274} 274}
275 275
276#define XFS_BMBT_TRACE_ARGBI(c,b,i) \ 276#define XFS_BMBT_TRACE_ARGBI(c,b,i) \
277 xfs_bmbt_trace_argbi(fname, c, b, i, __LINE__) 277 xfs_bmbt_trace_argbi(__FUNCTION__, c, b, i, __LINE__)
278#define XFS_BMBT_TRACE_ARGBII(c,b,i,j) \ 278#define XFS_BMBT_TRACE_ARGBII(c,b,i,j) \
279 xfs_bmbt_trace_argbii(fname, c, b, i, j, __LINE__) 279 xfs_bmbt_trace_argbii(__FUNCTION__, c, b, i, j, __LINE__)
280#define XFS_BMBT_TRACE_ARGFFFI(c,o,b,i,j) \ 280#define XFS_BMBT_TRACE_ARGFFFI(c,o,b,i,j) \
281 xfs_bmbt_trace_argfffi(fname, c, o, b, i, j, __LINE__) 281 xfs_bmbt_trace_argfffi(__FUNCTION__, c, o, b, i, j, __LINE__)
282#define XFS_BMBT_TRACE_ARGI(c,i) \ 282#define XFS_BMBT_TRACE_ARGI(c,i) \
283 xfs_bmbt_trace_argi(fname, c, i, __LINE__) 283 xfs_bmbt_trace_argi(__FUNCTION__, c, i, __LINE__)
284#define XFS_BMBT_TRACE_ARGIFK(c,i,f,s) \ 284#define XFS_BMBT_TRACE_ARGIFK(c,i,f,s) \
285 xfs_bmbt_trace_argifk(fname, c, i, f, s, __LINE__) 285 xfs_bmbt_trace_argifk(__FUNCTION__, c, i, f, s, __LINE__)
286#define XFS_BMBT_TRACE_ARGIFR(c,i,f,r) \ 286#define XFS_BMBT_TRACE_ARGIFR(c,i,f,r) \
287 xfs_bmbt_trace_argifr(fname, c, i, f, r, __LINE__) 287 xfs_bmbt_trace_argifr(__FUNCTION__, c, i, f, r, __LINE__)
288#define XFS_BMBT_TRACE_ARGIK(c,i,k) \ 288#define XFS_BMBT_TRACE_ARGIK(c,i,k) \
289 xfs_bmbt_trace_argik(fname, c, i, k, __LINE__) 289 xfs_bmbt_trace_argik(__FUNCTION__, c, i, k, __LINE__)
290#define XFS_BMBT_TRACE_CURSOR(c,s) \ 290#define XFS_BMBT_TRACE_CURSOR(c,s) \
291 xfs_bmbt_trace_cursor(fname, c, s, __LINE__) 291 xfs_bmbt_trace_cursor(__FUNCTION__, c, s, __LINE__)
292#else 292#else
293#define XFS_BMBT_TRACE_ARGBI(c,b,i) 293#define XFS_BMBT_TRACE_ARGBI(c,b,i)
294#define XFS_BMBT_TRACE_ARGBII(c,b,i,j) 294#define XFS_BMBT_TRACE_ARGBII(c,b,i,j)
@@ -318,9 +318,6 @@ xfs_bmbt_delrec(
318 xfs_fsblock_t bno; /* fs-relative block number */ 318 xfs_fsblock_t bno; /* fs-relative block number */
319 xfs_buf_t *bp; /* buffer for block */ 319 xfs_buf_t *bp; /* buffer for block */
320 int error; /* error return value */ 320 int error; /* error return value */
321#ifdef XFS_BMBT_TRACE
322 static char fname[] = "xfs_bmbt_delrec";
323#endif
324 int i; /* loop counter */ 321 int i; /* loop counter */
325 int j; /* temp state */ 322 int j; /* temp state */
326 xfs_bmbt_key_t key; /* bmap btree key */ 323 xfs_bmbt_key_t key; /* bmap btree key */
@@ -694,9 +691,6 @@ xfs_bmbt_insrec(
694 xfs_bmbt_block_t *block; /* bmap btree block */ 691 xfs_bmbt_block_t *block; /* bmap btree block */
695 xfs_buf_t *bp; /* buffer for block */ 692 xfs_buf_t *bp; /* buffer for block */
696 int error; /* error return value */ 693 int error; /* error return value */
697#ifdef XFS_BMBT_TRACE
698 static char fname[] = "xfs_bmbt_insrec";
699#endif
700 int i; /* loop index */ 694 int i; /* loop index */
701 xfs_bmbt_key_t key; /* bmap btree key */ 695 xfs_bmbt_key_t key; /* bmap btree key */
702 xfs_bmbt_key_t *kp=NULL; /* pointer to bmap btree key */ 696 xfs_bmbt_key_t *kp=NULL; /* pointer to bmap btree key */
@@ -881,9 +875,6 @@ xfs_bmbt_killroot(
881#ifdef DEBUG 875#ifdef DEBUG
882 int error; 876 int error;
883#endif 877#endif
884#ifdef XFS_BMBT_TRACE
885 static char fname[] = "xfs_bmbt_killroot";
886#endif
887 int i; 878 int i;
888 xfs_bmbt_key_t *kp; 879 xfs_bmbt_key_t *kp;
889 xfs_inode_t *ip; 880 xfs_inode_t *ip;
@@ -973,9 +964,6 @@ xfs_bmbt_log_keys(
973 int kfirst, 964 int kfirst,
974 int klast) 965 int klast)
975{ 966{
976#ifdef XFS_BMBT_TRACE
977 static char fname[] = "xfs_bmbt_log_keys";
978#endif
979 xfs_trans_t *tp; 967 xfs_trans_t *tp;
980 968
981 XFS_BMBT_TRACE_CURSOR(cur, ENTRY); 969 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
@@ -1012,9 +1000,6 @@ xfs_bmbt_log_ptrs(
1012 int pfirst, 1000 int pfirst,
1013 int plast) 1001 int plast)
1014{ 1002{
1015#ifdef XFS_BMBT_TRACE
1016 static char fname[] = "xfs_bmbt_log_ptrs";
1017#endif
1018 xfs_trans_t *tp; 1003 xfs_trans_t *tp;
1019 1004
1020 XFS_BMBT_TRACE_CURSOR(cur, ENTRY); 1005 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
@@ -1055,9 +1040,6 @@ xfs_bmbt_lookup(
1055 xfs_daddr_t d; 1040 xfs_daddr_t d;
1056 xfs_sfiloff_t diff; 1041 xfs_sfiloff_t diff;
1057 int error; /* error return value */ 1042 int error; /* error return value */
1058#ifdef XFS_BMBT_TRACE
1059 static char fname[] = "xfs_bmbt_lookup";
1060#endif
1061 xfs_fsblock_t fsbno=0; 1043 xfs_fsblock_t fsbno=0;
1062 int high; 1044 int high;
1063 int i; 1045 int i;
@@ -1195,9 +1177,6 @@ xfs_bmbt_lshift(
1195 int *stat) /* success/failure */ 1177 int *stat) /* success/failure */
1196{ 1178{
1197 int error; /* error return value */ 1179 int error; /* error return value */
1198#ifdef XFS_BMBT_TRACE
1199 static char fname[] = "xfs_bmbt_lshift";
1200#endif
1201#ifdef DEBUG 1180#ifdef DEBUG
1202 int i; /* loop counter */ 1181 int i; /* loop counter */
1203#endif 1182#endif
@@ -1331,9 +1310,6 @@ xfs_bmbt_rshift(
1331 int *stat) /* success/failure */ 1310 int *stat) /* success/failure */
1332{ 1311{
1333 int error; /* error return value */ 1312 int error; /* error return value */
1334#ifdef XFS_BMBT_TRACE
1335 static char fname[] = "xfs_bmbt_rshift";
1336#endif
1337 int i; /* loop counter */ 1313 int i; /* loop counter */
1338 xfs_bmbt_key_t key; /* bmap btree key */ 1314 xfs_bmbt_key_t key; /* bmap btree key */
1339 xfs_buf_t *lbp; /* left buffer pointer */ 1315 xfs_buf_t *lbp; /* left buffer pointer */
@@ -1492,9 +1468,6 @@ xfs_bmbt_split(
1492{ 1468{
1493 xfs_alloc_arg_t args; /* block allocation args */ 1469 xfs_alloc_arg_t args; /* block allocation args */
1494 int error; /* error return value */ 1470 int error; /* error return value */
1495#ifdef XFS_BMBT_TRACE
1496 static char fname[] = "xfs_bmbt_split";
1497#endif
1498 int i; /* loop counter */ 1471 int i; /* loop counter */
1499 xfs_fsblock_t lbno; /* left sibling block number */ 1472 xfs_fsblock_t lbno; /* left sibling block number */
1500 xfs_buf_t *lbp; /* left buffer pointer */ 1473 xfs_buf_t *lbp; /* left buffer pointer */
@@ -1641,9 +1614,6 @@ xfs_bmbt_updkey(
1641#ifdef DEBUG 1614#ifdef DEBUG
1642 int error; 1615 int error;
1643#endif 1616#endif
1644#ifdef XFS_BMBT_TRACE
1645 static char fname[] = "xfs_bmbt_updkey";
1646#endif
1647 xfs_bmbt_key_t *kp; 1617 xfs_bmbt_key_t *kp;
1648 int ptr; 1618 int ptr;
1649 1619
@@ -1712,9 +1682,6 @@ xfs_bmbt_decrement(
1712 xfs_bmbt_block_t *block; 1682 xfs_bmbt_block_t *block;
1713 xfs_buf_t *bp; 1683 xfs_buf_t *bp;
1714 int error; /* error return value */ 1684 int error; /* error return value */
1715#ifdef XFS_BMBT_TRACE
1716 static char fname[] = "xfs_bmbt_decrement";
1717#endif
1718 xfs_fsblock_t fsbno; 1685 xfs_fsblock_t fsbno;
1719 int lev; 1686 int lev;
1720 xfs_mount_t *mp; 1687 xfs_mount_t *mp;
@@ -1785,9 +1752,6 @@ xfs_bmbt_delete(
1785 int *stat) /* success/failure */ 1752 int *stat) /* success/failure */
1786{ 1753{
1787 int error; /* error return value */ 1754 int error; /* error return value */
1788#ifdef XFS_BMBT_TRACE
1789 static char fname[] = "xfs_bmbt_delete";
1790#endif
1791 int i; 1755 int i;
1792 int level; 1756 int level;
1793 1757
@@ -2000,9 +1964,6 @@ xfs_bmbt_increment(
2000 xfs_bmbt_block_t *block; 1964 xfs_bmbt_block_t *block;
2001 xfs_buf_t *bp; 1965 xfs_buf_t *bp;
2002 int error; /* error return value */ 1966 int error; /* error return value */
2003#ifdef XFS_BMBT_TRACE
2004 static char fname[] = "xfs_bmbt_increment";
2005#endif
2006 xfs_fsblock_t fsbno; 1967 xfs_fsblock_t fsbno;
2007 int lev; 1968 int lev;
2008 xfs_mount_t *mp; 1969 xfs_mount_t *mp;
@@ -2080,9 +2041,6 @@ xfs_bmbt_insert(
2080 int *stat) /* success/failure */ 2041 int *stat) /* success/failure */
2081{ 2042{
2082 int error; /* error return value */ 2043 int error; /* error return value */
2083#ifdef XFS_BMBT_TRACE
2084 static char fname[] = "xfs_bmbt_insert";
2085#endif
2086 int i; 2044 int i;
2087 int level; 2045 int level;
2088 xfs_fsblock_t nbno; 2046 xfs_fsblock_t nbno;
@@ -2142,9 +2100,6 @@ xfs_bmbt_log_block(
2142 int fields) 2100 int fields)
2143{ 2101{
2144 int first; 2102 int first;
2145#ifdef XFS_BMBT_TRACE
2146 static char fname[] = "xfs_bmbt_log_block";
2147#endif
2148 int last; 2103 int last;
2149 xfs_trans_t *tp; 2104 xfs_trans_t *tp;
2150 static const short offsets[] = { 2105 static const short offsets[] = {
@@ -2181,9 +2136,6 @@ xfs_bmbt_log_recs(
2181{ 2136{
2182 xfs_bmbt_block_t *block; 2137 xfs_bmbt_block_t *block;
2183 int first; 2138 int first;
2184#ifdef XFS_BMBT_TRACE
2185 static char fname[] = "xfs_bmbt_log_recs";
2186#endif
2187 int last; 2139 int last;
2188 xfs_bmbt_rec_t *rp; 2140 xfs_bmbt_rec_t *rp;
2189 xfs_trans_t *tp; 2141 xfs_trans_t *tp;
@@ -2245,9 +2197,6 @@ xfs_bmbt_newroot(
2245 xfs_bmbt_key_t *ckp; /* child key pointer */ 2197 xfs_bmbt_key_t *ckp; /* child key pointer */
2246 xfs_bmbt_ptr_t *cpp; /* child ptr pointer */ 2198 xfs_bmbt_ptr_t *cpp; /* child ptr pointer */
2247 int error; /* error return code */ 2199 int error; /* error return code */
2248#ifdef XFS_BMBT_TRACE
2249 static char fname[] = "xfs_bmbt_newroot";
2250#endif
2251#ifdef DEBUG 2200#ifdef DEBUG
2252 int i; /* loop counter */ 2201 int i; /* loop counter */
2253#endif 2202#endif
@@ -2630,9 +2579,6 @@ xfs_bmbt_update(
2630 xfs_bmbt_block_t *block; 2579 xfs_bmbt_block_t *block;
2631 xfs_buf_t *bp; 2580 xfs_buf_t *bp;
2632 int error; 2581 int error;
2633#ifdef XFS_BMBT_TRACE
2634 static char fname[] = "xfs_bmbt_update";
2635#endif
2636 xfs_bmbt_key_t key; 2582 xfs_bmbt_key_t key;
2637 int ptr; 2583 int ptr;
2638 xfs_bmbt_rec_t *rp; 2584 xfs_bmbt_rec_t *rp;
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 4e27d55a1e73..6e40a0a198ff 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -444,30 +444,14 @@ xfs_btree_setbuf(
444/* 444/*
445 * Min and max functions for extlen, agblock, fileoff, and filblks types. 445 * Min and max functions for extlen, agblock, fileoff, and filblks types.
446 */ 446 */
447#define XFS_EXTLEN_MIN(a,b) \ 447#define XFS_EXTLEN_MIN(a,b) min_t(xfs_extlen_t, (a), (b))
448 ((xfs_extlen_t)(a) < (xfs_extlen_t)(b) ? \ 448#define XFS_EXTLEN_MAX(a,b) max_t(xfs_extlen_t, (a), (b))
449 (xfs_extlen_t)(a) : (xfs_extlen_t)(b)) 449#define XFS_AGBLOCK_MIN(a,b) min_t(xfs_agblock_t, (a), (b))
450#define XFS_EXTLEN_MAX(a,b) \ 450#define XFS_AGBLOCK_MAX(a,b) max_t(xfs_agblock_t, (a), (b))
451 ((xfs_extlen_t)(a) > (xfs_extlen_t)(b) ? \ 451#define XFS_FILEOFF_MIN(a,b) min_t(xfs_fileoff_t, (a), (b))
452 (xfs_extlen_t)(a) : (xfs_extlen_t)(b)) 452#define XFS_FILEOFF_MAX(a,b) max_t(xfs_fileoff_t, (a), (b))
453#define XFS_AGBLOCK_MIN(a,b) \ 453#define XFS_FILBLKS_MIN(a,b) min_t(xfs_filblks_t, (a), (b))
454 ((xfs_agblock_t)(a) < (xfs_agblock_t)(b) ? \ 454#define XFS_FILBLKS_MAX(a,b) max_t(xfs_filblks_t, (a), (b))
455 (xfs_agblock_t)(a) : (xfs_agblock_t)(b))
456#define XFS_AGBLOCK_MAX(a,b) \
457 ((xfs_agblock_t)(a) > (xfs_agblock_t)(b) ? \
458 (xfs_agblock_t)(a) : (xfs_agblock_t)(b))
459#define XFS_FILEOFF_MIN(a,b) \
460 ((xfs_fileoff_t)(a) < (xfs_fileoff_t)(b) ? \
461 (xfs_fileoff_t)(a) : (xfs_fileoff_t)(b))
462#define XFS_FILEOFF_MAX(a,b) \
463 ((xfs_fileoff_t)(a) > (xfs_fileoff_t)(b) ? \
464 (xfs_fileoff_t)(a) : (xfs_fileoff_t)(b))
465#define XFS_FILBLKS_MIN(a,b) \
466 ((xfs_filblks_t)(a) < (xfs_filblks_t)(b) ? \
467 (xfs_filblks_t)(a) : (xfs_filblks_t)(b))
468#define XFS_FILBLKS_MAX(a,b) \
469 ((xfs_filblks_t)(a) > (xfs_filblks_t)(b) ? \
470 (xfs_filblks_t)(a) : (xfs_filblks_t)(b))
471 455
472#define XFS_FSB_SANITY_CHECK(mp,fsb) \ 456#define XFS_FSB_SANITY_CHECK(mp,fsb) \
473 (XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \ 457 (XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 6c1bddc04e31..b0667cb27d66 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -580,8 +580,8 @@ xfs_buf_item_unlock(
580 * If the buf item isn't tracking any data, free it. 580 * If the buf item isn't tracking any data, free it.
581 * Otherwise, if XFS_BLI_HOLD is set clear it. 581 * Otherwise, if XFS_BLI_HOLD is set clear it.
582 */ 582 */
583 if (xfs_count_bits(bip->bli_format.blf_data_map, 583 if (xfs_bitmap_empty(bip->bli_format.blf_data_map,
584 bip->bli_format.blf_map_size, 0) == 0) { 584 bip->bli_format.blf_map_size)) {
585 xfs_buf_item_relse(bp); 585 xfs_buf_item_relse(bp);
586 } else if (hold) { 586 } else if (hold) {
587 bip->bli_flags &= ~XFS_BLI_HOLD; 587 bip->bli_flags &= ~XFS_BLI_HOLD;
diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h
index 5b7eb81453be..f89196cb08d2 100644
--- a/fs/xfs/xfs_clnt.h
+++ b/fs/xfs/xfs_clnt.h
@@ -99,5 +99,7 @@ struct xfs_mount_args {
99 */ 99 */
100#define XFSMNT2_COMPAT_IOSIZE 0x00000001 /* don't report large preferred 100#define XFSMNT2_COMPAT_IOSIZE 0x00000001 /* don't report large preferred
101 * I/O size in stat(2) */ 101 * I/O size in stat(2) */
102#define XFSMNT2_FILESTREAMS 0x00000002 /* enable the filestreams
103 * allocator */
102 104
103#endif /* __XFS_CLNT_H__ */ 105#endif /* __XFS_CLNT_H__ */
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index b33826961c45..fefd0116bac9 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -257,6 +257,7 @@ typedef enum xfs_dinode_fmt
257#define XFS_DIFLAG_EXTSIZE_BIT 11 /* inode extent size allocator hint */ 257#define XFS_DIFLAG_EXTSIZE_BIT 11 /* inode extent size allocator hint */
258#define XFS_DIFLAG_EXTSZINHERIT_BIT 12 /* inherit inode extent size */ 258#define XFS_DIFLAG_EXTSZINHERIT_BIT 12 /* inherit inode extent size */
259#define XFS_DIFLAG_NODEFRAG_BIT 13 /* do not reorganize/defragment */ 259#define XFS_DIFLAG_NODEFRAG_BIT 13 /* do not reorganize/defragment */
260#define XFS_DIFLAG_FILESTREAM_BIT 14 /* use filestream allocator */
260#define XFS_DIFLAG_REALTIME (1 << XFS_DIFLAG_REALTIME_BIT) 261#define XFS_DIFLAG_REALTIME (1 << XFS_DIFLAG_REALTIME_BIT)
261#define XFS_DIFLAG_PREALLOC (1 << XFS_DIFLAG_PREALLOC_BIT) 262#define XFS_DIFLAG_PREALLOC (1 << XFS_DIFLAG_PREALLOC_BIT)
262#define XFS_DIFLAG_NEWRTBM (1 << XFS_DIFLAG_NEWRTBM_BIT) 263#define XFS_DIFLAG_NEWRTBM (1 << XFS_DIFLAG_NEWRTBM_BIT)
@@ -271,12 +272,13 @@ typedef enum xfs_dinode_fmt
271#define XFS_DIFLAG_EXTSIZE (1 << XFS_DIFLAG_EXTSIZE_BIT) 272#define XFS_DIFLAG_EXTSIZE (1 << XFS_DIFLAG_EXTSIZE_BIT)
272#define XFS_DIFLAG_EXTSZINHERIT (1 << XFS_DIFLAG_EXTSZINHERIT_BIT) 273#define XFS_DIFLAG_EXTSZINHERIT (1 << XFS_DIFLAG_EXTSZINHERIT_BIT)
273#define XFS_DIFLAG_NODEFRAG (1 << XFS_DIFLAG_NODEFRAG_BIT) 274#define XFS_DIFLAG_NODEFRAG (1 << XFS_DIFLAG_NODEFRAG_BIT)
275#define XFS_DIFLAG_FILESTREAM (1 << XFS_DIFLAG_FILESTREAM_BIT)
274 276
275#define XFS_DIFLAG_ANY \ 277#define XFS_DIFLAG_ANY \
276 (XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \ 278 (XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \
277 XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \ 279 XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \
278 XFS_DIFLAG_NOATIME | XFS_DIFLAG_NODUMP | XFS_DIFLAG_RTINHERIT | \ 280 XFS_DIFLAG_NOATIME | XFS_DIFLAG_NODUMP | XFS_DIFLAG_RTINHERIT | \
279 XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | XFS_DIFLAG_EXTSIZE | \ 281 XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | XFS_DIFLAG_EXTSIZE | \
280 XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG) 282 XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG | XFS_DIFLAG_FILESTREAM)
281 283
282#endif /* __XFS_DINODE_H__ */ 284#endif /* __XFS_DINODE_H__ */
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 8e8e5279334a..29e091914df4 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -55,9 +55,9 @@ xfs_dir_mount(
55 XFS_MAX_BLOCKSIZE); 55 XFS_MAX_BLOCKSIZE);
56 mp->m_dirblksize = 1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog); 56 mp->m_dirblksize = 1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog);
57 mp->m_dirblkfsbs = 1 << mp->m_sb.sb_dirblklog; 57 mp->m_dirblkfsbs = 1 << mp->m_sb.sb_dirblklog;
58 mp->m_dirdatablk = XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_DATA_FIRSTDB(mp)); 58 mp->m_dirdatablk = xfs_dir2_db_to_da(mp, XFS_DIR2_DATA_FIRSTDB(mp));
59 mp->m_dirleafblk = XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_LEAF_FIRSTDB(mp)); 59 mp->m_dirleafblk = xfs_dir2_db_to_da(mp, XFS_DIR2_LEAF_FIRSTDB(mp));
60 mp->m_dirfreeblk = XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_FREE_FIRSTDB(mp)); 60 mp->m_dirfreeblk = xfs_dir2_db_to_da(mp, XFS_DIR2_FREE_FIRSTDB(mp));
61 mp->m_attr_node_ents = 61 mp->m_attr_node_ents =
62 (mp->m_sb.sb_blocksize - (uint)sizeof(xfs_da_node_hdr_t)) / 62 (mp->m_sb.sb_blocksize - (uint)sizeof(xfs_da_node_hdr_t)) /
63 (uint)sizeof(xfs_da_node_entry_t); 63 (uint)sizeof(xfs_da_node_entry_t);
@@ -554,7 +554,7 @@ xfs_dir2_grow_inode(
554 */ 554 */
555 if (mapp != &map) 555 if (mapp != &map)
556 kmem_free(mapp, sizeof(*mapp) * count); 556 kmem_free(mapp, sizeof(*mapp) * count);
557 *dbp = XFS_DIR2_DA_TO_DB(mp, (xfs_dablk_t)bno); 557 *dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno);
558 /* 558 /*
559 * Update file's size if this is the data space and it grew. 559 * Update file's size if this is the data space and it grew.
560 */ 560 */
@@ -706,7 +706,7 @@ xfs_dir2_shrink_inode(
706 dp = args->dp; 706 dp = args->dp;
707 mp = dp->i_mount; 707 mp = dp->i_mount;
708 tp = args->trans; 708 tp = args->trans;
709 da = XFS_DIR2_DB_TO_DA(mp, db); 709 da = xfs_dir2_db_to_da(mp, db);
710 /* 710 /*
711 * Unmap the fsblock(s). 711 * Unmap the fsblock(s).
712 */ 712 */
@@ -742,7 +742,7 @@ xfs_dir2_shrink_inode(
742 /* 742 /*
743 * If the block isn't the last one in the directory, we're done. 743 * If the block isn't the last one in the directory, we're done.
744 */ 744 */
745 if (dp->i_d.di_size > XFS_DIR2_DB_OFF_TO_BYTE(mp, db + 1, 0)) 745 if (dp->i_d.di_size > xfs_dir2_db_off_to_byte(mp, db + 1, 0))
746 return 0; 746 return 0;
747 bno = da; 747 bno = da;
748 if ((error = xfs_bmap_last_before(tp, dp, &bno, XFS_DATA_FORK))) { 748 if ((error = xfs_bmap_last_before(tp, dp, &bno, XFS_DATA_FORK))) {
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 3accc1dcd6c9..e4df1aaae2a2 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -115,13 +115,13 @@ xfs_dir2_block_addname(
115 xfs_da_brelse(tp, bp); 115 xfs_da_brelse(tp, bp);
116 return XFS_ERROR(EFSCORRUPTED); 116 return XFS_ERROR(EFSCORRUPTED);
117 } 117 }
118 len = XFS_DIR2_DATA_ENTSIZE(args->namelen); 118 len = xfs_dir2_data_entsize(args->namelen);
119 /* 119 /*
120 * Set up pointers to parts of the block. 120 * Set up pointers to parts of the block.
121 */ 121 */
122 bf = block->hdr.bestfree; 122 bf = block->hdr.bestfree;
123 btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); 123 btp = xfs_dir2_block_tail_p(mp, block);
124 blp = XFS_DIR2_BLOCK_LEAF_P(btp); 124 blp = xfs_dir2_block_leaf_p(btp);
125 /* 125 /*
126 * No stale entries? Need space for entry and new leaf. 126 * No stale entries? Need space for entry and new leaf.
127 */ 127 */
@@ -396,7 +396,7 @@ xfs_dir2_block_addname(
396 * Fill in the leaf entry. 396 * Fill in the leaf entry.
397 */ 397 */
398 blp[mid].hashval = cpu_to_be32(args->hashval); 398 blp[mid].hashval = cpu_to_be32(args->hashval);
399 blp[mid].address = cpu_to_be32(XFS_DIR2_BYTE_TO_DATAPTR(mp, 399 blp[mid].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
400 (char *)dep - (char *)block)); 400 (char *)dep - (char *)block));
401 xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh); 401 xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh);
402 /* 402 /*
@@ -411,7 +411,7 @@ xfs_dir2_block_addname(
411 dep->inumber = cpu_to_be64(args->inumber); 411 dep->inumber = cpu_to_be64(args->inumber);
412 dep->namelen = args->namelen; 412 dep->namelen = args->namelen;
413 memcpy(dep->name, args->name, args->namelen); 413 memcpy(dep->name, args->name, args->namelen);
414 tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); 414 tagp = xfs_dir2_data_entry_tag_p(dep);
415 *tagp = cpu_to_be16((char *)dep - (char *)block); 415 *tagp = cpu_to_be16((char *)dep - (char *)block);
416 /* 416 /*
417 * Clean up the bestfree array and log the header, tail, and entry. 417 * Clean up the bestfree array and log the header, tail, and entry.
@@ -455,7 +455,7 @@ xfs_dir2_block_getdents(
455 /* 455 /*
456 * If the block number in the offset is out of range, we're done. 456 * If the block number in the offset is out of range, we're done.
457 */ 457 */
458 if (XFS_DIR2_DATAPTR_TO_DB(mp, uio->uio_offset) > mp->m_dirdatablk) { 458 if (xfs_dir2_dataptr_to_db(mp, uio->uio_offset) > mp->m_dirdatablk) {
459 *eofp = 1; 459 *eofp = 1;
460 return 0; 460 return 0;
461 } 461 }
@@ -471,15 +471,15 @@ xfs_dir2_block_getdents(
471 * Extract the byte offset we start at from the seek pointer. 471 * Extract the byte offset we start at from the seek pointer.
472 * We'll skip entries before this. 472 * We'll skip entries before this.
473 */ 473 */
474 wantoff = XFS_DIR2_DATAPTR_TO_OFF(mp, uio->uio_offset); 474 wantoff = xfs_dir2_dataptr_to_off(mp, uio->uio_offset);
475 block = bp->data; 475 block = bp->data;
476 xfs_dir2_data_check(dp, bp); 476 xfs_dir2_data_check(dp, bp);
477 /* 477 /*
478 * Set up values for the loop. 478 * Set up values for the loop.
479 */ 479 */
480 btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); 480 btp = xfs_dir2_block_tail_p(mp, block);
481 ptr = (char *)block->u; 481 ptr = (char *)block->u;
482 endptr = (char *)XFS_DIR2_BLOCK_LEAF_P(btp); 482 endptr = (char *)xfs_dir2_block_leaf_p(btp);
483 p.dbp = dbp; 483 p.dbp = dbp;
484 p.put = put; 484 p.put = put;
485 p.uio = uio; 485 p.uio = uio;
@@ -502,7 +502,7 @@ xfs_dir2_block_getdents(
502 /* 502 /*
503 * Bump pointer for the next iteration. 503 * Bump pointer for the next iteration.
504 */ 504 */
505 ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen); 505 ptr += xfs_dir2_data_entsize(dep->namelen);
506 /* 506 /*
507 * The entry is before the desired starting point, skip it. 507 * The entry is before the desired starting point, skip it.
508 */ 508 */
@@ -513,7 +513,7 @@ xfs_dir2_block_getdents(
513 */ 513 */
514 p.namelen = dep->namelen; 514 p.namelen = dep->namelen;
515 515
516 p.cook = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk, 516 p.cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
517 ptr - (char *)block); 517 ptr - (char *)block);
518 p.ino = be64_to_cpu(dep->inumber); 518 p.ino = be64_to_cpu(dep->inumber);
519#if XFS_BIG_INUMS 519#if XFS_BIG_INUMS
@@ -531,7 +531,7 @@ xfs_dir2_block_getdents(
531 */ 531 */
532 if (!p.done) { 532 if (!p.done) {
533 uio->uio_offset = 533 uio->uio_offset =
534 XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk, 534 xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
535 (char *)dep - (char *)block); 535 (char *)dep - (char *)block);
536 xfs_da_brelse(tp, bp); 536 xfs_da_brelse(tp, bp);
537 return error; 537 return error;
@@ -545,7 +545,7 @@ xfs_dir2_block_getdents(
545 *eofp = 1; 545 *eofp = 1;
546 546
547 uio->uio_offset = 547 uio->uio_offset =
548 XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk + 1, 0); 548 xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0);
549 549
550 xfs_da_brelse(tp, bp); 550 xfs_da_brelse(tp, bp);
551 551
@@ -569,8 +569,8 @@ xfs_dir2_block_log_leaf(
569 569
570 mp = tp->t_mountp; 570 mp = tp->t_mountp;
571 block = bp->data; 571 block = bp->data;
572 btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); 572 btp = xfs_dir2_block_tail_p(mp, block);
573 blp = XFS_DIR2_BLOCK_LEAF_P(btp); 573 blp = xfs_dir2_block_leaf_p(btp);
574 xfs_da_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)block), 574 xfs_da_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)block),
575 (uint)((char *)&blp[last + 1] - (char *)block - 1)); 575 (uint)((char *)&blp[last + 1] - (char *)block - 1));
576} 576}
@@ -589,7 +589,7 @@ xfs_dir2_block_log_tail(
589 589
590 mp = tp->t_mountp; 590 mp = tp->t_mountp;
591 block = bp->data; 591 block = bp->data;
592 btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); 592 btp = xfs_dir2_block_tail_p(mp, block);
593 xfs_da_log_buf(tp, bp, (uint)((char *)btp - (char *)block), 593 xfs_da_log_buf(tp, bp, (uint)((char *)btp - (char *)block),
594 (uint)((char *)(btp + 1) - (char *)block - 1)); 594 (uint)((char *)(btp + 1) - (char *)block - 1));
595} 595}
@@ -623,13 +623,13 @@ xfs_dir2_block_lookup(
623 mp = dp->i_mount; 623 mp = dp->i_mount;
624 block = bp->data; 624 block = bp->data;
625 xfs_dir2_data_check(dp, bp); 625 xfs_dir2_data_check(dp, bp);
626 btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); 626 btp = xfs_dir2_block_tail_p(mp, block);
627 blp = XFS_DIR2_BLOCK_LEAF_P(btp); 627 blp = xfs_dir2_block_leaf_p(btp);
628 /* 628 /*
629 * Get the offset from the leaf entry, to point to the data. 629 * Get the offset from the leaf entry, to point to the data.
630 */ 630 */
631 dep = (xfs_dir2_data_entry_t *) 631 dep = (xfs_dir2_data_entry_t *)
632 ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(blp[ent].address))); 632 ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
633 /* 633 /*
634 * Fill in inode number, release the block. 634 * Fill in inode number, release the block.
635 */ 635 */
@@ -675,8 +675,8 @@ xfs_dir2_block_lookup_int(
675 ASSERT(bp != NULL); 675 ASSERT(bp != NULL);
676 block = bp->data; 676 block = bp->data;
677 xfs_dir2_data_check(dp, bp); 677 xfs_dir2_data_check(dp, bp);
678 btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); 678 btp = xfs_dir2_block_tail_p(mp, block);
679 blp = XFS_DIR2_BLOCK_LEAF_P(btp); 679 blp = xfs_dir2_block_leaf_p(btp);
680 /* 680 /*
681 * Loop doing a binary search for our hash value. 681 * Loop doing a binary search for our hash value.
682 * Find our entry, ENOENT if it's not there. 682 * Find our entry, ENOENT if it's not there.
@@ -713,7 +713,7 @@ xfs_dir2_block_lookup_int(
713 * Get pointer to the entry from the leaf. 713 * Get pointer to the entry from the leaf.
714 */ 714 */
715 dep = (xfs_dir2_data_entry_t *) 715 dep = (xfs_dir2_data_entry_t *)
716 ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, addr)); 716 ((char *)block + xfs_dir2_dataptr_to_off(mp, addr));
717 /* 717 /*
718 * Compare, if it's right give back buffer & entry number. 718 * Compare, if it's right give back buffer & entry number.
719 */ 719 */
@@ -768,20 +768,20 @@ xfs_dir2_block_removename(
768 tp = args->trans; 768 tp = args->trans;
769 mp = dp->i_mount; 769 mp = dp->i_mount;
770 block = bp->data; 770 block = bp->data;
771 btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); 771 btp = xfs_dir2_block_tail_p(mp, block);
772 blp = XFS_DIR2_BLOCK_LEAF_P(btp); 772 blp = xfs_dir2_block_leaf_p(btp);
773 /* 773 /*
774 * Point to the data entry using the leaf entry. 774 * Point to the data entry using the leaf entry.
775 */ 775 */
776 dep = (xfs_dir2_data_entry_t *) 776 dep = (xfs_dir2_data_entry_t *)
777 ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(blp[ent].address))); 777 ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
778 /* 778 /*
779 * Mark the data entry's space free. 779 * Mark the data entry's space free.
780 */ 780 */
781 needlog = needscan = 0; 781 needlog = needscan = 0;
782 xfs_dir2_data_make_free(tp, bp, 782 xfs_dir2_data_make_free(tp, bp,
783 (xfs_dir2_data_aoff_t)((char *)dep - (char *)block), 783 (xfs_dir2_data_aoff_t)((char *)dep - (char *)block),
784 XFS_DIR2_DATA_ENTSIZE(dep->namelen), &needlog, &needscan); 784 xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
785 /* 785 /*
786 * Fix up the block tail. 786 * Fix up the block tail.
787 */ 787 */
@@ -843,13 +843,13 @@ xfs_dir2_block_replace(
843 dp = args->dp; 843 dp = args->dp;
844 mp = dp->i_mount; 844 mp = dp->i_mount;
845 block = bp->data; 845 block = bp->data;
846 btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); 846 btp = xfs_dir2_block_tail_p(mp, block);
847 blp = XFS_DIR2_BLOCK_LEAF_P(btp); 847 blp = xfs_dir2_block_leaf_p(btp);
848 /* 848 /*
849 * Point to the data entry we need to change. 849 * Point to the data entry we need to change.
850 */ 850 */
851 dep = (xfs_dir2_data_entry_t *) 851 dep = (xfs_dir2_data_entry_t *)
852 ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(blp[ent].address))); 852 ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
853 ASSERT(be64_to_cpu(dep->inumber) != args->inumber); 853 ASSERT(be64_to_cpu(dep->inumber) != args->inumber);
854 /* 854 /*
855 * Change the inode number to the new value. 855 * Change the inode number to the new value.
@@ -912,7 +912,7 @@ xfs_dir2_leaf_to_block(
912 mp = dp->i_mount; 912 mp = dp->i_mount;
913 leaf = lbp->data; 913 leaf = lbp->data;
914 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC); 914 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC);
915 ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); 915 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
916 /* 916 /*
917 * If there are data blocks other than the first one, take this 917 * If there are data blocks other than the first one, take this
918 * opportunity to remove trailing empty data blocks that may have 918 * opportunity to remove trailing empty data blocks that may have
@@ -920,7 +920,7 @@ xfs_dir2_leaf_to_block(
920 * These will show up in the leaf bests table. 920 * These will show up in the leaf bests table.
921 */ 921 */
922 while (dp->i_d.di_size > mp->m_dirblksize) { 922 while (dp->i_d.di_size > mp->m_dirblksize) {
923 bestsp = XFS_DIR2_LEAF_BESTS_P(ltp); 923 bestsp = xfs_dir2_leaf_bests_p(ltp);
924 if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) == 924 if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) ==
925 mp->m_dirblksize - (uint)sizeof(block->hdr)) { 925 mp->m_dirblksize - (uint)sizeof(block->hdr)) {
926 if ((error = 926 if ((error =
@@ -974,14 +974,14 @@ xfs_dir2_leaf_to_block(
974 /* 974 /*
975 * Initialize the block tail. 975 * Initialize the block tail.
976 */ 976 */
977 btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); 977 btp = xfs_dir2_block_tail_p(mp, block);
978 btp->count = cpu_to_be32(be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale)); 978 btp->count = cpu_to_be32(be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale));
979 btp->stale = 0; 979 btp->stale = 0;
980 xfs_dir2_block_log_tail(tp, dbp); 980 xfs_dir2_block_log_tail(tp, dbp);
981 /* 981 /*
982 * Initialize the block leaf area. We compact out stale entries. 982 * Initialize the block leaf area. We compact out stale entries.
983 */ 983 */
984 lep = XFS_DIR2_BLOCK_LEAF_P(btp); 984 lep = xfs_dir2_block_leaf_p(btp);
985 for (from = to = 0; from < be16_to_cpu(leaf->hdr.count); from++) { 985 for (from = to = 0; from < be16_to_cpu(leaf->hdr.count); from++) {
986 if (be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR) 986 if (be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR)
987 continue; 987 continue;
@@ -1067,7 +1067,7 @@ xfs_dir2_sf_to_block(
1067 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); 1067 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
1068 ASSERT(dp->i_df.if_u1.if_data != NULL); 1068 ASSERT(dp->i_df.if_u1.if_data != NULL);
1069 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 1069 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
1070 ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count)); 1070 ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
1071 /* 1071 /*
1072 * Copy the directory into the stack buffer. 1072 * Copy the directory into the stack buffer.
1073 * Then pitch the incore inode data so we can make extents. 1073 * Then pitch the incore inode data so we can make extents.
@@ -1119,10 +1119,10 @@ xfs_dir2_sf_to_block(
1119 /* 1119 /*
1120 * Fill in the tail. 1120 * Fill in the tail.
1121 */ 1121 */
1122 btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); 1122 btp = xfs_dir2_block_tail_p(mp, block);
1123 btp->count = cpu_to_be32(sfp->hdr.count + 2); /* ., .. */ 1123 btp->count = cpu_to_be32(sfp->hdr.count + 2); /* ., .. */
1124 btp->stale = 0; 1124 btp->stale = 0;
1125 blp = XFS_DIR2_BLOCK_LEAF_P(btp); 1125 blp = xfs_dir2_block_leaf_p(btp);
1126 endoffset = (uint)((char *)blp - (char *)block); 1126 endoffset = (uint)((char *)blp - (char *)block);
1127 /* 1127 /*
1128 * Remove the freespace, we'll manage it. 1128 * Remove the freespace, we'll manage it.
@@ -1138,25 +1138,25 @@ xfs_dir2_sf_to_block(
1138 dep->inumber = cpu_to_be64(dp->i_ino); 1138 dep->inumber = cpu_to_be64(dp->i_ino);
1139 dep->namelen = 1; 1139 dep->namelen = 1;
1140 dep->name[0] = '.'; 1140 dep->name[0] = '.';
1141 tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); 1141 tagp = xfs_dir2_data_entry_tag_p(dep);
1142 *tagp = cpu_to_be16((char *)dep - (char *)block); 1142 *tagp = cpu_to_be16((char *)dep - (char *)block);
1143 xfs_dir2_data_log_entry(tp, bp, dep); 1143 xfs_dir2_data_log_entry(tp, bp, dep);
1144 blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot); 1144 blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot);
1145 blp[0].address = cpu_to_be32(XFS_DIR2_BYTE_TO_DATAPTR(mp, 1145 blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
1146 (char *)dep - (char *)block)); 1146 (char *)dep - (char *)block));
1147 /* 1147 /*
1148 * Create entry for .. 1148 * Create entry for ..
1149 */ 1149 */
1150 dep = (xfs_dir2_data_entry_t *) 1150 dep = (xfs_dir2_data_entry_t *)
1151 ((char *)block + XFS_DIR2_DATA_DOTDOT_OFFSET); 1151 ((char *)block + XFS_DIR2_DATA_DOTDOT_OFFSET);
1152 dep->inumber = cpu_to_be64(XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent)); 1152 dep->inumber = cpu_to_be64(xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent));
1153 dep->namelen = 2; 1153 dep->namelen = 2;
1154 dep->name[0] = dep->name[1] = '.'; 1154 dep->name[0] = dep->name[1] = '.';
1155 tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); 1155 tagp = xfs_dir2_data_entry_tag_p(dep);
1156 *tagp = cpu_to_be16((char *)dep - (char *)block); 1156 *tagp = cpu_to_be16((char *)dep - (char *)block);
1157 xfs_dir2_data_log_entry(tp, bp, dep); 1157 xfs_dir2_data_log_entry(tp, bp, dep);
1158 blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot); 1158 blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot);
1159 blp[1].address = cpu_to_be32(XFS_DIR2_BYTE_TO_DATAPTR(mp, 1159 blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
1160 (char *)dep - (char *)block)); 1160 (char *)dep - (char *)block));
1161 offset = XFS_DIR2_DATA_FIRST_OFFSET; 1161 offset = XFS_DIR2_DATA_FIRST_OFFSET;
1162 /* 1162 /*
@@ -1165,7 +1165,7 @@ xfs_dir2_sf_to_block(
1165 if ((i = 0) == sfp->hdr.count) 1165 if ((i = 0) == sfp->hdr.count)
1166 sfep = NULL; 1166 sfep = NULL;
1167 else 1167 else
1168 sfep = XFS_DIR2_SF_FIRSTENTRY(sfp); 1168 sfep = xfs_dir2_sf_firstentry(sfp);
1169 /* 1169 /*
1170 * Need to preserve the existing offset values in the sf directory. 1170 * Need to preserve the existing offset values in the sf directory.
1171 * Insert holes (unused entries) where necessary. 1171 * Insert holes (unused entries) where necessary.
@@ -1177,7 +1177,7 @@ xfs_dir2_sf_to_block(
1177 if (sfep == NULL) 1177 if (sfep == NULL)
1178 newoffset = endoffset; 1178 newoffset = endoffset;
1179 else 1179 else
1180 newoffset = XFS_DIR2_SF_GET_OFFSET(sfep); 1180 newoffset = xfs_dir2_sf_get_offset(sfep);
1181 /* 1181 /*
1182 * There should be a hole here, make one. 1182 * There should be a hole here, make one.
1183 */ 1183 */
@@ -1186,7 +1186,7 @@ xfs_dir2_sf_to_block(
1186 ((char *)block + offset); 1186 ((char *)block + offset);
1187 dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); 1187 dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
1188 dup->length = cpu_to_be16(newoffset - offset); 1188 dup->length = cpu_to_be16(newoffset - offset);
1189 *XFS_DIR2_DATA_UNUSED_TAG_P(dup) = cpu_to_be16( 1189 *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16(
1190 ((char *)dup - (char *)block)); 1190 ((char *)dup - (char *)block));
1191 xfs_dir2_data_log_unused(tp, bp, dup); 1191 xfs_dir2_data_log_unused(tp, bp, dup);
1192 (void)xfs_dir2_data_freeinsert((xfs_dir2_data_t *)block, 1192 (void)xfs_dir2_data_freeinsert((xfs_dir2_data_t *)block,
@@ -1198,22 +1198,22 @@ xfs_dir2_sf_to_block(
1198 * Copy a real entry. 1198 * Copy a real entry.
1199 */ 1199 */
1200 dep = (xfs_dir2_data_entry_t *)((char *)block + newoffset); 1200 dep = (xfs_dir2_data_entry_t *)((char *)block + newoffset);
1201 dep->inumber = cpu_to_be64(XFS_DIR2_SF_GET_INUMBER(sfp, 1201 dep->inumber = cpu_to_be64(xfs_dir2_sf_get_inumber(sfp,
1202 XFS_DIR2_SF_INUMBERP(sfep))); 1202 xfs_dir2_sf_inumberp(sfep)));
1203 dep->namelen = sfep->namelen; 1203 dep->namelen = sfep->namelen;
1204 memcpy(dep->name, sfep->name, dep->namelen); 1204 memcpy(dep->name, sfep->name, dep->namelen);
1205 tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); 1205 tagp = xfs_dir2_data_entry_tag_p(dep);
1206 *tagp = cpu_to_be16((char *)dep - (char *)block); 1206 *tagp = cpu_to_be16((char *)dep - (char *)block);
1207 xfs_dir2_data_log_entry(tp, bp, dep); 1207 xfs_dir2_data_log_entry(tp, bp, dep);
1208 blp[2 + i].hashval = cpu_to_be32(xfs_da_hashname( 1208 blp[2 + i].hashval = cpu_to_be32(xfs_da_hashname(
1209 (char *)sfep->name, sfep->namelen)); 1209 (char *)sfep->name, sfep->namelen));
1210 blp[2 + i].address = cpu_to_be32(XFS_DIR2_BYTE_TO_DATAPTR(mp, 1210 blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
1211 (char *)dep - (char *)block)); 1211 (char *)dep - (char *)block));
1212 offset = (int)((char *)(tagp + 1) - (char *)block); 1212 offset = (int)((char *)(tagp + 1) - (char *)block);
1213 if (++i == sfp->hdr.count) 1213 if (++i == sfp->hdr.count)
1214 sfep = NULL; 1214 sfep = NULL;
1215 else 1215 else
1216 sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep); 1216 sfep = xfs_dir2_sf_nextentry(sfp, sfep);
1217 } 1217 }
1218 /* Done with the temporary buffer */ 1218 /* Done with the temporary buffer */
1219 kmem_free(buf, buf_len); 1219 kmem_free(buf, buf_len);
diff --git a/fs/xfs/xfs_dir2_block.h b/fs/xfs/xfs_dir2_block.h
index 6722effd0b20..e7c2606161e9 100644
--- a/fs/xfs/xfs_dir2_block.h
+++ b/fs/xfs/xfs_dir2_block.h
@@ -60,7 +60,6 @@ typedef struct xfs_dir2_block {
60/* 60/*
61 * Pointer to the leaf header embedded in a data block (1-block format) 61 * Pointer to the leaf header embedded in a data block (1-block format)
62 */ 62 */
63#define XFS_DIR2_BLOCK_TAIL_P(mp,block) xfs_dir2_block_tail_p(mp,block)
64static inline xfs_dir2_block_tail_t * 63static inline xfs_dir2_block_tail_t *
65xfs_dir2_block_tail_p(struct xfs_mount *mp, xfs_dir2_block_t *block) 64xfs_dir2_block_tail_p(struct xfs_mount *mp, xfs_dir2_block_t *block)
66{ 65{
@@ -71,7 +70,6 @@ xfs_dir2_block_tail_p(struct xfs_mount *mp, xfs_dir2_block_t *block)
71/* 70/*
72 * Pointer to the leaf entries embedded in a data block (1-block format) 71 * Pointer to the leaf entries embedded in a data block (1-block format)
73 */ 72 */
74#define XFS_DIR2_BLOCK_LEAF_P(btp) xfs_dir2_block_leaf_p(btp)
75static inline struct xfs_dir2_leaf_entry * 73static inline struct xfs_dir2_leaf_entry *
76xfs_dir2_block_leaf_p(xfs_dir2_block_tail_t *btp) 74xfs_dir2_block_leaf_p(xfs_dir2_block_tail_t *btp)
77{ 75{
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index c211c37ef67c..7ebe295bd6d3 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -72,8 +72,8 @@ xfs_dir2_data_check(
72 bf = d->hdr.bestfree; 72 bf = d->hdr.bestfree;
73 p = (char *)d->u; 73 p = (char *)d->u;
74 if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { 74 if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) {
75 btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)d); 75 btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d);
76 lep = XFS_DIR2_BLOCK_LEAF_P(btp); 76 lep = xfs_dir2_block_leaf_p(btp);
77 endp = (char *)lep; 77 endp = (char *)lep;
78 } else 78 } else
79 endp = (char *)d + mp->m_dirblksize; 79 endp = (char *)d + mp->m_dirblksize;
@@ -107,7 +107,7 @@ xfs_dir2_data_check(
107 */ 107 */
108 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { 108 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
109 ASSERT(lastfree == 0); 109 ASSERT(lastfree == 0);
110 ASSERT(be16_to_cpu(*XFS_DIR2_DATA_UNUSED_TAG_P(dup)) == 110 ASSERT(be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) ==
111 (char *)dup - (char *)d); 111 (char *)dup - (char *)d);
112 dfp = xfs_dir2_data_freefind(d, dup); 112 dfp = xfs_dir2_data_freefind(d, dup);
113 if (dfp) { 113 if (dfp) {
@@ -131,12 +131,12 @@ xfs_dir2_data_check(
131 dep = (xfs_dir2_data_entry_t *)p; 131 dep = (xfs_dir2_data_entry_t *)p;
132 ASSERT(dep->namelen != 0); 132 ASSERT(dep->namelen != 0);
133 ASSERT(xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)) == 0); 133 ASSERT(xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)) == 0);
134 ASSERT(be16_to_cpu(*XFS_DIR2_DATA_ENTRY_TAG_P(dep)) == 134 ASSERT(be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep)) ==
135 (char *)dep - (char *)d); 135 (char *)dep - (char *)d);
136 count++; 136 count++;
137 lastfree = 0; 137 lastfree = 0;
138 if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { 138 if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) {
139 addr = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk, 139 addr = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
140 (xfs_dir2_data_aoff_t) 140 (xfs_dir2_data_aoff_t)
141 ((char *)dep - (char *)d)); 141 ((char *)dep - (char *)d));
142 hash = xfs_da_hashname((char *)dep->name, dep->namelen); 142 hash = xfs_da_hashname((char *)dep->name, dep->namelen);
@@ -147,7 +147,7 @@ xfs_dir2_data_check(
147 } 147 }
148 ASSERT(i < be32_to_cpu(btp->count)); 148 ASSERT(i < be32_to_cpu(btp->count));
149 } 149 }
150 p += XFS_DIR2_DATA_ENTSIZE(dep->namelen); 150 p += xfs_dir2_data_entsize(dep->namelen);
151 } 151 }
152 /* 152 /*
153 * Need to have seen all the entries and all the bestfree slots. 153 * Need to have seen all the entries and all the bestfree slots.
@@ -346,8 +346,8 @@ xfs_dir2_data_freescan(
346 */ 346 */
347 p = (char *)d->u; 347 p = (char *)d->u;
348 if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { 348 if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) {
349 btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)d); 349 btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d);
350 endp = (char *)XFS_DIR2_BLOCK_LEAF_P(btp); 350 endp = (char *)xfs_dir2_block_leaf_p(btp);
351 } else 351 } else
352 endp = (char *)d + mp->m_dirblksize; 352 endp = (char *)d + mp->m_dirblksize;
353 /* 353 /*
@@ -360,7 +360,7 @@ xfs_dir2_data_freescan(
360 */ 360 */
361 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { 361 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
362 ASSERT((char *)dup - (char *)d == 362 ASSERT((char *)dup - (char *)d ==
363 be16_to_cpu(*XFS_DIR2_DATA_UNUSED_TAG_P(dup))); 363 be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)));
364 xfs_dir2_data_freeinsert(d, dup, loghead); 364 xfs_dir2_data_freeinsert(d, dup, loghead);
365 p += be16_to_cpu(dup->length); 365 p += be16_to_cpu(dup->length);
366 } 366 }
@@ -370,8 +370,8 @@ xfs_dir2_data_freescan(
370 else { 370 else {
371 dep = (xfs_dir2_data_entry_t *)p; 371 dep = (xfs_dir2_data_entry_t *)p;
372 ASSERT((char *)dep - (char *)d == 372 ASSERT((char *)dep - (char *)d ==
373 be16_to_cpu(*XFS_DIR2_DATA_ENTRY_TAG_P(dep))); 373 be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep)));
374 p += XFS_DIR2_DATA_ENTSIZE(dep->namelen); 374 p += xfs_dir2_data_entsize(dep->namelen);
375 } 375 }
376 } 376 }
377} 377}
@@ -402,7 +402,7 @@ xfs_dir2_data_init(
402 /* 402 /*
403 * Get the buffer set up for the block. 403 * Get the buffer set up for the block.
404 */ 404 */
405 error = xfs_da_get_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, blkno), -1, &bp, 405 error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, blkno), -1, &bp,
406 XFS_DATA_FORK); 406 XFS_DATA_FORK);
407 if (error) { 407 if (error) {
408 return error; 408 return error;
@@ -427,7 +427,7 @@ xfs_dir2_data_init(
427 t=mp->m_dirblksize - (uint)sizeof(d->hdr); 427 t=mp->m_dirblksize - (uint)sizeof(d->hdr);
428 d->hdr.bestfree[0].length = cpu_to_be16(t); 428 d->hdr.bestfree[0].length = cpu_to_be16(t);
429 dup->length = cpu_to_be16(t); 429 dup->length = cpu_to_be16(t);
430 *XFS_DIR2_DATA_UNUSED_TAG_P(dup) = cpu_to_be16((char *)dup - (char *)d); 430 *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)d);
431 /* 431 /*
432 * Log it and return it. 432 * Log it and return it.
433 */ 433 */
@@ -452,7 +452,7 @@ xfs_dir2_data_log_entry(
452 ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || 452 ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
453 be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); 453 be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
454 xfs_da_log_buf(tp, bp, (uint)((char *)dep - (char *)d), 454 xfs_da_log_buf(tp, bp, (uint)((char *)dep - (char *)d),
455 (uint)((char *)(XFS_DIR2_DATA_ENTRY_TAG_P(dep) + 1) - 455 (uint)((char *)(xfs_dir2_data_entry_tag_p(dep) + 1) -
456 (char *)d - 1)); 456 (char *)d - 1));
457} 457}
458 458
@@ -497,8 +497,8 @@ xfs_dir2_data_log_unused(
497 * Log the end (tag) of the unused entry. 497 * Log the end (tag) of the unused entry.
498 */ 498 */
499 xfs_da_log_buf(tp, bp, 499 xfs_da_log_buf(tp, bp,
500 (uint)((char *)XFS_DIR2_DATA_UNUSED_TAG_P(dup) - (char *)d), 500 (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)d),
501 (uint)((char *)XFS_DIR2_DATA_UNUSED_TAG_P(dup) - (char *)d + 501 (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)d +
502 sizeof(xfs_dir2_data_off_t) - 1)); 502 sizeof(xfs_dir2_data_off_t) - 1));
503} 503}
504 504
@@ -535,8 +535,8 @@ xfs_dir2_data_make_free(
535 xfs_dir2_block_tail_t *btp; /* block tail */ 535 xfs_dir2_block_tail_t *btp; /* block tail */
536 536
537 ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); 537 ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
538 btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)d); 538 btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d);
539 endptr = (char *)XFS_DIR2_BLOCK_LEAF_P(btp); 539 endptr = (char *)xfs_dir2_block_leaf_p(btp);
540 } 540 }
541 /* 541 /*
542 * If this isn't the start of the block, then back up to 542 * If this isn't the start of the block, then back up to
@@ -587,7 +587,7 @@ xfs_dir2_data_make_free(
587 * Fix up the new big freespace. 587 * Fix up the new big freespace.
588 */ 588 */
589 be16_add(&prevdup->length, len + be16_to_cpu(postdup->length)); 589 be16_add(&prevdup->length, len + be16_to_cpu(postdup->length));
590 *XFS_DIR2_DATA_UNUSED_TAG_P(prevdup) = 590 *xfs_dir2_data_unused_tag_p(prevdup) =
591 cpu_to_be16((char *)prevdup - (char *)d); 591 cpu_to_be16((char *)prevdup - (char *)d);
592 xfs_dir2_data_log_unused(tp, bp, prevdup); 592 xfs_dir2_data_log_unused(tp, bp, prevdup);
593 if (!needscan) { 593 if (!needscan) {
@@ -621,7 +621,7 @@ xfs_dir2_data_make_free(
621 else if (prevdup) { 621 else if (prevdup) {
622 dfp = xfs_dir2_data_freefind(d, prevdup); 622 dfp = xfs_dir2_data_freefind(d, prevdup);
623 be16_add(&prevdup->length, len); 623 be16_add(&prevdup->length, len);
624 *XFS_DIR2_DATA_UNUSED_TAG_P(prevdup) = 624 *xfs_dir2_data_unused_tag_p(prevdup) =
625 cpu_to_be16((char *)prevdup - (char *)d); 625 cpu_to_be16((char *)prevdup - (char *)d);
626 xfs_dir2_data_log_unused(tp, bp, prevdup); 626 xfs_dir2_data_log_unused(tp, bp, prevdup);
627 /* 627 /*
@@ -649,7 +649,7 @@ xfs_dir2_data_make_free(
649 newdup = (xfs_dir2_data_unused_t *)((char *)d + offset); 649 newdup = (xfs_dir2_data_unused_t *)((char *)d + offset);
650 newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); 650 newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
651 newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length)); 651 newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length));
652 *XFS_DIR2_DATA_UNUSED_TAG_P(newdup) = 652 *xfs_dir2_data_unused_tag_p(newdup) =
653 cpu_to_be16((char *)newdup - (char *)d); 653 cpu_to_be16((char *)newdup - (char *)d);
654 xfs_dir2_data_log_unused(tp, bp, newdup); 654 xfs_dir2_data_log_unused(tp, bp, newdup);
655 /* 655 /*
@@ -676,7 +676,7 @@ xfs_dir2_data_make_free(
676 newdup = (xfs_dir2_data_unused_t *)((char *)d + offset); 676 newdup = (xfs_dir2_data_unused_t *)((char *)d + offset);
677 newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); 677 newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
678 newdup->length = cpu_to_be16(len); 678 newdup->length = cpu_to_be16(len);
679 *XFS_DIR2_DATA_UNUSED_TAG_P(newdup) = 679 *xfs_dir2_data_unused_tag_p(newdup) =
680 cpu_to_be16((char *)newdup - (char *)d); 680 cpu_to_be16((char *)newdup - (char *)d);
681 xfs_dir2_data_log_unused(tp, bp, newdup); 681 xfs_dir2_data_log_unused(tp, bp, newdup);
682 (void)xfs_dir2_data_freeinsert(d, newdup, needlogp); 682 (void)xfs_dir2_data_freeinsert(d, newdup, needlogp);
@@ -712,7 +712,7 @@ xfs_dir2_data_use_free(
712 ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG); 712 ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG);
713 ASSERT(offset >= (char *)dup - (char *)d); 713 ASSERT(offset >= (char *)dup - (char *)d);
714 ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)d); 714 ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)d);
715 ASSERT((char *)dup - (char *)d == be16_to_cpu(*XFS_DIR2_DATA_UNUSED_TAG_P(dup))); 715 ASSERT((char *)dup - (char *)d == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)));
716 /* 716 /*
717 * Look up the entry in the bestfree table. 717 * Look up the entry in the bestfree table.
718 */ 718 */
@@ -745,7 +745,7 @@ xfs_dir2_data_use_free(
745 newdup = (xfs_dir2_data_unused_t *)((char *)d + offset + len); 745 newdup = (xfs_dir2_data_unused_t *)((char *)d + offset + len);
746 newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); 746 newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
747 newdup->length = cpu_to_be16(oldlen - len); 747 newdup->length = cpu_to_be16(oldlen - len);
748 *XFS_DIR2_DATA_UNUSED_TAG_P(newdup) = 748 *xfs_dir2_data_unused_tag_p(newdup) =
749 cpu_to_be16((char *)newdup - (char *)d); 749 cpu_to_be16((char *)newdup - (char *)d);
750 xfs_dir2_data_log_unused(tp, bp, newdup); 750 xfs_dir2_data_log_unused(tp, bp, newdup);
751 /* 751 /*
@@ -772,7 +772,7 @@ xfs_dir2_data_use_free(
772 else if (matchback) { 772 else if (matchback) {
773 newdup = dup; 773 newdup = dup;
774 newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup); 774 newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup);
775 *XFS_DIR2_DATA_UNUSED_TAG_P(newdup) = 775 *xfs_dir2_data_unused_tag_p(newdup) =
776 cpu_to_be16((char *)newdup - (char *)d); 776 cpu_to_be16((char *)newdup - (char *)d);
777 xfs_dir2_data_log_unused(tp, bp, newdup); 777 xfs_dir2_data_log_unused(tp, bp, newdup);
778 /* 778 /*
@@ -799,13 +799,13 @@ xfs_dir2_data_use_free(
799 else { 799 else {
800 newdup = dup; 800 newdup = dup;
801 newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup); 801 newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup);
802 *XFS_DIR2_DATA_UNUSED_TAG_P(newdup) = 802 *xfs_dir2_data_unused_tag_p(newdup) =
803 cpu_to_be16((char *)newdup - (char *)d); 803 cpu_to_be16((char *)newdup - (char *)d);
804 xfs_dir2_data_log_unused(tp, bp, newdup); 804 xfs_dir2_data_log_unused(tp, bp, newdup);
805 newdup2 = (xfs_dir2_data_unused_t *)((char *)d + offset + len); 805 newdup2 = (xfs_dir2_data_unused_t *)((char *)d + offset + len);
806 newdup2->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); 806 newdup2->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
807 newdup2->length = cpu_to_be16(oldlen - len - be16_to_cpu(newdup->length)); 807 newdup2->length = cpu_to_be16(oldlen - len - be16_to_cpu(newdup->length));
808 *XFS_DIR2_DATA_UNUSED_TAG_P(newdup2) = 808 *xfs_dir2_data_unused_tag_p(newdup2) =
809 cpu_to_be16((char *)newdup2 - (char *)d); 809 cpu_to_be16((char *)newdup2 - (char *)d);
810 xfs_dir2_data_log_unused(tp, bp, newdup2); 810 xfs_dir2_data_log_unused(tp, bp, newdup2);
811 /* 811 /*
diff --git a/fs/xfs/xfs_dir2_data.h b/fs/xfs/xfs_dir2_data.h
index c94c9099cfb1..b816e0252739 100644
--- a/fs/xfs/xfs_dir2_data.h
+++ b/fs/xfs/xfs_dir2_data.h
@@ -44,7 +44,7 @@ struct xfs_trans;
44#define XFS_DIR2_DATA_SPACE 0 44#define XFS_DIR2_DATA_SPACE 0
45#define XFS_DIR2_DATA_OFFSET (XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE) 45#define XFS_DIR2_DATA_OFFSET (XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE)
46#define XFS_DIR2_DATA_FIRSTDB(mp) \ 46#define XFS_DIR2_DATA_FIRSTDB(mp) \
47 XFS_DIR2_BYTE_TO_DB(mp, XFS_DIR2_DATA_OFFSET) 47 xfs_dir2_byte_to_db(mp, XFS_DIR2_DATA_OFFSET)
48 48
49/* 49/*
50 * Offsets of . and .. in data space (always block 0) 50 * Offsets of . and .. in data space (always block 0)
@@ -52,9 +52,9 @@ struct xfs_trans;
52#define XFS_DIR2_DATA_DOT_OFFSET \ 52#define XFS_DIR2_DATA_DOT_OFFSET \
53 ((xfs_dir2_data_aoff_t)sizeof(xfs_dir2_data_hdr_t)) 53 ((xfs_dir2_data_aoff_t)sizeof(xfs_dir2_data_hdr_t))
54#define XFS_DIR2_DATA_DOTDOT_OFFSET \ 54#define XFS_DIR2_DATA_DOTDOT_OFFSET \
55 (XFS_DIR2_DATA_DOT_OFFSET + XFS_DIR2_DATA_ENTSIZE(1)) 55 (XFS_DIR2_DATA_DOT_OFFSET + xfs_dir2_data_entsize(1))
56#define XFS_DIR2_DATA_FIRST_OFFSET \ 56#define XFS_DIR2_DATA_FIRST_OFFSET \
57 (XFS_DIR2_DATA_DOTDOT_OFFSET + XFS_DIR2_DATA_ENTSIZE(2)) 57 (XFS_DIR2_DATA_DOTDOT_OFFSET + xfs_dir2_data_entsize(2))
58 58
59/* 59/*
60 * Structures. 60 * Structures.
@@ -123,7 +123,6 @@ typedef struct xfs_dir2_data {
123/* 123/*
124 * Size of a data entry. 124 * Size of a data entry.
125 */ 125 */
126#define XFS_DIR2_DATA_ENTSIZE(n) xfs_dir2_data_entsize(n)
127static inline int xfs_dir2_data_entsize(int n) 126static inline int xfs_dir2_data_entsize(int n)
128{ 127{
129 return (int)roundup(offsetof(xfs_dir2_data_entry_t, name[0]) + (n) + \ 128 return (int)roundup(offsetof(xfs_dir2_data_entry_t, name[0]) + (n) + \
@@ -133,19 +132,16 @@ static inline int xfs_dir2_data_entsize(int n)
133/* 132/*
134 * Pointer to an entry's tag word. 133 * Pointer to an entry's tag word.
135 */ 134 */
136#define XFS_DIR2_DATA_ENTRY_TAG_P(dep) xfs_dir2_data_entry_tag_p(dep)
137static inline __be16 * 135static inline __be16 *
138xfs_dir2_data_entry_tag_p(xfs_dir2_data_entry_t *dep) 136xfs_dir2_data_entry_tag_p(xfs_dir2_data_entry_t *dep)
139{ 137{
140 return (__be16 *)((char *)dep + 138 return (__be16 *)((char *)dep +
141 XFS_DIR2_DATA_ENTSIZE(dep->namelen) - sizeof(__be16)); 139 xfs_dir2_data_entsize(dep->namelen) - sizeof(__be16));
142} 140}
143 141
144/* 142/*
145 * Pointer to a freespace's tag word. 143 * Pointer to a freespace's tag word.
146 */ 144 */
147#define XFS_DIR2_DATA_UNUSED_TAG_P(dup) \
148 xfs_dir2_data_unused_tag_p(dup)
149static inline __be16 * 145static inline __be16 *
150xfs_dir2_data_unused_tag_p(xfs_dir2_data_unused_t *dup) 146xfs_dir2_data_unused_tag_p(xfs_dir2_data_unused_t *dup)
151{ 147{
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index db14ea71459f..1b73c9ad646a 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -92,7 +92,7 @@ xfs_dir2_block_to_leaf(
92 if ((error = xfs_da_grow_inode(args, &blkno))) { 92 if ((error = xfs_da_grow_inode(args, &blkno))) {
93 return error; 93 return error;
94 } 94 }
95 ldb = XFS_DIR2_DA_TO_DB(mp, blkno); 95 ldb = xfs_dir2_da_to_db(mp, blkno);
96 ASSERT(ldb == XFS_DIR2_LEAF_FIRSTDB(mp)); 96 ASSERT(ldb == XFS_DIR2_LEAF_FIRSTDB(mp));
97 /* 97 /*
98 * Initialize the leaf block, get a buffer for it. 98 * Initialize the leaf block, get a buffer for it.
@@ -104,8 +104,8 @@ xfs_dir2_block_to_leaf(
104 leaf = lbp->data; 104 leaf = lbp->data;
105 block = dbp->data; 105 block = dbp->data;
106 xfs_dir2_data_check(dp, dbp); 106 xfs_dir2_data_check(dp, dbp);
107 btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); 107 btp = xfs_dir2_block_tail_p(mp, block);
108 blp = XFS_DIR2_BLOCK_LEAF_P(btp); 108 blp = xfs_dir2_block_leaf_p(btp);
109 /* 109 /*
110 * Set the counts in the leaf header. 110 * Set the counts in the leaf header.
111 */ 111 */
@@ -137,9 +137,9 @@ xfs_dir2_block_to_leaf(
137 /* 137 /*
138 * Set up leaf tail and bests table. 138 * Set up leaf tail and bests table.
139 */ 139 */
140 ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); 140 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
141 ltp->bestcount = cpu_to_be32(1); 141 ltp->bestcount = cpu_to_be32(1);
142 bestsp = XFS_DIR2_LEAF_BESTS_P(ltp); 142 bestsp = xfs_dir2_leaf_bests_p(ltp);
143 bestsp[0] = block->hdr.bestfree[0].length; 143 bestsp[0] = block->hdr.bestfree[0].length;
144 /* 144 /*
145 * Log the data header and leaf bests table. 145 * Log the data header and leaf bests table.
@@ -209,9 +209,9 @@ xfs_dir2_leaf_addname(
209 */ 209 */
210 index = xfs_dir2_leaf_search_hash(args, lbp); 210 index = xfs_dir2_leaf_search_hash(args, lbp);
211 leaf = lbp->data; 211 leaf = lbp->data;
212 ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); 212 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
213 bestsp = XFS_DIR2_LEAF_BESTS_P(ltp); 213 bestsp = xfs_dir2_leaf_bests_p(ltp);
214 length = XFS_DIR2_DATA_ENTSIZE(args->namelen); 214 length = xfs_dir2_data_entsize(args->namelen);
215 /* 215 /*
216 * See if there are any entries with the same hash value 216 * See if there are any entries with the same hash value
217 * and space in their block for the new entry. 217 * and space in their block for the new entry.
@@ -223,7 +223,7 @@ xfs_dir2_leaf_addname(
223 index++, lep++) { 223 index++, lep++) {
224 if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR) 224 if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
225 continue; 225 continue;
226 i = XFS_DIR2_DATAPTR_TO_DB(mp, be32_to_cpu(lep->address)); 226 i = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
227 ASSERT(i < be32_to_cpu(ltp->bestcount)); 227 ASSERT(i < be32_to_cpu(ltp->bestcount));
228 ASSERT(be16_to_cpu(bestsp[i]) != NULLDATAOFF); 228 ASSERT(be16_to_cpu(bestsp[i]) != NULLDATAOFF);
229 if (be16_to_cpu(bestsp[i]) >= length) { 229 if (be16_to_cpu(bestsp[i]) >= length) {
@@ -378,7 +378,7 @@ xfs_dir2_leaf_addname(
378 */ 378 */
379 else { 379 else {
380 if ((error = 380 if ((error =
381 xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, use_block), 381 xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, use_block),
382 -1, &dbp, XFS_DATA_FORK))) { 382 -1, &dbp, XFS_DATA_FORK))) {
383 xfs_da_brelse(tp, lbp); 383 xfs_da_brelse(tp, lbp);
384 return error; 384 return error;
@@ -407,7 +407,7 @@ xfs_dir2_leaf_addname(
407 dep->inumber = cpu_to_be64(args->inumber); 407 dep->inumber = cpu_to_be64(args->inumber);
408 dep->namelen = args->namelen; 408 dep->namelen = args->namelen;
409 memcpy(dep->name, args->name, dep->namelen); 409 memcpy(dep->name, args->name, dep->namelen);
410 tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); 410 tagp = xfs_dir2_data_entry_tag_p(dep);
411 *tagp = cpu_to_be16((char *)dep - (char *)data); 411 *tagp = cpu_to_be16((char *)dep - (char *)data);
412 /* 412 /*
413 * Need to scan fix up the bestfree table. 413 * Need to scan fix up the bestfree table.
@@ -529,7 +529,7 @@ xfs_dir2_leaf_addname(
529 * Fill in the new leaf entry. 529 * Fill in the new leaf entry.
530 */ 530 */
531 lep->hashval = cpu_to_be32(args->hashval); 531 lep->hashval = cpu_to_be32(args->hashval);
532 lep->address = cpu_to_be32(XFS_DIR2_DB_OFF_TO_DATAPTR(mp, use_block, 532 lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp, use_block,
533 be16_to_cpu(*tagp))); 533 be16_to_cpu(*tagp)));
534 /* 534 /*
535 * Log the leaf fields and give up the buffers. 535 * Log the leaf fields and give up the buffers.
@@ -567,13 +567,13 @@ xfs_dir2_leaf_check(
567 * Should factor in the size of the bests table as well. 567 * Should factor in the size of the bests table as well.
568 * We can deduce a value for that from di_size. 568 * We can deduce a value for that from di_size.
569 */ 569 */
570 ASSERT(be16_to_cpu(leaf->hdr.count) <= XFS_DIR2_MAX_LEAF_ENTS(mp)); 570 ASSERT(be16_to_cpu(leaf->hdr.count) <= xfs_dir2_max_leaf_ents(mp));
571 ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); 571 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
572 /* 572 /*
573 * Leaves and bests don't overlap. 573 * Leaves and bests don't overlap.
574 */ 574 */
575 ASSERT((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] <= 575 ASSERT((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] <=
576 (char *)XFS_DIR2_LEAF_BESTS_P(ltp)); 576 (char *)xfs_dir2_leaf_bests_p(ltp));
577 /* 577 /*
578 * Check hash value order, count stale entries. 578 * Check hash value order, count stale entries.
579 */ 579 */
@@ -815,12 +815,12 @@ xfs_dir2_leaf_getdents(
815 * Inside the loop we keep the main offset value as a byte offset 815 * Inside the loop we keep the main offset value as a byte offset
816 * in the directory file. 816 * in the directory file.
817 */ 817 */
818 curoff = XFS_DIR2_DATAPTR_TO_BYTE(mp, uio->uio_offset); 818 curoff = xfs_dir2_dataptr_to_byte(mp, uio->uio_offset);
819 /* 819 /*
820 * Force this conversion through db so we truncate the offset 820 * Force this conversion through db so we truncate the offset
821 * down to get the start of the data block. 821 * down to get the start of the data block.
822 */ 822 */
823 map_off = XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_BYTE_TO_DB(mp, curoff)); 823 map_off = xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, curoff));
824 /* 824 /*
825 * Loop over directory entries until we reach the end offset. 825 * Loop over directory entries until we reach the end offset.
826 * Get more blocks and readahead as necessary. 826 * Get more blocks and readahead as necessary.
@@ -870,7 +870,7 @@ xfs_dir2_leaf_getdents(
870 */ 870 */
871 if (1 + ra_want > map_blocks && 871 if (1 + ra_want > map_blocks &&
872 map_off < 872 map_off <
873 XFS_DIR2_BYTE_TO_DA(mp, XFS_DIR2_LEAF_OFFSET)) { 873 xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET)) {
874 /* 874 /*
875 * Get more bmaps, fill in after the ones 875 * Get more bmaps, fill in after the ones
876 * we already have in the table. 876 * we already have in the table.
@@ -878,7 +878,7 @@ xfs_dir2_leaf_getdents(
878 nmap = map_size - map_valid; 878 nmap = map_size - map_valid;
879 error = xfs_bmapi(tp, dp, 879 error = xfs_bmapi(tp, dp,
880 map_off, 880 map_off,
881 XFS_DIR2_BYTE_TO_DA(mp, 881 xfs_dir2_byte_to_da(mp,
882 XFS_DIR2_LEAF_OFFSET) - map_off, 882 XFS_DIR2_LEAF_OFFSET) - map_off,
883 XFS_BMAPI_METADATA, NULL, 0, 883 XFS_BMAPI_METADATA, NULL, 0,
884 &map[map_valid], &nmap, NULL, NULL); 884 &map[map_valid], &nmap, NULL, NULL);
@@ -903,7 +903,7 @@ xfs_dir2_leaf_getdents(
903 map[map_valid + nmap - 1].br_blockcount; 903 map[map_valid + nmap - 1].br_blockcount;
904 else 904 else
905 map_off = 905 map_off =
906 XFS_DIR2_BYTE_TO_DA(mp, 906 xfs_dir2_byte_to_da(mp,
907 XFS_DIR2_LEAF_OFFSET); 907 XFS_DIR2_LEAF_OFFSET);
908 /* 908 /*
909 * Look for holes in the mapping, and 909 * Look for holes in the mapping, and
@@ -931,14 +931,14 @@ xfs_dir2_leaf_getdents(
931 * No valid mappings, so no more data blocks. 931 * No valid mappings, so no more data blocks.
932 */ 932 */
933 if (!map_valid) { 933 if (!map_valid) {
934 curoff = XFS_DIR2_DA_TO_BYTE(mp, map_off); 934 curoff = xfs_dir2_da_to_byte(mp, map_off);
935 break; 935 break;
936 } 936 }
937 /* 937 /*
938 * Read the directory block starting at the first 938 * Read the directory block starting at the first
939 * mapping. 939 * mapping.
940 */ 940 */
941 curdb = XFS_DIR2_DA_TO_DB(mp, map->br_startoff); 941 curdb = xfs_dir2_da_to_db(mp, map->br_startoff);
942 error = xfs_da_read_buf(tp, dp, map->br_startoff, 942 error = xfs_da_read_buf(tp, dp, map->br_startoff,
943 map->br_blockcount >= mp->m_dirblkfsbs ? 943 map->br_blockcount >= mp->m_dirblkfsbs ?
944 XFS_FSB_TO_DADDR(mp, map->br_startblock) : 944 XFS_FSB_TO_DADDR(mp, map->br_startblock) :
@@ -1014,7 +1014,7 @@ xfs_dir2_leaf_getdents(
1014 /* 1014 /*
1015 * Having done a read, we need to set a new offset. 1015 * Having done a read, we need to set a new offset.
1016 */ 1016 */
1017 newoff = XFS_DIR2_DB_OFF_TO_BYTE(mp, curdb, 0); 1017 newoff = xfs_dir2_db_off_to_byte(mp, curdb, 0);
1018 /* 1018 /*
1019 * Start of the current block. 1019 * Start of the current block.
1020 */ 1020 */
@@ -1024,7 +1024,7 @@ xfs_dir2_leaf_getdents(
1024 * Make sure we're in the right block. 1024 * Make sure we're in the right block.
1025 */ 1025 */
1026 else if (curoff > newoff) 1026 else if (curoff > newoff)
1027 ASSERT(XFS_DIR2_BYTE_TO_DB(mp, curoff) == 1027 ASSERT(xfs_dir2_byte_to_db(mp, curoff) ==
1028 curdb); 1028 curdb);
1029 data = bp->data; 1029 data = bp->data;
1030 xfs_dir2_data_check(dp, bp); 1030 xfs_dir2_data_check(dp, bp);
@@ -1032,7 +1032,7 @@ xfs_dir2_leaf_getdents(
1032 * Find our position in the block. 1032 * Find our position in the block.
1033 */ 1033 */
1034 ptr = (char *)&data->u; 1034 ptr = (char *)&data->u;
1035 byteoff = XFS_DIR2_BYTE_TO_OFF(mp, curoff); 1035 byteoff = xfs_dir2_byte_to_off(mp, curoff);
1036 /* 1036 /*
1037 * Skip past the header. 1037 * Skip past the header.
1038 */ 1038 */
@@ -1054,15 +1054,15 @@ xfs_dir2_leaf_getdents(
1054 } 1054 }
1055 dep = (xfs_dir2_data_entry_t *)ptr; 1055 dep = (xfs_dir2_data_entry_t *)ptr;
1056 length = 1056 length =
1057 XFS_DIR2_DATA_ENTSIZE(dep->namelen); 1057 xfs_dir2_data_entsize(dep->namelen);
1058 ptr += length; 1058 ptr += length;
1059 } 1059 }
1060 /* 1060 /*
1061 * Now set our real offset. 1061 * Now set our real offset.
1062 */ 1062 */
1063 curoff = 1063 curoff =
1064 XFS_DIR2_DB_OFF_TO_BYTE(mp, 1064 xfs_dir2_db_off_to_byte(mp,
1065 XFS_DIR2_BYTE_TO_DB(mp, curoff), 1065 xfs_dir2_byte_to_db(mp, curoff),
1066 (char *)ptr - (char *)data); 1066 (char *)ptr - (char *)data);
1067 if (ptr >= (char *)data + mp->m_dirblksize) { 1067 if (ptr >= (char *)data + mp->m_dirblksize) {
1068 continue; 1068 continue;
@@ -1091,9 +1091,9 @@ xfs_dir2_leaf_getdents(
1091 1091
1092 p->namelen = dep->namelen; 1092 p->namelen = dep->namelen;
1093 1093
1094 length = XFS_DIR2_DATA_ENTSIZE(p->namelen); 1094 length = xfs_dir2_data_entsize(p->namelen);
1095 1095
1096 p->cook = XFS_DIR2_BYTE_TO_DATAPTR(mp, curoff + length); 1096 p->cook = xfs_dir2_byte_to_dataptr(mp, curoff + length);
1097 1097
1098 p->ino = be64_to_cpu(dep->inumber); 1098 p->ino = be64_to_cpu(dep->inumber);
1099#if XFS_BIG_INUMS 1099#if XFS_BIG_INUMS
@@ -1121,10 +1121,10 @@ xfs_dir2_leaf_getdents(
1121 * All done. Set output offset value to current offset. 1121 * All done. Set output offset value to current offset.
1122 */ 1122 */
1123 *eofp = eof; 1123 *eofp = eof;
1124 if (curoff > XFS_DIR2_DATAPTR_TO_BYTE(mp, XFS_DIR2_MAX_DATAPTR)) 1124 if (curoff > xfs_dir2_dataptr_to_byte(mp, XFS_DIR2_MAX_DATAPTR))
1125 uio->uio_offset = XFS_DIR2_MAX_DATAPTR; 1125 uio->uio_offset = XFS_DIR2_MAX_DATAPTR;
1126 else 1126 else
1127 uio->uio_offset = XFS_DIR2_BYTE_TO_DATAPTR(mp, curoff); 1127 uio->uio_offset = xfs_dir2_byte_to_dataptr(mp, curoff);
1128 kmem_free(map, map_size * sizeof(*map)); 1128 kmem_free(map, map_size * sizeof(*map));
1129 kmem_free(p, sizeof(*p)); 1129 kmem_free(p, sizeof(*p));
1130 if (bp) 1130 if (bp)
@@ -1159,7 +1159,7 @@ xfs_dir2_leaf_init(
1159 /* 1159 /*
1160 * Get the buffer for the block. 1160 * Get the buffer for the block.
1161 */ 1161 */
1162 error = xfs_da_get_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, bno), -1, &bp, 1162 error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, bno), -1, &bp,
1163 XFS_DATA_FORK); 1163 XFS_DATA_FORK);
1164 if (error) { 1164 if (error) {
1165 return error; 1165 return error;
@@ -1181,7 +1181,7 @@ xfs_dir2_leaf_init(
1181 * the block. 1181 * the block.
1182 */ 1182 */
1183 if (magic == XFS_DIR2_LEAF1_MAGIC) { 1183 if (magic == XFS_DIR2_LEAF1_MAGIC) {
1184 ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); 1184 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1185 ltp->bestcount = 0; 1185 ltp->bestcount = 0;
1186 xfs_dir2_leaf_log_tail(tp, bp); 1186 xfs_dir2_leaf_log_tail(tp, bp);
1187 } 1187 }
@@ -1206,9 +1206,9 @@ xfs_dir2_leaf_log_bests(
1206 1206
1207 leaf = bp->data; 1207 leaf = bp->data;
1208 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC); 1208 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC);
1209 ltp = XFS_DIR2_LEAF_TAIL_P(tp->t_mountp, leaf); 1209 ltp = xfs_dir2_leaf_tail_p(tp->t_mountp, leaf);
1210 firstb = XFS_DIR2_LEAF_BESTS_P(ltp) + first; 1210 firstb = xfs_dir2_leaf_bests_p(ltp) + first;
1211 lastb = XFS_DIR2_LEAF_BESTS_P(ltp) + last; 1211 lastb = xfs_dir2_leaf_bests_p(ltp) + last;
1212 xfs_da_log_buf(tp, bp, (uint)((char *)firstb - (char *)leaf), 1212 xfs_da_log_buf(tp, bp, (uint)((char *)firstb - (char *)leaf),
1213 (uint)((char *)lastb - (char *)leaf + sizeof(*lastb) - 1)); 1213 (uint)((char *)lastb - (char *)leaf + sizeof(*lastb) - 1));
1214} 1214}
@@ -1268,7 +1268,7 @@ xfs_dir2_leaf_log_tail(
1268 mp = tp->t_mountp; 1268 mp = tp->t_mountp;
1269 leaf = bp->data; 1269 leaf = bp->data;
1270 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC); 1270 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC);
1271 ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); 1271 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1272 xfs_da_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf), 1272 xfs_da_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf),
1273 (uint)(mp->m_dirblksize - 1)); 1273 (uint)(mp->m_dirblksize - 1));
1274} 1274}
@@ -1312,7 +1312,7 @@ xfs_dir2_leaf_lookup(
1312 */ 1312 */
1313 dep = (xfs_dir2_data_entry_t *) 1313 dep = (xfs_dir2_data_entry_t *)
1314 ((char *)dbp->data + 1314 ((char *)dbp->data +
1315 XFS_DIR2_DATAPTR_TO_OFF(dp->i_mount, be32_to_cpu(lep->address))); 1315 xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address)));
1316 /* 1316 /*
1317 * Return the found inode number. 1317 * Return the found inode number.
1318 */ 1318 */
@@ -1381,7 +1381,7 @@ xfs_dir2_leaf_lookup_int(
1381 /* 1381 /*
1382 * Get the new data block number. 1382 * Get the new data block number.
1383 */ 1383 */
1384 newdb = XFS_DIR2_DATAPTR_TO_DB(mp, be32_to_cpu(lep->address)); 1384 newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
1385 /* 1385 /*
1386 * If it's not the same as the old data block number, 1386 * If it's not the same as the old data block number,
1387 * need to pitch the old one and read the new one. 1387 * need to pitch the old one and read the new one.
@@ -1391,7 +1391,7 @@ xfs_dir2_leaf_lookup_int(
1391 xfs_da_brelse(tp, dbp); 1391 xfs_da_brelse(tp, dbp);
1392 if ((error = 1392 if ((error =
1393 xfs_da_read_buf(tp, dp, 1393 xfs_da_read_buf(tp, dp,
1394 XFS_DIR2_DB_TO_DA(mp, newdb), -1, &dbp, 1394 xfs_dir2_db_to_da(mp, newdb), -1, &dbp,
1395 XFS_DATA_FORK))) { 1395 XFS_DATA_FORK))) {
1396 xfs_da_brelse(tp, lbp); 1396 xfs_da_brelse(tp, lbp);
1397 return error; 1397 return error;
@@ -1404,7 +1404,7 @@ xfs_dir2_leaf_lookup_int(
1404 */ 1404 */
1405 dep = (xfs_dir2_data_entry_t *) 1405 dep = (xfs_dir2_data_entry_t *)
1406 ((char *)dbp->data + 1406 ((char *)dbp->data +
1407 XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(lep->address))); 1407 xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
1408 /* 1408 /*
1409 * If it matches then return it. 1409 * If it matches then return it.
1410 */ 1410 */
@@ -1469,20 +1469,20 @@ xfs_dir2_leaf_removename(
1469 * Point to the leaf entry, use that to point to the data entry. 1469 * Point to the leaf entry, use that to point to the data entry.
1470 */ 1470 */
1471 lep = &leaf->ents[index]; 1471 lep = &leaf->ents[index];
1472 db = XFS_DIR2_DATAPTR_TO_DB(mp, be32_to_cpu(lep->address)); 1472 db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
1473 dep = (xfs_dir2_data_entry_t *) 1473 dep = (xfs_dir2_data_entry_t *)
1474 ((char *)data + XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(lep->address))); 1474 ((char *)data + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
1475 needscan = needlog = 0; 1475 needscan = needlog = 0;
1476 oldbest = be16_to_cpu(data->hdr.bestfree[0].length); 1476 oldbest = be16_to_cpu(data->hdr.bestfree[0].length);
1477 ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); 1477 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1478 bestsp = XFS_DIR2_LEAF_BESTS_P(ltp); 1478 bestsp = xfs_dir2_leaf_bests_p(ltp);
1479 ASSERT(be16_to_cpu(bestsp[db]) == oldbest); 1479 ASSERT(be16_to_cpu(bestsp[db]) == oldbest);
1480 /* 1480 /*
1481 * Mark the former data entry unused. 1481 * Mark the former data entry unused.
1482 */ 1482 */
1483 xfs_dir2_data_make_free(tp, dbp, 1483 xfs_dir2_data_make_free(tp, dbp,
1484 (xfs_dir2_data_aoff_t)((char *)dep - (char *)data), 1484 (xfs_dir2_data_aoff_t)((char *)dep - (char *)data),
1485 XFS_DIR2_DATA_ENTSIZE(dep->namelen), &needlog, &needscan); 1485 xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
1486 /* 1486 /*
1487 * We just mark the leaf entry stale by putting a null in it. 1487 * We just mark the leaf entry stale by putting a null in it.
1488 */ 1488 */
@@ -1602,7 +1602,7 @@ xfs_dir2_leaf_replace(
1602 */ 1602 */
1603 dep = (xfs_dir2_data_entry_t *) 1603 dep = (xfs_dir2_data_entry_t *)
1604 ((char *)dbp->data + 1604 ((char *)dbp->data +
1605 XFS_DIR2_DATAPTR_TO_OFF(dp->i_mount, be32_to_cpu(lep->address))); 1605 xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address)));
1606 ASSERT(args->inumber != be64_to_cpu(dep->inumber)); 1606 ASSERT(args->inumber != be64_to_cpu(dep->inumber));
1607 /* 1607 /*
1608 * Put the new inode number in, log it. 1608 * Put the new inode number in, log it.
@@ -1698,7 +1698,7 @@ xfs_dir2_leaf_trim_data(
1698 /* 1698 /*
1699 * Read the offending data block. We need its buffer. 1699 * Read the offending data block. We need its buffer.
1700 */ 1700 */
1701 if ((error = xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, db), -1, &dbp, 1701 if ((error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, db), -1, &dbp,
1702 XFS_DATA_FORK))) { 1702 XFS_DATA_FORK))) {
1703 return error; 1703 return error;
1704 } 1704 }
@@ -1712,7 +1712,7 @@ xfs_dir2_leaf_trim_data(
1712 */ 1712 */
1713 1713
1714 leaf = lbp->data; 1714 leaf = lbp->data;
1715 ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); 1715 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1716 ASSERT(be16_to_cpu(data->hdr.bestfree[0].length) == 1716 ASSERT(be16_to_cpu(data->hdr.bestfree[0].length) ==
1717 mp->m_dirblksize - (uint)sizeof(data->hdr)); 1717 mp->m_dirblksize - (uint)sizeof(data->hdr));
1718 ASSERT(db == be32_to_cpu(ltp->bestcount) - 1); 1718 ASSERT(db == be32_to_cpu(ltp->bestcount) - 1);
@@ -1727,7 +1727,7 @@ xfs_dir2_leaf_trim_data(
1727 /* 1727 /*
1728 * Eliminate the last bests entry from the table. 1728 * Eliminate the last bests entry from the table.
1729 */ 1729 */
1730 bestsp = XFS_DIR2_LEAF_BESTS_P(ltp); 1730 bestsp = xfs_dir2_leaf_bests_p(ltp);
1731 be32_add(&ltp->bestcount, -1); 1731 be32_add(&ltp->bestcount, -1);
1732 memmove(&bestsp[1], &bestsp[0], be32_to_cpu(ltp->bestcount) * sizeof(*bestsp)); 1732 memmove(&bestsp[1], &bestsp[0], be32_to_cpu(ltp->bestcount) * sizeof(*bestsp));
1733 xfs_dir2_leaf_log_tail(tp, lbp); 1733 xfs_dir2_leaf_log_tail(tp, lbp);
@@ -1838,12 +1838,12 @@ xfs_dir2_node_to_leaf(
1838 /* 1838 /*
1839 * Set up the leaf tail from the freespace block. 1839 * Set up the leaf tail from the freespace block.
1840 */ 1840 */
1841 ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); 1841 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1842 ltp->bestcount = free->hdr.nvalid; 1842 ltp->bestcount = free->hdr.nvalid;
1843 /* 1843 /*
1844 * Set up the leaf bests table. 1844 * Set up the leaf bests table.
1845 */ 1845 */
1846 memcpy(XFS_DIR2_LEAF_BESTS_P(ltp), free->bests, 1846 memcpy(xfs_dir2_leaf_bests_p(ltp), free->bests,
1847 be32_to_cpu(ltp->bestcount) * sizeof(leaf->bests[0])); 1847 be32_to_cpu(ltp->bestcount) * sizeof(leaf->bests[0]));
1848 xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); 1848 xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
1849 xfs_dir2_leaf_log_tail(tp, lbp); 1849 xfs_dir2_leaf_log_tail(tp, lbp);
diff --git a/fs/xfs/xfs_dir2_leaf.h b/fs/xfs/xfs_dir2_leaf.h
index f57ca1162412..70c97f3f815e 100644
--- a/fs/xfs/xfs_dir2_leaf.h
+++ b/fs/xfs/xfs_dir2_leaf.h
@@ -32,7 +32,7 @@ struct xfs_trans;
32#define XFS_DIR2_LEAF_SPACE 1 32#define XFS_DIR2_LEAF_SPACE 1
33#define XFS_DIR2_LEAF_OFFSET (XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE) 33#define XFS_DIR2_LEAF_OFFSET (XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE)
34#define XFS_DIR2_LEAF_FIRSTDB(mp) \ 34#define XFS_DIR2_LEAF_FIRSTDB(mp) \
35 XFS_DIR2_BYTE_TO_DB(mp, XFS_DIR2_LEAF_OFFSET) 35 xfs_dir2_byte_to_db(mp, XFS_DIR2_LEAF_OFFSET)
36 36
37/* 37/*
38 * Offset in data space of a data entry. 38 * Offset in data space of a data entry.
@@ -82,7 +82,6 @@ typedef struct xfs_dir2_leaf {
82 * DB blocks here are logical directory block numbers, not filesystem blocks. 82 * DB blocks here are logical directory block numbers, not filesystem blocks.
83 */ 83 */
84 84
85#define XFS_DIR2_MAX_LEAF_ENTS(mp) xfs_dir2_max_leaf_ents(mp)
86static inline int xfs_dir2_max_leaf_ents(struct xfs_mount *mp) 85static inline int xfs_dir2_max_leaf_ents(struct xfs_mount *mp)
87{ 86{
88 return (int)(((mp)->m_dirblksize - (uint)sizeof(xfs_dir2_leaf_hdr_t)) / 87 return (int)(((mp)->m_dirblksize - (uint)sizeof(xfs_dir2_leaf_hdr_t)) /
@@ -92,7 +91,6 @@ static inline int xfs_dir2_max_leaf_ents(struct xfs_mount *mp)
92/* 91/*
93 * Get address of the bestcount field in the single-leaf block. 92 * Get address of the bestcount field in the single-leaf block.
94 */ 93 */
95#define XFS_DIR2_LEAF_TAIL_P(mp,lp) xfs_dir2_leaf_tail_p(mp, lp)
96static inline xfs_dir2_leaf_tail_t * 94static inline xfs_dir2_leaf_tail_t *
97xfs_dir2_leaf_tail_p(struct xfs_mount *mp, xfs_dir2_leaf_t *lp) 95xfs_dir2_leaf_tail_p(struct xfs_mount *mp, xfs_dir2_leaf_t *lp)
98{ 96{
@@ -104,7 +102,6 @@ xfs_dir2_leaf_tail_p(struct xfs_mount *mp, xfs_dir2_leaf_t *lp)
104/* 102/*
105 * Get address of the bests array in the single-leaf block. 103 * Get address of the bests array in the single-leaf block.
106 */ 104 */
107#define XFS_DIR2_LEAF_BESTS_P(ltp) xfs_dir2_leaf_bests_p(ltp)
108static inline __be16 * 105static inline __be16 *
109xfs_dir2_leaf_bests_p(xfs_dir2_leaf_tail_t *ltp) 106xfs_dir2_leaf_bests_p(xfs_dir2_leaf_tail_t *ltp)
110{ 107{
@@ -114,7 +111,6 @@ xfs_dir2_leaf_bests_p(xfs_dir2_leaf_tail_t *ltp)
114/* 111/*
115 * Convert dataptr to byte in file space 112 * Convert dataptr to byte in file space
116 */ 113 */
117#define XFS_DIR2_DATAPTR_TO_BYTE(mp,dp) xfs_dir2_dataptr_to_byte(mp, dp)
118static inline xfs_dir2_off_t 114static inline xfs_dir2_off_t
119xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) 115xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
120{ 116{
@@ -124,7 +120,6 @@ xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
124/* 120/*
125 * Convert byte in file space to dataptr. It had better be aligned. 121 * Convert byte in file space to dataptr. It had better be aligned.
126 */ 122 */
127#define XFS_DIR2_BYTE_TO_DATAPTR(mp,by) xfs_dir2_byte_to_dataptr(mp,by)
128static inline xfs_dir2_dataptr_t 123static inline xfs_dir2_dataptr_t
129xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by) 124xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by)
130{ 125{
@@ -134,7 +129,6 @@ xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by)
134/* 129/*
135 * Convert byte in space to (DB) block 130 * Convert byte in space to (DB) block
136 */ 131 */
137#define XFS_DIR2_BYTE_TO_DB(mp,by) xfs_dir2_byte_to_db(mp, by)
138static inline xfs_dir2_db_t 132static inline xfs_dir2_db_t
139xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by) 133xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by)
140{ 134{
@@ -145,17 +139,15 @@ xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by)
145/* 139/*
146 * Convert dataptr to a block number 140 * Convert dataptr to a block number
147 */ 141 */
148#define XFS_DIR2_DATAPTR_TO_DB(mp,dp) xfs_dir2_dataptr_to_db(mp, dp)
149static inline xfs_dir2_db_t 142static inline xfs_dir2_db_t
150xfs_dir2_dataptr_to_db(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) 143xfs_dir2_dataptr_to_db(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
151{ 144{
152 return XFS_DIR2_BYTE_TO_DB(mp, XFS_DIR2_DATAPTR_TO_BYTE(mp, dp)); 145 return xfs_dir2_byte_to_db(mp, xfs_dir2_dataptr_to_byte(mp, dp));
153} 146}
154 147
155/* 148/*
156 * Convert byte in space to offset in a block 149 * Convert byte in space to offset in a block
157 */ 150 */
158#define XFS_DIR2_BYTE_TO_OFF(mp,by) xfs_dir2_byte_to_off(mp, by)
159static inline xfs_dir2_data_aoff_t 151static inline xfs_dir2_data_aoff_t
160xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by) 152xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by)
161{ 153{
@@ -166,18 +158,15 @@ xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by)
166/* 158/*
167 * Convert dataptr to a byte offset in a block 159 * Convert dataptr to a byte offset in a block
168 */ 160 */
169#define XFS_DIR2_DATAPTR_TO_OFF(mp,dp) xfs_dir2_dataptr_to_off(mp, dp)
170static inline xfs_dir2_data_aoff_t 161static inline xfs_dir2_data_aoff_t
171xfs_dir2_dataptr_to_off(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) 162xfs_dir2_dataptr_to_off(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
172{ 163{
173 return XFS_DIR2_BYTE_TO_OFF(mp, XFS_DIR2_DATAPTR_TO_BYTE(mp, dp)); 164 return xfs_dir2_byte_to_off(mp, xfs_dir2_dataptr_to_byte(mp, dp));
174} 165}
175 166
176/* 167/*
177 * Convert block and offset to byte in space 168 * Convert block and offset to byte in space
178 */ 169 */
179#define XFS_DIR2_DB_OFF_TO_BYTE(mp,db,o) \
180 xfs_dir2_db_off_to_byte(mp, db, o)
181static inline xfs_dir2_off_t 170static inline xfs_dir2_off_t
182xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db, 171xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db,
183 xfs_dir2_data_aoff_t o) 172 xfs_dir2_data_aoff_t o)
@@ -189,7 +178,6 @@ xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db,
189/* 178/*
190 * Convert block (DB) to block (dablk) 179 * Convert block (DB) to block (dablk)
191 */ 180 */
192#define XFS_DIR2_DB_TO_DA(mp,db) xfs_dir2_db_to_da(mp, db)
193static inline xfs_dablk_t 181static inline xfs_dablk_t
194xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db) 182xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db)
195{ 183{
@@ -199,29 +187,25 @@ xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db)
199/* 187/*
200 * Convert byte in space to (DA) block 188 * Convert byte in space to (DA) block
201 */ 189 */
202#define XFS_DIR2_BYTE_TO_DA(mp,by) xfs_dir2_byte_to_da(mp, by)
203static inline xfs_dablk_t 190static inline xfs_dablk_t
204xfs_dir2_byte_to_da(struct xfs_mount *mp, xfs_dir2_off_t by) 191xfs_dir2_byte_to_da(struct xfs_mount *mp, xfs_dir2_off_t by)
205{ 192{
206 return XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_BYTE_TO_DB(mp, by)); 193 return xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, by));
207} 194}
208 195
209/* 196/*
210 * Convert block and offset to dataptr 197 * Convert block and offset to dataptr
211 */ 198 */
212#define XFS_DIR2_DB_OFF_TO_DATAPTR(mp,db,o) \
213 xfs_dir2_db_off_to_dataptr(mp, db, o)
214static inline xfs_dir2_dataptr_t 199static inline xfs_dir2_dataptr_t
215xfs_dir2_db_off_to_dataptr(struct xfs_mount *mp, xfs_dir2_db_t db, 200xfs_dir2_db_off_to_dataptr(struct xfs_mount *mp, xfs_dir2_db_t db,
216 xfs_dir2_data_aoff_t o) 201 xfs_dir2_data_aoff_t o)
217{ 202{
218 return XFS_DIR2_BYTE_TO_DATAPTR(mp, XFS_DIR2_DB_OFF_TO_BYTE(mp, db, o)); 203 return xfs_dir2_byte_to_dataptr(mp, xfs_dir2_db_off_to_byte(mp, db, o));
219} 204}
220 205
221/* 206/*
222 * Convert block (dablk) to block (DB) 207 * Convert block (dablk) to block (DB)
223 */ 208 */
224#define XFS_DIR2_DA_TO_DB(mp,da) xfs_dir2_da_to_db(mp, da)
225static inline xfs_dir2_db_t 209static inline xfs_dir2_db_t
226xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da) 210xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da)
227{ 211{
@@ -231,11 +215,10 @@ xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da)
231/* 215/*
232 * Convert block (dablk) to byte offset in space 216 * Convert block (dablk) to byte offset in space
233 */ 217 */
234#define XFS_DIR2_DA_TO_BYTE(mp,da) xfs_dir2_da_to_byte(mp, da)
235static inline xfs_dir2_off_t 218static inline xfs_dir2_off_t
236xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da) 219xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da)
237{ 220{
238 return XFS_DIR2_DB_OFF_TO_BYTE(mp, XFS_DIR2_DA_TO_DB(mp, da), 0); 221 return xfs_dir2_db_off_to_byte(mp, xfs_dir2_da_to_db(mp, da), 0);
239} 222}
240 223
241/* 224/*
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index d083c3819934..91c61d9632c8 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -136,14 +136,14 @@ xfs_dir2_leaf_to_node(
136 /* 136 /*
137 * Get the buffer for the new freespace block. 137 * Get the buffer for the new freespace block.
138 */ 138 */
139 if ((error = xfs_da_get_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, fdb), -1, &fbp, 139 if ((error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, fdb), -1, &fbp,
140 XFS_DATA_FORK))) { 140 XFS_DATA_FORK))) {
141 return error; 141 return error;
142 } 142 }
143 ASSERT(fbp != NULL); 143 ASSERT(fbp != NULL);
144 free = fbp->data; 144 free = fbp->data;
145 leaf = lbp->data; 145 leaf = lbp->data;
146 ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); 146 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
147 /* 147 /*
148 * Initialize the freespace block header. 148 * Initialize the freespace block header.
149 */ 149 */
@@ -155,7 +155,7 @@ xfs_dir2_leaf_to_node(
155 * Copy freespace entries from the leaf block to the new block. 155 * Copy freespace entries from the leaf block to the new block.
156 * Count active entries. 156 * Count active entries.
157 */ 157 */
158 for (i = n = 0, from = XFS_DIR2_LEAF_BESTS_P(ltp), to = free->bests; 158 for (i = n = 0, from = xfs_dir2_leaf_bests_p(ltp), to = free->bests;
159 i < be32_to_cpu(ltp->bestcount); i++, from++, to++) { 159 i < be32_to_cpu(ltp->bestcount); i++, from++, to++) {
160 if ((off = be16_to_cpu(*from)) != NULLDATAOFF) 160 if ((off = be16_to_cpu(*from)) != NULLDATAOFF)
161 n++; 161 n++;
@@ -215,7 +215,7 @@ xfs_dir2_leafn_add(
215 * a compact. 215 * a compact.
216 */ 216 */
217 217
218 if (be16_to_cpu(leaf->hdr.count) == XFS_DIR2_MAX_LEAF_ENTS(mp)) { 218 if (be16_to_cpu(leaf->hdr.count) == xfs_dir2_max_leaf_ents(mp)) {
219 if (!leaf->hdr.stale) 219 if (!leaf->hdr.stale)
220 return XFS_ERROR(ENOSPC); 220 return XFS_ERROR(ENOSPC);
221 compact = be16_to_cpu(leaf->hdr.stale) > 1; 221 compact = be16_to_cpu(leaf->hdr.stale) > 1;
@@ -327,7 +327,7 @@ xfs_dir2_leafn_add(
327 * Insert the new entry, log everything. 327 * Insert the new entry, log everything.
328 */ 328 */
329 lep->hashval = cpu_to_be32(args->hashval); 329 lep->hashval = cpu_to_be32(args->hashval);
330 lep->address = cpu_to_be32(XFS_DIR2_DB_OFF_TO_DATAPTR(mp, 330 lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp,
331 args->blkno, args->index)); 331 args->blkno, args->index));
332 xfs_dir2_leaf_log_header(tp, bp); 332 xfs_dir2_leaf_log_header(tp, bp);
333 xfs_dir2_leaf_log_ents(tp, bp, lfloglow, lfloghigh); 333 xfs_dir2_leaf_log_ents(tp, bp, lfloglow, lfloghigh);
@@ -352,7 +352,7 @@ xfs_dir2_leafn_check(
352 leaf = bp->data; 352 leaf = bp->data;
353 mp = dp->i_mount; 353 mp = dp->i_mount;
354 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); 354 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
355 ASSERT(be16_to_cpu(leaf->hdr.count) <= XFS_DIR2_MAX_LEAF_ENTS(mp)); 355 ASSERT(be16_to_cpu(leaf->hdr.count) <= xfs_dir2_max_leaf_ents(mp));
356 for (i = stale = 0; i < be16_to_cpu(leaf->hdr.count); i++) { 356 for (i = stale = 0; i < be16_to_cpu(leaf->hdr.count); i++) {
357 if (i + 1 < be16_to_cpu(leaf->hdr.count)) { 357 if (i + 1 < be16_to_cpu(leaf->hdr.count)) {
358 ASSERT(be32_to_cpu(leaf->ents[i].hashval) <= 358 ASSERT(be32_to_cpu(leaf->ents[i].hashval) <=
@@ -440,7 +440,7 @@ xfs_dir2_leafn_lookup_int(
440 if (args->addname) { 440 if (args->addname) {
441 curfdb = curbp ? state->extrablk.blkno : -1; 441 curfdb = curbp ? state->extrablk.blkno : -1;
442 curdb = -1; 442 curdb = -1;
443 length = XFS_DIR2_DATA_ENTSIZE(args->namelen); 443 length = xfs_dir2_data_entsize(args->namelen);
444 if ((free = (curbp ? curbp->data : NULL))) 444 if ((free = (curbp ? curbp->data : NULL)))
445 ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); 445 ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
446 } 446 }
@@ -465,7 +465,7 @@ xfs_dir2_leafn_lookup_int(
465 /* 465 /*
466 * Pull the data block number from the entry. 466 * Pull the data block number from the entry.
467 */ 467 */
468 newdb = XFS_DIR2_DATAPTR_TO_DB(mp, be32_to_cpu(lep->address)); 468 newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
469 /* 469 /*
470 * For addname, we're looking for a place to put the new entry. 470 * For addname, we're looking for a place to put the new entry.
471 * We want to use a data block with an entry of equal 471 * We want to use a data block with an entry of equal
@@ -482,7 +482,7 @@ xfs_dir2_leafn_lookup_int(
482 * Convert the data block to the free block 482 * Convert the data block to the free block
483 * holding its freespace information. 483 * holding its freespace information.
484 */ 484 */
485 newfdb = XFS_DIR2_DB_TO_FDB(mp, newdb); 485 newfdb = xfs_dir2_db_to_fdb(mp, newdb);
486 /* 486 /*
487 * If it's not the one we have in hand, 487 * If it's not the one we have in hand,
488 * read it in. 488 * read it in.
@@ -497,7 +497,7 @@ xfs_dir2_leafn_lookup_int(
497 * Read the free block. 497 * Read the free block.
498 */ 498 */
499 if ((error = xfs_da_read_buf(tp, dp, 499 if ((error = xfs_da_read_buf(tp, dp,
500 XFS_DIR2_DB_TO_DA(mp, 500 xfs_dir2_db_to_da(mp,
501 newfdb), 501 newfdb),
502 -1, &curbp, 502 -1, &curbp,
503 XFS_DATA_FORK))) { 503 XFS_DATA_FORK))) {
@@ -517,7 +517,7 @@ xfs_dir2_leafn_lookup_int(
517 /* 517 /*
518 * Get the index for our entry. 518 * Get the index for our entry.
519 */ 519 */
520 fi = XFS_DIR2_DB_TO_FDINDEX(mp, curdb); 520 fi = xfs_dir2_db_to_fdindex(mp, curdb);
521 /* 521 /*
522 * If it has room, return it. 522 * If it has room, return it.
523 */ 523 */
@@ -561,7 +561,7 @@ xfs_dir2_leafn_lookup_int(
561 */ 561 */
562 if ((error = 562 if ((error =
563 xfs_da_read_buf(tp, dp, 563 xfs_da_read_buf(tp, dp,
564 XFS_DIR2_DB_TO_DA(mp, newdb), -1, 564 xfs_dir2_db_to_da(mp, newdb), -1,
565 &curbp, XFS_DATA_FORK))) { 565 &curbp, XFS_DATA_FORK))) {
566 return error; 566 return error;
567 } 567 }
@@ -573,7 +573,7 @@ xfs_dir2_leafn_lookup_int(
573 */ 573 */
574 dep = (xfs_dir2_data_entry_t *) 574 dep = (xfs_dir2_data_entry_t *)
575 ((char *)curbp->data + 575 ((char *)curbp->data +
576 XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(lep->address))); 576 xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
577 /* 577 /*
578 * Compare the entry, return it if it matches. 578 * Compare the entry, return it if it matches.
579 */ 579 */
@@ -876,9 +876,9 @@ xfs_dir2_leafn_remove(
876 /* 876 /*
877 * Extract the data block and offset from the entry. 877 * Extract the data block and offset from the entry.
878 */ 878 */
879 db = XFS_DIR2_DATAPTR_TO_DB(mp, be32_to_cpu(lep->address)); 879 db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
880 ASSERT(dblk->blkno == db); 880 ASSERT(dblk->blkno == db);
881 off = XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(lep->address)); 881 off = xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address));
882 ASSERT(dblk->index == off); 882 ASSERT(dblk->index == off);
883 /* 883 /*
884 * Kill the leaf entry by marking it stale. 884 * Kill the leaf entry by marking it stale.
@@ -898,7 +898,7 @@ xfs_dir2_leafn_remove(
898 longest = be16_to_cpu(data->hdr.bestfree[0].length); 898 longest = be16_to_cpu(data->hdr.bestfree[0].length);
899 needlog = needscan = 0; 899 needlog = needscan = 0;
900 xfs_dir2_data_make_free(tp, dbp, off, 900 xfs_dir2_data_make_free(tp, dbp, off,
901 XFS_DIR2_DATA_ENTSIZE(dep->namelen), &needlog, &needscan); 901 xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
902 /* 902 /*
903 * Rescan the data block freespaces for bestfree. 903 * Rescan the data block freespaces for bestfree.
904 * Log the data block header if needed. 904 * Log the data block header if needed.
@@ -924,8 +924,8 @@ xfs_dir2_leafn_remove(
924 * Convert the data block number to a free block, 924 * Convert the data block number to a free block,
925 * read in the free block. 925 * read in the free block.
926 */ 926 */
927 fdb = XFS_DIR2_DB_TO_FDB(mp, db); 927 fdb = xfs_dir2_db_to_fdb(mp, db);
928 if ((error = xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, fdb), 928 if ((error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, fdb),
929 -1, &fbp, XFS_DATA_FORK))) { 929 -1, &fbp, XFS_DATA_FORK))) {
930 return error; 930 return error;
931 } 931 }
@@ -937,7 +937,7 @@ xfs_dir2_leafn_remove(
937 /* 937 /*
938 * Calculate which entry we need to fix. 938 * Calculate which entry we need to fix.
939 */ 939 */
940 findex = XFS_DIR2_DB_TO_FDINDEX(mp, db); 940 findex = xfs_dir2_db_to_fdindex(mp, db);
941 longest = be16_to_cpu(data->hdr.bestfree[0].length); 941 longest = be16_to_cpu(data->hdr.bestfree[0].length);
942 /* 942 /*
943 * If the data block is now empty we can get rid of it 943 * If the data block is now empty we can get rid of it
@@ -1073,7 +1073,7 @@ xfs_dir2_leafn_split(
1073 /* 1073 /*
1074 * Initialize the new leaf block. 1074 * Initialize the new leaf block.
1075 */ 1075 */
1076 error = xfs_dir2_leaf_init(args, XFS_DIR2_DA_TO_DB(mp, blkno), 1076 error = xfs_dir2_leaf_init(args, xfs_dir2_da_to_db(mp, blkno),
1077 &newblk->bp, XFS_DIR2_LEAFN_MAGIC); 1077 &newblk->bp, XFS_DIR2_LEAFN_MAGIC);
1078 if (error) { 1078 if (error) {
1079 return error; 1079 return error;
@@ -1385,7 +1385,7 @@ xfs_dir2_node_addname_int(
1385 dp = args->dp; 1385 dp = args->dp;
1386 mp = dp->i_mount; 1386 mp = dp->i_mount;
1387 tp = args->trans; 1387 tp = args->trans;
1388 length = XFS_DIR2_DATA_ENTSIZE(args->namelen); 1388 length = xfs_dir2_data_entsize(args->namelen);
1389 /* 1389 /*
1390 * If we came in with a freespace block that means that lookup 1390 * If we came in with a freespace block that means that lookup
1391 * found an entry with our hash value. This is the freespace 1391 * found an entry with our hash value. This is the freespace
@@ -1438,7 +1438,7 @@ xfs_dir2_node_addname_int(
1438 1438
1439 if ((error = xfs_bmap_last_offset(tp, dp, &fo, XFS_DATA_FORK))) 1439 if ((error = xfs_bmap_last_offset(tp, dp, &fo, XFS_DATA_FORK)))
1440 return error; 1440 return error;
1441 lastfbno = XFS_DIR2_DA_TO_DB(mp, (xfs_dablk_t)fo); 1441 lastfbno = xfs_dir2_da_to_db(mp, (xfs_dablk_t)fo);
1442 fbno = ifbno; 1442 fbno = ifbno;
1443 } 1443 }
1444 /* 1444 /*
@@ -1474,7 +1474,7 @@ xfs_dir2_node_addname_int(
1474 * to avoid it. 1474 * to avoid it.
1475 */ 1475 */
1476 if ((error = xfs_da_read_buf(tp, dp, 1476 if ((error = xfs_da_read_buf(tp, dp,
1477 XFS_DIR2_DB_TO_DA(mp, fbno), -2, &fbp, 1477 xfs_dir2_db_to_da(mp, fbno), -2, &fbp,
1478 XFS_DATA_FORK))) { 1478 XFS_DATA_FORK))) {
1479 return error; 1479 return error;
1480 } 1480 }
@@ -1550,9 +1550,9 @@ xfs_dir2_node_addname_int(
1550 * Get the freespace block corresponding to the data block 1550 * Get the freespace block corresponding to the data block
1551 * that was just allocated. 1551 * that was just allocated.
1552 */ 1552 */
1553 fbno = XFS_DIR2_DB_TO_FDB(mp, dbno); 1553 fbno = xfs_dir2_db_to_fdb(mp, dbno);
1554 if (unlikely(error = xfs_da_read_buf(tp, dp, 1554 if (unlikely(error = xfs_da_read_buf(tp, dp,
1555 XFS_DIR2_DB_TO_DA(mp, fbno), -2, &fbp, 1555 xfs_dir2_db_to_da(mp, fbno), -2, &fbp,
1556 XFS_DATA_FORK))) { 1556 XFS_DATA_FORK))) {
1557 xfs_da_buf_done(dbp); 1557 xfs_da_buf_done(dbp);
1558 return error; 1558 return error;
@@ -1567,14 +1567,14 @@ xfs_dir2_node_addname_int(
1567 return error; 1567 return error;
1568 } 1568 }
1569 1569
1570 if (unlikely(XFS_DIR2_DB_TO_FDB(mp, dbno) != fbno)) { 1570 if (unlikely(xfs_dir2_db_to_fdb(mp, dbno) != fbno)) {
1571 cmn_err(CE_ALERT, 1571 cmn_err(CE_ALERT,
1572 "xfs_dir2_node_addname_int: dir ino " 1572 "xfs_dir2_node_addname_int: dir ino "
1573 "%llu needed freesp block %lld for\n" 1573 "%llu needed freesp block %lld for\n"
1574 " data block %lld, got %lld\n" 1574 " data block %lld, got %lld\n"
1575 " ifbno %llu lastfbno %d\n", 1575 " ifbno %llu lastfbno %d\n",
1576 (unsigned long long)dp->i_ino, 1576 (unsigned long long)dp->i_ino,
1577 (long long)XFS_DIR2_DB_TO_FDB(mp, dbno), 1577 (long long)xfs_dir2_db_to_fdb(mp, dbno),
1578 (long long)dbno, (long long)fbno, 1578 (long long)dbno, (long long)fbno,
1579 (unsigned long long)ifbno, lastfbno); 1579 (unsigned long long)ifbno, lastfbno);
1580 if (fblk) { 1580 if (fblk) {
@@ -1598,7 +1598,7 @@ xfs_dir2_node_addname_int(
1598 * Get a buffer for the new block. 1598 * Get a buffer for the new block.
1599 */ 1599 */
1600 if ((error = xfs_da_get_buf(tp, dp, 1600 if ((error = xfs_da_get_buf(tp, dp,
1601 XFS_DIR2_DB_TO_DA(mp, fbno), 1601 xfs_dir2_db_to_da(mp, fbno),
1602 -1, &fbp, XFS_DATA_FORK))) { 1602 -1, &fbp, XFS_DATA_FORK))) {
1603 return error; 1603 return error;
1604 } 1604 }
@@ -1623,7 +1623,7 @@ xfs_dir2_node_addname_int(
1623 /* 1623 /*
1624 * Set the freespace block index from the data block number. 1624 * Set the freespace block index from the data block number.
1625 */ 1625 */
1626 findex = XFS_DIR2_DB_TO_FDINDEX(mp, dbno); 1626 findex = xfs_dir2_db_to_fdindex(mp, dbno);
1627 /* 1627 /*
1628 * If it's after the end of the current entries in the 1628 * If it's after the end of the current entries in the
1629 * freespace block, extend that table. 1629 * freespace block, extend that table.
@@ -1669,7 +1669,7 @@ xfs_dir2_node_addname_int(
1669 * Read the data block in. 1669 * Read the data block in.
1670 */ 1670 */
1671 if (unlikely( 1671 if (unlikely(
1672 error = xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, dbno), 1672 error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, dbno),
1673 -1, &dbp, XFS_DATA_FORK))) { 1673 -1, &dbp, XFS_DATA_FORK))) {
1674 if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL) 1674 if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL)
1675 xfs_da_buf_done(fbp); 1675 xfs_da_buf_done(fbp);
@@ -1698,7 +1698,7 @@ xfs_dir2_node_addname_int(
1698 dep->inumber = cpu_to_be64(args->inumber); 1698 dep->inumber = cpu_to_be64(args->inumber);
1699 dep->namelen = args->namelen; 1699 dep->namelen = args->namelen;
1700 memcpy(dep->name, args->name, dep->namelen); 1700 memcpy(dep->name, args->name, dep->namelen);
1701 tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); 1701 tagp = xfs_dir2_data_entry_tag_p(dep);
1702 *tagp = cpu_to_be16((char *)dep - (char *)data); 1702 *tagp = cpu_to_be16((char *)dep - (char *)data);
1703 xfs_dir2_data_log_entry(tp, dbp, dep); 1703 xfs_dir2_data_log_entry(tp, dbp, dep);
1704 /* 1704 /*
@@ -1904,7 +1904,7 @@ xfs_dir2_node_replace(
1904 ASSERT(be32_to_cpu(data->hdr.magic) == XFS_DIR2_DATA_MAGIC); 1904 ASSERT(be32_to_cpu(data->hdr.magic) == XFS_DIR2_DATA_MAGIC);
1905 dep = (xfs_dir2_data_entry_t *) 1905 dep = (xfs_dir2_data_entry_t *)
1906 ((char *)data + 1906 ((char *)data +
1907 XFS_DIR2_DATAPTR_TO_OFF(state->mp, be32_to_cpu(lep->address))); 1907 xfs_dir2_dataptr_to_off(state->mp, be32_to_cpu(lep->address)));
1908 ASSERT(inum != be64_to_cpu(dep->inumber)); 1908 ASSERT(inum != be64_to_cpu(dep->inumber));
1909 /* 1909 /*
1910 * Fill in the new inode number and log the entry. 1910 * Fill in the new inode number and log the entry.
@@ -1980,7 +1980,7 @@ xfs_dir2_node_trim_free(
1980 * Blow the block away. 1980 * Blow the block away.
1981 */ 1981 */
1982 if ((error = 1982 if ((error =
1983 xfs_dir2_shrink_inode(args, XFS_DIR2_DA_TO_DB(mp, (xfs_dablk_t)fo), 1983 xfs_dir2_shrink_inode(args, xfs_dir2_da_to_db(mp, (xfs_dablk_t)fo),
1984 bp))) { 1984 bp))) {
1985 /* 1985 /*
1986 * Can't fail with ENOSPC since that only happens with no 1986 * Can't fail with ENOSPC since that only happens with no
diff --git a/fs/xfs/xfs_dir2_node.h b/fs/xfs/xfs_dir2_node.h
index c7c870ee7857..dde72db3d695 100644
--- a/fs/xfs/xfs_dir2_node.h
+++ b/fs/xfs/xfs_dir2_node.h
@@ -36,7 +36,7 @@ struct xfs_trans;
36#define XFS_DIR2_FREE_SPACE 2 36#define XFS_DIR2_FREE_SPACE 2
37#define XFS_DIR2_FREE_OFFSET (XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE) 37#define XFS_DIR2_FREE_OFFSET (XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE)
38#define XFS_DIR2_FREE_FIRSTDB(mp) \ 38#define XFS_DIR2_FREE_FIRSTDB(mp) \
39 XFS_DIR2_BYTE_TO_DB(mp, XFS_DIR2_FREE_OFFSET) 39 xfs_dir2_byte_to_db(mp, XFS_DIR2_FREE_OFFSET)
40 40
41#define XFS_DIR2_FREE_MAGIC 0x58443246 /* XD2F */ 41#define XFS_DIR2_FREE_MAGIC 0x58443246 /* XD2F */
42 42
@@ -60,7 +60,6 @@ typedef struct xfs_dir2_free {
60/* 60/*
61 * Convert data space db to the corresponding free db. 61 * Convert data space db to the corresponding free db.
62 */ 62 */
63#define XFS_DIR2_DB_TO_FDB(mp,db) xfs_dir2_db_to_fdb(mp, db)
64static inline xfs_dir2_db_t 63static inline xfs_dir2_db_t
65xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db) 64xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db)
66{ 65{
@@ -70,7 +69,6 @@ xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db)
70/* 69/*
71 * Convert data space db to the corresponding index in a free db. 70 * Convert data space db to the corresponding index in a free db.
72 */ 71 */
73#define XFS_DIR2_DB_TO_FDINDEX(mp,db) xfs_dir2_db_to_fdindex(mp, db)
74static inline int 72static inline int
75xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db) 73xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db)
76{ 74{
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index 0cd77b17bf92..38fc4f22b76d 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -89,8 +89,8 @@ xfs_dir2_block_sfsize(
89 mp = dp->i_mount; 89 mp = dp->i_mount;
90 90
91 count = i8count = namelen = 0; 91 count = i8count = namelen = 0;
92 btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); 92 btp = xfs_dir2_block_tail_p(mp, block);
93 blp = XFS_DIR2_BLOCK_LEAF_P(btp); 93 blp = xfs_dir2_block_leaf_p(btp);
94 94
95 /* 95 /*
96 * Iterate over the block's data entries by using the leaf pointers. 96 * Iterate over the block's data entries by using the leaf pointers.
@@ -102,7 +102,7 @@ xfs_dir2_block_sfsize(
102 * Calculate the pointer to the entry at hand. 102 * Calculate the pointer to the entry at hand.
103 */ 103 */
104 dep = (xfs_dir2_data_entry_t *) 104 dep = (xfs_dir2_data_entry_t *)
105 ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, addr)); 105 ((char *)block + xfs_dir2_dataptr_to_off(mp, addr));
106 /* 106 /*
107 * Detect . and .., so we can special-case them. 107 * Detect . and .., so we can special-case them.
108 * . is not included in sf directories. 108 * . is not included in sf directories.
@@ -124,7 +124,7 @@ xfs_dir2_block_sfsize(
124 /* 124 /*
125 * Calculate the new size, see if we should give up yet. 125 * Calculate the new size, see if we should give up yet.
126 */ 126 */
127 size = XFS_DIR2_SF_HDR_SIZE(i8count) + /* header */ 127 size = xfs_dir2_sf_hdr_size(i8count) + /* header */
128 count + /* namelen */ 128 count + /* namelen */
129 count * (uint)sizeof(xfs_dir2_sf_off_t) + /* offset */ 129 count * (uint)sizeof(xfs_dir2_sf_off_t) + /* offset */
130 namelen + /* name */ 130 namelen + /* name */
@@ -139,7 +139,7 @@ xfs_dir2_block_sfsize(
139 */ 139 */
140 sfhp->count = count; 140 sfhp->count = count;
141 sfhp->i8count = i8count; 141 sfhp->i8count = i8count;
142 XFS_DIR2_SF_PUT_INUMBER((xfs_dir2_sf_t *)sfhp, &parent, &sfhp->parent); 142 xfs_dir2_sf_put_inumber((xfs_dir2_sf_t *)sfhp, &parent, &sfhp->parent);
143 return size; 143 return size;
144} 144}
145 145
@@ -199,15 +199,15 @@ xfs_dir2_block_to_sf(
199 * Copy the header into the newly allocate local space. 199 * Copy the header into the newly allocate local space.
200 */ 200 */
201 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 201 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
202 memcpy(sfp, sfhp, XFS_DIR2_SF_HDR_SIZE(sfhp->i8count)); 202 memcpy(sfp, sfhp, xfs_dir2_sf_hdr_size(sfhp->i8count));
203 dp->i_d.di_size = size; 203 dp->i_d.di_size = size;
204 /* 204 /*
205 * Set up to loop over the block's entries. 205 * Set up to loop over the block's entries.
206 */ 206 */
207 btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); 207 btp = xfs_dir2_block_tail_p(mp, block);
208 ptr = (char *)block->u; 208 ptr = (char *)block->u;
209 endptr = (char *)XFS_DIR2_BLOCK_LEAF_P(btp); 209 endptr = (char *)xfs_dir2_block_leaf_p(btp);
210 sfep = XFS_DIR2_SF_FIRSTENTRY(sfp); 210 sfep = xfs_dir2_sf_firstentry(sfp);
211 /* 211 /*
212 * Loop over the active and unused entries. 212 * Loop over the active and unused entries.
213 * Stop when we reach the leaf/tail portion of the block. 213 * Stop when we reach the leaf/tail portion of the block.
@@ -233,22 +233,22 @@ xfs_dir2_block_to_sf(
233 else if (dep->namelen == 2 && 233 else if (dep->namelen == 2 &&
234 dep->name[0] == '.' && dep->name[1] == '.') 234 dep->name[0] == '.' && dep->name[1] == '.')
235 ASSERT(be64_to_cpu(dep->inumber) == 235 ASSERT(be64_to_cpu(dep->inumber) ==
236 XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent)); 236 xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent));
237 /* 237 /*
238 * Normal entry, copy it into shortform. 238 * Normal entry, copy it into shortform.
239 */ 239 */
240 else { 240 else {
241 sfep->namelen = dep->namelen; 241 sfep->namelen = dep->namelen;
242 XFS_DIR2_SF_PUT_OFFSET(sfep, 242 xfs_dir2_sf_put_offset(sfep,
243 (xfs_dir2_data_aoff_t) 243 (xfs_dir2_data_aoff_t)
244 ((char *)dep - (char *)block)); 244 ((char *)dep - (char *)block));
245 memcpy(sfep->name, dep->name, dep->namelen); 245 memcpy(sfep->name, dep->name, dep->namelen);
246 temp = be64_to_cpu(dep->inumber); 246 temp = be64_to_cpu(dep->inumber);
247 XFS_DIR2_SF_PUT_INUMBER(sfp, &temp, 247 xfs_dir2_sf_put_inumber(sfp, &temp,
248 XFS_DIR2_SF_INUMBERP(sfep)); 248 xfs_dir2_sf_inumberp(sfep));
249 sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep); 249 sfep = xfs_dir2_sf_nextentry(sfp, sfep);
250 } 250 }
251 ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen); 251 ptr += xfs_dir2_data_entsize(dep->namelen);
252 } 252 }
253 ASSERT((char *)sfep - (char *)sfp == size); 253 ASSERT((char *)sfep - (char *)sfp == size);
254 xfs_dir2_sf_check(args); 254 xfs_dir2_sf_check(args);
@@ -294,11 +294,11 @@ xfs_dir2_sf_addname(
294 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); 294 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
295 ASSERT(dp->i_df.if_u1.if_data != NULL); 295 ASSERT(dp->i_df.if_u1.if_data != NULL);
296 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 296 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
297 ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count)); 297 ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
298 /* 298 /*
299 * Compute entry (and change in) size. 299 * Compute entry (and change in) size.
300 */ 300 */
301 add_entsize = XFS_DIR2_SF_ENTSIZE_BYNAME(sfp, args->namelen); 301 add_entsize = xfs_dir2_sf_entsize_byname(sfp, args->namelen);
302 incr_isize = add_entsize; 302 incr_isize = add_entsize;
303 objchange = 0; 303 objchange = 0;
304#if XFS_BIG_INUMS 304#if XFS_BIG_INUMS
@@ -392,7 +392,7 @@ xfs_dir2_sf_addname_easy(
392 /* 392 /*
393 * Grow the in-inode space. 393 * Grow the in-inode space.
394 */ 394 */
395 xfs_idata_realloc(dp, XFS_DIR2_SF_ENTSIZE_BYNAME(sfp, args->namelen), 395 xfs_idata_realloc(dp, xfs_dir2_sf_entsize_byname(sfp, args->namelen),
396 XFS_DATA_FORK); 396 XFS_DATA_FORK);
397 /* 397 /*
398 * Need to set up again due to realloc of the inode data. 398 * Need to set up again due to realloc of the inode data.
@@ -403,10 +403,10 @@ xfs_dir2_sf_addname_easy(
403 * Fill in the new entry. 403 * Fill in the new entry.
404 */ 404 */
405 sfep->namelen = args->namelen; 405 sfep->namelen = args->namelen;
406 XFS_DIR2_SF_PUT_OFFSET(sfep, offset); 406 xfs_dir2_sf_put_offset(sfep, offset);
407 memcpy(sfep->name, args->name, sfep->namelen); 407 memcpy(sfep->name, args->name, sfep->namelen);
408 XFS_DIR2_SF_PUT_INUMBER(sfp, &args->inumber, 408 xfs_dir2_sf_put_inumber(sfp, &args->inumber,
409 XFS_DIR2_SF_INUMBERP(sfep)); 409 xfs_dir2_sf_inumberp(sfep));
410 /* 410 /*
411 * Update the header and inode. 411 * Update the header and inode.
412 */ 412 */
@@ -463,14 +463,14 @@ xfs_dir2_sf_addname_hard(
463 * If it's going to end up at the end then oldsfep will point there. 463 * If it's going to end up at the end then oldsfep will point there.
464 */ 464 */
465 for (offset = XFS_DIR2_DATA_FIRST_OFFSET, 465 for (offset = XFS_DIR2_DATA_FIRST_OFFSET,
466 oldsfep = XFS_DIR2_SF_FIRSTENTRY(oldsfp), 466 oldsfep = xfs_dir2_sf_firstentry(oldsfp),
467 add_datasize = XFS_DIR2_DATA_ENTSIZE(args->namelen), 467 add_datasize = xfs_dir2_data_entsize(args->namelen),
468 eof = (char *)oldsfep == &buf[old_isize]; 468 eof = (char *)oldsfep == &buf[old_isize];
469 !eof; 469 !eof;
470 offset = new_offset + XFS_DIR2_DATA_ENTSIZE(oldsfep->namelen), 470 offset = new_offset + xfs_dir2_data_entsize(oldsfep->namelen),
471 oldsfep = XFS_DIR2_SF_NEXTENTRY(oldsfp, oldsfep), 471 oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep),
472 eof = (char *)oldsfep == &buf[old_isize]) { 472 eof = (char *)oldsfep == &buf[old_isize]) {
473 new_offset = XFS_DIR2_SF_GET_OFFSET(oldsfep); 473 new_offset = xfs_dir2_sf_get_offset(oldsfep);
474 if (offset + add_datasize <= new_offset) 474 if (offset + add_datasize <= new_offset)
475 break; 475 break;
476 } 476 }
@@ -495,10 +495,10 @@ xfs_dir2_sf_addname_hard(
495 * Fill in the new entry, and update the header counts. 495 * Fill in the new entry, and update the header counts.
496 */ 496 */
497 sfep->namelen = args->namelen; 497 sfep->namelen = args->namelen;
498 XFS_DIR2_SF_PUT_OFFSET(sfep, offset); 498 xfs_dir2_sf_put_offset(sfep, offset);
499 memcpy(sfep->name, args->name, sfep->namelen); 499 memcpy(sfep->name, args->name, sfep->namelen);
500 XFS_DIR2_SF_PUT_INUMBER(sfp, &args->inumber, 500 xfs_dir2_sf_put_inumber(sfp, &args->inumber,
501 XFS_DIR2_SF_INUMBERP(sfep)); 501 xfs_dir2_sf_inumberp(sfep));
502 sfp->hdr.count++; 502 sfp->hdr.count++;
503#if XFS_BIG_INUMS 503#if XFS_BIG_INUMS
504 if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange) 504 if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange)
@@ -508,7 +508,7 @@ xfs_dir2_sf_addname_hard(
508 * If there's more left to copy, do that. 508 * If there's more left to copy, do that.
509 */ 509 */
510 if (!eof) { 510 if (!eof) {
511 sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep); 511 sfep = xfs_dir2_sf_nextentry(sfp, sfep);
512 memcpy(sfep, oldsfep, old_isize - nbytes); 512 memcpy(sfep, oldsfep, old_isize - nbytes);
513 } 513 }
514 kmem_free(buf, old_isize); 514 kmem_free(buf, old_isize);
@@ -544,9 +544,9 @@ xfs_dir2_sf_addname_pick(
544 mp = dp->i_mount; 544 mp = dp->i_mount;
545 545
546 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 546 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
547 size = XFS_DIR2_DATA_ENTSIZE(args->namelen); 547 size = xfs_dir2_data_entsize(args->namelen);
548 offset = XFS_DIR2_DATA_FIRST_OFFSET; 548 offset = XFS_DIR2_DATA_FIRST_OFFSET;
549 sfep = XFS_DIR2_SF_FIRSTENTRY(sfp); 549 sfep = xfs_dir2_sf_firstentry(sfp);
550 holefit = 0; 550 holefit = 0;
551 /* 551 /*
552 * Loop over sf entries. 552 * Loop over sf entries.
@@ -555,10 +555,10 @@ xfs_dir2_sf_addname_pick(
555 */ 555 */
556 for (i = 0; i < sfp->hdr.count; i++) { 556 for (i = 0; i < sfp->hdr.count; i++) {
557 if (!holefit) 557 if (!holefit)
558 holefit = offset + size <= XFS_DIR2_SF_GET_OFFSET(sfep); 558 holefit = offset + size <= xfs_dir2_sf_get_offset(sfep);
559 offset = XFS_DIR2_SF_GET_OFFSET(sfep) + 559 offset = xfs_dir2_sf_get_offset(sfep) +
560 XFS_DIR2_DATA_ENTSIZE(sfep->namelen); 560 xfs_dir2_data_entsize(sfep->namelen);
561 sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep); 561 sfep = xfs_dir2_sf_nextentry(sfp, sfep);
562 } 562 }
563 /* 563 /*
564 * Calculate data bytes used excluding the new entry, if this 564 * Calculate data bytes used excluding the new entry, if this
@@ -617,18 +617,18 @@ xfs_dir2_sf_check(
617 617
618 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 618 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
619 offset = XFS_DIR2_DATA_FIRST_OFFSET; 619 offset = XFS_DIR2_DATA_FIRST_OFFSET;
620 ino = XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent); 620 ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
621 i8count = ino > XFS_DIR2_MAX_SHORT_INUM; 621 i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
622 622
623 for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp); 623 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
624 i < sfp->hdr.count; 624 i < sfp->hdr.count;
625 i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) { 625 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
626 ASSERT(XFS_DIR2_SF_GET_OFFSET(sfep) >= offset); 626 ASSERT(xfs_dir2_sf_get_offset(sfep) >= offset);
627 ino = XFS_DIR2_SF_GET_INUMBER(sfp, XFS_DIR2_SF_INUMBERP(sfep)); 627 ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep));
628 i8count += ino > XFS_DIR2_MAX_SHORT_INUM; 628 i8count += ino > XFS_DIR2_MAX_SHORT_INUM;
629 offset = 629 offset =
630 XFS_DIR2_SF_GET_OFFSET(sfep) + 630 xfs_dir2_sf_get_offset(sfep) +
631 XFS_DIR2_DATA_ENTSIZE(sfep->namelen); 631 xfs_dir2_data_entsize(sfep->namelen);
632 } 632 }
633 ASSERT(i8count == sfp->hdr.i8count); 633 ASSERT(i8count == sfp->hdr.i8count);
634 ASSERT(XFS_BIG_INUMS || i8count == 0); 634 ASSERT(XFS_BIG_INUMS || i8count == 0);
@@ -671,7 +671,7 @@ xfs_dir2_sf_create(
671 ASSERT(dp->i_df.if_flags & XFS_IFINLINE); 671 ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
672 ASSERT(dp->i_df.if_bytes == 0); 672 ASSERT(dp->i_df.if_bytes == 0);
673 i8count = pino > XFS_DIR2_MAX_SHORT_INUM; 673 i8count = pino > XFS_DIR2_MAX_SHORT_INUM;
674 size = XFS_DIR2_SF_HDR_SIZE(i8count); 674 size = xfs_dir2_sf_hdr_size(i8count);
675 /* 675 /*
676 * Make a buffer for the data. 676 * Make a buffer for the data.
677 */ 677 */
@@ -684,7 +684,7 @@ xfs_dir2_sf_create(
684 /* 684 /*
685 * Now can put in the inode number, since i8count is set. 685 * Now can put in the inode number, since i8count is set.
686 */ 686 */
687 XFS_DIR2_SF_PUT_INUMBER(sfp, &pino, &sfp->hdr.parent); 687 xfs_dir2_sf_put_inumber(sfp, &pino, &sfp->hdr.parent);
688 sfp->hdr.count = 0; 688 sfp->hdr.count = 0;
689 dp->i_d.di_size = size; 689 dp->i_d.di_size = size;
690 xfs_dir2_sf_check(args); 690 xfs_dir2_sf_check(args);
@@ -727,12 +727,12 @@ xfs_dir2_sf_getdents(
727 727
728 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 728 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
729 729
730 ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count)); 730 ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
731 731
732 /* 732 /*
733 * If the block number in the offset is out of range, we're done. 733 * If the block number in the offset is out of range, we're done.
734 */ 734 */
735 if (XFS_DIR2_DATAPTR_TO_DB(mp, dir_offset) > mp->m_dirdatablk) { 735 if (xfs_dir2_dataptr_to_db(mp, dir_offset) > mp->m_dirdatablk) {
736 *eofp = 1; 736 *eofp = 1;
737 return 0; 737 return 0;
738 } 738 }
@@ -747,9 +747,9 @@ xfs_dir2_sf_getdents(
747 * Put . entry unless we're starting past it. 747 * Put . entry unless we're starting past it.
748 */ 748 */
749 if (dir_offset <= 749 if (dir_offset <=
750 XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk, 750 xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
751 XFS_DIR2_DATA_DOT_OFFSET)) { 751 XFS_DIR2_DATA_DOT_OFFSET)) {
752 p.cook = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, 0, 752 p.cook = xfs_dir2_db_off_to_dataptr(mp, 0,
753 XFS_DIR2_DATA_DOTDOT_OFFSET); 753 XFS_DIR2_DATA_DOTDOT_OFFSET);
754 p.ino = dp->i_ino; 754 p.ino = dp->i_ino;
755#if XFS_BIG_INUMS 755#if XFS_BIG_INUMS
@@ -762,7 +762,7 @@ xfs_dir2_sf_getdents(
762 762
763 if (!p.done) { 763 if (!p.done) {
764 uio->uio_offset = 764 uio->uio_offset =
765 XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk, 765 xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
766 XFS_DIR2_DATA_DOT_OFFSET); 766 XFS_DIR2_DATA_DOT_OFFSET);
767 return error; 767 return error;
768 } 768 }
@@ -772,11 +772,11 @@ xfs_dir2_sf_getdents(
772 * Put .. entry unless we're starting past it. 772 * Put .. entry unless we're starting past it.
773 */ 773 */
774 if (dir_offset <= 774 if (dir_offset <=
775 XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk, 775 xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
776 XFS_DIR2_DATA_DOTDOT_OFFSET)) { 776 XFS_DIR2_DATA_DOTDOT_OFFSET)) {
777 p.cook = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk, 777 p.cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
778 XFS_DIR2_DATA_FIRST_OFFSET); 778 XFS_DIR2_DATA_FIRST_OFFSET);
779 p.ino = XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent); 779 p.ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
780#if XFS_BIG_INUMS 780#if XFS_BIG_INUMS
781 p.ino += mp->m_inoadd; 781 p.ino += mp->m_inoadd;
782#endif 782#endif
@@ -787,7 +787,7 @@ xfs_dir2_sf_getdents(
787 787
788 if (!p.done) { 788 if (!p.done) {
789 uio->uio_offset = 789 uio->uio_offset =
790 XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk, 790 xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
791 XFS_DIR2_DATA_DOTDOT_OFFSET); 791 XFS_DIR2_DATA_DOTDOT_OFFSET);
792 return error; 792 return error;
793 } 793 }
@@ -796,23 +796,23 @@ xfs_dir2_sf_getdents(
796 /* 796 /*
797 * Loop while there are more entries and put'ing works. 797 * Loop while there are more entries and put'ing works.
798 */ 798 */
799 for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp); 799 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
800 i < sfp->hdr.count; 800 i < sfp->hdr.count;
801 i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) { 801 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
802 802
803 off = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk, 803 off = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
804 XFS_DIR2_SF_GET_OFFSET(sfep)); 804 xfs_dir2_sf_get_offset(sfep));
805 805
806 if (dir_offset > off) 806 if (dir_offset > off)
807 continue; 807 continue;
808 808
809 p.namelen = sfep->namelen; 809 p.namelen = sfep->namelen;
810 810
811 p.cook = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk, 811 p.cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
812 XFS_DIR2_SF_GET_OFFSET(sfep) + 812 xfs_dir2_sf_get_offset(sfep) +
813 XFS_DIR2_DATA_ENTSIZE(p.namelen)); 813 xfs_dir2_data_entsize(p.namelen));
814 814
815 p.ino = XFS_DIR2_SF_GET_INUMBER(sfp, XFS_DIR2_SF_INUMBERP(sfep)); 815 p.ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep));
816#if XFS_BIG_INUMS 816#if XFS_BIG_INUMS
817 p.ino += mp->m_inoadd; 817 p.ino += mp->m_inoadd;
818#endif 818#endif
@@ -832,7 +832,7 @@ xfs_dir2_sf_getdents(
832 *eofp = 1; 832 *eofp = 1;
833 833
834 uio->uio_offset = 834 uio->uio_offset =
835 XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk + 1, 0); 835 xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0);
836 836
837 return 0; 837 return 0;
838} 838}
@@ -865,7 +865,7 @@ xfs_dir2_sf_lookup(
865 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); 865 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
866 ASSERT(dp->i_df.if_u1.if_data != NULL); 866 ASSERT(dp->i_df.if_u1.if_data != NULL);
867 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 867 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
868 ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count)); 868 ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
869 /* 869 /*
870 * Special case for . 870 * Special case for .
871 */ 871 */
@@ -878,21 +878,21 @@ xfs_dir2_sf_lookup(
878 */ 878 */
879 if (args->namelen == 2 && 879 if (args->namelen == 2 &&
880 args->name[0] == '.' && args->name[1] == '.') { 880 args->name[0] == '.' && args->name[1] == '.') {
881 args->inumber = XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent); 881 args->inumber = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
882 return XFS_ERROR(EEXIST); 882 return XFS_ERROR(EEXIST);
883 } 883 }
884 /* 884 /*
885 * Loop over all the entries trying to match ours. 885 * Loop over all the entries trying to match ours.
886 */ 886 */
887 for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp); 887 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
888 i < sfp->hdr.count; 888 i < sfp->hdr.count;
889 i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) { 889 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
890 if (sfep->namelen == args->namelen && 890 if (sfep->namelen == args->namelen &&
891 sfep->name[0] == args->name[0] && 891 sfep->name[0] == args->name[0] &&
892 memcmp(args->name, sfep->name, args->namelen) == 0) { 892 memcmp(args->name, sfep->name, args->namelen) == 0) {
893 args->inumber = 893 args->inumber =
894 XFS_DIR2_SF_GET_INUMBER(sfp, 894 xfs_dir2_sf_get_inumber(sfp,
895 XFS_DIR2_SF_INUMBERP(sfep)); 895 xfs_dir2_sf_inumberp(sfep));
896 return XFS_ERROR(EEXIST); 896 return XFS_ERROR(EEXIST);
897 } 897 }
898 } 898 }
@@ -934,19 +934,19 @@ xfs_dir2_sf_removename(
934 ASSERT(dp->i_df.if_bytes == oldsize); 934 ASSERT(dp->i_df.if_bytes == oldsize);
935 ASSERT(dp->i_df.if_u1.if_data != NULL); 935 ASSERT(dp->i_df.if_u1.if_data != NULL);
936 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 936 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
937 ASSERT(oldsize >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count)); 937 ASSERT(oldsize >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
938 /* 938 /*
939 * Loop over the old directory entries. 939 * Loop over the old directory entries.
940 * Find the one we're deleting. 940 * Find the one we're deleting.
941 */ 941 */
942 for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp); 942 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
943 i < sfp->hdr.count; 943 i < sfp->hdr.count;
944 i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) { 944 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
945 if (sfep->namelen == args->namelen && 945 if (sfep->namelen == args->namelen &&
946 sfep->name[0] == args->name[0] && 946 sfep->name[0] == args->name[0] &&
947 memcmp(sfep->name, args->name, args->namelen) == 0) { 947 memcmp(sfep->name, args->name, args->namelen) == 0) {
948 ASSERT(XFS_DIR2_SF_GET_INUMBER(sfp, 948 ASSERT(xfs_dir2_sf_get_inumber(sfp,
949 XFS_DIR2_SF_INUMBERP(sfep)) == 949 xfs_dir2_sf_inumberp(sfep)) ==
950 args->inumber); 950 args->inumber);
951 break; 951 break;
952 } 952 }
@@ -961,7 +961,7 @@ xfs_dir2_sf_removename(
961 * Calculate sizes. 961 * Calculate sizes.
962 */ 962 */
963 byteoff = (int)((char *)sfep - (char *)sfp); 963 byteoff = (int)((char *)sfep - (char *)sfp);
964 entsize = XFS_DIR2_SF_ENTSIZE_BYNAME(sfp, args->namelen); 964 entsize = xfs_dir2_sf_entsize_byname(sfp, args->namelen);
965 newsize = oldsize - entsize; 965 newsize = oldsize - entsize;
966 /* 966 /*
967 * Copy the part if any after the removed entry, sliding it down. 967 * Copy the part if any after the removed entry, sliding it down.
@@ -1027,7 +1027,7 @@ xfs_dir2_sf_replace(
1027 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); 1027 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
1028 ASSERT(dp->i_df.if_u1.if_data != NULL); 1028 ASSERT(dp->i_df.if_u1.if_data != NULL);
1029 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 1029 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
1030 ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count)); 1030 ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
1031#if XFS_BIG_INUMS 1031#if XFS_BIG_INUMS
1032 /* 1032 /*
1033 * New inode number is large, and need to convert to 8-byte inodes. 1033 * New inode number is large, and need to convert to 8-byte inodes.
@@ -1067,28 +1067,28 @@ xfs_dir2_sf_replace(
1067 if (args->namelen == 2 && 1067 if (args->namelen == 2 &&
1068 args->name[0] == '.' && args->name[1] == '.') { 1068 args->name[0] == '.' && args->name[1] == '.') {
1069#if XFS_BIG_INUMS || defined(DEBUG) 1069#if XFS_BIG_INUMS || defined(DEBUG)
1070 ino = XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent); 1070 ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
1071 ASSERT(args->inumber != ino); 1071 ASSERT(args->inumber != ino);
1072#endif 1072#endif
1073 XFS_DIR2_SF_PUT_INUMBER(sfp, &args->inumber, &sfp->hdr.parent); 1073 xfs_dir2_sf_put_inumber(sfp, &args->inumber, &sfp->hdr.parent);
1074 } 1074 }
1075 /* 1075 /*
1076 * Normal entry, look for the name. 1076 * Normal entry, look for the name.
1077 */ 1077 */
1078 else { 1078 else {
1079 for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp); 1079 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
1080 i < sfp->hdr.count; 1080 i < sfp->hdr.count;
1081 i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) { 1081 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
1082 if (sfep->namelen == args->namelen && 1082 if (sfep->namelen == args->namelen &&
1083 sfep->name[0] == args->name[0] && 1083 sfep->name[0] == args->name[0] &&
1084 memcmp(args->name, sfep->name, args->namelen) == 0) { 1084 memcmp(args->name, sfep->name, args->namelen) == 0) {
1085#if XFS_BIG_INUMS || defined(DEBUG) 1085#if XFS_BIG_INUMS || defined(DEBUG)
1086 ino = XFS_DIR2_SF_GET_INUMBER(sfp, 1086 ino = xfs_dir2_sf_get_inumber(sfp,
1087 XFS_DIR2_SF_INUMBERP(sfep)); 1087 xfs_dir2_sf_inumberp(sfep));
1088 ASSERT(args->inumber != ino); 1088 ASSERT(args->inumber != ino);
1089#endif 1089#endif
1090 XFS_DIR2_SF_PUT_INUMBER(sfp, &args->inumber, 1090 xfs_dir2_sf_put_inumber(sfp, &args->inumber,
1091 XFS_DIR2_SF_INUMBERP(sfep)); 1091 xfs_dir2_sf_inumberp(sfep));
1092 break; 1092 break;
1093 } 1093 }
1094 } 1094 }
@@ -1189,22 +1189,22 @@ xfs_dir2_sf_toino4(
1189 */ 1189 */
1190 sfp->hdr.count = oldsfp->hdr.count; 1190 sfp->hdr.count = oldsfp->hdr.count;
1191 sfp->hdr.i8count = 0; 1191 sfp->hdr.i8count = 0;
1192 ino = XFS_DIR2_SF_GET_INUMBER(oldsfp, &oldsfp->hdr.parent); 1192 ino = xfs_dir2_sf_get_inumber(oldsfp, &oldsfp->hdr.parent);
1193 XFS_DIR2_SF_PUT_INUMBER(sfp, &ino, &sfp->hdr.parent); 1193 xfs_dir2_sf_put_inumber(sfp, &ino, &sfp->hdr.parent);
1194 /* 1194 /*
1195 * Copy the entries field by field. 1195 * Copy the entries field by field.
1196 */ 1196 */
1197 for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp), 1197 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp),
1198 oldsfep = XFS_DIR2_SF_FIRSTENTRY(oldsfp); 1198 oldsfep = xfs_dir2_sf_firstentry(oldsfp);
1199 i < sfp->hdr.count; 1199 i < sfp->hdr.count;
1200 i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep), 1200 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep),
1201 oldsfep = XFS_DIR2_SF_NEXTENTRY(oldsfp, oldsfep)) { 1201 oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep)) {
1202 sfep->namelen = oldsfep->namelen; 1202 sfep->namelen = oldsfep->namelen;
1203 sfep->offset = oldsfep->offset; 1203 sfep->offset = oldsfep->offset;
1204 memcpy(sfep->name, oldsfep->name, sfep->namelen); 1204 memcpy(sfep->name, oldsfep->name, sfep->namelen);
1205 ino = XFS_DIR2_SF_GET_INUMBER(oldsfp, 1205 ino = xfs_dir2_sf_get_inumber(oldsfp,
1206 XFS_DIR2_SF_INUMBERP(oldsfep)); 1206 xfs_dir2_sf_inumberp(oldsfep));
1207 XFS_DIR2_SF_PUT_INUMBER(sfp, &ino, XFS_DIR2_SF_INUMBERP(sfep)); 1207 xfs_dir2_sf_put_inumber(sfp, &ino, xfs_dir2_sf_inumberp(sfep));
1208 } 1208 }
1209 /* 1209 /*
1210 * Clean up the inode. 1210 * Clean up the inode.
@@ -1266,22 +1266,22 @@ xfs_dir2_sf_toino8(
1266 */ 1266 */
1267 sfp->hdr.count = oldsfp->hdr.count; 1267 sfp->hdr.count = oldsfp->hdr.count;
1268 sfp->hdr.i8count = 1; 1268 sfp->hdr.i8count = 1;
1269 ino = XFS_DIR2_SF_GET_INUMBER(oldsfp, &oldsfp->hdr.parent); 1269 ino = xfs_dir2_sf_get_inumber(oldsfp, &oldsfp->hdr.parent);
1270 XFS_DIR2_SF_PUT_INUMBER(sfp, &ino, &sfp->hdr.parent); 1270 xfs_dir2_sf_put_inumber(sfp, &ino, &sfp->hdr.parent);
1271 /* 1271 /*
1272 * Copy the entries field by field. 1272 * Copy the entries field by field.
1273 */ 1273 */
1274 for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp), 1274 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp),
1275 oldsfep = XFS_DIR2_SF_FIRSTENTRY(oldsfp); 1275 oldsfep = xfs_dir2_sf_firstentry(oldsfp);
1276 i < sfp->hdr.count; 1276 i < sfp->hdr.count;
1277 i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep), 1277 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep),
1278 oldsfep = XFS_DIR2_SF_NEXTENTRY(oldsfp, oldsfep)) { 1278 oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep)) {
1279 sfep->namelen = oldsfep->namelen; 1279 sfep->namelen = oldsfep->namelen;
1280 sfep->offset = oldsfep->offset; 1280 sfep->offset = oldsfep->offset;
1281 memcpy(sfep->name, oldsfep->name, sfep->namelen); 1281 memcpy(sfep->name, oldsfep->name, sfep->namelen);
1282 ino = XFS_DIR2_SF_GET_INUMBER(oldsfp, 1282 ino = xfs_dir2_sf_get_inumber(oldsfp,
1283 XFS_DIR2_SF_INUMBERP(oldsfep)); 1283 xfs_dir2_sf_inumberp(oldsfep));
1284 XFS_DIR2_SF_PUT_INUMBER(sfp, &ino, XFS_DIR2_SF_INUMBERP(sfep)); 1284 xfs_dir2_sf_put_inumber(sfp, &ino, xfs_dir2_sf_inumberp(sfep));
1285 } 1285 }
1286 /* 1286 /*
1287 * Clean up the inode. 1287 * Clean up the inode.
diff --git a/fs/xfs/xfs_dir2_sf.h b/fs/xfs/xfs_dir2_sf.h
index 42f015b70018..11e503209afa 100644
--- a/fs/xfs/xfs_dir2_sf.h
+++ b/fs/xfs/xfs_dir2_sf.h
@@ -90,7 +90,6 @@ typedef struct xfs_dir2_sf {
90 xfs_dir2_sf_entry_t list[1]; /* shortform entries */ 90 xfs_dir2_sf_entry_t list[1]; /* shortform entries */
91} xfs_dir2_sf_t; 91} xfs_dir2_sf_t;
92 92
93#define XFS_DIR2_SF_HDR_SIZE(i8count) xfs_dir2_sf_hdr_size(i8count)
94static inline int xfs_dir2_sf_hdr_size(int i8count) 93static inline int xfs_dir2_sf_hdr_size(int i8count)
95{ 94{
96 return ((uint)sizeof(xfs_dir2_sf_hdr_t) - \ 95 return ((uint)sizeof(xfs_dir2_sf_hdr_t) - \
@@ -98,14 +97,11 @@ static inline int xfs_dir2_sf_hdr_size(int i8count)
98 ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t))); 97 ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)));
99} 98}
100 99
101#define XFS_DIR2_SF_INUMBERP(sfep) xfs_dir2_sf_inumberp(sfep)
102static inline xfs_dir2_inou_t *xfs_dir2_sf_inumberp(xfs_dir2_sf_entry_t *sfep) 100static inline xfs_dir2_inou_t *xfs_dir2_sf_inumberp(xfs_dir2_sf_entry_t *sfep)
103{ 101{
104 return (xfs_dir2_inou_t *)&(sfep)->name[(sfep)->namelen]; 102 return (xfs_dir2_inou_t *)&(sfep)->name[(sfep)->namelen];
105} 103}
106 104
107#define XFS_DIR2_SF_GET_INUMBER(sfp, from) \
108 xfs_dir2_sf_get_inumber(sfp, from)
109static inline xfs_intino_t 105static inline xfs_intino_t
110xfs_dir2_sf_get_inumber(xfs_dir2_sf_t *sfp, xfs_dir2_inou_t *from) 106xfs_dir2_sf_get_inumber(xfs_dir2_sf_t *sfp, xfs_dir2_inou_t *from)
111{ 107{
@@ -114,8 +110,6 @@ xfs_dir2_sf_get_inumber(xfs_dir2_sf_t *sfp, xfs_dir2_inou_t *from)
114 (xfs_intino_t)XFS_GET_DIR_INO8((from)->i8)); 110 (xfs_intino_t)XFS_GET_DIR_INO8((from)->i8));
115} 111}
116 112
117#define XFS_DIR2_SF_PUT_INUMBER(sfp,from,to) \
118 xfs_dir2_sf_put_inumber(sfp,from,to)
119static inline void xfs_dir2_sf_put_inumber(xfs_dir2_sf_t *sfp, xfs_ino_t *from, 113static inline void xfs_dir2_sf_put_inumber(xfs_dir2_sf_t *sfp, xfs_ino_t *from,
120 xfs_dir2_inou_t *to) 114 xfs_dir2_inou_t *to)
121{ 115{
@@ -125,24 +119,18 @@ static inline void xfs_dir2_sf_put_inumber(xfs_dir2_sf_t *sfp, xfs_ino_t *from,
125 XFS_PUT_DIR_INO8(*(from), (to)->i8); 119 XFS_PUT_DIR_INO8(*(from), (to)->i8);
126} 120}
127 121
128#define XFS_DIR2_SF_GET_OFFSET(sfep) \
129 xfs_dir2_sf_get_offset(sfep)
130static inline xfs_dir2_data_aoff_t 122static inline xfs_dir2_data_aoff_t
131xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t *sfep) 123xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t *sfep)
132{ 124{
133 return INT_GET_UNALIGNED_16_BE(&(sfep)->offset.i); 125 return INT_GET_UNALIGNED_16_BE(&(sfep)->offset.i);
134} 126}
135 127
136#define XFS_DIR2_SF_PUT_OFFSET(sfep,off) \
137 xfs_dir2_sf_put_offset(sfep,off)
138static inline void 128static inline void
139xfs_dir2_sf_put_offset(xfs_dir2_sf_entry_t *sfep, xfs_dir2_data_aoff_t off) 129xfs_dir2_sf_put_offset(xfs_dir2_sf_entry_t *sfep, xfs_dir2_data_aoff_t off)
140{ 130{
141 INT_SET_UNALIGNED_16_BE(&(sfep)->offset.i, off); 131 INT_SET_UNALIGNED_16_BE(&(sfep)->offset.i, off);
142} 132}
143 133
144#define XFS_DIR2_SF_ENTSIZE_BYNAME(sfp,len) \
145 xfs_dir2_sf_entsize_byname(sfp,len)
146static inline int xfs_dir2_sf_entsize_byname(xfs_dir2_sf_t *sfp, int len) 134static inline int xfs_dir2_sf_entsize_byname(xfs_dir2_sf_t *sfp, int len)
147{ 135{
148 return ((uint)sizeof(xfs_dir2_sf_entry_t) - 1 + (len) - \ 136 return ((uint)sizeof(xfs_dir2_sf_entry_t) - 1 + (len) - \
@@ -150,8 +138,6 @@ static inline int xfs_dir2_sf_entsize_byname(xfs_dir2_sf_t *sfp, int len)
150 ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t))); 138 ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)));
151} 139}
152 140
153#define XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp,sfep) \
154 xfs_dir2_sf_entsize_byentry(sfp,sfep)
155static inline int 141static inline int
156xfs_dir2_sf_entsize_byentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep) 142xfs_dir2_sf_entsize_byentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep)
157{ 143{
@@ -160,19 +146,17 @@ xfs_dir2_sf_entsize_byentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep)
160 ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t))); 146 ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)));
161} 147}
162 148
163#define XFS_DIR2_SF_FIRSTENTRY(sfp) xfs_dir2_sf_firstentry(sfp)
164static inline xfs_dir2_sf_entry_t *xfs_dir2_sf_firstentry(xfs_dir2_sf_t *sfp) 149static inline xfs_dir2_sf_entry_t *xfs_dir2_sf_firstentry(xfs_dir2_sf_t *sfp)
165{ 150{
166 return ((xfs_dir2_sf_entry_t *) \ 151 return ((xfs_dir2_sf_entry_t *) \
167 ((char *)(sfp) + XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count))); 152 ((char *)(sfp) + xfs_dir2_sf_hdr_size(sfp->hdr.i8count)));
168} 153}
169 154
170#define XFS_DIR2_SF_NEXTENTRY(sfp,sfep) xfs_dir2_sf_nextentry(sfp,sfep)
171static inline xfs_dir2_sf_entry_t * 155static inline xfs_dir2_sf_entry_t *
172xfs_dir2_sf_nextentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep) 156xfs_dir2_sf_nextentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep)
173{ 157{
174 return ((xfs_dir2_sf_entry_t *) \ 158 return ((xfs_dir2_sf_entry_t *) \
175 ((char *)(sfep) + XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp,sfep))); 159 ((char *)(sfep) + xfs_dir2_sf_entsize_byentry(sfp,sfep)));
176} 160}
177 161
178/* 162/*
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
new file mode 100644
index 000000000000..ce2278611bb7
--- /dev/null
+++ b/fs/xfs/xfs_filestream.c
@@ -0,0 +1,771 @@
1/*
2 * Copyright (c) 2006-2007 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_bmap_btree.h"
20#include "xfs_inum.h"
21#include "xfs_dir2.h"
22#include "xfs_dir2_sf.h"
23#include "xfs_attr_sf.h"
24#include "xfs_dinode.h"
25#include "xfs_inode.h"
26#include "xfs_ag.h"
27#include "xfs_dmapi.h"
28#include "xfs_log.h"
29#include "xfs_trans.h"
30#include "xfs_sb.h"
31#include "xfs_mount.h"
32#include "xfs_bmap.h"
33#include "xfs_alloc.h"
34#include "xfs_utils.h"
35#include "xfs_mru_cache.h"
36#include "xfs_filestream.h"
37
38#ifdef XFS_FILESTREAMS_TRACE
39
40ktrace_t *xfs_filestreams_trace_buf;
41
42STATIC void
43xfs_filestreams_trace(
44 xfs_mount_t *mp, /* mount point */
45 int type, /* type of trace */
46 const char *func, /* source function */
47 int line, /* source line number */
48 __psunsigned_t arg0,
49 __psunsigned_t arg1,
50 __psunsigned_t arg2,
51 __psunsigned_t arg3,
52 __psunsigned_t arg4,
53 __psunsigned_t arg5)
54{
55 ktrace_enter(xfs_filestreams_trace_buf,
56 (void *)(__psint_t)(type | (line << 16)),
57 (void *)func,
58 (void *)(__psunsigned_t)current_pid(),
59 (void *)mp,
60 (void *)(__psunsigned_t)arg0,
61 (void *)(__psunsigned_t)arg1,
62 (void *)(__psunsigned_t)arg2,
63 (void *)(__psunsigned_t)arg3,
64 (void *)(__psunsigned_t)arg4,
65 (void *)(__psunsigned_t)arg5,
66 NULL, NULL, NULL, NULL, NULL, NULL);
67}
68
69#define TRACE0(mp,t) TRACE6(mp,t,0,0,0,0,0,0)
70#define TRACE1(mp,t,a0) TRACE6(mp,t,a0,0,0,0,0,0)
71#define TRACE2(mp,t,a0,a1) TRACE6(mp,t,a0,a1,0,0,0,0)
72#define TRACE3(mp,t,a0,a1,a2) TRACE6(mp,t,a0,a1,a2,0,0,0)
73#define TRACE4(mp,t,a0,a1,a2,a3) TRACE6(mp,t,a0,a1,a2,a3,0,0)
74#define TRACE5(mp,t,a0,a1,a2,a3,a4) TRACE6(mp,t,a0,a1,a2,a3,a4,0)
75#define TRACE6(mp,t,a0,a1,a2,a3,a4,a5) \
76 xfs_filestreams_trace(mp, t, __FUNCTION__, __LINE__, \
77 (__psunsigned_t)a0, (__psunsigned_t)a1, \
78 (__psunsigned_t)a2, (__psunsigned_t)a3, \
79 (__psunsigned_t)a4, (__psunsigned_t)a5)
80
81#define TRACE_AG_SCAN(mp, ag, ag2) \
82 TRACE2(mp, XFS_FSTRM_KTRACE_AGSCAN, ag, ag2);
83#define TRACE_AG_PICK1(mp, max_ag, maxfree) \
84 TRACE2(mp, XFS_FSTRM_KTRACE_AGPICK1, max_ag, maxfree);
85#define TRACE_AG_PICK2(mp, ag, ag2, cnt, free, scan, flag) \
86 TRACE6(mp, XFS_FSTRM_KTRACE_AGPICK2, ag, ag2, \
87 cnt, free, scan, flag)
88#define TRACE_UPDATE(mp, ip, ag, cnt, ag2, cnt2) \
89 TRACE5(mp, XFS_FSTRM_KTRACE_UPDATE, ip, ag, cnt, ag2, cnt2)
90#define TRACE_FREE(mp, ip, pip, ag, cnt) \
91 TRACE4(mp, XFS_FSTRM_KTRACE_FREE, ip, pip, ag, cnt)
92#define TRACE_LOOKUP(mp, ip, pip, ag, cnt) \
93 TRACE4(mp, XFS_FSTRM_KTRACE_ITEM_LOOKUP, ip, pip, ag, cnt)
94#define TRACE_ASSOCIATE(mp, ip, pip, ag, cnt) \
95 TRACE4(mp, XFS_FSTRM_KTRACE_ASSOCIATE, ip, pip, ag, cnt)
96#define TRACE_MOVEAG(mp, ip, pip, oag, ocnt, nag, ncnt) \
97 TRACE6(mp, XFS_FSTRM_KTRACE_MOVEAG, ip, pip, oag, ocnt, nag, ncnt)
98#define TRACE_ORPHAN(mp, ip, ag) \
99 TRACE2(mp, XFS_FSTRM_KTRACE_ORPHAN, ip, ag);
100
101
102#else
103#define TRACE_AG_SCAN(mp, ag, ag2)
104#define TRACE_AG_PICK1(mp, max_ag, maxfree)
105#define TRACE_AG_PICK2(mp, ag, ag2, cnt, free, scan, flag)
106#define TRACE_UPDATE(mp, ip, ag, cnt, ag2, cnt2)
107#define TRACE_FREE(mp, ip, pip, ag, cnt)
108#define TRACE_LOOKUP(mp, ip, pip, ag, cnt)
109#define TRACE_ASSOCIATE(mp, ip, pip, ag, cnt)
110#define TRACE_MOVEAG(mp, ip, pip, oag, ocnt, nag, ncnt)
111#define TRACE_ORPHAN(mp, ip, ag)
112#endif
113
114static kmem_zone_t *item_zone;
115
116/*
117 * Structure for associating a file or a directory with an allocation group.
118 * The parent directory pointer is only needed for files, but since there will
119 * generally be vastly more files than directories in the cache, using the same
120 * data structure simplifies the code with very little memory overhead.
121 */
122typedef struct fstrm_item
123{
124 xfs_agnumber_t ag; /* AG currently in use for the file/directory. */
125 xfs_inode_t *ip; /* inode self-pointer. */
126 xfs_inode_t *pip; /* Parent directory inode pointer. */
127} fstrm_item_t;
128
129
130/*
131 * Scan the AGs starting at startag looking for an AG that isn't in use and has
132 * at least minlen blocks free.
133 */
134static int
135_xfs_filestream_pick_ag(
136 xfs_mount_t *mp,
137 xfs_agnumber_t startag,
138 xfs_agnumber_t *agp,
139 int flags,
140 xfs_extlen_t minlen)
141{
142 int err, trylock, nscan;
143 xfs_extlen_t delta, longest, need, free, minfree, maxfree = 0;
144 xfs_agnumber_t ag, max_ag = NULLAGNUMBER;
145 struct xfs_perag *pag;
146
147 /* 2% of an AG's blocks must be free for it to be chosen. */
148 minfree = mp->m_sb.sb_agblocks / 50;
149
150 ag = startag;
151 *agp = NULLAGNUMBER;
152
153 /* For the first pass, don't sleep trying to init the per-AG. */
154 trylock = XFS_ALLOC_FLAG_TRYLOCK;
155
156 for (nscan = 0; 1; nscan++) {
157
158 TRACE_AG_SCAN(mp, ag, xfs_filestream_peek_ag(mp, ag));
159
160 pag = mp->m_perag + ag;
161
162 if (!pag->pagf_init) {
163 err = xfs_alloc_pagf_init(mp, NULL, ag, trylock);
164 if (err && !trylock)
165 return err;
166 }
167
168 /* Might fail sometimes during the 1st pass with trylock set. */
169 if (!pag->pagf_init)
170 goto next_ag;
171
172 /* Keep track of the AG with the most free blocks. */
173 if (pag->pagf_freeblks > maxfree) {
174 maxfree = pag->pagf_freeblks;
175 max_ag = ag;
176 }
177
178 /*
179 * The AG reference count does two things: it enforces mutual
180 * exclusion when examining the suitability of an AG in this
181 * loop, and it guards against two filestreams being established
182 * in the same AG as each other.
183 */
184 if (xfs_filestream_get_ag(mp, ag) > 1) {
185 xfs_filestream_put_ag(mp, ag);
186 goto next_ag;
187 }
188
189 need = XFS_MIN_FREELIST_PAG(pag, mp);
190 delta = need > pag->pagf_flcount ? need - pag->pagf_flcount : 0;
191 longest = (pag->pagf_longest > delta) ?
192 (pag->pagf_longest - delta) :
193 (pag->pagf_flcount > 0 || pag->pagf_longest > 0);
194
195 if (((minlen && longest >= minlen) ||
196 (!minlen && pag->pagf_freeblks >= minfree)) &&
197 (!pag->pagf_metadata || !(flags & XFS_PICK_USERDATA) ||
198 (flags & XFS_PICK_LOWSPACE))) {
199
200 /* Break out, retaining the reference on the AG. */
201 free = pag->pagf_freeblks;
202 *agp = ag;
203 break;
204 }
205
206 /* Drop the reference on this AG, it's not usable. */
207 xfs_filestream_put_ag(mp, ag);
208next_ag:
209 /* Move to the next AG, wrapping to AG 0 if necessary. */
210 if (++ag >= mp->m_sb.sb_agcount)
211 ag = 0;
212
213 /* If a full pass of the AGs hasn't been done yet, continue. */
214 if (ag != startag)
215 continue;
216
217 /* Allow sleeping in xfs_alloc_pagf_init() on the 2nd pass. */
218 if (trylock != 0) {
219 trylock = 0;
220 continue;
221 }
222
223 /* Finally, if lowspace wasn't set, set it for the 3rd pass. */
224 if (!(flags & XFS_PICK_LOWSPACE)) {
225 flags |= XFS_PICK_LOWSPACE;
226 continue;
227 }
228
229 /*
230 * Take the AG with the most free space, regardless of whether
231 * it's already in use by another filestream.
232 */
233 if (max_ag != NULLAGNUMBER) {
234 xfs_filestream_get_ag(mp, max_ag);
235 TRACE_AG_PICK1(mp, max_ag, maxfree);
236 free = maxfree;
237 *agp = max_ag;
238 break;
239 }
240
241 /* take AG 0 if none matched */
242 TRACE_AG_PICK1(mp, max_ag, maxfree);
243 *agp = 0;
244 return 0;
245 }
246
247 TRACE_AG_PICK2(mp, startag, *agp, xfs_filestream_peek_ag(mp, *agp),
248 free, nscan, flags);
249
250 return 0;
251}
252
253/*
254 * Set the allocation group number for a file or a directory, updating inode
255 * references and per-AG references as appropriate. Must be called with the
256 * m_peraglock held in read mode.
257 */
258static int
259_xfs_filestream_update_ag(
260 xfs_inode_t *ip,
261 xfs_inode_t *pip,
262 xfs_agnumber_t ag)
263{
264 int err = 0;
265 xfs_mount_t *mp;
266 xfs_mru_cache_t *cache;
267 fstrm_item_t *item;
268 xfs_agnumber_t old_ag;
269 xfs_inode_t *old_pip;
270
271 /*
272 * Either ip is a regular file and pip is a directory, or ip is a
273 * directory and pip is NULL.
274 */
275 ASSERT(ip && (((ip->i_d.di_mode & S_IFREG) && pip &&
276 (pip->i_d.di_mode & S_IFDIR)) ||
277 ((ip->i_d.di_mode & S_IFDIR) && !pip)));
278
279 mp = ip->i_mount;
280 cache = mp->m_filestream;
281
282 item = xfs_mru_cache_lookup(cache, ip->i_ino);
283 if (item) {
284 ASSERT(item->ip == ip);
285 old_ag = item->ag;
286 item->ag = ag;
287 old_pip = item->pip;
288 item->pip = pip;
289 xfs_mru_cache_done(cache);
290
291 /*
292 * If the AG has changed, drop the old ref and take a new one,
293 * effectively transferring the reference from old to new AG.
294 */
295 if (ag != old_ag) {
296 xfs_filestream_put_ag(mp, old_ag);
297 xfs_filestream_get_ag(mp, ag);
298 }
299
300 /*
301 * If ip is a file and its pip has changed, drop the old ref and
302 * take a new one.
303 */
304 if (pip && pip != old_pip) {
305 IRELE(old_pip);
306 IHOLD(pip);
307 }
308
309 TRACE_UPDATE(mp, ip, old_ag, xfs_filestream_peek_ag(mp, old_ag),
310 ag, xfs_filestream_peek_ag(mp, ag));
311 return 0;
312 }
313
314 item = kmem_zone_zalloc(item_zone, KM_MAYFAIL);
315 if (!item)
316 return ENOMEM;
317
318 item->ag = ag;
319 item->ip = ip;
320 item->pip = pip;
321
322 err = xfs_mru_cache_insert(cache, ip->i_ino, item);
323 if (err) {
324 kmem_zone_free(item_zone, item);
325 return err;
326 }
327
328 /* Take a reference on the AG. */
329 xfs_filestream_get_ag(mp, ag);
330
331 /*
332 * Take a reference on the inode itself regardless of whether it's a
333 * regular file or a directory.
334 */
335 IHOLD(ip);
336
337 /*
338 * In the case of a regular file, take a reference on the parent inode
339 * as well to ensure it remains in-core.
340 */
341 if (pip)
342 IHOLD(pip);
343
344 TRACE_UPDATE(mp, ip, ag, xfs_filestream_peek_ag(mp, ag),
345 ag, xfs_filestream_peek_ag(mp, ag));
346
347 return 0;
348}
349
350/* xfs_fstrm_free_func(): callback for freeing cached stream items. */
351void
352xfs_fstrm_free_func(
353 xfs_ino_t ino,
354 fstrm_item_t *item)
355{
356 xfs_inode_t *ip = item->ip;
357 int ref;
358
359 ASSERT(ip->i_ino == ino);
360
361 xfs_iflags_clear(ip, XFS_IFILESTREAM);
362
363 /* Drop the reference taken on the AG when the item was added. */
364 ref = xfs_filestream_put_ag(ip->i_mount, item->ag);
365
366 ASSERT(ref >= 0);
367 TRACE_FREE(ip->i_mount, ip, item->pip, item->ag,
368 xfs_filestream_peek_ag(ip->i_mount, item->ag));
369
370 /*
371 * _xfs_filestream_update_ag() always takes a reference on the inode
372 * itself, whether it's a file or a directory. Release it here.
373 * This can result in the inode being freed and so we must
374 * not hold any inode locks when freeing filesstreams objects
375 * otherwise we can deadlock here.
376 */
377 IRELE(ip);
378
379 /*
380 * In the case of a regular file, _xfs_filestream_update_ag() also
381 * takes a ref on the parent inode to keep it in-core. Release that
382 * too.
383 */
384 if (item->pip)
385 IRELE(item->pip);
386
387 /* Finally, free the memory allocated for the item. */
388 kmem_zone_free(item_zone, item);
389}
390
391/*
392 * xfs_filestream_init() is called at xfs initialisation time to set up the
393 * memory zone that will be used for filestream data structure allocation.
394 */
395int
396xfs_filestream_init(void)
397{
398 item_zone = kmem_zone_init(sizeof(fstrm_item_t), "fstrm_item");
399#ifdef XFS_FILESTREAMS_TRACE
400 xfs_filestreams_trace_buf = ktrace_alloc(XFS_FSTRM_KTRACE_SIZE, KM_SLEEP);
401#endif
402 return item_zone ? 0 : -ENOMEM;
403}
404
405/*
406 * xfs_filestream_uninit() is called at xfs termination time to destroy the
407 * memory zone that was used for filestream data structure allocation.
408 */
409void
410xfs_filestream_uninit(void)
411{
412#ifdef XFS_FILESTREAMS_TRACE
413 ktrace_free(xfs_filestreams_trace_buf);
414#endif
415 kmem_zone_destroy(item_zone);
416}
417
418/*
419 * xfs_filestream_mount() is called when a file system is mounted with the
420 * filestream option. It is responsible for allocating the data structures
421 * needed to track the new file system's file streams.
422 */
423int
424xfs_filestream_mount(
425 xfs_mount_t *mp)
426{
427 int err;
428 unsigned int lifetime, grp_count;
429
430 /*
431 * The filestream timer tunable is currently fixed within the range of
432 * one second to four minutes, with five seconds being the default. The
433 * group count is somewhat arbitrary, but it'd be nice to adhere to the
434 * timer tunable to within about 10 percent. This requires at least 10
435 * groups.
436 */
437 lifetime = xfs_fstrm_centisecs * 10;
438 grp_count = 10;
439
440 err = xfs_mru_cache_create(&mp->m_filestream, lifetime, grp_count,
441 (xfs_mru_cache_free_func_t)xfs_fstrm_free_func);
442
443 return err;
444}
445
446/*
447 * xfs_filestream_unmount() is called when a file system that was mounted with
448 * the filestream option is unmounted. It drains the data structures created
449 * to track the file system's file streams and frees all the memory that was
450 * allocated.
451 */
452void
453xfs_filestream_unmount(
454 xfs_mount_t *mp)
455{
456 xfs_mru_cache_destroy(mp->m_filestream);
457}
458
459/*
460 * If the mount point's m_perag array is going to be reallocated, all
461 * outstanding cache entries must be flushed to avoid accessing reference count
462 * addresses that have been freed. The call to xfs_filestream_flush() must be
463 * made inside the block that holds the m_peraglock in write mode to do the
464 * reallocation.
465 */
466void
467xfs_filestream_flush(
468 xfs_mount_t *mp)
469{
470 /* point in time flush, so keep the reaper running */
471 xfs_mru_cache_flush(mp->m_filestream, 1);
472}
473
474/*
475 * Return the AG of the filestream the file or directory belongs to, or
476 * NULLAGNUMBER otherwise.
477 */
478xfs_agnumber_t
479xfs_filestream_lookup_ag(
480 xfs_inode_t *ip)
481{
482 xfs_mru_cache_t *cache;
483 fstrm_item_t *item;
484 xfs_agnumber_t ag;
485 int ref;
486
487 if (!(ip->i_d.di_mode & (S_IFREG | S_IFDIR))) {
488 ASSERT(0);
489 return NULLAGNUMBER;
490 }
491
492 cache = ip->i_mount->m_filestream;
493 item = xfs_mru_cache_lookup(cache, ip->i_ino);
494 if (!item) {
495 TRACE_LOOKUP(ip->i_mount, ip, NULL, NULLAGNUMBER, 0);
496 return NULLAGNUMBER;
497 }
498
499 ASSERT(ip == item->ip);
500 ag = item->ag;
501 ref = xfs_filestream_peek_ag(ip->i_mount, ag);
502 xfs_mru_cache_done(cache);
503
504 TRACE_LOOKUP(ip->i_mount, ip, item->pip, ag, ref);
505 return ag;
506}
507
508/*
509 * xfs_filestream_associate() should only be called to associate a regular file
510 * with its parent directory. Calling it with a child directory isn't
511 * appropriate because filestreams don't apply to entire directory hierarchies.
512 * Creating a file in a child directory of an existing filestream directory
513 * starts a new filestream with its own allocation group association.
514 *
515 * Returns < 0 on error, 0 if successful association occurred, > 0 if
516 * we failed to get an association because of locking issues.
517 */
518int
519xfs_filestream_associate(
520 xfs_inode_t *pip,
521 xfs_inode_t *ip)
522{
523 xfs_mount_t *mp;
524 xfs_mru_cache_t *cache;
525 fstrm_item_t *item;
526 xfs_agnumber_t ag, rotorstep, startag;
527 int err = 0;
528
529 ASSERT(pip->i_d.di_mode & S_IFDIR);
530 ASSERT(ip->i_d.di_mode & S_IFREG);
531 if (!(pip->i_d.di_mode & S_IFDIR) || !(ip->i_d.di_mode & S_IFREG))
532 return -EINVAL;
533
534 mp = pip->i_mount;
535 cache = mp->m_filestream;
536 down_read(&mp->m_peraglock);
537
538 /*
539 * We have a problem, Houston.
540 *
541 * Taking the iolock here violates inode locking order - we already
542 * hold the ilock. Hence if we block getting this lock we may never
543 * wake. Unfortunately, that means if we can't get the lock, we're
544 * screwed in terms of getting a stream association - we can't spin
545 * waiting for the lock because someone else is waiting on the lock we
546 * hold and we cannot drop that as we are in a transaction here.
547 *
548 * Lucky for us, this inversion is rarely a problem because it's a
549 * directory inode that we are trying to lock here and that means the
550 * only place that matters is xfs_sync_inodes() and SYNC_DELWRI is
551 * used. i.e. freeze, remount-ro, quotasync or unmount.
552 *
553 * So, if we can't get the iolock without sleeping then just give up
554 */
555 if (!xfs_ilock_nowait(pip, XFS_IOLOCK_EXCL)) {
556 up_read(&mp->m_peraglock);
557 return 1;
558 }
559
560 /* If the parent directory is already in the cache, use its AG. */
561 item = xfs_mru_cache_lookup(cache, pip->i_ino);
562 if (item) {
563 ASSERT(item->ip == pip);
564 ag = item->ag;
565 xfs_mru_cache_done(cache);
566
567 TRACE_LOOKUP(mp, pip, pip, ag, xfs_filestream_peek_ag(mp, ag));
568 err = _xfs_filestream_update_ag(ip, pip, ag);
569
570 goto exit;
571 }
572
573 /*
574 * Set the starting AG using the rotor for inode32, otherwise
575 * use the directory inode's AG.
576 */
577 if (mp->m_flags & XFS_MOUNT_32BITINODES) {
578 rotorstep = xfs_rotorstep;
579 startag = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount;
580 mp->m_agfrotor = (mp->m_agfrotor + 1) %
581 (mp->m_sb.sb_agcount * rotorstep);
582 } else
583 startag = XFS_INO_TO_AGNO(mp, pip->i_ino);
584
585 /* Pick a new AG for the parent inode starting at startag. */
586 err = _xfs_filestream_pick_ag(mp, startag, &ag, 0, 0);
587 if (err || ag == NULLAGNUMBER)
588 goto exit_did_pick;
589
590 /* Associate the parent inode with the AG. */
591 err = _xfs_filestream_update_ag(pip, NULL, ag);
592 if (err)
593 goto exit_did_pick;
594
595 /* Associate the file inode with the AG. */
596 err = _xfs_filestream_update_ag(ip, pip, ag);
597 if (err)
598 goto exit_did_pick;
599
600 TRACE_ASSOCIATE(mp, ip, pip, ag, xfs_filestream_peek_ag(mp, ag));
601
602exit_did_pick:
603 /*
604 * If _xfs_filestream_pick_ag() returned a valid AG, remove the
605 * reference it took on it, since the file and directory will have taken
606 * their own now if they were successfully cached.
607 */
608 if (ag != NULLAGNUMBER)
609 xfs_filestream_put_ag(mp, ag);
610
611exit:
612 xfs_iunlock(pip, XFS_IOLOCK_EXCL);
613 up_read(&mp->m_peraglock);
614 return -err;
615}
616
617/*
618 * Pick a new allocation group for the current file and its file stream. This
619 * function is called by xfs_bmap_filestreams() with the mount point's per-ag
620 * lock held.
621 */
622int
623xfs_filestream_new_ag(
624 xfs_bmalloca_t *ap,
625 xfs_agnumber_t *agp)
626{
627 int flags, err;
628 xfs_inode_t *ip, *pip = NULL;
629 xfs_mount_t *mp;
630 xfs_mru_cache_t *cache;
631 xfs_extlen_t minlen;
632 fstrm_item_t *dir, *file;
633 xfs_agnumber_t ag = NULLAGNUMBER;
634
635 ip = ap->ip;
636 mp = ip->i_mount;
637 cache = mp->m_filestream;
638 minlen = ap->alen;
639 *agp = NULLAGNUMBER;
640
641 /*
642 * Look for the file in the cache, removing it if it's found. Doing
643 * this allows it to be held across the dir lookup that follows.
644 */
645 file = xfs_mru_cache_remove(cache, ip->i_ino);
646 if (file) {
647 ASSERT(ip == file->ip);
648
649 /* Save the file's parent inode and old AG number for later. */
650 pip = file->pip;
651 ag = file->ag;
652
653 /* Look for the file's directory in the cache. */
654 dir = xfs_mru_cache_lookup(cache, pip->i_ino);
655 if (dir) {
656 ASSERT(pip == dir->ip);
657
658 /*
659 * If the directory has already moved on to a new AG,
660 * use that AG as the new AG for the file. Don't
661 * forget to twiddle the AG refcounts to match the
662 * movement.
663 */
664 if (dir->ag != file->ag) {
665 xfs_filestream_put_ag(mp, file->ag);
666 xfs_filestream_get_ag(mp, dir->ag);
667 *agp = file->ag = dir->ag;
668 }
669
670 xfs_mru_cache_done(cache);
671 }
672
673 /*
674 * Put the file back in the cache. If this fails, the free
675 * function needs to be called to tidy up in the same way as if
676 * the item had simply expired from the cache.
677 */
678 err = xfs_mru_cache_insert(cache, ip->i_ino, file);
679 if (err) {
680 xfs_fstrm_free_func(ip->i_ino, file);
681 return err;
682 }
683
684 /*
685 * If the file's AG was moved to the directory's new AG, there's
686 * nothing more to be done.
687 */
688 if (*agp != NULLAGNUMBER) {
689 TRACE_MOVEAG(mp, ip, pip,
690 ag, xfs_filestream_peek_ag(mp, ag),
691 *agp, xfs_filestream_peek_ag(mp, *agp));
692 return 0;
693 }
694 }
695
696 /*
697 * If the file's parent directory is known, take its iolock in exclusive
698 * mode to prevent two sibling files from racing each other to migrate
699 * themselves and their parent to different AGs.
700 */
701 if (pip)
702 xfs_ilock(pip, XFS_IOLOCK_EXCL);
703
704 /*
705 * A new AG needs to be found for the file. If the file's parent
706 * directory is also known, it will be moved to the new AG as well to
707 * ensure that files created inside it in future use the new AG.
708 */
709 ag = (ag == NULLAGNUMBER) ? 0 : (ag + 1) % mp->m_sb.sb_agcount;
710 flags = (ap->userdata ? XFS_PICK_USERDATA : 0) |
711 (ap->low ? XFS_PICK_LOWSPACE : 0);
712
713 err = _xfs_filestream_pick_ag(mp, ag, agp, flags, minlen);
714 if (err || *agp == NULLAGNUMBER)
715 goto exit;
716
717 /*
718 * If the file wasn't found in the file cache, then its parent directory
719 * inode isn't known. For this to have happened, the file must either
720 * be pre-existing, or it was created long enough ago that its cache
721 * entry has expired. This isn't the sort of usage that the filestreams
722 * allocator is trying to optimise, so there's no point trying to track
723 * its new AG somehow in the filestream data structures.
724 */
725 if (!pip) {
726 TRACE_ORPHAN(mp, ip, *agp);
727 goto exit;
728 }
729
730 /* Associate the parent inode with the AG. */
731 err = _xfs_filestream_update_ag(pip, NULL, *agp);
732 if (err)
733 goto exit;
734
735 /* Associate the file inode with the AG. */
736 err = _xfs_filestream_update_ag(ip, pip, *agp);
737 if (err)
738 goto exit;
739
740 TRACE_MOVEAG(mp, ip, pip, NULLAGNUMBER, 0,
741 *agp, xfs_filestream_peek_ag(mp, *agp));
742
743exit:
744 /*
745 * If _xfs_filestream_pick_ag() returned a valid AG, remove the
746 * reference it took on it, since the file and directory will have taken
747 * their own now if they were successfully cached.
748 */
749 if (*agp != NULLAGNUMBER)
750 xfs_filestream_put_ag(mp, *agp);
751 else
752 *agp = 0;
753
754 if (pip)
755 xfs_iunlock(pip, XFS_IOLOCK_EXCL);
756
757 return err;
758}
759
760/*
761 * Remove an association between an inode and a filestream object.
762 * Typically this is done on last close of an unlinked file.
763 */
764void
765xfs_filestream_deassociate(
766 xfs_inode_t *ip)
767{
768 xfs_mru_cache_t *cache = ip->i_mount->m_filestream;
769
770 xfs_mru_cache_delete(cache, ip->i_ino);
771}
diff --git a/fs/xfs/xfs_filestream.h b/fs/xfs/xfs_filestream.h
new file mode 100644
index 000000000000..f655f7dc334c
--- /dev/null
+++ b/fs/xfs/xfs_filestream.h
@@ -0,0 +1,136 @@
1/*
2 * Copyright (c) 2006-2007 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_FILESTREAM_H__
19#define __XFS_FILESTREAM_H__
20
21#ifdef __KERNEL__
22
23struct xfs_mount;
24struct xfs_inode;
25struct xfs_perag;
26struct xfs_bmalloca;
27
28#ifdef XFS_FILESTREAMS_TRACE
29#define XFS_FSTRM_KTRACE_INFO 1
30#define XFS_FSTRM_KTRACE_AGSCAN 2
31#define XFS_FSTRM_KTRACE_AGPICK1 3
32#define XFS_FSTRM_KTRACE_AGPICK2 4
33#define XFS_FSTRM_KTRACE_UPDATE 5
34#define XFS_FSTRM_KTRACE_FREE 6
35#define XFS_FSTRM_KTRACE_ITEM_LOOKUP 7
36#define XFS_FSTRM_KTRACE_ASSOCIATE 8
37#define XFS_FSTRM_KTRACE_MOVEAG 9
38#define XFS_FSTRM_KTRACE_ORPHAN 10
39
40#define XFS_FSTRM_KTRACE_SIZE 16384
41extern ktrace_t *xfs_filestreams_trace_buf;
42
43#endif
44
45/*
46 * Allocation group filestream associations are tracked with per-ag atomic
47 * counters. These counters allow _xfs_filestream_pick_ag() to tell whether a
48 * particular AG already has active filestreams associated with it. The mount
49 * point's m_peraglock is used to protect these counters from per-ag array
50 * re-allocation during a growfs operation. When xfs_growfs_data_private() is
51 * about to reallocate the array, it calls xfs_filestream_flush() with the
52 * m_peraglock held in write mode.
53 *
54 * Since xfs_mru_cache_flush() guarantees that all the free functions for all
55 * the cache elements have finished executing before it returns, it's safe for
56 * the free functions to use the atomic counters without m_peraglock protection.
57 * This allows the implementation of xfs_fstrm_free_func() to be agnostic about
58 * whether it was called with the m_peraglock held in read mode, write mode or
59 * not held at all. The race condition this addresses is the following:
60 *
61 * - The work queue scheduler fires and pulls a filestream directory cache
62 * element off the LRU end of the cache for deletion, then gets pre-empted.
63 * - A growfs operation grabs the m_peraglock in write mode, flushes all the
64 * remaining items from the cache and reallocates the mount point's per-ag
65 * array, resetting all the counters to zero.
66 * - The work queue thread resumes and calls the free function for the element
67 * it started cleaning up earlier. In the process it decrements the
68 * filestreams counter for an AG that now has no references.
69 *
70 * With a shrinkfs feature, the above scenario could panic the system.
71 *
72 * All other uses of the following macros should be protected by either the
73 * m_peraglock held in read mode, or the cache's internal locking exposed by the
74 * interval between a call to xfs_mru_cache_lookup() and a call to
75 * xfs_mru_cache_done(). In addition, the m_peraglock must be held in read mode
76 * when new elements are added to the cache.
77 *
78 * Combined, these locking rules ensure that no associations will ever exist in
79 * the cache that reference per-ag array elements that have since been
80 * reallocated.
81 */
82STATIC_INLINE int
83xfs_filestream_peek_ag(
84 xfs_mount_t *mp,
85 xfs_agnumber_t agno)
86{
87 return atomic_read(&mp->m_perag[agno].pagf_fstrms);
88}
89
90STATIC_INLINE int
91xfs_filestream_get_ag(
92 xfs_mount_t *mp,
93 xfs_agnumber_t agno)
94{
95 return atomic_inc_return(&mp->m_perag[agno].pagf_fstrms);
96}
97
98STATIC_INLINE int
99xfs_filestream_put_ag(
100 xfs_mount_t *mp,
101 xfs_agnumber_t agno)
102{
103 return atomic_dec_return(&mp->m_perag[agno].pagf_fstrms);
104}
105
/*
 * Allocation selection flags.  These are independent bits that may be
 * OR-ed together into a single flags word.
 */
typedef enum xfs_fstrm_alloc {
	XFS_PICK_USERDATA = 1 << 0,
	XFS_PICK_LOWSPACE = 1 << 1,
} xfs_fstrm_alloc_t;
111
112/* prototypes for filestream.c */
113int xfs_filestream_init(void);
114void xfs_filestream_uninit(void);
115int xfs_filestream_mount(struct xfs_mount *mp);
116void xfs_filestream_unmount(struct xfs_mount *mp);
117void xfs_filestream_flush(struct xfs_mount *mp);
118xfs_agnumber_t xfs_filestream_lookup_ag(struct xfs_inode *ip);
119int xfs_filestream_associate(struct xfs_inode *dip, struct xfs_inode *ip);
120void xfs_filestream_deassociate(struct xfs_inode *ip);
121int xfs_filestream_new_ag(struct xfs_bmalloca *ap, xfs_agnumber_t *agp);
122
123
124/* filestreams for the inode? */
125STATIC_INLINE int
126xfs_inode_is_filestream(
127 struct xfs_inode *ip)
128{
129 return (ip->i_mount->m_flags & XFS_MOUNT_FILESTREAMS) ||
130 xfs_iflags_test(ip, XFS_IFILESTREAM) ||
131 (ip->i_d.di_flags & XFS_DIFLAG_FILESTREAM);
132}
133
134#endif /* __KERNEL__ */
135
136#endif /* __XFS_FILESTREAM_H__ */
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 1335449841cd..ec3c9c27e0de 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -66,6 +66,7 @@ struct fsxattr {
66#define XFS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */ 66#define XFS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */
67#define XFS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */ 67#define XFS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */
68#define XFS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */ 68#define XFS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */
69#define XFS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */
69#define XFS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */ 70#define XFS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */
70 71
71/* 72/*
@@ -238,6 +239,7 @@ typedef struct xfs_fsop_resblks {
238#define XFS_FSOP_GEOM_FLAGS_LOGV2 0x0100 /* log format version 2 */ 239#define XFS_FSOP_GEOM_FLAGS_LOGV2 0x0100 /* log format version 2 */
239#define XFS_FSOP_GEOM_FLAGS_SECTOR 0x0200 /* sector sizes >1BB */ 240#define XFS_FSOP_GEOM_FLAGS_SECTOR 0x0200 /* sector sizes >1BB */
240#define XFS_FSOP_GEOM_FLAGS_ATTR2 0x0400 /* inline attributes rework */ 241#define XFS_FSOP_GEOM_FLAGS_ATTR2 0x0400 /* inline attributes rework */
242#define XFS_FSOP_GEOM_FLAGS_LAZYSB 0x4000 /* lazy superblock counters */
241 243
242 244
243/* 245/*
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index b599e6be9ec1..432e82347ed6 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -44,6 +44,7 @@
44#include "xfs_trans_space.h" 44#include "xfs_trans_space.h"
45#include "xfs_rtalloc.h" 45#include "xfs_rtalloc.h"
46#include "xfs_rw.h" 46#include "xfs_rw.h"
47#include "xfs_filestream.h"
47 48
48/* 49/*
49 * File system operations 50 * File system operations
@@ -94,6 +95,8 @@ xfs_fs_geometry(
94 XFS_FSOP_GEOM_FLAGS_DIRV2 : 0) | 95 XFS_FSOP_GEOM_FLAGS_DIRV2 : 0) |
95 (XFS_SB_VERSION_HASSECTOR(&mp->m_sb) ? 96 (XFS_SB_VERSION_HASSECTOR(&mp->m_sb) ?
96 XFS_FSOP_GEOM_FLAGS_SECTOR : 0) | 97 XFS_FSOP_GEOM_FLAGS_SECTOR : 0) |
98 (xfs_sb_version_haslazysbcount(&mp->m_sb) ?
99 XFS_FSOP_GEOM_FLAGS_LAZYSB : 0) |
97 (XFS_SB_VERSION_HASATTR2(&mp->m_sb) ? 100 (XFS_SB_VERSION_HASATTR2(&mp->m_sb) ?
98 XFS_FSOP_GEOM_FLAGS_ATTR2 : 0); 101 XFS_FSOP_GEOM_FLAGS_ATTR2 : 0);
99 geo->logsectsize = XFS_SB_VERSION_HASSECTOR(&mp->m_sb) ? 102 geo->logsectsize = XFS_SB_VERSION_HASSECTOR(&mp->m_sb) ?
@@ -140,6 +143,8 @@ xfs_growfs_data_private(
140 pct = in->imaxpct; 143 pct = in->imaxpct;
141 if (nb < mp->m_sb.sb_dblocks || pct < 0 || pct > 100) 144 if (nb < mp->m_sb.sb_dblocks || pct < 0 || pct > 100)
142 return XFS_ERROR(EINVAL); 145 return XFS_ERROR(EINVAL);
146 if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb)))
147 return error;
143 dpct = pct - mp->m_sb.sb_imax_pct; 148 dpct = pct - mp->m_sb.sb_imax_pct;
144 error = xfs_read_buf(mp, mp->m_ddev_targp, 149 error = xfs_read_buf(mp, mp->m_ddev_targp,
145 XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1), 150 XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1),
@@ -161,6 +166,7 @@ xfs_growfs_data_private(
161 new = nb - mp->m_sb.sb_dblocks; 166 new = nb - mp->m_sb.sb_dblocks;
162 oagcount = mp->m_sb.sb_agcount; 167 oagcount = mp->m_sb.sb_agcount;
163 if (nagcount > oagcount) { 168 if (nagcount > oagcount) {
169 xfs_filestream_flush(mp);
164 down_write(&mp->m_peraglock); 170 down_write(&mp->m_peraglock);
165 mp->m_perag = kmem_realloc(mp->m_perag, 171 mp->m_perag = kmem_realloc(mp->m_perag,
166 sizeof(xfs_perag_t) * nagcount, 172 sizeof(xfs_perag_t) * nagcount,
@@ -173,6 +179,7 @@ xfs_growfs_data_private(
173 up_write(&mp->m_peraglock); 179 up_write(&mp->m_peraglock);
174 } 180 }
175 tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS); 181 tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS);
182 tp->t_flags |= XFS_TRANS_RESERVE;
176 if ((error = xfs_trans_reserve(tp, XFS_GROWFS_SPACE_RES(mp), 183 if ((error = xfs_trans_reserve(tp, XFS_GROWFS_SPACE_RES(mp),
177 XFS_GROWDATA_LOG_RES(mp), 0, 0, 0))) { 184 XFS_GROWDATA_LOG_RES(mp), 0, 0, 0))) {
178 xfs_trans_cancel(tp, 0); 185 xfs_trans_cancel(tp, 0);
@@ -328,6 +335,7 @@ xfs_growfs_data_private(
328 be32_add(&agf->agf_length, new); 335 be32_add(&agf->agf_length, new);
329 ASSERT(be32_to_cpu(agf->agf_length) == 336 ASSERT(be32_to_cpu(agf->agf_length) ==
330 be32_to_cpu(agi->agi_length)); 337 be32_to_cpu(agi->agi_length));
338 xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH);
331 /* 339 /*
332 * Free the new space. 340 * Free the new space.
333 */ 341 */
@@ -494,8 +502,9 @@ xfs_reserve_blocks(
494 unsigned long s; 502 unsigned long s;
495 503
496 /* If inval is null, report current values and return */ 504 /* If inval is null, report current values and return */
497
498 if (inval == (__uint64_t *)NULL) { 505 if (inval == (__uint64_t *)NULL) {
506 if (!outval)
507 return EINVAL;
499 outval->resblks = mp->m_resblks; 508 outval->resblks = mp->m_resblks;
500 outval->resblks_avail = mp->m_resblks_avail; 509 outval->resblks_avail = mp->m_resblks_avail;
501 return 0; 510 return 0;
@@ -558,8 +567,10 @@ retry:
558 } 567 }
559 } 568 }
560out: 569out:
561 outval->resblks = mp->m_resblks; 570 if (outval) {
562 outval->resblks_avail = mp->m_resblks_avail; 571 outval->resblks = mp->m_resblks;
572 outval->resblks_avail = mp->m_resblks_avail;
573 }
563 XFS_SB_UNLOCK(mp, s); 574 XFS_SB_UNLOCK(mp, s);
564 575
565 if (fdblks_delta) { 576 if (fdblks_delta) {
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index b5feb3e77116..f943368c9b93 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -123,6 +123,7 @@ xfs_ialloc_ag_alloc(
123 int blks_per_cluster; /* fs blocks per inode cluster */ 123 int blks_per_cluster; /* fs blocks per inode cluster */
124 xfs_btree_cur_t *cur; /* inode btree cursor */ 124 xfs_btree_cur_t *cur; /* inode btree cursor */
125 xfs_daddr_t d; /* disk addr of buffer */ 125 xfs_daddr_t d; /* disk addr of buffer */
126 xfs_agnumber_t agno;
126 int error; 127 int error;
127 xfs_buf_t *fbuf; /* new free inodes' buffer */ 128 xfs_buf_t *fbuf; /* new free inodes' buffer */
128 xfs_dinode_t *free; /* new free inode structure */ 129 xfs_dinode_t *free; /* new free inode structure */
@@ -302,15 +303,15 @@ xfs_ialloc_ag_alloc(
302 } 303 }
303 be32_add(&agi->agi_count, newlen); 304 be32_add(&agi->agi_count, newlen);
304 be32_add(&agi->agi_freecount, newlen); 305 be32_add(&agi->agi_freecount, newlen);
306 agno = be32_to_cpu(agi->agi_seqno);
305 down_read(&args.mp->m_peraglock); 307 down_read(&args.mp->m_peraglock);
306 args.mp->m_perag[be32_to_cpu(agi->agi_seqno)].pagi_freecount += newlen; 308 args.mp->m_perag[agno].pagi_freecount += newlen;
307 up_read(&args.mp->m_peraglock); 309 up_read(&args.mp->m_peraglock);
308 agi->agi_newino = cpu_to_be32(newino); 310 agi->agi_newino = cpu_to_be32(newino);
309 /* 311 /*
310 * Insert records describing the new inode chunk into the btree. 312 * Insert records describing the new inode chunk into the btree.
311 */ 313 */
312 cur = xfs_btree_init_cursor(args.mp, tp, agbp, 314 cur = xfs_btree_init_cursor(args.mp, tp, agbp, agno,
313 be32_to_cpu(agi->agi_seqno),
314 XFS_BTNUM_INO, (xfs_inode_t *)0, 0); 315 XFS_BTNUM_INO, (xfs_inode_t *)0, 0);
315 for (thisino = newino; 316 for (thisino = newino;
316 thisino < newino + newlen; 317 thisino < newino + newlen;
@@ -1387,6 +1388,7 @@ xfs_ialloc_read_agi(
1387 pag = &mp->m_perag[agno]; 1388 pag = &mp->m_perag[agno];
1388 if (!pag->pagi_init) { 1389 if (!pag->pagi_init) {
1389 pag->pagi_freecount = be32_to_cpu(agi->agi_freecount); 1390 pag->pagi_freecount = be32_to_cpu(agi->agi_freecount);
1391 pag->pagi_count = be32_to_cpu(agi->agi_count);
1390 pag->pagi_init = 1; 1392 pag->pagi_init = 1;
1391 } else { 1393 } else {
1392 /* 1394 /*
@@ -1410,3 +1412,23 @@ xfs_ialloc_read_agi(
1410 *bpp = bp; 1412 *bpp = bp;
1411 return 0; 1413 return 0;
1412} 1414}
1415
1416/*
1417 * Read in the agi to initialise the per-ag data in the mount structure
1418 */
1419int
1420xfs_ialloc_pagi_init(
1421 xfs_mount_t *mp, /* file system mount structure */
1422 xfs_trans_t *tp, /* transaction pointer */
1423 xfs_agnumber_t agno) /* allocation group number */
1424{
1425 xfs_buf_t *bp = NULL;
1426 int error;
1427
1428 error = xfs_ialloc_read_agi(mp, tp, agno, &bp);
1429 if (error)
1430 return error;
1431 if (bp)
1432 xfs_trans_brelse(tp, bp);
1433 return 0;
1434}
diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h
index 7f5debe1acb6..97f4040931ca 100644
--- a/fs/xfs/xfs_ialloc.h
+++ b/fs/xfs/xfs_ialloc.h
@@ -149,6 +149,16 @@ xfs_ialloc_read_agi(
149 xfs_agnumber_t agno, /* allocation group number */ 149 xfs_agnumber_t agno, /* allocation group number */
150 struct xfs_buf **bpp); /* allocation group hdr buf */ 150 struct xfs_buf **bpp); /* allocation group hdr buf */
151 151
152/*
153 * Read in the allocation group header to initialise the per-ag data
154 * in the mount structure
155 */
156int
157xfs_ialloc_pagi_init(
158 struct xfs_mount *mp, /* file system mount structure */
159 struct xfs_trans *tp, /* transaction pointer */
160 xfs_agnumber_t agno); /* allocation group number */
161
152#endif /* __KERNEL__ */ 162#endif /* __KERNEL__ */
153 163
154#endif /* __XFS_IALLOC_H__ */ 164#endif /* __XFS_IALLOC_H__ */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 3ca5d43b8345..cdc4c28926d0 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -48,7 +48,9 @@
48#include "xfs_dir2_trace.h" 48#include "xfs_dir2_trace.h"
49#include "xfs_quota.h" 49#include "xfs_quota.h"
50#include "xfs_acl.h" 50#include "xfs_acl.h"
51#include "xfs_filestream.h"
51 52
53#include <linux/log2.h>
52 54
53kmem_zone_t *xfs_ifork_zone; 55kmem_zone_t *xfs_ifork_zone;
54kmem_zone_t *xfs_inode_zone; 56kmem_zone_t *xfs_inode_zone;
@@ -643,8 +645,7 @@ xfs_iformat_extents(
643 ep->l1 = INT_GET(get_unaligned((__uint64_t*)&dp->l1), 645 ep->l1 = INT_GET(get_unaligned((__uint64_t*)&dp->l1),
644 ARCH_CONVERT); 646 ARCH_CONVERT);
645 } 647 }
646 xfs_bmap_trace_exlist("xfs_iformat_extents", ip, nex, 648 XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork);
647 whichfork);
648 if (whichfork != XFS_DATA_FORK || 649 if (whichfork != XFS_DATA_FORK ||
649 XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE) 650 XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE)
650 if (unlikely(xfs_check_nostate_extents( 651 if (unlikely(xfs_check_nostate_extents(
@@ -817,6 +818,8 @@ _xfs_dic2xflags(
817 flags |= XFS_XFLAG_EXTSZINHERIT; 818 flags |= XFS_XFLAG_EXTSZINHERIT;
818 if (di_flags & XFS_DIFLAG_NODEFRAG) 819 if (di_flags & XFS_DIFLAG_NODEFRAG)
819 flags |= XFS_XFLAG_NODEFRAG; 820 flags |= XFS_XFLAG_NODEFRAG;
821 if (di_flags & XFS_DIFLAG_FILESTREAM)
822 flags |= XFS_XFLAG_FILESTREAM;
820 } 823 }
821 824
822 return flags; 825 return flags;
@@ -1074,6 +1077,11 @@ xfs_iread_extents(
1074 * also returns the [locked] bp pointing to the head of the freelist 1077 * also returns the [locked] bp pointing to the head of the freelist
1075 * as ialloc_context. The caller should hold this buffer across 1078 * as ialloc_context. The caller should hold this buffer across
1076 * the commit and pass it back into this routine on the second call. 1079 * the commit and pass it back into this routine on the second call.
1080 *
1081 * If we are allocating quota inodes, we do not have a parent inode
1082 * to attach to or associate with (i.e. pip == NULL) because they
1083 * are not linked into the directory structure - they are attached
1084 * directly to the superblock - and so have no parent.
1077 */ 1085 */
1078int 1086int
1079xfs_ialloc( 1087xfs_ialloc(
@@ -1099,7 +1107,7 @@ xfs_ialloc(
1099 * Call the space management code to pick 1107 * Call the space management code to pick
1100 * the on-disk inode to be allocated. 1108 * the on-disk inode to be allocated.
1101 */ 1109 */
1102 error = xfs_dialloc(tp, pip->i_ino, mode, okalloc, 1110 error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc,
1103 ialloc_context, call_again, &ino); 1111 ialloc_context, call_again, &ino);
1104 if (error != 0) { 1112 if (error != 0) {
1105 return error; 1113 return error;
@@ -1150,10 +1158,10 @@ xfs_ialloc(
1150 /* 1158 /*
1151 * Project ids won't be stored on disk if we are using a version 1 inode. 1159 * Project ids won't be stored on disk if we are using a version 1 inode.
1152 */ 1160 */
1153 if ( (prid != 0) && (ip->i_d.di_version == XFS_DINODE_VERSION_1)) 1161 if ((prid != 0) && (ip->i_d.di_version == XFS_DINODE_VERSION_1))
1154 xfs_bump_ino_vers2(tp, ip); 1162 xfs_bump_ino_vers2(tp, ip);
1155 1163
1156 if (XFS_INHERIT_GID(pip, vp->v_vfsp)) { 1164 if (pip && XFS_INHERIT_GID(pip, vp->v_vfsp)) {
1157 ip->i_d.di_gid = pip->i_d.di_gid; 1165 ip->i_d.di_gid = pip->i_d.di_gid;
1158 if ((pip->i_d.di_mode & S_ISGID) && (mode & S_IFMT) == S_IFDIR) { 1166 if ((pip->i_d.di_mode & S_ISGID) && (mode & S_IFMT) == S_IFDIR) {
1159 ip->i_d.di_mode |= S_ISGID; 1167 ip->i_d.di_mode |= S_ISGID;
@@ -1195,8 +1203,16 @@ xfs_ialloc(
1195 flags |= XFS_ILOG_DEV; 1203 flags |= XFS_ILOG_DEV;
1196 break; 1204 break;
1197 case S_IFREG: 1205 case S_IFREG:
1206 if (pip && xfs_inode_is_filestream(pip)) {
1207 error = xfs_filestream_associate(pip, ip);
1208 if (error < 0)
1209 return -error;
1210 if (!error)
1211 xfs_iflags_set(ip, XFS_IFILESTREAM);
1212 }
1213 /* fall through */
1198 case S_IFDIR: 1214 case S_IFDIR:
1199 if (unlikely(pip->i_d.di_flags & XFS_DIFLAG_ANY)) { 1215 if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
1200 uint di_flags = 0; 1216 uint di_flags = 0;
1201 1217
1202 if ((mode & S_IFMT) == S_IFDIR) { 1218 if ((mode & S_IFMT) == S_IFDIR) {
@@ -1233,6 +1249,8 @@ xfs_ialloc(
1233 if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) && 1249 if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) &&
1234 xfs_inherit_nodefrag) 1250 xfs_inherit_nodefrag)
1235 di_flags |= XFS_DIFLAG_NODEFRAG; 1251 di_flags |= XFS_DIFLAG_NODEFRAG;
1252 if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM)
1253 di_flags |= XFS_DIFLAG_FILESTREAM;
1236 ip->i_d.di_flags |= di_flags; 1254 ip->i_d.di_flags |= di_flags;
1237 } 1255 }
1238 /* FALLTHROUGH */ 1256 /* FALLTHROUGH */
@@ -2875,9 +2893,6 @@ xfs_iextents_copy(
2875 int copied; 2893 int copied;
2876 xfs_bmbt_rec_t *dest_ep; 2894 xfs_bmbt_rec_t *dest_ep;
2877 xfs_bmbt_rec_t *ep; 2895 xfs_bmbt_rec_t *ep;
2878#ifdef XFS_BMAP_TRACE
2879 static char fname[] = "xfs_iextents_copy";
2880#endif
2881 int i; 2896 int i;
2882 xfs_ifork_t *ifp; 2897 xfs_ifork_t *ifp;
2883 int nrecs; 2898 int nrecs;
@@ -2888,7 +2903,7 @@ xfs_iextents_copy(
2888 ASSERT(ifp->if_bytes > 0); 2903 ASSERT(ifp->if_bytes > 0);
2889 2904
2890 nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 2905 nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
2891 xfs_bmap_trace_exlist(fname, ip, nrecs, whichfork); 2906 XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork);
2892 ASSERT(nrecs > 0); 2907 ASSERT(nrecs > 0);
2893 2908
2894 /* 2909 /*
@@ -4184,7 +4199,7 @@ xfs_iext_realloc_direct(
4184 ifp->if_bytes = new_size; 4199 ifp->if_bytes = new_size;
4185 return; 4200 return;
4186 } 4201 }
4187 if ((new_size & (new_size - 1)) != 0) { 4202 if (!is_power_of_2(new_size)){
4188 rnew_size = xfs_iroundup(new_size); 4203 rnew_size = xfs_iroundup(new_size);
4189 } 4204 }
4190 if (rnew_size != ifp->if_real_bytes) { 4205 if (rnew_size != ifp->if_real_bytes) {
@@ -4207,7 +4222,7 @@ xfs_iext_realloc_direct(
4207 */ 4222 */
4208 else { 4223 else {
4209 new_size += ifp->if_bytes; 4224 new_size += ifp->if_bytes;
4210 if ((new_size & (new_size - 1)) != 0) { 4225 if (!is_power_of_2(new_size)) {
4211 rnew_size = xfs_iroundup(new_size); 4226 rnew_size = xfs_iroundup(new_size);
4212 } 4227 }
4213 xfs_iext_inline_to_direct(ifp, rnew_size); 4228 xfs_iext_inline_to_direct(ifp, rnew_size);
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index f75afecef8e7..012dfd4a958c 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -379,6 +379,7 @@ xfs_iflags_test(xfs_inode_t *ip, unsigned short flags)
379#define XFS_ISTALE 0x0010 /* inode has been staled */ 379#define XFS_ISTALE 0x0010 /* inode has been staled */
380#define XFS_IRECLAIMABLE 0x0020 /* inode can be reclaimed */ 380#define XFS_IRECLAIMABLE 0x0020 /* inode can be reclaimed */
381#define XFS_INEW 0x0040 381#define XFS_INEW 0x0040
382#define XFS_IFILESTREAM 0x0080 /* inode is in a filestream directory */
382 383
383/* 384/*
384 * Flags for inode locking. 385 * Flags for inode locking.
@@ -414,19 +415,22 @@ xfs_iflags_test(xfs_inode_t *ip, unsigned short flags)
414 * gets a lockdep subclass of 1 and the second lock will have a lockdep 415 * gets a lockdep subclass of 1 and the second lock will have a lockdep
415 * subclass of 0. 416 * subclass of 0.
416 * 417 *
417 * XFS_I[O]LOCK_INUMORDER - for locking several inodes at the some time 418 * XFS_LOCK_INUMORDER - for locking several inodes at the some time
418 * with xfs_lock_inodes(). This flag is used as the starting subclass 419 * with xfs_lock_inodes(). This flag is used as the starting subclass
419 * and each subsequent lock acquired will increment the subclass by one. 420 * and each subsequent lock acquired will increment the subclass by one.
420 * So the first lock acquired will have a lockdep subclass of 2, the 421 * So the first lock acquired will have a lockdep subclass of 2, the
421 * second lock will have a lockdep subclass of 3, and so on. 422 * second lock will have a lockdep subclass of 3, and so on. It is
423 * the responsibility of the class builder to shift this to the correct
424 * portion of the lock_mode lockdep mask.
422 */ 425 */
426#define XFS_LOCK_PARENT 1
427#define XFS_LOCK_INUMORDER 2
428
423#define XFS_IOLOCK_SHIFT 16 429#define XFS_IOLOCK_SHIFT 16
424#define XFS_IOLOCK_PARENT (1 << XFS_IOLOCK_SHIFT) 430#define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT)
425#define XFS_IOLOCK_INUMORDER (2 << XFS_IOLOCK_SHIFT)
426 431
427#define XFS_ILOCK_SHIFT 24 432#define XFS_ILOCK_SHIFT 24
428#define XFS_ILOCK_PARENT (1 << XFS_ILOCK_SHIFT) 433#define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT)
429#define XFS_ILOCK_INUMORDER (2 << XFS_ILOCK_SHIFT)
430 434
431#define XFS_IOLOCK_DEP_MASK 0x00ff0000 435#define XFS_IOLOCK_DEP_MASK 0x00ff0000
432#define XFS_ILOCK_DEP_MASK 0xff000000 436#define XFS_ILOCK_DEP_MASK 0xff000000
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 3f2b9f2a7b94..bf57b75acb90 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -451,19 +451,14 @@ xfs_iomap_write_direct(
451 return XFS_ERROR(error); 451 return XFS_ERROR(error);
452 452
453 rt = XFS_IS_REALTIME_INODE(ip); 453 rt = XFS_IS_REALTIME_INODE(ip);
454 if (unlikely(rt)) { 454 extsz = xfs_get_extsz_hint(ip);
455 if (!(extsz = ip->i_d.di_extsize))
456 extsz = mp->m_sb.sb_rextsize;
457 } else {
458 extsz = ip->i_d.di_extsize;
459 }
460 455
461 isize = ip->i_size; 456 isize = ip->i_size;
462 if (io->io_new_size > isize) 457 if (io->io_new_size > isize)
463 isize = io->io_new_size; 458 isize = io->io_new_size;
464 459
465 offset_fsb = XFS_B_TO_FSBT(mp, offset); 460 offset_fsb = XFS_B_TO_FSBT(mp, offset);
466 last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); 461 last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
467 if ((offset + count) > isize) { 462 if ((offset + count) > isize) {
468 error = xfs_iomap_eof_align_last_fsb(mp, io, isize, extsz, 463 error = xfs_iomap_eof_align_last_fsb(mp, io, isize, extsz,
469 &last_fsb); 464 &last_fsb);
@@ -489,13 +484,13 @@ xfs_iomap_write_direct(
489 if (unlikely(rt)) { 484 if (unlikely(rt)) {
490 resrtextents = qblocks = resaligned; 485 resrtextents = qblocks = resaligned;
491 resrtextents /= mp->m_sb.sb_rextsize; 486 resrtextents /= mp->m_sb.sb_rextsize;
492 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); 487 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
493 quota_flag = XFS_QMOPT_RES_RTBLKS; 488 quota_flag = XFS_QMOPT_RES_RTBLKS;
494 } else { 489 } else {
495 resrtextents = 0; 490 resrtextents = 0;
496 resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned); 491 resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
497 quota_flag = XFS_QMOPT_RES_REGBLKS; 492 quota_flag = XFS_QMOPT_RES_REGBLKS;
498 } 493 }
499 494
500 /* 495 /*
501 * Allocate and setup the transaction 496 * Allocate and setup the transaction
@@ -666,13 +661,7 @@ xfs_iomap_write_delay(
666 if (error) 661 if (error)
667 return XFS_ERROR(error); 662 return XFS_ERROR(error);
668 663
669 if (XFS_IS_REALTIME_INODE(ip)) { 664 extsz = xfs_get_extsz_hint(ip);
670 if (!(extsz = ip->i_d.di_extsize))
671 extsz = mp->m_sb.sb_rextsize;
672 } else {
673 extsz = ip->i_d.di_extsize;
674 }
675
676 offset_fsb = XFS_B_TO_FSBT(mp, offset); 665 offset_fsb = XFS_B_TO_FSBT(mp, offset);
677 666
678retry: 667retry:
@@ -788,18 +777,12 @@ xfs_iomap_write_allocate(
788 nimaps = 0; 777 nimaps = 0;
789 while (nimaps == 0) { 778 while (nimaps == 0) {
790 tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE); 779 tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
780 tp->t_flags |= XFS_TRANS_RESERVE;
791 nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK); 781 nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
792 error = xfs_trans_reserve(tp, nres, 782 error = xfs_trans_reserve(tp, nres,
793 XFS_WRITE_LOG_RES(mp), 783 XFS_WRITE_LOG_RES(mp),
794 0, XFS_TRANS_PERM_LOG_RES, 784 0, XFS_TRANS_PERM_LOG_RES,
795 XFS_WRITE_LOG_COUNT); 785 XFS_WRITE_LOG_COUNT);
796 if (error == ENOSPC) {
797 error = xfs_trans_reserve(tp, 0,
798 XFS_WRITE_LOG_RES(mp),
799 0,
800 XFS_TRANS_PERM_LOG_RES,
801 XFS_WRITE_LOG_COUNT);
802 }
803 if (error) { 786 if (error) {
804 xfs_trans_cancel(tp, 0); 787 xfs_trans_cancel(tp, 0);
805 return XFS_ERROR(error); 788 return XFS_ERROR(error);
@@ -917,8 +900,8 @@ xfs_iomap_write_unwritten(
917 * from unwritten to real. Do allocations in a loop until 900 * from unwritten to real. Do allocations in a loop until
918 * we have covered the range passed in. 901 * we have covered the range passed in.
919 */ 902 */
920
921 tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE); 903 tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
904 tp->t_flags |= XFS_TRANS_RESERVE;
922 error = xfs_trans_reserve(tp, resblks, 905 error = xfs_trans_reserve(tp, resblks,
923 XFS_WRITE_LOG_RES(mp), 0, 906 XFS_WRITE_LOG_RES(mp), 0,
924 XFS_TRANS_PERM_LOG_RES, 907 XFS_TRANS_PERM_LOG_RES,
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index e725ddd3de5f..4c2454bcc714 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -202,6 +202,16 @@ xfs_bulkstat_one_dinode(
202 return 0; 202 return 0;
203} 203}
204 204
205STATIC int
206xfs_bulkstat_one_fmt(
207 void __user *ubuffer,
208 const xfs_bstat_t *buffer)
209{
210 if (copy_to_user(ubuffer, buffer, sizeof(*buffer)))
211 return -EFAULT;
212 return sizeof(*buffer);
213}
214
205/* 215/*
206 * Return stat information for one inode. 216 * Return stat information for one inode.
207 * Return 0 if ok, else errno. 217 * Return 0 if ok, else errno.
@@ -221,6 +231,7 @@ xfs_bulkstat_one(
221 xfs_bstat_t *buf; /* return buffer */ 231 xfs_bstat_t *buf; /* return buffer */
222 int error = 0; /* error value */ 232 int error = 0; /* error value */
223 xfs_dinode_t *dip; /* dinode inode pointer */ 233 xfs_dinode_t *dip; /* dinode inode pointer */
234 bulkstat_one_fmt_pf formatter = private_data ? : xfs_bulkstat_one_fmt;
224 235
225 dip = (xfs_dinode_t *)dibuff; 236 dip = (xfs_dinode_t *)dibuff;
226 *stat = BULKSTAT_RV_NOTHING; 237 *stat = BULKSTAT_RV_NOTHING;
@@ -243,14 +254,15 @@ xfs_bulkstat_one(
243 xfs_bulkstat_one_dinode(mp, ino, dip, buf); 254 xfs_bulkstat_one_dinode(mp, ino, dip, buf);
244 } 255 }
245 256
246 if (copy_to_user(buffer, buf, sizeof(*buf))) { 257 error = formatter(buffer, buf);
258 if (error < 0) {
247 error = EFAULT; 259 error = EFAULT;
248 goto out_free; 260 goto out_free;
249 } 261 }
250 262
251 *stat = BULKSTAT_RV_DIDONE; 263 *stat = BULKSTAT_RV_DIDONE;
252 if (ubused) 264 if (ubused)
253 *ubused = sizeof(*buf); 265 *ubused = error;
254 266
255 out_free: 267 out_free:
256 kmem_free(buf, sizeof(*buf)); 268 kmem_free(buf, sizeof(*buf));
@@ -748,6 +760,19 @@ xfs_bulkstat_single(
748 return 0; 760 return 0;
749} 761}
750 762
763int
764xfs_inumbers_fmt(
765 void __user *ubuffer, /* buffer to write to */
766 const xfs_inogrp_t *buffer, /* buffer to read from */
767 long count, /* # of elements to read */
768 long *written) /* # of bytes written */
769{
770 if (copy_to_user(ubuffer, buffer, count * sizeof(*buffer)))
771 return -EFAULT;
772 *written = count * sizeof(*buffer);
773 return 0;
774}
775
751/* 776/*
752 * Return inode number table for the filesystem. 777 * Return inode number table for the filesystem.
753 */ 778 */
@@ -756,7 +781,8 @@ xfs_inumbers(
756 xfs_mount_t *mp, /* mount point for filesystem */ 781 xfs_mount_t *mp, /* mount point for filesystem */
757 xfs_ino_t *lastino, /* last inode returned */ 782 xfs_ino_t *lastino, /* last inode returned */
758 int *count, /* size of buffer/count returned */ 783 int *count, /* size of buffer/count returned */
759 xfs_inogrp_t __user *ubuffer)/* buffer with inode descriptions */ 784 void __user *ubuffer,/* buffer with inode descriptions */
785 inumbers_fmt_pf formatter)
760{ 786{
761 xfs_buf_t *agbp; 787 xfs_buf_t *agbp;
762 xfs_agino_t agino; 788 xfs_agino_t agino;
@@ -835,12 +861,12 @@ xfs_inumbers(
835 bufidx++; 861 bufidx++;
836 left--; 862 left--;
837 if (bufidx == bcount) { 863 if (bufidx == bcount) {
838 if (copy_to_user(ubuffer, buffer, 864 long written;
839 bufidx * sizeof(*buffer))) { 865 if (formatter(ubuffer, buffer, bufidx, &written)) {
840 error = XFS_ERROR(EFAULT); 866 error = XFS_ERROR(EFAULT);
841 break; 867 break;
842 } 868 }
843 ubuffer += bufidx; 869 ubuffer += written;
844 *count += bufidx; 870 *count += bufidx;
845 bufidx = 0; 871 bufidx = 0;
846 } 872 }
@@ -862,8 +888,8 @@ xfs_inumbers(
862 } 888 }
863 if (!error) { 889 if (!error) {
864 if (bufidx) { 890 if (bufidx) {
865 if (copy_to_user(ubuffer, buffer, 891 long written;
866 bufidx * sizeof(*buffer))) 892 if (formatter(ubuffer, buffer, bufidx, &written))
867 error = XFS_ERROR(EFAULT); 893 error = XFS_ERROR(EFAULT);
868 else 894 else
869 *count += bufidx; 895 *count += bufidx;
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index f25a28862a17..a1f18fce9b70 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -69,6 +69,10 @@ xfs_bulkstat_single(
69 char __user *buffer, 69 char __user *buffer,
70 int *done); 70 int *done);
71 71
72typedef int (*bulkstat_one_fmt_pf)( /* used size in bytes or negative error */
73 void __user *ubuffer, /* buffer to write to */
74 const xfs_bstat_t *buffer); /* buffer to read from */
75
72int 76int
73xfs_bulkstat_one( 77xfs_bulkstat_one(
74 xfs_mount_t *mp, 78 xfs_mount_t *mp,
@@ -86,11 +90,25 @@ xfs_internal_inum(
86 xfs_mount_t *mp, 90 xfs_mount_t *mp,
87 xfs_ino_t ino); 91 xfs_ino_t ino);
88 92
93typedef int (*inumbers_fmt_pf)(
94 void __user *ubuffer, /* buffer to write to */
95 const xfs_inogrp_t *buffer, /* buffer to read from */
96 long count, /* # of elements to read */
97 long *written); /* # of bytes written */
98
99int
100xfs_inumbers_fmt(
101 void __user *ubuffer, /* buffer to write to */
102 const xfs_inogrp_t *buffer, /* buffer to read from */
103 long count, /* # of elements to read */
104 long *written); /* # of bytes written */
105
89int /* error status */ 106int /* error status */
90xfs_inumbers( 107xfs_inumbers(
91 xfs_mount_t *mp, /* mount point for filesystem */ 108 xfs_mount_t *mp, /* mount point for filesystem */
92 xfs_ino_t *last, /* last inode returned */ 109 xfs_ino_t *last, /* last inode returned */
93 int *count, /* size of buffer/count returned */ 110 int *count, /* size of buffer/count returned */
94 xfs_inogrp_t __user *buffer);/* buffer with inode info */ 111 void __user *buffer, /* buffer with inode info */
112 inumbers_fmt_pf formatter);
95 113
96#endif /* __XFS_ITABLE_H__ */ 114#endif /* __XFS_ITABLE_H__ */
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index c48bf61f17bd..9d4c4fbeb3ee 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -817,10 +817,8 @@ xfs_log_need_covered(xfs_mount_t *mp)
817 SPLDECL(s); 817 SPLDECL(s);
818 int needed = 0, gen; 818 int needed = 0, gen;
819 xlog_t *log = mp->m_log; 819 xlog_t *log = mp->m_log;
820 bhv_vfs_t *vfsp = XFS_MTOVFS(mp);
821 820
822 if (vfs_test_for_freeze(vfsp) || XFS_FORCED_SHUTDOWN(mp) || 821 if (!xfs_fs_writable(mp))
823 (vfsp->vfs_flag & VFS_RDONLY))
824 return 0; 822 return 0;
825 823
826 s = LOG_LOCK(log); 824 s = LOG_LOCK(log);
@@ -967,14 +965,16 @@ xlog_iodone(xfs_buf_t *bp)
967 } else if (iclog->ic_state & XLOG_STATE_IOERROR) { 965 } else if (iclog->ic_state & XLOG_STATE_IOERROR) {
968 aborted = XFS_LI_ABORTED; 966 aborted = XFS_LI_ABORTED;
969 } 967 }
968
969 /* log I/O is always issued ASYNC */
970 ASSERT(XFS_BUF_ISASYNC(bp));
970 xlog_state_done_syncing(iclog, aborted); 971 xlog_state_done_syncing(iclog, aborted);
971 if (!(XFS_BUF_ISASYNC(bp))) { 972 /*
972 /* 973 * do not reference the buffer (bp) here as we could race
973 * Corresponding psema() will be done in bwrite(). If we don't 974 * with it being freed after writing the unmount record to the
974 * vsema() here, panic. 975 * log.
975 */ 976 */
976 XFS_BUF_V_IODONESEMA(bp); 977
977 }
978} /* xlog_iodone */ 978} /* xlog_iodone */
979 979
980/* 980/*
@@ -1199,11 +1199,18 @@ xlog_alloc_log(xfs_mount_t *mp,
1199 *iclogp = (xlog_in_core_t *) 1199 *iclogp = (xlog_in_core_t *)
1200 kmem_zalloc(sizeof(xlog_in_core_t), KM_SLEEP); 1200 kmem_zalloc(sizeof(xlog_in_core_t), KM_SLEEP);
1201 iclog = *iclogp; 1201 iclog = *iclogp;
1202 iclog->hic_data = (xlog_in_core_2_t *)
1203 kmem_zalloc(iclogsize, KM_SLEEP | KM_LARGE);
1204
1205 iclog->ic_prev = prev_iclog; 1202 iclog->ic_prev = prev_iclog;
1206 prev_iclog = iclog; 1203 prev_iclog = iclog;
1204
1205 bp = xfs_buf_get_noaddr(log->l_iclog_size, mp->m_logdev_targp);
1206 if (!XFS_BUF_CPSEMA(bp))
1207 ASSERT(0);
1208 XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
1209 XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
1210 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
1211 iclog->ic_bp = bp;
1212 iclog->hic_data = bp->b_addr;
1213
1207 log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header); 1214 log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header);
1208 1215
1209 head = &iclog->ic_header; 1216 head = &iclog->ic_header;
@@ -1216,11 +1223,6 @@ xlog_alloc_log(xfs_mount_t *mp,
1216 INT_SET(head->h_fmt, ARCH_CONVERT, XLOG_FMT); 1223 INT_SET(head->h_fmt, ARCH_CONVERT, XLOG_FMT);
1217 memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t)); 1224 memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t));
1218 1225
1219 bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp);
1220 XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
1221 XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
1222 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
1223 iclog->ic_bp = bp;
1224 1226
1225 iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize; 1227 iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize;
1226 iclog->ic_state = XLOG_STATE_ACTIVE; 1228 iclog->ic_state = XLOG_STATE_ACTIVE;
@@ -1432,7 +1434,7 @@ xlog_sync(xlog_t *log,
1432 } else { 1434 } else {
1433 iclog->ic_bwritecnt = 1; 1435 iclog->ic_bwritecnt = 1;
1434 } 1436 }
1435 XFS_BUF_SET_PTR(bp, (xfs_caddr_t) &(iclog->ic_header), count); 1437 XFS_BUF_SET_COUNT(bp, count);
1436 XFS_BUF_SET_FSPRIVATE(bp, iclog); /* save for later */ 1438 XFS_BUF_SET_FSPRIVATE(bp, iclog); /* save for later */
1437 XFS_BUF_ZEROFLAGS(bp); 1439 XFS_BUF_ZEROFLAGS(bp);
1438 XFS_BUF_BUSY(bp); 1440 XFS_BUF_BUSY(bp);
@@ -1528,7 +1530,6 @@ xlog_dealloc_log(xlog_t *log)
1528 } 1530 }
1529#endif 1531#endif
1530 next_iclog = iclog->ic_next; 1532 next_iclog = iclog->ic_next;
1531 kmem_free(iclog->hic_data, log->l_iclog_size);
1532 kmem_free(iclog, sizeof(xlog_in_core_t)); 1533 kmem_free(iclog, sizeof(xlog_in_core_t));
1533 iclog = next_iclog; 1534 iclog = next_iclog;
1534 } 1535 }
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 080fabf61c92..fddbb091a86f 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -927,6 +927,14 @@ xlog_find_tail(
927 ASSIGN_ANY_LSN_HOST(log->l_last_sync_lsn, log->l_curr_cycle, 927 ASSIGN_ANY_LSN_HOST(log->l_last_sync_lsn, log->l_curr_cycle,
928 after_umount_blk); 928 after_umount_blk);
929 *tail_blk = after_umount_blk; 929 *tail_blk = after_umount_blk;
930
931 /*
932 * Note that the unmount was clean. If the unmount
933 * was not clean, we need to know this to rebuild the
934 * superblock counters from the perag headers if we
935 * have a filesystem using non-persistent counters.
936 */
937 log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
930 } 938 }
931 } 939 }
932 940
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index a96bde6df96d..a66b39805176 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -202,6 +202,27 @@ xfs_mount_free(
202 kmem_free(mp, sizeof(xfs_mount_t)); 202 kmem_free(mp, sizeof(xfs_mount_t));
203} 203}
204 204
205/*
206 * Check size of device based on the (data/realtime) block count.
207 * Note: this check is used by the growfs code as well as mount.
208 */
209int
210xfs_sb_validate_fsb_count(
211 xfs_sb_t *sbp,
212 __uint64_t nblocks)
213{
214 ASSERT(PAGE_SHIFT >= sbp->sb_blocklog);
215 ASSERT(sbp->sb_blocklog >= BBSHIFT);
216
217#if XFS_BIG_BLKNOS /* Limited by ULONG_MAX of page cache index */
218 if (nblocks >> (PAGE_CACHE_SHIFT - sbp->sb_blocklog) > ULONG_MAX)
219 return E2BIG;
220#else /* Limited by UINT_MAX of sectors */
221 if (nblocks << (sbp->sb_blocklog - BBSHIFT) > UINT_MAX)
222 return E2BIG;
223#endif
224 return 0;
225}
205 226
206/* 227/*
207 * Check the validity of the SB found. 228 * Check the validity of the SB found.
@@ -284,18 +305,8 @@ xfs_mount_validate_sb(
284 return XFS_ERROR(EFSCORRUPTED); 305 return XFS_ERROR(EFSCORRUPTED);
285 } 306 }
286 307
287 ASSERT(PAGE_SHIFT >= sbp->sb_blocklog); 308 if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) ||
288 ASSERT(sbp->sb_blocklog >= BBSHIFT); 309 xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) {
289
290#if XFS_BIG_BLKNOS /* Limited by ULONG_MAX of page cache index */
291 if (unlikely(
292 (sbp->sb_dblocks >> (PAGE_SHIFT - sbp->sb_blocklog)) > ULONG_MAX ||
293 (sbp->sb_rblocks >> (PAGE_SHIFT - sbp->sb_blocklog)) > ULONG_MAX)) {
294#else /* Limited by UINT_MAX of sectors */
295 if (unlikely(
296 (sbp->sb_dblocks << (sbp->sb_blocklog - BBSHIFT)) > UINT_MAX ||
297 (sbp->sb_rblocks << (sbp->sb_blocklog - BBSHIFT)) > UINT_MAX)) {
298#endif
299 xfs_fs_mount_cmn_err(flags, 310 xfs_fs_mount_cmn_err(flags,
300 "file system too large to be mounted on this system."); 311 "file system too large to be mounted on this system.");
301 return XFS_ERROR(E2BIG); 312 return XFS_ERROR(E2BIG);
@@ -632,6 +643,64 @@ xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp)
632 sbp->sb_inopblock); 643 sbp->sb_inopblock);
633 mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog; 644 mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
634} 645}
646
647/*
648 * xfs_initialize_perag_data
649 *
650 * Read in each per-ag structure so we can count up the number of
651 * allocated inodes, free inodes and used filesystem blocks as this
652 * information is no longer persistent in the superblock. Once we have
653 * this information, write it into the in-core superblock structure.
654 */
655STATIC int
656xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount)
657{
658 xfs_agnumber_t index;
659 xfs_perag_t *pag;
660 xfs_sb_t *sbp = &mp->m_sb;
661 uint64_t ifree = 0;
662 uint64_t ialloc = 0;
663 uint64_t bfree = 0;
664 uint64_t bfreelst = 0;
665 uint64_t btree = 0;
666 int error;
667 int s;
668
669 for (index = 0; index < agcount; index++) {
670 /*
671 * read the agf, then the agi. This gets us
672 * all the inforamtion we need and populates the
673 * per-ag structures for us.
674 */
675 error = xfs_alloc_pagf_init(mp, NULL, index, 0);
676 if (error)
677 return error;
678
679 error = xfs_ialloc_pagi_init(mp, NULL, index);
680 if (error)
681 return error;
682 pag = &mp->m_perag[index];
683 ifree += pag->pagi_freecount;
684 ialloc += pag->pagi_count;
685 bfree += pag->pagf_freeblks;
686 bfreelst += pag->pagf_flcount;
687 btree += pag->pagf_btreeblks;
688 }
689 /*
690 * Overwrite incore superblock counters with just-read data
691 */
692 s = XFS_SB_LOCK(mp);
693 sbp->sb_ifree = ifree;
694 sbp->sb_icount = ialloc;
695 sbp->sb_fdblocks = bfree + bfreelst + btree;
696 XFS_SB_UNLOCK(mp, s);
697
698 /* Fixup the per-cpu counters as well. */
699 xfs_icsb_reinit_counters(mp);
700
701 return 0;
702}
703
635/* 704/*
636 * xfs_mountfs 705 * xfs_mountfs
637 * 706 *
@@ -656,7 +725,7 @@ xfs_mountfs(
656 bhv_vnode_t *rvp = NULL; 725 bhv_vnode_t *rvp = NULL;
657 int readio_log, writeio_log; 726 int readio_log, writeio_log;
658 xfs_daddr_t d; 727 xfs_daddr_t d;
659 __uint64_t ret64; 728 __uint64_t resblks;
660 __int64_t update_flags; 729 __int64_t update_flags;
661 uint quotamount, quotaflags; 730 uint quotamount, quotaflags;
662 int agno; 731 int agno;
@@ -773,6 +842,7 @@ xfs_mountfs(
773 */ 842 */
774 if ((mfsi_flags & XFS_MFSI_SECOND) == 0 && 843 if ((mfsi_flags & XFS_MFSI_SECOND) == 0 &&
775 (mp->m_flags & XFS_MOUNT_NOUUID) == 0) { 844 (mp->m_flags & XFS_MOUNT_NOUUID) == 0) {
845 __uint64_t ret64;
776 if (xfs_uuid_mount(mp)) { 846 if (xfs_uuid_mount(mp)) {
777 error = XFS_ERROR(EINVAL); 847 error = XFS_ERROR(EINVAL);
778 goto error1; 848 goto error1;
@@ -976,6 +1046,34 @@ xfs_mountfs(
976 } 1046 }
977 1047
978 /* 1048 /*
1049 * Now the log is mounted, we know if it was an unclean shutdown or
1050 * not. If it was, with the first phase of recovery has completed, we
1051 * have consistent AG blocks on disk. We have not recovered EFIs yet,
1052 * but they are recovered transactionally in the second recovery phase
1053 * later.
1054 *
1055 * Hence we can safely re-initialise incore superblock counters from
1056 * the per-ag data. These may not be correct if the filesystem was not
1057 * cleanly unmounted, so we need to wait for recovery to finish before
1058 * doing this.
1059 *
1060 * If the filesystem was cleanly unmounted, then we can trust the
1061 * values in the superblock to be correct and we don't need to do
1062 * anything here.
1063 *
1064 * If we are currently making the filesystem, the initialisation will
1065 * fail as the perag data is in an undefined state.
1066 */
1067
1068 if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
1069 !XFS_LAST_UNMOUNT_WAS_CLEAN(mp) &&
1070 !mp->m_sb.sb_inprogress) {
1071 error = xfs_initialize_perag_data(mp, sbp->sb_agcount);
1072 if (error) {
1073 goto error2;
1074 }
1075 }
1076 /*
979 * Get and sanity-check the root inode. 1077 * Get and sanity-check the root inode.
980 * Save the pointer to it in the mount structure. 1078 * Save the pointer to it in the mount structure.
981 */ 1079 */
@@ -1044,6 +1142,23 @@ xfs_mountfs(
1044 if ((error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags))) 1142 if ((error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags)))
1045 goto error4; 1143 goto error4;
1046 1144
1145 /*
1146 * Now we are mounted, reserve a small amount of unused space for
1147 * privileged transactions. This is needed so that transaction
1148 * space required for critical operations can dip into this pool
1149 * when at ENOSPC. This is needed for operations like create with
1150 * attr, unwritten extent conversion at ENOSPC, etc. Data allocations
1151 * are not allowed to use this reserved space.
1152 *
1153 * We default to 5% or 1024 fsbs of space reserved, whichever is smaller.
1154 * This may drive us straight to ENOSPC on mount, but that implies
1155 * we were already there on the last unmount.
1156 */
1157 resblks = mp->m_sb.sb_dblocks;
1158 do_div(resblks, 20);
1159 resblks = min_t(__uint64_t, resblks, 1024);
1160 xfs_reserve_blocks(mp, &resblks, NULL);
1161
1047 return 0; 1162 return 0;
1048 1163
1049 error4: 1164 error4:
@@ -1083,7 +1198,19 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
1083#if defined(DEBUG) || defined(INDUCE_IO_ERROR) 1198#if defined(DEBUG) || defined(INDUCE_IO_ERROR)
1084 int64_t fsid; 1199 int64_t fsid;
1085#endif 1200#endif
1201 __uint64_t resblks;
1086 1202
1203 /*
1204 * We can potentially deadlock here if we have an inode cluster
1205 * that has been freed has it's buffer still pinned in memory because
1206 * the transaction is still sitting in a iclog. The stale inodes
1207 * on that buffer will have their flush locks held until the
1208 * transaction hits the disk and the callbacks run. the inode
1209 * flush takes the flush lock unconditionally and with nothing to
1210 * push out the iclog we will never get that unlocked. hence we
1211 * need to force the log first.
1212 */
1213 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);
1087 xfs_iflush_all(mp); 1214 xfs_iflush_all(mp);
1088 1215
1089 XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING); 1216 XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);
@@ -1100,10 +1227,26 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
1100 xfs_binval(mp->m_rtdev_targp); 1227 xfs_binval(mp->m_rtdev_targp);
1101 } 1228 }
1102 1229
1103 xfs_unmountfs_writesb(mp); 1230 /*
1231 * Unreserve any blocks we have so that when we unmount we don't account
1232 * the reserved free space as used. This is really only necessary for
1233 * lazy superblock counting because it trusts the incore superblock
1234 * counters to be aboslutely correct on clean unmount.
1235 *
1236 * We don't bother correcting this elsewhere for lazy superblock
1237 * counting because on mount of an unclean filesystem we reconstruct the
1238 * correct counter value and this is irrelevant.
1239 *
1240 * For non-lazy counter filesystems, this doesn't matter at all because
1241 * we only every apply deltas to the superblock and hence the incore
1242 * value does not matter....
1243 */
1244 resblks = 0;
1245 xfs_reserve_blocks(mp, &resblks, NULL);
1104 1246
1247 xfs_log_sbcount(mp, 1);
1248 xfs_unmountfs_writesb(mp);
1105 xfs_unmountfs_wait(mp); /* wait for async bufs */ 1249 xfs_unmountfs_wait(mp); /* wait for async bufs */
1106
1107 xfs_log_unmount(mp); /* Done! No more fs ops. */ 1250 xfs_log_unmount(mp); /* Done! No more fs ops. */
1108 1251
1109 xfs_freesb(mp); 1252 xfs_freesb(mp);
@@ -1150,6 +1293,62 @@ xfs_unmountfs_wait(xfs_mount_t *mp)
1150} 1293}
1151 1294
1152int 1295int
1296xfs_fs_writable(xfs_mount_t *mp)
1297{
1298 bhv_vfs_t *vfsp = XFS_MTOVFS(mp);
1299
1300 return !(vfs_test_for_freeze(vfsp) || XFS_FORCED_SHUTDOWN(mp) ||
1301 (vfsp->vfs_flag & VFS_RDONLY));
1302}
1303
1304/*
1305 * xfs_log_sbcount
1306 *
1307 * Called either periodically to keep the on disk superblock values
1308 * roughly up to date or from unmount to make sure the values are
1309 * correct on a clean unmount.
1310 *
1311 * Note this code can be called during the process of freezing, so
1312 * we may need to use the transaction allocator which does not not
1313 * block when the transaction subsystem is in its frozen state.
1314 */
1315int
1316xfs_log_sbcount(
1317 xfs_mount_t *mp,
1318 uint sync)
1319{
1320 xfs_trans_t *tp;
1321 int error;
1322
1323 if (!xfs_fs_writable(mp))
1324 return 0;
1325
1326 xfs_icsb_sync_counters(mp);
1327
1328 /*
1329 * we don't need to do this if we are updating the superblock
1330 * counters on every modification.
1331 */
1332 if (!xfs_sb_version_haslazysbcount(&mp->m_sb))
1333 return 0;
1334
1335 tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT);
1336 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
1337 XFS_DEFAULT_LOG_COUNT);
1338 if (error) {
1339 xfs_trans_cancel(tp, 0);
1340 return error;
1341 }
1342
1343 xfs_mod_sb(tp, XFS_SB_IFREE | XFS_SB_ICOUNT | XFS_SB_FDBLOCKS);
1344 if (sync)
1345 xfs_trans_set_sync(tp);
1346 xfs_trans_commit(tp, 0);
1347
1348 return 0;
1349}
1350
1351int
1153xfs_unmountfs_writesb(xfs_mount_t *mp) 1352xfs_unmountfs_writesb(xfs_mount_t *mp)
1154{ 1353{
1155 xfs_buf_t *sbp; 1354 xfs_buf_t *sbp;
@@ -1160,16 +1359,15 @@ xfs_unmountfs_writesb(xfs_mount_t *mp)
1160 * skip superblock write if fs is read-only, or 1359 * skip superblock write if fs is read-only, or
1161 * if we are doing a forced umount. 1360 * if we are doing a forced umount.
1162 */ 1361 */
1163 sbp = xfs_getsb(mp, 0);
1164 if (!(XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY || 1362 if (!(XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY ||
1165 XFS_FORCED_SHUTDOWN(mp))) { 1363 XFS_FORCED_SHUTDOWN(mp))) {
1166 1364
1167 xfs_icsb_sync_counters(mp); 1365 sbp = xfs_getsb(mp, 0);
1366 sb = XFS_BUF_TO_SBP(sbp);
1168 1367
1169 /* 1368 /*
1170 * mark shared-readonly if desired 1369 * mark shared-readonly if desired
1171 */ 1370 */
1172 sb = XFS_BUF_TO_SBP(sbp);
1173 if (mp->m_mk_sharedro) { 1371 if (mp->m_mk_sharedro) {
1174 if (!(sb->sb_flags & XFS_SBF_READONLY)) 1372 if (!(sb->sb_flags & XFS_SBF_READONLY))
1175 sb->sb_flags |= XFS_SBF_READONLY; 1373 sb->sb_flags |= XFS_SBF_READONLY;
@@ -1178,6 +1376,7 @@ xfs_unmountfs_writesb(xfs_mount_t *mp)
1178 xfs_fs_cmn_err(CE_NOTE, mp, 1376 xfs_fs_cmn_err(CE_NOTE, mp,
1179 "Unmounting, marking shared read-only"); 1377 "Unmounting, marking shared read-only");
1180 } 1378 }
1379
1181 XFS_BUF_UNDONE(sbp); 1380 XFS_BUF_UNDONE(sbp);
1182 XFS_BUF_UNREAD(sbp); 1381 XFS_BUF_UNREAD(sbp);
1183 XFS_BUF_UNDELAYWRITE(sbp); 1382 XFS_BUF_UNDELAYWRITE(sbp);
@@ -1192,8 +1391,8 @@ xfs_unmountfs_writesb(xfs_mount_t *mp)
1192 mp, sbp, XFS_BUF_ADDR(sbp)); 1391 mp, sbp, XFS_BUF_ADDR(sbp));
1193 if (error && mp->m_mk_sharedro) 1392 if (error && mp->m_mk_sharedro)
1194 xfs_fs_cmn_err(CE_ALERT, mp, "Superblock write error detected while unmounting. Filesystem may not be marked shared readonly"); 1393 xfs_fs_cmn_err(CE_ALERT, mp, "Superblock write error detected while unmounting. Filesystem may not be marked shared readonly");
1394 xfs_buf_relse(sbp);
1195 } 1395 }
1196 xfs_buf_relse(sbp);
1197 return error; 1396 return error;
1198} 1397}
1199 1398
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 82304b94646d..76ad74758696 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -66,6 +66,7 @@ struct xfs_bmbt_irec;
66struct xfs_bmap_free; 66struct xfs_bmap_free;
67struct xfs_extdelta; 67struct xfs_extdelta;
68struct xfs_swapext; 68struct xfs_swapext;
69struct xfs_mru_cache;
69 70
70extern struct bhv_vfsops xfs_vfsops; 71extern struct bhv_vfsops xfs_vfsops;
71extern struct bhv_vnodeops xfs_vnodeops; 72extern struct bhv_vnodeops xfs_vnodeops;
@@ -424,17 +425,18 @@ typedef struct xfs_mount {
424 struct notifier_block m_icsb_notifier; /* hotplug cpu notifier */ 425 struct notifier_block m_icsb_notifier; /* hotplug cpu notifier */
425 struct mutex m_icsb_mutex; /* balancer sync lock */ 426 struct mutex m_icsb_mutex; /* balancer sync lock */
426#endif 427#endif
428 struct xfs_mru_cache *m_filestream; /* per-mount filestream data */
427} xfs_mount_t; 429} xfs_mount_t;
428 430
429/* 431/*
430 * Flags for m_flags. 432 * Flags for m_flags.
431 */ 433 */
432#define XFS_MOUNT_WSYNC (1ULL << 0) /* for nfs - all metadata ops 434#define XFS_MOUNT_WSYNC (1ULL << 0) /* for nfs - all metadata ops
433 must be synchronous except 435 must be synchronous except
434 for space allocations */ 436 for space allocations */
435#define XFS_MOUNT_INO64 (1ULL << 1) 437#define XFS_MOUNT_INO64 (1ULL << 1)
436 /* (1ULL << 2) -- currently unused */ 438 /* (1ULL << 2) -- currently unused */
437 /* (1ULL << 3) -- currently unused */ 439#define XFS_MOUNT_WAS_CLEAN (1ULL << 3)
438#define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem 440#define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem
439 operations, typically for 441 operations, typically for
440 disk errors in metadata */ 442 disk errors in metadata */
@@ -463,6 +465,8 @@ typedef struct xfs_mount {
463 * I/O size in stat() */ 465 * I/O size in stat() */
464#define XFS_MOUNT_NO_PERCPU_SB (1ULL << 23) /* don't use per-cpu superblock 466#define XFS_MOUNT_NO_PERCPU_SB (1ULL << 23) /* don't use per-cpu superblock
465 counters */ 467 counters */
468#define XFS_MOUNT_FILESTREAMS (1ULL << 24) /* enable the filestreams
469 allocator */
466 470
467 471
468/* 472/*
@@ -511,6 +515,8 @@ xfs_preferred_iosize(xfs_mount_t *mp)
511 515
512#define XFS_MAXIOFFSET(mp) ((mp)->m_maxioffset) 516#define XFS_MAXIOFFSET(mp) ((mp)->m_maxioffset)
513 517
518#define XFS_LAST_UNMOUNT_WAS_CLEAN(mp) \
519 ((mp)->m_flags & XFS_MOUNT_WAS_CLEAN)
514#define XFS_FORCED_SHUTDOWN(mp) ((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN) 520#define XFS_FORCED_SHUTDOWN(mp) ((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN)
515#define xfs_force_shutdown(m,f) \ 521#define xfs_force_shutdown(m,f) \
516 bhv_vfs_force_shutdown((XFS_MTOVFS(m)), f, __FILE__, __LINE__) 522 bhv_vfs_force_shutdown((XFS_MTOVFS(m)), f, __FILE__, __LINE__)
@@ -602,6 +608,7 @@ typedef struct xfs_mod_sb {
602 608
603extern xfs_mount_t *xfs_mount_init(void); 609extern xfs_mount_t *xfs_mount_init(void);
604extern void xfs_mod_sb(xfs_trans_t *, __int64_t); 610extern void xfs_mod_sb(xfs_trans_t *, __int64_t);
611extern int xfs_log_sbcount(xfs_mount_t *, uint);
605extern void xfs_mount_free(xfs_mount_t *mp, int remove_bhv); 612extern void xfs_mount_free(xfs_mount_t *mp, int remove_bhv);
606extern int xfs_mountfs(struct bhv_vfs *, xfs_mount_t *mp, int); 613extern int xfs_mountfs(struct bhv_vfs *, xfs_mount_t *mp, int);
607extern void xfs_mountfs_check_barriers(xfs_mount_t *mp); 614extern void xfs_mountfs_check_barriers(xfs_mount_t *mp);
@@ -618,12 +625,14 @@ extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *,
618extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int); 625extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
619extern int xfs_readsb(xfs_mount_t *, int); 626extern int xfs_readsb(xfs_mount_t *, int);
620extern void xfs_freesb(xfs_mount_t *); 627extern void xfs_freesb(xfs_mount_t *);
628extern int xfs_fs_writable(xfs_mount_t *);
621extern void xfs_do_force_shutdown(bhv_desc_t *, int, char *, int); 629extern void xfs_do_force_shutdown(bhv_desc_t *, int, char *, int);
622extern int xfs_syncsub(xfs_mount_t *, int, int *); 630extern int xfs_syncsub(xfs_mount_t *, int, int *);
623extern int xfs_sync_inodes(xfs_mount_t *, int, int *); 631extern int xfs_sync_inodes(xfs_mount_t *, int, int *);
624extern xfs_agnumber_t xfs_initialize_perag(struct bhv_vfs *, xfs_mount_t *, 632extern xfs_agnumber_t xfs_initialize_perag(struct bhv_vfs *, xfs_mount_t *,
625 xfs_agnumber_t); 633 xfs_agnumber_t);
626extern void xfs_xlatesb(void *, struct xfs_sb *, int, __int64_t); 634extern void xfs_xlatesb(void *, struct xfs_sb *, int, __int64_t);
635extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t);
627 636
628extern struct xfs_dmops xfs_dmcore_stub; 637extern struct xfs_dmops xfs_dmcore_stub;
629extern struct xfs_qmops xfs_qmcore_stub; 638extern struct xfs_qmops xfs_qmcore_stub;
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
new file mode 100644
index 000000000000..7deb9e3cbbd3
--- /dev/null
+++ b/fs/xfs/xfs_mru_cache.c
@@ -0,0 +1,608 @@
1/*
2 * Copyright (c) 2006-2007 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_mru_cache.h"
20
21/*
22 * The MRU Cache data structure consists of a data store, an array of lists and
23 * a lock to protect its internal state. At initialisation time, the client
24 * supplies an element lifetime in milliseconds and a group count, as well as a
25 * function pointer to call when deleting elements. A data structure for
26 * queueing up work in the form of timed callbacks is also included.
27 *
28 * The group count controls how many lists are created, and thereby how finely
29 * the elements are grouped in time. When reaping occurs, all the elements in
30 * all the lists whose time has expired are deleted.
31 *
32 * To give an example of how this works in practice, consider a client that
33 * initialises an MRU Cache with a lifetime of ten seconds and a group count of
34 * five. Five internal lists will be created, each representing a two second
35 * period in time. When the first element is added, time zero for the data
36 * structure is initialised to the current time.
37 *
38 * All the elements added in the first two seconds are appended to the first
39 * list. Elements added in the third second go into the second list, and so on.
40 * If an element is accessed at any point, it is removed from its list and
41 * inserted at the head of the current most-recently-used list.
42 *
43 * The reaper function will have nothing to do until at least twelve seconds
44 * have elapsed since the first element was added. The reason for this is that
45 * if it were called at t=11s, there could be elements in the first list that
46 * have only been inactive for nine seconds, so it still does nothing. If it is
47 * called anywhere between t=12 and t=14 seconds, it will delete all the
48 * elements that remain in the first list. It's therefore possible for elements
49 * to remain in the data store even after they've been inactive for up to
50 * (t + t/g) seconds, where t is the inactive element lifetime and g is the
51 * number of groups.
52 *
53 * The above example assumes that the reaper function gets called at least once
54 * every (t/g) seconds. If it is called less frequently, unused elements will
55 * accumulate in the reap list until the reaper function is eventually called.
56 * The current implementation uses work queue callbacks to carefully time the
57 * reaper function calls, so this should happen rarely, if at all.
58 *
59 * From a design perspective, the primary reason for the choice of a list array
60 * representing discrete time intervals is that it's only practical to reap
61 * expired elements in groups of some appreciable size. This automatically
62 * introduces a granularity to element lifetimes, so there's no point storing an
63 * individual timeout with each element that specifies a more precise reap time.
64 * The bonus is a saving of sizeof(long) bytes of memory per element stored.
65 *
66 * The elements could have been stored in just one list, but an array of
67 * counters or pointers would need to be maintained to allow them to be divided
68 * up into discrete time groups. More critically, the process of touching or
69 * removing an element would involve walking large portions of the entire list,
70 * which would have a detrimental effect on performance. The additional memory
71 * requirement for the array of list heads is minimal.
72 *
73 * When an element is touched or deleted, it needs to be removed from its
74 * current list. Doubly linked lists are used to make the list maintenance
75 * portion of these operations O(1). Since reaper timing can be imprecise,
76 * inserts and lookups can occur when there are no free lists available. When
77 * this happens, all the elements on the LRU list need to be migrated to the end
78 * of the reap list. To keep the list maintenance portion of these operations
79 * O(1) also, list tails need to be accessible without walking the entire list.
80 * This is the reason why doubly linked list heads are used.
81 */
82
83/*
84 * An MRU Cache is a dynamic data structure that stores its elements in a way
85 * that allows efficient lookups, but also groups them into discrete time
86 * intervals based on insertion time. This allows elements to be efficiently
87 * and automatically reaped after a fixed period of inactivity.
88 *
89 * When a client data pointer is stored in the MRU Cache it needs to be added to
90 * both the data store and to one of the lists. It must also be possible to
91 * access each of these entries via the other, i.e. to:
92 *
93 * a) Walk a list, removing the corresponding data store entry for each item.
94 * b) Look up a data store entry, then access its list entry directly.
95 *
96 * To achieve both of these goals, each entry must contain both a list entry and
97 * a key, in addition to the user's data pointer. Note that it's not a good
98 * idea to have the client embed one of these structures at the top of their own
99 * data structure, because inserting the same item more than once would most
100 * likely result in a loop in one of the lists. That's a sure-fire recipe for
101 * an infinite loop in the code.
102 */
103typedef struct xfs_mru_cache_elem
104{
105 struct list_head list_node;
106 unsigned long key;
107 void *value;
108} xfs_mru_cache_elem_t;
109
110static kmem_zone_t *xfs_mru_elem_zone;
111static struct workqueue_struct *xfs_mru_reap_wq;
112
113/*
114 * When inserting, destroying or reaping, it's first necessary to update the
115 * lists relative to a particular time. In the case of destroying, that time
116 * will be well in the future to ensure that all items are moved to the reap
117 * list. In all other cases though, the time will be the current time.
118 *
119 * This function enters a loop, moving the contents of the LRU list to the reap
120 * list again and again until either a) the lists are all empty, or b) time zero
121 * has been advanced sufficiently to be within the immediate element lifetime.
122 *
123 * Case a) above is detected by counting how many groups are migrated and
124 * stopping when they've all been moved. Case b) is detected by monitoring the
125 * time_zero field, which is updated as each group is migrated.
126 *
127 * The return value is the earliest time that more migration could be needed, or
128 * zero if there's no need to schedule more work because the lists are empty.
129 */
130STATIC unsigned long
131_xfs_mru_cache_migrate(
132 xfs_mru_cache_t *mru,
133 unsigned long now)
134{
135 unsigned int grp;
136 unsigned int migrated = 0;
137 struct list_head *lru_list;
138
139 /* Nothing to do if the data store is empty. */
140 if (!mru->time_zero)
141 return 0;
142
143 /* While time zero is older than the time spanned by all the lists. */
144 while (mru->time_zero <= now - mru->grp_count * mru->grp_time) {
145
146 /*
147 * If the LRU list isn't empty, migrate its elements to the tail
148 * of the reap list.
149 */
150 lru_list = mru->lists + mru->lru_grp;
151 if (!list_empty(lru_list))
152 list_splice_init(lru_list, mru->reap_list.prev);
153
154 /*
155 * Advance the LRU group number, freeing the old LRU list to
156 * become the new MRU list; advance time zero accordingly.
157 */
158 mru->lru_grp = (mru->lru_grp + 1) % mru->grp_count;
159 mru->time_zero += mru->grp_time;
160
161 /*
162 * If reaping is so far behind that all the elements on all the
163 * lists have been migrated to the reap list, it's now empty.
164 */
165 if (++migrated == mru->grp_count) {
166 mru->lru_grp = 0;
167 mru->time_zero = 0;
168 return 0;
169 }
170 }
171
172 /* Find the first non-empty list from the LRU end. */
173 for (grp = 0; grp < mru->grp_count; grp++) {
174
175 /* Check the grp'th list from the LRU end. */
176 lru_list = mru->lists + ((mru->lru_grp + grp) % mru->grp_count);
177 if (!list_empty(lru_list))
178 return mru->time_zero +
179 (mru->grp_count + grp) * mru->grp_time;
180 }
181
182 /* All the lists must be empty. */
183 mru->lru_grp = 0;
184 mru->time_zero = 0;
185 return 0;
186}
187
188/*
189 * When inserting or doing a lookup, an element needs to be inserted into the
190 * MRU list. The lists must be migrated first to ensure that they're
191 * up-to-date, otherwise the new element could be given a shorter lifetime in
192 * the cache than it should.
193 */
194STATIC void
195_xfs_mru_cache_list_insert(
196 xfs_mru_cache_t *mru,
197 xfs_mru_cache_elem_t *elem)
198{
199 unsigned int grp = 0;
200 unsigned long now = jiffies;
201
202 /*
203 * If the data store is empty, initialise time zero, leave grp set to
204 * zero and start the work queue timer if necessary. Otherwise, set grp
205 * to the number of group times that have elapsed since time zero.
206 */
207 if (!_xfs_mru_cache_migrate(mru, now)) {
208 mru->time_zero = now;
209 if (!mru->next_reap)
210 mru->next_reap = mru->grp_count * mru->grp_time;
211 } else {
212 grp = (now - mru->time_zero) / mru->grp_time;
213 grp = (mru->lru_grp + grp) % mru->grp_count;
214 }
215
216 /* Insert the element at the tail of the corresponding list. */
217 list_add_tail(&elem->list_node, mru->lists + grp);
218}
219
220/*
221 * When destroying or reaping, all the elements that were migrated to the reap
222 * list need to be deleted. For each element this involves removing it from the
223 * data store, removing it from the reap list, calling the client's free
224 * function and deleting the element from the element zone.
225 */
226STATIC void
227_xfs_mru_cache_clear_reap_list(
228 xfs_mru_cache_t *mru)
229{
230 xfs_mru_cache_elem_t *elem, *next;
231 struct list_head tmp;
232
233 INIT_LIST_HEAD(&tmp);
234 list_for_each_entry_safe(elem, next, &mru->reap_list, list_node) {
235
236 /* Remove the element from the data store. */
237 radix_tree_delete(&mru->store, elem->key);
238
239 /*
240 * remove to temp list so it can be freed without
241 * needing to hold the lock
242 */
243 list_move(&elem->list_node, &tmp);
244 }
245 mutex_spinunlock(&mru->lock, 0);
246
247 list_for_each_entry_safe(elem, next, &tmp, list_node) {
248
249 /* Remove the element from the reap list. */
250 list_del_init(&elem->list_node);
251
252 /* Call the client's free function with the key and value pointer. */
253 mru->free_func(elem->key, elem->value);
254
255 /* Free the element structure. */
256 kmem_zone_free(xfs_mru_elem_zone, elem);
257 }
258
259 mutex_spinlock(&mru->lock);
260}
261
262/*
263 * We fire the reap timer every group expiry interval so
264 * we always have a reaper ready to run. This makes shutdown
265 * and flushing of the reaper easy to do. Hence we need to
266 * keep when the next reap must occur so we can determine
267 * at each interval whether there is anything we need to do.
268 */
269STATIC void
270_xfs_mru_cache_reap(
271 struct work_struct *work)
272{
273 xfs_mru_cache_t *mru = container_of(work, xfs_mru_cache_t, work.work);
274 unsigned long now;
275
276 ASSERT(mru && mru->lists);
277 if (!mru || !mru->lists)
278 return;
279
280 mutex_spinlock(&mru->lock);
281 now = jiffies;
282 if (mru->reap_all ||
283 (mru->next_reap && time_after(now, mru->next_reap))) {
284 if (mru->reap_all)
285 now += mru->grp_count * mru->grp_time * 2;
286 mru->next_reap = _xfs_mru_cache_migrate(mru, now);
287 _xfs_mru_cache_clear_reap_list(mru);
288 }
289
290 /*
291 * the process that triggered the reap_all is responsible
292 * for restating the periodic reap if it is required.
293 */
294 if (!mru->reap_all)
295 queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
296 mru->reap_all = 0;
297 mutex_spinunlock(&mru->lock, 0);
298}
299
300int
301xfs_mru_cache_init(void)
302{
303 xfs_mru_elem_zone = kmem_zone_init(sizeof(xfs_mru_cache_elem_t),
304 "xfs_mru_cache_elem");
305 if (!xfs_mru_elem_zone)
306 return ENOMEM;
307
308 xfs_mru_reap_wq = create_singlethread_workqueue("xfs_mru_cache");
309 if (!xfs_mru_reap_wq) {
310 kmem_zone_destroy(xfs_mru_elem_zone);
311 return ENOMEM;
312 }
313
314 return 0;
315}
316
317void
318xfs_mru_cache_uninit(void)
319{
320 destroy_workqueue(xfs_mru_reap_wq);
321 kmem_zone_destroy(xfs_mru_elem_zone);
322}
323
324/*
325 * To initialise a struct xfs_mru_cache pointer, call xfs_mru_cache_create()
326 * with the address of the pointer, a lifetime value in milliseconds, a group
327 * count and a free function to use when deleting elements. This function
328 * returns 0 if the initialisation was successful.
329 */
330int
331xfs_mru_cache_create(
332 xfs_mru_cache_t **mrup,
333 unsigned int lifetime_ms,
334 unsigned int grp_count,
335 xfs_mru_cache_free_func_t free_func)
336{
337 xfs_mru_cache_t *mru = NULL;
338 int err = 0, grp;
339 unsigned int grp_time;
340
341 if (mrup)
342 *mrup = NULL;
343
344 if (!mrup || !grp_count || !lifetime_ms || !free_func)
345 return EINVAL;
346
347 if (!(grp_time = msecs_to_jiffies(lifetime_ms) / grp_count))
348 return EINVAL;
349
350 if (!(mru = kmem_zalloc(sizeof(*mru), KM_SLEEP)))
351 return ENOMEM;
352
353 /* An extra list is needed to avoid reaping up to a grp_time early. */
354 mru->grp_count = grp_count + 1;
355 mru->lists = kmem_alloc(mru->grp_count * sizeof(*mru->lists), KM_SLEEP);
356
357 if (!mru->lists) {
358 err = ENOMEM;
359 goto exit;
360 }
361
362 for (grp = 0; grp < mru->grp_count; grp++)
363 INIT_LIST_HEAD(mru->lists + grp);
364
365 /*
366 * We use GFP_KERNEL radix tree preload and do inserts under a
367 * spinlock so GFP_ATOMIC is appropriate for the radix tree itself.
368 */
369 INIT_RADIX_TREE(&mru->store, GFP_ATOMIC);
370 INIT_LIST_HEAD(&mru->reap_list);
371 spinlock_init(&mru->lock, "xfs_mru_cache");
372 INIT_DELAYED_WORK(&mru->work, _xfs_mru_cache_reap);
373
374 mru->grp_time = grp_time;
375 mru->free_func = free_func;
376
377 /* start up the reaper event */
378 mru->next_reap = 0;
379 mru->reap_all = 0;
380 queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
381
382 *mrup = mru;
383
384exit:
385 if (err && mru && mru->lists)
386 kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists));
387 if (err && mru)
388 kmem_free(mru, sizeof(*mru));
389
390 return err;
391}
392
393/*
394 * Call xfs_mru_cache_flush() to flush out all cached entries, calling their
395 * free functions as they're deleted. When this function returns, the caller is
396 * guaranteed that all the free functions for all the elements have finished
397 * executing.
398 *
399 * While we are flushing, we stop the periodic reaper event from triggering.
400 * Normally, we want to restart this periodic event, but if we are shutting
401 * down the cache we do not want it restarted. hence the restart parameter
402 * where 0 = do not restart reaper and 1 = restart reaper.
403 */
404void
405xfs_mru_cache_flush(
406 xfs_mru_cache_t *mru,
407 int restart)
408{
409 if (!mru || !mru->lists)
410 return;
411
412 cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work);
413
414 mutex_spinlock(&mru->lock);
415 mru->reap_all = 1;
416 mutex_spinunlock(&mru->lock, 0);
417
418 queue_work(xfs_mru_reap_wq, &mru->work.work);
419 flush_workqueue(xfs_mru_reap_wq);
420
421 mutex_spinlock(&mru->lock);
422 WARN_ON_ONCE(mru->reap_all != 0);
423 mru->reap_all = 0;
424 if (restart)
425 queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
426 mutex_spinunlock(&mru->lock, 0);
427}
428
429void
430xfs_mru_cache_destroy(
431 xfs_mru_cache_t *mru)
432{
433 if (!mru || !mru->lists)
434 return;
435
436 /* we don't want the reaper to restart here */
437 xfs_mru_cache_flush(mru, 0);
438
439 kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists));
440 kmem_free(mru, sizeof(*mru));
441}
442
443/*
444 * To insert an element, call xfs_mru_cache_insert() with the data store, the
445 * element's key and the client data pointer. This function returns 0 on
446 * success or ENOMEM if memory for the data element couldn't be allocated.
447 */
448int
449xfs_mru_cache_insert(
450 xfs_mru_cache_t *mru,
451 unsigned long key,
452 void *value)
453{
454 xfs_mru_cache_elem_t *elem;
455
456 ASSERT(mru && mru->lists);
457 if (!mru || !mru->lists)
458 return EINVAL;
459
460 elem = kmem_zone_zalloc(xfs_mru_elem_zone, KM_SLEEP);
461 if (!elem)
462 return ENOMEM;
463
464 if (radix_tree_preload(GFP_KERNEL)) {
465 kmem_zone_free(xfs_mru_elem_zone, elem);
466 return ENOMEM;
467 }
468
469 INIT_LIST_HEAD(&elem->list_node);
470 elem->key = key;
471 elem->value = value;
472
473 mutex_spinlock(&mru->lock);
474
475 radix_tree_insert(&mru->store, key, elem);
476 radix_tree_preload_end();
477 _xfs_mru_cache_list_insert(mru, elem);
478
479 mutex_spinunlock(&mru->lock, 0);
480
481 return 0;
482}
483
484/*
485 * To remove an element without calling the free function, call
486 * xfs_mru_cache_remove() with the data store and the element's key. On success
487 * the client data pointer for the removed element is returned, otherwise this
488 * function will return a NULL pointer.
489 */
490void *
491xfs_mru_cache_remove(
492 xfs_mru_cache_t *mru,
493 unsigned long key)
494{
495 xfs_mru_cache_elem_t *elem;
496 void *value = NULL;
497
498 ASSERT(mru && mru->lists);
499 if (!mru || !mru->lists)
500 return NULL;
501
502 mutex_spinlock(&mru->lock);
503 elem = radix_tree_delete(&mru->store, key);
504 if (elem) {
505 value = elem->value;
506 list_del(&elem->list_node);
507 }
508
509 mutex_spinunlock(&mru->lock, 0);
510
511 if (elem)
512 kmem_zone_free(xfs_mru_elem_zone, elem);
513
514 return value;
515}
516
517/*
518 * To remove and element and call the free function, call xfs_mru_cache_delete()
519 * with the data store and the element's key.
520 */
521void
522xfs_mru_cache_delete(
523 xfs_mru_cache_t *mru,
524 unsigned long key)
525{
526 void *value = xfs_mru_cache_remove(mru, key);
527
528 if (value)
529 mru->free_func(key, value);
530}
531
532/*
533 * To look up an element using its key, call xfs_mru_cache_lookup() with the
534 * data store and the element's key. If found, the element will be moved to the
535 * head of the MRU list to indicate that it's been touched.
536 *
537 * The internal data structures are protected by a spinlock that is STILL HELD
538 * when this function returns. Call xfs_mru_cache_done() to release it. Note
539 * that it is not safe to call any function that might sleep in the interim.
540 *
541 * The implementation could have used reference counting to avoid this
542 * restriction, but since most clients simply want to get, set or test a member
543 * of the returned data structure, the extra per-element memory isn't warranted.
544 *
545 * If the element isn't found, this function returns NULL and the spinlock is
546 * released. xfs_mru_cache_done() should NOT be called when this occurs.
547 */
548void *
549xfs_mru_cache_lookup(
550 xfs_mru_cache_t *mru,
551 unsigned long key)
552{
553 xfs_mru_cache_elem_t *elem;
554
555 ASSERT(mru && mru->lists);
556 if (!mru || !mru->lists)
557 return NULL;
558
559 mutex_spinlock(&mru->lock);
560 elem = radix_tree_lookup(&mru->store, key);
561 if (elem) {
562 list_del(&elem->list_node);
563 _xfs_mru_cache_list_insert(mru, elem);
564 }
565 else
566 mutex_spinunlock(&mru->lock, 0);
567
568 return elem ? elem->value : NULL;
569}
570
571/*
572 * To look up an element using its key, but leave its location in the internal
573 * lists alone, call xfs_mru_cache_peek(). If the element isn't found, this
574 * function returns NULL.
575 *
576 * See the comments above the declaration of the xfs_mru_cache_lookup() function
577 * for important locking information pertaining to this call.
578 */
579void *
580xfs_mru_cache_peek(
581 xfs_mru_cache_t *mru,
582 unsigned long key)
583{
584 xfs_mru_cache_elem_t *elem;
585
586 ASSERT(mru && mru->lists);
587 if (!mru || !mru->lists)
588 return NULL;
589
590 mutex_spinlock(&mru->lock);
591 elem = radix_tree_lookup(&mru->store, key);
592 if (!elem)
593 mutex_spinunlock(&mru->lock, 0);
594
595 return elem ? elem->value : NULL;
596}
597
598/*
599 * To release the internal data structure spinlock after having performed an
600 * xfs_mru_cache_lookup() or an xfs_mru_cache_peek(), call xfs_mru_cache_done()
601 * with the data store pointer.
602 */
603void
604xfs_mru_cache_done(
605 xfs_mru_cache_t *mru)
606{
607 mutex_spinunlock(&mru->lock, 0);
608}
diff --git a/fs/xfs/xfs_mru_cache.h b/fs/xfs/xfs_mru_cache.h
new file mode 100644
index 000000000000..624fd10ee8e5
--- /dev/null
+++ b/fs/xfs/xfs_mru_cache.h
@@ -0,0 +1,57 @@
1/*
2 * Copyright (c) 2006-2007 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_MRU_CACHE_H__
19#define __XFS_MRU_CACHE_H__
20
21
22/* Function pointer type for callback to free a client's data pointer. */
23typedef void (*xfs_mru_cache_free_func_t)(unsigned long, void*);
24
25typedef struct xfs_mru_cache
26{
27 struct radix_tree_root store; /* Core storage data structure. */
28 struct list_head *lists; /* Array of lists, one per grp. */
29 struct list_head reap_list; /* Elements overdue for reaping. */
30 spinlock_t lock; /* Lock to protect this struct. */
31 unsigned int grp_count; /* Number of discrete groups. */
32 unsigned int grp_time; /* Time period spanned by grps. */
33 unsigned int lru_grp; /* Group containing time zero. */
34 unsigned long time_zero; /* Time first element was added. */
35 unsigned long next_reap; /* Time that the reaper should
36 next do something. */
37 unsigned int reap_all; /* if set, reap all lists */
38 xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */
39 struct delayed_work work; /* Workqueue data for reaping. */
40} xfs_mru_cache_t;
41
42int xfs_mru_cache_init(void);
43void xfs_mru_cache_uninit(void);
44int xfs_mru_cache_create(struct xfs_mru_cache **mrup, unsigned int lifetime_ms,
45 unsigned int grp_count,
46 xfs_mru_cache_free_func_t free_func);
47void xfs_mru_cache_flush(xfs_mru_cache_t *mru, int restart);
48void xfs_mru_cache_destroy(struct xfs_mru_cache *mru);
49int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key,
50 void *value);
51void * xfs_mru_cache_remove(struct xfs_mru_cache *mru, unsigned long key);
52void xfs_mru_cache_delete(struct xfs_mru_cache *mru, unsigned long key);
53void *xfs_mru_cache_lookup(struct xfs_mru_cache *mru, unsigned long key);
54void *xfs_mru_cache_peek(struct xfs_mru_cache *mru, unsigned long key);
55void xfs_mru_cache_done(struct xfs_mru_cache *mru);
56
57#endif /* __XFS_MRU_CACHE_H__ */
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index b3a5f07bd073..47082c01872d 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1882,11 +1882,13 @@ xfs_growfs_rt(
1882 (nrblocks = in->newblocks) <= sbp->sb_rblocks || 1882 (nrblocks = in->newblocks) <= sbp->sb_rblocks ||
1883 (sbp->sb_rblocks && (in->extsize != sbp->sb_rextsize))) 1883 (sbp->sb_rblocks && (in->extsize != sbp->sb_rextsize)))
1884 return XFS_ERROR(EINVAL); 1884 return XFS_ERROR(EINVAL);
1885 if ((error = xfs_sb_validate_fsb_count(sbp, nrblocks)))
1886 return error;
1885 /* 1887 /*
1886 * Read in the last block of the device, make sure it exists. 1888 * Read in the last block of the device, make sure it exists.
1887 */ 1889 */
1888 error = xfs_read_buf(mp, mp->m_rtdev_targp, 1890 error = xfs_read_buf(mp, mp->m_rtdev_targp,
1889 XFS_FSB_TO_BB(mp, in->newblocks - 1), 1891 XFS_FSB_TO_BB(mp, nrblocks - 1),
1890 XFS_FSB_TO_BB(mp, 1), 0, &bp); 1892 XFS_FSB_TO_BB(mp, 1), 0, &bp);
1891 if (error) 1893 if (error)
1892 return error; 1894 return error;
diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h
index 188b296ff50c..fcf28dbded7c 100644
--- a/fs/xfs/xfs_rw.h
+++ b/fs/xfs/xfs_rw.h
@@ -72,6 +72,34 @@ xfs_fsb_to_db_io(struct xfs_iocore *io, xfs_fsblock_t fsb)
72} 72}
73 73
74/* 74/*
75 * Flags for xfs_free_eofblocks
76 */
77#define XFS_FREE_EOF_LOCK (1<<0)
78#define XFS_FREE_EOF_NOLOCK (1<<1)
79
80
81/*
82 * helper function to extract extent size hint from inode
83 */
84STATIC_INLINE xfs_extlen_t
85xfs_get_extsz_hint(
86 xfs_inode_t *ip)
87{
88 xfs_extlen_t extsz;
89
90 if (unlikely(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) {
91 extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
92 ? ip->i_d.di_extsize
93 : ip->i_mount->m_sb.sb_rextsize;
94 ASSERT(extsz);
95 } else {
96 extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
97 ? ip->i_d.di_extsize : 0;
98 }
99 return extsz;
100}
101
102/*
75 * Prototypes for functions in xfs_rw.c. 103 * Prototypes for functions in xfs_rw.c.
76 */ 104 */
77extern int xfs_write_clear_setuid(struct xfs_inode *ip); 105extern int xfs_write_clear_setuid(struct xfs_inode *ip);
@@ -91,10 +119,12 @@ extern void xfs_ioerror_alert(char *func, struct xfs_mount *mp,
91extern int xfs_rwlock(bhv_desc_t *bdp, bhv_vrwlock_t write_lock); 119extern int xfs_rwlock(bhv_desc_t *bdp, bhv_vrwlock_t write_lock);
92extern void xfs_rwunlock(bhv_desc_t *bdp, bhv_vrwlock_t write_lock); 120extern void xfs_rwunlock(bhv_desc_t *bdp, bhv_vrwlock_t write_lock);
93extern int xfs_setattr(bhv_desc_t *, bhv_vattr_t *vap, int flags, 121extern int xfs_setattr(bhv_desc_t *, bhv_vattr_t *vap, int flags,
94 cred_t *credp); 122 cred_t *credp);
95extern int xfs_change_file_space(bhv_desc_t *bdp, int cmd, xfs_flock64_t *bf, 123extern int xfs_change_file_space(bhv_desc_t *bdp, int cmd, xfs_flock64_t *bf,
96 xfs_off_t offset, cred_t *credp, int flags); 124 xfs_off_t offset, cred_t *credp, int flags);
97extern int xfs_set_dmattrs(bhv_desc_t *bdp, u_int evmask, u_int16_t state, 125extern int xfs_set_dmattrs(bhv_desc_t *bdp, u_int evmask, u_int16_t state,
98 cred_t *credp); 126 cred_t *credp);
127extern int xfs_free_eofblocks(struct xfs_mount *mp, struct xfs_inode *ip,
128 int flags);
99 129
100#endif /* __XFS_RW_H__ */ 130#endif /* __XFS_RW_H__ */
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index 467854b45c8f..ef42537a607a 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -74,12 +74,13 @@ struct xfs_mount;
74 */ 74 */
75#define XFS_SB_VERSION2_REALFBITS 0x00ffffff /* Mask: features */ 75#define XFS_SB_VERSION2_REALFBITS 0x00ffffff /* Mask: features */
76#define XFS_SB_VERSION2_RESERVED1BIT 0x00000001 76#define XFS_SB_VERSION2_RESERVED1BIT 0x00000001
77#define XFS_SB_VERSION2_RESERVED2BIT 0x00000002 77#define XFS_SB_VERSION2_LAZYSBCOUNTBIT 0x00000002 /* Superblk counters */
78#define XFS_SB_VERSION2_RESERVED4BIT 0x00000004 78#define XFS_SB_VERSION2_RESERVED4BIT 0x00000004
79#define XFS_SB_VERSION2_ATTR2BIT 0x00000008 /* Inline attr rework */ 79#define XFS_SB_VERSION2_ATTR2BIT 0x00000008 /* Inline attr rework */
80 80
81#define XFS_SB_VERSION2_OKREALFBITS \ 81#define XFS_SB_VERSION2_OKREALFBITS \
82 (XFS_SB_VERSION2_ATTR2BIT) 82 (XFS_SB_VERSION2_LAZYSBCOUNTBIT | \
83 XFS_SB_VERSION2_ATTR2BIT)
83#define XFS_SB_VERSION2_OKSASHFBITS \ 84#define XFS_SB_VERSION2_OKSASHFBITS \
84 (0) 85 (0)
85#define XFS_SB_VERSION2_OKREALBITS \ 86#define XFS_SB_VERSION2_OKREALBITS \
@@ -181,6 +182,9 @@ typedef enum {
181#define XFS_SB_SHARED_VN XFS_SB_MVAL(SHARED_VN) 182#define XFS_SB_SHARED_VN XFS_SB_MVAL(SHARED_VN)
182#define XFS_SB_UNIT XFS_SB_MVAL(UNIT) 183#define XFS_SB_UNIT XFS_SB_MVAL(UNIT)
183#define XFS_SB_WIDTH XFS_SB_MVAL(WIDTH) 184#define XFS_SB_WIDTH XFS_SB_MVAL(WIDTH)
185#define XFS_SB_ICOUNT XFS_SB_MVAL(ICOUNT)
186#define XFS_SB_IFREE XFS_SB_MVAL(IFREE)
187#define XFS_SB_FDBLOCKS XFS_SB_MVAL(FDBLOCKS)
184#define XFS_SB_FEATURES2 XFS_SB_MVAL(FEATURES2) 188#define XFS_SB_FEATURES2 XFS_SB_MVAL(FEATURES2)
185#define XFS_SB_NUM_BITS ((int)XFS_SBS_FIELDCOUNT) 189#define XFS_SB_NUM_BITS ((int)XFS_SBS_FIELDCOUNT)
186#define XFS_SB_ALL_BITS ((1LL << XFS_SB_NUM_BITS) - 1) 190#define XFS_SB_ALL_BITS ((1LL << XFS_SB_NUM_BITS) - 1)
@@ -188,7 +192,7 @@ typedef enum {
188 (XFS_SB_UUID | XFS_SB_ROOTINO | XFS_SB_RBMINO | XFS_SB_RSUMINO | \ 192 (XFS_SB_UUID | XFS_SB_ROOTINO | XFS_SB_RBMINO | XFS_SB_RSUMINO | \
189 XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \ 193 XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \
190 XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \ 194 XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \
191 XFS_SB_FEATURES2) 195 XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2)
192 196
193 197
194/* 198/*
@@ -414,6 +418,12 @@ static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp)
414 * ((sbp)->sb_features2 & XFS_SB_VERSION2_FUNBIT) 418 * ((sbp)->sb_features2 & XFS_SB_VERSION2_FUNBIT)
415 */ 419 */
416 420
421static inline int xfs_sb_version_haslazysbcount(xfs_sb_t *sbp)
422{
423 return (XFS_SB_VERSION_HASMOREBITS(sbp) && \
424 ((sbp)->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT));
425}
426
417#define XFS_SB_VERSION_HASATTR2(sbp) xfs_sb_version_hasattr2(sbp) 427#define XFS_SB_VERSION_HASATTR2(sbp) xfs_sb_version_hasattr2(sbp)
418static inline int xfs_sb_version_hasattr2(xfs_sb_t *sbp) 428static inline int xfs_sb_version_hasattr2(xfs_sb_t *sbp)
419{ 429{
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index cc2d60951e21..356d6627f581 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -427,6 +427,14 @@ undo_blocks:
427 * 427 *
428 * Mark the transaction structure to indicate that the superblock 428 * Mark the transaction structure to indicate that the superblock
429 * needs to be updated before committing. 429 * needs to be updated before committing.
430 *
431 * Because we may not be keeping track of allocated/free inodes and
432 * used filesystem blocks in the superblock, we do not mark the
433 * superblock dirty in this transaction if we modify these fields.
434 * We still need to update the transaction deltas so that they get
435 * applied to the incore superblock, but we don't want them to
436 * cause the superblock to get locked and logged if these are the
437 * only fields in the superblock that the transaction modifies.
430 */ 438 */
431void 439void
432xfs_trans_mod_sb( 440xfs_trans_mod_sb(
@@ -434,13 +442,19 @@ xfs_trans_mod_sb(
434 uint field, 442 uint field,
435 int64_t delta) 443 int64_t delta)
436{ 444{
445 uint32_t flags = (XFS_TRANS_DIRTY|XFS_TRANS_SB_DIRTY);
446 xfs_mount_t *mp = tp->t_mountp;
437 447
438 switch (field) { 448 switch (field) {
439 case XFS_TRANS_SB_ICOUNT: 449 case XFS_TRANS_SB_ICOUNT:
440 tp->t_icount_delta += delta; 450 tp->t_icount_delta += delta;
451 if (xfs_sb_version_haslazysbcount(&mp->m_sb))
452 flags &= ~XFS_TRANS_SB_DIRTY;
441 break; 453 break;
442 case XFS_TRANS_SB_IFREE: 454 case XFS_TRANS_SB_IFREE:
443 tp->t_ifree_delta += delta; 455 tp->t_ifree_delta += delta;
456 if (xfs_sb_version_haslazysbcount(&mp->m_sb))
457 flags &= ~XFS_TRANS_SB_DIRTY;
444 break; 458 break;
445 case XFS_TRANS_SB_FDBLOCKS: 459 case XFS_TRANS_SB_FDBLOCKS:
446 /* 460 /*
@@ -453,6 +467,8 @@ xfs_trans_mod_sb(
453 ASSERT(tp->t_blk_res_used <= tp->t_blk_res); 467 ASSERT(tp->t_blk_res_used <= tp->t_blk_res);
454 } 468 }
455 tp->t_fdblocks_delta += delta; 469 tp->t_fdblocks_delta += delta;
470 if (xfs_sb_version_haslazysbcount(&mp->m_sb))
471 flags &= ~XFS_TRANS_SB_DIRTY;
456 break; 472 break;
457 case XFS_TRANS_SB_RES_FDBLOCKS: 473 case XFS_TRANS_SB_RES_FDBLOCKS:
458 /* 474 /*
@@ -462,6 +478,8 @@ xfs_trans_mod_sb(
462 */ 478 */
463 ASSERT(delta < 0); 479 ASSERT(delta < 0);
464 tp->t_res_fdblocks_delta += delta; 480 tp->t_res_fdblocks_delta += delta;
481 if (xfs_sb_version_haslazysbcount(&mp->m_sb))
482 flags &= ~XFS_TRANS_SB_DIRTY;
465 break; 483 break;
466 case XFS_TRANS_SB_FREXTENTS: 484 case XFS_TRANS_SB_FREXTENTS:
467 /* 485 /*
@@ -515,7 +533,7 @@ xfs_trans_mod_sb(
515 return; 533 return;
516 } 534 }
517 535
518 tp->t_flags |= (XFS_TRANS_SB_DIRTY | XFS_TRANS_DIRTY); 536 tp->t_flags |= flags;
519} 537}
520 538
521/* 539/*
@@ -544,18 +562,23 @@ xfs_trans_apply_sb_deltas(
544 (tp->t_ag_freeblks_delta + tp->t_ag_flist_delta + 562 (tp->t_ag_freeblks_delta + tp->t_ag_flist_delta +
545 tp->t_ag_btree_delta)); 563 tp->t_ag_btree_delta));
546 564
547 if (tp->t_icount_delta != 0) { 565 /*
548 INT_MOD(sbp->sb_icount, ARCH_CONVERT, tp->t_icount_delta); 566 * Only update the superblock counters if we are logging them
549 } 567 */
550 if (tp->t_ifree_delta != 0) { 568 if (!xfs_sb_version_haslazysbcount(&(tp->t_mountp->m_sb))) {
551 INT_MOD(sbp->sb_ifree, ARCH_CONVERT, tp->t_ifree_delta); 569 if (tp->t_icount_delta != 0) {
552 } 570 INT_MOD(sbp->sb_icount, ARCH_CONVERT, tp->t_icount_delta);
571 }
572 if (tp->t_ifree_delta != 0) {
573 INT_MOD(sbp->sb_ifree, ARCH_CONVERT, tp->t_ifree_delta);
574 }
553 575
554 if (tp->t_fdblocks_delta != 0) { 576 if (tp->t_fdblocks_delta != 0) {
555 INT_MOD(sbp->sb_fdblocks, ARCH_CONVERT, tp->t_fdblocks_delta); 577 INT_MOD(sbp->sb_fdblocks, ARCH_CONVERT, tp->t_fdblocks_delta);
556 } 578 }
557 if (tp->t_res_fdblocks_delta != 0) { 579 if (tp->t_res_fdblocks_delta != 0) {
558 INT_MOD(sbp->sb_fdblocks, ARCH_CONVERT, tp->t_res_fdblocks_delta); 580 INT_MOD(sbp->sb_fdblocks, ARCH_CONVERT, tp->t_res_fdblocks_delta);
581 }
559 } 582 }
560 583
561 if (tp->t_frextents_delta != 0) { 584 if (tp->t_frextents_delta != 0) {
@@ -615,11 +638,23 @@ xfs_trans_apply_sb_deltas(
615} 638}
616 639
617/* 640/*
618 * xfs_trans_unreserve_and_mod_sb() is called to release unused 641 * xfs_trans_unreserve_and_mod_sb() is called to release unused reservations
619 * reservations and apply superblock counter changes to the in-core 642 * and apply superblock counter changes to the in-core superblock. The
620 * superblock. 643 * t_res_fdblocks_delta and t_res_frextents_delta fields are explicitly NOT
644 * applied to the in-core superblock. The idea is that that has already been
645 * done.
621 * 646 *
622 * This is done efficiently with a single call to xfs_mod_incore_sb_batch(). 647 * This is done efficiently with a single call to xfs_mod_incore_sb_batch().
648 * However, we have to ensure that we modify each superblock field only
649 * once because the application of the delta values may not be atomic. That can
650 * lead to ENOSPC races occurring if we have two separate modifications of the
651 * free space counter to put back the entire reservation and then take away
652 * what we used.
653 *
654 * If we are not logging superblock counters, then the inode allocated/free and
655 * used block counts are not updated in the on disk superblock. In this case,
656 * XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we
657 * still need to update the incore superblock with the changes.
623 */ 658 */
624STATIC void 659STATIC void
625xfs_trans_unreserve_and_mod_sb( 660xfs_trans_unreserve_and_mod_sb(
@@ -627,40 +662,49 @@ xfs_trans_unreserve_and_mod_sb(
627{ 662{
628 xfs_mod_sb_t msb[14]; /* If you add cases, add entries */ 663 xfs_mod_sb_t msb[14]; /* If you add cases, add entries */
629 xfs_mod_sb_t *msbp; 664 xfs_mod_sb_t *msbp;
665 xfs_mount_t *mp = tp->t_mountp;
630 /* REFERENCED */ 666 /* REFERENCED */
631 int error; 667 int error;
632 int rsvd; 668 int rsvd;
669 int64_t blkdelta = 0;
670 int64_t rtxdelta = 0;
633 671
634 msbp = msb; 672 msbp = msb;
635 rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; 673 rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
636 674
637 /* 675 /* calculate free blocks delta */
638 * Release any reserved blocks. Any that were allocated 676 if (tp->t_blk_res > 0)
639 * will be taken back again by fdblocks_delta below. 677 blkdelta = tp->t_blk_res;
640 */ 678
641 if (tp->t_blk_res > 0) { 679 if ((tp->t_fdblocks_delta != 0) &&
680 (xfs_sb_version_haslazysbcount(&mp->m_sb) ||
681 (tp->t_flags & XFS_TRANS_SB_DIRTY)))
682 blkdelta += tp->t_fdblocks_delta;
683
684 if (blkdelta != 0) {
642 msbp->msb_field = XFS_SBS_FDBLOCKS; 685 msbp->msb_field = XFS_SBS_FDBLOCKS;
643 msbp->msb_delta = tp->t_blk_res; 686 msbp->msb_delta = blkdelta;
644 msbp++; 687 msbp++;
645 } 688 }
646 689
647 /* 690 /* calculate free realtime extents delta */
648 * Release any reserved real time extents . Any that were 691 if (tp->t_rtx_res > 0)
649 * allocated will be taken back again by frextents_delta below. 692 rtxdelta = tp->t_rtx_res;
650 */ 693
651 if (tp->t_rtx_res > 0) { 694 if ((tp->t_frextents_delta != 0) &&
695 (tp->t_flags & XFS_TRANS_SB_DIRTY))
696 rtxdelta += tp->t_frextents_delta;
697
698 if (rtxdelta != 0) {
652 msbp->msb_field = XFS_SBS_FREXTENTS; 699 msbp->msb_field = XFS_SBS_FREXTENTS;
653 msbp->msb_delta = tp->t_rtx_res; 700 msbp->msb_delta = rtxdelta;
654 msbp++; 701 msbp++;
655 } 702 }
656 703
657 /* 704 /* apply remaining deltas */
658 * Apply any superblock modifications to the in-core version. 705
659 * The t_res_fdblocks_delta and t_res_frextents_delta fields are 706 if (xfs_sb_version_haslazysbcount(&mp->m_sb) ||
660 * explicitly NOT applied to the in-core superblock. 707 (tp->t_flags & XFS_TRANS_SB_DIRTY)) {
661 * The idea is that that has already been done.
662 */
663 if (tp->t_flags & XFS_TRANS_SB_DIRTY) {
664 if (tp->t_icount_delta != 0) { 708 if (tp->t_icount_delta != 0) {
665 msbp->msb_field = XFS_SBS_ICOUNT; 709 msbp->msb_field = XFS_SBS_ICOUNT;
666 msbp->msb_delta = tp->t_icount_delta; 710 msbp->msb_delta = tp->t_icount_delta;
@@ -671,16 +715,9 @@ xfs_trans_unreserve_and_mod_sb(
671 msbp->msb_delta = tp->t_ifree_delta; 715 msbp->msb_delta = tp->t_ifree_delta;
672 msbp++; 716 msbp++;
673 } 717 }
674 if (tp->t_fdblocks_delta != 0) { 718 }
675 msbp->msb_field = XFS_SBS_FDBLOCKS; 719
676 msbp->msb_delta = tp->t_fdblocks_delta; 720 if (tp->t_flags & XFS_TRANS_SB_DIRTY) {
677 msbp++;
678 }
679 if (tp->t_frextents_delta != 0) {
680 msbp->msb_field = XFS_SBS_FREXTENTS;
681 msbp->msb_delta = tp->t_frextents_delta;
682 msbp++;
683 }
684 if (tp->t_dblocks_delta != 0) { 721 if (tp->t_dblocks_delta != 0) {
685 msbp->msb_field = XFS_SBS_DBLOCKS; 722 msbp->msb_field = XFS_SBS_DBLOCKS;
686 msbp->msb_delta = tp->t_dblocks_delta; 723 msbp->msb_delta = tp->t_dblocks_delta;
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 7dfcc450366f..0e26e729023e 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -94,7 +94,8 @@ typedef struct xfs_trans_header {
94#define XFS_TRANS_GROWFSRT_ZERO 38 94#define XFS_TRANS_GROWFSRT_ZERO 38
95#define XFS_TRANS_GROWFSRT_FREE 39 95#define XFS_TRANS_GROWFSRT_FREE 39
96#define XFS_TRANS_SWAPEXT 40 96#define XFS_TRANS_SWAPEXT 40
97#define XFS_TRANS_TYPE_MAX 40 97#define XFS_TRANS_SB_COUNT 41
98#define XFS_TRANS_TYPE_MAX 41
98/* new transaction types need to be reflected in xfs_logprint(8) */ 99/* new transaction types need to be reflected in xfs_logprint(8) */
99 100
100 101
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 65c561201cb8..11f5ea29a038 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -51,6 +51,8 @@
51#include "xfs_acl.h" 51#include "xfs_acl.h"
52#include "xfs_attr.h" 52#include "xfs_attr.h"
53#include "xfs_clnt.h" 53#include "xfs_clnt.h"
54#include "xfs_mru_cache.h"
55#include "xfs_filestream.h"
54#include "xfs_fsops.h" 56#include "xfs_fsops.h"
55 57
56STATIC int xfs_sync(bhv_desc_t *, int, cred_t *); 58STATIC int xfs_sync(bhv_desc_t *, int, cred_t *);
@@ -81,6 +83,8 @@ xfs_init(void)
81 xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf"); 83 xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
82 xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork"); 84 xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
83 xfs_acl_zone_init(xfs_acl_zone, "xfs_acl"); 85 xfs_acl_zone_init(xfs_acl_zone, "xfs_acl");
86 xfs_mru_cache_init();
87 xfs_filestream_init();
84 88
85 /* 89 /*
86 * The size of the zone allocated buf log item is the maximum 90 * The size of the zone allocated buf log item is the maximum
@@ -164,6 +168,8 @@ xfs_cleanup(void)
164 xfs_cleanup_procfs(); 168 xfs_cleanup_procfs();
165 xfs_sysctl_unregister(); 169 xfs_sysctl_unregister();
166 xfs_refcache_destroy(); 170 xfs_refcache_destroy();
171 xfs_filestream_uninit();
172 xfs_mru_cache_uninit();
167 xfs_acl_zone_destroy(xfs_acl_zone); 173 xfs_acl_zone_destroy(xfs_acl_zone);
168 174
169#ifdef XFS_DIR2_TRACE 175#ifdef XFS_DIR2_TRACE
@@ -320,6 +326,9 @@ xfs_start_flags(
320 else 326 else
321 mp->m_flags &= ~XFS_MOUNT_BARRIER; 327 mp->m_flags &= ~XFS_MOUNT_BARRIER;
322 328
329 if (ap->flags2 & XFSMNT2_FILESTREAMS)
330 mp->m_flags |= XFS_MOUNT_FILESTREAMS;
331
323 return 0; 332 return 0;
324} 333}
325 334
@@ -518,6 +527,9 @@ xfs_mount(
518 if (mp->m_flags & XFS_MOUNT_BARRIER) 527 if (mp->m_flags & XFS_MOUNT_BARRIER)
519 xfs_mountfs_check_barriers(mp); 528 xfs_mountfs_check_barriers(mp);
520 529
530 if ((error = xfs_filestream_mount(mp)))
531 goto error2;
532
521 error = XFS_IOINIT(vfsp, args, flags); 533 error = XFS_IOINIT(vfsp, args, flags);
522 if (error) 534 if (error)
523 goto error2; 535 goto error2;
@@ -575,6 +587,13 @@ xfs_unmount(
575 */ 587 */
576 xfs_refcache_purge_mp(mp); 588 xfs_refcache_purge_mp(mp);
577 589
590 /*
591 * Blow away any referenced inode in the filestreams cache.
592 * This can and will cause log traffic as inodes go inactive
593 * here.
594 */
595 xfs_filestream_unmount(mp);
596
578 XFS_bflush(mp->m_ddev_targp); 597 XFS_bflush(mp->m_ddev_targp);
579 error = xfs_unmount_flush(mp, 0); 598 error = xfs_unmount_flush(mp, 0);
580 if (error) 599 if (error)
@@ -640,7 +659,7 @@ xfs_quiesce_fs(
640 * we can write the unmount record. 659 * we can write the unmount record.
641 */ 660 */
642 do { 661 do {
643 xfs_syncsub(mp, SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT, NULL); 662 xfs_syncsub(mp, SYNC_INODE_QUIESCE, NULL);
644 pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); 663 pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
645 if (!pincount) { 664 if (!pincount) {
646 delay(50); 665 delay(50);
@@ -651,6 +670,30 @@ xfs_quiesce_fs(
651 return 0; 670 return 0;
652} 671}
653 672
673/*
674 * Second stage of a quiesce. The data is already synced, now we have to take
675 * care of the metadata. New transactions are already blocked, so we need to
676 * wait for any remaining transactions to drain out before proceding.
677 */
678STATIC void
679xfs_attr_quiesce(
680 xfs_mount_t *mp)
681{
682 /* wait for all modifications to complete */
683 while (atomic_read(&mp->m_active_trans) > 0)
684 delay(100);
685
686 /* flush inodes and push all remaining buffers out to disk */
687 xfs_quiesce_fs(mp);
688
689 ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0);
690
691 /* Push the superblock and write an unmount record */
692 xfs_log_sbcount(mp, 1);
693 xfs_log_unmount_write(mp);
694 xfs_unmountfs_writesb(mp);
695}
696
654STATIC int 697STATIC int
655xfs_mntupdate( 698xfs_mntupdate(
656 bhv_desc_t *bdp, 699 bhv_desc_t *bdp,
@@ -670,10 +713,9 @@ xfs_mntupdate(
670 mp->m_flags &= ~XFS_MOUNT_BARRIER; 713 mp->m_flags &= ~XFS_MOUNT_BARRIER;
671 } 714 }
672 } else if (!(vfsp->vfs_flag & VFS_RDONLY)) { /* rw -> ro */ 715 } else if (!(vfsp->vfs_flag & VFS_RDONLY)) { /* rw -> ro */
673 bhv_vfs_sync(vfsp, SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR, NULL); 716 xfs_filestream_flush(mp);
674 xfs_quiesce_fs(mp); 717 bhv_vfs_sync(vfsp, SYNC_DATA_QUIESCE, NULL);
675 xfs_log_unmount_write(mp); 718 xfs_attr_quiesce(mp);
676 xfs_unmountfs_writesb(mp);
677 vfsp->vfs_flag |= VFS_RDONLY; 719 vfsp->vfs_flag |= VFS_RDONLY;
678 } 720 }
679 return 0; 721 return 0;
@@ -887,6 +929,9 @@ xfs_sync(
887{ 929{
888 xfs_mount_t *mp = XFS_BHVTOM(bdp); 930 xfs_mount_t *mp = XFS_BHVTOM(bdp);
889 931
932 if (flags & SYNC_IOWAIT)
933 xfs_filestream_flush(mp);
934
890 return xfs_syncsub(mp, flags, NULL); 935 return xfs_syncsub(mp, flags, NULL);
891} 936}
892 937
@@ -1128,58 +1173,41 @@ xfs_sync_inodes(
1128 * in the inode list. 1173 * in the inode list.
1129 */ 1174 */
1130 1175
1131 if ((flags & SYNC_CLOSE) && (vp != NULL)) { 1176 /*
1132 /* 1177 * If we have to flush data or wait for I/O completion
1133 * This is the shutdown case. We just need to 1178 * we need to drop the ilock that we currently hold.
1134 * flush and invalidate all the pages associated 1179 * If we need to drop the lock, insert a marker if we
1135 * with the inode. Drop the inode lock since 1180 * have not already done so.
1136 * we can't hold it across calls to the buffer 1181 */
1137 * cache. 1182 if ((flags & (SYNC_CLOSE|SYNC_IOWAIT)) ||
1138 * 1183 ((flags & SYNC_DELWRI) && VN_DIRTY(vp))) {
1139 * We don't set the VREMAPPING bit in the vnode 1184 if (mount_locked) {
1140 * here, because we don't hold the vnode lock 1185 IPOINTER_INSERT(ip, mp);
1141 * exclusively. It doesn't really matter, though,
1142 * because we only come here when we're shutting
1143 * down anyway.
1144 */
1145 xfs_iunlock(ip, XFS_ILOCK_SHARED);
1146
1147 if (XFS_FORCED_SHUTDOWN(mp)) {
1148 bhv_vop_toss_pages(vp, 0, -1, FI_REMAPF);
1149 } else {
1150 error = bhv_vop_flushinval_pages(vp, 0, -1, FI_REMAPF);
1151 } 1186 }
1187 xfs_iunlock(ip, XFS_ILOCK_SHARED);
1152 1188
1153 xfs_ilock(ip, XFS_ILOCK_SHARED); 1189 if (flags & SYNC_CLOSE) {
1154 1190 /* Shutdown case. Flush and invalidate. */
1155 } else if ((flags & SYNC_DELWRI) && (vp != NULL)) { 1191 if (XFS_FORCED_SHUTDOWN(mp))
1156 if (VN_DIRTY(vp)) { 1192 bhv_vop_toss_pages(vp, 0, -1, FI_REMAPF);
1157 /* We need to have dropped the lock here, 1193 else
1158 * so insert a marker if we have not already 1194 error = bhv_vop_flushinval_pages(vp, 0,
1159 * done so. 1195 -1, FI_REMAPF);
1160 */ 1196 } else if ((flags & SYNC_DELWRI) && VN_DIRTY(vp)) {
1161 if (mount_locked) {
1162 IPOINTER_INSERT(ip, mp);
1163 }
1164
1165 /*
1166 * Drop the inode lock since we can't hold it
1167 * across calls to the buffer cache.
1168 */
1169 xfs_iunlock(ip, XFS_ILOCK_SHARED);
1170 error = bhv_vop_flush_pages(vp, (xfs_off_t)0, 1197 error = bhv_vop_flush_pages(vp, (xfs_off_t)0,
1171 -1, fflag, FI_NONE); 1198 -1, fflag, FI_NONE);
1172 xfs_ilock(ip, XFS_ILOCK_SHARED);
1173 } 1199 }
1174 1200
1201 /*
1202 * When freezing, we need to wait to ensure all I/O (including direct
1203 * I/O) is complete to ensure no further data modification can take
1204 * place after this point
1205 */
1206 if (flags & SYNC_IOWAIT)
1207 vn_iowait(vp);
1208
1209 xfs_ilock(ip, XFS_ILOCK_SHARED);
1175 } 1210 }
1176 /*
1177 * When freezing, we need to wait ensure all I/O (including direct
1178 * I/O) is complete to ensure no further data modification can take
1179 * place after this point
1180 */
1181 if (flags & SYNC_IOWAIT)
1182 vn_iowait(vp);
1183 1211
1184 if (flags & SYNC_BDFLUSH) { 1212 if (flags & SYNC_BDFLUSH) {
1185 if ((flags & SYNC_ATTR) && 1213 if ((flags & SYNC_ATTR) &&
@@ -1514,6 +1542,15 @@ xfs_syncsub(
1514 } 1542 }
1515 1543
1516 /* 1544 /*
1545 * If asked, update the disk superblock with incore counter values if we
1546 * are using non-persistent counters so that they don't get too far out
1547 * of sync if we crash or get a forced shutdown. We don't want to force
1548 * this to disk, just get a transaction into the iclogs....
1549 */
1550 if (flags & SYNC_SUPER)
1551 xfs_log_sbcount(mp, 0);
1552
1553 /*
1517 * Now check to see if the log needs a "dummy" transaction. 1554 * Now check to see if the log needs a "dummy" transaction.
1518 */ 1555 */
1519 1556
@@ -1645,6 +1682,7 @@ xfs_vget(
1645 * in stat(). */ 1682 * in stat(). */
1646#define MNTOPT_ATTR2 "attr2" /* do use attr2 attribute format */ 1683#define MNTOPT_ATTR2 "attr2" /* do use attr2 attribute format */
1647#define MNTOPT_NOATTR2 "noattr2" /* do not use attr2 attribute format */ 1684#define MNTOPT_NOATTR2 "noattr2" /* do not use attr2 attribute format */
1685#define MNTOPT_FILESTREAM "filestreams" /* use filestreams allocator */
1648 1686
1649STATIC unsigned long 1687STATIC unsigned long
1650suffix_strtoul(char *s, char **endp, unsigned int base) 1688suffix_strtoul(char *s, char **endp, unsigned int base)
@@ -1831,6 +1869,8 @@ xfs_parseargs(
1831 args->flags |= XFSMNT_ATTR2; 1869 args->flags |= XFSMNT_ATTR2;
1832 } else if (!strcmp(this_char, MNTOPT_NOATTR2)) { 1870 } else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
1833 args->flags &= ~XFSMNT_ATTR2; 1871 args->flags &= ~XFSMNT_ATTR2;
1872 } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
1873 args->flags2 |= XFSMNT2_FILESTREAMS;
1834 } else if (!strcmp(this_char, "osyncisdsync")) { 1874 } else if (!strcmp(this_char, "osyncisdsync")) {
1835 /* no-op, this is now the default */ 1875 /* no-op, this is now the default */
1836 cmn_err(CE_WARN, 1876 cmn_err(CE_WARN,
@@ -1959,9 +1999,9 @@ xfs_showargs(
1959} 1999}
1960 2000
1961/* 2001/*
1962 * Second stage of a freeze. The data is already frozen, now we have to take 2002 * Second stage of a freeze. The data is already frozen so we only
1963 * care of the metadata. New transactions are already blocked, so we need to 2003 * need to take care of the metadata. Once that's done write a dummy
1964 * wait for any remaining transactions to drain out before proceding. 2004 * record to dirty the log in case of a crash while frozen.
1965 */ 2005 */
1966STATIC void 2006STATIC void
1967xfs_freeze( 2007xfs_freeze(
@@ -1969,18 +2009,7 @@ xfs_freeze(
1969{ 2009{
1970 xfs_mount_t *mp = XFS_BHVTOM(bdp); 2010 xfs_mount_t *mp = XFS_BHVTOM(bdp);
1971 2011
1972 /* wait for all modifications to complete */ 2012 xfs_attr_quiesce(mp);
1973 while (atomic_read(&mp->m_active_trans) > 0)
1974 delay(100);
1975
1976 /* flush inodes and push all remaining buffers out to disk */
1977 xfs_quiesce_fs(mp);
1978
1979 ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0);
1980
1981 /* Push the superblock and write an unmount record */
1982 xfs_log_unmount_write(mp);
1983 xfs_unmountfs_writesb(mp);
1984 xfs_fs_log_dummy(mp); 2013 xfs_fs_log_dummy(mp);
1985} 2014}
1986 2015
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index de17aed578f0..79b522779aa4 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -51,6 +51,7 @@
51#include "xfs_refcache.h" 51#include "xfs_refcache.h"
52#include "xfs_trans_space.h" 52#include "xfs_trans_space.h"
53#include "xfs_log_priv.h" 53#include "xfs_log_priv.h"
54#include "xfs_filestream.h"
54 55
55STATIC int 56STATIC int
56xfs_open( 57xfs_open(
@@ -77,36 +78,6 @@ xfs_open(
77 return 0; 78 return 0;
78} 79}
79 80
80STATIC int
81xfs_close(
82 bhv_desc_t *bdp,
83 int flags,
84 lastclose_t lastclose,
85 cred_t *credp)
86{
87 bhv_vnode_t *vp = BHV_TO_VNODE(bdp);
88 xfs_inode_t *ip = XFS_BHVTOI(bdp);
89
90 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
91 return XFS_ERROR(EIO);
92
93 if (lastclose != L_TRUE || !VN_ISREG(vp))
94 return 0;
95
96 /*
97 * If we previously truncated this file and removed old data in
98 * the process, we want to initiate "early" writeout on the last
99 * close. This is an attempt to combat the notorious NULL files
100 * problem which is particularly noticable from a truncate down,
101 * buffered (re-)write (delalloc), followed by a crash. What we
102 * are effectively doing here is significantly reducing the time
103 * window where we'd otherwise be exposed to that problem.
104 */
105 if (VUNTRUNCATE(vp) && VN_DIRTY(vp) && ip->i_delayed_blks > 0)
106 return bhv_vop_flush_pages(vp, 0, -1, XFS_B_ASYNC, FI_NONE);
107 return 0;
108}
109
110/* 81/*
111 * xfs_getattr 82 * xfs_getattr
112 */ 83 */
@@ -183,9 +154,8 @@ xfs_getattr(
183 * realtime extent size or the realtime volume's 154 * realtime extent size or the realtime volume's
184 * extent size. 155 * extent size.
185 */ 156 */
186 vap->va_blocksize = ip->i_d.di_extsize ? 157 vap->va_blocksize =
187 (ip->i_d.di_extsize << mp->m_sb.sb_blocklog) : 158 xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog;
188 (mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog);
189 } 159 }
190 break; 160 break;
191 } 161 }
@@ -814,6 +784,8 @@ xfs_setattr(
814 di_flags |= XFS_DIFLAG_PROJINHERIT; 784 di_flags |= XFS_DIFLAG_PROJINHERIT;
815 if (vap->va_xflags & XFS_XFLAG_NODEFRAG) 785 if (vap->va_xflags & XFS_XFLAG_NODEFRAG)
816 di_flags |= XFS_DIFLAG_NODEFRAG; 786 di_flags |= XFS_DIFLAG_NODEFRAG;
787 if (vap->va_xflags & XFS_XFLAG_FILESTREAM)
788 di_flags |= XFS_DIFLAG_FILESTREAM;
817 if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { 789 if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
818 if (vap->va_xflags & XFS_XFLAG_RTINHERIT) 790 if (vap->va_xflags & XFS_XFLAG_RTINHERIT)
819 di_flags |= XFS_DIFLAG_RTINHERIT; 791 di_flags |= XFS_DIFLAG_RTINHERIT;
@@ -1201,13 +1173,15 @@ xfs_fsync(
1201} 1173}
1202 1174
1203/* 1175/*
1204 * This is called by xfs_inactive to free any blocks beyond eof, 1176 * This is called by xfs_inactive to free any blocks beyond eof
1205 * when the link count isn't zero. 1177 * when the link count isn't zero and by xfs_dm_punch_hole() when
1178 * punching a hole to EOF.
1206 */ 1179 */
1207STATIC int 1180int
1208xfs_inactive_free_eofblocks( 1181xfs_free_eofblocks(
1209 xfs_mount_t *mp, 1182 xfs_mount_t *mp,
1210 xfs_inode_t *ip) 1183 xfs_inode_t *ip,
1184 int flags)
1211{ 1185{
1212 xfs_trans_t *tp; 1186 xfs_trans_t *tp;
1213 int error; 1187 int error;
@@ -1216,6 +1190,7 @@ xfs_inactive_free_eofblocks(
1216 xfs_filblks_t map_len; 1190 xfs_filblks_t map_len;
1217 int nimaps; 1191 int nimaps;
1218 xfs_bmbt_irec_t imap; 1192 xfs_bmbt_irec_t imap;
1193 int use_iolock = (flags & XFS_FREE_EOF_LOCK);
1219 1194
1220 /* 1195 /*
1221 * Figure out if there are any blocks beyond the end 1196 * Figure out if there are any blocks beyond the end
@@ -1256,11 +1231,14 @@ xfs_inactive_free_eofblocks(
1256 * cache and we can't 1231 * cache and we can't
1257 * do that within a transaction. 1232 * do that within a transaction.
1258 */ 1233 */
1259 xfs_ilock(ip, XFS_IOLOCK_EXCL); 1234 if (use_iolock)
1235 xfs_ilock(ip, XFS_IOLOCK_EXCL);
1260 error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 1236 error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE,
1261 ip->i_size); 1237 ip->i_size);
1262 if (error) { 1238 if (error) {
1263 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 1239 xfs_trans_cancel(tp, 0);
1240 if (use_iolock)
1241 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
1264 return error; 1242 return error;
1265 } 1243 }
1266 1244
@@ -1297,7 +1275,8 @@ xfs_inactive_free_eofblocks(
1297 error = xfs_trans_commit(tp, 1275 error = xfs_trans_commit(tp,
1298 XFS_TRANS_RELEASE_LOG_RES); 1276 XFS_TRANS_RELEASE_LOG_RES);
1299 } 1277 }
1300 xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1278 xfs_iunlock(ip, (use_iolock ? (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)
1279 : XFS_ILOCK_EXCL));
1301 } 1280 }
1302 return error; 1281 return error;
1303} 1282}
@@ -1560,6 +1539,31 @@ xfs_release(
1560 if (vp->v_vfsp->vfs_flag & VFS_RDONLY) 1539 if (vp->v_vfsp->vfs_flag & VFS_RDONLY)
1561 return 0; 1540 return 0;
1562 1541
1542 if (!XFS_FORCED_SHUTDOWN(mp)) {
1543 /*
1544 * If we are using filestreams, and we have an unlinked
1545 * file that we are processing the last close on, then nothing
1546 * will be able to reopen and write to this file. Purge this
1547 * inode from the filestreams cache so that it doesn't delay
1548 * teardown of the inode.
1549 */
1550 if ((ip->i_d.di_nlink == 0) && xfs_inode_is_filestream(ip))
1551 xfs_filestream_deassociate(ip);
1552
1553 /*
1554 * If we previously truncated this file and removed old data
1555 * in the process, we want to initiate "early" writeout on
1556 * the last close. This is an attempt to combat the notorious
1557 * NULL files problem which is particularly noticeable from a
1558 * truncate down, buffered (re-)write (delalloc), followed by
1559 * a crash. What we are effectively doing here is
1560 * significantly reducing the time window where we'd otherwise
1561 * be exposed to that problem.
1562 */
1563 if (VUNTRUNCATE(vp) && VN_DIRTY(vp) && ip->i_delayed_blks > 0)
1564 bhv_vop_flush_pages(vp, 0, -1, XFS_B_ASYNC, FI_NONE);
1565 }
1566
1563#ifdef HAVE_REFCACHE 1567#ifdef HAVE_REFCACHE
1564 /* If we are in the NFS reference cache then don't do this now */ 1568 /* If we are in the NFS reference cache then don't do this now */
1565 if (ip->i_refcache) 1569 if (ip->i_refcache)
@@ -1573,7 +1577,8 @@ xfs_release(
1573 (ip->i_df.if_flags & XFS_IFEXTENTS)) && 1577 (ip->i_df.if_flags & XFS_IFEXTENTS)) &&
1574 (!(ip->i_d.di_flags & 1578 (!(ip->i_d.di_flags &
1575 (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) { 1579 (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
1576 if ((error = xfs_inactive_free_eofblocks(mp, ip))) 1580 error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK);
1581 if (error)
1577 return error; 1582 return error;
1578 /* Update linux inode block count after free above */ 1583 /* Update linux inode block count after free above */
1579 vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp, 1584 vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp,
@@ -1654,7 +1659,8 @@ xfs_inactive(
1654 (!(ip->i_d.di_flags & 1659 (!(ip->i_d.di_flags &
1655 (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) || 1660 (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
1656 (ip->i_delayed_blks != 0)))) { 1661 (ip->i_delayed_blks != 0)))) {
1657 if ((error = xfs_inactive_free_eofblocks(mp, ip))) 1662 error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK);
1663 if (error)
1658 return VN_INACTIVE_CACHE; 1664 return VN_INACTIVE_CACHE;
1659 /* Update linux inode block count after free above */ 1665 /* Update linux inode block count after free above */
1660 vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp, 1666 vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp,
@@ -1680,6 +1686,7 @@ xfs_inactive(
1680 1686
1681 error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 0); 1687 error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 0);
1682 if (error) { 1688 if (error) {
1689 xfs_trans_cancel(tp, 0);
1683 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 1690 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
1684 return VN_INACTIVE_CACHE; 1691 return VN_INACTIVE_CACHE;
1685 } 1692 }
@@ -2217,9 +2224,9 @@ static inline int
2217xfs_lock_inumorder(int lock_mode, int subclass) 2224xfs_lock_inumorder(int lock_mode, int subclass)
2218{ 2225{
2219 if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) 2226 if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
2220 lock_mode |= (subclass + XFS_IOLOCK_INUMORDER) << XFS_IOLOCK_SHIFT; 2227 lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
2221 if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) 2228 if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
2222 lock_mode |= (subclass + XFS_ILOCK_INUMORDER) << XFS_ILOCK_SHIFT; 2229 lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;
2223 2230
2224 return lock_mode; 2231 return lock_mode;
2225} 2232}
@@ -2546,6 +2553,15 @@ xfs_remove(
2546 */ 2553 */
2547 xfs_refcache_purge_ip(ip); 2554 xfs_refcache_purge_ip(ip);
2548 2555
2556 /*
2557 * If we are using filestreams, kill the stream association.
2558 * If the file is still open it may get a new one but that
2559 * will get killed on last close in xfs_release() so we don't
2560 * have to worry about that.
2561 */
2562 if (link_zero && xfs_inode_is_filestream(ip))
2563 xfs_filestream_deassociate(ip);
2564
2549 vn_trace_exit(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address); 2565 vn_trace_exit(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address);
2550 2566
2551 /* 2567 /*
@@ -4047,22 +4063,16 @@ xfs_alloc_file_space(
4047 if (XFS_FORCED_SHUTDOWN(mp)) 4063 if (XFS_FORCED_SHUTDOWN(mp))
4048 return XFS_ERROR(EIO); 4064 return XFS_ERROR(EIO);
4049 4065
4050 rt = XFS_IS_REALTIME_INODE(ip);
4051 if (unlikely(rt)) {
4052 if (!(extsz = ip->i_d.di_extsize))
4053 extsz = mp->m_sb.sb_rextsize;
4054 } else {
4055 extsz = ip->i_d.di_extsize;
4056 }
4057
4058 if ((error = XFS_QM_DQATTACH(mp, ip, 0))) 4066 if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
4059 return error; 4067 return error;
4060 4068
4061 if (len <= 0) 4069 if (len <= 0)
4062 return XFS_ERROR(EINVAL); 4070 return XFS_ERROR(EINVAL);
4063 4071
4072 rt = XFS_IS_REALTIME_INODE(ip);
4073 extsz = xfs_get_extsz_hint(ip);
4074
4064 count = len; 4075 count = len;
4065 error = 0;
4066 imapp = &imaps[0]; 4076 imapp = &imaps[0];
4067 nimaps = 1; 4077 nimaps = 1;
4068 bmapi_flag = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0); 4078 bmapi_flag = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0);
@@ -4678,11 +4688,7 @@ xfs_change_file_space(
4678bhv_vnodeops_t xfs_vnodeops = { 4688bhv_vnodeops_t xfs_vnodeops = {
4679 BHV_IDENTITY_INIT(VN_BHV_XFS,VNODE_POSITION_XFS), 4689 BHV_IDENTITY_INIT(VN_BHV_XFS,VNODE_POSITION_XFS),
4680 .vop_open = xfs_open, 4690 .vop_open = xfs_open,
4681 .vop_close = xfs_close,
4682 .vop_read = xfs_read, 4691 .vop_read = xfs_read,
4683#ifdef HAVE_SENDFILE
4684 .vop_sendfile = xfs_sendfile,
4685#endif
4686#ifdef HAVE_SPLICE 4692#ifdef HAVE_SPLICE
4687 .vop_splice_read = xfs_splice_read, 4693 .vop_splice_read = xfs_splice_read,
4688 .vop_splice_write = xfs_splice_write, 4694 .vop_splice_write = xfs_splice_write,