aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/9p.h375
-rw-r--r--fs/9p/Makefile6
-rw-r--r--fs/9p/conv.c845
-rw-r--r--fs/9p/conv.h50
-rw-r--r--fs/9p/debug.h77
-rw-r--r--fs/9p/error.c93
-rw-r--r--fs/9p/error.h177
-rw-r--r--fs/9p/fcall.c427
-rw-r--r--fs/9p/fcprint.c345
-rw-r--r--fs/9p/fid.c168
-rw-r--r--fs/9p/fid.h43
-rw-r--r--fs/9p/mux.c1033
-rw-r--r--fs/9p/mux.h55
-rw-r--r--fs/9p/trans_fd.c308
-rw-r--r--fs/9p/transport.h45
-rw-r--r--fs/9p/v9fs.c293
-rw-r--r--fs/9p/v9fs.h32
-rw-r--r--fs/9p/v9fs_vfs.h6
-rw-r--r--fs/9p/vfs_addr.c57
-rw-r--r--fs/9p/vfs_dentry.c37
-rw-r--r--fs/9p/vfs_dir.c155
-rw-r--r--fs/9p/vfs_file.c166
-rw-r--r--fs/9p/vfs_inode.c754
-rw-r--r--fs/9p/vfs_super.c93
-rw-r--r--fs/Kconfig4
-rw-r--r--fs/afs/Makefile1
-rw-r--r--fs/afs/afs.h8
-rw-r--r--fs/afs/afs_fs.h3
-rw-r--r--fs/afs/callback.c3
-rw-r--r--fs/afs/dir.c1
-rw-r--r--fs/afs/file.c2
-rw-r--r--fs/afs/flock.c558
-rw-r--r--fs/afs/fsclient.c155
-rw-r--r--fs/afs/internal.h30
-rw-r--r--fs/afs/main.c1
-rw-r--r--fs/afs/misc.c1
-rw-r--r--fs/afs/proc.c81
-rw-r--r--fs/afs/super.c3
-rw-r--r--fs/afs/vnode.c132
-rw-r--r--fs/anon_inodes.c10
-rw-r--r--fs/binfmt_elf.c109
-rw-r--r--fs/block_dev.c63
-rw-r--r--fs/buffer.c5
-rw-r--r--fs/coda/dir.c2
-rw-r--r--fs/compat_ioctl.c4
-rw-r--r--fs/configfs/configfs_internal.h7
-rw-r--r--fs/configfs/dir.c289
-rw-r--r--fs/configfs/file.c28
-rw-r--r--fs/configfs/item.c29
-rw-r--r--fs/debugfs/inode.c63
-rw-r--r--fs/dlm/config.c20
-rw-r--r--fs/drop_caches.c2
-rw-r--r--fs/ecryptfs/main.c2
-rw-r--r--fs/ext2/file.c6
-rw-r--r--fs/ext2/super.c20
-rw-r--r--fs/ext3/inode.c2
-rw-r--r--fs/ext3/namei.c10
-rw-r--r--fs/ext3/super.c51
-rw-r--r--fs/ext4/balloc.c2
-rw-r--r--fs/ext4/namei.c10
-rw-r--r--fs/ext4/super.c48
-rw-r--r--fs/fat/dir.c31
-rw-r--r--fs/fat/fatent.c7
-rw-r--r--fs/fat/inode.c3
-rw-r--r--fs/freevxfs/vxfs_dir.h2
-rw-r--r--fs/gfs2/eaops.c1
-rw-r--r--fs/hfsplus/btree.c4
-rw-r--r--fs/hfsplus/dir.c2
-rw-r--r--fs/hfsplus/hfsplus_fs.h4
-rw-r--r--fs/hfsplus/inode.c5
-rw-r--r--fs/hfsplus/super.c4
-rw-r--r--fs/hfsplus/unicode.c230
-rw-r--r--fs/hugetlbfs/inode.c96
-rw-r--r--fs/ioctl.c22
-rw-r--r--fs/isofs/dir.c87
-rw-r--r--fs/isofs/inode.c417
-rw-r--r--fs/isofs/joliet.c10
-rw-r--r--fs/isofs/namei.c26
-rw-r--r--fs/jbd/commit.c3
-rw-r--r--fs/jbd/revoke.c5
-rw-r--r--fs/jbd2/commit.c3
-rw-r--r--fs/jbd2/revoke.c5
-rw-r--r--fs/lockd/host.c39
-rw-r--r--fs/lockd/mon.c2
-rw-r--r--fs/lockd/svc.c6
-rw-r--r--fs/namespace.c23
-rw-r--r--fs/ncpfs/file.c2
-rw-r--r--fs/nfs/Makefile4
-rw-r--r--fs/nfs/client.c82
-rw-r--r--fs/nfs/delegation.c186
-rw-r--r--fs/nfs/delegation.h26
-rw-r--r--fs/nfs/dir.c16
-rw-r--r--fs/nfs/direct.c34
-rw-r--r--fs/nfs/inode.c73
-rw-r--r--fs/nfs/internal.h4
-rw-r--r--fs/nfs/mount_clnt.c169
-rw-r--r--fs/nfs/nfs2xdr.c6
-rw-r--r--fs/nfs/nfs3proc.c4
-rw-r--r--fs/nfs/nfs3xdr.c8
-rw-r--r--fs/nfs/nfs4_fs.h40
-rw-r--r--fs/nfs/nfs4proc.c760
-rw-r--r--fs/nfs/nfs4state.c310
-rw-r--r--fs/nfs/nfs4xdr.c126
-rw-r--r--fs/nfs/nfsroot.c5
-rw-r--r--fs/nfs/pagelist.c60
-rw-r--r--fs/nfs/read.c40
-rw-r--r--fs/nfs/super.c1189
-rw-r--r--fs/nfs/write.c149
-rw-r--r--fs/nfsd/nfs4callback.c18
-rw-r--r--fs/nfsd/nfs4state.c1
-rw-r--r--fs/nfsd/vfs.c1
-rw-r--r--fs/nls/Makefile2
-rw-r--r--fs/ocfs2/alloc.c2676
-rw-r--r--fs/ocfs2/alloc.h43
-rw-r--r--fs/ocfs2/aops.c1015
-rw-r--r--fs/ocfs2/aops.h61
-rw-r--r--fs/ocfs2/cluster/heartbeat.c96
-rw-r--r--fs/ocfs2/cluster/heartbeat.h6
-rw-r--r--fs/ocfs2/cluster/masklog.c1
-rw-r--r--fs/ocfs2/cluster/nodemanager.c42
-rw-r--r--fs/ocfs2/cluster/nodemanager.h5
-rw-r--r--fs/ocfs2/cluster/tcp.c21
-rw-r--r--fs/ocfs2/dir.c2
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c8
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c40
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c79
-rw-r--r--fs/ocfs2/dlmglue.c6
-rw-r--r--fs/ocfs2/endian.h5
-rw-r--r--fs/ocfs2/extent_map.c41
-rw-r--r--fs/ocfs2/file.c702
-rw-r--r--fs/ocfs2/file.h10
-rw-r--r--fs/ocfs2/heartbeat.c10
-rw-r--r--fs/ocfs2/ioctl.c15
-rw-r--r--fs/ocfs2/journal.c6
-rw-r--r--fs/ocfs2/journal.h2
-rw-r--r--fs/ocfs2/mmap.c167
-rw-r--r--fs/ocfs2/namei.c2
-rw-r--r--fs/ocfs2/ocfs2.h14
-rw-r--r--fs/ocfs2/ocfs2_fs.h33
-rw-r--r--fs/ocfs2/slot_map.c12
-rw-r--r--fs/ocfs2/suballoc.c46
-rw-r--r--fs/ocfs2/suballoc.h17
-rw-r--r--fs/ocfs2/super.c27
-rw-r--r--fs/ocfs2/super.h2
-rw-r--r--fs/open.c14
-rw-r--r--fs/partitions/acorn.c9
-rw-r--r--fs/partitions/check.c1
-rw-r--r--fs/partitions/ldm.c137
-rw-r--r--fs/partitions/ldm.h2
-rw-r--r--fs/proc/array.c68
-rw-r--r--fs/proc/base.c83
-rw-r--r--fs/proc/generic.c52
-rw-r--r--fs/proc/inode.c254
-rw-r--r--fs/proc/proc_misc.c7
-rw-r--r--fs/proc/proc_tty.c15
-rw-r--r--fs/quota.c118
-rw-r--r--fs/reiserfs/file.c1
-rw-r--r--fs/seq_file.c18
-rw-r--r--fs/splice.c45
-rw-r--r--fs/super.c1
-rw-r--r--fs/sysfs/bin.c195
-rw-r--r--fs/sysfs/dir.c1297
-rw-r--r--fs/sysfs/file.c379
-rw-r--r--fs/sysfs/group.c55
-rw-r--r--fs/sysfs/inode.c221
-rw-r--r--fs/sysfs/mount.c36
-rw-r--r--fs/sysfs/symlink.c150
-rw-r--r--fs/sysfs/sysfs.h169
-rw-r--r--fs/udf/crc.c4
-rw-r--r--fs/udf/ialloc.c9
-rw-r--r--fs/udf/inode.c51
-rw-r--r--fs/ufs/super.c5
-rw-r--r--fs/utimes.c2
-rw-r--r--fs/xfs/Makefile-linux-2.62
-rw-r--r--fs/xfs/linux-2.6/kmem.h19
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c43
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c59
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c11
-rw-r--r--fs/xfs/linux-2.6/xfs_globals.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.c321
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c5
-rw-r--r--fs/xfs/linux-2.6/xfs_sysctl.c11
-rw-r--r--fs/xfs/linux-2.6/xfs_sysctl.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_vfs.h15
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.h5
-rw-r--r--fs/xfs/quota/xfs_qm.c9
-rw-r--r--fs/xfs/xfs.h1
-rw-r--r--fs/xfs/xfs_ag.h9
-rw-r--r--fs/xfs/xfs_alloc.c101
-rw-r--r--fs/xfs/xfs_alloc.h6
-rw-r--r--fs/xfs/xfs_alloc_btree.c20
-rw-r--r--fs/xfs/xfs_bit.c91
-rw-r--r--fs/xfs/xfs_bit.h4
-rw-r--r--fs/xfs/xfs_bmap.c369
-rw-r--r--fs/xfs/xfs_bmap.h6
-rw-r--r--fs/xfs/xfs_bmap_btree.c88
-rw-r--r--fs/xfs/xfs_btree.h32
-rw-r--r--fs/xfs/xfs_buf_item.c4
-rw-r--r--fs/xfs/xfs_clnt.h2
-rw-r--r--fs/xfs/xfs_dinode.h4
-rw-r--r--fs/xfs/xfs_dir2.c12
-rw-r--r--fs/xfs/xfs_dir2_block.c98
-rw-r--r--fs/xfs/xfs_dir2_block.h2
-rw-r--r--fs/xfs/xfs_dir2_data.c54
-rw-r--r--fs/xfs/xfs_dir2_data.h12
-rw-r--r--fs/xfs/xfs_dir2_leaf.c106
-rw-r--r--fs/xfs/xfs_dir2_leaf.h29
-rw-r--r--fs/xfs/xfs_dir2_node.c66
-rw-r--r--fs/xfs/xfs_dir2_node.h4
-rw-r--r--fs/xfs/xfs_dir2_sf.c204
-rw-r--r--fs/xfs/xfs_dir2_sf.h20
-rw-r--r--fs/xfs/xfs_filestream.c771
-rw-r--r--fs/xfs/xfs_filestream.h136
-rw-r--r--fs/xfs/xfs_fs.h2
-rw-r--r--fs/xfs/xfs_fsops.c17
-rw-r--r--fs/xfs/xfs_ialloc.c28
-rw-r--r--fs/xfs/xfs_ialloc.h10
-rw-r--r--fs/xfs/xfs_inode.c39
-rw-r--r--fs/xfs/xfs_inode.h16
-rw-r--r--fs/xfs/xfs_iomap.c41
-rw-r--r--fs/xfs/xfs_itable.c42
-rw-r--r--fs/xfs/xfs_itable.h20
-rw-r--r--fs/xfs/xfs_log.c41
-rw-r--r--fs/xfs/xfs_log_recover.c8
-rw-r--r--fs/xfs/xfs_mount.c237
-rw-r--r--fs/xfs/xfs_mount.h15
-rw-r--r--fs/xfs/xfs_mru_cache.c608
-rw-r--r--fs/xfs/xfs_mru_cache.h57
-rw-r--r--fs/xfs/xfs_rtalloc.c4
-rw-r--r--fs/xfs/xfs_rw.h36
-rw-r--r--fs/xfs/xfs_sb.h16
-rw-r--r--fs/xfs/xfs_trans.c125
-rw-r--r--fs/xfs/xfs_trans.h3
-rw-r--r--fs/xfs/xfs_vfsops.c159
-rw-r--r--fs/xfs/xfs_vnodeops.c119
238 files changed, 14591 insertions, 10178 deletions
diff --git a/fs/9p/9p.h b/fs/9p/9p.h
deleted file mode 100644
index 94e2f92ab2e..00000000000
--- a/fs/9p/9p.h
+++ /dev/null
@@ -1,375 +0,0 @@
1/*
2 * linux/fs/9p/9p.h
3 *
4 * 9P protocol definitions.
5 *
6 * Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
7 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
8 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2
12 * as published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to:
21 * Free Software Foundation
22 * 51 Franklin Street, Fifth Floor
23 * Boston, MA 02111-1301 USA
24 *
25 */
26
27/* Message Types */
28enum {
29 TVERSION = 100,
30 RVERSION,
31 TAUTH = 102,
32 RAUTH,
33 TATTACH = 104,
34 RATTACH,
35 TERROR = 106,
36 RERROR,
37 TFLUSH = 108,
38 RFLUSH,
39 TWALK = 110,
40 RWALK,
41 TOPEN = 112,
42 ROPEN,
43 TCREATE = 114,
44 RCREATE,
45 TREAD = 116,
46 RREAD,
47 TWRITE = 118,
48 RWRITE,
49 TCLUNK = 120,
50 RCLUNK,
51 TREMOVE = 122,
52 RREMOVE,
53 TSTAT = 124,
54 RSTAT,
55 TWSTAT = 126,
56 RWSTAT,
57};
58
59/* modes */
60enum {
61 V9FS_OREAD = 0x00,
62 V9FS_OWRITE = 0x01,
63 V9FS_ORDWR = 0x02,
64 V9FS_OEXEC = 0x03,
65 V9FS_OEXCL = 0x04,
66 V9FS_OTRUNC = 0x10,
67 V9FS_OREXEC = 0x20,
68 V9FS_ORCLOSE = 0x40,
69 V9FS_OAPPEND = 0x80,
70};
71
72/* permissions */
73enum {
74 V9FS_DMDIR = 0x80000000,
75 V9FS_DMAPPEND = 0x40000000,
76 V9FS_DMEXCL = 0x20000000,
77 V9FS_DMMOUNT = 0x10000000,
78 V9FS_DMAUTH = 0x08000000,
79 V9FS_DMTMP = 0x04000000,
80 V9FS_DMSYMLINK = 0x02000000,
81 V9FS_DMLINK = 0x01000000,
82 /* 9P2000.u extensions */
83 V9FS_DMDEVICE = 0x00800000,
84 V9FS_DMNAMEDPIPE = 0x00200000,
85 V9FS_DMSOCKET = 0x00100000,
86 V9FS_DMSETUID = 0x00080000,
87 V9FS_DMSETGID = 0x00040000,
88};
89
90/* qid.types */
91enum {
92 V9FS_QTDIR = 0x80,
93 V9FS_QTAPPEND = 0x40,
94 V9FS_QTEXCL = 0x20,
95 V9FS_QTMOUNT = 0x10,
96 V9FS_QTAUTH = 0x08,
97 V9FS_QTTMP = 0x04,
98 V9FS_QTSYMLINK = 0x02,
99 V9FS_QTLINK = 0x01,
100 V9FS_QTFILE = 0x00,
101};
102
103#define V9FS_NOTAG (u16)(~0)
104#define V9FS_NOFID (u32)(~0)
105#define V9FS_MAXWELEM 16
106
107/* ample room for Twrite/Rread header (iounit) */
108#define V9FS_IOHDRSZ 24
109
110struct v9fs_str {
111 u16 len;
112 char *str;
113};
114
115/* qids are the unique ID for a file (like an inode */
116struct v9fs_qid {
117 u8 type;
118 u32 version;
119 u64 path;
120};
121
122/* Plan 9 file metadata (stat) structure */
123struct v9fs_stat {
124 u16 size;
125 u16 type;
126 u32 dev;
127 struct v9fs_qid qid;
128 u32 mode;
129 u32 atime;
130 u32 mtime;
131 u64 length;
132 struct v9fs_str name;
133 struct v9fs_str uid;
134 struct v9fs_str gid;
135 struct v9fs_str muid;
136 struct v9fs_str extension; /* 9p2000.u extensions */
137 u32 n_uid; /* 9p2000.u extensions */
138 u32 n_gid; /* 9p2000.u extensions */
139 u32 n_muid; /* 9p2000.u extensions */
140};
141
142/* file metadata (stat) structure used to create Twstat message
143 The is similar to v9fs_stat, but the strings don't point to
144 the same memory block and should be freed separately
145*/
146struct v9fs_wstat {
147 u16 size;
148 u16 type;
149 u32 dev;
150 struct v9fs_qid qid;
151 u32 mode;
152 u32 atime;
153 u32 mtime;
154 u64 length;
155 char *name;
156 char *uid;
157 char *gid;
158 char *muid;
159 char *extension; /* 9p2000.u extensions */
160 u32 n_uid; /* 9p2000.u extensions */
161 u32 n_gid; /* 9p2000.u extensions */
162 u32 n_muid; /* 9p2000.u extensions */
163};
164
165/* Structures for Protocol Operations */
166
167struct Tversion {
168 u32 msize;
169 struct v9fs_str version;
170};
171
172struct Rversion {
173 u32 msize;
174 struct v9fs_str version;
175};
176
177struct Tauth {
178 u32 afid;
179 struct v9fs_str uname;
180 struct v9fs_str aname;
181};
182
183struct Rauth {
184 struct v9fs_qid qid;
185};
186
187struct Rerror {
188 struct v9fs_str error;
189 u32 errno; /* 9p2000.u extension */
190};
191
192struct Tflush {
193 u16 oldtag;
194};
195
196struct Rflush {
197};
198
199struct Tattach {
200 u32 fid;
201 u32 afid;
202 struct v9fs_str uname;
203 struct v9fs_str aname;
204};
205
206struct Rattach {
207 struct v9fs_qid qid;
208};
209
210struct Twalk {
211 u32 fid;
212 u32 newfid;
213 u16 nwname;
214 struct v9fs_str wnames[16];
215};
216
217struct Rwalk {
218 u16 nwqid;
219 struct v9fs_qid wqids[16];
220};
221
222struct Topen {
223 u32 fid;
224 u8 mode;
225};
226
227struct Ropen {
228 struct v9fs_qid qid;
229 u32 iounit;
230};
231
232struct Tcreate {
233 u32 fid;
234 struct v9fs_str name;
235 u32 perm;
236 u8 mode;
237 struct v9fs_str extension;
238};
239
240struct Rcreate {
241 struct v9fs_qid qid;
242 u32 iounit;
243};
244
245struct Tread {
246 u32 fid;
247 u64 offset;
248 u32 count;
249};
250
251struct Rread {
252 u32 count;
253 u8 *data;
254};
255
256struct Twrite {
257 u32 fid;
258 u64 offset;
259 u32 count;
260 u8 *data;
261};
262
263struct Rwrite {
264 u32 count;
265};
266
267struct Tclunk {
268 u32 fid;
269};
270
271struct Rclunk {
272};
273
274struct Tremove {
275 u32 fid;
276};
277
278struct Rremove {
279};
280
281struct Tstat {
282 u32 fid;
283};
284
285struct Rstat {
286 struct v9fs_stat stat;
287};
288
289struct Twstat {
290 u32 fid;
291 struct v9fs_stat stat;
292};
293
294struct Rwstat {
295};
296
297/*
298 * fcall is the primary packet structure
299 *
300 */
301
302struct v9fs_fcall {
303 u32 size;
304 u8 id;
305 u16 tag;
306 void *sdata;
307
308 union {
309 struct Tversion tversion;
310 struct Rversion rversion;
311 struct Tauth tauth;
312 struct Rauth rauth;
313 struct Rerror rerror;
314 struct Tflush tflush;
315 struct Rflush rflush;
316 struct Tattach tattach;
317 struct Rattach rattach;
318 struct Twalk twalk;
319 struct Rwalk rwalk;
320 struct Topen topen;
321 struct Ropen ropen;
322 struct Tcreate tcreate;
323 struct Rcreate rcreate;
324 struct Tread tread;
325 struct Rread rread;
326 struct Twrite twrite;
327 struct Rwrite rwrite;
328 struct Tclunk tclunk;
329 struct Rclunk rclunk;
330 struct Tremove tremove;
331 struct Rremove rremove;
332 struct Tstat tstat;
333 struct Rstat rstat;
334 struct Twstat twstat;
335 struct Rwstat rwstat;
336 } params;
337};
338
339#define PRINT_FCALL_ERROR(s, fcall) dprintk(DEBUG_ERROR, "%s: %.*s\n", s, \
340 fcall?fcall->params.rerror.error.len:0, \
341 fcall?fcall->params.rerror.error.str:"");
342
343int v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
344 char *version, struct v9fs_fcall **rcall);
345
346int v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname,
347 u32 fid, u32 afid, struct v9fs_fcall **rcall);
348
349int v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid);
350
351int v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid,
352 struct v9fs_fcall **rcall);
353
354int v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid,
355 struct v9fs_wstat *wstat, struct v9fs_fcall **rcall);
356
357int v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid,
358 char *name, struct v9fs_fcall **rcall);
359
360int v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode,
361 struct v9fs_fcall **rcall);
362
363int v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid,
364 struct v9fs_fcall **rcall);
365
366int v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name,
367 u32 perm, u8 mode, char *extension, struct v9fs_fcall **rcall);
368
369int v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid,
370 u64 offset, u32 count, struct v9fs_fcall **rcall);
371
372int v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset,
373 u32 count, const char __user * data,
374 struct v9fs_fcall **rcall);
375int v9fs_printfcall(char *, int, struct v9fs_fcall *, int);
diff --git a/fs/9p/Makefile b/fs/9p/Makefile
index 87897f84dfb..bc7f0d1551e 100644
--- a/fs/9p/Makefile
+++ b/fs/9p/Makefile
@@ -1,18 +1,12 @@
1obj-$(CONFIG_9P_FS) := 9p.o 1obj-$(CONFIG_9P_FS) := 9p.o
2 2
39p-objs := \ 39p-objs := \
4 trans_fd.o \
5 mux.o \
6 fcall.o \
7 conv.o \
8 vfs_super.o \ 4 vfs_super.o \
9 vfs_inode.o \ 5 vfs_inode.o \
10 vfs_addr.o \ 6 vfs_addr.o \
11 vfs_file.o \ 7 vfs_file.o \
12 vfs_dir.o \ 8 vfs_dir.o \
13 vfs_dentry.o \ 9 vfs_dentry.o \
14 error.o \
15 v9fs.o \ 10 v9fs.o \
16 fid.o \ 11 fid.o \
17 fcprint.o
18 12
diff --git a/fs/9p/conv.c b/fs/9p/conv.c
deleted file mode 100644
index a3ed571eee3..00000000000
--- a/fs/9p/conv.c
+++ /dev/null
@@ -1,845 +0,0 @@
1/*
2 * linux/fs/9p/conv.c
3 *
4 * 9P protocol conversion functions
5 *
6 * Copyright (C) 2004, 2005 by Latchesar Ionkov <lucho@ionkov.net>
7 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
8 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2
12 * as published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to:
21 * Free Software Foundation
22 * 51 Franklin Street, Fifth Floor
23 * Boston, MA 02111-1301 USA
24 *
25 */
26
27#include <linux/module.h>
28#include <linux/errno.h>
29#include <linux/fs.h>
30#include <linux/sched.h>
31#include <linux/idr.h>
32#include <asm/uaccess.h>
33#include "debug.h"
34#include "v9fs.h"
35#include "9p.h"
36#include "conv.h"
37
38/*
39 * Buffer to help with string parsing
40 */
41struct cbuf {
42 unsigned char *sp;
43 unsigned char *p;
44 unsigned char *ep;
45};
46
47static inline void buf_init(struct cbuf *buf, void *data, int datalen)
48{
49 buf->sp = buf->p = data;
50 buf->ep = data + datalen;
51}
52
53static inline int buf_check_overflow(struct cbuf *buf)
54{
55 return buf->p > buf->ep;
56}
57
58static int buf_check_size(struct cbuf *buf, int len)
59{
60 if (buf->p + len > buf->ep) {
61 if (buf->p < buf->ep) {
62 eprintk(KERN_ERR, "buffer overflow: want %d has %d\n",
63 len, (int)(buf->ep - buf->p));
64 dump_stack();
65 buf->p = buf->ep + 1;
66 }
67
68 return 0;
69 }
70
71 return 1;
72}
73
74static void *buf_alloc(struct cbuf *buf, int len)
75{
76 void *ret = NULL;
77
78 if (buf_check_size(buf, len)) {
79 ret = buf->p;
80 buf->p += len;
81 }
82
83 return ret;
84}
85
86static void buf_put_int8(struct cbuf *buf, u8 val)
87{
88 if (buf_check_size(buf, 1)) {
89 buf->p[0] = val;
90 buf->p++;
91 }
92}
93
94static void buf_put_int16(struct cbuf *buf, u16 val)
95{
96 if (buf_check_size(buf, 2)) {
97 *(__le16 *) buf->p = cpu_to_le16(val);
98 buf->p += 2;
99 }
100}
101
102static void buf_put_int32(struct cbuf *buf, u32 val)
103{
104 if (buf_check_size(buf, 4)) {
105 *(__le32 *)buf->p = cpu_to_le32(val);
106 buf->p += 4;
107 }
108}
109
110static void buf_put_int64(struct cbuf *buf, u64 val)
111{
112 if (buf_check_size(buf, 8)) {
113 *(__le64 *)buf->p = cpu_to_le64(val);
114 buf->p += 8;
115 }
116}
117
118static char *buf_put_stringn(struct cbuf *buf, const char *s, u16 slen)
119{
120 char *ret;
121
122 ret = NULL;
123 if (buf_check_size(buf, slen + 2)) {
124 buf_put_int16(buf, slen);
125 ret = buf->p;
126 memcpy(buf->p, s, slen);
127 buf->p += slen;
128 }
129
130 return ret;
131}
132
133static inline void buf_put_string(struct cbuf *buf, const char *s)
134{
135 buf_put_stringn(buf, s, strlen(s));
136}
137
138static u8 buf_get_int8(struct cbuf *buf)
139{
140 u8 ret = 0;
141
142 if (buf_check_size(buf, 1)) {
143 ret = buf->p[0];
144 buf->p++;
145 }
146
147 return ret;
148}
149
150static u16 buf_get_int16(struct cbuf *buf)
151{
152 u16 ret = 0;
153
154 if (buf_check_size(buf, 2)) {
155 ret = le16_to_cpu(*(__le16 *)buf->p);
156 buf->p += 2;
157 }
158
159 return ret;
160}
161
162static u32 buf_get_int32(struct cbuf *buf)
163{
164 u32 ret = 0;
165
166 if (buf_check_size(buf, 4)) {
167 ret = le32_to_cpu(*(__le32 *)buf->p);
168 buf->p += 4;
169 }
170
171 return ret;
172}
173
174static u64 buf_get_int64(struct cbuf *buf)
175{
176 u64 ret = 0;
177
178 if (buf_check_size(buf, 8)) {
179 ret = le64_to_cpu(*(__le64 *)buf->p);
180 buf->p += 8;
181 }
182
183 return ret;
184}
185
186static void buf_get_str(struct cbuf *buf, struct v9fs_str *vstr)
187{
188 vstr->len = buf_get_int16(buf);
189 if (!buf_check_overflow(buf) && buf_check_size(buf, vstr->len)) {
190 vstr->str = buf->p;
191 buf->p += vstr->len;
192 } else {
193 vstr->len = 0;
194 vstr->str = NULL;
195 }
196}
197
198static void buf_get_qid(struct cbuf *bufp, struct v9fs_qid *qid)
199{
200 qid->type = buf_get_int8(bufp);
201 qid->version = buf_get_int32(bufp);
202 qid->path = buf_get_int64(bufp);
203}
204
205/**
206 * v9fs_size_wstat - calculate the size of a variable length stat struct
207 * @stat: metadata (stat) structure
208 * @extended: non-zero if 9P2000.u
209 *
210 */
211
212static int v9fs_size_wstat(struct v9fs_wstat *wstat, int extended)
213{
214 int size = 0;
215
216 if (wstat == NULL) {
217 eprintk(KERN_ERR, "v9fs_size_stat: got a NULL stat pointer\n");
218 return 0;
219 }
220
221 size = /* 2 + *//* size[2] */
222 2 + /* type[2] */
223 4 + /* dev[4] */
224 1 + /* qid.type[1] */
225 4 + /* qid.vers[4] */
226 8 + /* qid.path[8] */
227 4 + /* mode[4] */
228 4 + /* atime[4] */
229 4 + /* mtime[4] */
230 8 + /* length[8] */
231 8; /* minimum sum of string lengths */
232
233 if (wstat->name)
234 size += strlen(wstat->name);
235 if (wstat->uid)
236 size += strlen(wstat->uid);
237 if (wstat->gid)
238 size += strlen(wstat->gid);
239 if (wstat->muid)
240 size += strlen(wstat->muid);
241
242 if (extended) {
243 size += 4 + /* n_uid[4] */
244 4 + /* n_gid[4] */
245 4 + /* n_muid[4] */
246 2; /* string length of extension[4] */
247 if (wstat->extension)
248 size += strlen(wstat->extension);
249 }
250
251 return size;
252}
253
254/**
255 * buf_get_stat - safely decode a recieved metadata (stat) structure
256 * @bufp: buffer to deserialize
257 * @stat: metadata (stat) structure
258 * @extended: non-zero if 9P2000.u
259 *
260 */
261
262static void
263buf_get_stat(struct cbuf *bufp, struct v9fs_stat *stat, int extended)
264{
265 stat->size = buf_get_int16(bufp);
266 stat->type = buf_get_int16(bufp);
267 stat->dev = buf_get_int32(bufp);
268 stat->qid.type = buf_get_int8(bufp);
269 stat->qid.version = buf_get_int32(bufp);
270 stat->qid.path = buf_get_int64(bufp);
271 stat->mode = buf_get_int32(bufp);
272 stat->atime = buf_get_int32(bufp);
273 stat->mtime = buf_get_int32(bufp);
274 stat->length = buf_get_int64(bufp);
275 buf_get_str(bufp, &stat->name);
276 buf_get_str(bufp, &stat->uid);
277 buf_get_str(bufp, &stat->gid);
278 buf_get_str(bufp, &stat->muid);
279
280 if (extended) {
281 buf_get_str(bufp, &stat->extension);
282 stat->n_uid = buf_get_int32(bufp);
283 stat->n_gid = buf_get_int32(bufp);
284 stat->n_muid = buf_get_int32(bufp);
285 }
286}
287
288/**
289 * v9fs_deserialize_stat - decode a received metadata structure
290 * @buf: buffer to deserialize
291 * @buflen: length of received buffer
292 * @stat: metadata structure to decode into
293 * @extended: non-zero if 9P2000.u
294 *
295 * Note: stat will point to the buf region.
296 */
297
298int
299v9fs_deserialize_stat(void *buf, u32 buflen, struct v9fs_stat *stat,
300 int extended)
301{
302 struct cbuf buffer;
303 struct cbuf *bufp = &buffer;
304 unsigned char *p;
305
306 buf_init(bufp, buf, buflen);
307 p = bufp->p;
308 buf_get_stat(bufp, stat, extended);
309
310 if (buf_check_overflow(bufp))
311 return 0;
312 else
313 return bufp->p - p;
314}
315
316/**
317 * deserialize_fcall - unmarshal a response
318 * @buf: recieved buffer
319 * @buflen: length of received buffer
320 * @rcall: fcall structure to populate
321 * @rcalllen: length of fcall structure to populate
322 * @extended: non-zero if 9P2000.u
323 *
324 */
325
326int
327v9fs_deserialize_fcall(void *buf, u32 buflen, struct v9fs_fcall *rcall,
328 int extended)
329{
330
331 struct cbuf buffer;
332 struct cbuf *bufp = &buffer;
333 int i = 0;
334
335 buf_init(bufp, buf, buflen);
336
337 rcall->size = buf_get_int32(bufp);
338 rcall->id = buf_get_int8(bufp);
339 rcall->tag = buf_get_int16(bufp);
340
341 dprintk(DEBUG_CONV, "size %d id %d tag %d\n", rcall->size, rcall->id,
342 rcall->tag);
343
344 switch (rcall->id) {
345 default:
346 eprintk(KERN_ERR, "unknown message type: %d\n", rcall->id);
347 return -EPROTO;
348 case RVERSION:
349 rcall->params.rversion.msize = buf_get_int32(bufp);
350 buf_get_str(bufp, &rcall->params.rversion.version);
351 break;
352 case RFLUSH:
353 break;
354 case RATTACH:
355 rcall->params.rattach.qid.type = buf_get_int8(bufp);
356 rcall->params.rattach.qid.version = buf_get_int32(bufp);
357 rcall->params.rattach.qid.path = buf_get_int64(bufp);
358 break;
359 case RWALK:
360 rcall->params.rwalk.nwqid = buf_get_int16(bufp);
361 if (rcall->params.rwalk.nwqid > V9FS_MAXWELEM) {
362 eprintk(KERN_ERR, "Rwalk with more than %d qids: %d\n",
363 V9FS_MAXWELEM, rcall->params.rwalk.nwqid);
364 return -EPROTO;
365 }
366
367 for (i = 0; i < rcall->params.rwalk.nwqid; i++)
368 buf_get_qid(bufp, &rcall->params.rwalk.wqids[i]);
369 break;
370 case ROPEN:
371 buf_get_qid(bufp, &rcall->params.ropen.qid);
372 rcall->params.ropen.iounit = buf_get_int32(bufp);
373 break;
374 case RCREATE:
375 buf_get_qid(bufp, &rcall->params.rcreate.qid);
376 rcall->params.rcreate.iounit = buf_get_int32(bufp);
377 break;
378 case RREAD:
379 rcall->params.rread.count = buf_get_int32(bufp);
380 rcall->params.rread.data = bufp->p;
381 buf_check_size(bufp, rcall->params.rread.count);
382 break;
383 case RWRITE:
384 rcall->params.rwrite.count = buf_get_int32(bufp);
385 break;
386 case RCLUNK:
387 break;
388 case RREMOVE:
389 break;
390 case RSTAT:
391 buf_get_int16(bufp);
392 buf_get_stat(bufp, &rcall->params.rstat.stat, extended);
393 break;
394 case RWSTAT:
395 break;
396 case RERROR:
397 buf_get_str(bufp, &rcall->params.rerror.error);
398 if (extended)
399 rcall->params.rerror.errno = buf_get_int16(bufp);
400 break;
401 }
402
403 if (buf_check_overflow(bufp)) {
404 dprintk(DEBUG_ERROR, "buffer overflow\n");
405 return -EIO;
406 }
407
408 return bufp->p - bufp->sp;
409}
410
411static inline void v9fs_put_int8(struct cbuf *bufp, u8 val, u8 * p)
412{
413 *p = val;
414 buf_put_int8(bufp, val);
415}
416
417static inline void v9fs_put_int16(struct cbuf *bufp, u16 val, u16 * p)
418{
419 *p = val;
420 buf_put_int16(bufp, val);
421}
422
423static inline void v9fs_put_int32(struct cbuf *bufp, u32 val, u32 * p)
424{
425 *p = val;
426 buf_put_int32(bufp, val);
427}
428
429static inline void v9fs_put_int64(struct cbuf *bufp, u64 val, u64 * p)
430{
431 *p = val;
432 buf_put_int64(bufp, val);
433}
434
435static void
436v9fs_put_str(struct cbuf *bufp, char *data, struct v9fs_str *str)
437{
438 int len;
439 char *s;
440
441 if (data)
442 len = strlen(data);
443 else
444 len = 0;
445
446 s = buf_put_stringn(bufp, data, len);
447 if (str) {
448 str->len = len;
449 str->str = s;
450 }
451}
452
453static int
454v9fs_put_user_data(struct cbuf *bufp, const char __user * data, int count,
455 unsigned char **pdata)
456{
457 *pdata = buf_alloc(bufp, count);
458 return copy_from_user(*pdata, data, count);
459}
460
461static void
462v9fs_put_wstat(struct cbuf *bufp, struct v9fs_wstat *wstat,
463 struct v9fs_stat *stat, int statsz, int extended)
464{
465 v9fs_put_int16(bufp, statsz, &stat->size);
466 v9fs_put_int16(bufp, wstat->type, &stat->type);
467 v9fs_put_int32(bufp, wstat->dev, &stat->dev);
468 v9fs_put_int8(bufp, wstat->qid.type, &stat->qid.type);
469 v9fs_put_int32(bufp, wstat->qid.version, &stat->qid.version);
470 v9fs_put_int64(bufp, wstat->qid.path, &stat->qid.path);
471 v9fs_put_int32(bufp, wstat->mode, &stat->mode);
472 v9fs_put_int32(bufp, wstat->atime, &stat->atime);
473 v9fs_put_int32(bufp, wstat->mtime, &stat->mtime);
474 v9fs_put_int64(bufp, wstat->length, &stat->length);
475
476 v9fs_put_str(bufp, wstat->name, &stat->name);
477 v9fs_put_str(bufp, wstat->uid, &stat->uid);
478 v9fs_put_str(bufp, wstat->gid, &stat->gid);
479 v9fs_put_str(bufp, wstat->muid, &stat->muid);
480
481 if (extended) {
482 v9fs_put_str(bufp, wstat->extension, &stat->extension);
483 v9fs_put_int32(bufp, wstat->n_uid, &stat->n_uid);
484 v9fs_put_int32(bufp, wstat->n_gid, &stat->n_gid);
485 v9fs_put_int32(bufp, wstat->n_muid, &stat->n_muid);
486 }
487}
488
489static struct v9fs_fcall *
490v9fs_create_common(struct cbuf *bufp, u32 size, u8 id)
491{
492 struct v9fs_fcall *fc;
493
494 size += 4 + 1 + 2; /* size[4] id[1] tag[2] */
495 fc = kmalloc(sizeof(struct v9fs_fcall) + size, GFP_KERNEL);
496 if (!fc)
497 return ERR_PTR(-ENOMEM);
498
499 fc->sdata = (char *)fc + sizeof(*fc);
500
501 buf_init(bufp, (char *)fc->sdata, size);
502 v9fs_put_int32(bufp, size, &fc->size);
503 v9fs_put_int8(bufp, id, &fc->id);
504 v9fs_put_int16(bufp, V9FS_NOTAG, &fc->tag);
505
506 return fc;
507}
508
509void v9fs_set_tag(struct v9fs_fcall *fc, u16 tag)
510{
511 fc->tag = tag;
512 *(__le16 *) (fc->sdata + 5) = cpu_to_le16(tag);
513}
514
515struct v9fs_fcall *v9fs_create_tversion(u32 msize, char *version)
516{
517 int size;
518 struct v9fs_fcall *fc;
519 struct cbuf buffer;
520 struct cbuf *bufp = &buffer;
521
522 size = 4 + 2 + strlen(version); /* msize[4] version[s] */
523 fc = v9fs_create_common(bufp, size, TVERSION);
524 if (IS_ERR(fc))
525 goto error;
526
527 v9fs_put_int32(bufp, msize, &fc->params.tversion.msize);
528 v9fs_put_str(bufp, version, &fc->params.tversion.version);
529
530 if (buf_check_overflow(bufp)) {
531 kfree(fc);
532 fc = ERR_PTR(-ENOMEM);
533 }
534 error:
535 return fc;
536}
537
538#if 0
539struct v9fs_fcall *v9fs_create_tauth(u32 afid, char *uname, char *aname)
540{
541 int size;
542 struct v9fs_fcall *fc;
543 struct cbuf buffer;
544 struct cbuf *bufp = &buffer;
545
546 size = 4 + 2 + strlen(uname) + 2 + strlen(aname); /* afid[4] uname[s] aname[s] */
547 fc = v9fs_create_common(bufp, size, TAUTH);
548 if (IS_ERR(fc))
549 goto error;
550
551 v9fs_put_int32(bufp, afid, &fc->params.tauth.afid);
552 v9fs_put_str(bufp, uname, &fc->params.tauth.uname);
553 v9fs_put_str(bufp, aname, &fc->params.tauth.aname);
554
555 if (buf_check_overflow(bufp)) {
556 kfree(fc);
557 fc = ERR_PTR(-ENOMEM);
558 }
559 error:
560 return fc;
561}
562#endif /* 0 */
563
564struct v9fs_fcall *
565v9fs_create_tattach(u32 fid, u32 afid, char *uname, char *aname)
566{
567 int size;
568 struct v9fs_fcall *fc;
569 struct cbuf buffer;
570 struct cbuf *bufp = &buffer;
571
572 size = 4 + 4 + 2 + strlen(uname) + 2 + strlen(aname); /* fid[4] afid[4] uname[s] aname[s] */
573 fc = v9fs_create_common(bufp, size, TATTACH);
574 if (IS_ERR(fc))
575 goto error;
576
577 v9fs_put_int32(bufp, fid, &fc->params.tattach.fid);
578 v9fs_put_int32(bufp, afid, &fc->params.tattach.afid);
579 v9fs_put_str(bufp, uname, &fc->params.tattach.uname);
580 v9fs_put_str(bufp, aname, &fc->params.tattach.aname);
581
582 error:
583 return fc;
584}
585
586struct v9fs_fcall *v9fs_create_tflush(u16 oldtag)
587{
588 int size;
589 struct v9fs_fcall *fc;
590 struct cbuf buffer;
591 struct cbuf *bufp = &buffer;
592
593 size = 2; /* oldtag[2] */
594 fc = v9fs_create_common(bufp, size, TFLUSH);
595 if (IS_ERR(fc))
596 goto error;
597
598 v9fs_put_int16(bufp, oldtag, &fc->params.tflush.oldtag);
599
600 if (buf_check_overflow(bufp)) {
601 kfree(fc);
602 fc = ERR_PTR(-ENOMEM);
603 }
604 error:
605 return fc;
606}
607
608struct v9fs_fcall *v9fs_create_twalk(u32 fid, u32 newfid, u16 nwname,
609 char **wnames)
610{
611 int i, size;
612 struct v9fs_fcall *fc;
613 struct cbuf buffer;
614 struct cbuf *bufp = &buffer;
615
616 if (nwname > V9FS_MAXWELEM) {
617 dprintk(DEBUG_ERROR, "nwname > %d\n", V9FS_MAXWELEM);
618 return NULL;
619 }
620
621 size = 4 + 4 + 2; /* fid[4] newfid[4] nwname[2] ... */
622 for (i = 0; i < nwname; i++) {
623 size += 2 + strlen(wnames[i]); /* wname[s] */
624 }
625
626 fc = v9fs_create_common(bufp, size, TWALK);
627 if (IS_ERR(fc))
628 goto error;
629
630 v9fs_put_int32(bufp, fid, &fc->params.twalk.fid);
631 v9fs_put_int32(bufp, newfid, &fc->params.twalk.newfid);
632 v9fs_put_int16(bufp, nwname, &fc->params.twalk.nwname);
633 for (i = 0; i < nwname; i++) {
634 v9fs_put_str(bufp, wnames[i], &fc->params.twalk.wnames[i]);
635 }
636
637 if (buf_check_overflow(bufp)) {
638 kfree(fc);
639 fc = ERR_PTR(-ENOMEM);
640 }
641 error:
642 return fc;
643}
644
645struct v9fs_fcall *v9fs_create_topen(u32 fid, u8 mode)
646{
647 int size;
648 struct v9fs_fcall *fc;
649 struct cbuf buffer;
650 struct cbuf *bufp = &buffer;
651
652 size = 4 + 1; /* fid[4] mode[1] */
653 fc = v9fs_create_common(bufp, size, TOPEN);
654 if (IS_ERR(fc))
655 goto error;
656
657 v9fs_put_int32(bufp, fid, &fc->params.topen.fid);
658 v9fs_put_int8(bufp, mode, &fc->params.topen.mode);
659
660 if (buf_check_overflow(bufp)) {
661 kfree(fc);
662 fc = ERR_PTR(-ENOMEM);
663 }
664 error:
665 return fc;
666}
667
668struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode,
669 char *extension, int extended)
670{
671 int size;
672 struct v9fs_fcall *fc;
673 struct cbuf buffer;
674 struct cbuf *bufp = &buffer;
675
676 size = 4 + 2 + strlen(name) + 4 + 1; /* fid[4] name[s] perm[4] mode[1] */
677 if (extended) {
678 size += 2 + /* extension[s] */
679 (extension == NULL ? 0 : strlen(extension));
680 }
681
682 fc = v9fs_create_common(bufp, size, TCREATE);
683 if (IS_ERR(fc))
684 goto error;
685
686 v9fs_put_int32(bufp, fid, &fc->params.tcreate.fid);
687 v9fs_put_str(bufp, name, &fc->params.tcreate.name);
688 v9fs_put_int32(bufp, perm, &fc->params.tcreate.perm);
689 v9fs_put_int8(bufp, mode, &fc->params.tcreate.mode);
690 if (extended)
691 v9fs_put_str(bufp, extension, &fc->params.tcreate.extension);
692
693 if (buf_check_overflow(bufp)) {
694 kfree(fc);
695 fc = ERR_PTR(-ENOMEM);
696 }
697 error:
698 return fc;
699}
700
701struct v9fs_fcall *v9fs_create_tread(u32 fid, u64 offset, u32 count)
702{
703 int size;
704 struct v9fs_fcall *fc;
705 struct cbuf buffer;
706 struct cbuf *bufp = &buffer;
707
708 size = 4 + 8 + 4; /* fid[4] offset[8] count[4] */
709 fc = v9fs_create_common(bufp, size, TREAD);
710 if (IS_ERR(fc))
711 goto error;
712
713 v9fs_put_int32(bufp, fid, &fc->params.tread.fid);
714 v9fs_put_int64(bufp, offset, &fc->params.tread.offset);
715 v9fs_put_int32(bufp, count, &fc->params.tread.count);
716
717 if (buf_check_overflow(bufp)) {
718 kfree(fc);
719 fc = ERR_PTR(-ENOMEM);
720 }
721 error:
722 return fc;
723}
724
725struct v9fs_fcall *v9fs_create_twrite(u32 fid, u64 offset, u32 count,
726 const char __user * data)
727{
728 int size, err;
729 struct v9fs_fcall *fc;
730 struct cbuf buffer;
731 struct cbuf *bufp = &buffer;
732
733 size = 4 + 8 + 4 + count; /* fid[4] offset[8] count[4] data[count] */
734 fc = v9fs_create_common(bufp, size, TWRITE);
735 if (IS_ERR(fc))
736 goto error;
737
738 v9fs_put_int32(bufp, fid, &fc->params.twrite.fid);
739 v9fs_put_int64(bufp, offset, &fc->params.twrite.offset);
740 v9fs_put_int32(bufp, count, &fc->params.twrite.count);
741 err = v9fs_put_user_data(bufp, data, count, &fc->params.twrite.data);
742 if (err) {
743 kfree(fc);
744 fc = ERR_PTR(err);
745 }
746
747 if (buf_check_overflow(bufp)) {
748 kfree(fc);
749 fc = ERR_PTR(-ENOMEM);
750 }
751 error:
752 return fc;
753}
754
755struct v9fs_fcall *v9fs_create_tclunk(u32 fid)
756{
757 int size;
758 struct v9fs_fcall *fc;
759 struct cbuf buffer;
760 struct cbuf *bufp = &buffer;
761
762 size = 4; /* fid[4] */
763 fc = v9fs_create_common(bufp, size, TCLUNK);
764 if (IS_ERR(fc))
765 goto error;
766
767 v9fs_put_int32(bufp, fid, &fc->params.tclunk.fid);
768
769 if (buf_check_overflow(bufp)) {
770 kfree(fc);
771 fc = ERR_PTR(-ENOMEM);
772 }
773 error:
774 return fc;
775}
776
777struct v9fs_fcall *v9fs_create_tremove(u32 fid)
778{
779 int size;
780 struct v9fs_fcall *fc;
781 struct cbuf buffer;
782 struct cbuf *bufp = &buffer;
783
784 size = 4; /* fid[4] */
785 fc = v9fs_create_common(bufp, size, TREMOVE);
786 if (IS_ERR(fc))
787 goto error;
788
789 v9fs_put_int32(bufp, fid, &fc->params.tremove.fid);
790
791 if (buf_check_overflow(bufp)) {
792 kfree(fc);
793 fc = ERR_PTR(-ENOMEM);
794 }
795 error:
796 return fc;
797}
798
799struct v9fs_fcall *v9fs_create_tstat(u32 fid)
800{
801 int size;
802 struct v9fs_fcall *fc;
803 struct cbuf buffer;
804 struct cbuf *bufp = &buffer;
805
806 size = 4; /* fid[4] */
807 fc = v9fs_create_common(bufp, size, TSTAT);
808 if (IS_ERR(fc))
809 goto error;
810
811 v9fs_put_int32(bufp, fid, &fc->params.tstat.fid);
812
813 if (buf_check_overflow(bufp)) {
814 kfree(fc);
815 fc = ERR_PTR(-ENOMEM);
816 }
817 error:
818 return fc;
819}
820
/*
 * Build a Twstat request writing metadata @wstat to @fid.  @extended
 * selects the 9P2000.u stat layout.  Returns the fcall or an ERR_PTR.
 */
struct v9fs_fcall *v9fs_create_twstat(u32 fid, struct v9fs_wstat *wstat,
				      int extended)
{
	int size, statsz;
	struct v9fs_fcall *fc;
	struct cbuf buffer;
	struct cbuf *bufp = &buffer;

	/* serialized length of the stat record itself */
	statsz = v9fs_size_wstat(wstat, extended);
	size = 4 + 2 + 2 + statsz;	/* fid[4] stat[n] */
	fc = v9fs_create_common(bufp, size, TWSTAT);
	if (IS_ERR(fc))
		goto error;

	v9fs_put_int32(bufp, fid, &fc->params.twstat.fid);
	/* outer length field wraps the stat record plus its own inner
	 * 2-byte size, hence statsz + 2 (matches the 2 + 2 in size above) */
	buf_put_int16(bufp, statsz + 2);
	v9fs_put_wstat(bufp, wstat, &fc->params.twstat.stat, statsz, extended);

	if (buf_check_overflow(bufp)) {
		kfree(fc);
		fc = ERR_PTR(-ENOMEM);
	}
      error:
	return fc;
}
diff --git a/fs/9p/conv.h b/fs/9p/conv.h
deleted file mode 100644
index dd5b6b1b610..00000000000
--- a/fs/9p/conv.h
+++ /dev/null
@@ -1,50 +0,0 @@
1/*
2 * linux/fs/9p/conv.h
3 *
4 * 9P protocol conversion definitions.
5 *
6 * Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
7 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
8 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2
12 * as published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to:
21 * Free Software Foundation
22 * 51 Franklin Street, Fifth Floor
23 * Boston, MA 02111-1301 USA
24 *
25 */
26
27int v9fs_deserialize_stat(void *buf, u32 buflen, struct v9fs_stat *stat,
28 int extended);
29int v9fs_deserialize_fcall(void *buf, u32 buflen, struct v9fs_fcall *rcall,
30 int extended);
31
32void v9fs_set_tag(struct v9fs_fcall *fc, u16 tag);
33
34struct v9fs_fcall *v9fs_create_tversion(u32 msize, char *version);
35struct v9fs_fcall *v9fs_create_tattach(u32 fid, u32 afid, char *uname,
36 char *aname);
37struct v9fs_fcall *v9fs_create_tflush(u16 oldtag);
38struct v9fs_fcall *v9fs_create_twalk(u32 fid, u32 newfid, u16 nwname,
39 char **wnames);
40struct v9fs_fcall *v9fs_create_topen(u32 fid, u8 mode);
41struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode,
42 char *extension, int extended);
43struct v9fs_fcall *v9fs_create_tread(u32 fid, u64 offset, u32 count);
44struct v9fs_fcall *v9fs_create_twrite(u32 fid, u64 offset, u32 count,
45 const char __user *data);
46struct v9fs_fcall *v9fs_create_tclunk(u32 fid);
47struct v9fs_fcall *v9fs_create_tremove(u32 fid);
48struct v9fs_fcall *v9fs_create_tstat(u32 fid);
49struct v9fs_fcall *v9fs_create_twstat(u32 fid, struct v9fs_wstat *wstat,
50 int extended);
diff --git a/fs/9p/debug.h b/fs/9p/debug.h
deleted file mode 100644
index 4228c0bb3c3..00000000000
--- a/fs/9p/debug.h
+++ /dev/null
@@ -1,77 +0,0 @@
1/*
2 * linux/fs/9p/debug.h - V9FS Debug Definitions
3 *
4 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
5 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2
9 * as published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to:
18 * Free Software Foundation
19 * 51 Franklin Street, Fifth Floor
20 * Boston, MA 02111-1301 USA
21 *
22 */
23
24#define DEBUG_ERROR (1<<0)
25#define DEBUG_CURRENT (1<<1)
26#define DEBUG_9P (1<<2)
27#define DEBUG_VFS (1<<3)
28#define DEBUG_CONV (1<<4)
29#define DEBUG_MUX (1<<5)
30#define DEBUG_TRANS (1<<6)
31#define DEBUG_SLABS (1<<7)
32#define DEBUG_FCALL (1<<8)
33
34#define DEBUG_DUMP_PKT 0
35
36extern int v9fs_debug_level;
37
38#define dprintk(level, format, arg...) \
39do { \
40 if((v9fs_debug_level & level)==level) \
41 printk(KERN_NOTICE "-- %s (%d): " \
42 format , __FUNCTION__, current->pid , ## arg); \
43} while(0)
44
45#define eprintk(level, format, arg...) \
46do { \
47 printk(level "v9fs: %s (%d): " \
48 format , __FUNCTION__, current->pid , ## arg); \
49} while(0)
50
#if DEBUG_DUMP_PKT
/* Hex-dump a packet via dprintk, 16 bytes per output line, grouped in
 * fours.  Compiled in only when DEBUG_DUMP_PKT is non-zero. */
static inline void dump_data(const unsigned char *data, unsigned int datalen)
{
	int i, n;
	char buf[5*8];	/* holds one formatted output line */

	n = 0;
	i = 0;
	while (i < datalen) {
		n += snprintf(buf+n, sizeof(buf)-n, "%02x", data[i++]);
		if (i%4 == 0)
			n += snprintf(buf+n, sizeof(buf)-n, " ");	/* space every 4 bytes */

		if (i%16 == 0) {
			dprintk(DEBUG_ERROR, "%s\n", buf);	/* emit a full line */
			n = 0;
		}
	}

	/* NOTE(review): when datalen is 0 or a multiple of 16, buf holds
	 * uninitialized or already-printed data here, so this trailing
	 * print emits garbage/duplicates - confirm before relying on it */
	dprintk(DEBUG_ERROR, "%s\n", buf);
}
#else /* DEBUG_DUMP_PKT */
/* No-op stub when packet dumping is compiled out. */
static inline void dump_data(const unsigned char *data, unsigned int datalen)
{

}
#endif /* DEBUG_DUMP_PKT */
diff --git a/fs/9p/error.c b/fs/9p/error.c
deleted file mode 100644
index 0d7fa4e0881..00000000000
--- a/fs/9p/error.c
+++ /dev/null
@@ -1,93 +0,0 @@
1/*
2 * linux/fs/9p/error.c
3 *
4 * Error string handling
5 *
6 * Plan 9 uses error strings, Unix uses error numbers. These functions
7 * try to help manage that and provide for dynamically adding error
8 * mappings.
9 *
10 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
11 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License version 2
15 * as published by the Free Software Foundation.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to:
24 * Free Software Foundation
25 * 51 Franklin Street, Fifth Floor
26 * Boston, MA 02111-1301 USA
27 *
28 */
29
30#include <linux/module.h>
31
32#include <linux/list.h>
33#include <linux/jhash.h>
34
35#include "debug.h"
36#include "error.h"
37
38/**
39 * v9fs_error_init - preload
40 * @errstr: error string
41 *
42 */
43
/* Populate hash_errmap from the static errmap[] table so that
 * v9fs_errstr2errno() can translate server error strings.  Always
 * returns 1 (callers appear to treat non-zero as success - TODO confirm). */
int v9fs_error_init(void)
{
	struct errormap *c;
	int bucket;

	/* initialize hash table */
	for (bucket = 0; bucket < ERRHASHSZ; bucket++)
		INIT_HLIST_HEAD(&hash_errmap[bucket]);

	/* load initial error map into hash table */
	for (c = errmap; c->name != NULL; c++) {
		c->namelen = strlen(c->name);	/* cached for memcmp lookups */
		bucket = jhash(c->name, c->namelen, 0) % ERRHASHSZ;
		INIT_HLIST_NODE(&c->list);
		hlist_add_head(&c->list, &hash_errmap[bucket]);
	}

	return 1;
}
63
64/**
65 * errstr2errno - convert error string to error number
66 * @errstr: error string
67 *
68 */
69
/* Translate a server error string of length @len into a negative errno
 * using the hash table built by v9fs_error_init().  Unknown strings are
 * logged and mapped to -1. */
int v9fs_errstr2errno(char *errstr, int len)
{
	int errno = 0;
	struct hlist_node *p = NULL;
	struct errormap *c = NULL;
	int bucket = jhash(errstr, len, 0) % ERRHASHSZ;	/* same hash as init */

	hlist_for_each_entry(c, p, &hash_errmap[bucket], list) {
		if (c->namelen==len && !memcmp(c->name, errstr, len)) {
			errno = c->val;
			break;
		}
	}

	if (errno == 0) {
		/* TODO: if error isn't found, add it dynamically */
		/* NOTE(review): writes one byte past @len - the caller's
		 * buffer must have room for this terminator; confirm */
		errstr[len] = 0;
		printk(KERN_ERR "%s: errstr :%s: not found\n", __FUNCTION__,
		       errstr);
		errno = 1;
	}

	return -errno;
}
diff --git a/fs/9p/error.h b/fs/9p/error.h
deleted file mode 100644
index 5f3ca522b31..00000000000
--- a/fs/9p/error.h
+++ /dev/null
@@ -1,177 +0,0 @@
1/*
2 * linux/fs/9p/error.h
3 *
4 * Huge Nasty Error Table
5 *
6 * Plan 9 uses error strings, Unix uses error numbers. This table tries to
7 * match UNIX strings and Plan 9 strings to unix error numbers. It is used
8 * to preload the dynamic error table which can also track user-specific error
9 * strings.
10 *
11 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
12 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
13 *
14 * This program is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License version 2
16 * as published by the Free Software Foundation.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to:
25 * Free Software Foundation
26 * 51 Franklin Street, Fifth Floor
27 * Boston, MA 02111-1301 USA
28 *
29 */
30
31#include <linux/errno.h>
32#include <asm/errno.h>
33
34struct errormap {
35 char *name;
36 int val;
37
38 int namelen;
39 struct hlist_node list;
40};
41
42#define ERRHASHSZ 32
43static struct hlist_head hash_errmap[ERRHASHSZ];
44
45/* FixMe - reduce to a reasonable size */
46static struct errormap errmap[] = {
47 {"Operation not permitted", EPERM},
48 {"wstat prohibited", EPERM},
49 {"No such file or directory", ENOENT},
50 {"directory entry not found", ENOENT},
51 {"file not found", ENOENT},
52 {"Interrupted system call", EINTR},
53 {"Input/output error", EIO},
54 {"No such device or address", ENXIO},
55 {"Argument list too long", E2BIG},
56 {"Bad file descriptor", EBADF},
57 {"Resource temporarily unavailable", EAGAIN},
58 {"Cannot allocate memory", ENOMEM},
59 {"Permission denied", EACCES},
60 {"Bad address", EFAULT},
61 {"Block device required", ENOTBLK},
62 {"Device or resource busy", EBUSY},
63 {"File exists", EEXIST},
64 {"Invalid cross-device link", EXDEV},
65 {"No such device", ENODEV},
66 {"Not a directory", ENOTDIR},
67 {"Is a directory", EISDIR},
68 {"Invalid argument", EINVAL},
69 {"Too many open files in system", ENFILE},
70 {"Too many open files", EMFILE},
71 {"Text file busy", ETXTBSY},
72 {"File too large", EFBIG},
73 {"No space left on device", ENOSPC},
74 {"Illegal seek", ESPIPE},
75 {"Read-only file system", EROFS},
76 {"Too many links", EMLINK},
77 {"Broken pipe", EPIPE},
78 {"Numerical argument out of domain", EDOM},
79 {"Numerical result out of range", ERANGE},
80 {"Resource deadlock avoided", EDEADLK},
81 {"File name too long", ENAMETOOLONG},
82 {"No locks available", ENOLCK},
83 {"Function not implemented", ENOSYS},
84 {"Directory not empty", ENOTEMPTY},
85 {"Too many levels of symbolic links", ELOOP},
86 {"No message of desired type", ENOMSG},
87 {"Identifier removed", EIDRM},
88 {"No data available", ENODATA},
89 {"Machine is not on the network", ENONET},
90 {"Package not installed", ENOPKG},
91 {"Object is remote", EREMOTE},
92 {"Link has been severed", ENOLINK},
93 {"Communication error on send", ECOMM},
94 {"Protocol error", EPROTO},
95 {"Bad message", EBADMSG},
96 {"File descriptor in bad state", EBADFD},
97 {"Streams pipe error", ESTRPIPE},
98 {"Too many users", EUSERS},
99 {"Socket operation on non-socket", ENOTSOCK},
100 {"Message too long", EMSGSIZE},
101 {"Protocol not available", ENOPROTOOPT},
102 {"Protocol not supported", EPROTONOSUPPORT},
103 {"Socket type not supported", ESOCKTNOSUPPORT},
104 {"Operation not supported", EOPNOTSUPP},
105 {"Protocol family not supported", EPFNOSUPPORT},
106 {"Network is down", ENETDOWN},
107 {"Network is unreachable", ENETUNREACH},
108 {"Network dropped connection on reset", ENETRESET},
109 {"Software caused connection abort", ECONNABORTED},
110 {"Connection reset by peer", ECONNRESET},
111 {"No buffer space available", ENOBUFS},
112 {"Transport endpoint is already connected", EISCONN},
113 {"Transport endpoint is not connected", ENOTCONN},
114 {"Cannot send after transport endpoint shutdown", ESHUTDOWN},
115 {"Connection timed out", ETIMEDOUT},
116 {"Connection refused", ECONNREFUSED},
117 {"Host is down", EHOSTDOWN},
118 {"No route to host", EHOSTUNREACH},
119 {"Operation already in progress", EALREADY},
120 {"Operation now in progress", EINPROGRESS},
121 {"Is a named type file", EISNAM},
122 {"Remote I/O error", EREMOTEIO},
123 {"Disk quota exceeded", EDQUOT},
124/* errors from fossil, vacfs, and u9fs */
125 {"fid unknown or out of range", EBADF},
126 {"permission denied", EACCES},
127 {"file does not exist", ENOENT},
128 {"authentication failed", ECONNREFUSED},
129 {"bad offset in directory read", ESPIPE},
130 {"bad use of fid", EBADF},
131 {"wstat can't convert between files and directories", EPERM},
132 {"directory is not empty", ENOTEMPTY},
133 {"file exists", EEXIST},
134 {"file already exists", EEXIST},
135 {"file or directory already exists", EEXIST},
136 {"fid already in use", EBADF},
137 {"file in use", ETXTBSY},
138 {"i/o error", EIO},
139 {"file already open for I/O", ETXTBSY},
140 {"illegal mode", EINVAL},
141 {"illegal name", ENAMETOOLONG},
142 {"not a directory", ENOTDIR},
143 {"not a member of proposed group", EPERM},
144 {"not owner", EACCES},
145 {"only owner can change group in wstat", EACCES},
146 {"read only file system", EROFS},
147 {"no access to special file", EPERM},
148 {"i/o count too large", EIO},
149 {"unknown group", EINVAL},
150 {"unknown user", EINVAL},
151 {"bogus wstat buffer", EPROTO},
152 {"exclusive use file already open", EAGAIN},
153 {"corrupted directory entry", EIO},
154 {"corrupted file entry", EIO},
155 {"corrupted block label", EIO},
156 {"corrupted meta data", EIO},
157 {"illegal offset", EINVAL},
158 {"illegal path element", ENOENT},
159 {"root of file system is corrupted", EIO},
160 {"corrupted super block", EIO},
161 {"protocol botch", EPROTO},
162 {"file system is full", ENOSPC},
163 {"file is in use", EAGAIN},
164 {"directory entry is not allocated", ENOENT},
165 {"file is read only", EROFS},
166 {"file has been removed", EIDRM},
167 {"only support truncation to zero length", EPERM},
168 {"cannot remove root", EPERM},
169 {"file too big", EFBIG},
170 {"venti i/o error", EIO},
171 /* these are not errors */
172 {"u9fs rhostsauth: no authentication required", 0},
173 {"u9fs authnone: no authentication required", 0},
174 {NULL, -1}
175};
176
177extern int v9fs_error_init(void);
diff --git a/fs/9p/fcall.c b/fs/9p/fcall.c
deleted file mode 100644
index dc336a67592..00000000000
--- a/fs/9p/fcall.c
+++ /dev/null
@@ -1,427 +0,0 @@
1/*
2 * linux/fs/9p/fcall.c
3 *
4 * This file contains functions to perform synchronous 9P calls
5 *
6 * Copyright (C) 2004 by Latchesar Ionkov <lucho@ionkov.net>
7 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
8 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2
12 * as published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to:
21 * Free Software Foundation
22 * 51 Franklin Street, Fifth Floor
23 * Boston, MA 02111-1301 USA
24 *
25 */
26
27#include <linux/module.h>
28#include <linux/errno.h>
29#include <linux/fs.h>
30#include <linux/sched.h>
31#include <linux/idr.h>
32
33#include "debug.h"
34#include "v9fs.h"
35#include "9p.h"
36#include "conv.h"
37#include "mux.h"
38
39/**
40 * v9fs_t_version - negotiate protocol parameters with sever
41 * @v9ses: 9P2000 session information
42 * @msize: requested max size packet
43 * @version: requested version.extension string
44 * @fcall: pointer to response fcall pointer
45 *
46 */
47
int
v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
	       char *version, struct v9fs_fcall **rcp)
{
	int ret;
	struct v9fs_fcall *tc;

	dprintk(DEBUG_9P, "msize: %d version: %s\n", msize, version);
	/* build the Tversion packet (ERR_PTR on allocation failure) */
	tc = v9fs_create_tversion(msize, version);

	if (!IS_ERR(tc)) {
		/* synchronous RPC; the response comes back through *rcp */
		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
		kfree(tc);
	} else
		ret = PTR_ERR(tc);

	return ret;
}
66
67/**
68 * v9fs_t_attach - mount the server
69 * @v9ses: 9P2000 session information
70 * @uname: user name doing the attach
71 * @aname: remote name being attached to
72 * @fid: mount fid to attatch to root node
73 * @afid: authentication fid (in this case result key)
74 * @fcall: pointer to response fcall pointer
75 *
76 */
77
int
v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname,
	      u32 fid, u32 afid, struct v9fs_fcall **rcp)
{
	int ret;
	struct v9fs_fcall* tc;

	dprintk(DEBUG_9P, "uname '%s' aname '%s' fid %d afid %d\n", uname,
		aname, fid, afid);

	/* build Tattach; afid carries the authentication fid */
	tc = v9fs_create_tattach(fid, afid, uname, aname);
	if (!IS_ERR(tc)) {
		/* synchronous RPC; response returned through *rcp */
		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
		kfree(tc);
	} else
		ret = PTR_ERR(tc);

	return ret;
}
97
/* Completion handler for Tclunk: frees both request and response, and
 * returns the fid to the session's id pool - but only when the server
 * actually answered with Rclunk, since otherwise the fid may still be
 * considered live on the server side. */
static void v9fs_t_clunk_cb(void *a, struct v9fs_fcall *tc,
	struct v9fs_fcall *rc, int err)
{
	int fid, id;
	struct v9fs_session_info *v9ses;

	id = 0;
	fid = tc->params.tclunk.fid;	/* remember before freeing tc */
	if (rc)
		id = rc->id;	/* response message type */

	kfree(tc);
	kfree(rc);
	if (id == RCLUNK) {
		v9ses = a;	/* opaque context is the session */
		v9fs_put_idpool(fid, &v9ses->fidpool);
	}
}
116
117/**
118 * v9fs_t_clunk - release a fid (finish a transaction)
119 * @v9ses: 9P2000 session information
120 * @fid: fid to release
121 * @fcall: pointer to response fcall pointer
122 *
123 */
124
125int
126v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid)
127{
128 int ret;
129 struct v9fs_fcall *tc, *rc;
130
131 dprintk(DEBUG_9P, "fid %d\n", fid);
132
133 rc = NULL;
134 tc = v9fs_create_tclunk(fid);
135 if (!IS_ERR(tc))
136 ret = v9fs_mux_rpc(v9ses->mux, tc, &rc);
137 else
138 ret = PTR_ERR(tc);
139
140 if (ret)
141 dprintk(DEBUG_ERROR, "failed fid %d err %d\n", fid, ret);
142
143 v9fs_t_clunk_cb(v9ses, tc, rc, ret);
144 return ret;
145}
146
147#if 0
148/**
149 * v9fs_v9fs_t_flush - flush a pending transaction
150 * @v9ses: 9P2000 session information
151 * @tag: tag to release
152 *
153 */
154int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 oldtag)
155{
156 int ret;
157 struct v9fs_fcall *tc;
158
159 dprintk(DEBUG_9P, "oldtag %d\n", oldtag);
160
161 tc = v9fs_create_tflush(oldtag);
162 if (!IS_ERR(tc)) {
163 ret = v9fs_mux_rpc(v9ses->mux, tc, NULL);
164 kfree(tc);
165 } else
166 ret = PTR_ERR(tc);
167
168 return ret;
169}
170#endif
171
172/**
173 * v9fs_t_stat - read a file's meta-data
174 * @v9ses: 9P2000 session information
175 * @fid: fid pointing to file or directory to get info about
176 * @fcall: pointer to response fcall
177 *
178 */
179
180int
181v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **rcp)
182{
183 int ret;
184 struct v9fs_fcall *tc;
185
186 dprintk(DEBUG_9P, "fid %d\n", fid);
187
188 ret = -ENOMEM;
189 tc = v9fs_create_tstat(fid);
190 if (!IS_ERR(tc)) {
191 ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
192 kfree(tc);
193 } else
194 ret = PTR_ERR(tc);
195
196 return ret;
197}
198
199/**
200 * v9fs_t_wstat - write a file's meta-data
201 * @v9ses: 9P2000 session information
202 * @fid: fid pointing to file or directory to write info about
203 * @stat: metadata
204 * @fcall: pointer to response fcall
205 *
206 */
207
int
v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid,
	     struct v9fs_wstat *wstat, struct v9fs_fcall **rcp)
{
	int ret;
	struct v9fs_fcall *tc;

	dprintk(DEBUG_9P, "fid %d\n", fid);

	/* v9ses->extended selects the 9P2000.u stat layout */
	tc = v9fs_create_twstat(fid, wstat, v9ses->extended);
	if (!IS_ERR(tc)) {
		/* synchronous RPC; response returned through *rcp */
		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
		kfree(tc);
	} else
		ret = PTR_ERR(tc);

	return ret;
}
226
227/**
228 * v9fs_t_walk - walk a fid to a new file or directory
229 * @v9ses: 9P2000 session information
230 * @fid: fid to walk
231 * @newfid: new fid (for clone operations)
232 * @name: path to walk fid to
233 * @fcall: pointer to response fcall
234 *
235 */
236
237/* TODO: support multiple walk */
238
int
v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid,
	    char *name, struct v9fs_fcall **rcp)
{
	int ret;
	struct v9fs_fcall *tc;
	int nwname;

	dprintk(DEBUG_9P, "fid %d newfid %d wname '%s'\n", fid, newfid, name);

	/* a NULL name means a zero-element walk, i.e. a plain fid clone */
	if (name)
		nwname = 1;
	else
		nwname = 0;

	tc = v9fs_create_twalk(fid, newfid, nwname, &name);
	if (!IS_ERR(tc)) {
		/* synchronous RPC; response returned through *rcp */
		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
		kfree(tc);
	} else
		ret = PTR_ERR(tc);

	return ret;
}
263
264/**
265 * v9fs_t_open - open a file
266 *
267 * @v9ses - 9P2000 session information
268 * @fid - fid to open
269 * @mode - mode to open file (R, RW, etc)
270 * @fcall - pointer to response fcall
271 *
272 */
273
int
v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode,
	    struct v9fs_fcall **rcp)
{
	int ret;
	struct v9fs_fcall *tc;

	dprintk(DEBUG_9P, "fid %d mode %d\n", fid, mode);

	/* build Topen for the already-walked fid */
	tc = v9fs_create_topen(fid, mode);
	if (!IS_ERR(tc)) {
		/* synchronous RPC; response returned through *rcp */
		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
		kfree(tc);
	} else
		ret = PTR_ERR(tc);

	return ret;
}
292
293/**
294 * v9fs_t_remove - remove a file or directory
295 * @v9ses: 9P2000 session information
296 * @fid: fid to remove
297 * @fcall: pointer to response fcall
298 *
299 */
300
int
v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid,
	      struct v9fs_fcall **rcp)
{
	int ret;
	struct v9fs_fcall *tc;

	dprintk(DEBUG_9P, "fid %d\n", fid);

	/* build Tremove; the server clunks the fid as a side effect */
	tc = v9fs_create_tremove(fid);
	if (!IS_ERR(tc)) {
		/* synchronous RPC; response returned through *rcp */
		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
		kfree(tc);
	} else
		ret = PTR_ERR(tc);

	return ret;
}
319
320/**
321 * v9fs_t_create - create a file or directory
322 * @v9ses: 9P2000 session information
323 * @fid: fid to create
324 * @name: name of the file or directory to create
325 * @perm: permissions to create with
326 * @mode: mode to open file (R, RW, etc)
327 * @fcall: pointer to response fcall
328 *
329 */
330
int
v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name, u32 perm,
	      u8 mode, char *extension, struct v9fs_fcall **rcp)
{
	int ret;
	struct v9fs_fcall *tc;

	dprintk(DEBUG_9P, "fid %d name '%s' perm %x mode %d\n",
		fid, name, perm, mode);

	/* extension is only marshalled when the session is 9P2000.u */
	tc = v9fs_create_tcreate(fid, name, perm, mode, extension,
		v9ses->extended);

	if (!IS_ERR(tc)) {
		/* synchronous RPC; response returned through *rcp */
		ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
		kfree(tc);
	} else
		ret = PTR_ERR(tc);

	return ret;
}
352
353/**
354 * v9fs_t_read - read data
355 * @v9ses: 9P2000 session information
356 * @fid: fid to read from
357 * @offset: offset to start read at
358 * @count: how many bytes to read
359 * @fcall: pointer to response fcall (with data)
360 *
361 */
362
363int
364v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, u64 offset,
365 u32 count, struct v9fs_fcall **rcp)
366{
367 int ret;
368 struct v9fs_fcall *tc, *rc;
369
370 dprintk(DEBUG_9P, "fid %d offset 0x%llux count 0x%x\n", fid,
371 (long long unsigned) offset, count);
372
373 tc = v9fs_create_tread(fid, offset, count);
374 if (!IS_ERR(tc)) {
375 ret = v9fs_mux_rpc(v9ses->mux, tc, &rc);
376 if (!ret)
377 ret = rc->params.rread.count;
378 if (rcp)
379 *rcp = rc;
380 else
381 kfree(rc);
382
383 kfree(tc);
384 } else
385 ret = PTR_ERR(tc);
386
387 return ret;
388}
389
390/**
391 * v9fs_t_write - write data
392 * @v9ses: 9P2000 session information
393 * @fid: fid to write to
394 * @offset: offset to start write at
395 * @count: how many bytes to write
396 * @fcall: pointer to response fcall
397 *
398 */
399
400int
401v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset, u32 count,
402 const char __user *data, struct v9fs_fcall **rcp)
403{
404 int ret;
405 struct v9fs_fcall *tc, *rc;
406
407 dprintk(DEBUG_9P, "fid %d offset 0x%llux count 0x%x\n", fid,
408 (long long unsigned) offset, count);
409
410 tc = v9fs_create_twrite(fid, offset, count, data);
411 if (!IS_ERR(tc)) {
412 ret = v9fs_mux_rpc(v9ses->mux, tc, &rc);
413
414 if (!ret)
415 ret = rc->params.rwrite.count;
416 if (rcp)
417 *rcp = rc;
418 else
419 kfree(rc);
420
421 kfree(tc);
422 } else
423 ret = PTR_ERR(tc);
424
425 return ret;
426}
427
diff --git a/fs/9p/fcprint.c b/fs/9p/fcprint.c
deleted file mode 100644
index 34b96114a28..00000000000
--- a/fs/9p/fcprint.c
+++ /dev/null
@@ -1,345 +0,0 @@
1/*
2 * linux/fs/9p/fcprint.c
3 *
4 * Print 9P call.
5 *
6 * Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2
10 * as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to:
19 * Free Software Foundation
20 * 51 Franklin Street, Fifth Floor
21 * Boston, MA 02111-1301 USA
22 *
23 */
24#include <linux/module.h>
25#include <linux/errno.h>
26#include <linux/fs.h>
27#include <linux/idr.h>
28
29#include "debug.h"
30#include "v9fs.h"
31#include "9p.h"
32#include "mux.h"
33
34static int
35v9fs_printqid(char *buf, int buflen, struct v9fs_qid *q)
36{
37 int n;
38 char b[10];
39
40 n = 0;
41 if (q->type & V9FS_QTDIR)
42 b[n++] = 'd';
43 if (q->type & V9FS_QTAPPEND)
44 b[n++] = 'a';
45 if (q->type & V9FS_QTAUTH)
46 b[n++] = 'A';
47 if (q->type & V9FS_QTEXCL)
48 b[n++] = 'l';
49 if (q->type & V9FS_QTTMP)
50 b[n++] = 't';
51 if (q->type & V9FS_QTSYMLINK)
52 b[n++] = 'L';
53 b[n] = '\0';
54
55 return scnprintf(buf, buflen, "(%.16llx %x %s)", (long long int) q->path,
56 q->version, b);
57}
58
59static int
60v9fs_printperm(char *buf, int buflen, int perm)
61{
62 int n;
63 char b[15];
64
65 n = 0;
66 if (perm & V9FS_DMDIR)
67 b[n++] = 'd';
68 if (perm & V9FS_DMAPPEND)
69 b[n++] = 'a';
70 if (perm & V9FS_DMAUTH)
71 b[n++] = 'A';
72 if (perm & V9FS_DMEXCL)
73 b[n++] = 'l';
74 if (perm & V9FS_DMTMP)
75 b[n++] = 't';
76 if (perm & V9FS_DMDEVICE)
77 b[n++] = 'D';
78 if (perm & V9FS_DMSOCKET)
79 b[n++] = 'S';
80 if (perm & V9FS_DMNAMEDPIPE)
81 b[n++] = 'P';
82 if (perm & V9FS_DMSYMLINK)
83 b[n++] = 'L';
84 b[n] = '\0';
85
86 return scnprintf(buf, buflen, "%s%03o", b, perm&077);
87}
88
/*
 * v9fs_printstat - format a v9fs_stat for debug output.
 *
 * Layout: 'name' 'uid'[(n_uid)] 'gid'[(n_gid)] 'muid'[(n_muid)]
 *         q <qid> m <perm> at <atime> mt <mtime> l <length> [ext '...']
 * The numeric ids and the extension string are only emitted when
 * @extended is set (9P2000.u session).
 * Returns the number of characters written (scnprintf semantics; each
 * append is bounded by the remaining space buflen-n).
 */
static int
v9fs_printstat(char *buf, int buflen, struct v9fs_stat *st, int extended)
{
	int n;

	n = scnprintf(buf, buflen, "'%.*s' '%.*s'", st->name.len,
		st->name.str, st->uid.len, st->uid.str);
	if (extended)
		n += scnprintf(buf+n, buflen-n, "(%d)", st->n_uid);

	n += scnprintf(buf+n, buflen-n, " '%.*s'", st->gid.len, st->gid.str);
	if (extended)
		n += scnprintf(buf+n, buflen-n, "(%d)", st->n_gid);

	n += scnprintf(buf+n, buflen-n, " '%.*s'", st->muid.len, st->muid.str);
	if (extended)
		n += scnprintf(buf+n, buflen-n, "(%d)", st->n_muid);

	n += scnprintf(buf+n, buflen-n, " q ");
	n += v9fs_printqid(buf+n, buflen-n, &st->qid);
	n += scnprintf(buf+n, buflen-n, " m ");
	n += v9fs_printperm(buf+n, buflen-n, st->mode);
	n += scnprintf(buf+n, buflen-n, " at %d mt %d l %lld",
		st->atime, st->mtime, (long long int) st->length);

	if (extended)
		n += scnprintf(buf+n, buflen-n, " ext '%.*s'",
			st->extension.len, st->extension.str);

	return n;
}
120
121static int
122v9fs_dumpdata(char *buf, int buflen, u8 *data, int datalen)
123{
124 int i, n;
125
126 i = n = 0;
127 while (i < datalen) {
128 n += scnprintf(buf + n, buflen - n, "%02x", data[i]);
129 if (i%4 == 3)
130 n += scnprintf(buf + n, buflen - n, " ");
131 if (i%32 == 31)
132 n += scnprintf(buf + n, buflen - n, "\n");
133
134 i++;
135 }
136 n += scnprintf(buf + n, buflen - n, "\n");
137
138 return n;
139}
140
141static int
142v9fs_printdata(char *buf, int buflen, u8 *data, int datalen)
143{
144 return v9fs_dumpdata(buf, buflen, data, datalen<16?datalen:16);
145}
146
147int
148v9fs_printfcall(char *buf, int buflen, struct v9fs_fcall *fc, int extended)
149{
150 int i, ret, type, tag;
151
152 if (!fc)
153 return scnprintf(buf, buflen, "<NULL>");
154
155 type = fc->id;
156 tag = fc->tag;
157
158 ret = 0;
159 switch (type) {
160 case TVERSION:
161 ret += scnprintf(buf+ret, buflen-ret,
162 "Tversion tag %u msize %u version '%.*s'", tag,
163 fc->params.tversion.msize, fc->params.tversion.version.len,
164 fc->params.tversion.version.str);
165 break;
166
167 case RVERSION:
168 ret += scnprintf(buf+ret, buflen-ret,
169 "Rversion tag %u msize %u version '%.*s'", tag,
170 fc->params.rversion.msize, fc->params.rversion.version.len,
171 fc->params.rversion.version.str);
172 break;
173
174 case TAUTH:
175 ret += scnprintf(buf+ret, buflen-ret,
176 "Tauth tag %u afid %d uname '%.*s' aname '%.*s'", tag,
177 fc->params.tauth.afid, fc->params.tauth.uname.len,
178 fc->params.tauth.uname.str, fc->params.tauth.aname.len,
179 fc->params.tauth.aname.str);
180 break;
181
182 case RAUTH:
183 ret += scnprintf(buf+ret, buflen-ret, "Rauth tag %u qid ", tag);
184 v9fs_printqid(buf+ret, buflen-ret, &fc->params.rauth.qid);
185 break;
186
187 case TATTACH:
188 ret += scnprintf(buf+ret, buflen-ret,
189 "Tattach tag %u fid %d afid %d uname '%.*s' aname '%.*s'",
190 tag, fc->params.tattach.fid, fc->params.tattach.afid,
191 fc->params.tattach.uname.len, fc->params.tattach.uname.str,
192 fc->params.tattach.aname.len, fc->params.tattach.aname.str);
193 break;
194
195 case RATTACH:
196 ret += scnprintf(buf+ret, buflen-ret, "Rattach tag %u qid ", tag);
197 v9fs_printqid(buf+ret, buflen-ret, &fc->params.rattach.qid);
198 break;
199
200 case RERROR:
201 ret += scnprintf(buf+ret, buflen-ret, "Rerror tag %u ename '%.*s'",
202 tag, fc->params.rerror.error.len,
203 fc->params.rerror.error.str);
204 if (extended)
205 ret += scnprintf(buf+ret, buflen-ret, " ecode %d\n",
206 fc->params.rerror.errno);
207 break;
208
209 case TFLUSH:
210 ret += scnprintf(buf+ret, buflen-ret, "Tflush tag %u oldtag %u",
211 tag, fc->params.tflush.oldtag);
212 break;
213
214 case RFLUSH:
215 ret += scnprintf(buf+ret, buflen-ret, "Rflush tag %u", tag);
216 break;
217
218 case TWALK:
219 ret += scnprintf(buf+ret, buflen-ret,
220 "Twalk tag %u fid %d newfid %d nwname %d", tag,
221 fc->params.twalk.fid, fc->params.twalk.newfid,
222 fc->params.twalk.nwname);
223 for(i = 0; i < fc->params.twalk.nwname; i++)
224 ret += scnprintf(buf+ret, buflen-ret," '%.*s'",
225 fc->params.twalk.wnames[i].len,
226 fc->params.twalk.wnames[i].str);
227 break;
228
229 case RWALK:
230 ret += scnprintf(buf+ret, buflen-ret, "Rwalk tag %u nwqid %d",
231 tag, fc->params.rwalk.nwqid);
232 for(i = 0; i < fc->params.rwalk.nwqid; i++)
233 ret += v9fs_printqid(buf+ret, buflen-ret,
234 &fc->params.rwalk.wqids[i]);
235 break;
236
237 case TOPEN:
238 ret += scnprintf(buf+ret, buflen-ret,
239 "Topen tag %u fid %d mode %d", tag,
240 fc->params.topen.fid, fc->params.topen.mode);
241 break;
242
243 case ROPEN:
244 ret += scnprintf(buf+ret, buflen-ret, "Ropen tag %u", tag);
245 ret += v9fs_printqid(buf+ret, buflen-ret, &fc->params.ropen.qid);
246 ret += scnprintf(buf+ret, buflen-ret," iounit %d",
247 fc->params.ropen.iounit);
248 break;
249
250 case TCREATE:
251 ret += scnprintf(buf+ret, buflen-ret,
252 "Tcreate tag %u fid %d name '%.*s' perm ", tag,
253 fc->params.tcreate.fid, fc->params.tcreate.name.len,
254 fc->params.tcreate.name.str);
255
256 ret += v9fs_printperm(buf+ret, buflen-ret, fc->params.tcreate.perm);
257 ret += scnprintf(buf+ret, buflen-ret, " mode %d",
258 fc->params.tcreate.mode);
259 break;
260
261 case RCREATE:
262 ret += scnprintf(buf+ret, buflen-ret, "Rcreate tag %u", tag);
263 ret += v9fs_printqid(buf+ret, buflen-ret, &fc->params.rcreate.qid);
264 ret += scnprintf(buf+ret, buflen-ret, " iounit %d",
265 fc->params.rcreate.iounit);
266 break;
267
268 case TREAD:
269 ret += scnprintf(buf+ret, buflen-ret,
270 "Tread tag %u fid %d offset %lld count %u", tag,
271 fc->params.tread.fid,
272 (long long int) fc->params.tread.offset,
273 fc->params.tread.count);
274 break;
275
276 case RREAD:
277 ret += scnprintf(buf+ret, buflen-ret,
278 "Rread tag %u count %u data ", tag,
279 fc->params.rread.count);
280 ret += v9fs_printdata(buf+ret, buflen-ret, fc->params.rread.data,
281 fc->params.rread.count);
282 break;
283
284 case TWRITE:
285 ret += scnprintf(buf+ret, buflen-ret,
286 "Twrite tag %u fid %d offset %lld count %u data ",
287 tag, fc->params.twrite.fid,
288 (long long int) fc->params.twrite.offset,
289 fc->params.twrite.count);
290 ret += v9fs_printdata(buf+ret, buflen-ret, fc->params.twrite.data,
291 fc->params.twrite.count);
292 break;
293
294 case RWRITE:
295 ret += scnprintf(buf+ret, buflen-ret, "Rwrite tag %u count %u",
296 tag, fc->params.rwrite.count);
297 break;
298
299 case TCLUNK:
300 ret += scnprintf(buf+ret, buflen-ret, "Tclunk tag %u fid %d",
301 tag, fc->params.tclunk.fid);
302 break;
303
304 case RCLUNK:
305 ret += scnprintf(buf+ret, buflen-ret, "Rclunk tag %u", tag);
306 break;
307
308 case TREMOVE:
309 ret += scnprintf(buf+ret, buflen-ret, "Tremove tag %u fid %d",
310 tag, fc->params.tremove.fid);
311 break;
312
313 case RREMOVE:
314 ret += scnprintf(buf+ret, buflen-ret, "Rremove tag %u", tag);
315 break;
316
317 case TSTAT:
318 ret += scnprintf(buf+ret, buflen-ret, "Tstat tag %u fid %d",
319 tag, fc->params.tstat.fid);
320 break;
321
322 case RSTAT:
323 ret += scnprintf(buf+ret, buflen-ret, "Rstat tag %u ", tag);
324 ret += v9fs_printstat(buf+ret, buflen-ret, &fc->params.rstat.stat,
325 extended);
326 break;
327
328 case TWSTAT:
329 ret += scnprintf(buf+ret, buflen-ret, "Twstat tag %u fid %d ",
330 tag, fc->params.twstat.fid);
331 ret += v9fs_printstat(buf+ret, buflen-ret, &fc->params.twstat.stat,
332 extended);
333 break;
334
335 case RWSTAT:
336 ret += scnprintf(buf+ret, buflen-ret, "Rwstat tag %u", tag);
337 break;
338
339 default:
340 ret += scnprintf(buf+ret, buflen-ret, "unknown type %d", type);
341 break;
342 }
343
344 return ret;
345}
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 90419715c7e..08fa320b7e6 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -26,10 +26,10 @@
26#include <linux/sched.h> 26#include <linux/sched.h>
27#include <linux/idr.h> 27#include <linux/idr.h>
28#include <asm/semaphore.h> 28#include <asm/semaphore.h>
29#include <net/9p/9p.h>
30#include <net/9p/client.h>
29 31
30#include "debug.h"
31#include "v9fs.h" 32#include "v9fs.h"
32#include "9p.h"
33#include "v9fs_vfs.h" 33#include "v9fs_vfs.h"
34#include "fid.h" 34#include "fid.h"
35 35
@@ -40,67 +40,29 @@
40 * 40 *
41 */ 41 */
42 42
43int v9fs_fid_insert(struct v9fs_fid *fid, struct dentry *dentry) 43int v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid)
44{ 44{
45 struct list_head *fid_list = (struct list_head *)dentry->d_fsdata; 45 struct v9fs_dentry *dent;
46 dprintk(DEBUG_9P, "fid %d (%p) dentry %s (%p)\n", fid->fid, fid,
47 dentry->d_iname, dentry);
48 if (dentry->d_fsdata == NULL) {
49 dentry->d_fsdata =
50 kmalloc(sizeof(struct list_head), GFP_KERNEL);
51 if (dentry->d_fsdata == NULL) {
52 dprintk(DEBUG_ERROR, "Out of memory\n");
53 return -ENOMEM;
54 }
55 fid_list = (struct list_head *)dentry->d_fsdata;
56 INIT_LIST_HEAD(fid_list); /* Initialize list head */
57 }
58 46
59 fid->uid = current->uid; 47 P9_DPRINTK(P9_DEBUG_VFS, "fid %d dentry %s\n",
60 list_add(&fid->list, fid_list); 48 fid->fid, dentry->d_iname);
61 return 0;
62}
63 49
64/** 50 dent = dentry->d_fsdata;
65 * v9fs_fid_create - allocate a FID structure 51 if (!dent) {
66 * @dentry - dentry to link newly created fid to 52 dent = kmalloc(sizeof(struct v9fs_dentry), GFP_KERNEL);
67 * 53 if (!dent)
68 */ 54 return -ENOMEM;
69
70struct v9fs_fid *v9fs_fid_create(struct v9fs_session_info *v9ses, int fid)
71{
72 struct v9fs_fid *new;
73 55
74 dprintk(DEBUG_9P, "fid create fid %d\n", fid); 56 spin_lock_init(&dent->lock);
75 new = kmalloc(sizeof(struct v9fs_fid), GFP_KERNEL); 57 INIT_LIST_HEAD(&dent->fidlist);
76 if (new == NULL) { 58 dentry->d_fsdata = dent;
77 dprintk(DEBUG_ERROR, "Out of Memory\n");
78 return ERR_PTR(-ENOMEM);
79 } 59 }
80 60
81 new->fid = fid; 61 spin_lock(&dent->lock);
82 new->v9ses = v9ses; 62 list_add(&fid->dlist, &dent->fidlist);
83 new->fidopen = 0; 63 spin_unlock(&dent->lock);
84 new->fidclunked = 0;
85 new->iounit = 0;
86 new->rdir_pos = 0;
87 new->rdir_fcall = NULL;
88 init_MUTEX(&new->lock);
89 INIT_LIST_HEAD(&new->list);
90
91 return new;
92}
93
94/**
95 * v9fs_fid_destroy - deallocate a FID structure
96 * @fid: fid to destroy
97 *
98 */
99 64
100void v9fs_fid_destroy(struct v9fs_fid *fid) 65 return 0;
101{
102 list_del(&fid->list);
103 kfree(fid);
104} 66}
105 67
106/** 68/**
@@ -114,30 +76,42 @@ void v9fs_fid_destroy(struct v9fs_fid *fid)
114 * 76 *
115 */ 77 */
116 78
117struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry) 79struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
118{ 80{
119 struct list_head *fid_list = (struct list_head *)dentry->d_fsdata; 81 struct v9fs_dentry *dent;
120 struct v9fs_fid *return_fid = NULL; 82 struct p9_fid *fid;
121 83
122 dprintk(DEBUG_9P, " dentry: %s (%p)\n", dentry->d_iname, dentry); 84 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
123 85 dent = dentry->d_fsdata;
124 if (fid_list) 86 if (dent)
125 return_fid = list_entry(fid_list->next, struct v9fs_fid, list); 87 fid = list_entry(dent->fidlist.next, struct p9_fid, dlist);
88 else
89 fid = ERR_PTR(-EBADF);
90
91 P9_DPRINTK(P9_DEBUG_VFS, " fid: %p\n", fid);
92 return fid;
93}
126 94
127 if (!return_fid) { 95struct p9_fid *v9fs_fid_lookup_remove(struct dentry *dentry)
128 dprintk(DEBUG_ERROR, "Couldn't find a fid in dentry\n"); 96{
129 return_fid = ERR_PTR(-EBADF); 97 struct p9_fid *fid;
98 struct v9fs_dentry *dent;
99
100 dent = dentry->d_fsdata;
101 fid = v9fs_fid_lookup(dentry);
102 if (!IS_ERR(fid)) {
103 spin_lock(&dent->lock);
104 list_del(&fid->dlist);
105 spin_unlock(&dent->lock);
130 } 106 }
131 107
132 if(down_interruptible(&return_fid->lock)) 108 return fid;
133 return ERR_PTR(-EINTR);
134
135 return return_fid;
136} 109}
137 110
111
138/** 112/**
139 * v9fs_fid_clone - lookup the fid for a dentry, clone a private copy and 113 * v9fs_fid_clone - lookup the fid for a dentry, clone a private copy and
140 * release it 114 * release it
141 * @dentry: dentry to look for fid in 115 * @dentry: dentry to look for fid in
142 * 116 *
143 * find a fid in the dentry and then clone to a new private fid 117 * find a fid in the dentry and then clone to a new private fid
@@ -146,49 +120,15 @@ struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry)
146 * 120 *
147 */ 121 */
148 122
149struct v9fs_fid *v9fs_fid_clone(struct dentry *dentry) 123struct p9_fid *v9fs_fid_clone(struct dentry *dentry)
150{ 124{
151 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode); 125 struct p9_fid *ofid, *fid;
152 struct v9fs_fid *base_fid, *new_fid = ERR_PTR(-EBADF);
153 struct v9fs_fcall *fcall = NULL;
154 int fid, err;
155
156 base_fid = v9fs_fid_lookup(dentry);
157
158 if(IS_ERR(base_fid))
159 return base_fid;
160
161 if(base_fid) { /* clone fid */
162 fid = v9fs_get_idpool(&v9ses->fidpool);
163 if (fid < 0) {
164 eprintk(KERN_WARNING, "newfid fails!\n");
165 new_fid = ERR_PTR(-ENOSPC);
166 goto Release_Fid;
167 }
168
169 err = v9fs_t_walk(v9ses, base_fid->fid, fid, NULL, &fcall);
170 if (err < 0) {
171 dprintk(DEBUG_ERROR, "clone walk didn't work\n");
172 v9fs_put_idpool(fid, &v9ses->fidpool);
173 new_fid = ERR_PTR(err);
174 goto Free_Fcall;
175 }
176 new_fid = v9fs_fid_create(v9ses, fid);
177 if (new_fid == NULL) {
178 dprintk(DEBUG_ERROR, "out of memory\n");
179 new_fid = ERR_PTR(-ENOMEM);
180 }
181Free_Fcall:
182 kfree(fcall);
183 }
184 126
185Release_Fid: 127 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
186 up(&base_fid->lock); 128 ofid = v9fs_fid_lookup(dentry);
187 return new_fid; 129 if (IS_ERR(ofid))
188} 130 return ofid;
189 131
190void v9fs_fid_clunk(struct v9fs_session_info *v9ses, struct v9fs_fid *fid) 132 fid = p9_client_walk(ofid, 0, NULL, 1);
191{ 133 return fid;
192 v9fs_t_clunk(v9ses, fid->fid);
193 v9fs_fid_destroy(fid);
194} 134}
diff --git a/fs/9p/fid.h b/fs/9p/fid.h
index 48fc170c26c..47a0ba74287 100644
--- a/fs/9p/fid.h
+++ b/fs/9p/fid.h
@@ -22,41 +22,12 @@
22 22
23#include <linux/list.h> 23#include <linux/list.h>
24 24
25#define FID_OP 0 25struct v9fs_dentry {
26#define FID_WALK 1 26 spinlock_t lock; /* protect fidlist */
27#define FID_CREATE 2 27 struct list_head fidlist;
28
29struct v9fs_fid {
30 struct list_head list; /* list of fids associated with a dentry */
31 struct list_head active; /* XXX - debug */
32
33 struct semaphore lock;
34
35 u32 fid;
36 unsigned char fidopen; /* set when fid is opened */
37 unsigned char fidclunked; /* set when fid has already been clunked */
38
39 struct v9fs_qid qid;
40 u32 iounit;
41
42 /* readdir stuff */
43 int rdir_fpos;
44 loff_t rdir_pos;
45 struct v9fs_fcall *rdir_fcall;
46
47 /* management stuff */
48 uid_t uid; /* user associated with this fid */
49
50 /* private data */
51 struct file *filp; /* backpointer to File struct for open files */
52 struct v9fs_session_info *v9ses; /* session info for this FID */
53}; 28};
54 29
55struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry); 30struct p9_fid *v9fs_fid_lookup(struct dentry *dentry);
56struct v9fs_fid *v9fs_fid_get_created(struct dentry *); 31struct p9_fid *v9fs_fid_lookup_remove(struct dentry *dentry);
57void v9fs_fid_destroy(struct v9fs_fid *fid); 32struct p9_fid *v9fs_fid_clone(struct dentry *dentry);
58struct v9fs_fid *v9fs_fid_create(struct v9fs_session_info *, int fid); 33int v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid);
59int v9fs_fid_insert(struct v9fs_fid *fid, struct dentry *dentry);
60struct v9fs_fid *v9fs_fid_clone(struct dentry *dentry);
61void v9fs_fid_clunk(struct v9fs_session_info *v9ses, struct v9fs_fid *fid);
62
diff --git a/fs/9p/mux.c b/fs/9p/mux.c
deleted file mode 100644
index c783874a9ca..00000000000
--- a/fs/9p/mux.c
+++ /dev/null
@@ -1,1033 +0,0 @@
1/*
2 * linux/fs/9p/mux.c
3 *
4 * Protocol Multiplexer
5 *
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 * Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2
11 * as published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to:
20 * Free Software Foundation
21 * 51 Franklin Street, Fifth Floor
22 * Boston, MA 02111-1301 USA
23 *
24 */
25
26#include <linux/module.h>
27#include <linux/errno.h>
28#include <linux/fs.h>
29#include <linux/poll.h>
30#include <linux/kthread.h>
31#include <linux/idr.h>
32#include <linux/mutex.h>
33
34#include "debug.h"
35#include "v9fs.h"
36#include "9p.h"
37#include "conv.h"
38#include "transport.h"
39#include "mux.h"
40
#define ERREQFLUSH 1		/* marks a request cancelled by Tflush */
#define SCHED_TIMEOUT 10	/* poll-task sleep interval, in seconds */
#define MAXPOLLWADDR 2		/* wait queues a transport may register */

/* per-mux scheduling state, kept as bits in v9fs_mux_data.wsched */
enum {
	Rworksched = 1,		/* read work scheduled or running */
	Rpending = 2,		/* can read */
	Wworksched = 4,		/* write work scheduled or running */
	Wpending = 8,		/* can write */
};

/* flush progression for an in-flight request */
enum {
	None,
	Flushing,
	Flushed,
};

struct v9fs_mux_poll_task;

/* one outstanding 9P request on a mux */
struct v9fs_req {
	spinlock_t lock;
	int tag;			/* 9P tag identifying this request */
	struct v9fs_fcall *tcall;	/* transmit fcall */
	struct v9fs_fcall *rcall;	/* reply fcall, once received */
	int err;			/* completion status / ERREQFLUSH */
	v9fs_mux_req_callback cb;	/* completion callback (may be NULL) */
	void *cba;			/* opaque argument for cb */
	int flush;			/* None/Flushing/Flushed state */
	struct list_head req_list;	/* on req_list or unsent_req_list */
};

/* per-session multiplexer state */
struct v9fs_mux_data {
	spinlock_t lock;		/* protects the request lists */
	struct list_head mux_list;	/* link on the owning poll task */
	struct v9fs_mux_poll_task *poll_task;
	int msize;			/* maximum message size */
	unsigned char *extended;	/* points at the session's 9P2000.u flag */
	struct v9fs_transport *trans;
	struct v9fs_idpool tagpool;	/* allocator for 9P tags */
	int err;			/* sticky session error, 0 if healthy */
	wait_queue_head_t equeue;	/* woken when req_list drains */
	struct list_head req_list;	/* requests sent, awaiting replies */
	struct list_head unsent_req_list;	/* queued, not yet written */
	struct v9fs_fcall *rcall;	/* reply currently being assembled */
	int rpos;			/* bytes of rbuf filled so far */
	char *rbuf;			/* receive buffer (inside rcall) */
	int wpos;			/* bytes of wbuf already written */
	int wsize;			/* total bytes to write, 0 = idle */
	char *wbuf;			/* transmit buffer (tcall->sdata) */
	wait_queue_t poll_wait[MAXPOLLWADDR];
	wait_queue_head_t *poll_waddr[MAXPOLLWADDR];
	poll_table pt;
	struct work_struct rq;		/* read work item */
	struct work_struct wq;		/* write work item */
	unsigned long wsched;		/* R/W pending+scheduled bits above */
};

/* one kernel thread polling a set of muxes */
struct v9fs_mux_poll_task {
	struct task_struct *task;
	struct list_head mux_list;	/* muxes served by this task */
	int muxnum;			/* number of muxes on mux_list */
};

/* bookkeeping for a synchronous v9fs_mux_rpc call */
struct v9fs_mux_rpc {
	struct v9fs_mux_data *m;
	int err;
	struct v9fs_fcall *tcall;
	struct v9fs_fcall *rcall;
	wait_queue_head_t wqueue;	/* woken when the reply arrives */
};

static int v9fs_poll_proc(void *);
static void v9fs_read_work(struct work_struct *work);
static void v9fs_write_work(struct work_struct *work);
static void v9fs_pollwait(struct file *filp, wait_queue_head_t * wait_address,
			  poll_table * p);
static u16 v9fs_mux_get_tag(struct v9fs_mux_data *);
static void v9fs_mux_put_tag(struct v9fs_mux_data *, u16);

/* serializes changes to the poll-task table and the counters below */
static DEFINE_MUTEX(v9fs_mux_task_lock);
static struct workqueue_struct *v9fs_mux_wq;

static int v9fs_mux_num;		/* total mounted muxes */
static int v9fs_mux_poll_task_num;	/* running poll tasks */
static struct v9fs_mux_poll_task v9fs_mux_poll_tasks[100];
126
127int v9fs_mux_global_init(void)
128{
129 int i;
130
131 for (i = 0; i < ARRAY_SIZE(v9fs_mux_poll_tasks); i++)
132 v9fs_mux_poll_tasks[i].task = NULL;
133
134 v9fs_mux_wq = create_workqueue("v9fs");
135 if (!v9fs_mux_wq) {
136 printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n");
137 return -ENOMEM;
138 }
139
140 return 0;
141}
142
/* Tear down the shared workqueue created by v9fs_mux_global_init(). */
void v9fs_mux_global_exit(void)
{
	destroy_workqueue(v9fs_mux_wq);
}
147
148/**
149 * v9fs_mux_calc_poll_procs - calculates the number of polling procs
150 * based on the number of mounted v9fs filesystems.
151 *
152 * The current implementation returns sqrt of the number of mounts.
153 */
154static int v9fs_mux_calc_poll_procs(int muxnum)
155{
156 int n;
157
158 if (v9fs_mux_poll_task_num)
159 n = muxnum / v9fs_mux_poll_task_num +
160 (muxnum % v9fs_mux_poll_task_num ? 1 : 0);
161 else
162 n = 1;
163
164 if (n > ARRAY_SIZE(v9fs_mux_poll_tasks))
165 n = ARRAY_SIZE(v9fs_mux_poll_tasks);
166
167 return n;
168}
169
170static int v9fs_mux_poll_start(struct v9fs_mux_data *m)
171{
172 int i, n;
173 struct v9fs_mux_poll_task *vpt, *vptlast;
174 struct task_struct *pproc;
175
176 dprintk(DEBUG_MUX, "mux %p muxnum %d procnum %d\n", m, v9fs_mux_num,
177 v9fs_mux_poll_task_num);
178 mutex_lock(&v9fs_mux_task_lock);
179
180 n = v9fs_mux_calc_poll_procs(v9fs_mux_num + 1);
181 if (n > v9fs_mux_poll_task_num) {
182 for (i = 0; i < ARRAY_SIZE(v9fs_mux_poll_tasks); i++) {
183 if (v9fs_mux_poll_tasks[i].task == NULL) {
184 vpt = &v9fs_mux_poll_tasks[i];
185 dprintk(DEBUG_MUX, "create proc %p\n", vpt);
186 pproc = kthread_create(v9fs_poll_proc, vpt,
187 "v9fs-poll");
188
189 if (!IS_ERR(pproc)) {
190 vpt->task = pproc;
191 INIT_LIST_HEAD(&vpt->mux_list);
192 vpt->muxnum = 0;
193 v9fs_mux_poll_task_num++;
194 wake_up_process(vpt->task);
195 }
196 break;
197 }
198 }
199
200 if (i >= ARRAY_SIZE(v9fs_mux_poll_tasks))
201 dprintk(DEBUG_ERROR, "warning: no free poll slots\n");
202 }
203
204 n = (v9fs_mux_num + 1) / v9fs_mux_poll_task_num +
205 ((v9fs_mux_num + 1) % v9fs_mux_poll_task_num ? 1 : 0);
206
207 vptlast = NULL;
208 for (i = 0; i < ARRAY_SIZE(v9fs_mux_poll_tasks); i++) {
209 vpt = &v9fs_mux_poll_tasks[i];
210 if (vpt->task != NULL) {
211 vptlast = vpt;
212 if (vpt->muxnum < n) {
213 dprintk(DEBUG_MUX, "put in proc %d\n", i);
214 list_add(&m->mux_list, &vpt->mux_list);
215 vpt->muxnum++;
216 m->poll_task = vpt;
217 memset(&m->poll_waddr, 0, sizeof(m->poll_waddr));
218 init_poll_funcptr(&m->pt, v9fs_pollwait);
219 break;
220 }
221 }
222 }
223
224 if (i >= ARRAY_SIZE(v9fs_mux_poll_tasks)) {
225 if (vptlast == NULL)
226 return -ENOMEM;
227
228 dprintk(DEBUG_MUX, "put in proc %d\n", i);
229 list_add(&m->mux_list, &vptlast->mux_list);
230 vptlast->muxnum++;
231 m->poll_task = vptlast;
232 memset(&m->poll_waddr, 0, sizeof(m->poll_waddr));
233 init_poll_funcptr(&m->pt, v9fs_pollwait);
234 }
235
236 v9fs_mux_num++;
237 mutex_unlock(&v9fs_mux_task_lock);
238
239 return 0;
240}
241
/*
 * v9fs_mux_poll_stop - detach mux @m from its poll task, dropping any
 * registered wait queues, and stop the task if it now serves no muxes.
 */
static void v9fs_mux_poll_stop(struct v9fs_mux_data *m)
{
	int i;
	struct v9fs_mux_poll_task *vpt;

	mutex_lock(&v9fs_mux_task_lock);
	vpt = m->poll_task;
	list_del(&m->mux_list);
	/* unhook from every wait queue v9fs_pollwait registered us on */
	for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) {
		if (m->poll_waddr[i] != NULL) {
			remove_wait_queue(m->poll_waddr[i], &m->poll_wait[i]);
			m->poll_waddr[i] = NULL;
		}
	}
	vpt->muxnum--;
	if (!vpt->muxnum) {
		/* last mux on this task: shut the kthread down */
		dprintk(DEBUG_MUX, "destroy proc %p\n", vpt);
		kthread_stop(vpt->task);
		vpt->task = NULL;
		v9fs_mux_poll_task_num--;
	}
	v9fs_mux_num--;
	mutex_unlock(&v9fs_mux_task_lock);
}
266
267/**
268 * v9fs_mux_init - allocate and initialize the per-session mux data
269 * Creates the polling task if this is the first session.
270 *
271 * @trans - transport structure
272 * @msize - maximum message size
273 * @extended - pointer to the extended flag
274 */
275struct v9fs_mux_data *v9fs_mux_init(struct v9fs_transport *trans, int msize,
276 unsigned char *extended)
277{
278 int i, n;
279 struct v9fs_mux_data *m, *mtmp;
280
281 dprintk(DEBUG_MUX, "transport %p msize %d\n", trans, msize);
282 m = kmalloc(sizeof(struct v9fs_mux_data), GFP_KERNEL);
283 if (!m)
284 return ERR_PTR(-ENOMEM);
285
286 spin_lock_init(&m->lock);
287 INIT_LIST_HEAD(&m->mux_list);
288 m->msize = msize;
289 m->extended = extended;
290 m->trans = trans;
291 idr_init(&m->tagpool.pool);
292 init_MUTEX(&m->tagpool.lock);
293 m->err = 0;
294 init_waitqueue_head(&m->equeue);
295 INIT_LIST_HEAD(&m->req_list);
296 INIT_LIST_HEAD(&m->unsent_req_list);
297 m->rcall = NULL;
298 m->rpos = 0;
299 m->rbuf = NULL;
300 m->wpos = m->wsize = 0;
301 m->wbuf = NULL;
302 INIT_WORK(&m->rq, v9fs_read_work);
303 INIT_WORK(&m->wq, v9fs_write_work);
304 m->wsched = 0;
305 memset(&m->poll_waddr, 0, sizeof(m->poll_waddr));
306 m->poll_task = NULL;
307 n = v9fs_mux_poll_start(m);
308 if (n)
309 return ERR_PTR(n);
310
311 n = trans->poll(trans, &m->pt);
312 if (n & POLLIN) {
313 dprintk(DEBUG_MUX, "mux %p can read\n", m);
314 set_bit(Rpending, &m->wsched);
315 }
316
317 if (n & POLLOUT) {
318 dprintk(DEBUG_MUX, "mux %p can write\n", m);
319 set_bit(Wpending, &m->wsched);
320 }
321
322 for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) {
323 if (IS_ERR(m->poll_waddr[i])) {
324 v9fs_mux_poll_stop(m);
325 mtmp = (void *)m->poll_waddr; /* the error code */
326 kfree(m);
327 m = mtmp;
328 break;
329 }
330 }
331
332 return m;
333}
334
/**
 * v9fs_mux_destroy - cancels all pending requests and frees mux resources
 *
 * Cancels outstanding requests with -ECONNRESET, waits (bounded) for
 * in-flight waiters to drain, detaches from the poll task and frees @m.
 */
void v9fs_mux_destroy(struct v9fs_mux_data *m)
{
	dprintk(DEBUG_MUX, "mux %p prev %p next %p\n", m,
		m->mux_list.prev, m->mux_list.next);
	v9fs_mux_cancel(m, -ECONNRESET);

	if (!list_empty(&m->req_list)) {
		/* wait until all processes waiting on this session exit */
		dprintk(DEBUG_MUX, "mux %p waiting for empty request queue\n",
			m);
		/* NOTE(review): timeout is in jiffies, so 5000 is ~5000
		 * ticks, not 5 seconds — confirm intended duration */
		wait_event_timeout(m->equeue, (list_empty(&m->req_list)), 5000);
		dprintk(DEBUG_MUX, "mux %p request queue empty: %d\n", m,
			list_empty(&m->req_list));
	}

	v9fs_mux_poll_stop(m);
	m->trans = NULL;

	kfree(m);
}
358
/**
 * v9fs_pollwait - called by files poll operation to add v9fs-poll task
 * to files wait queue
 *
 * Invoked via the poll_table installed by init_poll_funcptr(); records
 * the wait address in the first free poll_waddr slot and queues the
 * owning poll task on it.  Failures are flagged by storing an ERR_PTR
 * in the slot, which v9fs_mux_init() checks afterwards.
 */
static void
v9fs_pollwait(struct file *filp, wait_queue_head_t * wait_address,
	      poll_table * p)
{
	int i;
	struct v9fs_mux_data *m;

	m = container_of(p, struct v9fs_mux_data, pt);
	/* find a free slot for this wait address */
	for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++)
		if (m->poll_waddr[i] == NULL)
			break;

	if (i >= ARRAY_SIZE(m->poll_waddr)) {
		dprintk(DEBUG_ERROR, "not enough wait_address slots\n");
		return;
	}

	m->poll_waddr[i] = wait_address;

	if (!wait_address) {
		dprintk(DEBUG_ERROR, "no wait_address\n");
		m->poll_waddr[i] = ERR_PTR(-EIO);
		return;
	}

	init_waitqueue_entry(&m->poll_wait[i], m->poll_task->task);
	add_wait_queue(wait_address, &m->poll_wait[i]);
}
391
/**
 * v9fs_poll_mux - polls a mux and schedules read or write works if necessary
 *
 * Checks the transport's readiness; on POLLERR/POLLHUP/POLLNVAL (or a
 * negative poll result) cancels the session, otherwise queues the read
 * and/or write work items when data can move.
 */
static void v9fs_poll_mux(struct v9fs_mux_data *m)
{
	int n;

	if (m->err < 0)
		return;

	n = m->trans->poll(m->trans, NULL);
	if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) {
		dprintk(DEBUG_MUX, "error mux %p err %d\n", m, n);
		if (n >= 0)
			n = -ECONNRESET;
		v9fs_mux_cancel(m, n);
	}

	/* NOTE(review): after the error path above, n may be a negative
	 * errno whose bit pattern can still match POLLIN/POLLOUT below —
	 * confirm this fall-through is intended */
	if (n & POLLIN) {
		set_bit(Rpending, &m->wsched);
		dprintk(DEBUG_MUX, "mux %p can read\n", m);
		if (!test_and_set_bit(Rworksched, &m->wsched)) {
			dprintk(DEBUG_MUX, "schedule read work mux %p\n", m);
			queue_work(v9fs_mux_wq, &m->rq);
		}
	}

	if (n & POLLOUT) {
		set_bit(Wpending, &m->wsched);
		dprintk(DEBUG_MUX, "mux %p can write\n", m);
		if ((m->wsize || !list_empty(&m->unsent_req_list))
		    && !test_and_set_bit(Wworksched, &m->wsched)) {
			dprintk(DEBUG_MUX, "schedule write work mux %p\n", m);
			queue_work(v9fs_mux_wq, &m->wq);
		}
	}
}
429
/**
 * v9fs_poll_proc - polls all v9fs transports for new events and queues
 * the appropriate work to the work queue
 *
 * Kthread body: loops until kthread_stop(), polling every mux on this
 * task's list and then sleeping for SCHED_TIMEOUT seconds.
 */
static int v9fs_poll_proc(void *a)
{
	struct v9fs_mux_data *m, *mtmp;
	struct v9fs_mux_poll_task *vpt;

	vpt = a;
	dprintk(DEBUG_MUX, "start %p %p\n", current, vpt);
	while (!kthread_should_stop()) {
		/* go interruptible before scanning so a wakeup between the
		 * scan and schedule_timeout() is not lost */
		set_current_state(TASK_INTERRUPTIBLE);

		list_for_each_entry_safe(m, mtmp, &vpt->mux_list, mux_list) {
			v9fs_poll_mux(m);
		}

		dprintk(DEBUG_MUX, "sleeping...\n");
		schedule_timeout(SCHED_TIMEOUT * HZ);
	}

	__set_current_state(TASK_RUNNING);
	dprintk(DEBUG_MUX, "finish\n");
	return 0;
}
456
/**
 * v9fs_write_work - called when a transport can send some data
 *
 * Workqueue handler: if no send is in progress, dequeues the next
 * unsent request (skipping flushed ones) and streams its serialized
 * fcall through the transport, re-queueing itself while more output is
 * possible.  Any transport error cancels the whole session.
 */
static void v9fs_write_work(struct work_struct *work)
{
	int n, err;
	struct v9fs_mux_data *m;
	struct v9fs_req *req;

	m = container_of(work, struct v9fs_mux_data, wq);

	if (m->err < 0) {
		clear_bit(Wworksched, &m->wsched);
		return;
	}

	if (!m->wsize) {
		/* nothing mid-send: pick the next unsent request */
		if (list_empty(&m->unsent_req_list)) {
			clear_bit(Wworksched, &m->wsched);
			return;
		}

		spin_lock(&m->lock);
again:
		req = list_entry(m->unsent_req_list.next, struct v9fs_req,
			       req_list);
		list_move_tail(&req->req_list, &m->req_list);
		/* NOTE(review): if every queued request was flushed, this
		 * loop walks past the list head — confirm a non-flushed
		 * entry is guaranteed to exist here */
		if (req->err == ERREQFLUSH)
			goto again;

		m->wbuf = req->tcall->sdata;
		m->wsize = req->tcall->size;
		m->wpos = 0;
		dump_data(m->wbuf, m->wsize);
		spin_unlock(&m->lock);
	}

	dprintk(DEBUG_MUX, "mux %p pos %d size %d\n", m, m->wpos, m->wsize);
	clear_bit(Wpending, &m->wsched);
	err = m->trans->write(m->trans, m->wbuf + m->wpos, m->wsize - m->wpos);
	dprintk(DEBUG_MUX, "mux %p sent %d bytes\n", m, err);
	if (err == -EAGAIN) {
		/* transport not ready; poll task will reschedule us */
		clear_bit(Wworksched, &m->wsched);
		return;
	}

	if (err <= 0)
		goto error;

	m->wpos += err;
	if (m->wpos == m->wsize)
		m->wpos = m->wsize = 0;

	/* finished one message with more queued: re-queue ourselves if
	 * the transport can still take data */
	if (m->wsize == 0 && !list_empty(&m->unsent_req_list)) {
		if (test_and_clear_bit(Wpending, &m->wsched))
			n = POLLOUT;
		else
			n = m->trans->poll(m->trans, NULL);

		if (n & POLLOUT) {
			dprintk(DEBUG_MUX, "schedule write work mux %p\n", m);
			queue_work(v9fs_mux_wq, &m->wq);
		} else
			clear_bit(Wworksched, &m->wsched);
	} else
		clear_bit(Wworksched, &m->wsched);

	return;

 error:
	v9fs_mux_cancel(m, err);
	clear_bit(Wworksched, &m->wsched);
}
530
/*
 * process_request - post-process a completed request/reply pair.
 *
 * Converts an Rerror reply into a negative errno in req->err: the
 * numeric ecode is used directly on extended (9P2000.u) sessions,
 * otherwise the error string is mapped via v9fs_errstr2errno(), falling
 * back to -ESERVERFAULT when the string is unknown.  Also flags a
 * mismatched reply id (expected tcall id + 1) as -EIO.
 */
static void process_request(struct v9fs_mux_data *m, struct v9fs_req *req)
{
	int ecode;
	struct v9fs_str *ename;

	if (!req->err && req->rcall->id == RERROR) {
		ecode = req->rcall->params.rerror.errno;
		ename = &req->rcall->params.rerror.error;

		dprintk(DEBUG_MUX, "Rerror %.*s\n", ename->len, ename->str);

		if (*m->extended)
			req->err = -ecode;

		if (!req->err) {
			req->err = v9fs_errstr2errno(ename->str, ename->len);

			if (!req->err) {	/* string match failed */
				PRINT_FCALL_ERROR("unknown error", req->rcall);
			}

			if (!req->err)
				req->err = -ESERVERFAULT;
		}
	} else if (req->tcall && req->rcall->id != req->tcall->id + 1) {
		dprintk(DEBUG_ERROR, "fcall mismatch: expected %d, got %d\n",
			req->tcall->id + 1, req->rcall->id);
		if (!req->err)
			req->err = -EIO;
	}
}
562
563/**
564 * v9fs_read_work - called when there is some data to be read from a transport
 *
 * Reads as much as the transport will give into the per-mux receive
 * buffer, then deserializes every complete 9P message found there,
 * matches each reply to its pending request by tag and completes it.
 * Reschedules itself while more data is pending.
565 */
566static void v9fs_read_work(struct work_struct *work)
567{
568	int n, err;
569	struct v9fs_mux_data *m;
570	struct v9fs_req *req, *rptr, *rreq;
571	struct v9fs_fcall *rcall;
572	char *rbuf;
573
574	m = container_of(work, struct v9fs_mux_data, rq);
575
	/* mux already failed; all requests were (or will be) cancelled */
576	if (m->err < 0)
577		return;
578
579	rcall = NULL;
580	dprintk(DEBUG_MUX, "start mux %p pos %d\n", m, m->rpos);
581
	/* lazily allocate the receive fcall + data buffer in one chunk */
582	if (!m->rcall) {
583		m->rcall =
584		    kmalloc(sizeof(struct v9fs_fcall) + m->msize, GFP_KERNEL);
585		if (!m->rcall) {
586			err = -ENOMEM;
587			goto error;
588		}
589
590		m->rbuf = (char *)m->rcall + sizeof(struct v9fs_fcall);
591		m->rpos = 0;
592	}
593
594	clear_bit(Rpending, &m->wsched);
595	err = m->trans->read(m->trans, m->rbuf + m->rpos, m->msize - m->rpos);
596	dprintk(DEBUG_MUX, "mux %p got %d bytes\n", m, err);
597	if (err == -EAGAIN) {
598		clear_bit(Rworksched, &m->wsched);
599		return;
600	}
601
602	if (err <= 0)
603		goto error;
604
605	m->rpos += err;
	/* the first 4 bytes of every 9P message are its little-endian size */
606	while (m->rpos > 4) {
607		n = le32_to_cpu(*(__le32 *) m->rbuf);
608		if (n >= m->msize) {
609			dprintk(DEBUG_ERROR,
610				"requested packet size too big: %d\n", n);
611			err = -EIO;
612			goto error;
613		}
614
		/* message not fully received yet; wait for more data */
615		if (m->rpos < n)
616			break;
617
618		dump_data(m->rbuf, n);
619		err =
620		    v9fs_deserialize_fcall(m->rbuf, n, m->rcall, *m->extended);
621		if (err < 0) {
622			goto error;
623		}
624
625		if ((v9fs_debug_level&DEBUG_FCALL) == DEBUG_FCALL) {
626			char buf[150];
627
628			v9fs_printfcall(buf, sizeof(buf), m->rcall,
629					*m->extended);
630			printk(KERN_NOTICE ">>> %p %s\n", m, buf);
631		}
632
		/*
		 * Hand the completed fcall off via rcall/rbuf; if extra bytes
		 * beyond this message were read, carry them into a freshly
		 * allocated buffer so parsing can continue next iteration.
		 */
633		rcall = m->rcall;
634		rbuf = m->rbuf;
635		if (m->rpos > n) {
636			m->rcall = kmalloc(sizeof(struct v9fs_fcall) + m->msize,
637					   GFP_KERNEL);
638			if (!m->rcall) {
639				err = -ENOMEM;
640				goto error;
641			}
642
643			m->rbuf = (char *)m->rcall + sizeof(struct v9fs_fcall);
644			memmove(m->rbuf, rbuf + n, m->rpos - n);
645			m->rpos -= n;
646		} else {
647			m->rcall = NULL;
648			m->rbuf = NULL;
649			m->rpos = 0;
650		}
651
652		dprintk(DEBUG_MUX, "mux %p fcall id %d tag %d\n", m, rcall->id,
653			rcall->tag);
654
		/* find the pending request with a matching tag */
655		req = NULL;
656		spin_lock(&m->lock);
657		list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) {
658			if (rreq->tag == rcall->tag) {
659				req = rreq;
				/*
				 * A request being flushed stays on the list;
				 * its completion is finished by the flush
				 * callback, not here.
				 */
660				if (req->flush != Flushing)
661					list_del(&req->req_list);
662				break;
663			}
664		}
665		spin_unlock(&m->lock);
666
667		if (req) {
668			req->rcall = rcall;
669			process_request(m, req);
670
671			if (req->flush != Flushing) {
672				if (req->cb)
673					(*req->cb) (req, req->cba);
674				else
675					kfree(req->rcall);
676
677				wake_up(&m->equeue);
678			}
679		} else {
			/* unmatched reply: only RFLUSH is expected here */
680			if (err >= 0 && rcall->id != RFLUSH)
681				dprintk(DEBUG_ERROR,
682					"unexpected response mux %p id %d tag %d\n",
683					m, rcall->id, rcall->tag);
684			kfree(rcall);
685		}
686	}
687
	/* reschedule ourselves if requests remain and data is ready */
688	if (!list_empty(&m->req_list)) {
689		if (test_and_clear_bit(Rpending, &m->wsched))
690			n = POLLIN;
691		else
692			n = m->trans->poll(m->trans, NULL);
693
694		if (n & POLLIN) {
695			dprintk(DEBUG_MUX, "schedule read work mux %p\n", m);
696			queue_work(v9fs_mux_wq, &m->rq);
697		} else
698			clear_bit(Rworksched, &m->wsched);
699	} else
700		clear_bit(Rworksched, &m->wsched);
701
702	return;
703
704      error:
705	v9fs_mux_cancel(m, err);
706	clear_bit(Rworksched, &m->wsched);
707}
708
709/**
710 * v9fs_send_request - send 9P request
711 * The function can sleep until the request is scheduled for sending.
712 * The function can be interrupted. Return from the function is not
713 * a guarantee that the request is sent successfully. Can return errors
714 * that can be retrieved by PTR_ERR macros.
715 *
716 * @m: mux data
717 * @tc: request to be sent
718 * @cb: callback function to call when response is received
719 * @cba: parameter to pass to the callback function
720 */
721static struct v9fs_req *v9fs_send_request(struct v9fs_mux_data *m,
722 struct v9fs_fcall *tc,
723 v9fs_mux_req_callback cb, void *cba)
724{
725 int n;
726 struct v9fs_req *req;
727
728 dprintk(DEBUG_MUX, "mux %p task %p tcall %p id %d\n", m, current,
729 tc, tc->id);
730 if (m->err < 0)
731 return ERR_PTR(m->err);
732
733 req = kmalloc(sizeof(struct v9fs_req), GFP_KERNEL);
734 if (!req)
735 return ERR_PTR(-ENOMEM);
736
737 if (tc->id == TVERSION)
738 n = V9FS_NOTAG;
739 else
740 n = v9fs_mux_get_tag(m);
741
742 if (n < 0)
743 return ERR_PTR(-ENOMEM);
744
745 v9fs_set_tag(tc, n);
746 if ((v9fs_debug_level&DEBUG_FCALL) == DEBUG_FCALL) {
747 char buf[150];
748
749 v9fs_printfcall(buf, sizeof(buf), tc, *m->extended);
750 printk(KERN_NOTICE "<<< %p %s\n", m, buf);
751 }
752
753 spin_lock_init(&req->lock);
754 req->tag = n;
755 req->tcall = tc;
756 req->rcall = NULL;
757 req->err = 0;
758 req->cb = cb;
759 req->cba = cba;
760 req->flush = None;
761
762 spin_lock(&m->lock);
763 list_add_tail(&req->req_list, &m->unsent_req_list);
764 spin_unlock(&m->lock);
765
766 if (test_and_clear_bit(Wpending, &m->wsched))
767 n = POLLOUT;
768 else
769 n = m->trans->poll(m->trans, NULL);
770
771 if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched))
772 queue_work(v9fs_mux_wq, &m->wq);
773
774 return req;
775}
776
777static void v9fs_mux_free_request(struct v9fs_mux_data *m, struct v9fs_req *req)
778{
779 v9fs_mux_put_tag(m, req->tag);
780 kfree(req);
781}
782
/*
 * v9fs_mux_flush_cb - completion callback for a Tflush request.
 * Finds the original (flushed) request by oldtag, marks it Flushed,
 * completes it, then releases the Tflush call itself.
 */
783static void v9fs_mux_flush_cb(struct v9fs_req *freq, void *a)
784{
	/* NOTE(review): cb is assigned but never used in this function */
785	v9fs_mux_req_callback cb;
786	int tag;
787	struct v9fs_mux_data *m;
788	struct v9fs_req *req, *rreq, *rptr;
789
790	m = a;
791	dprintk(DEBUG_MUX, "mux %p tc %p rc %p err %d oldtag %d\n", m,
792		freq->tcall, freq->rcall, freq->err,
793		freq->tcall->params.tflush.oldtag);
794
	/* look up and unlink the flushed request under the mux lock */
795	spin_lock(&m->lock);
796	cb = NULL;
797	tag = freq->tcall->params.tflush.oldtag;
798	req = NULL;
799	list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) {
800		if (rreq->tag == tag) {
801			req = rreq;
802			list_del(&req->req_list);
803			break;
804		}
805	}
806	spin_unlock(&m->lock);
807
808	if (req) {
		/* state change under the per-request lock */
809		spin_lock(&req->lock);
810		req->flush = Flushed;
811		spin_unlock(&req->lock);
812
813		if (req->cb)
814			(*req->cb) (req, req->cba);
815		else
816			kfree(req->rcall);
817
818		wake_up(&m->equeue);
819	}
820
	/* the Tflush fcall, its reply, and its request are no longer needed */
821	kfree(freq->tcall);
822	kfree(freq->rcall);
823	v9fs_mux_free_request(m, freq);
824}
825
/*
 * v9fs_mux_flush_request - abort a pending request.
 * Returns 0 if the request could be completed/removed immediately
 * (reply already received, or not yet sent); returns 1 after sending a
 * Tflush, in which case the caller must wait for the flush reply.
 */
826static int
827v9fs_mux_flush_request(struct v9fs_mux_data *m, struct v9fs_req *req)
828{
829	struct v9fs_fcall *fc;
830	struct v9fs_req *rreq, *rptr;
831
832	dprintk(DEBUG_MUX, "mux %p req %p tag %d\n", m, req, req->tag);
833
834	/* if a response was received for a request, do nothing */
835	spin_lock(&req->lock);
836	if (req->rcall || req->err) {
837		spin_unlock(&req->lock);
838		dprintk(DEBUG_MUX, "mux %p req %p response already received\n", m, req);
839		return 0;
840	}
841
842	req->flush = Flushing;
843	spin_unlock(&req->lock);
844
845	spin_lock(&m->lock);
846	/* if the request is not sent yet, just remove it from the list */
847	list_for_each_entry_safe(rreq, rptr, &m->unsent_req_list, req_list) {
848		if (rreq->tag == req->tag) {
849			dprintk(DEBUG_MUX, "mux %p req %p request is not sent yet\n", m, req);
850			list_del(&rreq->req_list);
851			req->flush = Flushed;
852			spin_unlock(&m->lock);
853			if (req->cb)
854				(*req->cb) (req, req->cba);
855			return 0;
856		}
857	}
858	spin_unlock(&m->lock);
859
	/* clear the pending signal so the Tflush itself is not interrupted */
860	clear_thread_flag(TIF_SIGPENDING);
861	fc = v9fs_create_tflush(req->tag);
	/* NOTE(review): return value of v9fs_send_request() is ignored here;
	 * if sending the Tflush fails the caller still waits -- confirm the
	 * mux-error wakeup covers that path */
862	v9fs_send_request(m, fc, v9fs_mux_flush_cb, m);
863	return 1;
864}
865
866static void
867v9fs_mux_rpc_cb(struct v9fs_req *req, void *a)
868{
869 struct v9fs_mux_rpc *r;
870
871 dprintk(DEBUG_MUX, "req %p r %p\n", req, a);
872 r = a;
873 r->rcall = req->rcall;
874 r->err = req->err;
875
876 if (req->flush!=None && !req->err)
877 r->err = -ERESTARTSYS;
878
879 wake_up(&r->wqueue);
880}
881
882/**
883 * v9fs_mux_rpc - sends 9P request and waits until a response is available.
884 * The function can be interrupted.
885 * @m: mux data
886 * @tc: request to be sent
887 * @rc: pointer where a pointer to the response is stored
 *
 * If the wait is interrupted, a Tflush is issued for the outstanding
 * request and the function waits for its completion before returning
 * -ERESTARTSYS.
888 */
889int
890v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc,
891	     struct v9fs_fcall **rc)
892{
893	int err, sigpending;
894	unsigned long flags;
895	struct v9fs_req *req;
896	struct v9fs_mux_rpc r;
897
898	r.err = 0;
899	r.tcall = tc;
900	r.rcall = NULL;
901	r.m = m;
902	init_waitqueue_head(&r.wqueue);
903
904	if (rc)
905		*rc = NULL;
906
	/* temporarily clear a pending signal so the send is not aborted;
	 * restored via recalc_sigpending() below */
907	sigpending = 0;
908	if (signal_pending(current)) {
909		sigpending = 1;
910		clear_thread_flag(TIF_SIGPENDING);
911	}
912
913	req = v9fs_send_request(m, tc, v9fs_mux_rpc_cb, &r);
914	if (IS_ERR(req)) {
915		err = PTR_ERR(req);
916		dprintk(DEBUG_MUX, "error %d\n", err);
917		return err;
918	}
919
	/* v9fs_mux_rpc_cb fills r.rcall / r.err and wakes us */
920	err = wait_event_interruptible(r.wqueue, r.rcall != NULL || r.err < 0);
921	if (r.err < 0)
922		err = r.err;
923
	/* interrupted while the connection is still healthy: flush the
	 * outstanding request and wait for the flush to complete */
924	if (err == -ERESTARTSYS && m->trans->status == Connected && m->err == 0) {
925		if (v9fs_mux_flush_request(m, req)) {
926			/* wait until we get response of the flush message */
927			do {
928				clear_thread_flag(TIF_SIGPENDING);
929				err = wait_event_interruptible(r.wqueue,
930					r.rcall || r.err);
931			} while (!r.rcall && !r.err && err==-ERESTARTSYS &&
932				m->trans->status==Connected && !m->err);
933
934			err = -ERESTARTSYS;
935		}
936		sigpending = 1;
937	}
938
	/* re-arm the signal we suppressed above */
939	if (sigpending) {
940		spin_lock_irqsave(&current->sighand->siglock, flags);
941		recalc_sigpending();
942		spin_unlock_irqrestore(&current->sighand->siglock, flags);
943	}
944
	/* hand the reply to the caller, or drop it if unwanted */
945	if (rc)
946		*rc = r.rcall;
947	else
948		kfree(r.rcall);
949
950	v9fs_mux_free_request(m, req);
951	if (err > 0)
952		err = -EIO;
953
954	return err;
955}
956
/* Dead code: compiled out with #if 0; kept for reference only. */
957#if 0
958/**
959 * v9fs_mux_rpcnb - sends 9P request without waiting for response.
960 * @m: mux data
961 * @tc: request to be sent
962 * @cb: callback function to be called when response arrives
963 * @cba: value to pass to the callback function
964 */
965int v9fs_mux_rpcnb(struct v9fs_mux_data *m, struct v9fs_fcall *tc,
966		   v9fs_mux_req_callback cb, void *a)
967{
968	int err;
969	struct v9fs_req *req;
970
971	req = v9fs_send_request(m, tc, cb, a);
972	if (IS_ERR(req)) {
973		err = PTR_ERR(req);
974		dprintk(DEBUG_MUX, "error %d\n", err);
		/* NOTE(review): err already holds PTR_ERR(req); returning it
		 * again via PTR_ERR is redundant but harmless */
975		return PTR_ERR(req);
976	}
977
978	dprintk(DEBUG_MUX, "mux %p tc %p tag %d\n", m, tc, req->tag);
979	return 0;
980}
981#endif /* 0 */
982
983/**
984 * v9fs_mux_cancel - cancel all pending requests with error
985 * @m: mux data
986 * @err: error code
 *
 * Marks the mux failed, moves every sent and unsent request onto a
 * private list under the lock, then completes each with @err outside
 * the lock and wakes all waiters.
987 */
988void v9fs_mux_cancel(struct v9fs_mux_data *m, int err)
989{
990	struct v9fs_req *req, *rtmp;
991	LIST_HEAD(cancel_list);
992
993	dprintk(DEBUG_ERROR, "mux %p err %d\n", m, err);
994	m->err = err;
	/* splice both queues to a local list so callbacks run unlocked */
995	spin_lock(&m->lock);
996	list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) {
997		list_move(&req->req_list, &cancel_list);
998	}
999	list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) {
1000		list_move(&req->req_list, &cancel_list);
1001	}
1002	spin_unlock(&m->lock);
1003
1004	list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
1005		list_del(&req->req_list);
		/* do not overwrite an error the request already carries */
1006		if (!req->err)
1007			req->err = err;
1008
1009		if (req->cb)
1010			(*req->cb) (req, req->cba);
1011		else
1012			kfree(req->rcall);
1013	}
1014
1015	wake_up(&m->equeue);
1016}
1017
1018static u16 v9fs_mux_get_tag(struct v9fs_mux_data *m)
1019{
1020 int tag;
1021
1022 tag = v9fs_get_idpool(&m->tagpool);
1023 if (tag < 0)
1024 return V9FS_NOTAG;
1025 else
1026 return (u16) tag;
1027}
1028
1029static void v9fs_mux_put_tag(struct v9fs_mux_data *m, u16 tag)
1030{
1031 if (tag != V9FS_NOTAG && v9fs_check_idpool(tag, &m->tagpool))
1032 v9fs_put_idpool(tag, &m->tagpool);
1033}
diff --git a/fs/9p/mux.h b/fs/9p/mux.h
deleted file mode 100644
index fb10c50186a..00000000000
--- a/fs/9p/mux.h
+++ /dev/null
@@ -1,55 +0,0 @@
1/*
2 * linux/fs/9p/mux.h
3 *
4 * Multiplexer Definitions
5 *
6 * Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
7 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2
11 * as published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to:
20 * Free Software Foundation
21 * 51 Franklin Street, Fifth Floor
22 * Boston, MA 02111-1301 USA
23 *
24 */
25
26struct v9fs_mux_data;
27struct v9fs_req;
28
29/**
30 * v9fs_mux_req_callback - callback function that is called when the
31 * response of a request is received. The callback is called from
32 * a workqueue and shouldn't block.
33 *
34 * @a - the pointer that was specified when the request was send to be
35 * passed to the callback
36 * @tc - request call
37 * @rc - response call
38 * @err - error code (non-zero if error occured)
39 */
40typedef void (*v9fs_mux_req_callback)(struct v9fs_req *req, void *a);
41
42int v9fs_mux_global_init(void);
43void v9fs_mux_global_exit(void);
44
45struct v9fs_mux_data *v9fs_mux_init(struct v9fs_transport *trans, int msize,
46 unsigned char *extended);
47void v9fs_mux_destroy(struct v9fs_mux_data *);
48
49int v9fs_mux_send(struct v9fs_mux_data *m, struct v9fs_fcall *tc);
50struct v9fs_fcall *v9fs_mux_recv(struct v9fs_mux_data *m);
51int v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc, struct v9fs_fcall **rc);
52
53void v9fs_mux_flush(struct v9fs_mux_data *m, int sendflush);
54void v9fs_mux_cancel(struct v9fs_mux_data *m, int err);
55int v9fs_errstr2errno(char *errstr, int len);
diff --git a/fs/9p/trans_fd.c b/fs/9p/trans_fd.c
deleted file mode 100644
index 34d43355beb..00000000000
--- a/fs/9p/trans_fd.c
+++ /dev/null
@@ -1,308 +0,0 @@
1/*
2 * linux/fs/9p/trans_fd.c
3 *
4 * Fd transport layer. Includes deprecated socket layer.
5 *
6 * Copyright (C) 2006 by Russ Cox <rsc@swtch.com>
7 * Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
8 * Copyright (C) 2004-2005 by Eric Van Hensbergen <ericvh@gmail.com>
9 * Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com>
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License version 2
13 * as published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to:
22 * Free Software Foundation
23 * 51 Franklin Street, Fifth Floor
24 * Boston, MA 02111-1301 USA
25 *
26 */
27
28#include <linux/in.h>
29#include <linux/module.h>
30#include <linux/net.h>
31#include <linux/ipv6.h>
32#include <linux/errno.h>
33#include <linux/kernel.h>
34#include <linux/un.h>
35#include <asm/uaccess.h>
36#include <linux/inet.h>
37#include <linux/idr.h>
38#include <linux/file.h>
39
40#include "debug.h"
41#include "v9fs.h"
42#include "transport.h"
43
44#define V9FS_PORT 564
45
46struct v9fs_trans_fd {
47 struct file *rd;
48 struct file *wr;
49};
50
51/**
52 * v9fs_fd_read- read from a fd
53 * @v9ses: session information
54 * @v: buffer to receive data into
55 * @len: size of receive buffer
56 *
57 */
58static int v9fs_fd_read(struct v9fs_transport *trans, void *v, int len)
59{
60 int ret;
61 struct v9fs_trans_fd *ts;
62
63 if (!trans || trans->status == Disconnected || !(ts = trans->priv))
64 return -EREMOTEIO;
65
66 if (!(ts->rd->f_flags & O_NONBLOCK))
67 dprintk(DEBUG_ERROR, "blocking read ...\n");
68
69 ret = kernel_read(ts->rd, ts->rd->f_pos, v, len);
70 if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
71 trans->status = Disconnected;
72 return ret;
73}
74
75/**
76 * v9fs_fd_write - write to a socket
77 * @v9ses: session information
78 * @v: buffer to send data from
79 * @len: size of send buffer
80 *
81 */
82static int v9fs_fd_write(struct v9fs_transport *trans, void *v, int len)
83{
84 int ret;
85 mm_segment_t oldfs;
86 struct v9fs_trans_fd *ts;
87
88 if (!trans || trans->status == Disconnected || !(ts = trans->priv))
89 return -EREMOTEIO;
90
91 if (!(ts->wr->f_flags & O_NONBLOCK))
92 dprintk(DEBUG_ERROR, "blocking write ...\n");
93
94 oldfs = get_fs();
95 set_fs(get_ds());
96 /* The cast to a user pointer is valid due to the set_fs() */
97 ret = vfs_write(ts->wr, (void __user *)v, len, &ts->wr->f_pos);
98 set_fs(oldfs);
99
100 if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
101 trans->status = Disconnected;
102 return ret;
103}
104
105static unsigned int
106v9fs_fd_poll(struct v9fs_transport *trans, struct poll_table_struct *pt)
107{
108 int ret, n;
109 struct v9fs_trans_fd *ts;
110 mm_segment_t oldfs;
111
112 if (!trans || trans->status != Connected || !(ts = trans->priv))
113 return -EREMOTEIO;
114
115 if (!ts->rd->f_op || !ts->rd->f_op->poll)
116 return -EIO;
117
118 if (!ts->wr->f_op || !ts->wr->f_op->poll)
119 return -EIO;
120
121 oldfs = get_fs();
122 set_fs(get_ds());
123
124 ret = ts->rd->f_op->poll(ts->rd, pt);
125 if (ret < 0)
126 goto end;
127
128 if (ts->rd != ts->wr) {
129 n = ts->wr->f_op->poll(ts->wr, pt);
130 if (n < 0) {
131 ret = n;
132 goto end;
133 }
134 ret = (ret & ~POLLOUT) | (n & ~POLLIN);
135 }
136
137 end:
138 set_fs(oldfs);
139 return ret;
140}
141
142static int v9fs_fd_open(struct v9fs_session_info *v9ses, int rfd, int wfd)
143{
144 struct v9fs_transport *trans = v9ses->transport;
145 struct v9fs_trans_fd *ts = kmalloc(sizeof(struct v9fs_trans_fd),
146 GFP_KERNEL);
147 if (!ts)
148 return -ENOMEM;
149
150 ts->rd = fget(rfd);
151 ts->wr = fget(wfd);
152 if (!ts->rd || !ts->wr) {
153 if (ts->rd)
154 fput(ts->rd);
155 if (ts->wr)
156 fput(ts->wr);
157 kfree(ts);
158 return -EIO;
159 }
160
161 trans->priv = ts;
162 trans->status = Connected;
163
164 return 0;
165}
166
167static int v9fs_fd_init(struct v9fs_session_info *v9ses, const char *addr,
168 char *data)
169{
170 if (v9ses->rfdno == ~0 || v9ses->wfdno == ~0) {
171 printk(KERN_ERR "v9fs: Insufficient options for proto=fd\n");
172 return -ENOPROTOOPT;
173 }
174
175 return v9fs_fd_open(v9ses, v9ses->rfdno, v9ses->wfdno);
176}
177
178static int v9fs_socket_open(struct v9fs_session_info *v9ses,
179 struct socket *csocket)
180{
181 int fd, ret;
182
183 csocket->sk->sk_allocation = GFP_NOIO;
184 if ((fd = sock_map_fd(csocket)) < 0) {
185 eprintk(KERN_ERR, "v9fs_socket_open: failed to map fd\n");
186 ret = fd;
187 release_csocket:
188 sock_release(csocket);
189 return ret;
190 }
191
192 if ((ret = v9fs_fd_open(v9ses, fd, fd)) < 0) {
193 sockfd_put(csocket);
194 eprintk(KERN_ERR, "v9fs_socket_open: failed to open fd\n");
195 goto release_csocket;
196 }
197
198 ((struct v9fs_trans_fd *)v9ses->transport->priv)->rd->f_flags |=
199 O_NONBLOCK;
200 return 0;
201}
202
203static int v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr,
204 char *data)
205{
206 int ret;
207 struct socket *csocket = NULL;
208 struct sockaddr_in sin_server;
209
210 sin_server.sin_family = AF_INET;
211 sin_server.sin_addr.s_addr = in_aton(addr);
212 sin_server.sin_port = htons(v9ses->port);
213 sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket);
214
215 if (!csocket) {
216 eprintk(KERN_ERR, "v9fs_trans_tcp: problem creating socket\n");
217 return -1;
218 }
219
220 ret = csocket->ops->connect(csocket,
221 (struct sockaddr *)&sin_server,
222 sizeof(struct sockaddr_in), 0);
223 if (ret < 0) {
224 eprintk(KERN_ERR,
225 "v9fs_trans_tcp: problem connecting socket to %s\n",
226 addr);
227 return ret;
228 }
229
230 return v9fs_socket_open(v9ses, csocket);
231}
232
233static int
234v9fs_unix_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
235{
236 int ret;
237 struct socket *csocket;
238 struct sockaddr_un sun_server;
239
240 if (strlen(addr) > UNIX_PATH_MAX) {
241 eprintk(KERN_ERR, "v9fs_trans_unix: address too long: %s\n",
242 addr);
243 return -ENAMETOOLONG;
244 }
245
246 sun_server.sun_family = PF_UNIX;
247 strcpy(sun_server.sun_path, addr);
248 sock_create_kern(PF_UNIX, SOCK_STREAM, 0, &csocket);
249 ret = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server,
250 sizeof(struct sockaddr_un) - 1, 0);
251 if (ret < 0) {
252 eprintk(KERN_ERR,
253 "v9fs_trans_unix: problem connecting socket: %s: %d\n",
254 addr, ret);
255 return ret;
256 }
257
258 return v9fs_socket_open(v9ses, csocket);
259}
260
261/**
262 * v9fs_sock_close - shutdown socket
263 * @trans: private socket structure
264 *
265 */
266static void v9fs_fd_close(struct v9fs_transport *trans)
267{
268 struct v9fs_trans_fd *ts;
269
270 if (!trans)
271 return;
272
273 ts = xchg(&trans->priv, NULL);
274
275 if (!ts)
276 return;
277
278 trans->status = Disconnected;
279 if (ts->rd)
280 fput(ts->rd);
281 if (ts->wr)
282 fput(ts->wr);
283 kfree(ts);
284}
285
286struct v9fs_transport v9fs_trans_fd = {
287 .init = v9fs_fd_init,
288 .write = v9fs_fd_write,
289 .read = v9fs_fd_read,
290 .close = v9fs_fd_close,
291 .poll = v9fs_fd_poll,
292};
293
294struct v9fs_transport v9fs_trans_tcp = {
295 .init = v9fs_tcp_init,
296 .write = v9fs_fd_write,
297 .read = v9fs_fd_read,
298 .close = v9fs_fd_close,
299 .poll = v9fs_fd_poll,
300};
301
302struct v9fs_transport v9fs_trans_unix = {
303 .init = v9fs_unix_init,
304 .write = v9fs_fd_write,
305 .read = v9fs_fd_read,
306 .close = v9fs_fd_close,
307 .poll = v9fs_fd_poll,
308};
diff --git a/fs/9p/transport.h b/fs/9p/transport.h
deleted file mode 100644
index b38a4b8a41c..00000000000
--- a/fs/9p/transport.h
+++ /dev/null
@@ -1,45 +0,0 @@
1/*
2 * linux/fs/9p/transport.h
3 *
4 * Transport Definition
5 *
6 * Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
7 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2
11 * as published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to:
20 * Free Software Foundation
21 * 51 Franklin Street, Fifth Floor
22 * Boston, MA 02111-1301 USA
23 *
24 */
25
26enum v9fs_transport_status {
27 Connected,
28 Disconnected,
29 Hung,
30};
31
32struct v9fs_transport {
33 enum v9fs_transport_status status;
34 void *priv;
35
36 int (*init) (struct v9fs_session_info *, const char *, char *);
37 int (*write) (struct v9fs_transport *, void *, int);
38 int (*read) (struct v9fs_transport *, void *, int);
39 void (*close) (struct v9fs_transport *);
40 unsigned int (*poll)(struct v9fs_transport *, struct poll_table_struct *);
41};
42
43extern struct v9fs_transport v9fs_trans_tcp;
44extern struct v9fs_transport v9fs_trans_unix;
45extern struct v9fs_transport v9fs_trans_fd;
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 6ad6f192b6e..45c35986d49 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -29,16 +29,12 @@
29#include <linux/sched.h> 29#include <linux/sched.h>
30#include <linux/parser.h> 30#include <linux/parser.h>
31#include <linux/idr.h> 31#include <linux/idr.h>
32 32#include <net/9p/9p.h>
33#include "debug.h" 33#include <net/9p/transport.h>
34#include <net/9p/conn.h>
35#include <net/9p/client.h>
34#include "v9fs.h" 36#include "v9fs.h"
35#include "9p.h"
36#include "v9fs_vfs.h" 37#include "v9fs_vfs.h"
37#include "transport.h"
38#include "mux.h"
39
40/* TODO: sysfs or debugfs interface */
41int v9fs_debug_level = 0; /* feature-rific global debug level */
42 38
43/* 39/*
44 * Option Parsing (code inspired by NFS code) 40 * Option Parsing (code inspired by NFS code)
@@ -47,12 +43,12 @@ int v9fs_debug_level = 0; /* feature-rific global debug level */
47 43
48enum { 44enum {
49 /* Options that take integer arguments */ 45 /* Options that take integer arguments */
50 Opt_port, Opt_msize, Opt_uid, Opt_gid, Opt_afid, Opt_debug, 46 Opt_debug, Opt_port, Opt_msize, Opt_uid, Opt_gid, Opt_afid,
51 Opt_rfdno, Opt_wfdno, 47 Opt_rfdno, Opt_wfdno,
52 /* String options */ 48 /* String options */
53 Opt_uname, Opt_remotename, 49 Opt_uname, Opt_remotename,
54 /* Options that take no arguments */ 50 /* Options that take no arguments */
55 Opt_legacy, Opt_nodevmap, Opt_unix, Opt_tcp, Opt_fd, 51 Opt_legacy, Opt_nodevmap, Opt_unix, Opt_tcp, Opt_fd, Opt_pci,
56 /* Cache options */ 52 /* Cache options */
57 Opt_cache_loose, 53 Opt_cache_loose,
58 /* Error token */ 54 /* Error token */
@@ -60,6 +56,7 @@ enum {
60}; 56};
61 57
62static match_table_t tokens = { 58static match_table_t tokens = {
59 {Opt_debug, "debug=%x"},
63 {Opt_port, "port=%u"}, 60 {Opt_port, "port=%u"},
64 {Opt_msize, "msize=%u"}, 61 {Opt_msize, "msize=%u"},
65 {Opt_uid, "uid=%u"}, 62 {Opt_uid, "uid=%u"},
@@ -67,12 +64,14 @@ static match_table_t tokens = {
67 {Opt_afid, "afid=%u"}, 64 {Opt_afid, "afid=%u"},
68 {Opt_rfdno, "rfdno=%u"}, 65 {Opt_rfdno, "rfdno=%u"},
69 {Opt_wfdno, "wfdno=%u"}, 66 {Opt_wfdno, "wfdno=%u"},
70 {Opt_debug, "debug=%x"},
71 {Opt_uname, "uname=%s"}, 67 {Opt_uname, "uname=%s"},
72 {Opt_remotename, "aname=%s"}, 68 {Opt_remotename, "aname=%s"},
73 {Opt_unix, "proto=unix"}, 69 {Opt_unix, "proto=unix"},
74 {Opt_tcp, "proto=tcp"}, 70 {Opt_tcp, "proto=tcp"},
75 {Opt_fd, "proto=fd"}, 71 {Opt_fd, "proto=fd"},
72#ifdef CONFIG_PCI_9P
73 {Opt_pci, "proto=pci"},
74#endif
76 {Opt_tcp, "tcp"}, 75 {Opt_tcp, "tcp"},
77 {Opt_unix, "unix"}, 76 {Opt_unix, "unix"},
78 {Opt_fd, "fd"}, 77 {Opt_fd, "fd"},
@@ -83,6 +82,8 @@ static match_table_t tokens = {
83 {Opt_err, NULL} 82 {Opt_err, NULL}
84}; 83};
85 84
85extern struct p9_transport *p9pci_trans_create(void);
86
86/* 87/*
87 * Parse option string. 88 * Parse option string.
88 */ 89 */
@@ -122,12 +123,16 @@ static void v9fs_parse_options(char *options, struct v9fs_session_info *v9ses)
122 token = match_token(p, tokens, args); 123 token = match_token(p, tokens, args);
123 if (token < Opt_uname) { 124 if (token < Opt_uname) {
124 if ((ret = match_int(&args[0], &option)) < 0) { 125 if ((ret = match_int(&args[0], &option)) < 0) {
125 dprintk(DEBUG_ERROR, 126 P9_DPRINTK(P9_DEBUG_ERROR,
126 "integer field, but no integer?\n"); 127 "integer field, but no integer?\n");
127 continue; 128 continue;
128 } 129 }
129 } 130 }
130 switch (token) { 131 switch (token) {
132 case Opt_debug:
133 v9ses->debug = option;
134 p9_debug_level = option;
135 break;
131 case Opt_port: 136 case Opt_port:
132 v9ses->port = option; 137 v9ses->port = option;
133 break; 138 break;
@@ -149,15 +154,15 @@ static void v9fs_parse_options(char *options, struct v9fs_session_info *v9ses)
149 case Opt_wfdno: 154 case Opt_wfdno:
150 v9ses->wfdno = option; 155 v9ses->wfdno = option;
151 break; 156 break;
152 case Opt_debug:
153 v9ses->debug = option;
154 break;
155 case Opt_tcp: 157 case Opt_tcp:
156 v9ses->proto = PROTO_TCP; 158 v9ses->proto = PROTO_TCP;
157 break; 159 break;
158 case Opt_unix: 160 case Opt_unix:
159 v9ses->proto = PROTO_UNIX; 161 v9ses->proto = PROTO_UNIX;
160 break; 162 break;
163 case Opt_pci:
164 v9ses->proto = PROTO_PCI;
165 break;
161 case Opt_fd: 166 case Opt_fd:
162 v9ses->proto = PROTO_FD; 167 v9ses->proto = PROTO_FD;
163 break; 168 break;
@@ -183,82 +188,6 @@ static void v9fs_parse_options(char *options, struct v9fs_session_info *v9ses)
183} 188}
184 189
185/** 190/**
186 * v9fs_inode2v9ses - safely extract v9fs session info from super block
187 * @inode: inode to extract information from
188 *
189 * Paranoid function to extract v9ses information from superblock,
190 * if anything is missing it will report an error.
191 *
192 */
193
194struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode)
195{
196 return (inode->i_sb->s_fs_info);
197}
198
199/**
200 * v9fs_get_idpool - allocate numeric id from pool
201 * @p - pool to allocate from
202 *
203 * XXX - This seems to be an awful generic function, should it be in idr.c with
204 * the lock included in struct idr?
205 */
206
207int v9fs_get_idpool(struct v9fs_idpool *p)
208{
209 int i = 0;
210 int error;
211
212retry:
213 if (idr_pre_get(&p->pool, GFP_KERNEL) == 0)
214 return 0;
215
216 if (down_interruptible(&p->lock) == -EINTR) {
217 eprintk(KERN_WARNING, "Interrupted while locking\n");
218 return -1;
219 }
220
221 /* no need to store exactly p, we just need something non-null */
222 error = idr_get_new(&p->pool, p, &i);
223 up(&p->lock);
224
225 if (error == -EAGAIN)
226 goto retry;
227 else if (error)
228 return -1;
229
230 return i;
231}
232
233/**
234 * v9fs_put_idpool - release numeric id from pool
235 * @p - pool to allocate from
236 *
237 * XXX - This seems to be an awful generic function, should it be in idr.c with
238 * the lock included in struct idr?
239 */
240
241void v9fs_put_idpool(int id, struct v9fs_idpool *p)
242{
243 if (down_interruptible(&p->lock) == -EINTR) {
244 eprintk(KERN_WARNING, "Interrupted while locking\n");
245 return;
246 }
247 idr_remove(&p->pool, id);
248 up(&p->lock);
249}
250
251/**
252 * v9fs_check_idpool - check if the specified id is available
253 * @id - id to check
254 * @p - pool
255 */
256int v9fs_check_idpool(int id, struct v9fs_idpool *p)
257{
258 return idr_find(&p->pool, id) != NULL;
259}
260
261/**
262 * v9fs_session_init - initialize session 191 * v9fs_session_init - initialize session
263 * @v9ses: session information structure 192 * @v9ses: session information structure
264 * @dev_name: device being mounted 193 * @dev_name: device being mounted
@@ -266,25 +195,21 @@ int v9fs_check_idpool(int id, struct v9fs_idpool *p)
266 * 195 *
267 */ 196 */
268 197
269int 198struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
270v9fs_session_init(struct v9fs_session_info *v9ses,
271 const char *dev_name, char *data) 199 const char *dev_name, char *data)
272{ 200{
273 struct v9fs_fcall *fcall = NULL;
274 struct v9fs_transport *trans_proto;
275 int n = 0;
276 int newfid = -1;
277 int retval = -EINVAL; 201 int retval = -EINVAL;
278 struct v9fs_str *version; 202 struct p9_transport *trans;
203 struct p9_fid *fid;
279 204
280 v9ses->name = __getname(); 205 v9ses->name = __getname();
281 if (!v9ses->name) 206 if (!v9ses->name)
282 return -ENOMEM; 207 return ERR_PTR(-ENOMEM);
283 208
284 v9ses->remotename = __getname(); 209 v9ses->remotename = __getname();
285 if (!v9ses->remotename) { 210 if (!v9ses->remotename) {
286 __putname(v9ses->name); 211 __putname(v9ses->name);
287 return -ENOMEM; 212 return ERR_PTR(-ENOMEM);
288 } 213 }
289 214
290 strcpy(v9ses->name, V9FS_DEFUSER); 215 strcpy(v9ses->name, V9FS_DEFUSER);
@@ -292,130 +217,60 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
292 217
293 v9fs_parse_options(data, v9ses); 218 v9fs_parse_options(data, v9ses);
294 219
295 /* set global debug level */
296 v9fs_debug_level = v9ses->debug;
297
298 /* id pools that are session-dependent: fids and tags */
299 idr_init(&v9ses->fidpool.pool);
300 init_MUTEX(&v9ses->fidpool.lock);
301
302 switch (v9ses->proto) { 220 switch (v9ses->proto) {
303 case PROTO_TCP: 221 case PROTO_TCP:
304 trans_proto = &v9fs_trans_tcp; 222 trans = p9_trans_create_tcp(dev_name, v9ses->port);
305 break; 223 break;
306 case PROTO_UNIX: 224 case PROTO_UNIX:
307 trans_proto = &v9fs_trans_unix; 225 trans = p9_trans_create_unix(dev_name);
308 *v9ses->remotename = 0; 226 *v9ses->remotename = 0;
309 break; 227 break;
310 case PROTO_FD: 228 case PROTO_FD:
311 trans_proto = &v9fs_trans_fd; 229 trans = p9_trans_create_fd(v9ses->rfdno, v9ses->wfdno);
312 *v9ses->remotename = 0; 230 *v9ses->remotename = 0;
313 break; 231 break;
232#ifdef CONFIG_PCI_9P
233 case PROTO_PCI:
234 trans = p9pci_trans_create();
235 *v9ses->remotename = 0;
236 break;
237#endif
314 default: 238 default:
315 printk(KERN_ERR "v9fs: Bad mount protocol %d\n", v9ses->proto); 239 printk(KERN_ERR "v9fs: Bad mount protocol %d\n", v9ses->proto);
316 retval = -ENOPROTOOPT; 240 retval = -ENOPROTOOPT;
317 goto SessCleanUp; 241 goto error;
318 }; 242 };
319 243
320 v9ses->transport = kmalloc(sizeof(*v9ses->transport), GFP_KERNEL); 244 if (IS_ERR(trans)) {
321 if (!v9ses->transport) { 245 retval = PTR_ERR(trans);
322 retval = -ENOMEM; 246 trans = NULL;
323 goto SessCleanUp; 247 goto error;
324 } 248 }
325 249
326 memmove(v9ses->transport, trans_proto, sizeof(*v9ses->transport)); 250 v9ses->clnt = p9_client_create(trans, v9ses->maxdata + P9_IOHDRSZ,
251 v9ses->extended);
327 252
328 if ((retval = v9ses->transport->init(v9ses, dev_name, data)) < 0) { 253 if (IS_ERR(v9ses->clnt)) {
329 eprintk(KERN_ERR, "problem initializing transport\n"); 254 retval = PTR_ERR(v9ses->clnt);
330 goto SessCleanUp; 255 v9ses->clnt = NULL;
256 P9_DPRINTK(P9_DEBUG_ERROR, "problem initializing 9p client\n");
257 goto error;
331 } 258 }
332 259
333 v9ses->inprogress = 0; 260 fid = p9_client_attach(v9ses->clnt, NULL, v9ses->name,
334 v9ses->shutdown = 0; 261 v9ses->remotename);
335 v9ses->session_hung = 0; 262 if (IS_ERR(fid)) {
336 263 retval = PTR_ERR(fid);
337 v9ses->mux = v9fs_mux_init(v9ses->transport, v9ses->maxdata + V9FS_IOHDRSZ, 264 fid = NULL;
338 &v9ses->extended); 265 P9_DPRINTK(P9_DEBUG_ERROR, "cannot attach\n");
339 266 goto error;
340 if (IS_ERR(v9ses->mux)) {
341 retval = PTR_ERR(v9ses->mux);
342 v9ses->mux = NULL;
343 dprintk(DEBUG_ERROR, "problem initializing mux\n");
344 goto SessCleanUp;
345 } 267 }
346 268
347 if (v9ses->afid == ~0) { 269 return fid;
348 if (v9ses->extended)
349 retval =
350 v9fs_t_version(v9ses, v9ses->maxdata, "9P2000.u",
351 &fcall);
352 else
353 retval = v9fs_t_version(v9ses, v9ses->maxdata, "9P2000",
354 &fcall);
355
356 if (retval < 0) {
357 dprintk(DEBUG_ERROR, "v9fs_t_version failed\n");
358 goto FreeFcall;
359 }
360
361 version = &fcall->params.rversion.version;
362 if (version->len==8 && !memcmp(version->str, "9P2000.u", 8)) {
363 dprintk(DEBUG_9P, "9P2000 UNIX extensions enabled\n");
364 v9ses->extended = 1;
365 } else if (version->len==6 && !memcmp(version->str, "9P2000", 6)) {
366 dprintk(DEBUG_9P, "9P2000 legacy mode enabled\n");
367 v9ses->extended = 0;
368 } else {
369 retval = -EREMOTEIO;
370 goto FreeFcall;
371 }
372 270
373 n = fcall->params.rversion.msize; 271error:
374 kfree(fcall);
375
376 if (n < v9ses->maxdata)
377 v9ses->maxdata = n;
378 }
379
380 newfid = v9fs_get_idpool(&v9ses->fidpool);
381 if (newfid < 0) {
382 eprintk(KERN_WARNING, "couldn't allocate FID\n");
383 retval = -ENOMEM;
384 goto SessCleanUp;
385 }
386 /* it is a little bit ugly, but we have to prevent newfid */
387 /* being the same as afid, so if it is, get a new fid */
388 if (v9ses->afid != ~0 && newfid == v9ses->afid) {
389 newfid = v9fs_get_idpool(&v9ses->fidpool);
390 if (newfid < 0) {
391 eprintk(KERN_WARNING, "couldn't allocate FID\n");
392 retval = -ENOMEM;
393 goto SessCleanUp;
394 }
395 }
396
397 if ((retval =
398 v9fs_t_attach(v9ses, v9ses->name, v9ses->remotename, newfid,
399 v9ses->afid, NULL))
400 < 0) {
401 dprintk(DEBUG_ERROR, "cannot attach\n");
402 goto SessCleanUp;
403 }
404
405 if (v9ses->afid != ~0) {
406 dprintk(DEBUG_ERROR, "afid not equal to ~0\n");
407 if (v9fs_t_clunk(v9ses, v9ses->afid))
408 dprintk(DEBUG_ERROR, "clunk failed\n");
409 }
410
411 return newfid;
412
413 FreeFcall:
414 kfree(fcall);
415
416 SessCleanUp:
417 v9fs_session_close(v9ses); 272 v9fs_session_close(v9ses);
418 return retval; 273 return ERR_PTR(retval);
419} 274}
420 275
421/** 276/**
@@ -426,15 +281,9 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
426 281
427void v9fs_session_close(struct v9fs_session_info *v9ses) 282void v9fs_session_close(struct v9fs_session_info *v9ses)
428{ 283{
429 if (v9ses->mux) { 284 if (v9ses->clnt) {
430 v9fs_mux_destroy(v9ses->mux); 285 p9_client_destroy(v9ses->clnt);
431 v9ses->mux = NULL; 286 v9ses->clnt = NULL;
432 }
433
434 if (v9ses->transport) {
435 v9ses->transport->close(v9ses->transport);
436 kfree(v9ses->transport);
437 v9ses->transport = NULL;
438 } 287 }
439 288
440 __putname(v9ses->name); 289 __putname(v9ses->name);
@@ -446,9 +295,8 @@ void v9fs_session_close(struct v9fs_session_info *v9ses)
446 * and cancel all pending requests. 295 * and cancel all pending requests.
447 */ 296 */
448void v9fs_session_cancel(struct v9fs_session_info *v9ses) { 297void v9fs_session_cancel(struct v9fs_session_info *v9ses) {
449 dprintk(DEBUG_ERROR, "cancel session %p\n", v9ses); 298 P9_DPRINTK(P9_DEBUG_ERROR, "cancel session %p\n", v9ses);
450 v9ses->transport->status = Disconnected; 299 p9_client_disconnect(v9ses->clnt);
451 v9fs_mux_cancel(v9ses->mux, -EIO);
452} 300}
453 301
454extern int v9fs_error_init(void); 302extern int v9fs_error_init(void);
@@ -460,24 +308,9 @@ extern int v9fs_error_init(void);
460 308
461static int __init init_v9fs(void) 309static int __init init_v9fs(void)
462{ 310{
463 int ret;
464
465 v9fs_error_init();
466
467 printk(KERN_INFO "Installing v9fs 9p2000 file system support\n"); 311 printk(KERN_INFO "Installing v9fs 9p2000 file system support\n");
468 312
469 ret = v9fs_mux_global_init(); 313 return register_filesystem(&v9fs_fs_type);
470 if (ret) {
471 printk(KERN_WARNING "v9fs: starting mux failed\n");
472 return ret;
473 }
474 ret = register_filesystem(&v9fs_fs_type);
475 if (ret) {
476 printk(KERN_WARNING "v9fs: registering file system failed\n");
477 v9fs_mux_global_exit();
478 }
479
480 return ret;
481} 314}
482 315
483/** 316/**
@@ -487,13 +320,13 @@ static int __init init_v9fs(void)
487 320
488static void __exit exit_v9fs(void) 321static void __exit exit_v9fs(void)
489{ 322{
490 v9fs_mux_global_exit();
491 unregister_filesystem(&v9fs_fs_type); 323 unregister_filesystem(&v9fs_fs_type);
492} 324}
493 325
494module_init(init_v9fs) 326module_init(init_v9fs)
495module_exit(exit_v9fs) 327module_exit(exit_v9fs)
496 328
329MODULE_AUTHOR("Latchesar Ionkov <lucho@ionkov.net>");
497MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>"); 330MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>");
498MODULE_AUTHOR("Ron Minnich <rminnich@lanl.gov>"); 331MODULE_AUTHOR("Ron Minnich <rminnich@lanl.gov>");
499MODULE_LICENSE("GPL"); 332MODULE_LICENSE("GPL");
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 820bf5ca35d..abc4b1668ac 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -22,16 +22,6 @@
22 */ 22 */
23 23
24/* 24/*
25 * Idpool structure provides lock and id management
26 *
27 */
28
29struct v9fs_idpool {
30 struct semaphore lock;
31 struct idr pool;
32};
33
34/*
35 * Session structure provides information for an opened session 25 * Session structure provides information for an opened session
36 * 26 *
37 */ 27 */
@@ -54,15 +44,7 @@ struct v9fs_session_info {
54 unsigned int uid; /* default uid/muid for legacy support */ 44 unsigned int uid; /* default uid/muid for legacy support */
55 unsigned int gid; /* default gid for legacy support */ 45 unsigned int gid; /* default gid for legacy support */
56 46
57 /* book keeping */ 47 struct p9_client *clnt; /* 9p client */
58 struct v9fs_idpool fidpool; /* The FID pool for file descriptors */
59
60 struct v9fs_transport *transport;
61 struct v9fs_mux_data *mux;
62
63 int inprogress; /* session in progress => true */
64 int shutdown; /* session shutting down. no more attaches. */
65 unsigned char session_hung;
66 struct dentry *debugfs_dir; 48 struct dentry *debugfs_dir;
67}; 49};
68 50
@@ -71,6 +53,7 @@ enum {
71 PROTO_TCP, 53 PROTO_TCP,
72 PROTO_UNIX, 54 PROTO_UNIX,
73 PROTO_FD, 55 PROTO_FD,
56 PROTO_PCI,
74}; 57};
75 58
76/* possible values of ->cache */ 59/* possible values of ->cache */
@@ -82,12 +65,9 @@ enum {
82 65
83extern struct dentry *v9fs_debugfs_root; 66extern struct dentry *v9fs_debugfs_root;
84 67
85int v9fs_session_init(struct v9fs_session_info *, const char *, char *); 68struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *,
86struct v9fs_session_info *v9fs_inode2v9ses(struct inode *); 69 char *);
87void v9fs_session_close(struct v9fs_session_info *v9ses); 70void v9fs_session_close(struct v9fs_session_info *v9ses);
88int v9fs_get_idpool(struct v9fs_idpool *p);
89void v9fs_put_idpool(int id, struct v9fs_idpool *p);
90int v9fs_check_idpool(int id, struct v9fs_idpool *p);
91void v9fs_session_cancel(struct v9fs_session_info *v9ses); 71void v9fs_session_cancel(struct v9fs_session_info *v9ses);
92 72
93#define V9FS_MAGIC 0x01021997 73#define V9FS_MAGIC 0x01021997
@@ -97,3 +77,7 @@ void v9fs_session_cancel(struct v9fs_session_info *v9ses);
97#define V9FS_DEFUSER "nobody" 77#define V9FS_DEFUSER "nobody"
98#define V9FS_DEFANAME "" 78#define V9FS_DEFANAME ""
99 79
80static inline struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode)
81{
82 return (inode->i_sb->s_fs_info);
83}
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 6a82d39dc49..fd01d90cada 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -45,10 +45,10 @@ extern struct dentry_operations v9fs_dentry_operations;
45extern struct dentry_operations v9fs_cached_dentry_operations; 45extern struct dentry_operations v9fs_cached_dentry_operations;
46 46
47struct inode *v9fs_get_inode(struct super_block *sb, int mode); 47struct inode *v9fs_get_inode(struct super_block *sb, int mode);
48ino_t v9fs_qid2ino(struct v9fs_qid *qid); 48ino_t v9fs_qid2ino(struct p9_qid *qid);
49void v9fs_stat2inode(struct v9fs_stat *, struct inode *, struct super_block *); 49void v9fs_stat2inode(struct p9_stat *, struct inode *, struct super_block *);
50int v9fs_dir_release(struct inode *inode, struct file *filp); 50int v9fs_dir_release(struct inode *inode, struct file *filp);
51int v9fs_file_open(struct inode *inode, struct file *file); 51int v9fs_file_open(struct inode *inode, struct file *file);
52void v9fs_inode2stat(struct inode *inode, struct v9fs_stat *stat); 52void v9fs_inode2stat(struct inode *inode, struct p9_stat *stat);
53void v9fs_dentry_release(struct dentry *); 53void v9fs_dentry_release(struct dentry *);
54int v9fs_uflags2omode(int uflags); 54int v9fs_uflags2omode(int uflags);
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 9ac4ffe9ac7..6248f0e727a 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -33,10 +33,10 @@
33#include <linux/pagemap.h> 33#include <linux/pagemap.h>
34#include <linux/idr.h> 34#include <linux/idr.h>
35#include <linux/sched.h> 35#include <linux/sched.h>
36#include <net/9p/9p.h>
37#include <net/9p/client.h>
36 38
37#include "debug.h"
38#include "v9fs.h" 39#include "v9fs.h"
39#include "9p.h"
40#include "v9fs_vfs.h" 40#include "v9fs_vfs.h"
41#include "fid.h" 41#include "fid.h"
42 42
@@ -50,55 +50,26 @@
50 50
51static int v9fs_vfs_readpage(struct file *filp, struct page *page) 51static int v9fs_vfs_readpage(struct file *filp, struct page *page)
52{ 52{
53 char *buffer = NULL; 53 int retval;
54 int retval = -EIO; 54 loff_t offset;
55 loff_t offset = page_offset(page); 55 char *buffer;
56 int count = PAGE_CACHE_SIZE; 56 struct p9_fid *fid;
57 struct inode *inode = filp->f_path.dentry->d_inode;
58 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
59 int rsize = v9ses->maxdata - V9FS_IOHDRSZ;
60 struct v9fs_fid *v9f = filp->private_data;
61 struct v9fs_fcall *fcall = NULL;
62 int fid = v9f->fid;
63 int total = 0;
64 int result = 0;
65
66 dprintk(DEBUG_VFS, "\n");
67 57
58 P9_DPRINTK(P9_DEBUG_VFS, "\n");
59 fid = filp->private_data;
68 buffer = kmap(page); 60 buffer = kmap(page);
69 do { 61 offset = page_offset(page);
70 if (count < rsize)
71 rsize = count;
72
73 result = v9fs_t_read(v9ses, fid, offset, rsize, &fcall);
74
75 if (result < 0) {
76 printk(KERN_ERR "v9fs_t_read returned %d\n",
77 result);
78
79 kfree(fcall);
80 goto UnmapAndUnlock;
81 } else
82 offset += result;
83
84 memcpy(buffer, fcall->params.rread.data, result);
85
86 count -= result;
87 buffer += result;
88 total += result;
89
90 kfree(fcall);
91 62
92 if (result < rsize) 63 retval = p9_client_readn(fid, buffer, offset, PAGE_CACHE_SIZE);
93 break; 64 if (retval < 0)
94 } while (count); 65 goto done;
95 66
96 memset(buffer, 0, count); 67 memset(buffer + retval, 0, PAGE_CACHE_SIZE - retval);
97 flush_dcache_page(page); 68 flush_dcache_page(page);
98 SetPageUptodate(page); 69 SetPageUptodate(page);
99 retval = 0; 70 retval = 0;
100 71
101UnmapAndUnlock: 72done:
102 kunmap(page); 73 kunmap(page);
103 unlock_page(page); 74 unlock_page(page);
104 return retval; 75 return retval;
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index d93960429c0..f9534f18df0 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -34,10 +34,10 @@
34#include <linux/namei.h> 34#include <linux/namei.h>
35#include <linux/idr.h> 35#include <linux/idr.h>
36#include <linux/sched.h> 36#include <linux/sched.h>
37#include <net/9p/9p.h>
38#include <net/9p/client.h>
37 39
38#include "debug.h"
39#include "v9fs.h" 40#include "v9fs.h"
40#include "9p.h"
41#include "v9fs_vfs.h" 41#include "v9fs_vfs.h"
42#include "fid.h" 42#include "fid.h"
43 43
@@ -52,7 +52,7 @@
52 52
53static int v9fs_dentry_delete(struct dentry *dentry) 53static int v9fs_dentry_delete(struct dentry *dentry)
54{ 54{
55 dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); 55 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
56 56
57 return 1; 57 return 1;
58} 58}
@@ -69,7 +69,7 @@ static int v9fs_dentry_delete(struct dentry *dentry)
69static int v9fs_cached_dentry_delete(struct dentry *dentry) 69static int v9fs_cached_dentry_delete(struct dentry *dentry)
70{ 70{
71 struct inode *inode = dentry->d_inode; 71 struct inode *inode = dentry->d_inode;
72 dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); 72 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
73 73
74 if(!inode) 74 if(!inode)
75 return 1; 75 return 1;
@@ -85,26 +85,19 @@ static int v9fs_cached_dentry_delete(struct dentry *dentry)
85 85
86void v9fs_dentry_release(struct dentry *dentry) 86void v9fs_dentry_release(struct dentry *dentry)
87{ 87{
88 int err; 88 struct v9fs_dentry *dent;
89 89 struct p9_fid *temp, *current_fid;
90 dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); 90
91 91 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
92 if (dentry->d_fsdata != NULL) { 92 dent = dentry->d_fsdata;
93 struct list_head *fid_list = dentry->d_fsdata; 93 if (dent) {
94 struct v9fs_fid *temp = NULL; 94 list_for_each_entry_safe(current_fid, temp, &dent->fidlist,
95 struct v9fs_fid *current_fid = NULL; 95 dlist) {
96 96 p9_client_clunk(current_fid);
97 list_for_each_entry_safe(current_fid, temp, fid_list, list) {
98 err = v9fs_t_clunk(current_fid->v9ses, current_fid->fid);
99
100 if (err < 0)
101 dprintk(DEBUG_ERROR, "clunk failed: %d name %s\n",
102 err, dentry->d_iname);
103
104 v9fs_fid_destroy(current_fid);
105 } 97 }
106 98
107 kfree(dentry->d_fsdata); /* free the list_head */ 99 kfree(dent);
100 dentry->d_fsdata = NULL;
108 } 101 }
109} 102}
110 103
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 1dd86ee90bc..0924d4477da 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -32,11 +32,10 @@
32#include <linux/sched.h> 32#include <linux/sched.h>
33#include <linux/inet.h> 33#include <linux/inet.h>
34#include <linux/idr.h> 34#include <linux/idr.h>
35#include <net/9p/9p.h>
36#include <net/9p/client.h>
35 37
36#include "debug.h"
37#include "v9fs.h" 38#include "v9fs.h"
38#include "9p.h"
39#include "conv.h"
40#include "v9fs_vfs.h" 39#include "v9fs_vfs.h"
41#include "fid.h" 40#include "fid.h"
42 41
@@ -46,14 +45,14 @@
46 * 45 *
47 */ 46 */
48 47
49static inline int dt_type(struct v9fs_stat *mistat) 48static inline int dt_type(struct p9_stat *mistat)
50{ 49{
51 unsigned long perm = mistat->mode; 50 unsigned long perm = mistat->mode;
52 int rettype = DT_REG; 51 int rettype = DT_REG;
53 52
54 if (perm & V9FS_DMDIR) 53 if (perm & P9_DMDIR)
55 rettype = DT_DIR; 54 rettype = DT_DIR;
56 if (perm & V9FS_DMSYMLINK) 55 if (perm & P9_DMSYMLINK)
57 rettype = DT_LNK; 56 rettype = DT_LNK;
58 57
59 return rettype; 58 return rettype;
@@ -69,106 +68,36 @@ static inline int dt_type(struct v9fs_stat *mistat)
69 68
70static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) 69static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
71{ 70{
72 struct v9fs_fcall *fcall = NULL; 71 int over;
73 struct inode *inode = filp->f_path.dentry->d_inode; 72 struct p9_fid *fid;
74 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); 73 struct v9fs_session_info *v9ses;
75 struct v9fs_fid *file = filp->private_data; 74 struct inode *inode;
76 unsigned int i, n, s; 75 struct p9_stat *st;
77 int fid = -1; 76
78 int ret = 0; 77 P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name);
79 struct v9fs_stat stat; 78 inode = filp->f_path.dentry->d_inode;
80 int over = 0; 79 v9ses = v9fs_inode2v9ses(inode);
81 80 fid = filp->private_data;
82 dprintk(DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name); 81 while ((st = p9_client_dirread(fid, filp->f_pos)) != NULL) {
83 82 if (IS_ERR(st))
84 fid = file->fid; 83 return PTR_ERR(st);
85 84
86 if (file->rdir_fcall && (filp->f_pos != file->rdir_pos)) { 85 over = filldir(dirent, st->name.str, st->name.len, filp->f_pos,
87 kfree(file->rdir_fcall); 86 v9fs_qid2ino(&st->qid), dt_type(st));
88 file->rdir_fcall = NULL; 87
89 } 88 if (over)
90
91 if (file->rdir_fcall) {
92 n = file->rdir_fcall->params.rread.count;
93 i = file->rdir_fpos;
94 while (i < n) {
95 s = v9fs_deserialize_stat(
96 file->rdir_fcall->params.rread.data + i,
97 n - i, &stat, v9ses->extended);
98
99 if (s == 0) {
100 dprintk(DEBUG_ERROR,
101 "error while deserializing stat\n");
102 ret = -EIO;
103 goto FreeStructs;
104 }
105
106 over = filldir(dirent, stat.name.str, stat.name.len,
107 filp->f_pos, v9fs_qid2ino(&stat.qid),
108 dt_type(&stat));
109
110 if (over) {
111 file->rdir_fpos = i;
112 file->rdir_pos = filp->f_pos;
113 break;
114 }
115
116 i += s;
117 filp->f_pos += s;
118 }
119
120 if (!over) {
121 kfree(file->rdir_fcall);
122 file->rdir_fcall = NULL;
123 }
124 }
125
126 while (!over) {
127 ret = v9fs_t_read(v9ses, fid, filp->f_pos,
128 v9ses->maxdata-V9FS_IOHDRSZ, &fcall);
129 if (ret < 0) {
130 dprintk(DEBUG_ERROR, "error while reading: %d: %p\n",
131 ret, fcall);
132 goto FreeStructs;
133 } else if (ret == 0)
134 break; 89 break;
135 90
136 n = ret; 91 filp->f_pos += st->size;
137 i = 0; 92 kfree(st);
138 while (i < n) { 93 st = NULL;
139 s = v9fs_deserialize_stat(fcall->params.rread.data + i,
140 n - i, &stat, v9ses->extended);
141
142 if (s == 0) {
143 dprintk(DEBUG_ERROR,
144 "error while deserializing stat\n");
145 return -EIO;
146 }
147
148 over = filldir(dirent, stat.name.str, stat.name.len,
149 filp->f_pos, v9fs_qid2ino(&stat.qid),
150 dt_type(&stat));
151
152 if (over) {
153 file->rdir_fcall = fcall;
154 file->rdir_fpos = i;
155 file->rdir_pos = filp->f_pos;
156 fcall = NULL;
157 break;
158 }
159
160 i += s;
161 filp->f_pos += s;
162 }
163
164 kfree(fcall);
165 } 94 }
166 95
167 FreeStructs: 96 kfree(st);
168 kfree(fcall); 97 return 0;
169 return ret;
170} 98}
171 99
100
172/** 101/**
173 * v9fs_dir_release - close a directory 102 * v9fs_dir_release - close a directory
174 * @inode: inode of the directory 103 * @inode: inode of the directory
@@ -178,29 +107,13 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
178 107
179int v9fs_dir_release(struct inode *inode, struct file *filp) 108int v9fs_dir_release(struct inode *inode, struct file *filp)
180{ 109{
181 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); 110 struct p9_fid *fid;
182 struct v9fs_fid *fid = filp->private_data;
183 int fidnum = -1;
184
185 dprintk(DEBUG_VFS, "inode: %p filp: %p fid: %d\n", inode, filp,
186 fid->fid);
187 fidnum = fid->fid;
188 111
112 fid = filp->private_data;
113 P9_DPRINTK(P9_DEBUG_VFS,
114 "inode: %p filp: %p fid: %d\n", inode, filp, fid->fid);
189 filemap_write_and_wait(inode->i_mapping); 115 filemap_write_and_wait(inode->i_mapping);
190 116 p9_client_clunk(fid);
191 if (fidnum >= 0) {
192 dprintk(DEBUG_VFS, "fidopen: %d v9f->fid: %d\n", fid->fidopen,
193 fid->fid);
194
195 if (v9fs_t_clunk(v9ses, fidnum))
196 dprintk(DEBUG_ERROR, "clunk failed\n");
197
198 kfree(fid->rdir_fcall);
199 kfree(fid);
200
201 filp->private_data = NULL;
202 }
203
204 return 0; 117 return 0;
205} 118}
206 119
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 6e7678e4852..2a40c2946d0 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -34,10 +34,10 @@
34#include <linux/list.h> 34#include <linux/list.h>
35#include <asm/uaccess.h> 35#include <asm/uaccess.h>
36#include <linux/idr.h> 36#include <linux/idr.h>
37#include <net/9p/9p.h>
38#include <net/9p/client.h>
37 39
38#include "debug.h"
39#include "v9fs.h" 40#include "v9fs.h"
40#include "9p.h"
41#include "v9fs_vfs.h" 41#include "v9fs_vfs.h"
42#include "fid.h" 42#include "fid.h"
43 43
@@ -52,48 +52,40 @@ static const struct file_operations v9fs_cached_file_operations;
52 52
53int v9fs_file_open(struct inode *inode, struct file *file) 53int v9fs_file_open(struct inode *inode, struct file *file)
54{ 54{
55 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
56 struct v9fs_fid *vfid;
57 struct v9fs_fcall *fcall = NULL;
58 int omode;
59 int err; 55 int err;
56 struct v9fs_session_info *v9ses;
57 struct p9_fid *fid;
58 int omode;
60 59
61 dprintk(DEBUG_VFS, "inode: %p file: %p \n", inode, file); 60 P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p \n", inode, file);
62 61 v9ses = v9fs_inode2v9ses(inode);
63 vfid = v9fs_fid_clone(file->f_path.dentry);
64 if (IS_ERR(vfid))
65 return PTR_ERR(vfid);
66
67 omode = v9fs_uflags2omode(file->f_flags); 62 omode = v9fs_uflags2omode(file->f_flags);
68 err = v9fs_t_open(v9ses, vfid->fid, omode, &fcall); 63 fid = file->private_data;
69 if (err < 0) { 64 if (!fid) {
70 PRINT_FCALL_ERROR("open failed", fcall); 65 fid = v9fs_fid_clone(file->f_path.dentry);
71 goto Clunk_Fid; 66 if (IS_ERR(fid))
67 return PTR_ERR(fid);
68
69 err = p9_client_open(fid, omode);
70 if (err < 0) {
71 p9_client_clunk(fid);
72 return err;
73 }
74 if (omode & P9_OTRUNC) {
75 inode->i_size = 0;
76 inode->i_blocks = 0;
77 }
72 } 78 }
73 79
74 file->private_data = vfid; 80 file->private_data = fid;
75 vfid->fidopen = 1; 81 if ((fid->qid.version) && (v9ses->cache)) {
76 vfid->fidclunked = 0; 82 P9_DPRINTK(P9_DEBUG_VFS, "cached");
77 vfid->iounit = fcall->params.ropen.iounit;
78 vfid->rdir_pos = 0;
79 vfid->rdir_fcall = NULL;
80 vfid->filp = file;
81 kfree(fcall);
82
83 if((vfid->qid.version) && (v9ses->cache)) {
84 dprintk(DEBUG_VFS, "cached");
85 /* enable cached file options */ 83 /* enable cached file options */
86 if(file->f_op == &v9fs_file_operations) 84 if(file->f_op == &v9fs_file_operations)
87 file->f_op = &v9fs_cached_file_operations; 85 file->f_op = &v9fs_cached_file_operations;
88 } 86 }
89 87
90 return 0; 88 return 0;
91
92Clunk_Fid:
93 v9fs_fid_clunk(v9ses, vfid);
94 kfree(fcall);
95
96 return err;
97} 89}
98 90
99/** 91/**
@@ -110,7 +102,7 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
110 int res = 0; 102 int res = 0;
111 struct inode *inode = filp->f_path.dentry->d_inode; 103 struct inode *inode = filp->f_path.dentry->d_inode;
112 104
113 dprintk(DEBUG_VFS, "filp: %p lock: %p\n", filp, fl); 105 P9_DPRINTK(P9_DEBUG_VFS, "filp: %p lock: %p\n", filp, fl);
114 106
115 /* No mandatory locks */ 107 /* No mandatory locks */
116 if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) 108 if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
@@ -136,55 +128,16 @@ static ssize_t
136v9fs_file_read(struct file *filp, char __user * data, size_t count, 128v9fs_file_read(struct file *filp, char __user * data, size_t count,
137 loff_t * offset) 129 loff_t * offset)
138{ 130{
139 struct inode *inode = filp->f_path.dentry->d_inode; 131 int ret;
140 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); 132 struct p9_fid *fid;
141 struct v9fs_fid *v9f = filp->private_data;
142 struct v9fs_fcall *fcall = NULL;
143 int fid = v9f->fid;
144 int rsize = 0;
145 int result = 0;
146 int total = 0;
147 int n;
148
149 dprintk(DEBUG_VFS, "\n");
150
151 rsize = v9ses->maxdata - V9FS_IOHDRSZ;
152 if (v9f->iounit != 0 && rsize > v9f->iounit)
153 rsize = v9f->iounit;
154
155 do {
156 if (count < rsize)
157 rsize = count;
158 133
159 result = v9fs_t_read(v9ses, fid, *offset, rsize, &fcall); 134 P9_DPRINTK(P9_DEBUG_VFS, "\n");
135 fid = filp->private_data;
136 ret = p9_client_uread(fid, data, *offset, count);
137 if (ret > 0)
138 *offset += ret;
160 139
161 if (result < 0) { 140 return ret;
162 printk(KERN_ERR "9P2000: v9fs_t_read returned %d\n",
163 result);
164
165 kfree(fcall);
166 return total;
167 } else
168 *offset += result;
169
170 n = copy_to_user(data, fcall->params.rread.data, result);
171 if (n) {
172 dprintk(DEBUG_ERROR, "Problem copying to user %d\n", n);
173 kfree(fcall);
174 return -EFAULT;
175 }
176
177 count -= result;
178 data += result;
179 total += result;
180
181 kfree(fcall);
182
183 if (result < rsize)
184 break;
185 } while (count);
186
187 return total;
188} 141}
189 142
190/** 143/**
@@ -200,50 +153,25 @@ static ssize_t
200v9fs_file_write(struct file *filp, const char __user * data, 153v9fs_file_write(struct file *filp, const char __user * data,
201 size_t count, loff_t * offset) 154 size_t count, loff_t * offset)
202{ 155{
156 int ret;
157 struct p9_fid *fid;
203 struct inode *inode = filp->f_path.dentry->d_inode; 158 struct inode *inode = filp->f_path.dentry->d_inode;
204 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
205 struct v9fs_fid *v9fid = filp->private_data;
206 struct v9fs_fcall *fcall;
207 int fid = v9fid->fid;
208 int result = -EIO;
209 int rsize = 0;
210 int total = 0;
211
212 dprintk(DEBUG_VFS, "data %p count %d offset %x\n", data, (int)count,
213 (int)*offset);
214 rsize = v9ses->maxdata - V9FS_IOHDRSZ;
215 if (v9fid->iounit != 0 && rsize > v9fid->iounit)
216 rsize = v9fid->iounit;
217
218 do {
219 if (count < rsize)
220 rsize = count;
221 159
222 result = v9fs_t_write(v9ses, fid, *offset, rsize, data, &fcall); 160 P9_DPRINTK(P9_DEBUG_VFS, "data %p count %d offset %x\n", data,
223 if (result < 0) { 161 (int)count, (int)*offset);
224 PRINT_FCALL_ERROR("error while writing", fcall);
225 kfree(fcall);
226 return result;
227 } else
228 *offset += result;
229 162
230 kfree(fcall); 163 fid = filp->private_data;
231 fcall = NULL; 164 ret = p9_client_uwrite(fid, data, *offset, count);
165 if (ret > 0)
166 *offset += ret;
232 167
233 if (result != rsize) { 168 if (*offset > inode->i_size) {
234 eprintk(KERN_ERR, 169 inode->i_size = *offset;
235 "short write: v9fs_t_write returned %d\n", 170 inode->i_blocks = (inode->i_size + 512 - 1) >> 9;
236 result); 171 }
237 break;
238 }
239
240 count -= result;
241 data += result;
242 total += result;
243 } while (count);
244 172
245 invalidate_inode_pages2(inode->i_mapping); 173 invalidate_inode_pages2(inode->i_mapping);
246 return total; 174 return ret;
247} 175}
248 176
249static const struct file_operations v9fs_cached_file_operations = { 177static const struct file_operations v9fs_cached_file_operations = {
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index c76cd8fa3f6..e5c45eed58a 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -34,10 +34,10 @@
34#include <linux/namei.h> 34#include <linux/namei.h>
35#include <linux/idr.h> 35#include <linux/idr.h>
36#include <linux/sched.h> 36#include <linux/sched.h>
37#include <net/9p/9p.h>
38#include <net/9p/client.h>
37 39
38#include "debug.h"
39#include "v9fs.h" 40#include "v9fs.h"
40#include "9p.h"
41#include "v9fs_vfs.h" 41#include "v9fs_vfs.h"
42#include "fid.h" 42#include "fid.h"
43 43
@@ -58,27 +58,27 @@ static int unixmode2p9mode(struct v9fs_session_info *v9ses, int mode)
58 int res; 58 int res;
59 res = mode & 0777; 59 res = mode & 0777;
60 if (S_ISDIR(mode)) 60 if (S_ISDIR(mode))
61 res |= V9FS_DMDIR; 61 res |= P9_DMDIR;
62 if (v9ses->extended) { 62 if (v9ses->extended) {
63 if (S_ISLNK(mode)) 63 if (S_ISLNK(mode))
64 res |= V9FS_DMSYMLINK; 64 res |= P9_DMSYMLINK;
65 if (v9ses->nodev == 0) { 65 if (v9ses->nodev == 0) {
66 if (S_ISSOCK(mode)) 66 if (S_ISSOCK(mode))
67 res |= V9FS_DMSOCKET; 67 res |= P9_DMSOCKET;
68 if (S_ISFIFO(mode)) 68 if (S_ISFIFO(mode))
69 res |= V9FS_DMNAMEDPIPE; 69 res |= P9_DMNAMEDPIPE;
70 if (S_ISBLK(mode)) 70 if (S_ISBLK(mode))
71 res |= V9FS_DMDEVICE; 71 res |= P9_DMDEVICE;
72 if (S_ISCHR(mode)) 72 if (S_ISCHR(mode))
73 res |= V9FS_DMDEVICE; 73 res |= P9_DMDEVICE;
74 } 74 }
75 75
76 if ((mode & S_ISUID) == S_ISUID) 76 if ((mode & S_ISUID) == S_ISUID)
77 res |= V9FS_DMSETUID; 77 res |= P9_DMSETUID;
78 if ((mode & S_ISGID) == S_ISGID) 78 if ((mode & S_ISGID) == S_ISGID)
79 res |= V9FS_DMSETGID; 79 res |= P9_DMSETGID;
80 if ((mode & V9FS_DMLINK)) 80 if ((mode & P9_DMLINK))
81 res |= V9FS_DMLINK; 81 res |= P9_DMLINK;
82 } 82 }
83 83
84 return res; 84 return res;
@@ -97,27 +97,27 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
97 97
98 res = mode & 0777; 98 res = mode & 0777;
99 99
100 if ((mode & V9FS_DMDIR) == V9FS_DMDIR) 100 if ((mode & P9_DMDIR) == P9_DMDIR)
101 res |= S_IFDIR; 101 res |= S_IFDIR;
102 else if ((mode & V9FS_DMSYMLINK) && (v9ses->extended)) 102 else if ((mode & P9_DMSYMLINK) && (v9ses->extended))
103 res |= S_IFLNK; 103 res |= S_IFLNK;
104 else if ((mode & V9FS_DMSOCKET) && (v9ses->extended) 104 else if ((mode & P9_DMSOCKET) && (v9ses->extended)
105 && (v9ses->nodev == 0)) 105 && (v9ses->nodev == 0))
106 res |= S_IFSOCK; 106 res |= S_IFSOCK;
107 else if ((mode & V9FS_DMNAMEDPIPE) && (v9ses->extended) 107 else if ((mode & P9_DMNAMEDPIPE) && (v9ses->extended)
108 && (v9ses->nodev == 0)) 108 && (v9ses->nodev == 0))
109 res |= S_IFIFO; 109 res |= S_IFIFO;
110 else if ((mode & V9FS_DMDEVICE) && (v9ses->extended) 110 else if ((mode & P9_DMDEVICE) && (v9ses->extended)
111 && (v9ses->nodev == 0)) 111 && (v9ses->nodev == 0))
112 res |= S_IFBLK; 112 res |= S_IFBLK;
113 else 113 else
114 res |= S_IFREG; 114 res |= S_IFREG;
115 115
116 if (v9ses->extended) { 116 if (v9ses->extended) {
117 if ((mode & V9FS_DMSETUID) == V9FS_DMSETUID) 117 if ((mode & P9_DMSETUID) == P9_DMSETUID)
118 res |= S_ISUID; 118 res |= S_ISUID;
119 119
120 if ((mode & V9FS_DMSETGID) == V9FS_DMSETGID) 120 if ((mode & P9_DMSETGID) == P9_DMSETGID)
121 res |= S_ISGID; 121 res |= S_ISGID;
122 } 122 }
123 123
@@ -132,26 +132,26 @@ int v9fs_uflags2omode(int uflags)
132 switch (uflags&3) { 132 switch (uflags&3) {
133 default: 133 default:
134 case O_RDONLY: 134 case O_RDONLY:
135 ret = V9FS_OREAD; 135 ret = P9_OREAD;
136 break; 136 break;
137 137
138 case O_WRONLY: 138 case O_WRONLY:
139 ret = V9FS_OWRITE; 139 ret = P9_OWRITE;
140 break; 140 break;
141 141
142 case O_RDWR: 142 case O_RDWR:
143 ret = V9FS_ORDWR; 143 ret = P9_ORDWR;
144 break; 144 break;
145 } 145 }
146 146
147 if (uflags & O_EXCL) 147 if (uflags & O_EXCL)
148 ret |= V9FS_OEXCL; 148 ret |= P9_OEXCL;
149 149
150 if (uflags & O_TRUNC) 150 if (uflags & O_TRUNC)
151 ret |= V9FS_OTRUNC; 151 ret |= P9_OTRUNC;
152 152
153 if (uflags & O_APPEND) 153 if (uflags & O_APPEND)
154 ret |= V9FS_OAPPEND; 154 ret |= P9_OAPPEND;
155 155
156 return ret; 156 return ret;
157} 157}
@@ -164,7 +164,7 @@ int v9fs_uflags2omode(int uflags)
164 */ 164 */
165 165
166static void 166static void
167v9fs_blank_wstat(struct v9fs_wstat *wstat) 167v9fs_blank_wstat(struct p9_wstat *wstat)
168{ 168{
169 wstat->type = ~0; 169 wstat->type = ~0;
170 wstat->dev = ~0; 170 wstat->dev = ~0;
@@ -197,7 +197,7 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
197 struct inode *inode; 197 struct inode *inode;
198 struct v9fs_session_info *v9ses = sb->s_fs_info; 198 struct v9fs_session_info *v9ses = sb->s_fs_info;
199 199
200 dprintk(DEBUG_VFS, "super block: %p mode: %o\n", sb, mode); 200 P9_DPRINTK(P9_DEBUG_VFS, "super block: %p mode: %o\n", sb, mode);
201 201
202 inode = new_inode(sb); 202 inode = new_inode(sb);
203 if (inode) { 203 if (inode) {
@@ -215,7 +215,8 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
215 case S_IFCHR: 215 case S_IFCHR:
216 case S_IFSOCK: 216 case S_IFSOCK:
217 if(!v9ses->extended) { 217 if(!v9ses->extended) {
218 dprintk(DEBUG_ERROR, "special files without extended mode\n"); 218 P9_DPRINTK(P9_DEBUG_ERROR,
219 "special files without extended mode\n");
219 return ERR_PTR(-EINVAL); 220 return ERR_PTR(-EINVAL);
220 } 221 }
221 init_special_inode(inode, inode->i_mode, 222 init_special_inode(inode, inode->i_mode,
@@ -227,7 +228,8 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
227 break; 228 break;
228 case S_IFLNK: 229 case S_IFLNK:
229 if(!v9ses->extended) { 230 if(!v9ses->extended) {
230 dprintk(DEBUG_ERROR, "extended modes used w/o 9P2000.u\n"); 231 P9_DPRINTK(P9_DEBUG_ERROR,
232 "extended modes used w/o 9P2000.u\n");
231 return ERR_PTR(-EINVAL); 233 return ERR_PTR(-EINVAL);
232 } 234 }
233 inode->i_op = &v9fs_symlink_inode_operations; 235 inode->i_op = &v9fs_symlink_inode_operations;
@@ -241,71 +243,19 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
241 inode->i_fop = &v9fs_dir_operations; 243 inode->i_fop = &v9fs_dir_operations;
242 break; 244 break;
243 default: 245 default:
244 dprintk(DEBUG_ERROR, "BAD mode 0x%x S_IFMT 0x%x\n", 246 P9_DPRINTK(P9_DEBUG_ERROR,
247 "BAD mode 0x%x S_IFMT 0x%x\n",
245 mode, mode & S_IFMT); 248 mode, mode & S_IFMT);
246 return ERR_PTR(-EINVAL); 249 return ERR_PTR(-EINVAL);
247 } 250 }
248 } else { 251 } else {
249 eprintk(KERN_WARNING, "Problem allocating inode\n"); 252 P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n");
250 return ERR_PTR(-ENOMEM); 253 return ERR_PTR(-ENOMEM);
251 } 254 }
252 return inode; 255 return inode;
253} 256}
254 257
255static int 258/*
256v9fs_create(struct v9fs_session_info *v9ses, u32 pfid, char *name, u32 perm,
257 u8 mode, char *extension, u32 *fidp, struct v9fs_qid *qid, u32 *iounit)
258{
259 int fid;
260 int err;
261 struct v9fs_fcall *fcall;
262
263 fid = v9fs_get_idpool(&v9ses->fidpool);
264 if (fid < 0) {
265 eprintk(KERN_WARNING, "no free fids available\n");
266 return -ENOSPC;
267 }
268
269 err = v9fs_t_walk(v9ses, pfid, fid, NULL, &fcall);
270 if (err < 0) {
271 PRINT_FCALL_ERROR("clone error", fcall);
272 if (fcall && fcall->id == RWALK)
273 goto clunk_fid;
274 else
275 goto put_fid;
276 }
277 kfree(fcall);
278
279 err = v9fs_t_create(v9ses, fid, name, perm, mode, extension, &fcall);
280 if (err < 0) {
281 PRINT_FCALL_ERROR("create fails", fcall);
282 goto clunk_fid;
283 }
284
285 if (iounit)
286 *iounit = fcall->params.rcreate.iounit;
287
288 if (qid)
289 *qid = fcall->params.rcreate.qid;
290
291 if (fidp)
292 *fidp = fid;
293
294 kfree(fcall);
295 return 0;
296
297clunk_fid:
298 v9fs_t_clunk(v9ses, fid);
299 fid = V9FS_NOFID;
300
301put_fid:
302 if (fid != V9FS_NOFID)
303 v9fs_put_idpool(fid, &v9ses->fidpool);
304
305 kfree(fcall);
306 return err;
307}
308
309static struct v9fs_fid* 259static struct v9fs_fid*
310v9fs_clone_walk(struct v9fs_session_info *v9ses, u32 fid, struct dentry *dentry) 260v9fs_clone_walk(struct v9fs_session_info *v9ses, u32 fid, struct dentry *dentry)
311{ 261{
@@ -355,23 +305,25 @@ error:
355 kfree(fcall); 305 kfree(fcall);
356 return ERR_PTR(err); 306 return ERR_PTR(err);
357} 307}
308*/
358 309
359static struct inode * 310static struct inode *
360v9fs_inode_from_fid(struct v9fs_session_info *v9ses, u32 fid, 311v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
361 struct super_block *sb) 312 struct super_block *sb)
362{ 313{
363 int err, umode; 314 int err, umode;
364 struct inode *ret; 315 struct inode *ret;
365 struct v9fs_fcall *fcall; 316 struct p9_stat *st;
366 317
367 ret = NULL; 318 ret = NULL;
368 err = v9fs_t_stat(v9ses, fid, &fcall); 319 st = p9_client_stat(fid);
369 if (err) { 320 if (IS_ERR(st)) {
370 PRINT_FCALL_ERROR("stat error", fcall); 321 err = PTR_ERR(st);
322 st = NULL;
371 goto error; 323 goto error;
372 } 324 }
373 325
374 umode = p9mode2unixmode(v9ses, fcall->params.rstat.stat.mode); 326 umode = p9mode2unixmode(v9ses, st->mode);
375 ret = v9fs_get_inode(sb, umode); 327 ret = v9fs_get_inode(sb, umode);
376 if (IS_ERR(ret)) { 328 if (IS_ERR(ret)) {
377 err = PTR_ERR(ret); 329 err = PTR_ERR(ret);
@@ -379,12 +331,13 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, u32 fid,
379 goto error; 331 goto error;
380 } 332 }
381 333
382 v9fs_stat2inode(&fcall->params.rstat.stat, ret, sb); 334 v9fs_stat2inode(st, ret, sb);
383 kfree(fcall); 335 ret->i_ino = v9fs_qid2ino(&st->qid);
336 kfree(st);
384 return ret; 337 return ret;
385 338
386error: 339error:
387 kfree(fcall); 340 kfree(st);
388 if (ret) 341 if (ret)
389 iput(ret); 342 iput(ret);
390 343
@@ -401,43 +354,20 @@ error:
401 354
402static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir) 355static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
403{ 356{
404 struct v9fs_fcall *fcall = NULL; 357 struct inode *file_inode;
405 struct super_block *sb = NULL; 358 struct v9fs_session_info *v9ses;
406 struct v9fs_session_info *v9ses = NULL; 359 struct p9_fid *v9fid;
407 struct v9fs_fid *v9fid = NULL;
408 struct inode *file_inode = NULL;
409 int fid = -1;
410 int result = 0;
411 360
412 dprintk(DEBUG_VFS, "inode: %p dentry: %p rmdir: %d\n", dir, file, 361 P9_DPRINTK(P9_DEBUG_VFS, "inode: %p dentry: %p rmdir: %d\n", dir, file,
413 rmdir); 362 rmdir);
414 363
415 file_inode = file->d_inode; 364 file_inode = file->d_inode;
416 sb = file_inode->i_sb;
417 v9ses = v9fs_inode2v9ses(file_inode); 365 v9ses = v9fs_inode2v9ses(file_inode);
418 v9fid = v9fs_fid_clone(file); 366 v9fid = v9fs_fid_clone(file);
419 if(IS_ERR(v9fid)) 367 if(IS_ERR(v9fid))
420 return PTR_ERR(v9fid); 368 return PTR_ERR(v9fid);
421 369
422 fid = v9fid->fid; 370 return p9_client_remove(v9fid);
423 if (fid < 0) {
424 dprintk(DEBUG_ERROR, "inode #%lu, no fid!\n",
425 file_inode->i_ino);
426 return -EBADF;
427 }
428
429 result = v9fs_t_remove(v9ses, fid, &fcall);
430 if (result < 0) {
431 PRINT_FCALL_ERROR("remove fails", fcall);
432 goto Error;
433 }
434
435 v9fs_put_idpool(fid, &v9ses->fidpool);
436 v9fs_fid_destroy(v9fid);
437
438Error:
439 kfree(fcall);
440 return result;
441} 371}
442 372
443static int 373static int
@@ -446,61 +376,59 @@ v9fs_open_created(struct inode *inode, struct file *file)
446 return 0; 376 return 0;
447} 377}
448 378
379
449/** 380/**
450 * v9fs_vfs_create - VFS hook to create files 381 * v9fs_create - Create a file
451 * @inode: directory inode that is being deleted 382 * @dentry: dentry that is being created
452 * @dentry: dentry that is being deleted 383 * @perm: create permissions
453 * @mode: create permissions 384 * @mode: open mode
454 * @nd: path information
455 * 385 *
456 */ 386 */
457 387static struct p9_fid *
458static int 388v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
459v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode, 389 struct dentry *dentry, char *extension, u32 perm, u8 mode)
460 struct nameidata *nd)
461{ 390{
462 int err; 391 int err;
463 u32 fid, perm, iounit; 392 char *name;
464 int flags; 393 struct p9_fid *dfid, *ofid, *fid;
465 struct v9fs_session_info *v9ses;
466 struct v9fs_fid *dfid, *vfid, *ffid;
467 struct inode *inode; 394 struct inode *inode;
468 struct v9fs_qid qid;
469 struct file *filp;
470 395
471 inode = NULL; 396 err = 0;
472 vfid = NULL; 397 ofid = NULL;
473 v9ses = v9fs_inode2v9ses(dir); 398 fid = NULL;
399 name = (char *) dentry->d_name.name;
474 dfid = v9fs_fid_clone(dentry->d_parent); 400 dfid = v9fs_fid_clone(dentry->d_parent);
475 if(IS_ERR(dfid)) { 401 if(IS_ERR(dfid)) {
476 err = PTR_ERR(dfid); 402 err = PTR_ERR(dfid);
403 dfid = NULL;
477 goto error; 404 goto error;
478 } 405 }
479 406
480 perm = unixmode2p9mode(v9ses, mode); 407 /* clone a fid to use for creation */
481 if (nd && nd->flags & LOOKUP_OPEN) 408 ofid = p9_client_walk(dfid, 0, NULL, 1);
482 flags = nd->intent.open.flags - 1; 409 if (IS_ERR(ofid)) {
483 else 410 err = PTR_ERR(ofid);
484 flags = O_RDWR; 411 ofid = NULL;
485 412 goto error;
486 err = v9fs_create(v9ses, dfid->fid, (char *) dentry->d_name.name, 413 }
487 perm, v9fs_uflags2omode(flags), NULL, &fid, &qid, &iounit);
488 414
489 if (err) 415 err = p9_client_fcreate(ofid, name, perm, mode, extension);
490 goto clunk_dfid; 416 if (err < 0)
417 goto error;
491 418
492 vfid = v9fs_clone_walk(v9ses, dfid->fid, dentry); 419 /* now walk from the parent so we can get unopened fid */
493 v9fs_fid_clunk(v9ses, dfid); 420 fid = p9_client_walk(dfid, 1, &name, 0);
494 if (IS_ERR(vfid)) { 421 if (IS_ERR(fid)) {
495 err = PTR_ERR(vfid); 422 err = PTR_ERR(fid);
496 vfid = NULL; 423 fid = NULL;
497 goto error; 424 goto error;
498 } 425 } else
426 dfid = NULL;
499 427
500 inode = v9fs_inode_from_fid(v9ses, vfid->fid, dir->i_sb); 428 /* instantiate inode and assign the unopened fid to the dentry */
429 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
501 if (IS_ERR(inode)) { 430 if (IS_ERR(inode)) {
502 err = PTR_ERR(inode); 431 err = PTR_ERR(inode);
503 inode = NULL;
504 goto error; 432 goto error;
505 } 433 }
506 434
@@ -508,35 +436,78 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
508 dentry->d_op = &v9fs_cached_dentry_operations; 436 dentry->d_op = &v9fs_cached_dentry_operations;
509 else 437 else
510 dentry->d_op = &v9fs_dentry_operations; 438 dentry->d_op = &v9fs_dentry_operations;
439
511 d_instantiate(dentry, inode); 440 d_instantiate(dentry, inode);
441 v9fs_fid_add(dentry, fid);
442 return ofid;
512 443
513 if (nd && nd->flags & LOOKUP_OPEN) { 444error:
514 ffid = v9fs_fid_create(v9ses, fid); 445 if (dfid)
515 if (!ffid) 446 p9_client_clunk(dfid);
516 return -ENOMEM; 447
448 if (ofid)
449 p9_client_clunk(ofid);
450
451 if (fid)
452 p9_client_clunk(fid);
453
454 return ERR_PTR(err);
455}
456
457/**
458 * v9fs_vfs_create - VFS hook to create files
459 * @inode: directory inode that is being created
460 * @dentry: dentry that is being deleted
461 * @mode: create permissions
462 * @nd: path information
463 *
464 */
517 465
466static int
467v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
468 struct nameidata *nd)
469{
470 int err;
471 u32 perm;
472 int flags;
473 struct v9fs_session_info *v9ses;
474 struct p9_fid *fid;
475 struct file *filp;
476
477 err = 0;
478 fid = NULL;
479 v9ses = v9fs_inode2v9ses(dir);
480 perm = unixmode2p9mode(v9ses, mode);
481 if (nd && nd->flags & LOOKUP_OPEN)
482 flags = nd->intent.open.flags - 1;
483 else
484 flags = O_RDWR;
485
486 fid = v9fs_create(v9ses, dir, dentry, NULL, perm,
487 v9fs_uflags2omode(flags));
488 if (IS_ERR(fid)) {
489 err = PTR_ERR(fid);
490 fid = NULL;
491 goto error;
492 }
493
494 /* if we are opening a file, assign the open fid to the file */
495 if (nd && nd->flags & LOOKUP_OPEN) {
518 filp = lookup_instantiate_filp(nd, dentry, v9fs_open_created); 496 filp = lookup_instantiate_filp(nd, dentry, v9fs_open_created);
519 if (IS_ERR(filp)) { 497 if (IS_ERR(filp)) {
520 v9fs_fid_destroy(ffid); 498 err = PTR_ERR(filp);
521 return PTR_ERR(filp); 499 goto error;
522 } 500 }
523 501
524 ffid->rdir_pos = 0; 502 filp->private_data = fid;
525 ffid->rdir_fcall = NULL; 503 } else
526 ffid->fidopen = 1; 504 p9_client_clunk(fid);
527 ffid->iounit = iounit;
528 ffid->filp = filp;
529 filp->private_data = ffid;
530 }
531 505
532 return 0; 506 return 0;
533 507
534clunk_dfid:
535 v9fs_fid_clunk(v9ses, dfid);
536
537error: 508error:
538 if (vfid) 509 if (fid)
539 v9fs_fid_destroy(vfid); 510 p9_client_clunk(fid);
540 511
541 return err; 512 return err;
542} 513}
@@ -552,57 +523,23 @@ error:
552static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 523static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
553{ 524{
554 int err; 525 int err;
555 u32 fid, perm; 526 u32 perm;
556 struct v9fs_session_info *v9ses; 527 struct v9fs_session_info *v9ses;
557 struct v9fs_fid *dfid, *vfid; 528 struct p9_fid *fid;
558 struct inode *inode;
559 529
560 inode = NULL; 530 P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
561 vfid = NULL; 531 err = 0;
562 v9ses = v9fs_inode2v9ses(dir); 532 v9ses = v9fs_inode2v9ses(dir);
563 dfid = v9fs_fid_clone(dentry->d_parent);
564 if(IS_ERR(dfid)) {
565 err = PTR_ERR(dfid);
566 goto error;
567 }
568
569 perm = unixmode2p9mode(v9ses, mode | S_IFDIR); 533 perm = unixmode2p9mode(v9ses, mode | S_IFDIR);
570 534 fid = v9fs_create(v9ses, dir, dentry, NULL, perm, P9_OREAD);
571 err = v9fs_create(v9ses, dfid->fid, (char *) dentry->d_name.name, 535 if (IS_ERR(fid)) {
572 perm, V9FS_OREAD, NULL, &fid, NULL, NULL); 536 err = PTR_ERR(fid);
573 537 fid = NULL;
574 if (err) {
575 dprintk(DEBUG_ERROR, "create error %d\n", err);
576 goto clean_up_dfid;
577 } 538 }
578 539
579 vfid = v9fs_clone_walk(v9ses, dfid->fid, dentry); 540 if (fid)
580 if (IS_ERR(vfid)) { 541 p9_client_clunk(fid);
581 err = PTR_ERR(vfid);
582 vfid = NULL;
583 goto clean_up_dfid;
584 }
585 542
586 v9fs_fid_clunk(v9ses, dfid);
587 inode = v9fs_inode_from_fid(v9ses, vfid->fid, dir->i_sb);
588 if (IS_ERR(inode)) {
589 err = PTR_ERR(inode);
590 inode = NULL;
591 v9fs_fid_destroy(vfid);
592 goto error;
593 }
594
595 if(v9ses->cache)
596 dentry->d_op = &v9fs_cached_dentry_operations;
597 else
598 dentry->d_op = &v9fs_dentry_operations;
599 d_instantiate(dentry, inode);
600 return 0;
601
602clean_up_dfid:
603 v9fs_fid_clunk(v9ses, dfid);
604
605error:
606 return err; 543 return err;
607} 544}
608 545
@@ -619,104 +556,54 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
619{ 556{
620 struct super_block *sb; 557 struct super_block *sb;
621 struct v9fs_session_info *v9ses; 558 struct v9fs_session_info *v9ses;
622 struct v9fs_fid *dirfid; 559 struct p9_fid *dfid, *fid;
623 struct v9fs_fid *fid;
624 struct inode *inode; 560 struct inode *inode;
625 struct v9fs_fcall *fcall = NULL; 561 char *name;
626 int dirfidnum = -1;
627 int newfid = -1;
628 int result = 0; 562 int result = 0;
629 563
630 dprintk(DEBUG_VFS, "dir: %p dentry: (%s) %p nameidata: %p\n", 564 P9_DPRINTK(P9_DEBUG_VFS, "dir: %p dentry: (%s) %p nameidata: %p\n",
631 dir, dentry->d_name.name, dentry, nameidata); 565 dir, dentry->d_name.name, dentry, nameidata);
632 566
633 sb = dir->i_sb; 567 sb = dir->i_sb;
634 v9ses = v9fs_inode2v9ses(dir); 568 v9ses = v9fs_inode2v9ses(dir);
635 dirfid = v9fs_fid_lookup(dentry->d_parent); 569 dfid = v9fs_fid_lookup(dentry->d_parent);
636 570 if (IS_ERR(dfid))
637 if(IS_ERR(dirfid)) 571 return ERR_PTR(PTR_ERR(dfid));
638 return ERR_PTR(PTR_ERR(dirfid)); 572
639 573 name = (char *) dentry->d_name.name;
640 dirfidnum = dirfid->fid; 574 fid = p9_client_walk(dfid, 1, &name, 1);
641 575 if (IS_ERR(fid)) {
642 newfid = v9fs_get_idpool(&v9ses->fidpool); 576 result = PTR_ERR(fid);
643 if (newfid < 0) {
644 eprintk(KERN_WARNING, "newfid fails!\n");
645 result = -ENOSPC;
646 goto Release_Dirfid;
647 }
648
649 result = v9fs_t_walk(v9ses, dirfidnum, newfid,
650 (char *)dentry->d_name.name, &fcall);
651
652 up(&dirfid->lock);
653
654 if (result < 0) {
655 if (fcall && fcall->id == RWALK)
656 v9fs_t_clunk(v9ses, newfid);
657 else
658 v9fs_put_idpool(newfid, &v9ses->fidpool);
659
660 if (result == -ENOENT) { 577 if (result == -ENOENT) {
661 d_add(dentry, NULL); 578 d_add(dentry, NULL);
662 dprintk(DEBUG_VFS,
663 "Return negative dentry %p count %d\n",
664 dentry, atomic_read(&dentry->d_count));
665 kfree(fcall);
666 return NULL; 579 return NULL;
667 } 580 }
668 dprintk(DEBUG_ERROR, "walk error:%d\n", result);
669 goto FreeFcall;
670 }
671 kfree(fcall);
672
673 result = v9fs_t_stat(v9ses, newfid, &fcall);
674 if (result < 0) {
675 dprintk(DEBUG_ERROR, "stat error\n");
676 goto FreeFcall;
677 }
678
679 inode = v9fs_get_inode(sb, p9mode2unixmode(v9ses,
680 fcall->params.rstat.stat.mode));
681 581
682 if (IS_ERR(inode) && (PTR_ERR(inode) == -ENOSPC)) { 582 return ERR_PTR(result);
683 eprintk(KERN_WARNING, "inode alloc failes, returns %ld\n",
684 PTR_ERR(inode));
685
686 result = -ENOSPC;
687 goto FreeFcall;
688 } 583 }
689 584
690 inode->i_ino = v9fs_qid2ino(&fcall->params.rstat.stat.qid); 585 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
691 586 if (IS_ERR(inode)) {
692 fid = v9fs_fid_create(v9ses, newfid); 587 result = PTR_ERR(inode);
693 if (fid == NULL) { 588 inode = NULL;
694 dprintk(DEBUG_ERROR, "couldn't insert\n"); 589 goto error;
695 result = -ENOMEM;
696 goto FreeFcall;
697 } 590 }
698 591
699 result = v9fs_fid_insert(fid, dentry); 592 result = v9fs_fid_add(dentry, fid);
700 if (result < 0) 593 if (result < 0)
701 goto FreeFcall; 594 goto error;
702 595
703 fid->qid = fcall->params.rstat.stat.qid;
704 v9fs_stat2inode(&fcall->params.rstat.stat, inode, inode->i_sb);
705 if((fid->qid.version)&&(v9ses->cache)) 596 if((fid->qid.version)&&(v9ses->cache))
706 dentry->d_op = &v9fs_cached_dentry_operations; 597 dentry->d_op = &v9fs_cached_dentry_operations;
707 else 598 else
708 dentry->d_op = &v9fs_dentry_operations; 599 dentry->d_op = &v9fs_dentry_operations;
709 600
710 d_add(dentry, inode); 601 d_add(dentry, inode);
711 kfree(fcall);
712
713 return NULL; 602 return NULL;
714 603
715Release_Dirfid: 604error:
716 up(&dirfid->lock); 605 if (fid)
717 606 p9_client_clunk(fid);
718FreeFcall:
719 kfree(fcall);
720 607
721 return ERR_PTR(result); 608 return ERR_PTR(result);
722} 609}
@@ -758,73 +645,54 @@ static int
758v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, 645v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
759 struct inode *new_dir, struct dentry *new_dentry) 646 struct inode *new_dir, struct dentry *new_dentry)
760{ 647{
761 struct inode *old_inode = old_dentry->d_inode; 648 struct inode *old_inode;
762 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(old_inode); 649 struct v9fs_session_info *v9ses;
763 struct v9fs_fid *oldfid = v9fs_fid_lookup(old_dentry); 650 struct p9_fid *oldfid;
764 struct v9fs_fid *olddirfid; 651 struct p9_fid *olddirfid;
765 struct v9fs_fid *newdirfid; 652 struct p9_fid *newdirfid;
766 struct v9fs_wstat wstat; 653 struct p9_wstat wstat;
767 struct v9fs_fcall *fcall = NULL; 654 int retval;
768 int fid = -1;
769 int olddirfidnum = -1;
770 int newdirfidnum = -1;
771 int retval = 0;
772
773 dprintk(DEBUG_VFS, "\n");
774 655
656 P9_DPRINTK(P9_DEBUG_VFS, "\n");
657 retval = 0;
658 old_inode = old_dentry->d_inode;
659 v9ses = v9fs_inode2v9ses(old_inode);
660 oldfid = v9fs_fid_lookup(old_dentry);
775 if(IS_ERR(oldfid)) 661 if(IS_ERR(oldfid))
776 return PTR_ERR(oldfid); 662 return PTR_ERR(oldfid);
777 663
778 olddirfid = v9fs_fid_clone(old_dentry->d_parent); 664 olddirfid = v9fs_fid_clone(old_dentry->d_parent);
779 if(IS_ERR(olddirfid)) { 665 if(IS_ERR(olddirfid)) {
780 retval = PTR_ERR(olddirfid); 666 retval = PTR_ERR(olddirfid);
781 goto Release_lock; 667 goto done;
782 } 668 }
783 669
784 newdirfid = v9fs_fid_clone(new_dentry->d_parent); 670 newdirfid = v9fs_fid_clone(new_dentry->d_parent);
785 if(IS_ERR(newdirfid)) { 671 if(IS_ERR(newdirfid)) {
786 retval = PTR_ERR(newdirfid); 672 retval = PTR_ERR(newdirfid);
787 goto Clunk_olddir; 673 goto clunk_olddir;
788 } 674 }
789 675
790 /* 9P can only handle file rename in the same directory */ 676 /* 9P can only handle file rename in the same directory */
791 if (memcmp(&olddirfid->qid, &newdirfid->qid, sizeof(newdirfid->qid))) { 677 if (memcmp(&olddirfid->qid, &newdirfid->qid, sizeof(newdirfid->qid))) {
792 dprintk(DEBUG_ERROR, "old dir and new dir are different\n"); 678 P9_DPRINTK(P9_DEBUG_ERROR,
679 "old dir and new dir are different\n");
793 retval = -EXDEV; 680 retval = -EXDEV;
794 goto Clunk_newdir; 681 goto clunk_newdir;
795 }
796
797 fid = oldfid->fid;
798 olddirfidnum = olddirfid->fid;
799 newdirfidnum = newdirfid->fid;
800
801 if (fid < 0) {
802 dprintk(DEBUG_ERROR, "no fid for old file #%lu\n",
803 old_inode->i_ino);
804 retval = -EBADF;
805 goto Clunk_newdir;
806 } 682 }
807 683
808 v9fs_blank_wstat(&wstat); 684 v9fs_blank_wstat(&wstat);
809 wstat.muid = v9ses->name; 685 wstat.muid = v9ses->name;
810 wstat.name = (char *) new_dentry->d_name.name; 686 wstat.name = (char *) new_dentry->d_name.name;
687 retval = p9_client_wstat(oldfid, &wstat);
811 688
812 retval = v9fs_t_wstat(v9ses, fid, &wstat, &fcall); 689clunk_newdir:
690 p9_client_clunk(olddirfid);
813 691
814 if (retval < 0) 692clunk_olddir:
815 PRINT_FCALL_ERROR("wstat error", fcall); 693 p9_client_clunk(newdirfid);
816
817 kfree(fcall);
818
819Clunk_newdir:
820 v9fs_fid_clunk(v9ses, newdirfid);
821
822Clunk_olddir:
823 v9fs_fid_clunk(v9ses, olddirfid);
824
825Release_lock:
826 up(&oldfid->lock);
827 694
695done:
828 return retval; 696 return retval;
829} 697}
830 698
@@ -840,28 +708,30 @@ static int
840v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, 708v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
841 struct kstat *stat) 709 struct kstat *stat)
842{ 710{
843 struct v9fs_fcall *fcall = NULL; 711 int err;
844 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode); 712 struct v9fs_session_info *v9ses;
845 struct v9fs_fid *fid = v9fs_fid_clone(dentry); 713 struct p9_fid *fid;
846 int err = -EPERM; 714 struct p9_stat *st;
847 715
848 dprintk(DEBUG_VFS, "dentry: %p\n", dentry); 716 P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
849 if(IS_ERR(fid)) 717 err = -EPERM;
718 v9ses = v9fs_inode2v9ses(dentry->d_inode);
719 if (v9ses->cache == CACHE_LOOSE)
720 return simple_getattr(mnt, dentry, stat);
721
722 fid = v9fs_fid_lookup(dentry);
723 if (IS_ERR(fid))
850 return PTR_ERR(fid); 724 return PTR_ERR(fid);
851 725
852 err = v9fs_t_stat(v9ses, fid->fid, &fcall); 726 st = p9_client_stat(fid);
727 if (IS_ERR(st))
728 return PTR_ERR(st);
853 729
854 if (err < 0) 730 v9fs_stat2inode(st, dentry->d_inode, dentry->d_inode->i_sb);
855 dprintk(DEBUG_ERROR, "stat error\n");
856 else {
857 v9fs_stat2inode(&fcall->params.rstat.stat, dentry->d_inode,
858 dentry->d_inode->i_sb);
859 generic_fillattr(dentry->d_inode, stat); 731 generic_fillattr(dentry->d_inode, stat);
860 }
861 732
862 kfree(fcall); 733 kfree(st);
863 v9fs_fid_clunk(v9ses, fid); 734 return 0;
864 return err;
865} 735}
866 736
867/** 737/**
@@ -873,13 +743,15 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
873 743
874static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr) 744static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
875{ 745{
876 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode); 746 int retval;
877 struct v9fs_fid *fid = v9fs_fid_clone(dentry); 747 struct v9fs_session_info *v9ses;
878 struct v9fs_fcall *fcall = NULL; 748 struct p9_fid *fid;
879 struct v9fs_wstat wstat; 749 struct p9_wstat wstat;
880 int res = -EPERM;
881 750
882 dprintk(DEBUG_VFS, "\n"); 751 P9_DPRINTK(P9_DEBUG_VFS, "\n");
752 retval = -EPERM;
753 v9ses = v9fs_inode2v9ses(dentry->d_inode);
754 fid = v9fs_fid_lookup(dentry);
883 if(IS_ERR(fid)) 755 if(IS_ERR(fid))
884 return PTR_ERR(fid); 756 return PTR_ERR(fid);
885 757
@@ -904,17 +776,11 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
904 wstat.n_gid = iattr->ia_gid; 776 wstat.n_gid = iattr->ia_gid;
905 } 777 }
906 778
907 res = v9fs_t_wstat(v9ses, fid->fid, &wstat, &fcall); 779 retval = p9_client_wstat(fid, &wstat);
780 if (retval >= 0)
781 retval = inode_setattr(dentry->d_inode, iattr);
908 782
909 if (res < 0) 783 return retval;
910 PRINT_FCALL_ERROR("wstat error", fcall);
911
912 kfree(fcall);
913 if (res >= 0)
914 res = inode_setattr(dentry->d_inode, iattr);
915
916 v9fs_fid_clunk(v9ses, fid);
917 return res;
918} 784}
919 785
920/** 786/**
@@ -926,7 +792,7 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
926 */ 792 */
927 793
928void 794void
929v9fs_stat2inode(struct v9fs_stat *stat, struct inode *inode, 795v9fs_stat2inode(struct p9_stat *stat, struct inode *inode,
930 struct super_block *sb) 796 struct super_block *sb)
931{ 797{
932 int n; 798 int n;
@@ -967,8 +833,9 @@ v9fs_stat2inode(struct v9fs_stat *stat, struct inode *inode,
967 case 'b': 833 case 'b':
968 break; 834 break;
969 default: 835 default:
970 dprintk(DEBUG_ERROR, "Unknown special type %c (%.*s)\n", 836 P9_DPRINTK(P9_DEBUG_ERROR,
971 type, stat->extension.len, stat->extension.str); 837 "Unknown special type %c (%.*s)\n", type,
838 stat->extension.len, stat->extension.str);
972 }; 839 };
973 inode->i_rdev = MKDEV(major, minor); 840 inode->i_rdev = MKDEV(major, minor);
974 } else 841 } else
@@ -976,8 +843,8 @@ v9fs_stat2inode(struct v9fs_stat *stat, struct inode *inode,
976 843
977 inode->i_size = stat->length; 844 inode->i_size = stat->length;
978 845
979 inode->i_blocks = 846 /* not real number of blocks, but 512 byte ones ... */
980 (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; 847 inode->i_blocks = (inode->i_size + 512 - 1) >> 9;
981} 848}
982 849
983/** 850/**
@@ -987,7 +854,7 @@ v9fs_stat2inode(struct v9fs_stat *stat, struct inode *inode,
987 * BUG: potential for inode number collisions? 854 * BUG: potential for inode number collisions?
988 */ 855 */
989 856
990ino_t v9fs_qid2ino(struct v9fs_qid *qid) 857ino_t v9fs_qid2ino(struct p9_qid *qid)
991{ 858{
992 u64 path = qid->path + 2; 859 u64 path = qid->path + 2;
993 ino_t i = 0; 860 ino_t i = 0;
@@ -1010,56 +877,46 @@ ino_t v9fs_qid2ino(struct v9fs_qid *qid)
1010 877
1011static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen) 878static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
1012{ 879{
1013 int retval = -EPERM; 880 int retval;
1014 881
1015 struct v9fs_fcall *fcall = NULL; 882 struct v9fs_session_info *v9ses;
1016 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode); 883 struct p9_fid *fid;
1017 struct v9fs_fid *fid = v9fs_fid_clone(dentry); 884 struct p9_stat *st;
1018 885
886 P9_DPRINTK(P9_DEBUG_VFS, " %s\n", dentry->d_name.name);
887 retval = -EPERM;
888 v9ses = v9fs_inode2v9ses(dentry->d_inode);
889 fid = v9fs_fid_lookup(dentry);
1019 if(IS_ERR(fid)) 890 if(IS_ERR(fid))
1020 return PTR_ERR(fid); 891 return PTR_ERR(fid);
1021 892
1022 if (!v9ses->extended) { 893 if (!v9ses->extended)
1023 retval = -EBADF; 894 return -EBADF;
1024 dprintk(DEBUG_ERROR, "not extended\n");
1025 goto ClunkFid;
1026 }
1027
1028 dprintk(DEBUG_VFS, " %s\n", dentry->d_name.name);
1029 retval = v9fs_t_stat(v9ses, fid->fid, &fcall);
1030
1031 if (retval < 0) {
1032 dprintk(DEBUG_ERROR, "stat error\n");
1033 goto FreeFcall;
1034 }
1035 895
1036 if (!fcall) { 896 st = p9_client_stat(fid);
1037 retval = -EIO; 897 if (IS_ERR(st))
1038 goto ClunkFid; 898 return PTR_ERR(st);
1039 }
1040 899
1041 if (!(fcall->params.rstat.stat.mode & V9FS_DMSYMLINK)) { 900 if (!(st->mode & P9_DMSYMLINK)) {
1042 retval = -EINVAL; 901 retval = -EINVAL;
1043 goto FreeFcall; 902 goto done;
1044 } 903 }
1045 904
1046 /* copy extension buffer into buffer */ 905 /* copy extension buffer into buffer */
1047 if (fcall->params.rstat.stat.extension.len < buflen) 906 if (st->extension.len < buflen)
1048 buflen = fcall->params.rstat.stat.extension.len + 1; 907 buflen = st->extension.len + 1;
1049 908
1050 memmove(buffer, fcall->params.rstat.stat.extension.str, buflen - 1); 909 memmove(buffer, st->extension.str, buflen - 1);
1051 buffer[buflen-1] = 0; 910 buffer[buflen-1] = 0;
1052 911
1053 dprintk(DEBUG_ERROR, "%s -> %.*s (%s)\n", dentry->d_name.name, fcall->params.rstat.stat.extension.len, 912 P9_DPRINTK(P9_DEBUG_VFS,
1054 fcall->params.rstat.stat.extension.str, buffer); 913 "%s -> %.*s (%s)\n", dentry->d_name.name, st->extension.len,
1055 retval = buflen; 914 st->extension.str, buffer);
1056 915
1057FreeFcall: 916 retval = buflen;
1058 kfree(fcall);
1059
1060ClunkFid:
1061 v9fs_fid_clunk(v9ses, fid);
1062 917
918done:
919 kfree(st);
1063 return retval; 920 return retval;
1064} 921}
1065 922
@@ -1084,14 +941,14 @@ static int v9fs_vfs_readlink(struct dentry *dentry, char __user * buffer,
1084 if (buflen > PATH_MAX) 941 if (buflen > PATH_MAX)
1085 buflen = PATH_MAX; 942 buflen = PATH_MAX;
1086 943
1087 dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); 944 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
1088 945
1089 retval = v9fs_readlink(dentry, link, buflen); 946 retval = v9fs_readlink(dentry, link, buflen);
1090 947
1091 if (retval > 0) { 948 if (retval > 0) {
1092 if ((ret = copy_to_user(buffer, link, retval)) != 0) { 949 if ((ret = copy_to_user(buffer, link, retval)) != 0) {
1093 dprintk(DEBUG_ERROR, "problem copying to user: %d\n", 950 P9_DPRINTK(P9_DEBUG_ERROR,
1094 ret); 951 "problem copying to user: %d\n", ret);
1095 retval = ret; 952 retval = ret;
1096 } 953 }
1097 } 954 }
@@ -1112,7 +969,7 @@ static void *v9fs_vfs_follow_link(struct dentry *dentry, struct nameidata *nd)
1112 int len = 0; 969 int len = 0;
1113 char *link = __getname(); 970 char *link = __getname();
1114 971
1115 dprintk(DEBUG_VFS, "%s n", dentry->d_name.name); 972 P9_DPRINTK(P9_DEBUG_VFS, "%s n", dentry->d_name.name);
1116 973
1117 if (!link) 974 if (!link)
1118 link = ERR_PTR(-ENOMEM); 975 link = ERR_PTR(-ENOMEM);
@@ -1141,7 +998,7 @@ static void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void
1141{ 998{
1142 char *s = nd_get_link(nd); 999 char *s = nd_get_link(nd);
1143 1000
1144 dprintk(DEBUG_VFS, " %s %s\n", dentry->d_name.name, s); 1001 P9_DPRINTK(P9_DEBUG_VFS, " %s %s\n", dentry->d_name.name, s);
1145 if (!IS_ERR(s)) 1002 if (!IS_ERR(s))
1146 __putname(s); 1003 __putname(s);
1147} 1004}
@@ -1149,66 +1006,24 @@ static void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void
1149static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry, 1006static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
1150 int mode, const char *extension) 1007 int mode, const char *extension)
1151{ 1008{
1152 int err; 1009 u32 perm;
1153 u32 fid, perm;
1154 struct v9fs_session_info *v9ses; 1010 struct v9fs_session_info *v9ses;
1155 struct v9fs_fid *dfid, *vfid = NULL; 1011 struct p9_fid *fid;
1156 struct inode *inode = NULL;
1157 1012
1158 v9ses = v9fs_inode2v9ses(dir); 1013 v9ses = v9fs_inode2v9ses(dir);
1159 if (!v9ses->extended) { 1014 if (!v9ses->extended) {
1160 dprintk(DEBUG_ERROR, "not extended\n"); 1015 P9_DPRINTK(P9_DEBUG_ERROR, "not extended\n");
1161 return -EPERM; 1016 return -EPERM;
1162 } 1017 }
1163 1018
1164 dfid = v9fs_fid_clone(dentry->d_parent);
1165 if(IS_ERR(dfid)) {
1166 err = PTR_ERR(dfid);
1167 goto error;
1168 }
1169
1170 perm = unixmode2p9mode(v9ses, mode); 1019 perm = unixmode2p9mode(v9ses, mode);
1020 fid = v9fs_create(v9ses, dir, dentry, (char *) extension, perm,
1021 P9_OREAD);
1022 if (IS_ERR(fid))
1023 return PTR_ERR(fid);
1171 1024
1172 err = v9fs_create(v9ses, dfid->fid, (char *) dentry->d_name.name, 1025 p9_client_clunk(fid);
1173 perm, V9FS_OREAD, (char *) extension, &fid, NULL, NULL);
1174
1175 if (err)
1176 goto clunk_dfid;
1177
1178 err = v9fs_t_clunk(v9ses, fid);
1179 if (err)
1180 goto clunk_dfid;
1181
1182 vfid = v9fs_clone_walk(v9ses, dfid->fid, dentry);
1183 if (IS_ERR(vfid)) {
1184 err = PTR_ERR(vfid);
1185 vfid = NULL;
1186 goto clunk_dfid;
1187 }
1188
1189 inode = v9fs_inode_from_fid(v9ses, vfid->fid, dir->i_sb);
1190 if (IS_ERR(inode)) {
1191 err = PTR_ERR(inode);
1192 inode = NULL;
1193 goto free_vfid;
1194 }
1195
1196 if(v9ses->cache)
1197 dentry->d_op = &v9fs_cached_dentry_operations;
1198 else
1199 dentry->d_op = &v9fs_dentry_operations;
1200 d_instantiate(dentry, inode);
1201 return 0; 1026 return 0;
1202
1203free_vfid:
1204 v9fs_fid_destroy(vfid);
1205
1206clunk_dfid:
1207 v9fs_fid_clunk(v9ses, dfid);
1208
1209error:
1210 return err;
1211
1212} 1027}
1213 1028
1214/** 1029/**
@@ -1224,8 +1039,8 @@ error:
1224static int 1039static int
1225v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) 1040v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
1226{ 1041{
1227 dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name, 1042 P9_DPRINTK(P9_DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino,
1228 symname); 1043 dentry->d_name.name, symname);
1229 1044
1230 return v9fs_vfs_mkspecial(dir, dentry, S_IFLNK, symname); 1045 return v9fs_vfs_mkspecial(dir, dentry, S_IFLNK, symname);
1231} 1046}
@@ -1247,11 +1062,11 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
1247 struct dentry *dentry) 1062 struct dentry *dentry)
1248{ 1063{
1249 int retval; 1064 int retval;
1250 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir); 1065 struct p9_fid *oldfid;
1251 struct v9fs_fid *oldfid;
1252 char *name; 1066 char *name;
1253 1067
1254 dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name, 1068 P9_DPRINTK(P9_DEBUG_VFS,
1069 " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
1255 old_dentry->d_name.name); 1070 old_dentry->d_name.name);
1256 1071
1257 oldfid = v9fs_fid_clone(old_dentry); 1072 oldfid = v9fs_fid_clone(old_dentry);
@@ -1265,11 +1080,11 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
1265 } 1080 }
1266 1081
1267 sprintf(name, "%d\n", oldfid->fid); 1082 sprintf(name, "%d\n", oldfid->fid);
1268 retval = v9fs_vfs_mkspecial(dir, dentry, V9FS_DMLINK, name); 1083 retval = v9fs_vfs_mkspecial(dir, dentry, P9_DMLINK, name);
1269 __putname(name); 1084 __putname(name);
1270 1085
1271clunk_fid: 1086clunk_fid:
1272 v9fs_fid_clunk(v9ses, oldfid); 1087 p9_client_clunk(oldfid);
1273 return retval; 1088 return retval;
1274} 1089}
1275 1090
@@ -1288,7 +1103,8 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
1288 int retval; 1103 int retval;
1289 char *name; 1104 char *name;
1290 1105
1291 dprintk(DEBUG_VFS, " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino, 1106 P9_DPRINTK(P9_DEBUG_VFS,
1107 " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
1292 dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev)); 1108 dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev));
1293 1109
1294 if (!new_valid_dev(rdev)) 1110 if (!new_valid_dev(rdev))
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 7bdf8b32684..ba904371218 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -37,10 +37,10 @@
37#include <linux/mount.h> 37#include <linux/mount.h>
38#include <linux/idr.h> 38#include <linux/idr.h>
39#include <linux/sched.h> 39#include <linux/sched.h>
40#include <net/9p/9p.h>
41#include <net/9p/client.h>
40 42
41#include "debug.h"
42#include "v9fs.h" 43#include "v9fs.h"
43#include "9p.h"
44#include "v9fs_vfs.h" 44#include "v9fs_vfs.h"
45#include "fid.h" 45#include "fid.h"
46 46
@@ -107,41 +107,48 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
107 struct vfsmount *mnt) 107 struct vfsmount *mnt)
108{ 108{
109 struct super_block *sb = NULL; 109 struct super_block *sb = NULL;
110 struct v9fs_fcall *fcall = NULL;
111 struct inode *inode = NULL; 110 struct inode *inode = NULL;
112 struct dentry *root = NULL; 111 struct dentry *root = NULL;
113 struct v9fs_session_info *v9ses = NULL; 112 struct v9fs_session_info *v9ses = NULL;
114 struct v9fs_fid *root_fid = NULL; 113 struct p9_stat *st = NULL;
115 int mode = S_IRWXUGO | S_ISVTX; 114 int mode = S_IRWXUGO | S_ISVTX;
116 uid_t uid = current->fsuid; 115 uid_t uid = current->fsuid;
117 gid_t gid = current->fsgid; 116 gid_t gid = current->fsgid;
118 int stat_result = 0; 117 struct p9_fid *fid;
119 int newfid = 0;
120 int retval = 0; 118 int retval = 0;
121 119
122 dprintk(DEBUG_VFS, " \n"); 120 P9_DPRINTK(P9_DEBUG_VFS, " \n");
123 121
124 v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL); 122 v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL);
125 if (!v9ses) 123 if (!v9ses)
126 return -ENOMEM; 124 return -ENOMEM;
127 125
128 if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) { 126 fid = v9fs_session_init(v9ses, dev_name, data);
129 dprintk(DEBUG_ERROR, "problem initiating session\n"); 127 if (IS_ERR(fid)) {
130 retval = newfid; 128 retval = PTR_ERR(fid);
131 goto out_free_session; 129 fid = NULL;
130 kfree(v9ses);
131 v9ses = NULL;
132 goto error;
133 }
134
135 st = p9_client_stat(fid);
136 if (IS_ERR(st)) {
137 retval = PTR_ERR(st);
138 goto error;
132 } 139 }
133 140
134 sb = sget(fs_type, NULL, v9fs_set_super, v9ses); 141 sb = sget(fs_type, NULL, v9fs_set_super, v9ses);
135 if (IS_ERR(sb)) { 142 if (IS_ERR(sb)) {
136 retval = PTR_ERR(sb); 143 retval = PTR_ERR(sb);
137 goto out_close_session; 144 goto error;
138 } 145 }
139 v9fs_fill_super(sb, v9ses, flags); 146 v9fs_fill_super(sb, v9ses, flags);
140 147
141 inode = v9fs_get_inode(sb, S_IFDIR | mode); 148 inode = v9fs_get_inode(sb, S_IFDIR | mode);
142 if (IS_ERR(inode)) { 149 if (IS_ERR(inode)) {
143 retval = PTR_ERR(inode); 150 retval = PTR_ERR(inode);
144 goto put_back_sb; 151 goto error;
145 } 152 }
146 153
147 inode->i_uid = uid; 154 inode->i_uid = uid;
@@ -150,54 +157,30 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
150 root = d_alloc_root(inode); 157 root = d_alloc_root(inode);
151 if (!root) { 158 if (!root) {
152 retval = -ENOMEM; 159 retval = -ENOMEM;
153 goto put_back_sb; 160 goto error;
154 } 161 }
155 162
156 sb->s_root = root; 163 sb->s_root = root;
164 root->d_inode->i_ino = v9fs_qid2ino(&st->qid);
165 v9fs_stat2inode(st, root->d_inode, sb);
166 v9fs_fid_add(root, fid);
157 167
158 stat_result = v9fs_t_stat(v9ses, newfid, &fcall); 168 return simple_set_mnt(mnt, sb);
159 if (stat_result < 0) {
160 dprintk(DEBUG_ERROR, "stat error\n");
161 v9fs_t_clunk(v9ses, newfid);
162 } else {
163 /* Setup the Root Inode */
164 root_fid = v9fs_fid_create(v9ses, newfid);
165 if (root_fid == NULL) {
166 retval = -ENOMEM;
167 goto put_back_sb;
168 }
169
170 retval = v9fs_fid_insert(root_fid, root);
171 if (retval < 0) {
172 kfree(fcall);
173 goto put_back_sb;
174 }
175
176 root_fid->qid = fcall->params.rstat.stat.qid;
177 root->d_inode->i_ino =
178 v9fs_qid2ino(&fcall->params.rstat.stat.qid);
179 v9fs_stat2inode(&fcall->params.rstat.stat, root->d_inode, sb);
180 }
181 169
182 kfree(fcall); 170error:
171 if (fid)
172 p9_client_clunk(fid);
183 173
184 if (stat_result < 0) { 174 if (v9ses) {
185 retval = stat_result; 175 v9fs_session_close(v9ses);
186 goto put_back_sb; 176 kfree(v9ses);
187 } 177 }
188 178
189 return simple_set_mnt(mnt, sb); 179 if (sb) {
190 180 up_write(&sb->s_umount);
191out_close_session: 181 deactivate_super(sb);
192 v9fs_session_close(v9ses); 182 }
193out_free_session:
194 kfree(v9ses);
195 return retval;
196 183
197put_back_sb:
198 /* deactivate_super calls v9fs_kill_super which will frees the rest */
199 up_write(&sb->s_umount);
200 deactivate_super(sb);
201 return retval; 184 return retval;
202} 185}
203 186
@@ -211,7 +194,7 @@ static void v9fs_kill_super(struct super_block *s)
211{ 194{
212 struct v9fs_session_info *v9ses = s->s_fs_info; 195 struct v9fs_session_info *v9ses = s->s_fs_info;
213 196
214 dprintk(DEBUG_VFS, " %p\n", s); 197 P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s);
215 198
216 v9fs_dentry_release(s->s_root); /* clunk root */ 199 v9fs_dentry_release(s->s_root); /* clunk root */
217 200
@@ -219,7 +202,7 @@ static void v9fs_kill_super(struct super_block *s)
219 202
220 v9fs_session_close(v9ses); 203 v9fs_session_close(v9ses);
221 kfree(v9ses); 204 kfree(v9ses);
222 dprintk(DEBUG_VFS, "exiting kill_super\n"); 205 P9_DPRINTK(P9_DEBUG_VFS, "exiting kill_super\n");
223} 206}
224 207
225/** 208/**
@@ -234,7 +217,7 @@ static int v9fs_show_options(struct seq_file *m, struct vfsmount *mnt)
234 struct v9fs_session_info *v9ses = mnt->mnt_sb->s_fs_info; 217 struct v9fs_session_info *v9ses = mnt->mnt_sb->s_fs_info;
235 218
236 if (v9ses->debug != 0) 219 if (v9ses->debug != 0)
237 seq_printf(m, ",debug=%u", v9ses->debug); 220 seq_printf(m, ",debug=%x", v9ses->debug);
238 if (v9ses->port != V9FS_PORT) 221 if (v9ses->port != V9FS_PORT)
239 seq_printf(m, ",port=%u", v9ses->port); 222 seq_printf(m, ",port=%u", v9ses->port);
240 if (v9ses->maxdata != 9000) 223 if (v9ses->maxdata != 9000)
diff --git a/fs/Kconfig b/fs/Kconfig
index 0fa0c1193e8..ee11f8d9408 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -991,7 +991,7 @@ config TMPFS_POSIX_ACL
991 991
992config HUGETLBFS 992config HUGETLBFS
993 bool "HugeTLB file system support" 993 bool "HugeTLB file system support"
994 depends on X86 || IA64 || PPC64 || SPARC64 || SUPERH || BROKEN 994 depends on X86 || IA64 || PPC64 || SPARC64 || (SUPERH && MMU) || BROKEN
995 help 995 help
996 hugetlbfs is a filesystem backing for HugeTLB pages, based on 996 hugetlbfs is a filesystem backing for HugeTLB pages, based on
997 ramfs. For architectures that support it, say Y here and read 997 ramfs. For architectures that support it, say Y here and read
@@ -2048,7 +2048,7 @@ config AFS_DEBUG
2048 2048
2049config 9P_FS 2049config 9P_FS
2050 tristate "Plan 9 Resource Sharing Support (9P2000) (Experimental)" 2050 tristate "Plan 9 Resource Sharing Support (9P2000) (Experimental)"
2051 depends on INET && EXPERIMENTAL 2051 depends on INET && NET_9P && EXPERIMENTAL
2052 help 2052 help
2053 If you say Y here, you will get experimental support for 2053 If you say Y here, you will get experimental support for
2054 Plan 9 resource sharing via the 9P2000 protocol. 2054 Plan 9 resource sharing via the 9P2000 protocol.
diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index 73ce561f3ea..a66671082cf 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -8,6 +8,7 @@ kafs-objs := \
8 cmservice.o \ 8 cmservice.o \
9 dir.o \ 9 dir.o \
10 file.o \ 10 file.o \
11 flock.o \
11 fsclient.o \ 12 fsclient.o \
12 inode.o \ 13 inode.o \
13 main.o \ 14 main.o \
diff --git a/fs/afs/afs.h b/fs/afs/afs.h
index 24525794814..c548aa346f0 100644
--- a/fs/afs/afs.h
+++ b/fs/afs/afs.h
@@ -37,6 +37,13 @@ typedef enum {
37 AFS_FTYPE_SYMLINK = 3, 37 AFS_FTYPE_SYMLINK = 3,
38} afs_file_type_t; 38} afs_file_type_t;
39 39
40typedef enum {
41 AFS_LOCK_READ = 0, /* read lock request */
42 AFS_LOCK_WRITE = 1, /* write lock request */
43} afs_lock_type_t;
44
45#define AFS_LOCKWAIT (5 * 60) /* time until a lock times out (seconds) */
46
40/* 47/*
41 * AFS file identifier 48 * AFS file identifier
42 */ 49 */
@@ -120,6 +127,7 @@ struct afs_file_status {
120 struct afs_fid parent; /* parent dir ID for non-dirs only */ 127 struct afs_fid parent; /* parent dir ID for non-dirs only */
121 time_t mtime_client; /* last time client changed data */ 128 time_t mtime_client; /* last time client changed data */
122 time_t mtime_server; /* last time server changed data */ 129 time_t mtime_server; /* last time server changed data */
130 s32 lock_count; /* file lock count (0=UNLK -1=WRLCK +ve=#RDLCK */
123}; 131};
124 132
125/* 133/*
diff --git a/fs/afs/afs_fs.h b/fs/afs/afs_fs.h
index a18c374ebe0..eb647323d8f 100644
--- a/fs/afs/afs_fs.h
+++ b/fs/afs/afs_fs.h
@@ -31,6 +31,9 @@ enum AFS_FS_Operations {
31 FSGETVOLUMEINFO = 148, /* AFS Get information about a volume */ 31 FSGETVOLUMEINFO = 148, /* AFS Get information about a volume */
32 FSGETVOLUMESTATUS = 149, /* AFS Get volume status information */ 32 FSGETVOLUMESTATUS = 149, /* AFS Get volume status information */
33 FSGETROOTVOLUME = 151, /* AFS Get root volume name */ 33 FSGETROOTVOLUME = 151, /* AFS Get root volume name */
34 FSSETLOCK = 156, /* AFS Request a file lock */
35 FSEXTENDLOCK = 157, /* AFS Extend a file lock */
36 FSRELEASELOCK = 158, /* AFS Release a file lock */
34 FSLOOKUP = 161, /* AFS lookup file in directory */ 37 FSLOOKUP = 161, /* AFS lookup file in directory */
35 FSFETCHDATA64 = 65537, /* AFS Fetch file data */ 38 FSFETCHDATA64 = 65537, /* AFS Fetch file data */
36 FSSTOREDATA64 = 65538, /* AFS Store file data */ 39 FSSTOREDATA64 = 65538, /* AFS Store file data */
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index bacf518c6fa..b8243945818 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -125,6 +125,9 @@ static void afs_break_callback(struct afs_server *server,
125 spin_unlock(&server->cb_lock); 125 spin_unlock(&server->cb_lock);
126 126
127 queue_work(afs_callback_update_worker, &vnode->cb_broken_work); 127 queue_work(afs_callback_update_worker, &vnode->cb_broken_work);
128 if (list_empty(&vnode->granted_locks) &&
129 !list_empty(&vnode->pending_locks))
130 afs_lock_may_be_available(vnode);
128 spin_unlock(&vnode->lock); 131 spin_unlock(&vnode->lock);
129 } 132 }
130} 133}
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 546c59522eb..33fe39ad4e0 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -44,6 +44,7 @@ const struct file_operations afs_dir_file_operations = {
44 .open = afs_dir_open, 44 .open = afs_dir_open,
45 .release = afs_release, 45 .release = afs_release,
46 .readdir = afs_readdir, 46 .readdir = afs_readdir,
47 .lock = afs_lock,
47}; 48};
48 49
49const struct inode_operations afs_dir_inode_operations = { 50const struct inode_operations afs_dir_inode_operations = {
diff --git a/fs/afs/file.c b/fs/afs/file.c
index aede7eb66dd..525f7c56e06 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -34,6 +34,8 @@ const struct file_operations afs_file_operations = {
34 .mmap = generic_file_readonly_mmap, 34 .mmap = generic_file_readonly_mmap,
35 .splice_read = generic_file_splice_read, 35 .splice_read = generic_file_splice_read,
36 .fsync = afs_fsync, 36 .fsync = afs_fsync,
37 .lock = afs_lock,
38 .flock = afs_flock,
37}; 39};
38 40
39const struct inode_operations afs_file_inode_operations = { 41const struct inode_operations afs_file_inode_operations = {
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
new file mode 100644
index 00000000000..8f07f8d1bfa
--- /dev/null
+++ b/fs/afs/flock.c
@@ -0,0 +1,558 @@
1/* AFS file locking support
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/smp_lock.h>
13#include "internal.h"
14
15#define AFS_LOCK_GRANTED 0
16#define AFS_LOCK_PENDING 1
17
18static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl);
19static void afs_fl_release_private(struct file_lock *fl);
20
21static struct workqueue_struct *afs_lock_manager;
22
23static struct file_lock_operations afs_lock_ops = {
24 .fl_copy_lock = afs_fl_copy_lock,
25 .fl_release_private = afs_fl_release_private,
26};
27
28/*
29 * initialise the lock manager thread if it isn't already running
30 */
31static int afs_init_lock_manager(void)
32{
33 if (!afs_lock_manager) {
34 afs_lock_manager = create_singlethread_workqueue("kafs_lockd");
35 if (!afs_lock_manager)
36 return -ENOMEM;
37 }
38 return 0;
39}
40
41/*
42 * destroy the lock manager thread if it's running
43 */
44void __exit afs_kill_lock_manager(void)
45{
46 if (afs_lock_manager)
47 destroy_workqueue(afs_lock_manager);
48}
49
50/*
51 * if the callback is broken on this vnode, then the lock may now be available
52 */
53void afs_lock_may_be_available(struct afs_vnode *vnode)
54{
55 _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode);
56
57 queue_delayed_work(afs_lock_manager, &vnode->lock_work, 0);
58}
59
60/*
61 * the lock will time out in 5 minutes unless we extend it, so schedule
62 * extension in a bit less than that time
63 */
64static void afs_schedule_lock_extension(struct afs_vnode *vnode)
65{
66 queue_delayed_work(afs_lock_manager, &vnode->lock_work,
67 AFS_LOCKWAIT * HZ / 2);
68}
69
70/*
71 * do work for a lock, including:
72 * - probing for a lock we're waiting on but didn't get immediately
73 * - extending a lock that's close to timing out
74 */
75void afs_lock_work(struct work_struct *work)
76{
77 struct afs_vnode *vnode =
78 container_of(work, struct afs_vnode, lock_work.work);
79 struct file_lock *fl;
80 afs_lock_type_t type;
81 struct key *key;
82 int ret;
83
84 _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode);
85
86 spin_lock(&vnode->lock);
87
88 if (test_bit(AFS_VNODE_UNLOCKING, &vnode->flags)) {
89 _debug("unlock");
90 spin_unlock(&vnode->lock);
91
92 /* attempt to release the server lock; if it fails, we just
93 * wait 5 minutes and it'll time out anyway */
94 ret = afs_vnode_release_lock(vnode, vnode->unlock_key);
95 if (ret < 0)
96 printk(KERN_WARNING "AFS:"
97 " Failed to release lock on {%x:%x} error %d\n",
98 vnode->fid.vid, vnode->fid.vnode, ret);
99
100 spin_lock(&vnode->lock);
101 key_put(vnode->unlock_key);
102 vnode->unlock_key = NULL;
103 clear_bit(AFS_VNODE_UNLOCKING, &vnode->flags);
104 }
105
106 /* if we've got a lock, then it must be time to extend that lock as AFS
107 * locks time out after 5 minutes */
108 if (!list_empty(&vnode->granted_locks)) {
109 _debug("extend");
110
111 if (test_and_set_bit(AFS_VNODE_LOCKING, &vnode->flags))
112 BUG();
113 fl = list_entry(vnode->granted_locks.next,
114 struct file_lock, fl_u.afs.link);
115 key = key_get(fl->fl_file->private_data);
116 spin_unlock(&vnode->lock);
117
118 ret = afs_vnode_extend_lock(vnode, key);
119 clear_bit(AFS_VNODE_LOCKING, &vnode->flags);
120 key_put(key);
121 switch (ret) {
122 case 0:
123 afs_schedule_lock_extension(vnode);
124 break;
125 default:
126 /* ummm... we failed to extend the lock - retry
127 * extension shortly */
128 printk(KERN_WARNING "AFS:"
129 " Failed to extend lock on {%x:%x} error %d\n",
130 vnode->fid.vid, vnode->fid.vnode, ret);
131 queue_delayed_work(afs_lock_manager, &vnode->lock_work,
132 HZ * 10);
133 break;
134 }
135 _leave(" [extend]");
136 return;
137 }
138
139 /* if we don't have a granted lock, then we must've been called back by
140 * the server, and so if might be possible to get a lock we're
141 * currently waiting for */
142 if (!list_empty(&vnode->pending_locks)) {
143 _debug("get");
144
145 if (test_and_set_bit(AFS_VNODE_LOCKING, &vnode->flags))
146 BUG();
147 fl = list_entry(vnode->pending_locks.next,
148 struct file_lock, fl_u.afs.link);
149 key = key_get(fl->fl_file->private_data);
150 type = (fl->fl_type == F_RDLCK) ?
151 AFS_LOCK_READ : AFS_LOCK_WRITE;
152 spin_unlock(&vnode->lock);
153
154 ret = afs_vnode_set_lock(vnode, key, type);
155 clear_bit(AFS_VNODE_LOCKING, &vnode->flags);
156 switch (ret) {
157 case -EWOULDBLOCK:
158 _debug("blocked");
159 break;
160 case 0:
161 _debug("acquired");
162 if (type == AFS_LOCK_READ)
163 set_bit(AFS_VNODE_READLOCKED, &vnode->flags);
164 else
165 set_bit(AFS_VNODE_WRITELOCKED, &vnode->flags);
166 ret = AFS_LOCK_GRANTED;
167 default:
168 spin_lock(&vnode->lock);
169 /* the pending lock may have been withdrawn due to a
170 * signal */
171 if (list_entry(vnode->pending_locks.next,
172 struct file_lock, fl_u.afs.link) == fl) {
173 fl->fl_u.afs.state = ret;
174 if (ret == AFS_LOCK_GRANTED)
175 list_move_tail(&fl->fl_u.afs.link,
176 &vnode->granted_locks);
177 else
178 list_del_init(&fl->fl_u.afs.link);
179 wake_up(&fl->fl_wait);
180 spin_unlock(&vnode->lock);
181 } else {
182 _debug("withdrawn");
183 clear_bit(AFS_VNODE_READLOCKED, &vnode->flags);
184 clear_bit(AFS_VNODE_WRITELOCKED, &vnode->flags);
185 spin_unlock(&vnode->lock);
186 afs_vnode_release_lock(vnode, key);
187 if (!list_empty(&vnode->pending_locks))
188 afs_lock_may_be_available(vnode);
189 }
190 break;
191 }
192 key_put(key);
193 _leave(" [pend]");
194 return;
195 }
196
197 /* looks like the lock request was withdrawn on a signal */
198 spin_unlock(&vnode->lock);
199 _leave(" [no locks]");
200}
201
202/*
203 * pass responsibility for the unlocking of a vnode on the server to the
204 * manager thread, lest a pending signal in the calling thread interrupt
205 * AF_RXRPC
206 * - the caller must hold the vnode lock
207 */
208static void afs_defer_unlock(struct afs_vnode *vnode, struct key *key)
209{
210 cancel_delayed_work(&vnode->lock_work);
211 if (!test_and_clear_bit(AFS_VNODE_READLOCKED, &vnode->flags) &&
212 !test_and_clear_bit(AFS_VNODE_WRITELOCKED, &vnode->flags))
213 BUG();
214 if (test_and_set_bit(AFS_VNODE_UNLOCKING, &vnode->flags))
215 BUG();
216 vnode->unlock_key = key_get(key);
217 afs_lock_may_be_available(vnode);
218}
219
220/*
221 * request a lock on a file on the server
222 */
223static int afs_do_setlk(struct file *file, struct file_lock *fl)
224{
225 struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host);
226 afs_lock_type_t type;
227 struct key *key = file->private_data;
228 int ret;
229
230 _enter("{%x:%u},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
231
232 /* only whole-file locks are supported */
233 if (fl->fl_start != 0 || fl->fl_end != OFFSET_MAX)
234 return -EINVAL;
235
236 ret = afs_init_lock_manager();
237 if (ret < 0)
238 return ret;
239
240 fl->fl_ops = &afs_lock_ops;
241 INIT_LIST_HEAD(&fl->fl_u.afs.link);
242 fl->fl_u.afs.state = AFS_LOCK_PENDING;
243
244 type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
245
246 lock_kernel();
247
248 /* make sure we've got a callback on this file and that our view of the
249 * data version is up to date */
250 ret = afs_vnode_fetch_status(vnode, NULL, key);
251 if (ret < 0)
252 goto error;
253
254 if (vnode->status.lock_count != 0 && !(fl->fl_flags & FL_SLEEP)) {
255 ret = -EAGAIN;
256 goto error;
257 }
258
259 spin_lock(&vnode->lock);
260
261 if (list_empty(&vnode->pending_locks)) {
262 /* if there's no-one else with a lock on this vnode, then we
263 * need to ask the server for a lock */
264 if (list_empty(&vnode->granted_locks)) {
265 _debug("not locked");
266 ASSERTCMP(vnode->flags &
267 ((1 << AFS_VNODE_LOCKING) |
268 (1 << AFS_VNODE_READLOCKED) |
269 (1 << AFS_VNODE_WRITELOCKED)), ==, 0);
270 list_add_tail(&fl->fl_u.afs.link, &vnode->pending_locks);
271 set_bit(AFS_VNODE_LOCKING, &vnode->flags);
272 spin_unlock(&vnode->lock);
273
274 ret = afs_vnode_set_lock(vnode, key, type);
275 clear_bit(AFS_VNODE_LOCKING, &vnode->flags);
276 switch (ret) {
277 case 0:
278 goto acquired_server_lock;
279 case -EWOULDBLOCK:
280 spin_lock(&vnode->lock);
281 ASSERT(list_empty(&vnode->granted_locks));
282 ASSERTCMP(vnode->pending_locks.next, ==,
283 &fl->fl_u.afs.link);
284 goto wait;
285 default:
286 spin_lock(&vnode->lock);
287 list_del_init(&fl->fl_u.afs.link);
288 spin_unlock(&vnode->lock);
289 goto error;
290 }
291 }
292
293 /* if we've already got a readlock on the server and no waiting
294 * writelocks, then we might be able to instantly grant another
295 * readlock */
296 if (type == AFS_LOCK_READ &&
297 vnode->flags & (1 << AFS_VNODE_READLOCKED)) {
298 _debug("instant readlock");
299 ASSERTCMP(vnode->flags &
300 ((1 << AFS_VNODE_LOCKING) |
301 (1 << AFS_VNODE_WRITELOCKED)), ==, 0);
302 ASSERT(!list_empty(&vnode->granted_locks));
303 goto sharing_existing_lock;
304 }
305 }
306
307 /* otherwise, we need to wait for a local lock to become available */
308 _debug("wait local");
309 list_add_tail(&fl->fl_u.afs.link, &vnode->pending_locks);
310wait:
311 if (!(fl->fl_flags & FL_SLEEP)) {
312 _debug("noblock");
313 ret = -EAGAIN;
314 goto abort_attempt;
315 }
316 spin_unlock(&vnode->lock);
317
318 /* now we need to sleep and wait for the lock manager thread to get the
319 * lock from the server */
320 _debug("sleep");
321 ret = wait_event_interruptible(fl->fl_wait,
322 fl->fl_u.afs.state <= AFS_LOCK_GRANTED);
323 if (fl->fl_u.afs.state <= AFS_LOCK_GRANTED) {
324 ret = fl->fl_u.afs.state;
325 if (ret < 0)
326 goto error;
327 spin_lock(&vnode->lock);
328 goto given_lock;
329 }
330
331 /* we were interrupted, but someone may still be in the throes of
332 * giving us the lock */
333 _debug("intr");
334 ASSERTCMP(ret, ==, -ERESTARTSYS);
335
336 spin_lock(&vnode->lock);
337 if (fl->fl_u.afs.state <= AFS_LOCK_GRANTED) {
338 ret = fl->fl_u.afs.state;
339 if (ret < 0) {
340 spin_unlock(&vnode->lock);
341 goto error;
342 }
343 goto given_lock;
344 }
345
346abort_attempt:
347 /* we aren't going to get the lock, either because we're unwilling to
348 * wait, or because some signal happened */
349 _debug("abort");
350 if (list_empty(&vnode->granted_locks) &&
351 vnode->pending_locks.next == &fl->fl_u.afs.link) {
352 if (vnode->pending_locks.prev != &fl->fl_u.afs.link) {
353 /* kick the next pending lock into having a go */
354 list_del_init(&fl->fl_u.afs.link);
355 afs_lock_may_be_available(vnode);
356 }
357 } else {
358 list_del_init(&fl->fl_u.afs.link);
359 }
360 spin_unlock(&vnode->lock);
361 goto error;
362
363acquired_server_lock:
364 /* we've acquired a server lock, but it needs to be renewed after 5
365 * mins */
366 spin_lock(&vnode->lock);
367 afs_schedule_lock_extension(vnode);
368 if (type == AFS_LOCK_READ)
369 set_bit(AFS_VNODE_READLOCKED, &vnode->flags);
370 else
371 set_bit(AFS_VNODE_WRITELOCKED, &vnode->flags);
372sharing_existing_lock:
373 /* the lock has been granted as far as we're concerned... */
374 fl->fl_u.afs.state = AFS_LOCK_GRANTED;
375 list_move_tail(&fl->fl_u.afs.link, &vnode->granted_locks);
376given_lock:
377 /* ... but we do still need to get the VFS's blessing */
378 ASSERT(!(vnode->flags & (1 << AFS_VNODE_LOCKING)));
379 ASSERT((vnode->flags & ((1 << AFS_VNODE_READLOCKED) |
380 (1 << AFS_VNODE_WRITELOCKED))) != 0);
381 ret = posix_lock_file(file, fl, NULL);
382 if (ret < 0)
383 goto vfs_rejected_lock;
384 spin_unlock(&vnode->lock);
385
386 /* again, make sure we've got a callback on this file and, again, make
387 * sure that our view of the data version is up to date (we ignore
388 * errors incurred here and deal with the consequences elsewhere) */
389 afs_vnode_fetch_status(vnode, NULL, key);
390
391error:
392 unlock_kernel();
393 _leave(" = %d", ret);
394 return ret;
395
396vfs_rejected_lock:
397 /* the VFS rejected the lock we just obtained, so we have to discard
398 * what we just got */
399 _debug("vfs refused %d", ret);
400 list_del_init(&fl->fl_u.afs.link);
401 if (list_empty(&vnode->granted_locks))
402 afs_defer_unlock(vnode, key);
403 spin_unlock(&vnode->lock);
404 goto abort_attempt;
405}
406
407/*
408 * unlock on a file on the server
409 */
410static int afs_do_unlk(struct file *file, struct file_lock *fl)
411{
412 struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host);
413 struct key *key = file->private_data;
414 int ret;
415
416 _enter("{%x:%u},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
417
418 /* only whole-file unlocks are supported */
419 if (fl->fl_start != 0 || fl->fl_end != OFFSET_MAX)
420 return -EINVAL;
421
422 fl->fl_ops = &afs_lock_ops;
423 INIT_LIST_HEAD(&fl->fl_u.afs.link);
424 fl->fl_u.afs.state = AFS_LOCK_PENDING;
425
426 spin_lock(&vnode->lock);
427 ret = posix_lock_file(file, fl, NULL);
428 if (ret < 0) {
429 spin_unlock(&vnode->lock);
430 _leave(" = %d [vfs]", ret);
431 return ret;
432 }
433
434 /* discard the server lock only if all granted locks are gone */
435 if (list_empty(&vnode->granted_locks))
436 afs_defer_unlock(vnode, key);
437 spin_unlock(&vnode->lock);
438 _leave(" = 0");
439 return 0;
440}
441
442/*
443 * return information about a lock we currently hold, if indeed we hold one
444 */
445static int afs_do_getlk(struct file *file, struct file_lock *fl)
446{
447 struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host);
448 struct key *key = file->private_data;
449 int ret, lock_count;
450
451 _enter("");
452
453 fl->fl_type = F_UNLCK;
454
455 mutex_lock(&vnode->vfs_inode.i_mutex);
456
457 /* check local lock records first */
458 ret = 0;
459 if (posix_test_lock(file, fl) == 0) {
460 /* no local locks; consult the server */
461 ret = afs_vnode_fetch_status(vnode, NULL, key);
462 if (ret < 0)
463 goto error;
464 lock_count = vnode->status.lock_count;
465 if (lock_count) {
466 if (lock_count > 0)
467 fl->fl_type = F_RDLCK;
468 else
469 fl->fl_type = F_WRLCK;
470 fl->fl_start = 0;
471 fl->fl_end = OFFSET_MAX;
472 }
473 }
474
475error:
476 mutex_unlock(&vnode->vfs_inode.i_mutex);
477 _leave(" = %d [%hd]", ret, fl->fl_type);
478 return ret;
479}
480
481/*
482 * manage POSIX locks on a file
483 */
484int afs_lock(struct file *file, int cmd, struct file_lock *fl)
485{
486 struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode);
487
488 _enter("{%x:%u},%d,{t=%x,fl=%x,r=%Ld:%Ld}",
489 vnode->fid.vid, vnode->fid.vnode, cmd,
490 fl->fl_type, fl->fl_flags,
491 (long long) fl->fl_start, (long long) fl->fl_end);
492
493 /* AFS doesn't support mandatory locks */
494 if ((vnode->vfs_inode.i_mode & (S_ISGID | S_IXGRP)) == S_ISGID &&
495 fl->fl_type != F_UNLCK)
496 return -ENOLCK;
497
498 if (IS_GETLK(cmd))
499 return afs_do_getlk(file, fl);
500 if (fl->fl_type == F_UNLCK)
501 return afs_do_unlk(file, fl);
502 return afs_do_setlk(file, fl);
503}
504
505/*
506 * manage FLOCK locks on a file
507 */
508int afs_flock(struct file *file, int cmd, struct file_lock *fl)
509{
510 struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode);
511
512 _enter("{%x:%u},%d,{t=%x,fl=%x}",
513 vnode->fid.vid, vnode->fid.vnode, cmd,
514 fl->fl_type, fl->fl_flags);
515
516 /*
517 * No BSD flocks over NFS allowed.
518 * Note: we could try to fake a POSIX lock request here by
519 * using ((u32) filp | 0x80000000) or some such as the pid.
520 * Not sure whether that would be unique, though, or whether
521 * that would break in other places.
522 */
523 if (!(fl->fl_flags & FL_FLOCK))
524 return -ENOLCK;
525
526 /* we're simulating flock() locks using posix locks on the server */
527 fl->fl_owner = (fl_owner_t) file;
528 fl->fl_start = 0;
529 fl->fl_end = OFFSET_MAX;
530
531 if (fl->fl_type == F_UNLCK)
532 return afs_do_unlk(file, fl);
533 return afs_do_setlk(file, fl);
534}
535
536/*
537 * the POSIX lock management core VFS code copies the lock record and adds the
538 * copy into its own list, so we need to add that copy to the vnode's lock
539 * queue in the same place as the original (which will be deleted shortly
540 * after)
541 */
542static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl)
543{
544 _enter("");
545
546 list_add(&new->fl_u.afs.link, &fl->fl_u.afs.link);
547}
548
549/*
550 * need to remove this lock from the vnode queue when it's removed from the
551 * VFS's list
552 */
553static void afs_fl_release_private(struct file_lock *fl)
554{
555 _enter("");
556
557 list_del_init(&fl->fl_u.afs.link);
558}
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 5dff1308b6f..023b95b0d9d 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -67,7 +67,7 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
67 EXTRACT(status->group); 67 EXTRACT(status->group);
68 bp++; /* sync counter */ 68 bp++; /* sync counter */
69 data_version |= (u64) ntohl(*bp++) << 32; 69 data_version |= (u64) ntohl(*bp++) << 32;
70 bp++; /* lock count */ 70 EXTRACT(status->lock_count);
71 size |= (u64) ntohl(*bp++) << 32; 71 size |= (u64) ntohl(*bp++) << 32;
72 bp++; /* spare 4 */ 72 bp++; /* spare 4 */
73 *_bp = bp; 73 *_bp = bp;
@@ -1748,3 +1748,156 @@ int afs_fs_get_volume_status(struct afs_server *server,
1748 1748
1749 return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode); 1749 return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
1750} 1750}
1751
1752/*
1753 * deliver reply data to an FS.SetLock, FS.ExtendLock or FS.ReleaseLock
1754 */
1755static int afs_deliver_fs_xxxx_lock(struct afs_call *call,
1756 struct sk_buff *skb, bool last)
1757{
1758 const __be32 *bp;
1759
1760 _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
1761
1762 afs_transfer_reply(call, skb);
1763 if (!last)
1764 return 0;
1765
1766 if (call->reply_size != call->reply_max)
1767 return -EBADMSG;
1768
1769 /* unmarshall the reply once we've received all of it */
1770 bp = call->buffer;
1771 /* xdr_decode_AFSVolSync(&bp, call->replyX); */
1772
1773 _leave(" = 0 [done]");
1774 return 0;
1775}
1776
1777/*
1778 * FS.SetLock operation type
1779 */
1780static const struct afs_call_type afs_RXFSSetLock = {
1781 .name = "FS.SetLock",
1782 .deliver = afs_deliver_fs_xxxx_lock,
1783 .abort_to_error = afs_abort_to_error,
1784 .destructor = afs_flat_call_destructor,
1785};
1786
1787/*
1788 * FS.ExtendLock operation type
1789 */
1790static const struct afs_call_type afs_RXFSExtendLock = {
1791 .name = "FS.ExtendLock",
1792 .deliver = afs_deliver_fs_xxxx_lock,
1793 .abort_to_error = afs_abort_to_error,
1794 .destructor = afs_flat_call_destructor,
1795};
1796
1797/*
1798 * FS.ReleaseLock operation type
1799 */
1800static const struct afs_call_type afs_RXFSReleaseLock = {
1801 .name = "FS.ReleaseLock",
1802 .deliver = afs_deliver_fs_xxxx_lock,
1803 .abort_to_error = afs_abort_to_error,
1804 .destructor = afs_flat_call_destructor,
1805};
1806
1807/*
1808 * get a lock on a file
1809 */
1810int afs_fs_set_lock(struct afs_server *server,
1811 struct key *key,
1812 struct afs_vnode *vnode,
1813 afs_lock_type_t type,
1814 const struct afs_wait_mode *wait_mode)
1815{
1816 struct afs_call *call;
1817 __be32 *bp;
1818
1819 _enter("");
1820
1821 call = afs_alloc_flat_call(&afs_RXFSSetLock, 5 * 4, 6 * 4);
1822 if (!call)
1823 return -ENOMEM;
1824
1825 call->key = key;
1826 call->reply = vnode;
1827 call->service_id = FS_SERVICE;
1828 call->port = htons(AFS_FS_PORT);
1829
1830 /* marshall the parameters */
1831 bp = call->request;
1832 *bp++ = htonl(FSSETLOCK);
1833 *bp++ = htonl(vnode->fid.vid);
1834 *bp++ = htonl(vnode->fid.vnode);
1835 *bp++ = htonl(vnode->fid.unique);
1836 *bp++ = htonl(type);
1837
1838 return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
1839}
1840
1841/*
1842 * extend a lock on a file
1843 */
1844int afs_fs_extend_lock(struct afs_server *server,
1845 struct key *key,
1846 struct afs_vnode *vnode,
1847 const struct afs_wait_mode *wait_mode)
1848{
1849 struct afs_call *call;
1850 __be32 *bp;
1851
1852 _enter("");
1853
1854 call = afs_alloc_flat_call(&afs_RXFSExtendLock, 4 * 4, 6 * 4);
1855 if (!call)
1856 return -ENOMEM;
1857
1858 call->key = key;
1859 call->reply = vnode;
1860 call->service_id = FS_SERVICE;
1861 call->port = htons(AFS_FS_PORT);
1862
1863 /* marshall the parameters */
1864 bp = call->request;
1865 *bp++ = htonl(FSEXTENDLOCK);
1866 *bp++ = htonl(vnode->fid.vid);
1867 *bp++ = htonl(vnode->fid.vnode);
1868 *bp++ = htonl(vnode->fid.unique);
1869
1870 return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
1871}
1872
1873/*
1874 * release a lock on a file
1875 */
1876int afs_fs_release_lock(struct afs_server *server,
1877 struct key *key,
1878 struct afs_vnode *vnode,
1879 const struct afs_wait_mode *wait_mode)
1880{
1881 struct afs_call *call;
1882 __be32 *bp;
1883
1884 _enter("");
1885
1886 call = afs_alloc_flat_call(&afs_RXFSReleaseLock, 4 * 4, 6 * 4);
1887 if (!call)
1888 return -ENOMEM;
1889
1890 call->key = key;
1891 call->reply = vnode;
1892 call->service_id = FS_SERVICE;
1893 call->port = htons(AFS_FS_PORT);
1894
1895 /* marshall the parameters */
1896 bp = call->request;
1897 *bp++ = htonl(FSRELEASELOCK);
1898 *bp++ = htonl(vnode->fid.vid);
1899 *bp++ = htonl(vnode->fid.vnode);
1900 *bp++ = htonl(vnode->fid.unique);
1901
1902 return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
1903}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 2c55dd94a1d..6306438f331 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -351,10 +351,18 @@ struct afs_vnode {
351#define AFS_VNODE_ZAP_DATA 3 /* set if vnode's data should be invalidated */ 351#define AFS_VNODE_ZAP_DATA 3 /* set if vnode's data should be invalidated */
352#define AFS_VNODE_DELETED 4 /* set if vnode deleted on server */ 352#define AFS_VNODE_DELETED 4 /* set if vnode deleted on server */
353#define AFS_VNODE_MOUNTPOINT 5 /* set if vnode is a mountpoint symlink */ 353#define AFS_VNODE_MOUNTPOINT 5 /* set if vnode is a mountpoint symlink */
354#define AFS_VNODE_LOCKING 6 /* set if waiting for lock on vnode */
355#define AFS_VNODE_READLOCKED 7 /* set if vnode is read-locked on the server */
356#define AFS_VNODE_WRITELOCKED 8 /* set if vnode is write-locked on the server */
357#define AFS_VNODE_UNLOCKING 9 /* set if vnode is being unlocked on the server */
354 358
355 long acl_order; /* ACL check count (callback break count) */ 359 long acl_order; /* ACL check count (callback break count) */
356 360
357 struct list_head writebacks; /* alterations in pagecache that need writing */ 361 struct list_head writebacks; /* alterations in pagecache that need writing */
362 struct list_head pending_locks; /* locks waiting to be granted */
363 struct list_head granted_locks; /* locks granted on this file */
364 struct delayed_work lock_work; /* work to be done in locking */
365 struct key *unlock_key; /* key to be used in unlocking */
358 366
359 /* outstanding callback notification on this file */ 367 /* outstanding callback notification on this file */
360 struct rb_node server_rb; /* link in server->fs_vnodes */ 368 struct rb_node server_rb; /* link in server->fs_vnodes */
@@ -474,6 +482,15 @@ extern int afs_open(struct inode *, struct file *);
474extern int afs_release(struct inode *, struct file *); 482extern int afs_release(struct inode *, struct file *);
475 483
476/* 484/*
485 * flock.c
486 */
487extern void __exit afs_kill_lock_manager(void);
488extern void afs_lock_work(struct work_struct *);
489extern void afs_lock_may_be_available(struct afs_vnode *);
490extern int afs_lock(struct file *, int, struct file_lock *);
491extern int afs_flock(struct file *, int, struct file_lock *);
492
493/*
477 * fsclient.c 494 * fsclient.c
478 */ 495 */
479extern int afs_fs_fetch_file_status(struct afs_server *, struct key *, 496extern int afs_fs_fetch_file_status(struct afs_server *, struct key *,
@@ -513,6 +530,15 @@ extern int afs_fs_get_volume_status(struct afs_server *, struct key *,
513 struct afs_vnode *, 530 struct afs_vnode *,
514 struct afs_volume_status *, 531 struct afs_volume_status *,
515 const struct afs_wait_mode *); 532 const struct afs_wait_mode *);
533extern int afs_fs_set_lock(struct afs_server *, struct key *,
534 struct afs_vnode *, afs_lock_type_t,
535 const struct afs_wait_mode *);
536extern int afs_fs_extend_lock(struct afs_server *, struct key *,
537 struct afs_vnode *,
538 const struct afs_wait_mode *);
539extern int afs_fs_release_lock(struct afs_server *, struct key *,
540 struct afs_vnode *,
541 const struct afs_wait_mode *);
516 542
517/* 543/*
518 * inode.c 544 * inode.c
@@ -681,6 +707,10 @@ extern int afs_vnode_store_data(struct afs_writeback *, pgoff_t, pgoff_t,
681extern int afs_vnode_setattr(struct afs_vnode *, struct key *, struct iattr *); 707extern int afs_vnode_setattr(struct afs_vnode *, struct key *, struct iattr *);
682extern int afs_vnode_get_volume_status(struct afs_vnode *, struct key *, 708extern int afs_vnode_get_volume_status(struct afs_vnode *, struct key *,
683 struct afs_volume_status *); 709 struct afs_volume_status *);
710extern int afs_vnode_set_lock(struct afs_vnode *, struct key *,
711 afs_lock_type_t);
712extern int afs_vnode_extend_lock(struct afs_vnode *, struct key *);
713extern int afs_vnode_release_lock(struct afs_vnode *, struct key *);
684 714
685/* 715/*
686 * volume.c 716 * volume.c
diff --git a/fs/afs/main.c b/fs/afs/main.c
index cd21195bbb2..0f60f6b3576 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -168,6 +168,7 @@ static void __exit afs_exit(void)
168 printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 unregistering.\n"); 168 printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 unregistering.\n");
169 169
170 afs_fs_exit(); 170 afs_fs_exit();
171 afs_kill_lock_manager();
171 afs_close_socket(); 172 afs_close_socket();
172 afs_purge_servers(); 173 afs_purge_servers();
173 afs_callback_update_kill(); 174 afs_callback_update_kill();
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
index d1a889c4074..2d33a5f7d21 100644
--- a/fs/afs/misc.c
+++ b/fs/afs/misc.c
@@ -35,6 +35,7 @@ int afs_abort_to_error(u32 abort_code)
35 case VOVERQUOTA: return -EDQUOT; 35 case VOVERQUOTA: return -EDQUOT;
36 case VBUSY: return -EBUSY; 36 case VBUSY: return -EBUSY;
37 case VMOVED: return -ENXIO; 37 case VMOVED: return -ENXIO;
38 case 0x2f6df0a: return -EWOULDBLOCK;
38 case 0x2f6df0c: return -EACCES; 39 case 0x2f6df0c: return -EACCES;
39 case 0x2f6df0f: return -EBUSY; 40 case 0x2f6df0f: return -EBUSY;
40 case 0x2f6df10: return -EEXIST; 41 case 0x2f6df10: return -EEXIST;
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 13df512aea9..6edb56683b9 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -201,23 +201,9 @@ static int afs_proc_cells_open(struct inode *inode, struct file *file)
201 */ 201 */
202static void *afs_proc_cells_start(struct seq_file *m, loff_t *_pos) 202static void *afs_proc_cells_start(struct seq_file *m, loff_t *_pos)
203{ 203{
204 struct list_head *_p;
205 loff_t pos = *_pos;
206
207 /* lock the list against modification */ 204 /* lock the list against modification */
208 down_read(&afs_proc_cells_sem); 205 down_read(&afs_proc_cells_sem);
209 206 return seq_list_start_head(&afs_proc_cells, *_pos);
210 /* allow for the header line */
211 if (!pos)
212 return (void *) 1;
213 pos--;
214
215 /* find the n'th element in the list */
216 list_for_each(_p, &afs_proc_cells)
217 if (!pos--)
218 break;
219
220 return _p != &afs_proc_cells ? _p : NULL;
221} 207}
222 208
223/* 209/*
@@ -225,14 +211,7 @@ static void *afs_proc_cells_start(struct seq_file *m, loff_t *_pos)
225 */ 211 */
226static void *afs_proc_cells_next(struct seq_file *p, void *v, loff_t *pos) 212static void *afs_proc_cells_next(struct seq_file *p, void *v, loff_t *pos)
227{ 213{
228 struct list_head *_p; 214 return seq_list_next(v, &afs_proc_cells, pos);
229
230 (*pos)++;
231
232 _p = v;
233 _p = v == (void *) 1 ? afs_proc_cells.next : _p->next;
234
235 return _p != &afs_proc_cells ? _p : NULL;
236} 215}
237 216
238/* 217/*
@@ -250,7 +229,7 @@ static int afs_proc_cells_show(struct seq_file *m, void *v)
250{ 229{
251 struct afs_cell *cell = list_entry(v, struct afs_cell, proc_link); 230 struct afs_cell *cell = list_entry(v, struct afs_cell, proc_link);
252 231
253 if (v == (void *) 1) { 232 if (v == &afs_proc_cells) {
254 /* display header on line 1 */ 233 /* display header on line 1 */
255 seq_puts(m, "USE NAME\n"); 234 seq_puts(m, "USE NAME\n");
256 return 0; 235 return 0;
@@ -503,26 +482,13 @@ static int afs_proc_cell_volumes_release(struct inode *inode, struct file *file)
503 */ 482 */
504static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos) 483static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos)
505{ 484{
506 struct list_head *_p;
507 struct afs_cell *cell = m->private; 485 struct afs_cell *cell = m->private;
508 loff_t pos = *_pos;
509 486
510 _enter("cell=%p pos=%Ld", cell, *_pos); 487 _enter("cell=%p pos=%Ld", cell, *_pos);
511 488
512 /* lock the list against modification */ 489 /* lock the list against modification */
513 down_read(&cell->vl_sem); 490 down_read(&cell->vl_sem);
514 491 return seq_list_start_head(&cell->vl_list, *_pos);
515 /* allow for the header line */
516 if (!pos)
517 return (void *) 1;
518 pos--;
519
520 /* find the n'th element in the list */
521 list_for_each(_p, &cell->vl_list)
522 if (!pos--)
523 break;
524
525 return _p != &cell->vl_list ? _p : NULL;
526} 492}
527 493
528/* 494/*
@@ -531,17 +497,10 @@ static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos)
531static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v, 497static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v,
532 loff_t *_pos) 498 loff_t *_pos)
533{ 499{
534 struct list_head *_p;
535 struct afs_cell *cell = p->private; 500 struct afs_cell *cell = p->private;
536 501
537 _enter("cell=%p pos=%Ld", cell, *_pos); 502 _enter("cell=%p pos=%Ld", cell, *_pos);
538 503 return seq_list_next(v, &cell->vl_list, _pos);
539 (*_pos)++;
540
541 _p = v;
542 _p = (v == (void *) 1) ? cell->vl_list.next : _p->next;
543
544 return (_p != &cell->vl_list) ? _p : NULL;
545} 504}
546 505
547/* 506/*
@@ -569,11 +528,12 @@ const char afs_vlocation_states[][4] = {
569 */ 528 */
570static int afs_proc_cell_volumes_show(struct seq_file *m, void *v) 529static int afs_proc_cell_volumes_show(struct seq_file *m, void *v)
571{ 530{
531 struct afs_cell *cell = m->private;
572 struct afs_vlocation *vlocation = 532 struct afs_vlocation *vlocation =
573 list_entry(v, struct afs_vlocation, link); 533 list_entry(v, struct afs_vlocation, link);
574 534
575 /* display header on line 1 */ 535 /* display header on line 1 */
576 if (v == (void *) 1) { 536 if (v == &cell->vl_list) {
577 seq_puts(m, "USE STT VLID[0] VLID[1] VLID[2] NAME\n"); 537 seq_puts(m, "USE STT VLID[0] VLID[1] VLID[2] NAME\n");
578 return 0; 538 return 0;
579 } 539 }
@@ -734,26 +694,13 @@ static int afs_proc_cell_servers_release(struct inode *inode,
734static void *afs_proc_cell_servers_start(struct seq_file *m, loff_t *_pos) 694static void *afs_proc_cell_servers_start(struct seq_file *m, loff_t *_pos)
735 __acquires(m->private->servers_lock) 695 __acquires(m->private->servers_lock)
736{ 696{
737 struct list_head *_p;
738 struct afs_cell *cell = m->private; 697 struct afs_cell *cell = m->private;
739 loff_t pos = *_pos;
740 698
741 _enter("cell=%p pos=%Ld", cell, *_pos); 699 _enter("cell=%p pos=%Ld", cell, *_pos);
742 700
743 /* lock the list against modification */ 701 /* lock the list against modification */
744 read_lock(&cell->servers_lock); 702 read_lock(&cell->servers_lock);
745 703 return seq_list_start_head(&cell->servers, *_pos);
746 /* allow for the header line */
747 if (!pos)
748 return (void *) 1;
749 pos--;
750
751 /* find the n'th element in the list */
752 list_for_each(_p, &cell->servers)
753 if (!pos--)
754 break;
755
756 return _p != &cell->servers ? _p : NULL;
757} 704}
758 705
759/* 706/*
@@ -762,17 +709,10 @@ static void *afs_proc_cell_servers_start(struct seq_file *m, loff_t *_pos)
762static void *afs_proc_cell_servers_next(struct seq_file *p, void *v, 709static void *afs_proc_cell_servers_next(struct seq_file *p, void *v,
763 loff_t *_pos) 710 loff_t *_pos)
764{ 711{
765 struct list_head *_p;
766 struct afs_cell *cell = p->private; 712 struct afs_cell *cell = p->private;
767 713
768 _enter("cell=%p pos=%Ld", cell, *_pos); 714 _enter("cell=%p pos=%Ld", cell, *_pos);
769 715 return seq_list_next(v, &cell->servers, _pos);
770 (*_pos)++;
771
772 _p = v;
773 _p = v == (void *) 1 ? cell->servers.next : _p->next;
774
775 return _p != &cell->servers ? _p : NULL;
776} 716}
777 717
778/* 718/*
@@ -791,11 +731,12 @@ static void afs_proc_cell_servers_stop(struct seq_file *p, void *v)
791 */ 731 */
792static int afs_proc_cell_servers_show(struct seq_file *m, void *v) 732static int afs_proc_cell_servers_show(struct seq_file *m, void *v)
793{ 733{
734 struct afs_cell *cell = m->private;
794 struct afs_server *server = list_entry(v, struct afs_server, link); 735 struct afs_server *server = list_entry(v, struct afs_server, link);
795 char ipaddr[20]; 736 char ipaddr[20];
796 737
797 /* display header on line 1 */ 738 /* display header on line 1 */
798 if (v == (void *) 1) { 739 if (v == &cell->servers) {
799 seq_puts(m, "USE ADDR STATE\n"); 740 seq_puts(m, "USE ADDR STATE\n");
800 return 0; 741 return 0;
801 } 742 }
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 2e8496ba120..993cdf1cce3 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -460,6 +460,9 @@ static void afs_i_init_once(void *_vnode, struct kmem_cache *cachep,
460 spin_lock_init(&vnode->writeback_lock); 460 spin_lock_init(&vnode->writeback_lock);
461 spin_lock_init(&vnode->lock); 461 spin_lock_init(&vnode->lock);
462 INIT_LIST_HEAD(&vnode->writebacks); 462 INIT_LIST_HEAD(&vnode->writebacks);
463 INIT_LIST_HEAD(&vnode->pending_locks);
464 INIT_LIST_HEAD(&vnode->granted_locks);
465 INIT_DELAYED_WORK(&vnode->lock_work, afs_lock_work);
463 INIT_WORK(&vnode->cb_broken_work, afs_broken_callback_work); 466 INIT_WORK(&vnode->cb_broken_work, afs_broken_callback_work);
464} 467}
465 468
diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c
index 232c55dc245..2f05c4fc2a7 100644
--- a/fs/afs/vnode.c
+++ b/fs/afs/vnode.c
@@ -561,7 +561,7 @@ no_server:
561/* 561/*
562 * create a hard link 562 * create a hard link
563 */ 563 */
564extern int afs_vnode_link(struct afs_vnode *dvnode, struct afs_vnode *vnode, 564int afs_vnode_link(struct afs_vnode *dvnode, struct afs_vnode *vnode,
565 struct key *key, const char *name) 565 struct key *key, const char *name)
566{ 566{
567 struct afs_server *server; 567 struct afs_server *server;
@@ -887,11 +887,6 @@ int afs_vnode_get_volume_status(struct afs_vnode *vnode, struct key *key,
887 vnode->fid.unique, 887 vnode->fid.unique,
888 key_serial(key)); 888 key_serial(key));
889 889
890 /* this op will fetch the status */
891 spin_lock(&vnode->lock);
892 vnode->update_cnt++;
893 spin_unlock(&vnode->lock);
894
895 do { 890 do {
896 /* pick a server to query */ 891 /* pick a server to query */
897 server = afs_volume_pick_fileserver(vnode); 892 server = afs_volume_pick_fileserver(vnode);
@@ -905,20 +900,127 @@ int afs_vnode_get_volume_status(struct afs_vnode *vnode, struct key *key,
905 } while (!afs_volume_release_fileserver(vnode, server, ret)); 900 } while (!afs_volume_release_fileserver(vnode, server, ret));
906 901
907 /* adjust the flags */ 902 /* adjust the flags */
908 if (ret == 0) { 903 if (ret == 0)
909 afs_vnode_finalise_status_update(vnode, server); 904 afs_put_server(server);
905
906 _leave(" = %d", ret);
907 return ret;
908
909no_server:
910 return PTR_ERR(server);
911}
912
913/*
914 * get a lock on a file
915 */
916int afs_vnode_set_lock(struct afs_vnode *vnode, struct key *key,
917 afs_lock_type_t type)
918{
919 struct afs_server *server;
920 int ret;
921
922 _enter("%s{%x:%u.%u},%x,%u",
923 vnode->volume->vlocation->vldb.name,
924 vnode->fid.vid,
925 vnode->fid.vnode,
926 vnode->fid.unique,
927 key_serial(key), type);
928
929 do {
930 /* pick a server to query */
931 server = afs_volume_pick_fileserver(vnode);
932 if (IS_ERR(server))
933 goto no_server;
934
935 _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
936
937 ret = afs_fs_set_lock(server, key, vnode, type, &afs_sync_call);
938
939 } while (!afs_volume_release_fileserver(vnode, server, ret));
940
941 /* adjust the flags */
942 if (ret == 0)
943 afs_put_server(server);
944
945 _leave(" = %d", ret);
946 return ret;
947
948no_server:
949 return PTR_ERR(server);
950}
951
952/*
953 * extend a lock on a file
954 */
955int afs_vnode_extend_lock(struct afs_vnode *vnode, struct key *key)
956{
957 struct afs_server *server;
958 int ret;
959
960 _enter("%s{%x:%u.%u},%x",
961 vnode->volume->vlocation->vldb.name,
962 vnode->fid.vid,
963 vnode->fid.vnode,
964 vnode->fid.unique,
965 key_serial(key));
966
967 do {
968 /* pick a server to query */
969 server = afs_volume_pick_fileserver(vnode);
970 if (IS_ERR(server))
971 goto no_server;
972
973 _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
974
975 ret = afs_fs_extend_lock(server, key, vnode, &afs_sync_call);
976
977 } while (!afs_volume_release_fileserver(vnode, server, ret));
978
979 /* adjust the flags */
980 if (ret == 0)
981 afs_put_server(server);
982
983 _leave(" = %d", ret);
984 return ret;
985
986no_server:
987 return PTR_ERR(server);
988}
989
990/*
991 * release a lock on a file
992 */
993int afs_vnode_release_lock(struct afs_vnode *vnode, struct key *key)
994{
995 struct afs_server *server;
996 int ret;
997
998 _enter("%s{%x:%u.%u},%x",
999 vnode->volume->vlocation->vldb.name,
1000 vnode->fid.vid,
1001 vnode->fid.vnode,
1002 vnode->fid.unique,
1003 key_serial(key));
1004
1005 do {
1006 /* pick a server to query */
1007 server = afs_volume_pick_fileserver(vnode);
1008 if (IS_ERR(server))
1009 goto no_server;
1010
1011 _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
1012
1013 ret = afs_fs_release_lock(server, key, vnode, &afs_sync_call);
1014
1015 } while (!afs_volume_release_fileserver(vnode, server, ret));
1016
1017 /* adjust the flags */
1018 if (ret == 0)
910 afs_put_server(server); 1019 afs_put_server(server);
911 } else {
912 afs_vnode_status_update_failed(vnode, ret);
913 }
914 1020
915 _leave(" = %d", ret); 1021 _leave(" = %d", ret);
916 return ret; 1022 return ret;
917 1023
918no_server: 1024no_server:
919 spin_lock(&vnode->lock);
920 vnode->update_cnt--;
921 ASSERTCMP(vnode->update_cnt, >=, 0);
922 spin_unlock(&vnode->lock);
923 return PTR_ERR(server); 1025 return PTR_ERR(server);
924} 1026}
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 40fe3a3222e..a260198306c 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -53,7 +53,7 @@ static struct dentry_operations anon_inodefs_dentry_operations = {
53}; 53};
54 54
55/** 55/**
56 * anon_inode_getfd - creates a new file instance by hooking it up to and 56 * anon_inode_getfd - creates a new file instance by hooking it up to an
57 * anonymous inode, and a dentry that describe the "class" 57 * anonymous inode, and a dentry that describe the "class"
58 * of the file 58 * of the file
59 * 59 *
@@ -66,7 +66,7 @@ static struct dentry_operations anon_inodefs_dentry_operations = {
66 * 66 *
67 * Creates a new file by hooking it on a single inode. This is useful for files 67 * Creates a new file by hooking it on a single inode. This is useful for files
68 * that do not need to have a full-fledged inode in order to operate correctly. 68 * that do not need to have a full-fledged inode in order to operate correctly.
69 * All the files created with anon_inode_getfd() will share a single inode, by 69 * All the files created with anon_inode_getfd() will share a single inode,
70 * hence saving memory and avoiding code duplication for the file/inode/dentry 70 * hence saving memory and avoiding code duplication for the file/inode/dentry
71 * setup. 71 * setup.
72 */ 72 */
@@ -141,9 +141,9 @@ err_put_filp:
141} 141}
142 142
143/* 143/*
144 * A single inode exist for all anon_inode files. Contrary to pipes, 144 * A single inode exists for all anon_inode files. Contrary to pipes,
145 * anon_inode inodes has no per-instance data associated, so we can avoid 145 * anon_inode inodes have no associated per-instance data, so we need
146 * the allocation of multiple of them. 146 * only allocate one of them.
147 */ 147 */
148static struct inode *anon_inode_mkinode(void) 148static struct inode *anon_inode_mkinode(void)
149{ 149{
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 08e4414b837..a27e42bf340 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -45,7 +45,7 @@
45 45
46static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs); 46static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
47static int load_elf_library(struct file *); 47static int load_elf_library(struct file *);
48static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int); 48static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int, unsigned long);
49 49
50/* 50/*
51 * If we don't support core dumping, then supply a NULL so we 51 * If we don't support core dumping, then supply a NULL so we
@@ -80,7 +80,7 @@ static struct linux_binfmt elf_format = {
80 .hasvdso = 1 80 .hasvdso = 1
81}; 81};
82 82
83#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE) 83#define BAD_ADDR(x) IS_ERR_VALUE(x)
84 84
85static int set_brk(unsigned long start, unsigned long end) 85static int set_brk(unsigned long start, unsigned long end)
86{ 86{
@@ -285,33 +285,70 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
285#ifndef elf_map 285#ifndef elf_map
286 286
287static unsigned long elf_map(struct file *filep, unsigned long addr, 287static unsigned long elf_map(struct file *filep, unsigned long addr,
288 struct elf_phdr *eppnt, int prot, int type) 288 struct elf_phdr *eppnt, int prot, int type,
289 unsigned long total_size)
289{ 290{
290 unsigned long map_addr; 291 unsigned long map_addr;
291 unsigned long pageoffset = ELF_PAGEOFFSET(eppnt->p_vaddr); 292 unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
293 unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
294 addr = ELF_PAGESTART(addr);
295 size = ELF_PAGEALIGN(size);
292 296
293 down_write(&current->mm->mmap_sem);
294 /* mmap() will return -EINVAL if given a zero size, but a 297 /* mmap() will return -EINVAL if given a zero size, but a
295 * segment with zero filesize is perfectly valid */ 298 * segment with zero filesize is perfectly valid */
296 if (eppnt->p_filesz + pageoffset) 299 if (!size)
297 map_addr = do_mmap(filep, ELF_PAGESTART(addr), 300 return addr;
298 eppnt->p_filesz + pageoffset, prot, type, 301
299 eppnt->p_offset - pageoffset); 302 down_write(&current->mm->mmap_sem);
300 else 303 /*
301 map_addr = ELF_PAGESTART(addr); 304 * total_size is the size of the ELF (interpreter) image.
305 * The _first_ mmap needs to know the full size, otherwise
306 * randomization might put this image into an overlapping
307 * position with the ELF binary image. (since size < total_size)
308 * So we first map the 'big' image - and unmap the remainder at
309 * the end. (which unmap is needed for ELF images with holes.)
310 */
311 if (total_size) {
312 total_size = ELF_PAGEALIGN(total_size);
313 map_addr = do_mmap(filep, addr, total_size, prot, type, off);
314 if (!BAD_ADDR(map_addr))
315 do_munmap(current->mm, map_addr+size, total_size-size);
316 } else
317 map_addr = do_mmap(filep, addr, size, prot, type, off);
318
302 up_write(&current->mm->mmap_sem); 319 up_write(&current->mm->mmap_sem);
303 return(map_addr); 320 return(map_addr);
304} 321}
305 322
306#endif /* !elf_map */ 323#endif /* !elf_map */
307 324
325static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
326{
327 int i, first_idx = -1, last_idx = -1;
328
329 for (i = 0; i < nr; i++) {
330 if (cmds[i].p_type == PT_LOAD) {
331 last_idx = i;
332 if (first_idx == -1)
333 first_idx = i;
334 }
335 }
336 if (first_idx == -1)
337 return 0;
338
339 return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
340 ELF_PAGESTART(cmds[first_idx].p_vaddr);
341}
342
343
308/* This is much more generalized than the library routine read function, 344/* This is much more generalized than the library routine read function,
309 so we keep this separate. Technically the library read function 345 so we keep this separate. Technically the library read function
310 is only provided so that we can read a.out libraries that have 346 is only provided so that we can read a.out libraries that have
311 an ELF header */ 347 an ELF header */
312 348
313static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, 349static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
314 struct file *interpreter, unsigned long *interp_load_addr) 350 struct file *interpreter, unsigned long *interp_map_addr,
351 unsigned long no_base)
315{ 352{
316 struct elf_phdr *elf_phdata; 353 struct elf_phdr *elf_phdata;
317 struct elf_phdr *eppnt; 354 struct elf_phdr *eppnt;
@@ -319,6 +356,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
319 int load_addr_set = 0; 356 int load_addr_set = 0;
320 unsigned long last_bss = 0, elf_bss = 0; 357 unsigned long last_bss = 0, elf_bss = 0;
321 unsigned long error = ~0UL; 358 unsigned long error = ~0UL;
359 unsigned long total_size;
322 int retval, i, size; 360 int retval, i, size;
323 361
324 /* First of all, some simple consistency checks */ 362 /* First of all, some simple consistency checks */
@@ -357,6 +395,12 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
357 goto out_close; 395 goto out_close;
358 } 396 }
359 397
398 total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
399 if (!total_size) {
400 error = -EINVAL;
401 goto out_close;
402 }
403
360 eppnt = elf_phdata; 404 eppnt = elf_phdata;
361 for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) { 405 for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
362 if (eppnt->p_type == PT_LOAD) { 406 if (eppnt->p_type == PT_LOAD) {
@@ -374,9 +418,14 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
374 vaddr = eppnt->p_vaddr; 418 vaddr = eppnt->p_vaddr;
375 if (interp_elf_ex->e_type == ET_EXEC || load_addr_set) 419 if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
376 elf_type |= MAP_FIXED; 420 elf_type |= MAP_FIXED;
421 else if (no_base && interp_elf_ex->e_type == ET_DYN)
422 load_addr = -vaddr;
377 423
378 map_addr = elf_map(interpreter, load_addr + vaddr, 424 map_addr = elf_map(interpreter, load_addr + vaddr,
379 eppnt, elf_prot, elf_type); 425 eppnt, elf_prot, elf_type, total_size);
426 total_size = 0;
427 if (!*interp_map_addr)
428 *interp_map_addr = map_addr;
380 error = map_addr; 429 error = map_addr;
381 if (BAD_ADDR(map_addr)) 430 if (BAD_ADDR(map_addr))
382 goto out_close; 431 goto out_close;
@@ -442,8 +491,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
442 goto out_close; 491 goto out_close;
443 } 492 }
444 493
445 *interp_load_addr = load_addr; 494 error = load_addr;
446 error = ((unsigned long)interp_elf_ex->e_entry) + load_addr;
447 495
448out_close: 496out_close:
449 kfree(elf_phdata); 497 kfree(elf_phdata);
@@ -540,7 +588,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
540 int elf_exec_fileno; 588 int elf_exec_fileno;
541 int retval, i; 589 int retval, i;
542 unsigned int size; 590 unsigned int size;
543 unsigned long elf_entry, interp_load_addr = 0; 591 unsigned long elf_entry;
592 unsigned long interp_load_addr = 0;
544 unsigned long start_code, end_code, start_data, end_data; 593 unsigned long start_code, end_code, start_data, end_data;
545 unsigned long reloc_func_desc = 0; 594 unsigned long reloc_func_desc = 0;
546 char passed_fileno[6]; 595 char passed_fileno[6];
@@ -808,9 +857,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
808 current->mm->start_stack = bprm->p; 857 current->mm->start_stack = bprm->p;
809 858
810 /* Now we do a little grungy work by mmaping the ELF image into 859 /* Now we do a little grungy work by mmaping the ELF image into
811 the correct location in memory. At this point, we assume that 860 the correct location in memory. */
812 the image should be loaded at fixed address, not at a variable
813 address. */
814 for(i = 0, elf_ppnt = elf_phdata; 861 for(i = 0, elf_ppnt = elf_phdata;
815 i < loc->elf_ex.e_phnum; i++, elf_ppnt++) { 862 i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
816 int elf_prot = 0, elf_flags; 863 int elf_prot = 0, elf_flags;
@@ -864,11 +911,15 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
864 * default mmap base, as well as whatever program they 911 * default mmap base, as well as whatever program they
865 * might try to exec. This is because the brk will 912 * might try to exec. This is because the brk will
866 * follow the loader, and is not movable. */ 913 * follow the loader, and is not movable. */
914#ifdef CONFIG_X86
915 load_bias = 0;
916#else
867 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); 917 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
918#endif
868 } 919 }
869 920
870 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, 921 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
871 elf_prot, elf_flags); 922 elf_prot, elf_flags,0);
872 if (BAD_ADDR(error)) { 923 if (BAD_ADDR(error)) {
873 send_sig(SIGKILL, current, 0); 924 send_sig(SIGKILL, current, 0);
874 retval = IS_ERR((void *)error) ? 925 retval = IS_ERR((void *)error) ?
@@ -944,13 +995,25 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
944 } 995 }
945 996
946 if (elf_interpreter) { 997 if (elf_interpreter) {
947 if (interpreter_type == INTERPRETER_AOUT) 998 if (interpreter_type == INTERPRETER_AOUT) {
948 elf_entry = load_aout_interp(&loc->interp_ex, 999 elf_entry = load_aout_interp(&loc->interp_ex,
949 interpreter); 1000 interpreter);
950 else 1001 } else {
1002 unsigned long uninitialized_var(interp_map_addr);
1003
951 elf_entry = load_elf_interp(&loc->interp_elf_ex, 1004 elf_entry = load_elf_interp(&loc->interp_elf_ex,
952 interpreter, 1005 interpreter,
953 &interp_load_addr); 1006 &interp_map_addr,
1007 load_bias);
1008 if (!BAD_ADDR(elf_entry)) {
1009 /*
1010 * load_elf_interp() returns relocation
1011 * adjustment
1012 */
1013 interp_load_addr = elf_entry;
1014 elf_entry += loc->interp_elf_ex.e_entry;
1015 }
1016 }
954 if (BAD_ADDR(elf_entry)) { 1017 if (BAD_ADDR(elf_entry)) {
955 force_sig(SIGSEGV, current); 1018 force_sig(SIGSEGV, current);
956 retval = IS_ERR((void *)elf_entry) ? 1019 retval = IS_ERR((void *)elf_entry) ?
diff --git a/fs/block_dev.c b/fs/block_dev.c
index b3e9bfa748c..3635315e3b9 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -588,12 +588,10 @@ EXPORT_SYMBOL(bdget);
588 588
589long nr_blockdev_pages(void) 589long nr_blockdev_pages(void)
590{ 590{
591 struct list_head *p; 591 struct block_device *bdev;
592 long ret = 0; 592 long ret = 0;
593 spin_lock(&bdev_lock); 593 spin_lock(&bdev_lock);
594 list_for_each(p, &all_bdevs) { 594 list_for_each_entry(bdev, &all_bdevs, bd_list) {
595 struct block_device *bdev;
596 bdev = list_entry(p, struct block_device, bd_list);
597 ret += bdev->bd_inode->i_mapping->nrpages; 595 ret += bdev->bd_inode->i_mapping->nrpages;
598 } 596 }
599 spin_unlock(&bdev_lock); 597 spin_unlock(&bdev_lock);
@@ -874,7 +872,7 @@ static struct bd_holder *find_bd_holder(struct block_device *bdev,
874 */ 872 */
875static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo) 873static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo)
876{ 874{
877 int ret; 875 int err;
878 876
879 if (!bo) 877 if (!bo)
880 return -EINVAL; 878 return -EINVAL;
@@ -882,15 +880,18 @@ static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo)
882 if (!bd_holder_grab_dirs(bdev, bo)) 880 if (!bd_holder_grab_dirs(bdev, bo))
883 return -EBUSY; 881 return -EBUSY;
884 882
885 ret = add_symlink(bo->sdir, bo->sdev); 883 err = add_symlink(bo->sdir, bo->sdev);
886 if (ret == 0) { 884 if (err)
887 ret = add_symlink(bo->hdir, bo->hdev); 885 return err;
888 if (ret) 886
889 del_symlink(bo->sdir, bo->sdev); 887 err = add_symlink(bo->hdir, bo->hdev);
888 if (err) {
889 del_symlink(bo->sdir, bo->sdev);
890 return err;
890 } 891 }
891 if (ret == 0) 892
892 list_add_tail(&bo->list, &bdev->bd_holder_list); 893 list_add_tail(&bo->list, &bdev->bd_holder_list);
893 return ret; 894 return 0;
894} 895}
895 896
896/** 897/**
@@ -948,7 +949,7 @@ static struct bd_holder *del_bd_holder(struct block_device *bdev,
948static int bd_claim_by_kobject(struct block_device *bdev, void *holder, 949static int bd_claim_by_kobject(struct block_device *bdev, void *holder,
949 struct kobject *kobj) 950 struct kobject *kobj)
950{ 951{
951 int res; 952 int err;
952 struct bd_holder *bo, *found; 953 struct bd_holder *bo, *found;
953 954
954 if (!kobj) 955 if (!kobj)
@@ -959,21 +960,24 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder,
959 return -ENOMEM; 960 return -ENOMEM;
960 961
961 mutex_lock(&bdev->bd_mutex); 962 mutex_lock(&bdev->bd_mutex);
962 res = bd_claim(bdev, holder);
963 if (res == 0) {
964 found = find_bd_holder(bdev, bo);
965 if (found == NULL) {
966 res = add_bd_holder(bdev, bo);
967 if (res)
968 bd_release(bdev);
969 }
970 }
971 963
972 if (res || found) 964 err = bd_claim(bdev, holder);
973 free_bd_holder(bo); 965 if (err)
974 mutex_unlock(&bdev->bd_mutex); 966 goto fail;
975 967
976 return res; 968 found = find_bd_holder(bdev, bo);
969 if (found)
970 goto fail;
971
972 err = add_bd_holder(bdev, bo);
973 if (err)
974 bd_release(bdev);
975 else
976 bo = NULL;
977fail:
978 mutex_unlock(&bdev->bd_mutex);
979 free_bd_holder(bo);
980 return err;
977} 981}
978 982
979/** 983/**
@@ -987,15 +991,12 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder,
987static void bd_release_from_kobject(struct block_device *bdev, 991static void bd_release_from_kobject(struct block_device *bdev,
988 struct kobject *kobj) 992 struct kobject *kobj)
989{ 993{
990 struct bd_holder *bo;
991
992 if (!kobj) 994 if (!kobj)
993 return; 995 return;
994 996
995 mutex_lock(&bdev->bd_mutex); 997 mutex_lock(&bdev->bd_mutex);
996 bd_release(bdev); 998 bd_release(bdev);
997 if ((bo = del_bd_holder(bdev, kobj))) 999 free_bd_holder(del_bd_holder(bdev, kobj));
998 free_bd_holder(bo);
999 mutex_unlock(&bdev->bd_mutex); 1000 mutex_unlock(&bdev->bd_mutex);
1000} 1001}
1001 1002
diff --git a/fs/buffer.c b/fs/buffer.c
index aa68206bd51..424165b569f 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1026,11 +1026,6 @@ failed:
1026/* 1026/*
1027 * Create buffers for the specified block device block's page. If 1027 * Create buffers for the specified block device block's page. If
1028 * that page was dirty, the buffers are set dirty also. 1028 * that page was dirty, the buffers are set dirty also.
1029 *
1030 * Except that's a bug. Attaching dirty buffers to a dirty
1031 * blockdev's page can result in filesystem corruption, because
1032 * some of those buffers may be aliases of filesystem data.
1033 * grow_dev_page() will go BUG() if this happens.
1034 */ 1029 */
1035static int 1030static int
1036grow_buffers(struct block_device *bdev, sector_t block, int size) 1031grow_buffers(struct block_device *bdev, sector_t block, int size)
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 9ddf5ed6216..898a86dde8f 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -470,7 +470,7 @@ int coda_readdir(struct file *coda_file, void *dirent, filldir_t filldir)
470 470
471 ret = -ENOENT; 471 ret = -ENOENT;
472 if (!IS_DEADDIR(host_inode)) { 472 if (!IS_DEADDIR(host_inode)) {
473 ret = host_file->f_op->readdir(host_file, filldir, dirent); 473 ret = host_file->f_op->readdir(host_file, dirent, filldir);
474 file_accessed(host_file); 474 file_accessed(host_file);
475 } 475 }
476 } 476 }
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 6b44cdc96fa..e440a7b95d0 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -63,6 +63,7 @@
63#include <linux/wireless.h> 63#include <linux/wireless.h>
64#include <linux/atalk.h> 64#include <linux/atalk.h>
65#include <linux/blktrace_api.h> 65#include <linux/blktrace_api.h>
66#include <linux/loop.h>
66 67
67#include <net/bluetooth/bluetooth.h> 68#include <net/bluetooth/bluetooth.h>
68#include <net/bluetooth/hci.h> 69#include <net/bluetooth/hci.h>
@@ -3489,6 +3490,9 @@ HANDLE_IOCTL(LPSETTIMEOUT, lp_timeout_trans)
3489 3490
3490IGNORE_IOCTL(VFAT_IOCTL_READDIR_BOTH32) 3491IGNORE_IOCTL(VFAT_IOCTL_READDIR_BOTH32)
3491IGNORE_IOCTL(VFAT_IOCTL_READDIR_SHORT32) 3492IGNORE_IOCTL(VFAT_IOCTL_READDIR_SHORT32)
3493
3494/* loop */
3495IGNORE_IOCTL(LOOP_CLR_FD)
3492}; 3496};
3493 3497
3494#define IOCTL_HASHSIZE 256 3498#define IOCTL_HASHSIZE 256
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index 7b48c034b31..3b0185fdf9a 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -29,10 +29,11 @@
29 29
30struct configfs_dirent { 30struct configfs_dirent {
31 atomic_t s_count; 31 atomic_t s_count;
32 int s_dependent_count;
32 struct list_head s_sibling; 33 struct list_head s_sibling;
33 struct list_head s_children; 34 struct list_head s_children;
34 struct list_head s_links; 35 struct list_head s_links;
35 void * s_element; 36 void * s_element;
36 int s_type; 37 int s_type;
37 umode_t s_mode; 38 umode_t s_mode;
38 struct dentry * s_dentry; 39 struct dentry * s_dentry;
@@ -41,8 +42,8 @@ struct configfs_dirent {
41 42
42#define CONFIGFS_ROOT 0x0001 43#define CONFIGFS_ROOT 0x0001
43#define CONFIGFS_DIR 0x0002 44#define CONFIGFS_DIR 0x0002
44#define CONFIGFS_ITEM_ATTR 0x0004 45#define CONFIGFS_ITEM_ATTR 0x0004
45#define CONFIGFS_ITEM_LINK 0x0020 46#define CONFIGFS_ITEM_LINK 0x0020
46#define CONFIGFS_USET_DIR 0x0040 47#define CONFIGFS_USET_DIR 0x0040
47#define CONFIGFS_USET_DEFAULT 0x0080 48#define CONFIGFS_USET_DEFAULT 0x0080
48#define CONFIGFS_USET_DROPPING 0x0100 49#define CONFIGFS_USET_DROPPING 0x0100
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 5e6e37e58f3..2f436d4f1d6 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -355,6 +355,10 @@ static int configfs_detach_prep(struct dentry *dentry)
355 /* Mark that we've taken i_mutex */ 355 /* Mark that we've taken i_mutex */
356 sd->s_type |= CONFIGFS_USET_DROPPING; 356 sd->s_type |= CONFIGFS_USET_DROPPING;
357 357
358 /*
359 * Yup, recursive. If there's a problem, blame
360 * deep nesting of default_groups
361 */
358 ret = configfs_detach_prep(sd->s_dentry); 362 ret = configfs_detach_prep(sd->s_dentry);
359 if (!ret) 363 if (!ret)
360 continue; 364 continue;
@@ -562,7 +566,7 @@ static int populate_groups(struct config_group *group)
562 566
563/* 567/*
564 * All of link_obj/unlink_obj/link_group/unlink_group require that 568 * All of link_obj/unlink_obj/link_group/unlink_group require that
565 * subsys->su_sem is held. 569 * subsys->su_mutex is held.
566 */ 570 */
567 571
568static void unlink_obj(struct config_item *item) 572static void unlink_obj(struct config_item *item)
@@ -714,6 +718,28 @@ static void configfs_detach_group(struct config_item *item)
714} 718}
715 719
716/* 720/*
721 * After the item has been detached from the filesystem view, we are
722 * ready to tear it out of the hierarchy. Notify the client before
723 * we do that so they can perform any cleanup that requires
724 * navigating the hierarchy. A client does not need to provide this
725 * callback. The subsystem semaphore MUST be held by the caller, and
726 * references must be valid for both items. It also assumes the
727 * caller has validated ci_type.
728 */
729static void client_disconnect_notify(struct config_item *parent_item,
730 struct config_item *item)
731{
732 struct config_item_type *type;
733
734 type = parent_item->ci_type;
735 BUG_ON(!type);
736
737 if (type->ct_group_ops && type->ct_group_ops->disconnect_notify)
738 type->ct_group_ops->disconnect_notify(to_config_group(parent_item),
739 item);
740}
741
742/*
717 * Drop the initial reference from make_item()/make_group() 743 * Drop the initial reference from make_item()/make_group()
718 * This function assumes that reference is held on item 744 * This function assumes that reference is held on item
719 * and that item holds a valid reference to the parent. Also, it 745 * and that item holds a valid reference to the parent. Also, it
@@ -733,11 +759,244 @@ static void client_drop_item(struct config_item *parent_item,
733 */ 759 */
734 if (type->ct_group_ops && type->ct_group_ops->drop_item) 760 if (type->ct_group_ops && type->ct_group_ops->drop_item)
735 type->ct_group_ops->drop_item(to_config_group(parent_item), 761 type->ct_group_ops->drop_item(to_config_group(parent_item),
736 item); 762 item);
737 else 763 else
738 config_item_put(item); 764 config_item_put(item);
739} 765}
740 766
767#ifdef DEBUG
768static void configfs_dump_one(struct configfs_dirent *sd, int level)
769{
770 printk(KERN_INFO "%*s\"%s\":\n", level, " ", configfs_get_name(sd));
771
772#define type_print(_type) if (sd->s_type & _type) printk(KERN_INFO "%*s %s\n", level, " ", #_type);
773 type_print(CONFIGFS_ROOT);
774 type_print(CONFIGFS_DIR);
775 type_print(CONFIGFS_ITEM_ATTR);
776 type_print(CONFIGFS_ITEM_LINK);
777 type_print(CONFIGFS_USET_DIR);
778 type_print(CONFIGFS_USET_DEFAULT);
779 type_print(CONFIGFS_USET_DROPPING);
780#undef type_print
781}
782
783static int configfs_dump(struct configfs_dirent *sd, int level)
784{
785 struct configfs_dirent *child_sd;
786 int ret = 0;
787
788 configfs_dump_one(sd, level);
789
790 if (!(sd->s_type & (CONFIGFS_DIR|CONFIGFS_ROOT)))
791 return 0;
792
793 list_for_each_entry(child_sd, &sd->s_children, s_sibling) {
794 ret = configfs_dump(child_sd, level + 2);
795 if (ret)
796 break;
797 }
798
799 return ret;
800}
801#endif
802
803
804/*
805 * configfs_depend_item() and configfs_undepend_item()
806 *
807 * WARNING: Do not call these from a configfs callback!
808 *
809 * This describes these functions and their helpers.
810 *
811 * Allow another kernel system to depend on a config_item. If this
812 * happens, the item cannot go away until the dependant can live without
813 * it. The idea is to give client modules as simple an interface as
814 * possible. When a system asks them to depend on an item, they just
815 * call configfs_depend_item(). If the item is live and the client
816 * driver is in good shape, we'll happily do the work for them.
817 *
818 * Why is the locking complex? Because configfs uses the VFS to handle
819 * all locking, but this function is called outside the normal
820 * VFS->configfs path. So it must take VFS locks to prevent the
821 * VFS->configfs stuff (configfs_mkdir(), configfs_rmdir(), etc). This is
822 * why you can't call these functions underneath configfs callbacks.
823 *
824 * Note, btw, that this can be called at *any* time, even when a configfs
825 * subsystem isn't registered, or when configfs is loading or unloading.
826 * Just like configfs_register_subsystem(). So we take the same
827 * precautions. We pin the filesystem. We lock each i_mutex _in_order_
828 * on our way down the tree. If we can find the target item in the
829 * configfs tree, it must be part of the subsystem tree as well, so we
830 * do not need the subsystem semaphore. Holding the i_mutex chain locks
831 * out mkdir() and rmdir(), who might be racing us.
832 */
833
834/*
835 * configfs_depend_prep()
836 *
837 * Only subdirectories count here. Files (CONFIGFS_NOT_PINNED) are
838 * attributes. This is similar but not the same to configfs_detach_prep().
839 * Note that configfs_detach_prep() expects the parent to be locked when it
840 * is called, but we lock the parent *inside* configfs_depend_prep(). We
841 * do that so we can unlock it if we find nothing.
842 *
843 * Here we do a depth-first search of the dentry hierarchy looking for
844 * our object. We take i_mutex on each step of the way down. IT IS
845 * ESSENTIAL THAT i_mutex LOCKING IS ORDERED. If we come back up a branch,
846 * we'll drop the i_mutex.
847 *
848 * If the target is not found, -ENOENT is bubbled up and we have released
849 * all locks. If the target was found, the locks will be cleared by
850 * configfs_depend_rollback().
851 *
852 * This adds a requirement that all config_items be unique!
853 *
854 * This is recursive because the locking traversal is tricky. There isn't
855 * much on the stack, though, so folks that need this function - be careful
856 * about your stack! Patches will be accepted to make it iterative.
857 */
858static int configfs_depend_prep(struct dentry *origin,
859 struct config_item *target)
860{
861 struct configfs_dirent *child_sd, *sd = origin->d_fsdata;
862 int ret = 0;
863
864 BUG_ON(!origin || !sd);
865
866 /* Lock this guy on the way down */
867 mutex_lock(&sd->s_dentry->d_inode->i_mutex);
868 if (sd->s_element == target) /* Boo-yah */
869 goto out;
870
871 list_for_each_entry(child_sd, &sd->s_children, s_sibling) {
872 if (child_sd->s_type & CONFIGFS_DIR) {
873 ret = configfs_depend_prep(child_sd->s_dentry,
874 target);
875 if (!ret)
876 goto out; /* Child path boo-yah */
877 }
878 }
879
880 /* We looped all our children and didn't find target */
881 mutex_unlock(&sd->s_dentry->d_inode->i_mutex);
882 ret = -ENOENT;
883
884out:
885 return ret;
886}
887
888/*
889 * This is ONLY called if configfs_depend_prep() did its job. So we can
890 * trust the entire path from item back up to origin.
891 *
892 * We walk backwards from item, unlocking each i_mutex. We finish by
893 * unlocking origin.
894 */
895static void configfs_depend_rollback(struct dentry *origin,
896 struct config_item *item)
897{
898 struct dentry *dentry = item->ci_dentry;
899
900 while (dentry != origin) {
901 mutex_unlock(&dentry->d_inode->i_mutex);
902 dentry = dentry->d_parent;
903 }
904
905 mutex_unlock(&origin->d_inode->i_mutex);
906}
907
908int configfs_depend_item(struct configfs_subsystem *subsys,
909 struct config_item *target)
910{
911 int ret;
912 struct configfs_dirent *p, *root_sd, *subsys_sd = NULL;
913 struct config_item *s_item = &subsys->su_group.cg_item;
914
915 /*
916 * Pin the configfs filesystem. This means we can safely access
917 * the root of the configfs filesystem.
918 */
919 ret = configfs_pin_fs();
920 if (ret)
921 return ret;
922
923 /*
924 * Next, lock the root directory. We're going to check that the
925 * subsystem is really registered, and so we need to lock out
926 * configfs_[un]register_subsystem().
927 */
928 mutex_lock(&configfs_sb->s_root->d_inode->i_mutex);
929
930 root_sd = configfs_sb->s_root->d_fsdata;
931
932 list_for_each_entry(p, &root_sd->s_children, s_sibling) {
933 if (p->s_type & CONFIGFS_DIR) {
934 if (p->s_element == s_item) {
935 subsys_sd = p;
936 break;
937 }
938 }
939 }
940
941 if (!subsys_sd) {
942 ret = -ENOENT;
943 goto out_unlock_fs;
944 }
945
946 /* Ok, now we can trust subsys/s_item */
947
948 /* Scan the tree, locking i_mutex recursively, return 0 if found */
949 ret = configfs_depend_prep(subsys_sd->s_dentry, target);
950 if (ret)
951 goto out_unlock_fs;
952
953 /* We hold all i_mutexes from the subsystem down to the target */
954 p = target->ci_dentry->d_fsdata;
955 p->s_dependent_count += 1;
956
957 configfs_depend_rollback(subsys_sd->s_dentry, target);
958
959out_unlock_fs:
960 mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex);
961
962 /*
963 * If we succeeded, the fs is pinned via other methods. If not,
964 * we're done with it anyway. So release_fs() is always right.
965 */
966 configfs_release_fs();
967
968 return ret;
969}
970EXPORT_SYMBOL(configfs_depend_item);
971
972/*
973 * Release the dependent linkage. This is much simpler than
974 * configfs_depend_item() because we know that that the client driver is
975 * pinned, thus the subsystem is pinned, and therefore configfs is pinned.
976 */
977void configfs_undepend_item(struct configfs_subsystem *subsys,
978 struct config_item *target)
979{
980 struct configfs_dirent *sd;
981
982 /*
983 * Since we can trust everything is pinned, we just need i_mutex
984 * on the item.
985 */
986 mutex_lock(&target->ci_dentry->d_inode->i_mutex);
987
988 sd = target->ci_dentry->d_fsdata;
989 BUG_ON(sd->s_dependent_count < 1);
990
991 sd->s_dependent_count -= 1;
992
993 /*
994 * After this unlock, we cannot trust the item to stay alive!
995 * DO NOT REFERENCE item after this unlock.
996 */
997 mutex_unlock(&target->ci_dentry->d_inode->i_mutex);
998}
999EXPORT_SYMBOL(configfs_undepend_item);
741 1000
742static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 1001static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
743{ 1002{
@@ -783,7 +1042,7 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
783 1042
784 snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name); 1043 snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name);
785 1044
786 down(&subsys->su_sem); 1045 mutex_lock(&subsys->su_mutex);
787 group = NULL; 1046 group = NULL;
788 item = NULL; 1047 item = NULL;
789 if (type->ct_group_ops->make_group) { 1048 if (type->ct_group_ops->make_group) {
@@ -797,7 +1056,7 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
797 if (item) 1056 if (item)
798 link_obj(parent_item, item); 1057 link_obj(parent_item, item);
799 } 1058 }
800 up(&subsys->su_sem); 1059 mutex_unlock(&subsys->su_mutex);
801 1060
802 kfree(name); 1061 kfree(name);
803 if (!item) { 1062 if (!item) {
@@ -841,13 +1100,16 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
841out_unlink: 1100out_unlink:
842 if (ret) { 1101 if (ret) {
843 /* Tear down everything we built up */ 1102 /* Tear down everything we built up */
844 down(&subsys->su_sem); 1103 mutex_lock(&subsys->su_mutex);
1104
1105 client_disconnect_notify(parent_item, item);
845 if (group) 1106 if (group)
846 unlink_group(group); 1107 unlink_group(group);
847 else 1108 else
848 unlink_obj(item); 1109 unlink_obj(item);
849 client_drop_item(parent_item, item); 1110 client_drop_item(parent_item, item);
850 up(&subsys->su_sem); 1111
1112 mutex_unlock(&subsys->su_mutex);
851 1113
852 if (module_got) 1114 if (module_got)
853 module_put(owner); 1115 module_put(owner);
@@ -881,6 +1143,13 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
881 if (sd->s_type & CONFIGFS_USET_DEFAULT) 1143 if (sd->s_type & CONFIGFS_USET_DEFAULT)
882 return -EPERM; 1144 return -EPERM;
883 1145
1146 /*
1147 * Here's where we check for dependents. We're protected by
1148 * i_mutex.
1149 */
1150 if (sd->s_dependent_count)
1151 return -EBUSY;
1152
884 /* Get a working ref until we have the child */ 1153 /* Get a working ref until we have the child */
885 parent_item = configfs_get_config_item(dentry->d_parent); 1154 parent_item = configfs_get_config_item(dentry->d_parent);
886 subsys = to_config_group(parent_item)->cg_subsys; 1155 subsys = to_config_group(parent_item)->cg_subsys;
@@ -910,17 +1179,19 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
910 if (sd->s_type & CONFIGFS_USET_DIR) { 1179 if (sd->s_type & CONFIGFS_USET_DIR) {
911 configfs_detach_group(item); 1180 configfs_detach_group(item);
912 1181
913 down(&subsys->su_sem); 1182 mutex_lock(&subsys->su_mutex);
1183 client_disconnect_notify(parent_item, item);
914 unlink_group(to_config_group(item)); 1184 unlink_group(to_config_group(item));
915 } else { 1185 } else {
916 configfs_detach_item(item); 1186 configfs_detach_item(item);
917 1187
918 down(&subsys->su_sem); 1188 mutex_lock(&subsys->su_mutex);
1189 client_disconnect_notify(parent_item, item);
919 unlink_obj(item); 1190 unlink_obj(item);
920 } 1191 }
921 1192
922 client_drop_item(parent_item, item); 1193 client_drop_item(parent_item, item);
923 up(&subsys->su_sem); 1194 mutex_unlock(&subsys->su_mutex);
924 1195
925 /* Drop our reference from above */ 1196 /* Drop our reference from above */
926 config_item_put(item); 1197 config_item_put(item);
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index 3527c7c6def..a3658f9a082 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -27,19 +27,26 @@
27#include <linux/fs.h> 27#include <linux/fs.h>
28#include <linux/module.h> 28#include <linux/module.h>
29#include <linux/slab.h> 29#include <linux/slab.h>
30#include <linux/mutex.h>
30#include <asm/uaccess.h> 31#include <asm/uaccess.h>
31#include <asm/semaphore.h>
32 32
33#include <linux/configfs.h> 33#include <linux/configfs.h>
34#include "configfs_internal.h" 34#include "configfs_internal.h"
35 35
36/*
37 * A simple attribute can only be 4096 characters. Why 4k? Because the
38 * original code limited it to PAGE_SIZE. That's a bad idea, though,
39 * because an attribute of 16k on ia64 won't work on x86. So we limit to
40 * 4k, our minimum common page size.
41 */
42#define SIMPLE_ATTR_SIZE 4096
36 43
37struct configfs_buffer { 44struct configfs_buffer {
38 size_t count; 45 size_t count;
39 loff_t pos; 46 loff_t pos;
40 char * page; 47 char * page;
41 struct configfs_item_operations * ops; 48 struct configfs_item_operations * ops;
42 struct semaphore sem; 49 struct mutex mutex;
43 int needs_read_fill; 50 int needs_read_fill;
44}; 51};
45 52
@@ -69,7 +76,7 @@ static int fill_read_buffer(struct dentry * dentry, struct configfs_buffer * buf
69 76
70 count = ops->show_attribute(item,attr,buffer->page); 77 count = ops->show_attribute(item,attr,buffer->page);
71 buffer->needs_read_fill = 0; 78 buffer->needs_read_fill = 0;
72 BUG_ON(count > (ssize_t)PAGE_SIZE); 79 BUG_ON(count > (ssize_t)SIMPLE_ATTR_SIZE);
73 if (count >= 0) 80 if (count >= 0)
74 buffer->count = count; 81 buffer->count = count;
75 else 82 else
@@ -102,7 +109,7 @@ configfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *pp
102 struct configfs_buffer * buffer = file->private_data; 109 struct configfs_buffer * buffer = file->private_data;
103 ssize_t retval = 0; 110 ssize_t retval = 0;
104 111
105 down(&buffer->sem); 112 mutex_lock(&buffer->mutex);
106 if (buffer->needs_read_fill) { 113 if (buffer->needs_read_fill) {
107 if ((retval = fill_read_buffer(file->f_path.dentry,buffer))) 114 if ((retval = fill_read_buffer(file->f_path.dentry,buffer)))
108 goto out; 115 goto out;
@@ -112,7 +119,7 @@ configfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *pp
112 retval = simple_read_from_buffer(buf, count, ppos, buffer->page, 119 retval = simple_read_from_buffer(buf, count, ppos, buffer->page,
113 buffer->count); 120 buffer->count);
114out: 121out:
115 up(&buffer->sem); 122 mutex_unlock(&buffer->mutex);
116 return retval; 123 return retval;
117} 124}
118 125
@@ -137,8 +144,8 @@ fill_write_buffer(struct configfs_buffer * buffer, const char __user * buf, size
137 if (!buffer->page) 144 if (!buffer->page)
138 return -ENOMEM; 145 return -ENOMEM;
139 146
140 if (count >= PAGE_SIZE) 147 if (count >= SIMPLE_ATTR_SIZE)
141 count = PAGE_SIZE - 1; 148 count = SIMPLE_ATTR_SIZE - 1;
142 error = copy_from_user(buffer->page,buf,count); 149 error = copy_from_user(buffer->page,buf,count);
143 buffer->needs_read_fill = 1; 150 buffer->needs_read_fill = 1;
144 /* if buf is assumed to contain a string, terminate it by \0, 151 /* if buf is assumed to contain a string, terminate it by \0,
@@ -193,13 +200,13 @@ configfs_write_file(struct file *file, const char __user *buf, size_t count, lof
193 struct configfs_buffer * buffer = file->private_data; 200 struct configfs_buffer * buffer = file->private_data;
194 ssize_t len; 201 ssize_t len;
195 202
196 down(&buffer->sem); 203 mutex_lock(&buffer->mutex);
197 len = fill_write_buffer(buffer, buf, count); 204 len = fill_write_buffer(buffer, buf, count);
198 if (len > 0) 205 if (len > 0)
199 len = flush_write_buffer(file->f_path.dentry, buffer, count); 206 len = flush_write_buffer(file->f_path.dentry, buffer, count);
200 if (len > 0) 207 if (len > 0)
201 *ppos += len; 208 *ppos += len;
202 up(&buffer->sem); 209 mutex_unlock(&buffer->mutex);
203 return len; 210 return len;
204} 211}
205 212
@@ -253,7 +260,7 @@ static int check_perm(struct inode * inode, struct file * file)
253 error = -ENOMEM; 260 error = -ENOMEM;
254 goto Enomem; 261 goto Enomem;
255 } 262 }
256 init_MUTEX(&buffer->sem); 263 mutex_init(&buffer->mutex);
257 buffer->needs_read_fill = 1; 264 buffer->needs_read_fill = 1;
258 buffer->ops = ops; 265 buffer->ops = ops;
259 file->private_data = buffer; 266 file->private_data = buffer;
@@ -292,6 +299,7 @@ static int configfs_release(struct inode * inode, struct file * filp)
292 if (buffer) { 299 if (buffer) {
293 if (buffer->page) 300 if (buffer->page)
294 free_page((unsigned long)buffer->page); 301 free_page((unsigned long)buffer->page);
302 mutex_destroy(&buffer->mutex);
295 kfree(buffer); 303 kfree(buffer);
296 } 304 }
297 return 0; 305 return 0;
diff --git a/fs/configfs/item.c b/fs/configfs/item.c
index 24421209f85..76dc4c3e5d5 100644
--- a/fs/configfs/item.c
+++ b/fs/configfs/item.c
@@ -62,7 +62,6 @@ void config_item_init(struct config_item * item)
62 * dynamically allocated string that @item->ci_name points to. 62 * dynamically allocated string that @item->ci_name points to.
63 * Otherwise, use the static @item->ci_namebuf array. 63 * Otherwise, use the static @item->ci_namebuf array.
64 */ 64 */
65
66int config_item_set_name(struct config_item * item, const char * fmt, ...) 65int config_item_set_name(struct config_item * item, const char * fmt, ...)
67{ 66{
68 int error = 0; 67 int error = 0;
@@ -139,12 +138,7 @@ struct config_item * config_item_get(struct config_item * item)
139 return item; 138 return item;
140} 139}
141 140
142/** 141static void config_item_cleanup(struct config_item * item)
143 * config_item_cleanup - free config_item resources.
144 * @item: item.
145 */
146
147void config_item_cleanup(struct config_item * item)
148{ 142{
149 struct config_item_type * t = item->ci_type; 143 struct config_item_type * t = item->ci_type;
150 struct config_group * s = item->ci_group; 144 struct config_group * s = item->ci_group;
@@ -179,39 +173,35 @@ void config_item_put(struct config_item * item)
179 kref_put(&item->ci_kref, config_item_release); 173 kref_put(&item->ci_kref, config_item_release);
180} 174}
181 175
182
183/** 176/**
184 * config_group_init - initialize a group for use 177 * config_group_init - initialize a group for use
185 * @k: group 178 * @k: group
186 */ 179 */
187
188void config_group_init(struct config_group *group) 180void config_group_init(struct config_group *group)
189{ 181{
190 config_item_init(&group->cg_item); 182 config_item_init(&group->cg_item);
191 INIT_LIST_HEAD(&group->cg_children); 183 INIT_LIST_HEAD(&group->cg_children);
192} 184}
193 185
194
195/** 186/**
196 * config_group_find_obj - search for item in group. 187 * config_group_find_item - search for item in group.
197 * @group: group we're looking in. 188 * @group: group we're looking in.
198 * @name: item's name. 189 * @name: item's name.
199 * 190 *
200 * Lock group via @group->cg_subsys, and iterate over @group->cg_list, 191 * Iterate over @group->cg_list, looking for a matching config_item.
201 * looking for a matching config_item. If matching item is found 192 * If matching item is found take a reference and return the item.
202 * take a reference and return the item. 193 * Caller must have locked group via @group->cg_subsys->su_mtx.
203 */ 194 */
204 195struct config_item *config_group_find_item(struct config_group *group,
205struct config_item * config_group_find_obj(struct config_group * group, const char * name) 196 const char *name)
206{ 197{
207 struct list_head * entry; 198 struct list_head * entry;
208 struct config_item * ret = NULL; 199 struct config_item * ret = NULL;
209 200
210 /* XXX LOCKING! */
211 list_for_each(entry,&group->cg_children) { 201 list_for_each(entry,&group->cg_children) {
212 struct config_item * item = to_item(entry); 202 struct config_item * item = to_item(entry);
213 if (config_item_name(item) && 203 if (config_item_name(item) &&
214 !strcmp(config_item_name(item), name)) { 204 !strcmp(config_item_name(item), name)) {
215 ret = config_item_get(item); 205 ret = config_item_get(item);
216 break; 206 break;
217 } 207 }
@@ -219,9 +209,8 @@ struct config_item * config_group_find_obj(struct config_group * group, const ch
219 return ret; 209 return ret;
220} 210}
221 211
222
223EXPORT_SYMBOL(config_item_init); 212EXPORT_SYMBOL(config_item_init);
224EXPORT_SYMBOL(config_group_init); 213EXPORT_SYMBOL(config_group_init);
225EXPORT_SYMBOL(config_item_get); 214EXPORT_SYMBOL(config_item_get);
226EXPORT_SYMBOL(config_item_put); 215EXPORT_SYMBOL(config_item_put);
227EXPORT_SYMBOL(config_group_find_obj); 216EXPORT_SYMBOL(config_group_find_item);
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index ec8896b264d..1d533a2ec3a 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -368,6 +368,69 @@ void debugfs_remove(struct dentry *dentry)
368} 368}
369EXPORT_SYMBOL_GPL(debugfs_remove); 369EXPORT_SYMBOL_GPL(debugfs_remove);
370 370
371/**
372 * debugfs_rename - rename a file/directory in the debugfs filesystem
373 * @old_dir: a pointer to the parent dentry for the renamed object. This
374 * should be a directory dentry.
375 * @old_dentry: dentry of an object to be renamed.
376 * @new_dir: a pointer to the parent dentry where the object should be
377 * moved. This should be a directory dentry.
378 * @new_name: a pointer to a string containing the target name.
379 *
380 * This function renames a file/directory in debugfs. The target must not
381 * exist for rename to succeed.
382 *
383 * This function will return a pointer to old_dentry (which is updated to
384 * reflect renaming) if it succeeds. If an error occurs, %NULL will be
385 * returned.
386 *
387 * If debugfs is not enabled in the kernel, the value -%ENODEV will be
388 * returned.
389 */
390struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
391 struct dentry *new_dir, const char *new_name)
392{
393 int error;
394 struct dentry *dentry = NULL, *trap;
395 const char *old_name;
396
397 trap = lock_rename(new_dir, old_dir);
398 /* Source or destination directories don't exist? */
399 if (!old_dir->d_inode || !new_dir->d_inode)
400 goto exit;
401 /* Source does not exist, cyclic rename, or mountpoint? */
402 if (!old_dentry->d_inode || old_dentry == trap ||
403 d_mountpoint(old_dentry))
404 goto exit;
405 dentry = lookup_one_len(new_name, new_dir, strlen(new_name));
406 /* Lookup failed, cyclic rename or target exists? */
407 if (IS_ERR(dentry) || dentry == trap || dentry->d_inode)
408 goto exit;
409
410 old_name = fsnotify_oldname_init(old_dentry->d_name.name);
411
412 error = simple_rename(old_dir->d_inode, old_dentry, new_dir->d_inode,
413 dentry);
414 if (error) {
415 fsnotify_oldname_free(old_name);
416 goto exit;
417 }
418 d_move(old_dentry, dentry);
419 fsnotify_move(old_dir->d_inode, new_dir->d_inode, old_name,
420 old_dentry->d_name.name, S_ISDIR(old_dentry->d_inode->i_mode),
421 NULL, old_dentry->d_inode);
422 fsnotify_oldname_free(old_name);
423 unlock_rename(new_dir, old_dir);
424 dput(dentry);
425 return old_dentry;
426exit:
427 if (dentry && !IS_ERR(dentry))
428 dput(dentry);
429 unlock_rename(new_dir, old_dir);
430 return NULL;
431}
432EXPORT_SYMBOL_GPL(debugfs_rename);
433
371static decl_subsys(debug, NULL, NULL); 434static decl_subsys(debug, NULL, NULL);
372 435
373static int __init debugfs_init(void) 436static int __init debugfs_init(void)
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 5069b2cb5a1..2f8e3c81bc1 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -133,14 +133,6 @@ static ssize_t cluster_set(struct cluster *cl, unsigned int *cl_field,
133 return len; 133 return len;
134} 134}
135 135
136#define __CONFIGFS_ATTR(_name,_mode,_read,_write) { \
137 .attr = { .ca_name = __stringify(_name), \
138 .ca_mode = _mode, \
139 .ca_owner = THIS_MODULE }, \
140 .show = _read, \
141 .store = _write, \
142}
143
144#define CLUSTER_ATTR(name, check_zero) \ 136#define CLUSTER_ATTR(name, check_zero) \
145static ssize_t name##_write(struct cluster *cl, const char *buf, size_t len) \ 137static ssize_t name##_write(struct cluster *cl, const char *buf, size_t len) \
146{ \ 138{ \
@@ -615,7 +607,7 @@ static struct clusters clusters_root = {
615int dlm_config_init(void) 607int dlm_config_init(void)
616{ 608{
617 config_group_init(&clusters_root.subsys.su_group); 609 config_group_init(&clusters_root.subsys.su_group);
618 init_MUTEX(&clusters_root.subsys.su_sem); 610 mutex_init(&clusters_root.subsys.su_mutex);
619 return configfs_register_subsystem(&clusters_root.subsys); 611 return configfs_register_subsystem(&clusters_root.subsys);
620} 612}
621 613
@@ -759,9 +751,9 @@ static struct space *get_space(char *name)
759 if (!space_list) 751 if (!space_list)
760 return NULL; 752 return NULL;
761 753
762 down(&space_list->cg_subsys->su_sem); 754 mutex_lock(&space_list->cg_subsys->su_mutex);
763 i = config_group_find_obj(space_list, name); 755 i = config_group_find_item(space_list, name);
764 up(&space_list->cg_subsys->su_sem); 756 mutex_unlock(&space_list->cg_subsys->su_mutex);
765 757
766 return to_space(i); 758 return to_space(i);
767} 759}
@@ -780,7 +772,7 @@ static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr)
780 if (!comm_list) 772 if (!comm_list)
781 return NULL; 773 return NULL;
782 774
783 down(&clusters_root.subsys.su_sem); 775 mutex_lock(&clusters_root.subsys.su_mutex);
784 776
785 list_for_each_entry(i, &comm_list->cg_children, ci_entry) { 777 list_for_each_entry(i, &comm_list->cg_children, ci_entry) {
786 cm = to_comm(i); 778 cm = to_comm(i);
@@ -800,7 +792,7 @@ static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr)
800 break; 792 break;
801 } 793 }
802 } 794 }
803 up(&clusters_root.subsys.su_sem); 795 mutex_unlock(&clusters_root.subsys.su_mutex);
804 796
805 if (!found) 797 if (!found)
806 cm = NULL; 798 cm = NULL;
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 03ea7696fe3..59375efcf39 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -20,7 +20,7 @@ static void drop_pagecache_sb(struct super_block *sb)
20 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 20 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
21 if (inode->i_state & (I_FREEING|I_WILL_FREE)) 21 if (inode->i_state & (I_FREEING|I_WILL_FREE))
22 continue; 22 continue;
23 invalidate_mapping_pages(inode->i_mapping, 0, -1); 23 __invalidate_mapping_pages(inode->i_mapping, 0, -1, true);
24 } 24 }
25 spin_unlock(&inode_lock); 25 spin_unlock(&inode_lock);
26} 26}
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 606128f5c92..02ca6f1e55d 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -840,8 +840,6 @@ static int __init ecryptfs_init(void)
840 goto out; 840 goto out;
841 } 841 }
842 kobj_set_kset_s(&ecryptfs_subsys, fs_subsys); 842 kobj_set_kset_s(&ecryptfs_subsys, fs_subsys);
843 sysfs_attr_version.attr.owner = THIS_MODULE;
844 sysfs_attr_version_str.attr.owner = THIS_MODULE;
845 rc = do_sysfs_registration(); 843 rc = do_sysfs_registration();
846 if (rc) { 844 if (rc) {
847 printk(KERN_ERR "sysfs registration failed\n"); 845 printk(KERN_ERR "sysfs registration failed\n");
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 04afeecaaef..ab7961260c4 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -24,9 +24,9 @@
24#include "acl.h" 24#include "acl.h"
25 25
26/* 26/*
27 * Called when an inode is released. Note that this is different 27 * Called when filp is released. This happens when all file descriptors
28 * from ext2_open_file: open gets called at every open, but release 28 * for a single struct file are closed. Note that different open() calls
29 * gets called only when /all/ the files are closed. 29 * for the same file yield different struct file structures.
30 */ 30 */
31static int ext2_release_file (struct inode * inode, struct file * filp) 31static int ext2_release_file (struct inode * inode, struct file * filp)
32{ 32{
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 5de5061eb33..b2efd9083b9 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1099,15 +1099,18 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
1099 struct super_block *sb = dentry->d_sb; 1099 struct super_block *sb = dentry->d_sb;
1100 struct ext2_sb_info *sbi = EXT2_SB(sb); 1100 struct ext2_sb_info *sbi = EXT2_SB(sb);
1101 struct ext2_super_block *es = sbi->s_es; 1101 struct ext2_super_block *es = sbi->s_es;
1102 unsigned long overhead;
1103 int i;
1104 u64 fsid; 1102 u64 fsid;
1105 1103
1106 if (test_opt (sb, MINIX_DF)) 1104 if (test_opt (sb, MINIX_DF))
1107 overhead = 0; 1105 sbi->s_overhead_last = 0;
1108 else { 1106 else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) {
1107 unsigned long i, overhead = 0;
1108 smp_rmb();
1109
1109 /* 1110 /*
1110 * Compute the overhead (FS structures) 1111 * Compute the overhead (FS structures). This is constant
1112 * for a given filesystem unless the number of block groups
1113 * changes so we cache the previous value until it does.
1111 */ 1114 */
1112 1115
1113 /* 1116 /*
@@ -1131,17 +1134,22 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
1131 */ 1134 */
1132 overhead += (sbi->s_groups_count * 1135 overhead += (sbi->s_groups_count *
1133 (2 + sbi->s_itb_per_group)); 1136 (2 + sbi->s_itb_per_group));
1137 sbi->s_overhead_last = overhead;
1138 smp_wmb();
1139 sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count);
1134 } 1140 }
1135 1141
1136 buf->f_type = EXT2_SUPER_MAGIC; 1142 buf->f_type = EXT2_SUPER_MAGIC;
1137 buf->f_bsize = sb->s_blocksize; 1143 buf->f_bsize = sb->s_blocksize;
1138 buf->f_blocks = le32_to_cpu(es->s_blocks_count) - overhead; 1144 buf->f_blocks = le32_to_cpu(es->s_blocks_count) - sbi->s_overhead_last;
1139 buf->f_bfree = ext2_count_free_blocks(sb); 1145 buf->f_bfree = ext2_count_free_blocks(sb);
1146 es->s_free_blocks_count = cpu_to_le32(buf->f_bfree);
1140 buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count); 1147 buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count);
1141 if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count)) 1148 if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count))
1142 buf->f_bavail = 0; 1149 buf->f_bavail = 0;
1143 buf->f_files = le32_to_cpu(es->s_inodes_count); 1150 buf->f_files = le32_to_cpu(es->s_inodes_count);
1144 buf->f_ffree = ext2_count_free_inodes(sb); 1151 buf->f_ffree = ext2_count_free_inodes(sb);
1152 es->s_free_inodes_count = cpu_to_le32(buf->f_ffree);
1145 buf->f_namelen = EXT2_NAME_LEN; 1153 buf->f_namelen = EXT2_NAME_LEN;
1146 fsid = le64_to_cpup((void *)es->s_uuid) ^ 1154 fsid = le64_to_cpup((void *)es->s_uuid) ^
1147 le64_to_cpup((void *)es->s_uuid + sizeof(u64)); 1155 le64_to_cpup((void *)es->s_uuid + sizeof(u64));
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 2a85ddee474..de4e3161e47 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -3195,7 +3195,7 @@ int ext3_change_inode_journal_flag(struct inode *inode, int val)
3195 */ 3195 */
3196 3196
3197 journal = EXT3_JOURNAL(inode); 3197 journal = EXT3_JOURNAL(inode);
3198 if (is_journal_aborted(journal) || IS_RDONLY(inode)) 3198 if (is_journal_aborted(journal))
3199 return -EROFS; 3199 return -EROFS;
3200 3200
3201 journal_lock_updates(journal); 3201 journal_lock_updates(journal);
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 9bb046df827..1586807b817 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1019,6 +1019,11 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str
1019 1019
1020 if (!inode) 1020 if (!inode)
1021 return ERR_PTR(-EACCES); 1021 return ERR_PTR(-EACCES);
1022
1023 if (is_bad_inode(inode)) {
1024 iput(inode);
1025 return ERR_PTR(-ENOENT);
1026 }
1022 } 1027 }
1023 return d_splice_alias(inode, dentry); 1028 return d_splice_alias(inode, dentry);
1024} 1029}
@@ -1054,6 +1059,11 @@ struct dentry *ext3_get_parent(struct dentry *child)
1054 if (!inode) 1059 if (!inode)
1055 return ERR_PTR(-EACCES); 1060 return ERR_PTR(-EACCES);
1056 1061
1062 if (is_bad_inode(inode)) {
1063 iput(inode);
1064 return ERR_PTR(-ENOENT);
1065 }
1066
1057 parent = d_alloc_anon(inode); 1067 parent = d_alloc_anon(inode);
1058 if (!parent) { 1068 if (!parent) {
1059 iput(inode); 1069 iput(inode);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 6e3062913a9..51d1c456cda 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -35,6 +35,7 @@
35#include <linux/namei.h> 35#include <linux/namei.h>
36#include <linux/quotaops.h> 36#include <linux/quotaops.h>
37#include <linux/seq_file.h> 37#include <linux/seq_file.h>
38#include <linux/log2.h>
38 39
39#include <asm/uaccess.h> 40#include <asm/uaccess.h>
40 41
@@ -459,6 +460,14 @@ static struct inode *ext3_alloc_inode(struct super_block *sb)
459 460
460static void ext3_destroy_inode(struct inode *inode) 461static void ext3_destroy_inode(struct inode *inode)
461{ 462{
463 if (!list_empty(&(EXT3_I(inode)->i_orphan))) {
464 printk("EXT3 Inode %p: orphan list check failed!\n",
465 EXT3_I(inode));
466 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
467 EXT3_I(inode), sizeof(struct ext3_inode_info),
468 false);
469 dump_stack();
470 }
462 kmem_cache_free(ext3_inode_cachep, EXT3_I(inode)); 471 kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
463} 472}
464 473
@@ -1566,7 +1575,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1566 sbi->s_inode_size = le16_to_cpu(es->s_inode_size); 1575 sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
1567 sbi->s_first_ino = le32_to_cpu(es->s_first_ino); 1576 sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
1568 if ((sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE) || 1577 if ((sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE) ||
1569 (sbi->s_inode_size & (sbi->s_inode_size - 1)) || 1578 (!is_power_of_2(sbi->s_inode_size)) ||
1570 (sbi->s_inode_size > blocksize)) { 1579 (sbi->s_inode_size > blocksize)) {
1571 printk (KERN_ERR 1580 printk (KERN_ERR
1572 "EXT3-fs: unsupported inode size: %d\n", 1581 "EXT3-fs: unsupported inode size: %d\n",
@@ -2075,6 +2084,7 @@ static int ext3_create_journal(struct super_block * sb,
2075 unsigned int journal_inum) 2084 unsigned int journal_inum)
2076{ 2085{
2077 journal_t *journal; 2086 journal_t *journal;
2087 int err;
2078 2088
2079 if (sb->s_flags & MS_RDONLY) { 2089 if (sb->s_flags & MS_RDONLY) {
2080 printk(KERN_ERR "EXT3-fs: readonly filesystem when trying to " 2090 printk(KERN_ERR "EXT3-fs: readonly filesystem when trying to "
@@ -2082,13 +2092,15 @@ static int ext3_create_journal(struct super_block * sb,
2082 return -EROFS; 2092 return -EROFS;
2083 } 2093 }
2084 2094
2085 if (!(journal = ext3_get_journal(sb, journal_inum))) 2095 journal = ext3_get_journal(sb, journal_inum);
2096 if (!journal)
2086 return -EINVAL; 2097 return -EINVAL;
2087 2098
2088 printk(KERN_INFO "EXT3-fs: creating new journal on inode %u\n", 2099 printk(KERN_INFO "EXT3-fs: creating new journal on inode %u\n",
2089 journal_inum); 2100 journal_inum);
2090 2101
2091 if (journal_create(journal)) { 2102 err = journal_create(journal);
2103 if (err) {
2092 printk(KERN_ERR "EXT3-fs: error creating journal.\n"); 2104 printk(KERN_ERR "EXT3-fs: error creating journal.\n");
2093 journal_destroy(journal); 2105 journal_destroy(journal);
2094 return -EIO; 2106 return -EIO;
@@ -2139,12 +2151,14 @@ static void ext3_mark_recovery_complete(struct super_block * sb,
2139 2151
2140 journal_lock_updates(journal); 2152 journal_lock_updates(journal);
2141 journal_flush(journal); 2153 journal_flush(journal);
2154 lock_super(sb);
2142 if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) && 2155 if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) &&
2143 sb->s_flags & MS_RDONLY) { 2156 sb->s_flags & MS_RDONLY) {
2144 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 2157 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
2145 sb->s_dirt = 0; 2158 sb->s_dirt = 0;
2146 ext3_commit_super(sb, es, 1); 2159 ext3_commit_super(sb, es, 1);
2147 } 2160 }
2161 unlock_super(sb);
2148 journal_unlock_updates(journal); 2162 journal_unlock_updates(journal);
2149} 2163}
2150 2164
@@ -2333,7 +2347,13 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
2333 (sbi->s_mount_state & EXT3_VALID_FS)) 2347 (sbi->s_mount_state & EXT3_VALID_FS))
2334 es->s_state = cpu_to_le16(sbi->s_mount_state); 2348 es->s_state = cpu_to_le16(sbi->s_mount_state);
2335 2349
2350 /*
2351 * We have to unlock super so that we can wait for
2352 * transactions.
2353 */
2354 unlock_super(sb);
2336 ext3_mark_recovery_complete(sb, es); 2355 ext3_mark_recovery_complete(sb, es);
2356 lock_super(sb);
2337 } else { 2357 } else {
2338 __le32 ret; 2358 __le32 ret;
2339 if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb, 2359 if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb,
@@ -2406,19 +2426,19 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
2406 struct super_block *sb = dentry->d_sb; 2426 struct super_block *sb = dentry->d_sb;
2407 struct ext3_sb_info *sbi = EXT3_SB(sb); 2427 struct ext3_sb_info *sbi = EXT3_SB(sb);
2408 struct ext3_super_block *es = sbi->s_es; 2428 struct ext3_super_block *es = sbi->s_es;
2409 ext3_fsblk_t overhead;
2410 int i;
2411 u64 fsid; 2429 u64 fsid;
2412 2430
2413 if (test_opt (sb, MINIX_DF)) 2431 if (test_opt(sb, MINIX_DF)) {
2414 overhead = 0; 2432 sbi->s_overhead_last = 0;
2415 else { 2433 } else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) {
2416 unsigned long ngroups; 2434 unsigned long ngroups = sbi->s_groups_count, i;
2417 ngroups = EXT3_SB(sb)->s_groups_count; 2435 ext3_fsblk_t overhead = 0;
2418 smp_rmb(); 2436 smp_rmb();
2419 2437
2420 /* 2438 /*
2421 * Compute the overhead (FS structures) 2439 * Compute the overhead (FS structures). This is constant
2440 * for a given filesystem unless the number of block groups
2441 * changes so we cache the previous value until it does.
2422 */ 2442 */
2423 2443
2424 /* 2444 /*
@@ -2442,18 +2462,23 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
2442 * Every block group has an inode bitmap, a block 2462 * Every block group has an inode bitmap, a block
2443 * bitmap, and an inode table. 2463 * bitmap, and an inode table.
2444 */ 2464 */
2445 overhead += (ngroups * (2 + EXT3_SB(sb)->s_itb_per_group)); 2465 overhead += ngroups * (2 + sbi->s_itb_per_group);
2466 sbi->s_overhead_last = overhead;
2467 smp_wmb();
2468 sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count);
2446 } 2469 }
2447 2470
2448 buf->f_type = EXT3_SUPER_MAGIC; 2471 buf->f_type = EXT3_SUPER_MAGIC;
2449 buf->f_bsize = sb->s_blocksize; 2472 buf->f_bsize = sb->s_blocksize;
2450 buf->f_blocks = le32_to_cpu(es->s_blocks_count) - overhead; 2473 buf->f_blocks = le32_to_cpu(es->s_blocks_count) - sbi->s_overhead_last;
2451 buf->f_bfree = percpu_counter_sum(&sbi->s_freeblocks_counter); 2474 buf->f_bfree = percpu_counter_sum(&sbi->s_freeblocks_counter);
2475 es->s_free_blocks_count = cpu_to_le32(buf->f_bfree);
2452 buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count); 2476 buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count);
2453 if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count)) 2477 if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count))
2454 buf->f_bavail = 0; 2478 buf->f_bavail = 0;
2455 buf->f_files = le32_to_cpu(es->s_inodes_count); 2479 buf->f_files = le32_to_cpu(es->s_inodes_count);
2456 buf->f_ffree = percpu_counter_sum(&sbi->s_freeinodes_counter); 2480 buf->f_ffree = percpu_counter_sum(&sbi->s_freeinodes_counter);
2481 es->s_free_inodes_count = cpu_to_le32(buf->f_ffree);
2457 buf->f_namelen = EXT3_NAME_LEN; 2482 buf->f_namelen = EXT3_NAME_LEN;
2458 fsid = le64_to_cpup((void *)es->s_uuid) ^ 2483 fsid = le64_to_cpup((void *)es->s_uuid) ^
2459 le64_to_cpup((void *)es->s_uuid + sizeof(u64)); 2484 le64_to_cpup((void *)es->s_uuid + sizeof(u64));
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 3b64bb16c72..9de54ae48de 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -1585,7 +1585,7 @@ allocated:
1585 ret_block = grp_alloc_blk + ext4_group_first_block_no(sb, group_no); 1585 ret_block = grp_alloc_blk + ext4_group_first_block_no(sb, group_no);
1586 1586
1587 if (in_range(ext4_block_bitmap(sb, gdp), ret_block, num) || 1587 if (in_range(ext4_block_bitmap(sb, gdp), ret_block, num) ||
1588 in_range(ext4_block_bitmap(sb, gdp), ret_block, num) || 1588 in_range(ext4_inode_bitmap(sb, gdp), ret_block, num) ||
1589 in_range(ret_block, ext4_inode_table(sb, gdp), 1589 in_range(ret_block, ext4_inode_table(sb, gdp),
1590 EXT4_SB(sb)->s_itb_per_group) || 1590 EXT4_SB(sb)->s_itb_per_group) ||
1591 in_range(ret_block + num - 1, ext4_inode_table(sb, gdp), 1591 in_range(ret_block + num - 1, ext4_inode_table(sb, gdp),
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 2811e5720ad..2de339dd755 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1017,6 +1017,11 @@ static struct dentry *ext4_lookup(struct inode * dir, struct dentry *dentry, str
1017 1017
1018 if (!inode) 1018 if (!inode)
1019 return ERR_PTR(-EACCES); 1019 return ERR_PTR(-EACCES);
1020
1021 if (is_bad_inode(inode)) {
1022 iput(inode);
1023 return ERR_PTR(-ENOENT);
1024 }
1020 } 1025 }
1021 return d_splice_alias(inode, dentry); 1026 return d_splice_alias(inode, dentry);
1022} 1027}
@@ -1052,6 +1057,11 @@ struct dentry *ext4_get_parent(struct dentry *child)
1052 if (!inode) 1057 if (!inode)
1053 return ERR_PTR(-EACCES); 1058 return ERR_PTR(-EACCES);
1054 1059
1060 if (is_bad_inode(inode)) {
1061 iput(inode);
1062 return ERR_PTR(-ENOENT);
1063 }
1064
1055 parent = d_alloc_anon(inode); 1065 parent = d_alloc_anon(inode);
1056 if (!parent) { 1066 if (!parent) {
1057 iput(inode); 1067 iput(inode);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 175b68c6096..d0d8c76c7ed 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -510,6 +510,14 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
510 510
511static void ext4_destroy_inode(struct inode *inode) 511static void ext4_destroy_inode(struct inode *inode)
512{ 512{
513 if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
514 printk("EXT4 Inode %p: orphan list check failed!\n",
515 EXT4_I(inode));
516 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
517 EXT4_I(inode), sizeof(struct ext4_inode_info),
518 true);
519 dump_stack();
520 }
513 kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); 521 kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
514} 522}
515 523
@@ -2150,6 +2158,7 @@ static int ext4_create_journal(struct super_block * sb,
2150 unsigned int journal_inum) 2158 unsigned int journal_inum)
2151{ 2159{
2152 journal_t *journal; 2160 journal_t *journal;
2161 int err;
2153 2162
2154 if (sb->s_flags & MS_RDONLY) { 2163 if (sb->s_flags & MS_RDONLY) {
2155 printk(KERN_ERR "EXT4-fs: readonly filesystem when trying to " 2164 printk(KERN_ERR "EXT4-fs: readonly filesystem when trying to "
@@ -2157,13 +2166,15 @@ static int ext4_create_journal(struct super_block * sb,
2157 return -EROFS; 2166 return -EROFS;
2158 } 2167 }
2159 2168
2160 if (!(journal = ext4_get_journal(sb, journal_inum))) 2169 journal = ext4_get_journal(sb, journal_inum);
2170 if (!journal)
2161 return -EINVAL; 2171 return -EINVAL;
2162 2172
2163 printk(KERN_INFO "EXT4-fs: creating new journal on inode %u\n", 2173 printk(KERN_INFO "EXT4-fs: creating new journal on inode %u\n",
2164 journal_inum); 2174 journal_inum);
2165 2175
2166 if (jbd2_journal_create(journal)) { 2176 err = jbd2_journal_create(journal);
2177 if (err) {
2167 printk(KERN_ERR "EXT4-fs: error creating journal.\n"); 2178 printk(KERN_ERR "EXT4-fs: error creating journal.\n");
2168 jbd2_journal_destroy(journal); 2179 jbd2_journal_destroy(journal);
2169 return -EIO; 2180 return -EIO;
@@ -2214,12 +2225,14 @@ static void ext4_mark_recovery_complete(struct super_block * sb,
2214 2225
2215 jbd2_journal_lock_updates(journal); 2226 jbd2_journal_lock_updates(journal);
2216 jbd2_journal_flush(journal); 2227 jbd2_journal_flush(journal);
2228 lock_super(sb);
2217 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && 2229 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) &&
2218 sb->s_flags & MS_RDONLY) { 2230 sb->s_flags & MS_RDONLY) {
2219 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 2231 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
2220 sb->s_dirt = 0; 2232 sb->s_dirt = 0;
2221 ext4_commit_super(sb, es, 1); 2233 ext4_commit_super(sb, es, 1);
2222 } 2234 }
2235 unlock_super(sb);
2223 jbd2_journal_unlock_updates(journal); 2236 jbd2_journal_unlock_updates(journal);
2224} 2237}
2225 2238
@@ -2408,7 +2421,13 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data)
2408 (sbi->s_mount_state & EXT4_VALID_FS)) 2421 (sbi->s_mount_state & EXT4_VALID_FS))
2409 es->s_state = cpu_to_le16(sbi->s_mount_state); 2422 es->s_state = cpu_to_le16(sbi->s_mount_state);
2410 2423
2424 /*
2425 * We have to unlock super so that we can wait for
2426 * transactions.
2427 */
2428 unlock_super(sb);
2411 ext4_mark_recovery_complete(sb, es); 2429 ext4_mark_recovery_complete(sb, es);
2430 lock_super(sb);
2412 } else { 2431 } else {
2413 __le32 ret; 2432 __le32 ret;
2414 if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb, 2433 if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb,
@@ -2481,19 +2500,19 @@ static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf)
2481 struct super_block *sb = dentry->d_sb; 2500 struct super_block *sb = dentry->d_sb;
2482 struct ext4_sb_info *sbi = EXT4_SB(sb); 2501 struct ext4_sb_info *sbi = EXT4_SB(sb);
2483 struct ext4_super_block *es = sbi->s_es; 2502 struct ext4_super_block *es = sbi->s_es;
2484 ext4_fsblk_t overhead;
2485 int i;
2486 u64 fsid; 2503 u64 fsid;
2487 2504
2488 if (test_opt (sb, MINIX_DF)) 2505 if (test_opt(sb, MINIX_DF)) {
2489 overhead = 0; 2506 sbi->s_overhead_last = 0;
2490 else { 2507 } else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) {
2491 unsigned long ngroups; 2508 unsigned long ngroups = sbi->s_groups_count, i;
2492 ngroups = EXT4_SB(sb)->s_groups_count; 2509 ext4_fsblk_t overhead = 0;
2493 smp_rmb(); 2510 smp_rmb();
2494 2511
2495 /* 2512 /*
2496 * Compute the overhead (FS structures) 2513 * Compute the overhead (FS structures). This is constant
2514 * for a given filesystem unless the number of block groups
2515 * changes so we cache the previous value until it does.
2497 */ 2516 */
2498 2517
2499 /* 2518 /*
@@ -2517,18 +2536,23 @@ static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf)
2517 * Every block group has an inode bitmap, a block 2536 * Every block group has an inode bitmap, a block
2518 * bitmap, and an inode table. 2537 * bitmap, and an inode table.
2519 */ 2538 */
2520 overhead += (ngroups * (2 + EXT4_SB(sb)->s_itb_per_group)); 2539 overhead += ngroups * (2 + sbi->s_itb_per_group);
2540 sbi->s_overhead_last = overhead;
2541 smp_wmb();
2542 sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count);
2521 } 2543 }
2522 2544
2523 buf->f_type = EXT4_SUPER_MAGIC; 2545 buf->f_type = EXT4_SUPER_MAGIC;
2524 buf->f_bsize = sb->s_blocksize; 2546 buf->f_bsize = sb->s_blocksize;
2525 buf->f_blocks = ext4_blocks_count(es) - overhead; 2547 buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
2526 buf->f_bfree = percpu_counter_sum(&sbi->s_freeblocks_counter); 2548 buf->f_bfree = percpu_counter_sum(&sbi->s_freeblocks_counter);
2549 es->s_free_blocks_count = cpu_to_le32(buf->f_bfree);
2527 buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); 2550 buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
2528 if (buf->f_bfree < ext4_r_blocks_count(es)) 2551 if (buf->f_bfree < ext4_r_blocks_count(es))
2529 buf->f_bavail = 0; 2552 buf->f_bavail = 0;
2530 buf->f_files = le32_to_cpu(es->s_inodes_count); 2553 buf->f_files = le32_to_cpu(es->s_inodes_count);
2531 buf->f_ffree = percpu_counter_sum(&sbi->s_freeinodes_counter); 2554 buf->f_ffree = percpu_counter_sum(&sbi->s_freeinodes_counter);
2555 es->s_free_inodes_count = cpu_to_le32(buf->f_ffree);
2532 buf->f_namelen = EXT4_NAME_LEN; 2556 buf->f_namelen = EXT4_NAME_LEN;
2533 fsid = le64_to_cpup((void *)es->s_uuid) ^ 2557 fsid = le64_to_cpup((void *)es->s_uuid) ^
2534 le64_to_cpup((void *)es->s_uuid + sizeof(u64)); 2558 le64_to_cpup((void *)es->s_uuid + sizeof(u64));
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index ccf161dffb6..72cbcd61bd9 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -313,7 +313,7 @@ int fat_search_long(struct inode *inode, const unsigned char *name,
313 wchar_t bufuname[14]; 313 wchar_t bufuname[14];
314 unsigned char xlate_len, nr_slots; 314 unsigned char xlate_len, nr_slots;
315 wchar_t *unicode = NULL; 315 wchar_t *unicode = NULL;
316 unsigned char work[8], bufname[260]; /* 256 + 4 */ 316 unsigned char work[MSDOS_NAME], bufname[260]; /* 256 + 4 */
317 int uni_xlate = sbi->options.unicode_xlate; 317 int uni_xlate = sbi->options.unicode_xlate;
318 int utf8 = sbi->options.utf8; 318 int utf8 = sbi->options.utf8;
319 int anycase = (sbi->options.name_check != 's'); 319 int anycase = (sbi->options.name_check != 's');
@@ -351,7 +351,8 @@ parse_record:
351 if (work[0] == 0x05) 351 if (work[0] == 0x05)
352 work[0] = 0xE5; 352 work[0] = 0xE5;
353 for (i = 0, j = 0, last_u = 0; i < 8;) { 353 for (i = 0, j = 0, last_u = 0; i < 8;) {
354 if (!work[i]) break; 354 if (!work[i])
355 break;
355 chl = fat_shortname2uni(nls_disk, &work[i], 8 - i, 356 chl = fat_shortname2uni(nls_disk, &work[i], 8 - i,
356 &bufuname[j++], opt_shortname, 357 &bufuname[j++], opt_shortname,
357 de->lcase & CASE_LOWER_BASE); 358 de->lcase & CASE_LOWER_BASE);
@@ -365,13 +366,15 @@ parse_record:
365 } 366 }
366 j = last_u; 367 j = last_u;
367 fat_short2uni(nls_disk, ".", 1, &bufuname[j++]); 368 fat_short2uni(nls_disk, ".", 1, &bufuname[j++]);
368 for (i = 0; i < 3;) { 369 for (i = 8; i < MSDOS_NAME;) {
369 if (!de->ext[i]) break; 370 if (!work[i])
370 chl = fat_shortname2uni(nls_disk, &de->ext[i], 3 - i, 371 break;
372 chl = fat_shortname2uni(nls_disk, &work[i],
373 MSDOS_NAME - i,
371 &bufuname[j++], opt_shortname, 374 &bufuname[j++], opt_shortname,
372 de->lcase & CASE_LOWER_EXT); 375 de->lcase & CASE_LOWER_EXT);
373 if (chl <= 1) { 376 if (chl <= 1) {
374 if (de->ext[i] != ' ') 377 if (work[i] != ' ')
375 last_u = j; 378 last_u = j;
376 } else { 379 } else {
377 last_u = j; 380 last_u = j;
@@ -445,7 +448,7 @@ static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent,
445 int fill_len; 448 int fill_len;
446 wchar_t bufuname[14]; 449 wchar_t bufuname[14];
447 wchar_t *unicode = NULL; 450 wchar_t *unicode = NULL;
448 unsigned char c, work[8], bufname[56], *ptname = bufname; 451 unsigned char c, work[MSDOS_NAME], bufname[56], *ptname = bufname;
449 unsigned long lpos, dummy, *furrfu = &lpos; 452 unsigned long lpos, dummy, *furrfu = &lpos;
450 int uni_xlate = sbi->options.unicode_xlate; 453 int uni_xlate = sbi->options.unicode_xlate;
451 int isvfat = sbi->options.isvfat; 454 int isvfat = sbi->options.isvfat;
@@ -527,7 +530,8 @@ parse_record:
527 if (work[0] == 0x05) 530 if (work[0] == 0x05)
528 work[0] = 0xE5; 531 work[0] = 0xE5;
529 for (i = 0, j = 0, last = 0, last_u = 0; i < 8;) { 532 for (i = 0, j = 0, last = 0, last_u = 0; i < 8;) {
530 if (!(c = work[i])) break; 533 if (!(c = work[i]))
534 break;
531 chl = fat_shortname2uni(nls_disk, &work[i], 8 - i, 535 chl = fat_shortname2uni(nls_disk, &work[i], 8 - i,
532 &bufuname[j++], opt_shortname, 536 &bufuname[j++], opt_shortname,
533 de->lcase & CASE_LOWER_BASE); 537 de->lcase & CASE_LOWER_BASE);
@@ -549,9 +553,10 @@ parse_record:
549 j = last_u; 553 j = last_u;
550 fat_short2uni(nls_disk, ".", 1, &bufuname[j++]); 554 fat_short2uni(nls_disk, ".", 1, &bufuname[j++]);
551 ptname[i++] = '.'; 555 ptname[i++] = '.';
552 for (i2 = 0; i2 < 3;) { 556 for (i2 = 8; i2 < MSDOS_NAME;) {
553 if (!(c = de->ext[i2])) break; 557 if (!(c = work[i2]))
554 chl = fat_shortname2uni(nls_disk, &de->ext[i2], 3 - i2, 558 break;
559 chl = fat_shortname2uni(nls_disk, &work[i2], MSDOS_NAME - i2,
555 &bufuname[j++], opt_shortname, 560 &bufuname[j++], opt_shortname,
556 de->lcase & CASE_LOWER_EXT); 561 de->lcase & CASE_LOWER_EXT);
557 if (chl <= 1) { 562 if (chl <= 1) {
@@ -563,8 +568,8 @@ parse_record:
563 } 568 }
564 } else { 569 } else {
565 last_u = j; 570 last_u = j;
566 for (chi = 0; chi < chl && i2 < 3; chi++) { 571 for (chi = 0; chi < chl && i2 < MSDOS_NAME; chi++) {
567 ptname[i++] = de->ext[i2++]; 572 ptname[i++] = work[i2++];
568 last = i; 573 last = i;
569 } 574 }
570 } 575 }
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index ab171ea8e86..2c1b73fb82a 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -17,6 +17,8 @@ struct fatent_operations {
17 int (*ent_next)(struct fat_entry *); 17 int (*ent_next)(struct fat_entry *);
18}; 18};
19 19
20static DEFINE_SPINLOCK(fat12_entry_lock);
21
20static void fat12_ent_blocknr(struct super_block *sb, int entry, 22static void fat12_ent_blocknr(struct super_block *sb, int entry,
21 int *offset, sector_t *blocknr) 23 int *offset, sector_t *blocknr)
22{ 24{
@@ -116,10 +118,13 @@ static int fat12_ent_get(struct fat_entry *fatent)
116 u8 **ent12_p = fatent->u.ent12_p; 118 u8 **ent12_p = fatent->u.ent12_p;
117 int next; 119 int next;
118 120
121 spin_lock(&fat12_entry_lock);
119 if (fatent->entry & 1) 122 if (fatent->entry & 1)
120 next = (*ent12_p[0] >> 4) | (*ent12_p[1] << 4); 123 next = (*ent12_p[0] >> 4) | (*ent12_p[1] << 4);
121 else 124 else
122 next = (*ent12_p[1] << 8) | *ent12_p[0]; 125 next = (*ent12_p[1] << 8) | *ent12_p[0];
126 spin_unlock(&fat12_entry_lock);
127
123 next &= 0x0fff; 128 next &= 0x0fff;
124 if (next >= BAD_FAT12) 129 if (next >= BAD_FAT12)
125 next = FAT_ENT_EOF; 130 next = FAT_ENT_EOF;
@@ -151,6 +156,7 @@ static void fat12_ent_put(struct fat_entry *fatent, int new)
151 if (new == FAT_ENT_EOF) 156 if (new == FAT_ENT_EOF)
152 new = EOF_FAT12; 157 new = EOF_FAT12;
153 158
159 spin_lock(&fat12_entry_lock);
154 if (fatent->entry & 1) { 160 if (fatent->entry & 1) {
155 *ent12_p[0] = (new << 4) | (*ent12_p[0] & 0x0f); 161 *ent12_p[0] = (new << 4) | (*ent12_p[0] & 0x0f);
156 *ent12_p[1] = new >> 4; 162 *ent12_p[1] = new >> 4;
@@ -158,6 +164,7 @@ static void fat12_ent_put(struct fat_entry *fatent, int new)
158 *ent12_p[0] = new & 0xff; 164 *ent12_p[0] = new & 0xff;
159 *ent12_p[1] = (*ent12_p[1] & 0xf0) | (new >> 8); 165 *ent12_p[1] = (*ent12_p[1] & 0xf0) | (new >> 8);
160 } 166 }
167 spin_unlock(&fat12_entry_lock);
161 168
162 mark_buffer_dirty(fatent->bhs[0]); 169 mark_buffer_dirty(fatent->bhs[0]);
163 if (fatent->nr_bhs == 2) 170 if (fatent->nr_bhs == 2)
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 479722d8966..cfaf5877d98 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -354,8 +354,7 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
354 } else { /* not a directory */ 354 } else { /* not a directory */
355 inode->i_generation |= 1; 355 inode->i_generation |= 1;
356 inode->i_mode = MSDOS_MKMODE(de->attr, 356 inode->i_mode = MSDOS_MKMODE(de->attr,
357 ((sbi->options.showexec && 357 ((sbi->options.showexec && !is_exec(de->name + 8))
358 !is_exec(de->ext))
359 ? S_IRUGO|S_IWUGO : S_IRWXUGO) 358 ? S_IRUGO|S_IWUGO : S_IRWXUGO)
360 & ~sbi->options.fs_fmask) | S_IFREG; 359 & ~sbi->options.fs_fmask) | S_IFREG;
361 MSDOS_I(inode)->i_start = le16_to_cpu(de->start); 360 MSDOS_I(inode)->i_start = le16_to_cpu(de->start);
diff --git a/fs/freevxfs/vxfs_dir.h b/fs/freevxfs/vxfs_dir.h
index 8a4dfef1dda..3c96d6e6397 100644
--- a/fs/freevxfs/vxfs_dir.h
+++ b/fs/freevxfs/vxfs_dir.h
@@ -80,7 +80,7 @@ struct vxfs_direct {
80 * a d_name with size len. 80 * a d_name with size len.
81 */ 81 */
82#define VXFS_DIRPAD 4 82#define VXFS_DIRPAD 4
83#define VXFS_NAMEMIN ((int)((struct vxfs_direct *)0)->d_name) 83#define VXFS_NAMEMIN offsetof(struct vxfs_direct, d_name)
84#define VXFS_DIRROUND(len) ((VXFS_DIRPAD + (len) - 1) & ~(VXFS_DIRPAD -1)) 84#define VXFS_DIRROUND(len) ((VXFS_DIRPAD + (len) - 1) & ~(VXFS_DIRPAD -1))
85#define VXFS_DIRLEN(len) (VXFS_DIRROUND(VXFS_NAMEMIN + (len))) 85#define VXFS_DIRLEN(len) (VXFS_DIRROUND(VXFS_NAMEMIN + (len)))
86 86
diff --git a/fs/gfs2/eaops.c b/fs/gfs2/eaops.c
index c1f44009853..1ab3e9d7388 100644
--- a/fs/gfs2/eaops.c
+++ b/fs/gfs2/eaops.c
@@ -11,6 +11,7 @@
11#include <linux/spinlock.h> 11#include <linux/spinlock.h>
12#include <linux/completion.h> 12#include <linux/completion.h>
13#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
14#include <linux/capability.h>
14#include <linux/xattr.h> 15#include <linux/xattr.h>
15#include <linux/gfs2_ondisk.h> 16#include <linux/gfs2_ondisk.h>
16#include <linux/lm_interface.h> 17#include <linux/lm_interface.h>
diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c
index 90ebab753d3..050d29c0a5b 100644
--- a/fs/hfsplus/btree.c
+++ b/fs/hfsplus/btree.c
@@ -62,8 +62,10 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id)
62 if ((HFSPLUS_SB(sb).flags & HFSPLUS_SB_HFSX) && 62 if ((HFSPLUS_SB(sb).flags & HFSPLUS_SB_HFSX) &&
63 (head->key_type == HFSPLUS_KEY_BINARY)) 63 (head->key_type == HFSPLUS_KEY_BINARY))
64 tree->keycmp = hfsplus_cat_bin_cmp_key; 64 tree->keycmp = hfsplus_cat_bin_cmp_key;
65 else 65 else {
66 tree->keycmp = hfsplus_cat_case_cmp_key; 66 tree->keycmp = hfsplus_cat_case_cmp_key;
67 HFSPLUS_SB(sb).flags |= HFSPLUS_SB_CASEFOLD;
68 }
67 } else { 69 } else {
68 printk(KERN_ERR "hfs: unknown B*Tree requested\n"); 70 printk(KERN_ERR "hfs: unknown B*Tree requested\n");
69 goto fail_page; 71 goto fail_page;
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 80b5682a227..1955ee61251 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -36,6 +36,8 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry,
36 u16 type; 36 u16 type;
37 37
38 sb = dir->i_sb; 38 sb = dir->i_sb;
39
40 dentry->d_op = &hfsplus_dentry_operations;
39 dentry->d_fsdata = NULL; 41 dentry->d_fsdata = NULL;
40 hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); 42 hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd);
41 hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name); 43 hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name);
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 3915635b447..d9f5eda6d03 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -150,6 +150,7 @@ struct hfsplus_sb_info {
150#define HFSPLUS_SB_NODECOMPOSE 0x0002 150#define HFSPLUS_SB_NODECOMPOSE 0x0002
151#define HFSPLUS_SB_FORCE 0x0004 151#define HFSPLUS_SB_FORCE 0x0004
152#define HFSPLUS_SB_HFSX 0x0008 152#define HFSPLUS_SB_HFSX 0x0008
153#define HFSPLUS_SB_CASEFOLD 0x0010
153 154
154 155
155struct hfsplus_inode_info { 156struct hfsplus_inode_info {
@@ -321,6 +322,7 @@ void hfsplus_file_truncate(struct inode *);
321/* inode.c */ 322/* inode.c */
322extern const struct address_space_operations hfsplus_aops; 323extern const struct address_space_operations hfsplus_aops;
323extern const struct address_space_operations hfsplus_btree_aops; 324extern const struct address_space_operations hfsplus_btree_aops;
325extern struct dentry_operations hfsplus_dentry_operations;
324 326
325void hfsplus_inode_read_fork(struct inode *, struct hfsplus_fork_raw *); 327void hfsplus_inode_read_fork(struct inode *, struct hfsplus_fork_raw *);
326void hfsplus_inode_write_fork(struct inode *, struct hfsplus_fork_raw *); 328void hfsplus_inode_write_fork(struct inode *, struct hfsplus_fork_raw *);
@@ -353,6 +355,8 @@ int hfsplus_strcasecmp(const struct hfsplus_unistr *, const struct hfsplus_unist
353int hfsplus_strcmp(const struct hfsplus_unistr *, const struct hfsplus_unistr *); 355int hfsplus_strcmp(const struct hfsplus_unistr *, const struct hfsplus_unistr *);
354int hfsplus_uni2asc(struct super_block *, const struct hfsplus_unistr *, char *, int *); 356int hfsplus_uni2asc(struct super_block *, const struct hfsplus_unistr *, char *, int *);
355int hfsplus_asc2uni(struct super_block *, struct hfsplus_unistr *, const char *, int); 357int hfsplus_asc2uni(struct super_block *, struct hfsplus_unistr *, const char *, int);
358int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str);
359int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *s2);
356 360
357/* wrapper.c */ 361/* wrapper.c */
358int hfsplus_read_wrapper(struct super_block *); 362int hfsplus_read_wrapper(struct super_block *);
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 409ce5429c9..6f7c662174d 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -131,6 +131,11 @@ const struct address_space_operations hfsplus_aops = {
131 .writepages = hfsplus_writepages, 131 .writepages = hfsplus_writepages,
132}; 132};
133 133
134struct dentry_operations hfsplus_dentry_operations = {
135 .d_hash = hfsplus_hash_dentry,
136 .d_compare = hfsplus_compare_dentry,
137};
138
134static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dentry, 139static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dentry,
135 struct nameidata *nd) 140 struct nameidata *nd)
136{ 141{
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index ebd1b380cbb..6d87a2a9534 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -283,11 +283,10 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
283 struct nls_table *nls = NULL; 283 struct nls_table *nls = NULL;
284 int err = -EINVAL; 284 int err = -EINVAL;
285 285
286 sbi = kmalloc(sizeof(struct hfsplus_sb_info), GFP_KERNEL); 286 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
287 if (!sbi) 287 if (!sbi)
288 return -ENOMEM; 288 return -ENOMEM;
289 289
290 memset(sbi, 0, sizeof(HFSPLUS_SB(sb)));
291 sb->s_fs_info = sbi; 290 sb->s_fs_info = sbi;
292 INIT_HLIST_HEAD(&sbi->rsrc_inodes); 291 INIT_HLIST_HEAD(&sbi->rsrc_inodes);
293 hfsplus_fill_defaults(sbi); 292 hfsplus_fill_defaults(sbi);
@@ -381,6 +380,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
381 iput(root); 380 iput(root);
382 goto cleanup; 381 goto cleanup;
383 } 382 }
383 sb->s_root->d_op = &hfsplus_dentry_operations;
384 384
385 str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; 385 str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1;
386 str.name = HFSP_HIDDENDIR_NAME; 386 str.name = HFSP_HIDDENDIR_NAME;
diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c
index 689c8bd721f..9e10f9444b6 100644
--- a/fs/hfsplus/unicode.c
+++ b/fs/hfsplus/unicode.c
@@ -239,61 +239,201 @@ out:
239 return res; 239 return res;
240} 240}
241 241
242int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr, const char *astr, int len) 242/*
243 * Convert one or more ASCII characters into a single unicode character.
244 * Returns the number of ASCII characters corresponding to the unicode char.
245 */
246static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
247 wchar_t *uc)
243{ 248{
244 struct nls_table *nls = HFSPLUS_SB(sb).nls; 249 int size = HFSPLUS_SB(sb).nls->char2uni(astr, len, uc);
245 int size, off, decompose; 250 if (size <= 0) {
251 *uc = '?';
252 size = 1;
253 }
254 switch (*uc) {
255 case 0x2400:
256 *uc = 0;
257 break;
258 case ':':
259 *uc = '/';
260 break;
261 }
262 return size;
263}
264
265/* Decomposes a single unicode character. */
266static inline u16 *decompose_unichar(wchar_t uc, int *size)
267{
268 int off;
269
270 off = hfsplus_decompose_table[(uc >> 12) & 0xf];
271 if (off == 0 || off == 0xffff)
272 return NULL;
273
274 off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)];
275 if (!off)
276 return NULL;
277
278 off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)];
279 if (!off)
280 return NULL;
281
282 off = hfsplus_decompose_table[off + (uc & 0xf)];
283 *size = off & 3;
284 if (*size == 0)
285 return NULL;
286 return hfsplus_decompose_table + (off / 4);
287}
288
289int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr,
290 const char *astr, int len)
291{
292 int size, dsize, decompose;
293 u16 *dstr, outlen = 0;
246 wchar_t c; 294 wchar_t c;
247 u16 outlen = 0;
248 295
249 decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE); 296 decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE);
250
251 while (outlen < HFSPLUS_MAX_STRLEN && len > 0) { 297 while (outlen < HFSPLUS_MAX_STRLEN && len > 0) {
252 size = nls->char2uni(astr, len, &c); 298 size = asc2unichar(sb, astr, len, &c);
253 if (size <= 0) { 299
254 c = '?'; 300 if (decompose && (dstr = decompose_unichar(c, &dsize))) {
255 size = 1; 301 if (outlen + dsize > HFSPLUS_MAX_STRLEN)
256 }
257 astr += size;
258 len -= size;
259 switch (c) {
260 case 0x2400:
261 c = 0;
262 break;
263 case ':':
264 c = '/';
265 break;
266 }
267 if (c >= 0xc0 && decompose) {
268 off = hfsplus_decompose_table[(c >> 12) & 0xf];
269 if (!off)
270 goto done;
271 if (off == 0xffff) {
272 goto done;
273 }
274 off = hfsplus_decompose_table[off + ((c >> 8) & 0xf)];
275 if (!off)
276 goto done;
277 off = hfsplus_decompose_table[off + ((c >> 4) & 0xf)];
278 if (!off)
279 goto done;
280 off = hfsplus_decompose_table[off + (c & 0xf)];
281 size = off & 3;
282 if (!size)
283 goto done;
284 off /= 4;
285 if (outlen + size > HFSPLUS_MAX_STRLEN)
286 break; 302 break;
287 do { 303 do {
288 ustr->unicode[outlen++] = cpu_to_be16(hfsplus_decompose_table[off++]); 304 ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
289 } while (--size > 0); 305 } while (--dsize > 0);
290 continue; 306 } else
291 } 307 ustr->unicode[outlen++] = cpu_to_be16(c);
292 done: 308
293 ustr->unicode[outlen++] = cpu_to_be16(c); 309 astr += size;
310 len -= size;
294 } 311 }
295 ustr->length = cpu_to_be16(outlen); 312 ustr->length = cpu_to_be16(outlen);
296 if (len > 0) 313 if (len > 0)
297 return -ENAMETOOLONG; 314 return -ENAMETOOLONG;
298 return 0; 315 return 0;
299} 316}
317
318/*
319 * Hash a string to an integer as appropriate for the HFS+ filesystem.
320 * Composed unicode characters are decomposed and case-folding is performed
321 * if the appropriate bits are (un)set on the superblock.
322 */
323int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str)
324{
325 struct super_block *sb = dentry->d_sb;
326 const char *astr;
327 const u16 *dstr;
328 int casefold, decompose, size, dsize, len;
329 unsigned long hash;
330 wchar_t c;
331 u16 c2;
332
333 casefold = (HFSPLUS_SB(sb).flags & HFSPLUS_SB_CASEFOLD);
334 decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE);
335 hash = init_name_hash();
336 astr = str->name;
337 len = str->len;
338 while (len > 0) {
339 size = asc2unichar(sb, astr, len, &c);
340 astr += size;
341 len -= size;
342
343 if (decompose && (dstr = decompose_unichar(c, &dsize))) {
344 do {
345 c2 = *dstr++;
346 if (!casefold || (c2 = case_fold(c2)))
347 hash = partial_name_hash(c2, hash);
348 } while (--dsize > 0);
349 } else {
350 c2 = c;
351 if (!casefold || (c2 = case_fold(c2)))
352 hash = partial_name_hash(c2, hash);
353 }
354 }
355 str->hash = end_name_hash(hash);
356
357 return 0;
358}
359
360/*
361 * Compare strings with HFS+ filename ordering.
362 * Composed unicode characters are decomposed and case-folding is performed
363 * if the appropriate bits are (un)set on the superblock.
364 */
365int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *s2)
366{
367 struct super_block *sb = dentry->d_sb;
368 int casefold, decompose, size;
369 int dsize1, dsize2, len1, len2;
370 const u16 *dstr1, *dstr2;
371 const char *astr1, *astr2;
372 u16 c1, c2;
373 wchar_t c;
374
375 casefold = (HFSPLUS_SB(sb).flags & HFSPLUS_SB_CASEFOLD);
376 decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE);
377 astr1 = s1->name;
378 len1 = s1->len;
379 astr2 = s2->name;
380 len2 = s2->len;
381 dsize1 = dsize2 = 0;
382 dstr1 = dstr2 = NULL;
383
384 while (len1 > 0 && len2 > 0) {
385 if (!dsize1) {
386 size = asc2unichar(sb, astr1, len1, &c);
387 astr1 += size;
388 len1 -= size;
389
390 if (!decompose || !(dstr1 = decompose_unichar(c, &dsize1))) {
391 c1 = c;
392 dstr1 = &c1;
393 dsize1 = 1;
394 }
395 }
396
397 if (!dsize2) {
398 size = asc2unichar(sb, astr2, len2, &c);
399 astr2 += size;
400 len2 -= size;
401
402 if (!decompose || !(dstr2 = decompose_unichar(c, &dsize2))) {
403 c2 = c;
404 dstr2 = &c2;
405 dsize2 = 1;
406 }
407 }
408
409 c1 = *dstr1;
410 c2 = *dstr2;
411 if (casefold) {
412 if (!(c1 = case_fold(c1))) {
413 dstr1++;
414 dsize1--;
415 continue;
416 }
417 if (!(c2 = case_fold(c2))) {
418 dstr2++;
419 dsize2--;
420 continue;
421 }
422 }
423 if (c1 < c2)
424 return -1;
425 else if (c1 > c2)
426 return 1;
427
428 dstr1++;
429 dsize1--;
430 dstr2++;
431 dsize2--;
432 }
433
434 if (len1 < len2)
435 return -1;
436 if (len1 > len2)
437 return 1;
438 return 0;
439}
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index e6b46b3ac2f..d145cb79c30 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -13,15 +13,18 @@
13#include <linux/fs.h> 13#include <linux/fs.h>
14#include <linux/mount.h> 14#include <linux/mount.h>
15#include <linux/file.h> 15#include <linux/file.h>
16#include <linux/kernel.h>
16#include <linux/writeback.h> 17#include <linux/writeback.h>
17#include <linux/pagemap.h> 18#include <linux/pagemap.h>
18#include <linux/highmem.h> 19#include <linux/highmem.h>
19#include <linux/init.h> 20#include <linux/init.h>
20#include <linux/string.h> 21#include <linux/string.h>
21#include <linux/capability.h> 22#include <linux/capability.h>
23#include <linux/ctype.h>
22#include <linux/backing-dev.h> 24#include <linux/backing-dev.h>
23#include <linux/hugetlb.h> 25#include <linux/hugetlb.h>
24#include <linux/pagevec.h> 26#include <linux/pagevec.h>
27#include <linux/parser.h>
25#include <linux/mman.h> 28#include <linux/mman.h>
26#include <linux/quotaops.h> 29#include <linux/quotaops.h>
27#include <linux/slab.h> 30#include <linux/slab.h>
@@ -47,6 +50,21 @@ static struct backing_dev_info hugetlbfs_backing_dev_info = {
47 50
48int sysctl_hugetlb_shm_group; 51int sysctl_hugetlb_shm_group;
49 52
53enum {
54 Opt_size, Opt_nr_inodes,
55 Opt_mode, Opt_uid, Opt_gid,
56 Opt_err,
57};
58
59static match_table_t tokens = {
60 {Opt_size, "size=%s"},
61 {Opt_nr_inodes, "nr_inodes=%s"},
62 {Opt_mode, "mode=%o"},
63 {Opt_uid, "uid=%u"},
64 {Opt_gid, "gid=%u"},
65 {Opt_err, NULL},
66};
67
50static void huge_pagevec_release(struct pagevec *pvec) 68static void huge_pagevec_release(struct pagevec *pvec)
51{ 69{
52 int i; 70 int i;
@@ -594,46 +612,73 @@ static const struct super_operations hugetlbfs_ops = {
594static int 612static int
595hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) 613hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
596{ 614{
597 char *opt, *value, *rest; 615 char *p, *rest;
616 substring_t args[MAX_OPT_ARGS];
617 int option;
598 618
599 if (!options) 619 if (!options)
600 return 0; 620 return 0;
601 while ((opt = strsep(&options, ",")) != NULL) { 621
602 if (!*opt) 622 while ((p = strsep(&options, ",")) != NULL) {
623 int token;
624 if (!*p)
603 continue; 625 continue;
604 626
605 value = strchr(opt, '='); 627 token = match_token(p, tokens, args);
606 if (!value || !*value) 628 switch (token) {
607 return -EINVAL; 629 case Opt_uid:
608 else 630 if (match_int(&args[0], &option))
609 *value++ = '\0'; 631 goto bad_val;
610 632 pconfig->uid = option;
611 if (!strcmp(opt, "uid")) 633 break;
612 pconfig->uid = simple_strtoul(value, &value, 0); 634
613 else if (!strcmp(opt, "gid")) 635 case Opt_gid:
614 pconfig->gid = simple_strtoul(value, &value, 0); 636 if (match_int(&args[0], &option))
615 else if (!strcmp(opt, "mode")) 637 goto bad_val;
616 pconfig->mode = simple_strtoul(value,&value,0) & 0777U; 638 pconfig->gid = option;
617 else if (!strcmp(opt, "size")) { 639 break;
618 unsigned long long size = memparse(value, &rest); 640
641 case Opt_mode:
642 if (match_octal(&args[0], &option))
643 goto bad_val;
644 pconfig->mode = option & 0777U;
645 break;
646
647 case Opt_size: {
648 unsigned long long size;
649 /* memparse() will accept a K/M/G without a digit */
650 if (!isdigit(*args[0].from))
651 goto bad_val;
652 size = memparse(args[0].from, &rest);
619 if (*rest == '%') { 653 if (*rest == '%') {
620 size <<= HPAGE_SHIFT; 654 size <<= HPAGE_SHIFT;
621 size *= max_huge_pages; 655 size *= max_huge_pages;
622 do_div(size, 100); 656 do_div(size, 100);
623 rest++;
624 } 657 }
625 pconfig->nr_blocks = (size >> HPAGE_SHIFT); 658 pconfig->nr_blocks = (size >> HPAGE_SHIFT);
626 value = rest; 659 break;
627 } else if (!strcmp(opt,"nr_inodes")) { 660 }
628 pconfig->nr_inodes = memparse(value, &rest); 661
629 value = rest; 662 case Opt_nr_inodes:
630 } else 663 /* memparse() will accept a K/M/G without a digit */
631 return -EINVAL; 664 if (!isdigit(*args[0].from))
665 goto bad_val;
666 pconfig->nr_inodes = memparse(args[0].from, &rest);
667 break;
632 668
633 if (*value) 669 default:
670 printk(KERN_ERR "hugetlbfs: Bad mount option: \"%s\"\n",
671 p);
634 return -EINVAL; 672 return -EINVAL;
673 break;
674 }
635 } 675 }
636 return 0; 676 return 0;
677
678bad_val:
679 printk(KERN_ERR "hugetlbfs: Bad value '%s' for mount option '%s'\n",
680 args[0].from, p);
681 return 1;
637} 682}
638 683
639static int 684static int
@@ -651,7 +696,6 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
651 config.gid = current->fsgid; 696 config.gid = current->fsgid;
652 config.mode = 0755; 697 config.mode = 0755;
653 ret = hugetlbfs_parse_options(data, &config); 698 ret = hugetlbfs_parse_options(data, &config);
654
655 if (ret) 699 if (ret)
656 return ret; 700 return ret;
657 701
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 8c90cbc903f..c2a773e8620 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -12,7 +12,6 @@
12#include <linux/fs.h> 12#include <linux/fs.h>
13#include <linux/security.h> 13#include <linux/security.h>
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/kallsyms.h>
16 15
17#include <asm/uaccess.h> 16#include <asm/uaccess.h>
18#include <asm/ioctls.h> 17#include <asm/ioctls.h>
@@ -21,7 +20,6 @@ static long do_ioctl(struct file *filp, unsigned int cmd,
21 unsigned long arg) 20 unsigned long arg)
22{ 21{
23 int error = -ENOTTY; 22 int error = -ENOTTY;
24 void *f;
25 23
26 if (!filp->f_op) 24 if (!filp->f_op)
27 goto out; 25 goto out;
@@ -31,16 +29,10 @@ static long do_ioctl(struct file *filp, unsigned int cmd,
31 if (error == -ENOIOCTLCMD) 29 if (error == -ENOIOCTLCMD)
32 error = -EINVAL; 30 error = -EINVAL;
33 goto out; 31 goto out;
34 } else if ((f = filp->f_op->ioctl)) { 32 } else if (filp->f_op->ioctl) {
35 lock_kernel(); 33 lock_kernel();
36 if (!filp->f_op->ioctl) { 34 error = filp->f_op->ioctl(filp->f_path.dentry->d_inode,
37 printk("%s: ioctl %p disappeared\n", __FUNCTION__, f); 35 filp, cmd, arg);
38 print_symbol("symbol: %s\n", (unsigned long)f);
39 dump_stack();
40 } else {
41 error = filp->f_op->ioctl(filp->f_path.dentry->d_inode,
42 filp, cmd, arg);
43 }
44 unlock_kernel(); 36 unlock_kernel();
45 } 37 }
46 38
@@ -182,11 +174,3 @@ asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
182 out: 174 out:
183 return error; 175 return error;
184} 176}
185
186/*
187 * Platforms implementing 32 bit compatibility ioctl handlers in
188 * modules need this exported
189 */
190#ifdef CONFIG_COMPAT
191EXPORT_SYMBOL(sys_ioctl);
192#endif
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c
index 0e94c31cad9..1ba407c64df 100644
--- a/fs/isofs/dir.c
+++ b/fs/isofs/dir.c
@@ -7,34 +7,18 @@
7 * 7 *
8 * Steve Beynon : Missing last directory entries fixed 8 * Steve Beynon : Missing last directory entries fixed
9 * (stephen@askone.demon.co.uk) : 21st June 1996 9 * (stephen@askone.demon.co.uk) : 21st June 1996
10 * 10 *
11 * isofs directory handling functions 11 * isofs directory handling functions
12 */ 12 */
13#include <linux/smp_lock.h> 13#include <linux/smp_lock.h>
14#include "isofs.h" 14#include "isofs.h"
15 15
16static int isofs_readdir(struct file *, void *, filldir_t);
17
18const struct file_operations isofs_dir_operations =
19{
20 .read = generic_read_dir,
21 .readdir = isofs_readdir,
22};
23
24/*
25 * directories can handle most operations...
26 */
27const struct inode_operations isofs_dir_inode_operations =
28{
29 .lookup = isofs_lookup,
30};
31
32int isofs_name_translate(struct iso_directory_record *de, char *new, struct inode *inode) 16int isofs_name_translate(struct iso_directory_record *de, char *new, struct inode *inode)
33{ 17{
34 char * old = de->name; 18 char * old = de->name;
35 int len = de->name_len[0]; 19 int len = de->name_len[0];
36 int i; 20 int i;
37 21
38 for (i = 0; i < len; i++) { 22 for (i = 0; i < len; i++) {
39 unsigned char c = old[i]; 23 unsigned char c = old[i];
40 if (!c) 24 if (!c)
@@ -62,22 +46,27 @@ int isofs_name_translate(struct iso_directory_record *de, char *new, struct inod
62} 46}
63 47
64/* Acorn extensions written by Matthew Wilcox <willy@bofh.ai> 1998 */ 48/* Acorn extensions written by Matthew Wilcox <willy@bofh.ai> 1998 */
65int get_acorn_filename(struct iso_directory_record * de, 49int get_acorn_filename(struct iso_directory_record *de,
66 char * retname, struct inode * inode) 50 char *retname, struct inode *inode)
67{ 51{
68 int std; 52 int std;
69 unsigned char * chr; 53 unsigned char *chr;
70 int retnamlen = isofs_name_translate(de, retname, inode); 54 int retnamlen = isofs_name_translate(de, retname, inode);
71 if (retnamlen == 0) return 0; 55
56 if (retnamlen == 0)
57 return 0;
72 std = sizeof(struct iso_directory_record) + de->name_len[0]; 58 std = sizeof(struct iso_directory_record) + de->name_len[0];
73 if (std & 1) std++; 59 if (std & 1)
74 if ((*((unsigned char *) de) - std) != 32) return retnamlen; 60 std++;
61 if ((*((unsigned char *) de) - std) != 32)
62 return retnamlen;
75 chr = ((unsigned char *) de) + std; 63 chr = ((unsigned char *) de) + std;
76 if (strncmp(chr, "ARCHIMEDES", 10)) return retnamlen; 64 if (strncmp(chr, "ARCHIMEDES", 10))
77 if ((*retname == '_') && ((chr[19] & 1) == 1)) *retname = '!'; 65 return retnamlen;
66 if ((*retname == '_') && ((chr[19] & 1) == 1))
67 *retname = '!';
78 if (((de->flags[0] & 2) == 0) && (chr[13] == 0xff) 68 if (((de->flags[0] & 2) == 0) && (chr[13] == 0xff)
79 && ((chr[12] & 0xf0) == 0xf0)) 69 && ((chr[12] & 0xf0) == 0xf0)) {
80 {
81 retname[retnamlen] = ','; 70 retname[retnamlen] = ',';
82 sprintf(retname+retnamlen+1, "%3.3x", 71 sprintf(retname+retnamlen+1, "%3.3x",
83 ((chr[12] & 0xf) << 8) | chr[11]); 72 ((chr[12] & 0xf) << 8) | chr[11]);
@@ -91,7 +80,7 @@ int get_acorn_filename(struct iso_directory_record * de,
91 */ 80 */
92static int do_isofs_readdir(struct inode *inode, struct file *filp, 81static int do_isofs_readdir(struct inode *inode, struct file *filp,
93 void *dirent, filldir_t filldir, 82 void *dirent, filldir_t filldir,
94 char * tmpname, struct iso_directory_record * tmpde) 83 char *tmpname, struct iso_directory_record *tmpde)
95{ 84{
96 unsigned long bufsize = ISOFS_BUFFER_SIZE(inode); 85 unsigned long bufsize = ISOFS_BUFFER_SIZE(inode);
97 unsigned char bufbits = ISOFS_BUFFER_BITS(inode); 86 unsigned char bufbits = ISOFS_BUFFER_BITS(inode);
@@ -121,9 +110,11 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp,
121 110
122 de_len = *(unsigned char *) de; 111 de_len = *(unsigned char *) de;
123 112
124 /* If the length byte is zero, we should move on to the next 113 /*
125 CDROM sector. If we are at the end of the directory, we 114 * If the length byte is zero, we should move on to the next
126 kick out of the while loop. */ 115 * CDROM sector. If we are at the end of the directory, we
116 * kick out of the while loop.
117 */
127 118
128 if (de_len == 0) { 119 if (de_len == 0) {
129 brelse(bh); 120 brelse(bh);
@@ -157,11 +148,10 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp,
157 148
158 if (first_de) { 149 if (first_de) {
159 isofs_normalize_block_and_offset(de, 150 isofs_normalize_block_and_offset(de,
160 &block_saved, 151 &block_saved,
161 &offset_saved); 152 &offset_saved);
162 inode_number = isofs_get_ino(block_saved, 153 inode_number = isofs_get_ino(block_saved,
163 offset_saved, 154 offset_saved, bufbits);
164 bufbits);
165 } 155 }
166 156
167 if (de->flags[-sbi->s_high_sierra] & 0x80) { 157 if (de->flags[-sbi->s_high_sierra] & 0x80) {
@@ -199,7 +189,7 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp,
199 */ 189 */
200 if ((sbi->s_hide == 'y' && 190 if ((sbi->s_hide == 'y' &&
201 (de->flags[-sbi->s_high_sierra] & 1)) || 191 (de->flags[-sbi->s_high_sierra] & 1)) ||
202 (sbi->s_showassoc =='n' && 192 (sbi->s_showassoc =='n' &&
203 (de->flags[-sbi->s_high_sierra] & 4))) { 193 (de->flags[-sbi->s_high_sierra] & 4))) {
204 filp->f_pos += de_len; 194 filp->f_pos += de_len;
205 continue; 195 continue;
@@ -240,7 +230,8 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp,
240 230
241 continue; 231 continue;
242 } 232 }
243 if (bh) brelse(bh); 233 if (bh)
234 brelse(bh);
244 return 0; 235 return 0;
245} 236}
246 237
@@ -253,8 +244,8 @@ static int isofs_readdir(struct file *filp,
253 void *dirent, filldir_t filldir) 244 void *dirent, filldir_t filldir)
254{ 245{
255 int result; 246 int result;
256 char * tmpname; 247 char *tmpname;
257 struct iso_directory_record * tmpde; 248 struct iso_directory_record *tmpde;
258 struct inode *inode = filp->f_path.dentry->d_inode; 249 struct inode *inode = filp->f_path.dentry->d_inode;
259 250
260 tmpname = (char *)__get_free_page(GFP_KERNEL); 251 tmpname = (char *)__get_free_page(GFP_KERNEL);
@@ -270,3 +261,19 @@ static int isofs_readdir(struct file *filp,
270 unlock_kernel(); 261 unlock_kernel();
271 return result; 262 return result;
272} 263}
264
265const struct file_operations isofs_dir_operations =
266{
267 .read = generic_read_dir,
268 .readdir = isofs_readdir,
269};
270
271/*
272 * directories can handle most operations...
273 */
274const struct inode_operations isofs_dir_inode_operations =
275{
276 .lookup = isofs_lookup,
277};
278
279
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 5c3eecf7542..4f5418be059 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -73,20 +73,20 @@ static void isofs_destroy_inode(struct inode *inode)
73 kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode)); 73 kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode));
74} 74}
75 75
76static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags) 76static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
77{ 77{
78 struct iso_inode_info *ei = foo; 78 struct iso_inode_info *ei = foo;
79 79
80 inode_init_once(&ei->vfs_inode); 80 inode_init_once(&ei->vfs_inode);
81} 81}
82 82
83static int init_inodecache(void) 83static int init_inodecache(void)
84{ 84{
85 isofs_inode_cachep = kmem_cache_create("isofs_inode_cache", 85 isofs_inode_cachep = kmem_cache_create("isofs_inode_cache",
86 sizeof(struct iso_inode_info), 86 sizeof(struct iso_inode_info),
87 0, (SLAB_RECLAIM_ACCOUNT| 87 0, (SLAB_RECLAIM_ACCOUNT|
88 SLAB_MEM_SPREAD), 88 SLAB_MEM_SPREAD),
89 init_once, NULL); 89 init_once, NULL);
90 if (isofs_inode_cachep == NULL) 90 if (isofs_inode_cachep == NULL)
91 return -ENOMEM; 91 return -ENOMEM;
92 return 0; 92 return 0;
@@ -150,9 +150,9 @@ struct iso9660_options{
150 uid_t uid; 150 uid_t uid;
151 char *iocharset; 151 char *iocharset;
152 unsigned char utf8; 152 unsigned char utf8;
153 /* LVE */ 153 /* LVE */
154 s32 session; 154 s32 session;
155 s32 sbsector; 155 s32 sbsector;
156}; 156};
157 157
158/* 158/*
@@ -197,7 +197,7 @@ isofs_hashi_common(struct dentry *dentry, struct qstr *qstr, int ms)
197 hash = init_name_hash(); 197 hash = init_name_hash();
198 while (len--) { 198 while (len--) {
199 c = tolower(*name++); 199 c = tolower(*name++);
200 hash = partial_name_hash(tolower(c), hash); 200 hash = partial_name_hash(c, hash);
201 } 201 }
202 qstr->hash = end_name_hash(hash); 202 qstr->hash = end_name_hash(hash);
203 203
@@ -360,10 +360,12 @@ static int parse_options(char *options, struct iso9660_options *popt)
360 popt->check = 'u'; /* unset */ 360 popt->check = 'u'; /* unset */
361 popt->nocompress = 0; 361 popt->nocompress = 0;
362 popt->blocksize = 1024; 362 popt->blocksize = 1024;
363 popt->mode = S_IRUGO | S_IXUGO; /* r-x for all. The disc could 363 popt->mode = S_IRUGO | S_IXUGO; /*
364 be shared with DOS machines so 364 * r-x for all. The disc could
365 virtually anything could be 365 * be shared with DOS machines so
366 a valid executable. */ 366 * virtually anything could be
367 * a valid executable.
368 */
367 popt->gid = 0; 369 popt->gid = 0;
368 popt->uid = 0; 370 popt->uid = 0;
369 popt->iocharset = NULL; 371 popt->iocharset = NULL;
@@ -503,30 +505,30 @@ static unsigned int isofs_get_last_session(struct super_block *sb, s32 session)
503 Te.cdte_format=CDROM_LBA; 505 Te.cdte_format=CDROM_LBA;
504 i = ioctl_by_bdev(bdev, CDROMREADTOCENTRY, (unsigned long) &Te); 506 i = ioctl_by_bdev(bdev, CDROMREADTOCENTRY, (unsigned long) &Te);
505 if (!i) { 507 if (!i) {
506 printk(KERN_DEBUG "Session %d start %d type %d\n", 508 printk(KERN_DEBUG "ISOFS: Session %d start %d type %d\n",
507 session, Te.cdte_addr.lba, 509 session, Te.cdte_addr.lba,
508 Te.cdte_ctrl&CDROM_DATA_TRACK); 510 Te.cdte_ctrl&CDROM_DATA_TRACK);
509 if ((Te.cdte_ctrl&CDROM_DATA_TRACK) == 4) 511 if ((Te.cdte_ctrl&CDROM_DATA_TRACK) == 4)
510 return Te.cdte_addr.lba; 512 return Te.cdte_addr.lba;
511 } 513 }
512 514
513 printk(KERN_ERR "Invalid session number or type of track\n"); 515 printk(KERN_ERR "ISOFS: Invalid session number or type of track\n");
514 } 516 }
515 i = ioctl_by_bdev(bdev, CDROMMULTISESSION, (unsigned long) &ms_info); 517 i = ioctl_by_bdev(bdev, CDROMMULTISESSION, (unsigned long) &ms_info);
516 if (session > 0) 518 if (session > 0)
517 printk(KERN_ERR "Invalid session number\n"); 519 printk(KERN_ERR "ISOFS: Invalid session number\n");
518#if 0 520#if 0
519 printk("isofs.inode: CDROMMULTISESSION: rc=%d\n",i); 521 printk(KERN_DEBUG "isofs.inode: CDROMMULTISESSION: rc=%d\n",i);
520 if (i==0) { 522 if (i==0) {
521 printk("isofs.inode: XA disk: %s\n",ms_info.xa_flag?"yes":"no"); 523 printk(KERN_DEBUG "isofs.inode: XA disk: %s\n",ms_info.xa_flag?"yes":"no");
522 printk("isofs.inode: vol_desc_start = %d\n", ms_info.addr.lba); 524 printk(KERN_DEBUG "isofs.inode: vol_desc_start = %d\n", ms_info.addr.lba);
523 } 525 }
524#endif 526#endif
525 if (i==0) 527 if (i==0)
526#if WE_OBEY_THE_WRITTEN_STANDARDS 528#if WE_OBEY_THE_WRITTEN_STANDARDS
527 if (ms_info.xa_flag) /* necessary for a valid ms_info.addr */ 529 if (ms_info.xa_flag) /* necessary for a valid ms_info.addr */
528#endif 530#endif
529 vol_desc_start=ms_info.addr.lba; 531 vol_desc_start=ms_info.addr.lba;
530 return vol_desc_start; 532 return vol_desc_start;
531} 533}
532 534
@@ -538,20 +540,20 @@ static unsigned int isofs_get_last_session(struct super_block *sb, s32 session)
538 */ 540 */
539static int isofs_fill_super(struct super_block *s, void *data, int silent) 541static int isofs_fill_super(struct super_block *s, void *data, int silent)
540{ 542{
541 struct buffer_head * bh = NULL, *pri_bh = NULL; 543 struct buffer_head *bh = NULL, *pri_bh = NULL;
542 struct hs_primary_descriptor * h_pri = NULL; 544 struct hs_primary_descriptor *h_pri = NULL;
543 struct iso_primary_descriptor * pri = NULL; 545 struct iso_primary_descriptor *pri = NULL;
544 struct iso_supplementary_descriptor *sec = NULL; 546 struct iso_supplementary_descriptor *sec = NULL;
545 struct iso_directory_record * rootp; 547 struct iso_directory_record *rootp;
546 int joliet_level = 0; 548 struct inode *inode;
547 int iso_blknum, block; 549 struct iso9660_options opt;
548 int orig_zonesize; 550 struct isofs_sb_info *sbi;
549 int table; 551 unsigned long first_data_zone;
550 unsigned int vol_desc_start; 552 int joliet_level = 0;
551 unsigned long first_data_zone; 553 int iso_blknum, block;
552 struct inode * inode; 554 int orig_zonesize;
553 struct iso9660_options opt; 555 int table;
554 struct isofs_sb_info * sbi; 556 unsigned int vol_desc_start;
555 557
556 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 558 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
557 if (!sbi) 559 if (!sbi)
@@ -577,72 +579,73 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent)
577 vol_desc_start = (opt.sbsector != -1) ? 579 vol_desc_start = (opt.sbsector != -1) ?
578 opt.sbsector : isofs_get_last_session(s,opt.session); 580 opt.sbsector : isofs_get_last_session(s,opt.session);
579 581
580 for (iso_blknum = vol_desc_start+16; 582 for (iso_blknum = vol_desc_start+16;
581 iso_blknum < vol_desc_start+100; iso_blknum++) 583 iso_blknum < vol_desc_start+100; iso_blknum++) {
582 { 584 struct hs_volume_descriptor *hdp;
583 struct hs_volume_descriptor * hdp; 585 struct iso_volume_descriptor *vdp;
584 struct iso_volume_descriptor * vdp; 586
585 587 block = iso_blknum << (ISOFS_BLOCK_BITS - s->s_blocksize_bits);
586 block = iso_blknum << (ISOFS_BLOCK_BITS - s->s_blocksize_bits); 588 if (!(bh = sb_bread(s, block)))
587 if (!(bh = sb_bread(s, block))) 589 goto out_no_read;
588 goto out_no_read; 590
589 591 vdp = (struct iso_volume_descriptor *)bh->b_data;
590 vdp = (struct iso_volume_descriptor *)bh->b_data; 592 hdp = (struct hs_volume_descriptor *)bh->b_data;
591 hdp = (struct hs_volume_descriptor *)bh->b_data; 593
592 594 /*
593 /* Due to the overlapping physical location of the descriptors, 595 * Due to the overlapping physical location of the descriptors,
594 * ISO CDs can match hdp->id==HS_STANDARD_ID as well. To ensure 596 * ISO CDs can match hdp->id==HS_STANDARD_ID as well. To ensure
595 * proper identification in this case, we first check for ISO. 597 * proper identification in this case, we first check for ISO.
596 */ 598 */
597 if (strncmp (vdp->id, ISO_STANDARD_ID, sizeof vdp->id) == 0) { 599 if (strncmp (vdp->id, ISO_STANDARD_ID, sizeof vdp->id) == 0) {
598 if (isonum_711 (vdp->type) == ISO_VD_END) 600 if (isonum_711(vdp->type) == ISO_VD_END)
599 break; 601 break;
600 if (isonum_711 (vdp->type) == ISO_VD_PRIMARY) { 602 if (isonum_711(vdp->type) == ISO_VD_PRIMARY) {
601 if (pri == NULL) { 603 if (pri == NULL) {
602 pri = (struct iso_primary_descriptor *)vdp; 604 pri = (struct iso_primary_descriptor *)vdp;
603 /* Save the buffer in case we need it ... */ 605 /* Save the buffer in case we need it ... */
604 pri_bh = bh; 606 pri_bh = bh;
605 bh = NULL; 607 bh = NULL;
606 } 608 }
607 } 609 }
608#ifdef CONFIG_JOLIET 610#ifdef CONFIG_JOLIET
609 else if (isonum_711 (vdp->type) == ISO_VD_SUPPLEMENTARY) { 611 else if (isonum_711(vdp->type) == ISO_VD_SUPPLEMENTARY) {
610 sec = (struct iso_supplementary_descriptor *)vdp; 612 sec = (struct iso_supplementary_descriptor *)vdp;
611 if (sec->escape[0] == 0x25 && sec->escape[1] == 0x2f) { 613 if (sec->escape[0] == 0x25 && sec->escape[1] == 0x2f) {
612 if (opt.joliet == 'y') { 614 if (opt.joliet == 'y') {
613 if (sec->escape[2] == 0x40) { 615 if (sec->escape[2] == 0x40)
614 joliet_level = 1; 616 joliet_level = 1;
615 } else if (sec->escape[2] == 0x43) { 617 else if (sec->escape[2] == 0x43)
616 joliet_level = 2; 618 joliet_level = 2;
617 } else if (sec->escape[2] == 0x45) { 619 else if (sec->escape[2] == 0x45)
618 joliet_level = 3; 620 joliet_level = 3;
619 } 621
620 printk(KERN_DEBUG"ISO 9660 Extensions: Microsoft Joliet Level %d\n", 622 printk(KERN_DEBUG "ISO 9660 Extensions: "
621 joliet_level); 623 "Microsoft Joliet Level %d\n",
624 joliet_level);
625 }
626 goto root_found;
627 } else {
628 /* Unknown supplementary volume descriptor */
629 sec = NULL;
630 }
622 } 631 }
623 goto root_found;
624 } else {
625 /* Unknown supplementary volume descriptor */
626 sec = NULL;
627 }
628 }
629#endif 632#endif
630 } else { 633 } else {
631 if (strncmp (hdp->id, HS_STANDARD_ID, sizeof hdp->id) == 0) { 634 if (strncmp (hdp->id, HS_STANDARD_ID, sizeof hdp->id) == 0) {
632 if (isonum_711 (hdp->type) != ISO_VD_PRIMARY) 635 if (isonum_711(hdp->type) != ISO_VD_PRIMARY)
633 goto out_freebh; 636 goto out_freebh;
634 637
635 sbi->s_high_sierra = 1; 638 sbi->s_high_sierra = 1;
636 opt.rock = 'n'; 639 opt.rock = 'n';
637 h_pri = (struct hs_primary_descriptor *)vdp; 640 h_pri = (struct hs_primary_descriptor *)vdp;
638 goto root_found; 641 goto root_found;
642 }
639 } 643 }
640 }
641 644
642 /* Just skip any volume descriptors we don't recognize */ 645 /* Just skip any volume descriptors we don't recognize */
643 646
644 brelse(bh); 647 brelse(bh);
645 bh = NULL; 648 bh = NULL;
646 } 649 }
647 /* 650 /*
648 * If we fall through, either no volume descriptor was found, 651 * If we fall through, either no volume descriptor was found,
@@ -657,24 +660,24 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent)
657root_found: 660root_found:
658 661
659 if (joliet_level && (pri == NULL || opt.rock == 'n')) { 662 if (joliet_level && (pri == NULL || opt.rock == 'n')) {
660 /* This is the case of Joliet with the norock mount flag. 663 /* This is the case of Joliet with the norock mount flag.
661 * A disc with both Joliet and Rock Ridge is handled later 664 * A disc with both Joliet and Rock Ridge is handled later
662 */ 665 */
663 pri = (struct iso_primary_descriptor *) sec; 666 pri = (struct iso_primary_descriptor *) sec;
664 } 667 }
665 668
666 if(sbi->s_high_sierra){ 669 if(sbi->s_high_sierra){
667 rootp = (struct iso_directory_record *) h_pri->root_directory_record; 670 rootp = (struct iso_directory_record *) h_pri->root_directory_record;
668 sbi->s_nzones = isonum_733 (h_pri->volume_space_size); 671 sbi->s_nzones = isonum_733(h_pri->volume_space_size);
669 sbi->s_log_zone_size = isonum_723 (h_pri->logical_block_size); 672 sbi->s_log_zone_size = isonum_723(h_pri->logical_block_size);
670 sbi->s_max_size = isonum_733(h_pri->volume_space_size); 673 sbi->s_max_size = isonum_733(h_pri->volume_space_size);
671 } else { 674 } else {
672 if (!pri) 675 if (!pri)
673 goto out_freebh; 676 goto out_freebh;
674 rootp = (struct iso_directory_record *) pri->root_directory_record; 677 rootp = (struct iso_directory_record *) pri->root_directory_record;
675 sbi->s_nzones = isonum_733 (pri->volume_space_size); 678 sbi->s_nzones = isonum_733(pri->volume_space_size);
676 sbi->s_log_zone_size = isonum_723 (pri->logical_block_size); 679 sbi->s_log_zone_size = isonum_723(pri->logical_block_size);
677 sbi->s_max_size = isonum_733(pri->volume_space_size); 680 sbi->s_max_size = isonum_733(pri->volume_space_size);
678 } 681 }
679 682
680 sbi->s_ninodes = 0; /* No way to figure this out easily */ 683 sbi->s_ninodes = 0; /* No way to figure this out easily */
@@ -687,42 +690,43 @@ root_found:
687 * blocks that were 512 bytes (which should only very rarely 690 * blocks that were 512 bytes (which should only very rarely
688 * happen.) 691 * happen.)
689 */ 692 */
690 if(orig_zonesize < opt.blocksize) 693 if (orig_zonesize < opt.blocksize)
691 goto out_bad_size; 694 goto out_bad_size;
692 695
693 /* RDE: convert log zone size to bit shift */ 696 /* RDE: convert log zone size to bit shift */
694 switch (sbi->s_log_zone_size) 697 switch (sbi->s_log_zone_size) {
695 { case 512: sbi->s_log_zone_size = 9; break; 698 case 512: sbi->s_log_zone_size = 9; break;
696 case 1024: sbi->s_log_zone_size = 10; break; 699 case 1024: sbi->s_log_zone_size = 10; break;
697 case 2048: sbi->s_log_zone_size = 11; break; 700 case 2048: sbi->s_log_zone_size = 11; break;
698 701
699 default: 702 default:
700 goto out_bad_zone_size; 703 goto out_bad_zone_size;
701 } 704 }
702 705
703 s->s_magic = ISOFS_SUPER_MAGIC; 706 s->s_magic = ISOFS_SUPER_MAGIC;
704 s->s_maxbytes = 0xffffffff; /* We can handle files up to 4 GB */ 707 s->s_maxbytes = 0xffffffff; /* We can handle files up to 4 GB */
705 708
706 /* The CDROM is read-only, has no nodes (devices) on it, and since 709 /*
707 all of the files appear to be owned by root, we really do not want 710 * The CDROM is read-only, has no nodes (devices) on it, and since
708 to allow suid. (suid or devices will not show up unless we have 711 * all of the files appear to be owned by root, we really do not want
709 Rock Ridge extensions) */ 712 * to allow suid. (suid or devices will not show up unless we have
713 * Rock Ridge extensions)
714 */
710 715
711 s->s_flags |= MS_RDONLY /* | MS_NODEV | MS_NOSUID */; 716 s->s_flags |= MS_RDONLY /* | MS_NODEV | MS_NOSUID */;
712 717
713 /* Set this for reference. Its not currently used except on write 718 /* Set this for reference. Its not currently used except on write
714 which we don't have .. */ 719 which we don't have .. */
715 720
716 first_data_zone = isonum_733 (rootp->extent) + 721 first_data_zone = isonum_733(rootp->extent) +
717 isonum_711 (rootp->ext_attr_length); 722 isonum_711(rootp->ext_attr_length);
718 sbi->s_firstdatazone = first_data_zone; 723 sbi->s_firstdatazone = first_data_zone;
719#ifndef BEQUIET 724#ifndef BEQUIET
720 printk(KERN_DEBUG "Max size:%ld Log zone size:%ld\n", 725 printk(KERN_DEBUG "ISOFS: Max size:%ld Log zone size:%ld\n",
721 sbi->s_max_size, 726 sbi->s_max_size, 1UL << sbi->s_log_zone_size);
722 1UL << sbi->s_log_zone_size); 727 printk(KERN_DEBUG "ISOFS: First datazone:%ld\n", sbi->s_firstdatazone);
723 printk(KERN_DEBUG "First datazone:%ld\n", sbi->s_firstdatazone);
724 if(sbi->s_high_sierra) 728 if(sbi->s_high_sierra)
725 printk(KERN_DEBUG "Disc in High Sierra format.\n"); 729 printk(KERN_DEBUG "ISOFS: Disc in High Sierra format.\n");
726#endif 730#endif
727 731
728 /* 732 /*
@@ -737,8 +741,8 @@ root_found:
737 pri = (struct iso_primary_descriptor *) sec; 741 pri = (struct iso_primary_descriptor *) sec;
738 rootp = (struct iso_directory_record *) 742 rootp = (struct iso_directory_record *)
739 pri->root_directory_record; 743 pri->root_directory_record;
740 first_data_zone = isonum_733 (rootp->extent) + 744 first_data_zone = isonum_733(rootp->extent) +
741 isonum_711 (rootp->ext_attr_length); 745 isonum_711(rootp->ext_attr_length);
742 } 746 }
743 747
744 /* 748 /*
@@ -771,7 +775,7 @@ root_found:
771 775
772#ifdef CONFIG_JOLIET 776#ifdef CONFIG_JOLIET
773 if (joliet_level && opt.utf8 == 0) { 777 if (joliet_level && opt.utf8 == 0) {
774 char * p = opt.iocharset ? opt.iocharset : CONFIG_NLS_DEFAULT; 778 char *p = opt.iocharset ? opt.iocharset : CONFIG_NLS_DEFAULT;
775 sbi->s_nls_iocharset = load_nls(p); 779 sbi->s_nls_iocharset = load_nls(p);
776 if (! sbi->s_nls_iocharset) { 780 if (! sbi->s_nls_iocharset) {
777 /* Fail only if explicit charset specified */ 781 /* Fail only if explicit charset specified */
@@ -821,7 +825,7 @@ root_found:
821 sbi->s_rock = 0; 825 sbi->s_rock = 0;
822 if (sbi->s_firstdatazone != first_data_zone) { 826 if (sbi->s_firstdatazone != first_data_zone) {
823 sbi->s_firstdatazone = first_data_zone; 827 sbi->s_firstdatazone = first_data_zone;
824 printk(KERN_DEBUG 828 printk(KERN_DEBUG
825 "ISOFS: changing to secondary root\n"); 829 "ISOFS: changing to secondary root\n");
826 iput(inode); 830 iput(inode);
827 inode = isofs_iget(s, sbi->s_firstdatazone, 0); 831 inode = isofs_iget(s, sbi->s_firstdatazone, 0);
@@ -830,8 +834,10 @@ root_found:
830 834
831 if (opt.check == 'u') { 835 if (opt.check == 'u') {
832 /* Only Joliet is case insensitive by default */ 836 /* Only Joliet is case insensitive by default */
833 if (joliet_level) opt.check = 'r'; 837 if (joliet_level)
834 else opt.check = 's'; 838 opt.check = 'r';
839 else
840 opt.check = 's';
835 } 841 }
836 sbi->s_joliet_level = joliet_level; 842 sbi->s_joliet_level = joliet_level;
837 843
@@ -846,8 +852,10 @@ root_found:
846 goto out_no_root; 852 goto out_no_root;
847 853
848 table = 0; 854 table = 0;
849 if (joliet_level) table += 2; 855 if (joliet_level)
850 if (opt.check == 'r') table++; 856 table += 2;
857 if (opt.check == 'r')
858 table++;
851 s->s_root->d_op = &isofs_dentry_ops[table]; 859 s->s_root->d_op = &isofs_dentry_ops[table];
852 860
853 kfree(opt.iocharset); 861 kfree(opt.iocharset);
@@ -858,10 +866,10 @@ root_found:
858 * Display error messages and free resources. 866 * Display error messages and free resources.
859 */ 867 */
860out_bad_root: 868out_bad_root:
861 printk(KERN_WARNING "isofs_fill_super: root inode not initialized\n"); 869 printk(KERN_WARNING "%s: root inode not initialized\n", __func__);
862 goto out_iput; 870 goto out_iput;
863out_no_root: 871out_no_root:
864 printk(KERN_WARNING "isofs_fill_super: get root inode failed\n"); 872 printk(KERN_WARNING "%s: get root inode failed\n", __func__);
865out_iput: 873out_iput:
866 iput(inode); 874 iput(inode);
867#ifdef CONFIG_JOLIET 875#ifdef CONFIG_JOLIET
@@ -870,21 +878,20 @@ out_iput:
870#endif 878#endif
871 goto out_freesbi; 879 goto out_freesbi;
872out_no_read: 880out_no_read:
873 printk(KERN_WARNING "isofs_fill_super: " 881 printk(KERN_WARNING "%s: bread failed, dev=%s, iso_blknum=%d, block=%d\n",
874 "bread failed, dev=%s, iso_blknum=%d, block=%d\n", 882 __func__, s->s_id, iso_blknum, block);
875 s->s_id, iso_blknum, block);
876 goto out_freesbi; 883 goto out_freesbi;
877out_bad_zone_size: 884out_bad_zone_size:
878 printk(KERN_WARNING "Bad logical zone size %ld\n", 885 printk(KERN_WARNING "ISOFS: Bad logical zone size %ld\n",
879 sbi->s_log_zone_size); 886 sbi->s_log_zone_size);
880 goto out_freebh; 887 goto out_freebh;
881out_bad_size: 888out_bad_size:
882 printk(KERN_WARNING "Logical zone size(%d) < hardware blocksize(%u)\n", 889 printk(KERN_WARNING "ISOFS: Logical zone size(%d) < hardware blocksize(%u)\n",
883 orig_zonesize, opt.blocksize); 890 orig_zonesize, opt.blocksize);
884 goto out_freebh; 891 goto out_freebh;
885out_unknown_format: 892out_unknown_format:
886 if (!silent) 893 if (!silent)
887 printk(KERN_WARNING "Unable to identify CD-ROM format.\n"); 894 printk(KERN_WARNING "ISOFS: Unable to identify CD-ROM format.\n");
888 895
889out_freebh: 896out_freebh:
890 brelse(bh); 897 brelse(bh);
@@ -902,7 +909,7 @@ static int isofs_statfs (struct dentry *dentry, struct kstatfs *buf)
902 buf->f_type = ISOFS_SUPER_MAGIC; 909 buf->f_type = ISOFS_SUPER_MAGIC;
903 buf->f_bsize = sb->s_blocksize; 910 buf->f_bsize = sb->s_blocksize;
904 buf->f_blocks = (ISOFS_SB(sb)->s_nzones 911 buf->f_blocks = (ISOFS_SB(sb)->s_nzones
905 << (ISOFS_SB(sb)->s_log_zone_size - sb->s_blocksize_bits)); 912 << (ISOFS_SB(sb)->s_log_zone_size - sb->s_blocksize_bits));
906 buf->f_bfree = 0; 913 buf->f_bfree = 0;
907 buf->f_bavail = 0; 914 buf->f_bavail = 0;
908 buf->f_files = ISOFS_SB(sb)->s_ninodes; 915 buf->f_files = ISOFS_SB(sb)->s_ninodes;
@@ -931,20 +938,20 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock_s,
931 938
932 rv = 0; 939 rv = 0;
933 if (iblock < 0 || iblock != iblock_s) { 940 if (iblock < 0 || iblock != iblock_s) {
934 printk("isofs_get_blocks: block number too large\n"); 941 printk(KERN_DEBUG "%s: block number too large\n", __func__);
935 goto abort; 942 goto abort;
936 } 943 }
937 944
938 b_off = iblock; 945 b_off = iblock;
939 946
940 offset = 0; 947 offset = 0;
941 firstext = ei->i_first_extent; 948 firstext = ei->i_first_extent;
942 sect_size = ei->i_section_size >> ISOFS_BUFFER_BITS(inode); 949 sect_size = ei->i_section_size >> ISOFS_BUFFER_BITS(inode);
943 nextblk = ei->i_next_section_block; 950 nextblk = ei->i_next_section_block;
944 nextoff = ei->i_next_section_offset; 951 nextoff = ei->i_next_section_offset;
945 section = 0; 952 section = 0;
946 953
947 while ( nblocks ) { 954 while (nblocks) {
948 /* If we are *way* beyond the end of the file, print a message. 955 /* If we are *way* beyond the end of the file, print a message.
949 * Access beyond the end of the file up to the next page boundary 956 * Access beyond the end of the file up to the next page boundary
950 * is normal, however because of the way the page cache works. 957 * is normal, however because of the way the page cache works.
@@ -953,11 +960,11 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock_s,
953 * I/O errors. 960 * I/O errors.
954 */ 961 */
955 if (b_off > ((inode->i_size + PAGE_CACHE_SIZE - 1) >> ISOFS_BUFFER_BITS(inode))) { 962 if (b_off > ((inode->i_size + PAGE_CACHE_SIZE - 1) >> ISOFS_BUFFER_BITS(inode))) {
956 printk("isofs_get_blocks: block >= EOF (%ld, %ld)\n", 963 printk(KERN_DEBUG "%s: block >= EOF (%ld, %ld)\n",
957 iblock, (unsigned long) inode->i_size); 964 __func__, iblock, (unsigned long) inode->i_size);
958 goto abort; 965 goto abort;
959 } 966 }
960 967
961 /* On the last section, nextblk == 0, section size is likely to 968 /* On the last section, nextblk == 0, section size is likely to
962 * exceed sect_size by a partial block, and access beyond the 969 * exceed sect_size by a partial block, and access beyond the
963 * end of the file will reach beyond the section size, too. 970 * end of the file will reach beyond the section size, too.
@@ -976,20 +983,21 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock_s,
976 iput(ninode); 983 iput(ninode);
977 984
978 if (++section > 100) { 985 if (++section > 100) {
979 printk("isofs_get_blocks: More than 100 file sections ?!?, aborting...\n"); 986 printk(KERN_DEBUG "%s: More than 100 file sections ?!?"
980 printk("isofs_get_blocks: block=%ld firstext=%u sect_size=%u " 987 " aborting...\n", __func__);
981 "nextblk=%lu nextoff=%lu\n", 988 printk(KERN_DEBUG "%s: block=%ld firstext=%u sect_size=%u "
982 iblock, firstext, (unsigned) sect_size, 989 "nextblk=%lu nextoff=%lu\n", __func__,
983 nextblk, nextoff); 990 iblock, firstext, (unsigned) sect_size,
991 nextblk, nextoff);
984 goto abort; 992 goto abort;
985 } 993 }
986 } 994 }
987 995
988 if ( *bh ) { 996 if (*bh) {
989 map_bh(*bh, inode->i_sb, firstext + b_off - offset); 997 map_bh(*bh, inode->i_sb, firstext + b_off - offset);
990 } else { 998 } else {
991 *bh = sb_getblk(inode->i_sb, firstext+b_off-offset); 999 *bh = sb_getblk(inode->i_sb, firstext+b_off-offset);
992 if ( !*bh ) 1000 if (!*bh)
993 goto abort; 1001 goto abort;
994 } 1002 }
995 bh++; /* Next buffer head */ 1003 bh++; /* Next buffer head */
@@ -1010,7 +1018,7 @@ static int isofs_get_block(struct inode *inode, sector_t iblock,
1010 struct buffer_head *bh_result, int create) 1018 struct buffer_head *bh_result, int create)
1011{ 1019{
1012 if (create) { 1020 if (create) {
1013 printk("isofs_get_block: Kernel tries to allocate a block\n"); 1021 printk(KERN_DEBUG "%s: Kernel tries to allocate a block\n", __func__);
1014 return -EROFS; 1022 return -EROFS;
1015 } 1023 }
1016 1024
@@ -1070,11 +1078,11 @@ static int isofs_read_level3_size(struct inode *inode)
1070{ 1078{
1071 unsigned long bufsize = ISOFS_BUFFER_SIZE(inode); 1079 unsigned long bufsize = ISOFS_BUFFER_SIZE(inode);
1072 int high_sierra = ISOFS_SB(inode->i_sb)->s_high_sierra; 1080 int high_sierra = ISOFS_SB(inode->i_sb)->s_high_sierra;
1073 struct buffer_head * bh = NULL; 1081 struct buffer_head *bh = NULL;
1074 unsigned long block, offset, block_saved, offset_saved; 1082 unsigned long block, offset, block_saved, offset_saved;
1075 int i = 0; 1083 int i = 0;
1076 int more_entries = 0; 1084 int more_entries = 0;
1077 struct iso_directory_record * tmpde = NULL; 1085 struct iso_directory_record *tmpde = NULL;
1078 struct iso_inode_info *ei = ISOFS_I(inode); 1086 struct iso_inode_info *ei = ISOFS_I(inode);
1079 1087
1080 inode->i_size = 0; 1088 inode->i_size = 0;
@@ -1089,7 +1097,7 @@ static int isofs_read_level3_size(struct inode *inode)
1089 offset = ei->i_iget5_offset; 1097 offset = ei->i_iget5_offset;
1090 1098
1091 do { 1099 do {
1092 struct iso_directory_record * de; 1100 struct iso_directory_record *de;
1093 unsigned int de_len; 1101 unsigned int de_len;
1094 1102
1095 if (!bh) { 1103 if (!bh) {
@@ -1163,10 +1171,9 @@ out_noread:
1163 return -EIO; 1171 return -EIO;
1164 1172
1165out_toomany: 1173out_toomany:
1166 printk(KERN_INFO "isofs_read_level3_size: " 1174 printk(KERN_INFO "%s: More than 100 file sections ?!?, aborting...\n"
1167 "More than 100 file sections ?!?, aborting...\n" 1175 "isofs_read_level3_size: inode=%lu\n",
1168 "isofs_read_level3_size: inode=%lu\n", 1176 __func__, inode->i_ino);
1169 inode->i_ino);
1170 goto out; 1177 goto out;
1171} 1178}
1172 1179
@@ -1177,9 +1184,9 @@ static void isofs_read_inode(struct inode *inode)
1177 unsigned long bufsize = ISOFS_BUFFER_SIZE(inode); 1184 unsigned long bufsize = ISOFS_BUFFER_SIZE(inode);
1178 unsigned long block; 1185 unsigned long block;
1179 int high_sierra = sbi->s_high_sierra; 1186 int high_sierra = sbi->s_high_sierra;
1180 struct buffer_head * bh = NULL; 1187 struct buffer_head *bh = NULL;
1181 struct iso_directory_record * de; 1188 struct iso_directory_record *de;
1182 struct iso_directory_record * tmpde = NULL; 1189 struct iso_directory_record *tmpde = NULL;
1183 unsigned int de_len; 1190 unsigned int de_len;
1184 unsigned long offset; 1191 unsigned long offset;
1185 struct iso_inode_info *ei = ISOFS_I(inode); 1192 struct iso_inode_info *ei = ISOFS_I(inode);
@@ -1199,7 +1206,7 @@ static void isofs_read_inode(struct inode *inode)
1199 1206
1200 tmpde = kmalloc(de_len, GFP_KERNEL); 1207 tmpde = kmalloc(de_len, GFP_KERNEL);
1201 if (tmpde == NULL) { 1208 if (tmpde == NULL) {
1202 printk(KERN_INFO "isofs_read_inode: out of memory\n"); 1209 printk(KERN_INFO "%s: out of memory\n", __func__);
1203 goto fail; 1210 goto fail;
1204 } 1211 }
1205 memcpy(tmpde, bh->b_data + offset, frag1); 1212 memcpy(tmpde, bh->b_data + offset, frag1);
@@ -1212,24 +1219,26 @@ static void isofs_read_inode(struct inode *inode)
1212 } 1219 }
1213 1220
1214 inode->i_ino = isofs_get_ino(ei->i_iget5_block, 1221 inode->i_ino = isofs_get_ino(ei->i_iget5_block,
1215 ei->i_iget5_offset, 1222 ei->i_iget5_offset,
1216 ISOFS_BUFFER_BITS(inode)); 1223 ISOFS_BUFFER_BITS(inode));
1217 1224
1218 /* Assume it is a normal-format file unless told otherwise */ 1225 /* Assume it is a normal-format file unless told otherwise */
1219 ei->i_file_format = isofs_file_normal; 1226 ei->i_file_format = isofs_file_normal;
1220 1227
1221 if (de->flags[-high_sierra] & 2) { 1228 if (de->flags[-high_sierra] & 2) {
1222 inode->i_mode = S_IRUGO | S_IXUGO | S_IFDIR; 1229 inode->i_mode = S_IRUGO | S_IXUGO | S_IFDIR;
1223 inode->i_nlink = 1; /* Set to 1. We know there are 2, but 1230 inode->i_nlink = 1; /*
1224 the find utility tries to optimize 1231 * Set to 1. We know there are 2, but
1225 if it is 2, and it screws up. It is 1232 * the find utility tries to optimize
1226 easier to give 1 which tells find to 1233 * if it is 2, and it screws up. It is
1227 do it the hard way. */ 1234 * easier to give 1 which tells find to
1235 * do it the hard way.
1236 */
1228 } else { 1237 } else {
1229 /* Everybody gets to read the file. */ 1238 /* Everybody gets to read the file. */
1230 inode->i_mode = sbi->s_mode; 1239 inode->i_mode = sbi->s_mode;
1231 inode->i_nlink = 1; 1240 inode->i_nlink = 1;
1232 inode->i_mode |= S_IFREG; 1241 inode->i_mode |= S_IFREG;
1233 } 1242 }
1234 inode->i_uid = sbi->s_uid; 1243 inode->i_uid = sbi->s_uid;
1235 inode->i_gid = sbi->s_gid; 1244 inode->i_gid = sbi->s_gid;
@@ -1239,13 +1248,14 @@ static void isofs_read_inode(struct inode *inode)
1239 ei->i_format_parm[1] = 0; 1248 ei->i_format_parm[1] = 0;
1240 ei->i_format_parm[2] = 0; 1249 ei->i_format_parm[2] = 0;
1241 1250
1242 ei->i_section_size = isonum_733 (de->size); 1251 ei->i_section_size = isonum_733(de->size);
1243 if (de->flags[-high_sierra] & 0x80) { 1252 if (de->flags[-high_sierra] & 0x80) {
1244 if(isofs_read_level3_size(inode)) goto fail; 1253 if(isofs_read_level3_size(inode))
1254 goto fail;
1245 } else { 1255 } else {
1246 ei->i_next_section_block = 0; 1256 ei->i_next_section_block = 0;
1247 ei->i_next_section_offset = 0; 1257 ei->i_next_section_offset = 0;
1248 inode->i_size = isonum_733 (de->size); 1258 inode->i_size = isonum_733(de->size);
1249 } 1259 }
1250 1260
1251 /* 1261 /*
@@ -1258,23 +1268,24 @@ static void isofs_read_inode(struct inode *inode)
1258 inode->i_size &= 0x00ffffff; 1268 inode->i_size &= 0x00ffffff;
1259 1269
1260 if (de->interleave[0]) { 1270 if (de->interleave[0]) {
1261 printk("Interleaved files not (yet) supported.\n"); 1271 printk(KERN_DEBUG "ISOFS: Interleaved files not (yet) supported.\n");
1262 inode->i_size = 0; 1272 inode->i_size = 0;
1263 } 1273 }
1264 1274
1265 /* I have no idea what file_unit_size is used for, so 1275 /* I have no idea what file_unit_size is used for, so
1266 we will flag it for now */ 1276 we will flag it for now */
1267 if (de->file_unit_size[0] != 0) { 1277 if (de->file_unit_size[0] != 0) {
1268 printk("File unit size != 0 for ISO file (%ld).\n", 1278 printk(KERN_DEBUG "ISOFS: File unit size != 0 for ISO file (%ld).\n",
1269 inode->i_ino); 1279 inode->i_ino);
1270 } 1280 }
1271 1281
1272 /* I have no idea what other flag bits are used for, so 1282 /* I have no idea what other flag bits are used for, so
1273 we will flag it for now */ 1283 we will flag it for now */
1274#ifdef DEBUG 1284#ifdef DEBUG
1275 if((de->flags[-high_sierra] & ~2)!= 0){ 1285 if((de->flags[-high_sierra] & ~2)!= 0){
1276 printk("Unusual flag settings for ISO file (%ld %x).\n", 1286 printk(KERN_DEBUG "ISOFS: Unusual flag settings for ISO file "
1277 inode->i_ino, de->flags[-high_sierra]); 1287 "(%ld %x).\n",
1288 inode->i_ino, de->flags[-high_sierra]);
1278 } 1289 }
1279#endif 1290#endif
1280 1291
@@ -1285,11 +1296,11 @@ static void isofs_read_inode(struct inode *inode)
1285 inode->i_atime.tv_nsec = 1296 inode->i_atime.tv_nsec =
1286 inode->i_ctime.tv_nsec = 0; 1297 inode->i_ctime.tv_nsec = 0;
1287 1298
1288 ei->i_first_extent = (isonum_733 (de->extent) + 1299 ei->i_first_extent = (isonum_733(de->extent) +
1289 isonum_711 (de->ext_attr_length)); 1300 isonum_711(de->ext_attr_length));
1290 1301
1291 /* Set the number of blocks for stat() - should be done before RR */ 1302 /* Set the number of blocks for stat() - should be done before RR */
1292 inode->i_blocks = (inode->i_size + 511) >> 9; 1303 inode->i_blocks = (inode->i_size + 511) >> 9;
1293 1304
1294 /* 1305 /*
1295 * Now test for possible Rock Ridge extensions which will override 1306 * Now test for possible Rock Ridge extensions which will override
@@ -1306,7 +1317,7 @@ static void isofs_read_inode(struct inode *inode)
1306 /* Install the inode operations vector */ 1317 /* Install the inode operations vector */
1307 if (S_ISREG(inode->i_mode)) { 1318 if (S_ISREG(inode->i_mode)) {
1308 inode->i_fop = &generic_ro_fops; 1319 inode->i_fop = &generic_ro_fops;
1309 switch ( ei->i_file_format ) { 1320 switch (ei->i_file_format) {
1310#ifdef CONFIG_ZISOFS 1321#ifdef CONFIG_ZISOFS
1311 case isofs_file_compressed: 1322 case isofs_file_compressed:
1312 inode->i_data.a_ops = &zisofs_aops; 1323 inode->i_data.a_ops = &zisofs_aops;
@@ -1350,7 +1361,7 @@ static int isofs_iget5_test(struct inode *ino, void *data)
1350 struct isofs_iget5_callback_data *d = 1361 struct isofs_iget5_callback_data *d =
1351 (struct isofs_iget5_callback_data*)data; 1362 (struct isofs_iget5_callback_data*)data;
1352 return (i->i_iget5_block == d->block) 1363 return (i->i_iget5_block == d->block)
1353 && (i->i_iget5_offset == d->offset); 1364 && (i->i_iget5_offset == d->offset);
1354} 1365}
1355 1366
1356static int isofs_iget5_set(struct inode *ino, void *data) 1367static int isofs_iget5_set(struct inode *ino, void *data)
@@ -1384,7 +1395,7 @@ struct inode *isofs_iget(struct super_block *sb,
1384 hashval = (block << sb->s_blocksize_bits) | offset; 1395 hashval = (block << sb->s_blocksize_bits) | offset;
1385 1396
1386 inode = iget5_locked(sb, hashval, &isofs_iget5_test, 1397 inode = iget5_locked(sb, hashval, &isofs_iget5_test,
1387 &isofs_iget5_set, &data); 1398 &isofs_iget5_set, &data);
1388 1399
1389 if (inode && (inode->i_state & I_NEW)) { 1400 if (inode && (inode->i_state & I_NEW)) {
1390 sb->s_op->read_inode(inode); 1401 sb->s_op->read_inode(inode);
@@ -1398,7 +1409,7 @@ static int isofs_get_sb(struct file_system_type *fs_type,
1398 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 1409 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
1399{ 1410{
1400 return get_sb_bdev(fs_type, flags, dev_name, data, isofs_fill_super, 1411 return get_sb_bdev(fs_type, flags, dev_name, data, isofs_fill_super,
1401 mnt); 1412 mnt);
1402} 1413}
1403 1414
1404static struct file_system_type iso9660_fs_type = { 1415static struct file_system_type iso9660_fs_type = {
diff --git a/fs/isofs/joliet.c b/fs/isofs/joliet.c
index fb8fe7a9ddc..92c14b850e9 100644
--- a/fs/isofs/joliet.c
+++ b/fs/isofs/joliet.c
@@ -80,22 +80,20 @@ get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, st
80 80
81 if (utf8) { 81 if (utf8) {
82 len = wcsntombs_be(outname, de->name, 82 len = wcsntombs_be(outname, de->name,
83 de->name_len[0] >> 1, PAGE_SIZE); 83 de->name_len[0] >> 1, PAGE_SIZE);
84 } else { 84 } else {
85 len = uni16_to_x8(outname, (__be16 *) de->name, 85 len = uni16_to_x8(outname, (__be16 *) de->name,
86 de->name_len[0] >> 1, nls); 86 de->name_len[0] >> 1, nls);
87 } 87 }
88 if ((len > 2) && (outname[len-2] == ';') && (outname[len-1] == '1')) { 88 if ((len > 2) && (outname[len-2] == ';') && (outname[len-1] == '1'))
89 len -= 2; 89 len -= 2;
90 }
91 90
92 /* 91 /*
93 * Windows doesn't like periods at the end of a name, 92 * Windows doesn't like periods at the end of a name,
94 * so neither do we 93 * so neither do we
95 */ 94 */
96 while (len >= 2 && (outname[len-1] == '.')) { 95 while (len >= 2 && (outname[len-1] == '.'))
97 len--; 96 len--;
98 }
99 97
100 return len; 98 return len;
101} 99}
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index c04b3a14a3e..c8c7e5138a0 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -15,7 +15,7 @@
15 * some sanity tests. 15 * some sanity tests.
16 */ 16 */
17static int 17static int
18isofs_cmp(struct dentry * dentry, const char * compare, int dlen) 18isofs_cmp(struct dentry *dentry, const char *compare, int dlen)
19{ 19{
20 struct qstr qstr; 20 struct qstr qstr;
21 21
@@ -48,24 +48,24 @@ isofs_cmp(struct dentry * dentry, const char * compare, int dlen)
48 */ 48 */
49static unsigned long 49static unsigned long
50isofs_find_entry(struct inode *dir, struct dentry *dentry, 50isofs_find_entry(struct inode *dir, struct dentry *dentry,
51 unsigned long *block_rv, unsigned long* offset_rv, 51 unsigned long *block_rv, unsigned long *offset_rv,
52 char * tmpname, struct iso_directory_record * tmpde) 52 char *tmpname, struct iso_directory_record *tmpde)
53{ 53{
54 unsigned long bufsize = ISOFS_BUFFER_SIZE(dir); 54 unsigned long bufsize = ISOFS_BUFFER_SIZE(dir);
55 unsigned char bufbits = ISOFS_BUFFER_BITS(dir); 55 unsigned char bufbits = ISOFS_BUFFER_BITS(dir);
56 unsigned long block, f_pos, offset, block_saved, offset_saved; 56 unsigned long block, f_pos, offset, block_saved, offset_saved;
57 struct buffer_head * bh = NULL; 57 struct buffer_head *bh = NULL;
58 struct isofs_sb_info *sbi = ISOFS_SB(dir->i_sb); 58 struct isofs_sb_info *sbi = ISOFS_SB(dir->i_sb);
59 59
60 if (!ISOFS_I(dir)->i_first_extent) 60 if (!ISOFS_I(dir)->i_first_extent)
61 return 0; 61 return 0;
62 62
63 f_pos = 0; 63 f_pos = 0;
64 offset = 0; 64 offset = 0;
65 block = 0; 65 block = 0;
66 66
67 while (f_pos < dir->i_size) { 67 while (f_pos < dir->i_size) {
68 struct iso_directory_record * de; 68 struct iso_directory_record *de;
69 int de_len, match, i, dlen; 69 int de_len, match, i, dlen;
70 char *dpnt; 70 char *dpnt;
71 71
@@ -114,7 +114,7 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry,
114 114
115 if (sbi->s_rock && 115 if (sbi->s_rock &&
116 ((i = get_rock_ridge_filename(de, tmpname, dir)))) { 116 ((i = get_rock_ridge_filename(de, tmpname, dir)))) {
117 dlen = i; /* possibly -1 */ 117 dlen = i; /* possibly -1 */
118 dpnt = tmpname; 118 dpnt = tmpname;
119#ifdef CONFIG_JOLIET 119#ifdef CONFIG_JOLIET
120 } else if (sbi->s_joliet_level) { 120 } else if (sbi->s_joliet_level) {
@@ -145,8 +145,8 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry,
145 isofs_normalize_block_and_offset(de, 145 isofs_normalize_block_and_offset(de,
146 &block_saved, 146 &block_saved,
147 &offset_saved); 147 &offset_saved);
148 *block_rv = block_saved; 148 *block_rv = block_saved;
149 *offset_rv = offset_saved; 149 *offset_rv = offset_saved;
150 brelse(bh); 150 brelse(bh);
151 return 1; 151 return 1;
152 } 152 }
@@ -155,7 +155,7 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry,
155 return 0; 155 return 0;
156} 156}
157 157
158struct dentry *isofs_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd) 158struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
159{ 159{
160 int found; 160 int found;
161 unsigned long block, offset; 161 unsigned long block, offset;
@@ -170,9 +170,9 @@ struct dentry *isofs_lookup(struct inode * dir, struct dentry * dentry, struct n
170 170
171 lock_kernel(); 171 lock_kernel();
172 found = isofs_find_entry(dir, dentry, 172 found = isofs_find_entry(dir, dentry,
173 &block, &offset, 173 &block, &offset,
174 page_address(page), 174 page_address(page),
175 1024 + page_address(page)); 175 1024 + page_address(page));
176 __free_page(page); 176 __free_page(page);
177 177
178 inode = NULL; 178 inode = NULL;
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 1facfaff97c..a003d50edcd 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -887,7 +887,8 @@ restart_loop:
887 journal->j_committing_transaction = NULL; 887 journal->j_committing_transaction = NULL;
888 spin_unlock(&journal->j_state_lock); 888 spin_unlock(&journal->j_state_lock);
889 889
890 if (commit_transaction->t_checkpoint_list == NULL) { 890 if (commit_transaction->t_checkpoint_list == NULL &&
891 commit_transaction->t_checkpoint_io_list == NULL) {
891 __journal_drop_transaction(journal, commit_transaction); 892 __journal_drop_transaction(journal, commit_transaction);
892 } else { 893 } else {
893 if (journal->j_checkpoint_transactions == NULL) { 894 if (journal->j_checkpoint_transactions == NULL) {
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index 824e3b7d4ec..8db2fa25170 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -68,6 +68,7 @@
68#include <linux/list.h> 68#include <linux/list.h>
69#include <linux/init.h> 69#include <linux/init.h>
70#endif 70#endif
71#include <linux/log2.h>
71 72
72static struct kmem_cache *revoke_record_cache; 73static struct kmem_cache *revoke_record_cache;
73static struct kmem_cache *revoke_table_cache; 74static struct kmem_cache *revoke_table_cache;
@@ -211,7 +212,7 @@ int journal_init_revoke(journal_t *journal, int hash_size)
211 journal->j_revoke = journal->j_revoke_table[0]; 212 journal->j_revoke = journal->j_revoke_table[0];
212 213
213 /* Check that the hash_size is a power of two */ 214 /* Check that the hash_size is a power of two */
214 J_ASSERT ((hash_size & (hash_size-1)) == 0); 215 J_ASSERT(is_power_of_2(hash_size));
215 216
216 journal->j_revoke->hash_size = hash_size; 217 journal->j_revoke->hash_size = hash_size;
217 218
@@ -238,7 +239,7 @@ int journal_init_revoke(journal_t *journal, int hash_size)
238 journal->j_revoke = journal->j_revoke_table[1]; 239 journal->j_revoke = journal->j_revoke_table[1];
239 240
240 /* Check that the hash_size is a power of two */ 241 /* Check that the hash_size is a power of two */
241 J_ASSERT ((hash_size & (hash_size-1)) == 0); 242 J_ASSERT(is_power_of_2(hash_size));
242 243
243 journal->j_revoke->hash_size = hash_size; 244 journal->j_revoke->hash_size = hash_size;
244 245
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 2856e1100a5..c0f59d1b13d 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -896,7 +896,8 @@ restart_loop:
896 journal->j_committing_transaction = NULL; 896 journal->j_committing_transaction = NULL;
897 spin_unlock(&journal->j_state_lock); 897 spin_unlock(&journal->j_state_lock);
898 898
899 if (commit_transaction->t_checkpoint_list == NULL) { 899 if (commit_transaction->t_checkpoint_list == NULL &&
900 commit_transaction->t_checkpoint_io_list == NULL) {
900 __jbd2_journal_drop_transaction(journal, commit_transaction); 901 __jbd2_journal_drop_transaction(journal, commit_transaction);
901 } else { 902 } else {
902 if (journal->j_checkpoint_transactions == NULL) { 903 if (journal->j_checkpoint_transactions == NULL) {
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 9246e763da7..28cac049a56 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -68,6 +68,7 @@
68#include <linux/list.h> 68#include <linux/list.h>
69#include <linux/init.h> 69#include <linux/init.h>
70#endif 70#endif
71#include <linux/log2.h>
71 72
72static struct kmem_cache *jbd2_revoke_record_cache; 73static struct kmem_cache *jbd2_revoke_record_cache;
73static struct kmem_cache *jbd2_revoke_table_cache; 74static struct kmem_cache *jbd2_revoke_table_cache;
@@ -212,7 +213,7 @@ int jbd2_journal_init_revoke(journal_t *journal, int hash_size)
212 journal->j_revoke = journal->j_revoke_table[0]; 213 journal->j_revoke = journal->j_revoke_table[0];
213 214
214 /* Check that the hash_size is a power of two */ 215 /* Check that the hash_size is a power of two */
215 J_ASSERT ((hash_size & (hash_size-1)) == 0); 216 J_ASSERT(is_power_of_2(hash_size));
216 217
217 journal->j_revoke->hash_size = hash_size; 218 journal->j_revoke->hash_size = hash_size;
218 219
@@ -239,7 +240,7 @@ int jbd2_journal_init_revoke(journal_t *journal, int hash_size)
239 journal->j_revoke = journal->j_revoke_table[1]; 240 journal->j_revoke = journal->j_revoke_table[1];
240 241
241 /* Check that the hash_size is a power of two */ 242 /* Check that the hash_size is a power of two */
242 J_ASSERT ((hash_size & (hash_size-1)) == 0); 243 J_ASSERT(is_power_of_2(hash_size));
243 244
244 journal->j_revoke->hash_size = hash_size; 245 journal->j_revoke->hash_size = hash_size;
245 246
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 96070bff93f..572601e98dc 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -44,9 +44,8 @@ static struct nsm_handle * nsm_find(const struct sockaddr_in *sin,
44 */ 44 */
45static struct nlm_host * 45static struct nlm_host *
46nlm_lookup_host(int server, const struct sockaddr_in *sin, 46nlm_lookup_host(int server, const struct sockaddr_in *sin,
47 int proto, int version, 47 int proto, int version, const char *hostname,
48 const char *hostname, 48 int hostname_len, const struct sockaddr_in *ssin)
49 int hostname_len)
50{ 49{
51 struct hlist_head *chain; 50 struct hlist_head *chain;
52 struct hlist_node *pos; 51 struct hlist_node *pos;
@@ -54,7 +53,9 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin,
54 struct nsm_handle *nsm = NULL; 53 struct nsm_handle *nsm = NULL;
55 int hash; 54 int hash;
56 55
57 dprintk("lockd: nlm_lookup_host(%u.%u.%u.%u, p=%d, v=%d, my role=%s, name=%.*s)\n", 56 dprintk("lockd: nlm_lookup_host("NIPQUAD_FMT"->"NIPQUAD_FMT
57 ", p=%d, v=%d, my role=%s, name=%.*s)\n",
58 NIPQUAD(ssin->sin_addr.s_addr),
58 NIPQUAD(sin->sin_addr.s_addr), proto, version, 59 NIPQUAD(sin->sin_addr.s_addr), proto, version,
59 server? "server" : "client", 60 server? "server" : "client",
60 hostname_len, 61 hostname_len,
@@ -91,6 +92,8 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin,
91 continue; 92 continue;
92 if (host->h_server != server) 93 if (host->h_server != server)
93 continue; 94 continue;
95 if (!nlm_cmp_addr(&host->h_saddr, ssin))
96 continue;
94 97
95 /* Move to head of hash chain. */ 98 /* Move to head of hash chain. */
96 hlist_del(&host->h_hash); 99 hlist_del(&host->h_hash);
@@ -118,6 +121,7 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin,
118 host->h_name = nsm->sm_name; 121 host->h_name = nsm->sm_name;
119 host->h_addr = *sin; 122 host->h_addr = *sin;
120 host->h_addr.sin_port = 0; /* ouch! */ 123 host->h_addr.sin_port = 0; /* ouch! */
124 host->h_saddr = *ssin;
121 host->h_version = version; 125 host->h_version = version;
122 host->h_proto = proto; 126 host->h_proto = proto;
123 host->h_rpcclnt = NULL; 127 host->h_rpcclnt = NULL;
@@ -161,15 +165,9 @@ nlm_destroy_host(struct nlm_host *host)
161 */ 165 */
162 nsm_unmonitor(host); 166 nsm_unmonitor(host);
163 167
164 if ((clnt = host->h_rpcclnt) != NULL) { 168 clnt = host->h_rpcclnt;
165 if (atomic_read(&clnt->cl_users)) { 169 if (clnt != NULL)
166 printk(KERN_WARNING 170 rpc_shutdown_client(clnt);
167 "lockd: active RPC handle\n");
168 clnt->cl_dead = 1;
169 } else {
170 rpc_destroy_client(host->h_rpcclnt);
171 }
172 }
173 kfree(host); 171 kfree(host);
174} 172}
175 173
@@ -180,8 +178,10 @@ struct nlm_host *
180nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version, 178nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version,
181 const char *hostname, int hostname_len) 179 const char *hostname, int hostname_len)
182{ 180{
181 struct sockaddr_in ssin = {0};
182
183 return nlm_lookup_host(0, sin, proto, version, 183 return nlm_lookup_host(0, sin, proto, version,
184 hostname, hostname_len); 184 hostname, hostname_len, &ssin);
185} 185}
186 186
187/* 187/*
@@ -191,9 +191,12 @@ struct nlm_host *
191nlmsvc_lookup_host(struct svc_rqst *rqstp, 191nlmsvc_lookup_host(struct svc_rqst *rqstp,
192 const char *hostname, int hostname_len) 192 const char *hostname, int hostname_len)
193{ 193{
194 struct sockaddr_in ssin = {0};
195
196 ssin.sin_addr = rqstp->rq_daddr.addr;
194 return nlm_lookup_host(1, svc_addr_in(rqstp), 197 return nlm_lookup_host(1, svc_addr_in(rqstp),
195 rqstp->rq_prot, rqstp->rq_vers, 198 rqstp->rq_prot, rqstp->rq_vers,
196 hostname, hostname_len); 199 hostname, hostname_len, &ssin);
197} 200}
198 201
199/* 202/*
@@ -204,8 +207,9 @@ nlm_bind_host(struct nlm_host *host)
204{ 207{
205 struct rpc_clnt *clnt; 208 struct rpc_clnt *clnt;
206 209
207 dprintk("lockd: nlm_bind_host(%08x)\n", 210 dprintk("lockd: nlm_bind_host("NIPQUAD_FMT"->"NIPQUAD_FMT")\n",
208 (unsigned)ntohl(host->h_addr.sin_addr.s_addr)); 211 NIPQUAD(host->h_saddr.sin_addr),
212 NIPQUAD(host->h_addr.sin_addr));
209 213
210 /* Lock host handle */ 214 /* Lock host handle */
211 mutex_lock(&host->h_mutex); 215 mutex_lock(&host->h_mutex);
@@ -232,6 +236,7 @@ nlm_bind_host(struct nlm_host *host)
232 .protocol = host->h_proto, 236 .protocol = host->h_proto,
233 .address = (struct sockaddr *)&host->h_addr, 237 .address = (struct sockaddr *)&host->h_addr,
234 .addrsize = sizeof(host->h_addr), 238 .addrsize = sizeof(host->h_addr),
239 .saddress = (struct sockaddr *)&host->h_saddr,
235 .timeout = &timeparms, 240 .timeout = &timeparms,
236 .servername = host->h_name, 241 .servername = host->h_name,
237 .program = &nlm_program, 242 .program = &nlm_program,
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 2102e2d0134..3353ed8421a 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -61,6 +61,7 @@ nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res)
61 status); 61 status);
62 else 62 else
63 status = 0; 63 status = 0;
64 rpc_shutdown_client(clnt);
64 out: 65 out:
65 return status; 66 return status;
66} 67}
@@ -138,7 +139,6 @@ nsm_create(void)
138 .program = &nsm_program, 139 .program = &nsm_program,
139 .version = SM_VERSION, 140 .version = SM_VERSION,
140 .authflavor = RPC_AUTH_NULL, 141 .authflavor = RPC_AUTH_NULL,
141 .flags = (RPC_CLNT_CREATE_ONESHOT),
142 }; 142 };
143 143
144 return rpc_create(&args); 144 return rpc_create(&args);
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 126b1bf02c0..26809325469 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -123,9 +123,6 @@ lockd(struct svc_rqst *rqstp)
123 /* Process request with signals blocked, but allow SIGKILL. */ 123 /* Process request with signals blocked, but allow SIGKILL. */
124 allow_signal(SIGKILL); 124 allow_signal(SIGKILL);
125 125
126 /* kick rpciod */
127 rpciod_up();
128
129 dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n"); 126 dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n");
130 127
131 if (!nlm_timeout) 128 if (!nlm_timeout)
@@ -202,9 +199,6 @@ lockd(struct svc_rqst *rqstp)
202 /* Exit the RPC thread */ 199 /* Exit the RPC thread */
203 svc_exit_thread(rqstp); 200 svc_exit_thread(rqstp);
204 201
205 /* release rpciod */
206 rpciod_down();
207
208 /* Release module */ 202 /* Release module */
209 unlock_kernel(); 203 unlock_kernel();
210 module_put_and_exit(0); 204 module_put_and_exit(0);
diff --git a/fs/namespace.c b/fs/namespace.c
index b696e3a0d18..4198003d7e1 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -28,6 +28,7 @@
28#include <asm/uaccess.h> 28#include <asm/uaccess.h>
29#include <asm/unistd.h> 29#include <asm/unistd.h>
30#include "pnode.h" 30#include "pnode.h"
31#include "internal.h"
31 32
32/* spinlock for vfsmount related operations, inplace of dcache_lock */ 33/* spinlock for vfsmount related operations, inplace of dcache_lock */
33__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); 34__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
@@ -320,22 +321,16 @@ EXPORT_SYMBOL(mnt_unpin);
320static void *m_start(struct seq_file *m, loff_t *pos) 321static void *m_start(struct seq_file *m, loff_t *pos)
321{ 322{
322 struct mnt_namespace *n = m->private; 323 struct mnt_namespace *n = m->private;
323 struct list_head *p;
324 loff_t l = *pos;
325 324
326 down_read(&namespace_sem); 325 down_read(&namespace_sem);
327 list_for_each(p, &n->list) 326 return seq_list_start(&n->list, *pos);
328 if (!l--)
329 return list_entry(p, struct vfsmount, mnt_list);
330 return NULL;
331} 327}
332 328
333static void *m_next(struct seq_file *m, void *v, loff_t *pos) 329static void *m_next(struct seq_file *m, void *v, loff_t *pos)
334{ 330{
335 struct mnt_namespace *n = m->private; 331 struct mnt_namespace *n = m->private;
336 struct list_head *p = ((struct vfsmount *)v)->mnt_list.next; 332
337 (*pos)++; 333 return seq_list_next(v, &n->list, pos);
338 return p == &n->list ? NULL : list_entry(p, struct vfsmount, mnt_list);
339} 334}
340 335
341static void m_stop(struct seq_file *m, void *v) 336static void m_stop(struct seq_file *m, void *v)
@@ -350,7 +345,7 @@ static inline void mangle(struct seq_file *m, const char *s)
350 345
351static int show_vfsmnt(struct seq_file *m, void *v) 346static int show_vfsmnt(struct seq_file *m, void *v)
352{ 347{
353 struct vfsmount *mnt = v; 348 struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
354 int err = 0; 349 int err = 0;
355 static struct proc_fs_info { 350 static struct proc_fs_info {
356 int flag; 351 int flag;
@@ -405,7 +400,7 @@ struct seq_operations mounts_op = {
405 400
406static int show_vfsstat(struct seq_file *m, void *v) 401static int show_vfsstat(struct seq_file *m, void *v)
407{ 402{
408 struct vfsmount *mnt = v; 403 struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
409 int err = 0; 404 int err = 0;
410 405
411 /* device */ 406 /* device */
@@ -1457,7 +1452,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
1457 1452
1458 new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); 1453 new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
1459 if (!new_ns) 1454 if (!new_ns)
1460 return NULL; 1455 return ERR_PTR(-ENOMEM);
1461 1456
1462 atomic_set(&new_ns->count, 1); 1457 atomic_set(&new_ns->count, 1);
1463 INIT_LIST_HEAD(&new_ns->list); 1458 INIT_LIST_HEAD(&new_ns->list);
@@ -1471,7 +1466,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
1471 if (!new_ns->root) { 1466 if (!new_ns->root) {
1472 up_write(&namespace_sem); 1467 up_write(&namespace_sem);
1473 kfree(new_ns); 1468 kfree(new_ns);
1474 return NULL; 1469 return ERR_PTR(-ENOMEM);;
1475 } 1470 }
1476 spin_lock(&vfsmount_lock); 1471 spin_lock(&vfsmount_lock);
1477 list_add_tail(&new_ns->list, &new_ns->root->mnt_list); 1472 list_add_tail(&new_ns->list, &new_ns->root->mnt_list);
@@ -1515,7 +1510,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
1515 return new_ns; 1510 return new_ns;
1516} 1511}
1517 1512
1518struct mnt_namespace *copy_mnt_ns(int flags, struct mnt_namespace *ns, 1513struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
1519 struct fs_struct *new_fs) 1514 struct fs_struct *new_fs)
1520{ 1515{
1521 struct mnt_namespace *new_ns; 1516 struct mnt_namespace *new_ns;
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index d3152f8d95c..2b145de45b3 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -203,7 +203,6 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *
203 203
204 if (pos + count > MAX_NON_LFS && !(file->f_flags&O_LARGEFILE)) { 204 if (pos + count > MAX_NON_LFS && !(file->f_flags&O_LARGEFILE)) {
205 if (pos >= MAX_NON_LFS) { 205 if (pos >= MAX_NON_LFS) {
206 send_sig(SIGXFSZ, current, 0);
207 return -EFBIG; 206 return -EFBIG;
208 } 207 }
209 if (count > MAX_NON_LFS - (u32)pos) { 208 if (count > MAX_NON_LFS - (u32)pos) {
@@ -212,7 +211,6 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *
212 } 211 }
213 if (pos >= inode->i_sb->s_maxbytes) { 212 if (pos >= inode->i_sb->s_maxbytes) {
214 if (count || pos > inode->i_sb->s_maxbytes) { 213 if (count || pos > inode->i_sb->s_maxbytes) {
215 send_sig(SIGXFSZ, current, 0);
216 return -EFBIG; 214 return -EFBIG;
217 } 215 }
218 } 216 }
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index f4580b44eef..b55cb236cf7 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -6,8 +6,8 @@ obj-$(CONFIG_NFS_FS) += nfs.o
6 6
7nfs-y := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \ 7nfs-y := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \
8 pagelist.o proc.o read.o symlink.o unlink.o \ 8 pagelist.o proc.o read.o symlink.o unlink.o \
9 write.o namespace.o 9 write.o namespace.o mount_clnt.o
10nfs-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o 10nfs-$(CONFIG_ROOT_NFS) += nfsroot.o
11nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o 11nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o
12nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o 12nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o
13nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ 13nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 881fa490092..a49f9feff77 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -102,19 +102,10 @@ static struct nfs_client *nfs_alloc_client(const char *hostname,
102 int nfsversion) 102 int nfsversion)
103{ 103{
104 struct nfs_client *clp; 104 struct nfs_client *clp;
105 int error;
106 105
107 if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) 106 if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL)
108 goto error_0; 107 goto error_0;
109 108
110 error = rpciod_up();
111 if (error < 0) {
112 dprintk("%s: couldn't start rpciod! Error = %d\n",
113 __FUNCTION__, error);
114 goto error_1;
115 }
116 __set_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
117
118 if (nfsversion == 4) { 109 if (nfsversion == 4) {
119 if (nfs_callback_up() < 0) 110 if (nfs_callback_up() < 0)
120 goto error_2; 111 goto error_2;
@@ -139,8 +130,6 @@ static struct nfs_client *nfs_alloc_client(const char *hostname,
139#ifdef CONFIG_NFS_V4 130#ifdef CONFIG_NFS_V4
140 init_rwsem(&clp->cl_sem); 131 init_rwsem(&clp->cl_sem);
141 INIT_LIST_HEAD(&clp->cl_delegations); 132 INIT_LIST_HEAD(&clp->cl_delegations);
142 INIT_LIST_HEAD(&clp->cl_state_owners);
143 INIT_LIST_HEAD(&clp->cl_unused);
144 spin_lock_init(&clp->cl_lock); 133 spin_lock_init(&clp->cl_lock);
145 INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state); 134 INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state);
146 rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); 135 rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
@@ -154,9 +143,6 @@ error_3:
154 if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) 143 if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
155 nfs_callback_down(); 144 nfs_callback_down();
156error_2: 145error_2:
157 rpciod_down();
158 __clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
159error_1:
160 kfree(clp); 146 kfree(clp);
161error_0: 147error_0:
162 return NULL; 148 return NULL;
@@ -167,16 +153,7 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
167#ifdef CONFIG_NFS_V4 153#ifdef CONFIG_NFS_V4
168 if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state)) 154 if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state))
169 nfs4_kill_renewd(clp); 155 nfs4_kill_renewd(clp);
170 while (!list_empty(&clp->cl_unused)) { 156 BUG_ON(!RB_EMPTY_ROOT(&clp->cl_state_owners));
171 struct nfs4_state_owner *sp;
172
173 sp = list_entry(clp->cl_unused.next,
174 struct nfs4_state_owner,
175 so_list);
176 list_del(&sp->so_list);
177 kfree(sp);
178 }
179 BUG_ON(!list_empty(&clp->cl_state_owners));
180 if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state)) 157 if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state))
181 nfs_idmap_delete(clp); 158 nfs_idmap_delete(clp);
182#endif 159#endif
@@ -198,9 +175,6 @@ static void nfs_free_client(struct nfs_client *clp)
198 if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) 175 if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
199 nfs_callback_down(); 176 nfs_callback_down();
200 177
201 if (__test_and_clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state))
202 rpciod_down();
203
204 kfree(clp->cl_hostname); 178 kfree(clp->cl_hostname);
205 kfree(clp); 179 kfree(clp);
206 180
@@ -1232,23 +1206,9 @@ static int nfs_server_list_open(struct inode *inode, struct file *file)
1232 */ 1206 */
1233static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos) 1207static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos)
1234{ 1208{
1235 struct list_head *_p;
1236 loff_t pos = *_pos;
1237
1238 /* lock the list against modification */ 1209 /* lock the list against modification */
1239 spin_lock(&nfs_client_lock); 1210 spin_lock(&nfs_client_lock);
1240 1211 return seq_list_start_head(&nfs_client_list, *_pos);
1241 /* allow for the header line */
1242 if (!pos)
1243 return SEQ_START_TOKEN;
1244 pos--;
1245
1246 /* find the n'th element in the list */
1247 list_for_each(_p, &nfs_client_list)
1248 if (!pos--)
1249 break;
1250
1251 return _p != &nfs_client_list ? _p : NULL;
1252} 1212}
1253 1213
1254/* 1214/*
@@ -1256,14 +1216,7 @@ static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos)
1256 */ 1216 */
1257static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos) 1217static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos)
1258{ 1218{
1259 struct list_head *_p; 1219 return seq_list_next(v, &nfs_client_list, pos);
1260
1261 (*pos)++;
1262
1263 _p = v;
1264 _p = (v == SEQ_START_TOKEN) ? nfs_client_list.next : _p->next;
1265
1266 return _p != &nfs_client_list ? _p : NULL;
1267} 1220}
1268 1221
1269/* 1222/*
@@ -1282,7 +1235,7 @@ static int nfs_server_list_show(struct seq_file *m, void *v)
1282 struct nfs_client *clp; 1235 struct nfs_client *clp;
1283 1236
1284 /* display header on line 1 */ 1237 /* display header on line 1 */
1285 if (v == SEQ_START_TOKEN) { 1238 if (v == &nfs_client_list) {
1286 seq_puts(m, "NV SERVER PORT USE HOSTNAME\n"); 1239 seq_puts(m, "NV SERVER PORT USE HOSTNAME\n");
1287 return 0; 1240 return 0;
1288 } 1241 }
@@ -1323,23 +1276,9 @@ static int nfs_volume_list_open(struct inode *inode, struct file *file)
1323 */ 1276 */
1324static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos) 1277static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos)
1325{ 1278{
1326 struct list_head *_p;
1327 loff_t pos = *_pos;
1328
1329 /* lock the list against modification */ 1279 /* lock the list against modification */
1330 spin_lock(&nfs_client_lock); 1280 spin_lock(&nfs_client_lock);
1331 1281 return seq_list_start_head(&nfs_volume_list, *_pos);
1332 /* allow for the header line */
1333 if (!pos)
1334 return SEQ_START_TOKEN;
1335 pos--;
1336
1337 /* find the n'th element in the list */
1338 list_for_each(_p, &nfs_volume_list)
1339 if (!pos--)
1340 break;
1341
1342 return _p != &nfs_volume_list ? _p : NULL;
1343} 1282}
1344 1283
1345/* 1284/*
@@ -1347,14 +1286,7 @@ static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos)
1347 */ 1286 */
1348static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos) 1287static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos)
1349{ 1288{
1350 struct list_head *_p; 1289 return seq_list_next(v, &nfs_volume_list, pos);
1351
1352 (*pos)++;
1353
1354 _p = v;
1355 _p = (v == SEQ_START_TOKEN) ? nfs_volume_list.next : _p->next;
1356
1357 return _p != &nfs_volume_list ? _p : NULL;
1358} 1290}
1359 1291
1360/* 1292/*
@@ -1375,7 +1307,7 @@ static int nfs_volume_list_show(struct seq_file *m, void *v)
1375 char dev[8], fsid[17]; 1307 char dev[8], fsid[17];
1376 1308
1377 /* display header on line 1 */ 1309 /* display header on line 1 */
1378 if (v == SEQ_START_TOKEN) { 1310 if (v == &nfs_volume_list) {
1379 seq_puts(m, "NV SERVER PORT DEV FSID\n"); 1311 seq_puts(m, "NV SERVER PORT DEV FSID\n");
1380 return 0; 1312 return 0;
1381 } 1313 }
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 7f37d1bea83..20ac403469a 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -27,6 +27,13 @@ static void nfs_free_delegation(struct nfs_delegation *delegation)
27 kfree(delegation); 27 kfree(delegation);
28} 28}
29 29
30static void nfs_free_delegation_callback(struct rcu_head *head)
31{
32 struct nfs_delegation *delegation = container_of(head, struct nfs_delegation, rcu);
33
34 nfs_free_delegation(delegation);
35}
36
30static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state) 37static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state)
31{ 38{
32 struct inode *inode = state->inode; 39 struct inode *inode = state->inode;
@@ -57,7 +64,7 @@ out_err:
57 return status; 64 return status;
58} 65}
59 66
60static void nfs_delegation_claim_opens(struct inode *inode) 67static void nfs_delegation_claim_opens(struct inode *inode, const nfs4_stateid *stateid)
61{ 68{
62 struct nfs_inode *nfsi = NFS_I(inode); 69 struct nfs_inode *nfsi = NFS_I(inode);
63 struct nfs_open_context *ctx; 70 struct nfs_open_context *ctx;
@@ -72,9 +79,11 @@ again:
72 continue; 79 continue;
73 if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) 80 if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
74 continue; 81 continue;
82 if (memcmp(state->stateid.data, stateid->data, sizeof(state->stateid.data)) != 0)
83 continue;
75 get_nfs_open_context(ctx); 84 get_nfs_open_context(ctx);
76 spin_unlock(&inode->i_lock); 85 spin_unlock(&inode->i_lock);
77 err = nfs4_open_delegation_recall(ctx->dentry, state); 86 err = nfs4_open_delegation_recall(ctx, state, stateid);
78 if (err >= 0) 87 if (err >= 0)
79 err = nfs_delegation_claim_locks(ctx, state); 88 err = nfs_delegation_claim_locks(ctx, state);
80 put_nfs_open_context(ctx); 89 put_nfs_open_context(ctx);
@@ -115,10 +124,6 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
115 struct nfs_delegation *delegation; 124 struct nfs_delegation *delegation;
116 int status = 0; 125 int status = 0;
117 126
118 /* Ensure we first revalidate the attributes and page cache! */
119 if ((nfsi->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_ATTR)))
120 __nfs_revalidate_inode(NFS_SERVER(inode), inode);
121
122 delegation = kmalloc(sizeof(*delegation), GFP_KERNEL); 127 delegation = kmalloc(sizeof(*delegation), GFP_KERNEL);
123 if (delegation == NULL) 128 if (delegation == NULL)
124 return -ENOMEM; 129 return -ENOMEM;
@@ -131,10 +136,10 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
131 delegation->inode = inode; 136 delegation->inode = inode;
132 137
133 spin_lock(&clp->cl_lock); 138 spin_lock(&clp->cl_lock);
134 if (nfsi->delegation == NULL) { 139 if (rcu_dereference(nfsi->delegation) == NULL) {
135 list_add(&delegation->super_list, &clp->cl_delegations); 140 list_add_rcu(&delegation->super_list, &clp->cl_delegations);
136 nfsi->delegation = delegation;
137 nfsi->delegation_state = delegation->type; 141 nfsi->delegation_state = delegation->type;
142 rcu_assign_pointer(nfsi->delegation, delegation);
138 delegation = NULL; 143 delegation = NULL;
139 } else { 144 } else {
140 if (memcmp(&delegation->stateid, &nfsi->delegation->stateid, 145 if (memcmp(&delegation->stateid, &nfsi->delegation->stateid,
@@ -145,6 +150,12 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
145 status = -EIO; 150 status = -EIO;
146 } 151 }
147 } 152 }
153
154 /* Ensure we revalidate the attributes and page cache! */
155 spin_lock(&inode->i_lock);
156 nfsi->cache_validity |= NFS_INO_REVAL_FORCED;
157 spin_unlock(&inode->i_lock);
158
148 spin_unlock(&clp->cl_lock); 159 spin_unlock(&clp->cl_lock);
149 kfree(delegation); 160 kfree(delegation);
150 return status; 161 return status;
@@ -155,7 +166,7 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *
155 int res = 0; 166 int res = 0;
156 167
157 res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid); 168 res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid);
158 nfs_free_delegation(delegation); 169 call_rcu(&delegation->rcu, nfs_free_delegation_callback);
159 return res; 170 return res;
160} 171}
161 172
@@ -170,33 +181,55 @@ static void nfs_msync_inode(struct inode *inode)
170/* 181/*
171 * Basic procedure for returning a delegation to the server 182 * Basic procedure for returning a delegation to the server
172 */ 183 */
173int __nfs_inode_return_delegation(struct inode *inode) 184static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegation *delegation)
174{ 185{
175 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; 186 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
176 struct nfs_inode *nfsi = NFS_I(inode); 187 struct nfs_inode *nfsi = NFS_I(inode);
177 struct nfs_delegation *delegation;
178 int res = 0;
179 188
180 nfs_msync_inode(inode); 189 nfs_msync_inode(inode);
181 down_read(&clp->cl_sem); 190 down_read(&clp->cl_sem);
182 /* Guard against new delegated open calls */ 191 /* Guard against new delegated open calls */
183 down_write(&nfsi->rwsem); 192 down_write(&nfsi->rwsem);
184 spin_lock(&clp->cl_lock); 193 nfs_delegation_claim_opens(inode, &delegation->stateid);
185 delegation = nfsi->delegation;
186 if (delegation != NULL) {
187 list_del_init(&delegation->super_list);
188 nfsi->delegation = NULL;
189 nfsi->delegation_state = 0;
190 }
191 spin_unlock(&clp->cl_lock);
192 nfs_delegation_claim_opens(inode);
193 up_write(&nfsi->rwsem); 194 up_write(&nfsi->rwsem);
194 up_read(&clp->cl_sem); 195 up_read(&clp->cl_sem);
195 nfs_msync_inode(inode); 196 nfs_msync_inode(inode);
196 197
197 if (delegation != NULL) 198 return nfs_do_return_delegation(inode, delegation);
198 res = nfs_do_return_delegation(inode, delegation); 199}
199 return res; 200
201static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, const nfs4_stateid *stateid)
202{
203 struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation);
204
205 if (delegation == NULL)
206 goto nomatch;
207 if (stateid != NULL && memcmp(delegation->stateid.data, stateid->data,
208 sizeof(delegation->stateid.data)) != 0)
209 goto nomatch;
210 list_del_rcu(&delegation->super_list);
211 nfsi->delegation_state = 0;
212 rcu_assign_pointer(nfsi->delegation, NULL);
213 return delegation;
214nomatch:
215 return NULL;
216}
217
218int nfs_inode_return_delegation(struct inode *inode)
219{
220 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
221 struct nfs_inode *nfsi = NFS_I(inode);
222 struct nfs_delegation *delegation;
223 int err = 0;
224
225 if (rcu_dereference(nfsi->delegation) != NULL) {
226 spin_lock(&clp->cl_lock);
227 delegation = nfs_detach_delegation_locked(nfsi, NULL);
228 spin_unlock(&clp->cl_lock);
229 if (delegation != NULL)
230 err = __nfs_inode_return_delegation(inode, delegation);
231 }
232 return err;
200} 233}
201 234
202/* 235/*
@@ -211,19 +244,23 @@ void nfs_return_all_delegations(struct super_block *sb)
211 if (clp == NULL) 244 if (clp == NULL)
212 return; 245 return;
213restart: 246restart:
214 spin_lock(&clp->cl_lock); 247 rcu_read_lock();
215 list_for_each_entry(delegation, &clp->cl_delegations, super_list) { 248 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
216 if (delegation->inode->i_sb != sb) 249 if (delegation->inode->i_sb != sb)
217 continue; 250 continue;
218 inode = igrab(delegation->inode); 251 inode = igrab(delegation->inode);
219 if (inode == NULL) 252 if (inode == NULL)
220 continue; 253 continue;
254 spin_lock(&clp->cl_lock);
255 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
221 spin_unlock(&clp->cl_lock); 256 spin_unlock(&clp->cl_lock);
222 nfs_inode_return_delegation(inode); 257 rcu_read_unlock();
258 if (delegation != NULL)
259 __nfs_inode_return_delegation(inode, delegation);
223 iput(inode); 260 iput(inode);
224 goto restart; 261 goto restart;
225 } 262 }
226 spin_unlock(&clp->cl_lock); 263 rcu_read_unlock();
227} 264}
228 265
229static int nfs_do_expire_all_delegations(void *ptr) 266static int nfs_do_expire_all_delegations(void *ptr)
@@ -234,22 +271,26 @@ static int nfs_do_expire_all_delegations(void *ptr)
234 271
235 allow_signal(SIGKILL); 272 allow_signal(SIGKILL);
236restart: 273restart:
237 spin_lock(&clp->cl_lock);
238 if (test_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) != 0) 274 if (test_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) != 0)
239 goto out; 275 goto out;
240 if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) 276 if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0)
241 goto out; 277 goto out;
242 list_for_each_entry(delegation, &clp->cl_delegations, super_list) { 278 rcu_read_lock();
279 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
243 inode = igrab(delegation->inode); 280 inode = igrab(delegation->inode);
244 if (inode == NULL) 281 if (inode == NULL)
245 continue; 282 continue;
283 spin_lock(&clp->cl_lock);
284 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
246 spin_unlock(&clp->cl_lock); 285 spin_unlock(&clp->cl_lock);
247 nfs_inode_return_delegation(inode); 286 rcu_read_unlock();
287 if (delegation)
288 __nfs_inode_return_delegation(inode, delegation);
248 iput(inode); 289 iput(inode);
249 goto restart; 290 goto restart;
250 } 291 }
292 rcu_read_unlock();
251out: 293out:
252 spin_unlock(&clp->cl_lock);
253 nfs_put_client(clp); 294 nfs_put_client(clp);
254 module_put_and_exit(0); 295 module_put_and_exit(0);
255} 296}
@@ -280,17 +321,21 @@ void nfs_handle_cb_pathdown(struct nfs_client *clp)
280 if (clp == NULL) 321 if (clp == NULL)
281 return; 322 return;
282restart: 323restart:
283 spin_lock(&clp->cl_lock); 324 rcu_read_lock();
284 list_for_each_entry(delegation, &clp->cl_delegations, super_list) { 325 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
285 inode = igrab(delegation->inode); 326 inode = igrab(delegation->inode);
286 if (inode == NULL) 327 if (inode == NULL)
287 continue; 328 continue;
329 spin_lock(&clp->cl_lock);
330 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
288 spin_unlock(&clp->cl_lock); 331 spin_unlock(&clp->cl_lock);
289 nfs_inode_return_delegation(inode); 332 rcu_read_unlock();
333 if (delegation != NULL)
334 __nfs_inode_return_delegation(inode, delegation);
290 iput(inode); 335 iput(inode);
291 goto restart; 336 goto restart;
292 } 337 }
293 spin_unlock(&clp->cl_lock); 338 rcu_read_unlock();
294} 339}
295 340
296struct recall_threadargs { 341struct recall_threadargs {
@@ -316,21 +361,14 @@ static int recall_thread(void *data)
316 down_read(&clp->cl_sem); 361 down_read(&clp->cl_sem);
317 down_write(&nfsi->rwsem); 362 down_write(&nfsi->rwsem);
318 spin_lock(&clp->cl_lock); 363 spin_lock(&clp->cl_lock);
319 delegation = nfsi->delegation; 364 delegation = nfs_detach_delegation_locked(nfsi, args->stateid);
320 if (delegation != NULL && memcmp(delegation->stateid.data, 365 if (delegation != NULL)
321 args->stateid->data,
322 sizeof(delegation->stateid.data)) == 0) {
323 list_del_init(&delegation->super_list);
324 nfsi->delegation = NULL;
325 nfsi->delegation_state = 0;
326 args->result = 0; 366 args->result = 0;
327 } else { 367 else
328 delegation = NULL;
329 args->result = -ENOENT; 368 args->result = -ENOENT;
330 }
331 spin_unlock(&clp->cl_lock); 369 spin_unlock(&clp->cl_lock);
332 complete(&args->started); 370 complete(&args->started);
333 nfs_delegation_claim_opens(inode); 371 nfs_delegation_claim_opens(inode, args->stateid);
334 up_write(&nfsi->rwsem); 372 up_write(&nfsi->rwsem);
335 up_read(&clp->cl_sem); 373 up_read(&clp->cl_sem);
336 nfs_msync_inode(inode); 374 nfs_msync_inode(inode);
@@ -371,14 +409,14 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs
371{ 409{
372 struct nfs_delegation *delegation; 410 struct nfs_delegation *delegation;
373 struct inode *res = NULL; 411 struct inode *res = NULL;
374 spin_lock(&clp->cl_lock); 412 rcu_read_lock();
375 list_for_each_entry(delegation, &clp->cl_delegations, super_list) { 413 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
376 if (nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) { 414 if (nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) {
377 res = igrab(delegation->inode); 415 res = igrab(delegation->inode);
378 break; 416 break;
379 } 417 }
380 } 418 }
381 spin_unlock(&clp->cl_lock); 419 rcu_read_unlock();
382 return res; 420 return res;
383} 421}
384 422
@@ -388,10 +426,10 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs
388void nfs_delegation_mark_reclaim(struct nfs_client *clp) 426void nfs_delegation_mark_reclaim(struct nfs_client *clp)
389{ 427{
390 struct nfs_delegation *delegation; 428 struct nfs_delegation *delegation;
391 spin_lock(&clp->cl_lock); 429 rcu_read_lock();
392 list_for_each_entry(delegation, &clp->cl_delegations, super_list) 430 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list)
393 delegation->flags |= NFS_DELEGATION_NEED_RECLAIM; 431 delegation->flags |= NFS_DELEGATION_NEED_RECLAIM;
394 spin_unlock(&clp->cl_lock); 432 rcu_read_unlock();
395} 433}
396 434
397/* 435/*
@@ -399,39 +437,35 @@ void nfs_delegation_mark_reclaim(struct nfs_client *clp)
399 */ 437 */
400void nfs_delegation_reap_unclaimed(struct nfs_client *clp) 438void nfs_delegation_reap_unclaimed(struct nfs_client *clp)
401{ 439{
402 struct nfs_delegation *delegation, *n; 440 struct nfs_delegation *delegation;
403 LIST_HEAD(head); 441restart:
404 spin_lock(&clp->cl_lock); 442 rcu_read_lock();
405 list_for_each_entry_safe(delegation, n, &clp->cl_delegations, super_list) { 443 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
406 if ((delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0) 444 if ((delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0)
407 continue; 445 continue;
408 list_move(&delegation->super_list, &head); 446 spin_lock(&clp->cl_lock);
409 NFS_I(delegation->inode)->delegation = NULL; 447 delegation = nfs_detach_delegation_locked(NFS_I(delegation->inode), NULL);
410 NFS_I(delegation->inode)->delegation_state = 0; 448 spin_unlock(&clp->cl_lock);
411 } 449 rcu_read_unlock();
412 spin_unlock(&clp->cl_lock); 450 if (delegation != NULL)
413 while(!list_empty(&head)) { 451 call_rcu(&delegation->rcu, nfs_free_delegation_callback);
414 delegation = list_entry(head.next, struct nfs_delegation, super_list); 452 goto restart;
415 list_del(&delegation->super_list);
416 nfs_free_delegation(delegation);
417 } 453 }
454 rcu_read_unlock();
418} 455}
419 456
420int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode) 457int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
421{ 458{
422 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
423 struct nfs_inode *nfsi = NFS_I(inode); 459 struct nfs_inode *nfsi = NFS_I(inode);
424 struct nfs_delegation *delegation; 460 struct nfs_delegation *delegation;
425 int res = 0; 461 int ret = 0;
426 462
427 if (nfsi->delegation_state == 0) 463 rcu_read_lock();
428 return 0; 464 delegation = rcu_dereference(nfsi->delegation);
429 spin_lock(&clp->cl_lock);
430 delegation = nfsi->delegation;
431 if (delegation != NULL) { 465 if (delegation != NULL) {
432 memcpy(dst->data, delegation->stateid.data, sizeof(dst->data)); 466 memcpy(dst->data, delegation->stateid.data, sizeof(dst->data));
433 res = 1; 467 ret = 1;
434 } 468 }
435 spin_unlock(&clp->cl_lock); 469 rcu_read_unlock();
436 return res; 470 return ret;
437} 471}
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 2cfd4b24c7f..5874ce7fdba 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -22,11 +22,12 @@ struct nfs_delegation {
22 long flags; 22 long flags;
23 loff_t maxsize; 23 loff_t maxsize;
24 __u64 change_attr; 24 __u64 change_attr;
25 struct rcu_head rcu;
25}; 26};
26 27
27int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); 28int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
28void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); 29void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
29int __nfs_inode_return_delegation(struct inode *inode); 30int nfs_inode_return_delegation(struct inode *inode);
30int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); 31int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
31 32
32struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle); 33struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
@@ -39,27 +40,24 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
39 40
40/* NFSv4 delegation-related procedures */ 41/* NFSv4 delegation-related procedures */
41int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid); 42int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid);
42int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state); 43int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid);
43int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl); 44int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl);
44int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode); 45int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode);
45 46
46static inline int nfs_have_delegation(struct inode *inode, int flags) 47static inline int nfs_have_delegation(struct inode *inode, int flags)
47{ 48{
49 struct nfs_delegation *delegation;
50 int ret = 0;
51
48 flags &= FMODE_READ|FMODE_WRITE; 52 flags &= FMODE_READ|FMODE_WRITE;
49 smp_rmb(); 53 rcu_read_lock();
50 if ((NFS_I(inode)->delegation_state & flags) == flags) 54 delegation = rcu_dereference(NFS_I(inode)->delegation);
51 return 1; 55 if (delegation != NULL && (delegation->type & flags) == flags)
52 return 0; 56 ret = 1;
57 rcu_read_unlock();
58 return ret;
53} 59}
54 60
55static inline int nfs_inode_return_delegation(struct inode *inode)
56{
57 int err = 0;
58
59 if (NFS_I(inode)->delegation != NULL)
60 err = __nfs_inode_return_delegation(inode);
61 return err;
62}
63#else 61#else
64static inline int nfs_have_delegation(struct inode *inode, int flags) 62static inline int nfs_have_delegation(struct inode *inode, int flags)
65{ 63{
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index c27258b5d3e..322141f4ab4 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -897,14 +897,13 @@ int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
897 return (nd->intent.open.flags & O_EXCL) != 0; 897 return (nd->intent.open.flags & O_EXCL) != 0;
898} 898}
899 899
900static inline int nfs_reval_fsid(struct vfsmount *mnt, struct inode *dir, 900static inline int nfs_reval_fsid(struct inode *dir, const struct nfs_fattr *fattr)
901 struct nfs_fh *fh, struct nfs_fattr *fattr)
902{ 901{
903 struct nfs_server *server = NFS_SERVER(dir); 902 struct nfs_server *server = NFS_SERVER(dir);
904 903
905 if (!nfs_fsid_equal(&server->fsid, &fattr->fsid)) 904 if (!nfs_fsid_equal(&server->fsid, &fattr->fsid))
906 /* Revalidate fsid on root dir */ 905 /* Revalidate fsid using the parent directory */
907 return __nfs_revalidate_inode(server, mnt->mnt_root->d_inode); 906 return __nfs_revalidate_inode(server, dir);
908 return 0; 907 return 0;
909} 908}
910 909
@@ -946,7 +945,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
946 res = ERR_PTR(error); 945 res = ERR_PTR(error);
947 goto out_unlock; 946 goto out_unlock;
948 } 947 }
949 error = nfs_reval_fsid(nd->mnt, dir, &fhandle, &fattr); 948 error = nfs_reval_fsid(dir, &fattr);
950 if (error < 0) { 949 if (error < 0) {
951 res = ERR_PTR(error); 950 res = ERR_PTR(error);
952 goto out_unlock; 951 goto out_unlock;
@@ -1244,7 +1243,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
1244 attr.ia_mode = mode; 1243 attr.ia_mode = mode;
1245 attr.ia_valid = ATTR_MODE; 1244 attr.ia_valid = ATTR_MODE;
1246 1245
1247 if (nd && (nd->flags & LOOKUP_CREATE)) 1246 if ((nd->flags & LOOKUP_CREATE) != 0)
1248 open_flags = nd->intent.open.flags; 1247 open_flags = nd->intent.open.flags;
1249 1248
1250 lock_kernel(); 1249 lock_kernel();
@@ -1535,7 +1534,7 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym
1535 1534
1536 lock_kernel(); 1535 lock_kernel();
1537 1536
1538 page = alloc_page(GFP_KERNEL); 1537 page = alloc_page(GFP_HIGHUSER);
1539 if (!page) { 1538 if (!page) {
1540 unlock_kernel(); 1539 unlock_kernel();
1541 return -ENOMEM; 1540 return -ENOMEM;
@@ -1744,8 +1743,8 @@ int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
1744 struct nfs_inode *nfsi; 1743 struct nfs_inode *nfsi;
1745 struct nfs_access_entry *cache; 1744 struct nfs_access_entry *cache;
1746 1745
1747 spin_lock(&nfs_access_lru_lock);
1748restart: 1746restart:
1747 spin_lock(&nfs_access_lru_lock);
1749 list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) { 1748 list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) {
1750 struct inode *inode; 1749 struct inode *inode;
1751 1750
@@ -1770,6 +1769,7 @@ remove_lru_entry:
1770 clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags); 1769 clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags);
1771 } 1770 }
1772 spin_unlock(&inode->i_lock); 1771 spin_unlock(&inode->i_lock);
1772 spin_unlock(&nfs_access_lru_lock);
1773 iput(inode); 1773 iput(inode);
1774 goto restart; 1774 goto restart;
1775 } 1775 }
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 00eee87510f..a5c82b6f3b4 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -266,7 +266,7 @@ static const struct rpc_call_ops nfs_read_direct_ops = {
266static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos) 266static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos)
267{ 267{
268 struct nfs_open_context *ctx = dreq->ctx; 268 struct nfs_open_context *ctx = dreq->ctx;
269 struct inode *inode = ctx->dentry->d_inode; 269 struct inode *inode = ctx->path.dentry->d_inode;
270 size_t rsize = NFS_SERVER(inode)->rsize; 270 size_t rsize = NFS_SERVER(inode)->rsize;
271 unsigned int pgbase; 271 unsigned int pgbase;
272 int result; 272 int result;
@@ -295,9 +295,14 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo
295 break; 295 break;
296 } 296 }
297 if ((unsigned)result < data->npages) { 297 if ((unsigned)result < data->npages) {
298 nfs_direct_release_pages(data->pagevec, result); 298 bytes = result * PAGE_SIZE;
299 nfs_readdata_release(data); 299 if (bytes <= pgbase) {
300 break; 300 nfs_direct_release_pages(data->pagevec, result);
301 nfs_readdata_release(data);
302 break;
303 }
304 bytes -= pgbase;
305 data->npages = result;
301 } 306 }
302 307
303 get_dreq(dreq); 308 get_dreq(dreq);
@@ -601,7 +606,7 @@ static const struct rpc_call_ops nfs_write_direct_ops = {
601static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos, int sync) 606static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos, int sync)
602{ 607{
603 struct nfs_open_context *ctx = dreq->ctx; 608 struct nfs_open_context *ctx = dreq->ctx;
604 struct inode *inode = ctx->dentry->d_inode; 609 struct inode *inode = ctx->path.dentry->d_inode;
605 size_t wsize = NFS_SERVER(inode)->wsize; 610 size_t wsize = NFS_SERVER(inode)->wsize;
606 unsigned int pgbase; 611 unsigned int pgbase;
607 int result; 612 int result;
@@ -630,9 +635,14 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l
630 break; 635 break;
631 } 636 }
632 if ((unsigned)result < data->npages) { 637 if ((unsigned)result < data->npages) {
633 nfs_direct_release_pages(data->pagevec, result); 638 bytes = result * PAGE_SIZE;
634 nfs_writedata_release(data); 639 if (bytes <= pgbase) {
635 break; 640 nfs_direct_release_pages(data->pagevec, result);
641 nfs_writedata_release(data);
642 break;
643 }
644 bytes -= pgbase;
645 data->npages = result;
636 } 646 }
637 647
638 get_dreq(dreq); 648 get_dreq(dreq);
@@ -763,10 +773,8 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
763 (unsigned long) count, (long long) pos); 773 (unsigned long) count, (long long) pos);
764 774
765 if (nr_segs != 1) 775 if (nr_segs != 1)
766 return -EINVAL;
767
768 if (count < 0)
769 goto out; 776 goto out;
777
770 retval = -EFAULT; 778 retval = -EFAULT;
771 if (!access_ok(VERIFY_WRITE, buf, count)) 779 if (!access_ok(VERIFY_WRITE, buf, count))
772 goto out; 780 goto out;
@@ -814,7 +822,7 @@ out:
814ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, 822ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
815 unsigned long nr_segs, loff_t pos) 823 unsigned long nr_segs, loff_t pos)
816{ 824{
817 ssize_t retval; 825 ssize_t retval = -EINVAL;
818 struct file *file = iocb->ki_filp; 826 struct file *file = iocb->ki_filp;
819 struct address_space *mapping = file->f_mapping; 827 struct address_space *mapping = file->f_mapping;
820 /* XXX: temporary */ 828 /* XXX: temporary */
@@ -827,7 +835,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
827 (unsigned long) count, (long long) pos); 835 (unsigned long) count, (long long) pos);
828 836
829 if (nr_segs != 1) 837 if (nr_segs != 1)
830 return -EINVAL; 838 goto out;
831 839
832 retval = generic_write_checks(file, &pos, &count, 0); 840 retval = generic_write_checks(file, &pos, &count, 0);
833 if (retval) 841 if (retval)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index bd9f5a83659..3d9fccf4ef9 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -461,14 +461,14 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str
461 461
462 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); 462 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
463 if (ctx != NULL) { 463 if (ctx != NULL) {
464 atomic_set(&ctx->count, 1); 464 ctx->path.dentry = dget(dentry);
465 ctx->dentry = dget(dentry); 465 ctx->path.mnt = mntget(mnt);
466 ctx->vfsmnt = mntget(mnt);
467 ctx->cred = get_rpccred(cred); 466 ctx->cred = get_rpccred(cred);
468 ctx->state = NULL; 467 ctx->state = NULL;
469 ctx->lockowner = current->files; 468 ctx->lockowner = current->files;
470 ctx->error = 0; 469 ctx->error = 0;
471 ctx->dir_cookie = 0; 470 ctx->dir_cookie = 0;
471 kref_init(&ctx->kref);
472 } 472 }
473 return ctx; 473 return ctx;
474} 474}
@@ -476,27 +476,33 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str
476struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) 476struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
477{ 477{
478 if (ctx != NULL) 478 if (ctx != NULL)
479 atomic_inc(&ctx->count); 479 kref_get(&ctx->kref);
480 return ctx; 480 return ctx;
481} 481}
482 482
483void put_nfs_open_context(struct nfs_open_context *ctx) 483static void nfs_free_open_context(struct kref *kref)
484{ 484{
485 if (atomic_dec_and_test(&ctx->count)) { 485 struct nfs_open_context *ctx = container_of(kref,
486 if (!list_empty(&ctx->list)) { 486 struct nfs_open_context, kref);
487 struct inode *inode = ctx->dentry->d_inode; 487
488 spin_lock(&inode->i_lock); 488 if (!list_empty(&ctx->list)) {
489 list_del(&ctx->list); 489 struct inode *inode = ctx->path.dentry->d_inode;
490 spin_unlock(&inode->i_lock); 490 spin_lock(&inode->i_lock);
491 } 491 list_del(&ctx->list);
492 if (ctx->state != NULL) 492 spin_unlock(&inode->i_lock);
493 nfs4_close_state(ctx->state, ctx->mode);
494 if (ctx->cred != NULL)
495 put_rpccred(ctx->cred);
496 dput(ctx->dentry);
497 mntput(ctx->vfsmnt);
498 kfree(ctx);
499 } 493 }
494 if (ctx->state != NULL)
495 nfs4_close_state(&ctx->path, ctx->state, ctx->mode);
496 if (ctx->cred != NULL)
497 put_rpccred(ctx->cred);
498 dput(ctx->path.dentry);
499 mntput(ctx->path.mnt);
500 kfree(ctx);
501}
502
503void put_nfs_open_context(struct nfs_open_context *ctx)
504{
505 kref_put(&ctx->kref, nfs_free_open_context);
500} 506}
501 507
502/* 508/*
@@ -961,8 +967,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
961 goto out_changed; 967 goto out_changed;
962 968
963 server = NFS_SERVER(inode); 969 server = NFS_SERVER(inode);
964 /* Update the fsid if and only if this is the root directory */ 970 /* Update the fsid? */
965 if (inode == inode->i_sb->s_root->d_inode 971 if (S_ISDIR(inode->i_mode)
966 && !nfs_fsid_equal(&server->fsid, &fattr->fsid)) 972 && !nfs_fsid_equal(&server->fsid, &fattr->fsid))
967 server->fsid = fattr->fsid; 973 server->fsid = fattr->fsid;
968 974
@@ -1066,8 +1072,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1066 invalid &= ~NFS_INO_INVALID_DATA; 1072 invalid &= ~NFS_INO_INVALID_DATA;
1067 if (data_stable) 1073 if (data_stable)
1068 invalid &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME|NFS_INO_REVAL_PAGECACHE); 1074 invalid &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME|NFS_INO_REVAL_PAGECACHE);
1069 if (!nfs_have_delegation(inode, FMODE_READ)) 1075 if (!nfs_have_delegation(inode, FMODE_READ) ||
1076 (nfsi->cache_validity & NFS_INO_REVAL_FORCED))
1070 nfsi->cache_validity |= invalid; 1077 nfsi->cache_validity |= invalid;
1078 nfsi->cache_validity &= ~NFS_INO_REVAL_FORCED;
1071 1079
1072 return 0; 1080 return 0;
1073 out_changed: 1081 out_changed:
@@ -1103,27 +1111,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1103 */ 1111 */
1104void nfs4_clear_inode(struct inode *inode) 1112void nfs4_clear_inode(struct inode *inode)
1105{ 1113{
1106 struct nfs_inode *nfsi = NFS_I(inode);
1107
1108 /* If we are holding a delegation, return it! */ 1114 /* If we are holding a delegation, return it! */
1109 nfs_inode_return_delegation(inode); 1115 nfs_inode_return_delegation(inode);
1110 /* First call standard NFS clear_inode() code */ 1116 /* First call standard NFS clear_inode() code */
1111 nfs_clear_inode(inode); 1117 nfs_clear_inode(inode);
1112 /* Now clear out any remaining state */
1113 while (!list_empty(&nfsi->open_states)) {
1114 struct nfs4_state *state;
1115
1116 state = list_entry(nfsi->open_states.next,
1117 struct nfs4_state,
1118 inode_states);
1119 dprintk("%s(%s/%Ld): found unclaimed NFSv4 state %p\n",
1120 __FUNCTION__,
1121 inode->i_sb->s_id,
1122 (long long)NFS_FILEID(inode),
1123 state);
1124 BUG_ON(atomic_read(&state->count) != 1);
1125 nfs4_close_state(state, state->state);
1126 }
1127} 1118}
1128#endif 1119#endif
1129 1120
@@ -1165,15 +1156,11 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
1165 struct nfs_inode *nfsi = (struct nfs_inode *) foo; 1156 struct nfs_inode *nfsi = (struct nfs_inode *) foo;
1166 1157
1167 inode_init_once(&nfsi->vfs_inode); 1158 inode_init_once(&nfsi->vfs_inode);
1168 spin_lock_init(&nfsi->req_lock);
1169 INIT_LIST_HEAD(&nfsi->dirty);
1170 INIT_LIST_HEAD(&nfsi->commit);
1171 INIT_LIST_HEAD(&nfsi->open_files); 1159 INIT_LIST_HEAD(&nfsi->open_files);
1172 INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); 1160 INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
1173 INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); 1161 INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
1174 INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); 1162 INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
1175 atomic_set(&nfsi->data_updates, 0); 1163 atomic_set(&nfsi->data_updates, 0);
1176 nfsi->ndirty = 0;
1177 nfsi->ncommit = 0; 1164 nfsi->ncommit = 0;
1178 nfsi->npages = 0; 1165 nfsi->npages = 0;
1179 nfs4_init_once(nfsi); 1166 nfs4_init_once(nfsi);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index ad2b40db1e6..76cf55d5710 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -183,9 +183,9 @@ unsigned long nfs_block_bits(unsigned long bsize, unsigned char *nrbitsp)
183/* 183/*
184 * Calculate the number of 512byte blocks used. 184 * Calculate the number of 512byte blocks used.
185 */ 185 */
186static inline unsigned long nfs_calc_block_size(u64 tsize) 186static inline blkcnt_t nfs_calc_block_size(u64 tsize)
187{ 187{
188 loff_t used = (tsize + 511) >> 9; 188 blkcnt_t used = (tsize + 511) >> 9;
189 return (used > ULONG_MAX) ? ULONG_MAX : used; 189 return (used > ULONG_MAX) ? ULONG_MAX : used;
190} 190}
191 191
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index ca5a266a314..8afd9f7e7a9 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -1,7 +1,5 @@
1/* 1/*
2 * linux/fs/nfs/mount_clnt.c 2 * In-kernel MOUNT protocol client
3 *
4 * MOUNT client to support NFSroot.
5 * 3 *
6 * Copyright (C) 1997, Olaf Kirch <okir@monad.swb.de> 4 * Copyright (C) 1997, Olaf Kirch <okir@monad.swb.de>
7 */ 5 */
@@ -18,33 +16,31 @@
18#include <linux/nfs_fs.h> 16#include <linux/nfs_fs.h>
19 17
20#ifdef RPC_DEBUG 18#ifdef RPC_DEBUG
21# define NFSDBG_FACILITY NFSDBG_ROOT 19# define NFSDBG_FACILITY NFSDBG_MOUNT
22#endif 20#endif
23 21
24/*
25#define MOUNT_PROGRAM 100005
26#define MOUNT_VERSION 1
27#define MOUNT_MNT 1
28#define MOUNT_UMNT 3
29 */
30
31static struct rpc_clnt * mnt_create(char *, struct sockaddr_in *,
32 int, int);
33static struct rpc_program mnt_program; 22static struct rpc_program mnt_program;
34 23
35struct mnt_fhstatus { 24struct mnt_fhstatus {
36 unsigned int status; 25 u32 status;
37 struct nfs_fh * fh; 26 struct nfs_fh *fh;
38}; 27};
39 28
40/* 29/**
41 * Obtain an NFS file handle for the given host and path 30 * nfs_mount - Obtain an NFS file handle for the given host and path
31 * @addr: pointer to server's address
32 * @len: size of server's address
33 * @hostname: name of server host, or NULL
34 * @path: pointer to string containing export path to mount
35 * @version: mount version to use for this request
36 * @protocol: transport protocol to use for thie request
37 * @fh: pointer to location to place returned file handle
38 *
39 * Uses default timeout parameters specified by underlying transport.
42 */ 40 */
43int 41int nfs_mount(struct sockaddr *addr, size_t len, char *hostname, char *path,
44nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh, 42 int version, int protocol, struct nfs_fh *fh)
45 int version, int protocol)
46{ 43{
47 struct rpc_clnt *mnt_clnt;
48 struct mnt_fhstatus result = { 44 struct mnt_fhstatus result = {
49 .fh = fh 45 .fh = fh
50 }; 46 };
@@ -52,16 +48,25 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
52 .rpc_argp = path, 48 .rpc_argp = path,
53 .rpc_resp = &result, 49 .rpc_resp = &result,
54 }; 50 };
55 char hostname[32]; 51 struct rpc_create_args args = {
52 .protocol = protocol,
53 .address = addr,
54 .addrsize = len,
55 .servername = hostname,
56 .program = &mnt_program,
57 .version = version,
58 .authflavor = RPC_AUTH_UNIX,
59 .flags = RPC_CLNT_CREATE_INTR,
60 };
61 struct rpc_clnt *mnt_clnt;
56 int status; 62 int status;
57 63
58 dprintk("NFS: nfs_mount(%08x:%s)\n", 64 dprintk("NFS: sending MNT request for %s:%s\n",
59 (unsigned)ntohl(addr->sin_addr.s_addr), path); 65 (hostname ? hostname : "server"), path);
60 66
61 sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr->sin_addr.s_addr)); 67 mnt_clnt = rpc_create(&args);
62 mnt_clnt = mnt_create(hostname, addr, version, protocol);
63 if (IS_ERR(mnt_clnt)) 68 if (IS_ERR(mnt_clnt))
64 return PTR_ERR(mnt_clnt); 69 goto out_clnt_err;
65 70
66 if (version == NFS_MNT3_VERSION) 71 if (version == NFS_MNT3_VERSION)
67 msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT]; 72 msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT];
@@ -69,33 +74,39 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
69 msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT]; 74 msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT];
70 75
71 status = rpc_call_sync(mnt_clnt, &msg, 0); 76 status = rpc_call_sync(mnt_clnt, &msg, 0);
72 return status < 0? status : (result.status? -EACCES : 0); 77 rpc_shutdown_client(mnt_clnt);
73}
74 78
75static struct rpc_clnt * 79 if (status < 0)
76mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version, 80 goto out_call_err;
77 int protocol) 81 if (result.status != 0)
78{ 82 goto out_mnt_err;
79 struct rpc_create_args args = { 83
80 .protocol = protocol, 84 dprintk("NFS: MNT request succeeded\n");
81 .address = (struct sockaddr *)srvaddr, 85 status = 0;
82 .addrsize = sizeof(*srvaddr), 86
83 .servername = hostname, 87out:
84 .program = &mnt_program, 88 return status;
85 .version = version, 89
86 .authflavor = RPC_AUTH_UNIX, 90out_clnt_err:
87 .flags = (RPC_CLNT_CREATE_ONESHOT | 91 status = PTR_ERR(mnt_clnt);
88 RPC_CLNT_CREATE_INTR), 92 dprintk("NFS: failed to create RPC client, status=%d\n", status);
89 }; 93 goto out;
94
95out_call_err:
96 dprintk("NFS: failed to start MNT request, status=%d\n", status);
97 goto out;
90 98
91 return rpc_create(&args); 99out_mnt_err:
100 dprintk("NFS: MNT server returned result %d\n", result.status);
101 status = -EACCES;
102 goto out;
92} 103}
93 104
94/* 105/*
95 * XDR encode/decode functions for MOUNT 106 * XDR encode/decode functions for MOUNT
96 */ 107 */
97static int 108static int xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p,
98xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p, const char *path) 109 const char *path)
99{ 110{
100 p = xdr_encode_string(p, path); 111 p = xdr_encode_string(p, path);
101 112
@@ -103,8 +114,8 @@ xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p, const char *path)
103 return 0; 114 return 0;
104} 115}
105 116
106static int 117static int xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p,
107xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res) 118 struct mnt_fhstatus *res)
108{ 119{
109 struct nfs_fh *fh = res->fh; 120 struct nfs_fh *fh = res->fh;
110 121
@@ -115,8 +126,8 @@ xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res)
115 return 0; 126 return 0;
116} 127}
117 128
118static int 129static int xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p,
119xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res) 130 struct mnt_fhstatus *res)
120{ 131{
121 struct nfs_fh *fh = res->fh; 132 struct nfs_fh *fh = res->fh;
122 133
@@ -135,53 +146,53 @@ xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res)
135#define MNT_fhstatus_sz (1 + 8) 146#define MNT_fhstatus_sz (1 + 8)
136#define MNT_fhstatus3_sz (1 + 16) 147#define MNT_fhstatus3_sz (1 + 16)
137 148
138static struct rpc_procinfo mnt_procedures[] = { 149static struct rpc_procinfo mnt_procedures[] = {
139[MNTPROC_MNT] = { 150 [MNTPROC_MNT] = {
140 .p_proc = MNTPROC_MNT, 151 .p_proc = MNTPROC_MNT,
141 .p_encode = (kxdrproc_t) xdr_encode_dirpath, 152 .p_encode = (kxdrproc_t) xdr_encode_dirpath,
142 .p_decode = (kxdrproc_t) xdr_decode_fhstatus, 153 .p_decode = (kxdrproc_t) xdr_decode_fhstatus,
143 .p_arglen = MNT_dirpath_sz, 154 .p_arglen = MNT_dirpath_sz,
144 .p_replen = MNT_fhstatus_sz, 155 .p_replen = MNT_fhstatus_sz,
145 .p_statidx = MNTPROC_MNT, 156 .p_statidx = MNTPROC_MNT,
146 .p_name = "MOUNT", 157 .p_name = "MOUNT",
147 }, 158 },
148}; 159};
149 160
150static struct rpc_procinfo mnt3_procedures[] = { 161static struct rpc_procinfo mnt3_procedures[] = {
151[MOUNTPROC3_MNT] = { 162 [MOUNTPROC3_MNT] = {
152 .p_proc = MOUNTPROC3_MNT, 163 .p_proc = MOUNTPROC3_MNT,
153 .p_encode = (kxdrproc_t) xdr_encode_dirpath, 164 .p_encode = (kxdrproc_t) xdr_encode_dirpath,
154 .p_decode = (kxdrproc_t) xdr_decode_fhstatus3, 165 .p_decode = (kxdrproc_t) xdr_decode_fhstatus3,
155 .p_arglen = MNT_dirpath_sz, 166 .p_arglen = MNT_dirpath_sz,
156 .p_replen = MNT_fhstatus3_sz, 167 .p_replen = MNT_fhstatus3_sz,
157 .p_statidx = MOUNTPROC3_MNT, 168 .p_statidx = MOUNTPROC3_MNT,
158 .p_name = "MOUNT", 169 .p_name = "MOUNT",
159 }, 170 },
160}; 171};
161 172
162 173
163static struct rpc_version mnt_version1 = { 174static struct rpc_version mnt_version1 = {
164 .number = 1, 175 .number = 1,
165 .nrprocs = 2, 176 .nrprocs = 2,
166 .procs = mnt_procedures 177 .procs = mnt_procedures,
167}; 178};
168 179
169static struct rpc_version mnt_version3 = { 180static struct rpc_version mnt_version3 = {
170 .number = 3, 181 .number = 3,
171 .nrprocs = 2, 182 .nrprocs = 2,
172 .procs = mnt3_procedures 183 .procs = mnt3_procedures,
173}; 184};
174 185
175static struct rpc_version * mnt_version[] = { 186static struct rpc_version *mnt_version[] = {
176 NULL, 187 NULL,
177 &mnt_version1, 188 &mnt_version1,
178 NULL, 189 NULL,
179 &mnt_version3, 190 &mnt_version3,
180}; 191};
181 192
182static struct rpc_stat mnt_stats; 193static struct rpc_stat mnt_stats;
183 194
184static struct rpc_program mnt_program = { 195static struct rpc_program mnt_program = {
185 .name = "mount", 196 .name = "mount",
186 .number = NFS_MNT_PROGRAM, 197 .number = NFS_MNT_PROGRAM,
187 .nrvers = ARRAY_SIZE(mnt_version), 198 .nrvers = ARRAY_SIZE(mnt_version),
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index cd3ca7b5d3d..7fcc78f2aa7 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -223,7 +223,7 @@ nfs_xdr_diropargs(struct rpc_rqst *req, __be32 *p, struct nfs_diropargs *args)
223static int 223static int
224nfs_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) 224nfs_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
225{ 225{
226 struct rpc_auth *auth = req->rq_task->tk_auth; 226 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
227 unsigned int replen; 227 unsigned int replen;
228 u32 offset = (u32)args->offset; 228 u32 offset = (u32)args->offset;
229 u32 count = args->count; 229 u32 count = args->count;
@@ -380,7 +380,7 @@ static int
380nfs_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs_readdirargs *args) 380nfs_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs_readdirargs *args)
381{ 381{
382 struct rpc_task *task = req->rq_task; 382 struct rpc_task *task = req->rq_task;
383 struct rpc_auth *auth = task->tk_auth; 383 struct rpc_auth *auth = task->tk_msg.rpc_cred->cr_auth;
384 unsigned int replen; 384 unsigned int replen;
385 u32 count = args->count; 385 u32 count = args->count;
386 386
@@ -541,7 +541,7 @@ nfs_xdr_diropres(struct rpc_rqst *req, __be32 *p, struct nfs_diropok *res)
541static int 541static int
542nfs_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_readlinkargs *args) 542nfs_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_readlinkargs *args)
543{ 543{
544 struct rpc_auth *auth = req->rq_task->tk_auth; 544 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
545 unsigned int replen; 545 unsigned int replen;
546 546
547 p = xdr_encode_fhandle(p, args->fh); 547 p = xdr_encode_fhandle(p, args->fh);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 45268d6def2..814d886b6aa 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -335,9 +335,7 @@ again:
335 * not sure this buys us anything (and I'd have 335 * not sure this buys us anything (and I'd have
336 * to revamp the NFSv3 XDR code) */ 336 * to revamp the NFSv3 XDR code) */
337 status = nfs3_proc_setattr(dentry, &fattr, sattr); 337 status = nfs3_proc_setattr(dentry, &fattr, sattr);
338 if (status == 0) 338 nfs_post_op_update_inode(dentry->d_inode, &fattr);
339 nfs_setattr_update_inode(dentry->d_inode, sattr);
340 nfs_refresh_inode(dentry->d_inode, &fattr);
341 dprintk("NFS reply setattr (post-create): %d\n", status); 339 dprintk("NFS reply setattr (post-create): %d\n", status);
342 } 340 }
343 if (status != 0) 341 if (status != 0)
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index b51df8eb9f0..b4647a22f34 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -319,7 +319,7 @@ nfs3_xdr_accessargs(struct rpc_rqst *req, __be32 *p, struct nfs3_accessargs *arg
319static int 319static int
320nfs3_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) 320nfs3_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
321{ 321{
322 struct rpc_auth *auth = req->rq_task->tk_auth; 322 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
323 unsigned int replen; 323 unsigned int replen;
324 u32 count = args->count; 324 u32 count = args->count;
325 325
@@ -458,7 +458,7 @@ nfs3_xdr_linkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_linkargs *args)
458static int 458static int
459nfs3_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirargs *args) 459nfs3_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirargs *args)
460{ 460{
461 struct rpc_auth *auth = req->rq_task->tk_auth; 461 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
462 unsigned int replen; 462 unsigned int replen;
463 u32 count = args->count; 463 u32 count = args->count;
464 464
@@ -643,7 +643,7 @@ static int
643nfs3_xdr_getaclargs(struct rpc_rqst *req, __be32 *p, 643nfs3_xdr_getaclargs(struct rpc_rqst *req, __be32 *p,
644 struct nfs3_getaclargs *args) 644 struct nfs3_getaclargs *args)
645{ 645{
646 struct rpc_auth *auth = req->rq_task->tk_auth; 646 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
647 unsigned int replen; 647 unsigned int replen;
648 648
649 p = xdr_encode_fhandle(p, args->fh); 649 p = xdr_encode_fhandle(p, args->fh);
@@ -773,7 +773,7 @@ nfs3_xdr_accessres(struct rpc_rqst *req, __be32 *p, struct nfs3_accessres *res)
773static int 773static int
774nfs3_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readlinkargs *args) 774nfs3_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readlinkargs *args)
775{ 775{
776 struct rpc_auth *auth = req->rq_task->tk_auth; 776 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
777 unsigned int replen; 777 unsigned int replen;
778 778
779 p = xdr_encode_fhandle(p, args->fh); 779 p = xdr_encode_fhandle(p, args->fh);
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index cf3a17eb5c0..6c028e734fe 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -70,19 +70,26 @@ static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status
70 seqid->flags |= NFS_SEQID_CONFIRMED; 70 seqid->flags |= NFS_SEQID_CONFIRMED;
71} 71}
72 72
73struct nfs_unique_id {
74 struct rb_node rb_node;
75 __u64 id;
76};
77
73/* 78/*
74 * NFS4 state_owners and lock_owners are simply labels for ordered 79 * NFS4 state_owners and lock_owners are simply labels for ordered
75 * sequences of RPC calls. Their sole purpose is to provide once-only 80 * sequences of RPC calls. Their sole purpose is to provide once-only
76 * semantics by allowing the server to identify replayed requests. 81 * semantics by allowing the server to identify replayed requests.
77 */ 82 */
78struct nfs4_state_owner { 83struct nfs4_state_owner {
79 spinlock_t so_lock; 84 struct nfs_unique_id so_owner_id;
80 struct list_head so_list; /* per-clientid list of state_owners */
81 struct nfs_client *so_client; 85 struct nfs_client *so_client;
82 u32 so_id; /* 32-bit identifier, unique */ 86 struct nfs_server *so_server;
83 atomic_t so_count; 87 struct rb_node so_client_node;
84 88
85 struct rpc_cred *so_cred; /* Associated cred */ 89 struct rpc_cred *so_cred; /* Associated cred */
90
91 spinlock_t so_lock;
92 atomic_t so_count;
86 struct list_head so_states; 93 struct list_head so_states;
87 struct list_head so_delegations; 94 struct list_head so_delegations;
88 struct nfs_seqid_counter so_seqid; 95 struct nfs_seqid_counter so_seqid;
@@ -108,7 +115,7 @@ struct nfs4_lock_state {
108#define NFS_LOCK_INITIALIZED 1 115#define NFS_LOCK_INITIALIZED 1
109 int ls_flags; 116 int ls_flags;
110 struct nfs_seqid_counter ls_seqid; 117 struct nfs_seqid_counter ls_seqid;
111 u32 ls_id; 118 struct nfs_unique_id ls_id;
112 nfs4_stateid ls_stateid; 119 nfs4_stateid ls_stateid;
113 atomic_t ls_count; 120 atomic_t ls_count;
114}; 121};
@@ -116,7 +123,10 @@ struct nfs4_lock_state {
116/* bits for nfs4_state->flags */ 123/* bits for nfs4_state->flags */
117enum { 124enum {
118 LK_STATE_IN_USE, 125 LK_STATE_IN_USE,
119 NFS_DELEGATED_STATE, 126 NFS_DELEGATED_STATE, /* Current stateid is delegation */
127 NFS_O_RDONLY_STATE, /* OPEN stateid has read-only state */
128 NFS_O_WRONLY_STATE, /* OPEN stateid has write-only state */
129 NFS_O_RDWR_STATE, /* OPEN stateid has read/write state */
120}; 130};
121 131
122struct nfs4_state { 132struct nfs4_state {
@@ -130,11 +140,14 @@ struct nfs4_state {
130 unsigned long flags; /* Do we hold any locks? */ 140 unsigned long flags; /* Do we hold any locks? */
131 spinlock_t state_lock; /* Protects the lock_states list */ 141 spinlock_t state_lock; /* Protects the lock_states list */
132 142
133 nfs4_stateid stateid; 143 seqlock_t seqlock; /* Protects the stateid/open_stateid */
144 nfs4_stateid stateid; /* Current stateid: may be delegation */
145 nfs4_stateid open_stateid; /* OPEN stateid */
134 146
135 unsigned int n_rdonly; 147 /* The following 3 fields are protected by owner->so_lock */
136 unsigned int n_wronly; 148 unsigned int n_rdonly; /* Number of read-only references */
137 unsigned int n_rdwr; 149 unsigned int n_wronly; /* Number of write-only references */
150 unsigned int n_rdwr; /* Number of read/write references */
138 int state; /* State on the server (R,W, or RW) */ 151 int state; /* State on the server (R,W, or RW) */
139 atomic_t count; 152 atomic_t count;
140}; 153};
@@ -165,7 +178,7 @@ extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struc
165extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *); 178extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *);
166extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *); 179extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
167extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *); 180extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
168extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state); 181extern int nfs4_do_close(struct path *path, struct nfs4_state *state);
169extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); 182extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
170extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); 183extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
171extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); 184extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
@@ -189,14 +202,13 @@ extern void nfs4_renew_state(struct work_struct *);
189 202
190/* nfs4state.c */ 203/* nfs4state.c */
191struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp); 204struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp);
192extern u32 nfs4_alloc_lockowner_id(struct nfs_client *);
193 205
194extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); 206extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
195extern void nfs4_put_state_owner(struct nfs4_state_owner *); 207extern void nfs4_put_state_owner(struct nfs4_state_owner *);
196extern void nfs4_drop_state_owner(struct nfs4_state_owner *); 208extern void nfs4_drop_state_owner(struct nfs4_state_owner *);
197extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); 209extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
198extern void nfs4_put_open_state(struct nfs4_state *); 210extern void nfs4_put_open_state(struct nfs4_state *);
199extern void nfs4_close_state(struct nfs4_state *, mode_t); 211extern void nfs4_close_state(struct path *, struct nfs4_state *, mode_t);
200extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t); 212extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t);
201extern void nfs4_schedule_state_recovery(struct nfs_client *); 213extern void nfs4_schedule_state_recovery(struct nfs_client *);
202extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); 214extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
@@ -222,7 +234,7 @@ extern struct svc_version nfs4_callback_version1;
222 234
223#else 235#else
224 236
225#define nfs4_close_state(a, b) do { } while (0) 237#define nfs4_close_state(a, b, c) do { } while (0)
226 238
227#endif /* CONFIG_NFS_V4 */ 239#endif /* CONFIG_NFS_V4 */
228#endif /* __LINUX_FS_NFS_NFS4_FS.H */ 240#endif /* __LINUX_FS_NFS_NFS4_FS.H */
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 648e0ac0f90..fee2da856c9 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -65,6 +65,7 @@ static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *)
65static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry); 65static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry);
66static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception); 66static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception);
67static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp); 67static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp);
68static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openflags);
68 69
69/* Prevent leaks of NFSv4 errors into userland */ 70/* Prevent leaks of NFSv4 errors into userland */
70int nfs4_map_errors(int err) 71int nfs4_map_errors(int err)
@@ -214,27 +215,39 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
214} 215}
215 216
216struct nfs4_opendata { 217struct nfs4_opendata {
217 atomic_t count; 218 struct kref kref;
218 struct nfs_openargs o_arg; 219 struct nfs_openargs o_arg;
219 struct nfs_openres o_res; 220 struct nfs_openres o_res;
220 struct nfs_open_confirmargs c_arg; 221 struct nfs_open_confirmargs c_arg;
221 struct nfs_open_confirmres c_res; 222 struct nfs_open_confirmres c_res;
222 struct nfs_fattr f_attr; 223 struct nfs_fattr f_attr;
223 struct nfs_fattr dir_attr; 224 struct nfs_fattr dir_attr;
224 struct dentry *dentry; 225 struct path path;
225 struct dentry *dir; 226 struct dentry *dir;
226 struct nfs4_state_owner *owner; 227 struct nfs4_state_owner *owner;
228 struct nfs4_state *state;
227 struct iattr attrs; 229 struct iattr attrs;
228 unsigned long timestamp; 230 unsigned long timestamp;
231 unsigned int rpc_done : 1;
229 int rpc_status; 232 int rpc_status;
230 int cancelled; 233 int cancelled;
231}; 234};
232 235
233static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, 236
237static void nfs4_init_opendata_res(struct nfs4_opendata *p)
238{
239 p->o_res.f_attr = &p->f_attr;
240 p->o_res.dir_attr = &p->dir_attr;
241 p->o_res.server = p->o_arg.server;
242 nfs_fattr_init(&p->f_attr);
243 nfs_fattr_init(&p->dir_attr);
244}
245
246static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
234 struct nfs4_state_owner *sp, int flags, 247 struct nfs4_state_owner *sp, int flags,
235 const struct iattr *attrs) 248 const struct iattr *attrs)
236{ 249{
237 struct dentry *parent = dget_parent(dentry); 250 struct dentry *parent = dget_parent(path->dentry);
238 struct inode *dir = parent->d_inode; 251 struct inode *dir = parent->d_inode;
239 struct nfs_server *server = NFS_SERVER(dir); 252 struct nfs_server *server = NFS_SERVER(dir);
240 struct nfs4_opendata *p; 253 struct nfs4_opendata *p;
@@ -245,24 +258,19 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
245 p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid); 258 p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid);
246 if (p->o_arg.seqid == NULL) 259 if (p->o_arg.seqid == NULL)
247 goto err_free; 260 goto err_free;
248 atomic_set(&p->count, 1); 261 p->path.mnt = mntget(path->mnt);
249 p->dentry = dget(dentry); 262 p->path.dentry = dget(path->dentry);
250 p->dir = parent; 263 p->dir = parent;
251 p->owner = sp; 264 p->owner = sp;
252 atomic_inc(&sp->so_count); 265 atomic_inc(&sp->so_count);
253 p->o_arg.fh = NFS_FH(dir); 266 p->o_arg.fh = NFS_FH(dir);
254 p->o_arg.open_flags = flags, 267 p->o_arg.open_flags = flags,
255 p->o_arg.clientid = server->nfs_client->cl_clientid; 268 p->o_arg.clientid = server->nfs_client->cl_clientid;
256 p->o_arg.id = sp->so_id; 269 p->o_arg.id = sp->so_owner_id.id;
257 p->o_arg.name = &dentry->d_name; 270 p->o_arg.name = &p->path.dentry->d_name;
258 p->o_arg.server = server; 271 p->o_arg.server = server;
259 p->o_arg.bitmask = server->attr_bitmask; 272 p->o_arg.bitmask = server->attr_bitmask;
260 p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; 273 p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
261 p->o_res.f_attr = &p->f_attr;
262 p->o_res.dir_attr = &p->dir_attr;
263 p->o_res.server = server;
264 nfs_fattr_init(&p->f_attr);
265 nfs_fattr_init(&p->dir_attr);
266 if (flags & O_EXCL) { 274 if (flags & O_EXCL) {
267 u32 *s = (u32 *) p->o_arg.u.verifier.data; 275 u32 *s = (u32 *) p->o_arg.u.verifier.data;
268 s[0] = jiffies; 276 s[0] = jiffies;
@@ -274,6 +282,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
274 p->c_arg.fh = &p->o_res.fh; 282 p->c_arg.fh = &p->o_res.fh;
275 p->c_arg.stateid = &p->o_res.stateid; 283 p->c_arg.stateid = &p->o_res.stateid;
276 p->c_arg.seqid = p->o_arg.seqid; 284 p->c_arg.seqid = p->o_arg.seqid;
285 nfs4_init_opendata_res(p);
286 kref_init(&p->kref);
277 return p; 287 return p;
278err_free: 288err_free:
279 kfree(p); 289 kfree(p);
@@ -282,27 +292,25 @@ err:
282 return NULL; 292 return NULL;
283} 293}
284 294
285static void nfs4_opendata_free(struct nfs4_opendata *p) 295static void nfs4_opendata_free(struct kref *kref)
286{ 296{
287 if (p != NULL && atomic_dec_and_test(&p->count)) { 297 struct nfs4_opendata *p = container_of(kref,
288 nfs_free_seqid(p->o_arg.seqid); 298 struct nfs4_opendata, kref);
289 nfs4_put_state_owner(p->owner); 299
290 dput(p->dir); 300 nfs_free_seqid(p->o_arg.seqid);
291 dput(p->dentry); 301 if (p->state != NULL)
292 kfree(p); 302 nfs4_put_open_state(p->state);
293 } 303 nfs4_put_state_owner(p->owner);
304 dput(p->dir);
305 dput(p->path.dentry);
306 mntput(p->path.mnt);
307 kfree(p);
294} 308}
295 309
296/* Helper for asynchronous RPC calls */ 310static void nfs4_opendata_put(struct nfs4_opendata *p)
297static int nfs4_call_async(struct rpc_clnt *clnt,
298 const struct rpc_call_ops *tk_ops, void *calldata)
299{ 311{
300 struct rpc_task *task; 312 if (p != NULL)
301 313 kref_put(&p->kref, nfs4_opendata_free);
302 if (!(task = rpc_new_task(clnt, RPC_TASK_ASYNC, tk_ops, calldata)))
303 return -ENOMEM;
304 rpc_execute(task);
305 return 0;
306} 314}
307 315
308static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task) 316static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task)
@@ -316,7 +324,34 @@ static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task)
316 return ret; 324 return ret;
317} 325}
318 326
319static inline void update_open_stateflags(struct nfs4_state *state, mode_t open_flags) 327static int can_open_cached(struct nfs4_state *state, int mode)
328{
329 int ret = 0;
330 switch (mode & (FMODE_READ|FMODE_WRITE|O_EXCL)) {
331 case FMODE_READ:
332 ret |= test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0;
333 ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0;
334 break;
335 case FMODE_WRITE:
336 ret |= test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0;
337 ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0;
338 break;
339 case FMODE_READ|FMODE_WRITE:
340 ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0;
341 }
342 return ret;
343}
344
345static int can_open_delegated(struct nfs_delegation *delegation, mode_t open_flags)
346{
347 if ((delegation->type & open_flags) != open_flags)
348 return 0;
349 if (delegation->flags & NFS_DELEGATION_NEED_RECLAIM)
350 return 0;
351 return 1;
352}
353
354static void update_open_stateflags(struct nfs4_state *state, mode_t open_flags)
320{ 355{
321 switch (open_flags) { 356 switch (open_flags) {
322 case FMODE_WRITE: 357 case FMODE_WRITE:
@@ -328,41 +363,176 @@ static inline void update_open_stateflags(struct nfs4_state *state, mode_t open_
328 case FMODE_READ|FMODE_WRITE: 363 case FMODE_READ|FMODE_WRITE:
329 state->n_rdwr++; 364 state->n_rdwr++;
330 } 365 }
366 nfs4_state_set_mode_locked(state, state->state | open_flags);
331} 367}
332 368
333static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags) 369static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags)
334{ 370{
335 struct inode *inode = state->inode; 371 if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
372 memcpy(state->stateid.data, stateid->data, sizeof(state->stateid.data));
373 memcpy(state->open_stateid.data, stateid->data, sizeof(state->open_stateid.data));
374 switch (open_flags) {
375 case FMODE_READ:
376 set_bit(NFS_O_RDONLY_STATE, &state->flags);
377 break;
378 case FMODE_WRITE:
379 set_bit(NFS_O_WRONLY_STATE, &state->flags);
380 break;
381 case FMODE_READ|FMODE_WRITE:
382 set_bit(NFS_O_RDWR_STATE, &state->flags);
383 }
384}
385
386static void nfs_set_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags)
387{
388 write_seqlock(&state->seqlock);
389 nfs_set_open_stateid_locked(state, stateid, open_flags);
390 write_sequnlock(&state->seqlock);
391}
336 392
393static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, nfs4_stateid *deleg_stateid, int open_flags)
394{
337 open_flags &= (FMODE_READ|FMODE_WRITE); 395 open_flags &= (FMODE_READ|FMODE_WRITE);
338 /* Protect against nfs4_find_state_byowner() */ 396 /*
397 * Protect the call to nfs4_state_set_mode_locked and
398 * serialise the stateid update
399 */
400 write_seqlock(&state->seqlock);
401 if (deleg_stateid != NULL) {
402 memcpy(state->stateid.data, deleg_stateid->data, sizeof(state->stateid.data));
403 set_bit(NFS_DELEGATED_STATE, &state->flags);
404 }
405 if (open_stateid != NULL)
406 nfs_set_open_stateid_locked(state, open_stateid, open_flags);
407 write_sequnlock(&state->seqlock);
339 spin_lock(&state->owner->so_lock); 408 spin_lock(&state->owner->so_lock);
340 spin_lock(&inode->i_lock);
341 memcpy(&state->stateid, stateid, sizeof(state->stateid));
342 update_open_stateflags(state, open_flags); 409 update_open_stateflags(state, open_flags);
343 nfs4_state_set_mode_locked(state, state->state | open_flags);
344 spin_unlock(&inode->i_lock);
345 spin_unlock(&state->owner->so_lock); 410 spin_unlock(&state->owner->so_lock);
346} 411}
347 412
413static void nfs4_return_incompatible_delegation(struct inode *inode, mode_t open_flags)
414{
415 struct nfs_delegation *delegation;
416
417 rcu_read_lock();
418 delegation = rcu_dereference(NFS_I(inode)->delegation);
419 if (delegation == NULL || (delegation->type & open_flags) == open_flags) {
420 rcu_read_unlock();
421 return;
422 }
423 rcu_read_unlock();
424 nfs_inode_return_delegation(inode);
425}
426
427static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
428{
429 struct nfs4_state *state = opendata->state;
430 struct nfs_inode *nfsi = NFS_I(state->inode);
431 struct nfs_delegation *delegation;
432 int open_mode = opendata->o_arg.open_flags & (FMODE_READ|FMODE_WRITE|O_EXCL);
433 nfs4_stateid stateid;
434 int ret = -EAGAIN;
435
436 rcu_read_lock();
437 delegation = rcu_dereference(nfsi->delegation);
438 for (;;) {
439 if (can_open_cached(state, open_mode)) {
440 spin_lock(&state->owner->so_lock);
441 if (can_open_cached(state, open_mode)) {
442 update_open_stateflags(state, open_mode);
443 spin_unlock(&state->owner->so_lock);
444 rcu_read_unlock();
445 goto out_return_state;
446 }
447 spin_unlock(&state->owner->so_lock);
448 }
449 if (delegation == NULL)
450 break;
451 if (!can_open_delegated(delegation, open_mode))
452 break;
453 /* Save the delegation */
454 memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data));
455 rcu_read_unlock();
456 lock_kernel();
457 ret = _nfs4_do_access(state->inode, state->owner->so_cred, open_mode);
458 unlock_kernel();
459 if (ret != 0)
460 goto out;
461 ret = -EAGAIN;
462 rcu_read_lock();
463 delegation = rcu_dereference(nfsi->delegation);
464 /* If no delegation, try a cached open */
465 if (delegation == NULL)
466 continue;
467 /* Is the delegation still valid? */
468 if (memcmp(stateid.data, delegation->stateid.data, sizeof(stateid.data)) != 0)
469 continue;
470 rcu_read_unlock();
471 update_open_stateid(state, NULL, &stateid, open_mode);
472 goto out_return_state;
473 }
474 rcu_read_unlock();
475out:
476 return ERR_PTR(ret);
477out_return_state:
478 atomic_inc(&state->count);
479 return state;
480}
481
348static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) 482static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data)
349{ 483{
350 struct inode *inode; 484 struct inode *inode;
351 struct nfs4_state *state = NULL; 485 struct nfs4_state *state = NULL;
486 struct nfs_delegation *delegation;
487 nfs4_stateid *deleg_stateid = NULL;
488 int ret;
352 489
353 if (!(data->f_attr.valid & NFS_ATTR_FATTR)) 490 if (!data->rpc_done) {
491 state = nfs4_try_open_cached(data);
354 goto out; 492 goto out;
493 }
494
495 ret = -EAGAIN;
496 if (!(data->f_attr.valid & NFS_ATTR_FATTR))
497 goto err;
355 inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr); 498 inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr);
499 ret = PTR_ERR(inode);
356 if (IS_ERR(inode)) 500 if (IS_ERR(inode))
357 goto out; 501 goto err;
502 ret = -ENOMEM;
358 state = nfs4_get_open_state(inode, data->owner); 503 state = nfs4_get_open_state(inode, data->owner);
359 if (state == NULL) 504 if (state == NULL)
360 goto put_inode; 505 goto err_put_inode;
361 update_open_stateid(state, &data->o_res.stateid, data->o_arg.open_flags); 506 if (data->o_res.delegation_type != 0) {
362put_inode: 507 int delegation_flags = 0;
508
509 rcu_read_lock();
510 delegation = rcu_dereference(NFS_I(inode)->delegation);
511 if (delegation)
512 delegation_flags = delegation->flags;
513 rcu_read_unlock();
514 if (!(delegation_flags & NFS_DELEGATION_NEED_RECLAIM))
515 nfs_inode_set_delegation(state->inode,
516 data->owner->so_cred,
517 &data->o_res);
518 else
519 nfs_inode_reclaim_delegation(state->inode,
520 data->owner->so_cred,
521 &data->o_res);
522 }
523 rcu_read_lock();
524 delegation = rcu_dereference(NFS_I(inode)->delegation);
525 if (delegation != NULL)
526 deleg_stateid = &delegation->stateid;
527 update_open_stateid(state, &data->o_res.stateid, deleg_stateid, data->o_arg.open_flags);
528 rcu_read_unlock();
363 iput(inode); 529 iput(inode);
364out: 530out:
365 return state; 531 return state;
532err_put_inode:
533 iput(inode);
534err:
535 return ERR_PTR(ret);
366} 536}
367 537
368static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state) 538static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state)
@@ -382,79 +552,66 @@ static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *
382 return ERR_PTR(-ENOENT); 552 return ERR_PTR(-ENOENT);
383} 553}
384 554
385static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, mode_t openflags, nfs4_stateid *stateid) 555static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, mode_t openflags, struct nfs4_state **res)
386{ 556{
557 struct nfs4_state *newstate;
387 int ret; 558 int ret;
388 559
389 opendata->o_arg.open_flags = openflags; 560 opendata->o_arg.open_flags = openflags;
561 memset(&opendata->o_res, 0, sizeof(opendata->o_res));
562 memset(&opendata->c_res, 0, sizeof(opendata->c_res));
563 nfs4_init_opendata_res(opendata);
390 ret = _nfs4_proc_open(opendata); 564 ret = _nfs4_proc_open(opendata);
391 if (ret != 0) 565 if (ret != 0)
392 return ret; 566 return ret;
393 memcpy(stateid->data, opendata->o_res.stateid.data, 567 newstate = nfs4_opendata_to_nfs4_state(opendata);
394 sizeof(stateid->data)); 568 if (IS_ERR(newstate))
569 return PTR_ERR(newstate);
570 nfs4_close_state(&opendata->path, newstate, openflags);
571 *res = newstate;
395 return 0; 572 return 0;
396} 573}
397 574
398static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *state) 575static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *state)
399{ 576{
400 nfs4_stateid stateid;
401 struct nfs4_state *newstate; 577 struct nfs4_state *newstate;
402 int mode = 0;
403 int delegation = 0;
404 int ret; 578 int ret;
405 579
406 /* memory barrier prior to reading state->n_* */ 580 /* memory barrier prior to reading state->n_* */
581 clear_bit(NFS_DELEGATED_STATE, &state->flags);
407 smp_rmb(); 582 smp_rmb();
408 if (state->n_rdwr != 0) { 583 if (state->n_rdwr != 0) {
409 ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &stateid); 584 ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate);
410 if (ret != 0) 585 if (ret != 0)
411 return ret; 586 return ret;
412 mode |= FMODE_READ|FMODE_WRITE; 587 if (newstate != state)
413 if (opendata->o_res.delegation_type != 0) 588 return -ESTALE;
414 delegation = opendata->o_res.delegation_type;
415 smp_rmb();
416 } 589 }
417 if (state->n_wronly != 0) { 590 if (state->n_wronly != 0) {
418 ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &stateid); 591 ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate);
419 if (ret != 0) 592 if (ret != 0)
420 return ret; 593 return ret;
421 mode |= FMODE_WRITE; 594 if (newstate != state)
422 if (opendata->o_res.delegation_type != 0) 595 return -ESTALE;
423 delegation = opendata->o_res.delegation_type;
424 smp_rmb();
425 } 596 }
426 if (state->n_rdonly != 0) { 597 if (state->n_rdonly != 0) {
427 ret = nfs4_open_recover_helper(opendata, FMODE_READ, &stateid); 598 ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate);
428 if (ret != 0) 599 if (ret != 0)
429 return ret; 600 return ret;
430 mode |= FMODE_READ; 601 if (newstate != state)
602 return -ESTALE;
431 } 603 }
432 clear_bit(NFS_DELEGATED_STATE, &state->flags); 604 /*
433 if (mode == 0) 605 * We may have performed cached opens for all three recoveries.
434 return 0; 606 * Check if we need to update the current stateid.
435 if (opendata->o_res.delegation_type == 0) 607 */
436 opendata->o_res.delegation_type = delegation; 608 if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0 &&
437 opendata->o_arg.open_flags |= mode; 609 memcmp(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)) != 0) {
438 newstate = nfs4_opendata_to_nfs4_state(opendata); 610 write_seqlock(&state->seqlock);
439 if (newstate != NULL) { 611 if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
440 if (opendata->o_res.delegation_type != 0) { 612 memcpy(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data));
441 struct nfs_inode *nfsi = NFS_I(newstate->inode); 613 write_sequnlock(&state->seqlock);
442 int delegation_flags = 0;
443 if (nfsi->delegation)
444 delegation_flags = nfsi->delegation->flags;
445 if (!(delegation_flags & NFS_DELEGATION_NEED_RECLAIM))
446 nfs_inode_set_delegation(newstate->inode,
447 opendata->owner->so_cred,
448 &opendata->o_res);
449 else
450 nfs_inode_reclaim_delegation(newstate->inode,
451 opendata->owner->so_cred,
452 &opendata->o_res);
453 }
454 nfs4_close_state(newstate, opendata->o_arg.open_flags);
455 } 614 }
456 if (newstate != state)
457 return -ESTALE;
458 return 0; 615 return 0;
459} 616}
460 617
@@ -462,41 +619,37 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
462 * OPEN_RECLAIM: 619 * OPEN_RECLAIM:
463 * reclaim state on the server after a reboot. 620 * reclaim state on the server after a reboot.
464 */ 621 */
465static int _nfs4_do_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) 622static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state *state)
466{ 623{
467 struct nfs_delegation *delegation = NFS_I(state->inode)->delegation; 624 struct nfs_delegation *delegation;
468 struct nfs4_opendata *opendata; 625 struct nfs4_opendata *opendata;
469 int delegation_type = 0; 626 int delegation_type = 0;
470 int status; 627 int status;
471 628
472 if (delegation != NULL) { 629 opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, NULL);
473 if (!(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) {
474 memcpy(&state->stateid, &delegation->stateid,
475 sizeof(state->stateid));
476 set_bit(NFS_DELEGATED_STATE, &state->flags);
477 return 0;
478 }
479 delegation_type = delegation->type;
480 }
481 opendata = nfs4_opendata_alloc(dentry, sp, 0, NULL);
482 if (opendata == NULL) 630 if (opendata == NULL)
483 return -ENOMEM; 631 return -ENOMEM;
484 opendata->o_arg.claim = NFS4_OPEN_CLAIM_PREVIOUS; 632 opendata->o_arg.claim = NFS4_OPEN_CLAIM_PREVIOUS;
485 opendata->o_arg.fh = NFS_FH(state->inode); 633 opendata->o_arg.fh = NFS_FH(state->inode);
486 nfs_copy_fh(&opendata->o_res.fh, opendata->o_arg.fh); 634 nfs_copy_fh(&opendata->o_res.fh, opendata->o_arg.fh);
635 rcu_read_lock();
636 delegation = rcu_dereference(NFS_I(state->inode)->delegation);
637 if (delegation != NULL && (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) != 0)
638 delegation_type = delegation->flags;
639 rcu_read_unlock();
487 opendata->o_arg.u.delegation_type = delegation_type; 640 opendata->o_arg.u.delegation_type = delegation_type;
488 status = nfs4_open_recover(opendata, state); 641 status = nfs4_open_recover(opendata, state);
489 nfs4_opendata_free(opendata); 642 nfs4_opendata_put(opendata);
490 return status; 643 return status;
491} 644}
492 645
493static int nfs4_do_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) 646static int nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state *state)
494{ 647{
495 struct nfs_server *server = NFS_SERVER(state->inode); 648 struct nfs_server *server = NFS_SERVER(state->inode);
496 struct nfs4_exception exception = { }; 649 struct nfs4_exception exception = { };
497 int err; 650 int err;
498 do { 651 do {
499 err = _nfs4_do_open_reclaim(sp, state, dentry); 652 err = _nfs4_do_open_reclaim(ctx, state);
500 if (err != -NFS4ERR_DELAY) 653 if (err != -NFS4ERR_DELAY)
501 break; 654 break;
502 nfs4_handle_exception(server, err, &exception); 655 nfs4_handle_exception(server, err, &exception);
@@ -512,37 +665,35 @@ static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *sta
512 ctx = nfs4_state_find_open_context(state); 665 ctx = nfs4_state_find_open_context(state);
513 if (IS_ERR(ctx)) 666 if (IS_ERR(ctx))
514 return PTR_ERR(ctx); 667 return PTR_ERR(ctx);
515 ret = nfs4_do_open_reclaim(sp, state, ctx->dentry); 668 ret = nfs4_do_open_reclaim(ctx, state);
516 put_nfs_open_context(ctx); 669 put_nfs_open_context(ctx);
517 return ret; 670 return ret;
518} 671}
519 672
520static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state) 673static int _nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid)
521{ 674{
522 struct nfs4_state_owner *sp = state->owner; 675 struct nfs4_state_owner *sp = state->owner;
523 struct nfs4_opendata *opendata; 676 struct nfs4_opendata *opendata;
524 int ret; 677 int ret;
525 678
526 if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) 679 opendata = nfs4_opendata_alloc(&ctx->path, sp, 0, NULL);
527 return 0;
528 opendata = nfs4_opendata_alloc(dentry, sp, 0, NULL);
529 if (opendata == NULL) 680 if (opendata == NULL)
530 return -ENOMEM; 681 return -ENOMEM;
531 opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR; 682 opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR;
532 memcpy(opendata->o_arg.u.delegation.data, state->stateid.data, 683 memcpy(opendata->o_arg.u.delegation.data, stateid->data,
533 sizeof(opendata->o_arg.u.delegation.data)); 684 sizeof(opendata->o_arg.u.delegation.data));
534 ret = nfs4_open_recover(opendata, state); 685 ret = nfs4_open_recover(opendata, state);
535 nfs4_opendata_free(opendata); 686 nfs4_opendata_put(opendata);
536 return ret; 687 return ret;
537} 688}
538 689
539int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state) 690int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid)
540{ 691{
541 struct nfs4_exception exception = { }; 692 struct nfs4_exception exception = { };
542 struct nfs_server *server = NFS_SERVER(dentry->d_inode); 693 struct nfs_server *server = NFS_SERVER(state->inode);
543 int err; 694 int err;
544 do { 695 do {
545 err = _nfs4_open_delegation_recall(dentry, state); 696 err = _nfs4_open_delegation_recall(ctx, state, stateid);
546 switch (err) { 697 switch (err) {
547 case 0: 698 case 0:
548 return err; 699 return err;
@@ -582,9 +733,10 @@ static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata)
582 memcpy(data->o_res.stateid.data, data->c_res.stateid.data, 733 memcpy(data->o_res.stateid.data, data->c_res.stateid.data,
583 sizeof(data->o_res.stateid.data)); 734 sizeof(data->o_res.stateid.data));
584 renew_lease(data->o_res.server, data->timestamp); 735 renew_lease(data->o_res.server, data->timestamp);
736 data->rpc_done = 1;
585 } 737 }
586 nfs_increment_open_seqid(data->rpc_status, data->c_arg.seqid);
587 nfs_confirm_seqid(&data->owner->so_seqid, data->rpc_status); 738 nfs_confirm_seqid(&data->owner->so_seqid, data->rpc_status);
739 nfs_increment_open_seqid(data->rpc_status, data->c_arg.seqid);
588} 740}
589 741
590static void nfs4_open_confirm_release(void *calldata) 742static void nfs4_open_confirm_release(void *calldata)
@@ -596,14 +748,14 @@ static void nfs4_open_confirm_release(void *calldata)
596 if (data->cancelled == 0) 748 if (data->cancelled == 0)
597 goto out_free; 749 goto out_free;
598 /* In case of error, no cleanup! */ 750 /* In case of error, no cleanup! */
599 if (data->rpc_status != 0) 751 if (!data->rpc_done)
600 goto out_free; 752 goto out_free;
601 nfs_confirm_seqid(&data->owner->so_seqid, 0); 753 nfs_confirm_seqid(&data->owner->so_seqid, 0);
602 state = nfs4_opendata_to_nfs4_state(data); 754 state = nfs4_opendata_to_nfs4_state(data);
603 if (state != NULL) 755 if (!IS_ERR(state))
604 nfs4_close_state(state, data->o_arg.open_flags); 756 nfs4_close_state(&data->path, state, data->o_arg.open_flags);
605out_free: 757out_free:
606 nfs4_opendata_free(data); 758 nfs4_opendata_put(data);
607} 759}
608 760
609static const struct rpc_call_ops nfs4_open_confirm_ops = { 761static const struct rpc_call_ops nfs4_open_confirm_ops = {
@@ -621,12 +773,9 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
621 struct rpc_task *task; 773 struct rpc_task *task;
622 int status; 774 int status;
623 775
624 atomic_inc(&data->count); 776 kref_get(&data->kref);
625 /* 777 data->rpc_done = 0;
626 * If rpc_run_task() ends up calling ->rpc_release(), we 778 data->rpc_status = 0;
627 * want to ensure that it takes the 'error' code path.
628 */
629 data->rpc_status = -ENOMEM;
630 task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_confirm_ops, data); 779 task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_confirm_ops, data);
631 if (IS_ERR(task)) 780 if (IS_ERR(task))
632 return PTR_ERR(task); 781 return PTR_ERR(task);
@@ -653,13 +802,35 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
653 802
654 if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0) 803 if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0)
655 return; 804 return;
805 /*
806 * Check if we still need to send an OPEN call, or if we can use
807 * a delegation instead.
808 */
809 if (data->state != NULL) {
810 struct nfs_delegation *delegation;
811
812 if (can_open_cached(data->state, data->o_arg.open_flags & (FMODE_READ|FMODE_WRITE|O_EXCL)))
813 goto out_no_action;
814 rcu_read_lock();
815 delegation = rcu_dereference(NFS_I(data->state->inode)->delegation);
816 if (delegation != NULL &&
817 (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0) {
818 rcu_read_unlock();
819 goto out_no_action;
820 }
821 rcu_read_unlock();
822 }
656 /* Update sequence id. */ 823 /* Update sequence id. */
657 data->o_arg.id = sp->so_id; 824 data->o_arg.id = sp->so_owner_id.id;
658 data->o_arg.clientid = sp->so_client->cl_clientid; 825 data->o_arg.clientid = sp->so_client->cl_clientid;
659 if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) 826 if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS)
660 msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; 827 msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
661 data->timestamp = jiffies; 828 data->timestamp = jiffies;
662 rpc_call_setup(task, &msg, 0); 829 rpc_call_setup(task, &msg, 0);
830 return;
831out_no_action:
832 task->tk_action = NULL;
833
663} 834}
664 835
665static void nfs4_open_done(struct rpc_task *task, void *calldata) 836static void nfs4_open_done(struct rpc_task *task, void *calldata)
@@ -683,8 +854,11 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata)
683 data->rpc_status = -ENOTDIR; 854 data->rpc_status = -ENOTDIR;
684 } 855 }
685 renew_lease(data->o_res.server, data->timestamp); 856 renew_lease(data->o_res.server, data->timestamp);
857 if (!(data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM))
858 nfs_confirm_seqid(&data->owner->so_seqid, 0);
686 } 859 }
687 nfs_increment_open_seqid(data->rpc_status, data->o_arg.seqid); 860 nfs_increment_open_seqid(data->rpc_status, data->o_arg.seqid);
861 data->rpc_done = 1;
688} 862}
689 863
690static void nfs4_open_release(void *calldata) 864static void nfs4_open_release(void *calldata)
@@ -696,17 +870,17 @@ static void nfs4_open_release(void *calldata)
696 if (data->cancelled == 0) 870 if (data->cancelled == 0)
697 goto out_free; 871 goto out_free;
698 /* In case of error, no cleanup! */ 872 /* In case of error, no cleanup! */
699 if (data->rpc_status != 0) 873 if (data->rpc_status != 0 || !data->rpc_done)
700 goto out_free; 874 goto out_free;
701 /* In case we need an open_confirm, no cleanup! */ 875 /* In case we need an open_confirm, no cleanup! */
702 if (data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM) 876 if (data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM)
703 goto out_free; 877 goto out_free;
704 nfs_confirm_seqid(&data->owner->so_seqid, 0); 878 nfs_confirm_seqid(&data->owner->so_seqid, 0);
705 state = nfs4_opendata_to_nfs4_state(data); 879 state = nfs4_opendata_to_nfs4_state(data);
706 if (state != NULL) 880 if (!IS_ERR(state))
707 nfs4_close_state(state, data->o_arg.open_flags); 881 nfs4_close_state(&data->path, state, data->o_arg.open_flags);
708out_free: 882out_free:
709 nfs4_opendata_free(data); 883 nfs4_opendata_put(data);
710} 884}
711 885
712static const struct rpc_call_ops nfs4_open_ops = { 886static const struct rpc_call_ops nfs4_open_ops = {
@@ -727,12 +901,10 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
727 struct rpc_task *task; 901 struct rpc_task *task;
728 int status; 902 int status;
729 903
730 atomic_inc(&data->count); 904 kref_get(&data->kref);
731 /* 905 data->rpc_done = 0;
732 * If rpc_run_task() ends up calling ->rpc_release(), we 906 data->rpc_status = 0;
733 * want to ensure that it takes the 'error' code path. 907 data->cancelled = 0;
734 */
735 data->rpc_status = -ENOMEM;
736 task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_ops, data); 908 task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_ops, data);
737 if (IS_ERR(task)) 909 if (IS_ERR(task))
738 return PTR_ERR(task); 910 return PTR_ERR(task);
@@ -743,7 +915,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
743 } else 915 } else
744 status = data->rpc_status; 916 status = data->rpc_status;
745 rpc_put_task(task); 917 rpc_put_task(task);
746 if (status != 0) 918 if (status != 0 || !data->rpc_done)
747 return status; 919 return status;
748 920
749 if (o_arg->open_flags & O_CREAT) { 921 if (o_arg->open_flags & O_CREAT) {
@@ -756,7 +928,6 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
756 if (status != 0) 928 if (status != 0)
757 return status; 929 return status;
758 } 930 }
759 nfs_confirm_seqid(&data->owner->so_seqid, 0);
760 if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) 931 if (!(o_res->f_attr->valid & NFS_ATTR_FATTR))
761 return server->nfs_client->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr); 932 return server->nfs_client->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr);
762 return 0; 933 return 0;
@@ -772,6 +943,8 @@ static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openf
772 mask |= MAY_READ; 943 mask |= MAY_READ;
773 if (openflags & FMODE_WRITE) 944 if (openflags & FMODE_WRITE)
774 mask |= MAY_WRITE; 945 mask |= MAY_WRITE;
946 if (openflags & FMODE_EXEC)
947 mask |= MAY_EXEC;
775 status = nfs_access_get_cached(inode, cred, &cache); 948 status = nfs_access_get_cached(inode, cred, &cache);
776 if (status == 0) 949 if (status == 0)
777 goto out; 950 goto out;
@@ -811,43 +984,32 @@ static int nfs4_recover_expired_lease(struct nfs_server *server)
811 * reclaim state on the server after a network partition. 984 * reclaim state on the server after a network partition.
812 * Assumes caller holds the appropriate lock 985 * Assumes caller holds the appropriate lock
813 */ 986 */
814static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) 987static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state)
815{ 988{
816 struct inode *inode = state->inode;
817 struct nfs_delegation *delegation = NFS_I(inode)->delegation;
818 struct nfs4_opendata *opendata; 989 struct nfs4_opendata *opendata;
819 int openflags = state->state & (FMODE_READ|FMODE_WRITE);
820 int ret; 990 int ret;
821 991
822 if (delegation != NULL && !(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) { 992 opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, NULL);
823 ret = _nfs4_do_access(inode, sp->so_cred, openflags);
824 if (ret < 0)
825 return ret;
826 memcpy(&state->stateid, &delegation->stateid, sizeof(state->stateid));
827 set_bit(NFS_DELEGATED_STATE, &state->flags);
828 return 0;
829 }
830 opendata = nfs4_opendata_alloc(dentry, sp, openflags, NULL);
831 if (opendata == NULL) 993 if (opendata == NULL)
832 return -ENOMEM; 994 return -ENOMEM;
833 ret = nfs4_open_recover(opendata, state); 995 ret = nfs4_open_recover(opendata, state);
834 if (ret == -ESTALE) { 996 if (ret == -ESTALE) {
835 /* Invalidate the state owner so we don't ever use it again */ 997 /* Invalidate the state owner so we don't ever use it again */
836 nfs4_drop_state_owner(sp); 998 nfs4_drop_state_owner(state->owner);
837 d_drop(dentry); 999 d_drop(ctx->path.dentry);
838 } 1000 }
839 nfs4_opendata_free(opendata); 1001 nfs4_opendata_put(opendata);
840 return ret; 1002 return ret;
841} 1003}
842 1004
843static inline int nfs4_do_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) 1005static inline int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state)
844{ 1006{
845 struct nfs_server *server = NFS_SERVER(dentry->d_inode); 1007 struct nfs_server *server = NFS_SERVER(state->inode);
846 struct nfs4_exception exception = { }; 1008 struct nfs4_exception exception = { };
847 int err; 1009 int err;
848 1010
849 do { 1011 do {
850 err = _nfs4_open_expired(sp, state, dentry); 1012 err = _nfs4_open_expired(ctx, state);
851 if (err == -NFS4ERR_DELAY) 1013 if (err == -NFS4ERR_DELAY)
852 nfs4_handle_exception(server, err, &exception); 1014 nfs4_handle_exception(server, err, &exception);
853 } while (exception.retry); 1015 } while (exception.retry);
@@ -862,107 +1024,38 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta
862 ctx = nfs4_state_find_open_context(state); 1024 ctx = nfs4_state_find_open_context(state);
863 if (IS_ERR(ctx)) 1025 if (IS_ERR(ctx))
864 return PTR_ERR(ctx); 1026 return PTR_ERR(ctx);
865 ret = nfs4_do_open_expired(sp, state, ctx->dentry); 1027 ret = nfs4_do_open_expired(ctx, state);
866 put_nfs_open_context(ctx); 1028 put_nfs_open_context(ctx);
867 return ret; 1029 return ret;
868} 1030}
869 1031
870/* 1032/*
871 * Returns a referenced nfs4_state if there is an open delegation on the file 1033 * on an EXCLUSIVE create, the server should send back a bitmask with FATTR4-*
1034 * fields corresponding to attributes that were used to store the verifier.
1035 * Make sure we clobber those fields in the later setattr call
872 */ 1036 */
873static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred, struct nfs4_state **res) 1037static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct iattr *sattr)
874{
875 struct nfs_delegation *delegation;
876 struct nfs_server *server = NFS_SERVER(inode);
877 struct nfs_client *clp = server->nfs_client;
878 struct nfs_inode *nfsi = NFS_I(inode);
879 struct nfs4_state_owner *sp = NULL;
880 struct nfs4_state *state = NULL;
881 int open_flags = flags & (FMODE_READ|FMODE_WRITE);
882 int err;
883
884 err = -ENOMEM;
885 if (!(sp = nfs4_get_state_owner(server, cred))) {
886 dprintk("%s: nfs4_get_state_owner failed!\n", __FUNCTION__);
887 return err;
888 }
889 err = nfs4_recover_expired_lease(server);
890 if (err != 0)
891 goto out_put_state_owner;
892 /* Protect against reboot recovery - NOTE ORDER! */
893 down_read(&clp->cl_sem);
894 /* Protect against delegation recall */
895 down_read(&nfsi->rwsem);
896 delegation = NFS_I(inode)->delegation;
897 err = -ENOENT;
898 if (delegation == NULL || (delegation->type & open_flags) != open_flags)
899 goto out_err;
900 err = -ENOMEM;
901 state = nfs4_get_open_state(inode, sp);
902 if (state == NULL)
903 goto out_err;
904
905 err = -ENOENT;
906 if ((state->state & open_flags) == open_flags) {
907 spin_lock(&inode->i_lock);
908 update_open_stateflags(state, open_flags);
909 spin_unlock(&inode->i_lock);
910 goto out_ok;
911 } else if (state->state != 0)
912 goto out_put_open_state;
913
914 lock_kernel();
915 err = _nfs4_do_access(inode, cred, open_flags);
916 unlock_kernel();
917 if (err != 0)
918 goto out_put_open_state;
919 set_bit(NFS_DELEGATED_STATE, &state->flags);
920 update_open_stateid(state, &delegation->stateid, open_flags);
921out_ok:
922 nfs4_put_state_owner(sp);
923 up_read(&nfsi->rwsem);
924 up_read(&clp->cl_sem);
925 *res = state;
926 return 0;
927out_put_open_state:
928 nfs4_put_open_state(state);
929out_err:
930 up_read(&nfsi->rwsem);
931 up_read(&clp->cl_sem);
932 if (err != -EACCES)
933 nfs_inode_return_delegation(inode);
934out_put_state_owner:
935 nfs4_put_state_owner(sp);
936 return err;
937}
938
939static struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred)
940{ 1038{
941 struct nfs4_exception exception = { }; 1039 if ((opendata->o_res.attrset[1] & FATTR4_WORD1_TIME_ACCESS) &&
942 struct nfs4_state *res = ERR_PTR(-EIO); 1040 !(sattr->ia_valid & ATTR_ATIME_SET))
943 int err; 1041 sattr->ia_valid |= ATTR_ATIME;
944 1042
945 do { 1043 if ((opendata->o_res.attrset[1] & FATTR4_WORD1_TIME_MODIFY) &&
946 err = _nfs4_open_delegated(inode, flags, cred, &res); 1044 !(sattr->ia_valid & ATTR_MTIME_SET))
947 if (err == 0) 1045 sattr->ia_valid |= ATTR_MTIME;
948 break;
949 res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(inode),
950 err, &exception));
951 } while (exception.retry);
952 return res;
953} 1046}
954 1047
955/* 1048/*
956 * Returns a referenced nfs4_state 1049 * Returns a referenced nfs4_state
957 */ 1050 */
958static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res) 1051static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
959{ 1052{
960 struct nfs4_state_owner *sp; 1053 struct nfs4_state_owner *sp;
961 struct nfs4_state *state = NULL; 1054 struct nfs4_state *state = NULL;
962 struct nfs_server *server = NFS_SERVER(dir); 1055 struct nfs_server *server = NFS_SERVER(dir);
963 struct nfs_client *clp = server->nfs_client; 1056 struct nfs_client *clp = server->nfs_client;
964 struct nfs4_opendata *opendata; 1057 struct nfs4_opendata *opendata;
965 int status; 1058 int status;
966 1059
967 /* Protect against reboot recovery conflicts */ 1060 /* Protect against reboot recovery conflicts */
968 status = -ENOMEM; 1061 status = -ENOMEM;
@@ -973,29 +1066,35 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
973 status = nfs4_recover_expired_lease(server); 1066 status = nfs4_recover_expired_lease(server);
974 if (status != 0) 1067 if (status != 0)
975 goto err_put_state_owner; 1068 goto err_put_state_owner;
1069 if (path->dentry->d_inode != NULL)
1070 nfs4_return_incompatible_delegation(path->dentry->d_inode, flags & (FMODE_READ|FMODE_WRITE));
976 down_read(&clp->cl_sem); 1071 down_read(&clp->cl_sem);
977 status = -ENOMEM; 1072 status = -ENOMEM;
978 opendata = nfs4_opendata_alloc(dentry, sp, flags, sattr); 1073 opendata = nfs4_opendata_alloc(path, sp, flags, sattr);
979 if (opendata == NULL) 1074 if (opendata == NULL)
980 goto err_release_rwsem; 1075 goto err_release_rwsem;
981 1076
1077 if (path->dentry->d_inode != NULL)
1078 opendata->state = nfs4_get_open_state(path->dentry->d_inode, sp);
1079
982 status = _nfs4_proc_open(opendata); 1080 status = _nfs4_proc_open(opendata);
983 if (status != 0) 1081 if (status != 0)
984 goto err_opendata_free; 1082 goto err_opendata_put;
1083
1084 if (opendata->o_arg.open_flags & O_EXCL)
1085 nfs4_exclusive_attrset(opendata, sattr);
985 1086
986 status = -ENOMEM;
987 state = nfs4_opendata_to_nfs4_state(opendata); 1087 state = nfs4_opendata_to_nfs4_state(opendata);
988 if (state == NULL) 1088 status = PTR_ERR(state);
989 goto err_opendata_free; 1089 if (IS_ERR(state))
990 if (opendata->o_res.delegation_type != 0) 1090 goto err_opendata_put;
991 nfs_inode_set_delegation(state->inode, cred, &opendata->o_res); 1091 nfs4_opendata_put(opendata);
992 nfs4_opendata_free(opendata);
993 nfs4_put_state_owner(sp); 1092 nfs4_put_state_owner(sp);
994 up_read(&clp->cl_sem); 1093 up_read(&clp->cl_sem);
995 *res = state; 1094 *res = state;
996 return 0; 1095 return 0;
997err_opendata_free: 1096err_opendata_put:
998 nfs4_opendata_free(opendata); 1097 nfs4_opendata_put(opendata);
999err_release_rwsem: 1098err_release_rwsem:
1000 up_read(&clp->cl_sem); 1099 up_read(&clp->cl_sem);
1001err_put_state_owner: 1100err_put_state_owner:
@@ -1006,14 +1105,14 @@ out_err:
1006} 1105}
1007 1106
1008 1107
1009static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred) 1108static struct nfs4_state *nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred)
1010{ 1109{
1011 struct nfs4_exception exception = { }; 1110 struct nfs4_exception exception = { };
1012 struct nfs4_state *res; 1111 struct nfs4_state *res;
1013 int status; 1112 int status;
1014 1113
1015 do { 1114 do {
1016 status = _nfs4_do_open(dir, dentry, flags, sattr, cred, &res); 1115 status = _nfs4_do_open(dir, path, flags, sattr, cred, &res);
1017 if (status == 0) 1116 if (status == 0)
1018 break; 1117 break;
1019 /* NOTE: BAD_SEQID means the server and client disagree about the 1118 /* NOTE: BAD_SEQID means the server and client disagree about the
@@ -1028,7 +1127,9 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry,
1028 * the user though... 1127 * the user though...
1029 */ 1128 */
1030 if (status == -NFS4ERR_BAD_SEQID) { 1129 if (status == -NFS4ERR_BAD_SEQID) {
1031 printk(KERN_WARNING "NFS: v4 server returned a bad sequence-id error!\n"); 1130 printk(KERN_WARNING "NFS: v4 server %s "
1131 " returned a bad sequence-id error!\n",
1132 NFS_SERVER(dir)->nfs_client->cl_hostname);
1032 exception.retry = 1; 1133 exception.retry = 1;
1033 continue; 1134 continue;
1034 } 1135 }
@@ -1042,6 +1143,11 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry,
1042 exception.retry = 1; 1143 exception.retry = 1;
1043 continue; 1144 continue;
1044 } 1145 }
1146 if (status == -EAGAIN) {
1147 /* We must have found a delegation */
1148 exception.retry = 1;
1149 continue;
1150 }
1045 res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir), 1151 res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir),
1046 status, &exception)); 1152 status, &exception));
1047 } while (exception.retry); 1153 } while (exception.retry);
@@ -1101,6 +1207,7 @@ static int nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr,
1101} 1207}
1102 1208
1103struct nfs4_closedata { 1209struct nfs4_closedata {
1210 struct path path;
1104 struct inode *inode; 1211 struct inode *inode;
1105 struct nfs4_state *state; 1212 struct nfs4_state *state;
1106 struct nfs_closeargs arg; 1213 struct nfs_closeargs arg;
@@ -1117,6 +1224,8 @@ static void nfs4_free_closedata(void *data)
1117 nfs4_put_open_state(calldata->state); 1224 nfs4_put_open_state(calldata->state);
1118 nfs_free_seqid(calldata->arg.seqid); 1225 nfs_free_seqid(calldata->arg.seqid);
1119 nfs4_put_state_owner(sp); 1226 nfs4_put_state_owner(sp);
1227 dput(calldata->path.dentry);
1228 mntput(calldata->path.mnt);
1120 kfree(calldata); 1229 kfree(calldata);
1121} 1230}
1122 1231
@@ -1134,8 +1243,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
1134 nfs_increment_open_seqid(task->tk_status, calldata->arg.seqid); 1243 nfs_increment_open_seqid(task->tk_status, calldata->arg.seqid);
1135 switch (task->tk_status) { 1244 switch (task->tk_status) {
1136 case 0: 1245 case 0:
1137 memcpy(&state->stateid, &calldata->res.stateid, 1246 nfs_set_open_stateid(state, &calldata->res.stateid, calldata->arg.open_flags);
1138 sizeof(state->stateid));
1139 renew_lease(server, calldata->timestamp); 1247 renew_lease(server, calldata->timestamp);
1140 break; 1248 break;
1141 case -NFS4ERR_STALE_STATEID: 1249 case -NFS4ERR_STALE_STATEID:
@@ -1160,26 +1268,30 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
1160 .rpc_resp = &calldata->res, 1268 .rpc_resp = &calldata->res,
1161 .rpc_cred = state->owner->so_cred, 1269 .rpc_cred = state->owner->so_cred,
1162 }; 1270 };
1163 int mode = 0, old_mode; 1271 int clear_rd, clear_wr, clear_rdwr;
1272 int mode;
1164 1273
1165 if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) 1274 if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
1166 return; 1275 return;
1167 /* Recalculate the new open mode in case someone reopened the file 1276
1168 * while we were waiting in line to be scheduled. 1277 mode = FMODE_READ|FMODE_WRITE;
1169 */ 1278 clear_rd = clear_wr = clear_rdwr = 0;
1170 spin_lock(&state->owner->so_lock); 1279 spin_lock(&state->owner->so_lock);
1171 spin_lock(&calldata->inode->i_lock); 1280 /* Calculate the change in open mode */
1172 mode = old_mode = state->state;
1173 if (state->n_rdwr == 0) { 1281 if (state->n_rdwr == 0) {
1174 if (state->n_rdonly == 0) 1282 if (state->n_rdonly == 0) {
1175 mode &= ~FMODE_READ; 1283 mode &= ~FMODE_READ;
1176 if (state->n_wronly == 0) 1284 clear_rd |= test_and_clear_bit(NFS_O_RDONLY_STATE, &state->flags);
1285 clear_rdwr |= test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags);
1286 }
1287 if (state->n_wronly == 0) {
1177 mode &= ~FMODE_WRITE; 1288 mode &= ~FMODE_WRITE;
1289 clear_wr |= test_and_clear_bit(NFS_O_WRONLY_STATE, &state->flags);
1290 clear_rdwr |= test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags);
1291 }
1178 } 1292 }
1179 nfs4_state_set_mode_locked(state, mode);
1180 spin_unlock(&calldata->inode->i_lock);
1181 spin_unlock(&state->owner->so_lock); 1293 spin_unlock(&state->owner->so_lock);
1182 if (mode == old_mode || test_bit(NFS_DELEGATED_STATE, &state->flags)) { 1294 if (!clear_rd && !clear_wr && !clear_rdwr) {
1183 /* Note: exit _without_ calling nfs4_close_done */ 1295 /* Note: exit _without_ calling nfs4_close_done */
1184 task->tk_action = NULL; 1296 task->tk_action = NULL;
1185 return; 1297 return;
@@ -1209,19 +1321,21 @@ static const struct rpc_call_ops nfs4_close_ops = {
1209 * 1321 *
1210 * NOTE: Caller must be holding the sp->so_owner semaphore! 1322 * NOTE: Caller must be holding the sp->so_owner semaphore!
1211 */ 1323 */
1212int nfs4_do_close(struct inode *inode, struct nfs4_state *state) 1324int nfs4_do_close(struct path *path, struct nfs4_state *state)
1213{ 1325{
1214 struct nfs_server *server = NFS_SERVER(inode); 1326 struct nfs_server *server = NFS_SERVER(state->inode);
1215 struct nfs4_closedata *calldata; 1327 struct nfs4_closedata *calldata;
1328 struct nfs4_state_owner *sp = state->owner;
1329 struct rpc_task *task;
1216 int status = -ENOMEM; 1330 int status = -ENOMEM;
1217 1331
1218 calldata = kmalloc(sizeof(*calldata), GFP_KERNEL); 1332 calldata = kmalloc(sizeof(*calldata), GFP_KERNEL);
1219 if (calldata == NULL) 1333 if (calldata == NULL)
1220 goto out; 1334 goto out;
1221 calldata->inode = inode; 1335 calldata->inode = state->inode;
1222 calldata->state = state; 1336 calldata->state = state;
1223 calldata->arg.fh = NFS_FH(inode); 1337 calldata->arg.fh = NFS_FH(state->inode);
1224 calldata->arg.stateid = &state->stateid; 1338 calldata->arg.stateid = &state->open_stateid;
1225 /* Serialization for the sequence id */ 1339 /* Serialization for the sequence id */
1226 calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid); 1340 calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid);
1227 if (calldata->arg.seqid == NULL) 1341 if (calldata->arg.seqid == NULL)
@@ -1229,36 +1343,55 @@ int nfs4_do_close(struct inode *inode, struct nfs4_state *state)
1229 calldata->arg.bitmask = server->attr_bitmask; 1343 calldata->arg.bitmask = server->attr_bitmask;
1230 calldata->res.fattr = &calldata->fattr; 1344 calldata->res.fattr = &calldata->fattr;
1231 calldata->res.server = server; 1345 calldata->res.server = server;
1346 calldata->path.mnt = mntget(path->mnt);
1347 calldata->path.dentry = dget(path->dentry);
1232 1348
1233 status = nfs4_call_async(server->client, &nfs4_close_ops, calldata); 1349 task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_close_ops, calldata);
1234 if (status == 0) 1350 if (IS_ERR(task))
1235 goto out; 1351 return PTR_ERR(task);
1236 1352 rpc_put_task(task);
1237 nfs_free_seqid(calldata->arg.seqid); 1353 return 0;
1238out_free_calldata: 1354out_free_calldata:
1239 kfree(calldata); 1355 kfree(calldata);
1240out: 1356out:
1357 nfs4_put_open_state(state);
1358 nfs4_put_state_owner(sp);
1241 return status; 1359 return status;
1242} 1360}
1243 1361
1244static int nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, struct nfs4_state *state) 1362static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct nfs4_state *state)
1245{ 1363{
1246 struct file *filp; 1364 struct file *filp;
1365 int ret;
1247 1366
1248 filp = lookup_instantiate_filp(nd, dentry, NULL); 1367 /* If the open_intent is for execute, we have an extra check to make */
1368 if (nd->intent.open.flags & FMODE_EXEC) {
1369 ret = _nfs4_do_access(state->inode,
1370 state->owner->so_cred,
1371 nd->intent.open.flags);
1372 if (ret < 0)
1373 goto out_close;
1374 }
1375 filp = lookup_instantiate_filp(nd, path->dentry, NULL);
1249 if (!IS_ERR(filp)) { 1376 if (!IS_ERR(filp)) {
1250 struct nfs_open_context *ctx; 1377 struct nfs_open_context *ctx;
1251 ctx = (struct nfs_open_context *)filp->private_data; 1378 ctx = (struct nfs_open_context *)filp->private_data;
1252 ctx->state = state; 1379 ctx->state = state;
1253 return 0; 1380 return 0;
1254 } 1381 }
1255 nfs4_close_state(state, nd->intent.open.flags); 1382 ret = PTR_ERR(filp);
1256 return PTR_ERR(filp); 1383out_close:
1384 nfs4_close_state(path, state, nd->intent.open.flags);
1385 return ret;
1257} 1386}
1258 1387
1259struct dentry * 1388struct dentry *
1260nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) 1389nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
1261{ 1390{
1391 struct path path = {
1392 .mnt = nd->mnt,
1393 .dentry = dentry,
1394 };
1262 struct iattr attr; 1395 struct iattr attr;
1263 struct rpc_cred *cred; 1396 struct rpc_cred *cred;
1264 struct nfs4_state *state; 1397 struct nfs4_state *state;
@@ -1277,7 +1410,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
1277 cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0); 1410 cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
1278 if (IS_ERR(cred)) 1411 if (IS_ERR(cred))
1279 return (struct dentry *)cred; 1412 return (struct dentry *)cred;
1280 state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred); 1413 state = nfs4_do_open(dir, &path, nd->intent.open.flags, &attr, cred);
1281 put_rpccred(cred); 1414 put_rpccred(cred);
1282 if (IS_ERR(state)) { 1415 if (IS_ERR(state)) {
1283 if (PTR_ERR(state) == -ENOENT) 1416 if (PTR_ERR(state) == -ENOENT)
@@ -1287,22 +1420,24 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
1287 res = d_add_unique(dentry, igrab(state->inode)); 1420 res = d_add_unique(dentry, igrab(state->inode));
1288 if (res != NULL) 1421 if (res != NULL)
1289 dentry = res; 1422 dentry = res;
1290 nfs4_intent_set_file(nd, dentry, state); 1423 nfs4_intent_set_file(nd, &path, state);
1291 return res; 1424 return res;
1292} 1425}
1293 1426
1294int 1427int
1295nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, struct nameidata *nd) 1428nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, struct nameidata *nd)
1296{ 1429{
1430 struct path path = {
1431 .mnt = nd->mnt,
1432 .dentry = dentry,
1433 };
1297 struct rpc_cred *cred; 1434 struct rpc_cred *cred;
1298 struct nfs4_state *state; 1435 struct nfs4_state *state;
1299 1436
1300 cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0); 1437 cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
1301 if (IS_ERR(cred)) 1438 if (IS_ERR(cred))
1302 return PTR_ERR(cred); 1439 return PTR_ERR(cred);
1303 state = nfs4_open_delegated(dentry->d_inode, openflags, cred); 1440 state = nfs4_do_open(dir, &path, openflags, NULL, cred);
1304 if (IS_ERR(state))
1305 state = nfs4_do_open(dir, dentry, openflags, NULL, cred);
1306 put_rpccred(cred); 1441 put_rpccred(cred);
1307 if (IS_ERR(state)) { 1442 if (IS_ERR(state)) {
1308 switch (PTR_ERR(state)) { 1443 switch (PTR_ERR(state)) {
@@ -1318,10 +1453,10 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st
1318 } 1453 }
1319 } 1454 }
1320 if (state->inode == dentry->d_inode) { 1455 if (state->inode == dentry->d_inode) {
1321 nfs4_intent_set_file(nd, dentry, state); 1456 nfs4_intent_set_file(nd, &path, state);
1322 return 1; 1457 return 1;
1323 } 1458 }
1324 nfs4_close_state(state, openflags); 1459 nfs4_close_state(&path, state, openflags);
1325out_drop: 1460out_drop:
1326 d_drop(dentry); 1461 d_drop(dentry);
1327 return 0; 1462 return 0;
@@ -1559,8 +1694,6 @@ static int _nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
1559 dprintk("NFS call lookupfh %s\n", name->name); 1694 dprintk("NFS call lookupfh %s\n", name->name);
1560 status = rpc_call_sync(server->client, &msg, 0); 1695 status = rpc_call_sync(server->client, &msg, 0);
1561 dprintk("NFS reply lookupfh: %d\n", status); 1696 dprintk("NFS reply lookupfh: %d\n", status);
1562 if (status == -NFS4ERR_MOVED)
1563 status = -EREMOTE;
1564 return status; 1697 return status;
1565} 1698}
1566 1699
@@ -1571,10 +1704,13 @@ static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
1571 struct nfs4_exception exception = { }; 1704 struct nfs4_exception exception = { };
1572 int err; 1705 int err;
1573 do { 1706 do {
1574 err = nfs4_handle_exception(server, 1707 err = _nfs4_proc_lookupfh(server, dirfh, name, fhandle, fattr);
1575 _nfs4_proc_lookupfh(server, dirfh, name, 1708 /* FIXME: !!!! */
1576 fhandle, fattr), 1709 if (err == -NFS4ERR_MOVED) {
1577 &exception); 1710 err = -EREMOTE;
1711 break;
1712 }
1713 err = nfs4_handle_exception(server, err, &exception);
1578 } while (exception.retry); 1714 } while (exception.retry);
1579 return err; 1715 return err;
1580} 1716}
@@ -1582,28 +1718,10 @@ static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
1582static int _nfs4_proc_lookup(struct inode *dir, struct qstr *name, 1718static int _nfs4_proc_lookup(struct inode *dir, struct qstr *name,
1583 struct nfs_fh *fhandle, struct nfs_fattr *fattr) 1719 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
1584{ 1720{
1585 int status; 1721 int status;
1586 struct nfs_server *server = NFS_SERVER(dir);
1587 struct nfs4_lookup_arg args = {
1588 .bitmask = server->attr_bitmask,
1589 .dir_fh = NFS_FH(dir),
1590 .name = name,
1591 };
1592 struct nfs4_lookup_res res = {
1593 .server = server,
1594 .fattr = fattr,
1595 .fh = fhandle,
1596 };
1597 struct rpc_message msg = {
1598 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP],
1599 .rpc_argp = &args,
1600 .rpc_resp = &res,
1601 };
1602
1603 nfs_fattr_init(fattr);
1604 1722
1605 dprintk("NFS call lookup %s\n", name->name); 1723 dprintk("NFS call lookup %s\n", name->name);
1606 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 1724 status = _nfs4_proc_lookupfh(NFS_SERVER(dir), NFS_FH(dir), name, fhandle, fattr);
1607 if (status == -NFS4ERR_MOVED) 1725 if (status == -NFS4ERR_MOVED)
1608 status = nfs4_get_referral(dir, name, fattr, fhandle); 1726 status = nfs4_get_referral(dir, name, fattr, fhandle);
1609 dprintk("NFS reply lookup: %d\n", status); 1727 dprintk("NFS reply lookup: %d\n", status);
@@ -1752,6 +1870,10 @@ static int
1752nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, 1870nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
1753 int flags, struct nameidata *nd) 1871 int flags, struct nameidata *nd)
1754{ 1872{
1873 struct path path = {
1874 .mnt = nd->mnt,
1875 .dentry = dentry,
1876 };
1755 struct nfs4_state *state; 1877 struct nfs4_state *state;
1756 struct rpc_cred *cred; 1878 struct rpc_cred *cred;
1757 int status = 0; 1879 int status = 0;
@@ -1761,7 +1883,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
1761 status = PTR_ERR(cred); 1883 status = PTR_ERR(cred);
1762 goto out; 1884 goto out;
1763 } 1885 }
1764 state = nfs4_do_open(dir, dentry, flags, sattr, cred); 1886 state = nfs4_do_open(dir, &path, flags, sattr, cred);
1765 put_rpccred(cred); 1887 put_rpccred(cred);
1766 if (IS_ERR(state)) { 1888 if (IS_ERR(state)) {
1767 status = PTR_ERR(state); 1889 status = PTR_ERR(state);
@@ -1773,11 +1895,12 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
1773 status = nfs4_do_setattr(state->inode, &fattr, sattr, state); 1895 status = nfs4_do_setattr(state->inode, &fattr, sattr, state);
1774 if (status == 0) 1896 if (status == 0)
1775 nfs_setattr_update_inode(state->inode, sattr); 1897 nfs_setattr_update_inode(state->inode, sattr);
1898 nfs_post_op_update_inode(state->inode, &fattr);
1776 } 1899 }
1777 if (status == 0 && nd != NULL && (nd->flags & LOOKUP_OPEN)) 1900 if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0)
1778 status = nfs4_intent_set_file(nd, dentry, state); 1901 status = nfs4_intent_set_file(nd, &path, state);
1779 else 1902 else
1780 nfs4_close_state(state, flags); 1903 nfs4_close_state(&path, state, flags);
1781out: 1904out:
1782 return status; 1905 return status;
1783} 1906}
@@ -3008,7 +3131,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
3008 if (status != 0) 3131 if (status != 0)
3009 goto out; 3132 goto out;
3010 lsp = request->fl_u.nfs4_fl.owner; 3133 lsp = request->fl_u.nfs4_fl.owner;
3011 arg.lock_owner.id = lsp->ls_id; 3134 arg.lock_owner.id = lsp->ls_id.id;
3012 status = rpc_call_sync(server->client, &msg, 0); 3135 status = rpc_call_sync(server->client, &msg, 0);
3013 switch (status) { 3136 switch (status) {
3014 case 0: 3137 case 0:
@@ -3152,6 +3275,11 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
3152{ 3275{
3153 struct nfs4_unlockdata *data; 3276 struct nfs4_unlockdata *data;
3154 3277
3278 /* Ensure this is an unlock - when canceling a lock, the
3279 * canceled lock is passed in, and it won't be an unlock.
3280 */
3281 fl->fl_type = F_UNLCK;
3282
3155 data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid); 3283 data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid);
3156 if (data == NULL) { 3284 if (data == NULL) {
3157 nfs_free_seqid(seqid); 3285 nfs_free_seqid(seqid);
@@ -3222,7 +3350,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
3222 goto out_free; 3350 goto out_free;
3223 p->arg.lock_stateid = &lsp->ls_stateid; 3351 p->arg.lock_stateid = &lsp->ls_stateid;
3224 p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; 3352 p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
3225 p->arg.lock_owner.id = lsp->ls_id; 3353 p->arg.lock_owner.id = lsp->ls_id.id;
3226 p->lsp = lsp; 3354 p->lsp = lsp;
3227 atomic_inc(&lsp->ls_count); 3355 atomic_inc(&lsp->ls_count);
3228 p->ctx = get_nfs_open_context(ctx); 3356 p->ctx = get_nfs_open_context(ctx);
@@ -3285,7 +3413,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
3285 memcpy(data->lsp->ls_stateid.data, data->res.stateid.data, 3413 memcpy(data->lsp->ls_stateid.data, data->res.stateid.data,
3286 sizeof(data->lsp->ls_stateid.data)); 3414 sizeof(data->lsp->ls_stateid.data));
3287 data->lsp->ls_flags |= NFS_LOCK_INITIALIZED; 3415 data->lsp->ls_flags |= NFS_LOCK_INITIALIZED;
3288 renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp); 3416 renew_lease(NFS_SERVER(data->ctx->path.dentry->d_inode), data->timestamp);
3289 } 3417 }
3290 nfs_increment_lock_seqid(data->rpc_status, data->arg.lock_seqid); 3418 nfs_increment_lock_seqid(data->rpc_status, data->arg.lock_seqid);
3291out: 3419out:
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 8ed79d5c54f..e9662ba81d8 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -38,12 +38,14 @@
38 * subsequent patch. 38 * subsequent patch.
39 */ 39 */
40 40
41#include <linux/kernel.h>
41#include <linux/slab.h> 42#include <linux/slab.h>
42#include <linux/smp_lock.h> 43#include <linux/smp_lock.h>
43#include <linux/nfs_fs.h> 44#include <linux/nfs_fs.h>
44#include <linux/nfs_idmap.h> 45#include <linux/nfs_idmap.h>
45#include <linux/kthread.h> 46#include <linux/kthread.h>
46#include <linux/module.h> 47#include <linux/module.h>
48#include <linux/random.h>
47#include <linux/workqueue.h> 49#include <linux/workqueue.h>
48#include <linux/bitops.h> 50#include <linux/bitops.h>
49 51
@@ -69,33 +71,14 @@ static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred)
69 return status; 71 return status;
70} 72}
71 73
72u32
73nfs4_alloc_lockowner_id(struct nfs_client *clp)
74{
75 return clp->cl_lockowner_id ++;
76}
77
78static struct nfs4_state_owner *
79nfs4_client_grab_unused(struct nfs_client *clp, struct rpc_cred *cred)
80{
81 struct nfs4_state_owner *sp = NULL;
82
83 if (!list_empty(&clp->cl_unused)) {
84 sp = list_entry(clp->cl_unused.next, struct nfs4_state_owner, so_list);
85 atomic_inc(&sp->so_count);
86 sp->so_cred = cred;
87 list_move(&sp->so_list, &clp->cl_state_owners);
88 clp->cl_nunused--;
89 }
90 return sp;
91}
92
93struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp) 74struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp)
94{ 75{
95 struct nfs4_state_owner *sp; 76 struct nfs4_state_owner *sp;
77 struct rb_node *pos;
96 struct rpc_cred *cred = NULL; 78 struct rpc_cred *cred = NULL;
97 79
98 list_for_each_entry(sp, &clp->cl_state_owners, so_list) { 80 for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
81 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
99 if (list_empty(&sp->so_states)) 82 if (list_empty(&sp->so_states))
100 continue; 83 continue;
101 cred = get_rpccred(sp->so_cred); 84 cred = get_rpccred(sp->so_cred);
@@ -107,32 +90,146 @@ struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp)
107static struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp) 90static struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp)
108{ 91{
109 struct nfs4_state_owner *sp; 92 struct nfs4_state_owner *sp;
93 struct rb_node *pos;
110 94
111 if (!list_empty(&clp->cl_state_owners)) { 95 pos = rb_first(&clp->cl_state_owners);
112 sp = list_entry(clp->cl_state_owners.next, 96 if (pos != NULL) {
113 struct nfs4_state_owner, so_list); 97 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
114 return get_rpccred(sp->so_cred); 98 return get_rpccred(sp->so_cred);
115 } 99 }
116 return NULL; 100 return NULL;
117} 101}
118 102
103static void nfs_alloc_unique_id(struct rb_root *root, struct nfs_unique_id *new,
104 __u64 minval, int maxbits)
105{
106 struct rb_node **p, *parent;
107 struct nfs_unique_id *pos;
108 __u64 mask = ~0ULL;
109
110 if (maxbits < 64)
111 mask = (1ULL << maxbits) - 1ULL;
112
113 /* Ensure distribution is more or less flat */
114 get_random_bytes(&new->id, sizeof(new->id));
115 new->id &= mask;
116 if (new->id < minval)
117 new->id += minval;
118retry:
119 p = &root->rb_node;
120 parent = NULL;
121
122 while (*p != NULL) {
123 parent = *p;
124 pos = rb_entry(parent, struct nfs_unique_id, rb_node);
125
126 if (new->id < pos->id)
127 p = &(*p)->rb_left;
128 else if (new->id > pos->id)
129 p = &(*p)->rb_right;
130 else
131 goto id_exists;
132 }
133 rb_link_node(&new->rb_node, parent, p);
134 rb_insert_color(&new->rb_node, root);
135 return;
136id_exists:
137 for (;;) {
138 new->id++;
139 if (new->id < minval || (new->id & mask) != new->id) {
140 new->id = minval;
141 break;
142 }
143 parent = rb_next(parent);
144 if (parent == NULL)
145 break;
146 pos = rb_entry(parent, struct nfs_unique_id, rb_node);
147 if (new->id < pos->id)
148 break;
149 }
150 goto retry;
151}
152
153static void nfs_free_unique_id(struct rb_root *root, struct nfs_unique_id *id)
154{
155 rb_erase(&id->rb_node, root);
156}
157
119static struct nfs4_state_owner * 158static struct nfs4_state_owner *
120nfs4_find_state_owner(struct nfs_client *clp, struct rpc_cred *cred) 159nfs4_find_state_owner(struct nfs_server *server, struct rpc_cred *cred)
121{ 160{
161 struct nfs_client *clp = server->nfs_client;
162 struct rb_node **p = &clp->cl_state_owners.rb_node,
163 *parent = NULL;
122 struct nfs4_state_owner *sp, *res = NULL; 164 struct nfs4_state_owner *sp, *res = NULL;
123 165
124 list_for_each_entry(sp, &clp->cl_state_owners, so_list) { 166 while (*p != NULL) {
125 if (sp->so_cred != cred) 167 parent = *p;
168 sp = rb_entry(parent, struct nfs4_state_owner, so_client_node);
169
170 if (server < sp->so_server) {
171 p = &parent->rb_left;
126 continue; 172 continue;
127 atomic_inc(&sp->so_count); 173 }
128 /* Move to the head of the list */ 174 if (server > sp->so_server) {
129 list_move(&sp->so_list, &clp->cl_state_owners); 175 p = &parent->rb_right;
130 res = sp; 176 continue;
131 break; 177 }
178 if (cred < sp->so_cred)
179 p = &parent->rb_left;
180 else if (cred > sp->so_cred)
181 p = &parent->rb_right;
182 else {
183 atomic_inc(&sp->so_count);
184 res = sp;
185 break;
186 }
132 } 187 }
133 return res; 188 return res;
134} 189}
135 190
191static struct nfs4_state_owner *
192nfs4_insert_state_owner(struct nfs_client *clp, struct nfs4_state_owner *new)
193{
194 struct rb_node **p = &clp->cl_state_owners.rb_node,
195 *parent = NULL;
196 struct nfs4_state_owner *sp;
197
198 while (*p != NULL) {
199 parent = *p;
200 sp = rb_entry(parent, struct nfs4_state_owner, so_client_node);
201
202 if (new->so_server < sp->so_server) {
203 p = &parent->rb_left;
204 continue;
205 }
206 if (new->so_server > sp->so_server) {
207 p = &parent->rb_right;
208 continue;
209 }
210 if (new->so_cred < sp->so_cred)
211 p = &parent->rb_left;
212 else if (new->so_cred > sp->so_cred)
213 p = &parent->rb_right;
214 else {
215 atomic_inc(&sp->so_count);
216 return sp;
217 }
218 }
219 nfs_alloc_unique_id(&clp->cl_openowner_id, &new->so_owner_id, 1, 64);
220 rb_link_node(&new->so_client_node, parent, p);
221 rb_insert_color(&new->so_client_node, &clp->cl_state_owners);
222 return new;
223}
224
225static void
226nfs4_remove_state_owner(struct nfs_client *clp, struct nfs4_state_owner *sp)
227{
228 if (!RB_EMPTY_NODE(&sp->so_client_node))
229 rb_erase(&sp->so_client_node, &clp->cl_state_owners);
230 nfs_free_unique_id(&clp->cl_openowner_id, &sp->so_owner_id);
231}
232
136/* 233/*
137 * nfs4_alloc_state_owner(): this is called on the OPEN or CREATE path to 234 * nfs4_alloc_state_owner(): this is called on the OPEN or CREATE path to
138 * create a new state_owner. 235 * create a new state_owner.
@@ -160,10 +257,14 @@ nfs4_alloc_state_owner(void)
160void 257void
161nfs4_drop_state_owner(struct nfs4_state_owner *sp) 258nfs4_drop_state_owner(struct nfs4_state_owner *sp)
162{ 259{
163 struct nfs_client *clp = sp->so_client; 260 if (!RB_EMPTY_NODE(&sp->so_client_node)) {
164 spin_lock(&clp->cl_lock); 261 struct nfs_client *clp = sp->so_client;
165 list_del_init(&sp->so_list); 262
166 spin_unlock(&clp->cl_lock); 263 spin_lock(&clp->cl_lock);
264 rb_erase(&sp->so_client_node, &clp->cl_state_owners);
265 RB_CLEAR_NODE(&sp->so_client_node);
266 spin_unlock(&clp->cl_lock);
267 }
167} 268}
168 269
169/* 270/*
@@ -175,26 +276,25 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct
175 struct nfs_client *clp = server->nfs_client; 276 struct nfs_client *clp = server->nfs_client;
176 struct nfs4_state_owner *sp, *new; 277 struct nfs4_state_owner *sp, *new;
177 278
178 get_rpccred(cred);
179 new = nfs4_alloc_state_owner();
180 spin_lock(&clp->cl_lock); 279 spin_lock(&clp->cl_lock);
181 sp = nfs4_find_state_owner(clp, cred); 280 sp = nfs4_find_state_owner(server, cred);
182 if (sp == NULL)
183 sp = nfs4_client_grab_unused(clp, cred);
184 if (sp == NULL && new != NULL) {
185 list_add(&new->so_list, &clp->cl_state_owners);
186 new->so_client = clp;
187 new->so_id = nfs4_alloc_lockowner_id(clp);
188 new->so_cred = cred;
189 sp = new;
190 new = NULL;
191 }
192 spin_unlock(&clp->cl_lock); 281 spin_unlock(&clp->cl_lock);
193 kfree(new);
194 if (sp != NULL) 282 if (sp != NULL)
195 return sp; 283 return sp;
196 put_rpccred(cred); 284 new = nfs4_alloc_state_owner();
197 return NULL; 285 if (new == NULL)
286 return NULL;
287 new->so_client = clp;
288 new->so_server = server;
289 new->so_cred = cred;
290 spin_lock(&clp->cl_lock);
291 sp = nfs4_insert_state_owner(clp, new);
292 spin_unlock(&clp->cl_lock);
293 if (sp == new)
294 get_rpccred(cred);
295 else
296 kfree(new);
297 return sp;
198} 298}
199 299
200/* 300/*
@@ -208,18 +308,7 @@ void nfs4_put_state_owner(struct nfs4_state_owner *sp)
208 308
209 if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock)) 309 if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
210 return; 310 return;
211 if (clp->cl_nunused >= OPENOWNER_POOL_SIZE) 311 nfs4_remove_state_owner(clp, sp);
212 goto out_free;
213 if (list_empty(&sp->so_list))
214 goto out_free;
215 list_move(&sp->so_list, &clp->cl_unused);
216 clp->cl_nunused++;
217 spin_unlock(&clp->cl_lock);
218 put_rpccred(cred);
219 cred = NULL;
220 return;
221out_free:
222 list_del(&sp->so_list);
223 spin_unlock(&clp->cl_lock); 312 spin_unlock(&clp->cl_lock);
224 put_rpccred(cred); 313 put_rpccred(cred);
225 kfree(sp); 314 kfree(sp);
@@ -236,6 +325,7 @@ nfs4_alloc_open_state(void)
236 atomic_set(&state->count, 1); 325 atomic_set(&state->count, 1);
237 INIT_LIST_HEAD(&state->lock_states); 326 INIT_LIST_HEAD(&state->lock_states);
238 spin_lock_init(&state->state_lock); 327 spin_lock_init(&state->state_lock);
328 seqlock_init(&state->seqlock);
239 return state; 329 return state;
240} 330}
241 331
@@ -263,13 +353,10 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner)
263 struct nfs4_state *state; 353 struct nfs4_state *state;
264 354
265 list_for_each_entry(state, &nfsi->open_states, inode_states) { 355 list_for_each_entry(state, &nfsi->open_states, inode_states) {
266 /* Is this in the process of being freed? */ 356 if (state->owner != owner)
267 if (state->state == 0)
268 continue; 357 continue;
269 if (state->owner == owner) { 358 if (atomic_inc_not_zero(&state->count))
270 atomic_inc(&state->count);
271 return state; 359 return state;
272 }
273 } 360 }
274 return NULL; 361 return NULL;
275} 362}
@@ -341,16 +428,15 @@ void nfs4_put_open_state(struct nfs4_state *state)
341/* 428/*
342 * Close the current file. 429 * Close the current file.
343 */ 430 */
344void nfs4_close_state(struct nfs4_state *state, mode_t mode) 431void nfs4_close_state(struct path *path, struct nfs4_state *state, mode_t mode)
345{ 432{
346 struct inode *inode = state->inode;
347 struct nfs4_state_owner *owner = state->owner; 433 struct nfs4_state_owner *owner = state->owner;
348 int oldstate, newstate = 0; 434 int call_close = 0;
435 int newstate;
349 436
350 atomic_inc(&owner->so_count); 437 atomic_inc(&owner->so_count);
351 /* Protect against nfs4_find_state() */ 438 /* Protect against nfs4_find_state() */
352 spin_lock(&owner->so_lock); 439 spin_lock(&owner->so_lock);
353 spin_lock(&inode->i_lock);
354 switch (mode & (FMODE_READ | FMODE_WRITE)) { 440 switch (mode & (FMODE_READ | FMODE_WRITE)) {
355 case FMODE_READ: 441 case FMODE_READ:
356 state->n_rdonly--; 442 state->n_rdonly--;
@@ -361,24 +447,29 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode)
361 case FMODE_READ|FMODE_WRITE: 447 case FMODE_READ|FMODE_WRITE:
362 state->n_rdwr--; 448 state->n_rdwr--;
363 } 449 }
364 oldstate = newstate = state->state; 450 newstate = FMODE_READ|FMODE_WRITE;
365 if (state->n_rdwr == 0) { 451 if (state->n_rdwr == 0) {
366 if (state->n_rdonly == 0) 452 if (state->n_rdonly == 0) {
367 newstate &= ~FMODE_READ; 453 newstate &= ~FMODE_READ;
368 if (state->n_wronly == 0) 454 call_close |= test_bit(NFS_O_RDONLY_STATE, &state->flags);
455 call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
456 }
457 if (state->n_wronly == 0) {
369 newstate &= ~FMODE_WRITE; 458 newstate &= ~FMODE_WRITE;
459 call_close |= test_bit(NFS_O_WRONLY_STATE, &state->flags);
460 call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
461 }
462 if (newstate == 0)
463 clear_bit(NFS_DELEGATED_STATE, &state->flags);
370 } 464 }
371 if (test_bit(NFS_DELEGATED_STATE, &state->flags)) { 465 nfs4_state_set_mode_locked(state, newstate);
372 nfs4_state_set_mode_locked(state, newstate);
373 oldstate = newstate;
374 }
375 spin_unlock(&inode->i_lock);
376 spin_unlock(&owner->so_lock); 466 spin_unlock(&owner->so_lock);
377 467
378 if (oldstate != newstate && nfs4_do_close(inode, state) == 0) 468 if (!call_close) {
379 return; 469 nfs4_put_open_state(state);
380 nfs4_put_open_state(state); 470 nfs4_put_state_owner(owner);
381 nfs4_put_state_owner(owner); 471 } else
472 nfs4_do_close(path, state);
382} 473}
383 474
384/* 475/*
@@ -415,12 +506,22 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
415 atomic_set(&lsp->ls_count, 1); 506 atomic_set(&lsp->ls_count, 1);
416 lsp->ls_owner = fl_owner; 507 lsp->ls_owner = fl_owner;
417 spin_lock(&clp->cl_lock); 508 spin_lock(&clp->cl_lock);
418 lsp->ls_id = nfs4_alloc_lockowner_id(clp); 509 nfs_alloc_unique_id(&clp->cl_lockowner_id, &lsp->ls_id, 1, 64);
419 spin_unlock(&clp->cl_lock); 510 spin_unlock(&clp->cl_lock);
420 INIT_LIST_HEAD(&lsp->ls_locks); 511 INIT_LIST_HEAD(&lsp->ls_locks);
421 return lsp; 512 return lsp;
422} 513}
423 514
515static void nfs4_free_lock_state(struct nfs4_lock_state *lsp)
516{
517 struct nfs_client *clp = lsp->ls_state->owner->so_client;
518
519 spin_lock(&clp->cl_lock);
520 nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id);
521 spin_unlock(&clp->cl_lock);
522 kfree(lsp);
523}
524
424/* 525/*
425 * Return a compatible lock_state. If no initialized lock_state structure 526 * Return a compatible lock_state. If no initialized lock_state structure
426 * exists, return an uninitialized one. 527 * exists, return an uninitialized one.
@@ -450,7 +551,8 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_
450 return NULL; 551 return NULL;
451 } 552 }
452 spin_unlock(&state->state_lock); 553 spin_unlock(&state->state_lock);
453 kfree(new); 554 if (new != NULL)
555 nfs4_free_lock_state(new);
454 return lsp; 556 return lsp;
455} 557}
456 558
@@ -471,7 +573,7 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
471 if (list_empty(&state->lock_states)) 573 if (list_empty(&state->lock_states))
472 clear_bit(LK_STATE_IN_USE, &state->flags); 574 clear_bit(LK_STATE_IN_USE, &state->flags);
473 spin_unlock(&state->state_lock); 575 spin_unlock(&state->state_lock);
474 kfree(lsp); 576 nfs4_free_lock_state(lsp);
475} 577}
476 578
477static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src) 579static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
@@ -513,8 +615,12 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
513void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner) 615void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner)
514{ 616{
515 struct nfs4_lock_state *lsp; 617 struct nfs4_lock_state *lsp;
618 int seq;
516 619
517 memcpy(dst, &state->stateid, sizeof(*dst)); 620 do {
621 seq = read_seqbegin(&state->seqlock);
622 memcpy(dst, &state->stateid, sizeof(*dst));
623 } while (read_seqretry(&state->seqlock, seq));
518 if (test_bit(LK_STATE_IN_USE, &state->flags) == 0) 624 if (test_bit(LK_STATE_IN_USE, &state->flags) == 0)
519 return; 625 return;
520 626
@@ -557,12 +663,18 @@ void nfs_free_seqid(struct nfs_seqid *seqid)
557 * failed with a seqid incrementing error - 663 * failed with a seqid incrementing error -
558 * see comments nfs_fs.h:seqid_mutating_error() 664 * see comments nfs_fs.h:seqid_mutating_error()
559 */ 665 */
560static inline void nfs_increment_seqid(int status, struct nfs_seqid *seqid) 666static void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
561{ 667{
562 switch (status) { 668 switch (status) {
563 case 0: 669 case 0:
564 break; 670 break;
565 case -NFS4ERR_BAD_SEQID: 671 case -NFS4ERR_BAD_SEQID:
672 if (seqid->sequence->flags & NFS_SEQID_CONFIRMED)
673 return;
674 printk(KERN_WARNING "NFS: v4 server returned a bad"
675 "sequence-id error on an"
676 "unconfirmed sequence %p!\n",
677 seqid->sequence);
566 case -NFS4ERR_STALE_CLIENTID: 678 case -NFS4ERR_STALE_CLIENTID:
567 case -NFS4ERR_STALE_STATEID: 679 case -NFS4ERR_STALE_STATEID:
568 case -NFS4ERR_BAD_STATEID: 680 case -NFS4ERR_BAD_STATEID:
@@ -586,7 +698,7 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid)
586 struct nfs4_state_owner, so_seqid); 698 struct nfs4_state_owner, so_seqid);
587 nfs4_drop_state_owner(sp); 699 nfs4_drop_state_owner(sp);
588 } 700 }
589 return nfs_increment_seqid(status, seqid); 701 nfs_increment_seqid(status, seqid);
590} 702}
591 703
592/* 704/*
@@ -596,7 +708,7 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid)
596 */ 708 */
597void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid) 709void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid)
598{ 710{
599 return nfs_increment_seqid(status, seqid); 711 nfs_increment_seqid(status, seqid);
600} 712}
601 713
602int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task) 714int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task)
@@ -748,15 +860,21 @@ out_err:
748static void nfs4_state_mark_reclaim(struct nfs_client *clp) 860static void nfs4_state_mark_reclaim(struct nfs_client *clp)
749{ 861{
750 struct nfs4_state_owner *sp; 862 struct nfs4_state_owner *sp;
863 struct rb_node *pos;
751 struct nfs4_state *state; 864 struct nfs4_state *state;
752 struct nfs4_lock_state *lock; 865 struct nfs4_lock_state *lock;
753 866
754 /* Reset all sequence ids to zero */ 867 /* Reset all sequence ids to zero */
755 list_for_each_entry(sp, &clp->cl_state_owners, so_list) { 868 for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
869 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
756 sp->so_seqid.counter = 0; 870 sp->so_seqid.counter = 0;
757 sp->so_seqid.flags = 0; 871 sp->so_seqid.flags = 0;
758 spin_lock(&sp->so_lock); 872 spin_lock(&sp->so_lock);
759 list_for_each_entry(state, &sp->so_states, open_states) { 873 list_for_each_entry(state, &sp->so_states, open_states) {
874 clear_bit(NFS_DELEGATED_STATE, &state->flags);
875 clear_bit(NFS_O_RDONLY_STATE, &state->flags);
876 clear_bit(NFS_O_WRONLY_STATE, &state->flags);
877 clear_bit(NFS_O_RDWR_STATE, &state->flags);
760 list_for_each_entry(lock, &state->lock_states, ls_locks) { 878 list_for_each_entry(lock, &state->lock_states, ls_locks) {
761 lock->ls_seqid.counter = 0; 879 lock->ls_seqid.counter = 0;
762 lock->ls_seqid.flags = 0; 880 lock->ls_seqid.flags = 0;
@@ -771,6 +889,7 @@ static int reclaimer(void *ptr)
771{ 889{
772 struct nfs_client *clp = ptr; 890 struct nfs_client *clp = ptr;
773 struct nfs4_state_owner *sp; 891 struct nfs4_state_owner *sp;
892 struct rb_node *pos;
774 struct nfs4_state_recovery_ops *ops; 893 struct nfs4_state_recovery_ops *ops;
775 struct rpc_cred *cred; 894 struct rpc_cred *cred;
776 int status = 0; 895 int status = 0;
@@ -816,7 +935,8 @@ restart_loop:
816 /* Mark all delegations for reclaim */ 935 /* Mark all delegations for reclaim */
817 nfs_delegation_mark_reclaim(clp); 936 nfs_delegation_mark_reclaim(clp);
818 /* Note: list is protected by exclusive lock on cl->cl_sem */ 937 /* Note: list is protected by exclusive lock on cl->cl_sem */
819 list_for_each_entry(sp, &clp->cl_state_owners, so_list) { 938 for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
939 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
820 status = nfs4_reclaim_open_state(ops, sp); 940 status = nfs4_reclaim_open_state(ops, sp);
821 if (status < 0) { 941 if (status < 0) {
822 if (status == -NFS4ERR_NO_GRACE) { 942 if (status == -NFS4ERR_NO_GRACE) {
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 8003c91ccb9..c08738441f7 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -68,9 +68,10 @@ static int nfs4_stat_to_errno(int);
68#endif 68#endif
69 69
70/* lock,open owner id: 70/* lock,open owner id:
71 * we currently use size 1 (u32) out of (NFS4_OPAQUE_LIMIT >> 2) 71 * we currently use size 2 (u64) out of (NFS4_OPAQUE_LIMIT >> 2)
72 */ 72 */
73#define owner_id_maxsz (1 + 1) 73#define open_owner_id_maxsz (1 + 4)
74#define lock_owner_id_maxsz (1 + 4)
74#define compound_encode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2)) 75#define compound_encode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2))
75#define compound_decode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2)) 76#define compound_decode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2))
76#define op_encode_hdr_maxsz (1) 77#define op_encode_hdr_maxsz (1)
@@ -87,9 +88,11 @@ static int nfs4_stat_to_errno(int);
87#define encode_getattr_maxsz (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz) 88#define encode_getattr_maxsz (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz)
88#define nfs4_name_maxsz (1 + ((3 + NFS4_MAXNAMLEN) >> 2)) 89#define nfs4_name_maxsz (1 + ((3 + NFS4_MAXNAMLEN) >> 2))
89#define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2)) 90#define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2))
91#define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
92#define nfs4_group_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
90/* This is based on getfattr, which uses the most attributes: */ 93/* This is based on getfattr, which uses the most attributes: */
91#define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \ 94#define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \
92 3 + 3 + 3 + 2 * nfs4_name_maxsz)) 95 3 + 3 + 3 + nfs4_owner_maxsz + nfs4_group_maxsz))
93#define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \ 96#define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \
94 nfs4_fattr_value_maxsz) 97 nfs4_fattr_value_maxsz)
95#define decode_getattr_maxsz (op_decode_hdr_maxsz + nfs4_fattr_maxsz) 98#define decode_getattr_maxsz (op_decode_hdr_maxsz + nfs4_fattr_maxsz)
@@ -116,8 +119,27 @@ static int nfs4_stat_to_errno(int);
116 3 + (NFS4_VERIFIER_SIZE >> 2)) 119 3 + (NFS4_VERIFIER_SIZE >> 2))
117#define decode_setclientid_confirm_maxsz \ 120#define decode_setclientid_confirm_maxsz \
118 (op_decode_hdr_maxsz) 121 (op_decode_hdr_maxsz)
119#define encode_lookup_maxsz (op_encode_hdr_maxsz + \ 122#define encode_lookup_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz)
120 1 + ((3 + NFS4_FHSIZE) >> 2)) 123#define decode_lookup_maxsz (op_decode_hdr_maxsz)
124#define encode_share_access_maxsz \
125 (2)
126#define encode_createmode_maxsz (1 + nfs4_fattr_maxsz)
127#define encode_opentype_maxsz (1 + encode_createmode_maxsz)
128#define encode_claim_null_maxsz (1 + nfs4_name_maxsz)
129#define encode_open_maxsz (op_encode_hdr_maxsz + \
130 2 + encode_share_access_maxsz + 2 + \
131 open_owner_id_maxsz + \
132 encode_opentype_maxsz + \
133 encode_claim_null_maxsz)
134#define decode_ace_maxsz (3 + nfs4_owner_maxsz)
135#define decode_delegation_maxsz (1 + XDR_QUADLEN(NFS4_STATEID_SIZE) + 1 + \
136 decode_ace_maxsz)
137#define decode_change_info_maxsz (5)
138#define decode_open_maxsz (op_decode_hdr_maxsz + \
139 XDR_QUADLEN(NFS4_STATEID_SIZE) + \
140 decode_change_info_maxsz + 1 + \
141 nfs4_fattr_bitmap_maxsz + \
142 decode_delegation_maxsz)
121#define encode_remove_maxsz (op_encode_hdr_maxsz + \ 143#define encode_remove_maxsz (op_encode_hdr_maxsz + \
122 nfs4_name_maxsz) 144 nfs4_name_maxsz)
123#define encode_rename_maxsz (op_encode_hdr_maxsz + \ 145#define encode_rename_maxsz (op_encode_hdr_maxsz + \
@@ -134,9 +156,15 @@ static int nfs4_stat_to_errno(int);
134#define encode_create_maxsz (op_encode_hdr_maxsz + \ 156#define encode_create_maxsz (op_encode_hdr_maxsz + \
135 2 + nfs4_name_maxsz + \ 157 2 + nfs4_name_maxsz + \
136 nfs4_fattr_maxsz) 158 nfs4_fattr_maxsz)
137#define decode_create_maxsz (op_decode_hdr_maxsz + 8) 159#define decode_create_maxsz (op_decode_hdr_maxsz + \
160 decode_change_info_maxsz + \
161 nfs4_fattr_bitmap_maxsz)
138#define encode_delegreturn_maxsz (op_encode_hdr_maxsz + 4) 162#define encode_delegreturn_maxsz (op_encode_hdr_maxsz + 4)
139#define decode_delegreturn_maxsz (op_decode_hdr_maxsz) 163#define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
164#define encode_fs_locations_maxsz \
165 (encode_getattr_maxsz)
166#define decode_fs_locations_maxsz \
167 (0)
140#define NFS4_enc_compound_sz (1024) /* XXX: large enough? */ 168#define NFS4_enc_compound_sz (1024) /* XXX: large enough? */
141#define NFS4_dec_compound_sz (1024) /* XXX: large enough? */ 169#define NFS4_dec_compound_sz (1024) /* XXX: large enough? */
142#define NFS4_enc_read_sz (compound_encode_hdr_maxsz + \ 170#define NFS4_enc_read_sz (compound_encode_hdr_maxsz + \
@@ -174,16 +202,21 @@ static int nfs4_stat_to_errno(int);
174 op_decode_hdr_maxsz + 2 + \ 202 op_decode_hdr_maxsz + 2 + \
175 decode_getattr_maxsz) 203 decode_getattr_maxsz)
176#define NFS4_enc_open_sz (compound_encode_hdr_maxsz + \ 204#define NFS4_enc_open_sz (compound_encode_hdr_maxsz + \
177 encode_putfh_maxsz + \ 205 encode_putfh_maxsz + \
178 op_encode_hdr_maxsz + \ 206 encode_savefh_maxsz + \
179 13 + 3 + 2 + 64 + \ 207 encode_open_maxsz + \
180 encode_getattr_maxsz + \ 208 encode_getfh_maxsz + \
181 encode_getfh_maxsz) 209 encode_getattr_maxsz + \
210 encode_restorefh_maxsz + \
211 encode_getattr_maxsz)
182#define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \ 212#define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \
183 decode_putfh_maxsz + \ 213 decode_putfh_maxsz + \
184 op_decode_hdr_maxsz + 4 + 5 + 2 + 3 + \ 214 decode_savefh_maxsz + \
185 decode_getattr_maxsz + \ 215 decode_open_maxsz + \
186 decode_getfh_maxsz) 216 decode_getfh_maxsz + \
217 decode_getattr_maxsz + \
218 decode_restorefh_maxsz + \
219 decode_getattr_maxsz)
187#define NFS4_enc_open_confirm_sz \ 220#define NFS4_enc_open_confirm_sz \
188 (compound_encode_hdr_maxsz + \ 221 (compound_encode_hdr_maxsz + \
189 encode_putfh_maxsz + \ 222 encode_putfh_maxsz + \
@@ -193,12 +226,12 @@ static int nfs4_stat_to_errno(int);
193 op_decode_hdr_maxsz + 4) 226 op_decode_hdr_maxsz + 4)
194#define NFS4_enc_open_noattr_sz (compound_encode_hdr_maxsz + \ 227#define NFS4_enc_open_noattr_sz (compound_encode_hdr_maxsz + \
195 encode_putfh_maxsz + \ 228 encode_putfh_maxsz + \
196 op_encode_hdr_maxsz + \ 229 encode_open_maxsz + \
197 11) 230 encode_getattr_maxsz)
198#define NFS4_dec_open_noattr_sz (compound_decode_hdr_maxsz + \ 231#define NFS4_dec_open_noattr_sz (compound_decode_hdr_maxsz + \
199 decode_putfh_maxsz + \ 232 decode_putfh_maxsz + \
200 op_decode_hdr_maxsz + \ 233 decode_open_maxsz + \
201 4 + 5 + 2 + 3) 234 decode_getattr_maxsz)
202#define NFS4_enc_open_downgrade_sz \ 235#define NFS4_enc_open_downgrade_sz \
203 (compound_encode_hdr_maxsz + \ 236 (compound_encode_hdr_maxsz + \
204 encode_putfh_maxsz + \ 237 encode_putfh_maxsz + \
@@ -256,19 +289,19 @@ static int nfs4_stat_to_errno(int);
256 op_encode_hdr_maxsz + \ 289 op_encode_hdr_maxsz + \
257 1 + 1 + 2 + 2 + \ 290 1 + 1 + 2 + 2 + \
258 1 + 4 + 1 + 2 + \ 291 1 + 4 + 1 + 2 + \
259 owner_id_maxsz) 292 lock_owner_id_maxsz)
260#define NFS4_dec_lock_sz (compound_decode_hdr_maxsz + \ 293#define NFS4_dec_lock_sz (compound_decode_hdr_maxsz + \
261 decode_putfh_maxsz + \ 294 decode_putfh_maxsz + \
262 decode_getattr_maxsz + \ 295 decode_getattr_maxsz + \
263 op_decode_hdr_maxsz + \ 296 op_decode_hdr_maxsz + \
264 2 + 2 + 1 + 2 + \ 297 2 + 2 + 1 + 2 + \
265 owner_id_maxsz) 298 lock_owner_id_maxsz)
266#define NFS4_enc_lockt_sz (compound_encode_hdr_maxsz + \ 299#define NFS4_enc_lockt_sz (compound_encode_hdr_maxsz + \
267 encode_putfh_maxsz + \ 300 encode_putfh_maxsz + \
268 encode_getattr_maxsz + \ 301 encode_getattr_maxsz + \
269 op_encode_hdr_maxsz + \ 302 op_encode_hdr_maxsz + \
270 1 + 2 + 2 + 2 + \ 303 1 + 2 + 2 + 2 + \
271 owner_id_maxsz) 304 lock_owner_id_maxsz)
272#define NFS4_dec_lockt_sz (NFS4_dec_lock_sz) 305#define NFS4_dec_lockt_sz (NFS4_dec_lock_sz)
273#define NFS4_enc_locku_sz (compound_encode_hdr_maxsz + \ 306#define NFS4_enc_locku_sz (compound_encode_hdr_maxsz + \
274 encode_putfh_maxsz + \ 307 encode_putfh_maxsz + \
@@ -298,7 +331,7 @@ static int nfs4_stat_to_errno(int);
298 encode_getfh_maxsz) 331 encode_getfh_maxsz)
299#define NFS4_dec_lookup_sz (compound_decode_hdr_maxsz + \ 332#define NFS4_dec_lookup_sz (compound_decode_hdr_maxsz + \
300 decode_putfh_maxsz + \ 333 decode_putfh_maxsz + \
301 op_decode_hdr_maxsz + \ 334 decode_lookup_maxsz + \
302 decode_getattr_maxsz + \ 335 decode_getattr_maxsz + \
303 decode_getfh_maxsz) 336 decode_getfh_maxsz)
304#define NFS4_enc_lookup_root_sz (compound_encode_hdr_maxsz + \ 337#define NFS4_enc_lookup_root_sz (compound_encode_hdr_maxsz + \
@@ -417,12 +450,13 @@ static int nfs4_stat_to_errno(int);
417#define NFS4_enc_fs_locations_sz \ 450#define NFS4_enc_fs_locations_sz \
418 (compound_encode_hdr_maxsz + \ 451 (compound_encode_hdr_maxsz + \
419 encode_putfh_maxsz + \ 452 encode_putfh_maxsz + \
420 encode_getattr_maxsz) 453 encode_lookup_maxsz + \
454 encode_fs_locations_maxsz)
421#define NFS4_dec_fs_locations_sz \ 455#define NFS4_dec_fs_locations_sz \
422 (compound_decode_hdr_maxsz + \ 456 (compound_decode_hdr_maxsz + \
423 decode_putfh_maxsz + \ 457 decode_putfh_maxsz + \
424 op_decode_hdr_maxsz + \ 458 decode_lookup_maxsz + \
425 nfs4_fattr_bitmap_maxsz) 459 decode_fs_locations_maxsz)
426 460
427static struct { 461static struct {
428 unsigned int mode; 462 unsigned int mode;
@@ -793,13 +827,14 @@ static int encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args)
793 WRITE64(nfs4_lock_length(args->fl)); 827 WRITE64(nfs4_lock_length(args->fl));
794 WRITE32(args->new_lock_owner); 828 WRITE32(args->new_lock_owner);
795 if (args->new_lock_owner){ 829 if (args->new_lock_owner){
796 RESERVE_SPACE(4+NFS4_STATEID_SIZE+20); 830 RESERVE_SPACE(4+NFS4_STATEID_SIZE+32);
797 WRITE32(args->open_seqid->sequence->counter); 831 WRITE32(args->open_seqid->sequence->counter);
798 WRITEMEM(args->open_stateid->data, NFS4_STATEID_SIZE); 832 WRITEMEM(args->open_stateid->data, NFS4_STATEID_SIZE);
799 WRITE32(args->lock_seqid->sequence->counter); 833 WRITE32(args->lock_seqid->sequence->counter);
800 WRITE64(args->lock_owner.clientid); 834 WRITE64(args->lock_owner.clientid);
801 WRITE32(4); 835 WRITE32(16);
802 WRITE32(args->lock_owner.id); 836 WRITEMEM("lock id:", 8);
837 WRITE64(args->lock_owner.id);
803 } 838 }
804 else { 839 else {
805 RESERVE_SPACE(NFS4_STATEID_SIZE+4); 840 RESERVE_SPACE(NFS4_STATEID_SIZE+4);
@@ -814,14 +849,15 @@ static int encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *arg
814{ 849{
815 __be32 *p; 850 __be32 *p;
816 851
817 RESERVE_SPACE(40); 852 RESERVE_SPACE(52);
818 WRITE32(OP_LOCKT); 853 WRITE32(OP_LOCKT);
819 WRITE32(nfs4_lock_type(args->fl, 0)); 854 WRITE32(nfs4_lock_type(args->fl, 0));
820 WRITE64(args->fl->fl_start); 855 WRITE64(args->fl->fl_start);
821 WRITE64(nfs4_lock_length(args->fl)); 856 WRITE64(nfs4_lock_length(args->fl));
822 WRITE64(args->lock_owner.clientid); 857 WRITE64(args->lock_owner.clientid);
823 WRITE32(4); 858 WRITE32(16);
824 WRITE32(args->lock_owner.id); 859 WRITEMEM("lock id:", 8);
860 WRITE64(args->lock_owner.id);
825 861
826 return 0; 862 return 0;
827} 863}
@@ -886,10 +922,11 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena
886 WRITE32(OP_OPEN); 922 WRITE32(OP_OPEN);
887 WRITE32(arg->seqid->sequence->counter); 923 WRITE32(arg->seqid->sequence->counter);
888 encode_share_access(xdr, arg->open_flags); 924 encode_share_access(xdr, arg->open_flags);
889 RESERVE_SPACE(16); 925 RESERVE_SPACE(28);
890 WRITE64(arg->clientid); 926 WRITE64(arg->clientid);
891 WRITE32(4); 927 WRITE32(16);
892 WRITE32(arg->id); 928 WRITEMEM("open id:", 8);
929 WRITE64(arg->id);
893} 930}
894 931
895static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg) 932static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg)
@@ -1071,7 +1108,7 @@ static int encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args)
1071 1108
1072static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req) 1109static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req)
1073{ 1110{
1074 struct rpc_auth *auth = req->rq_task->tk_auth; 1111 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
1075 uint32_t attrs[2] = { 1112 uint32_t attrs[2] = {
1076 FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID, 1113 FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID,
1077 FATTR4_WORD1_MOUNTED_ON_FILEID, 1114 FATTR4_WORD1_MOUNTED_ON_FILEID,
@@ -1117,7 +1154,7 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
1117 1154
1118static int encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req) 1155static int encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req)
1119{ 1156{
1120 struct rpc_auth *auth = req->rq_task->tk_auth; 1157 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
1121 unsigned int replen; 1158 unsigned int replen;
1122 __be32 *p; 1159 __be32 *p;
1123 1160
@@ -1735,7 +1772,7 @@ out:
1735 */ 1772 */
1736static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) 1773static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
1737{ 1774{
1738 struct rpc_auth *auth = req->rq_task->tk_auth; 1775 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
1739 struct xdr_stream xdr; 1776 struct xdr_stream xdr;
1740 struct compound_hdr hdr = { 1777 struct compound_hdr hdr = {
1741 .nops = 2, 1778 .nops = 2,
@@ -1795,7 +1832,7 @@ nfs4_xdr_enc_getacl(struct rpc_rqst *req, __be32 *p,
1795 struct nfs_getaclargs *args) 1832 struct nfs_getaclargs *args)
1796{ 1833{
1797 struct xdr_stream xdr; 1834 struct xdr_stream xdr;
1798 struct rpc_auth *auth = req->rq_task->tk_auth; 1835 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
1799 struct compound_hdr hdr = { 1836 struct compound_hdr hdr = {
1800 .nops = 2, 1837 .nops = 2,
1801 }; 1838 };
@@ -2030,7 +2067,7 @@ static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs
2030 struct compound_hdr hdr = { 2067 struct compound_hdr hdr = {
2031 .nops = 3, 2068 .nops = 3,
2032 }; 2069 };
2033 struct rpc_auth *auth = req->rq_task->tk_auth; 2070 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
2034 int replen; 2071 int replen;
2035 int status; 2072 int status;
2036 2073
@@ -3269,7 +3306,7 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
3269static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) 3306static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
3270{ 3307{
3271 __be32 *p; 3308 __be32 *p;
3272 uint32_t bmlen; 3309 uint32_t savewords, bmlen, i;
3273 int status; 3310 int status;
3274 3311
3275 status = decode_op_hdr(xdr, OP_OPEN); 3312 status = decode_op_hdr(xdr, OP_OPEN);
@@ -3287,7 +3324,12 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
3287 goto xdr_error; 3324 goto xdr_error;
3288 3325
3289 READ_BUF(bmlen << 2); 3326 READ_BUF(bmlen << 2);
3290 p += bmlen; 3327 savewords = min_t(uint32_t, bmlen, NFS4_BITMAP_SIZE);
3328 for (i = 0; i < savewords; ++i)
3329 READ32(res->attrset[i]);
3330 for (; i < NFS4_BITMAP_SIZE; i++)
3331 res->attrset[i] = 0;
3332
3291 return decode_delegation(xdr, res); 3333 return decode_delegation(xdr, res);
3292xdr_error: 3334xdr_error:
3293 dprintk("%s: Bitmap too large! Length = %u\n", __FUNCTION__, bmlen); 3335 dprintk("%s: Bitmap too large! Length = %u\n", __FUNCTION__, bmlen);
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 49d1008ce1d..3490322d114 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -428,7 +428,7 @@ static int __init root_nfs_getport(int program, int version, int proto)
428 printk(KERN_NOTICE "Looking up port of RPC %d/%d on %u.%u.%u.%u\n", 428 printk(KERN_NOTICE "Looking up port of RPC %d/%d on %u.%u.%u.%u\n",
429 program, version, NIPQUAD(servaddr)); 429 program, version, NIPQUAD(servaddr));
430 set_sockaddr(&sin, servaddr, 0); 430 set_sockaddr(&sin, servaddr, 0);
431 return rpcb_getport_external(&sin, program, version, proto); 431 return rpcb_getport_sync(&sin, program, version, proto);
432} 432}
433 433
434 434
@@ -496,7 +496,8 @@ static int __init root_nfs_get_handle(void)
496 NFS_MNT3_VERSION : NFS_MNT_VERSION; 496 NFS_MNT3_VERSION : NFS_MNT_VERSION;
497 497
498 set_sockaddr(&sin, servaddr, htons(mount_port)); 498 set_sockaddr(&sin, servaddr, htons(mount_port));
499 status = nfsroot_mount(&sin, nfs_path, &fh, version, protocol); 499 status = nfs_mount((struct sockaddr *) &sin, sizeof(sin), NULL,
500 nfs_path, version, protocol, &fh);
500 if (status < 0) 501 if (status < 0)
501 printk(KERN_ERR "Root-NFS: Server returned error %d " 502 printk(KERN_ERR "Root-NFS: Server returned error %d "
502 "while mounting %s\n", status, nfs_path); 503 "while mounting %s\n", status, nfs_path);
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index c5bb51a29e8..f56dae5216f 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -85,9 +85,8 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
85 req->wb_offset = offset; 85 req->wb_offset = offset;
86 req->wb_pgbase = offset; 86 req->wb_pgbase = offset;
87 req->wb_bytes = count; 87 req->wb_bytes = count;
88 atomic_set(&req->wb_count, 1);
89 req->wb_context = get_nfs_open_context(ctx); 88 req->wb_context = get_nfs_open_context(ctx);
90 89 kref_init(&req->wb_kref);
91 return req; 90 return req;
92} 91}
93 92
@@ -109,30 +108,31 @@ void nfs_unlock_request(struct nfs_page *req)
109} 108}
110 109
111/** 110/**
112 * nfs_set_page_writeback_locked - Lock a request for writeback 111 * nfs_set_page_tag_locked - Tag a request as locked
113 * @req: 112 * @req:
114 */ 113 */
115int nfs_set_page_writeback_locked(struct nfs_page *req) 114static int nfs_set_page_tag_locked(struct nfs_page *req)
116{ 115{
117 struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode); 116 struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode);
118 117
119 if (!nfs_lock_request(req)) 118 if (!nfs_lock_request(req))
120 return 0; 119 return 0;
121 radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK); 120 radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
122 return 1; 121 return 1;
123} 122}
124 123
125/** 124/**
126 * nfs_clear_page_writeback - Unlock request and wake up sleepers 125 * nfs_clear_page_tag_locked - Clear request tag and wake up sleepers
127 */ 126 */
128void nfs_clear_page_writeback(struct nfs_page *req) 127void nfs_clear_page_tag_locked(struct nfs_page *req)
129{ 128{
130 struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode); 129 struct inode *inode = req->wb_context->path.dentry->d_inode;
130 struct nfs_inode *nfsi = NFS_I(inode);
131 131
132 if (req->wb_page != NULL) { 132 if (req->wb_page != NULL) {
133 spin_lock(&nfsi->req_lock); 133 spin_lock(&inode->i_lock);
134 radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK); 134 radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
135 spin_unlock(&nfsi->req_lock); 135 spin_unlock(&inode->i_lock);
136 } 136 }
137 nfs_unlock_request(req); 137 nfs_unlock_request(req);
138} 138}
@@ -160,11 +160,9 @@ void nfs_clear_request(struct nfs_page *req)
160 * 160 *
161 * Note: Should never be called with the spinlock held! 161 * Note: Should never be called with the spinlock held!
162 */ 162 */
163void 163static void nfs_free_request(struct kref *kref)
164nfs_release_request(struct nfs_page *req)
165{ 164{
166 if (!atomic_dec_and_test(&req->wb_count)) 165 struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
167 return;
168 166
169 /* Release struct file or cached credential */ 167 /* Release struct file or cached credential */
170 nfs_clear_request(req); 168 nfs_clear_request(req);
@@ -172,6 +170,11 @@ nfs_release_request(struct nfs_page *req)
172 nfs_page_free(req); 170 nfs_page_free(req);
173} 171}
174 172
173void nfs_release_request(struct nfs_page *req)
174{
175 kref_put(&req->wb_kref, nfs_free_request);
176}
177
175static int nfs_wait_bit_interruptible(void *word) 178static int nfs_wait_bit_interruptible(void *word)
176{ 179{
177 int ret = 0; 180 int ret = 0;
@@ -193,7 +196,7 @@ static int nfs_wait_bit_interruptible(void *word)
193int 196int
194nfs_wait_on_request(struct nfs_page *req) 197nfs_wait_on_request(struct nfs_page *req)
195{ 198{
196 struct rpc_clnt *clnt = NFS_CLIENT(req->wb_context->dentry->d_inode); 199 struct rpc_clnt *clnt = NFS_CLIENT(req->wb_context->path.dentry->d_inode);
197 sigset_t oldmask; 200 sigset_t oldmask;
198 int ret = 0; 201 int ret = 0;
199 202
@@ -379,20 +382,20 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
379/** 382/**
380 * nfs_scan_list - Scan a list for matching requests 383 * nfs_scan_list - Scan a list for matching requests
381 * @nfsi: NFS inode 384 * @nfsi: NFS inode
382 * @head: One of the NFS inode request lists
383 * @dst: Destination list 385 * @dst: Destination list
384 * @idx_start: lower bound of page->index to scan 386 * @idx_start: lower bound of page->index to scan
385 * @npages: idx_start + npages sets the upper bound to scan. 387 * @npages: idx_start + npages sets the upper bound to scan.
388 * @tag: tag to scan for
386 * 389 *
387 * Moves elements from one of the inode request lists. 390 * Moves elements from one of the inode request lists.
388 * If the number of requests is set to 0, the entire address_space 391 * If the number of requests is set to 0, the entire address_space
389 * starting at index idx_start, is scanned. 392 * starting at index idx_start, is scanned.
390 * The requests are *not* checked to ensure that they form a contiguous set. 393 * The requests are *not* checked to ensure that they form a contiguous set.
391 * You must be holding the inode's req_lock when calling this function 394 * You must be holding the inode's i_lock when calling this function
392 */ 395 */
393int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, 396int nfs_scan_list(struct nfs_inode *nfsi,
394 struct list_head *dst, pgoff_t idx_start, 397 struct list_head *dst, pgoff_t idx_start,
395 unsigned int npages) 398 unsigned int npages, int tag)
396{ 399{
397 struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES]; 400 struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
398 struct nfs_page *req; 401 struct nfs_page *req;
@@ -407,9 +410,9 @@ int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head,
407 idx_end = idx_start + npages - 1; 410 idx_end = idx_start + npages - 1;
408 411
409 for (;;) { 412 for (;;) {
410 found = radix_tree_gang_lookup(&nfsi->nfs_page_tree, 413 found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree,
411 (void **)&pgvec[0], idx_start, 414 (void **)&pgvec[0], idx_start,
412 NFS_SCAN_MAXENTRIES); 415 NFS_SCAN_MAXENTRIES, tag);
413 if (found <= 0) 416 if (found <= 0)
414 break; 417 break;
415 for (i = 0; i < found; i++) { 418 for (i = 0; i < found; i++) {
@@ -417,15 +420,18 @@ int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head,
417 if (req->wb_index > idx_end) 420 if (req->wb_index > idx_end)
418 goto out; 421 goto out;
419 idx_start = req->wb_index + 1; 422 idx_start = req->wb_index + 1;
420 if (req->wb_list_head != head) 423 if (nfs_set_page_tag_locked(req)) {
421 continue;
422 if (nfs_set_page_writeback_locked(req)) {
423 nfs_list_remove_request(req); 424 nfs_list_remove_request(req);
425 radix_tree_tag_clear(&nfsi->nfs_page_tree,
426 req->wb_index, tag);
424 nfs_list_add_request(req, dst); 427 nfs_list_add_request(req, dst);
425 res++; 428 res++;
429 if (res == INT_MAX)
430 goto out;
426 } 431 }
427 } 432 }
428 433 /* for latency reduction */
434 cond_resched_lock(&nfsi->vfs_inode.i_lock);
429 } 435 }
430out: 436out:
431 return res; 437 return res;
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 7bd7cb95c03..6ae2e58ed05 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -145,8 +145,8 @@ static void nfs_readpage_release(struct nfs_page *req)
145 unlock_page(req->wb_page); 145 unlock_page(req->wb_page);
146 146
147 dprintk("NFS: read done (%s/%Ld %d@%Ld)\n", 147 dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
148 req->wb_context->dentry->d_inode->i_sb->s_id, 148 req->wb_context->path.dentry->d_inode->i_sb->s_id,
149 (long long)NFS_FILEID(req->wb_context->dentry->d_inode), 149 (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
150 req->wb_bytes, 150 req->wb_bytes,
151 (long long)req_offset(req)); 151 (long long)req_offset(req));
152 nfs_clear_request(req); 152 nfs_clear_request(req);
@@ -164,7 +164,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
164 int flags; 164 int flags;
165 165
166 data->req = req; 166 data->req = req;
167 data->inode = inode = req->wb_context->dentry->d_inode; 167 data->inode = inode = req->wb_context->path.dentry->d_inode;
168 data->cred = req->wb_context->cred; 168 data->cred = req->wb_context->cred;
169 169
170 data->args.fh = NFS_FH(inode); 170 data->args.fh = NFS_FH(inode);
@@ -483,17 +483,19 @@ int nfs_readpage(struct file *file, struct page *page)
483 */ 483 */
484 error = nfs_wb_page(inode, page); 484 error = nfs_wb_page(inode, page);
485 if (error) 485 if (error)
486 goto out_error; 486 goto out_unlock;
487 if (PageUptodate(page))
488 goto out_unlock;
487 489
488 error = -ESTALE; 490 error = -ESTALE;
489 if (NFS_STALE(inode)) 491 if (NFS_STALE(inode))
490 goto out_error; 492 goto out_unlock;
491 493
492 if (file == NULL) { 494 if (file == NULL) {
493 error = -EBADF; 495 error = -EBADF;
494 ctx = nfs_find_open_context(inode, NULL, FMODE_READ); 496 ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
495 if (ctx == NULL) 497 if (ctx == NULL)
496 goto out_error; 498 goto out_unlock;
497 } else 499 } else
498 ctx = get_nfs_open_context((struct nfs_open_context *) 500 ctx = get_nfs_open_context((struct nfs_open_context *)
499 file->private_data); 501 file->private_data);
@@ -502,8 +504,7 @@ int nfs_readpage(struct file *file, struct page *page)
502 504
503 put_nfs_open_context(ctx); 505 put_nfs_open_context(ctx);
504 return error; 506 return error;
505 507out_unlock:
506out_error:
507 unlock_page(page); 508 unlock_page(page);
508 return error; 509 return error;
509} 510}
@@ -520,21 +521,32 @@ readpage_async_filler(void *data, struct page *page)
520 struct inode *inode = page->mapping->host; 521 struct inode *inode = page->mapping->host;
521 struct nfs_page *new; 522 struct nfs_page *new;
522 unsigned int len; 523 unsigned int len;
524 int error;
525
526 error = nfs_wb_page(inode, page);
527 if (error)
528 goto out_unlock;
529 if (PageUptodate(page))
530 goto out_unlock;
523 531
524 nfs_wb_page(inode, page);
525 len = nfs_page_length(page); 532 len = nfs_page_length(page);
526 if (len == 0) 533 if (len == 0)
527 return nfs_return_empty_page(page); 534 return nfs_return_empty_page(page);
535
528 new = nfs_create_request(desc->ctx, inode, page, 0, len); 536 new = nfs_create_request(desc->ctx, inode, page, 0, len);
529 if (IS_ERR(new)) { 537 if (IS_ERR(new))
530 SetPageError(page); 538 goto out_error;
531 unlock_page(page); 539
532 return PTR_ERR(new);
533 }
534 if (len < PAGE_CACHE_SIZE) 540 if (len < PAGE_CACHE_SIZE)
535 zero_user_page(page, len, PAGE_CACHE_SIZE - len, KM_USER0); 541 zero_user_page(page, len, PAGE_CACHE_SIZE - len, KM_USER0);
536 nfs_pageio_add_request(desc->pgio, new); 542 nfs_pageio_add_request(desc->pgio, new);
537 return 0; 543 return 0;
544out_error:
545 error = PTR_ERR(new);
546 SetPageError(page);
547out_unlock:
548 unlock_page(page);
549 return error;
538} 550}
539 551
540int nfs_readpages(struct file *filp, struct address_space *mapping, 552int nfs_readpages(struct file *filp, struct address_space *mapping,
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ca20d3cc260..a2b1af89ca1 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -45,6 +45,7 @@
45#include <linux/inet.h> 45#include <linux/inet.h>
46#include <linux/nfs_xdr.h> 46#include <linux/nfs_xdr.h>
47#include <linux/magic.h> 47#include <linux/magic.h>
48#include <linux/parser.h>
48 49
49#include <asm/system.h> 50#include <asm/system.h>
50#include <asm/uaccess.h> 51#include <asm/uaccess.h>
@@ -57,6 +58,167 @@
57 58
58#define NFSDBG_FACILITY NFSDBG_VFS 59#define NFSDBG_FACILITY NFSDBG_VFS
59 60
61
62struct nfs_parsed_mount_data {
63 int flags;
64 int rsize, wsize;
65 int timeo, retrans;
66 int acregmin, acregmax,
67 acdirmin, acdirmax;
68 int namlen;
69 unsigned int bsize;
70 unsigned int auth_flavor_len;
71 rpc_authflavor_t auth_flavors[1];
72 char *client_address;
73
74 struct {
75 struct sockaddr_in address;
76 unsigned int program;
77 unsigned int version;
78 unsigned short port;
79 int protocol;
80 } mount_server;
81
82 struct {
83 struct sockaddr_in address;
84 char *hostname;
85 char *export_path;
86 unsigned int program;
87 int protocol;
88 } nfs_server;
89};
90
91enum {
92 /* Mount options that take no arguments */
93 Opt_soft, Opt_hard,
94 Opt_intr, Opt_nointr,
95 Opt_posix, Opt_noposix,
96 Opt_cto, Opt_nocto,
97 Opt_ac, Opt_noac,
98 Opt_lock, Opt_nolock,
99 Opt_v2, Opt_v3,
100 Opt_udp, Opt_tcp,
101 Opt_acl, Opt_noacl,
102 Opt_rdirplus, Opt_nordirplus,
103 Opt_sharecache, Opt_nosharecache,
104
105 /* Mount options that take integer arguments */
106 Opt_port,
107 Opt_rsize, Opt_wsize, Opt_bsize,
108 Opt_timeo, Opt_retrans,
109 Opt_acregmin, Opt_acregmax,
110 Opt_acdirmin, Opt_acdirmax,
111 Opt_actimeo,
112 Opt_namelen,
113 Opt_mountport,
114 Opt_mountprog, Opt_mountvers,
115 Opt_nfsprog, Opt_nfsvers,
116
117 /* Mount options that take string arguments */
118 Opt_sec, Opt_proto, Opt_mountproto,
119 Opt_addr, Opt_mounthost, Opt_clientaddr,
120
121 /* Mount options that are ignored */
122 Opt_userspace, Opt_deprecated,
123
124 Opt_err
125};
126
127static match_table_t nfs_mount_option_tokens = {
128 { Opt_userspace, "bg" },
129 { Opt_userspace, "fg" },
130 { Opt_soft, "soft" },
131 { Opt_hard, "hard" },
132 { Opt_intr, "intr" },
133 { Opt_nointr, "nointr" },
134 { Opt_posix, "posix" },
135 { Opt_noposix, "noposix" },
136 { Opt_cto, "cto" },
137 { Opt_nocto, "nocto" },
138 { Opt_ac, "ac" },
139 { Opt_noac, "noac" },
140 { Opt_lock, "lock" },
141 { Opt_nolock, "nolock" },
142 { Opt_v2, "v2" },
143 { Opt_v3, "v3" },
144 { Opt_udp, "udp" },
145 { Opt_tcp, "tcp" },
146 { Opt_acl, "acl" },
147 { Opt_noacl, "noacl" },
148 { Opt_rdirplus, "rdirplus" },
149 { Opt_nordirplus, "nordirplus" },
150 { Opt_sharecache, "sharecache" },
151 { Opt_nosharecache, "nosharecache" },
152
153 { Opt_port, "port=%u" },
154 { Opt_rsize, "rsize=%u" },
155 { Opt_wsize, "wsize=%u" },
156 { Opt_bsize, "bsize=%u" },
157 { Opt_timeo, "timeo=%u" },
158 { Opt_retrans, "retrans=%u" },
159 { Opt_acregmin, "acregmin=%u" },
160 { Opt_acregmax, "acregmax=%u" },
161 { Opt_acdirmin, "acdirmin=%u" },
162 { Opt_acdirmax, "acdirmax=%u" },
163 { Opt_actimeo, "actimeo=%u" },
164 { Opt_userspace, "retry=%u" },
165 { Opt_namelen, "namlen=%u" },
166 { Opt_mountport, "mountport=%u" },
167 { Opt_mountprog, "mountprog=%u" },
168 { Opt_mountvers, "mountvers=%u" },
169 { Opt_nfsprog, "nfsprog=%u" },
170 { Opt_nfsvers, "nfsvers=%u" },
171 { Opt_nfsvers, "vers=%u" },
172
173 { Opt_sec, "sec=%s" },
174 { Opt_proto, "proto=%s" },
175 { Opt_mountproto, "mountproto=%s" },
176 { Opt_addr, "addr=%s" },
177 { Opt_clientaddr, "clientaddr=%s" },
178 { Opt_mounthost, "mounthost=%s" },
179
180 { Opt_err, NULL }
181};
182
183enum {
184 Opt_xprt_udp, Opt_xprt_tcp,
185
186 Opt_xprt_err
187};
188
189static match_table_t nfs_xprt_protocol_tokens = {
190 { Opt_xprt_udp, "udp" },
191 { Opt_xprt_tcp, "tcp" },
192
193 { Opt_xprt_err, NULL }
194};
195
196enum {
197 Opt_sec_none, Opt_sec_sys,
198 Opt_sec_krb5, Opt_sec_krb5i, Opt_sec_krb5p,
199 Opt_sec_lkey, Opt_sec_lkeyi, Opt_sec_lkeyp,
200 Opt_sec_spkm, Opt_sec_spkmi, Opt_sec_spkmp,
201
202 Opt_sec_err
203};
204
205static match_table_t nfs_secflavor_tokens = {
206 { Opt_sec_none, "none" },
207 { Opt_sec_none, "null" },
208 { Opt_sec_sys, "sys" },
209
210 { Opt_sec_krb5, "krb5" },
211 { Opt_sec_krb5i, "krb5i" },
212 { Opt_sec_krb5p, "krb5p" },
213
214 { Opt_sec_lkey, "lkey" },
215 { Opt_sec_lkeyi, "lkeyi" },
216 { Opt_sec_lkeyp, "lkeyp" },
217
218 { Opt_sec_err, NULL }
219};
220
221
60static void nfs_umount_begin(struct vfsmount *, int); 222static void nfs_umount_begin(struct vfsmount *, int);
61static int nfs_statfs(struct dentry *, struct kstatfs *); 223static int nfs_statfs(struct dentry *, struct kstatfs *);
62static int nfs_show_options(struct seq_file *, struct vfsmount *); 224static int nfs_show_options(struct seq_file *, struct vfsmount *);
@@ -263,11 +425,11 @@ static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour)
263 { RPC_AUTH_GSS_SPKM, "spkm" }, 425 { RPC_AUTH_GSS_SPKM, "spkm" },
264 { RPC_AUTH_GSS_SPKMI, "spkmi" }, 426 { RPC_AUTH_GSS_SPKMI, "spkmi" },
265 { RPC_AUTH_GSS_SPKMP, "spkmp" }, 427 { RPC_AUTH_GSS_SPKMP, "spkmp" },
266 { -1, "unknown" } 428 { UINT_MAX, "unknown" }
267 }; 429 };
268 int i; 430 int i;
269 431
270 for (i=0; sec_flavours[i].flavour != -1; i++) { 432 for (i = 0; sec_flavours[i].flavour != UINT_MAX; i++) {
271 if (sec_flavours[i].flavour == flavour) 433 if (sec_flavours[i].flavour == flavour)
272 break; 434 break;
273 } 435 }
@@ -291,6 +453,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
291 { NFS_MOUNT_NONLM, ",nolock", "" }, 453 { NFS_MOUNT_NONLM, ",nolock", "" },
292 { NFS_MOUNT_NOACL, ",noacl", "" }, 454 { NFS_MOUNT_NOACL, ",noacl", "" },
293 { NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" }, 455 { NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" },
456 { NFS_MOUNT_UNSHARED, ",nosharecache", ""},
294 { 0, NULL, NULL } 457 { 0, NULL, NULL }
295 }; 458 };
296 const struct proc_nfs_info *nfs_infop; 459 const struct proc_nfs_info *nfs_infop;
@@ -430,87 +593,641 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
430 */ 593 */
431static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags) 594static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags)
432{ 595{
596 struct nfs_server *server = NFS_SB(vfsmnt->mnt_sb);
597 struct rpc_clnt *rpc;
598
433 shrink_submounts(vfsmnt, &nfs_automount_list); 599 shrink_submounts(vfsmnt, &nfs_automount_list);
600
601 if (!(flags & MNT_FORCE))
602 return;
603 /* -EIO all pending I/O */
604 rpc = server->client_acl;
605 if (!IS_ERR(rpc))
606 rpc_killall_tasks(rpc);
607 rpc = server->client;
608 if (!IS_ERR(rpc))
609 rpc_killall_tasks(rpc);
434} 610}
435 611
436/* 612/*
437 * Validate the NFS2/NFS3 mount data 613 * Sanity-check a server address provided by the mount command
438 * - fills in the mount root filehandle
439 */ 614 */
440static int nfs_validate_mount_data(struct nfs_mount_data *data, 615static int nfs_verify_server_address(struct sockaddr *addr)
441 struct nfs_fh *mntfh)
442{ 616{
443 if (data == NULL) { 617 switch (addr->sa_family) {
444 dprintk("%s: missing data argument\n", __FUNCTION__); 618 case AF_INET: {
445 return -EINVAL; 619 struct sockaddr_in *sa = (struct sockaddr_in *) addr;
620 if (sa->sin_addr.s_addr != INADDR_ANY)
621 return 1;
622 break;
623 }
446 } 624 }
447 625
448 if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) { 626 return 0;
449 dprintk("%s: bad mount version\n", __FUNCTION__); 627}
450 return -EINVAL; 628
629/*
630 * Error-check and convert a string of mount options from user space into
631 * a data structure
632 */
633static int nfs_parse_mount_options(char *raw,
634 struct nfs_parsed_mount_data *mnt)
635{
636 char *p, *string;
637
638 if (!raw) {
639 dfprintk(MOUNT, "NFS: mount options string was NULL.\n");
640 return 1;
451 } 641 }
642 dfprintk(MOUNT, "NFS: nfs mount opts='%s'\n", raw);
452 643
453 switch (data->version) { 644 while ((p = strsep(&raw, ",")) != NULL) {
454 case 1: 645 substring_t args[MAX_OPT_ARGS];
455 data->namlen = 0; 646 int option, token;
456 case 2: 647
457 data->bsize = 0; 648 if (!*p)
458 case 3: 649 continue;
459 if (data->flags & NFS_MOUNT_VER3) { 650
460 dprintk("%s: mount structure version %d does not support NFSv3\n", 651 dfprintk(MOUNT, "NFS: parsing nfs mount option '%s'\n", p);
461 __FUNCTION__, 652
462 data->version); 653 token = match_token(p, nfs_mount_option_tokens, args);
463 return -EINVAL; 654 switch (token) {
655 case Opt_soft:
656 mnt->flags |= NFS_MOUNT_SOFT;
657 break;
658 case Opt_hard:
659 mnt->flags &= ~NFS_MOUNT_SOFT;
660 break;
661 case Opt_intr:
662 mnt->flags |= NFS_MOUNT_INTR;
663 break;
664 case Opt_nointr:
665 mnt->flags &= ~NFS_MOUNT_INTR;
666 break;
667 case Opt_posix:
668 mnt->flags |= NFS_MOUNT_POSIX;
669 break;
670 case Opt_noposix:
671 mnt->flags &= ~NFS_MOUNT_POSIX;
672 break;
673 case Opt_cto:
674 mnt->flags &= ~NFS_MOUNT_NOCTO;
675 break;
676 case Opt_nocto:
677 mnt->flags |= NFS_MOUNT_NOCTO;
678 break;
679 case Opt_ac:
680 mnt->flags &= ~NFS_MOUNT_NOAC;
681 break;
682 case Opt_noac:
683 mnt->flags |= NFS_MOUNT_NOAC;
684 break;
685 case Opt_lock:
686 mnt->flags &= ~NFS_MOUNT_NONLM;
687 break;
688 case Opt_nolock:
689 mnt->flags |= NFS_MOUNT_NONLM;
690 break;
691 case Opt_v2:
692 mnt->flags &= ~NFS_MOUNT_VER3;
693 break;
694 case Opt_v3:
695 mnt->flags |= NFS_MOUNT_VER3;
696 break;
697 case Opt_udp:
698 mnt->flags &= ~NFS_MOUNT_TCP;
699 mnt->nfs_server.protocol = IPPROTO_UDP;
700 mnt->timeo = 7;
701 mnt->retrans = 5;
702 break;
703 case Opt_tcp:
704 mnt->flags |= NFS_MOUNT_TCP;
705 mnt->nfs_server.protocol = IPPROTO_TCP;
706 mnt->timeo = 600;
707 mnt->retrans = 2;
708 break;
709 case Opt_acl:
710 mnt->flags &= ~NFS_MOUNT_NOACL;
711 break;
712 case Opt_noacl:
713 mnt->flags |= NFS_MOUNT_NOACL;
714 break;
715 case Opt_rdirplus:
716 mnt->flags &= ~NFS_MOUNT_NORDIRPLUS;
717 break;
718 case Opt_nordirplus:
719 mnt->flags |= NFS_MOUNT_NORDIRPLUS;
720 break;
721 case Opt_sharecache:
722 mnt->flags &= ~NFS_MOUNT_UNSHARED;
723 break;
724 case Opt_nosharecache:
725 mnt->flags |= NFS_MOUNT_UNSHARED;
726 break;
727
728 case Opt_port:
729 if (match_int(args, &option))
730 return 0;
731 if (option < 0 || option > 65535)
732 return 0;
733 mnt->nfs_server.address.sin_port = htonl(option);
734 break;
735 case Opt_rsize:
736 if (match_int(args, &mnt->rsize))
737 return 0;
738 break;
739 case Opt_wsize:
740 if (match_int(args, &mnt->wsize))
741 return 0;
742 break;
743 case Opt_bsize:
744 if (match_int(args, &option))
745 return 0;
746 if (option < 0)
747 return 0;
748 mnt->bsize = option;
749 break;
750 case Opt_timeo:
751 if (match_int(args, &mnt->timeo))
752 return 0;
753 break;
754 case Opt_retrans:
755 if (match_int(args, &mnt->retrans))
756 return 0;
757 break;
758 case Opt_acregmin:
759 if (match_int(args, &mnt->acregmin))
760 return 0;
761 break;
762 case Opt_acregmax:
763 if (match_int(args, &mnt->acregmax))
764 return 0;
765 break;
766 case Opt_acdirmin:
767 if (match_int(args, &mnt->acdirmin))
768 return 0;
769 break;
770 case Opt_acdirmax:
771 if (match_int(args, &mnt->acdirmax))
772 return 0;
773 break;
774 case Opt_actimeo:
775 if (match_int(args, &option))
776 return 0;
777 if (option < 0)
778 return 0;
779 mnt->acregmin =
780 mnt->acregmax =
781 mnt->acdirmin =
782 mnt->acdirmax = option;
783 break;
784 case Opt_namelen:
785 if (match_int(args, &mnt->namlen))
786 return 0;
787 break;
788 case Opt_mountport:
789 if (match_int(args, &option))
790 return 0;
791 if (option < 0 || option > 65535)
792 return 0;
793 mnt->mount_server.port = option;
794 break;
795 case Opt_mountprog:
796 if (match_int(args, &option))
797 return 0;
798 if (option < 0)
799 return 0;
800 mnt->mount_server.program = option;
801 break;
802 case Opt_mountvers:
803 if (match_int(args, &option))
804 return 0;
805 if (option < 0)
806 return 0;
807 mnt->mount_server.version = option;
808 break;
809 case Opt_nfsprog:
810 if (match_int(args, &option))
811 return 0;
812 if (option < 0)
813 return 0;
814 mnt->nfs_server.program = option;
815 break;
816 case Opt_nfsvers:
817 if (match_int(args, &option))
818 return 0;
819 switch (option) {
820 case 2:
821 mnt->flags &= ~NFS_MOUNT_VER3;
822 break;
823 case 3:
824 mnt->flags |= NFS_MOUNT_VER3;
825 break;
826 default:
827 goto out_unrec_vers;
464 } 828 }
465 data->root.size = NFS2_FHSIZE; 829 break;
466 memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); 830
467 case 4: 831 case Opt_sec:
468 if (data->flags & NFS_MOUNT_SECFLAVOUR) { 832 string = match_strdup(args);
469 dprintk("%s: mount structure version %d does not support strong security\n", 833 if (string == NULL)
470 __FUNCTION__, 834 goto out_nomem;
471 data->version); 835 token = match_token(string, nfs_secflavor_tokens, args);
472 return -EINVAL; 836 kfree(string);
837
838 /*
839 * The flags setting is for v2/v3. The flavor_len
840 * setting is for v4. v2/v3 also need to know the
841 * difference between NULL and UNIX.
842 */
843 switch (token) {
844 case Opt_sec_none:
845 mnt->flags &= ~NFS_MOUNT_SECFLAVOUR;
846 mnt->auth_flavor_len = 0;
847 mnt->auth_flavors[0] = RPC_AUTH_NULL;
848 break;
849 case Opt_sec_sys:
850 mnt->flags &= ~NFS_MOUNT_SECFLAVOUR;
851 mnt->auth_flavor_len = 0;
852 mnt->auth_flavors[0] = RPC_AUTH_UNIX;
853 break;
854 case Opt_sec_krb5:
855 mnt->flags |= NFS_MOUNT_SECFLAVOUR;
856 mnt->auth_flavor_len = 1;
857 mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5;
858 break;
859 case Opt_sec_krb5i:
860 mnt->flags |= NFS_MOUNT_SECFLAVOUR;
861 mnt->auth_flavor_len = 1;
862 mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5I;
863 break;
864 case Opt_sec_krb5p:
865 mnt->flags |= NFS_MOUNT_SECFLAVOUR;
866 mnt->auth_flavor_len = 1;
867 mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5P;
868 break;
869 case Opt_sec_lkey:
870 mnt->flags |= NFS_MOUNT_SECFLAVOUR;
871 mnt->auth_flavor_len = 1;
872 mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEY;
873 break;
874 case Opt_sec_lkeyi:
875 mnt->flags |= NFS_MOUNT_SECFLAVOUR;
876 mnt->auth_flavor_len = 1;
877 mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYI;
878 break;
879 case Opt_sec_lkeyp:
880 mnt->flags |= NFS_MOUNT_SECFLAVOUR;
881 mnt->auth_flavor_len = 1;
882 mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYP;
883 break;
884 case Opt_sec_spkm:
885 mnt->flags |= NFS_MOUNT_SECFLAVOUR;
886 mnt->auth_flavor_len = 1;
887 mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKM;
888 break;
889 case Opt_sec_spkmi:
890 mnt->flags |= NFS_MOUNT_SECFLAVOUR;
891 mnt->auth_flavor_len = 1;
892 mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMI;
893 break;
894 case Opt_sec_spkmp:
895 mnt->flags |= NFS_MOUNT_SECFLAVOUR;
896 mnt->auth_flavor_len = 1;
897 mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMP;
898 break;
899 default:
900 goto out_unrec_sec;
473 } 901 }
474 case 5: 902 break;
475 memset(data->context, 0, sizeof(data->context)); 903 case Opt_proto:
476 } 904 string = match_strdup(args);
905 if (string == NULL)
906 goto out_nomem;
907 token = match_token(string,
908 nfs_xprt_protocol_tokens, args);
909 kfree(string);
910
911 switch (token) {
912 case Opt_udp:
913 mnt->flags &= ~NFS_MOUNT_TCP;
914 mnt->nfs_server.protocol = IPPROTO_UDP;
915 mnt->timeo = 7;
916 mnt->retrans = 5;
917 break;
918 case Opt_tcp:
919 mnt->flags |= NFS_MOUNT_TCP;
920 mnt->nfs_server.protocol = IPPROTO_TCP;
921 mnt->timeo = 600;
922 mnt->retrans = 2;
923 break;
924 default:
925 goto out_unrec_xprt;
926 }
927 break;
928 case Opt_mountproto:
929 string = match_strdup(args);
930 if (string == NULL)
931 goto out_nomem;
932 token = match_token(string,
933 nfs_xprt_protocol_tokens, args);
934 kfree(string);
935
936 switch (token) {
937 case Opt_udp:
938 mnt->mount_server.protocol = IPPROTO_UDP;
939 break;
940 case Opt_tcp:
941 mnt->mount_server.protocol = IPPROTO_TCP;
942 break;
943 default:
944 goto out_unrec_xprt;
945 }
946 break;
947 case Opt_addr:
948 string = match_strdup(args);
949 if (string == NULL)
950 goto out_nomem;
951 mnt->nfs_server.address.sin_family = AF_INET;
952 mnt->nfs_server.address.sin_addr.s_addr =
953 in_aton(string);
954 kfree(string);
955 break;
956 case Opt_clientaddr:
957 string = match_strdup(args);
958 if (string == NULL)
959 goto out_nomem;
960 mnt->client_address = string;
961 break;
962 case Opt_mounthost:
963 string = match_strdup(args);
964 if (string == NULL)
965 goto out_nomem;
966 mnt->mount_server.address.sin_family = AF_INET;
967 mnt->mount_server.address.sin_addr.s_addr =
968 in_aton(string);
969 kfree(string);
970 break;
477 971
478 /* Set the pseudoflavor */ 972 case Opt_userspace:
479 if (!(data->flags & NFS_MOUNT_SECFLAVOUR)) 973 case Opt_deprecated:
480 data->pseudoflavor = RPC_AUTH_UNIX; 974 break;
481 975
482#ifndef CONFIG_NFS_V3 976 default:
483 /* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */ 977 goto out_unknown;
484 if (data->flags & NFS_MOUNT_VER3) { 978 }
485 dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__);
486 return -EPROTONOSUPPORT;
487 } 979 }
488#endif /* CONFIG_NFS_V3 */
489 980
490 /* We now require that the mount process passes the remote address */ 981 return 1;
491 if (data->addr.sin_addr.s_addr == INADDR_ANY) { 982
492 dprintk("%s: mount program didn't pass remote address!\n", 983out_nomem:
493 __FUNCTION__); 984 printk(KERN_INFO "NFS: not enough memory to parse option\n");
494 return -EINVAL; 985 return 0;
986
987out_unrec_vers:
988 printk(KERN_INFO "NFS: unrecognized NFS version number\n");
989 return 0;
990
991out_unrec_xprt:
992 printk(KERN_INFO "NFS: unrecognized transport protocol\n");
993 return 0;
994
995out_unrec_sec:
996 printk(KERN_INFO "NFS: unrecognized security flavor\n");
997 return 0;
998
999out_unknown:
1000 printk(KERN_INFO "NFS: unknown mount option: %s\n", p);
1001 return 0;
1002}
1003
1004/*
1005 * Use the remote server's MOUNT service to request the NFS file handle
1006 * corresponding to the provided path.
1007 */
1008static int nfs_try_mount(struct nfs_parsed_mount_data *args,
1009 struct nfs_fh *root_fh)
1010{
1011 struct sockaddr_in sin;
1012 int status;
1013
1014 if (args->mount_server.version == 0) {
1015 if (args->flags & NFS_MOUNT_VER3)
1016 args->mount_server.version = NFS_MNT3_VERSION;
1017 else
1018 args->mount_server.version = NFS_MNT_VERSION;
495 } 1019 }
496 1020
497 /* Prepare the root filehandle */ 1021 /*
498 if (data->flags & NFS_MOUNT_VER3) 1022 * Construct the mount server's address.
499 mntfh->size = data->root.size; 1023 */
1024 if (args->mount_server.address.sin_addr.s_addr != INADDR_ANY)
1025 sin = args->mount_server.address;
500 else 1026 else
501 mntfh->size = NFS2_FHSIZE; 1027 sin = args->nfs_server.address;
1028 if (args->mount_server.port == 0) {
1029 status = rpcb_getport_sync(&sin,
1030 args->mount_server.program,
1031 args->mount_server.version,
1032 args->mount_server.protocol);
1033 if (status < 0)
1034 goto out_err;
1035 sin.sin_port = htons(status);
1036 } else
1037 sin.sin_port = htons(args->mount_server.port);
1038
1039 /*
1040 * Now ask the mount server to map our export path
1041 * to a file handle.
1042 */
1043 status = nfs_mount((struct sockaddr *) &sin,
1044 sizeof(sin),
1045 args->nfs_server.hostname,
1046 args->nfs_server.export_path,
1047 args->mount_server.version,
1048 args->mount_server.protocol,
1049 root_fh);
1050 if (status < 0)
1051 goto out_err;
1052
1053 return status;
502 1054
503 if (mntfh->size > sizeof(mntfh->data)) { 1055out_err:
504 dprintk("%s: invalid root filehandle\n", __FUNCTION__); 1056 dfprintk(MOUNT, "NFS: unable to contact server on host "
505 return -EINVAL; 1057 NIPQUAD_FMT "\n", NIPQUAD(sin.sin_addr.s_addr));
1058 return status;
1059}
1060
1061/*
1062 * Validate the NFS2/NFS3 mount data
1063 * - fills in the mount root filehandle
1064 *
1065 * For option strings, user space handles the following behaviors:
1066 *
1067 * + DNS: mapping server host name to IP address ("addr=" option)
1068 *
1069 * + failure mode: how to behave if a mount request can't be handled
1070 * immediately ("fg/bg" option)
1071 *
1072 * + retry: how often to retry a mount request ("retry=" option)
1073 *
1074 * + breaking back: trying proto=udp after proto=tcp, v2 after v3,
1075 * mountproto=tcp after mountproto=udp, and so on
1076 *
1077 * XXX: as far as I can tell, changing the NFS program number is not
1078 * supported in the NFS client.
1079 */
1080static int nfs_validate_mount_data(struct nfs_mount_data **options,
1081 struct nfs_fh *mntfh,
1082 const char *dev_name)
1083{
1084 struct nfs_mount_data *data = *options;
1085
1086 if (data == NULL)
1087 goto out_no_data;
1088
1089 switch (data->version) {
1090 case 1:
1091 data->namlen = 0;
1092 case 2:
1093 data->bsize = 0;
1094 case 3:
1095 if (data->flags & NFS_MOUNT_VER3)
1096 goto out_no_v3;
1097 data->root.size = NFS2_FHSIZE;
1098 memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
1099 case 4:
1100 if (data->flags & NFS_MOUNT_SECFLAVOUR)
1101 goto out_no_sec;
1102 case 5:
1103 memset(data->context, 0, sizeof(data->context));
1104 case 6:
1105 if (data->flags & NFS_MOUNT_VER3)
1106 mntfh->size = data->root.size;
1107 else
1108 mntfh->size = NFS2_FHSIZE;
1109
1110 if (mntfh->size > sizeof(mntfh->data))
1111 goto out_invalid_fh;
1112
1113 memcpy(mntfh->data, data->root.data, mntfh->size);
1114 if (mntfh->size < sizeof(mntfh->data))
1115 memset(mntfh->data + mntfh->size, 0,
1116 sizeof(mntfh->data) - mntfh->size);
1117 break;
1118 default: {
1119 unsigned int len;
1120 char *c;
1121 int status;
1122 struct nfs_parsed_mount_data args = {
1123 .flags = (NFS_MOUNT_VER3 | NFS_MOUNT_TCP),
1124 .rsize = NFS_MAX_FILE_IO_SIZE,
1125 .wsize = NFS_MAX_FILE_IO_SIZE,
1126 .timeo = 600,
1127 .retrans = 2,
1128 .acregmin = 3,
1129 .acregmax = 60,
1130 .acdirmin = 30,
1131 .acdirmax = 60,
1132 .mount_server.protocol = IPPROTO_UDP,
1133 .mount_server.program = NFS_MNT_PROGRAM,
1134 .nfs_server.protocol = IPPROTO_TCP,
1135 .nfs_server.program = NFS_PROGRAM,
1136 };
1137
1138 if (nfs_parse_mount_options((char *) *options, &args) == 0)
1139 return -EINVAL;
1140
1141 data = kzalloc(sizeof(*data), GFP_KERNEL);
1142 if (data == NULL)
1143 return -ENOMEM;
1144
1145 /*
1146 * NB: after this point, caller will free "data"
1147 * if we return an error
1148 */
1149 *options = data;
1150
1151 c = strchr(dev_name, ':');
1152 if (c == NULL)
1153 return -EINVAL;
1154 len = c - dev_name - 1;
1155 if (len > sizeof(data->hostname))
1156 return -EINVAL;
1157 strncpy(data->hostname, dev_name, len);
1158 args.nfs_server.hostname = data->hostname;
1159
1160 c++;
1161 if (strlen(c) > NFS_MAXPATHLEN)
1162 return -EINVAL;
1163 args.nfs_server.export_path = c;
1164
1165 status = nfs_try_mount(&args, mntfh);
1166 if (status)
1167 return -EINVAL;
1168
1169 /*
1170 * Translate to nfs_mount_data, which nfs_fill_super
1171 * can deal with.
1172 */
1173 data->version = 6;
1174 data->flags = args.flags;
1175 data->rsize = args.rsize;
1176 data->wsize = args.wsize;
1177 data->timeo = args.timeo;
1178 data->retrans = args.retrans;
1179 data->acregmin = args.acregmin;
1180 data->acregmax = args.acregmax;
1181 data->acdirmin = args.acdirmin;
1182 data->acdirmax = args.acdirmax;
1183 data->addr = args.nfs_server.address;
1184 data->namlen = args.namlen;
1185 data->bsize = args.bsize;
1186 data->pseudoflavor = args.auth_flavors[0];
1187
1188 break;
1189 }
506 } 1190 }
507 1191
508 memcpy(mntfh->data, data->root.data, mntfh->size); 1192 if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
509 if (mntfh->size < sizeof(mntfh->data)) 1193 data->pseudoflavor = RPC_AUTH_UNIX;
510 memset(mntfh->data + mntfh->size, 0, 1194
511 sizeof(mntfh->data) - mntfh->size); 1195#ifndef CONFIG_NFS_V3
1196 if (data->flags & NFS_MOUNT_VER3)
1197 goto out_v3_not_compiled;
1198#endif /* !CONFIG_NFS_V3 */
1199
1200 if (!nfs_verify_server_address((struct sockaddr *) &data->addr))
1201 goto out_no_address;
512 1202
513 return 0; 1203 return 0;
1204
1205out_no_data:
1206 dfprintk(MOUNT, "NFS: mount program didn't pass any mount data\n");
1207 return -EINVAL;
1208
1209out_no_v3:
1210 dfprintk(MOUNT, "NFS: nfs_mount_data version %d does not support v3\n",
1211 data->version);
1212 return -EINVAL;
1213
1214out_no_sec:
1215 dfprintk(MOUNT, "NFS: nfs_mount_data version supports only AUTH_SYS\n");
1216 return -EINVAL;
1217
1218#ifndef CONFIG_NFS_V3
1219out_v3_not_compiled:
1220 dfprintk(MOUNT, "NFS: NFSv3 is not compiled into kernel\n");
1221 return -EPROTONOSUPPORT;
1222#endif /* !CONFIG_NFS_V3 */
1223
1224out_no_address:
1225 dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n");
1226 return -EINVAL;
1227
1228out_invalid_fh:
1229 dfprintk(MOUNT, "NFS: invalid root filehandle\n");
1230 return -EINVAL;
514} 1231}
515 1232
516/* 1233/*
@@ -600,13 +1317,51 @@ static int nfs_compare_super(struct super_block *sb, void *data)
600{ 1317{
601 struct nfs_server *server = data, *old = NFS_SB(sb); 1318 struct nfs_server *server = data, *old = NFS_SB(sb);
602 1319
603 if (old->nfs_client != server->nfs_client) 1320 if (memcmp(&old->nfs_client->cl_addr,
1321 &server->nfs_client->cl_addr,
1322 sizeof(old->nfs_client->cl_addr)) != 0)
1323 return 0;
1324 /* Note: NFS_MOUNT_UNSHARED == NFS4_MOUNT_UNSHARED */
1325 if (old->flags & NFS_MOUNT_UNSHARED)
604 return 0; 1326 return 0;
605 if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0) 1327 if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0)
606 return 0; 1328 return 0;
607 return 1; 1329 return 1;
608} 1330}
609 1331
1332#define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS)
1333
1334static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags)
1335{
1336 const struct nfs_server *a = s->s_fs_info;
1337 const struct rpc_clnt *clnt_a = a->client;
1338 const struct rpc_clnt *clnt_b = b->client;
1339
1340 if ((s->s_flags & NFS_MS_MASK) != (flags & NFS_MS_MASK))
1341 goto Ebusy;
1342 if (a->nfs_client != b->nfs_client)
1343 goto Ebusy;
1344 if (a->flags != b->flags)
1345 goto Ebusy;
1346 if (a->wsize != b->wsize)
1347 goto Ebusy;
1348 if (a->rsize != b->rsize)
1349 goto Ebusy;
1350 if (a->acregmin != b->acregmin)
1351 goto Ebusy;
1352 if (a->acregmax != b->acregmax)
1353 goto Ebusy;
1354 if (a->acdirmin != b->acdirmin)
1355 goto Ebusy;
1356 if (a->acdirmax != b->acdirmax)
1357 goto Ebusy;
1358 if (clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor)
1359 goto Ebusy;
1360 return 0;
1361Ebusy:
1362 return -EBUSY;
1363}
1364
610static int nfs_get_sb(struct file_system_type *fs_type, 1365static int nfs_get_sb(struct file_system_type *fs_type,
611 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) 1366 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
612{ 1367{
@@ -615,30 +1370,37 @@ static int nfs_get_sb(struct file_system_type *fs_type,
615 struct nfs_fh mntfh; 1370 struct nfs_fh mntfh;
616 struct nfs_mount_data *data = raw_data; 1371 struct nfs_mount_data *data = raw_data;
617 struct dentry *mntroot; 1372 struct dentry *mntroot;
1373 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
618 int error; 1374 int error;
619 1375
620 /* Validate the mount data */ 1376 /* Validate the mount data */
621 error = nfs_validate_mount_data(data, &mntfh); 1377 error = nfs_validate_mount_data(&data, &mntfh, dev_name);
622 if (error < 0) 1378 if (error < 0)
623 return error; 1379 goto out;
624 1380
625 /* Get a volume representation */ 1381 /* Get a volume representation */
626 server = nfs_create_server(data, &mntfh); 1382 server = nfs_create_server(data, &mntfh);
627 if (IS_ERR(server)) { 1383 if (IS_ERR(server)) {
628 error = PTR_ERR(server); 1384 error = PTR_ERR(server);
629 goto out_err_noserver; 1385 goto out;
630 } 1386 }
631 1387
1388 if (server->flags & NFS_MOUNT_UNSHARED)
1389 compare_super = NULL;
1390
632 /* Get a superblock - note that we may end up sharing one that already exists */ 1391 /* Get a superblock - note that we may end up sharing one that already exists */
633 s = sget(fs_type, nfs_compare_super, nfs_set_super, server); 1392 s = sget(fs_type, compare_super, nfs_set_super, server);
634 if (IS_ERR(s)) { 1393 if (IS_ERR(s)) {
635 error = PTR_ERR(s); 1394 error = PTR_ERR(s);
636 goto out_err_nosb; 1395 goto out_err_nosb;
637 } 1396 }
638 1397
639 if (s->s_fs_info != server) { 1398 if (s->s_fs_info != server) {
1399 error = nfs_compare_mount_options(s, server, flags);
640 nfs_free_server(server); 1400 nfs_free_server(server);
641 server = NULL; 1401 server = NULL;
1402 if (error < 0)
1403 goto error_splat_super;
642 } 1404 }
643 1405
644 if (!s->s_root) { 1406 if (!s->s_root) {
@@ -656,17 +1418,21 @@ static int nfs_get_sb(struct file_system_type *fs_type,
656 s->s_flags |= MS_ACTIVE; 1418 s->s_flags |= MS_ACTIVE;
657 mnt->mnt_sb = s; 1419 mnt->mnt_sb = s;
658 mnt->mnt_root = mntroot; 1420 mnt->mnt_root = mntroot;
659 return 0; 1421 error = 0;
1422
1423out:
1424 if (data != raw_data)
1425 kfree(data);
1426 return error;
660 1427
661out_err_nosb: 1428out_err_nosb:
662 nfs_free_server(server); 1429 nfs_free_server(server);
663out_err_noserver: 1430 goto out;
664 return error;
665 1431
666error_splat_super: 1432error_splat_super:
667 up_write(&s->s_umount); 1433 up_write(&s->s_umount);
668 deactivate_super(s); 1434 deactivate_super(s);
669 return error; 1435 goto out;
670} 1436}
671 1437
672/* 1438/*
@@ -691,6 +1457,7 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
691 struct super_block *s; 1457 struct super_block *s;
692 struct nfs_server *server; 1458 struct nfs_server *server;
693 struct dentry *mntroot; 1459 struct dentry *mntroot;
1460 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
694 int error; 1461 int error;
695 1462
696 dprintk("--> nfs_xdev_get_sb()\n"); 1463 dprintk("--> nfs_xdev_get_sb()\n");
@@ -702,16 +1469,22 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
702 goto out_err_noserver; 1469 goto out_err_noserver;
703 } 1470 }
704 1471
1472 if (server->flags & NFS_MOUNT_UNSHARED)
1473 compare_super = NULL;
1474
705 /* Get a superblock - note that we may end up sharing one that already exists */ 1475 /* Get a superblock - note that we may end up sharing one that already exists */
706 s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); 1476 s = sget(&nfs_fs_type, compare_super, nfs_set_super, server);
707 if (IS_ERR(s)) { 1477 if (IS_ERR(s)) {
708 error = PTR_ERR(s); 1478 error = PTR_ERR(s);
709 goto out_err_nosb; 1479 goto out_err_nosb;
710 } 1480 }
711 1481
712 if (s->s_fs_info != server) { 1482 if (s->s_fs_info != server) {
1483 error = nfs_compare_mount_options(s, server, flags);
713 nfs_free_server(server); 1484 nfs_free_server(server);
714 server = NULL; 1485 server = NULL;
1486 if (error < 0)
1487 goto error_splat_super;
715 } 1488 }
716 1489
717 if (!s->s_root) { 1490 if (!s->s_root) {
@@ -772,25 +1545,164 @@ static void nfs4_fill_super(struct super_block *sb)
772 nfs_initialise_sb(sb); 1545 nfs_initialise_sb(sb);
773} 1546}
774 1547
775static void *nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen) 1548/*
1549 * Validate NFSv4 mount options
1550 */
1551static int nfs4_validate_mount_data(struct nfs4_mount_data **options,
1552 const char *dev_name,
1553 struct sockaddr_in *addr,
1554 rpc_authflavor_t *authflavour,
1555 char **hostname,
1556 char **mntpath,
1557 char **ip_addr)
776{ 1558{
777 void *p = NULL; 1559 struct nfs4_mount_data *data = *options;
778 1560 char *c;
779 if (!src->len) 1561
780 return ERR_PTR(-EINVAL); 1562 if (data == NULL)
781 if (src->len < maxlen) 1563 goto out_no_data;
782 maxlen = src->len; 1564
783 if (dst == NULL) { 1565 switch (data->version) {
784 p = dst = kmalloc(maxlen + 1, GFP_KERNEL); 1566 case 1:
785 if (p == NULL) 1567 if (data->host_addrlen != sizeof(*addr))
786 return ERR_PTR(-ENOMEM); 1568 goto out_no_address;
787 } 1569 if (copy_from_user(addr, data->host_addr, sizeof(*addr)))
788 if (copy_from_user(dst, src->data, maxlen)) { 1570 return -EFAULT;
789 kfree(p); 1571 if (addr->sin_port == 0)
790 return ERR_PTR(-EFAULT); 1572 addr->sin_port = htons(NFS_PORT);
1573 if (!nfs_verify_server_address((struct sockaddr *) addr))
1574 goto out_no_address;
1575
1576 switch (data->auth_flavourlen) {
1577 case 0:
1578 *authflavour = RPC_AUTH_UNIX;
1579 break;
1580 case 1:
1581 if (copy_from_user(authflavour, data->auth_flavours,
1582 sizeof(*authflavour)))
1583 return -EFAULT;
1584 break;
1585 default:
1586 goto out_inval_auth;
1587 }
1588
1589 c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN);
1590 if (IS_ERR(c))
1591 return PTR_ERR(c);
1592 *hostname = c;
1593
1594 c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN);
1595 if (IS_ERR(c))
1596 return PTR_ERR(c);
1597 *mntpath = c;
1598 dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *mntpath);
1599
1600 c = strndup_user(data->client_addr.data, 16);
1601 if (IS_ERR(c))
1602 return PTR_ERR(c);
1603 *ip_addr = c;
1604
1605 break;
1606 default: {
1607 unsigned int len;
1608 struct nfs_parsed_mount_data args = {
1609 .rsize = NFS_MAX_FILE_IO_SIZE,
1610 .wsize = NFS_MAX_FILE_IO_SIZE,
1611 .timeo = 600,
1612 .retrans = 2,
1613 .acregmin = 3,
1614 .acregmax = 60,
1615 .acdirmin = 30,
1616 .acdirmax = 60,
1617 .nfs_server.protocol = IPPROTO_TCP,
1618 };
1619
1620 if (nfs_parse_mount_options((char *) *options, &args) == 0)
1621 return -EINVAL;
1622
1623 if (!nfs_verify_server_address((struct sockaddr *)
1624 &args.nfs_server.address))
1625 return -EINVAL;
1626 *addr = args.nfs_server.address;
1627
1628 switch (args.auth_flavor_len) {
1629 case 0:
1630 *authflavour = RPC_AUTH_UNIX;
1631 break;
1632 case 1:
1633 *authflavour = (rpc_authflavor_t) args.auth_flavors[0];
1634 break;
1635 default:
1636 goto out_inval_auth;
1637 }
1638
1639 /*
1640 * Translate to nfs4_mount_data, which nfs4_fill_super
1641 * can deal with.
1642 */
1643 data = kzalloc(sizeof(*data), GFP_KERNEL);
1644 if (data == NULL)
1645 return -ENOMEM;
1646 *options = data;
1647
1648 data->version = 1;
1649 data->flags = args.flags & NFS4_MOUNT_FLAGMASK;
1650 data->rsize = args.rsize;
1651 data->wsize = args.wsize;
1652 data->timeo = args.timeo;
1653 data->retrans = args.retrans;
1654 data->acregmin = args.acregmin;
1655 data->acregmax = args.acregmax;
1656 data->acdirmin = args.acdirmin;
1657 data->acdirmax = args.acdirmax;
1658 data->proto = args.nfs_server.protocol;
1659
1660 /*
1661 * Split "dev_name" into "hostname:mntpath".
1662 */
1663 c = strchr(dev_name, ':');
1664 if (c == NULL)
1665 return -EINVAL;
1666 /* while calculating len, pretend ':' is '\0' */
1667 len = c - dev_name;
1668 if (len > NFS4_MAXNAMLEN)
1669 return -EINVAL;
1670 *hostname = kzalloc(len, GFP_KERNEL);
1671 if (*hostname == NULL)
1672 return -ENOMEM;
1673 strncpy(*hostname, dev_name, len - 1);
1674
1675 c++; /* step over the ':' */
1676 len = strlen(c);
1677 if (len > NFS4_MAXPATHLEN)
1678 return -EINVAL;
1679 *mntpath = kzalloc(len + 1, GFP_KERNEL);
1680 if (*mntpath == NULL)
1681 return -ENOMEM;
1682 strncpy(*mntpath, c, len);
1683
1684 dprintk("MNTPATH: %s\n", *mntpath);
1685
1686 *ip_addr = args.client_address;
1687
1688 break;
1689 }
791 } 1690 }
792 dst[maxlen] = '\0'; 1691
793 return dst; 1692 return 0;
1693
1694out_no_data:
1695 dfprintk(MOUNT, "NFS4: mount program didn't pass any mount data\n");
1696 return -EINVAL;
1697
1698out_inval_auth:
1699 dfprintk(MOUNT, "NFS4: Invalid number of RPC auth flavours %d\n",
1700 data->auth_flavourlen);
1701 return -EINVAL;
1702
1703out_no_address:
1704 dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n");
1705 return -EINVAL;
794} 1706}
795 1707
796/* 1708/*
@@ -806,81 +1718,29 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
806 rpc_authflavor_t authflavour; 1718 rpc_authflavor_t authflavour;
807 struct nfs_fh mntfh; 1719 struct nfs_fh mntfh;
808 struct dentry *mntroot; 1720 struct dentry *mntroot;
809 char *mntpath = NULL, *hostname = NULL, ip_addr[16]; 1721 char *mntpath = NULL, *hostname = NULL, *ip_addr = NULL;
810 void *p; 1722 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
811 int error; 1723 int error;
812 1724
813 if (data == NULL) { 1725 /* Validate the mount data */
814 dprintk("%s: missing data argument\n", __FUNCTION__); 1726 error = nfs4_validate_mount_data(&data, dev_name, &addr, &authflavour,
815 return -EINVAL; 1727 &hostname, &mntpath, &ip_addr);
816 } 1728 if (error < 0)
817 if (data->version <= 0 || data->version > NFS4_MOUNT_VERSION) { 1729 goto out;
818 dprintk("%s: bad mount version\n", __FUNCTION__);
819 return -EINVAL;
820 }
821
822 /* We now require that the mount process passes the remote address */
823 if (data->host_addrlen != sizeof(addr))
824 return -EINVAL;
825
826 if (copy_from_user(&addr, data->host_addr, sizeof(addr)))
827 return -EFAULT;
828
829 if (addr.sin_family != AF_INET ||
830 addr.sin_addr.s_addr == INADDR_ANY
831 ) {
832 dprintk("%s: mount program didn't pass remote IP address!\n",
833 __FUNCTION__);
834 return -EINVAL;
835 }
836 /* RFC3530: The default port for NFS is 2049 */
837 if (addr.sin_port == 0)
838 addr.sin_port = htons(NFS_PORT);
839
840 /* Grab the authentication type */
841 authflavour = RPC_AUTH_UNIX;
842 if (data->auth_flavourlen != 0) {
843 if (data->auth_flavourlen != 1) {
844 dprintk("%s: Invalid number of RPC auth flavours %d.\n",
845 __FUNCTION__, data->auth_flavourlen);
846 error = -EINVAL;
847 goto out_err_noserver;
848 }
849
850 if (copy_from_user(&authflavour, data->auth_flavours,
851 sizeof(authflavour))) {
852 error = -EFAULT;
853 goto out_err_noserver;
854 }
855 }
856
857 p = nfs_copy_user_string(NULL, &data->hostname, 256);
858 if (IS_ERR(p))
859 goto out_err;
860 hostname = p;
861
862 p = nfs_copy_user_string(NULL, &data->mnt_path, 1024);
863 if (IS_ERR(p))
864 goto out_err;
865 mntpath = p;
866
867 dprintk("MNTPATH: %s\n", mntpath);
868
869 p = nfs_copy_user_string(ip_addr, &data->client_addr,
870 sizeof(ip_addr) - 1);
871 if (IS_ERR(p))
872 goto out_err;
873 1730
874 /* Get a volume representation */ 1731 /* Get a volume representation */
875 server = nfs4_create_server(data, hostname, &addr, mntpath, ip_addr, 1732 server = nfs4_create_server(data, hostname, &addr, mntpath, ip_addr,
876 authflavour, &mntfh); 1733 authflavour, &mntfh);
877 if (IS_ERR(server)) { 1734 if (IS_ERR(server)) {
878 error = PTR_ERR(server); 1735 error = PTR_ERR(server);
879 goto out_err_noserver; 1736 goto out;
880 } 1737 }
881 1738
1739 if (server->flags & NFS4_MOUNT_UNSHARED)
1740 compare_super = NULL;
1741
882 /* Get a superblock - note that we may end up sharing one that already exists */ 1742 /* Get a superblock - note that we may end up sharing one that already exists */
883 s = sget(fs_type, nfs_compare_super, nfs_set_super, server); 1743 s = sget(fs_type, compare_super, nfs_set_super, server);
884 if (IS_ERR(s)) { 1744 if (IS_ERR(s)) {
885 error = PTR_ERR(s); 1745 error = PTR_ERR(s);
886 goto out_free; 1746 goto out_free;
@@ -906,25 +1766,22 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
906 s->s_flags |= MS_ACTIVE; 1766 s->s_flags |= MS_ACTIVE;
907 mnt->mnt_sb = s; 1767 mnt->mnt_sb = s;
908 mnt->mnt_root = mntroot; 1768 mnt->mnt_root = mntroot;
1769 error = 0;
1770
1771out:
1772 kfree(ip_addr);
909 kfree(mntpath); 1773 kfree(mntpath);
910 kfree(hostname); 1774 kfree(hostname);
911 return 0; 1775 return error;
912
913out_err:
914 error = PTR_ERR(p);
915 goto out_err_noserver;
916 1776
917out_free: 1777out_free:
918 nfs_free_server(server); 1778 nfs_free_server(server);
919out_err_noserver: 1779 goto out;
920 kfree(mntpath);
921 kfree(hostname);
922 return error;
923 1780
924error_splat_super: 1781error_splat_super:
925 up_write(&s->s_umount); 1782 up_write(&s->s_umount);
926 deactivate_super(s); 1783 deactivate_super(s);
927 goto out_err_noserver; 1784 goto out;
928} 1785}
929 1786
930static void nfs4_kill_super(struct super_block *sb) 1787static void nfs4_kill_super(struct super_block *sb)
@@ -949,6 +1806,7 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
949 struct super_block *s; 1806 struct super_block *s;
950 struct nfs_server *server; 1807 struct nfs_server *server;
951 struct dentry *mntroot; 1808 struct dentry *mntroot;
1809 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
952 int error; 1810 int error;
953 1811
954 dprintk("--> nfs4_xdev_get_sb()\n"); 1812 dprintk("--> nfs4_xdev_get_sb()\n");
@@ -960,8 +1818,11 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
960 goto out_err_noserver; 1818 goto out_err_noserver;
961 } 1819 }
962 1820
1821 if (server->flags & NFS4_MOUNT_UNSHARED)
1822 compare_super = NULL;
1823
963 /* Get a superblock - note that we may end up sharing one that already exists */ 1824 /* Get a superblock - note that we may end up sharing one that already exists */
964 s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); 1825 s = sget(&nfs_fs_type, compare_super, nfs_set_super, server);
965 if (IS_ERR(s)) { 1826 if (IS_ERR(s)) {
966 error = PTR_ERR(s); 1827 error = PTR_ERR(s);
967 goto out_err_nosb; 1828 goto out_err_nosb;
@@ -1016,6 +1877,7 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
1016 struct nfs_server *server; 1877 struct nfs_server *server;
1017 struct dentry *mntroot; 1878 struct dentry *mntroot;
1018 struct nfs_fh mntfh; 1879 struct nfs_fh mntfh;
1880 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
1019 int error; 1881 int error;
1020 1882
1021 dprintk("--> nfs4_referral_get_sb()\n"); 1883 dprintk("--> nfs4_referral_get_sb()\n");
@@ -1027,8 +1889,11 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
1027 goto out_err_noserver; 1889 goto out_err_noserver;
1028 } 1890 }
1029 1891
1892 if (server->flags & NFS4_MOUNT_UNSHARED)
1893 compare_super = NULL;
1894
1030 /* Get a superblock - note that we may end up sharing one that already exists */ 1895 /* Get a superblock - note that we may end up sharing one that already exists */
1031 s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); 1896 s = sget(&nfs_fs_type, compare_super, nfs_set_super, server);
1032 if (IS_ERR(s)) { 1897 if (IS_ERR(s)) {
1033 error = PTR_ERR(s); 1898 error = PTR_ERR(s);
1034 goto out_err_nosb; 1899 goto out_err_nosb;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index af344a158e0..73ac992ece8 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -117,19 +117,19 @@ static struct nfs_page *nfs_page_find_request_locked(struct page *page)
117 if (PagePrivate(page)) { 117 if (PagePrivate(page)) {
118 req = (struct nfs_page *)page_private(page); 118 req = (struct nfs_page *)page_private(page);
119 if (req != NULL) 119 if (req != NULL)
120 atomic_inc(&req->wb_count); 120 kref_get(&req->wb_kref);
121 } 121 }
122 return req; 122 return req;
123} 123}
124 124
125static struct nfs_page *nfs_page_find_request(struct page *page) 125static struct nfs_page *nfs_page_find_request(struct page *page)
126{ 126{
127 struct inode *inode = page->mapping->host;
127 struct nfs_page *req = NULL; 128 struct nfs_page *req = NULL;
128 spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock;
129 129
130 spin_lock(req_lock); 130 spin_lock(&inode->i_lock);
131 req = nfs_page_find_request_locked(page); 131 req = nfs_page_find_request_locked(page);
132 spin_unlock(req_lock); 132 spin_unlock(&inode->i_lock);
133 return req; 133 return req;
134} 134}
135 135
@@ -191,8 +191,6 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
191 } 191 }
192 /* Update file length */ 192 /* Update file length */
193 nfs_grow_file(page, offset, count); 193 nfs_grow_file(page, offset, count);
194 /* Set the PG_uptodate flag? */
195 nfs_mark_uptodate(page, offset, count);
196 nfs_unlock_request(req); 194 nfs_unlock_request(req);
197 return 0; 195 return 0;
198} 196}
@@ -253,16 +251,16 @@ static void nfs_end_page_writeback(struct page *page)
253static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, 251static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
254 struct page *page) 252 struct page *page)
255{ 253{
254 struct inode *inode = page->mapping->host;
255 struct nfs_inode *nfsi = NFS_I(inode);
256 struct nfs_page *req; 256 struct nfs_page *req;
257 struct nfs_inode *nfsi = NFS_I(page->mapping->host);
258 spinlock_t *req_lock = &nfsi->req_lock;
259 int ret; 257 int ret;
260 258
261 spin_lock(req_lock); 259 spin_lock(&inode->i_lock);
262 for(;;) { 260 for(;;) {
263 req = nfs_page_find_request_locked(page); 261 req = nfs_page_find_request_locked(page);
264 if (req == NULL) { 262 if (req == NULL) {
265 spin_unlock(req_lock); 263 spin_unlock(&inode->i_lock);
266 return 1; 264 return 1;
267 } 265 }
268 if (nfs_lock_request_dontget(req)) 266 if (nfs_lock_request_dontget(req))
@@ -272,28 +270,28 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
272 * succeed provided that someone hasn't already marked the 270 * succeed provided that someone hasn't already marked the
273 * request as dirty (in which case we don't care). 271 * request as dirty (in which case we don't care).
274 */ 272 */
275 spin_unlock(req_lock); 273 spin_unlock(&inode->i_lock);
276 ret = nfs_wait_on_request(req); 274 ret = nfs_wait_on_request(req);
277 nfs_release_request(req); 275 nfs_release_request(req);
278 if (ret != 0) 276 if (ret != 0)
279 return ret; 277 return ret;
280 spin_lock(req_lock); 278 spin_lock(&inode->i_lock);
281 } 279 }
282 if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) { 280 if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) {
283 /* This request is marked for commit */ 281 /* This request is marked for commit */
284 spin_unlock(req_lock); 282 spin_unlock(&inode->i_lock);
285 nfs_unlock_request(req); 283 nfs_unlock_request(req);
286 nfs_pageio_complete(pgio); 284 nfs_pageio_complete(pgio);
287 return 1; 285 return 1;
288 } 286 }
289 if (nfs_set_page_writeback(page) != 0) { 287 if (nfs_set_page_writeback(page) != 0) {
290 spin_unlock(req_lock); 288 spin_unlock(&inode->i_lock);
291 BUG(); 289 BUG();
292 } 290 }
293 radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, 291 radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index,
294 NFS_PAGE_TAG_WRITEBACK); 292 NFS_PAGE_TAG_LOCKED);
295 ret = test_bit(PG_NEED_FLUSH, &req->wb_flags); 293 ret = test_bit(PG_NEED_FLUSH, &req->wb_flags);
296 spin_unlock(req_lock); 294 spin_unlock(&inode->i_lock);
297 nfs_pageio_add_request(pgio, req); 295 nfs_pageio_add_request(pgio, req);
298 return ret; 296 return ret;
299} 297}
@@ -400,7 +398,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
400 if (PageDirty(req->wb_page)) 398 if (PageDirty(req->wb_page))
401 set_bit(PG_NEED_FLUSH, &req->wb_flags); 399 set_bit(PG_NEED_FLUSH, &req->wb_flags);
402 nfsi->npages++; 400 nfsi->npages++;
403 atomic_inc(&req->wb_count); 401 kref_get(&req->wb_kref);
404 return 0; 402 return 0;
405} 403}
406 404
@@ -409,12 +407,12 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
409 */ 407 */
410static void nfs_inode_remove_request(struct nfs_page *req) 408static void nfs_inode_remove_request(struct nfs_page *req)
411{ 409{
412 struct inode *inode = req->wb_context->dentry->d_inode; 410 struct inode *inode = req->wb_context->path.dentry->d_inode;
413 struct nfs_inode *nfsi = NFS_I(inode); 411 struct nfs_inode *nfsi = NFS_I(inode);
414 412
415 BUG_ON (!NFS_WBACK_BUSY(req)); 413 BUG_ON (!NFS_WBACK_BUSY(req));
416 414
417 spin_lock(&nfsi->req_lock); 415 spin_lock(&inode->i_lock);
418 set_page_private(req->wb_page, 0); 416 set_page_private(req->wb_page, 0);
419 ClearPagePrivate(req->wb_page); 417 ClearPagePrivate(req->wb_page);
420 radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); 418 radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
@@ -422,11 +420,11 @@ static void nfs_inode_remove_request(struct nfs_page *req)
422 __set_page_dirty_nobuffers(req->wb_page); 420 __set_page_dirty_nobuffers(req->wb_page);
423 nfsi->npages--; 421 nfsi->npages--;
424 if (!nfsi->npages) { 422 if (!nfsi->npages) {
425 spin_unlock(&nfsi->req_lock); 423 spin_unlock(&inode->i_lock);
426 nfs_end_data_update(inode); 424 nfs_end_data_update(inode);
427 iput(inode); 425 iput(inode);
428 } else 426 } else
429 spin_unlock(&nfsi->req_lock); 427 spin_unlock(&inode->i_lock);
430 nfs_clear_request(req); 428 nfs_clear_request(req);
431 nfs_release_request(req); 429 nfs_release_request(req);
432} 430}
@@ -457,14 +455,16 @@ nfs_dirty_request(struct nfs_page *req)
457static void 455static void
458nfs_mark_request_commit(struct nfs_page *req) 456nfs_mark_request_commit(struct nfs_page *req)
459{ 457{
460 struct inode *inode = req->wb_context->dentry->d_inode; 458 struct inode *inode = req->wb_context->path.dentry->d_inode;
461 struct nfs_inode *nfsi = NFS_I(inode); 459 struct nfs_inode *nfsi = NFS_I(inode);
462 460
463 spin_lock(&nfsi->req_lock); 461 spin_lock(&inode->i_lock);
464 nfs_list_add_request(req, &nfsi->commit);
465 nfsi->ncommit++; 462 nfsi->ncommit++;
466 set_bit(PG_NEED_COMMIT, &(req)->wb_flags); 463 set_bit(PG_NEED_COMMIT, &(req)->wb_flags);
467 spin_unlock(&nfsi->req_lock); 464 radix_tree_tag_set(&nfsi->nfs_page_tree,
465 req->wb_index,
466 NFS_PAGE_TAG_COMMIT);
467 spin_unlock(&inode->i_lock);
468 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); 468 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
469 __mark_inode_dirty(inode, I_DIRTY_DATASYNC); 469 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
470} 470}
@@ -526,18 +526,18 @@ static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, u
526 idx_end = idx_start + npages - 1; 526 idx_end = idx_start + npages - 1;
527 527
528 next = idx_start; 528 next = idx_start;
529 while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) { 529 while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_LOCKED)) {
530 if (req->wb_index > idx_end) 530 if (req->wb_index > idx_end)
531 break; 531 break;
532 532
533 next = req->wb_index + 1; 533 next = req->wb_index + 1;
534 BUG_ON(!NFS_WBACK_BUSY(req)); 534 BUG_ON(!NFS_WBACK_BUSY(req));
535 535
536 atomic_inc(&req->wb_count); 536 kref_get(&req->wb_kref);
537 spin_unlock(&nfsi->req_lock); 537 spin_unlock(&inode->i_lock);
538 error = nfs_wait_on_request(req); 538 error = nfs_wait_on_request(req);
539 nfs_release_request(req); 539 nfs_release_request(req);
540 spin_lock(&nfsi->req_lock); 540 spin_lock(&inode->i_lock);
541 if (error < 0) 541 if (error < 0)
542 return error; 542 return error;
543 res++; 543 res++;
@@ -577,10 +577,9 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, u
577 int res = 0; 577 int res = 0;
578 578
579 if (nfsi->ncommit != 0) { 579 if (nfsi->ncommit != 0) {
580 res = nfs_scan_list(nfsi, &nfsi->commit, dst, idx_start, npages); 580 res = nfs_scan_list(nfsi, dst, idx_start, npages,
581 NFS_PAGE_TAG_COMMIT);
581 nfsi->ncommit -= res; 582 nfsi->ncommit -= res;
582 if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit))
583 printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n");
584 } 583 }
585 return res; 584 return res;
586} 585}
@@ -603,7 +602,6 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
603{ 602{
604 struct address_space *mapping = page->mapping; 603 struct address_space *mapping = page->mapping;
605 struct inode *inode = mapping->host; 604 struct inode *inode = mapping->host;
606 struct nfs_inode *nfsi = NFS_I(inode);
607 struct nfs_page *req, *new = NULL; 605 struct nfs_page *req, *new = NULL;
608 pgoff_t rqend, end; 606 pgoff_t rqend, end;
609 607
@@ -613,13 +611,13 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
613 /* Loop over all inode entries and see if we find 611 /* Loop over all inode entries and see if we find
614 * A request for the page we wish to update 612 * A request for the page we wish to update
615 */ 613 */
616 spin_lock(&nfsi->req_lock); 614 spin_lock(&inode->i_lock);
617 req = nfs_page_find_request_locked(page); 615 req = nfs_page_find_request_locked(page);
618 if (req) { 616 if (req) {
619 if (!nfs_lock_request_dontget(req)) { 617 if (!nfs_lock_request_dontget(req)) {
620 int error; 618 int error;
621 619
622 spin_unlock(&nfsi->req_lock); 620 spin_unlock(&inode->i_lock);
623 error = nfs_wait_on_request(req); 621 error = nfs_wait_on_request(req);
624 nfs_release_request(req); 622 nfs_release_request(req);
625 if (error < 0) { 623 if (error < 0) {
@@ -629,7 +627,7 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
629 } 627 }
630 continue; 628 continue;
631 } 629 }
632 spin_unlock(&nfsi->req_lock); 630 spin_unlock(&inode->i_lock);
633 if (new) 631 if (new)
634 nfs_release_request(new); 632 nfs_release_request(new);
635 break; 633 break;
@@ -640,14 +638,14 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
640 nfs_lock_request_dontget(new); 638 nfs_lock_request_dontget(new);
641 error = nfs_inode_add_request(inode, new); 639 error = nfs_inode_add_request(inode, new);
642 if (error) { 640 if (error) {
643 spin_unlock(&nfsi->req_lock); 641 spin_unlock(&inode->i_lock);
644 nfs_unlock_request(new); 642 nfs_unlock_request(new);
645 return ERR_PTR(error); 643 return ERR_PTR(error);
646 } 644 }
647 spin_unlock(&nfsi->req_lock); 645 spin_unlock(&inode->i_lock);
648 return new; 646 return new;
649 } 647 }
650 spin_unlock(&nfsi->req_lock); 648 spin_unlock(&inode->i_lock);
651 649
652 new = nfs_create_request(ctx, inode, page, offset, bytes); 650 new = nfs_create_request(ctx, inode, page, offset, bytes);
653 if (IS_ERR(new)) 651 if (IS_ERR(new))
@@ -751,12 +749,17 @@ int nfs_updatepage(struct file *file, struct page *page,
751static void nfs_writepage_release(struct nfs_page *req) 749static void nfs_writepage_release(struct nfs_page *req)
752{ 750{
753 751
754 if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) { 752 if (PageError(req->wb_page)) {
753 nfs_end_page_writeback(req->wb_page);
754 nfs_inode_remove_request(req);
755 } else if (!nfs_reschedule_unstable_write(req)) {
756 /* Set the PG_uptodate flag */
757 nfs_mark_uptodate(req->wb_page, req->wb_pgbase, req->wb_bytes);
755 nfs_end_page_writeback(req->wb_page); 758 nfs_end_page_writeback(req->wb_page);
756 nfs_inode_remove_request(req); 759 nfs_inode_remove_request(req);
757 } else 760 } else
758 nfs_end_page_writeback(req->wb_page); 761 nfs_end_page_writeback(req->wb_page);
759 nfs_clear_page_writeback(req); 762 nfs_clear_page_tag_locked(req);
760} 763}
761 764
762static inline int flush_task_priority(int how) 765static inline int flush_task_priority(int how)
@@ -786,7 +789,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
786 * NB: take care not to mess about with data->commit et al. */ 789 * NB: take care not to mess about with data->commit et al. */
787 790
788 data->req = req; 791 data->req = req;
789 data->inode = inode = req->wb_context->dentry->d_inode; 792 data->inode = inode = req->wb_context->path.dentry->d_inode;
790 data->cred = req->wb_context->cred; 793 data->cred = req->wb_context->cred;
791 794
792 data->args.fh = NFS_FH(inode); 795 data->args.fh = NFS_FH(inode);
@@ -885,7 +888,7 @@ out_bad:
885 } 888 }
886 nfs_redirty_request(req); 889 nfs_redirty_request(req);
887 nfs_end_page_writeback(req->wb_page); 890 nfs_end_page_writeback(req->wb_page);
888 nfs_clear_page_writeback(req); 891 nfs_clear_page_tag_locked(req);
889 return -ENOMEM; 892 return -ENOMEM;
890} 893}
891 894
@@ -928,7 +931,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i
928 nfs_list_remove_request(req); 931 nfs_list_remove_request(req);
929 nfs_redirty_request(req); 932 nfs_redirty_request(req);
930 nfs_end_page_writeback(req->wb_page); 933 nfs_end_page_writeback(req->wb_page);
931 nfs_clear_page_writeback(req); 934 nfs_clear_page_tag_locked(req);
932 } 935 }
933 return -ENOMEM; 936 return -ENOMEM;
934} 937}
@@ -954,8 +957,8 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
954 struct page *page = req->wb_page; 957 struct page *page = req->wb_page;
955 958
956 dprintk("NFS: write (%s/%Ld %d@%Ld)", 959 dprintk("NFS: write (%s/%Ld %d@%Ld)",
957 req->wb_context->dentry->d_inode->i_sb->s_id, 960 req->wb_context->path.dentry->d_inode->i_sb->s_id,
958 (long long)NFS_FILEID(req->wb_context->dentry->d_inode), 961 (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
959 req->wb_bytes, 962 req->wb_bytes,
960 (long long)req_offset(req)); 963 (long long)req_offset(req));
961 964
@@ -970,9 +973,9 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
970 } 973 }
971 974
972 if (nfs_write_need_commit(data)) { 975 if (nfs_write_need_commit(data)) {
973 spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock; 976 struct inode *inode = page->mapping->host;
974 977
975 spin_lock(req_lock); 978 spin_lock(&inode->i_lock);
976 if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) { 979 if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) {
977 /* Do nothing we need to resend the writes */ 980 /* Do nothing we need to resend the writes */
978 } else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) { 981 } else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) {
@@ -983,7 +986,7 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
983 clear_bit(PG_NEED_COMMIT, &req->wb_flags); 986 clear_bit(PG_NEED_COMMIT, &req->wb_flags);
984 dprintk(" server reboot detected\n"); 987 dprintk(" server reboot detected\n");
985 } 988 }
986 spin_unlock(req_lock); 989 spin_unlock(&inode->i_lock);
987 } else 990 } else
988 dprintk(" OK\n"); 991 dprintk(" OK\n");
989 992
@@ -1020,8 +1023,8 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
1020 page = req->wb_page; 1023 page = req->wb_page;
1021 1024
1022 dprintk("NFS: write (%s/%Ld %d@%Ld)", 1025 dprintk("NFS: write (%s/%Ld %d@%Ld)",
1023 req->wb_context->dentry->d_inode->i_sb->s_id, 1026 req->wb_context->path.dentry->d_inode->i_sb->s_id,
1024 (long long)NFS_FILEID(req->wb_context->dentry->d_inode), 1027 (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
1025 req->wb_bytes, 1028 req->wb_bytes,
1026 (long long)req_offset(req)); 1029 (long long)req_offset(req));
1027 1030
@@ -1039,12 +1042,14 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
1039 dprintk(" marked for commit\n"); 1042 dprintk(" marked for commit\n");
1040 goto next; 1043 goto next;
1041 } 1044 }
1045 /* Set the PG_uptodate flag? */
1046 nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
1042 dprintk(" OK\n"); 1047 dprintk(" OK\n");
1043remove_request: 1048remove_request:
1044 nfs_end_page_writeback(page); 1049 nfs_end_page_writeback(page);
1045 nfs_inode_remove_request(req); 1050 nfs_inode_remove_request(req);
1046 next: 1051 next:
1047 nfs_clear_page_writeback(req); 1052 nfs_clear_page_tag_locked(req);
1048 } 1053 }
1049} 1054}
1050 1055
@@ -1157,7 +1162,7 @@ static void nfs_commit_rpcsetup(struct list_head *head,
1157 1162
1158 list_splice_init(head, &data->pages); 1163 list_splice_init(head, &data->pages);
1159 first = nfs_list_entry(data->pages.next); 1164 first = nfs_list_entry(data->pages.next);
1160 inode = first->wb_context->dentry->d_inode; 1165 inode = first->wb_context->path.dentry->d_inode;
1161 1166
1162 data->inode = inode; 1167 data->inode = inode;
1163 data->cred = first->wb_context->cred; 1168 data->cred = first->wb_context->cred;
@@ -1207,7 +1212,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1207 nfs_list_remove_request(req); 1212 nfs_list_remove_request(req);
1208 nfs_mark_request_commit(req); 1213 nfs_mark_request_commit(req);
1209 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); 1214 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
1210 nfs_clear_page_writeback(req); 1215 nfs_clear_page_tag_locked(req);
1211 } 1216 }
1212 return -ENOMEM; 1217 return -ENOMEM;
1213} 1218}
@@ -1234,8 +1239,8 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
1234 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); 1239 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
1235 1240
1236 dprintk("NFS: commit (%s/%Ld %d@%Ld)", 1241 dprintk("NFS: commit (%s/%Ld %d@%Ld)",
1237 req->wb_context->dentry->d_inode->i_sb->s_id, 1242 req->wb_context->path.dentry->d_inode->i_sb->s_id,
1238 (long long)NFS_FILEID(req->wb_context->dentry->d_inode), 1243 (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
1239 req->wb_bytes, 1244 req->wb_bytes,
1240 (long long)req_offset(req)); 1245 (long long)req_offset(req));
1241 if (task->tk_status < 0) { 1246 if (task->tk_status < 0) {
@@ -1249,6 +1254,9 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
1249 * returned by the server against all stored verfs. */ 1254 * returned by the server against all stored verfs. */
1250 if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) { 1255 if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) {
1251 /* We have a match */ 1256 /* We have a match */
1257 /* Set the PG_uptodate flag */
1258 nfs_mark_uptodate(req->wb_page, req->wb_pgbase,
1259 req->wb_bytes);
1252 nfs_inode_remove_request(req); 1260 nfs_inode_remove_request(req);
1253 dprintk(" OK\n"); 1261 dprintk(" OK\n");
1254 goto next; 1262 goto next;
@@ -1257,7 +1265,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
1257 dprintk(" mismatch\n"); 1265 dprintk(" mismatch\n");
1258 nfs_redirty_request(req); 1266 nfs_redirty_request(req);
1259 next: 1267 next:
1260 nfs_clear_page_writeback(req); 1268 nfs_clear_page_tag_locked(req);
1261 } 1269 }
1262} 1270}
1263 1271
@@ -1268,13 +1276,12 @@ static const struct rpc_call_ops nfs_commit_ops = {
1268 1276
1269int nfs_commit_inode(struct inode *inode, int how) 1277int nfs_commit_inode(struct inode *inode, int how)
1270{ 1278{
1271 struct nfs_inode *nfsi = NFS_I(inode);
1272 LIST_HEAD(head); 1279 LIST_HEAD(head);
1273 int res; 1280 int res;
1274 1281
1275 spin_lock(&nfsi->req_lock); 1282 spin_lock(&inode->i_lock);
1276 res = nfs_scan_commit(inode, &head, 0, 0); 1283 res = nfs_scan_commit(inode, &head, 0, 0);
1277 spin_unlock(&nfsi->req_lock); 1284 spin_unlock(&inode->i_lock);
1278 if (res) { 1285 if (res) {
1279 int error = nfs_commit_list(inode, &head, how); 1286 int error = nfs_commit_list(inode, &head, how);
1280 if (error < 0) 1287 if (error < 0)
@@ -1292,7 +1299,6 @@ static inline int nfs_commit_list(struct inode *inode, struct list_head *head, i
1292long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how) 1299long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how)
1293{ 1300{
1294 struct inode *inode = mapping->host; 1301 struct inode *inode = mapping->host;
1295 struct nfs_inode *nfsi = NFS_I(inode);
1296 pgoff_t idx_start, idx_end; 1302 pgoff_t idx_start, idx_end;
1297 unsigned int npages = 0; 1303 unsigned int npages = 0;
1298 LIST_HEAD(head); 1304 LIST_HEAD(head);
@@ -1314,7 +1320,7 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
1314 } 1320 }
1315 } 1321 }
1316 how &= ~FLUSH_NOCOMMIT; 1322 how &= ~FLUSH_NOCOMMIT;
1317 spin_lock(&nfsi->req_lock); 1323 spin_lock(&inode->i_lock);
1318 do { 1324 do {
1319 ret = nfs_wait_on_requests_locked(inode, idx_start, npages); 1325 ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
1320 if (ret != 0) 1326 if (ret != 0)
@@ -1325,18 +1331,19 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
1325 if (pages == 0) 1331 if (pages == 0)
1326 break; 1332 break;
1327 if (how & FLUSH_INVALIDATE) { 1333 if (how & FLUSH_INVALIDATE) {
1328 spin_unlock(&nfsi->req_lock); 1334 spin_unlock(&inode->i_lock);
1329 nfs_cancel_commit_list(&head); 1335 nfs_cancel_commit_list(&head);
1330 ret = pages; 1336 ret = pages;
1331 spin_lock(&nfsi->req_lock); 1337 spin_lock(&inode->i_lock);
1332 continue; 1338 continue;
1333 } 1339 }
1334 pages += nfs_scan_commit(inode, &head, 0, 0); 1340 pages += nfs_scan_commit(inode, &head, 0, 0);
1335 spin_unlock(&nfsi->req_lock); 1341 spin_unlock(&inode->i_lock);
1336 ret = nfs_commit_list(inode, &head, how); 1342 ret = nfs_commit_list(inode, &head, how);
1337 spin_lock(&nfsi->req_lock); 1343 spin_lock(&inode->i_lock);
1344
1338 } while (ret >= 0); 1345 } while (ret >= 0);
1339 spin_unlock(&nfsi->req_lock); 1346 spin_unlock(&inode->i_lock);
1340 return ret; 1347 return ret;
1341} 1348}
1342 1349
@@ -1430,7 +1437,6 @@ int nfs_set_page_dirty(struct page *page)
1430{ 1437{
1431 struct address_space *mapping = page->mapping; 1438 struct address_space *mapping = page->mapping;
1432 struct inode *inode; 1439 struct inode *inode;
1433 spinlock_t *req_lock;
1434 struct nfs_page *req; 1440 struct nfs_page *req;
1435 int ret; 1441 int ret;
1436 1442
@@ -1439,18 +1445,17 @@ int nfs_set_page_dirty(struct page *page)
1439 inode = mapping->host; 1445 inode = mapping->host;
1440 if (!inode) 1446 if (!inode)
1441 goto out_raced; 1447 goto out_raced;
1442 req_lock = &NFS_I(inode)->req_lock; 1448 spin_lock(&inode->i_lock);
1443 spin_lock(req_lock);
1444 req = nfs_page_find_request_locked(page); 1449 req = nfs_page_find_request_locked(page);
1445 if (req != NULL) { 1450 if (req != NULL) {
1446 /* Mark any existing write requests for flushing */ 1451 /* Mark any existing write requests for flushing */
1447 ret = !test_and_set_bit(PG_NEED_FLUSH, &req->wb_flags); 1452 ret = !test_and_set_bit(PG_NEED_FLUSH, &req->wb_flags);
1448 spin_unlock(req_lock); 1453 spin_unlock(&inode->i_lock);
1449 nfs_release_request(req); 1454 nfs_release_request(req);
1450 return ret; 1455 return ret;
1451 } 1456 }
1452 ret = __set_page_dirty_nobuffers(page); 1457 ret = __set_page_dirty_nobuffers(page);
1453 spin_unlock(req_lock); 1458 spin_unlock(&inode->i_lock);
1454 return ret; 1459 return ret;
1455out_raced: 1460out_raced:
1456 return !TestSetPageDirty(page); 1461 return !TestSetPageDirty(page);
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 864090edc28..5443c52b57a 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -394,7 +394,6 @@ nfsd4_probe_callback(struct nfs4_client *clp)
394 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], 394 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
395 .rpc_argp = clp, 395 .rpc_argp = clp,
396 }; 396 };
397 char clientname[16];
398 int status; 397 int status;
399 398
400 if (atomic_read(&cb->cb_set)) 399 if (atomic_read(&cb->cb_set))
@@ -417,11 +416,6 @@ nfsd4_probe_callback(struct nfs4_client *clp)
417 memset(program->stats, 0, sizeof(cb->cb_stat)); 416 memset(program->stats, 0, sizeof(cb->cb_stat));
418 program->stats->program = program; 417 program->stats->program = program;
419 418
420 /* Just here to make some printk's more useful: */
421 snprintf(clientname, sizeof(clientname),
422 "%u.%u.%u.%u", NIPQUAD(addr.sin_addr));
423 args.servername = clientname;
424
425 /* Create RPC client */ 419 /* Create RPC client */
426 cb->cb_client = rpc_create(&args); 420 cb->cb_client = rpc_create(&args);
427 if (IS_ERR(cb->cb_client)) { 421 if (IS_ERR(cb->cb_client)) {
@@ -429,29 +423,23 @@ nfsd4_probe_callback(struct nfs4_client *clp)
429 goto out_err; 423 goto out_err;
430 } 424 }
431 425
432 /* Kick rpciod, put the call on the wire. */
433 if (rpciod_up() != 0)
434 goto out_clnt;
435
436 /* the task holds a reference to the nfs4_client struct */ 426 /* the task holds a reference to the nfs4_client struct */
437 atomic_inc(&clp->cl_count); 427 atomic_inc(&clp->cl_count);
438 428
439 msg.rpc_cred = nfsd4_lookupcred(clp,0); 429 msg.rpc_cred = nfsd4_lookupcred(clp,0);
440 if (IS_ERR(msg.rpc_cred)) 430 if (IS_ERR(msg.rpc_cred))
441 goto out_rpciod; 431 goto out_release_clp;
442 status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_ASYNC, &nfs4_cb_null_ops, NULL); 432 status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_ASYNC, &nfs4_cb_null_ops, NULL);
443 put_rpccred(msg.rpc_cred); 433 put_rpccred(msg.rpc_cred);
444 434
445 if (status != 0) { 435 if (status != 0) {
446 dprintk("NFSD: asynchronous NFSPROC4_CB_NULL failed!\n"); 436 dprintk("NFSD: asynchronous NFSPROC4_CB_NULL failed!\n");
447 goto out_rpciod; 437 goto out_release_clp;
448 } 438 }
449 return; 439 return;
450 440
451out_rpciod: 441out_release_clp:
452 atomic_dec(&clp->cl_count); 442 atomic_dec(&clp->cl_count);
453 rpciod_down();
454out_clnt:
455 rpc_shutdown_client(cb->cb_client); 443 rpc_shutdown_client(cb->cb_client);
456out_err: 444out_err:
457 cb->cb_client = NULL; 445 cb->cb_client = NULL;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 3cc8ce422ab..8c52913d7cb 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -378,7 +378,6 @@ shutdown_callback_client(struct nfs4_client *clp)
378 if (clnt) { 378 if (clnt) {
379 clp->cl_callback.cb_client = NULL; 379 clp->cl_callback.cb_client = NULL;
380 rpc_shutdown_client(clnt); 380 rpc_shutdown_client(clnt);
381 rpciod_down();
382 } 381 }
383} 382}
384 383
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 8604e35bd48..945b1cedde2 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -879,6 +879,7 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
879 .u.data = rqstp, 879 .u.data = rqstp,
880 }; 880 };
881 881
882 rqstp->rq_resused = 1;
882 host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor); 883 host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
883 } else { 884 } else {
884 oldfs = get_fs(); 885 oldfs = get_fs();
diff --git a/fs/nls/Makefile b/fs/nls/Makefile
index a7ade138d68..f499dd7c390 100644
--- a/fs/nls/Makefile
+++ b/fs/nls/Makefile
@@ -36,11 +36,9 @@ obj-$(CONFIG_NLS_ISO8859_6) += nls_iso8859-6.o
36obj-$(CONFIG_NLS_ISO8859_7) += nls_iso8859-7.o 36obj-$(CONFIG_NLS_ISO8859_7) += nls_iso8859-7.o
37obj-$(CONFIG_NLS_ISO8859_8) += nls_cp1255.o 37obj-$(CONFIG_NLS_ISO8859_8) += nls_cp1255.o
38obj-$(CONFIG_NLS_ISO8859_9) += nls_iso8859-9.o 38obj-$(CONFIG_NLS_ISO8859_9) += nls_iso8859-9.o
39obj-$(CONFIG_NLS_ISO8859_10) += nls_iso8859-10.o
40obj-$(CONFIG_NLS_ISO8859_13) += nls_iso8859-13.o 39obj-$(CONFIG_NLS_ISO8859_13) += nls_iso8859-13.o
41obj-$(CONFIG_NLS_ISO8859_14) += nls_iso8859-14.o 40obj-$(CONFIG_NLS_ISO8859_14) += nls_iso8859-14.o
42obj-$(CONFIG_NLS_ISO8859_15) += nls_iso8859-15.o 41obj-$(CONFIG_NLS_ISO8859_15) += nls_iso8859-15.o
43obj-$(CONFIG_NLS_KOI8_R) += nls_koi8-r.o 42obj-$(CONFIG_NLS_KOI8_R) += nls_koi8-r.o
44obj-$(CONFIG_NLS_KOI8_U) += nls_koi8-u.o nls_koi8-ru.o 43obj-$(CONFIG_NLS_KOI8_U) += nls_koi8-u.o nls_koi8-ru.o
45obj-$(CONFIG_NLS_ABC) += nls_abc.o
46obj-$(CONFIG_NLS_UTF8) += nls_utf8.o 44obj-$(CONFIG_NLS_UTF8) += nls_utf8.o
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 19712a7d145..f5e11f4fa95 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -50,6 +50,8 @@
50#include "buffer_head_io.h" 50#include "buffer_head_io.h"
51 51
52static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc); 52static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc);
53static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt,
54 struct ocfs2_extent_block *eb);
53 55
54/* 56/*
55 * Structures which describe a path through a btree, and functions to 57 * Structures which describe a path through a btree, and functions to
@@ -117,6 +119,31 @@ static void ocfs2_free_path(struct ocfs2_path *path)
117} 119}
118 120
119/* 121/*
122 * All the elements of src into dest. After this call, src could be freed
123 * without affecting dest.
124 *
125 * Both paths should have the same root. Any non-root elements of dest
126 * will be freed.
127 */
128static void ocfs2_cp_path(struct ocfs2_path *dest, struct ocfs2_path *src)
129{
130 int i;
131
132 BUG_ON(path_root_bh(dest) != path_root_bh(src));
133 BUG_ON(path_root_el(dest) != path_root_el(src));
134
135 ocfs2_reinit_path(dest, 1);
136
137 for(i = 1; i < OCFS2_MAX_PATH_DEPTH; i++) {
138 dest->p_node[i].bh = src->p_node[i].bh;
139 dest->p_node[i].el = src->p_node[i].el;
140
141 if (dest->p_node[i].bh)
142 get_bh(dest->p_node[i].bh);
143 }
144}
145
146/*
120 * Make the *dest path the same as src and re-initialize src path to 147 * Make the *dest path the same as src and re-initialize src path to
121 * have a root only. 148 * have a root only.
122 */ 149 */
@@ -212,10 +239,41 @@ out:
212 return ret; 239 return ret;
213} 240}
214 241
242/*
243 * Return the index of the extent record which contains cluster #v_cluster.
244 * -1 is returned if it was not found.
245 *
246 * Should work fine on interior and exterior nodes.
247 */
248int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster)
249{
250 int ret = -1;
251 int i;
252 struct ocfs2_extent_rec *rec;
253 u32 rec_end, rec_start, clusters;
254
255 for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
256 rec = &el->l_recs[i];
257
258 rec_start = le32_to_cpu(rec->e_cpos);
259 clusters = ocfs2_rec_clusters(el, rec);
260
261 rec_end = rec_start + clusters;
262
263 if (v_cluster >= rec_start && v_cluster < rec_end) {
264 ret = i;
265 break;
266 }
267 }
268
269 return ret;
270}
271
215enum ocfs2_contig_type { 272enum ocfs2_contig_type {
216 CONTIG_NONE = 0, 273 CONTIG_NONE = 0,
217 CONTIG_LEFT, 274 CONTIG_LEFT,
218 CONTIG_RIGHT 275 CONTIG_RIGHT,
276 CONTIG_LEFTRIGHT,
219}; 277};
220 278
221 279
@@ -253,6 +311,14 @@ static enum ocfs2_contig_type
253{ 311{
254 u64 blkno = le64_to_cpu(insert_rec->e_blkno); 312 u64 blkno = le64_to_cpu(insert_rec->e_blkno);
255 313
314 /*
315 * Refuse to coalesce extent records with different flag
316 * fields - we don't want to mix unwritten extents with user
317 * data.
318 */
319 if (ext->e_flags != insert_rec->e_flags)
320 return CONTIG_NONE;
321
256 if (ocfs2_extents_adjacent(ext, insert_rec) && 322 if (ocfs2_extents_adjacent(ext, insert_rec) &&
257 ocfs2_block_extent_contig(inode->i_sb, ext, blkno)) 323 ocfs2_block_extent_contig(inode->i_sb, ext, blkno))
258 return CONTIG_RIGHT; 324 return CONTIG_RIGHT;
@@ -277,7 +343,14 @@ enum ocfs2_append_type {
277 APPEND_TAIL, 343 APPEND_TAIL,
278}; 344};
279 345
346enum ocfs2_split_type {
347 SPLIT_NONE = 0,
348 SPLIT_LEFT,
349 SPLIT_RIGHT,
350};
351
280struct ocfs2_insert_type { 352struct ocfs2_insert_type {
353 enum ocfs2_split_type ins_split;
281 enum ocfs2_append_type ins_appending; 354 enum ocfs2_append_type ins_appending;
282 enum ocfs2_contig_type ins_contig; 355 enum ocfs2_contig_type ins_contig;
283 int ins_contig_index; 356 int ins_contig_index;
@@ -285,6 +358,13 @@ struct ocfs2_insert_type {
285 int ins_tree_depth; 358 int ins_tree_depth;
286}; 359};
287 360
361struct ocfs2_merge_ctxt {
362 enum ocfs2_contig_type c_contig_type;
363 int c_has_empty_extent;
364 int c_split_covers_rec;
365 int c_used_tail_recs;
366};
367
288/* 368/*
289 * How many free extents have we got before we need more meta data? 369 * How many free extents have we got before we need more meta data?
290 */ 370 */
@@ -384,13 +464,7 @@ static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
384 strcpy(eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE); 464 strcpy(eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE);
385 eb->h_blkno = cpu_to_le64(first_blkno); 465 eb->h_blkno = cpu_to_le64(first_blkno);
386 eb->h_fs_generation = cpu_to_le32(osb->fs_generation); 466 eb->h_fs_generation = cpu_to_le32(osb->fs_generation);
387
388#ifndef OCFS2_USE_ALL_METADATA_SUBALLOCATORS
389 /* we always use slot zero's suballocator */
390 eb->h_suballoc_slot = 0;
391#else
392 eb->h_suballoc_slot = cpu_to_le16(osb->slot_num); 467 eb->h_suballoc_slot = cpu_to_le16(osb->slot_num);
393#endif
394 eb->h_suballoc_bit = cpu_to_le16(suballoc_bit_start); 468 eb->h_suballoc_bit = cpu_to_le16(suballoc_bit_start);
395 eb->h_list.l_count = 469 eb->h_list.l_count =
396 cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb)); 470 cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb));
@@ -461,7 +535,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
461 struct inode *inode, 535 struct inode *inode,
462 struct buffer_head *fe_bh, 536 struct buffer_head *fe_bh,
463 struct buffer_head *eb_bh, 537 struct buffer_head *eb_bh,
464 struct buffer_head *last_eb_bh, 538 struct buffer_head **last_eb_bh,
465 struct ocfs2_alloc_context *meta_ac) 539 struct ocfs2_alloc_context *meta_ac)
466{ 540{
467 int status, new_blocks, i; 541 int status, new_blocks, i;
@@ -476,7 +550,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
476 550
477 mlog_entry_void(); 551 mlog_entry_void();
478 552
479 BUG_ON(!last_eb_bh); 553 BUG_ON(!last_eb_bh || !*last_eb_bh);
480 554
481 fe = (struct ocfs2_dinode *) fe_bh->b_data; 555 fe = (struct ocfs2_dinode *) fe_bh->b_data;
482 556
@@ -507,7 +581,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
507 goto bail; 581 goto bail;
508 } 582 }
509 583
510 eb = (struct ocfs2_extent_block *)last_eb_bh->b_data; 584 eb = (struct ocfs2_extent_block *)(*last_eb_bh)->b_data;
511 new_cpos = ocfs2_sum_rightmost_rec(&eb->h_list); 585 new_cpos = ocfs2_sum_rightmost_rec(&eb->h_list);
512 586
513 /* Note: new_eb_bhs[new_blocks - 1] is the guy which will be 587 /* Note: new_eb_bhs[new_blocks - 1] is the guy which will be
@@ -568,7 +642,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
568 * journal_dirty erroring as it won't unless we've aborted the 642 * journal_dirty erroring as it won't unless we've aborted the
569 * handle (in which case we would never be here) so reserving 643 * handle (in which case we would never be here) so reserving
570 * the write with journal_access is all we need to do. */ 644 * the write with journal_access is all we need to do. */
571 status = ocfs2_journal_access(handle, inode, last_eb_bh, 645 status = ocfs2_journal_access(handle, inode, *last_eb_bh,
572 OCFS2_JOURNAL_ACCESS_WRITE); 646 OCFS2_JOURNAL_ACCESS_WRITE);
573 if (status < 0) { 647 if (status < 0) {
574 mlog_errno(status); 648 mlog_errno(status);
@@ -601,10 +675,10 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
601 * next_leaf on the previously last-extent-block. */ 675 * next_leaf on the previously last-extent-block. */
602 fe->i_last_eb_blk = cpu_to_le64(new_last_eb_blk); 676 fe->i_last_eb_blk = cpu_to_le64(new_last_eb_blk);
603 677
604 eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; 678 eb = (struct ocfs2_extent_block *) (*last_eb_bh)->b_data;
605 eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk); 679 eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk);
606 680
607 status = ocfs2_journal_dirty(handle, last_eb_bh); 681 status = ocfs2_journal_dirty(handle, *last_eb_bh);
608 if (status < 0) 682 if (status < 0)
609 mlog_errno(status); 683 mlog_errno(status);
610 status = ocfs2_journal_dirty(handle, fe_bh); 684 status = ocfs2_journal_dirty(handle, fe_bh);
@@ -616,6 +690,14 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
616 mlog_errno(status); 690 mlog_errno(status);
617 } 691 }
618 692
693 /*
694 * Some callers want to track the rightmost leaf so pass it
695 * back here.
696 */
697 brelse(*last_eb_bh);
698 get_bh(new_eb_bhs[0]);
699 *last_eb_bh = new_eb_bhs[0];
700
619 status = 0; 701 status = 0;
620bail: 702bail:
621 if (new_eb_bhs) { 703 if (new_eb_bhs) {
@@ -829,6 +911,87 @@ bail:
829} 911}
830 912
831/* 913/*
914 * Grow a b-tree so that it has more records.
915 *
916 * We might shift the tree depth in which case existing paths should
917 * be considered invalid.
918 *
919 * Tree depth after the grow is returned via *final_depth.
920 *
921 * *last_eb_bh will be updated by ocfs2_add_branch().
922 */
923static int ocfs2_grow_tree(struct inode *inode, handle_t *handle,
924 struct buffer_head *di_bh, int *final_depth,
925 struct buffer_head **last_eb_bh,
926 struct ocfs2_alloc_context *meta_ac)
927{
928 int ret, shift;
929 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
930 int depth = le16_to_cpu(di->id2.i_list.l_tree_depth);
931 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
932 struct buffer_head *bh = NULL;
933
934 BUG_ON(meta_ac == NULL);
935
936 shift = ocfs2_find_branch_target(osb, inode, di_bh, &bh);
937 if (shift < 0) {
938 ret = shift;
939 mlog_errno(ret);
940 goto out;
941 }
942
943 /* We traveled all the way to the bottom of the allocation tree
944 * and didn't find room for any more extents - we need to add
945 * another tree level */
946 if (shift) {
947 BUG_ON(bh);
948 mlog(0, "need to shift tree depth (current = %d)\n", depth);
949
950 /* ocfs2_shift_tree_depth will return us a buffer with
951 * the new extent block (so we can pass that to
952 * ocfs2_add_branch). */
953 ret = ocfs2_shift_tree_depth(osb, handle, inode, di_bh,
954 meta_ac, &bh);
955 if (ret < 0) {
956 mlog_errno(ret);
957 goto out;
958 }
959 depth++;
960 if (depth == 1) {
961 /*
962 * Special case: we have room now if we shifted from
963 * tree_depth 0, so no more work needs to be done.
964 *
965 * We won't be calling add_branch, so pass
966 * back *last_eb_bh as the new leaf. At depth
967 * zero, it should always be null so there's
968 * no reason to brelse.
969 */
970 BUG_ON(*last_eb_bh);
971 get_bh(bh);
972 *last_eb_bh = bh;
973 goto out;
974 }
975 }
976
977 /* call ocfs2_add_branch to add the final part of the tree with
978 * the new data. */
979 mlog(0, "add branch. bh = %p\n", bh);
980 ret = ocfs2_add_branch(osb, handle, inode, di_bh, bh, last_eb_bh,
981 meta_ac);
982 if (ret < 0) {
983 mlog_errno(ret);
984 goto out;
985 }
986
987out:
988 if (final_depth)
989 *final_depth = depth;
990 brelse(bh);
991 return ret;
992}
993
994/*
832 * This is only valid for leaf nodes, which are the only ones that can 995 * This is only valid for leaf nodes, which are the only ones that can
833 * have empty extents anyway. 996 * have empty extents anyway.
834 */ 997 */
@@ -934,6 +1097,22 @@ static void ocfs2_rotate_leaf(struct ocfs2_extent_list *el,
934 1097
935} 1098}
936 1099
1100static void ocfs2_remove_empty_extent(struct ocfs2_extent_list *el)
1101{
1102 int size, num_recs = le16_to_cpu(el->l_next_free_rec);
1103
1104 BUG_ON(num_recs == 0);
1105
1106 if (ocfs2_is_empty_extent(&el->l_recs[0])) {
1107 num_recs--;
1108 size = num_recs * sizeof(struct ocfs2_extent_rec);
1109 memmove(&el->l_recs[0], &el->l_recs[1], size);
1110 memset(&el->l_recs[num_recs], 0,
1111 sizeof(struct ocfs2_extent_rec));
1112 el->l_next_free_rec = cpu_to_le16(num_recs);
1113 }
1114}
1115
937/* 1116/*
938 * Create an empty extent record . 1117 * Create an empty extent record .
939 * 1118 *
@@ -1211,6 +1390,10 @@ static void ocfs2_adjust_adjacent_records(struct ocfs2_extent_rec *left_rec,
1211 * immediately to their right. 1390 * immediately to their right.
1212 */ 1391 */
1213 left_clusters = le32_to_cpu(right_child_el->l_recs[0].e_cpos); 1392 left_clusters = le32_to_cpu(right_child_el->l_recs[0].e_cpos);
1393 if (ocfs2_is_empty_extent(&right_child_el->l_recs[0])) {
1394 BUG_ON(le16_to_cpu(right_child_el->l_next_free_rec) <= 1);
1395 left_clusters = le32_to_cpu(right_child_el->l_recs[1].e_cpos);
1396 }
1214 left_clusters -= le32_to_cpu(left_rec->e_cpos); 1397 left_clusters -= le32_to_cpu(left_rec->e_cpos);
1215 left_rec->e_int_clusters = cpu_to_le32(left_clusters); 1398 left_rec->e_int_clusters = cpu_to_le32(left_clusters);
1216 1399
@@ -1531,10 +1714,16 @@ out:
1531 return ret; 1714 return ret;
1532} 1715}
1533 1716
1717/*
1718 * Extend the transaction by enough credits to complete the rotation,
1719 * and still leave at least the original number of credits allocated
1720 * to this transaction.
1721 */
1534static int ocfs2_extend_rotate_transaction(handle_t *handle, int subtree_depth, 1722static int ocfs2_extend_rotate_transaction(handle_t *handle, int subtree_depth,
1723 int op_credits,
1535 struct ocfs2_path *path) 1724 struct ocfs2_path *path)
1536{ 1725{
1537 int credits = (path->p_tree_depth - subtree_depth) * 2 + 1; 1726 int credits = (path->p_tree_depth - subtree_depth) * 2 + 1 + op_credits;
1538 1727
1539 if (handle->h_buffer_credits < credits) 1728 if (handle->h_buffer_credits < credits)
1540 return ocfs2_extend_trans(handle, credits); 1729 return ocfs2_extend_trans(handle, credits);
@@ -1568,6 +1757,29 @@ static int ocfs2_rotate_requires_path_adjustment(struct ocfs2_path *left_path,
1568 return 0; 1757 return 0;
1569} 1758}
1570 1759
1760static int ocfs2_leftmost_rec_contains(struct ocfs2_extent_list *el, u32 cpos)
1761{
1762 int next_free = le16_to_cpu(el->l_next_free_rec);
1763 unsigned int range;
1764 struct ocfs2_extent_rec *rec;
1765
1766 if (next_free == 0)
1767 return 0;
1768
1769 rec = &el->l_recs[0];
1770 if (ocfs2_is_empty_extent(rec)) {
1771 /* Empty list. */
1772 if (next_free == 1)
1773 return 0;
1774 rec = &el->l_recs[1];
1775 }
1776
1777 range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
1778 if (cpos >= le32_to_cpu(rec->e_cpos) && cpos < range)
1779 return 1;
1780 return 0;
1781}
1782
1571/* 1783/*
1572 * Rotate all the records in a btree right one record, starting at insert_cpos. 1784 * Rotate all the records in a btree right one record, starting at insert_cpos.
1573 * 1785 *
@@ -1586,11 +1798,12 @@ static int ocfs2_rotate_requires_path_adjustment(struct ocfs2_path *left_path,
1586 */ 1798 */
1587static int ocfs2_rotate_tree_right(struct inode *inode, 1799static int ocfs2_rotate_tree_right(struct inode *inode,
1588 handle_t *handle, 1800 handle_t *handle,
1801 enum ocfs2_split_type split,
1589 u32 insert_cpos, 1802 u32 insert_cpos,
1590 struct ocfs2_path *right_path, 1803 struct ocfs2_path *right_path,
1591 struct ocfs2_path **ret_left_path) 1804 struct ocfs2_path **ret_left_path)
1592{ 1805{
1593 int ret, start; 1806 int ret, start, orig_credits = handle->h_buffer_credits;
1594 u32 cpos; 1807 u32 cpos;
1595 struct ocfs2_path *left_path = NULL; 1808 struct ocfs2_path *left_path = NULL;
1596 1809
@@ -1657,9 +1870,9 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
1657 (unsigned long long) 1870 (unsigned long long)
1658 path_leaf_bh(left_path)->b_blocknr); 1871 path_leaf_bh(left_path)->b_blocknr);
1659 1872
1660 if (ocfs2_rotate_requires_path_adjustment(left_path, 1873 if (split == SPLIT_NONE &&
1874 ocfs2_rotate_requires_path_adjustment(left_path,
1661 insert_cpos)) { 1875 insert_cpos)) {
1662 mlog(0, "Path adjustment required\n");
1663 1876
1664 /* 1877 /*
1665 * We've rotated the tree as much as we 1878 * We've rotated the tree as much as we
@@ -1687,7 +1900,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
1687 right_path->p_tree_depth); 1900 right_path->p_tree_depth);
1688 1901
1689 ret = ocfs2_extend_rotate_transaction(handle, start, 1902 ret = ocfs2_extend_rotate_transaction(handle, start,
1690 right_path); 1903 orig_credits, right_path);
1691 if (ret) { 1904 if (ret) {
1692 mlog_errno(ret); 1905 mlog_errno(ret);
1693 goto out; 1906 goto out;
@@ -1700,6 +1913,24 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
1700 goto out; 1913 goto out;
1701 } 1914 }
1702 1915
1916 if (split != SPLIT_NONE &&
1917 ocfs2_leftmost_rec_contains(path_leaf_el(right_path),
1918 insert_cpos)) {
1919 /*
1920 * A rotate moves the rightmost left leaf
1921 * record over to the leftmost right leaf
1922 * slot. If we're doing an extent split
1923 * instead of a real insert, then we have to
1924 * check that the extent to be split wasn't
1925 * just moved over. If it was, then we can
1926 * exit here, passing left_path back -
1927 * ocfs2_split_extent() is smart enough to
1928 * search both leaves.
1929 */
1930 *ret_left_path = left_path;
1931 goto out_ret_path;
1932 }
1933
1703 /* 1934 /*
1704 * There is no need to re-read the next right path 1935 * There is no need to re-read the next right path
1705 * as we know that it'll be our current left 1936 * as we know that it'll be our current left
@@ -1722,6 +1953,1031 @@ out_ret_path:
1722 return ret; 1953 return ret;
1723} 1954}
1724 1955
1956static void ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
1957 struct ocfs2_path *path)
1958{
1959 int i, idx;
1960 struct ocfs2_extent_rec *rec;
1961 struct ocfs2_extent_list *el;
1962 struct ocfs2_extent_block *eb;
1963 u32 range;
1964
1965 /* Path should always be rightmost. */
1966 eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data;
1967 BUG_ON(eb->h_next_leaf_blk != 0ULL);
1968
1969 el = &eb->h_list;
1970 BUG_ON(le16_to_cpu(el->l_next_free_rec) == 0);
1971 idx = le16_to_cpu(el->l_next_free_rec) - 1;
1972 rec = &el->l_recs[idx];
1973 range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
1974
1975 for (i = 0; i < path->p_tree_depth; i++) {
1976 el = path->p_node[i].el;
1977 idx = le16_to_cpu(el->l_next_free_rec) - 1;
1978 rec = &el->l_recs[idx];
1979
1980 rec->e_int_clusters = cpu_to_le32(range);
1981 le32_add_cpu(&rec->e_int_clusters, -le32_to_cpu(rec->e_cpos));
1982
1983 ocfs2_journal_dirty(handle, path->p_node[i].bh);
1984 }
1985}
1986
1987static void ocfs2_unlink_path(struct inode *inode, handle_t *handle,
1988 struct ocfs2_cached_dealloc_ctxt *dealloc,
1989 struct ocfs2_path *path, int unlink_start)
1990{
1991 int ret, i;
1992 struct ocfs2_extent_block *eb;
1993 struct ocfs2_extent_list *el;
1994 struct buffer_head *bh;
1995
1996 for(i = unlink_start; i < path_num_items(path); i++) {
1997 bh = path->p_node[i].bh;
1998
1999 eb = (struct ocfs2_extent_block *)bh->b_data;
2000 /*
2001 * Not all nodes might have had their final count
2002 * decremented by the caller - handle this here.
2003 */
2004 el = &eb->h_list;
2005 if (le16_to_cpu(el->l_next_free_rec) > 1) {
2006 mlog(ML_ERROR,
2007 "Inode %llu, attempted to remove extent block "
2008 "%llu with %u records\n",
2009 (unsigned long long)OCFS2_I(inode)->ip_blkno,
2010 (unsigned long long)le64_to_cpu(eb->h_blkno),
2011 le16_to_cpu(el->l_next_free_rec));
2012
2013 ocfs2_journal_dirty(handle, bh);
2014 ocfs2_remove_from_cache(inode, bh);
2015 continue;
2016 }
2017
2018 el->l_next_free_rec = 0;
2019 memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec));
2020
2021 ocfs2_journal_dirty(handle, bh);
2022
2023 ret = ocfs2_cache_extent_block_free(dealloc, eb);
2024 if (ret)
2025 mlog_errno(ret);
2026
2027 ocfs2_remove_from_cache(inode, bh);
2028 }
2029}
2030
2031static void ocfs2_unlink_subtree(struct inode *inode, handle_t *handle,
2032 struct ocfs2_path *left_path,
2033 struct ocfs2_path *right_path,
2034 int subtree_index,
2035 struct ocfs2_cached_dealloc_ctxt *dealloc)
2036{
2037 int i;
2038 struct buffer_head *root_bh = left_path->p_node[subtree_index].bh;
2039 struct ocfs2_extent_list *root_el = left_path->p_node[subtree_index].el;
2040 struct ocfs2_extent_list *el;
2041 struct ocfs2_extent_block *eb;
2042
2043 el = path_leaf_el(left_path);
2044
2045 eb = (struct ocfs2_extent_block *)right_path->p_node[subtree_index + 1].bh->b_data;
2046
2047 for(i = 1; i < le16_to_cpu(root_el->l_next_free_rec); i++)
2048 if (root_el->l_recs[i].e_blkno == eb->h_blkno)
2049 break;
2050
2051 BUG_ON(i >= le16_to_cpu(root_el->l_next_free_rec));
2052
2053 memset(&root_el->l_recs[i], 0, sizeof(struct ocfs2_extent_rec));
2054 le16_add_cpu(&root_el->l_next_free_rec, -1);
2055
2056 eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
2057 eb->h_next_leaf_blk = 0;
2058
2059 ocfs2_journal_dirty(handle, root_bh);
2060 ocfs2_journal_dirty(handle, path_leaf_bh(left_path));
2061
2062 ocfs2_unlink_path(inode, handle, dealloc, right_path,
2063 subtree_index + 1);
2064}
2065
2066static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
2067 struct ocfs2_path *left_path,
2068 struct ocfs2_path *right_path,
2069 int subtree_index,
2070 struct ocfs2_cached_dealloc_ctxt *dealloc,
2071 int *deleted)
2072{
2073 int ret, i, del_right_subtree = 0, right_has_empty = 0;
2074 struct buffer_head *root_bh, *di_bh = path_root_bh(right_path);
2075 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2076 struct ocfs2_extent_list *right_leaf_el, *left_leaf_el;
2077 struct ocfs2_extent_block *eb;
2078
2079 *deleted = 0;
2080
2081 right_leaf_el = path_leaf_el(right_path);
2082 left_leaf_el = path_leaf_el(left_path);
2083 root_bh = left_path->p_node[subtree_index].bh;
2084 BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
2085
2086 if (!ocfs2_is_empty_extent(&left_leaf_el->l_recs[0]))
2087 return 0;
2088
2089 eb = (struct ocfs2_extent_block *)path_leaf_bh(right_path)->b_data;
2090 if (ocfs2_is_empty_extent(&right_leaf_el->l_recs[0])) {
2091 /*
2092 * It's legal for us to proceed if the right leaf is
2093 * the rightmost one and it has an empty extent. There
2094 * are two cases to handle - whether the leaf will be
2095 * empty after removal or not. If the leaf isn't empty
2096 * then just remove the empty extent up front. The
2097 * next block will handle empty leaves by flagging
2098 * them for unlink.
2099 *
2100 * Non rightmost leaves will throw -EAGAIN and the
2101 * caller can manually move the subtree and retry.
2102 */
2103
2104 if (eb->h_next_leaf_blk != 0ULL)
2105 return -EAGAIN;
2106
2107 if (le16_to_cpu(right_leaf_el->l_next_free_rec) > 1) {
2108 ret = ocfs2_journal_access(handle, inode,
2109 path_leaf_bh(right_path),
2110 OCFS2_JOURNAL_ACCESS_WRITE);
2111 if (ret) {
2112 mlog_errno(ret);
2113 goto out;
2114 }
2115
2116 ocfs2_remove_empty_extent(right_leaf_el);
2117 } else
2118 right_has_empty = 1;
2119 }
2120
2121 if (eb->h_next_leaf_blk == 0ULL &&
2122 le16_to_cpu(right_leaf_el->l_next_free_rec) == 1) {
2123 /*
2124 * We have to update i_last_eb_blk during the meta
2125 * data delete.
2126 */
2127 ret = ocfs2_journal_access(handle, inode, di_bh,
2128 OCFS2_JOURNAL_ACCESS_WRITE);
2129 if (ret) {
2130 mlog_errno(ret);
2131 goto out;
2132 }
2133
2134 del_right_subtree = 1;
2135 }
2136
2137 /*
2138 * Getting here with an empty extent in the right path implies
2139 * that it's the rightmost path and will be deleted.
2140 */
2141 BUG_ON(right_has_empty && !del_right_subtree);
2142
2143 ret = ocfs2_journal_access(handle, inode, root_bh,
2144 OCFS2_JOURNAL_ACCESS_WRITE);
2145 if (ret) {
2146 mlog_errno(ret);
2147 goto out;
2148 }
2149
2150 for(i = subtree_index + 1; i < path_num_items(right_path); i++) {
2151 ret = ocfs2_journal_access(handle, inode,
2152 right_path->p_node[i].bh,
2153 OCFS2_JOURNAL_ACCESS_WRITE);
2154 if (ret) {
2155 mlog_errno(ret);
2156 goto out;
2157 }
2158
2159 ret = ocfs2_journal_access(handle, inode,
2160 left_path->p_node[i].bh,
2161 OCFS2_JOURNAL_ACCESS_WRITE);
2162 if (ret) {
2163 mlog_errno(ret);
2164 goto out;
2165 }
2166 }
2167
2168 if (!right_has_empty) {
2169 /*
2170 * Only do this if we're moving a real
2171 * record. Otherwise, the action is delayed until
2172 * after removal of the right path in which case we
2173 * can do a simple shift to remove the empty extent.
2174 */
2175 ocfs2_rotate_leaf(left_leaf_el, &right_leaf_el->l_recs[0]);
2176 memset(&right_leaf_el->l_recs[0], 0,
2177 sizeof(struct ocfs2_extent_rec));
2178 }
2179 if (eb->h_next_leaf_blk == 0ULL) {
2180 /*
2181 * Move recs over to get rid of empty extent, decrease
2182 * next_free. This is allowed to remove the last
2183 * extent in our leaf (setting l_next_free_rec to
2184 * zero) - the delete code below won't care.
2185 */
2186 ocfs2_remove_empty_extent(right_leaf_el);
2187 }
2188
2189 ret = ocfs2_journal_dirty(handle, path_leaf_bh(left_path));
2190 if (ret)
2191 mlog_errno(ret);
2192 ret = ocfs2_journal_dirty(handle, path_leaf_bh(right_path));
2193 if (ret)
2194 mlog_errno(ret);
2195
2196 if (del_right_subtree) {
2197 ocfs2_unlink_subtree(inode, handle, left_path, right_path,
2198 subtree_index, dealloc);
2199 ocfs2_update_edge_lengths(inode, handle, left_path);
2200
2201 eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
2202 di->i_last_eb_blk = eb->h_blkno;
2203
2204 /*
2205 * Removal of the extent in the left leaf was skipped
2206 * above so we could delete the right path
2207 * 1st.
2208 */
2209 if (right_has_empty)
2210 ocfs2_remove_empty_extent(left_leaf_el);
2211
2212 ret = ocfs2_journal_dirty(handle, di_bh);
2213 if (ret)
2214 mlog_errno(ret);
2215
2216 *deleted = 1;
2217 } else
2218 ocfs2_complete_edge_insert(inode, handle, left_path, right_path,
2219 subtree_index);
2220
2221out:
2222 return ret;
2223}
2224
2225/*
2226 * Given a full path, determine what cpos value would return us a path
2227 * containing the leaf immediately to the right of the current one.
2228 *
2229 * Will return zero if the path passed in is already the rightmost path.
2230 *
2231 * This looks similar, but is subtly different to
2232 * ocfs2_find_cpos_for_left_leaf().
2233 */
2234static int ocfs2_find_cpos_for_right_leaf(struct super_block *sb,
2235 struct ocfs2_path *path, u32 *cpos)
2236{
2237 int i, j, ret = 0;
2238 u64 blkno;
2239 struct ocfs2_extent_list *el;
2240
2241 *cpos = 0;
2242
2243 if (path->p_tree_depth == 0)
2244 return 0;
2245
2246 blkno = path_leaf_bh(path)->b_blocknr;
2247
2248 /* Start at the tree node just above the leaf and work our way up. */
2249 i = path->p_tree_depth - 1;
2250 while (i >= 0) {
2251 int next_free;
2252
2253 el = path->p_node[i].el;
2254
2255 /*
2256 * Find the extent record just after the one in our
2257 * path.
2258 */
2259 next_free = le16_to_cpu(el->l_next_free_rec);
2260 for(j = 0; j < le16_to_cpu(el->l_next_free_rec); j++) {
2261 if (le64_to_cpu(el->l_recs[j].e_blkno) == blkno) {
2262 if (j == (next_free - 1)) {
2263 if (i == 0) {
2264 /*
2265 * We've determined that the
2266 * path specified is already
2267 * the rightmost one - return a
2268 * cpos of zero.
2269 */
2270 goto out;
2271 }
2272 /*
2273 * The rightmost record points to our
2274 * leaf - we need to travel up the
2275 * tree one level.
2276 */
2277 goto next_node;
2278 }
2279
2280 *cpos = le32_to_cpu(el->l_recs[j + 1].e_cpos);
2281 goto out;
2282 }
2283 }
2284
2285 /*
2286 * If we got here, we never found a valid node where
2287 * the tree indicated one should be.
2288 */
2289 ocfs2_error(sb,
2290 "Invalid extent tree at extent block %llu\n",
2291 (unsigned long long)blkno);
2292 ret = -EROFS;
2293 goto out;
2294
2295next_node:
2296 blkno = path->p_node[i].bh->b_blocknr;
2297 i--;
2298 }
2299
2300out:
2301 return ret;
2302}
2303
2304static int ocfs2_rotate_rightmost_leaf_left(struct inode *inode,
2305 handle_t *handle,
2306 struct buffer_head *bh,
2307 struct ocfs2_extent_list *el)
2308{
2309 int ret;
2310
2311 if (!ocfs2_is_empty_extent(&el->l_recs[0]))
2312 return 0;
2313
2314 ret = ocfs2_journal_access(handle, inode, bh,
2315 OCFS2_JOURNAL_ACCESS_WRITE);
2316 if (ret) {
2317 mlog_errno(ret);
2318 goto out;
2319 }
2320
2321 ocfs2_remove_empty_extent(el);
2322
2323 ret = ocfs2_journal_dirty(handle, bh);
2324 if (ret)
2325 mlog_errno(ret);
2326
2327out:
2328 return ret;
2329}
2330
/*
 * Worker for ocfs2_rotate_tree_left(): starting at @path, whose leaf
 * must begin with an empty extent, rotate records leftward one
 * subtree at a time, walking toward the rightmost edge of the tree.
 *
 * Returns 0 when the rotation ran to completion (or a rightmost-edge
 * subtree was deleted), -EAGAIN when a right subtree with an empty
 * extent blocks the rotation - in which case *empty_extent_path is
 * handed to the caller (who then owns and must free it) for fixup -
 * or a negative error code.
 */
static int __ocfs2_rotate_tree_left(struct inode *inode,
				    handle_t *handle, int orig_credits,
				    struct ocfs2_path *path,
				    struct ocfs2_cached_dealloc_ctxt *dealloc,
				    struct ocfs2_path **empty_extent_path)
{
	int ret, subtree_root, deleted;
	u32 right_cpos;
	struct ocfs2_path *left_path = NULL;
	struct ocfs2_path *right_path = NULL;

	/* The caller guarantees an empty extent in the starting leaf. */
	BUG_ON(!ocfs2_is_empty_extent(&(path_leaf_el(path)->l_recs[0])));

	*empty_extent_path = NULL;

	ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, path,
					     &right_cpos);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	left_path = ocfs2_new_path(path_root_bh(path),
				   path_root_el(path));
	if (!left_path) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	/* Begin with the left path as a copy of the caller's path. */
	ocfs2_cp_path(left_path, path);

	right_path = ocfs2_new_path(path_root_bh(path),
				    path_root_el(path));
	if (!right_path) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	/* right_cpos == 0 means left_path is already the rightmost path. */
	while (right_cpos) {
		ret = ocfs2_find_path(inode, right_path, right_cpos);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		/* Lowest node common to both adjacent paths. */
		subtree_root = ocfs2_find_subtree_root(inode, left_path,
						       right_path);

		mlog(0, "Subtree root at index %d (blk %llu, depth %d)\n",
		     subtree_root,
		     (unsigned long long)
		     right_path->p_node[subtree_root].bh->b_blocknr,
		     right_path->p_tree_depth);

		/*
		 * Make sure the transaction has enough credits for the
		 * blocks this subtree rotation will dirty.
		 */
		ret = ocfs2_extend_rotate_transaction(handle, subtree_root,
						      orig_credits, left_path);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		ret = ocfs2_rotate_subtree_left(inode, handle, left_path,
						right_path, subtree_root,
						dealloc, &deleted);
		if (ret == -EAGAIN) {
			/*
			 * The rotation has to temporarily stop due to
			 * the right subtree having an empty
			 * extent. Pass it back to the caller for a
			 * fixup.
			 */
			*empty_extent_path = right_path;
			right_path = NULL;
			goto out;
		}
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		/*
		 * The subtree rotate might have removed records on
		 * the rightmost edge. If so, then rotation is
		 * complete.
		 */
		if (deleted)
			break;

		/* Advance: the old right path becomes the new left path. */
		ocfs2_mv_path(left_path, right_path);

		ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, left_path,
						     &right_cpos);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

out:
	ocfs2_free_path(right_path);
	ocfs2_free_path(left_path);

	return ret;
}
2437
/*
 * Remove the rightmost branch of the tree, unlinking its blocks into
 * @dealloc for later freeing.
 *
 * Two cases: when a path exists to the left of @path, the rightmost
 * branch is unlinked from it and i_last_eb_blk is repointed at the
 * new rightmost leaf. Otherwise @path is the only branch and the
 * inode is reverted to in-inode (depth zero) extents.
 *
 * Returns 0 on success or a negative error code.
 */
static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
				       struct ocfs2_path *path,
				       struct ocfs2_cached_dealloc_ctxt *dealloc)
{
	int ret, subtree_index;
	u32 cpos;
	struct ocfs2_path *left_path = NULL;
	struct ocfs2_dinode *di;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_list *el;

	/*
	 * XXX: This code assumes that the root is an inode, which is
	 * true for now but may change as tree code gets generic.
	 */
	di = (struct ocfs2_dinode *)path_root_bh(path)->b_data;
	if (!OCFS2_IS_VALID_DINODE(di)) {
		ret = -EIO;
		ocfs2_error(inode->i_sb,
			    "Inode %llu has invalid path root",
			    (unsigned long long)OCFS2_I(inode)->ip_blkno);
		goto out;
	}

	/*
	 * There's two ways we handle this depending on
	 * whether path is the only existing one.
	 */
	ret = ocfs2_extend_rotate_transaction(handle, 0,
					      handle->h_buffer_credits,
					      path);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/* Every block on the doomed path will be written to. */
	ret = ocfs2_journal_access_path(inode, handle, path);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/* cpos == 0 below means there is no path to our left. */
	ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, path, &cpos);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	if (cpos) {
		/*
		 * We have a path to the left of this one - it needs
		 * an update too.
		 */
		left_path = ocfs2_new_path(path_root_bh(path),
					   path_root_el(path));
		if (!left_path) {
			ret = -ENOMEM;
			mlog_errno(ret);
			goto out;
		}

		ret = ocfs2_find_path(inode, left_path, cpos);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		ret = ocfs2_journal_access_path(inode, handle, left_path);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		subtree_index = ocfs2_find_subtree_root(inode, left_path, path);

		ocfs2_unlink_subtree(inode, handle, left_path, path,
				     subtree_index, dealloc);
		ocfs2_update_edge_lengths(inode, handle, left_path);

		/* The surviving left branch now ends the tree. */
		eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
		di->i_last_eb_blk = eb->h_blkno;
	} else {
		/*
		 * 'path' is also the leftmost path which
		 * means it must be the only one. This gets
		 * handled differently because we want to
		 * revert the inode back to having extents
		 * in-line.
		 */
		ocfs2_unlink_path(inode, handle, dealloc, path, 1);

		el = &di->id2.i_list;
		el->l_tree_depth = 0;
		el->l_next_free_rec = 0;
		memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec));

		di->i_last_eb_blk = 0;
	}

	ocfs2_journal_dirty(handle, path_root_bh(path));

out:
	ocfs2_free_path(left_path);
	return ret;
}
2543
2544/*
2545 * Left rotation of btree records.
2546 *
2547 * In many ways, this is (unsurprisingly) the opposite of right
2548 * rotation. We start at some non-rightmost path containing an empty
2549 * extent in the leaf block. The code works its way to the rightmost
2550 * path by rotating records to the left in every subtree.
2551 *
2552 * This is used by any code which reduces the number of extent records
2553 * in a leaf. After removal, an empty record should be placed in the
2554 * leftmost list position.
2555 *
2556 * This won't handle a length update of the rightmost path records if
2557 * the rightmost tree leaf record is removed so the caller is
2558 * responsible for detecting and correcting that.
2559 */
static int ocfs2_rotate_tree_left(struct inode *inode, handle_t *handle,
				  struct ocfs2_path *path,
				  struct ocfs2_cached_dealloc_ctxt *dealloc)
{
	int ret, orig_credits = handle->h_buffer_credits;
	struct ocfs2_path *tmp_path = NULL, *restart_path = NULL;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_list *el;

	/* No empty extent in the leaf means there is nothing to rotate. */
	el = path_leaf_el(path);
	if (!ocfs2_is_empty_extent(&el->l_recs[0]))
		return 0;

	if (path->p_tree_depth == 0) {
rightmost_no_delete:
		/*
		 * In-inode extents. This is trivially handled, so do
		 * it up front.
		 */
		ret = ocfs2_rotate_rightmost_leaf_left(inode, handle,
						       path_leaf_bh(path),
						       path_leaf_el(path));
		if (ret)
			mlog_errno(ret);
		goto out;
	}

	/*
	 * Handle rightmost branch now. There's several cases:
	 * 1) simple rotation leaving records in there. That's trivial.
	 * 2) rotation requiring a branch delete - there's no more
	 * records left. Two cases of this:
	 * a) There are branches to the left.
	 * b) This is also the leftmost (the only) branch.
	 *
	 * 1) is handled via ocfs2_rotate_rightmost_leaf_left()
	 * 2a) we need the left branch so that we can update it with the unlink
	 * 2b) we need to bring the inode back to inline extents.
	 */

	eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data;
	el = &eb->h_list;
	if (eb->h_next_leaf_blk == 0) {
		/*
		 * This gets a bit tricky if we're going to delete the
		 * rightmost path. Get the other cases out of the way
		 * 1st.
		 */
		if (le16_to_cpu(el->l_next_free_rec) > 1)
			goto rightmost_no_delete;

		/* A leaf with zero records is on-disk corruption. */
		if (le16_to_cpu(el->l_next_free_rec) == 0) {
			ret = -EIO;
			ocfs2_error(inode->i_sb,
				    "Inode %llu has empty extent block at %llu",
				    (unsigned long long)OCFS2_I(inode)->ip_blkno,
				    (unsigned long long)le64_to_cpu(eb->h_blkno));
			goto out;
		}

		/*
		 * XXX: The caller can not trust "path" any more after
		 * this as it will have been deleted. What do we do?
		 *
		 * In theory the rotate-for-merge code will never get
		 * here because it'll always ask for a rotate in a
		 * nonempty list.
		 */

		/* Only one (empty) record left: drop the whole branch. */
		ret = ocfs2_remove_rightmost_path(inode, handle, path,
						  dealloc);
		if (ret)
			mlog_errno(ret);
		goto out;
	}

	/*
	 * Now we can loop, remembering the path we get from -EAGAIN
	 * and restarting from there.
	 */
try_rotate:
	ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits, path,
				       dealloc, &restart_path);
	if (ret && ret != -EAGAIN) {
		mlog_errno(ret);
		goto out;
	}

	while (ret == -EAGAIN) {
		/* Take ownership of the path the worker handed back. */
		tmp_path = restart_path;
		restart_path = NULL;

		ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits,
					       tmp_path, dealloc,
					       &restart_path);
		if (ret && ret != -EAGAIN) {
			mlog_errno(ret);
			goto out;
		}

		ocfs2_free_path(tmp_path);
		tmp_path = NULL;

		/*
		 * A zero return means the restart path completed, so
		 * resume the rotation from the original path.
		 */
		if (ret == 0)
			goto try_rotate;
	}

out:
	ocfs2_free_path(tmp_path);
	ocfs2_free_path(restart_path);
	return ret;
}
2672
2673static void ocfs2_cleanup_merge(struct ocfs2_extent_list *el,
2674 int index)
2675{
2676 struct ocfs2_extent_rec *rec = &el->l_recs[index];
2677 unsigned int size;
2678
2679 if (rec->e_leaf_clusters == 0) {
2680 /*
2681 * We consumed all of the merged-from record. An empty
2682 * extent cannot exist anywhere but the 1st array
2683 * position, so move things over if the merged-from
2684 * record doesn't occupy that position.
2685 *
2686 * This creates a new empty extent so the caller
2687 * should be smart enough to have removed any existing
2688 * ones.
2689 */
2690 if (index > 0) {
2691 BUG_ON(ocfs2_is_empty_extent(&el->l_recs[0]));
2692 size = index * sizeof(struct ocfs2_extent_rec);
2693 memmove(&el->l_recs[1], &el->l_recs[0], size);
2694 }
2695
2696 /*
2697 * Always memset - the caller doesn't check whether it
2698 * created an empty extent, so there could be junk in
2699 * the other fields.
2700 */
2701 memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec));
2702 }
2703}
2704
2705/*
2706 * Remove split_rec clusters from the record at index and merge them
2707 * onto the beginning of the record at index + 1.
2708 */
2709static int ocfs2_merge_rec_right(struct inode *inode, struct buffer_head *bh,
2710 handle_t *handle,
2711 struct ocfs2_extent_rec *split_rec,
2712 struct ocfs2_extent_list *el, int index)
2713{
2714 int ret;
2715 unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters);
2716 struct ocfs2_extent_rec *left_rec;
2717 struct ocfs2_extent_rec *right_rec;
2718
2719 BUG_ON(index >= le16_to_cpu(el->l_next_free_rec));
2720
2721 left_rec = &el->l_recs[index];
2722 right_rec = &el->l_recs[index + 1];
2723
2724 ret = ocfs2_journal_access(handle, inode, bh,
2725 OCFS2_JOURNAL_ACCESS_WRITE);
2726 if (ret) {
2727 mlog_errno(ret);
2728 goto out;
2729 }
2730
2731 le16_add_cpu(&left_rec->e_leaf_clusters, -split_clusters);
2732
2733 le32_add_cpu(&right_rec->e_cpos, -split_clusters);
2734 le64_add_cpu(&right_rec->e_blkno,
2735 -ocfs2_clusters_to_blocks(inode->i_sb, split_clusters));
2736 le16_add_cpu(&right_rec->e_leaf_clusters, split_clusters);
2737
2738 ocfs2_cleanup_merge(el, index);
2739
2740 ret = ocfs2_journal_dirty(handle, bh);
2741 if (ret)
2742 mlog_errno(ret);
2743
2744out:
2745 return ret;
2746}
2747
2748/*
2749 * Remove split_rec clusters from the record at index and merge them
2750 * onto the tail of the record at index - 1.
2751 */
2752static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh,
2753 handle_t *handle,
2754 struct ocfs2_extent_rec *split_rec,
2755 struct ocfs2_extent_list *el, int index)
2756{
2757 int ret, has_empty_extent = 0;
2758 unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters);
2759 struct ocfs2_extent_rec *left_rec;
2760 struct ocfs2_extent_rec *right_rec;
2761
2762 BUG_ON(index <= 0);
2763
2764 left_rec = &el->l_recs[index - 1];
2765 right_rec = &el->l_recs[index];
2766 if (ocfs2_is_empty_extent(&el->l_recs[0]))
2767 has_empty_extent = 1;
2768
2769 ret = ocfs2_journal_access(handle, inode, bh,
2770 OCFS2_JOURNAL_ACCESS_WRITE);
2771 if (ret) {
2772 mlog_errno(ret);
2773 goto out;
2774 }
2775
2776 if (has_empty_extent && index == 1) {
2777 /*
2778 * The easy case - we can just plop the record right in.
2779 */
2780 *left_rec = *split_rec;
2781
2782 has_empty_extent = 0;
2783 } else {
2784 le16_add_cpu(&left_rec->e_leaf_clusters, split_clusters);
2785 }
2786
2787 le32_add_cpu(&right_rec->e_cpos, split_clusters);
2788 le64_add_cpu(&right_rec->e_blkno,
2789 ocfs2_clusters_to_blocks(inode->i_sb, split_clusters));
2790 le16_add_cpu(&right_rec->e_leaf_clusters, -split_clusters);
2791
2792 ocfs2_cleanup_merge(el, index);
2793
2794 ret = ocfs2_journal_dirty(handle, bh);
2795 if (ret)
2796 mlog_errno(ret);
2797
2798out:
2799 return ret;
2800}
2801
2802static int ocfs2_try_to_merge_extent(struct inode *inode,
2803 handle_t *handle,
2804 struct ocfs2_path *left_path,
2805 int split_index,
2806 struct ocfs2_extent_rec *split_rec,
2807 struct ocfs2_cached_dealloc_ctxt *dealloc,
2808 struct ocfs2_merge_ctxt *ctxt)
2809
2810{
2811 int ret = 0, delete_tail_recs = 0;
2812 struct ocfs2_extent_list *el = path_leaf_el(left_path);
2813 struct ocfs2_extent_rec *rec = &el->l_recs[split_index];
2814
2815 BUG_ON(ctxt->c_contig_type == CONTIG_NONE);
2816
2817 if (ctxt->c_split_covers_rec) {
2818 delete_tail_recs++;
2819
2820 if (ctxt->c_contig_type == CONTIG_LEFTRIGHT ||
2821 ctxt->c_has_empty_extent)
2822 delete_tail_recs++;
2823
2824 if (ctxt->c_has_empty_extent) {
2825 /*
2826 * The merge code will need to create an empty
2827 * extent to take the place of the newly
2828 * emptied slot. Remove any pre-existing empty
2829 * extents - having more than one in a leaf is
2830 * illegal.
2831 */
2832 ret = ocfs2_rotate_tree_left(inode, handle, left_path,
2833 dealloc);
2834 if (ret) {
2835 mlog_errno(ret);
2836 goto out;
2837 }
2838 split_index--;
2839 rec = &el->l_recs[split_index];
2840 }
2841 }
2842
2843 if (ctxt->c_contig_type == CONTIG_LEFTRIGHT) {
2844 /*
2845 * Left-right contig implies this.
2846 */
2847 BUG_ON(!ctxt->c_split_covers_rec);
2848 BUG_ON(split_index == 0);
2849
2850 /*
2851 * Since the leftright insert always covers the entire
2852 * extent, this call will delete the insert record
2853 * entirely, resulting in an empty extent record added to
2854 * the extent block.
2855 *
2856 * Since the adding of an empty extent shifts
2857 * everything back to the right, there's no need to
2858 * update split_index here.
2859 */
2860 ret = ocfs2_merge_rec_left(inode, path_leaf_bh(left_path),
2861 handle, split_rec, el, split_index);
2862 if (ret) {
2863 mlog_errno(ret);
2864 goto out;
2865 }
2866
2867 /*
2868 * We can only get this from logic error above.
2869 */
2870 BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0]));
2871
2872 /*
2873 * The left merge left us with an empty extent, remove
2874 * it.
2875 */
2876 ret = ocfs2_rotate_tree_left(inode, handle, left_path, dealloc);
2877 if (ret) {
2878 mlog_errno(ret);
2879 goto out;
2880 }
2881 split_index--;
2882 rec = &el->l_recs[split_index];
2883
2884 /*
2885 * Note that we don't pass split_rec here on purpose -
2886 * we've merged it into the left side.
2887 */
2888 ret = ocfs2_merge_rec_right(inode, path_leaf_bh(left_path),
2889 handle, rec, el, split_index);
2890 if (ret) {
2891 mlog_errno(ret);
2892 goto out;
2893 }
2894
2895 BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0]));
2896
2897 ret = ocfs2_rotate_tree_left(inode, handle, left_path,
2898 dealloc);
2899 /*
2900 * Error from this last rotate is not critical, so
2901 * print but don't bubble it up.
2902 */
2903 if (ret)
2904 mlog_errno(ret);
2905 ret = 0;
2906 } else {
2907 /*
2908 * Merge a record to the left or right.
2909 *
2910 * 'contig_type' is relative to the existing record,
2911 * so for example, if we're "right contig", it's to
2912 * the record on the left (hence the left merge).
2913 */
2914 if (ctxt->c_contig_type == CONTIG_RIGHT) {
2915 ret = ocfs2_merge_rec_left(inode,
2916 path_leaf_bh(left_path),
2917 handle, split_rec, el,
2918 split_index);
2919 if (ret) {
2920 mlog_errno(ret);
2921 goto out;
2922 }
2923 } else {
2924 ret = ocfs2_merge_rec_right(inode,
2925 path_leaf_bh(left_path),
2926 handle, split_rec, el,
2927 split_index);
2928 if (ret) {
2929 mlog_errno(ret);
2930 goto out;
2931 }
2932 }
2933
2934 if (ctxt->c_split_covers_rec) {
2935 /*
2936 * The merge may have left an empty extent in
2937 * our leaf. Try to rotate it away.
2938 */
2939 ret = ocfs2_rotate_tree_left(inode, handle, left_path,
2940 dealloc);
2941 if (ret)
2942 mlog_errno(ret);
2943 ret = 0;
2944 }
2945 }
2946
2947out:
2948 return ret;
2949}
2950
2951static void ocfs2_subtract_from_rec(struct super_block *sb,
2952 enum ocfs2_split_type split,
2953 struct ocfs2_extent_rec *rec,
2954 struct ocfs2_extent_rec *split_rec)
2955{
2956 u64 len_blocks;
2957
2958 len_blocks = ocfs2_clusters_to_blocks(sb,
2959 le16_to_cpu(split_rec->e_leaf_clusters));
2960
2961 if (split == SPLIT_LEFT) {
2962 /*
2963 * Region is on the left edge of the existing
2964 * record.
2965 */
2966 le32_add_cpu(&rec->e_cpos,
2967 le16_to_cpu(split_rec->e_leaf_clusters));
2968 le64_add_cpu(&rec->e_blkno, len_blocks);
2969 le16_add_cpu(&rec->e_leaf_clusters,
2970 -le16_to_cpu(split_rec->e_leaf_clusters));
2971 } else {
2972 /*
2973 * Region is on the right edge of the existing
2974 * record.
2975 */
2976 le16_add_cpu(&rec->e_leaf_clusters,
2977 -le16_to_cpu(split_rec->e_leaf_clusters));
2978 }
2979}
2980
1725/* 2981/*
1726 * Do the final bits of extent record insertion at the target leaf 2982 * Do the final bits of extent record insertion at the target leaf
1727 * list. If this leaf is part of an allocation tree, it is assumed 2983 * list. If this leaf is part of an allocation tree, it is assumed
@@ -1738,6 +2994,15 @@ static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec,
1738 2994
1739 BUG_ON(le16_to_cpu(el->l_tree_depth) != 0); 2995 BUG_ON(le16_to_cpu(el->l_tree_depth) != 0);
1740 2996
2997 if (insert->ins_split != SPLIT_NONE) {
2998 i = ocfs2_search_extent_list(el, le32_to_cpu(insert_rec->e_cpos));
2999 BUG_ON(i == -1);
3000 rec = &el->l_recs[i];
3001 ocfs2_subtract_from_rec(inode->i_sb, insert->ins_split, rec,
3002 insert_rec);
3003 goto rotate;
3004 }
3005
1741 /* 3006 /*
1742 * Contiguous insert - either left or right. 3007 * Contiguous insert - either left or right.
1743 */ 3008 */
@@ -1792,6 +3057,7 @@ static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec,
1792 return; 3057 return;
1793 } 3058 }
1794 3059
3060rotate:
1795 /* 3061 /*
1796 * Ok, we have to rotate. 3062 * Ok, we have to rotate.
1797 * 3063 *
@@ -1815,13 +3081,53 @@ static inline void ocfs2_update_dinode_clusters(struct inode *inode,
1815 spin_unlock(&OCFS2_I(inode)->ip_lock); 3081 spin_unlock(&OCFS2_I(inode)->ip_lock);
1816} 3082}
1817 3083
/*
 * After appending @insert_rec on the rightmost path, widen the last
 * record of every interior node on @path to cover it: e_int_clusters
 * becomes (end cpos of insert_rec) minus the record's own e_cpos.
 * The leaf block itself is deliberately left untouched.
 *
 * NOTE(review): the function is void, so a corrupt (empty) extent
 * list or a failed journal dirty can only be logged - the `ret`
 * value (including the -EIO store below) is never returned to the
 * caller.
 */
static void ocfs2_adjust_rightmost_records(struct inode *inode,
					   handle_t *handle,
					   struct ocfs2_path *path,
					   struct ocfs2_extent_rec *insert_rec)
{
	int ret, i, next_free;
	struct buffer_head *bh;
	struct ocfs2_extent_list *el;
	struct ocfs2_extent_rec *rec;

	/*
	 * Update everything except the leaf block.
	 */
	for (i = 0; i < path->p_tree_depth; i++) {
		bh = path->p_node[i].bh;
		el = path->p_node[i].el;

		next_free = le16_to_cpu(el->l_next_free_rec);
		if (next_free == 0) {
			ocfs2_error(inode->i_sb,
				    "Dinode %llu has a bad extent list",
				    (unsigned long long)OCFS2_I(inode)->ip_blkno);
			ret = -EIO;
			return;
		}

		rec = &el->l_recs[next_free - 1];

		/* e_int_clusters = insert end cpos - rec->e_cpos */
		rec->e_int_clusters = insert_rec->e_cpos;
		le32_add_cpu(&rec->e_int_clusters,
			     le16_to_cpu(insert_rec->e_leaf_clusters));
		le32_add_cpu(&rec->e_int_clusters,
			     -le32_to_cpu(rec->e_cpos));

		ret = ocfs2_journal_dirty(handle, bh);
		if (ret)
			mlog_errno(ret);

	}
}
3124
1818static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle, 3125static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle,
1819 struct ocfs2_extent_rec *insert_rec, 3126 struct ocfs2_extent_rec *insert_rec,
1820 struct ocfs2_path *right_path, 3127 struct ocfs2_path *right_path,
1821 struct ocfs2_path **ret_left_path) 3128 struct ocfs2_path **ret_left_path)
1822{ 3129{
1823 int ret, i, next_free; 3130 int ret, next_free;
1824 struct buffer_head *bh;
1825 struct ocfs2_extent_list *el; 3131 struct ocfs2_extent_list *el;
1826 struct ocfs2_path *left_path = NULL; 3132 struct ocfs2_path *left_path = NULL;
1827 3133
@@ -1887,40 +3193,7 @@ static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle,
1887 goto out; 3193 goto out;
1888 } 3194 }
1889 3195
1890 el = path_root_el(right_path); 3196 ocfs2_adjust_rightmost_records(inode, handle, right_path, insert_rec);
1891 bh = path_root_bh(right_path);
1892 i = 0;
1893 while (1) {
1894 struct ocfs2_extent_rec *rec;
1895
1896 next_free = le16_to_cpu(el->l_next_free_rec);
1897 if (next_free == 0) {
1898 ocfs2_error(inode->i_sb,
1899 "Dinode %llu has a bad extent list",
1900 (unsigned long long)OCFS2_I(inode)->ip_blkno);
1901 ret = -EIO;
1902 goto out;
1903 }
1904
1905 rec = &el->l_recs[next_free - 1];
1906
1907 rec->e_int_clusters = insert_rec->e_cpos;
1908 le32_add_cpu(&rec->e_int_clusters,
1909 le16_to_cpu(insert_rec->e_leaf_clusters));
1910 le32_add_cpu(&rec->e_int_clusters,
1911 -le32_to_cpu(rec->e_cpos));
1912
1913 ret = ocfs2_journal_dirty(handle, bh);
1914 if (ret)
1915 mlog_errno(ret);
1916
1917 /* Don't touch the leaf node */
1918 if (++i >= right_path->p_tree_depth)
1919 break;
1920
1921 bh = right_path->p_node[i].bh;
1922 el = right_path->p_node[i].el;
1923 }
1924 3197
1925 *ret_left_path = left_path; 3198 *ret_left_path = left_path;
1926 ret = 0; 3199 ret = 0;
@@ -1931,6 +3204,83 @@ out:
1931 return ret; 3204 return ret;
1932} 3205}
1933 3206
3207static void ocfs2_split_record(struct inode *inode,
3208 struct ocfs2_path *left_path,
3209 struct ocfs2_path *right_path,
3210 struct ocfs2_extent_rec *split_rec,
3211 enum ocfs2_split_type split)
3212{
3213 int index;
3214 u32 cpos = le32_to_cpu(split_rec->e_cpos);
3215 struct ocfs2_extent_list *left_el = NULL, *right_el, *insert_el, *el;
3216 struct ocfs2_extent_rec *rec, *tmprec;
3217
3218 right_el = path_leaf_el(right_path);;
3219 if (left_path)
3220 left_el = path_leaf_el(left_path);
3221
3222 el = right_el;
3223 insert_el = right_el;
3224 index = ocfs2_search_extent_list(el, cpos);
3225 if (index != -1) {
3226 if (index == 0 && left_path) {
3227 BUG_ON(ocfs2_is_empty_extent(&el->l_recs[0]));
3228
3229 /*
3230 * This typically means that the record
3231 * started in the left path but moved to the
3232 * right as a result of rotation. We either
3233 * move the existing record to the left, or we
3234 * do the later insert there.
3235 *
3236 * In this case, the left path should always
3237 * exist as the rotate code will have passed
3238 * it back for a post-insert update.
3239 */
3240
3241 if (split == SPLIT_LEFT) {
3242 /*
3243 * It's a left split. Since we know
3244 * that the rotate code gave us an
3245 * empty extent in the left path, we
3246 * can just do the insert there.
3247 */
3248 insert_el = left_el;
3249 } else {
3250 /*
3251 * Right split - we have to move the
3252 * existing record over to the left
3253 * leaf. The insert will be into the
3254 * newly created empty extent in the
3255 * right leaf.
3256 */
3257 tmprec = &right_el->l_recs[index];
3258 ocfs2_rotate_leaf(left_el, tmprec);
3259 el = left_el;
3260
3261 memset(tmprec, 0, sizeof(*tmprec));
3262 index = ocfs2_search_extent_list(left_el, cpos);
3263 BUG_ON(index == -1);
3264 }
3265 }
3266 } else {
3267 BUG_ON(!left_path);
3268 BUG_ON(!ocfs2_is_empty_extent(&left_el->l_recs[0]));
3269 /*
3270 * Left path is easy - we can just allow the insert to
3271 * happen.
3272 */
3273 el = left_el;
3274 insert_el = left_el;
3275 index = ocfs2_search_extent_list(el, cpos);
3276 BUG_ON(index == -1);
3277 }
3278
3279 rec = &el->l_recs[index];
3280 ocfs2_subtract_from_rec(inode->i_sb, split, rec, split_rec);
3281 ocfs2_rotate_leaf(insert_el, split_rec);
3282}
3283
1934/* 3284/*
1935 * This function only does inserts on an allocation b-tree. For dinode 3285 * This function only does inserts on an allocation b-tree. For dinode
1936 * lists, ocfs2_insert_at_leaf() is called directly. 3286 * lists, ocfs2_insert_at_leaf() is called directly.
@@ -1948,7 +3298,6 @@ static int ocfs2_insert_path(struct inode *inode,
1948{ 3298{
1949 int ret, subtree_index; 3299 int ret, subtree_index;
1950 struct buffer_head *leaf_bh = path_leaf_bh(right_path); 3300 struct buffer_head *leaf_bh = path_leaf_bh(right_path);
1951 struct ocfs2_extent_list *el;
1952 3301
1953 /* 3302 /*
1954 * Pass both paths to the journal. The majority of inserts 3303 * Pass both paths to the journal. The majority of inserts
@@ -1984,9 +3333,18 @@ static int ocfs2_insert_path(struct inode *inode,
1984 } 3333 }
1985 } 3334 }
1986 3335
1987 el = path_leaf_el(right_path); 3336 if (insert->ins_split != SPLIT_NONE) {
3337 /*
3338 * We could call ocfs2_insert_at_leaf() for some types
3339 * of splits, but it's easier to just let one seperate
3340 * function sort it all out.
3341 */
3342 ocfs2_split_record(inode, left_path, right_path,
3343 insert_rec, insert->ins_split);
3344 } else
3345 ocfs2_insert_at_leaf(insert_rec, path_leaf_el(right_path),
3346 insert, inode);
1988 3347
1989 ocfs2_insert_at_leaf(insert_rec, el, insert, inode);
1990 ret = ocfs2_journal_dirty(handle, leaf_bh); 3348 ret = ocfs2_journal_dirty(handle, leaf_bh);
1991 if (ret) 3349 if (ret)
1992 mlog_errno(ret); 3350 mlog_errno(ret);
@@ -2075,7 +3433,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
2075 * can wind up skipping both of these two special cases... 3433 * can wind up skipping both of these two special cases...
2076 */ 3434 */
2077 if (rotate) { 3435 if (rotate) {
2078 ret = ocfs2_rotate_tree_right(inode, handle, 3436 ret = ocfs2_rotate_tree_right(inode, handle, type->ins_split,
2079 le32_to_cpu(insert_rec->e_cpos), 3437 le32_to_cpu(insert_rec->e_cpos),
2080 right_path, &left_path); 3438 right_path, &left_path);
2081 if (ret) { 3439 if (ret) {
@@ -2100,8 +3458,9 @@ static int ocfs2_do_insert_extent(struct inode *inode,
2100 } 3458 }
2101 3459
2102out_update_clusters: 3460out_update_clusters:
2103 ocfs2_update_dinode_clusters(inode, di, 3461 if (type->ins_split == SPLIT_NONE)
2104 le16_to_cpu(insert_rec->e_leaf_clusters)); 3462 ocfs2_update_dinode_clusters(inode, di,
3463 le16_to_cpu(insert_rec->e_leaf_clusters));
2105 3464
2106 ret = ocfs2_journal_dirty(handle, di_bh); 3465 ret = ocfs2_journal_dirty(handle, di_bh);
2107 if (ret) 3466 if (ret)
@@ -2114,6 +3473,44 @@ out:
2114 return ret; 3473 return ret;
2115} 3474}
2116 3475
/*
 * Decide how @split_rec could merge with the neighbors of
 * el->l_recs[index]: returns CONTIG_LEFT, CONTIG_RIGHT,
 * CONTIG_LEFTRIGHT (contiguous with both sides) or CONTIG_NONE,
 * as judged by ocfs2_extent_contig() against the records on either
 * side.
 */
static enum ocfs2_contig_type
ocfs2_figure_merge_contig_type(struct inode *inode,
			       struct ocfs2_extent_list *el, int index,
			       struct ocfs2_extent_rec *split_rec)
{
	struct ocfs2_extent_rec *rec;
	enum ocfs2_contig_type ret = CONTIG_NONE;

	/*
	 * We're careful to check for an empty extent record here -
	 * the merge code will know what to do if it sees one.
	 */

	if (index > 0) {
		rec = &el->l_recs[index - 1];
		if (index == 1 && ocfs2_is_empty_extent(rec)) {
			/*
			 * Our only left neighbor is the empty extent:
			 * flag CONTIG_RIGHT only when the split starts
			 * exactly where our record does.
			 */
			if (split_rec->e_cpos == el->l_recs[index].e_cpos)
				ret = CONTIG_RIGHT;
		} else {
			ret = ocfs2_extent_contig(inode, rec, split_rec);
		}
	}

	if (index < (le16_to_cpu(el->l_next_free_rec) - 1)) {
		enum ocfs2_contig_type contig_type;

		rec = &el->l_recs[index + 1];
		contig_type = ocfs2_extent_contig(inode, rec, split_rec);

		/* Contiguous on both sides upgrades to LEFTRIGHT. */
		if (contig_type == CONTIG_LEFT && ret == CONTIG_RIGHT)
			ret = CONTIG_LEFTRIGHT;
		else if (ret == CONTIG_NONE)
			ret = contig_type;
	}

	return ret;
}
3513
2117static void ocfs2_figure_contig_type(struct inode *inode, 3514static void ocfs2_figure_contig_type(struct inode *inode,
2118 struct ocfs2_insert_type *insert, 3515 struct ocfs2_insert_type *insert,
2119 struct ocfs2_extent_list *el, 3516 struct ocfs2_extent_list *el,
@@ -2205,6 +3602,8 @@ static int ocfs2_figure_insert_type(struct inode *inode,
2205 struct ocfs2_path *path = NULL; 3602 struct ocfs2_path *path = NULL;
2206 struct buffer_head *bh = NULL; 3603 struct buffer_head *bh = NULL;
2207 3604
3605 insert->ins_split = SPLIT_NONE;
3606
2208 el = &di->id2.i_list; 3607 el = &di->id2.i_list;
2209 insert->ins_tree_depth = le16_to_cpu(el->l_tree_depth); 3608 insert->ins_tree_depth = le16_to_cpu(el->l_tree_depth);
2210 3609
@@ -2327,9 +3726,10 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
2327 u32 cpos, 3726 u32 cpos,
2328 u64 start_blk, 3727 u64 start_blk,
2329 u32 new_clusters, 3728 u32 new_clusters,
3729 u8 flags,
2330 struct ocfs2_alloc_context *meta_ac) 3730 struct ocfs2_alloc_context *meta_ac)
2331{ 3731{
2332 int status, shift; 3732 int status;
2333 struct buffer_head *last_eb_bh = NULL; 3733 struct buffer_head *last_eb_bh = NULL;
2334 struct buffer_head *bh = NULL; 3734 struct buffer_head *bh = NULL;
2335 struct ocfs2_insert_type insert = {0, }; 3735 struct ocfs2_insert_type insert = {0, };
@@ -2350,6 +3750,7 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
2350 rec.e_cpos = cpu_to_le32(cpos); 3750 rec.e_cpos = cpu_to_le32(cpos);
2351 rec.e_blkno = cpu_to_le64(start_blk); 3751 rec.e_blkno = cpu_to_le64(start_blk);
2352 rec.e_leaf_clusters = cpu_to_le16(new_clusters); 3752 rec.e_leaf_clusters = cpu_to_le16(new_clusters);
3753 rec.e_flags = flags;
2353 3754
2354 status = ocfs2_figure_insert_type(inode, fe_bh, &last_eb_bh, &rec, 3755 status = ocfs2_figure_insert_type(inode, fe_bh, &last_eb_bh, &rec,
2355 &insert); 3756 &insert);
@@ -2364,55 +3765,16 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
2364 insert.ins_appending, insert.ins_contig, insert.ins_contig_index, 3765 insert.ins_appending, insert.ins_contig, insert.ins_contig_index,
2365 insert.ins_free_records, insert.ins_tree_depth); 3766 insert.ins_free_records, insert.ins_tree_depth);
2366 3767
2367 /* 3768 if (insert.ins_contig == CONTIG_NONE && insert.ins_free_records == 0) {
2368 * Avoid growing the tree unless we're out of records and the 3769 status = ocfs2_grow_tree(inode, handle, fe_bh,
2369 * insert type requres one. 3770 &insert.ins_tree_depth, &last_eb_bh,
2370 */ 3771 meta_ac);
2371 if (insert.ins_contig != CONTIG_NONE || insert.ins_free_records) 3772 if (status) {
2372 goto out_add;
2373
2374 shift = ocfs2_find_branch_target(osb, inode, fe_bh, &bh);
2375 if (shift < 0) {
2376 status = shift;
2377 mlog_errno(status);
2378 goto bail;
2379 }
2380
2381 /* We traveled all the way to the bottom of the allocation tree
2382 * and didn't find room for any more extents - we need to add
2383 * another tree level */
2384 if (shift) {
2385 BUG_ON(bh);
2386 mlog(0, "need to shift tree depth "
2387 "(current = %d)\n", insert.ins_tree_depth);
2388
2389 /* ocfs2_shift_tree_depth will return us a buffer with
2390 * the new extent block (so we can pass that to
2391 * ocfs2_add_branch). */
2392 status = ocfs2_shift_tree_depth(osb, handle, inode, fe_bh,
2393 meta_ac, &bh);
2394 if (status < 0) {
2395 mlog_errno(status); 3773 mlog_errno(status);
2396 goto bail; 3774 goto bail;
2397 } 3775 }
2398 insert.ins_tree_depth++;
2399 /* Special case: we have room now if we shifted from
2400 * tree_depth 0 */
2401 if (insert.ins_tree_depth == 1)
2402 goto out_add;
2403 }
2404
2405 /* call ocfs2_add_branch to add the final part of the tree with
2406 * the new data. */
2407 mlog(0, "add branch. bh = %p\n", bh);
2408 status = ocfs2_add_branch(osb, handle, inode, fe_bh, bh, last_eb_bh,
2409 meta_ac);
2410 if (status < 0) {
2411 mlog_errno(status);
2412 goto bail;
2413 } 3776 }
2414 3777
2415out_add:
2416 /* Finally, we can add clusters. This might rotate the tree for us. */ 3778 /* Finally, we can add clusters. This might rotate the tree for us. */
2417 status = ocfs2_do_insert_extent(inode, handle, fe_bh, &rec, &insert); 3779 status = ocfs2_do_insert_extent(inode, handle, fe_bh, &rec, &insert);
2418 if (status < 0) 3780 if (status < 0)
@@ -2431,7 +3793,720 @@ bail:
2431 return status; 3793 return status;
2432} 3794}
2433 3795
2434static inline int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb) 3796static void ocfs2_make_right_split_rec(struct super_block *sb,
3797 struct ocfs2_extent_rec *split_rec,
3798 u32 cpos,
3799 struct ocfs2_extent_rec *rec)
3800{
3801 u32 rec_cpos = le32_to_cpu(rec->e_cpos);
3802 u32 rec_range = rec_cpos + le16_to_cpu(rec->e_leaf_clusters);
3803
3804 memset(split_rec, 0, sizeof(struct ocfs2_extent_rec));
3805
3806 split_rec->e_cpos = cpu_to_le32(cpos);
3807 split_rec->e_leaf_clusters = cpu_to_le16(rec_range - cpos);
3808
3809 split_rec->e_blkno = rec->e_blkno;
3810 le64_add_cpu(&split_rec->e_blkno,
3811 ocfs2_clusters_to_blocks(sb, cpos - rec_cpos));
3812
3813 split_rec->e_flags = rec->e_flags;
3814}
3815
/*
 * Split the record at split_index so that the range described by
 * orig_split_rec becomes its own record, then insert that record.
 *
 * A split can touch the left edge, the right edge, or the middle of
 * the existing record. The middle case is handled as two passes: a
 * right split first, then a left split on the second trip through the
 * "leftright" loop below.
 *
 * May grow the tree (via meta_ac) if the rightmost leaf is full.
 * *last_eb_bh may be swapped for a new rightmost block in that case.
 * Returns 0 on success, negative errno otherwise.
 */
static int ocfs2_split_and_insert(struct inode *inode,
				  handle_t *handle,
				  struct ocfs2_path *path,
				  struct buffer_head *di_bh,
				  struct buffer_head **last_eb_bh,
				  int split_index,
				  struct ocfs2_extent_rec *orig_split_rec,
				  struct ocfs2_alloc_context *meta_ac)
{
	int ret = 0, depth;
	unsigned int insert_range, rec_range, do_leftright = 0;
	struct ocfs2_extent_rec tmprec;
	struct ocfs2_extent_list *rightmost_el;
	struct ocfs2_extent_rec rec;
	struct ocfs2_extent_rec split_rec = *orig_split_rec;
	struct ocfs2_insert_type insert;
	struct ocfs2_extent_block *eb;
	struct ocfs2_dinode *di;

leftright:
	/*
	 * Store a copy of the record on the stack - it might move
	 * around as the tree is manipulated below.
	 */
	rec = path_leaf_el(path)->l_recs[split_index];

	di = (struct ocfs2_dinode *)di_bh->b_data;
	rightmost_el = &di->id2.i_list;

	depth = le16_to_cpu(rightmost_el->l_tree_depth);
	if (depth) {
		/* With a btree, the rightmost leaf lives in *last_eb_bh. */
		BUG_ON(!(*last_eb_bh));
		eb = (struct ocfs2_extent_block *) (*last_eb_bh)->b_data;
		rightmost_el = &eb->h_list;
	}

	/* Rightmost leaf is full - grow the tree before inserting. */
	if (le16_to_cpu(rightmost_el->l_next_free_rec) ==
	    le16_to_cpu(rightmost_el->l_count)) {
		int old_depth = depth;

		ret = ocfs2_grow_tree(inode, handle, di_bh, &depth, last_eb_bh,
				      meta_ac);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		/* Growth added a level - re-derive the rightmost list. */
		if (old_depth != depth) {
			eb = (struct ocfs2_extent_block *)(*last_eb_bh)->b_data;
			rightmost_el = &eb->h_list;
		}
	}

	memset(&insert, 0, sizeof(struct ocfs2_insert_type));
	insert.ins_appending = APPEND_NONE;
	insert.ins_contig = CONTIG_NONE;
	insert.ins_free_records = le16_to_cpu(rightmost_el->l_count)
		- le16_to_cpu(rightmost_el->l_next_free_rec);
	insert.ins_tree_depth = depth;

	insert_range = le32_to_cpu(split_rec.e_cpos) +
		le16_to_cpu(split_rec.e_leaf_clusters);
	rec_range = le32_to_cpu(rec.e_cpos) +
		le16_to_cpu(rec.e_leaf_clusters);

	if (split_rec.e_cpos == rec.e_cpos) {
		/* Split range starts at the record's left edge. */
		insert.ins_split = SPLIT_LEFT;
	} else if (insert_range == rec_range) {
		/* Split range ends at the record's right edge. */
		insert.ins_split = SPLIT_RIGHT;
	} else {
		/*
		 * Left/right split. We fake this as a right split
		 * first and then make a second pass as a left split.
		 */
		insert.ins_split = SPLIT_RIGHT;

		ocfs2_make_right_split_rec(inode->i_sb, &tmprec, insert_range,
					   &rec);

		split_rec = tmprec;

		BUG_ON(do_leftright);
		do_leftright = 1;
	}

	ret = ocfs2_do_insert_extent(inode, handle, di_bh, &split_rec,
				     &insert);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	if (do_leftright == 1) {
		u32 cpos;
		struct ocfs2_extent_list *el;

		/*
		 * Second pass of a middle split: re-find the leaf (the
		 * insert above may have restructured the tree) and run
		 * the loop again with the original (left) split record.
		 */
		do_leftright++;
		split_rec = *orig_split_rec;

		ocfs2_reinit_path(path, 1);

		cpos = le32_to_cpu(split_rec.e_cpos);
		ret = ocfs2_find_path(inode, path, cpos);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		el = path_leaf_el(path);
		split_index = ocfs2_search_extent_list(el, cpos);
		goto leftright;
	}
out:

	return ret;
}
3932
3933/*
3934 * Mark part or all of the extent record at split_index in the leaf
3935 * pointed to by path as written. This removes the unwritten
3936 * extent flag.
3937 *
3938 * Care is taken to handle contiguousness so as to not grow the tree.
3939 *
3940 * meta_ac is not strictly necessary - we only truly need it if growth
3941 * of the tree is required. All other cases will degrade into a less
3942 * optimal tree layout.
3943 *
3944 * last_eb_bh should be the rightmost leaf block for any inode with a
3945 * btree. Since a split may grow the tree or a merge might shrink it, the caller cannot trust the contents of that buffer after this call.
3946 *
3947 * This code is optimized for readability - several passes might be
3948 * made over certain portions of the tree. All of those blocks will
3949 * have been brought into cache (and pinned via the journal), so the
3950 * extra overhead is not expressed in terms of disk reads.
3951 */
3952static int __ocfs2_mark_extent_written(struct inode *inode,
3953 struct buffer_head *di_bh,
3954 handle_t *handle,
3955 struct ocfs2_path *path,
3956 int split_index,
3957 struct ocfs2_extent_rec *split_rec,
3958 struct ocfs2_alloc_context *meta_ac,
3959 struct ocfs2_cached_dealloc_ctxt *dealloc)
3960{
3961 int ret = 0;
3962 struct ocfs2_extent_list *el = path_leaf_el(path);
3963 struct buffer_head *eb_bh, *last_eb_bh = NULL;
3964 struct ocfs2_extent_rec *rec = &el->l_recs[split_index];
3965 struct ocfs2_merge_ctxt ctxt;
3966 struct ocfs2_extent_list *rightmost_el;
3967
3968 if (!rec->e_flags & OCFS2_EXT_UNWRITTEN) {
3969 ret = -EIO;
3970 mlog_errno(ret);
3971 goto out;
3972 }
3973
3974 if (le32_to_cpu(rec->e_cpos) > le32_to_cpu(split_rec->e_cpos) ||
3975 ((le32_to_cpu(rec->e_cpos) + le16_to_cpu(rec->e_leaf_clusters)) <
3976 (le32_to_cpu(split_rec->e_cpos) + le16_to_cpu(split_rec->e_leaf_clusters)))) {
3977 ret = -EIO;
3978 mlog_errno(ret);
3979 goto out;
3980 }
3981
3982 eb_bh = path_leaf_bh(path);
3983 ret = ocfs2_journal_access(handle, inode, eb_bh,
3984 OCFS2_JOURNAL_ACCESS_WRITE);
3985 if (ret) {
3986 mlog_errno(ret);
3987 goto out;
3988 }
3989
3990 ctxt.c_contig_type = ocfs2_figure_merge_contig_type(inode, el,
3991 split_index,
3992 split_rec);
3993
3994 /*
3995 * The core merge / split code wants to know how much room is
3996 * left in this inodes allocation tree, so we pass the
3997 * rightmost extent list.
3998 */
3999 if (path->p_tree_depth) {
4000 struct ocfs2_extent_block *eb;
4001 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
4002
4003 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
4004 le64_to_cpu(di->i_last_eb_blk),
4005 &last_eb_bh, OCFS2_BH_CACHED, inode);
4006 if (ret) {
4007 mlog_exit(ret);
4008 goto out;
4009 }
4010
4011 eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
4012 if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
4013 OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
4014 ret = -EROFS;
4015 goto out;
4016 }
4017
4018 rightmost_el = &eb->h_list;
4019 } else
4020 rightmost_el = path_root_el(path);
4021
4022 ctxt.c_used_tail_recs = le16_to_cpu(rightmost_el->l_next_free_rec);
4023 if (ctxt.c_used_tail_recs > 0 &&
4024 ocfs2_is_empty_extent(&rightmost_el->l_recs[0]))
4025 ctxt.c_used_tail_recs--;
4026
4027 if (rec->e_cpos == split_rec->e_cpos &&
4028 rec->e_leaf_clusters == split_rec->e_leaf_clusters)
4029 ctxt.c_split_covers_rec = 1;
4030 else
4031 ctxt.c_split_covers_rec = 0;
4032
4033 ctxt.c_has_empty_extent = ocfs2_is_empty_extent(&el->l_recs[0]);
4034
4035 mlog(0, "index: %d, contig: %u, used_tail_recs: %u, "
4036 "has_empty: %u, split_covers: %u\n", split_index,
4037 ctxt.c_contig_type, ctxt.c_used_tail_recs,
4038 ctxt.c_has_empty_extent, ctxt.c_split_covers_rec);
4039
4040 if (ctxt.c_contig_type == CONTIG_NONE) {
4041 if (ctxt.c_split_covers_rec)
4042 el->l_recs[split_index] = *split_rec;
4043 else
4044 ret = ocfs2_split_and_insert(inode, handle, path, di_bh,
4045 &last_eb_bh, split_index,
4046 split_rec, meta_ac);
4047 if (ret)
4048 mlog_errno(ret);
4049 } else {
4050 ret = ocfs2_try_to_merge_extent(inode, handle, path,
4051 split_index, split_rec,
4052 dealloc, &ctxt);
4053 if (ret)
4054 mlog_errno(ret);
4055 }
4056
4057 ocfs2_journal_dirty(handle, eb_bh);
4058
4059out:
4060 brelse(last_eb_bh);
4061 return ret;
4062}
4063
/*
 * Mark the already-existing extent at cpos as written for len clusters.
 *
 * If the existing extent is larger than the request, initiate a
 * split. An attempt will be made at merging with adjacent extents.
 *
 * The caller is responsible for passing down meta_ac if we'll need it.
 *
 * Returns 0 on success; -EROFS on on-disk inconsistency, other
 * negative errno on failure.
 */
int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh,
			      handle_t *handle, u32 cpos, u32 len, u32 phys,
			      struct ocfs2_alloc_context *meta_ac,
			      struct ocfs2_cached_dealloc_ctxt *dealloc)
{
	int ret, index;
	u64 start_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys);
	struct ocfs2_extent_rec split_rec;
	struct ocfs2_path *left_path = NULL;
	struct ocfs2_extent_list *el;

	mlog(0, "Inode %lu cpos %u, len %u, phys %u (%llu)\n",
	     inode->i_ino, cpos, len, phys, (unsigned long long)start_blkno);

	/* Unwritten extents require the corresponding feature bit. */
	if (!ocfs2_writes_unwritten_extents(OCFS2_SB(inode->i_sb))) {
		ocfs2_error(inode->i_sb, "Inode %llu has unwritten extents "
			    "that are being written to, but the feature bit "
			    "is not set in the super block.",
			    (unsigned long long)OCFS2_I(inode)->ip_blkno);
		ret = -EROFS;
		goto out;
	}

	/*
	 * XXX: This should be fixed up so that we just re-insert the
	 * next extent records.
	 */
	ocfs2_extent_map_trunc(inode, 0);

	left_path = ocfs2_new_inode_path(di_bh);
	if (!left_path) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_find_path(inode, left_path, cpos);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}
	el = path_leaf_el(left_path);

	/* Locate the record containing cpos within the leaf. */
	index = ocfs2_search_extent_list(el, cpos);
	if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
		ocfs2_error(inode->i_sb,
			    "Inode %llu has an extent at cpos %u which can no "
			    "longer be found.\n",
			    (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos);
		ret = -EROFS;
		goto out;
	}

	/*
	 * Build the replacement record: same flags as the existing one,
	 * minus UNWRITTEN, covering exactly the requested range.
	 */
	memset(&split_rec, 0, sizeof(struct ocfs2_extent_rec));
	split_rec.e_cpos = cpu_to_le32(cpos);
	split_rec.e_leaf_clusters = cpu_to_le16(len);
	split_rec.e_blkno = cpu_to_le64(start_blkno);
	split_rec.e_flags = path_leaf_el(left_path)->l_recs[index].e_flags;
	split_rec.e_flags &= ~OCFS2_EXT_UNWRITTEN;

	ret = __ocfs2_mark_extent_written(inode, di_bh, handle, left_path,
					  index, &split_rec, meta_ac, dealloc);
	if (ret)
		mlog_errno(ret);

out:
	ocfs2_free_path(left_path);
	return ret;
}
4141
/*
 * Split the record at l_recs[index] so that a new record begins at
 * cluster offset new_range. Used by extent removal to turn a
 * middle-of-record truncate into an edge truncate.
 *
 * Extends the transaction for the worst-case tree growth before
 * inserting. Returns 0 on success, negative errno otherwise.
 */
static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh,
			    handle_t *handle, struct ocfs2_path *path,
			    int index, u32 new_range,
			    struct ocfs2_alloc_context *meta_ac)
{
	int ret, depth, credits = handle->h_buffer_credits;
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
	struct buffer_head *last_eb_bh = NULL;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_list *rightmost_el, *el;
	struct ocfs2_extent_rec split_rec;
	struct ocfs2_extent_rec *rec;
	struct ocfs2_insert_type insert;

	/*
	 * Setup the record to split before we grow the tree.
	 */
	el = path_leaf_el(path);
	rec = &el->l_recs[index];
	ocfs2_make_right_split_rec(inode->i_sb, &split_rec, new_range, rec);

	depth = path->p_tree_depth;
	if (depth > 0) {
		/* Btree: the rightmost leaf lives in i_last_eb_blk. */
		ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
				       le64_to_cpu(di->i_last_eb_blk),
				       &last_eb_bh, OCFS2_BH_CACHED, inode);
		if (ret < 0) {
			mlog_errno(ret);
			goto out;
		}

		eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
		rightmost_el = &eb->h_list;
	} else
		rightmost_el = path_leaf_el(path);

	/* Reserve journal credits for a worst-case insert. */
	credits += path->p_tree_depth + ocfs2_extend_meta_needed(di);
	ret = ocfs2_extend_trans(handle, credits);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/* Rightmost leaf full - grow the tree first. */
	if (le16_to_cpu(rightmost_el->l_next_free_rec) ==
	    le16_to_cpu(rightmost_el->l_count)) {
		int old_depth = depth;

		ret = ocfs2_grow_tree(inode, handle, di_bh, &depth, &last_eb_bh,
				      meta_ac);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		/* Growth added a level - re-derive the rightmost list. */
		if (old_depth != depth) {
			eb = (struct ocfs2_extent_block *)last_eb_bh->b_data;
			rightmost_el = &eb->h_list;
		}
	}

	memset(&insert, 0, sizeof(struct ocfs2_insert_type));
	insert.ins_appending = APPEND_NONE;
	insert.ins_contig = CONTIG_NONE;
	insert.ins_split = SPLIT_RIGHT;
	insert.ins_free_records = le16_to_cpu(rightmost_el->l_count)
		- le16_to_cpu(rightmost_el->l_next_free_rec);
	insert.ins_tree_depth = depth;

	ret = ocfs2_do_insert_extent(inode, handle, di_bh, &split_rec, &insert);
	if (ret)
		mlog_errno(ret);

out:
	brelse(last_eb_bh);
	return ret;
}
4218
/*
 * Remove the range [cpos, cpos + len) from the record at l_recs[index].
 * The range must share at least one edge with the record (whole-record
 * removal, left-edge shrink, or right-edge shrink) - middle removals
 * are split into edge removals by the caller before reaching here.
 *
 * Handles the tree bookkeeping: left rotation to fill holes, rightmost
 * edge updates, and subtree fixup when the leftmost offset of an
 * interior leaf changes.
 */
static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
			      struct ocfs2_path *path, int index,
			      struct ocfs2_cached_dealloc_ctxt *dealloc,
			      u32 cpos, u32 len)
{
	int ret;
	u32 left_cpos, rec_range, trunc_range;
	int wants_rotate = 0, is_rightmost_tree_rec = 0;
	struct super_block *sb = inode->i_sb;
	struct ocfs2_path *left_path = NULL;
	struct ocfs2_extent_list *el = path_leaf_el(path);
	struct ocfs2_extent_rec *rec;
	struct ocfs2_extent_block *eb;

	/* Rotate away a leading empty record first so index is stable. */
	if (ocfs2_is_empty_extent(&el->l_recs[0]) && index > 0) {
		ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		index--;
	}

	if (index == (le16_to_cpu(el->l_next_free_rec) - 1) &&
	    path->p_tree_depth) {
		/*
		 * Check whether this is the rightmost tree record. If
		 * we remove all of this record or part of its right
		 * edge then an update of the record lengths above it
		 * will be required.
		 */
		eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data;
		if (eb->h_next_leaf_blk == 0)
			is_rightmost_tree_rec = 1;
	}

	rec = &el->l_recs[index];
	if (index == 0 && path->p_tree_depth &&
	    le32_to_cpu(rec->e_cpos) == cpos) {
		/*
		 * Changing the leftmost offset (via partial or whole
		 * record truncate) of an interior (or rightmost) path
		 * means we have to update the subtree that is formed
		 * by this leaf and the one to it's left.
		 *
		 * There are two cases we can skip:
		 * 1) Path is the leftmost one in our inode tree.
		 * 2) The leaf is rightmost and will be empty after
		 *    we remove the extent record - the rotate code
		 *    knows how to update the newly formed edge.
		 */

		ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, path,
						    &left_cpos);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		if (left_cpos && le16_to_cpu(el->l_next_free_rec) > 1) {
			left_path = ocfs2_new_path(path_root_bh(path),
						   path_root_el(path));
			if (!left_path) {
				ret = -ENOMEM;
				mlog_errno(ret);
				goto out;
			}

			ret = ocfs2_find_path(inode, left_path, left_cpos);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}
		}
	}

	ret = ocfs2_extend_rotate_transaction(handle, 0,
					      handle->h_buffer_credits,
					      path);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/* Get journal write access on every block in both paths. */
	ret = ocfs2_journal_access_path(inode, handle, path);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access_path(inode, handle, left_path);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	rec_range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
	trunc_range = cpos + len;

	if (le32_to_cpu(rec->e_cpos) == cpos && rec_range == trunc_range) {
		int next_free;

		/* Whole-record removal: empty it and plan a rotate. */
		memset(rec, 0, sizeof(*rec));
		ocfs2_cleanup_merge(el, index);
		wants_rotate = 1;

		next_free = le16_to_cpu(el->l_next_free_rec);
		if (is_rightmost_tree_rec && next_free > 1) {
			/*
			 * We skip the edge update if this path will
			 * be deleted by the rotate code.
			 */
			rec = &el->l_recs[next_free - 1];
			ocfs2_adjust_rightmost_records(inode, handle, path,
						       rec);
		}
	} else if (le32_to_cpu(rec->e_cpos) == cpos) {
		/* Remove leftmost portion of the record. */
		le32_add_cpu(&rec->e_cpos, len);
		le64_add_cpu(&rec->e_blkno, ocfs2_clusters_to_blocks(sb, len));
		le16_add_cpu(&rec->e_leaf_clusters, -len);
	} else if (rec_range == trunc_range) {
		/* Remove rightmost portion of the record */
		le16_add_cpu(&rec->e_leaf_clusters, -len);
		if (is_rightmost_tree_rec)
			ocfs2_adjust_rightmost_records(inode, handle, path, rec);
	} else {
		/* Caller should have trapped this. */
		mlog(ML_ERROR, "Inode %llu: Invalid record truncate: (%u, %u) "
		     "(%u, %u)\n", (unsigned long long)OCFS2_I(inode)->ip_blkno,
		     le32_to_cpu(rec->e_cpos),
		     le16_to_cpu(rec->e_leaf_clusters), cpos, len);
		BUG();
	}

	if (left_path) {
		int subtree_index;

		/* Propagate the new leftmost offset up the shared subtree. */
		subtree_index = ocfs2_find_subtree_root(inode, left_path, path);
		ocfs2_complete_edge_insert(inode, handle, left_path, path,
					   subtree_index);
	}

	ocfs2_journal_dirty(handle, path_leaf_bh(path));

	ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

out:
	ocfs2_free_path(left_path);
	return ret;
}
4375
4376int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh,
4377 u32 cpos, u32 len, handle_t *handle,
4378 struct ocfs2_alloc_context *meta_ac,
4379 struct ocfs2_cached_dealloc_ctxt *dealloc)
4380{
4381 int ret, index;
4382 u32 rec_range, trunc_range;
4383 struct ocfs2_extent_rec *rec;
4384 struct ocfs2_extent_list *el;
4385 struct ocfs2_path *path;
4386
4387 ocfs2_extent_map_trunc(inode, 0);
4388
4389 path = ocfs2_new_inode_path(di_bh);
4390 if (!path) {
4391 ret = -ENOMEM;
4392 mlog_errno(ret);
4393 goto out;
4394 }
4395
4396 ret = ocfs2_find_path(inode, path, cpos);
4397 if (ret) {
4398 mlog_errno(ret);
4399 goto out;
4400 }
4401
4402 el = path_leaf_el(path);
4403 index = ocfs2_search_extent_list(el, cpos);
4404 if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
4405 ocfs2_error(inode->i_sb,
4406 "Inode %llu has an extent at cpos %u which can no "
4407 "longer be found.\n",
4408 (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos);
4409 ret = -EROFS;
4410 goto out;
4411 }
4412
4413 /*
4414 * We have 3 cases of extent removal:
4415 * 1) Range covers the entire extent rec
4416 * 2) Range begins or ends on one edge of the extent rec
4417 * 3) Range is in the middle of the extent rec (no shared edges)
4418 *
4419 * For case 1 we remove the extent rec and left rotate to
4420 * fill the hole.
4421 *
4422 * For case 2 we just shrink the existing extent rec, with a
4423 * tree update if the shrinking edge is also the edge of an
4424 * extent block.
4425 *
4426 * For case 3 we do a right split to turn the extent rec into
4427 * something case 2 can handle.
4428 */
4429 rec = &el->l_recs[index];
4430 rec_range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
4431 trunc_range = cpos + len;
4432
4433 BUG_ON(cpos < le32_to_cpu(rec->e_cpos) || trunc_range > rec_range);
4434
4435 mlog(0, "Inode %llu, remove (cpos %u, len %u). Existing index %d "
4436 "(cpos %u, len %u)\n",
4437 (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos, len, index,
4438 le32_to_cpu(rec->e_cpos), ocfs2_rec_clusters(el, rec));
4439
4440 if (le32_to_cpu(rec->e_cpos) == cpos || rec_range == trunc_range) {
4441 ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc,
4442 cpos, len);
4443 if (ret) {
4444 mlog_errno(ret);
4445 goto out;
4446 }
4447 } else {
4448 ret = ocfs2_split_tree(inode, di_bh, handle, path, index,
4449 trunc_range, meta_ac);
4450 if (ret) {
4451 mlog_errno(ret);
4452 goto out;
4453 }
4454
4455 /*
4456 * The split could have manipulated the tree enough to
4457 * move the record location, so we have to look for it again.
4458 */
4459 ocfs2_reinit_path(path, 1);
4460
4461 ret = ocfs2_find_path(inode, path, cpos);
4462 if (ret) {
4463 mlog_errno(ret);
4464 goto out;
4465 }
4466
4467 el = path_leaf_el(path);
4468 index = ocfs2_search_extent_list(el, cpos);
4469 if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
4470 ocfs2_error(inode->i_sb,
4471 "Inode %llu: split at cpos %u lost record.",
4472 (unsigned long long)OCFS2_I(inode)->ip_blkno,
4473 cpos);
4474 ret = -EROFS;
4475 goto out;
4476 }
4477
4478 /*
4479 * Double check our values here. If anything is fishy,
4480 * it's easier to catch it at the top level.
4481 */
4482 rec = &el->l_recs[index];
4483 rec_range = le32_to_cpu(rec->e_cpos) +
4484 ocfs2_rec_clusters(el, rec);
4485 if (rec_range != trunc_range) {
4486 ocfs2_error(inode->i_sb,
4487 "Inode %llu: error after split at cpos %u"
4488 "trunc len %u, existing record is (%u,%u)",
4489 (unsigned long long)OCFS2_I(inode)->ip_blkno,
4490 cpos, len, le32_to_cpu(rec->e_cpos),
4491 ocfs2_rec_clusters(el, rec));
4492 ret = -EROFS;
4493 goto out;
4494 }
4495
4496 ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc,
4497 cpos, len);
4498 if (ret) {
4499 mlog_errno(ret);
4500 goto out;
4501 }
4502 }
4503
4504out:
4505 ocfs2_free_path(path);
4506 return ret;
4507}
4508
4509int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb)
2435{ 4510{
2436 struct buffer_head *tl_bh = osb->osb_tl_bh; 4511 struct buffer_head *tl_bh = osb->osb_tl_bh;
2437 struct ocfs2_dinode *di; 4512 struct ocfs2_dinode *di;
@@ -2464,10 +4539,10 @@ static int ocfs2_truncate_log_can_coalesce(struct ocfs2_truncate_log *tl,
2464 return current_tail == new_start; 4539 return current_tail == new_start;
2465} 4540}
2466 4541
2467static int ocfs2_truncate_log_append(struct ocfs2_super *osb, 4542int ocfs2_truncate_log_append(struct ocfs2_super *osb,
2468 handle_t *handle, 4543 handle_t *handle,
2469 u64 start_blk, 4544 u64 start_blk,
2470 unsigned int num_clusters) 4545 unsigned int num_clusters)
2471{ 4546{
2472 int status, index; 4547 int status, index;
2473 unsigned int start_cluster, tl_count; 4548 unsigned int start_cluster, tl_count;
@@ -2623,7 +4698,7 @@ bail:
2623} 4698}
2624 4699
2625/* Expects you to already be holding tl_inode->i_mutex */ 4700/* Expects you to already be holding tl_inode->i_mutex */
2626static int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) 4701int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
2627{ 4702{
2628 int status; 4703 int status;
2629 unsigned int num_to_flush; 4704 unsigned int num_to_flush;
@@ -2957,6 +5032,219 @@ int ocfs2_truncate_log_init(struct ocfs2_super *osb)
2957 return status; 5032 return status;
2958} 5033}
2959 5034
5035/*
5036 * Delayed de-allocation of suballocator blocks.
5037 *
5038 * Some sets of block de-allocations might involve multiple suballocator inodes.
5039 *
5040 * The locking for this can get extremely complicated, especially when
5041 * the suballocator inodes to delete from aren't known until deep
5042 * within an unrelated codepath.
5043 *
5044 * ocfs2_extent_block structures are a good example of this - an inode
5045 * btree could have been grown by any number of nodes each allocating
5046 * out of their own suballoc inode.
5047 *
5048 * These structures allow the delay of block de-allocation until a
5049 * later time, when locking of multiple cluster inodes won't cause
5050 * deadlock.
5051 */
5052
/*
 * Describes a single block free from a suballocator
 */
struct ocfs2_cached_block_free {
	struct ocfs2_cached_block_free *free_next;	/* singly-linked list */
	u64 free_blk;			/* block number to free */
	unsigned int free_bit;		/* bit within its suballoc group */
};

/* Per-(inode type, slot) list of pending frees, chained off the ctxt. */
struct ocfs2_per_slot_free_list {
	struct ocfs2_per_slot_free_list *f_next_suballocator;
	int f_inode_type;	/* system file type of the suballocator */
	int f_slot;		/* node slot owning the suballocator */
	struct ocfs2_cached_block_free *f_first;	/* head of free list */
};
5068
/*
 * Free every queued block on 'head' back to the suballocator inode
 * identified by (sysfile_type, slot), consuming (kfree-ing) the list
 * as it goes. Takes the suballocator's i_mutex and cluster lock, and
 * runs the frees inside a single (extended) transaction.
 *
 * On early exit any remaining list entries are still freed so the
 * caller never owns them afterwards.
 */
static int ocfs2_free_cached_items(struct ocfs2_super *osb,
				   int sysfile_type,
				   int slot,
				   struct ocfs2_cached_block_free *head)
{
	int ret;
	u64 bg_blkno;
	handle_t *handle;
	struct inode *inode;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_cached_block_free *tmp;

	inode = ocfs2_get_system_file_inode(osb, sysfile_type, slot);
	if (!inode) {
		ret = -EINVAL;
		mlog_errno(ret);
		goto out;
	}

	mutex_lock(&inode->i_mutex);

	ret = ocfs2_meta_lock(inode, &di_bh, 1);
	if (ret) {
		mlog_errno(ret);
		goto out_mutex;
	}

	handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out_unlock;
	}

	while (head) {
		/* Map the block back to its suballocator group. */
		bg_blkno = ocfs2_which_suballoc_group(head->free_blk,
						      head->free_bit);
		mlog(0, "Free bit: (bit %u, blkno %llu)\n",
		     head->free_bit, (unsigned long long)head->free_blk);

		ret = ocfs2_free_suballoc_bits(handle, inode, di_bh,
					       head->free_bit, bg_blkno, 1);
		if (ret) {
			mlog_errno(ret);
			goto out_journal;
		}

		/* Top the transaction back up for the next free. */
		ret = ocfs2_extend_trans(handle, OCFS2_SUBALLOC_FREE);
		if (ret) {
			mlog_errno(ret);
			goto out_journal;
		}

		tmp = head;
		head = head->free_next;
		kfree(tmp);
	}

out_journal:
	ocfs2_commit_trans(osb, handle);

out_unlock:
	ocfs2_meta_unlock(inode, 1);
	brelse(di_bh);
out_mutex:
	mutex_unlock(&inode->i_mutex);
	iput(inode);
out:
	while(head) {
		/* Premature exit may have left some dangling items. */
		tmp = head;
		head = head->free_next;
		kfree(tmp);
	}

	return ret;
}
5146
5147int ocfs2_run_deallocs(struct ocfs2_super *osb,
5148 struct ocfs2_cached_dealloc_ctxt *ctxt)
5149{
5150 int ret = 0, ret2;
5151 struct ocfs2_per_slot_free_list *fl;
5152
5153 if (!ctxt)
5154 return 0;
5155
5156 while (ctxt->c_first_suballocator) {
5157 fl = ctxt->c_first_suballocator;
5158
5159 if (fl->f_first) {
5160 mlog(0, "Free items: (type %u, slot %d)\n",
5161 fl->f_inode_type, fl->f_slot);
5162 ret2 = ocfs2_free_cached_items(osb, fl->f_inode_type,
5163 fl->f_slot, fl->f_first);
5164 if (ret2)
5165 mlog_errno(ret2);
5166 if (!ret)
5167 ret = ret2;
5168 }
5169
5170 ctxt->c_first_suballocator = fl->f_next_suballocator;
5171 kfree(fl);
5172 }
5173
5174 return ret;
5175}
5176
5177static struct ocfs2_per_slot_free_list *
5178ocfs2_find_per_slot_free_list(int type,
5179 int slot,
5180 struct ocfs2_cached_dealloc_ctxt *ctxt)
5181{
5182 struct ocfs2_per_slot_free_list *fl = ctxt->c_first_suballocator;
5183
5184 while (fl) {
5185 if (fl->f_inode_type == type && fl->f_slot == slot)
5186 return fl;
5187
5188 fl = fl->f_next_suballocator;
5189 }
5190
5191 fl = kmalloc(sizeof(*fl), GFP_NOFS);
5192 if (fl) {
5193 fl->f_inode_type = type;
5194 fl->f_slot = slot;
5195 fl->f_first = NULL;
5196 fl->f_next_suballocator = ctxt->c_first_suballocator;
5197
5198 ctxt->c_first_suballocator = fl;
5199 }
5200 return fl;
5201}
5202
5203static int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
5204 int type, int slot, u64 blkno,
5205 unsigned int bit)
5206{
5207 int ret;
5208 struct ocfs2_per_slot_free_list *fl;
5209 struct ocfs2_cached_block_free *item;
5210
5211 fl = ocfs2_find_per_slot_free_list(type, slot, ctxt);
5212 if (fl == NULL) {
5213 ret = -ENOMEM;
5214 mlog_errno(ret);
5215 goto out;
5216 }
5217
5218 item = kmalloc(sizeof(*item), GFP_NOFS);
5219 if (item == NULL) {
5220 ret = -ENOMEM;
5221 mlog_errno(ret);
5222 goto out;
5223 }
5224
5225 mlog(0, "Insert: (type %d, slot %u, bit %u, blk %llu)\n",
5226 type, slot, bit, (unsigned long long)blkno);
5227
5228 item->free_blk = blkno;
5229 item->free_bit = bit;
5230 item->free_next = fl->f_first;
5231
5232 fl->f_first = item;
5233
5234 ret = 0;
5235out:
5236 return ret;
5237}
5238
/*
 * Queue an extent block's suballocator bit for delayed freeing.
 * Convenience wrapper around ocfs2_cache_block_dealloc() using the
 * location recorded in the extent block itself.
 */
static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt,
					 struct ocfs2_extent_block *eb)
{
	return ocfs2_cache_block_dealloc(ctxt, EXTENT_ALLOC_SYSTEM_INODE,
					 le16_to_cpu(eb->h_suballoc_slot),
					 le64_to_cpu(eb->h_blkno),
					 le16_to_cpu(eb->h_suballoc_bit));
}
5247
2960/* This function will figure out whether the currently last extent 5248/* This function will figure out whether the currently last extent
2961 * block will be deleted, and if it will, what the new last extent 5249 * block will be deleted, and if it will, what the new last extent
2962 * block will be so we can update his h_next_leaf_blk field, as well 5250 * block will be so we can update his h_next_leaf_blk field, as well
@@ -3238,27 +5526,10 @@ delete:
3238 BUG_ON(le32_to_cpu(el->l_recs[0].e_cpos)); 5526 BUG_ON(le32_to_cpu(el->l_recs[0].e_cpos));
3239 BUG_ON(le64_to_cpu(el->l_recs[0].e_blkno)); 5527 BUG_ON(le64_to_cpu(el->l_recs[0].e_blkno));
3240 5528
3241 if (le16_to_cpu(eb->h_suballoc_slot) == 0) { 5529 ret = ocfs2_cache_extent_block_free(&tc->tc_dealloc, eb);
3242 /* 5530 /* An error here is not fatal. */
3243 * This code only understands how to 5531 if (ret < 0)
3244 * lock the suballocator in slot 0, 5532 mlog_errno(ret);
3245 * which is fine because allocation is
3246 * only ever done out of that
3247 * suballocator too. A future version
3248 * might change that however, so avoid
3249 * a free if we don't know how to
3250 * handle it. This way an fs incompat
3251 * bit will not be necessary.
3252 */
3253 ret = ocfs2_free_extent_block(handle,
3254 tc->tc_ext_alloc_inode,
3255 tc->tc_ext_alloc_bh,
3256 eb);
3257
3258 /* An error here is not fatal. */
3259 if (ret < 0)
3260 mlog_errno(ret);
3261 }
3262 } else { 5533 } else {
3263 deleted_eb = 0; 5534 deleted_eb = 0;
3264 } 5535 }
@@ -3397,9 +5668,9 @@ static int ocfs2_ordered_zero_func(handle_t *handle, struct buffer_head *bh)
3397 return ocfs2_journal_dirty_data(handle, bh); 5668 return ocfs2_journal_dirty_data(handle, bh);
3398} 5669}
3399 5670
3400static void ocfs2_zero_cluster_pages(struct inode *inode, loff_t isize, 5671static void ocfs2_zero_cluster_pages(struct inode *inode, loff_t start,
3401 struct page **pages, int numpages, 5672 loff_t end, struct page **pages,
3402 u64 phys, handle_t *handle) 5673 int numpages, u64 phys, handle_t *handle)
3403{ 5674{
3404 int i, ret, partial = 0; 5675 int i, ret, partial = 0;
3405 void *kaddr; 5676 void *kaddr;
@@ -3412,26 +5683,14 @@ static void ocfs2_zero_cluster_pages(struct inode *inode, loff_t isize,
3412 if (numpages == 0) 5683 if (numpages == 0)
3413 goto out; 5684 goto out;
3414 5685
3415 from = isize & (PAGE_CACHE_SIZE - 1); /* 1st page offset */ 5686 to = PAGE_CACHE_SIZE;
3416 if (PAGE_CACHE_SHIFT > OCFS2_SB(sb)->s_clustersize_bits) {
3417 /*
3418 * Since 'from' has been capped to a value below page
3419 * size, this calculation won't be able to overflow
3420 * 'to'
3421 */
3422 to = ocfs2_align_bytes_to_clusters(sb, from);
3423
3424 /*
3425 * The truncate tail in this case should never contain
3426 * more than one page at maximum. The loop below also
3427 * assumes this.
3428 */
3429 BUG_ON(numpages != 1);
3430 }
3431
3432 for(i = 0; i < numpages; i++) { 5687 for(i = 0; i < numpages; i++) {
3433 page = pages[i]; 5688 page = pages[i];
3434 5689
5690 from = start & (PAGE_CACHE_SIZE - 1);
5691 if ((end >> PAGE_CACHE_SHIFT) == page->index)
5692 to = end & (PAGE_CACHE_SIZE - 1);
5693
3435 BUG_ON(from > PAGE_CACHE_SIZE); 5694 BUG_ON(from > PAGE_CACHE_SIZE);
3436 BUG_ON(to > PAGE_CACHE_SIZE); 5695 BUG_ON(to > PAGE_CACHE_SIZE);
3437 5696
@@ -3468,10 +5727,7 @@ static void ocfs2_zero_cluster_pages(struct inode *inode, loff_t isize,
3468 5727
3469 flush_dcache_page(page); 5728 flush_dcache_page(page);
3470 5729
3471 /* 5730 start = (page->index + 1) << PAGE_CACHE_SHIFT;
3472 * Every page after the 1st one should be completely zero'd.
3473 */
3474 from = 0;
3475 } 5731 }
3476out: 5732out:
3477 if (pages) { 5733 if (pages) {
@@ -3484,24 +5740,26 @@ out:
3484 } 5740 }
3485} 5741}
3486 5742
3487static int ocfs2_grab_eof_pages(struct inode *inode, loff_t isize, struct page **pages, 5743static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end,
3488 int *num, u64 *phys) 5744 struct page **pages, int *num, u64 *phys)
3489{ 5745{
3490 int i, numpages = 0, ret = 0; 5746 int i, numpages = 0, ret = 0;
3491 unsigned int csize = OCFS2_SB(inode->i_sb)->s_clustersize;
3492 unsigned int ext_flags; 5747 unsigned int ext_flags;
3493 struct super_block *sb = inode->i_sb; 5748 struct super_block *sb = inode->i_sb;
3494 struct address_space *mapping = inode->i_mapping; 5749 struct address_space *mapping = inode->i_mapping;
3495 unsigned long index; 5750 unsigned long index;
3496 u64 next_cluster_bytes; 5751 loff_t last_page_bytes;
3497 5752
3498 BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(sb))); 5753 BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(sb)));
5754 BUG_ON(start > end);
3499 5755
3500 /* Cluster boundary, so we don't need to grab any pages. */ 5756 if (start == end)
3501 if ((isize & (csize - 1)) == 0)
3502 goto out; 5757 goto out;
3503 5758
3504 ret = ocfs2_extent_map_get_blocks(inode, isize >> sb->s_blocksize_bits, 5759 BUG_ON(start >> OCFS2_SB(sb)->s_clustersize_bits !=
5760 (end - 1) >> OCFS2_SB(sb)->s_clustersize_bits);
5761
5762 ret = ocfs2_extent_map_get_blocks(inode, start >> sb->s_blocksize_bits,
3505 phys, NULL, &ext_flags); 5763 phys, NULL, &ext_flags);
3506 if (ret) { 5764 if (ret) {
3507 mlog_errno(ret); 5765 mlog_errno(ret);
@@ -3517,8 +5775,8 @@ static int ocfs2_grab_eof_pages(struct inode *inode, loff_t isize, struct page *
3517 if (ext_flags & OCFS2_EXT_UNWRITTEN) 5775 if (ext_flags & OCFS2_EXT_UNWRITTEN)
3518 goto out; 5776 goto out;
3519 5777
3520 next_cluster_bytes = ocfs2_align_bytes_to_clusters(inode->i_sb, isize); 5778 last_page_bytes = PAGE_ALIGN(end);
3521 index = isize >> PAGE_CACHE_SHIFT; 5779 index = start >> PAGE_CACHE_SHIFT;
3522 do { 5780 do {
3523 pages[numpages] = grab_cache_page(mapping, index); 5781 pages[numpages] = grab_cache_page(mapping, index);
3524 if (!pages[numpages]) { 5782 if (!pages[numpages]) {
@@ -3529,7 +5787,7 @@ static int ocfs2_grab_eof_pages(struct inode *inode, loff_t isize, struct page *
3529 5787
3530 numpages++; 5788 numpages++;
3531 index++; 5789 index++;
3532 } while (index < (next_cluster_bytes >> PAGE_CACHE_SHIFT)); 5790 } while (index < (last_page_bytes >> PAGE_CACHE_SHIFT));
3533 5791
3534out: 5792out:
3535 if (ret != 0) { 5793 if (ret != 0) {
@@ -3558,11 +5816,10 @@ out:
3558 * otherwise block_write_full_page() will skip writeout of pages past 5816 * otherwise block_write_full_page() will skip writeout of pages past
3559 * i_size. The new_i_size parameter is passed for this reason. 5817 * i_size. The new_i_size parameter is passed for this reason.
3560 */ 5818 */
3561int ocfs2_zero_tail_for_truncate(struct inode *inode, handle_t *handle, 5819int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
3562 u64 new_i_size) 5820 u64 range_start, u64 range_end)
3563{ 5821{
3564 int ret, numpages; 5822 int ret, numpages;
3565 loff_t endbyte;
3566 struct page **pages = NULL; 5823 struct page **pages = NULL;
3567 u64 phys; 5824 u64 phys;
3568 5825
@@ -3581,7 +5838,8 @@ int ocfs2_zero_tail_for_truncate(struct inode *inode, handle_t *handle,
3581 goto out; 5838 goto out;
3582 } 5839 }
3583 5840
3584 ret = ocfs2_grab_eof_pages(inode, new_i_size, pages, &numpages, &phys); 5841 ret = ocfs2_grab_eof_pages(inode, range_start, range_end, pages,
5842 &numpages, &phys);
3585 if (ret) { 5843 if (ret) {
3586 mlog_errno(ret); 5844 mlog_errno(ret);
3587 goto out; 5845 goto out;
@@ -3590,17 +5848,16 @@ int ocfs2_zero_tail_for_truncate(struct inode *inode, handle_t *handle,
3590 if (numpages == 0) 5848 if (numpages == 0)
3591 goto out; 5849 goto out;
3592 5850
3593 ocfs2_zero_cluster_pages(inode, new_i_size, pages, numpages, phys, 5851 ocfs2_zero_cluster_pages(inode, range_start, range_end, pages,
3594 handle); 5852 numpages, phys, handle);
3595 5853
3596 /* 5854 /*
3597 * Initiate writeout of the pages we zero'd here. We don't 5855 * Initiate writeout of the pages we zero'd here. We don't
3598 * wait on them - the truncate_inode_pages() call later will 5856 * wait on them - the truncate_inode_pages() call later will
3599 * do that for us. 5857 * do that for us.
3600 */ 5858 */
3601 endbyte = ocfs2_align_bytes_to_clusters(inode->i_sb, new_i_size); 5859 ret = do_sync_mapping_range(inode->i_mapping, range_start,
3602 ret = do_sync_mapping_range(inode->i_mapping, new_i_size, 5860 range_end - 1, SYNC_FILE_RANGE_WRITE);
3603 endbyte - 1, SYNC_FILE_RANGE_WRITE);
3604 if (ret) 5861 if (ret)
3605 mlog_errno(ret); 5862 mlog_errno(ret);
3606 5863
@@ -3631,8 +5888,6 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
3631 5888
3632 mlog_entry_void(); 5889 mlog_entry_void();
3633 5890
3634 down_write(&OCFS2_I(inode)->ip_alloc_sem);
3635
3636 new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb, 5891 new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb,
3637 i_size_read(inode)); 5892 i_size_read(inode));
3638 5893
@@ -3754,7 +6009,6 @@ start:
3754 goto start; 6009 goto start;
3755 6010
3756bail: 6011bail:
3757 up_write(&OCFS2_I(inode)->ip_alloc_sem);
3758 6012
3759 ocfs2_schedule_truncate_log_flush(osb, 1); 6013 ocfs2_schedule_truncate_log_flush(osb, 1);
3760 6014
@@ -3764,6 +6018,8 @@ bail:
3764 if (handle) 6018 if (handle)
3765 ocfs2_commit_trans(osb, handle); 6019 ocfs2_commit_trans(osb, handle);
3766 6020
6021 ocfs2_run_deallocs(osb, &tc->tc_dealloc);
6022
3767 ocfs2_free_path(path); 6023 ocfs2_free_path(path);
3768 6024
3769 /* This will drop the ext_alloc cluster lock for us */ 6025 /* This will drop the ext_alloc cluster lock for us */
@@ -3774,23 +6030,18 @@ bail:
3774} 6030}
3775 6031
3776/* 6032/*
3777 * Expects the inode to already be locked. This will figure out which 6033 * Expects the inode to already be locked.
3778 * inodes need to be locked and will put them on the returned truncate
3779 * context.
3780 */ 6034 */
3781int ocfs2_prepare_truncate(struct ocfs2_super *osb, 6035int ocfs2_prepare_truncate(struct ocfs2_super *osb,
3782 struct inode *inode, 6036 struct inode *inode,
3783 struct buffer_head *fe_bh, 6037 struct buffer_head *fe_bh,
3784 struct ocfs2_truncate_context **tc) 6038 struct ocfs2_truncate_context **tc)
3785{ 6039{
3786 int status, metadata_delete, i; 6040 int status;
3787 unsigned int new_i_clusters; 6041 unsigned int new_i_clusters;
3788 struct ocfs2_dinode *fe; 6042 struct ocfs2_dinode *fe;
3789 struct ocfs2_extent_block *eb; 6043 struct ocfs2_extent_block *eb;
3790 struct ocfs2_extent_list *el;
3791 struct buffer_head *last_eb_bh = NULL; 6044 struct buffer_head *last_eb_bh = NULL;
3792 struct inode *ext_alloc_inode = NULL;
3793 struct buffer_head *ext_alloc_bh = NULL;
3794 6045
3795 mlog_entry_void(); 6046 mlog_entry_void();
3796 6047
@@ -3810,12 +6061,9 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb,
3810 mlog_errno(status); 6061 mlog_errno(status);
3811 goto bail; 6062 goto bail;
3812 } 6063 }
6064 ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc);
3813 6065
3814 metadata_delete = 0;
3815 if (fe->id2.i_list.l_tree_depth) { 6066 if (fe->id2.i_list.l_tree_depth) {
3816 /* If we have a tree, then the truncate may result in
3817 * metadata deletes. Figure this out from the
3818 * rightmost leaf block.*/
3819 status = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk), 6067 status = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk),
3820 &last_eb_bh, OCFS2_BH_CACHED, inode); 6068 &last_eb_bh, OCFS2_BH_CACHED, inode);
3821 if (status < 0) { 6069 if (status < 0) {
@@ -3830,43 +6078,10 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb,
3830 status = -EIO; 6078 status = -EIO;
3831 goto bail; 6079 goto bail;
3832 } 6080 }
3833 el = &(eb->h_list);
3834
3835 i = 0;
3836 if (ocfs2_is_empty_extent(&el->l_recs[0]))
3837 i = 1;
3838 /*
3839 * XXX: Should we check that next_free_rec contains
3840 * the extent?
3841 */
3842 if (le32_to_cpu(el->l_recs[i].e_cpos) >= new_i_clusters)
3843 metadata_delete = 1;
3844 } 6081 }
3845 6082
3846 (*tc)->tc_last_eb_bh = last_eb_bh; 6083 (*tc)->tc_last_eb_bh = last_eb_bh;
3847 6084
3848 if (metadata_delete) {
3849 mlog(0, "Will have to delete metadata for this trunc. "
3850 "locking allocator.\n");
3851 ext_alloc_inode = ocfs2_get_system_file_inode(osb, EXTENT_ALLOC_SYSTEM_INODE, 0);
3852 if (!ext_alloc_inode) {
3853 status = -ENOMEM;
3854 mlog_errno(status);
3855 goto bail;
3856 }
3857
3858 mutex_lock(&ext_alloc_inode->i_mutex);
3859 (*tc)->tc_ext_alloc_inode = ext_alloc_inode;
3860
3861 status = ocfs2_meta_lock(ext_alloc_inode, &ext_alloc_bh, 1);
3862 if (status < 0) {
3863 mlog_errno(status);
3864 goto bail;
3865 }
3866 (*tc)->tc_ext_alloc_bh = ext_alloc_bh;
3867 (*tc)->tc_ext_alloc_locked = 1;
3868 }
3869
3870 status = 0; 6085 status = 0;
3871bail: 6086bail:
3872 if (status < 0) { 6087 if (status < 0) {
@@ -3880,16 +6095,13 @@ bail:
3880 6095
3881static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc) 6096static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc)
3882{ 6097{
3883 if (tc->tc_ext_alloc_inode) { 6098 /*
3884 if (tc->tc_ext_alloc_locked) 6099 * The caller is responsible for completing deallocation
3885 ocfs2_meta_unlock(tc->tc_ext_alloc_inode, 1); 6100 * before freeing the context.
3886 6101 */
3887 mutex_unlock(&tc->tc_ext_alloc_inode->i_mutex); 6102 if (tc->tc_dealloc.c_first_suballocator != NULL)
3888 iput(tc->tc_ext_alloc_inode); 6103 mlog(ML_NOTICE,
3889 } 6104 "Truncate completion has non-empty dealloc context\n");
3890
3891 if (tc->tc_ext_alloc_bh)
3892 brelse(tc->tc_ext_alloc_bh);
3893 6105
3894 if (tc->tc_last_eb_bh) 6106 if (tc->tc_last_eb_bh)
3895 brelse(tc->tc_last_eb_bh); 6107 brelse(tc->tc_last_eb_bh);
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index fbcb5934a08..990df48ae8d 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -34,7 +34,17 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
34 u32 cpos, 34 u32 cpos,
35 u64 start_blk, 35 u64 start_blk,
36 u32 new_clusters, 36 u32 new_clusters,
37 u8 flags,
37 struct ocfs2_alloc_context *meta_ac); 38 struct ocfs2_alloc_context *meta_ac);
39struct ocfs2_cached_dealloc_ctxt;
40int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh,
41 handle_t *handle, u32 cpos, u32 len, u32 phys,
42 struct ocfs2_alloc_context *meta_ac,
43 struct ocfs2_cached_dealloc_ctxt *dealloc);
44int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh,
45 u32 cpos, u32 len, handle_t *handle,
46 struct ocfs2_alloc_context *meta_ac,
47 struct ocfs2_cached_dealloc_ctxt *dealloc);
38int ocfs2_num_free_extents(struct ocfs2_super *osb, 48int ocfs2_num_free_extents(struct ocfs2_super *osb,
39 struct inode *inode, 49 struct inode *inode,
40 struct ocfs2_dinode *fe); 50 struct ocfs2_dinode *fe);
@@ -62,17 +72,41 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
62 struct ocfs2_dinode **tl_copy); 72 struct ocfs2_dinode **tl_copy);
63int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb, 73int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
64 struct ocfs2_dinode *tl_copy); 74 struct ocfs2_dinode *tl_copy);
75int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb);
76int ocfs2_truncate_log_append(struct ocfs2_super *osb,
77 handle_t *handle,
78 u64 start_blk,
79 unsigned int num_clusters);
80int __ocfs2_flush_truncate_log(struct ocfs2_super *osb);
81
82/*
83 * Process local structure which describes the block unlinks done
84 * during an operation. This is populated via
85 * ocfs2_cache_block_dealloc().
86 *
87 * ocfs2_run_deallocs() should be called after the potentially
88 * de-allocating routines. No journal handles should be open, and most
89 * locks should have been dropped.
90 */
91struct ocfs2_cached_dealloc_ctxt {
92 struct ocfs2_per_slot_free_list *c_first_suballocator;
93};
94static inline void ocfs2_init_dealloc_ctxt(struct ocfs2_cached_dealloc_ctxt *c)
95{
96 c->c_first_suballocator = NULL;
97}
98int ocfs2_run_deallocs(struct ocfs2_super *osb,
99 struct ocfs2_cached_dealloc_ctxt *ctxt);
65 100
66struct ocfs2_truncate_context { 101struct ocfs2_truncate_context {
67 struct inode *tc_ext_alloc_inode; 102 struct ocfs2_cached_dealloc_ctxt tc_dealloc;
68 struct buffer_head *tc_ext_alloc_bh;
69 int tc_ext_alloc_locked; /* is it cluster locked? */ 103 int tc_ext_alloc_locked; /* is it cluster locked? */
70 /* these get destroyed once it's passed to ocfs2_commit_truncate. */ 104 /* these get destroyed once it's passed to ocfs2_commit_truncate. */
71 struct buffer_head *tc_last_eb_bh; 105 struct buffer_head *tc_last_eb_bh;
72}; 106};
73 107
74int ocfs2_zero_tail_for_truncate(struct inode *inode, handle_t *handle, 108int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
75 u64 new_i_size); 109 u64 range_start, u64 range_end);
76int ocfs2_prepare_truncate(struct ocfs2_super *osb, 110int ocfs2_prepare_truncate(struct ocfs2_super *osb,
77 struct inode *inode, 111 struct inode *inode,
78 struct buffer_head *fe_bh, 112 struct buffer_head *fe_bh,
@@ -84,6 +118,7 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
84 118
85int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el, 119int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el,
86 u32 cpos, struct buffer_head **leaf_bh); 120 u32 cpos, struct buffer_head **leaf_bh);
121int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster);
87 122
88/* 123/*
89 * Helper function to look at the # of clusters in an extent record. 124 * Helper function to look at the # of clusters in an extent record.
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index a480b09c79b..84bf6e79de2 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -684,6 +684,8 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
684 bh = bh->b_this_page, block_start += bsize) { 684 bh = bh->b_this_page, block_start += bsize) {
685 block_end = block_start + bsize; 685 block_end = block_start + bsize;
686 686
687 clear_buffer_new(bh);
688
687 /* 689 /*
688 * Ignore blocks outside of our i/o range - 690 * Ignore blocks outside of our i/o range -
689 * they may belong to unallocated clusters. 691 * they may belong to unallocated clusters.
@@ -698,9 +700,8 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
698 * For an allocating write with cluster size >= page 700 * For an allocating write with cluster size >= page
699 * size, we always write the entire page. 701 * size, we always write the entire page.
700 */ 702 */
701 703 if (new)
702 if (buffer_new(bh)) 704 set_buffer_new(bh);
703 clear_buffer_new(bh);
704 705
705 if (!buffer_mapped(bh)) { 706 if (!buffer_mapped(bh)) {
706 map_bh(bh, inode->i_sb, *p_blkno); 707 map_bh(bh, inode->i_sb, *p_blkno);
@@ -711,7 +712,8 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
711 if (!buffer_uptodate(bh)) 712 if (!buffer_uptodate(bh))
712 set_buffer_uptodate(bh); 713 set_buffer_uptodate(bh);
713 } else if (!buffer_uptodate(bh) && !buffer_delay(bh) && 714 } else if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
714 (block_start < from || block_end > to)) { 715 !buffer_new(bh) &&
716 (block_start < from || block_end > to)) {
715 ll_rw_block(READ, 1, &bh); 717 ll_rw_block(READ, 1, &bh);
716 *wait_bh++=bh; 718 *wait_bh++=bh;
717 } 719 }
@@ -738,18 +740,13 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
738 bh = head; 740 bh = head;
739 block_start = 0; 741 block_start = 0;
740 do { 742 do {
741 void *kaddr;
742
743 block_end = block_start + bsize; 743 block_end = block_start + bsize;
744 if (block_end <= from) 744 if (block_end <= from)
745 goto next_bh; 745 goto next_bh;
746 if (block_start >= to) 746 if (block_start >= to)
747 break; 747 break;
748 748
749 kaddr = kmap_atomic(page, KM_USER0); 749 zero_user_page(page, block_start, bh->b_size, KM_USER0);
750 memset(kaddr+block_start, 0, bh->b_size);
751 flush_dcache_page(page);
752 kunmap_atomic(kaddr, KM_USER0);
753 set_buffer_uptodate(bh); 750 set_buffer_uptodate(bh);
754 mark_buffer_dirty(bh); 751 mark_buffer_dirty(bh);
755 752
@@ -761,217 +758,240 @@ next_bh:
761 return ret; 758 return ret;
762} 759}
763 760
761#if (PAGE_CACHE_SIZE >= OCFS2_MAX_CLUSTERSIZE)
762#define OCFS2_MAX_CTXT_PAGES 1
763#else
764#define OCFS2_MAX_CTXT_PAGES (OCFS2_MAX_CLUSTERSIZE / PAGE_CACHE_SIZE)
765#endif
766
767#define OCFS2_MAX_CLUSTERS_PER_PAGE (PAGE_CACHE_SIZE / OCFS2_MIN_CLUSTERSIZE)
768
764/* 769/*
765 * This will copy user data from the buffer page in the splice 770 * Describe the state of a single cluster to be written to.
766 * context.
767 *
768 * For now, we ignore SPLICE_F_MOVE as that would require some extra
769 * communication out all the way to ocfs2_write().
770 */ 771 */
771int ocfs2_map_and_write_splice_data(struct inode *inode, 772struct ocfs2_write_cluster_desc {
772 struct ocfs2_write_ctxt *wc, u64 *p_blkno, 773 u32 c_cpos;
773 unsigned int *ret_from, unsigned int *ret_to) 774 u32 c_phys;
775 /*
776 * Give this a unique field because c_phys eventually gets
777 * filled.
778 */
779 unsigned c_new;
780 unsigned c_unwritten;
781};
782
783static inline int ocfs2_should_zero_cluster(struct ocfs2_write_cluster_desc *d)
774{ 784{
775 int ret; 785 return d->c_new || d->c_unwritten;
776 unsigned int to, from, cluster_start, cluster_end; 786}
777 char *src, *dst;
778 struct ocfs2_splice_write_priv *sp = wc->w_private;
779 struct pipe_buffer *buf = sp->s_buf;
780 unsigned long bytes, src_from;
781 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
782 787
783 ocfs2_figure_cluster_boundaries(osb, wc->w_cpos, &cluster_start, 788struct ocfs2_write_ctxt {
784 &cluster_end); 789 /* Logical cluster position / len of write */
790 u32 w_cpos;
791 u32 w_clen;
785 792
786 from = sp->s_offset; 793 struct ocfs2_write_cluster_desc w_desc[OCFS2_MAX_CLUSTERS_PER_PAGE];
787 src_from = sp->s_buf_offset;
788 bytes = wc->w_count;
789 794
790 if (wc->w_large_pages) { 795 /*
791 /* 796 * This is true if page_size > cluster_size.
792 * For cluster size < page size, we have to 797 *
793 * calculate pos within the cluster and obey 798 * It triggers a set of special cases during write which might
794 * the rightmost boundary. 799 * have to deal with allocating writes to partial pages.
795 */ 800 */
796 bytes = min(bytes, (unsigned long)(osb->s_clustersize 801 unsigned int w_large_pages;
797 - (wc->w_pos & (osb->s_clustersize - 1)))); 802
798 } 803 /*
799 to = from + bytes; 804 * Pages involved in this write.
805 *
806 * w_target_page is the page being written to by the user.
807 *
808 * w_pages is an array of pages which always contains
809 * w_target_page, and in the case of an allocating write with
810 * page_size < cluster size, it will contain zero'd and mapped
811 * pages adjacent to w_target_page which need to be written
812 * out in so that future reads from that region will get
813 * zero's.
814 */
815 struct page *w_pages[OCFS2_MAX_CTXT_PAGES];
816 unsigned int w_num_pages;
817 struct page *w_target_page;
800 818
801 BUG_ON(from > PAGE_CACHE_SIZE); 819 /*
802 BUG_ON(to > PAGE_CACHE_SIZE); 820 * ocfs2_write_end() uses this to know what the real range to
803 BUG_ON(from < cluster_start); 821 * write in the target should be.
804 BUG_ON(to > cluster_end); 822 */
823 unsigned int w_target_from;
824 unsigned int w_target_to;
805 825
806 if (wc->w_this_page_new) 826 /*
807 ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode, 827 * We could use journal_current_handle() but this is cleaner,
808 cluster_start, cluster_end, 1); 828 * IMHO -Mark
809 else 829 */
810 ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode, 830 handle_t *w_handle;
811 from, to, 0); 831
812 if (ret) { 832 struct buffer_head *w_di_bh;
813 mlog_errno(ret); 833
814 goto out; 834 struct ocfs2_cached_dealloc_ctxt w_dealloc;
835};
836
837static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
838{
839 int i;
840
841 for(i = 0; i < wc->w_num_pages; i++) {
842 if (wc->w_pages[i] == NULL)
843 continue;
844
845 unlock_page(wc->w_pages[i]);
846 mark_page_accessed(wc->w_pages[i]);
847 page_cache_release(wc->w_pages[i]);
815 } 848 }
816 849
817 src = buf->ops->map(sp->s_pipe, buf, 1); 850 brelse(wc->w_di_bh);
818 dst = kmap_atomic(wc->w_this_page, KM_USER1); 851 kfree(wc);
819 memcpy(dst + from, src + src_from, bytes); 852}
820 kunmap_atomic(wc->w_this_page, KM_USER1); 853
821 buf->ops->unmap(sp->s_pipe, buf, src); 854static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp,
855 struct ocfs2_super *osb, loff_t pos,
856 unsigned len, struct buffer_head *di_bh)
857{
858 struct ocfs2_write_ctxt *wc;
859
860 wc = kzalloc(sizeof(struct ocfs2_write_ctxt), GFP_NOFS);
861 if (!wc)
862 return -ENOMEM;
822 863
823 wc->w_finished_copy = 1; 864 wc->w_cpos = pos >> osb->s_clustersize_bits;
865 wc->w_clen = ocfs2_clusters_for_bytes(osb->sb, len);
866 get_bh(di_bh);
867 wc->w_di_bh = di_bh;
824 868
825 *ret_from = from; 869 if (unlikely(PAGE_CACHE_SHIFT > osb->s_clustersize_bits))
826 *ret_to = to; 870 wc->w_large_pages = 1;
827out: 871 else
872 wc->w_large_pages = 0;
873
874 ocfs2_init_dealloc_ctxt(&wc->w_dealloc);
875
876 *wcp = wc;
828 877
829 return bytes ? (unsigned int)bytes : ret; 878 return 0;
830} 879}
831 880
832/* 881/*
833 * This will copy user data from the iovec in the buffered write 882 * If a page has any new buffers, zero them out here, and mark them uptodate
834 * context. 883 * and dirty so they'll be written out (in order to prevent uninitialised
884 * block data from leaking). And clear the new bit.
835 */ 885 */
836int ocfs2_map_and_write_user_data(struct inode *inode, 886static void ocfs2_zero_new_buffers(struct page *page, unsigned from, unsigned to)
837 struct ocfs2_write_ctxt *wc, u64 *p_blkno,
838 unsigned int *ret_from, unsigned int *ret_to)
839{ 887{
840 int ret; 888 unsigned int block_start, block_end;
841 unsigned int to, from, cluster_start, cluster_end; 889 struct buffer_head *head, *bh;
842 unsigned long bytes, src_from;
843 char *dst;
844 struct ocfs2_buffered_write_priv *bp = wc->w_private;
845 const struct iovec *cur_iov = bp->b_cur_iov;
846 char __user *buf;
847 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
848 890
849 ocfs2_figure_cluster_boundaries(osb, wc->w_cpos, &cluster_start, 891 BUG_ON(!PageLocked(page));
850 &cluster_end); 892 if (!page_has_buffers(page))
893 return;
851 894
852 buf = cur_iov->iov_base + bp->b_cur_off; 895 bh = head = page_buffers(page);
853 src_from = (unsigned long)buf & ~PAGE_CACHE_MASK; 896 block_start = 0;
897 do {
898 block_end = block_start + bh->b_size;
854 899
855 from = wc->w_pos & (PAGE_CACHE_SIZE - 1); 900 if (buffer_new(bh)) {
901 if (block_end > from && block_start < to) {
902 if (!PageUptodate(page)) {
903 unsigned start, end;
856 904
857 /* 905 start = max(from, block_start);
858 * This is a lot of comparisons, but it reads quite 906 end = min(to, block_end);
859 * easily, which is important here.
860 */
861 /* Stay within the src page */
862 bytes = PAGE_SIZE - src_from;
863 /* Stay within the vector */
864 bytes = min(bytes,
865 (unsigned long)(cur_iov->iov_len - bp->b_cur_off));
866 /* Stay within count */
867 bytes = min(bytes, (unsigned long)wc->w_count);
868 /*
869 * For clustersize > page size, just stay within
870 * target page, otherwise we have to calculate pos
871 * within the cluster and obey the rightmost
872 * boundary.
873 */
874 if (wc->w_large_pages) {
875 /*
876 * For cluster size < page size, we have to
877 * calculate pos within the cluster and obey
878 * the rightmost boundary.
879 */
880 bytes = min(bytes, (unsigned long)(osb->s_clustersize
881 - (wc->w_pos & (osb->s_clustersize - 1))));
882 } else {
883 /*
884 * cluster size > page size is the most common
885 * case - we just stay within the target page
886 * boundary.
887 */
888 bytes = min(bytes, PAGE_CACHE_SIZE - from);
889 }
890 907
891 to = from + bytes; 908 zero_user_page(page, start, end - start, KM_USER0);
909 set_buffer_uptodate(bh);
910 }
892 911
893 BUG_ON(from > PAGE_CACHE_SIZE); 912 clear_buffer_new(bh);
894 BUG_ON(to > PAGE_CACHE_SIZE); 913 mark_buffer_dirty(bh);
895 BUG_ON(from < cluster_start); 914 }
896 BUG_ON(to > cluster_end); 915 }
897 916
898 if (wc->w_this_page_new) 917 block_start = block_end;
899 ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode, 918 bh = bh->b_this_page;
900 cluster_start, cluster_end, 1); 919 } while (bh != head);
901 else 920}
902 ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode,
903 from, to, 0);
904 if (ret) {
905 mlog_errno(ret);
906 goto out;
907 }
908 921
909 dst = kmap(wc->w_this_page); 922/*
910 memcpy(dst + from, bp->b_src_buf + src_from, bytes); 923 * Only called when we have a failure during allocating write to write
911 kunmap(wc->w_this_page); 924 * zero's to the newly allocated region.
925 */
926static void ocfs2_write_failure(struct inode *inode,
927 struct ocfs2_write_ctxt *wc,
928 loff_t user_pos, unsigned user_len)
929{
930 int i;
931 unsigned from, to;
932 struct page *tmppage;
912 933
913 /* 934 ocfs2_zero_new_buffers(wc->w_target_page, user_pos, user_len);
914 * XXX: This is slow, but simple. The caller of
915 * ocfs2_buffered_write_cluster() is responsible for
916 * passing through the iovecs, so it's difficult to
917 * predict what our next step is in here after our
918 * initial write. A future version should be pushing
919 * that iovec manipulation further down.
920 *
921 * By setting this, we indicate that a copy from user
922 * data was done, and subsequent calls for this
923 * cluster will skip copying more data.
924 */
925 wc->w_finished_copy = 1;
926 935
927 *ret_from = from; 936 if (wc->w_large_pages) {
928 *ret_to = to; 937 from = wc->w_target_from;
929out: 938 to = wc->w_target_to;
939 } else {
940 from = 0;
941 to = PAGE_CACHE_SIZE;
942 }
943
944 for(i = 0; i < wc->w_num_pages; i++) {
945 tmppage = wc->w_pages[i];
930 946
931 return bytes ? (unsigned int)bytes : ret; 947 if (ocfs2_should_order_data(inode))
948 walk_page_buffers(wc->w_handle, page_buffers(tmppage),
949 from, to, NULL,
950 ocfs2_journal_dirty_data);
951
952 block_commit_write(tmppage, from, to);
953 }
932} 954}
933 955
934/* 956static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno,
935 * Map, fill and write a page to disk. 957 struct ocfs2_write_ctxt *wc,
936 * 958 struct page *page, u32 cpos,
937 * The work of copying data is done via callback. Newly allocated 959 loff_t user_pos, unsigned user_len,
938 * pages which don't take user data will be zero'd (set 'new' to 960 int new)
939 * indicate an allocating write)
940 *
941 * Returns a negative error code or the number of bytes copied into
942 * the page.
943 */
944static int ocfs2_write_data_page(struct inode *inode, handle_t *handle,
945 u64 *p_blkno, struct page *page,
946 struct ocfs2_write_ctxt *wc, int new)
947{ 961{
948 int ret, copied = 0; 962 int ret;
949 unsigned int from = 0, to = 0; 963 unsigned int map_from = 0, map_to = 0;
950 unsigned int cluster_start, cluster_end; 964 unsigned int cluster_start, cluster_end;
951 unsigned int zero_from = 0, zero_to = 0; 965 unsigned int user_data_from = 0, user_data_to = 0;
952 966
953 ocfs2_figure_cluster_boundaries(OCFS2_SB(inode->i_sb), wc->w_cpos, 967 ocfs2_figure_cluster_boundaries(OCFS2_SB(inode->i_sb), cpos,
954 &cluster_start, &cluster_end); 968 &cluster_start, &cluster_end);
955 969
956 if ((wc->w_pos >> PAGE_CACHE_SHIFT) == page->index 970 if (page == wc->w_target_page) {
957 && !wc->w_finished_copy) { 971 map_from = user_pos & (PAGE_CACHE_SIZE - 1);
958 972 map_to = map_from + user_len;
959 wc->w_this_page = page; 973
960 wc->w_this_page_new = new; 974 if (new)
961 ret = wc->w_write_data_page(inode, wc, p_blkno, &from, &to); 975 ret = ocfs2_map_page_blocks(page, p_blkno, inode,
962 if (ret < 0) { 976 cluster_start, cluster_end,
977 new);
978 else
979 ret = ocfs2_map_page_blocks(page, p_blkno, inode,
980 map_from, map_to, new);
981 if (ret) {
963 mlog_errno(ret); 982 mlog_errno(ret);
964 goto out; 983 goto out;
965 } 984 }
966 985
967 copied = ret; 986 user_data_from = map_from;
968 987 user_data_to = map_to;
969 zero_from = from;
970 zero_to = to;
971 if (new) { 988 if (new) {
972 from = cluster_start; 989 map_from = cluster_start;
973 to = cluster_end; 990 map_to = cluster_end;
974 } 991 }
992
993 wc->w_target_from = map_from;
994 wc->w_target_to = map_to;
975 } else { 995 } else {
976 /* 996 /*
977 * If we haven't allocated the new page yet, we 997 * If we haven't allocated the new page yet, we
@@ -980,11 +1000,11 @@ static int ocfs2_write_data_page(struct inode *inode, handle_t *handle,
980 */ 1000 */
981 BUG_ON(!new); 1001 BUG_ON(!new);
982 1002
983 from = cluster_start; 1003 map_from = cluster_start;
984 to = cluster_end; 1004 map_to = cluster_end;
985 1005
986 ret = ocfs2_map_page_blocks(page, p_blkno, inode, 1006 ret = ocfs2_map_page_blocks(page, p_blkno, inode,
987 cluster_start, cluster_end, 1); 1007 cluster_start, cluster_end, new);
988 if (ret) { 1008 if (ret) {
989 mlog_errno(ret); 1009 mlog_errno(ret);
990 goto out; 1010 goto out;
@@ -1003,108 +1023,113 @@ static int ocfs2_write_data_page(struct inode *inode, handle_t *handle,
1003 */ 1023 */
1004 if (new && !PageUptodate(page)) 1024 if (new && !PageUptodate(page))
1005 ocfs2_clear_page_regions(page, OCFS2_SB(inode->i_sb), 1025 ocfs2_clear_page_regions(page, OCFS2_SB(inode->i_sb),
1006 wc->w_cpos, zero_from, zero_to); 1026 cpos, user_data_from, user_data_to);
1007 1027
1008 flush_dcache_page(page); 1028 flush_dcache_page(page);
1009 1029
1010 if (ocfs2_should_order_data(inode)) {
1011 ret = walk_page_buffers(handle,
1012 page_buffers(page),
1013 from, to, NULL,
1014 ocfs2_journal_dirty_data);
1015 if (ret < 0)
1016 mlog_errno(ret);
1017 }
1018
1019 /*
1020 * We don't use generic_commit_write() because we need to
1021 * handle our own i_size update.
1022 */
1023 ret = block_commit_write(page, from, to);
1024 if (ret)
1025 mlog_errno(ret);
1026out: 1030out:
1027 1031 return ret;
1028 return copied ? copied : ret;
1029} 1032}
1030 1033
1031/* 1034/*
1032 * Do the actual write of some data into an inode. Optionally allocate 1035 * This function will only grab one clusters worth of pages.
1033 * in order to fulfill the write.
1034 *
1035 * cpos is the logical cluster offset within the file to write at
1036 *
1037 * 'phys' is the physical mapping of that offset. a 'phys' value of
1038 * zero indicates that allocation is required. In this case, data_ac
1039 * and meta_ac should be valid (meta_ac can be null if metadata
1040 * allocation isn't required).
1041 */ 1036 */
1042static ssize_t ocfs2_write(struct file *file, u32 phys, handle_t *handle, 1037static int ocfs2_grab_pages_for_write(struct address_space *mapping,
1043 struct buffer_head *di_bh, 1038 struct ocfs2_write_ctxt *wc,
1044 struct ocfs2_alloc_context *data_ac, 1039 u32 cpos, loff_t user_pos, int new,
1045 struct ocfs2_alloc_context *meta_ac, 1040 struct page *mmap_page)
1046 struct ocfs2_write_ctxt *wc)
1047{ 1041{
1048 int ret, i, numpages = 1, new; 1042 int ret = 0, i;
1049 unsigned int copied = 0; 1043 unsigned long start, target_index, index;
1050 u32 tmp_pos;
1051 u64 v_blkno, p_blkno;
1052 struct address_space *mapping = file->f_mapping;
1053 struct inode *inode = mapping->host; 1044 struct inode *inode = mapping->host;
1054 unsigned long index, start;
1055 struct page **cpages;
1056 1045
1057 new = phys == 0 ? 1 : 0; 1046 target_index = user_pos >> PAGE_CACHE_SHIFT;
1058 1047
1059 /* 1048 /*
1060 * Figure out how many pages we'll be manipulating here. For 1049 * Figure out how many pages we'll be manipulating here. For
1061 * non allocating write, we just change the one 1050 * non allocating write, we just change the one
1062 * page. Otherwise, we'll need a whole clusters worth. 1051 * page. Otherwise, we'll need a whole clusters worth.
1063 */ 1052 */
1064 if (new)
1065 numpages = ocfs2_pages_per_cluster(inode->i_sb);
1066
1067 cpages = kzalloc(sizeof(*cpages) * numpages, GFP_NOFS);
1068 if (!cpages) {
1069 ret = -ENOMEM;
1070 mlog_errno(ret);
1071 return ret;
1072 }
1073
1074 /*
1075 * Fill our page array first. That way we've grabbed enough so
1076 * that we can zero and flush if we error after adding the
1077 * extent.
1078 */
1079 if (new) { 1053 if (new) {
1080 start = ocfs2_align_clusters_to_page_index(inode->i_sb, 1054 wc->w_num_pages = ocfs2_pages_per_cluster(inode->i_sb);
1081 wc->w_cpos); 1055 start = ocfs2_align_clusters_to_page_index(inode->i_sb, cpos);
1082 v_blkno = ocfs2_clusters_to_blocks(inode->i_sb, wc->w_cpos);
1083 } else { 1056 } else {
1084 start = wc->w_pos >> PAGE_CACHE_SHIFT; 1057 wc->w_num_pages = 1;
1085 v_blkno = wc->w_pos >> inode->i_sb->s_blocksize_bits; 1058 start = target_index;
1086 } 1059 }
1087 1060
1088 for(i = 0; i < numpages; i++) { 1061 for(i = 0; i < wc->w_num_pages; i++) {
1089 index = start + i; 1062 index = start + i;
1090 1063
1091 cpages[i] = find_or_create_page(mapping, index, GFP_NOFS); 1064 if (index == target_index && mmap_page) {
1092 if (!cpages[i]) { 1065 /*
1093 ret = -ENOMEM; 1066 * ocfs2_pagemkwrite() is a little different
1094 mlog_errno(ret); 1067 * and wants us to directly use the page
1095 goto out; 1068 * passed in.
1069 */
1070 lock_page(mmap_page);
1071
1072 if (mmap_page->mapping != mapping) {
1073 unlock_page(mmap_page);
1074 /*
1075 * Sanity check - the locking in
1076 * ocfs2_pagemkwrite() should ensure
1077 * that this code doesn't trigger.
1078 */
1079 ret = -EINVAL;
1080 mlog_errno(ret);
1081 goto out;
1082 }
1083
1084 page_cache_get(mmap_page);
1085 wc->w_pages[i] = mmap_page;
1086 } else {
1087 wc->w_pages[i] = find_or_create_page(mapping, index,
1088 GFP_NOFS);
1089 if (!wc->w_pages[i]) {
1090 ret = -ENOMEM;
1091 mlog_errno(ret);
1092 goto out;
1093 }
1096 } 1094 }
1095
1096 if (index == target_index)
1097 wc->w_target_page = wc->w_pages[i];
1097 } 1098 }
1099out:
1100 return ret;
1101}
1102
1103/*
1104 * Prepare a single cluster for write one cluster into the file.
1105 */
1106static int ocfs2_write_cluster(struct address_space *mapping,
1107 u32 phys, unsigned int unwritten,
1108 struct ocfs2_alloc_context *data_ac,
1109 struct ocfs2_alloc_context *meta_ac,
1110 struct ocfs2_write_ctxt *wc, u32 cpos,
1111 loff_t user_pos, unsigned user_len)
1112{
1113 int ret, i, new, should_zero = 0;
1114 u64 v_blkno, p_blkno;
1115 struct inode *inode = mapping->host;
1116
1117 new = phys == 0 ? 1 : 0;
1118 if (new || unwritten)
1119 should_zero = 1;
1098 1120
1099 if (new) { 1121 if (new) {
1122 u32 tmp_pos;
1123
1100 /* 1124 /*
1101 * This is safe to call with the page locks - it won't take 1125 * This is safe to call with the page locks - it won't take
1102 * any additional semaphores or cluster locks. 1126 * any additional semaphores or cluster locks.
1103 */ 1127 */
1104 tmp_pos = wc->w_cpos; 1128 tmp_pos = cpos;
1105 ret = ocfs2_do_extend_allocation(OCFS2_SB(inode->i_sb), inode, 1129 ret = ocfs2_do_extend_allocation(OCFS2_SB(inode->i_sb), inode,
1106 &tmp_pos, 1, di_bh, handle, 1130 &tmp_pos, 1, 0, wc->w_di_bh,
1107 data_ac, meta_ac, NULL); 1131 wc->w_handle, data_ac,
1132 meta_ac, NULL);
1108 /* 1133 /*
1109 * This shouldn't happen because we must have already 1134 * This shouldn't happen because we must have already
1110 * calculated the correct meta data allocation required. The 1135 * calculated the correct meta data allocation required. The
@@ -1121,159 +1146,433 @@ static ssize_t ocfs2_write(struct file *file, u32 phys, handle_t *handle,
1121 mlog_errno(ret); 1146 mlog_errno(ret);
1122 goto out; 1147 goto out;
1123 } 1148 }
1149 } else if (unwritten) {
1150 ret = ocfs2_mark_extent_written(inode, wc->w_di_bh,
1151 wc->w_handle, cpos, 1, phys,
1152 meta_ac, &wc->w_dealloc);
1153 if (ret < 0) {
1154 mlog_errno(ret);
1155 goto out;
1156 }
1124 } 1157 }
1125 1158
1159 if (should_zero)
1160 v_blkno = ocfs2_clusters_to_blocks(inode->i_sb, cpos);
1161 else
1162 v_blkno = user_pos >> inode->i_sb->s_blocksize_bits;
1163
1164 /*
1165 * The only reason this should fail is due to an inability to
1166 * find the extent added.
1167 */
1126 ret = ocfs2_extent_map_get_blocks(inode, v_blkno, &p_blkno, NULL, 1168 ret = ocfs2_extent_map_get_blocks(inode, v_blkno, &p_blkno, NULL,
1127 NULL); 1169 NULL);
1128 if (ret < 0) { 1170 if (ret < 0) {
1129 1171 ocfs2_error(inode->i_sb, "Corrupting extend for inode %llu, "
1130 /* 1172 "at logical block %llu",
1131 * XXX: Should we go readonly here? 1173 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1132 */ 1174 (unsigned long long)v_blkno);
1133
1134 mlog_errno(ret);
1135 goto out; 1175 goto out;
1136 } 1176 }
1137 1177
1138 BUG_ON(p_blkno == 0); 1178 BUG_ON(p_blkno == 0);
1139 1179
1140 for(i = 0; i < numpages; i++) { 1180 for(i = 0; i < wc->w_num_pages; i++) {
1141 ret = ocfs2_write_data_page(inode, handle, &p_blkno, cpages[i], 1181 int tmpret;
1142 wc, new); 1182
1143 if (ret < 0) { 1183 tmpret = ocfs2_prepare_page_for_write(inode, &p_blkno, wc,
1144 mlog_errno(ret); 1184 wc->w_pages[i], cpos,
1145 goto out; 1185 user_pos, user_len,
1186 should_zero);
1187 if (tmpret) {
1188 mlog_errno(tmpret);
1189 if (ret == 0)
1190 tmpret = ret;
1146 } 1191 }
1147
1148 copied += ret;
1149 } 1192 }
1150 1193
1194 /*
1195 * We only have cleanup to do in case of allocating write.
1196 */
1197 if (ret && new)
1198 ocfs2_write_failure(inode, wc, user_pos, user_len);
1199
1151out: 1200out:
1152 for(i = 0; i < numpages; i++) { 1201
1153 unlock_page(cpages[i]); 1202 return ret;
1154 mark_page_accessed(cpages[i]); 1203}
1155 page_cache_release(cpages[i]); 1204
1205static int ocfs2_write_cluster_by_desc(struct address_space *mapping,
1206 struct ocfs2_alloc_context *data_ac,
1207 struct ocfs2_alloc_context *meta_ac,
1208 struct ocfs2_write_ctxt *wc,
1209 loff_t pos, unsigned len)
1210{
1211 int ret, i;
1212 struct ocfs2_write_cluster_desc *desc;
1213
1214 for (i = 0; i < wc->w_clen; i++) {
1215 desc = &wc->w_desc[i];
1216
1217 ret = ocfs2_write_cluster(mapping, desc->c_phys,
1218 desc->c_unwritten, data_ac, meta_ac,
1219 wc, desc->c_cpos, pos, len);
1220 if (ret) {
1221 mlog_errno(ret);
1222 goto out;
1223 }
1156 } 1224 }
1157 kfree(cpages);
1158 1225
1159 return copied ? copied : ret; 1226 ret = 0;
1227out:
1228 return ret;
1160} 1229}
1161 1230
1162static void ocfs2_write_ctxt_init(struct ocfs2_write_ctxt *wc, 1231/*
1163 struct ocfs2_super *osb, loff_t pos, 1232 * ocfs2_write_end() wants to know which parts of the target page it
1164 size_t count, ocfs2_page_writer *cb, 1233 * should complete the write on. It's easiest to compute them ahead of
1165 void *cb_priv) 1234 * time when a more complete view of the write is available.
1235 */
1236static void ocfs2_set_target_boundaries(struct ocfs2_super *osb,
1237 struct ocfs2_write_ctxt *wc,
1238 loff_t pos, unsigned len, int alloc)
1166{ 1239{
1167 wc->w_count = count; 1240 struct ocfs2_write_cluster_desc *desc;
1168 wc->w_pos = pos;
1169 wc->w_cpos = wc->w_pos >> osb->s_clustersize_bits;
1170 wc->w_finished_copy = 0;
1171 1241
1172 if (unlikely(PAGE_CACHE_SHIFT > osb->s_clustersize_bits)) 1242 wc->w_target_from = pos & (PAGE_CACHE_SIZE - 1);
1173 wc->w_large_pages = 1; 1243 wc->w_target_to = wc->w_target_from + len;
1174 else
1175 wc->w_large_pages = 0;
1176 1244
1177 wc->w_write_data_page = cb; 1245 if (alloc == 0)
1178 wc->w_private = cb_priv; 1246 return;
1247
1248 /*
1249 * Allocating write - we may have different boundaries based
1250 * on page size and cluster size.
1251 *
1252 * NOTE: We can no longer compute one value from the other as
1253 * the actual write length and user provided length may be
1254 * different.
1255 */
1256
1257 if (wc->w_large_pages) {
1258 /*
1259 * We only care about the 1st and last cluster within
1260 * our range and whether they should be zero'd or not. Either
1261 * value may be extended out to the start/end of a
1262 * newly allocated cluster.
1263 */
1264 desc = &wc->w_desc[0];
1265 if (ocfs2_should_zero_cluster(desc))
1266 ocfs2_figure_cluster_boundaries(osb,
1267 desc->c_cpos,
1268 &wc->w_target_from,
1269 NULL);
1270
1271 desc = &wc->w_desc[wc->w_clen - 1];
1272 if (ocfs2_should_zero_cluster(desc))
1273 ocfs2_figure_cluster_boundaries(osb,
1274 desc->c_cpos,
1275 NULL,
1276 &wc->w_target_to);
1277 } else {
1278 wc->w_target_from = 0;
1279 wc->w_target_to = PAGE_CACHE_SIZE;
1280 }
1179} 1281}
1180 1282
1181/* 1283/*
1182 * Write a cluster to an inode. The cluster may not be allocated yet, 1284 * Populate each single-cluster write descriptor in the write context
1183 * in which case it will be. This only exists for buffered writes - 1285 * with information about the i/o to be done.
1184 * O_DIRECT takes a more "traditional" path through the kernel.
1185 *
1186 * The caller is responsible for incrementing pos, written counts, etc
1187 * 1286 *
1188 * For file systems that don't support sparse files, pre-allocation 1287 * Returns the number of clusters that will have to be allocated, as
1189 * and page zeroing up until cpos should be done prior to this 1288 * well as a worst case estimate of the number of extent records that
1190 * function call. 1289 * would have to be created during a write to an unwritten region.
1191 *
1192 * Callers should be holding i_sem, and the rw cluster lock.
1193 *
1194 * Returns the number of user bytes written, or less than zero for
1195 * error.
1196 */ 1290 */
1197ssize_t ocfs2_buffered_write_cluster(struct file *file, loff_t pos, 1291static int ocfs2_populate_write_desc(struct inode *inode,
1198 size_t count, ocfs2_page_writer *actor, 1292 struct ocfs2_write_ctxt *wc,
1199 void *priv) 1293 unsigned int *clusters_to_alloc,
1294 unsigned int *extents_to_split)
1295{
1296 int ret;
1297 struct ocfs2_write_cluster_desc *desc;
1298 unsigned int num_clusters = 0;
1299 unsigned int ext_flags = 0;
1300 u32 phys = 0;
1301 int i;
1302
1303 *clusters_to_alloc = 0;
1304 *extents_to_split = 0;
1305
1306 for (i = 0; i < wc->w_clen; i++) {
1307 desc = &wc->w_desc[i];
1308 desc->c_cpos = wc->w_cpos + i;
1309
1310 if (num_clusters == 0) {
1311 /*
1312 * Need to look up the next extent record.
1313 */
1314 ret = ocfs2_get_clusters(inode, desc->c_cpos, &phys,
1315 &num_clusters, &ext_flags);
1316 if (ret) {
1317 mlog_errno(ret);
1318 goto out;
1319 }
1320
1321 /*
1322 * Assume worst case - that we're writing in
1323 * the middle of the extent.
1324 *
1325 * We can assume that the write proceeds from
1326 * left to right, in which case the extent
1327 * insert code is smart enough to coalesce the
1328 * next splits into the previous records created.
1329 */
1330 if (ext_flags & OCFS2_EXT_UNWRITTEN)
1331 *extents_to_split = *extents_to_split + 2;
1332 } else if (phys) {
1333 /*
1334 * Only increment phys if it doesn't describe
1335 * a hole.
1336 */
1337 phys++;
1338 }
1339
1340 desc->c_phys = phys;
1341 if (phys == 0) {
1342 desc->c_new = 1;
1343 *clusters_to_alloc = *clusters_to_alloc + 1;
1344 }
1345 if (ext_flags & OCFS2_EXT_UNWRITTEN)
1346 desc->c_unwritten = 1;
1347
1348 num_clusters--;
1349 }
1350
1351 ret = 0;
1352out:
1353 return ret;
1354}
1355
1356int ocfs2_write_begin_nolock(struct address_space *mapping,
1357 loff_t pos, unsigned len, unsigned flags,
1358 struct page **pagep, void **fsdata,
1359 struct buffer_head *di_bh, struct page *mmap_page)
1200{ 1360{
1201 int ret, credits = OCFS2_INODE_UPDATE_CREDITS; 1361 int ret, credits = OCFS2_INODE_UPDATE_CREDITS;
1202 ssize_t written = 0; 1362 unsigned int clusters_to_alloc, extents_to_split;
1203 u32 phys; 1363 struct ocfs2_write_ctxt *wc;
1204 struct inode *inode = file->f_mapping->host; 1364 struct inode *inode = mapping->host;
1205 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1365 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1206 struct buffer_head *di_bh = NULL;
1207 struct ocfs2_dinode *di; 1366 struct ocfs2_dinode *di;
1208 struct ocfs2_alloc_context *data_ac = NULL; 1367 struct ocfs2_alloc_context *data_ac = NULL;
1209 struct ocfs2_alloc_context *meta_ac = NULL; 1368 struct ocfs2_alloc_context *meta_ac = NULL;
1210 handle_t *handle; 1369 handle_t *handle;
1211 struct ocfs2_write_ctxt wc;
1212
1213 ocfs2_write_ctxt_init(&wc, osb, pos, count, actor, priv);
1214 1370
1215 ret = ocfs2_meta_lock(inode, &di_bh, 1); 1371 ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh);
1216 if (ret) { 1372 if (ret) {
1217 mlog_errno(ret); 1373 mlog_errno(ret);
1218 goto out; 1374 return ret;
1219 } 1375 }
1220 di = (struct ocfs2_dinode *)di_bh->b_data;
1221
1222 /*
1223 * Take alloc sem here to prevent concurrent lookups. That way
1224 * the mapping, zeroing and tree manipulation within
1225 * ocfs2_write() will be safe against ->readpage(). This
1226 * should also serve to lock out allocation from a shared
1227 * writeable region.
1228 */
1229 down_write(&OCFS2_I(inode)->ip_alloc_sem);
1230 1376
1231 ret = ocfs2_get_clusters(inode, wc.w_cpos, &phys, NULL, NULL); 1377 ret = ocfs2_populate_write_desc(inode, wc, &clusters_to_alloc,
1378 &extents_to_split);
1232 if (ret) { 1379 if (ret) {
1233 mlog_errno(ret); 1380 mlog_errno(ret);
1234 goto out_meta; 1381 goto out;
1235 } 1382 }
1236 1383
1237 /* phys == 0 means that allocation is required. */ 1384 di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
1238 if (phys == 0) { 1385
1239 ret = ocfs2_lock_allocators(inode, di, 1, &data_ac, &meta_ac); 1386 /*
1387 * We set w_target_from, w_target_to here so that
1388 * ocfs2_write_end() knows which range in the target page to
1389 * write out. An allocation requires that we write the entire
1390 * cluster range.
1391 */
1392 if (clusters_to_alloc || extents_to_split) {
1393 /*
1394 * XXX: We are stretching the limits of
1395 * ocfs2_lock_allocators(). It greatly over-estimates
1396 * the work to be done.
1397 */
1398 ret = ocfs2_lock_allocators(inode, di, clusters_to_alloc,
1399 extents_to_split, &data_ac, &meta_ac);
1240 if (ret) { 1400 if (ret) {
1241 mlog_errno(ret); 1401 mlog_errno(ret);
1242 goto out_meta; 1402 goto out;
1243 } 1403 }
1244 1404
1245 credits = ocfs2_calc_extend_credits(inode->i_sb, di, 1); 1405 credits = ocfs2_calc_extend_credits(inode->i_sb, di,
1246 } 1406 clusters_to_alloc);
1247 1407
1248 ret = ocfs2_data_lock(inode, 1);
1249 if (ret) {
1250 mlog_errno(ret);
1251 goto out_meta;
1252 } 1408 }
1253 1409
1410 ocfs2_set_target_boundaries(osb, wc, pos, len,
1411 clusters_to_alloc + extents_to_split);
1412
1254 handle = ocfs2_start_trans(osb, credits); 1413 handle = ocfs2_start_trans(osb, credits);
1255 if (IS_ERR(handle)) { 1414 if (IS_ERR(handle)) {
1256 ret = PTR_ERR(handle); 1415 ret = PTR_ERR(handle);
1257 mlog_errno(ret); 1416 mlog_errno(ret);
1258 goto out_data; 1417 goto out;
1259 } 1418 }
1260 1419
1261 written = ocfs2_write(file, phys, handle, di_bh, data_ac, 1420 wc->w_handle = handle;
1262 meta_ac, &wc); 1421
1263 if (written < 0) { 1422 /*
1264 ret = written; 1423 * We don't want this to fail in ocfs2_write_end(), so do it
1424 * here.
1425 */
1426 ret = ocfs2_journal_access(handle, inode, wc->w_di_bh,
1427 OCFS2_JOURNAL_ACCESS_WRITE);
1428 if (ret) {
1265 mlog_errno(ret); 1429 mlog_errno(ret);
1266 goto out_commit; 1430 goto out_commit;
1267 } 1431 }
1268 1432
1269 ret = ocfs2_journal_access(handle, inode, di_bh, 1433 /*
1270 OCFS2_JOURNAL_ACCESS_WRITE); 1434 * Fill our page array first. That way we've grabbed enough so
1435 * that we can zero and flush if we error after adding the
1436 * extent.
1437 */
1438 ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos,
1439 clusters_to_alloc + extents_to_split,
1440 mmap_page);
1271 if (ret) { 1441 if (ret) {
1272 mlog_errno(ret); 1442 mlog_errno(ret);
1273 goto out_commit; 1443 goto out_commit;
1274 } 1444 }
1275 1445
1276 pos += written; 1446 ret = ocfs2_write_cluster_by_desc(mapping, data_ac, meta_ac, wc, pos,
1447 len);
1448 if (ret) {
1449 mlog_errno(ret);
1450 goto out_commit;
1451 }
1452
1453 if (data_ac)
1454 ocfs2_free_alloc_context(data_ac);
1455 if (meta_ac)
1456 ocfs2_free_alloc_context(meta_ac);
1457
1458 *pagep = wc->w_target_page;
1459 *fsdata = wc;
1460 return 0;
1461out_commit:
1462 ocfs2_commit_trans(osb, handle);
1463
1464out:
1465 ocfs2_free_write_ctxt(wc);
1466
1467 if (data_ac)
1468 ocfs2_free_alloc_context(data_ac);
1469 if (meta_ac)
1470 ocfs2_free_alloc_context(meta_ac);
1471 return ret;
1472}
1473
1474int ocfs2_write_begin(struct file *file, struct address_space *mapping,
1475 loff_t pos, unsigned len, unsigned flags,
1476 struct page **pagep, void **fsdata)
1477{
1478 int ret;
1479 struct buffer_head *di_bh = NULL;
1480 struct inode *inode = mapping->host;
1481
1482 ret = ocfs2_meta_lock(inode, &di_bh, 1);
1483 if (ret) {
1484 mlog_errno(ret);
1485 return ret;
1486 }
1487
1488 /*
1489 * Take alloc sem here to prevent concurrent lookups. That way
1490 * the mapping, zeroing and tree manipulation within
1491 * ocfs2_write() will be safe against ->readpage(). This
1492 * should also serve to lock out allocation from a shared
1493 * writeable region.
1494 */
1495 down_write(&OCFS2_I(inode)->ip_alloc_sem);
1496
1497 ret = ocfs2_data_lock(inode, 1);
1498 if (ret) {
1499 mlog_errno(ret);
1500 goto out_fail;
1501 }
1502
1503 ret = ocfs2_write_begin_nolock(mapping, pos, len, flags, pagep,
1504 fsdata, di_bh, NULL);
1505 if (ret) {
1506 mlog_errno(ret);
1507 goto out_fail_data;
1508 }
1509
1510 brelse(di_bh);
1511
1512 return 0;
1513
1514out_fail_data:
1515 ocfs2_data_unlock(inode, 1);
1516out_fail:
1517 up_write(&OCFS2_I(inode)->ip_alloc_sem);
1518
1519 brelse(di_bh);
1520 ocfs2_meta_unlock(inode, 1);
1521
1522 return ret;
1523}
1524
1525int ocfs2_write_end_nolock(struct address_space *mapping,
1526 loff_t pos, unsigned len, unsigned copied,
1527 struct page *page, void *fsdata)
1528{
1529 int i;
1530 unsigned from, to, start = pos & (PAGE_CACHE_SIZE - 1);
1531 struct inode *inode = mapping->host;
1532 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1533 struct ocfs2_write_ctxt *wc = fsdata;
1534 struct ocfs2_dinode *di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
1535 handle_t *handle = wc->w_handle;
1536 struct page *tmppage;
1537
1538 if (unlikely(copied < len)) {
1539 if (!PageUptodate(wc->w_target_page))
1540 copied = 0;
1541
1542 ocfs2_zero_new_buffers(wc->w_target_page, start+copied,
1543 start+len);
1544 }
1545 flush_dcache_page(wc->w_target_page);
1546
1547 for(i = 0; i < wc->w_num_pages; i++) {
1548 tmppage = wc->w_pages[i];
1549
1550 if (tmppage == wc->w_target_page) {
1551 from = wc->w_target_from;
1552 to = wc->w_target_to;
1553
1554 BUG_ON(from > PAGE_CACHE_SIZE ||
1555 to > PAGE_CACHE_SIZE ||
1556 to < from);
1557 } else {
1558 /*
1559 * Pages adjacent to the target (if any) imply
1560 * a hole-filling write in which case we want
1561 * to flush their entire range.
1562 */
1563 from = 0;
1564 to = PAGE_CACHE_SIZE;
1565 }
1566
1567 if (ocfs2_should_order_data(inode))
1568 walk_page_buffers(wc->w_handle, page_buffers(tmppage),
1569 from, to, NULL,
1570 ocfs2_journal_dirty_data);
1571
1572 block_commit_write(tmppage, from, to);
1573 }
1574
1575 pos += copied;
1277 if (pos > inode->i_size) { 1576 if (pos > inode->i_size) {
1278 i_size_write(inode, pos); 1577 i_size_write(inode, pos);
1279 mark_inode_dirty(inode); 1578 mark_inode_dirty(inode);
@@ -1283,29 +1582,31 @@ ssize_t ocfs2_buffered_write_cluster(struct file *file, loff_t pos,
1283 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 1582 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
1284 di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec); 1583 di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
1285 di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); 1584 di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
1585 ocfs2_journal_dirty(handle, wc->w_di_bh);
1286 1586
1287 ret = ocfs2_journal_dirty(handle, di_bh);
1288 if (ret)
1289 mlog_errno(ret);
1290
1291out_commit:
1292 ocfs2_commit_trans(osb, handle); 1587 ocfs2_commit_trans(osb, handle);
1293 1588
1294out_data: 1589 ocfs2_run_deallocs(osb, &wc->w_dealloc);
1295 ocfs2_data_unlock(inode, 1); 1590
1591 ocfs2_free_write_ctxt(wc);
1592
1593 return copied;
1594}
1595
1596int ocfs2_write_end(struct file *file, struct address_space *mapping,
1597 loff_t pos, unsigned len, unsigned copied,
1598 struct page *page, void *fsdata)
1599{
1600 int ret;
1601 struct inode *inode = mapping->host;
1296 1602
1297out_meta: 1603 ret = ocfs2_write_end_nolock(mapping, pos, len, copied, page, fsdata);
1604
1605 ocfs2_data_unlock(inode, 1);
1298 up_write(&OCFS2_I(inode)->ip_alloc_sem); 1606 up_write(&OCFS2_I(inode)->ip_alloc_sem);
1299 ocfs2_meta_unlock(inode, 1); 1607 ocfs2_meta_unlock(inode, 1);
1300 1608
1301out: 1609 return ret;
1302 brelse(di_bh);
1303 if (data_ac)
1304 ocfs2_free_alloc_context(data_ac);
1305 if (meta_ac)
1306 ocfs2_free_alloc_context(meta_ac);
1307
1308 return written ? written : ret;
1309} 1610}
1310 1611
1311const struct address_space_operations ocfs2_aops = { 1612const struct address_space_operations ocfs2_aops = {
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index 45821d479b5..389579bd64e 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -42,57 +42,22 @@ int walk_page_buffers( handle_t *handle,
42 int (*fn)( handle_t *handle, 42 int (*fn)( handle_t *handle,
43 struct buffer_head *bh)); 43 struct buffer_head *bh));
44 44
45struct ocfs2_write_ctxt; 45int ocfs2_write_begin(struct file *file, struct address_space *mapping,
46typedef int (ocfs2_page_writer)(struct inode *, struct ocfs2_write_ctxt *, 46 loff_t pos, unsigned len, unsigned flags,
47 u64 *, unsigned int *, unsigned int *); 47 struct page **pagep, void **fsdata);
48 48
49ssize_t ocfs2_buffered_write_cluster(struct file *file, loff_t pos, 49int ocfs2_write_end(struct file *file, struct address_space *mapping,
50 size_t count, ocfs2_page_writer *actor, 50 loff_t pos, unsigned len, unsigned copied,
51 void *priv); 51 struct page *page, void *fsdata);
52 52
53struct ocfs2_write_ctxt { 53int ocfs2_write_end_nolock(struct address_space *mapping,
54 size_t w_count; 54 loff_t pos, unsigned len, unsigned copied,
55 loff_t w_pos; 55 struct page *page, void *fsdata);
56 u32 w_cpos;
57 unsigned int w_finished_copy;
58 56
59 /* This is true if page_size > cluster_size */ 57int ocfs2_write_begin_nolock(struct address_space *mapping,
60 unsigned int w_large_pages; 58 loff_t pos, unsigned len, unsigned flags,
61 59 struct page **pagep, void **fsdata,
62 /* Filler callback and private data */ 60 struct buffer_head *di_bh, struct page *mmap_page);
63 ocfs2_page_writer *w_write_data_page;
64 void *w_private;
65
66 /* Only valid for the filler callback */
67 struct page *w_this_page;
68 unsigned int w_this_page_new;
69};
70
71struct ocfs2_buffered_write_priv {
72 char *b_src_buf;
73 const struct iovec *b_cur_iov; /* Current iovec */
74 size_t b_cur_off; /* Offset in the
75 * current iovec */
76};
77int ocfs2_map_and_write_user_data(struct inode *inode,
78 struct ocfs2_write_ctxt *wc,
79 u64 *p_blkno,
80 unsigned int *ret_from,
81 unsigned int *ret_to);
82
83struct ocfs2_splice_write_priv {
84 struct splice_desc *s_sd;
85 struct pipe_buffer *s_buf;
86 struct pipe_inode_info *s_pipe;
87 /* Neither offset value is ever larger than one page */
88 unsigned int s_offset;
89 unsigned int s_buf_offset;
90};
91int ocfs2_map_and_write_splice_data(struct inode *inode,
92 struct ocfs2_write_ctxt *wc,
93 u64 *p_blkno,
94 unsigned int *ret_from,
95 unsigned int *ret_to);
96 61
97/* all ocfs2_dio_end_io()'s fault */ 62/* all ocfs2_dio_end_io()'s fault */
98#define ocfs2_iocb_is_rw_locked(iocb) \ 63#define ocfs2_iocb_is_rw_locked(iocb) \
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 979113479c6..2bd7f788cf3 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1335,6 +1335,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1335 ret = wait_event_interruptible(o2hb_steady_queue, 1335 ret = wait_event_interruptible(o2hb_steady_queue,
1336 atomic_read(&reg->hr_steady_iterations) == 0); 1336 atomic_read(&reg->hr_steady_iterations) == 0);
1337 if (ret) { 1337 if (ret) {
1338 /* We got interrupted (hello ptrace!). Clean up */
1338 spin_lock(&o2hb_live_lock); 1339 spin_lock(&o2hb_live_lock);
1339 hb_task = reg->hr_task; 1340 hb_task = reg->hr_task;
1340 reg->hr_task = NULL; 1341 reg->hr_task = NULL;
@@ -1345,7 +1346,16 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1345 goto out; 1346 goto out;
1346 } 1347 }
1347 1348
1348 ret = count; 1349 /* Ok, we were woken. Make sure it wasn't by drop_item() */
1350 spin_lock(&o2hb_live_lock);
1351 hb_task = reg->hr_task;
1352 spin_unlock(&o2hb_live_lock);
1353
1354 if (hb_task)
1355 ret = count;
1356 else
1357 ret = -EIO;
1358
1349out: 1359out:
1350 if (filp) 1360 if (filp)
1351 fput(filp); 1361 fput(filp);
@@ -1523,6 +1533,15 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group,
1523 if (hb_task) 1533 if (hb_task)
1524 kthread_stop(hb_task); 1534 kthread_stop(hb_task);
1525 1535
1536 /*
1537 * If we're racing a dev_write(), we need to wake them. They will
1538 * check reg->hr_task
1539 */
1540 if (atomic_read(&reg->hr_steady_iterations) != 0) {
1541 atomic_set(&reg->hr_steady_iterations, 0);
1542 wake_up(&o2hb_steady_queue);
1543 }
1544
1526 config_item_put(item); 1545 config_item_put(item);
1527} 1546}
1528 1547
@@ -1665,7 +1684,67 @@ void o2hb_setup_callback(struct o2hb_callback_func *hc,
1665} 1684}
1666EXPORT_SYMBOL_GPL(o2hb_setup_callback); 1685EXPORT_SYMBOL_GPL(o2hb_setup_callback);
1667 1686
1668int o2hb_register_callback(struct o2hb_callback_func *hc) 1687static struct o2hb_region *o2hb_find_region(const char *region_uuid)
1688{
1689 struct o2hb_region *p, *reg = NULL;
1690
1691 assert_spin_locked(&o2hb_live_lock);
1692
1693 list_for_each_entry(p, &o2hb_all_regions, hr_all_item) {
1694 if (!strcmp(region_uuid, config_item_name(&p->hr_item))) {
1695 reg = p;
1696 break;
1697 }
1698 }
1699
1700 return reg;
1701}
1702
1703static int o2hb_region_get(const char *region_uuid)
1704{
1705 int ret = 0;
1706 struct o2hb_region *reg;
1707
1708 spin_lock(&o2hb_live_lock);
1709
1710 reg = o2hb_find_region(region_uuid);
1711 if (!reg)
1712 ret = -ENOENT;
1713 spin_unlock(&o2hb_live_lock);
1714
1715 if (ret)
1716 goto out;
1717
1718 ret = o2nm_depend_this_node();
1719 if (ret)
1720 goto out;
1721
1722 ret = o2nm_depend_item(&reg->hr_item);
1723 if (ret)
1724 o2nm_undepend_this_node();
1725
1726out:
1727 return ret;
1728}
1729
1730static void o2hb_region_put(const char *region_uuid)
1731{
1732 struct o2hb_region *reg;
1733
1734 spin_lock(&o2hb_live_lock);
1735
1736 reg = o2hb_find_region(region_uuid);
1737
1738 spin_unlock(&o2hb_live_lock);
1739
1740 if (reg) {
1741 o2nm_undepend_item(&reg->hr_item);
1742 o2nm_undepend_this_node();
1743 }
1744}
1745
1746int o2hb_register_callback(const char *region_uuid,
1747 struct o2hb_callback_func *hc)
1669{ 1748{
1670 struct o2hb_callback_func *tmp; 1749 struct o2hb_callback_func *tmp;
1671 struct list_head *iter; 1750 struct list_head *iter;
@@ -1681,6 +1760,12 @@ int o2hb_register_callback(struct o2hb_callback_func *hc)
1681 goto out; 1760 goto out;
1682 } 1761 }
1683 1762
1763 if (region_uuid) {
1764 ret = o2hb_region_get(region_uuid);
1765 if (ret)
1766 goto out;
1767 }
1768
1684 down_write(&o2hb_callback_sem); 1769 down_write(&o2hb_callback_sem);
1685 1770
1686 list_for_each(iter, &hbcall->list) { 1771 list_for_each(iter, &hbcall->list) {
@@ -1702,16 +1787,21 @@ out:
1702} 1787}
1703EXPORT_SYMBOL_GPL(o2hb_register_callback); 1788EXPORT_SYMBOL_GPL(o2hb_register_callback);
1704 1789
1705void o2hb_unregister_callback(struct o2hb_callback_func *hc) 1790void o2hb_unregister_callback(const char *region_uuid,
1791 struct o2hb_callback_func *hc)
1706{ 1792{
1707 BUG_ON(hc->hc_magic != O2HB_CB_MAGIC); 1793 BUG_ON(hc->hc_magic != O2HB_CB_MAGIC);
1708 1794
1709 mlog(ML_HEARTBEAT, "on behalf of %p for funcs %p\n", 1795 mlog(ML_HEARTBEAT, "on behalf of %p for funcs %p\n",
1710 __builtin_return_address(0), hc); 1796 __builtin_return_address(0), hc);
1711 1797
1798 /* XXX Can this happen _with_ a region reference? */
1712 if (list_empty(&hc->hc_item)) 1799 if (list_empty(&hc->hc_item))
1713 return; 1800 return;
1714 1801
1802 if (region_uuid)
1803 o2hb_region_put(region_uuid);
1804
1715 down_write(&o2hb_callback_sem); 1805 down_write(&o2hb_callback_sem);
1716 1806
1717 list_del_init(&hc->hc_item); 1807 list_del_init(&hc->hc_item);
diff --git a/fs/ocfs2/cluster/heartbeat.h b/fs/ocfs2/cluster/heartbeat.h
index cc6d40b3977..35397dd5ecd 100644
--- a/fs/ocfs2/cluster/heartbeat.h
+++ b/fs/ocfs2/cluster/heartbeat.h
@@ -69,8 +69,10 @@ void o2hb_setup_callback(struct o2hb_callback_func *hc,
69 o2hb_cb_func *func, 69 o2hb_cb_func *func,
70 void *data, 70 void *data,
71 int priority); 71 int priority);
72int o2hb_register_callback(struct o2hb_callback_func *hc); 72int o2hb_register_callback(const char *region_uuid,
73void o2hb_unregister_callback(struct o2hb_callback_func *hc); 73 struct o2hb_callback_func *hc);
74void o2hb_unregister_callback(const char *region_uuid,
75 struct o2hb_callback_func *hc);
74void o2hb_fill_node_map(unsigned long *map, 76void o2hb_fill_node_map(unsigned long *map,
75 unsigned bytes); 77 unsigned bytes);
76void o2hb_init(void); 78void o2hb_init(void);
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
index 2b205f5d579..e9e042b93db 100644
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -74,7 +74,6 @@ struct mlog_attribute {
74#define define_mask(_name) { \ 74#define define_mask(_name) { \
75 .attr = { \ 75 .attr = { \
76 .name = #_name, \ 76 .name = #_name, \
77 .owner = THIS_MODULE, \
78 .mode = S_IRUGO | S_IWUSR, \ 77 .mode = S_IRUGO | S_IWUSR, \
79 }, \ 78 }, \
80 .mask = ML_##_name, \ 79 .mask = ML_##_name, \
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index 9f5ad0f01ce..af2070da308 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -900,6 +900,46 @@ static struct o2nm_cluster_group o2nm_cluster_group = {
900 }, 900 },
901}; 901};
902 902
903int o2nm_depend_item(struct config_item *item)
904{
905 return configfs_depend_item(&o2nm_cluster_group.cs_subsys, item);
906}
907
908void o2nm_undepend_item(struct config_item *item)
909{
910 configfs_undepend_item(&o2nm_cluster_group.cs_subsys, item);
911}
912
913int o2nm_depend_this_node(void)
914{
915 int ret = 0;
916 struct o2nm_node *local_node;
917
918 local_node = o2nm_get_node_by_num(o2nm_this_node());
919 if (!local_node) {
920 ret = -EINVAL;
921 goto out;
922 }
923
924 ret = o2nm_depend_item(&local_node->nd_item);
925 o2nm_node_put(local_node);
926
927out:
928 return ret;
929}
930
931void o2nm_undepend_this_node(void)
932{
933 struct o2nm_node *local_node;
934
935 local_node = o2nm_get_node_by_num(o2nm_this_node());
936 BUG_ON(!local_node);
937
938 o2nm_undepend_item(&local_node->nd_item);
939 o2nm_node_put(local_node);
940}
941
942
903static void __exit exit_o2nm(void) 943static void __exit exit_o2nm(void)
904{ 944{
905 if (ocfs2_table_header) 945 if (ocfs2_table_header)
@@ -934,7 +974,7 @@ static int __init init_o2nm(void)
934 goto out_sysctl; 974 goto out_sysctl;
935 975
936 config_group_init(&o2nm_cluster_group.cs_subsys.su_group); 976 config_group_init(&o2nm_cluster_group.cs_subsys.su_group);
937 init_MUTEX(&o2nm_cluster_group.cs_subsys.su_sem); 977 mutex_init(&o2nm_cluster_group.cs_subsys.su_mutex);
938 ret = configfs_register_subsystem(&o2nm_cluster_group.cs_subsys); 978 ret = configfs_register_subsystem(&o2nm_cluster_group.cs_subsys);
939 if (ret) { 979 if (ret) {
940 printk(KERN_ERR "nodemanager: Registration returned %d\n", ret); 980 printk(KERN_ERR "nodemanager: Registration returned %d\n", ret);
diff --git a/fs/ocfs2/cluster/nodemanager.h b/fs/ocfs2/cluster/nodemanager.h
index 070522138ae..7c860361b8d 100644
--- a/fs/ocfs2/cluster/nodemanager.h
+++ b/fs/ocfs2/cluster/nodemanager.h
@@ -77,4 +77,9 @@ struct o2nm_node *o2nm_get_node_by_ip(__be32 addr);
77void o2nm_node_get(struct o2nm_node *node); 77void o2nm_node_get(struct o2nm_node *node);
78void o2nm_node_put(struct o2nm_node *node); 78void o2nm_node_put(struct o2nm_node *node);
79 79
80int o2nm_depend_item(struct config_item *item);
81void o2nm_undepend_item(struct config_item *item);
82int o2nm_depend_this_node(void);
83void o2nm_undepend_this_node(void);
84
80#endif /* O2CLUSTER_NODEMANAGER_H */ 85#endif /* O2CLUSTER_NODEMANAGER_H */
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 0b229a9c795..f0bdfd944c4 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -261,14 +261,12 @@ out:
261 261
262static void o2net_complete_nodes_nsw(struct o2net_node *nn) 262static void o2net_complete_nodes_nsw(struct o2net_node *nn)
263{ 263{
264 struct list_head *iter, *tmp; 264 struct o2net_status_wait *nsw, *tmp;
265 unsigned int num_kills = 0; 265 unsigned int num_kills = 0;
266 struct o2net_status_wait *nsw;
267 266
268 assert_spin_locked(&nn->nn_lock); 267 assert_spin_locked(&nn->nn_lock);
269 268
270 list_for_each_safe(iter, tmp, &nn->nn_status_list) { 269 list_for_each_entry_safe(nsw, tmp, &nn->nn_status_list, ns_node_item) {
271 nsw = list_entry(iter, struct o2net_status_wait, ns_node_item);
272 o2net_complete_nsw_locked(nn, nsw, O2NET_ERR_DIED, 0); 270 o2net_complete_nsw_locked(nn, nsw, O2NET_ERR_DIED, 0);
273 num_kills++; 271 num_kills++;
274 } 272 }
@@ -764,13 +762,10 @@ EXPORT_SYMBOL_GPL(o2net_register_handler);
764 762
765void o2net_unregister_handler_list(struct list_head *list) 763void o2net_unregister_handler_list(struct list_head *list)
766{ 764{
767 struct list_head *pos, *n; 765 struct o2net_msg_handler *nmh, *n;
768 struct o2net_msg_handler *nmh;
769 766
770 write_lock(&o2net_handler_lock); 767 write_lock(&o2net_handler_lock);
771 list_for_each_safe(pos, n, list) { 768 list_for_each_entry_safe(nmh, n, list, nh_unregister_item) {
772 nmh = list_entry(pos, struct o2net_msg_handler,
773 nh_unregister_item);
774 mlog(ML_TCP, "unregistering handler func %p type %u key %08x\n", 769 mlog(ML_TCP, "unregistering handler func %p type %u key %08x\n",
775 nmh->nh_func, nmh->nh_msg_type, nmh->nh_key); 770 nmh->nh_func, nmh->nh_msg_type, nmh->nh_key);
776 rb_erase(&nmh->nh_node, &o2net_handler_tree); 771 rb_erase(&nmh->nh_node, &o2net_handler_tree);
@@ -1638,8 +1633,8 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num,
1638 1633
1639void o2net_unregister_hb_callbacks(void) 1634void o2net_unregister_hb_callbacks(void)
1640{ 1635{
1641 o2hb_unregister_callback(&o2net_hb_up); 1636 o2hb_unregister_callback(NULL, &o2net_hb_up);
1642 o2hb_unregister_callback(&o2net_hb_down); 1637 o2hb_unregister_callback(NULL, &o2net_hb_down);
1643} 1638}
1644 1639
1645int o2net_register_hb_callbacks(void) 1640int o2net_register_hb_callbacks(void)
@@ -1651,9 +1646,9 @@ int o2net_register_hb_callbacks(void)
1651 o2hb_setup_callback(&o2net_hb_up, O2HB_NODE_UP_CB, 1646 o2hb_setup_callback(&o2net_hb_up, O2HB_NODE_UP_CB,
1652 o2net_hb_node_up_cb, NULL, O2NET_HB_PRI); 1647 o2net_hb_node_up_cb, NULL, O2NET_HB_PRI);
1653 1648
1654 ret = o2hb_register_callback(&o2net_hb_up); 1649 ret = o2hb_register_callback(NULL, &o2net_hb_up);
1655 if (ret == 0) 1650 if (ret == 0)
1656 ret = o2hb_register_callback(&o2net_hb_down); 1651 ret = o2hb_register_callback(NULL, &o2net_hb_down);
1657 1652
1658 if (ret) 1653 if (ret)
1659 o2net_unregister_hb_callbacks(); 1654 o2net_unregister_hb_callbacks();
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index c441ef1f2ba..0d5fdde959c 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -368,7 +368,7 @@ int ocfs2_do_extend_dir(struct super_block *sb,
368 u32 offset = OCFS2_I(dir)->ip_clusters; 368 u32 offset = OCFS2_I(dir)->ip_clusters;
369 369
370 status = ocfs2_do_extend_allocation(OCFS2_SB(sb), dir, &offset, 370 status = ocfs2_do_extend_allocation(OCFS2_SB(sb), dir, &offset,
371 1, parent_fe_bh, handle, 371 1, 0, parent_fe_bh, handle,
372 data_ac, meta_ac, NULL); 372 data_ac, meta_ac, NULL);
373 BUG_ON(status == -EAGAIN); 373 BUG_ON(status == -EAGAIN);
374 if (status < 0) { 374 if (status < 0) {
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index d836b98dd99..6954565b8cc 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1128,8 +1128,8 @@ bail:
1128 1128
1129static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm) 1129static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm)
1130{ 1130{
1131 o2hb_unregister_callback(&dlm->dlm_hb_up); 1131 o2hb_unregister_callback(NULL, &dlm->dlm_hb_up);
1132 o2hb_unregister_callback(&dlm->dlm_hb_down); 1132 o2hb_unregister_callback(NULL, &dlm->dlm_hb_down);
1133 o2net_unregister_handler_list(&dlm->dlm_domain_handlers); 1133 o2net_unregister_handler_list(&dlm->dlm_domain_handlers);
1134} 1134}
1135 1135
@@ -1141,13 +1141,13 @@ static int dlm_register_domain_handlers(struct dlm_ctxt *dlm)
1141 1141
1142 o2hb_setup_callback(&dlm->dlm_hb_down, O2HB_NODE_DOWN_CB, 1142 o2hb_setup_callback(&dlm->dlm_hb_down, O2HB_NODE_DOWN_CB,
1143 dlm_hb_node_down_cb, dlm, DLM_HB_NODE_DOWN_PRI); 1143 dlm_hb_node_down_cb, dlm, DLM_HB_NODE_DOWN_PRI);
1144 status = o2hb_register_callback(&dlm->dlm_hb_down); 1144 status = o2hb_register_callback(NULL, &dlm->dlm_hb_down);
1145 if (status) 1145 if (status)
1146 goto bail; 1146 goto bail;
1147 1147
1148 o2hb_setup_callback(&dlm->dlm_hb_up, O2HB_NODE_UP_CB, 1148 o2hb_setup_callback(&dlm->dlm_hb_up, O2HB_NODE_UP_CB,
1149 dlm_hb_node_up_cb, dlm, DLM_HB_NODE_UP_PRI); 1149 dlm_hb_node_up_cb, dlm, DLM_HB_NODE_UP_PRI);
1150 status = o2hb_register_callback(&dlm->dlm_hb_up); 1150 status = o2hb_register_callback(NULL, &dlm->dlm_hb_up);
1151 if (status) 1151 if (status)
1152 goto bail; 1152 goto bail;
1153 1153
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 6edffca99d9..65b2b9b9268 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -192,25 +192,20 @@ static void dlm_print_one_mle(struct dlm_master_list_entry *mle)
192static void dlm_dump_mles(struct dlm_ctxt *dlm) 192static void dlm_dump_mles(struct dlm_ctxt *dlm)
193{ 193{
194 struct dlm_master_list_entry *mle; 194 struct dlm_master_list_entry *mle;
195 struct list_head *iter;
196 195
197 mlog(ML_NOTICE, "dumping all mles for domain %s:\n", dlm->name); 196 mlog(ML_NOTICE, "dumping all mles for domain %s:\n", dlm->name);
198 spin_lock(&dlm->master_lock); 197 spin_lock(&dlm->master_lock);
199 list_for_each(iter, &dlm->master_list) { 198 list_for_each_entry(mle, &dlm->master_list, list)
200 mle = list_entry(iter, struct dlm_master_list_entry, list);
201 dlm_print_one_mle(mle); 199 dlm_print_one_mle(mle);
202 }
203 spin_unlock(&dlm->master_lock); 200 spin_unlock(&dlm->master_lock);
204} 201}
205 202
206int dlm_dump_all_mles(const char __user *data, unsigned int len) 203int dlm_dump_all_mles(const char __user *data, unsigned int len)
207{ 204{
208 struct list_head *iter;
209 struct dlm_ctxt *dlm; 205 struct dlm_ctxt *dlm;
210 206
211 spin_lock(&dlm_domain_lock); 207 spin_lock(&dlm_domain_lock);
212 list_for_each(iter, &dlm_domains) { 208 list_for_each_entry(dlm, &dlm_domains, list) {
213 dlm = list_entry (iter, struct dlm_ctxt, list);
214 mlog(ML_NOTICE, "found dlm: %p, name=%s\n", dlm, dlm->name); 209 mlog(ML_NOTICE, "found dlm: %p, name=%s\n", dlm, dlm->name);
215 dlm_dump_mles(dlm); 210 dlm_dump_mles(dlm);
216 } 211 }
@@ -454,12 +449,10 @@ static int dlm_find_mle(struct dlm_ctxt *dlm,
454 char *name, unsigned int namelen) 449 char *name, unsigned int namelen)
455{ 450{
456 struct dlm_master_list_entry *tmpmle; 451 struct dlm_master_list_entry *tmpmle;
457 struct list_head *iter;
458 452
459 assert_spin_locked(&dlm->master_lock); 453 assert_spin_locked(&dlm->master_lock);
460 454
461 list_for_each(iter, &dlm->master_list) { 455 list_for_each_entry(tmpmle, &dlm->master_list, list) {
462 tmpmle = list_entry(iter, struct dlm_master_list_entry, list);
463 if (!dlm_mle_equal(dlm, tmpmle, name, namelen)) 456 if (!dlm_mle_equal(dlm, tmpmle, name, namelen))
464 continue; 457 continue;
465 dlm_get_mle(tmpmle); 458 dlm_get_mle(tmpmle);
@@ -472,13 +465,10 @@ static int dlm_find_mle(struct dlm_ctxt *dlm,
472void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up) 465void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up)
473{ 466{
474 struct dlm_master_list_entry *mle; 467 struct dlm_master_list_entry *mle;
475 struct list_head *iter;
476 468
477 assert_spin_locked(&dlm->spinlock); 469 assert_spin_locked(&dlm->spinlock);
478 470
479 list_for_each(iter, &dlm->mle_hb_events) { 471 list_for_each_entry(mle, &dlm->mle_hb_events, hb_events) {
480 mle = list_entry(iter, struct dlm_master_list_entry,
481 hb_events);
482 if (node_up) 472 if (node_up)
483 dlm_mle_node_up(dlm, mle, NULL, idx); 473 dlm_mle_node_up(dlm, mle, NULL, idx);
484 else 474 else
@@ -2434,7 +2424,7 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
2434 int ret; 2424 int ret;
2435 int i; 2425 int i;
2436 int count = 0; 2426 int count = 0;
2437 struct list_head *queue, *iter; 2427 struct list_head *queue;
2438 struct dlm_lock *lock; 2428 struct dlm_lock *lock;
2439 2429
2440 assert_spin_locked(&res->spinlock); 2430 assert_spin_locked(&res->spinlock);
@@ -2453,8 +2443,7 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
2453 ret = 0; 2443 ret = 0;
2454 queue = &res->granted; 2444 queue = &res->granted;
2455 for (i = 0; i < 3; i++) { 2445 for (i = 0; i < 3; i++) {
2456 list_for_each(iter, queue) { 2446 list_for_each_entry(lock, queue, list) {
2457 lock = list_entry(iter, struct dlm_lock, list);
2458 ++count; 2447 ++count;
2459 if (lock->ml.node == dlm->node_num) { 2448 if (lock->ml.node == dlm->node_num) {
2460 mlog(0, "found a lock owned by this node still " 2449 mlog(0, "found a lock owned by this node still "
@@ -2923,18 +2912,16 @@ again:
2923static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm, 2912static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm,
2924 struct dlm_lock_resource *res) 2913 struct dlm_lock_resource *res)
2925{ 2914{
2926 struct list_head *iter, *iter2;
2927 struct list_head *queue = &res->granted; 2915 struct list_head *queue = &res->granted;
2928 int i, bit; 2916 int i, bit;
2929 struct dlm_lock *lock; 2917 struct dlm_lock *lock, *next;
2930 2918
2931 assert_spin_locked(&res->spinlock); 2919 assert_spin_locked(&res->spinlock);
2932 2920
2933 BUG_ON(res->owner == dlm->node_num); 2921 BUG_ON(res->owner == dlm->node_num);
2934 2922
2935 for (i=0; i<3; i++) { 2923 for (i=0; i<3; i++) {
2936 list_for_each_safe(iter, iter2, queue) { 2924 list_for_each_entry_safe(lock, next, queue, list) {
2937 lock = list_entry (iter, struct dlm_lock, list);
2938 if (lock->ml.node != dlm->node_num) { 2925 if (lock->ml.node != dlm->node_num) {
2939 mlog(0, "putting lock for node %u\n", 2926 mlog(0, "putting lock for node %u\n",
2940 lock->ml.node); 2927 lock->ml.node);
@@ -2976,7 +2963,6 @@ static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm,
2976{ 2963{
2977 int i; 2964 int i;
2978 struct list_head *queue = &res->granted; 2965 struct list_head *queue = &res->granted;
2979 struct list_head *iter;
2980 struct dlm_lock *lock; 2966 struct dlm_lock *lock;
2981 int nodenum; 2967 int nodenum;
2982 2968
@@ -2984,10 +2970,9 @@ static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm,
2984 2970
2985 spin_lock(&res->spinlock); 2971 spin_lock(&res->spinlock);
2986 for (i=0; i<3; i++) { 2972 for (i=0; i<3; i++) {
2987 list_for_each(iter, queue) { 2973 list_for_each_entry(lock, queue, list) {
2988 /* up to the caller to make sure this node 2974 /* up to the caller to make sure this node
2989 * is alive */ 2975 * is alive */
2990 lock = list_entry (iter, struct dlm_lock, list);
2991 if (lock->ml.node != dlm->node_num) { 2976 if (lock->ml.node != dlm->node_num) {
2992 spin_unlock(&res->spinlock); 2977 spin_unlock(&res->spinlock);
2993 return lock->ml.node; 2978 return lock->ml.node;
@@ -3234,8 +3219,7 @@ static int dlm_add_migration_mle(struct dlm_ctxt *dlm,
3234 3219
3235void dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node) 3220void dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node)
3236{ 3221{
3237 struct list_head *iter, *iter2; 3222 struct dlm_master_list_entry *mle, *next;
3238 struct dlm_master_list_entry *mle;
3239 struct dlm_lock_resource *res; 3223 struct dlm_lock_resource *res;
3240 unsigned int hash; 3224 unsigned int hash;
3241 3225
@@ -3245,9 +3229,7 @@ top:
3245 3229
3246 /* clean the master list */ 3230 /* clean the master list */
3247 spin_lock(&dlm->master_lock); 3231 spin_lock(&dlm->master_lock);
3248 list_for_each_safe(iter, iter2, &dlm->master_list) { 3232 list_for_each_entry_safe(mle, next, &dlm->master_list, list) {
3249 mle = list_entry(iter, struct dlm_master_list_entry, list);
3250
3251 BUG_ON(mle->type != DLM_MLE_BLOCK && 3233 BUG_ON(mle->type != DLM_MLE_BLOCK &&
3252 mle->type != DLM_MLE_MASTER && 3234 mle->type != DLM_MLE_MASTER &&
3253 mle->type != DLM_MLE_MIGRATION); 3235 mle->type != DLM_MLE_MIGRATION);
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 671c4ed58ee..a2c33160bfd 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -158,8 +158,7 @@ void dlm_dispatch_work(struct work_struct *work)
158 struct dlm_ctxt *dlm = 158 struct dlm_ctxt *dlm =
159 container_of(work, struct dlm_ctxt, dispatched_work); 159 container_of(work, struct dlm_ctxt, dispatched_work);
160 LIST_HEAD(tmp_list); 160 LIST_HEAD(tmp_list);
161 struct list_head *iter, *iter2; 161 struct dlm_work_item *item, *next;
162 struct dlm_work_item *item;
163 dlm_workfunc_t *workfunc; 162 dlm_workfunc_t *workfunc;
164 int tot=0; 163 int tot=0;
165 164
@@ -167,13 +166,12 @@ void dlm_dispatch_work(struct work_struct *work)
167 list_splice_init(&dlm->work_list, &tmp_list); 166 list_splice_init(&dlm->work_list, &tmp_list);
168 spin_unlock(&dlm->work_lock); 167 spin_unlock(&dlm->work_lock);
169 168
170 list_for_each_safe(iter, iter2, &tmp_list) { 169 list_for_each_entry(item, &tmp_list, list) {
171 tot++; 170 tot++;
172 } 171 }
173 mlog(0, "%s: work thread has %d work items\n", dlm->name, tot); 172 mlog(0, "%s: work thread has %d work items\n", dlm->name, tot);
174 173
175 list_for_each_safe(iter, iter2, &tmp_list) { 174 list_for_each_entry_safe(item, next, &tmp_list, list) {
176 item = list_entry(iter, struct dlm_work_item, list);
177 workfunc = item->func; 175 workfunc = item->func;
178 list_del_init(&item->list); 176 list_del_init(&item->list);
179 177
@@ -549,7 +547,6 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
549{ 547{
550 int status = 0; 548 int status = 0;
551 struct dlm_reco_node_data *ndata; 549 struct dlm_reco_node_data *ndata;
552 struct list_head *iter;
553 int all_nodes_done; 550 int all_nodes_done;
554 int destroy = 0; 551 int destroy = 0;
555 int pass = 0; 552 int pass = 0;
@@ -567,8 +564,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
567 564
568 /* safe to access the node data list without a lock, since this 565 /* safe to access the node data list without a lock, since this
569 * process is the only one to change the list */ 566 * process is the only one to change the list */
570 list_for_each(iter, &dlm->reco.node_data) { 567 list_for_each_entry(ndata, &dlm->reco.node_data, list) {
571 ndata = list_entry (iter, struct dlm_reco_node_data, list);
572 BUG_ON(ndata->state != DLM_RECO_NODE_DATA_INIT); 568 BUG_ON(ndata->state != DLM_RECO_NODE_DATA_INIT);
573 ndata->state = DLM_RECO_NODE_DATA_REQUESTING; 569 ndata->state = DLM_RECO_NODE_DATA_REQUESTING;
574 570
@@ -655,9 +651,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
655 * done, or if anyone died */ 651 * done, or if anyone died */
656 all_nodes_done = 1; 652 all_nodes_done = 1;
657 spin_lock(&dlm_reco_state_lock); 653 spin_lock(&dlm_reco_state_lock);
658 list_for_each(iter, &dlm->reco.node_data) { 654 list_for_each_entry(ndata, &dlm->reco.node_data, list) {
659 ndata = list_entry (iter, struct dlm_reco_node_data, list);
660
661 mlog(0, "checking recovery state of node %u\n", 655 mlog(0, "checking recovery state of node %u\n",
662 ndata->node_num); 656 ndata->node_num);
663 switch (ndata->state) { 657 switch (ndata->state) {
@@ -774,16 +768,14 @@ static int dlm_init_recovery_area(struct dlm_ctxt *dlm, u8 dead_node)
774 768
775static void dlm_destroy_recovery_area(struct dlm_ctxt *dlm, u8 dead_node) 769static void dlm_destroy_recovery_area(struct dlm_ctxt *dlm, u8 dead_node)
776{ 770{
777 struct list_head *iter, *iter2; 771 struct dlm_reco_node_data *ndata, *next;
778 struct dlm_reco_node_data *ndata;
779 LIST_HEAD(tmplist); 772 LIST_HEAD(tmplist);
780 773
781 spin_lock(&dlm_reco_state_lock); 774 spin_lock(&dlm_reco_state_lock);
782 list_splice_init(&dlm->reco.node_data, &tmplist); 775 list_splice_init(&dlm->reco.node_data, &tmplist);
783 spin_unlock(&dlm_reco_state_lock); 776 spin_unlock(&dlm_reco_state_lock);
784 777
785 list_for_each_safe(iter, iter2, &tmplist) { 778 list_for_each_entry_safe(ndata, next, &tmplist, list) {
786 ndata = list_entry (iter, struct dlm_reco_node_data, list);
787 list_del_init(&ndata->list); 779 list_del_init(&ndata->list);
788 kfree(ndata); 780 kfree(ndata);
789 } 781 }
@@ -876,7 +868,6 @@ static void dlm_request_all_locks_worker(struct dlm_work_item *item, void *data)
876 struct dlm_lock_resource *res; 868 struct dlm_lock_resource *res;
877 struct dlm_ctxt *dlm; 869 struct dlm_ctxt *dlm;
878 LIST_HEAD(resources); 870 LIST_HEAD(resources);
879 struct list_head *iter;
880 int ret; 871 int ret;
881 u8 dead_node, reco_master; 872 u8 dead_node, reco_master;
882 int skip_all_done = 0; 873 int skip_all_done = 0;
@@ -920,8 +911,7 @@ static void dlm_request_all_locks_worker(struct dlm_work_item *item, void *data)
920 911
921 /* any errors returned will be due to the new_master dying, 912 /* any errors returned will be due to the new_master dying,
922 * the dlm_reco_thread should detect this */ 913 * the dlm_reco_thread should detect this */
923 list_for_each(iter, &resources) { 914 list_for_each_entry(res, &resources, recovering) {
924 res = list_entry (iter, struct dlm_lock_resource, recovering);
925 ret = dlm_send_one_lockres(dlm, res, mres, reco_master, 915 ret = dlm_send_one_lockres(dlm, res, mres, reco_master,
926 DLM_MRES_RECOVERY); 916 DLM_MRES_RECOVERY);
927 if (ret < 0) { 917 if (ret < 0) {
@@ -983,7 +973,6 @@ int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data,
983{ 973{
984 struct dlm_ctxt *dlm = data; 974 struct dlm_ctxt *dlm = data;
985 struct dlm_reco_data_done *done = (struct dlm_reco_data_done *)msg->buf; 975 struct dlm_reco_data_done *done = (struct dlm_reco_data_done *)msg->buf;
986 struct list_head *iter;
987 struct dlm_reco_node_data *ndata = NULL; 976 struct dlm_reco_node_data *ndata = NULL;
988 int ret = -EINVAL; 977 int ret = -EINVAL;
989 978
@@ -1000,8 +989,7 @@ int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data,
1000 dlm->reco.dead_node, done->node_idx, dlm->node_num); 989 dlm->reco.dead_node, done->node_idx, dlm->node_num);
1001 990
1002 spin_lock(&dlm_reco_state_lock); 991 spin_lock(&dlm_reco_state_lock);
1003 list_for_each(iter, &dlm->reco.node_data) { 992 list_for_each_entry(ndata, &dlm->reco.node_data, list) {
1004 ndata = list_entry (iter, struct dlm_reco_node_data, list);
1005 if (ndata->node_num != done->node_idx) 993 if (ndata->node_num != done->node_idx)
1006 continue; 994 continue;
1007 995
@@ -1049,13 +1037,11 @@ static void dlm_move_reco_locks_to_list(struct dlm_ctxt *dlm,
1049 struct list_head *list, 1037 struct list_head *list,
1050 u8 dead_node) 1038 u8 dead_node)
1051{ 1039{
1052 struct dlm_lock_resource *res; 1040 struct dlm_lock_resource *res, *next;
1053 struct list_head *iter, *iter2;
1054 struct dlm_lock *lock; 1041 struct dlm_lock *lock;
1055 1042
1056 spin_lock(&dlm->spinlock); 1043 spin_lock(&dlm->spinlock);
1057 list_for_each_safe(iter, iter2, &dlm->reco.resources) { 1044 list_for_each_entry_safe(res, next, &dlm->reco.resources, recovering) {
1058 res = list_entry (iter, struct dlm_lock_resource, recovering);
1059 /* always prune any $RECOVERY entries for dead nodes, 1045 /* always prune any $RECOVERY entries for dead nodes,
1060 * otherwise hangs can occur during later recovery */ 1046 * otherwise hangs can occur during later recovery */
1061 if (dlm_is_recovery_lock(res->lockname.name, 1047 if (dlm_is_recovery_lock(res->lockname.name,
@@ -1169,7 +1155,7 @@ static void dlm_init_migratable_lockres(struct dlm_migratable_lockres *mres,
1169 u8 flags, u8 master) 1155 u8 flags, u8 master)
1170{ 1156{
1171 /* mres here is one full page */ 1157 /* mres here is one full page */
1172 memset(mres, 0, PAGE_SIZE); 1158 clear_page(mres);
1173 mres->lockname_len = namelen; 1159 mres->lockname_len = namelen;
1174 memcpy(mres->lockname, lockname, namelen); 1160 memcpy(mres->lockname, lockname, namelen);
1175 mres->num_locks = 0; 1161 mres->num_locks = 0;
@@ -1252,7 +1238,7 @@ int dlm_send_one_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
1252 struct dlm_migratable_lockres *mres, 1238 struct dlm_migratable_lockres *mres,
1253 u8 send_to, u8 flags) 1239 u8 send_to, u8 flags)
1254{ 1240{
1255 struct list_head *queue, *iter; 1241 struct list_head *queue;
1256 int total_locks, i; 1242 int total_locks, i;
1257 u64 mig_cookie = 0; 1243 u64 mig_cookie = 0;
1258 struct dlm_lock *lock; 1244 struct dlm_lock *lock;
@@ -1278,9 +1264,7 @@ int dlm_send_one_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
1278 total_locks = 0; 1264 total_locks = 0;
1279 for (i=DLM_GRANTED_LIST; i<=DLM_BLOCKED_LIST; i++) { 1265 for (i=DLM_GRANTED_LIST; i<=DLM_BLOCKED_LIST; i++) {
1280 queue = dlm_list_idx_to_ptr(res, i); 1266 queue = dlm_list_idx_to_ptr(res, i);
1281 list_for_each(iter, queue) { 1267 list_for_each_entry(lock, queue, list) {
1282 lock = list_entry (iter, struct dlm_lock, list);
1283
1284 /* add another lock. */ 1268 /* add another lock. */
1285 total_locks++; 1269 total_locks++;
1286 if (!dlm_add_lock_to_array(lock, mres, i)) 1270 if (!dlm_add_lock_to_array(lock, mres, i))
@@ -1717,7 +1701,6 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
1717 struct dlm_lockstatus *lksb = NULL; 1701 struct dlm_lockstatus *lksb = NULL;
1718 int ret = 0; 1702 int ret = 0;
1719 int i, j, bad; 1703 int i, j, bad;
1720 struct list_head *iter;
1721 struct dlm_lock *lock = NULL; 1704 struct dlm_lock *lock = NULL;
1722 u8 from = O2NM_MAX_NODES; 1705 u8 from = O2NM_MAX_NODES;
1723 unsigned int added = 0; 1706 unsigned int added = 0;
@@ -1755,8 +1738,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
1755 spin_lock(&res->spinlock); 1738 spin_lock(&res->spinlock);
1756 for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) { 1739 for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) {
1757 tmpq = dlm_list_idx_to_ptr(res, j); 1740 tmpq = dlm_list_idx_to_ptr(res, j);
1758 list_for_each(iter, tmpq) { 1741 list_for_each_entry(lock, tmpq, list) {
1759 lock = list_entry (iter, struct dlm_lock, list);
1760 if (lock->ml.cookie != ml->cookie) 1742 if (lock->ml.cookie != ml->cookie)
1761 lock = NULL; 1743 lock = NULL;
1762 else 1744 else
@@ -1930,8 +1912,8 @@ void dlm_move_lockres_to_recovery_list(struct dlm_ctxt *dlm,
1930 struct dlm_lock_resource *res) 1912 struct dlm_lock_resource *res)
1931{ 1913{
1932 int i; 1914 int i;
1933 struct list_head *queue, *iter, *iter2; 1915 struct list_head *queue;
1934 struct dlm_lock *lock; 1916 struct dlm_lock *lock, *next;
1935 1917
1936 res->state |= DLM_LOCK_RES_RECOVERING; 1918 res->state |= DLM_LOCK_RES_RECOVERING;
1937 if (!list_empty(&res->recovering)) { 1919 if (!list_empty(&res->recovering)) {
@@ -1947,8 +1929,7 @@ void dlm_move_lockres_to_recovery_list(struct dlm_ctxt *dlm,
1947 /* find any pending locks and put them back on proper list */ 1929 /* find any pending locks and put them back on proper list */
1948 for (i=DLM_BLOCKED_LIST; i>=DLM_GRANTED_LIST; i--) { 1930 for (i=DLM_BLOCKED_LIST; i>=DLM_GRANTED_LIST; i--) {
1949 queue = dlm_list_idx_to_ptr(res, i); 1931 queue = dlm_list_idx_to_ptr(res, i);
1950 list_for_each_safe(iter, iter2, queue) { 1932 list_for_each_entry_safe(lock, next, queue, list) {
1951 lock = list_entry (iter, struct dlm_lock, list);
1952 dlm_lock_get(lock); 1933 dlm_lock_get(lock);
1953 if (lock->convert_pending) { 1934 if (lock->convert_pending) {
1954 /* move converting lock back to granted */ 1935 /* move converting lock back to granted */
@@ -2013,18 +1994,15 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
2013 u8 dead_node, u8 new_master) 1994 u8 dead_node, u8 new_master)
2014{ 1995{
2015 int i; 1996 int i;
2016 struct list_head *iter, *iter2;
2017 struct hlist_node *hash_iter; 1997 struct hlist_node *hash_iter;
2018 struct hlist_head *bucket; 1998 struct hlist_head *bucket;
2019 1999 struct dlm_lock_resource *res, *next;
2020 struct dlm_lock_resource *res;
2021 2000
2022 mlog_entry_void(); 2001 mlog_entry_void();
2023 2002
2024 assert_spin_locked(&dlm->spinlock); 2003 assert_spin_locked(&dlm->spinlock);
2025 2004
2026 list_for_each_safe(iter, iter2, &dlm->reco.resources) { 2005 list_for_each_entry_safe(res, next, &dlm->reco.resources, recovering) {
2027 res = list_entry (iter, struct dlm_lock_resource, recovering);
2028 if (res->owner == dead_node) { 2006 if (res->owner == dead_node) {
2029 list_del_init(&res->recovering); 2007 list_del_init(&res->recovering);
2030 spin_lock(&res->spinlock); 2008 spin_lock(&res->spinlock);
@@ -2099,7 +2077,7 @@ static inline int dlm_lvb_needs_invalidation(struct dlm_lock *lock, int local)
2099static void dlm_revalidate_lvb(struct dlm_ctxt *dlm, 2077static void dlm_revalidate_lvb(struct dlm_ctxt *dlm,
2100 struct dlm_lock_resource *res, u8 dead_node) 2078 struct dlm_lock_resource *res, u8 dead_node)
2101{ 2079{
2102 struct list_head *iter, *queue; 2080 struct list_head *queue;
2103 struct dlm_lock *lock; 2081 struct dlm_lock *lock;
2104 int blank_lvb = 0, local = 0; 2082 int blank_lvb = 0, local = 0;
2105 int i; 2083 int i;
@@ -2121,8 +2099,7 @@ static void dlm_revalidate_lvb(struct dlm_ctxt *dlm,
2121 2099
2122 for (i=DLM_GRANTED_LIST; i<=DLM_CONVERTING_LIST; i++) { 2100 for (i=DLM_GRANTED_LIST; i<=DLM_CONVERTING_LIST; i++) {
2123 queue = dlm_list_idx_to_ptr(res, i); 2101 queue = dlm_list_idx_to_ptr(res, i);
2124 list_for_each(iter, queue) { 2102 list_for_each_entry(lock, queue, list) {
2125 lock = list_entry (iter, struct dlm_lock, list);
2126 if (lock->ml.node == search_node) { 2103 if (lock->ml.node == search_node) {
2127 if (dlm_lvb_needs_invalidation(lock, local)) { 2104 if (dlm_lvb_needs_invalidation(lock, local)) {
2128 /* zero the lksb lvb and lockres lvb */ 2105 /* zero the lksb lvb and lockres lvb */
@@ -2143,8 +2120,7 @@ static void dlm_revalidate_lvb(struct dlm_ctxt *dlm,
2143static void dlm_free_dead_locks(struct dlm_ctxt *dlm, 2120static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
2144 struct dlm_lock_resource *res, u8 dead_node) 2121 struct dlm_lock_resource *res, u8 dead_node)
2145{ 2122{
2146 struct list_head *iter, *tmpiter; 2123 struct dlm_lock *lock, *next;
2147 struct dlm_lock *lock;
2148 unsigned int freed = 0; 2124 unsigned int freed = 0;
2149 2125
2150 /* this node is the lockres master: 2126 /* this node is the lockres master:
@@ -2155,24 +2131,21 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
2155 assert_spin_locked(&res->spinlock); 2131 assert_spin_locked(&res->spinlock);
2156 2132
2157 /* TODO: check pending_asts, pending_basts here */ 2133 /* TODO: check pending_asts, pending_basts here */
2158 list_for_each_safe(iter, tmpiter, &res->granted) { 2134 list_for_each_entry_safe(lock, next, &res->granted, list) {
2159 lock = list_entry (iter, struct dlm_lock, list);
2160 if (lock->ml.node == dead_node) { 2135 if (lock->ml.node == dead_node) {
2161 list_del_init(&lock->list); 2136 list_del_init(&lock->list);
2162 dlm_lock_put(lock); 2137 dlm_lock_put(lock);
2163 freed++; 2138 freed++;
2164 } 2139 }
2165 } 2140 }
2166 list_for_each_safe(iter, tmpiter, &res->converting) { 2141 list_for_each_entry_safe(lock, next, &res->converting, list) {
2167 lock = list_entry (iter, struct dlm_lock, list);
2168 if (lock->ml.node == dead_node) { 2142 if (lock->ml.node == dead_node) {
2169 list_del_init(&lock->list); 2143 list_del_init(&lock->list);
2170 dlm_lock_put(lock); 2144 dlm_lock_put(lock);
2171 freed++; 2145 freed++;
2172 } 2146 }
2173 } 2147 }
2174 list_for_each_safe(iter, tmpiter, &res->blocked) { 2148 list_for_each_entry_safe(lock, next, &res->blocked, list) {
2175 lock = list_entry (iter, struct dlm_lock, list);
2176 if (lock->ml.node == dead_node) { 2149 if (lock->ml.node == dead_node) {
2177 list_del_init(&lock->list); 2150 list_del_init(&lock->list);
2178 dlm_lock_put(lock); 2151 dlm_lock_put(lock);
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index d1bd305ef0d..f71250ed166 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -600,15 +600,13 @@ static inline int ocfs2_highest_compat_lock_level(int level)
600static void lockres_set_flags(struct ocfs2_lock_res *lockres, 600static void lockres_set_flags(struct ocfs2_lock_res *lockres,
601 unsigned long newflags) 601 unsigned long newflags)
602{ 602{
603 struct list_head *pos, *tmp; 603 struct ocfs2_mask_waiter *mw, *tmp;
604 struct ocfs2_mask_waiter *mw;
605 604
606 assert_spin_locked(&lockres->l_lock); 605 assert_spin_locked(&lockres->l_lock);
607 606
608 lockres->l_flags = newflags; 607 lockres->l_flags = newflags;
609 608
610 list_for_each_safe(pos, tmp, &lockres->l_mask_waiters) { 609 list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) {
611 mw = list_entry(pos, struct ocfs2_mask_waiter, mw_item);
612 if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) 610 if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
613 continue; 611 continue;
614 612
diff --git a/fs/ocfs2/endian.h b/fs/ocfs2/endian.h
index f226b220762..ff257628af1 100644
--- a/fs/ocfs2/endian.h
+++ b/fs/ocfs2/endian.h
@@ -32,6 +32,11 @@ static inline void le32_add_cpu(__le32 *var, u32 val)
32 *var = cpu_to_le32(le32_to_cpu(*var) + val); 32 *var = cpu_to_le32(le32_to_cpu(*var) + val);
33} 33}
34 34
35static inline void le64_add_cpu(__le64 *var, u64 val)
36{
37 *var = cpu_to_le64(le64_to_cpu(*var) + val);
38}
39
35static inline void le32_and_cpu(__le32 *var, u32 val) 40static inline void le32_and_cpu(__le32 *var, u32 val)
36{ 41{
37 *var = cpu_to_le32(le32_to_cpu(*var) & val); 42 *var = cpu_to_le32(le32_to_cpu(*var) & val);
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index ba2b2ab1c6e..03c1d365c78 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -109,17 +109,14 @@ static int ocfs2_extent_map_lookup(struct inode *inode, unsigned int cpos,
109 */ 109 */
110void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos) 110void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos)
111{ 111{
112 struct list_head *p, *n; 112 struct ocfs2_extent_map_item *emi, *n;
113 struct ocfs2_extent_map_item *emi;
114 struct ocfs2_inode_info *oi = OCFS2_I(inode); 113 struct ocfs2_inode_info *oi = OCFS2_I(inode);
115 struct ocfs2_extent_map *em = &oi->ip_extent_map; 114 struct ocfs2_extent_map *em = &oi->ip_extent_map;
116 LIST_HEAD(tmp_list); 115 LIST_HEAD(tmp_list);
117 unsigned int range; 116 unsigned int range;
118 117
119 spin_lock(&oi->ip_lock); 118 spin_lock(&oi->ip_lock);
120 list_for_each_safe(p, n, &em->em_list) { 119 list_for_each_entry_safe(emi, n, &em->em_list, ei_list) {
121 emi = list_entry(p, struct ocfs2_extent_map_item, ei_list);
122
123 if (emi->ei_cpos >= cpos) { 120 if (emi->ei_cpos >= cpos) {
124 /* Full truncate of this record. */ 121 /* Full truncate of this record. */
125 list_move(&emi->ei_list, &tmp_list); 122 list_move(&emi->ei_list, &tmp_list);
@@ -136,8 +133,7 @@ void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos)
136 } 133 }
137 spin_unlock(&oi->ip_lock); 134 spin_unlock(&oi->ip_lock);
138 135
139 list_for_each_safe(p, n, &tmp_list) { 136 list_for_each_entry_safe(emi, n, &tmp_list, ei_list) {
140 emi = list_entry(p, struct ocfs2_extent_map_item, ei_list);
141 list_del(&emi->ei_list); 137 list_del(&emi->ei_list);
142 kfree(emi); 138 kfree(emi);
143 } 139 }
@@ -377,37 +373,6 @@ out:
377 return ret; 373 return ret;
378} 374}
379 375
380/*
381 * Return the index of the extent record which contains cluster #v_cluster.
382 * -1 is returned if it was not found.
383 *
384 * Should work fine on interior and exterior nodes.
385 */
386static int ocfs2_search_extent_list(struct ocfs2_extent_list *el,
387 u32 v_cluster)
388{
389 int ret = -1;
390 int i;
391 struct ocfs2_extent_rec *rec;
392 u32 rec_end, rec_start, clusters;
393
394 for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
395 rec = &el->l_recs[i];
396
397 rec_start = le32_to_cpu(rec->e_cpos);
398 clusters = ocfs2_rec_clusters(el, rec);
399
400 rec_end = rec_start + clusters;
401
402 if (v_cluster >= rec_start && v_cluster < rec_end) {
403 ret = i;
404 break;
405 }
406 }
407
408 return ret;
409}
410
411int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, 376int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
412 u32 *p_cluster, u32 *num_clusters, 377 u32 *p_cluster, u32 *num_clusters,
413 unsigned int *extent_flags) 378 unsigned int *extent_flags)
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 4979b667571..f04c7aa834c 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -263,6 +263,7 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
263 int status; 263 int status;
264 handle_t *handle; 264 handle_t *handle;
265 struct ocfs2_dinode *di; 265 struct ocfs2_dinode *di;
266 u64 cluster_bytes;
266 267
267 mlog_entry_void(); 268 mlog_entry_void();
268 269
@@ -286,7 +287,9 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
286 /* 287 /*
287 * Do this before setting i_size. 288 * Do this before setting i_size.
288 */ 289 */
289 status = ocfs2_zero_tail_for_truncate(inode, handle, new_i_size); 290 cluster_bytes = ocfs2_align_bytes_to_clusters(inode->i_sb, new_i_size);
291 status = ocfs2_zero_range_for_truncate(inode, handle, new_i_size,
292 cluster_bytes);
290 if (status) { 293 if (status) {
291 mlog_errno(status); 294 mlog_errno(status);
292 goto out_commit; 295 goto out_commit;
@@ -326,9 +329,6 @@ static int ocfs2_truncate_file(struct inode *inode,
326 (unsigned long long)OCFS2_I(inode)->ip_blkno, 329 (unsigned long long)OCFS2_I(inode)->ip_blkno,
327 (unsigned long long)new_i_size); 330 (unsigned long long)new_i_size);
328 331
329 unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1);
330 truncate_inode_pages(inode->i_mapping, new_i_size);
331
332 fe = (struct ocfs2_dinode *) di_bh->b_data; 332 fe = (struct ocfs2_dinode *) di_bh->b_data;
333 if (!OCFS2_IS_VALID_DINODE(fe)) { 333 if (!OCFS2_IS_VALID_DINODE(fe)) {
334 OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe); 334 OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
@@ -363,16 +363,23 @@ static int ocfs2_truncate_file(struct inode *inode,
363 if (new_i_size == le64_to_cpu(fe->i_size)) 363 if (new_i_size == le64_to_cpu(fe->i_size))
364 goto bail; 364 goto bail;
365 365
366 down_write(&OCFS2_I(inode)->ip_alloc_sem);
367
366 /* This forces other nodes to sync and drop their pages. Do 368 /* This forces other nodes to sync and drop their pages. Do
367 * this even if we have a truncate without allocation change - 369 * this even if we have a truncate without allocation change -
368 * ocfs2 cluster sizes can be much greater than page size, so 370 * ocfs2 cluster sizes can be much greater than page size, so
369 * we have to truncate them anyway. */ 371 * we have to truncate them anyway. */
370 status = ocfs2_data_lock(inode, 1); 372 status = ocfs2_data_lock(inode, 1);
371 if (status < 0) { 373 if (status < 0) {
374 up_write(&OCFS2_I(inode)->ip_alloc_sem);
375
372 mlog_errno(status); 376 mlog_errno(status);
373 goto bail; 377 goto bail;
374 } 378 }
375 379
380 unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1);
381 truncate_inode_pages(inode->i_mapping, new_i_size);
382
376 /* alright, we're going to need to do a full blown alloc size 383 /* alright, we're going to need to do a full blown alloc size
377 * change. Orphan the inode so that recovery can complete the 384 * change. Orphan the inode so that recovery can complete the
378 * truncate if necessary. This does the task of marking 385 * truncate if necessary. This does the task of marking
@@ -399,6 +406,8 @@ static int ocfs2_truncate_file(struct inode *inode,
399bail_unlock_data: 406bail_unlock_data:
400 ocfs2_data_unlock(inode, 1); 407 ocfs2_data_unlock(inode, 1);
401 408
409 up_write(&OCFS2_I(inode)->ip_alloc_sem);
410
402bail: 411bail:
403 412
404 mlog_exit(status); 413 mlog_exit(status);
@@ -419,6 +428,7 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
419 struct inode *inode, 428 struct inode *inode,
420 u32 *logical_offset, 429 u32 *logical_offset,
421 u32 clusters_to_add, 430 u32 clusters_to_add,
431 int mark_unwritten,
422 struct buffer_head *fe_bh, 432 struct buffer_head *fe_bh,
423 handle_t *handle, 433 handle_t *handle,
424 struct ocfs2_alloc_context *data_ac, 434 struct ocfs2_alloc_context *data_ac,
@@ -431,9 +441,13 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
431 enum ocfs2_alloc_restarted reason = RESTART_NONE; 441 enum ocfs2_alloc_restarted reason = RESTART_NONE;
432 u32 bit_off, num_bits; 442 u32 bit_off, num_bits;
433 u64 block; 443 u64 block;
444 u8 flags = 0;
434 445
435 BUG_ON(!clusters_to_add); 446 BUG_ON(!clusters_to_add);
436 447
448 if (mark_unwritten)
449 flags = OCFS2_EXT_UNWRITTEN;
450
437 free_extents = ocfs2_num_free_extents(osb, inode, fe); 451 free_extents = ocfs2_num_free_extents(osb, inode, fe);
438 if (free_extents < 0) { 452 if (free_extents < 0) {
439 status = free_extents; 453 status = free_extents;
@@ -483,7 +497,7 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
483 num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno); 497 num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
484 status = ocfs2_insert_extent(osb, handle, inode, fe_bh, 498 status = ocfs2_insert_extent(osb, handle, inode, fe_bh,
485 *logical_offset, block, num_bits, 499 *logical_offset, block, num_bits,
486 meta_ac); 500 flags, meta_ac);
487 if (status < 0) { 501 if (status < 0) {
488 mlog_errno(status); 502 mlog_errno(status);
489 goto leave; 503 goto leave;
@@ -516,25 +530,31 @@ leave:
516 * For a given allocation, determine which allocators will need to be 530 * For a given allocation, determine which allocators will need to be
517 * accessed, and lock them, reserving the appropriate number of bits. 531 * accessed, and lock them, reserving the appropriate number of bits.
518 * 532 *
519 * Called from ocfs2_extend_allocation() for file systems which don't 533 * Sparse file systems call this from ocfs2_write_begin_nolock()
520 * support holes, and from ocfs2_write() for file systems which 534 * and ocfs2_allocate_unwritten_extents().
521 * understand sparse inodes. 535 *
536 * File systems which don't support holes call this from
537 * ocfs2_extend_allocation().
522 */ 538 */
523int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, 539int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
524 u32 clusters_to_add, 540 u32 clusters_to_add, u32 extents_to_split,
525 struct ocfs2_alloc_context **data_ac, 541 struct ocfs2_alloc_context **data_ac,
526 struct ocfs2_alloc_context **meta_ac) 542 struct ocfs2_alloc_context **meta_ac)
527{ 543{
528 int ret, num_free_extents; 544 int ret = 0, num_free_extents;
545 unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split;
529 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 546 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
530 547
531 *meta_ac = NULL; 548 *meta_ac = NULL;
532 *data_ac = NULL; 549 if (data_ac)
550 *data_ac = NULL;
551
552 BUG_ON(clusters_to_add != 0 && data_ac == NULL);
533 553
534 mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, " 554 mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, "
535 "clusters_to_add = %u\n", 555 "clusters_to_add = %u, extents_to_split = %u\n",
536 (unsigned long long)OCFS2_I(inode)->ip_blkno, i_size_read(inode), 556 (unsigned long long)OCFS2_I(inode)->ip_blkno, i_size_read(inode),
537 le32_to_cpu(di->i_clusters), clusters_to_add); 557 le32_to_cpu(di->i_clusters), clusters_to_add, extents_to_split);
538 558
539 num_free_extents = ocfs2_num_free_extents(osb, inode, di); 559 num_free_extents = ocfs2_num_free_extents(osb, inode, di);
540 if (num_free_extents < 0) { 560 if (num_free_extents < 0) {
@@ -552,9 +572,12 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
552 * 572 *
553 * Most of the time we'll only be seeing this 1 cluster at a time 573 * Most of the time we'll only be seeing this 1 cluster at a time
554 * anyway. 574 * anyway.
575 *
576 * Always lock for any unwritten extents - we might want to
577 * add blocks during a split.
555 */ 578 */
556 if (!num_free_extents || 579 if (!num_free_extents ||
557 (ocfs2_sparse_alloc(osb) && num_free_extents < clusters_to_add)) { 580 (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) {
558 ret = ocfs2_reserve_new_metadata(osb, di, meta_ac); 581 ret = ocfs2_reserve_new_metadata(osb, di, meta_ac);
559 if (ret < 0) { 582 if (ret < 0) {
560 if (ret != -ENOSPC) 583 if (ret != -ENOSPC)
@@ -563,6 +586,9 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
563 } 586 }
564 } 587 }
565 588
589 if (clusters_to_add == 0)
590 goto out;
591
566 ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac); 592 ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac);
567 if (ret < 0) { 593 if (ret < 0) {
568 if (ret != -ENOSPC) 594 if (ret != -ENOSPC)
@@ -585,14 +611,13 @@ out:
585 return ret; 611 return ret;
586} 612}
587 613
588static int ocfs2_extend_allocation(struct inode *inode, 614static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
589 u32 clusters_to_add) 615 u32 clusters_to_add, int mark_unwritten)
590{ 616{
591 int status = 0; 617 int status = 0;
592 int restart_func = 0; 618 int restart_func = 0;
593 int drop_alloc_sem = 0;
594 int credits; 619 int credits;
595 u32 prev_clusters, logical_start; 620 u32 prev_clusters;
596 struct buffer_head *bh = NULL; 621 struct buffer_head *bh = NULL;
597 struct ocfs2_dinode *fe = NULL; 622 struct ocfs2_dinode *fe = NULL;
598 handle_t *handle = NULL; 623 handle_t *handle = NULL;
@@ -607,7 +632,7 @@ static int ocfs2_extend_allocation(struct inode *inode,
607 * This function only exists for file systems which don't 632 * This function only exists for file systems which don't
608 * support holes. 633 * support holes.
609 */ 634 */
610 BUG_ON(ocfs2_sparse_alloc(osb)); 635 BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb));
611 636
612 status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh, 637 status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh,
613 OCFS2_BH_CACHED, inode); 638 OCFS2_BH_CACHED, inode);
@@ -623,19 +648,10 @@ static int ocfs2_extend_allocation(struct inode *inode,
623 goto leave; 648 goto leave;
624 } 649 }
625 650
626 logical_start = OCFS2_I(inode)->ip_clusters;
627
628restart_all: 651restart_all:
629 BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters); 652 BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters);
630 653
631 /* blocks peope in read/write from reading our allocation 654 status = ocfs2_lock_allocators(inode, fe, clusters_to_add, 0, &data_ac,
632 * until we're done changing it. We depend on i_mutex to block
633 * other extend/truncate calls while we're here. Ordering wrt
634 * start_trans is important here -- always do it before! */
635 down_write(&OCFS2_I(inode)->ip_alloc_sem);
636 drop_alloc_sem = 1;
637
638 status = ocfs2_lock_allocators(inode, fe, clusters_to_add, &data_ac,
639 &meta_ac); 655 &meta_ac);
640 if (status) { 656 if (status) {
641 mlog_errno(status); 657 mlog_errno(status);
@@ -668,6 +684,7 @@ restarted_transaction:
668 inode, 684 inode,
669 &logical_start, 685 &logical_start,
670 clusters_to_add, 686 clusters_to_add,
687 mark_unwritten,
671 bh, 688 bh,
672 handle, 689 handle,
673 data_ac, 690 data_ac,
@@ -720,10 +737,6 @@ restarted_transaction:
720 OCFS2_I(inode)->ip_clusters, i_size_read(inode)); 737 OCFS2_I(inode)->ip_clusters, i_size_read(inode));
721 738
722leave: 739leave:
723 if (drop_alloc_sem) {
724 up_write(&OCFS2_I(inode)->ip_alloc_sem);
725 drop_alloc_sem = 0;
726 }
727 if (handle) { 740 if (handle) {
728 ocfs2_commit_trans(osb, handle); 741 ocfs2_commit_trans(osb, handle);
729 handle = NULL; 742 handle = NULL;
@@ -749,6 +762,25 @@ leave:
749 return status; 762 return status;
750} 763}
751 764
765static int ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
766 u32 clusters_to_add, int mark_unwritten)
767{
768 int ret;
769
770 /*
771 * The alloc sem blocks peope in read/write from reading our
772 * allocation until we're done changing it. We depend on
773 * i_mutex to block other extend/truncate calls while we're
774 * here.
775 */
776 down_write(&OCFS2_I(inode)->ip_alloc_sem);
777 ret = __ocfs2_extend_allocation(inode, logical_start, clusters_to_add,
778 mark_unwritten);
779 up_write(&OCFS2_I(inode)->ip_alloc_sem);
780
781 return ret;
782}
783
752/* Some parts of this taken from generic_cont_expand, which turned out 784/* Some parts of this taken from generic_cont_expand, which turned out
753 * to be too fragile to do exactly what we need without us having to 785 * to be too fragile to do exactly what we need without us having to
754 * worry about recursive locking in ->prepare_write() and 786 * worry about recursive locking in ->prepare_write() and
@@ -890,7 +922,9 @@ static int ocfs2_extend_file(struct inode *inode,
890 } 922 }
891 923
892 if (clusters_to_add) { 924 if (clusters_to_add) {
893 ret = ocfs2_extend_allocation(inode, clusters_to_add); 925 ret = ocfs2_extend_allocation(inode,
926 OCFS2_I(inode)->ip_clusters,
927 clusters_to_add, 0);
894 if (ret < 0) { 928 if (ret < 0) {
895 mlog_errno(ret); 929 mlog_errno(ret);
896 goto out_unlock; 930 goto out_unlock;
@@ -995,6 +1029,13 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
995 goto bail_unlock; 1029 goto bail_unlock;
996 } 1030 }
997 1031
1032 /*
1033 * This will intentionally not wind up calling vmtruncate(),
1034 * since all the work for a size change has been done above.
1035 * Otherwise, we could get into problems with truncate as
1036 * ip_alloc_sem is used there to protect against i_size
1037 * changes.
1038 */
998 status = inode_setattr(inode, attr); 1039 status = inode_setattr(inode, attr);
999 if (status < 0) { 1040 if (status < 0) {
1000 mlog_errno(status); 1041 mlog_errno(status);
@@ -1070,17 +1111,16 @@ out:
1070 return ret; 1111 return ret;
1071} 1112}
1072 1113
1073static int ocfs2_write_remove_suid(struct inode *inode) 1114static int __ocfs2_write_remove_suid(struct inode *inode,
1115 struct buffer_head *bh)
1074{ 1116{
1075 int ret; 1117 int ret;
1076 struct buffer_head *bh = NULL;
1077 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1078 handle_t *handle; 1118 handle_t *handle;
1079 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1119 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1080 struct ocfs2_dinode *di; 1120 struct ocfs2_dinode *di;
1081 1121
1082 mlog_entry("(Inode %llu, mode 0%o)\n", 1122 mlog_entry("(Inode %llu, mode 0%o)\n",
1083 (unsigned long long)oi->ip_blkno, inode->i_mode); 1123 (unsigned long long)OCFS2_I(inode)->ip_blkno, inode->i_mode);
1084 1124
1085 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); 1125 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
1086 if (handle == NULL) { 1126 if (handle == NULL) {
@@ -1089,17 +1129,11 @@ static int ocfs2_write_remove_suid(struct inode *inode)
1089 goto out; 1129 goto out;
1090 } 1130 }
1091 1131
1092 ret = ocfs2_read_block(osb, oi->ip_blkno, &bh, OCFS2_BH_CACHED, inode);
1093 if (ret < 0) {
1094 mlog_errno(ret);
1095 goto out_trans;
1096 }
1097
1098 ret = ocfs2_journal_access(handle, inode, bh, 1132 ret = ocfs2_journal_access(handle, inode, bh,
1099 OCFS2_JOURNAL_ACCESS_WRITE); 1133 OCFS2_JOURNAL_ACCESS_WRITE);
1100 if (ret < 0) { 1134 if (ret < 0) {
1101 mlog_errno(ret); 1135 mlog_errno(ret);
1102 goto out_bh; 1136 goto out_trans;
1103 } 1137 }
1104 1138
1105 inode->i_mode &= ~S_ISUID; 1139 inode->i_mode &= ~S_ISUID;
@@ -1112,8 +1146,7 @@ static int ocfs2_write_remove_suid(struct inode *inode)
1112 ret = ocfs2_journal_dirty(handle, bh); 1146 ret = ocfs2_journal_dirty(handle, bh);
1113 if (ret < 0) 1147 if (ret < 0)
1114 mlog_errno(ret); 1148 mlog_errno(ret);
1115out_bh: 1149
1116 brelse(bh);
1117out_trans: 1150out_trans:
1118 ocfs2_commit_trans(osb, handle); 1151 ocfs2_commit_trans(osb, handle);
1119out: 1152out:
@@ -1159,6 +1192,460 @@ out:
1159 return ret; 1192 return ret;
1160} 1193}
1161 1194
1195static int ocfs2_write_remove_suid(struct inode *inode)
1196{
1197 int ret;
1198 struct buffer_head *bh = NULL;
1199 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1200
1201 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
1202 oi->ip_blkno, &bh, OCFS2_BH_CACHED, inode);
1203 if (ret < 0) {
1204 mlog_errno(ret);
1205 goto out;
1206 }
1207
1208 ret = __ocfs2_write_remove_suid(inode, bh);
1209out:
1210 brelse(bh);
1211 return ret;
1212}
1213
1214/*
1215 * Allocate enough extents to cover the region starting at byte offset
1216 * start for len bytes. Existing extents are skipped, any extents
1217 * added are marked as "unwritten".
1218 */
1219static int ocfs2_allocate_unwritten_extents(struct inode *inode,
1220 u64 start, u64 len)
1221{
1222 int ret;
1223 u32 cpos, phys_cpos, clusters, alloc_size;
1224
1225 /*
1226 * We consider both start and len to be inclusive.
1227 */
1228 cpos = start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
1229 clusters = ocfs2_clusters_for_bytes(inode->i_sb, start + len);
1230 clusters -= cpos;
1231
1232 while (clusters) {
1233 ret = ocfs2_get_clusters(inode, cpos, &phys_cpos,
1234 &alloc_size, NULL);
1235 if (ret) {
1236 mlog_errno(ret);
1237 goto out;
1238 }
1239
1240 /*
1241 * Hole or existing extent len can be arbitrary, so
1242 * cap it to our own allocation request.
1243 */
1244 if (alloc_size > clusters)
1245 alloc_size = clusters;
1246
1247 if (phys_cpos) {
1248 /*
1249 * We already have an allocation at this
1250 * region so we can safely skip it.
1251 */
1252 goto next;
1253 }
1254
1255 ret = __ocfs2_extend_allocation(inode, cpos, alloc_size, 1);
1256 if (ret) {
1257 if (ret != -ENOSPC)
1258 mlog_errno(ret);
1259 goto out;
1260 }
1261
1262next:
1263 cpos += alloc_size;
1264 clusters -= alloc_size;
1265 }
1266
1267 ret = 0;
1268out:
1269 return ret;
1270}
1271
1272static int __ocfs2_remove_inode_range(struct inode *inode,
1273 struct buffer_head *di_bh,
1274 u32 cpos, u32 phys_cpos, u32 len,
1275 struct ocfs2_cached_dealloc_ctxt *dealloc)
1276{
1277 int ret;
1278 u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
1279 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1280 struct inode *tl_inode = osb->osb_tl_inode;
1281 handle_t *handle;
1282 struct ocfs2_alloc_context *meta_ac = NULL;
1283 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1284
1285 ret = ocfs2_lock_allocators(inode, di, 0, 1, NULL, &meta_ac);
1286 if (ret) {
1287 mlog_errno(ret);
1288 return ret;
1289 }
1290
1291 mutex_lock(&tl_inode->i_mutex);
1292
1293 if (ocfs2_truncate_log_needs_flush(osb)) {
1294 ret = __ocfs2_flush_truncate_log(osb);
1295 if (ret < 0) {
1296 mlog_errno(ret);
1297 goto out;
1298 }
1299 }
1300
1301 handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
1302 if (handle == NULL) {
1303 ret = -ENOMEM;
1304 mlog_errno(ret);
1305 goto out;
1306 }
1307
1308 ret = ocfs2_journal_access(handle, inode, di_bh,
1309 OCFS2_JOURNAL_ACCESS_WRITE);
1310 if (ret) {
1311 mlog_errno(ret);
1312 goto out;
1313 }
1314
1315 ret = ocfs2_remove_extent(inode, di_bh, cpos, len, handle, meta_ac,
1316 dealloc);
1317 if (ret) {
1318 mlog_errno(ret);
1319 goto out_commit;
1320 }
1321
1322 OCFS2_I(inode)->ip_clusters -= len;
1323 di->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters);
1324
1325 ret = ocfs2_journal_dirty(handle, di_bh);
1326 if (ret) {
1327 mlog_errno(ret);
1328 goto out_commit;
1329 }
1330
1331 ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len);
1332 if (ret)
1333 mlog_errno(ret);
1334
1335out_commit:
1336 ocfs2_commit_trans(osb, handle);
1337out:
1338 mutex_unlock(&tl_inode->i_mutex);
1339
1340 if (meta_ac)
1341 ocfs2_free_alloc_context(meta_ac);
1342
1343 return ret;
1344}
1345
1346/*
1347 * Truncate a byte range, avoiding pages within partial clusters. This
1348 * preserves those pages for the zeroing code to write to.
1349 */
1350static void ocfs2_truncate_cluster_pages(struct inode *inode, u64 byte_start,
1351 u64 byte_len)
1352{
1353 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1354 loff_t start, end;
1355 struct address_space *mapping = inode->i_mapping;
1356
1357 start = (loff_t)ocfs2_align_bytes_to_clusters(inode->i_sb, byte_start);
1358 end = byte_start + byte_len;
1359 end = end & ~(osb->s_clustersize - 1);
1360
1361 if (start < end) {
1362 unmap_mapping_range(mapping, start, end - start, 0);
1363 truncate_inode_pages_range(mapping, start, end - 1);
1364 }
1365}
1366
1367static int ocfs2_zero_partial_clusters(struct inode *inode,
1368 u64 start, u64 len)
1369{
1370 int ret = 0;
1371 u64 tmpend, end = start + len;
1372 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1373 unsigned int csize = osb->s_clustersize;
1374 handle_t *handle;
1375
1376 /*
1377 * The "start" and "end" values are NOT necessarily part of
1378 * the range whose allocation is being deleted. Rather, this
1379 * is what the user passed in with the request. We must zero
1380 * partial clusters here. There's no need to worry about
1381 * physical allocation - the zeroing code knows to skip holes.
1382 */
1383 mlog(0, "byte start: %llu, end: %llu\n",
1384 (unsigned long long)start, (unsigned long long)end);
1385
1386 /*
1387 * If both edges are on a cluster boundary then there's no
1388 * zeroing required as the region is part of the allocation to
1389 * be truncated.
1390 */
1391 if ((start & (csize - 1)) == 0 && (end & (csize - 1)) == 0)
1392 goto out;
1393
1394 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
1395 if (handle == NULL) {
1396 ret = -ENOMEM;
1397 mlog_errno(ret);
1398 goto out;
1399 }
1400
1401 /*
1402 * We want to get the byte offset of the end of the 1st cluster.
1403 */
1404 tmpend = (u64)osb->s_clustersize + (start & ~(osb->s_clustersize - 1));
1405 if (tmpend > end)
1406 tmpend = end;
1407
1408 mlog(0, "1st range: start: %llu, tmpend: %llu\n",
1409 (unsigned long long)start, (unsigned long long)tmpend);
1410
1411 ret = ocfs2_zero_range_for_truncate(inode, handle, start, tmpend);
1412 if (ret)
1413 mlog_errno(ret);
1414
1415 if (tmpend < end) {
1416 /*
1417 * This may make start and end equal, but the zeroing
1418 * code will skip any work in that case so there's no
1419 * need to catch it up here.
1420 */
1421 start = end & ~(osb->s_clustersize - 1);
1422
1423 mlog(0, "2nd range: start: %llu, end: %llu\n",
1424 (unsigned long long)start, (unsigned long long)end);
1425
1426 ret = ocfs2_zero_range_for_truncate(inode, handle, start, end);
1427 if (ret)
1428 mlog_errno(ret);
1429 }
1430
1431 ocfs2_commit_trans(osb, handle);
1432out:
1433 return ret;
1434}
1435
1436static int ocfs2_remove_inode_range(struct inode *inode,
1437 struct buffer_head *di_bh, u64 byte_start,
1438 u64 byte_len)
1439{
1440 int ret = 0;
1441 u32 trunc_start, trunc_len, cpos, phys_cpos, alloc_size;
1442 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1443 struct ocfs2_cached_dealloc_ctxt dealloc;
1444
1445 ocfs2_init_dealloc_ctxt(&dealloc);
1446
1447 if (byte_len == 0)
1448 return 0;
1449
1450 trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start);
1451 trunc_len = (byte_start + byte_len) >> osb->s_clustersize_bits;
1452 if (trunc_len >= trunc_start)
1453 trunc_len -= trunc_start;
1454 else
1455 trunc_len = 0;
1456
1457 mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, clen: %u\n",
1458 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1459 (unsigned long long)byte_start,
1460 (unsigned long long)byte_len, trunc_start, trunc_len);
1461
1462 ret = ocfs2_zero_partial_clusters(inode, byte_start, byte_len);
1463 if (ret) {
1464 mlog_errno(ret);
1465 goto out;
1466 }
1467
1468 cpos = trunc_start;
1469 while (trunc_len) {
1470 ret = ocfs2_get_clusters(inode, cpos, &phys_cpos,
1471 &alloc_size, NULL);
1472 if (ret) {
1473 mlog_errno(ret);
1474 goto out;
1475 }
1476
1477 if (alloc_size > trunc_len)
1478 alloc_size = trunc_len;
1479
1480 /* Only do work for non-holes */
1481 if (phys_cpos != 0) {
1482 ret = __ocfs2_remove_inode_range(inode, di_bh, cpos,
1483 phys_cpos, alloc_size,
1484 &dealloc);
1485 if (ret) {
1486 mlog_errno(ret);
1487 goto out;
1488 }
1489 }
1490
1491 cpos += alloc_size;
1492 trunc_len -= alloc_size;
1493 }
1494
1495 ocfs2_truncate_cluster_pages(inode, byte_start, byte_len);
1496
1497out:
1498 ocfs2_schedule_truncate_log_flush(osb, 1);
1499 ocfs2_run_deallocs(osb, &dealloc);
1500
1501 return ret;
1502}
1503
1504/*
1505 * Parts of this function taken from xfs_change_file_space()
1506 */
1507int ocfs2_change_file_space(struct file *file, unsigned int cmd,
1508 struct ocfs2_space_resv *sr)
1509{
1510 int ret;
1511 s64 llen;
1512 struct inode *inode = file->f_path.dentry->d_inode;
1513 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1514 struct buffer_head *di_bh = NULL;
1515 handle_t *handle;
1516 unsigned long long max_off = ocfs2_max_file_offset(inode->i_sb->s_blocksize_bits);
1517
1518 if ((cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) &&
1519 !ocfs2_writes_unwritten_extents(osb))
1520 return -ENOTTY;
1521 else if ((cmd == OCFS2_IOC_UNRESVSP || cmd == OCFS2_IOC_UNRESVSP64) &&
1522 !ocfs2_sparse_alloc(osb))
1523 return -ENOTTY;
1524
1525 if (!S_ISREG(inode->i_mode))
1526 return -EINVAL;
1527
1528 if (!(file->f_mode & FMODE_WRITE))
1529 return -EBADF;
1530
1531 if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
1532 return -EROFS;
1533
1534 mutex_lock(&inode->i_mutex);
1535
1536 /*
1537 * This prevents concurrent writes on other nodes
1538 */
1539 ret = ocfs2_rw_lock(inode, 1);
1540 if (ret) {
1541 mlog_errno(ret);
1542 goto out;
1543 }
1544
1545 ret = ocfs2_meta_lock(inode, &di_bh, 1);
1546 if (ret) {
1547 mlog_errno(ret);
1548 goto out_rw_unlock;
1549 }
1550
1551 if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) {
1552 ret = -EPERM;
1553 goto out_meta_unlock;
1554 }
1555
1556 switch (sr->l_whence) {
1557 case 0: /*SEEK_SET*/
1558 break;
1559 case 1: /*SEEK_CUR*/
1560 sr->l_start += file->f_pos;
1561 break;
1562 case 2: /*SEEK_END*/
1563 sr->l_start += i_size_read(inode);
1564 break;
1565 default:
1566 ret = -EINVAL;
1567 goto out_meta_unlock;
1568 }
1569 sr->l_whence = 0;
1570
1571 llen = sr->l_len > 0 ? sr->l_len - 1 : sr->l_len;
1572
1573 if (sr->l_start < 0
1574 || sr->l_start > max_off
1575 || (sr->l_start + llen) < 0
1576 || (sr->l_start + llen) > max_off) {
1577 ret = -EINVAL;
1578 goto out_meta_unlock;
1579 }
1580
1581 if (cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) {
1582 if (sr->l_len <= 0) {
1583 ret = -EINVAL;
1584 goto out_meta_unlock;
1585 }
1586 }
1587
1588 if (should_remove_suid(file->f_path.dentry)) {
1589 ret = __ocfs2_write_remove_suid(inode, di_bh);
1590 if (ret) {
1591 mlog_errno(ret);
1592 goto out_meta_unlock;
1593 }
1594 }
1595
1596 down_write(&OCFS2_I(inode)->ip_alloc_sem);
1597 switch (cmd) {
1598 case OCFS2_IOC_RESVSP:
1599 case OCFS2_IOC_RESVSP64:
1600 /*
1601 * This takes unsigned offsets, but the signed ones we
1602 * pass have been checked against overflow above.
1603 */
1604 ret = ocfs2_allocate_unwritten_extents(inode, sr->l_start,
1605 sr->l_len);
1606 break;
1607 case OCFS2_IOC_UNRESVSP:
1608 case OCFS2_IOC_UNRESVSP64:
1609 ret = ocfs2_remove_inode_range(inode, di_bh, sr->l_start,
1610 sr->l_len);
1611 break;
1612 default:
1613 ret = -EINVAL;
1614 }
1615 up_write(&OCFS2_I(inode)->ip_alloc_sem);
1616 if (ret) {
1617 mlog_errno(ret);
1618 goto out_meta_unlock;
1619 }
1620
1621 /*
1622 * We update c/mtime for these changes
1623 */
1624 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
1625 if (IS_ERR(handle)) {
1626 ret = PTR_ERR(handle);
1627 mlog_errno(ret);
1628 goto out_meta_unlock;
1629 }
1630
1631 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
1632 ret = ocfs2_mark_inode_dirty(handle, inode, di_bh);
1633 if (ret < 0)
1634 mlog_errno(ret);
1635
1636 ocfs2_commit_trans(osb, handle);
1637
1638out_meta_unlock:
1639 brelse(di_bh);
1640 ocfs2_meta_unlock(inode, 1);
1641out_rw_unlock:
1642 ocfs2_rw_unlock(inode, 1);
1643
1644 mutex_unlock(&inode->i_mutex);
1645out:
1646 return ret;
1647}
1648
1162static int ocfs2_prepare_inode_for_write(struct dentry *dentry, 1649static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
1163 loff_t *ppos, 1650 loff_t *ppos,
1164 size_t count, 1651 size_t count,
@@ -1329,15 +1816,16 @@ ocfs2_set_next_iovec(const struct iovec **iovp, size_t *basep, size_t bytes)
1329 *basep = base; 1816 *basep = base;
1330} 1817}
1331 1818
1332static struct page * ocfs2_get_write_source(struct ocfs2_buffered_write_priv *bp, 1819static struct page * ocfs2_get_write_source(char **ret_src_buf,
1333 const struct iovec *cur_iov, 1820 const struct iovec *cur_iov,
1334 size_t iov_offset) 1821 size_t iov_offset)
1335{ 1822{
1336 int ret; 1823 int ret;
1337 char *buf; 1824 char *buf = cur_iov->iov_base + iov_offset;
1338 struct page *src_page = NULL; 1825 struct page *src_page = NULL;
1826 unsigned long off;
1339 1827
1340 buf = cur_iov->iov_base + iov_offset; 1828 off = (unsigned long)(buf) & ~PAGE_CACHE_MASK;
1341 1829
1342 if (!segment_eq(get_fs(), KERNEL_DS)) { 1830 if (!segment_eq(get_fs(), KERNEL_DS)) {
1343 /* 1831 /*
@@ -1349,18 +1837,17 @@ static struct page * ocfs2_get_write_source(struct ocfs2_buffered_write_priv *bp
1349 (unsigned long)buf & PAGE_CACHE_MASK, 1, 1837 (unsigned long)buf & PAGE_CACHE_MASK, 1,
1350 0, 0, &src_page, NULL); 1838 0, 0, &src_page, NULL);
1351 if (ret == 1) 1839 if (ret == 1)
1352 bp->b_src_buf = kmap(src_page); 1840 *ret_src_buf = kmap(src_page) + off;
1353 else 1841 else
1354 src_page = ERR_PTR(-EFAULT); 1842 src_page = ERR_PTR(-EFAULT);
1355 } else { 1843 } else {
1356 bp->b_src_buf = buf; 1844 *ret_src_buf = buf;
1357 } 1845 }
1358 1846
1359 return src_page; 1847 return src_page;
1360} 1848}
1361 1849
1362static void ocfs2_put_write_source(struct ocfs2_buffered_write_priv *bp, 1850static void ocfs2_put_write_source(struct page *page)
1363 struct page *page)
1364{ 1851{
1365 if (page) { 1852 if (page) {
1366 kunmap(page); 1853 kunmap(page);
@@ -1376,10 +1863,12 @@ static ssize_t ocfs2_file_buffered_write(struct file *file, loff_t *ppos,
1376{ 1863{
1377 int ret = 0; 1864 int ret = 0;
1378 ssize_t copied, total = 0; 1865 ssize_t copied, total = 0;
1379 size_t iov_offset = 0; 1866 size_t iov_offset = 0, bytes;
1867 loff_t pos;
1380 const struct iovec *cur_iov = iov; 1868 const struct iovec *cur_iov = iov;
1381 struct ocfs2_buffered_write_priv bp; 1869 struct page *user_page, *page;
1382 struct page *page; 1870 char *buf, *dst;
1871 void *fsdata;
1383 1872
1384 /* 1873 /*
1385 * handle partial DIO write. Adjust cur_iov if needed. 1874 * handle partial DIO write. Adjust cur_iov if needed.
@@ -1387,21 +1876,38 @@ static ssize_t ocfs2_file_buffered_write(struct file *file, loff_t *ppos,
1387 ocfs2_set_next_iovec(&cur_iov, &iov_offset, o_direct_written); 1876 ocfs2_set_next_iovec(&cur_iov, &iov_offset, o_direct_written);
1388 1877
1389 do { 1878 do {
1390 bp.b_cur_off = iov_offset; 1879 pos = *ppos;
1391 bp.b_cur_iov = cur_iov;
1392 1880
1393 page = ocfs2_get_write_source(&bp, cur_iov, iov_offset); 1881 user_page = ocfs2_get_write_source(&buf, cur_iov, iov_offset);
1394 if (IS_ERR(page)) { 1882 if (IS_ERR(user_page)) {
1395 ret = PTR_ERR(page); 1883 ret = PTR_ERR(user_page);
1396 goto out; 1884 goto out;
1397 } 1885 }
1398 1886
1399 copied = ocfs2_buffered_write_cluster(file, *ppos, count, 1887 /* Stay within our page boundaries */
1400 ocfs2_map_and_write_user_data, 1888 bytes = min((PAGE_CACHE_SIZE - ((unsigned long)pos & ~PAGE_CACHE_MASK)),
1401 &bp); 1889 (PAGE_CACHE_SIZE - ((unsigned long)buf & ~PAGE_CACHE_MASK)));
1890 /* Stay within the vector boundary */
1891 bytes = min_t(size_t, bytes, cur_iov->iov_len - iov_offset);
1892 /* Stay within count */
1893 bytes = min(bytes, count);
1894
1895 page = NULL;
1896 ret = ocfs2_write_begin(file, file->f_mapping, pos, bytes, 0,
1897 &page, &fsdata);
1898 if (ret) {
1899 mlog_errno(ret);
1900 goto out;
1901 }
1402 1902
1403 ocfs2_put_write_source(&bp, page); 1903 dst = kmap_atomic(page, KM_USER0);
1904 memcpy(dst + (pos & (PAGE_CACHE_SIZE - 1)), buf, bytes);
1905 kunmap_atomic(dst, KM_USER0);
1906 flush_dcache_page(page);
1907 ocfs2_put_write_source(user_page);
1404 1908
1909 copied = ocfs2_write_end(file, file->f_mapping, pos, bytes,
1910 bytes, page, fsdata);
1405 if (copied < 0) { 1911 if (copied < 0) {
1406 mlog_errno(copied); 1912 mlog_errno(copied);
1407 ret = copied; 1913 ret = copied;
@@ -1409,7 +1915,7 @@ static ssize_t ocfs2_file_buffered_write(struct file *file, loff_t *ppos,
1409 } 1915 }
1410 1916
1411 total += copied; 1917 total += copied;
1412 *ppos = *ppos + copied; 1918 *ppos = pos + copied;
1413 count -= copied; 1919 count -= copied;
1414 1920
1415 ocfs2_set_next_iovec(&cur_iov, &iov_offset, copied); 1921 ocfs2_set_next_iovec(&cur_iov, &iov_offset, copied);
@@ -1579,52 +2085,46 @@ static int ocfs2_splice_write_actor(struct pipe_inode_info *pipe,
1579 struct pipe_buffer *buf, 2085 struct pipe_buffer *buf,
1580 struct splice_desc *sd) 2086 struct splice_desc *sd)
1581{ 2087{
1582 int ret, count, total = 0; 2088 int ret, count;
1583 ssize_t copied = 0; 2089 ssize_t copied = 0;
1584 struct ocfs2_splice_write_priv sp; 2090 struct file *file = sd->u.file;
2091 unsigned int offset;
2092 struct page *page = NULL;
2093 void *fsdata;
2094 char *src, *dst;
1585 2095
1586 ret = buf->ops->confirm(pipe, buf); 2096 ret = buf->ops->confirm(pipe, buf);
1587 if (ret) 2097 if (ret)
1588 goto out; 2098 goto out;
1589 2099
1590 sp.s_sd = sd; 2100 offset = sd->pos & ~PAGE_CACHE_MASK;
1591 sp.s_buf = buf;
1592 sp.s_pipe = pipe;
1593 sp.s_offset = sd->pos & ~PAGE_CACHE_MASK;
1594 sp.s_buf_offset = buf->offset;
1595
1596 count = sd->len; 2101 count = sd->len;
1597 if (count + sp.s_offset > PAGE_CACHE_SIZE) 2102 if (count + offset > PAGE_CACHE_SIZE)
1598 count = PAGE_CACHE_SIZE - sp.s_offset; 2103 count = PAGE_CACHE_SIZE - offset;
1599 2104
1600 do { 2105 ret = ocfs2_write_begin(file, file->f_mapping, sd->pos, count, 0,
1601 /* 2106 &page, &fsdata);
1602 * splice wants us to copy up to one page at a 2107 if (ret) {
1603 * time. For pagesize > cluster size, this means we 2108 mlog_errno(ret);
1604 * might enter ocfs2_buffered_write_cluster() more 2109 goto out;
1605 * than once, so keep track of our progress here. 2110 }
1606 */
1607 copied = ocfs2_buffered_write_cluster(sd->u.file,
1608 (loff_t)sd->pos + total,
1609 count,
1610 ocfs2_map_and_write_splice_data,
1611 &sp);
1612 if (copied < 0) {
1613 mlog_errno(copied);
1614 ret = copied;
1615 goto out;
1616 }
1617 2111
1618 count -= copied; 2112 src = buf->ops->map(pipe, buf, 1);
1619 sp.s_offset += copied; 2113 dst = kmap_atomic(page, KM_USER1);
1620 sp.s_buf_offset += copied; 2114 memcpy(dst + offset, src + buf->offset, count);
1621 total += copied; 2115 kunmap_atomic(page, KM_USER1);
1622 } while (count); 2116 buf->ops->unmap(pipe, buf, src);
1623 2117
1624 ret = 0; 2118 copied = ocfs2_write_end(file, file->f_mapping, sd->pos, count, count,
2119 page, fsdata);
2120 if (copied < 0) {
2121 mlog_errno(copied);
2122 ret = copied;
2123 goto out;
2124 }
1625out: 2125out:
1626 2126
1627 return total ? total : ret; 2127 return copied ? copied : ret;
1628} 2128}
1629 2129
1630static ssize_t __ocfs2_file_splice_write(struct pipe_inode_info *pipe, 2130static ssize_t __ocfs2_file_splice_write(struct pipe_inode_info *pipe,
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index a4dd1fa1822..36fe27f268e 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -39,15 +39,16 @@ enum ocfs2_alloc_restarted {
39}; 39};
40int ocfs2_do_extend_allocation(struct ocfs2_super *osb, 40int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
41 struct inode *inode, 41 struct inode *inode,
42 u32 *cluster_start, 42 u32 *logical_offset,
43 u32 clusters_to_add, 43 u32 clusters_to_add,
44 int mark_unwritten,
44 struct buffer_head *fe_bh, 45 struct buffer_head *fe_bh,
45 handle_t *handle, 46 handle_t *handle,
46 struct ocfs2_alloc_context *data_ac, 47 struct ocfs2_alloc_context *data_ac,
47 struct ocfs2_alloc_context *meta_ac, 48 struct ocfs2_alloc_context *meta_ac,
48 enum ocfs2_alloc_restarted *reason); 49 enum ocfs2_alloc_restarted *reason_ret);
49int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, 50int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
50 u32 clusters_to_add, 51 u32 clusters_to_add, u32 extents_to_split,
51 struct ocfs2_alloc_context **data_ac, 52 struct ocfs2_alloc_context **data_ac,
52 struct ocfs2_alloc_context **meta_ac); 53 struct ocfs2_alloc_context **meta_ac);
53int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); 54int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
@@ -61,4 +62,7 @@ int ocfs2_should_update_atime(struct inode *inode,
61int ocfs2_update_inode_atime(struct inode *inode, 62int ocfs2_update_inode_atime(struct inode *inode,
62 struct buffer_head *bh); 63 struct buffer_head *bh);
63 64
65int ocfs2_change_file_space(struct file *file, unsigned int cmd,
66 struct ocfs2_space_resv *sr);
67
64#endif /* OCFS2_FILE_H */ 68#endif /* OCFS2_FILE_H */
diff --git a/fs/ocfs2/heartbeat.c b/fs/ocfs2/heartbeat.c
index b25ef63781b..352eb4a13f9 100644
--- a/fs/ocfs2/heartbeat.c
+++ b/fs/ocfs2/heartbeat.c
@@ -157,16 +157,16 @@ int ocfs2_register_hb_callbacks(struct ocfs2_super *osb)
157 if (ocfs2_mount_local(osb)) 157 if (ocfs2_mount_local(osb))
158 return 0; 158 return 0;
159 159
160 status = o2hb_register_callback(&osb->osb_hb_down); 160 status = o2hb_register_callback(osb->uuid_str, &osb->osb_hb_down);
161 if (status < 0) { 161 if (status < 0) {
162 mlog_errno(status); 162 mlog_errno(status);
163 goto bail; 163 goto bail;
164 } 164 }
165 165
166 status = o2hb_register_callback(&osb->osb_hb_up); 166 status = o2hb_register_callback(osb->uuid_str, &osb->osb_hb_up);
167 if (status < 0) { 167 if (status < 0) {
168 mlog_errno(status); 168 mlog_errno(status);
169 o2hb_unregister_callback(&osb->osb_hb_down); 169 o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_down);
170 } 170 }
171 171
172bail: 172bail:
@@ -178,8 +178,8 @@ void ocfs2_clear_hb_callbacks(struct ocfs2_super *osb)
178 if (ocfs2_mount_local(osb)) 178 if (ocfs2_mount_local(osb))
179 return; 179 return;
180 180
181 o2hb_unregister_callback(&osb->osb_hb_down); 181 o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_down);
182 o2hb_unregister_callback(&osb->osb_hb_up); 182 o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_up);
183} 183}
184 184
185void ocfs2_stop_heartbeat(struct ocfs2_super *osb) 185void ocfs2_stop_heartbeat(struct ocfs2_super *osb)
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index f3ad21ad9ae..bd68c3f2afb 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -14,6 +14,7 @@
14#include "ocfs2.h" 14#include "ocfs2.h"
15#include "alloc.h" 15#include "alloc.h"
16#include "dlmglue.h" 16#include "dlmglue.h"
17#include "file.h"
17#include "inode.h" 18#include "inode.h"
18#include "journal.h" 19#include "journal.h"
19 20
@@ -115,6 +116,7 @@ int ocfs2_ioctl(struct inode * inode, struct file * filp,
115{ 116{
116 unsigned int flags; 117 unsigned int flags;
117 int status; 118 int status;
119 struct ocfs2_space_resv sr;
118 120
119 switch (cmd) { 121 switch (cmd) {
120 case OCFS2_IOC_GETFLAGS: 122 case OCFS2_IOC_GETFLAGS:
@@ -130,6 +132,14 @@ int ocfs2_ioctl(struct inode * inode, struct file * filp,
130 132
131 return ocfs2_set_inode_attr(inode, flags, 133 return ocfs2_set_inode_attr(inode, flags,
132 OCFS2_FL_MODIFIABLE); 134 OCFS2_FL_MODIFIABLE);
135 case OCFS2_IOC_RESVSP:
136 case OCFS2_IOC_RESVSP64:
137 case OCFS2_IOC_UNRESVSP:
138 case OCFS2_IOC_UNRESVSP64:
139 if (copy_from_user(&sr, (int __user *) arg, sizeof(sr)))
140 return -EFAULT;
141
142 return ocfs2_change_file_space(filp, cmd, &sr);
133 default: 143 default:
134 return -ENOTTY; 144 return -ENOTTY;
135 } 145 }
@@ -148,6 +158,11 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
148 case OCFS2_IOC32_SETFLAGS: 158 case OCFS2_IOC32_SETFLAGS:
149 cmd = OCFS2_IOC_SETFLAGS; 159 cmd = OCFS2_IOC_SETFLAGS;
150 break; 160 break;
161 case OCFS2_IOC_RESVSP:
162 case OCFS2_IOC_RESVSP64:
163 case OCFS2_IOC_UNRESVSP:
164 case OCFS2_IOC_UNRESVSP64:
165 break;
151 default: 166 default:
152 return -ENOIOCTLCMD; 167 return -ENOIOCTLCMD;
153 } 168 }
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index dc118808172..dbfb20bb27e 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -722,8 +722,7 @@ void ocfs2_complete_recovery(struct work_struct *work)
722 container_of(work, struct ocfs2_journal, j_recovery_work); 722 container_of(work, struct ocfs2_journal, j_recovery_work);
723 struct ocfs2_super *osb = journal->j_osb; 723 struct ocfs2_super *osb = journal->j_osb;
724 struct ocfs2_dinode *la_dinode, *tl_dinode; 724 struct ocfs2_dinode *la_dinode, *tl_dinode;
725 struct ocfs2_la_recovery_item *item; 725 struct ocfs2_la_recovery_item *item, *n;
726 struct list_head *p, *n;
727 LIST_HEAD(tmp_la_list); 726 LIST_HEAD(tmp_la_list);
728 727
729 mlog_entry_void(); 728 mlog_entry_void();
@@ -734,8 +733,7 @@ void ocfs2_complete_recovery(struct work_struct *work)
734 list_splice_init(&journal->j_la_cleanups, &tmp_la_list); 733 list_splice_init(&journal->j_la_cleanups, &tmp_la_list);
735 spin_unlock(&journal->j_lock); 734 spin_unlock(&journal->j_lock);
736 735
737 list_for_each_safe(p, n, &tmp_la_list) { 736 list_for_each_entry_safe(item, n, &tmp_la_list, lri_list) {
738 item = list_entry(p, struct ocfs2_la_recovery_item, lri_list);
739 list_del_init(&item->lri_list); 737 list_del_init(&item->lri_list);
740 738
741 mlog(0, "Complete recovery for slot %d\n", item->lri_slot); 739 mlog(0, "Complete recovery for slot %d\n", item->lri_slot);
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 3db5de4506d..ce60aab013a 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -289,6 +289,8 @@ int ocfs2_journal_dirty_data(handle_t *handle,
289#define OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC (OCFS2_SUBALLOC_FREE \ 289#define OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC (OCFS2_SUBALLOC_FREE \
290 + OCFS2_TRUNCATE_LOG_UPDATE) 290 + OCFS2_TRUNCATE_LOG_UPDATE)
291 291
292#define OCFS2_REMOVE_EXTENT_CREDITS (OCFS2_TRUNCATE_LOG_UPDATE + OCFS2_INODE_UPDATE_CREDITS)
293
292/* data block for new dir/symlink, 2 for bitmap updates (bitmap fe + 294/* data block for new dir/symlink, 2 for bitmap updates (bitmap fe +
293 * bitmap block for the new bit) */ 295 * bitmap block for the new bit) */
294#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2) 296#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2)
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index af01158b39f..d79aa12137d 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -37,11 +37,29 @@
37 37
38#include "ocfs2.h" 38#include "ocfs2.h"
39 39
40#include "aops.h"
40#include "dlmglue.h" 41#include "dlmglue.h"
41#include "file.h" 42#include "file.h"
42#include "inode.h" 43#include "inode.h"
43#include "mmap.h" 44#include "mmap.h"
44 45
46static inline int ocfs2_vm_op_block_sigs(sigset_t *blocked, sigset_t *oldset)
47{
48 /* The best way to deal with signals in the vm path is
49 * to block them upfront, rather than allowing the
50 * locking paths to return -ERESTARTSYS. */
51 sigfillset(blocked);
52
53 /* We should technically never get a bad return value
54 * from sigprocmask */
55 return sigprocmask(SIG_BLOCK, blocked, oldset);
56}
57
58static inline int ocfs2_vm_op_unblock_sigs(sigset_t *oldset)
59{
60 return sigprocmask(SIG_SETMASK, oldset, NULL);
61}
62
45static struct page *ocfs2_nopage(struct vm_area_struct * area, 63static struct page *ocfs2_nopage(struct vm_area_struct * area,
46 unsigned long address, 64 unsigned long address,
47 int *type) 65 int *type)
@@ -53,14 +71,7 @@ static struct page *ocfs2_nopage(struct vm_area_struct * area,
53 mlog_entry("(area=%p, address=%lu, type=%p)\n", area, address, 71 mlog_entry("(area=%p, address=%lu, type=%p)\n", area, address,
54 type); 72 type);
55 73
56 /* The best way to deal with signals in this path is 74 ret = ocfs2_vm_op_block_sigs(&blocked, &oldset);
57 * to block them upfront, rather than allowing the
58 * locking paths to return -ERESTARTSYS. */
59 sigfillset(&blocked);
60
61 /* We should technically never get a bad ret return
62 * from sigprocmask */
63 ret = sigprocmask(SIG_BLOCK, &blocked, &oldset);
64 if (ret < 0) { 75 if (ret < 0) {
65 mlog_errno(ret); 76 mlog_errno(ret);
66 goto out; 77 goto out;
@@ -68,7 +79,7 @@ static struct page *ocfs2_nopage(struct vm_area_struct * area,
68 79
69 page = filemap_nopage(area, address, type); 80 page = filemap_nopage(area, address, type);
70 81
71 ret = sigprocmask(SIG_SETMASK, &oldset, NULL); 82 ret = ocfs2_vm_op_unblock_sigs(&oldset);
72 if (ret < 0) 83 if (ret < 0)
73 mlog_errno(ret); 84 mlog_errno(ret);
74out: 85out:
@@ -76,28 +87,136 @@ out:
76 return page; 87 return page;
77} 88}
78 89
79static struct vm_operations_struct ocfs2_file_vm_ops = { 90static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh,
80 .nopage = ocfs2_nopage, 91 struct page *page)
81}; 92{
93 int ret;
94 struct address_space *mapping = inode->i_mapping;
95 loff_t pos = page->index << PAGE_CACHE_SHIFT;
96 unsigned int len = PAGE_CACHE_SIZE;
97 pgoff_t last_index;
98 struct page *locked_page = NULL;
99 void *fsdata;
100 loff_t size = i_size_read(inode);
82 101
83int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) 102 /*
103 * Another node might have truncated while we were waiting on
104 * cluster locks.
105 */
106 last_index = size >> PAGE_CACHE_SHIFT;
107 if (page->index > last_index) {
108 ret = -EINVAL;
109 goto out;
110 }
111
112 /*
113 * The i_size check above doesn't catch the case where nodes
114 * truncated and then re-extended the file. We'll re-check the
115 * page mapping after taking the page lock inside of
116 * ocfs2_write_begin_nolock().
117 */
118 if (!PageUptodate(page) || page->mapping != inode->i_mapping) {
119 ret = -EINVAL;
120 goto out;
121 }
122
123 /*
124 * Call ocfs2_write_begin() and ocfs2_write_end() to take
125 * advantage of the allocation code there. We pass a write
126 * length of the whole page (chopped to i_size) to make sure
127 * the whole thing is allocated.
128 *
129 * Since we know the page is up to date, we don't have to
130 * worry about ocfs2_write_begin() skipping some buffer reads
131 * because the "write" would invalidate their data.
132 */
133 if (page->index == last_index)
134 len = size & ~PAGE_CACHE_MASK;
135
136 ret = ocfs2_write_begin_nolock(mapping, pos, len, 0, &locked_page,
137 &fsdata, di_bh, page);
138 if (ret) {
139 if (ret != -ENOSPC)
140 mlog_errno(ret);
141 goto out;
142 }
143
144 ret = ocfs2_write_end_nolock(mapping, pos, len, len, locked_page,
145 fsdata);
146 if (ret < 0) {
147 mlog_errno(ret);
148 goto out;
149 }
150 BUG_ON(ret != len);
151 ret = 0;
152out:
153 return ret;
154}
155
156static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page)
84{ 157{
85 int ret = 0, lock_level = 0; 158 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
86 struct ocfs2_super *osb = OCFS2_SB(file->f_dentry->d_inode->i_sb); 159 struct buffer_head *di_bh = NULL;
160 sigset_t blocked, oldset;
161 int ret, ret2;
162
163 ret = ocfs2_vm_op_block_sigs(&blocked, &oldset);
164 if (ret < 0) {
165 mlog_errno(ret);
166 return ret;
167 }
168
169 /*
170 * The cluster locks taken will block a truncate from another
171 * node. Taking the data lock will also ensure that we don't
172 * attempt page truncation as part of a downconvert.
173 */
174 ret = ocfs2_meta_lock(inode, &di_bh, 1);
175 if (ret < 0) {
176 mlog_errno(ret);
177 goto out;
178 }
87 179
88 /* 180 /*
89 * Only support shared writeable mmap for local mounts which 181 * The alloc sem should be enough to serialize with
90 * don't know about holes. 182 * ocfs2_truncate_file() changing i_size as well as any thread
183 * modifying the inode btree.
91 */ 184 */
92 if ((!ocfs2_mount_local(osb) || ocfs2_sparse_alloc(osb)) && 185 down_write(&OCFS2_I(inode)->ip_alloc_sem);
93 ((vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_MAYSHARE)) && 186
94 ((vma->vm_flags & VM_WRITE) || (vma->vm_flags & VM_MAYWRITE))) { 187 ret = ocfs2_data_lock(inode, 1);
95 mlog(0, "disallow shared writable mmaps %lx\n", vma->vm_flags); 188 if (ret < 0) {
96 /* This is -EINVAL because generic_file_readonly_mmap 189 mlog_errno(ret);
97 * returns it in a similar situation. */ 190 goto out_meta_unlock;
98 return -EINVAL;
99 } 191 }
100 192
193 ret = __ocfs2_page_mkwrite(inode, di_bh, page);
194
195 ocfs2_data_unlock(inode, 1);
196
197out_meta_unlock:
198 up_write(&OCFS2_I(inode)->ip_alloc_sem);
199
200 brelse(di_bh);
201 ocfs2_meta_unlock(inode, 1);
202
203out:
204 ret2 = ocfs2_vm_op_unblock_sigs(&oldset);
205 if (ret2 < 0)
206 mlog_errno(ret2);
207
208 return ret;
209}
210
211static struct vm_operations_struct ocfs2_file_vm_ops = {
212 .nopage = ocfs2_nopage,
213 .page_mkwrite = ocfs2_page_mkwrite,
214};
215
216int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
217{
218 int ret = 0, lock_level = 0;
219
101 ret = ocfs2_meta_lock_atime(file->f_dentry->d_inode, 220 ret = ocfs2_meta_lock_atime(file->f_dentry->d_inode,
102 file->f_vfsmnt, &lock_level); 221 file->f_vfsmnt, &lock_level);
103 if (ret < 0) { 222 if (ret < 0) {
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 36289e6295c..d430fdab16e 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -1674,7 +1674,7 @@ static int ocfs2_symlink(struct inode *dir,
1674 u32 offset = 0; 1674 u32 offset = 0;
1675 1675
1676 inode->i_op = &ocfs2_symlink_inode_operations; 1676 inode->i_op = &ocfs2_symlink_inode_operations;
1677 status = ocfs2_do_extend_allocation(osb, inode, &offset, 1, 1677 status = ocfs2_do_extend_allocation(osb, inode, &offset, 1, 0,
1678 new_fe_bh, 1678 new_fe_bh,
1679 handle, data_ac, NULL, 1679 handle, data_ac, NULL,
1680 NULL); 1680 NULL);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index a860633e833..5cc90a40b3c 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -219,6 +219,7 @@ struct ocfs2_super
219 u16 max_slots; 219 u16 max_slots;
220 s16 node_num; 220 s16 node_num;
221 s16 slot_num; 221 s16 slot_num;
222 s16 preferred_slot;
222 int s_sectsize_bits; 223 int s_sectsize_bits;
223 int s_clustersize; 224 int s_clustersize;
224 int s_clustersize_bits; 225 int s_clustersize_bits;
@@ -305,6 +306,19 @@ static inline int ocfs2_sparse_alloc(struct ocfs2_super *osb)
305 return 0; 306 return 0;
306} 307}
307 308
309static inline int ocfs2_writes_unwritten_extents(struct ocfs2_super *osb)
310{
311 /*
312 * Support for sparse files is a pre-requisite
313 */
314 if (!ocfs2_sparse_alloc(osb))
315 return 0;
316
317 if (osb->s_feature_ro_compat & OCFS2_FEATURE_RO_COMPAT_UNWRITTEN)
318 return 1;
319 return 0;
320}
321
308/* set / clear functions because cluster events can make these happen 322/* set / clear functions because cluster events can make these happen
309 * in parallel so we want the transitions to be atomic. this also 323 * in parallel so we want the transitions to be atomic. this also
310 * means that any future flags osb_flags must be protected by spinlock 324 * means that any future flags osb_flags must be protected by spinlock
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index f0d9eb08547..82f8a75b207 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -88,7 +88,7 @@
88#define OCFS2_FEATURE_COMPAT_SUPP OCFS2_FEATURE_COMPAT_BACKUP_SB 88#define OCFS2_FEATURE_COMPAT_SUPP OCFS2_FEATURE_COMPAT_BACKUP_SB
89#define OCFS2_FEATURE_INCOMPAT_SUPP (OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT \ 89#define OCFS2_FEATURE_INCOMPAT_SUPP (OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT \
90 | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC) 90 | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC)
91#define OCFS2_FEATURE_RO_COMPAT_SUPP 0 91#define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN
92 92
93/* 93/*
94 * Heartbeat-only devices are missing journals and other files. The 94 * Heartbeat-only devices are missing journals and other files. The
@@ -116,6 +116,11 @@
116 */ 116 */
117#define OCFS2_FEATURE_COMPAT_BACKUP_SB 0x0001 117#define OCFS2_FEATURE_COMPAT_BACKUP_SB 0x0001
118 118
119/*
120 * Unwritten extents support.
121 */
122#define OCFS2_FEATURE_RO_COMPAT_UNWRITTEN 0x0001
123
119/* The byte offset of the first backup block will be 1G. 124/* The byte offset of the first backup block will be 1G.
120 * The following will be 4G, 16G, 64G, 256G and 1T. 125 * The following will be 4G, 16G, 64G, 256G and 1T.
121 */ 126 */
@@ -170,6 +175,32 @@
170#define OCFS2_IOC32_SETFLAGS _IOW('f', 2, int) 175#define OCFS2_IOC32_SETFLAGS _IOW('f', 2, int)
171 176
172/* 177/*
178 * Space reservation / allocation / free ioctls and argument structure
179 * are designed to be compatible with XFS.
180 *
181 * ALLOCSP* and FREESP* are not and will never be supported, but are
182 * included here for completeness.
183 */
184struct ocfs2_space_resv {
185 __s16 l_type;
186 __s16 l_whence;
187 __s64 l_start;
188 __s64 l_len; /* len == 0 means until end of file */
189 __s32 l_sysid;
190 __u32 l_pid;
191 __s32 l_pad[4]; /* reserve area */
192};
193
194#define OCFS2_IOC_ALLOCSP _IOW ('X', 10, struct ocfs2_space_resv)
195#define OCFS2_IOC_FREESP _IOW ('X', 11, struct ocfs2_space_resv)
196#define OCFS2_IOC_RESVSP _IOW ('X', 40, struct ocfs2_space_resv)
197#define OCFS2_IOC_UNRESVSP _IOW ('X', 41, struct ocfs2_space_resv)
198#define OCFS2_IOC_ALLOCSP64 _IOW ('X', 36, struct ocfs2_space_resv)
199#define OCFS2_IOC_FREESP64 _IOW ('X', 37, struct ocfs2_space_resv)
200#define OCFS2_IOC_RESVSP64 _IOW ('X', 42, struct ocfs2_space_resv)
201#define OCFS2_IOC_UNRESVSP64 _IOW ('X', 43, struct ocfs2_space_resv)
202
203/*
173 * Journal Flags (ocfs2_dinode.id1.journal1.i_flags) 204 * Journal Flags (ocfs2_dinode.id1.journal1.i_flags)
174 */ 205 */
175#define OCFS2_JOURNAL_DIRTY_FL (0x00000001) /* Journal needs recovery */ 206#define OCFS2_JOURNAL_DIRTY_FL (0x00000001) /* Journal needs recovery */
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index d8b79067dc1..af4882b62cf 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -121,17 +121,25 @@ static s16 __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
121 return ret; 121 return ret;
122} 122}
123 123
124static s16 __ocfs2_find_empty_slot(struct ocfs2_slot_info *si) 124static s16 __ocfs2_find_empty_slot(struct ocfs2_slot_info *si, s16 preferred)
125{ 125{
126 int i; 126 int i;
127 s16 ret = OCFS2_INVALID_SLOT; 127 s16 ret = OCFS2_INVALID_SLOT;
128 128
129 if (preferred >= 0 && preferred < si->si_num_slots) {
130 if (OCFS2_INVALID_SLOT == si->si_global_node_nums[preferred]) {
131 ret = preferred;
132 goto out;
133 }
134 }
135
129 for(i = 0; i < si->si_num_slots; i++) { 136 for(i = 0; i < si->si_num_slots; i++) {
130 if (OCFS2_INVALID_SLOT == si->si_global_node_nums[i]) { 137 if (OCFS2_INVALID_SLOT == si->si_global_node_nums[i]) {
131 ret = (s16) i; 138 ret = (s16) i;
132 break; 139 break;
133 } 140 }
134 } 141 }
142out:
135 return ret; 143 return ret;
136} 144}
137 145
@@ -248,7 +256,7 @@ int ocfs2_find_slot(struct ocfs2_super *osb)
248 if (slot == OCFS2_INVALID_SLOT) { 256 if (slot == OCFS2_INVALID_SLOT) {
249 /* if no slot yet, then just take 1st available 257 /* if no slot yet, then just take 1st available
250 * one. */ 258 * one. */
251 slot = __ocfs2_find_empty_slot(si); 259 slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
252 if (slot == OCFS2_INVALID_SLOT) { 260 if (slot == OCFS2_INVALID_SLOT) {
253 spin_unlock(&si->si_lock); 261 spin_unlock(&si->si_lock);
254 mlog(ML_ERROR, "no free slots available!\n"); 262 mlog(ML_ERROR, "no free slots available!\n");
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index e3437626d18..d9c5c9fcb30 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -98,14 +98,6 @@ static int ocfs2_relink_block_group(handle_t *handle,
98 u16 chain); 98 u16 chain);
99static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg, 99static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg,
100 u32 wanted); 100 u32 wanted);
101static int ocfs2_free_suballoc_bits(handle_t *handle,
102 struct inode *alloc_inode,
103 struct buffer_head *alloc_bh,
104 unsigned int start_bit,
105 u64 bg_blkno,
106 unsigned int count);
107static inline u64 ocfs2_which_suballoc_group(u64 block,
108 unsigned int bit);
109static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode, 101static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode,
110 u64 bg_blkno, 102 u64 bg_blkno,
111 u16 bg_bit_off); 103 u16 bg_bit_off);
@@ -496,13 +488,7 @@ int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
496 488
497 (*ac)->ac_bits_wanted = ocfs2_extend_meta_needed(fe); 489 (*ac)->ac_bits_wanted = ocfs2_extend_meta_needed(fe);
498 (*ac)->ac_which = OCFS2_AC_USE_META; 490 (*ac)->ac_which = OCFS2_AC_USE_META;
499
500#ifndef OCFS2_USE_ALL_METADATA_SUBALLOCATORS
501 slot = 0;
502#else
503 slot = osb->slot_num; 491 slot = osb->slot_num;
504#endif
505
506 (*ac)->ac_group_search = ocfs2_block_group_search; 492 (*ac)->ac_group_search = ocfs2_block_group_search;
507 493
508 status = ocfs2_reserve_suballoc_bits(osb, (*ac), 494 status = ocfs2_reserve_suballoc_bits(osb, (*ac),
@@ -1626,12 +1612,12 @@ bail:
1626/* 1612/*
1627 * expects the suballoc inode to already be locked. 1613 * expects the suballoc inode to already be locked.
1628 */ 1614 */
1629static int ocfs2_free_suballoc_bits(handle_t *handle, 1615int ocfs2_free_suballoc_bits(handle_t *handle,
1630 struct inode *alloc_inode, 1616 struct inode *alloc_inode,
1631 struct buffer_head *alloc_bh, 1617 struct buffer_head *alloc_bh,
1632 unsigned int start_bit, 1618 unsigned int start_bit,
1633 u64 bg_blkno, 1619 u64 bg_blkno,
1634 unsigned int count) 1620 unsigned int count)
1635{ 1621{
1636 int status = 0; 1622 int status = 0;
1637 u32 tmp_used; 1623 u32 tmp_used;
@@ -1703,13 +1689,6 @@ bail:
1703 return status; 1689 return status;
1704} 1690}
1705 1691
1706static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit)
1707{
1708 u64 group = block - (u64) bit;
1709
1710 return group;
1711}
1712
1713int ocfs2_free_dinode(handle_t *handle, 1692int ocfs2_free_dinode(handle_t *handle,
1714 struct inode *inode_alloc_inode, 1693 struct inode *inode_alloc_inode,
1715 struct buffer_head *inode_alloc_bh, 1694 struct buffer_head *inode_alloc_bh,
@@ -1723,19 +1702,6 @@ int ocfs2_free_dinode(handle_t *handle,
1723 inode_alloc_bh, bit, bg_blkno, 1); 1702 inode_alloc_bh, bit, bg_blkno, 1);
1724} 1703}
1725 1704
1726int ocfs2_free_extent_block(handle_t *handle,
1727 struct inode *eb_alloc_inode,
1728 struct buffer_head *eb_alloc_bh,
1729 struct ocfs2_extent_block *eb)
1730{
1731 u64 blk = le64_to_cpu(eb->h_blkno);
1732 u16 bit = le16_to_cpu(eb->h_suballoc_bit);
1733 u64 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
1734
1735 return ocfs2_free_suballoc_bits(handle, eb_alloc_inode, eb_alloc_bh,
1736 bit, bg_blkno, 1);
1737}
1738
1739int ocfs2_free_clusters(handle_t *handle, 1705int ocfs2_free_clusters(handle_t *handle,
1740 struct inode *bitmap_inode, 1706 struct inode *bitmap_inode,
1741 struct buffer_head *bitmap_bh, 1707 struct buffer_head *bitmap_bh,
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index 1a3c94cb925..f212dc01a84 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -86,20 +86,29 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb,
86 u32 *cluster_start, 86 u32 *cluster_start,
87 u32 *num_clusters); 87 u32 *num_clusters);
88 88
89int ocfs2_free_suballoc_bits(handle_t *handle,
90 struct inode *alloc_inode,
91 struct buffer_head *alloc_bh,
92 unsigned int start_bit,
93 u64 bg_blkno,
94 unsigned int count);
89int ocfs2_free_dinode(handle_t *handle, 95int ocfs2_free_dinode(handle_t *handle,
90 struct inode *inode_alloc_inode, 96 struct inode *inode_alloc_inode,
91 struct buffer_head *inode_alloc_bh, 97 struct buffer_head *inode_alloc_bh,
92 struct ocfs2_dinode *di); 98 struct ocfs2_dinode *di);
93int ocfs2_free_extent_block(handle_t *handle,
94 struct inode *eb_alloc_inode,
95 struct buffer_head *eb_alloc_bh,
96 struct ocfs2_extent_block *eb);
97int ocfs2_free_clusters(handle_t *handle, 99int ocfs2_free_clusters(handle_t *handle,
98 struct inode *bitmap_inode, 100 struct inode *bitmap_inode,
99 struct buffer_head *bitmap_bh, 101 struct buffer_head *bitmap_bh,
100 u64 start_blk, 102 u64 start_blk,
101 unsigned int num_clusters); 103 unsigned int num_clusters);
102 104
105static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit)
106{
107 u64 group = block - (u64) bit;
108
109 return group;
110}
111
103static inline u32 ocfs2_cluster_from_desc(struct ocfs2_super *osb, 112static inline u32 ocfs2_cluster_from_desc(struct ocfs2_super *osb,
104 u64 bg_blkno) 113 u64 bg_blkno)
105{ 114{
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 86b559c7dce..3a5a1ed09ac 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -82,7 +82,8 @@ MODULE_AUTHOR("Oracle");
82MODULE_LICENSE("GPL"); 82MODULE_LICENSE("GPL");
83 83
84static int ocfs2_parse_options(struct super_block *sb, char *options, 84static int ocfs2_parse_options(struct super_block *sb, char *options,
85 unsigned long *mount_opt, int is_remount); 85 unsigned long *mount_opt, s16 *slot,
86 int is_remount);
86static void ocfs2_put_super(struct super_block *sb); 87static void ocfs2_put_super(struct super_block *sb);
87static int ocfs2_mount_volume(struct super_block *sb); 88static int ocfs2_mount_volume(struct super_block *sb);
88static int ocfs2_remount(struct super_block *sb, int *flags, char *data); 89static int ocfs2_remount(struct super_block *sb, int *flags, char *data);
@@ -114,8 +115,6 @@ static void ocfs2_write_super(struct super_block *sb);
114static struct inode *ocfs2_alloc_inode(struct super_block *sb); 115static struct inode *ocfs2_alloc_inode(struct super_block *sb);
115static void ocfs2_destroy_inode(struct inode *inode); 116static void ocfs2_destroy_inode(struct inode *inode);
116 117
117static unsigned long long ocfs2_max_file_offset(unsigned int blockshift);
118
119static const struct super_operations ocfs2_sops = { 118static const struct super_operations ocfs2_sops = {
120 .statfs = ocfs2_statfs, 119 .statfs = ocfs2_statfs,
121 .alloc_inode = ocfs2_alloc_inode, 120 .alloc_inode = ocfs2_alloc_inode,
@@ -140,6 +139,7 @@ enum {
140 Opt_data_ordered, 139 Opt_data_ordered,
141 Opt_data_writeback, 140 Opt_data_writeback,
142 Opt_atime_quantum, 141 Opt_atime_quantum,
142 Opt_slot,
143 Opt_err, 143 Opt_err,
144}; 144};
145 145
@@ -154,6 +154,7 @@ static match_table_t tokens = {
154 {Opt_data_ordered, "data=ordered"}, 154 {Opt_data_ordered, "data=ordered"},
155 {Opt_data_writeback, "data=writeback"}, 155 {Opt_data_writeback, "data=writeback"},
156 {Opt_atime_quantum, "atime_quantum=%u"}, 156 {Opt_atime_quantum, "atime_quantum=%u"},
157 {Opt_slot, "preferred_slot=%u"},
157 {Opt_err, NULL} 158 {Opt_err, NULL}
158}; 159};
159 160
@@ -318,7 +319,7 @@ static void ocfs2_destroy_inode(struct inode *inode)
318/* From xfs_super.c:xfs_max_file_offset 319/* From xfs_super.c:xfs_max_file_offset
319 * Copyright (c) 2000-2004 Silicon Graphics, Inc. 320 * Copyright (c) 2000-2004 Silicon Graphics, Inc.
320 */ 321 */
321static unsigned long long ocfs2_max_file_offset(unsigned int blockshift) 322unsigned long long ocfs2_max_file_offset(unsigned int blockshift)
322{ 323{
323 unsigned int pagefactor = 1; 324 unsigned int pagefactor = 1;
324 unsigned int bitshift = BITS_PER_LONG - 1; 325 unsigned int bitshift = BITS_PER_LONG - 1;
@@ -355,9 +356,10 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
355 int incompat_features; 356 int incompat_features;
356 int ret = 0; 357 int ret = 0;
357 unsigned long parsed_options; 358 unsigned long parsed_options;
359 s16 slot;
358 struct ocfs2_super *osb = OCFS2_SB(sb); 360 struct ocfs2_super *osb = OCFS2_SB(sb);
359 361
360 if (!ocfs2_parse_options(sb, data, &parsed_options, 1)) { 362 if (!ocfs2_parse_options(sb, data, &parsed_options, &slot, 1)) {
361 ret = -EINVAL; 363 ret = -EINVAL;
362 goto out; 364 goto out;
363 } 365 }
@@ -534,6 +536,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
534 struct dentry *root; 536 struct dentry *root;
535 int status, sector_size; 537 int status, sector_size;
536 unsigned long parsed_opt; 538 unsigned long parsed_opt;
539 s16 slot;
537 struct inode *inode = NULL; 540 struct inode *inode = NULL;
538 struct ocfs2_super *osb = NULL; 541 struct ocfs2_super *osb = NULL;
539 struct buffer_head *bh = NULL; 542 struct buffer_head *bh = NULL;
@@ -541,7 +544,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
541 544
542 mlog_entry("%p, %p, %i", sb, data, silent); 545 mlog_entry("%p, %p, %i", sb, data, silent);
543 546
544 if (!ocfs2_parse_options(sb, data, &parsed_opt, 0)) { 547 if (!ocfs2_parse_options(sb, data, &parsed_opt, &slot, 0)) {
545 status = -EINVAL; 548 status = -EINVAL;
546 goto read_super_error; 549 goto read_super_error;
547 } 550 }
@@ -571,6 +574,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
571 brelse(bh); 574 brelse(bh);
572 bh = NULL; 575 bh = NULL;
573 osb->s_mount_opt = parsed_opt; 576 osb->s_mount_opt = parsed_opt;
577 osb->preferred_slot = slot;
574 578
575 sb->s_magic = OCFS2_SUPER_MAGIC; 579 sb->s_magic = OCFS2_SUPER_MAGIC;
576 580
@@ -713,6 +717,7 @@ static struct file_system_type ocfs2_fs_type = {
713static int ocfs2_parse_options(struct super_block *sb, 717static int ocfs2_parse_options(struct super_block *sb,
714 char *options, 718 char *options,
715 unsigned long *mount_opt, 719 unsigned long *mount_opt,
720 s16 *slot,
716 int is_remount) 721 int is_remount)
717{ 722{
718 int status; 723 int status;
@@ -722,6 +727,7 @@ static int ocfs2_parse_options(struct super_block *sb,
722 options ? options : "(none)"); 727 options ? options : "(none)");
723 728
724 *mount_opt = 0; 729 *mount_opt = 0;
730 *slot = OCFS2_INVALID_SLOT;
725 731
726 if (!options) { 732 if (!options) {
727 status = 1; 733 status = 1;
@@ -782,6 +788,15 @@ static int ocfs2_parse_options(struct super_block *sb,
782 else 788 else
783 osb->s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; 789 osb->s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM;
784 break; 790 break;
791 case Opt_slot:
792 option = 0;
793 if (match_int(&args[0], &option)) {
794 status = 0;
795 goto bail;
796 }
797 if (option)
798 *slot = (s16)option;
799 break;
785 default: 800 default:
786 mlog(ML_ERROR, 801 mlog(ML_ERROR,
787 "Unrecognized mount option \"%s\" " 802 "Unrecognized mount option \"%s\" "
diff --git a/fs/ocfs2/super.h b/fs/ocfs2/super.h
index 783f5270f2a..3b9cb3d0b00 100644
--- a/fs/ocfs2/super.h
+++ b/fs/ocfs2/super.h
@@ -45,4 +45,6 @@ void __ocfs2_abort(struct super_block *sb,
45 45
46#define ocfs2_abort(sb, fmt, args...) __ocfs2_abort(sb, __PRETTY_FUNCTION__, fmt, ##args) 46#define ocfs2_abort(sb, fmt, args...) __ocfs2_abort(sb, __PRETTY_FUNCTION__, fmt, ##args)
47 47
48unsigned long long ocfs2_max_file_offset(unsigned int blockshift);
49
48#endif /* OCFS2_SUPER_H */ 50#endif /* OCFS2_SUPER_H */
diff --git a/fs/open.c b/fs/open.c
index 0d515d16197..be6a457f422 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -855,7 +855,7 @@ EXPORT_SYMBOL(dentry_open);
855/* 855/*
856 * Find an empty file descriptor entry, and mark it busy. 856 * Find an empty file descriptor entry, and mark it busy.
857 */ 857 */
858int get_unused_fd(void) 858int get_unused_fd_flags(int flags)
859{ 859{
860 struct files_struct * files = current->files; 860 struct files_struct * files = current->files;
861 int fd, error; 861 int fd, error;
@@ -891,7 +891,10 @@ repeat:
891 } 891 }
892 892
893 FD_SET(fd, fdt->open_fds); 893 FD_SET(fd, fdt->open_fds);
894 FD_CLR(fd, fdt->close_on_exec); 894 if (flags & O_CLOEXEC)
895 FD_SET(fd, fdt->close_on_exec);
896 else
897 FD_CLR(fd, fdt->close_on_exec);
895 files->next_fd = fd + 1; 898 files->next_fd = fd + 1;
896#if 1 899#if 1
897 /* Sanity check */ 900 /* Sanity check */
@@ -907,6 +910,11 @@ out:
907 return error; 910 return error;
908} 911}
909 912
913int get_unused_fd(void)
914{
915 return get_unused_fd_flags(0);
916}
917
910EXPORT_SYMBOL(get_unused_fd); 918EXPORT_SYMBOL(get_unused_fd);
911 919
912static void __put_unused_fd(struct files_struct *files, unsigned int fd) 920static void __put_unused_fd(struct files_struct *files, unsigned int fd)
@@ -959,7 +967,7 @@ long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
959 int fd = PTR_ERR(tmp); 967 int fd = PTR_ERR(tmp);
960 968
961 if (!IS_ERR(tmp)) { 969 if (!IS_ERR(tmp)) {
962 fd = get_unused_fd(); 970 fd = get_unused_fd_flags(flags);
963 if (fd >= 0) { 971 if (fd >= 0) {
964 struct file *f = do_filp_open(dfd, tmp, flags, mode); 972 struct file *f = do_filp_open(dfd, tmp, flags, mode);
965 if (IS_ERR(f)) { 973 if (IS_ERR(f)) {
diff --git a/fs/partitions/acorn.c b/fs/partitions/acorn.c
index e3491328596..3d3e1663147 100644
--- a/fs/partitions/acorn.c
+++ b/fs/partitions/acorn.c
@@ -25,6 +25,8 @@
25#define PARTITION_RISCIX_SCSI 2 25#define PARTITION_RISCIX_SCSI 2
26#define PARTITION_LINUX 9 26#define PARTITION_LINUX 9
27 27
28#if defined(CONFIG_ACORN_PARTITION_CUMANA) || \
29 defined(CONFIG_ACORN_PARTITION_ADFS)
28static struct adfs_discrecord * 30static struct adfs_discrecord *
29adfs_partition(struct parsed_partitions *state, char *name, char *data, 31adfs_partition(struct parsed_partitions *state, char *name, char *data,
30 unsigned long first_sector, int slot) 32 unsigned long first_sector, int slot)
@@ -48,6 +50,7 @@ adfs_partition(struct parsed_partitions *state, char *name, char *data,
48 put_partition(state, slot, first_sector, nr_sects); 50 put_partition(state, slot, first_sector, nr_sects);
49 return dr; 51 return dr;
50} 52}
53#endif
51 54
52#ifdef CONFIG_ACORN_PARTITION_RISCIX 55#ifdef CONFIG_ACORN_PARTITION_RISCIX
53 56
@@ -65,6 +68,8 @@ struct riscix_record {
65 struct riscix_part part[8]; 68 struct riscix_part part[8];
66}; 69};
67 70
71#if defined(CONFIG_ACORN_PARTITION_CUMANA) || \
72 defined(CONFIG_ACORN_PARTITION_ADFS)
68static int 73static int
69riscix_partition(struct parsed_partitions *state, struct block_device *bdev, 74riscix_partition(struct parsed_partitions *state, struct block_device *bdev,
70 unsigned long first_sect, int slot, unsigned long nr_sects) 75 unsigned long first_sect, int slot, unsigned long nr_sects)
@@ -105,6 +110,7 @@ riscix_partition(struct parsed_partitions *state, struct block_device *bdev,
105 return slot; 110 return slot;
106} 111}
107#endif 112#endif
113#endif
108 114
109#define LINUX_NATIVE_MAGIC 0xdeafa1de 115#define LINUX_NATIVE_MAGIC 0xdeafa1de
110#define LINUX_SWAP_MAGIC 0xdeafab1e 116#define LINUX_SWAP_MAGIC 0xdeafab1e
@@ -115,6 +121,8 @@ struct linux_part {
115 __le32 nr_sects; 121 __le32 nr_sects;
116}; 122};
117 123
124#if defined(CONFIG_ACORN_PARTITION_CUMANA) || \
125 defined(CONFIG_ACORN_PARTITION_ADFS)
118static int 126static int
119linux_partition(struct parsed_partitions *state, struct block_device *bdev, 127linux_partition(struct parsed_partitions *state, struct block_device *bdev,
120 unsigned long first_sect, int slot, unsigned long nr_sects) 128 unsigned long first_sect, int slot, unsigned long nr_sects)
@@ -146,6 +154,7 @@ linux_partition(struct parsed_partitions *state, struct block_device *bdev,
146 put_dev_sector(sect); 154 put_dev_sector(sect);
147 return slot; 155 return slot;
148} 156}
157#endif
149 158
150#ifdef CONFIG_ACORN_PARTITION_CUMANA 159#ifdef CONFIG_ACORN_PARTITION_CUMANA
151int 160int
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 9a3a058f355..98e0b85a9bb 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -397,7 +397,6 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len,
397 static struct attribute addpartattr = { 397 static struct attribute addpartattr = {
398 .name = "whole_disk", 398 .name = "whole_disk",
399 .mode = S_IRUSR | S_IRGRP | S_IROTH, 399 .mode = S_IRUSR | S_IRGRP | S_IROTH,
400 .owner = THIS_MODULE,
401 }; 400 };
402 401
403 sysfs_create_file(&p->kobj, &addpartattr); 402 sysfs_create_file(&p->kobj, &addpartattr);
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index 99873a2b4cb..e7dd1d4e347 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -677,15 +677,24 @@ static bool ldm_create_data_partitions (struct parsed_partitions *pp,
677 * Return: -1 Error, the calculated offset exceeded the size of the buffer 677 * Return: -1 Error, the calculated offset exceeded the size of the buffer
678 * n OK, a range-checked offset into buffer 678 * n OK, a range-checked offset into buffer
679 */ 679 */
680static int ldm_relative (const u8 *buffer, int buflen, int base, int offset) 680static int ldm_relative(const u8 *buffer, int buflen, int base, int offset)
681{ 681{
682 682
683 base += offset; 683 base += offset;
684 if ((!buffer) || (offset < 0) || (base > buflen)) 684 if (!buffer || offset < 0 || base > buflen) {
685 if (!buffer)
686 ldm_error("!buffer");
687 if (offset < 0)
688 ldm_error("offset (%d) < 0", offset);
689 if (base > buflen)
690 ldm_error("base (%d) > buflen (%d)", base, buflen);
685 return -1; 691 return -1;
686 if ((base + buffer[base]) >= buflen) 692 }
693 if (base + buffer[base] >= buflen) {
694 ldm_error("base (%d) + buffer[base] (%d) >= buflen (%d)", base,
695 buffer[base], buflen);
687 return -1; 696 return -1;
688 697 }
689 return buffer[base] + offset + 1; 698 return buffer[base] + offset + 1;
690} 699}
691 700
@@ -1054,60 +1063,98 @@ static bool ldm_parse_prt3(const u8 *buffer, int buflen, struct vblk *vb)
1054 * Return: 'true' @vb contains a Volume VBLK 1063 * Return: 'true' @vb contains a Volume VBLK
1055 * 'false' @vb contents are not defined 1064 * 'false' @vb contents are not defined
1056 */ 1065 */
1057static bool ldm_parse_vol5 (const u8 *buffer, int buflen, struct vblk *vb) 1066static bool ldm_parse_vol5(const u8 *buffer, int buflen, struct vblk *vb)
1058{ 1067{
1059 int r_objid, r_name, r_vtype, r_child, r_size, r_id1, r_id2, r_size2; 1068 int r_objid, r_name, r_vtype, r_disable_drive_letter, r_child, r_size;
1060 int r_drive, len; 1069 int r_id1, r_id2, r_size2, r_drive, len;
1061 struct vblk_volu *volu; 1070 struct vblk_volu *volu;
1062 1071
1063 BUG_ON (!buffer || !vb); 1072 BUG_ON(!buffer || !vb);
1064 1073 r_objid = ldm_relative(buffer, buflen, 0x18, 0);
1065 r_objid = ldm_relative (buffer, buflen, 0x18, 0); 1074 if (r_objid < 0) {
1066 r_name = ldm_relative (buffer, buflen, 0x18, r_objid); 1075 ldm_error("r_objid %d < 0", r_objid);
1067 r_vtype = ldm_relative (buffer, buflen, 0x18, r_name); 1076 return false;
1068 r_child = ldm_relative (buffer, buflen, 0x2E, r_vtype); 1077 }
1069 r_size = ldm_relative (buffer, buflen, 0x3E, r_child); 1078 r_name = ldm_relative(buffer, buflen, 0x18, r_objid);
1070 1079 if (r_name < 0) {
1071 if (buffer[0x12] & VBLK_FLAG_VOLU_ID1) 1080 ldm_error("r_name %d < 0", r_name);
1072 r_id1 = ldm_relative (buffer, buflen, 0x53, r_size); 1081 return false;
1073 else 1082 }
1083 r_vtype = ldm_relative(buffer, buflen, 0x18, r_name);
1084 if (r_vtype < 0) {
1085 ldm_error("r_vtype %d < 0", r_vtype);
1086 return false;
1087 }
1088 r_disable_drive_letter = ldm_relative(buffer, buflen, 0x18, r_vtype);
1089 if (r_disable_drive_letter < 0) {
1090 ldm_error("r_disable_drive_letter %d < 0",
1091 r_disable_drive_letter);
1092 return false;
1093 }
1094 r_child = ldm_relative(buffer, buflen, 0x2D, r_disable_drive_letter);
1095 if (r_child < 0) {
1096 ldm_error("r_child %d < 0", r_child);
1097 return false;
1098 }
1099 r_size = ldm_relative(buffer, buflen, 0x3D, r_child);
1100 if (r_size < 0) {
1101 ldm_error("r_size %d < 0", r_size);
1102 return false;
1103 }
1104 if (buffer[0x12] & VBLK_FLAG_VOLU_ID1) {
1105 r_id1 = ldm_relative(buffer, buflen, 0x52, r_size);
1106 if (r_id1 < 0) {
1107 ldm_error("r_id1 %d < 0", r_id1);
1108 return false;
1109 }
1110 } else
1074 r_id1 = r_size; 1111 r_id1 = r_size;
1075 1112 if (buffer[0x12] & VBLK_FLAG_VOLU_ID2) {
1076 if (buffer[0x12] & VBLK_FLAG_VOLU_ID2) 1113 r_id2 = ldm_relative(buffer, buflen, 0x52, r_id1);
1077 r_id2 = ldm_relative (buffer, buflen, 0x53, r_id1); 1114 if (r_id2 < 0) {
1078 else 1115 ldm_error("r_id2 %d < 0", r_id2);
1116 return false;
1117 }
1118 } else
1079 r_id2 = r_id1; 1119 r_id2 = r_id1;
1080 1120 if (buffer[0x12] & VBLK_FLAG_VOLU_SIZE) {
1081 if (buffer[0x12] & VBLK_FLAG_VOLU_SIZE) 1121 r_size2 = ldm_relative(buffer, buflen, 0x52, r_id2);
1082 r_size2 = ldm_relative (buffer, buflen, 0x53, r_id2); 1122 if (r_size2 < 0) {
1083 else 1123 ldm_error("r_size2 %d < 0", r_size2);
1124 return false;
1125 }
1126 } else
1084 r_size2 = r_id2; 1127 r_size2 = r_id2;
1085 1128 if (buffer[0x12] & VBLK_FLAG_VOLU_DRIVE) {
1086 if (buffer[0x12] & VBLK_FLAG_VOLU_DRIVE) 1129 r_drive = ldm_relative(buffer, buflen, 0x52, r_size2);
1087 r_drive = ldm_relative (buffer, buflen, 0x53, r_size2); 1130 if (r_drive < 0) {
1088 else 1131 ldm_error("r_drive %d < 0", r_drive);
1132 return false;
1133 }
1134 } else
1089 r_drive = r_size2; 1135 r_drive = r_size2;
1090
1091 len = r_drive; 1136 len = r_drive;
1092 if (len < 0) 1137 if (len < 0) {
1138 ldm_error("len %d < 0", len);
1093 return false; 1139 return false;
1094 1140 }
1095 len += VBLK_SIZE_VOL5; 1141 len += VBLK_SIZE_VOL5;
1096 if (len != BE32 (buffer + 0x14)) 1142 if (len > BE32(buffer + 0x14)) {
1143 ldm_error("len %d > BE32(buffer + 0x14) %d", len,
1144 BE32(buffer + 0x14));
1097 return false; 1145 return false;
1098 1146 }
1099 volu = &vb->vblk.volu; 1147 volu = &vb->vblk.volu;
1100 1148 ldm_get_vstr(buffer + 0x18 + r_name, volu->volume_type,
1101 ldm_get_vstr (buffer + 0x18 + r_name, volu->volume_type, 1149 sizeof(volu->volume_type));
1102 sizeof (volu->volume_type)); 1150 memcpy(volu->volume_state, buffer + 0x18 + r_disable_drive_letter,
1103 memcpy (volu->volume_state, buffer + 0x19 + r_vtype, 1151 sizeof(volu->volume_state));
1104 sizeof (volu->volume_state)); 1152 volu->size = ldm_get_vnum(buffer + 0x3D + r_child);
1105 volu->size = ldm_get_vnum (buffer + 0x3E + r_child); 1153 volu->partition_type = buffer[0x41 + r_size];
1106 volu->partition_type = buffer[0x42 + r_size]; 1154 memcpy(volu->guid, buffer + 0x42 + r_size, sizeof(volu->guid));
1107 memcpy (volu->guid, buffer + 0x43 + r_size, sizeof (volu->guid));
1108 if (buffer[0x12] & VBLK_FLAG_VOLU_DRIVE) { 1155 if (buffer[0x12] & VBLK_FLAG_VOLU_DRIVE) {
1109 ldm_get_vstr (buffer + 0x53 + r_size, volu->drive_hint, 1156 ldm_get_vstr(buffer + 0x52 + r_size, volu->drive_hint,
1110 sizeof (volu->drive_hint)); 1157 sizeof(volu->drive_hint));
1111 } 1158 }
1112 return true; 1159 return true;
1113} 1160}
diff --git a/fs/partitions/ldm.h b/fs/partitions/ldm.h
index d2e6a304693..80f63b5fdd9 100644
--- a/fs/partitions/ldm.h
+++ b/fs/partitions/ldm.h
@@ -68,7 +68,7 @@ struct parsed_partitions;
68#define VBLK_SIZE_DSK3 12 68#define VBLK_SIZE_DSK3 12
69#define VBLK_SIZE_DSK4 45 69#define VBLK_SIZE_DSK4 45
70#define VBLK_SIZE_PRT3 28 70#define VBLK_SIZE_PRT3 28
71#define VBLK_SIZE_VOL5 59 71#define VBLK_SIZE_VOL5 58
72 72
73/* component types */ 73/* component types */
74#define COMP_STRIPE 0x01 /* Stripe-set */ 74#define COMP_STRIPE 0x01 /* Stripe-set */
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 98e78e2f18d..965625a0977 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -62,6 +62,8 @@
62#include <linux/mman.h> 62#include <linux/mman.h>
63#include <linux/proc_fs.h> 63#include <linux/proc_fs.h>
64#include <linux/ioport.h> 64#include <linux/ioport.h>
65#include <linux/uaccess.h>
66#include <linux/io.h>
65#include <linux/mm.h> 67#include <linux/mm.h>
66#include <linux/hugetlb.h> 68#include <linux/hugetlb.h>
67#include <linux/pagemap.h> 69#include <linux/pagemap.h>
@@ -76,9 +78,7 @@
76#include <linux/rcupdate.h> 78#include <linux/rcupdate.h>
77#include <linux/delayacct.h> 79#include <linux/delayacct.h>
78 80
79#include <asm/uaccess.h>
80#include <asm/pgtable.h> 81#include <asm/pgtable.h>
81#include <asm/io.h>
82#include <asm/processor.h> 82#include <asm/processor.h>
83#include "internal.h" 83#include "internal.h"
84 84
@@ -87,10 +87,10 @@
87do { memcpy(buffer, string, strlen(string)); \ 87do { memcpy(buffer, string, strlen(string)); \
88 buffer += strlen(string); } while (0) 88 buffer += strlen(string); } while (0)
89 89
90static inline char * task_name(struct task_struct *p, char * buf) 90static inline char *task_name(struct task_struct *p, char *buf)
91{ 91{
92 int i; 92 int i;
93 char * name; 93 char *name;
94 char tcomm[sizeof(p->comm)]; 94 char tcomm[sizeof(p->comm)];
95 95
96 get_task_comm(tcomm, p); 96 get_task_comm(tcomm, p);
@@ -138,7 +138,7 @@ static const char *task_state_array[] = {
138 "X (dead)" /* 32 */ 138 "X (dead)" /* 32 */
139}; 139};
140 140
141static inline const char * get_task_state(struct task_struct *tsk) 141static inline const char *get_task_state(struct task_struct *tsk)
142{ 142{
143 unsigned int state = (tsk->state & (TASK_RUNNING | 143 unsigned int state = (tsk->state & (TASK_RUNNING |
144 TASK_INTERRUPTIBLE | 144 TASK_INTERRUPTIBLE |
@@ -156,7 +156,7 @@ static inline const char * get_task_state(struct task_struct *tsk)
156 return *p; 156 return *p;
157} 157}
158 158
159static inline char * task_state(struct task_struct *p, char *buffer) 159static inline char *task_state(struct task_struct *p, char *buffer)
160{ 160{
161 struct group_info *group_info; 161 struct group_info *group_info;
162 int g; 162 int g;
@@ -172,8 +172,8 @@ static inline char * task_state(struct task_struct *p, char *buffer)
172 "Uid:\t%d\t%d\t%d\t%d\n" 172 "Uid:\t%d\t%d\t%d\t%d\n"
173 "Gid:\t%d\t%d\t%d\t%d\n", 173 "Gid:\t%d\t%d\t%d\t%d\n",
174 get_task_state(p), 174 get_task_state(p),
175 p->tgid, p->pid, 175 p->tgid, p->pid,
176 pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0, 176 pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0,
177 pid_alive(p) && p->ptrace ? rcu_dereference(p->parent)->pid : 0, 177 pid_alive(p) && p->ptrace ? rcu_dereference(p->parent)->pid : 0,
178 p->uid, p->euid, p->suid, p->fsuid, 178 p->uid, p->euid, p->suid, p->fsuid,
179 p->gid, p->egid, p->sgid, p->fsgid); 179 p->gid, p->egid, p->sgid, p->fsgid);
@@ -191,15 +191,15 @@ static inline char * task_state(struct task_struct *p, char *buffer)
191 get_group_info(group_info); 191 get_group_info(group_info);
192 task_unlock(p); 192 task_unlock(p);
193 193
194 for (g = 0; g < min(group_info->ngroups,NGROUPS_SMALL); g++) 194 for (g = 0; g < min(group_info->ngroups, NGROUPS_SMALL); g++)
195 buffer += sprintf(buffer, "%d ", GROUP_AT(group_info,g)); 195 buffer += sprintf(buffer, "%d ", GROUP_AT(group_info, g));
196 put_group_info(group_info); 196 put_group_info(group_info);
197 197
198 buffer += sprintf(buffer, "\n"); 198 buffer += sprintf(buffer, "\n");
199 return buffer; 199 return buffer;
200} 200}
201 201
202static char * render_sigset_t(const char *header, sigset_t *set, char *buffer) 202static char *render_sigset_t(const char *header, sigset_t *set, char *buffer)
203{ 203{
204 int i, len; 204 int i, len;
205 205
@@ -239,7 +239,7 @@ static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign,
239 } 239 }
240} 240}
241 241
242static inline char * task_sig(struct task_struct *p, char *buffer) 242static inline char *task_sig(struct task_struct *p, char *buffer)
243{ 243{
244 unsigned long flags; 244 unsigned long flags;
245 sigset_t pending, shpending, blocked, ignored, caught; 245 sigset_t pending, shpending, blocked, ignored, caught;
@@ -289,14 +289,23 @@ static inline char *task_cap(struct task_struct *p, char *buffer)
289 cap_t(p->cap_effective)); 289 cap_t(p->cap_effective));
290} 290}
291 291
292int proc_pid_status(struct task_struct *task, char * buffer) 292static inline char *task_context_switch_counts(struct task_struct *p,
293 char *buffer)
293{ 294{
294 char * orig = buffer; 295 return buffer + sprintf(buffer, "voluntary_ctxt_switches:\t%lu\n"
296 "nonvoluntary_ctxt_switches:\t%lu\n",
297 p->nvcsw,
298 p->nivcsw);
299}
300
301int proc_pid_status(struct task_struct *task, char *buffer)
302{
303 char *orig = buffer;
295 struct mm_struct *mm = get_task_mm(task); 304 struct mm_struct *mm = get_task_mm(task);
296 305
297 buffer = task_name(task, buffer); 306 buffer = task_name(task, buffer);
298 buffer = task_state(task, buffer); 307 buffer = task_state(task, buffer);
299 308
300 if (mm) { 309 if (mm) {
301 buffer = task_mem(mm, buffer); 310 buffer = task_mem(mm, buffer);
302 mmput(mm); 311 mmput(mm);
@@ -307,6 +316,7 @@ int proc_pid_status(struct task_struct *task, char * buffer)
307#if defined(CONFIG_S390) 316#if defined(CONFIG_S390)
308 buffer = task_show_regs(task, buffer); 317 buffer = task_show_regs(task, buffer);
309#endif 318#endif
319 buffer = task_context_switch_counts(task, buffer);
310 return buffer - orig; 320 return buffer - orig;
311} 321}
312 322
@@ -332,7 +342,7 @@ static clock_t task_utime(struct task_struct *p)
332 342
333static clock_t task_stime(struct task_struct *p) 343static clock_t task_stime(struct task_struct *p)
334{ 344{
335 clock_t stime = cputime_to_clock_t(p->stime); 345 clock_t stime;
336 346
337 /* 347 /*
338 * Use CFS's precise accounting. (we subtract utime from 348 * Use CFS's precise accounting. (we subtract utime from
@@ -344,8 +354,7 @@ static clock_t task_stime(struct task_struct *p)
344 return stime; 354 return stime;
345} 355}
346 356
347 357static int do_task_stat(struct task_struct *task, char *buffer, int whole)
348static int do_task_stat(struct task_struct *task, char * buffer, int whole)
349{ 358{
350 unsigned long vsize, eip, esp, wchan = ~0UL; 359 unsigned long vsize, eip, esp, wchan = ~0UL;
351 long priority, nice; 360 long priority, nice;
@@ -353,7 +362,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
353 sigset_t sigign, sigcatch; 362 sigset_t sigign, sigcatch;
354 char state; 363 char state;
355 int res; 364 int res;
356 pid_t ppid = 0, pgid = -1, sid = -1; 365 pid_t ppid = 0, pgid = -1, sid = -1;
357 int num_threads = 0; 366 int num_threads = 0;
358 struct mm_struct *mm; 367 struct mm_struct *mm;
359 unsigned long long start_time; 368 unsigned long long start_time;
@@ -424,7 +433,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
424 } 433 }
425 rcu_read_unlock(); 434 rcu_read_unlock();
426 435
427 if (!whole || num_threads<2) 436 if (!whole || num_threads < 2)
428 wchan = get_wchan(task); 437 wchan = get_wchan(task);
429 if (!whole) { 438 if (!whole) {
430 min_flt = task->min_flt; 439 min_flt = task->min_flt;
@@ -440,12 +449,13 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
440 449
441 /* Temporary variable needed for gcc-2.96 */ 450 /* Temporary variable needed for gcc-2.96 */
442 /* convert timespec -> nsec*/ 451 /* convert timespec -> nsec*/
443 start_time = (unsigned long long)task->start_time.tv_sec * NSEC_PER_SEC 452 start_time =
444 + task->start_time.tv_nsec; 453 (unsigned long long)task->real_start_time.tv_sec * NSEC_PER_SEC
454 + task->real_start_time.tv_nsec;
445 /* convert nsec -> ticks */ 455 /* convert nsec -> ticks */
446 start_time = nsec_to_clock_t(start_time); 456 start_time = nsec_to_clock_t(start_time);
447 457
448 res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %u %lu \ 458 res = sprintf(buffer, "%d (%s) %c %d %d %d %d %d %u %lu \
449%lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \ 459%lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \
450%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu\n", 460%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu\n",
451 task->pid, 461 task->pid,
@@ -471,7 +481,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
471 start_time, 481 start_time,
472 vsize, 482 vsize,
473 mm ? get_mm_rss(mm) : 0, 483 mm ? get_mm_rss(mm) : 0,
474 rsslim, 484 rsslim,
475 mm ? mm->start_code : 0, 485 mm ? mm->start_code : 0,
476 mm ? mm->end_code : 0, 486 mm ? mm->end_code : 0,
477 mm ? mm->start_stack : 0, 487 mm ? mm->start_stack : 0,
@@ -493,17 +503,17 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
493 task->rt_priority, 503 task->rt_priority,
494 task->policy, 504 task->policy,
495 (unsigned long long)delayacct_blkio_ticks(task)); 505 (unsigned long long)delayacct_blkio_ticks(task));
496 if(mm) 506 if (mm)
497 mmput(mm); 507 mmput(mm);
498 return res; 508 return res;
499} 509}
500 510
501int proc_tid_stat(struct task_struct *task, char * buffer) 511int proc_tid_stat(struct task_struct *task, char *buffer)
502{ 512{
503 return do_task_stat(task, buffer, 0); 513 return do_task_stat(task, buffer, 0);
504} 514}
505 515
506int proc_tgid_stat(struct task_struct *task, char * buffer) 516int proc_tgid_stat(struct task_struct *task, char *buffer)
507{ 517{
508 return do_task_stat(task, buffer, 1); 518 return do_task_stat(task, buffer, 1);
509} 519}
@@ -512,12 +522,12 @@ int proc_pid_statm(struct task_struct *task, char *buffer)
512{ 522{
513 int size = 0, resident = 0, shared = 0, text = 0, lib = 0, data = 0; 523 int size = 0, resident = 0, shared = 0, text = 0, lib = 0, data = 0;
514 struct mm_struct *mm = get_task_mm(task); 524 struct mm_struct *mm = get_task_mm(task);
515 525
516 if (mm) { 526 if (mm) {
517 size = task_statm(mm, &shared, &text, &data, &resident); 527 size = task_statm(mm, &shared, &text, &data, &resident);
518 mmput(mm); 528 mmput(mm);
519 } 529 }
520 530
521 return sprintf(buffer,"%d %d %d %d %d %d %d\n", 531 return sprintf(buffer, "%d %d %d %d %d %d %d\n",
522 size, resident, shared, text, lib, data, 0); 532 size, resident, shared, text, lib, data, 0);
523} 533}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 46ea5d56e1b..ae3627337a9 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -67,7 +67,6 @@
67#include <linux/mount.h> 67#include <linux/mount.h>
68#include <linux/security.h> 68#include <linux/security.h>
69#include <linux/ptrace.h> 69#include <linux/ptrace.h>
70#include <linux/seccomp.h>
71#include <linux/cpuset.h> 70#include <linux/cpuset.h>
72#include <linux/audit.h> 71#include <linux/audit.h>
73#include <linux/poll.h> 72#include <linux/poll.h>
@@ -204,12 +203,17 @@ static int proc_pid_environ(struct task_struct *task, char * buffer)
204 int res = 0; 203 int res = 0;
205 struct mm_struct *mm = get_task_mm(task); 204 struct mm_struct *mm = get_task_mm(task);
206 if (mm) { 205 if (mm) {
207 unsigned int len = mm->env_end - mm->env_start; 206 unsigned int len;
207
208 res = -ESRCH;
209 if (!ptrace_may_attach(task))
210 goto out;
211
212 len = mm->env_end - mm->env_start;
208 if (len > PAGE_SIZE) 213 if (len > PAGE_SIZE)
209 len = PAGE_SIZE; 214 len = PAGE_SIZE;
210 res = access_process_vm(task, mm->env_start, buffer, len, 0); 215 res = access_process_vm(task, mm->env_start, buffer, len, 0);
211 if (!ptrace_may_attach(task)) 216out:
212 res = -ESRCH;
213 mmput(mm); 217 mmput(mm);
214 } 218 }
215 return res; 219 return res;
@@ -812,71 +816,6 @@ static const struct file_operations proc_loginuid_operations = {
812}; 816};
813#endif 817#endif
814 818
815#ifdef CONFIG_SECCOMP
816static ssize_t seccomp_read(struct file *file, char __user *buf,
817 size_t count, loff_t *ppos)
818{
819 struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode);
820 char __buf[20];
821 size_t len;
822
823 if (!tsk)
824 return -ESRCH;
825 /* no need to print the trailing zero, so use only len */
826 len = sprintf(__buf, "%u\n", tsk->seccomp.mode);
827 put_task_struct(tsk);
828
829 return simple_read_from_buffer(buf, count, ppos, __buf, len);
830}
831
832static ssize_t seccomp_write(struct file *file, const char __user *buf,
833 size_t count, loff_t *ppos)
834{
835 struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode);
836 char __buf[20], *end;
837 unsigned int seccomp_mode;
838 ssize_t result;
839
840 result = -ESRCH;
841 if (!tsk)
842 goto out_no_task;
843
844 /* can set it only once to be even more secure */
845 result = -EPERM;
846 if (unlikely(tsk->seccomp.mode))
847 goto out;
848
849 result = -EFAULT;
850 memset(__buf, 0, sizeof(__buf));
851 count = min(count, sizeof(__buf) - 1);
852 if (copy_from_user(__buf, buf, count))
853 goto out;
854
855 seccomp_mode = simple_strtoul(__buf, &end, 0);
856 if (*end == '\n')
857 end++;
858 result = -EINVAL;
859 if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) {
860 tsk->seccomp.mode = seccomp_mode;
861 set_tsk_thread_flag(tsk, TIF_SECCOMP);
862 } else
863 goto out;
864 result = -EIO;
865 if (unlikely(!(end - __buf)))
866 goto out;
867 result = end - __buf;
868out:
869 put_task_struct(tsk);
870out_no_task:
871 return result;
872}
873
874static const struct file_operations proc_seccomp_operations = {
875 .read = seccomp_read,
876 .write = seccomp_write,
877};
878#endif /* CONFIG_SECCOMP */
879
880#ifdef CONFIG_FAULT_INJECTION 819#ifdef CONFIG_FAULT_INJECTION
881static ssize_t proc_fault_inject_read(struct file * file, char __user * buf, 820static ssize_t proc_fault_inject_read(struct file * file, char __user * buf,
882 size_t count, loff_t *ppos) 821 size_t count, loff_t *ppos)
@@ -2037,9 +1976,6 @@ static const struct pid_entry tgid_base_stuff[] = {
2037 REG("numa_maps", S_IRUGO, numa_maps), 1976 REG("numa_maps", S_IRUGO, numa_maps),
2038#endif 1977#endif
2039 REG("mem", S_IRUSR|S_IWUSR, mem), 1978 REG("mem", S_IRUSR|S_IWUSR, mem),
2040#ifdef CONFIG_SECCOMP
2041 REG("seccomp", S_IRUSR|S_IWUSR, seccomp),
2042#endif
2043 LNK("cwd", cwd), 1979 LNK("cwd", cwd),
2044 LNK("root", root), 1980 LNK("root", root),
2045 LNK("exe", exe), 1981 LNK("exe", exe),
@@ -2324,9 +2260,6 @@ static const struct pid_entry tid_base_stuff[] = {
2324 REG("numa_maps", S_IRUGO, numa_maps), 2260 REG("numa_maps", S_IRUGO, numa_maps),
2325#endif 2261#endif
2326 REG("mem", S_IRUSR|S_IWUSR, mem), 2262 REG("mem", S_IRUSR|S_IWUSR, mem),
2327#ifdef CONFIG_SECCOMP
2328 REG("seccomp", S_IRUSR|S_IWUSR, seccomp),
2329#endif
2330 LNK("cwd", cwd), 2263 LNK("cwd", cwd),
2331 LNK("root", root), 2264 LNK("root", root),
2332 LNK("exe", exe), 2265 LNK("exe", exe),
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 8a40e15f5ec..b5e7155d30d 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -20,6 +20,7 @@
20#include <linux/namei.h> 20#include <linux/namei.h>
21#include <linux/bitops.h> 21#include <linux/bitops.h>
22#include <linux/spinlock.h> 22#include <linux/spinlock.h>
23#include <linux/completion.h>
23#include <asm/uaccess.h> 24#include <asm/uaccess.h>
24 25
25#include "internal.h" 26#include "internal.h"
@@ -529,12 +530,6 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
529 return -EAGAIN; 530 return -EAGAIN;
530 dp->low_ino = i; 531 dp->low_ino = i;
531 532
532 spin_lock(&proc_subdir_lock);
533 dp->next = dir->subdir;
534 dp->parent = dir;
535 dir->subdir = dp;
536 spin_unlock(&proc_subdir_lock);
537
538 if (S_ISDIR(dp->mode)) { 533 if (S_ISDIR(dp->mode)) {
539 if (dp->proc_iops == NULL) { 534 if (dp->proc_iops == NULL) {
540 dp->proc_fops = &proc_dir_operations; 535 dp->proc_fops = &proc_dir_operations;
@@ -550,6 +545,13 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
550 if (dp->proc_iops == NULL) 545 if (dp->proc_iops == NULL)
551 dp->proc_iops = &proc_file_inode_operations; 546 dp->proc_iops = &proc_file_inode_operations;
552 } 547 }
548
549 spin_lock(&proc_subdir_lock);
550 dp->next = dir->subdir;
551 dp->parent = dir;
552 dir->subdir = dp;
553 spin_unlock(&proc_subdir_lock);
554
553 return 0; 555 return 0;
554} 556}
555 557
@@ -613,6 +615,9 @@ static struct proc_dir_entry *proc_create(struct proc_dir_entry **parent,
613 ent->namelen = len; 615 ent->namelen = len;
614 ent->mode = mode; 616 ent->mode = mode;
615 ent->nlink = nlink; 617 ent->nlink = nlink;
618 ent->pde_users = 0;
619 spin_lock_init(&ent->pde_unload_lock);
620 ent->pde_unload_completion = NULL;
616 out: 621 out:
617 return ent; 622 return ent;
618} 623}
@@ -649,9 +654,6 @@ struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode,
649 654
650 ent = proc_create(&parent, name, S_IFDIR | mode, 2); 655 ent = proc_create(&parent, name, S_IFDIR | mode, 2);
651 if (ent) { 656 if (ent) {
652 ent->proc_fops = &proc_dir_operations;
653 ent->proc_iops = &proc_dir_inode_operations;
654
655 if (proc_register(parent, ent) < 0) { 657 if (proc_register(parent, ent) < 0) {
656 kfree(ent); 658 kfree(ent);
657 ent = NULL; 659 ent = NULL;
@@ -686,10 +688,6 @@ struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
686 688
687 ent = proc_create(&parent,name,mode,nlink); 689 ent = proc_create(&parent,name,mode,nlink);
688 if (ent) { 690 if (ent) {
689 if (S_ISDIR(mode)) {
690 ent->proc_fops = &proc_dir_operations;
691 ent->proc_iops = &proc_dir_inode_operations;
692 }
693 if (proc_register(parent, ent) < 0) { 691 if (proc_register(parent, ent) < 0) {
694 kfree(ent); 692 kfree(ent);
695 ent = NULL; 693 ent = NULL;
@@ -734,9 +732,35 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
734 de = *p; 732 de = *p;
735 *p = de->next; 733 *p = de->next;
736 de->next = NULL; 734 de->next = NULL;
735
736 spin_lock(&de->pde_unload_lock);
737 /*
738 * Stop accepting new callers into module. If you're
739 * dynamically allocating ->proc_fops, save a pointer somewhere.
740 */
741 de->proc_fops = NULL;
742 /* Wait until all existing callers into module are done. */
743 if (de->pde_users > 0) {
744 DECLARE_COMPLETION_ONSTACK(c);
745
746 if (!de->pde_unload_completion)
747 de->pde_unload_completion = &c;
748
749 spin_unlock(&de->pde_unload_lock);
750 spin_unlock(&proc_subdir_lock);
751
752 wait_for_completion(de->pde_unload_completion);
753
754 spin_lock(&proc_subdir_lock);
755 goto continue_removing;
756 }
757 spin_unlock(&de->pde_unload_lock);
758
759continue_removing:
737 if (S_ISDIR(de->mode)) 760 if (S_ISDIR(de->mode))
738 parent->nlink--; 761 parent->nlink--;
739 proc_kill_inodes(de); 762 if (!S_ISREG(de->mode))
763 proc_kill_inodes(de);
740 de->nlink = 0; 764 de->nlink = 0;
741 WARN_ON(de->subdir); 765 WARN_ON(de->subdir);
742 if (!atomic_read(&de->count)) 766 if (!atomic_read(&de->count))
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index d5ce65c68d7..dd28e86ab42 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -10,6 +10,7 @@
10#include <linux/mm.h> 10#include <linux/mm.h>
11#include <linux/string.h> 11#include <linux/string.h>
12#include <linux/stat.h> 12#include <linux/stat.h>
13#include <linux/completion.h>
13#include <linux/file.h> 14#include <linux/file.h>
14#include <linux/limits.h> 15#include <linux/limits.h>
15#include <linux/init.h> 16#include <linux/init.h>
@@ -140,6 +141,251 @@ static const struct super_operations proc_sops = {
140 .remount_fs = proc_remount, 141 .remount_fs = proc_remount,
141}; 142};
142 143
144static void pde_users_dec(struct proc_dir_entry *pde)
145{
146 spin_lock(&pde->pde_unload_lock);
147 pde->pde_users--;
148 if (pde->pde_unload_completion && pde->pde_users == 0)
149 complete(pde->pde_unload_completion);
150 spin_unlock(&pde->pde_unload_lock);
151}
152
153static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence)
154{
155 struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
156 loff_t rv = -EINVAL;
157 loff_t (*llseek)(struct file *, loff_t, int);
158
159 spin_lock(&pde->pde_unload_lock);
160 /*
161 * remove_proc_entry() is going to delete PDE (as part of module
162 * cleanup sequence). No new callers into module allowed.
163 */
164 if (!pde->proc_fops) {
165 spin_unlock(&pde->pde_unload_lock);
166 return rv;
167 }
168 /*
169 * Bump refcount so that remove_proc_entry will wail for ->llseek to
170 * complete.
171 */
172 pde->pde_users++;
173 /*
174 * Save function pointer under lock, to protect against ->proc_fops
175 * NULL'ifying right after ->pde_unload_lock is dropped.
176 */
177 llseek = pde->proc_fops->llseek;
178 spin_unlock(&pde->pde_unload_lock);
179
180 if (!llseek)
181 llseek = default_llseek;
182 rv = llseek(file, offset, whence);
183
184 pde_users_dec(pde);
185 return rv;
186}
187
188static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
189{
190 struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
191 ssize_t rv = -EIO;
192 ssize_t (*read)(struct file *, char __user *, size_t, loff_t *);
193
194 spin_lock(&pde->pde_unload_lock);
195 if (!pde->proc_fops) {
196 spin_unlock(&pde->pde_unload_lock);
197 return rv;
198 }
199 pde->pde_users++;
200 read = pde->proc_fops->read;
201 spin_unlock(&pde->pde_unload_lock);
202
203 if (read)
204 rv = read(file, buf, count, ppos);
205
206 pde_users_dec(pde);
207 return rv;
208}
209
210static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
211{
212 struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
213 ssize_t rv = -EIO;
214 ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *);
215
216 spin_lock(&pde->pde_unload_lock);
217 if (!pde->proc_fops) {
218 spin_unlock(&pde->pde_unload_lock);
219 return rv;
220 }
221 pde->pde_users++;
222 write = pde->proc_fops->write;
223 spin_unlock(&pde->pde_unload_lock);
224
225 if (write)
226 rv = write(file, buf, count, ppos);
227
228 pde_users_dec(pde);
229 return rv;
230}
231
232static unsigned int proc_reg_poll(struct file *file, struct poll_table_struct *pts)
233{
234 struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
235 unsigned int rv = 0;
236 unsigned int (*poll)(struct file *, struct poll_table_struct *);
237
238 spin_lock(&pde->pde_unload_lock);
239 if (!pde->proc_fops) {
240 spin_unlock(&pde->pde_unload_lock);
241 return rv;
242 }
243 pde->pde_users++;
244 poll = pde->proc_fops->poll;
245 spin_unlock(&pde->pde_unload_lock);
246
247 if (poll)
248 rv = poll(file, pts);
249
250 pde_users_dec(pde);
251 return rv;
252}
253
254static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
255{
256 struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
257 long rv = -ENOTTY;
258 long (*unlocked_ioctl)(struct file *, unsigned int, unsigned long);
259 int (*ioctl)(struct inode *, struct file *, unsigned int, unsigned long);
260
261 spin_lock(&pde->pde_unload_lock);
262 if (!pde->proc_fops) {
263 spin_unlock(&pde->pde_unload_lock);
264 return rv;
265 }
266 pde->pde_users++;
267 unlocked_ioctl = pde->proc_fops->unlocked_ioctl;
268 ioctl = pde->proc_fops->ioctl;
269 spin_unlock(&pde->pde_unload_lock);
270
271 if (unlocked_ioctl) {
272 rv = unlocked_ioctl(file, cmd, arg);
273 if (rv == -ENOIOCTLCMD)
274 rv = -EINVAL;
275 } else if (ioctl) {
276 lock_kernel();
277 rv = ioctl(file->f_path.dentry->d_inode, file, cmd, arg);
278 unlock_kernel();
279 }
280
281 pde_users_dec(pde);
282 return rv;
283}
284
285#ifdef CONFIG_COMPAT
286static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
287{
288 struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
289 long rv = -ENOTTY;
290 long (*compat_ioctl)(struct file *, unsigned int, unsigned long);
291
292 spin_lock(&pde->pde_unload_lock);
293 if (!pde->proc_fops) {
294 spin_unlock(&pde->pde_unload_lock);
295 return rv;
296 }
297 pde->pde_users++;
298 compat_ioctl = pde->proc_fops->compat_ioctl;
299 spin_unlock(&pde->pde_unload_lock);
300
301 if (compat_ioctl)
302 rv = compat_ioctl(file, cmd, arg);
303
304 pde_users_dec(pde);
305 return rv;
306}
307#endif
308
309static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma)
310{
311 struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
312 int rv = -EIO;
313 int (*mmap)(struct file *, struct vm_area_struct *);
314
315 spin_lock(&pde->pde_unload_lock);
316 if (!pde->proc_fops) {
317 spin_unlock(&pde->pde_unload_lock);
318 return rv;
319 }
320 pde->pde_users++;
321 mmap = pde->proc_fops->mmap;
322 spin_unlock(&pde->pde_unload_lock);
323
324 if (mmap)
325 rv = mmap(file, vma);
326
327 pde_users_dec(pde);
328 return rv;
329}
330
331static int proc_reg_open(struct inode *inode, struct file *file)
332{
333 struct proc_dir_entry *pde = PDE(inode);
334 int rv = 0;
335 int (*open)(struct inode *, struct file *);
336
337 spin_lock(&pde->pde_unload_lock);
338 if (!pde->proc_fops) {
339 spin_unlock(&pde->pde_unload_lock);
340 return rv;
341 }
342 pde->pde_users++;
343 open = pde->proc_fops->open;
344 spin_unlock(&pde->pde_unload_lock);
345
346 if (open)
347 rv = open(inode, file);
348
349 pde_users_dec(pde);
350 return rv;
351}
352
353static int proc_reg_release(struct inode *inode, struct file *file)
354{
355 struct proc_dir_entry *pde = PDE(inode);
356 int rv = 0;
357 int (*release)(struct inode *, struct file *);
358
359 spin_lock(&pde->pde_unload_lock);
360 if (!pde->proc_fops) {
361 spin_unlock(&pde->pde_unload_lock);
362 return rv;
363 }
364 pde->pde_users++;
365 release = pde->proc_fops->release;
366 spin_unlock(&pde->pde_unload_lock);
367
368 if (release)
369 rv = release(inode, file);
370
371 pde_users_dec(pde);
372 return rv;
373}
374
375static const struct file_operations proc_reg_file_ops = {
376 .llseek = proc_reg_llseek,
377 .read = proc_reg_read,
378 .write = proc_reg_write,
379 .poll = proc_reg_poll,
380 .unlocked_ioctl = proc_reg_unlocked_ioctl,
381#ifdef CONFIG_COMPAT
382 .compat_ioctl = proc_reg_compat_ioctl,
383#endif
384 .mmap = proc_reg_mmap,
385 .open = proc_reg_open,
386 .release = proc_reg_release,
387};
388
143struct inode *proc_get_inode(struct super_block *sb, unsigned int ino, 389struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
144 struct proc_dir_entry *de) 390 struct proc_dir_entry *de)
145{ 391{
@@ -166,8 +412,12 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
166 inode->i_nlink = de->nlink; 412 inode->i_nlink = de->nlink;
167 if (de->proc_iops) 413 if (de->proc_iops)
168 inode->i_op = de->proc_iops; 414 inode->i_op = de->proc_iops;
169 if (de->proc_fops) 415 if (de->proc_fops) {
170 inode->i_fop = de->proc_fops; 416 if (S_ISREG(inode->i_mode))
417 inode->i_fop = &proc_reg_file_ops;
418 else
419 inode->i_fop = de->proc_fops;
420 }
171 } 421 }
172 422
173 return inode; 423 return inode;
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 5fd49e47f83..d24b8d46059 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -105,6 +105,7 @@ static int uptime_read_proc(char *page, char **start, off_t off,
105 cputime_t idletime = cputime_add(init_task.utime, init_task.stime); 105 cputime_t idletime = cputime_add(init_task.utime, init_task.stime);
106 106
107 do_posix_clock_monotonic_gettime(&uptime); 107 do_posix_clock_monotonic_gettime(&uptime);
108 monotonic_to_bootbased(&uptime);
108 cputime_to_timespec(idletime, &idle); 109 cputime_to_timespec(idletime, &idle);
109 len = sprintf(page,"%lu.%02lu %lu.%02lu\n", 110 len = sprintf(page,"%lu.%02lu %lu.%02lu\n",
110 (unsigned long) uptime.tv_sec, 111 (unsigned long) uptime.tv_sec,
@@ -443,12 +444,12 @@ static int show_stat(struct seq_file *p, void *v)
443 unsigned long jif; 444 unsigned long jif;
444 cputime64_t user, nice, system, idle, iowait, irq, softirq, steal; 445 cputime64_t user, nice, system, idle, iowait, irq, softirq, steal;
445 u64 sum = 0; 446 u64 sum = 0;
447 struct timespec boottime;
446 448
447 user = nice = system = idle = iowait = 449 user = nice = system = idle = iowait =
448 irq = softirq = steal = cputime64_zero; 450 irq = softirq = steal = cputime64_zero;
449 jif = - wall_to_monotonic.tv_sec; 451 getboottime(&boottime);
450 if (wall_to_monotonic.tv_nsec) 452 jif = boottime.tv_sec;
451 --jif;
452 453
453 for_each_possible_cpu(i) { 454 for_each_possible_cpu(i) {
454 int j; 455 int j;
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c
index b3a473b0a19..22846225acf 100644
--- a/fs/proc/proc_tty.c
+++ b/fs/proc/proc_tty.c
@@ -69,7 +69,7 @@ static void show_tty_range(struct seq_file *m, struct tty_driver *p,
69 69
70static int show_tty_driver(struct seq_file *m, void *v) 70static int show_tty_driver(struct seq_file *m, void *v)
71{ 71{
72 struct tty_driver *p = v; 72 struct tty_driver *p = list_entry(v, struct tty_driver, tty_drivers);
73 dev_t from = MKDEV(p->major, p->minor_start); 73 dev_t from = MKDEV(p->major, p->minor_start);
74 dev_t to = from + p->num; 74 dev_t to = from + p->num;
75 75
@@ -106,22 +106,13 @@ static int show_tty_driver(struct seq_file *m, void *v)
106/* iterator */ 106/* iterator */
107static void *t_start(struct seq_file *m, loff_t *pos) 107static void *t_start(struct seq_file *m, loff_t *pos)
108{ 108{
109 struct list_head *p;
110 loff_t l = *pos;
111
112 mutex_lock(&tty_mutex); 109 mutex_lock(&tty_mutex);
113 list_for_each(p, &tty_drivers) 110 return seq_list_start(&tty_drivers, *pos);
114 if (!l--)
115 return list_entry(p, struct tty_driver, tty_drivers);
116 return NULL;
117} 111}
118 112
119static void *t_next(struct seq_file *m, void *v, loff_t *pos) 113static void *t_next(struct seq_file *m, void *v, loff_t *pos)
120{ 114{
121 struct list_head *p = ((struct tty_driver *)v)->tty_drivers.next; 115 return seq_list_next(v, &tty_drivers, pos);
122 (*pos)++;
123 return p==&tty_drivers ? NULL :
124 list_entry(p, struct tty_driver, tty_drivers);
125} 116}
126 117
127static void t_stop(struct seq_file *m, void *v) 118static void t_stop(struct seq_file *m, void *v)
diff --git a/fs/quota.c b/fs/quota.c
index 9f237d6182c..e6577ac15a6 100644
--- a/fs/quota.c
+++ b/fs/quota.c
@@ -10,12 +10,14 @@
10#include <linux/slab.h> 10#include <linux/slab.h>
11#include <asm/current.h> 11#include <asm/current.h>
12#include <asm/uaccess.h> 12#include <asm/uaccess.h>
13#include <linux/compat.h>
13#include <linux/kernel.h> 14#include <linux/kernel.h>
14#include <linux/security.h> 15#include <linux/security.h>
15#include <linux/syscalls.h> 16#include <linux/syscalls.h>
16#include <linux/buffer_head.h> 17#include <linux/buffer_head.h>
17#include <linux/capability.h> 18#include <linux/capability.h>
18#include <linux/quotaops.h> 19#include <linux/quotaops.h>
20#include <linux/types.h>
19 21
20/* Check validity of generic quotactl commands */ 22/* Check validity of generic quotactl commands */
21static int generic_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t id) 23static int generic_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t id)
@@ -384,3 +386,119 @@ asmlinkage long sys_quotactl(unsigned int cmd, const char __user *special, qid_t
384 386
385 return ret; 387 return ret;
386} 388}
389
390#if defined(CONFIG_X86_64) || defined(CONFIG_IA64)
391/*
392 * This code works only for 32 bit quota tools over 64 bit OS (x86_64, ia64)
393 * and is necessary due to alignment problems.
394 */
395struct compat_if_dqblk {
396 compat_u64 dqb_bhardlimit;
397 compat_u64 dqb_bsoftlimit;
398 compat_u64 dqb_curspace;
399 compat_u64 dqb_ihardlimit;
400 compat_u64 dqb_isoftlimit;
401 compat_u64 dqb_curinodes;
402 compat_u64 dqb_btime;
403 compat_u64 dqb_itime;
404 compat_uint_t dqb_valid;
405};
406
407/* XFS structures */
408struct compat_fs_qfilestat {
409 compat_u64 dqb_bhardlimit;
410 compat_u64 qfs_nblks;
411 compat_uint_t qfs_nextents;
412};
413
414struct compat_fs_quota_stat {
415 __s8 qs_version;
416 __u16 qs_flags;
417 __s8 qs_pad;
418 struct compat_fs_qfilestat qs_uquota;
419 struct compat_fs_qfilestat qs_gquota;
420 compat_uint_t qs_incoredqs;
421 compat_int_t qs_btimelimit;
422 compat_int_t qs_itimelimit;
423 compat_int_t qs_rtbtimelimit;
424 __u16 qs_bwarnlimit;
425 __u16 qs_iwarnlimit;
426};
427
428asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special,
429 qid_t id, void __user *addr)
430{
431 unsigned int cmds;
432 struct if_dqblk __user *dqblk;
433 struct compat_if_dqblk __user *compat_dqblk;
434 struct fs_quota_stat __user *fsqstat;
435 struct compat_fs_quota_stat __user *compat_fsqstat;
436 compat_uint_t data;
437 u16 xdata;
438 long ret;
439
440 cmds = cmd >> SUBCMDSHIFT;
441
442 switch (cmds) {
443 case Q_GETQUOTA:
444 dqblk = compat_alloc_user_space(sizeof(struct if_dqblk));
445 compat_dqblk = addr;
446 ret = sys_quotactl(cmd, special, id, dqblk);
447 if (ret)
448 break;
449 if (copy_in_user(compat_dqblk, dqblk, sizeof(*compat_dqblk)) ||
450 get_user(data, &dqblk->dqb_valid) ||
451 put_user(data, &compat_dqblk->dqb_valid))
452 ret = -EFAULT;
453 break;
454 case Q_SETQUOTA:
455 dqblk = compat_alloc_user_space(sizeof(struct if_dqblk));
456 compat_dqblk = addr;
457 ret = -EFAULT;
458 if (copy_in_user(dqblk, compat_dqblk, sizeof(*compat_dqblk)) ||
459 get_user(data, &compat_dqblk->dqb_valid) ||
460 put_user(data, &dqblk->dqb_valid))
461 break;
462 ret = sys_quotactl(cmd, special, id, dqblk);
463 break;
464 case Q_XGETQSTAT:
465 fsqstat = compat_alloc_user_space(sizeof(struct fs_quota_stat));
466 compat_fsqstat = addr;
467 ret = sys_quotactl(cmd, special, id, fsqstat);
468 if (ret)
469 break;
470 ret = -EFAULT;
471 /* Copying qs_version, qs_flags, qs_pad */
472 if (copy_in_user(compat_fsqstat, fsqstat,
473 offsetof(struct compat_fs_quota_stat, qs_uquota)))
474 break;
475 /* Copying qs_uquota */
476 if (copy_in_user(&compat_fsqstat->qs_uquota,
477 &fsqstat->qs_uquota,
478 sizeof(compat_fsqstat->qs_uquota)) ||
479 get_user(data, &fsqstat->qs_uquota.qfs_nextents) ||
480 put_user(data, &compat_fsqstat->qs_uquota.qfs_nextents))
481 break;
482 /* Copying qs_gquota */
483 if (copy_in_user(&compat_fsqstat->qs_gquota,
484 &fsqstat->qs_gquota,
485 sizeof(compat_fsqstat->qs_gquota)) ||
486 get_user(data, &fsqstat->qs_gquota.qfs_nextents) ||
487 put_user(data, &compat_fsqstat->qs_gquota.qfs_nextents))
488 break;
489 /* Copying the rest */
490 if (copy_in_user(&compat_fsqstat->qs_incoredqs,
491 &fsqstat->qs_incoredqs,
492 sizeof(struct compat_fs_quota_stat) -
493 offsetof(struct compat_fs_quota_stat, qs_incoredqs)) ||
494 get_user(xdata, &fsqstat->qs_iwarnlimit) ||
495 put_user(xdata, &compat_fsqstat->qs_iwarnlimit))
496 break;
497 ret = 0;
498 break;
499 default:
500 ret = sys_quotactl(cmd, special, id, addr);
501 }
502 return ret;
503}
504#endif
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 30eebfb1b2d..2070aeee2a5 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -1305,7 +1305,6 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t
1305 if (get_inode_item_key_version (inode) == KEY_FORMAT_3_5 && 1305 if (get_inode_item_key_version (inode) == KEY_FORMAT_3_5 &&
1306 *ppos + count > MAX_NON_LFS) { 1306 *ppos + count > MAX_NON_LFS) {
1307 if (*ppos >= MAX_NON_LFS) { 1307 if (*ppos >= MAX_NON_LFS) {
1308 send_sig(SIGXFSZ, current, 0);
1309 return -EFBIG; 1308 return -EFBIG;
1310 } 1309 }
1311 if (count > MAX_NON_LFS - (unsigned long)*ppos) 1310 if (count > MAX_NON_LFS - (unsigned long)*ppos)
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 49194a4e6b9..bbb19be260c 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -177,21 +177,23 @@ EXPORT_SYMBOL(seq_read);
177 177
178static int traverse(struct seq_file *m, loff_t offset) 178static int traverse(struct seq_file *m, loff_t offset)
179{ 179{
180 loff_t pos = 0; 180 loff_t pos = 0, index;
181 int error = 0; 181 int error = 0;
182 void *p; 182 void *p;
183 183
184 m->version = 0; 184 m->version = 0;
185 m->index = 0; 185 index = 0;
186 m->count = m->from = 0; 186 m->count = m->from = 0;
187 if (!offset) 187 if (!offset) {
188 m->index = index;
188 return 0; 189 return 0;
190 }
189 if (!m->buf) { 191 if (!m->buf) {
190 m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL); 192 m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL);
191 if (!m->buf) 193 if (!m->buf)
192 return -ENOMEM; 194 return -ENOMEM;
193 } 195 }
194 p = m->op->start(m, &m->index); 196 p = m->op->start(m, &index);
195 while (p) { 197 while (p) {
196 error = PTR_ERR(p); 198 error = PTR_ERR(p);
197 if (IS_ERR(p)) 199 if (IS_ERR(p))
@@ -204,15 +206,17 @@ static int traverse(struct seq_file *m, loff_t offset)
204 if (pos + m->count > offset) { 206 if (pos + m->count > offset) {
205 m->from = offset - pos; 207 m->from = offset - pos;
206 m->count -= m->from; 208 m->count -= m->from;
209 m->index = index;
207 break; 210 break;
208 } 211 }
209 pos += m->count; 212 pos += m->count;
210 m->count = 0; 213 m->count = 0;
211 if (pos == offset) { 214 if (pos == offset) {
212 m->index++; 215 index++;
216 m->index = index;
213 break; 217 break;
214 } 218 }
215 p = m->op->next(m, p, &m->index); 219 p = m->op->next(m, p, &index);
216 } 220 }
217 m->op->stop(m, p); 221 m->op->stop(m, p);
218 return error; 222 return error;
@@ -260,8 +264,8 @@ loff_t seq_lseek(struct file *file, loff_t offset, int origin)
260 } 264 }
261 } 265 }
262 } 266 }
263 mutex_unlock(&m->lock);
264 file->f_version = m->version; 267 file->f_version = m->version;
268 mutex_unlock(&m->lock);
265 return retval; 269 return retval;
266} 270}
267EXPORT_SYMBOL(seq_lseek); 271EXPORT_SYMBOL(seq_lseek);
diff --git a/fs/splice.c b/fs/splice.c
index ed2ce995475..53fc2082a46 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -28,6 +28,7 @@
28#include <linux/module.h> 28#include <linux/module.h>
29#include <linux/syscalls.h> 29#include <linux/syscalls.h>
30#include <linux/uio.h> 30#include <linux/uio.h>
31#include <linux/security.h>
31 32
32/* 33/*
33 * Attempt to steal a page from a pipe buffer. This should perhaps go into 34 * Attempt to steal a page from a pipe buffer. This should perhaps go into
@@ -491,7 +492,7 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
491 492
492 ret = 0; 493 ret = 0;
493 spliced = 0; 494 spliced = 0;
494 while (len) { 495 while (len && !spliced) {
495 ret = __generic_file_splice_read(in, ppos, pipe, len, flags); 496 ret = __generic_file_splice_read(in, ppos, pipe, len, flags);
496 497
497 if (ret < 0) 498 if (ret < 0)
@@ -961,6 +962,10 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
961 if (unlikely(ret < 0)) 962 if (unlikely(ret < 0))
962 return ret; 963 return ret;
963 964
965 ret = security_file_permission(out, MAY_WRITE);
966 if (unlikely(ret < 0))
967 return ret;
968
964 return out->f_op->splice_write(pipe, out, ppos, len, flags); 969 return out->f_op->splice_write(pipe, out, ppos, len, flags);
965} 970}
966 971
@@ -983,6 +988,10 @@ static long do_splice_to(struct file *in, loff_t *ppos,
983 if (unlikely(ret < 0)) 988 if (unlikely(ret < 0))
984 return ret; 989 return ret;
985 990
991 ret = security_file_permission(in, MAY_READ);
992 if (unlikely(ret < 0))
993 return ret;
994
986 return in->f_op->splice_read(in, ppos, pipe, len, flags); 995 return in->f_op->splice_read(in, ppos, pipe, len, flags);
987} 996}
988 997
@@ -1051,15 +1060,11 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
1051 sd->flags &= ~SPLICE_F_NONBLOCK; 1060 sd->flags &= ~SPLICE_F_NONBLOCK;
1052 1061
1053 while (len) { 1062 while (len) {
1054 size_t read_len, max_read_len; 1063 size_t read_len;
1055 1064 loff_t pos = sd->pos;
1056 /*
1057 * Do at most PIPE_BUFFERS pages worth of transfer:
1058 */
1059 max_read_len = min(len, (size_t)(PIPE_BUFFERS*PAGE_SIZE));
1060 1065
1061 ret = do_splice_to(in, &sd->pos, pipe, max_read_len, flags); 1066 ret = do_splice_to(in, &pos, pipe, len, flags);
1062 if (unlikely(ret < 0)) 1067 if (unlikely(ret <= 0))
1063 goto out_release; 1068 goto out_release;
1064 1069
1065 read_len = ret; 1070 read_len = ret;
@@ -1071,26 +1076,18 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
1071 * could get stuck data in the internal pipe: 1076 * could get stuck data in the internal pipe:
1072 */ 1077 */
1073 ret = actor(pipe, sd); 1078 ret = actor(pipe, sd);
1074 if (unlikely(ret < 0)) 1079 if (unlikely(ret <= 0))
1075 goto out_release; 1080 goto out_release;
1076 1081
1077 bytes += ret; 1082 bytes += ret;
1078 len -= ret; 1083 len -= ret;
1084 sd->pos = pos;
1079 1085
1080 /* 1086 if (ret < read_len)
1081 * In nonblocking mode, if we got back a short read then 1087 goto out_release;
1082 * that was due to either an IO error or due to the
1083 * pagecache entry not being there. In the IO error case
1084 * the _next_ splice attempt will produce a clean IO error
1085 * return value (not a short read), so in both cases it's
1086 * correct to break out of the loop here:
1087 */
1088 if ((flags & SPLICE_F_NONBLOCK) && (read_len < max_read_len))
1089 break;
1090 } 1088 }
1091 1089
1092 pipe->nrbufs = pipe->curbuf = 0; 1090 pipe->nrbufs = pipe->curbuf = 0;
1093
1094 return bytes; 1091 return bytes;
1095 1092
1096out_release: 1093out_release:
@@ -1152,10 +1149,12 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
1152 .pos = *ppos, 1149 .pos = *ppos,
1153 .u.file = out, 1150 .u.file = out,
1154 }; 1151 };
1155 size_t ret; 1152 long ret;
1156 1153
1157 ret = splice_direct_to_actor(in, &sd, direct_splice_actor); 1154 ret = splice_direct_to_actor(in, &sd, direct_splice_actor);
1158 *ppos = sd.pos; 1155 if (ret > 0)
1156 *ppos += ret;
1157
1159 return ret; 1158 return ret;
1160} 1159}
1161 1160
diff --git a/fs/super.c b/fs/super.c
index 5260d620c55..fc8ebedc6be 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -884,6 +884,7 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
884 error = type->get_sb(type, flags, name, data, mnt); 884 error = type->get_sb(type, flags, name, data, mnt);
885 if (error < 0) 885 if (error < 0)
886 goto out_free_secdata; 886 goto out_free_secdata;
887 BUG_ON(!mnt->mnt_sb);
887 888
888 error = security_sb_kern_mount(mnt->mnt_sb, secdata); 889 error = security_sb_kern_mount(mnt->mnt_sb, secdata);
889 if (error) 890 if (error)
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index d3b9f5f07db..135353f8a29 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -20,29 +20,41 @@
20 20
21#include "sysfs.h" 21#include "sysfs.h"
22 22
23struct bin_buffer {
24 struct mutex mutex;
25 void *buffer;
26 int mmapped;
27};
28
23static int 29static int
24fill_read(struct dentry *dentry, char *buffer, loff_t off, size_t count) 30fill_read(struct dentry *dentry, char *buffer, loff_t off, size_t count)
25{ 31{
26 struct bin_attribute * attr = to_bin_attr(dentry); 32 struct sysfs_dirent *attr_sd = dentry->d_fsdata;
27 struct kobject * kobj = to_kobj(dentry->d_parent); 33 struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
34 struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
35 int rc;
36
37 /* need attr_sd for attr, its parent for kobj */
38 if (!sysfs_get_active_two(attr_sd))
39 return -ENODEV;
28 40
29 if (!attr->read) 41 rc = -EIO;
30 return -EIO; 42 if (attr->read)
43 rc = attr->read(kobj, attr, buffer, off, count);
31 44
32 return attr->read(kobj, buffer, off, count); 45 sysfs_put_active_two(attr_sd);
46
47 return rc;
33} 48}
34 49
35static ssize_t 50static ssize_t
36read(struct file * file, char __user * userbuf, size_t count, loff_t * off) 51read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off)
37{ 52{
38 char *buffer = file->private_data; 53 struct bin_buffer *bb = file->private_data;
39 struct dentry *dentry = file->f_path.dentry; 54 struct dentry *dentry = file->f_path.dentry;
40 int size = dentry->d_inode->i_size; 55 int size = dentry->d_inode->i_size;
41 loff_t offs = *off; 56 loff_t offs = *off;
42 int ret; 57 int count = min_t(size_t, bytes, PAGE_SIZE);
43
44 if (count > PAGE_SIZE)
45 count = PAGE_SIZE;
46 58
47 if (size) { 59 if (size) {
48 if (offs > size) 60 if (offs > size)
@@ -51,43 +63,56 @@ read(struct file * file, char __user * userbuf, size_t count, loff_t * off)
51 count = size - offs; 63 count = size - offs;
52 } 64 }
53 65
54 ret = fill_read(dentry, buffer, offs, count); 66 mutex_lock(&bb->mutex);
55 if (ret < 0) 67
56 return ret; 68 count = fill_read(dentry, bb->buffer, offs, count);
57 count = ret; 69 if (count < 0)
70 goto out_unlock;
58 71
59 if (copy_to_user(userbuf, buffer, count)) 72 if (copy_to_user(userbuf, bb->buffer, count)) {
60 return -EFAULT; 73 count = -EFAULT;
74 goto out_unlock;
75 }
61 76
62 pr_debug("offs = %lld, *off = %lld, count = %zd\n", offs, *off, count); 77 pr_debug("offs = %lld, *off = %lld, count = %d\n", offs, *off, count);
63 78
64 *off = offs + count; 79 *off = offs + count;
65 80
81 out_unlock:
82 mutex_unlock(&bb->mutex);
66 return count; 83 return count;
67} 84}
68 85
69static int 86static int
70flush_write(struct dentry *dentry, char *buffer, loff_t offset, size_t count) 87flush_write(struct dentry *dentry, char *buffer, loff_t offset, size_t count)
71{ 88{
72 struct bin_attribute *attr = to_bin_attr(dentry); 89 struct sysfs_dirent *attr_sd = dentry->d_fsdata;
73 struct kobject *kobj = to_kobj(dentry->d_parent); 90 struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
91 struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
92 int rc;
93
94 /* need attr_sd for attr, its parent for kobj */
95 if (!sysfs_get_active_two(attr_sd))
96 return -ENODEV;
97
98 rc = -EIO;
99 if (attr->write)
100 rc = attr->write(kobj, attr, buffer, offset, count);
74 101
75 if (!attr->write) 102 sysfs_put_active_two(attr_sd);
76 return -EIO;
77 103
78 return attr->write(kobj, buffer, offset, count); 104 return rc;
79} 105}
80 106
81static ssize_t write(struct file * file, const char __user * userbuf, 107static ssize_t write(struct file *file, const char __user *userbuf,
82 size_t count, loff_t * off) 108 size_t bytes, loff_t *off)
83{ 109{
84 char *buffer = file->private_data; 110 struct bin_buffer *bb = file->private_data;
85 struct dentry *dentry = file->f_path.dentry; 111 struct dentry *dentry = file->f_path.dentry;
86 int size = dentry->d_inode->i_size; 112 int size = dentry->d_inode->i_size;
87 loff_t offs = *off; 113 loff_t offs = *off;
114 int count = min_t(size_t, bytes, PAGE_SIZE);
88 115
89 if (count > PAGE_SIZE)
90 count = PAGE_SIZE;
91 if (size) { 116 if (size) {
92 if (offs > size) 117 if (offs > size)
93 return 0; 118 return 0;
@@ -95,72 +120,100 @@ static ssize_t write(struct file * file, const char __user * userbuf,
95 count = size - offs; 120 count = size - offs;
96 } 121 }
97 122
98 if (copy_from_user(buffer, userbuf, count)) 123 mutex_lock(&bb->mutex);
99 return -EFAULT;
100 124
101 count = flush_write(dentry, buffer, offs, count); 125 if (copy_from_user(bb->buffer, userbuf, count)) {
126 count = -EFAULT;
127 goto out_unlock;
128 }
129
130 count = flush_write(dentry, bb->buffer, offs, count);
102 if (count > 0) 131 if (count > 0)
103 *off = offs + count; 132 *off = offs + count;
133
134 out_unlock:
135 mutex_unlock(&bb->mutex);
104 return count; 136 return count;
105} 137}
106 138
107static int mmap(struct file *file, struct vm_area_struct *vma) 139static int mmap(struct file *file, struct vm_area_struct *vma)
108{ 140{
109 struct dentry *dentry = file->f_path.dentry; 141 struct bin_buffer *bb = file->private_data;
110 struct bin_attribute *attr = to_bin_attr(dentry); 142 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
111 struct kobject *kobj = to_kobj(dentry->d_parent); 143 struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
144 struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
145 int rc;
146
147 mutex_lock(&bb->mutex);
148
149 /* need attr_sd for attr, its parent for kobj */
150 if (!sysfs_get_active_two(attr_sd))
151 return -ENODEV;
112 152
113 if (!attr->mmap) 153 rc = -EINVAL;
114 return -EINVAL; 154 if (attr->mmap)
155 rc = attr->mmap(kobj, attr, vma);
115 156
116 return attr->mmap(kobj, attr, vma); 157 if (rc == 0 && !bb->mmapped)
158 bb->mmapped = 1;
159 else
160 sysfs_put_active_two(attr_sd);
161
162 mutex_unlock(&bb->mutex);
163
164 return rc;
117} 165}
118 166
119static int open(struct inode * inode, struct file * file) 167static int open(struct inode * inode, struct file * file)
120{ 168{
121 struct kobject *kobj = sysfs_get_kobject(file->f_path.dentry->d_parent); 169 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
122 struct bin_attribute * attr = to_bin_attr(file->f_path.dentry); 170 struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
123 int error = -EINVAL; 171 struct bin_buffer *bb = NULL;
124 172 int error;
125 if (!kobj || !attr)
126 goto Done;
127 173
128 /* Grab the module reference for this attribute if we have one */ 174 /* need attr_sd for attr */
129 error = -ENODEV; 175 if (!sysfs_get_active(attr_sd))
130 if (!try_module_get(attr->attr.owner)) 176 return -ENODEV;
131 goto Done;
132 177
133 error = -EACCES; 178 error = -EACCES;
134 if ((file->f_mode & FMODE_WRITE) && !(attr->write || attr->mmap)) 179 if ((file->f_mode & FMODE_WRITE) && !(attr->write || attr->mmap))
135 goto Error; 180 goto err_out;
136 if ((file->f_mode & FMODE_READ) && !(attr->read || attr->mmap)) 181 if ((file->f_mode & FMODE_READ) && !(attr->read || attr->mmap))
137 goto Error; 182 goto err_out;
138 183
139 error = -ENOMEM; 184 error = -ENOMEM;
140 file->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL); 185 bb = kzalloc(sizeof(*bb), GFP_KERNEL);
141 if (!file->private_data) 186 if (!bb)
142 goto Error; 187 goto err_out;
143 188
144 error = 0; 189 bb->buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
145 goto Done; 190 if (!bb->buffer)
146 191 goto err_out;
147 Error: 192
148 module_put(attr->attr.owner); 193 mutex_init(&bb->mutex);
149 Done: 194 file->private_data = bb;
150 if (error) 195
151 kobject_put(kobj); 196 /* open succeeded, put active reference and pin attr_sd */
197 sysfs_put_active(attr_sd);
198 sysfs_get(attr_sd);
199 return 0;
200
201 err_out:
202 sysfs_put_active(attr_sd);
203 kfree(bb);
152 return error; 204 return error;
153} 205}
154 206
155static int release(struct inode * inode, struct file * file) 207static int release(struct inode * inode, struct file * file)
156{ 208{
157 struct kobject * kobj = to_kobj(file->f_path.dentry->d_parent); 209 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
158 struct bin_attribute * attr = to_bin_attr(file->f_path.dentry); 210 struct bin_buffer *bb = file->private_data;
159 u8 * buffer = file->private_data; 211
160 212 if (bb->mmapped)
161 kobject_put(kobj); 213 sysfs_put_active_two(attr_sd);
162 module_put(attr->attr.owner); 214 sysfs_put(attr_sd);
163 kfree(buffer); 215 kfree(bb->buffer);
216 kfree(bb);
164 return 0; 217 return 0;
165} 218}
166 219
@@ -181,9 +234,9 @@ const struct file_operations bin_fops = {
181 234
182int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr) 235int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr)
183{ 236{
184 BUG_ON(!kobj || !kobj->dentry || !attr); 237 BUG_ON(!kobj || !kobj->sd || !attr);
185 238
186 return sysfs_add_file(kobj->dentry, &attr->attr, SYSFS_KOBJ_BIN_ATTR); 239 return sysfs_add_file(kobj->sd, &attr->attr, SYSFS_KOBJ_BIN_ATTR);
187} 240}
188 241
189 242
@@ -195,7 +248,7 @@ int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr)
195 248
196void sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr) 249void sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr)
197{ 250{
198 if (sysfs_hash_and_remove(kobj->dentry, attr->attr.name) < 0) { 251 if (sysfs_hash_and_remove(kobj->sd, attr->attr.name) < 0) {
199 printk(KERN_ERR "%s: " 252 printk(KERN_ERR "%s: "
200 "bad dentry or inode or no such file: \"%s\"\n", 253 "bad dentry or inode or no such file: \"%s\"\n",
201 __FUNCTION__, attr->attr.name); 254 __FUNCTION__, attr->attr.name);
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index c4342a01997..aee966c44aa 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -9,21 +9,337 @@
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/kobject.h> 10#include <linux/kobject.h>
11#include <linux/namei.h> 11#include <linux/namei.h>
12#include <linux/idr.h>
13#include <linux/completion.h>
12#include <asm/semaphore.h> 14#include <asm/semaphore.h>
13#include "sysfs.h" 15#include "sysfs.h"
14 16
15DECLARE_RWSEM(sysfs_rename_sem); 17DEFINE_MUTEX(sysfs_mutex);
16spinlock_t sysfs_lock = SPIN_LOCK_UNLOCKED; 18spinlock_t sysfs_assoc_lock = SPIN_LOCK_UNLOCKED;
19
20static spinlock_t sysfs_ino_lock = SPIN_LOCK_UNLOCKED;
21static DEFINE_IDA(sysfs_ino_ida);
22
23/**
24 * sysfs_link_sibling - link sysfs_dirent into sibling list
25 * @sd: sysfs_dirent of interest
26 *
27 * Link @sd into its sibling list which starts from
28 * sd->s_parent->s_children.
29 *
30 * Locking:
31 * mutex_lock(sysfs_mutex)
32 */
33void sysfs_link_sibling(struct sysfs_dirent *sd)
34{
35 struct sysfs_dirent *parent_sd = sd->s_parent;
36
37 BUG_ON(sd->s_sibling);
38 sd->s_sibling = parent_sd->s_children;
39 parent_sd->s_children = sd;
40}
41
42/**
43 * sysfs_unlink_sibling - unlink sysfs_dirent from sibling list
44 * @sd: sysfs_dirent of interest
45 *
46 * Unlink @sd from its sibling list which starts from
47 * sd->s_parent->s_children.
48 *
49 * Locking:
50 * mutex_lock(sysfs_mutex)
51 */
52void sysfs_unlink_sibling(struct sysfs_dirent *sd)
53{
54 struct sysfs_dirent **pos;
55
56 for (pos = &sd->s_parent->s_children; *pos; pos = &(*pos)->s_sibling) {
57 if (*pos == sd) {
58 *pos = sd->s_sibling;
59 sd->s_sibling = NULL;
60 break;
61 }
62 }
63}
64
65/**
66 * sysfs_get_dentry - get dentry for the given sysfs_dirent
67 * @sd: sysfs_dirent of interest
68 *
69 * Get dentry for @sd. Dentry is looked up if currently not
70 * present. This function climbs sysfs_dirent tree till it
71 * reaches a sysfs_dirent with valid dentry attached and descends
72 * down from there looking up dentry for each step.
73 *
74 * LOCKING:
75 * Kernel thread context (may sleep)
76 *
77 * RETURNS:
78 * Pointer to found dentry on success, ERR_PTR() value on error.
79 */
80struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
81{
82 struct sysfs_dirent *cur;
83 struct dentry *parent_dentry, *dentry;
84 int i, depth;
85
86 /* Find the first parent which has valid s_dentry and get the
87 * dentry.
88 */
89 mutex_lock(&sysfs_mutex);
90 restart0:
91 spin_lock(&sysfs_assoc_lock);
92 restart1:
93 spin_lock(&dcache_lock);
94
95 dentry = NULL;
96 depth = 0;
97 cur = sd;
98 while (!cur->s_dentry || !cur->s_dentry->d_inode) {
99 if (cur->s_flags & SYSFS_FLAG_REMOVED) {
100 dentry = ERR_PTR(-ENOENT);
101 depth = 0;
102 break;
103 }
104 cur = cur->s_parent;
105 depth++;
106 }
107 if (!IS_ERR(dentry))
108 dentry = dget_locked(cur->s_dentry);
109
110 spin_unlock(&dcache_lock);
111 spin_unlock(&sysfs_assoc_lock);
112
113 /* from the found dentry, look up depth times */
114 while (depth--) {
115 /* find and get depth'th ancestor */
116 for (cur = sd, i = 0; cur && i < depth; i++)
117 cur = cur->s_parent;
118
119 /* This can happen if tree structure was modified due
120 * to move/rename. Restart.
121 */
122 if (i != depth) {
123 dput(dentry);
124 goto restart0;
125 }
126
127 sysfs_get(cur);
128
129 mutex_unlock(&sysfs_mutex);
130
131 /* look it up */
132 parent_dentry = dentry;
133 dentry = lookup_one_len_kern(cur->s_name, parent_dentry,
134 strlen(cur->s_name));
135 dput(parent_dentry);
136
137 if (IS_ERR(dentry)) {
138 sysfs_put(cur);
139 return dentry;
140 }
141
142 mutex_lock(&sysfs_mutex);
143 spin_lock(&sysfs_assoc_lock);
144
145 /* This, again, can happen if tree structure has
146 * changed and we looked up the wrong thing. Restart.
147 */
148 if (cur->s_dentry != dentry) {
149 dput(dentry);
150 sysfs_put(cur);
151 goto restart1;
152 }
153
154 spin_unlock(&sysfs_assoc_lock);
155
156 sysfs_put(cur);
157 }
158
159 mutex_unlock(&sysfs_mutex);
160 return dentry;
161}
162
163/**
164 * sysfs_get_active - get an active reference to sysfs_dirent
165 * @sd: sysfs_dirent to get an active reference to
166 *
167 * Get an active reference of @sd. This function is noop if @sd
168 * is NULL.
169 *
170 * RETURNS:
171 * Pointer to @sd on success, NULL on failure.
172 */
173struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd)
174{
175 if (unlikely(!sd))
176 return NULL;
177
178 while (1) {
179 int v, t;
180
181 v = atomic_read(&sd->s_active);
182 if (unlikely(v < 0))
183 return NULL;
184
185 t = atomic_cmpxchg(&sd->s_active, v, v + 1);
186 if (likely(t == v))
187 return sd;
188 if (t < 0)
189 return NULL;
190
191 cpu_relax();
192 }
193}
194
195/**
196 * sysfs_put_active - put an active reference to sysfs_dirent
197 * @sd: sysfs_dirent to put an active reference to
198 *
199 * Put an active reference to @sd. This function is noop if @sd
200 * is NULL.
201 */
202void sysfs_put_active(struct sysfs_dirent *sd)
203{
204 struct completion *cmpl;
205 int v;
206
207 if (unlikely(!sd))
208 return;
209
210 v = atomic_dec_return(&sd->s_active);
211 if (likely(v != SD_DEACTIVATED_BIAS))
212 return;
213
214 /* atomic_dec_return() is a mb(), we'll always see the updated
215 * sd->s_sibling.
216 */
217 cmpl = (void *)sd->s_sibling;
218 complete(cmpl);
219}
220
221/**
222 * sysfs_get_active_two - get active references to sysfs_dirent and parent
223 * @sd: sysfs_dirent of interest
224 *
225 * Get active reference to @sd and its parent. Parent's active
226 * reference is grabbed first. This function is noop if @sd is
227 * NULL.
228 *
229 * RETURNS:
230 * Pointer to @sd on success, NULL on failure.
231 */
232struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd)
233{
234 if (sd) {
235 if (sd->s_parent && unlikely(!sysfs_get_active(sd->s_parent)))
236 return NULL;
237 if (unlikely(!sysfs_get_active(sd))) {
238 sysfs_put_active(sd->s_parent);
239 return NULL;
240 }
241 }
242 return sd;
243}
244
245/**
246 * sysfs_put_active_two - put active references to sysfs_dirent and parent
247 * @sd: sysfs_dirent of interest
248 *
249 * Put active references to @sd and its parent. This function is
250 * noop if @sd is NULL.
251 */
252void sysfs_put_active_two(struct sysfs_dirent *sd)
253{
254 if (sd) {
255 sysfs_put_active(sd);
256 sysfs_put_active(sd->s_parent);
257 }
258}
259
260/**
261 * sysfs_deactivate - deactivate sysfs_dirent
262 * @sd: sysfs_dirent to deactivate
263 *
264 * Deny new active references and drain existing ones.
265 */
266static void sysfs_deactivate(struct sysfs_dirent *sd)
267{
268 DECLARE_COMPLETION_ONSTACK(wait);
269 int v;
270
271 BUG_ON(sd->s_sibling || !(sd->s_flags & SYSFS_FLAG_REMOVED));
272 sd->s_sibling = (void *)&wait;
273
274 /* atomic_add_return() is a mb(), put_active() will always see
275 * the updated sd->s_sibling.
276 */
277 v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active);
278
279 if (v != SD_DEACTIVATED_BIAS)
280 wait_for_completion(&wait);
281
282 sd->s_sibling = NULL;
283}
284
285static int sysfs_alloc_ino(ino_t *pino)
286{
287 int ino, rc;
288
289 retry:
290 spin_lock(&sysfs_ino_lock);
291 rc = ida_get_new_above(&sysfs_ino_ida, 2, &ino);
292 spin_unlock(&sysfs_ino_lock);
293
294 if (rc == -EAGAIN) {
295 if (ida_pre_get(&sysfs_ino_ida, GFP_KERNEL))
296 goto retry;
297 rc = -ENOMEM;
298 }
299
300 *pino = ino;
301 return rc;
302}
303
304static void sysfs_free_ino(ino_t ino)
305{
306 spin_lock(&sysfs_ino_lock);
307 ida_remove(&sysfs_ino_ida, ino);
308 spin_unlock(&sysfs_ino_lock);
309}
310
311void release_sysfs_dirent(struct sysfs_dirent * sd)
312{
313 struct sysfs_dirent *parent_sd;
314
315 repeat:
316 /* Moving/renaming is always done while holding reference.
317 * sd->s_parent won't change beneath us.
318 */
319 parent_sd = sd->s_parent;
320
321 if (sysfs_type(sd) == SYSFS_KOBJ_LINK)
322 sysfs_put(sd->s_elem.symlink.target_sd);
323 if (sysfs_type(sd) & SYSFS_COPY_NAME)
324 kfree(sd->s_name);
325 kfree(sd->s_iattr);
326 sysfs_free_ino(sd->s_ino);
327 kmem_cache_free(sysfs_dir_cachep, sd);
328
329 sd = parent_sd;
330 if (sd && atomic_dec_and_test(&sd->s_count))
331 goto repeat;
332}
17 333
18static void sysfs_d_iput(struct dentry * dentry, struct inode * inode) 334static void sysfs_d_iput(struct dentry * dentry, struct inode * inode)
19{ 335{
20 struct sysfs_dirent * sd = dentry->d_fsdata; 336 struct sysfs_dirent * sd = dentry->d_fsdata;
21 337
22 if (sd) { 338 if (sd) {
23 /* sd->s_dentry is protected with sysfs_lock. This 339 /* sd->s_dentry is protected with sysfs_assoc_lock.
24 * allows sysfs_drop_dentry() to dereference it. 340 * This allows sysfs_drop_dentry() to dereference it.
25 */ 341 */
26 spin_lock(&sysfs_lock); 342 spin_lock(&sysfs_assoc_lock);
27 343
28 /* The dentry might have been deleted or another 344 /* The dentry might have been deleted or another
29 * lookup could have happened updating sd->s_dentry to 345 * lookup could have happened updating sd->s_dentry to
@@ -32,7 +348,7 @@ static void sysfs_d_iput(struct dentry * dentry, struct inode * inode)
32 */ 348 */
33 if (sd->s_dentry == dentry) 349 if (sd->s_dentry == dentry)
34 sd->s_dentry = NULL; 350 sd->s_dentry = NULL;
35 spin_unlock(&sysfs_lock); 351 spin_unlock(&sysfs_assoc_lock);
36 sysfs_put(sd); 352 sysfs_put(sd);
37 } 353 }
38 iput(inode); 354 iput(inode);
@@ -42,260 +358,402 @@ static struct dentry_operations sysfs_dentry_ops = {
42 .d_iput = sysfs_d_iput, 358 .d_iput = sysfs_d_iput,
43}; 359};
44 360
45static unsigned int sysfs_inode_counter; 361struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
46ino_t sysfs_get_inum(void)
47{ 362{
48 if (unlikely(sysfs_inode_counter < 3)) 363 char *dup_name = NULL;
49 sysfs_inode_counter = 3; 364 struct sysfs_dirent *sd = NULL;
50 return sysfs_inode_counter++;
51}
52 365
53/* 366 if (type & SYSFS_COPY_NAME) {
54 * Allocates a new sysfs_dirent and links it to the parent sysfs_dirent 367 name = dup_name = kstrdup(name, GFP_KERNEL);
55 */ 368 if (!name)
56static struct sysfs_dirent * __sysfs_new_dirent(void * element) 369 goto err_out;
57{ 370 }
58 struct sysfs_dirent * sd;
59 371
60 sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL); 372 sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL);
61 if (!sd) 373 if (!sd)
62 return NULL; 374 goto err_out;
375
376 if (sysfs_alloc_ino(&sd->s_ino))
377 goto err_out;
63 378
64 sd->s_ino = sysfs_get_inum();
65 atomic_set(&sd->s_count, 1); 379 atomic_set(&sd->s_count, 1);
380 atomic_set(&sd->s_active, 0);
66 atomic_set(&sd->s_event, 1); 381 atomic_set(&sd->s_event, 1);
67 INIT_LIST_HEAD(&sd->s_children); 382
68 INIT_LIST_HEAD(&sd->s_sibling); 383 sd->s_name = name;
69 sd->s_element = element; 384 sd->s_mode = mode;
385 sd->s_flags = type;
70 386
71 return sd; 387 return sd;
388
389 err_out:
390 kfree(dup_name);
391 kmem_cache_free(sysfs_dir_cachep, sd);
392 return NULL;
72} 393}
73 394
74static void __sysfs_list_dirent(struct sysfs_dirent *parent_sd, 395/**
75 struct sysfs_dirent *sd) 396 * sysfs_attach_dentry - associate sysfs_dirent with dentry
397 * @sd: target sysfs_dirent
398 * @dentry: dentry to associate
399 *
400 * Associate @sd with @dentry. This is protected by
401 * sysfs_assoc_lock to avoid race with sysfs_d_iput().
402 *
403 * LOCKING:
404 * mutex_lock(sysfs_mutex)
405 */
406static void sysfs_attach_dentry(struct sysfs_dirent *sd, struct dentry *dentry)
76{ 407{
77 if (sd) 408 dentry->d_op = &sysfs_dentry_ops;
78 list_add(&sd->s_sibling, &parent_sd->s_children); 409 dentry->d_fsdata = sysfs_get(sd);
410
411 /* protect sd->s_dentry against sysfs_d_iput */
412 spin_lock(&sysfs_assoc_lock);
413 sd->s_dentry = dentry;
414 spin_unlock(&sysfs_assoc_lock);
415
416 d_rehash(dentry);
79} 417}
80 418
81static struct sysfs_dirent * sysfs_new_dirent(struct sysfs_dirent *parent_sd, 419static int sysfs_ilookup_test(struct inode *inode, void *arg)
82 void * element)
83{ 420{
84 struct sysfs_dirent *sd; 421 struct sysfs_dirent *sd = arg;
85 sd = __sysfs_new_dirent(element); 422 return inode->i_ino == sd->s_ino;
86 __sysfs_list_dirent(parent_sd, sd);
87 return sd;
88} 423}
89 424
90/* 425/**
426 * sysfs_addrm_start - prepare for sysfs_dirent add/remove
427 * @acxt: pointer to sysfs_addrm_cxt to be used
428 * @parent_sd: parent sysfs_dirent
91 * 429 *
92 * Return -EEXIST if there is already a sysfs element with the same name for 430 * This function is called when the caller is about to add or
93 * the same parent. 431 * remove sysfs_dirent under @parent_sd. This function acquires
432 * sysfs_mutex, grabs inode for @parent_sd if available and lock
433 * i_mutex of it. @acxt is used to keep and pass context to
434 * other addrm functions.
94 * 435 *
95 * called with parent inode's i_mutex held 436 * LOCKING:
437 * Kernel thread context (may sleep). sysfs_mutex is locked on
438 * return. i_mutex of parent inode is locked on return if
439 * available.
96 */ 440 */
97int sysfs_dirent_exist(struct sysfs_dirent *parent_sd, 441void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
98 const unsigned char *new) 442 struct sysfs_dirent *parent_sd)
99{ 443{
100 struct sysfs_dirent * sd; 444 struct inode *inode;
101 445
102 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { 446 memset(acxt, 0, sizeof(*acxt));
103 if (sd->s_element) { 447 acxt->parent_sd = parent_sd;
104 const unsigned char *existing = sysfs_get_name(sd);
105 if (strcmp(existing, new))
106 continue;
107 else
108 return -EEXIST;
109 }
110 }
111 448
112 return 0; 449 /* Lookup parent inode. inode initialization and I_NEW
450 * clearing are protected by sysfs_mutex. By grabbing it and
451 * looking up with _nowait variant, inode state can be
452 * determined reliably.
453 */
454 mutex_lock(&sysfs_mutex);
455
456 inode = ilookup5_nowait(sysfs_sb, parent_sd->s_ino, sysfs_ilookup_test,
457 parent_sd);
458
459 if (inode && !(inode->i_state & I_NEW)) {
460 /* parent inode available */
461 acxt->parent_inode = inode;
462
463 /* sysfs_mutex is below i_mutex in lock hierarchy.
464 * First, trylock i_mutex. If fails, unlock
465 * sysfs_mutex and lock them in order.
466 */
467 if (!mutex_trylock(&inode->i_mutex)) {
468 mutex_unlock(&sysfs_mutex);
469 mutex_lock(&inode->i_mutex);
470 mutex_lock(&sysfs_mutex);
471 }
472 } else
473 iput(inode);
113} 474}
114 475
476/**
477 * sysfs_add_one - add sysfs_dirent to parent
478 * @acxt: addrm context to use
479 * @sd: sysfs_dirent to be added
480 *
481 * Get @acxt->parent_sd and set sd->s_parent to it and increment
482 * nlink of parent inode if @sd is a directory. @sd is NOT
483 * linked into the children list of the parent. The caller
484 * should invoke sysfs_link_sibling() after this function
485 * completes if @sd needs to be on the children list.
486 *
487 * This function should be called between calls to
488 * sysfs_addrm_start() and sysfs_addrm_finish() and should be
489 * passed the same @acxt as passed to sysfs_addrm_start().
490 *
491 * LOCKING:
492 * Determined by sysfs_addrm_start().
493 */
494void sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
495{
496 sd->s_parent = sysfs_get(acxt->parent_sd);
497
498 if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
499 inc_nlink(acxt->parent_inode);
500
501 acxt->cnt++;
502}
115 503
116static struct sysfs_dirent * 504/**
117__sysfs_make_dirent(struct dentry *dentry, void *element, mode_t mode, int type) 505 * sysfs_remove_one - remove sysfs_dirent from parent
506 * @acxt: addrm context to use
507 * @sd: sysfs_dirent to be added
508 *
509 * Mark @sd removed and drop nlink of parent inode if @sd is a
510 * directory. @sd is NOT unlinked from the children list of the
511 * parent. The caller is repsonsible for removing @sd from the
512 * children list before calling this function.
513 *
514 * This function should be called between calls to
515 * sysfs_addrm_start() and sysfs_addrm_finish() and should be
516 * passed the same @acxt as passed to sysfs_addrm_start().
517 *
518 * LOCKING:
519 * Determined by sysfs_addrm_start().
520 */
521void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
118{ 522{
119 struct sysfs_dirent * sd; 523 BUG_ON(sd->s_sibling || (sd->s_flags & SYSFS_FLAG_REMOVED));
120 524
121 sd = __sysfs_new_dirent(element); 525 sd->s_flags |= SYSFS_FLAG_REMOVED;
122 if (!sd) 526 sd->s_sibling = acxt->removed;
123 goto out; 527 acxt->removed = sd;
124 528
125 sd->s_mode = mode; 529 if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
126 sd->s_type = type; 530 drop_nlink(acxt->parent_inode);
127 sd->s_dentry = dentry;
128 if (dentry) {
129 dentry->d_fsdata = sysfs_get(sd);
130 dentry->d_op = &sysfs_dentry_ops;
131 }
132 531
133out: 532 acxt->cnt++;
134 return sd;
135} 533}
136 534
137int sysfs_make_dirent(struct sysfs_dirent * parent_sd, struct dentry * dentry, 535/**
138 void * element, umode_t mode, int type) 536 * sysfs_drop_dentry - drop dentry for the specified sysfs_dirent
537 * @sd: target sysfs_dirent
538 *
539 * Drop dentry for @sd. @sd must have been unlinked from its
540 * parent on entry to this function such that it can't be looked
541 * up anymore.
542 *
543 * @sd->s_dentry which is protected with sysfs_assoc_lock points
544 * to the currently associated dentry but we're not holding a
545 * reference to it and racing with dput(). Grab dcache_lock and
546 * verify dentry before dropping it. If @sd->s_dentry is NULL or
547 * dput() beats us, no need to bother.
548 */
549static void sysfs_drop_dentry(struct sysfs_dirent *sd)
139{ 550{
140 struct sysfs_dirent *sd; 551 struct dentry *dentry = NULL;
552 struct inode *inode;
553
554 /* We're not holding a reference to ->s_dentry dentry but the
555 * field will stay valid as long as sysfs_assoc_lock is held.
556 */
557 spin_lock(&sysfs_assoc_lock);
558 spin_lock(&dcache_lock);
559
560 /* drop dentry if it's there and dput() didn't kill it yet */
561 if (sd->s_dentry && sd->s_dentry->d_inode) {
562 dentry = dget_locked(sd->s_dentry);
563 spin_lock(&dentry->d_lock);
564 __d_drop(dentry);
565 spin_unlock(&dentry->d_lock);
566 }
141 567
142 sd = __sysfs_make_dirent(dentry, element, mode, type); 568 spin_unlock(&dcache_lock);
143 __sysfs_list_dirent(parent_sd, sd); 569 spin_unlock(&sysfs_assoc_lock);
144 570
145 return sd ? 0 : -ENOMEM; 571 /* dentries for shadowed inodes are pinned, unpin */
572 if (dentry && sysfs_is_shadowed_inode(dentry->d_inode))
573 dput(dentry);
574 dput(dentry);
575
576 /* adjust nlink and update timestamp */
577 inode = ilookup(sysfs_sb, sd->s_ino);
578 if (inode) {
579 mutex_lock(&inode->i_mutex);
580
581 inode->i_ctime = CURRENT_TIME;
582 drop_nlink(inode);
583 if (sysfs_type(sd) == SYSFS_DIR)
584 drop_nlink(inode);
585
586 mutex_unlock(&inode->i_mutex);
587 iput(inode);
588 }
146} 589}
147 590
148static int init_dir(struct inode * inode) 591/**
592 * sysfs_addrm_finish - finish up sysfs_dirent add/remove
593 * @acxt: addrm context to finish up
594 *
595 * Finish up sysfs_dirent add/remove. Resources acquired by
596 * sysfs_addrm_start() are released and removed sysfs_dirents are
597 * cleaned up. Timestamps on the parent inode are updated.
598 *
599 * LOCKING:
600 * All mutexes acquired by sysfs_addrm_start() are released.
601 *
602 * RETURNS:
603 * Number of added/removed sysfs_dirents since sysfs_addrm_start().
604 */
605int sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
149{ 606{
150 inode->i_op = &sysfs_dir_inode_operations; 607 /* release resources acquired by sysfs_addrm_start() */
151 inode->i_fop = &sysfs_dir_operations; 608 mutex_unlock(&sysfs_mutex);
609 if (acxt->parent_inode) {
610 struct inode *inode = acxt->parent_inode;
152 611
153 /* directory inodes start off with i_nlink == 2 (for "." entry) */ 612 /* if added/removed, update timestamps on the parent */
154 inc_nlink(inode); 613 if (acxt->cnt)
155 return 0; 614 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
615
616 mutex_unlock(&inode->i_mutex);
617 iput(inode);
618 }
619
620 /* kill removed sysfs_dirents */
621 while (acxt->removed) {
622 struct sysfs_dirent *sd = acxt->removed;
623
624 acxt->removed = sd->s_sibling;
625 sd->s_sibling = NULL;
626
627 sysfs_drop_dentry(sd);
628 sysfs_deactivate(sd);
629 sysfs_put(sd);
630 }
631
632 return acxt->cnt;
156} 633}
157 634
158static int init_file(struct inode * inode) 635/**
636 * sysfs_find_dirent - find sysfs_dirent with the given name
637 * @parent_sd: sysfs_dirent to search under
638 * @name: name to look for
639 *
640 * Look for sysfs_dirent with name @name under @parent_sd.
641 *
642 * LOCKING:
643 * mutex_lock(sysfs_mutex)
644 *
645 * RETURNS:
646 * Pointer to sysfs_dirent if found, NULL if not.
647 */
648struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
649 const unsigned char *name)
159{ 650{
160 inode->i_size = PAGE_SIZE; 651 struct sysfs_dirent *sd;
161 inode->i_fop = &sysfs_file_operations; 652
162 return 0; 653 for (sd = parent_sd->s_children; sd; sd = sd->s_sibling)
654 if (sysfs_type(sd) && !strcmp(sd->s_name, name))
655 return sd;
656 return NULL;
163} 657}
164 658
165static int init_symlink(struct inode * inode) 659/**
660 * sysfs_get_dirent - find and get sysfs_dirent with the given name
661 * @parent_sd: sysfs_dirent to search under
662 * @name: name to look for
663 *
664 * Look for sysfs_dirent with name @name under @parent_sd and get
665 * it if found.
666 *
667 * LOCKING:
668 * Kernel thread context (may sleep). Grabs sysfs_mutex.
669 *
670 * RETURNS:
671 * Pointer to sysfs_dirent if found, NULL if not.
672 */
673struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
674 const unsigned char *name)
166{ 675{
167 inode->i_op = &sysfs_symlink_inode_operations; 676 struct sysfs_dirent *sd;
168 return 0; 677
678 mutex_lock(&sysfs_mutex);
679 sd = sysfs_find_dirent(parent_sd, name);
680 sysfs_get(sd);
681 mutex_unlock(&sysfs_mutex);
682
683 return sd;
169} 684}
170 685
171static int create_dir(struct kobject * k, struct dentry * p, 686static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
172 const char * n, struct dentry ** d) 687 const char *name, struct sysfs_dirent **p_sd)
173{ 688{
174 int error;
175 umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO; 689 umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
690 struct sysfs_addrm_cxt acxt;
691 struct sysfs_dirent *sd;
176 692
177 mutex_lock(&p->d_inode->i_mutex); 693 /* allocate */
178 *d = lookup_one_len(n, p, strlen(n)); 694 sd = sysfs_new_dirent(name, mode, SYSFS_DIR);
179 if (!IS_ERR(*d)) { 695 if (!sd)
180 if (sysfs_dirent_exist(p->d_fsdata, n)) 696 return -ENOMEM;
181 error = -EEXIST; 697 sd->s_elem.dir.kobj = kobj;
182 else
183 error = sysfs_make_dirent(p->d_fsdata, *d, k, mode,
184 SYSFS_DIR);
185 if (!error) {
186 error = sysfs_create(*d, mode, init_dir);
187 if (!error) {
188 inc_nlink(p->d_inode);
189 (*d)->d_op = &sysfs_dentry_ops;
190 d_rehash(*d);
191 }
192 }
193 if (error && (error != -EEXIST)) {
194 struct sysfs_dirent *sd = (*d)->d_fsdata;
195 if (sd) {
196 list_del_init(&sd->s_sibling);
197 sysfs_put(sd);
198 }
199 d_drop(*d);
200 }
201 dput(*d);
202 } else
203 error = PTR_ERR(*d);
204 mutex_unlock(&p->d_inode->i_mutex);
205 return error;
206}
207 698
699 /* link in */
700 sysfs_addrm_start(&acxt, parent_sd);
701 if (!sysfs_find_dirent(parent_sd, name)) {
702 sysfs_add_one(&acxt, sd);
703 sysfs_link_sibling(sd);
704 }
705 if (sysfs_addrm_finish(&acxt)) {
706 *p_sd = sd;
707 return 0;
708 }
208 709
209int sysfs_create_subdir(struct kobject * k, const char * n, struct dentry ** d) 710 sysfs_put(sd);
711 return -EEXIST;
712}
713
714int sysfs_create_subdir(struct kobject *kobj, const char *name,
715 struct sysfs_dirent **p_sd)
210{ 716{
211 return create_dir(k,k->dentry,n,d); 717 return create_dir(kobj, kobj->sd, name, p_sd);
212} 718}
213 719
214/** 720/**
215 * sysfs_create_dir - create a directory for an object. 721 * sysfs_create_dir - create a directory for an object.
216 * @kobj: object we're creating directory for. 722 * @kobj: object we're creating directory for.
217 * @shadow_parent: parent parent object. 723 * @shadow_parent: parent object.
218 */ 724 */
219 725int sysfs_create_dir(struct kobject *kobj,
220int sysfs_create_dir(struct kobject * kobj, struct dentry *shadow_parent) 726 struct sysfs_dirent *shadow_parent_sd)
221{ 727{
222 struct dentry * dentry = NULL; 728 struct sysfs_dirent *parent_sd, *sd;
223 struct dentry * parent;
224 int error = 0; 729 int error = 0;
225 730
226 BUG_ON(!kobj); 731 BUG_ON(!kobj);
227 732
228 if (shadow_parent) 733 if (shadow_parent_sd)
229 parent = shadow_parent; 734 parent_sd = shadow_parent_sd;
230 else if (kobj->parent) 735 else if (kobj->parent)
231 parent = kobj->parent->dentry; 736 parent_sd = kobj->parent->sd;
232 else if (sysfs_mount && sysfs_mount->mnt_sb) 737 else if (sysfs_mount && sysfs_mount->mnt_sb)
233 parent = sysfs_mount->mnt_sb->s_root; 738 parent_sd = sysfs_mount->mnt_sb->s_root->d_fsdata;
234 else 739 else
235 return -EFAULT; 740 return -EFAULT;
236 741
237 error = create_dir(kobj,parent,kobject_name(kobj),&dentry); 742 error = create_dir(kobj, parent_sd, kobject_name(kobj), &sd);
238 if (!error) 743 if (!error)
239 kobj->dentry = dentry; 744 kobj->sd = sd;
240 return error; 745 return error;
241} 746}
242 747
243/* attaches attribute's sysfs_dirent to the dentry corresponding to the 748static int sysfs_count_nlink(struct sysfs_dirent *sd)
244 * attribute file
245 */
246static int sysfs_attach_attr(struct sysfs_dirent * sd, struct dentry * dentry)
247{ 749{
248 struct attribute * attr = NULL; 750 struct sysfs_dirent *child;
249 struct bin_attribute * bin_attr = NULL; 751 int nr = 0;
250 int (* init) (struct inode *) = NULL;
251 int error = 0;
252
253 if (sd->s_type & SYSFS_KOBJ_BIN_ATTR) {
254 bin_attr = sd->s_element;
255 attr = &bin_attr->attr;
256 } else {
257 attr = sd->s_element;
258 init = init_file;
259 }
260 752
261 dentry->d_fsdata = sysfs_get(sd); 753 for (child = sd->s_children; child; child = child->s_sibling)
262 /* protect sd->s_dentry against sysfs_d_iput */ 754 if (sysfs_type(child) == SYSFS_DIR)
263 spin_lock(&sysfs_lock); 755 nr++;
264 sd->s_dentry = dentry; 756 return nr + 2;
265 spin_unlock(&sysfs_lock);
266 error = sysfs_create(dentry, (attr->mode & S_IALLUGO) | S_IFREG, init);
267 if (error) {
268 sysfs_put(sd);
269 return error;
270 }
271
272 if (bin_attr) {
273 dentry->d_inode->i_size = bin_attr->size;
274 dentry->d_inode->i_fop = &bin_fops;
275 }
276 dentry->d_op = &sysfs_dentry_ops;
277 d_rehash(dentry);
278
279 return 0;
280}
281
282static int sysfs_attach_link(struct sysfs_dirent * sd, struct dentry * dentry)
283{
284 int err = 0;
285
286 dentry->d_fsdata = sysfs_get(sd);
287 /* protect sd->s_dentry against sysfs_d_iput */
288 spin_lock(&sysfs_lock);
289 sd->s_dentry = dentry;
290 spin_unlock(&sysfs_lock);
291 err = sysfs_create(dentry, S_IFLNK|S_IRWXUGO, init_symlink);
292 if (!err) {
293 dentry->d_op = &sysfs_dentry_ops;
294 d_rehash(dentry);
295 } else
296 sysfs_put(sd);
297
298 return err;
299} 757}
300 758
301static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, 759static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
@@ -303,24 +761,60 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
303{ 761{
304 struct sysfs_dirent * parent_sd = dentry->d_parent->d_fsdata; 762 struct sysfs_dirent * parent_sd = dentry->d_parent->d_fsdata;
305 struct sysfs_dirent * sd; 763 struct sysfs_dirent * sd;
306 int err = 0; 764 struct bin_attribute *bin_attr;
765 struct inode *inode;
766 int found = 0;
307 767
308 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { 768 for (sd = parent_sd->s_children; sd; sd = sd->s_sibling) {
309 if (sd->s_type & SYSFS_NOT_PINNED) { 769 if (sysfs_type(sd) &&
310 const unsigned char * name = sysfs_get_name(sd); 770 !strcmp(sd->s_name, dentry->d_name.name)) {
771 found = 1;
772 break;
773 }
774 }
311 775
312 if (strcmp(name, dentry->d_name.name)) 776 /* no such entry */
313 continue; 777 if (!found)
778 return NULL;
314 779
315 if (sd->s_type & SYSFS_KOBJ_LINK) 780 /* attach dentry and inode */
316 err = sysfs_attach_link(sd, dentry); 781 inode = sysfs_get_inode(sd);
317 else 782 if (!inode)
318 err = sysfs_attach_attr(sd, dentry); 783 return ERR_PTR(-ENOMEM);
784
785 mutex_lock(&sysfs_mutex);
786
787 if (inode->i_state & I_NEW) {
788 /* initialize inode according to type */
789 switch (sysfs_type(sd)) {
790 case SYSFS_DIR:
791 inode->i_op = &sysfs_dir_inode_operations;
792 inode->i_fop = &sysfs_dir_operations;
793 inode->i_nlink = sysfs_count_nlink(sd);
794 break;
795 case SYSFS_KOBJ_ATTR:
796 inode->i_size = PAGE_SIZE;
797 inode->i_fop = &sysfs_file_operations;
798 break;
799 case SYSFS_KOBJ_BIN_ATTR:
800 bin_attr = sd->s_elem.bin_attr.bin_attr;
801 inode->i_size = bin_attr->size;
802 inode->i_fop = &bin_fops;
319 break; 803 break;
804 case SYSFS_KOBJ_LINK:
805 inode->i_op = &sysfs_symlink_inode_operations;
806 break;
807 default:
808 BUG();
320 } 809 }
321 } 810 }
322 811
323 return ERR_PTR(err); 812 sysfs_instantiate(dentry, inode);
813 sysfs_attach_dentry(sd, dentry);
814
815 mutex_unlock(&sysfs_mutex);
816
817 return NULL;
324} 818}
325 819
326const struct inode_operations sysfs_dir_inode_operations = { 820const struct inode_operations sysfs_dir_inode_operations = {
@@ -328,58 +822,46 @@ const struct inode_operations sysfs_dir_inode_operations = {
328 .setattr = sysfs_setattr, 822 .setattr = sysfs_setattr,
329}; 823};
330 824
331static void remove_dir(struct dentry * d) 825static void remove_dir(struct sysfs_dirent *sd)
332{ 826{
333 struct dentry * parent = dget(d->d_parent); 827 struct sysfs_addrm_cxt acxt;
334 struct sysfs_dirent * sd;
335
336 mutex_lock(&parent->d_inode->i_mutex);
337 d_delete(d);
338 sd = d->d_fsdata;
339 list_del_init(&sd->s_sibling);
340 sysfs_put(sd);
341 if (d->d_inode)
342 simple_rmdir(parent->d_inode,d);
343
344 pr_debug(" o %s removing done (%d)\n",d->d_name.name,
345 atomic_read(&d->d_count));
346 828
347 mutex_unlock(&parent->d_inode->i_mutex); 829 sysfs_addrm_start(&acxt, sd->s_parent);
348 dput(parent); 830 sysfs_unlink_sibling(sd);
831 sysfs_remove_one(&acxt, sd);
832 sysfs_addrm_finish(&acxt);
349} 833}
350 834
351void sysfs_remove_subdir(struct dentry * d) 835void sysfs_remove_subdir(struct sysfs_dirent *sd)
352{ 836{
353 remove_dir(d); 837 remove_dir(sd);
354} 838}
355 839
356 840
357static void __sysfs_remove_dir(struct dentry *dentry) 841static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd)
358{ 842{
359 struct sysfs_dirent * parent_sd; 843 struct sysfs_addrm_cxt acxt;
360 struct sysfs_dirent * sd, * tmp; 844 struct sysfs_dirent **pos;
361 845
362 dget(dentry); 846 if (!dir_sd)
363 if (!dentry)
364 return; 847 return;
365 848
366 pr_debug("sysfs %s: removing dir\n",dentry->d_name.name); 849 pr_debug("sysfs %s: removing dir\n", dir_sd->s_name);
367 mutex_lock(&dentry->d_inode->i_mutex); 850 sysfs_addrm_start(&acxt, dir_sd);
368 parent_sd = dentry->d_fsdata; 851 pos = &dir_sd->s_children;
369 list_for_each_entry_safe(sd, tmp, &parent_sd->s_children, s_sibling) { 852 while (*pos) {
370 if (!sd->s_element || !(sd->s_type & SYSFS_NOT_PINNED)) 853 struct sysfs_dirent *sd = *pos;
371 continue; 854
372 list_del_init(&sd->s_sibling); 855 if (sysfs_type(sd) && sysfs_type(sd) != SYSFS_DIR) {
373 sysfs_drop_dentry(sd, dentry); 856 *pos = sd->s_sibling;
374 sysfs_put(sd); 857 sd->s_sibling = NULL;
858 sysfs_remove_one(&acxt, sd);
859 } else
860 pos = &(*pos)->s_sibling;
375 } 861 }
376 mutex_unlock(&dentry->d_inode->i_mutex); 862 sysfs_addrm_finish(&acxt);
377 863
378 remove_dir(dentry); 864 remove_dir(dir_sd);
379 /**
380 * Drop reference from dget() on entrance.
381 */
382 dput(dentry);
383} 865}
384 866
385/** 867/**
@@ -393,102 +875,166 @@ static void __sysfs_remove_dir(struct dentry *dentry)
393 875
394void sysfs_remove_dir(struct kobject * kobj) 876void sysfs_remove_dir(struct kobject * kobj)
395{ 877{
396 __sysfs_remove_dir(kobj->dentry); 878 struct sysfs_dirent *sd = kobj->sd;
397 kobj->dentry = NULL; 879
880 spin_lock(&sysfs_assoc_lock);
881 kobj->sd = NULL;
882 spin_unlock(&sysfs_assoc_lock);
883
884 __sysfs_remove_dir(sd);
398} 885}
399 886
400int sysfs_rename_dir(struct kobject * kobj, struct dentry *new_parent, 887int sysfs_rename_dir(struct kobject *kobj, struct sysfs_dirent *new_parent_sd,
401 const char *new_name) 888 const char *new_name)
402{ 889{
403 int error = 0; 890 struct sysfs_dirent *sd = kobj->sd;
404 struct dentry * new_dentry; 891 struct dentry *new_parent = NULL;
892 struct dentry *old_dentry = NULL, *new_dentry = NULL;
893 const char *dup_name = NULL;
894 int error;
405 895
406 if (!new_parent) 896 /* get dentries */
407 return -EFAULT; 897 old_dentry = sysfs_get_dentry(sd);
898 if (IS_ERR(old_dentry)) {
899 error = PTR_ERR(old_dentry);
900 goto out_dput;
901 }
408 902
409 down_write(&sysfs_rename_sem); 903 new_parent = sysfs_get_dentry(new_parent_sd);
904 if (IS_ERR(new_parent)) {
905 error = PTR_ERR(new_parent);
906 goto out_dput;
907 }
908
909 /* lock new_parent and get dentry for new name */
410 mutex_lock(&new_parent->d_inode->i_mutex); 910 mutex_lock(&new_parent->d_inode->i_mutex);
411 911
412 new_dentry = lookup_one_len(new_name, new_parent, strlen(new_name)); 912 new_dentry = lookup_one_len(new_name, new_parent, strlen(new_name));
413 if (!IS_ERR(new_dentry)) { 913 if (IS_ERR(new_dentry)) {
414 /* By allowing two different directories with the 914 error = PTR_ERR(new_dentry);
415 * same d_parent we allow this routine to move 915 goto out_unlock;
416 * between different shadows of the same directory
417 */
418 if (kobj->dentry->d_parent->d_inode != new_parent->d_inode)
419 return -EINVAL;
420 else if (new_dentry->d_parent->d_inode != new_parent->d_inode)
421 error = -EINVAL;
422 else if (new_dentry == kobj->dentry)
423 error = -EINVAL;
424 else if (!new_dentry->d_inode) {
425 error = kobject_set_name(kobj, "%s", new_name);
426 if (!error) {
427 struct sysfs_dirent *sd, *parent_sd;
428
429 d_add(new_dentry, NULL);
430 d_move(kobj->dentry, new_dentry);
431
432 sd = kobj->dentry->d_fsdata;
433 parent_sd = new_parent->d_fsdata;
434
435 list_del_init(&sd->s_sibling);
436 list_add(&sd->s_sibling, &parent_sd->s_children);
437 }
438 else
439 d_drop(new_dentry);
440 } else
441 error = -EEXIST;
442 dput(new_dentry);
443 } 916 }
444 mutex_unlock(&new_parent->d_inode->i_mutex);
445 up_write(&sysfs_rename_sem);
446 917
918 /* By allowing two different directories with the same
919 * d_parent we allow this routine to move between different
920 * shadows of the same directory
921 */
922 error = -EINVAL;
923 if (old_dentry->d_parent->d_inode != new_parent->d_inode ||
924 new_dentry->d_parent->d_inode != new_parent->d_inode ||
925 old_dentry == new_dentry)
926 goto out_unlock;
927
928 error = -EEXIST;
929 if (new_dentry->d_inode)
930 goto out_unlock;
931
932 /* rename kobject and sysfs_dirent */
933 error = -ENOMEM;
934 new_name = dup_name = kstrdup(new_name, GFP_KERNEL);
935 if (!new_name)
936 goto out_drop;
937
938 error = kobject_set_name(kobj, "%s", new_name);
939 if (error)
940 goto out_drop;
941
942 dup_name = sd->s_name;
943 sd->s_name = new_name;
944
945 /* move under the new parent */
946 d_add(new_dentry, NULL);
947 d_move(sd->s_dentry, new_dentry);
948
949 mutex_lock(&sysfs_mutex);
950
951 sysfs_unlink_sibling(sd);
952 sysfs_get(new_parent_sd);
953 sysfs_put(sd->s_parent);
954 sd->s_parent = new_parent_sd;
955 sysfs_link_sibling(sd);
956
957 mutex_unlock(&sysfs_mutex);
958
959 error = 0;
960 goto out_unlock;
961
962 out_drop:
963 d_drop(new_dentry);
964 out_unlock:
965 mutex_unlock(&new_parent->d_inode->i_mutex);
966 out_dput:
967 kfree(dup_name);
968 dput(new_parent);
969 dput(old_dentry);
970 dput(new_dentry);
447 return error; 971 return error;
448} 972}
449 973
450int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent) 974int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj)
451{ 975{
452 struct dentry *old_parent_dentry, *new_parent_dentry, *new_dentry; 976 struct sysfs_dirent *sd = kobj->sd;
453 struct sysfs_dirent *new_parent_sd, *sd; 977 struct sysfs_dirent *new_parent_sd;
978 struct dentry *old_parent, *new_parent = NULL;
979 struct dentry *old_dentry = NULL, *new_dentry = NULL;
454 int error; 980 int error;
455 981
456 old_parent_dentry = kobj->parent ? 982 BUG_ON(!sd->s_parent);
457 kobj->parent->dentry : sysfs_mount->mnt_sb->s_root; 983 new_parent_sd = new_parent_kobj->sd ? new_parent_kobj->sd : &sysfs_root;
458 new_parent_dentry = new_parent ? 984
459 new_parent->dentry : sysfs_mount->mnt_sb->s_root; 985 /* get dentries */
986 old_dentry = sysfs_get_dentry(sd);
987 if (IS_ERR(old_dentry)) {
988 error = PTR_ERR(old_dentry);
989 goto out_dput;
990 }
991 old_parent = sd->s_parent->s_dentry;
992
993 new_parent = sysfs_get_dentry(new_parent_sd);
994 if (IS_ERR(new_parent)) {
995 error = PTR_ERR(new_parent);
996 goto out_dput;
997 }
460 998
461 if (old_parent_dentry->d_inode == new_parent_dentry->d_inode) 999 if (old_parent->d_inode == new_parent->d_inode) {
462 return 0; /* nothing to move */ 1000 error = 0;
1001 goto out_dput; /* nothing to move */
1002 }
463again: 1003again:
464 mutex_lock(&old_parent_dentry->d_inode->i_mutex); 1004 mutex_lock(&old_parent->d_inode->i_mutex);
465 if (!mutex_trylock(&new_parent_dentry->d_inode->i_mutex)) { 1005 if (!mutex_trylock(&new_parent->d_inode->i_mutex)) {
466 mutex_unlock(&old_parent_dentry->d_inode->i_mutex); 1006 mutex_unlock(&old_parent->d_inode->i_mutex);
467 goto again; 1007 goto again;
468 } 1008 }
469 1009
470 new_parent_sd = new_parent_dentry->d_fsdata; 1010 new_dentry = lookup_one_len(kobj->name, new_parent, strlen(kobj->name));
471 sd = kobj->dentry->d_fsdata;
472
473 new_dentry = lookup_one_len(kobj->name, new_parent_dentry,
474 strlen(kobj->name));
475 if (IS_ERR(new_dentry)) { 1011 if (IS_ERR(new_dentry)) {
476 error = PTR_ERR(new_dentry); 1012 error = PTR_ERR(new_dentry);
477 goto out; 1013 goto out_unlock;
478 } else 1014 } else
479 error = 0; 1015 error = 0;
480 d_add(new_dentry, NULL); 1016 d_add(new_dentry, NULL);
481 d_move(kobj->dentry, new_dentry); 1017 d_move(sd->s_dentry, new_dentry);
482 dput(new_dentry); 1018 dput(new_dentry);
483 1019
484 /* Remove from old parent's list and insert into new parent's list. */ 1020 /* Remove from old parent's list and insert into new parent's list. */
485 list_del_init(&sd->s_sibling); 1021 mutex_lock(&sysfs_mutex);
486 list_add(&sd->s_sibling, &new_parent_sd->s_children); 1022
1023 sysfs_unlink_sibling(sd);
1024 sysfs_get(new_parent_sd);
1025 sysfs_put(sd->s_parent);
1026 sd->s_parent = new_parent_sd;
1027 sysfs_link_sibling(sd);
487 1028
488out: 1029 mutex_unlock(&sysfs_mutex);
489 mutex_unlock(&new_parent_dentry->d_inode->i_mutex);
490 mutex_unlock(&old_parent_dentry->d_inode->i_mutex);
491 1030
1031 out_unlock:
1032 mutex_unlock(&new_parent->d_inode->i_mutex);
1033 mutex_unlock(&old_parent->d_inode->i_mutex);
1034 out_dput:
1035 dput(new_parent);
1036 dput(old_dentry);
1037 dput(new_dentry);
492 return error; 1038 return error;
493} 1039}
494 1040
@@ -496,23 +1042,27 @@ static int sysfs_dir_open(struct inode *inode, struct file *file)
496{ 1042{
497 struct dentry * dentry = file->f_path.dentry; 1043 struct dentry * dentry = file->f_path.dentry;
498 struct sysfs_dirent * parent_sd = dentry->d_fsdata; 1044 struct sysfs_dirent * parent_sd = dentry->d_fsdata;
1045 struct sysfs_dirent * sd;
499 1046
500 mutex_lock(&dentry->d_inode->i_mutex); 1047 sd = sysfs_new_dirent("_DIR_", 0, 0);
501 file->private_data = sysfs_new_dirent(parent_sd, NULL); 1048 if (sd) {
502 mutex_unlock(&dentry->d_inode->i_mutex); 1049 mutex_lock(&sysfs_mutex);
503 1050 sd->s_parent = sysfs_get(parent_sd);
504 return file->private_data ? 0 : -ENOMEM; 1051 sysfs_link_sibling(sd);
1052 mutex_unlock(&sysfs_mutex);
1053 }
505 1054
1055 file->private_data = sd;
1056 return sd ? 0 : -ENOMEM;
506} 1057}
507 1058
508static int sysfs_dir_close(struct inode *inode, struct file *file) 1059static int sysfs_dir_close(struct inode *inode, struct file *file)
509{ 1060{
510 struct dentry * dentry = file->f_path.dentry;
511 struct sysfs_dirent * cursor = file->private_data; 1061 struct sysfs_dirent * cursor = file->private_data;
512 1062
513 mutex_lock(&dentry->d_inode->i_mutex); 1063 mutex_lock(&sysfs_mutex);
514 list_del_init(&cursor->s_sibling); 1064 sysfs_unlink_sibling(cursor);
515 mutex_unlock(&dentry->d_inode->i_mutex); 1065 mutex_unlock(&sysfs_mutex);
516 1066
517 release_sysfs_dirent(cursor); 1067 release_sysfs_dirent(cursor);
518 1068
@@ -530,7 +1080,7 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
530 struct dentry *dentry = filp->f_path.dentry; 1080 struct dentry *dentry = filp->f_path.dentry;
531 struct sysfs_dirent * parent_sd = dentry->d_fsdata; 1081 struct sysfs_dirent * parent_sd = dentry->d_fsdata;
532 struct sysfs_dirent *cursor = filp->private_data; 1082 struct sysfs_dirent *cursor = filp->private_data;
533 struct list_head *p, *q = &cursor->s_sibling; 1083 struct sysfs_dirent **pos;
534 ino_t ino; 1084 ino_t ino;
535 int i = filp->f_pos; 1085 int i = filp->f_pos;
536 1086
@@ -543,38 +1093,52 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
543 i++; 1093 i++;
544 /* fallthrough */ 1094 /* fallthrough */
545 case 1: 1095 case 1:
546 ino = parent_ino(dentry); 1096 if (parent_sd->s_parent)
1097 ino = parent_sd->s_parent->s_ino;
1098 else
1099 ino = parent_sd->s_ino;
547 if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) 1100 if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
548 break; 1101 break;
549 filp->f_pos++; 1102 filp->f_pos++;
550 i++; 1103 i++;
551 /* fallthrough */ 1104 /* fallthrough */
552 default: 1105 default:
1106 mutex_lock(&sysfs_mutex);
1107
1108 pos = &parent_sd->s_children;
1109 while (*pos != cursor)
1110 pos = &(*pos)->s_sibling;
1111
1112 /* unlink cursor */
1113 *pos = cursor->s_sibling;
1114
553 if (filp->f_pos == 2) 1115 if (filp->f_pos == 2)
554 list_move(q, &parent_sd->s_children); 1116 pos = &parent_sd->s_children;
555 1117
556 for (p=q->next; p!= &parent_sd->s_children; p=p->next) { 1118 for ( ; *pos; pos = &(*pos)->s_sibling) {
557 struct sysfs_dirent *next; 1119 struct sysfs_dirent *next = *pos;
558 const char * name; 1120 const char * name;
559 int len; 1121 int len;
560 1122
561 next = list_entry(p, struct sysfs_dirent, 1123 if (!sysfs_type(next))
562 s_sibling);
563 if (!next->s_element)
564 continue; 1124 continue;
565 1125
566 name = sysfs_get_name(next); 1126 name = next->s_name;
567 len = strlen(name); 1127 len = strlen(name);
568 ino = next->s_ino; 1128 ino = next->s_ino;
569 1129
570 if (filldir(dirent, name, len, filp->f_pos, ino, 1130 if (filldir(dirent, name, len, filp->f_pos, ino,
571 dt_type(next)) < 0) 1131 dt_type(next)) < 0)
572 return 0; 1132 break;
573 1133
574 list_move(q, p);
575 p = q;
576 filp->f_pos++; 1134 filp->f_pos++;
577 } 1135 }
1136
1137 /* put cursor back in */
1138 cursor->s_sibling = *pos;
1139 *pos = cursor;
1140
1141 mutex_unlock(&sysfs_mutex);
578 } 1142 }
579 return 0; 1143 return 0;
580} 1144}
@@ -583,7 +1147,6 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin)
583{ 1147{
584 struct dentry * dentry = file->f_path.dentry; 1148 struct dentry * dentry = file->f_path.dentry;
585 1149
586 mutex_lock(&dentry->d_inode->i_mutex);
587 switch (origin) { 1150 switch (origin) {
588 case 1: 1151 case 1:
589 offset += file->f_pos; 1152 offset += file->f_pos;
@@ -591,31 +1154,35 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin)
591 if (offset >= 0) 1154 if (offset >= 0)
592 break; 1155 break;
593 default: 1156 default:
594 mutex_unlock(&file->f_path.dentry->d_inode->i_mutex);
595 return -EINVAL; 1157 return -EINVAL;
596 } 1158 }
597 if (offset != file->f_pos) { 1159 if (offset != file->f_pos) {
1160 mutex_lock(&sysfs_mutex);
1161
598 file->f_pos = offset; 1162 file->f_pos = offset;
599 if (file->f_pos >= 2) { 1163 if (file->f_pos >= 2) {
600 struct sysfs_dirent *sd = dentry->d_fsdata; 1164 struct sysfs_dirent *sd = dentry->d_fsdata;
601 struct sysfs_dirent *cursor = file->private_data; 1165 struct sysfs_dirent *cursor = file->private_data;
602 struct list_head *p; 1166 struct sysfs_dirent **pos;
603 loff_t n = file->f_pos - 2; 1167 loff_t n = file->f_pos - 2;
604 1168
605 list_del(&cursor->s_sibling); 1169 sysfs_unlink_sibling(cursor);
606 p = sd->s_children.next; 1170
607 while (n && p != &sd->s_children) { 1171 pos = &sd->s_children;
608 struct sysfs_dirent *next; 1172 while (n && *pos) {
609 next = list_entry(p, struct sysfs_dirent, 1173 struct sysfs_dirent *next = *pos;
610 s_sibling); 1174 if (sysfs_type(next))
611 if (next->s_element)
612 n--; 1175 n--;
613 p = p->next; 1176 pos = &(*pos)->s_sibling;
614 } 1177 }
615 list_add_tail(&cursor->s_sibling, p); 1178
1179 cursor->s_sibling = *pos;
1180 *pos = cursor;
616 } 1181 }
1182
1183 mutex_unlock(&sysfs_mutex);
617 } 1184 }
618 mutex_unlock(&dentry->d_inode->i_mutex); 1185
619 return offset; 1186 return offset;
620} 1187}
621 1188
@@ -628,12 +1195,20 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin)
628int sysfs_make_shadowed_dir(struct kobject *kobj, 1195int sysfs_make_shadowed_dir(struct kobject *kobj,
629 void * (*follow_link)(struct dentry *, struct nameidata *)) 1196 void * (*follow_link)(struct dentry *, struct nameidata *))
630{ 1197{
1198 struct dentry *dentry;
631 struct inode *inode; 1199 struct inode *inode;
632 struct inode_operations *i_op; 1200 struct inode_operations *i_op;
633 1201
634 inode = kobj->dentry->d_inode; 1202 /* get dentry for @kobj->sd, dentry of a shadowed dir is pinned */
635 if (inode->i_op != &sysfs_dir_inode_operations) 1203 dentry = sysfs_get_dentry(kobj->sd);
1204 if (IS_ERR(dentry))
1205 return PTR_ERR(dentry);
1206
1207 inode = dentry->d_inode;
1208 if (inode->i_op != &sysfs_dir_inode_operations) {
1209 dput(dentry);
636 return -EINVAL; 1210 return -EINVAL;
1211 }
637 1212
638 i_op = kmalloc(sizeof(*i_op), GFP_KERNEL); 1213 i_op = kmalloc(sizeof(*i_op), GFP_KERNEL);
639 if (!i_op) 1214 if (!i_op)
@@ -658,54 +1233,72 @@ int sysfs_make_shadowed_dir(struct kobject *kobj,
658 * directory. 1233 * directory.
659 */ 1234 */
660 1235
661struct dentry *sysfs_create_shadow_dir(struct kobject *kobj) 1236struct sysfs_dirent *sysfs_create_shadow_dir(struct kobject *kobj)
662{ 1237{
663 struct sysfs_dirent *sd; 1238 struct sysfs_dirent *parent_sd = kobj->sd->s_parent;
664 struct dentry *parent, *dir, *shadow; 1239 struct dentry *dir, *parent, *shadow;
665 struct inode *inode; 1240 struct inode *inode;
1241 struct sysfs_dirent *sd;
1242 struct sysfs_addrm_cxt acxt;
666 1243
667 dir = kobj->dentry; 1244 dir = sysfs_get_dentry(kobj->sd);
668 inode = dir->d_inode; 1245 if (IS_ERR(dir)) {
1246 sd = (void *)dir;
1247 goto out;
1248 }
669 parent = dir->d_parent; 1249 parent = dir->d_parent;
670 shadow = ERR_PTR(-EINVAL); 1250
1251 inode = dir->d_inode;
1252 sd = ERR_PTR(-EINVAL);
671 if (!sysfs_is_shadowed_inode(inode)) 1253 if (!sysfs_is_shadowed_inode(inode))
672 goto out; 1254 goto out_dput;
673 1255
674 shadow = d_alloc(parent, &dir->d_name); 1256 shadow = d_alloc(parent, &dir->d_name);
675 if (!shadow) 1257 if (!shadow)
676 goto nomem; 1258 goto nomem;
677 1259
678 sd = __sysfs_make_dirent(shadow, kobj, inode->i_mode, SYSFS_DIR); 1260 sd = sysfs_new_dirent("_SHADOW_", inode->i_mode, SYSFS_DIR);
679 if (!sd) 1261 if (!sd)
680 goto nomem; 1262 goto nomem;
1263 sd->s_elem.dir.kobj = kobj;
681 1264
1265 sysfs_addrm_start(&acxt, parent_sd);
1266
1267 /* add but don't link into children list */
1268 sysfs_add_one(&acxt, sd);
1269
1270 /* attach and instantiate dentry */
1271 sysfs_attach_dentry(sd, shadow);
682 d_instantiate(shadow, igrab(inode)); 1272 d_instantiate(shadow, igrab(inode));
683 inc_nlink(inode); 1273 inc_nlink(inode); /* tj: synchronization? */
684 inc_nlink(parent->d_inode); 1274
685 shadow->d_op = &sysfs_dentry_ops; 1275 sysfs_addrm_finish(&acxt);
686 1276
687 dget(shadow); /* Extra count - pin the dentry in core */ 1277 dget(shadow); /* Extra count - pin the dentry in core */
688 1278
689out: 1279 goto out_dput;
690 return shadow; 1280
691nomem: 1281 nomem:
692 dput(shadow); 1282 dput(shadow);
693 shadow = ERR_PTR(-ENOMEM); 1283 sd = ERR_PTR(-ENOMEM);
694 goto out; 1284 out_dput:
1285 dput(dir);
1286 out:
1287 return sd;
695} 1288}
696 1289
697/** 1290/**
698 * sysfs_remove_shadow_dir - remove an object's directory. 1291 * sysfs_remove_shadow_dir - remove an object's directory.
699 * @shadow: dentry of shadow directory 1292 * @shadow_sd: sysfs_dirent of shadow directory
700 * 1293 *
701 * The only thing special about this is that we remove any files in 1294 * The only thing special about this is that we remove any files in
702 * the directory before we remove the directory, and we've inlined 1295 * the directory before we remove the directory, and we've inlined
703 * what used to be sysfs_rmdir() below, instead of calling separately. 1296 * what used to be sysfs_rmdir() below, instead of calling separately.
704 */ 1297 */
705 1298
706void sysfs_remove_shadow_dir(struct dentry *shadow) 1299void sysfs_remove_shadow_dir(struct sysfs_dirent *shadow_sd)
707{ 1300{
708 __sysfs_remove_dir(shadow); 1301 __sysfs_remove_dir(shadow_sd);
709} 1302}
710 1303
711const struct file_operations sysfs_dir_operations = { 1304const struct file_operations sysfs_dir_operations = {
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index b502c7197ec..cc497994b2a 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -50,29 +50,15 @@ static struct sysfs_ops subsys_sysfs_ops = {
50 .store = subsys_attr_store, 50 .store = subsys_attr_store,
51}; 51};
52 52
53/** 53struct sysfs_buffer {
54 * add_to_collection - add buffer to a collection 54 size_t count;
55 * @buffer: buffer to be added 55 loff_t pos;
56 * @node: inode of set to add to 56 char * page;
57 */ 57 struct sysfs_ops * ops;
58 58 struct semaphore sem;
59static inline void 59 int needs_read_fill;
60add_to_collection(struct sysfs_buffer *buffer, struct inode *node) 60 int event;
61{ 61};
62 struct sysfs_buffer_collection *set = node->i_private;
63
64 mutex_lock(&node->i_mutex);
65 list_add(&buffer->associates, &set->associates);
66 mutex_unlock(&node->i_mutex);
67}
68
69static inline void
70remove_from_collection(struct sysfs_buffer *buffer, struct inode *node)
71{
72 mutex_lock(&node->i_mutex);
73 list_del(&buffer->associates);
74 mutex_unlock(&node->i_mutex);
75}
76 62
77/** 63/**
78 * fill_read_buffer - allocate and fill buffer from object. 64 * fill_read_buffer - allocate and fill buffer from object.
@@ -87,9 +73,8 @@ remove_from_collection(struct sysfs_buffer *buffer, struct inode *node)
87 */ 73 */
88static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer) 74static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer)
89{ 75{
90 struct sysfs_dirent * sd = dentry->d_fsdata; 76 struct sysfs_dirent *attr_sd = dentry->d_fsdata;
91 struct attribute * attr = to_attr(dentry); 77 struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
92 struct kobject * kobj = to_kobj(dentry->d_parent);
93 struct sysfs_ops * ops = buffer->ops; 78 struct sysfs_ops * ops = buffer->ops;
94 int ret = 0; 79 int ret = 0;
95 ssize_t count; 80 ssize_t count;
@@ -99,8 +84,15 @@ static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer
99 if (!buffer->page) 84 if (!buffer->page)
100 return -ENOMEM; 85 return -ENOMEM;
101 86
102 buffer->event = atomic_read(&sd->s_event); 87 /* need attr_sd for attr and ops, its parent for kobj */
103 count = ops->show(kobj,attr,buffer->page); 88 if (!sysfs_get_active_two(attr_sd))
89 return -ENODEV;
90
91 buffer->event = atomic_read(&attr_sd->s_event);
92 count = ops->show(kobj, attr_sd->s_elem.attr.attr, buffer->page);
93
94 sysfs_put_active_two(attr_sd);
95
104 BUG_ON(count > (ssize_t)PAGE_SIZE); 96 BUG_ON(count > (ssize_t)PAGE_SIZE);
105 if (count >= 0) { 97 if (count >= 0) {
106 buffer->needs_read_fill = 0; 98 buffer->needs_read_fill = 0;
@@ -138,10 +130,7 @@ sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos)
138 130
139 down(&buffer->sem); 131 down(&buffer->sem);
140 if (buffer->needs_read_fill) { 132 if (buffer->needs_read_fill) {
141 if (buffer->orphaned) 133 retval = fill_read_buffer(file->f_path.dentry,buffer);
142 retval = -ENODEV;
143 else
144 retval = fill_read_buffer(file->f_path.dentry,buffer);
145 if (retval) 134 if (retval)
146 goto out; 135 goto out;
147 } 136 }
@@ -196,14 +185,23 @@ fill_write_buffer(struct sysfs_buffer * buffer, const char __user * buf, size_t
196 * passing the buffer that we acquired in fill_write_buffer(). 185 * passing the buffer that we acquired in fill_write_buffer().
197 */ 186 */
198 187
199static int 188static int
200flush_write_buffer(struct dentry * dentry, struct sysfs_buffer * buffer, size_t count) 189flush_write_buffer(struct dentry * dentry, struct sysfs_buffer * buffer, size_t count)
201{ 190{
202 struct attribute * attr = to_attr(dentry); 191 struct sysfs_dirent *attr_sd = dentry->d_fsdata;
203 struct kobject * kobj = to_kobj(dentry->d_parent); 192 struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
204 struct sysfs_ops * ops = buffer->ops; 193 struct sysfs_ops * ops = buffer->ops;
194 int rc;
195
196 /* need attr_sd for attr and ops, its parent for kobj */
197 if (!sysfs_get_active_two(attr_sd))
198 return -ENODEV;
199
200 rc = ops->store(kobj, attr_sd->s_elem.attr.attr, buffer->page, count);
205 201
206 return ops->store(kobj,attr,buffer->page,count); 202 sysfs_put_active_two(attr_sd);
203
204 return rc;
207} 205}
208 206
209 207
@@ -231,37 +229,26 @@ sysfs_write_file(struct file *file, const char __user *buf, size_t count, loff_t
231 ssize_t len; 229 ssize_t len;
232 230
233 down(&buffer->sem); 231 down(&buffer->sem);
234 if (buffer->orphaned) {
235 len = -ENODEV;
236 goto out;
237 }
238 len = fill_write_buffer(buffer, buf, count); 232 len = fill_write_buffer(buffer, buf, count);
239 if (len > 0) 233 if (len > 0)
240 len = flush_write_buffer(file->f_path.dentry, buffer, len); 234 len = flush_write_buffer(file->f_path.dentry, buffer, len);
241 if (len > 0) 235 if (len > 0)
242 *ppos += len; 236 *ppos += len;
243out:
244 up(&buffer->sem); 237 up(&buffer->sem);
245 return len; 238 return len;
246} 239}
247 240
248static int sysfs_open_file(struct inode *inode, struct file *file) 241static int sysfs_open_file(struct inode *inode, struct file *file)
249{ 242{
250 struct kobject *kobj = sysfs_get_kobject(file->f_path.dentry->d_parent); 243 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
251 struct attribute * attr = to_attr(file->f_path.dentry); 244 struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
252 struct sysfs_buffer_collection *set;
253 struct sysfs_buffer * buffer; 245 struct sysfs_buffer * buffer;
254 struct sysfs_ops * ops = NULL; 246 struct sysfs_ops * ops = NULL;
255 int error = 0; 247 int error;
256
257 if (!kobj || !attr)
258 goto Einval;
259 248
260 /* Grab the module reference for this attribute if we have one */ 249 /* need attr_sd for attr and ops, its parent for kobj */
261 if (!try_module_get(attr->owner)) { 250 if (!sysfs_get_active_two(attr_sd))
262 error = -ENODEV; 251 return -ENODEV;
263 goto Done;
264 }
265 252
266 /* if the kobject has no ktype, then we assume that it is a subsystem 253 /* if the kobject has no ktype, then we assume that it is a subsystem
267 * itself, and use ops for it. 254 * itself, and use ops for it.
@@ -273,33 +260,21 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
273 else 260 else
274 ops = &subsys_sysfs_ops; 261 ops = &subsys_sysfs_ops;
275 262
263 error = -EACCES;
264
276 /* No sysfs operations, either from having no subsystem, 265 /* No sysfs operations, either from having no subsystem,
277 * or the subsystem have no operations. 266 * or the subsystem have no operations.
278 */ 267 */
279 if (!ops) 268 if (!ops)
280 goto Eaccess; 269 goto err_out;
281
282 /* make sure we have a collection to add our buffers to */
283 mutex_lock(&inode->i_mutex);
284 if (!(set = inode->i_private)) {
285 if (!(set = inode->i_private = kmalloc(sizeof(struct sysfs_buffer_collection), GFP_KERNEL))) {
286 error = -ENOMEM;
287 goto Done;
288 } else {
289 INIT_LIST_HEAD(&set->associates);
290 }
291 }
292 mutex_unlock(&inode->i_mutex);
293 270
294 /* File needs write support. 271 /* File needs write support.
295 * The inode's perms must say it's ok, 272 * The inode's perms must say it's ok,
296 * and we must have a store method. 273 * and we must have a store method.
297 */ 274 */
298 if (file->f_mode & FMODE_WRITE) { 275 if (file->f_mode & FMODE_WRITE) {
299
300 if (!(inode->i_mode & S_IWUGO) || !ops->store) 276 if (!(inode->i_mode & S_IWUGO) || !ops->store)
301 goto Eaccess; 277 goto err_out;
302
303 } 278 }
304 279
305 /* File needs read support. 280 /* File needs read support.
@@ -308,48 +283,38 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
308 */ 283 */
309 if (file->f_mode & FMODE_READ) { 284 if (file->f_mode & FMODE_READ) {
310 if (!(inode->i_mode & S_IRUGO) || !ops->show) 285 if (!(inode->i_mode & S_IRUGO) || !ops->show)
311 goto Eaccess; 286 goto err_out;
312 } 287 }
313 288
314 /* No error? Great, allocate a buffer for the file, and store it 289 /* No error? Great, allocate a buffer for the file, and store it
315 * it in file->private_data for easy access. 290 * it in file->private_data for easy access.
316 */ 291 */
292 error = -ENOMEM;
317 buffer = kzalloc(sizeof(struct sysfs_buffer), GFP_KERNEL); 293 buffer = kzalloc(sizeof(struct sysfs_buffer), GFP_KERNEL);
318 if (buffer) { 294 if (!buffer)
319 INIT_LIST_HEAD(&buffer->associates); 295 goto err_out;
320 init_MUTEX(&buffer->sem); 296
321 buffer->needs_read_fill = 1; 297 init_MUTEX(&buffer->sem);
322 buffer->ops = ops; 298 buffer->needs_read_fill = 1;
323 add_to_collection(buffer, inode); 299 buffer->ops = ops;
324 file->private_data = buffer; 300 file->private_data = buffer;
325 } else 301
326 error = -ENOMEM; 302 /* open succeeded, put active references and pin attr_sd */
327 goto Done; 303 sysfs_put_active_two(attr_sd);
328 304 sysfs_get(attr_sd);
329 Einval: 305 return 0;
330 error = -EINVAL; 306
331 goto Done; 307 err_out:
332 Eaccess: 308 sysfs_put_active_two(attr_sd);
333 error = -EACCES;
334 module_put(attr->owner);
335 Done:
336 if (error)
337 kobject_put(kobj);
338 return error; 309 return error;
339} 310}
340 311
341static int sysfs_release(struct inode * inode, struct file * filp) 312static int sysfs_release(struct inode * inode, struct file * filp)
342{ 313{
343 struct kobject * kobj = to_kobj(filp->f_path.dentry->d_parent); 314 struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata;
344 struct attribute * attr = to_attr(filp->f_path.dentry); 315 struct sysfs_buffer *buffer = filp->private_data;
345 struct module * owner = attr->owner;
346 struct sysfs_buffer * buffer = filp->private_data;
347 316
348 if (buffer) 317 sysfs_put(attr_sd);
349 remove_from_collection(buffer, inode);
350 kobject_put(kobj);
351 /* After this point, attr should not be accessed. */
352 module_put(owner);
353 318
354 if (buffer) { 319 if (buffer) {
355 if (buffer->page) 320 if (buffer->page)
@@ -376,57 +341,43 @@ static int sysfs_release(struct inode * inode, struct file * filp)
376static unsigned int sysfs_poll(struct file *filp, poll_table *wait) 341static unsigned int sysfs_poll(struct file *filp, poll_table *wait)
377{ 342{
378 struct sysfs_buffer * buffer = filp->private_data; 343 struct sysfs_buffer * buffer = filp->private_data;
379 struct kobject * kobj = to_kobj(filp->f_path.dentry->d_parent); 344 struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata;
380 struct sysfs_dirent * sd = filp->f_path.dentry->d_fsdata; 345 struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
381 int res = 0; 346
347 /* need parent for the kobj, grab both */
348 if (!sysfs_get_active_two(attr_sd))
349 goto trigger;
382 350
383 poll_wait(filp, &kobj->poll, wait); 351 poll_wait(filp, &kobj->poll, wait);
384 352
385 if (buffer->event != atomic_read(&sd->s_event)) { 353 sysfs_put_active_two(attr_sd);
386 res = POLLERR|POLLPRI;
387 buffer->needs_read_fill = 1;
388 }
389 354
390 return res; 355 if (buffer->event != atomic_read(&attr_sd->s_event))
391} 356 goto trigger;
392 357
358 return 0;
393 359
394static struct dentry *step_down(struct dentry *dir, const char * name) 360 trigger:
395{ 361 buffer->needs_read_fill = 1;
396 struct dentry * de; 362 return POLLERR|POLLPRI;
397
398 if (dir == NULL || dir->d_inode == NULL)
399 return NULL;
400
401 mutex_lock(&dir->d_inode->i_mutex);
402 de = lookup_one_len(name, dir, strlen(name));
403 mutex_unlock(&dir->d_inode->i_mutex);
404 dput(dir);
405 if (IS_ERR(de))
406 return NULL;
407 if (de->d_inode == NULL) {
408 dput(de);
409 return NULL;
410 }
411 return de;
412} 363}
413 364
414void sysfs_notify(struct kobject * k, char *dir, char *attr) 365void sysfs_notify(struct kobject *k, char *dir, char *attr)
415{ 366{
416 struct dentry *de = k->dentry; 367 struct sysfs_dirent *sd = k->sd;
417 if (de) 368
418 dget(de); 369 mutex_lock(&sysfs_mutex);
419 if (de && dir) 370
420 de = step_down(de, dir); 371 if (sd && dir)
421 if (de && attr) 372 sd = sysfs_find_dirent(sd, dir);
422 de = step_down(de, attr); 373 if (sd && attr)
423 if (de) { 374 sd = sysfs_find_dirent(sd, attr);
424 struct sysfs_dirent * sd = de->d_fsdata; 375 if (sd) {
425 if (sd) 376 atomic_inc(&sd->s_event);
426 atomic_inc(&sd->s_event);
427 wake_up_interruptible(&k->poll); 377 wake_up_interruptible(&k->poll);
428 dput(de);
429 } 378 }
379
380 mutex_unlock(&sysfs_mutex);
430} 381}
431EXPORT_SYMBOL_GPL(sysfs_notify); 382EXPORT_SYMBOL_GPL(sysfs_notify);
432 383
@@ -440,19 +391,30 @@ const struct file_operations sysfs_file_operations = {
440}; 391};
441 392
442 393
443int sysfs_add_file(struct dentry * dir, const struct attribute * attr, int type) 394int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr,
395 int type)
444{ 396{
445 struct sysfs_dirent * parent_sd = dir->d_fsdata;
446 umode_t mode = (attr->mode & S_IALLUGO) | S_IFREG; 397 umode_t mode = (attr->mode & S_IALLUGO) | S_IFREG;
447 int error = -EEXIST; 398 struct sysfs_addrm_cxt acxt;
399 struct sysfs_dirent *sd;
448 400
449 mutex_lock(&dir->d_inode->i_mutex); 401 sd = sysfs_new_dirent(attr->name, mode, type);
450 if (!sysfs_dirent_exist(parent_sd, attr->name)) 402 if (!sd)
451 error = sysfs_make_dirent(parent_sd, NULL, (void *)attr, 403 return -ENOMEM;
452 mode, type); 404 sd->s_elem.attr.attr = (void *)attr;
453 mutex_unlock(&dir->d_inode->i_mutex);
454 405
455 return error; 406 sysfs_addrm_start(&acxt, dir_sd);
407
408 if (!sysfs_find_dirent(dir_sd, attr->name)) {
409 sysfs_add_one(&acxt, sd);
410 sysfs_link_sibling(sd);
411 }
412
413 if (sysfs_addrm_finish(&acxt))
414 return 0;
415
416 sysfs_put(sd);
417 return -EEXIST;
456} 418}
457 419
458 420
@@ -464,9 +426,9 @@ int sysfs_add_file(struct dentry * dir, const struct attribute * attr, int type)
464 426
465int sysfs_create_file(struct kobject * kobj, const struct attribute * attr) 427int sysfs_create_file(struct kobject * kobj, const struct attribute * attr)
466{ 428{
467 BUG_ON(!kobj || !kobj->dentry || !attr); 429 BUG_ON(!kobj || !kobj->sd || !attr);
468 430
469 return sysfs_add_file(kobj->dentry, attr, SYSFS_KOBJ_ATTR); 431 return sysfs_add_file(kobj->sd, attr, SYSFS_KOBJ_ATTR);
470 432
471} 433}
472 434
@@ -480,16 +442,16 @@ int sysfs_create_file(struct kobject * kobj, const struct attribute * attr)
480int sysfs_add_file_to_group(struct kobject *kobj, 442int sysfs_add_file_to_group(struct kobject *kobj,
481 const struct attribute *attr, const char *group) 443 const struct attribute *attr, const char *group)
482{ 444{
483 struct dentry *dir; 445 struct sysfs_dirent *dir_sd;
484 int error; 446 int error;
485 447
486 dir = lookup_one_len(group, kobj->dentry, strlen(group)); 448 dir_sd = sysfs_get_dirent(kobj->sd, group);
487 if (IS_ERR(dir)) 449 if (!dir_sd)
488 error = PTR_ERR(dir); 450 return -ENOENT;
489 else { 451
490 error = sysfs_add_file(dir, attr, SYSFS_KOBJ_ATTR); 452 error = sysfs_add_file(dir_sd, attr, SYSFS_KOBJ_ATTR);
491 dput(dir); 453 sysfs_put(dir_sd);
492 } 454
493 return error; 455 return error;
494} 456}
495EXPORT_SYMBOL_GPL(sysfs_add_file_to_group); 457EXPORT_SYMBOL_GPL(sysfs_add_file_to_group);
@@ -502,30 +464,31 @@ EXPORT_SYMBOL_GPL(sysfs_add_file_to_group);
502 */ 464 */
503int sysfs_update_file(struct kobject * kobj, const struct attribute * attr) 465int sysfs_update_file(struct kobject * kobj, const struct attribute * attr)
504{ 466{
505 struct dentry * dir = kobj->dentry; 467 struct sysfs_dirent *victim_sd = NULL;
506 struct dentry * victim; 468 struct dentry *victim = NULL;
507 int res = -ENOENT; 469 int rc;
508 470
509 mutex_lock(&dir->d_inode->i_mutex); 471 rc = -ENOENT;
510 victim = lookup_one_len(attr->name, dir, strlen(attr->name)); 472 victim_sd = sysfs_get_dirent(kobj->sd, attr->name);
511 if (!IS_ERR(victim)) { 473 if (!victim_sd)
512 /* make sure dentry is really there */ 474 goto out;
513 if (victim->d_inode && 475
514 (victim->d_parent->d_inode == dir->d_inode)) { 476 victim = sysfs_get_dentry(victim_sd);
515 victim->d_inode->i_mtime = CURRENT_TIME; 477 if (IS_ERR(victim)) {
516 fsnotify_modify(victim); 478 rc = PTR_ERR(victim);
517 res = 0; 479 victim = NULL;
518 } else 480 goto out;
519 d_drop(victim);
520
521 /**
522 * Drop the reference acquired from lookup_one_len() above.
523 */
524 dput(victim);
525 } 481 }
526 mutex_unlock(&dir->d_inode->i_mutex);
527 482
528 return res; 483 mutex_lock(&victim->d_inode->i_mutex);
484 victim->d_inode->i_mtime = CURRENT_TIME;
485 fsnotify_modify(victim);
486 mutex_unlock(&victim->d_inode->i_mutex);
487 rc = 0;
488 out:
489 dput(victim);
490 sysfs_put(victim_sd);
491 return rc;
529} 492}
530 493
531 494
@@ -538,30 +501,34 @@ int sysfs_update_file(struct kobject * kobj, const struct attribute * attr)
538 */ 501 */
539int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode) 502int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode)
540{ 503{
541 struct dentry *dir = kobj->dentry; 504 struct sysfs_dirent *victim_sd = NULL;
542 struct dentry *victim; 505 struct dentry *victim = NULL;
543 struct inode * inode; 506 struct inode * inode;
544 struct iattr newattrs; 507 struct iattr newattrs;
545 int res = -ENOENT; 508 int rc;
546 509
547 mutex_lock(&dir->d_inode->i_mutex); 510 rc = -ENOENT;
548 victim = lookup_one_len(attr->name, dir, strlen(attr->name)); 511 victim_sd = sysfs_get_dirent(kobj->sd, attr->name);
549 if (!IS_ERR(victim)) { 512 if (!victim_sd)
550 if (victim->d_inode && 513 goto out;
551 (victim->d_parent->d_inode == dir->d_inode)) { 514
552 inode = victim->d_inode; 515 victim = sysfs_get_dentry(victim_sd);
553 mutex_lock(&inode->i_mutex); 516 if (IS_ERR(victim)) {
554 newattrs.ia_mode = (mode & S_IALLUGO) | 517 rc = PTR_ERR(victim);
555 (inode->i_mode & ~S_IALLUGO); 518 victim = NULL;
556 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; 519 goto out;
557 res = notify_change(victim, &newattrs);
558 mutex_unlock(&inode->i_mutex);
559 }
560 dput(victim);
561 } 520 }
562 mutex_unlock(&dir->d_inode->i_mutex);
563 521
564 return res; 522 inode = victim->d_inode;
523 mutex_lock(&inode->i_mutex);
524 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
525 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
526 rc = notify_change(victim, &newattrs);
527 mutex_unlock(&inode->i_mutex);
528 out:
529 dput(victim);
530 sysfs_put(victim_sd);
531 return rc;
565} 532}
566EXPORT_SYMBOL_GPL(sysfs_chmod_file); 533EXPORT_SYMBOL_GPL(sysfs_chmod_file);
567 534
@@ -576,7 +543,7 @@ EXPORT_SYMBOL_GPL(sysfs_chmod_file);
576 543
577void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr) 544void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr)
578{ 545{
579 sysfs_hash_and_remove(kobj->dentry, attr->name); 546 sysfs_hash_and_remove(kobj->sd, attr->name);
580} 547}
581 548
582 549
@@ -589,12 +556,12 @@ void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr)
589void sysfs_remove_file_from_group(struct kobject *kobj, 556void sysfs_remove_file_from_group(struct kobject *kobj,
590 const struct attribute *attr, const char *group) 557 const struct attribute *attr, const char *group)
591{ 558{
592 struct dentry *dir; 559 struct sysfs_dirent *dir_sd;
593 560
594 dir = lookup_one_len(group, kobj->dentry, strlen(group)); 561 dir_sd = sysfs_get_dirent(kobj->sd, group);
595 if (!IS_ERR(dir)) { 562 if (dir_sd) {
596 sysfs_hash_and_remove(dir, attr->name); 563 sysfs_hash_and_remove(dir_sd, attr->name);
597 dput(dir); 564 sysfs_put(dir_sd);
598 } 565 }
599} 566}
600EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group); 567EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group);
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 52eed2a7a5e..f318b73c790 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -18,26 +18,25 @@
18#include "sysfs.h" 18#include "sysfs.h"
19 19
20 20
21static void remove_files(struct dentry * dir, 21static void remove_files(struct sysfs_dirent *dir_sd,
22 const struct attribute_group * grp) 22 const struct attribute_group *grp)
23{ 23{
24 struct attribute *const* attr; 24 struct attribute *const* attr;
25 25
26 for (attr = grp->attrs; *attr; attr++) 26 for (attr = grp->attrs; *attr; attr++)
27 sysfs_hash_and_remove(dir,(*attr)->name); 27 sysfs_hash_and_remove(dir_sd, (*attr)->name);
28} 28}
29 29
30static int create_files(struct dentry * dir, 30static int create_files(struct sysfs_dirent *dir_sd,
31 const struct attribute_group * grp) 31 const struct attribute_group *grp)
32{ 32{
33 struct attribute *const* attr; 33 struct attribute *const* attr;
34 int error = 0; 34 int error = 0;
35 35
36 for (attr = grp->attrs; *attr && !error; attr++) { 36 for (attr = grp->attrs; *attr && !error; attr++)
37 error = sysfs_add_file(dir, *attr, SYSFS_KOBJ_ATTR); 37 error = sysfs_add_file(dir_sd, *attr, SYSFS_KOBJ_ATTR);
38 }
39 if (error) 38 if (error)
40 remove_files(dir,grp); 39 remove_files(dir_sd, grp);
41 return error; 40 return error;
42} 41}
43 42
@@ -45,44 +44,44 @@ static int create_files(struct dentry * dir,
45int sysfs_create_group(struct kobject * kobj, 44int sysfs_create_group(struct kobject * kobj,
46 const struct attribute_group * grp) 45 const struct attribute_group * grp)
47{ 46{
48 struct dentry * dir; 47 struct sysfs_dirent *sd;
49 int error; 48 int error;
50 49
51 BUG_ON(!kobj || !kobj->dentry); 50 BUG_ON(!kobj || !kobj->sd);
52 51
53 if (grp->name) { 52 if (grp->name) {
54 error = sysfs_create_subdir(kobj,grp->name,&dir); 53 error = sysfs_create_subdir(kobj, grp->name, &sd);
55 if (error) 54 if (error)
56 return error; 55 return error;
57 } else 56 } else
58 dir = kobj->dentry; 57 sd = kobj->sd;
59 dir = dget(dir); 58 sysfs_get(sd);
60 if ((error = create_files(dir,grp))) { 59 error = create_files(sd, grp);
60 if (error) {
61 if (grp->name) 61 if (grp->name)
62 sysfs_remove_subdir(dir); 62 sysfs_remove_subdir(sd);
63 } 63 }
64 dput(dir); 64 sysfs_put(sd);
65 return error; 65 return error;
66} 66}
67 67
68void sysfs_remove_group(struct kobject * kobj, 68void sysfs_remove_group(struct kobject * kobj,
69 const struct attribute_group * grp) 69 const struct attribute_group * grp)
70{ 70{
71 struct dentry * dir; 71 struct sysfs_dirent *dir_sd = kobj->sd;
72 struct sysfs_dirent *sd;
72 73
73 if (grp->name) { 74 if (grp->name) {
74 dir = lookup_one_len_kern(grp->name, kobj->dentry, 75 sd = sysfs_get_dirent(dir_sd, grp->name);
75 strlen(grp->name)); 76 BUG_ON(!sd);
76 BUG_ON(IS_ERR(dir)); 77 } else
77 } 78 sd = sysfs_get(dir_sd);
78 else
79 dir = dget(kobj->dentry);
80 79
81 remove_files(dir,grp); 80 remove_files(sd, grp);
82 if (grp->name) 81 if (grp->name)
83 sysfs_remove_subdir(dir); 82 sysfs_remove_subdir(sd);
84 /* release the ref. taken in this routine */ 83
85 dput(dir); 84 sysfs_put(sd);
86} 85}
87 86
88 87
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 5266eec15f6..3756e152285 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -133,187 +133,94 @@ static inline void set_inode_attr(struct inode * inode, struct iattr * iattr)
133 */ 133 */
134static struct lock_class_key sysfs_inode_imutex_key; 134static struct lock_class_key sysfs_inode_imutex_key;
135 135
136struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent * sd) 136void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
137{ 137{
138 struct inode * inode = new_inode(sysfs_sb); 138 inode->i_blocks = 0;
139 if (inode) { 139 inode->i_mapping->a_ops = &sysfs_aops;
140 inode->i_blocks = 0; 140 inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info;
141 inode->i_mapping->a_ops = &sysfs_aops; 141 inode->i_op = &sysfs_inode_operations;
142 inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info; 142 inode->i_ino = sd->s_ino;
143 inode->i_op = &sysfs_inode_operations; 143 lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key);
144 inode->i_ino = sd->s_ino; 144
145 lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key); 145 if (sd->s_iattr) {
146 146 /* sysfs_dirent has non-default attributes
147 if (sd->s_iattr) { 147 * get them for the new inode from persistent copy
148 /* sysfs_dirent has non-default attributes 148 * in sysfs_dirent
149 * get them for the new inode from persistent copy 149 */
150 * in sysfs_dirent 150 set_inode_attr(inode, sd->s_iattr);
151 */
152 set_inode_attr(inode, sd->s_iattr);
153 } else
154 set_default_inode_attr(inode, mode);
155 }
156 return inode;
157}
158
159int sysfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *))
160{
161 int error = 0;
162 struct inode * inode = NULL;
163 if (dentry) {
164 if (!dentry->d_inode) {
165 struct sysfs_dirent * sd = dentry->d_fsdata;
166 if ((inode = sysfs_new_inode(mode, sd))) {
167 if (dentry->d_parent && dentry->d_parent->d_inode) {
168 struct inode *p_inode = dentry->d_parent->d_inode;
169 p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME;
170 }
171 goto Proceed;
172 }
173 else
174 error = -ENOMEM;
175 } else
176 error = -EEXIST;
177 } else
178 error = -ENOENT;
179 goto Done;
180
181 Proceed:
182 if (init)
183 error = init(inode);
184 if (!error) {
185 d_instantiate(dentry, inode);
186 if (S_ISDIR(mode))
187 dget(dentry); /* pin only directory dentry in core */
188 } else 151 } else
189 iput(inode); 152 set_default_inode_attr(inode, sd->s_mode);
190 Done:
191 return error;
192} 153}
193 154
194/* 155/**
195 * Get the name for corresponding element represented by the given sysfs_dirent 156 * sysfs_get_inode - get inode for sysfs_dirent
157 * @sd: sysfs_dirent to allocate inode for
158 *
159 * Get inode for @sd. If such inode doesn't exist, a new inode
160 * is allocated and basics are initialized. New inode is
161 * returned locked.
162 *
163 * LOCKING:
164 * Kernel thread context (may sleep).
165 *
166 * RETURNS:
167 * Pointer to allocated inode on success, NULL on failure.
196 */ 168 */
197const unsigned char * sysfs_get_name(struct sysfs_dirent *sd) 169struct inode * sysfs_get_inode(struct sysfs_dirent *sd)
198{ 170{
199 struct attribute * attr; 171 struct inode *inode;
200 struct bin_attribute * bin_attr;
201 struct sysfs_symlink * sl;
202
203 BUG_ON(!sd || !sd->s_element);
204
205 switch (sd->s_type) {
206 case SYSFS_DIR:
207 /* Always have a dentry so use that */
208 return sd->s_dentry->d_name.name;
209
210 case SYSFS_KOBJ_ATTR:
211 attr = sd->s_element;
212 return attr->name;
213
214 case SYSFS_KOBJ_BIN_ATTR:
215 bin_attr = sd->s_element;
216 return bin_attr->attr.name;
217 172
218 case SYSFS_KOBJ_LINK: 173 inode = iget_locked(sysfs_sb, sd->s_ino);
219 sl = sd->s_element; 174 if (inode && (inode->i_state & I_NEW))
220 return sl->link_name; 175 sysfs_init_inode(sd, inode);
221 }
222 return NULL;
223}
224 176
225static inline void orphan_all_buffers(struct inode *node) 177 return inode;
226{
227 struct sysfs_buffer_collection *set;
228 struct sysfs_buffer *buf;
229
230 mutex_lock_nested(&node->i_mutex, I_MUTEX_CHILD);
231 set = node->i_private;
232 if (set) {
233 list_for_each_entry(buf, &set->associates, associates) {
234 down(&buf->sem);
235 buf->orphaned = 1;
236 up(&buf->sem);
237 }
238 }
239 mutex_unlock(&node->i_mutex);
240} 178}
241 179
242 180/**
243/* 181 * sysfs_instantiate - instantiate dentry
244 * Unhashes the dentry corresponding to given sysfs_dirent 182 * @dentry: dentry to be instantiated
245 * Called with parent inode's i_mutex held. 183 * @inode: inode associated with @sd
184 *
185 * Unlock @inode if locked and instantiate @dentry with @inode.
186 *
187 * LOCKING:
188 * None.
246 */ 189 */
247void sysfs_drop_dentry(struct sysfs_dirent * sd, struct dentry * parent) 190void sysfs_instantiate(struct dentry *dentry, struct inode *inode)
248{ 191{
249 struct dentry *dentry = NULL; 192 BUG_ON(!dentry || dentry->d_inode);
250 struct inode *inode;
251 193
252 /* We're not holding a reference to ->s_dentry dentry but the 194 if (inode->i_state & I_NEW)
253 * field will stay valid as long as sysfs_lock is held. 195 unlock_new_inode(inode);
254 */
255 spin_lock(&sysfs_lock);
256 spin_lock(&dcache_lock);
257
258 /* dget dentry if it's still alive */
259 if (sd->s_dentry && sd->s_dentry->d_inode)
260 dentry = dget_locked(sd->s_dentry);
261
262 spin_unlock(&dcache_lock);
263 spin_unlock(&sysfs_lock);
264
265 /* drop dentry */
266 if (dentry) {
267 spin_lock(&dcache_lock);
268 spin_lock(&dentry->d_lock);
269 if (!d_unhashed(dentry) && dentry->d_inode) {
270 inode = dentry->d_inode;
271 spin_lock(&inode->i_lock);
272 __iget(inode);
273 spin_unlock(&inode->i_lock);
274 dget_locked(dentry);
275 __d_drop(dentry);
276 spin_unlock(&dentry->d_lock);
277 spin_unlock(&dcache_lock);
278 simple_unlink(parent->d_inode, dentry);
279 orphan_all_buffers(inode);
280 iput(inode);
281 } else {
282 spin_unlock(&dentry->d_lock);
283 spin_unlock(&dcache_lock);
284 }
285 196
286 dput(dentry); 197 d_instantiate(dentry, inode);
287 }
288} 198}
289 199
290int sysfs_hash_and_remove(struct dentry * dir, const char * name) 200int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name)
291{ 201{
292 struct sysfs_dirent * sd; 202 struct sysfs_addrm_cxt acxt;
293 struct sysfs_dirent * parent_sd; 203 struct sysfs_dirent **pos, *sd;
294 int found = 0;
295 204
296 if (!dir) 205 if (!dir_sd)
297 return -ENOENT; 206 return -ENOENT;
298 207
299 if (dir->d_inode == NULL) 208 sysfs_addrm_start(&acxt, dir_sd);
300 /* no inode means this hasn't been made visible yet */ 209
301 return -ENOENT; 210 for (pos = &dir_sd->s_children; *pos; pos = &(*pos)->s_sibling) {
211 sd = *pos;
302 212
303 parent_sd = dir->d_fsdata; 213 if (!sysfs_type(sd))
304 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
305 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
306 if (!sd->s_element)
307 continue; 214 continue;
308 if (!strcmp(sysfs_get_name(sd), name)) { 215 if (!strcmp(sd->s_name, name)) {
309 list_del_init(&sd->s_sibling); 216 *pos = sd->s_sibling;
310 sysfs_drop_dentry(sd, dir); 217 sd->s_sibling = NULL;
311 sysfs_put(sd); 218 sysfs_remove_one(&acxt, sd);
312 found = 1;
313 break; 219 break;
314 } 220 }
315 } 221 }
316 mutex_unlock(&dir->d_inode->i_mutex);
317 222
318 return found ? 0 : -ENOENT; 223 if (sysfs_addrm_finish(&acxt))
224 return 0;
225 return -ENOENT;
319} 226}
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 00ab9125d39..402cc356203 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -19,28 +19,18 @@ struct vfsmount *sysfs_mount;
19struct super_block * sysfs_sb = NULL; 19struct super_block * sysfs_sb = NULL;
20struct kmem_cache *sysfs_dir_cachep; 20struct kmem_cache *sysfs_dir_cachep;
21 21
22static void sysfs_clear_inode(struct inode *inode);
23
24static const struct super_operations sysfs_ops = { 22static const struct super_operations sysfs_ops = {
25 .statfs = simple_statfs, 23 .statfs = simple_statfs,
26 .drop_inode = sysfs_delete_inode, 24 .drop_inode = sysfs_delete_inode,
27 .clear_inode = sysfs_clear_inode,
28}; 25};
29 26
30static struct sysfs_dirent sysfs_root = { 27struct sysfs_dirent sysfs_root = {
31 .s_sibling = LIST_HEAD_INIT(sysfs_root.s_sibling), 28 .s_count = ATOMIC_INIT(1),
32 .s_children = LIST_HEAD_INIT(sysfs_root.s_children), 29 .s_flags = SYSFS_ROOT,
33 .s_element = NULL, 30 .s_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO,
34 .s_type = SYSFS_ROOT,
35 .s_iattr = NULL,
36 .s_ino = 1, 31 .s_ino = 1,
37}; 32};
38 33
39static void sysfs_clear_inode(struct inode *inode)
40{
41 kfree(inode->i_private);
42}
43
44static int sysfs_fill_super(struct super_block *sb, void *data, int silent) 34static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
45{ 35{
46 struct inode *inode; 36 struct inode *inode;
@@ -53,24 +43,26 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
53 sb->s_time_gran = 1; 43 sb->s_time_gran = 1;
54 sysfs_sb = sb; 44 sysfs_sb = sb;
55 45
56 inode = sysfs_new_inode(S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, 46 inode = new_inode(sysfs_sb);
57 &sysfs_root); 47 if (!inode) {
58 if (inode) {
59 inode->i_op = &sysfs_dir_inode_operations;
60 inode->i_fop = &sysfs_dir_operations;
61 /* directory inodes start off with i_nlink == 2 (for "." entry) */
62 inc_nlink(inode);
63 } else {
64 pr_debug("sysfs: could not get root inode\n"); 48 pr_debug("sysfs: could not get root inode\n");
65 return -ENOMEM; 49 return -ENOMEM;
66 } 50 }
67 51
52 sysfs_init_inode(&sysfs_root, inode);
53
54 inode->i_op = &sysfs_dir_inode_operations;
55 inode->i_fop = &sysfs_dir_operations;
56 /* directory inodes start off with i_nlink == 2 (for "." entry) */
57 inc_nlink(inode);
58
68 root = d_alloc_root(inode); 59 root = d_alloc_root(inode);
69 if (!root) { 60 if (!root) {
70 pr_debug("%s: could not get root dentry!\n",__FUNCTION__); 61 pr_debug("%s: could not get root dentry!\n",__FUNCTION__);
71 iput(inode); 62 iput(inode);
72 return -ENOMEM; 63 return -ENOMEM;
73 } 64 }
65 sysfs_root.s_dentry = root;
74 root->d_fsdata = &sysfs_root; 66 root->d_fsdata = &sysfs_root;
75 sb->s_root = root; 67 sb->s_root = root;
76 return 0; 68 return 0;
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 7b9c5bfde92..2f86e042229 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -11,71 +11,39 @@
11 11
12#include "sysfs.h" 12#include "sysfs.h"
13 13
14static int object_depth(struct kobject * kobj) 14static int object_depth(struct sysfs_dirent *sd)
15{ 15{
16 struct kobject * p = kobj;
17 int depth = 0; 16 int depth = 0;
18 do { depth++; } while ((p = p->parent)); 17
18 for (; sd->s_parent; sd = sd->s_parent)
19 depth++;
20
19 return depth; 21 return depth;
20} 22}
21 23
22static int object_path_length(struct kobject * kobj) 24static int object_path_length(struct sysfs_dirent * sd)
23{ 25{
24 struct kobject * p = kobj;
25 int length = 1; 26 int length = 1;
26 do { 27
27 length += strlen(kobject_name(p)) + 1; 28 for (; sd->s_parent; sd = sd->s_parent)
28 p = p->parent; 29 length += strlen(sd->s_name) + 1;
29 } while (p); 30
30 return length; 31 return length;
31} 32}
32 33
33static void fill_object_path(struct kobject * kobj, char * buffer, int length) 34static void fill_object_path(struct sysfs_dirent *sd, char *buffer, int length)
34{ 35{
35 struct kobject * p;
36
37 --length; 36 --length;
38 for (p = kobj; p; p = p->parent) { 37 for (; sd->s_parent; sd = sd->s_parent) {
39 int cur = strlen(kobject_name(p)); 38 int cur = strlen(sd->s_name);
40 39
41 /* back up enough to print this bus id with '/' */ 40 /* back up enough to print this bus id with '/' */
42 length -= cur; 41 length -= cur;
43 strncpy(buffer + length,kobject_name(p),cur); 42 strncpy(buffer + length, sd->s_name, cur);
44 *(buffer + --length) = '/'; 43 *(buffer + --length) = '/';
45 } 44 }
46} 45}
47 46
48static int sysfs_add_link(struct dentry * parent, const char * name, struct kobject * target)
49{
50 struct sysfs_dirent * parent_sd = parent->d_fsdata;
51 struct sysfs_symlink * sl;
52 int error = 0;
53
54 error = -ENOMEM;
55 sl = kmalloc(sizeof(*sl), GFP_KERNEL);
56 if (!sl)
57 goto exit1;
58
59 sl->link_name = kmalloc(strlen(name) + 1, GFP_KERNEL);
60 if (!sl->link_name)
61 goto exit2;
62
63 strcpy(sl->link_name, name);
64 sl->target_kobj = kobject_get(target);
65
66 error = sysfs_make_dirent(parent_sd, NULL, sl, S_IFLNK|S_IRWXUGO,
67 SYSFS_KOBJ_LINK);
68 if (!error)
69 return 0;
70
71 kobject_put(target);
72 kfree(sl->link_name);
73exit2:
74 kfree(sl);
75exit1:
76 return error;
77}
78
79/** 47/**
80 * sysfs_create_link - create symlink between two objects. 48 * sysfs_create_link - create symlink between two objects.
81 * @kobj: object whose directory we're creating the link in. 49 * @kobj: object whose directory we're creating the link in.
@@ -84,24 +52,57 @@ exit1:
84 */ 52 */
85int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char * name) 53int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char * name)
86{ 54{
87 struct dentry *dentry = NULL; 55 struct sysfs_dirent *parent_sd = NULL;
88 int error = -EEXIST; 56 struct sysfs_dirent *target_sd = NULL;
57 struct sysfs_dirent *sd = NULL;
58 struct sysfs_addrm_cxt acxt;
59 int error;
89 60
90 BUG_ON(!name); 61 BUG_ON(!name);
91 62
92 if (!kobj) { 63 if (!kobj) {
93 if (sysfs_mount && sysfs_mount->mnt_sb) 64 if (sysfs_mount && sysfs_mount->mnt_sb)
94 dentry = sysfs_mount->mnt_sb->s_root; 65 parent_sd = sysfs_mount->mnt_sb->s_root->d_fsdata;
95 } else 66 } else
96 dentry = kobj->dentry; 67 parent_sd = kobj->sd;
68
69 error = -EFAULT;
70 if (!parent_sd)
71 goto out_put;
72
73 /* target->sd can go away beneath us but is protected with
74 * sysfs_assoc_lock. Fetch target_sd from it.
75 */
76 spin_lock(&sysfs_assoc_lock);
77 if (target->sd)
78 target_sd = sysfs_get(target->sd);
79 spin_unlock(&sysfs_assoc_lock);
80
81 error = -ENOENT;
82 if (!target_sd)
83 goto out_put;
84
85 error = -ENOMEM;
86 sd = sysfs_new_dirent(name, S_IFLNK|S_IRWXUGO, SYSFS_KOBJ_LINK);
87 if (!sd)
88 goto out_put;
89 sd->s_elem.symlink.target_sd = target_sd;
97 90
98 if (!dentry) 91 sysfs_addrm_start(&acxt, parent_sd);
99 return -EFAULT;
100 92
101 mutex_lock(&dentry->d_inode->i_mutex); 93 if (!sysfs_find_dirent(parent_sd, name)) {
102 if (!sysfs_dirent_exist(dentry->d_fsdata, name)) 94 sysfs_add_one(&acxt, sd);
103 error = sysfs_add_link(dentry, name, target); 95 sysfs_link_sibling(sd);
104 mutex_unlock(&dentry->d_inode->i_mutex); 96 }
97
98 if (sysfs_addrm_finish(&acxt))
99 return 0;
100
101 error = -EEXIST;
102 /* fall through */
103 out_put:
104 sysfs_put(target_sd);
105 sysfs_put(sd);
105 return error; 106 return error;
106} 107}
107 108
@@ -114,17 +115,17 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char
114 115
115void sysfs_remove_link(struct kobject * kobj, const char * name) 116void sysfs_remove_link(struct kobject * kobj, const char * name)
116{ 117{
117 sysfs_hash_and_remove(kobj->dentry,name); 118 sysfs_hash_and_remove(kobj->sd, name);
118} 119}
119 120
120static int sysfs_get_target_path(struct kobject * kobj, struct kobject * target, 121static int sysfs_get_target_path(struct sysfs_dirent * parent_sd,
121 char *path) 122 struct sysfs_dirent * target_sd, char *path)
122{ 123{
123 char * s; 124 char * s;
124 int depth, size; 125 int depth, size;
125 126
126 depth = object_depth(kobj); 127 depth = object_depth(parent_sd);
127 size = object_path_length(target) + depth * 3 - 1; 128 size = object_path_length(target_sd) + depth * 3 - 1;
128 if (size > PATH_MAX) 129 if (size > PATH_MAX)
129 return -ENAMETOOLONG; 130 return -ENAMETOOLONG;
130 131
@@ -133,7 +134,7 @@ static int sysfs_get_target_path(struct kobject * kobj, struct kobject * target,
133 for (s = path; depth--; s += 3) 134 for (s = path; depth--; s += 3)
134 strcpy(s,"../"); 135 strcpy(s,"../");
135 136
136 fill_object_path(target, path, size); 137 fill_object_path(target_sd, path, size);
137 pr_debug("%s: path = '%s'\n", __FUNCTION__, path); 138 pr_debug("%s: path = '%s'\n", __FUNCTION__, path);
138 139
139 return 0; 140 return 0;
@@ -141,27 +142,16 @@ static int sysfs_get_target_path(struct kobject * kobj, struct kobject * target,
141 142
142static int sysfs_getlink(struct dentry *dentry, char * path) 143static int sysfs_getlink(struct dentry *dentry, char * path)
143{ 144{
144 struct kobject *kobj, *target_kobj; 145 struct sysfs_dirent *sd = dentry->d_fsdata;
145 int error = 0; 146 struct sysfs_dirent *parent_sd = sd->s_parent;
147 struct sysfs_dirent *target_sd = sd->s_elem.symlink.target_sd;
148 int error;
146 149
147 kobj = sysfs_get_kobject(dentry->d_parent); 150 mutex_lock(&sysfs_mutex);
148 if (!kobj) 151 error = sysfs_get_target_path(parent_sd, target_sd, path);
149 return -EINVAL; 152 mutex_unlock(&sysfs_mutex);
150 153
151 target_kobj = sysfs_get_kobject(dentry);
152 if (!target_kobj) {
153 kobject_put(kobj);
154 return -EINVAL;
155 }
156
157 down_read(&sysfs_rename_sem);
158 error = sysfs_get_target_path(kobj, target_kobj, path);
159 up_read(&sysfs_rename_sem);
160
161 kobject_put(kobj);
162 kobject_put(target_kobj);
163 return error; 154 return error;
164
165} 155}
166 156
167static void *sysfs_follow_link(struct dentry *dentry, struct nameidata *nd) 157static void *sysfs_follow_link(struct dentry *dentry, struct nameidata *nd)
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 502c949c402..6a37f2386a8 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -1,9 +1,40 @@
1struct sysfs_elem_dir {
2 struct kobject * kobj;
3};
4
5struct sysfs_elem_symlink {
6 struct sysfs_dirent * target_sd;
7};
8
9struct sysfs_elem_attr {
10 struct attribute * attr;
11};
12
13struct sysfs_elem_bin_attr {
14 struct bin_attribute * bin_attr;
15};
16
17/*
18 * As long as s_count reference is held, the sysfs_dirent itself is
19 * accessible. Dereferencing s_elem or any other outer entity
20 * requires s_active reference.
21 */
1struct sysfs_dirent { 22struct sysfs_dirent {
2 atomic_t s_count; 23 atomic_t s_count;
3 struct list_head s_sibling; 24 atomic_t s_active;
4 struct list_head s_children; 25 struct sysfs_dirent * s_parent;
5 void * s_element; 26 struct sysfs_dirent * s_sibling;
6 int s_type; 27 struct sysfs_dirent * s_children;
28 const char * s_name;
29
30 union {
31 struct sysfs_elem_dir dir;
32 struct sysfs_elem_symlink symlink;
33 struct sysfs_elem_attr attr;
34 struct sysfs_elem_bin_attr bin_attr;
35 } s_elem;
36
37 unsigned int s_flags;
7 umode_t s_mode; 38 umode_t s_mode;
8 ino_t s_ino; 39 ino_t s_ino;
9 struct dentry * s_dentry; 40 struct dentry * s_dentry;
@@ -11,30 +42,60 @@ struct sysfs_dirent {
11 atomic_t s_event; 42 atomic_t s_event;
12}; 43};
13 44
45#define SD_DEACTIVATED_BIAS INT_MIN
46
47struct sysfs_addrm_cxt {
48 struct sysfs_dirent *parent_sd;
49 struct inode *parent_inode;
50 struct sysfs_dirent *removed;
51 int cnt;
52};
53
14extern struct vfsmount * sysfs_mount; 54extern struct vfsmount * sysfs_mount;
55extern struct sysfs_dirent sysfs_root;
15extern struct kmem_cache *sysfs_dir_cachep; 56extern struct kmem_cache *sysfs_dir_cachep;
16 57
17extern void sysfs_delete_inode(struct inode *inode); 58extern struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd);
18extern struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent *); 59extern void sysfs_link_sibling(struct sysfs_dirent *sd);
19extern int sysfs_create(struct dentry *, int mode, int (*init)(struct inode *)); 60extern void sysfs_unlink_sibling(struct sysfs_dirent *sd);
61extern struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd);
62extern void sysfs_put_active(struct sysfs_dirent *sd);
63extern struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd);
64extern void sysfs_put_active_two(struct sysfs_dirent *sd);
65extern void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
66 struct sysfs_dirent *parent_sd);
67extern void sysfs_add_one(struct sysfs_addrm_cxt *acxt,
68 struct sysfs_dirent *sd);
69extern void sysfs_remove_one(struct sysfs_addrm_cxt *acxt,
70 struct sysfs_dirent *sd);
71extern int sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt);
20 72
21extern int sysfs_dirent_exist(struct sysfs_dirent *, const unsigned char *); 73extern void sysfs_delete_inode(struct inode *inode);
22extern int sysfs_make_dirent(struct sysfs_dirent *, struct dentry *, void *, 74extern void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode);
23 umode_t, int); 75extern struct inode * sysfs_get_inode(struct sysfs_dirent *sd);
24 76extern void sysfs_instantiate(struct dentry *dentry, struct inode *inode);
25extern int sysfs_add_file(struct dentry *, const struct attribute *, int); 77
26extern int sysfs_hash_and_remove(struct dentry * dir, const char * name); 78extern void release_sysfs_dirent(struct sysfs_dirent * sd);
79extern struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
80 const unsigned char *name);
81extern struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
82 const unsigned char *name);
83extern struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode,
84 int type);
85
86extern int sysfs_add_file(struct sysfs_dirent *dir_sd,
87 const struct attribute *attr, int type);
88extern int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name);
27extern struct sysfs_dirent *sysfs_find(struct sysfs_dirent *dir, const char * name); 89extern struct sysfs_dirent *sysfs_find(struct sysfs_dirent *dir, const char * name);
28 90
29extern int sysfs_create_subdir(struct kobject *, const char *, struct dentry **); 91extern int sysfs_create_subdir(struct kobject *kobj, const char *name,
30extern void sysfs_remove_subdir(struct dentry *); 92 struct sysfs_dirent **p_sd);
93extern void sysfs_remove_subdir(struct sysfs_dirent *sd);
31 94
32extern const unsigned char * sysfs_get_name(struct sysfs_dirent *sd);
33extern void sysfs_drop_dentry(struct sysfs_dirent *sd, struct dentry *parent);
34extern int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); 95extern int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
35 96
36extern spinlock_t sysfs_lock; 97extern spinlock_t sysfs_assoc_lock;
37extern struct rw_semaphore sysfs_rename_sem; 98extern struct mutex sysfs_mutex;
38extern struct super_block * sysfs_sb; 99extern struct super_block * sysfs_sb;
39extern const struct file_operations sysfs_dir_operations; 100extern const struct file_operations sysfs_dir_operations;
40extern const struct file_operations sysfs_file_operations; 101extern const struct file_operations sysfs_file_operations;
@@ -42,73 +103,9 @@ extern const struct file_operations bin_fops;
42extern const struct inode_operations sysfs_dir_inode_operations; 103extern const struct inode_operations sysfs_dir_inode_operations;
43extern const struct inode_operations sysfs_symlink_inode_operations; 104extern const struct inode_operations sysfs_symlink_inode_operations;
44 105
45struct sysfs_symlink { 106static inline unsigned int sysfs_type(struct sysfs_dirent *sd)
46 char * link_name;
47 struct kobject * target_kobj;
48};
49
50struct sysfs_buffer {
51 struct list_head associates;
52 size_t count;
53 loff_t pos;
54 char * page;
55 struct sysfs_ops * ops;
56 struct semaphore sem;
57 int orphaned;
58 int needs_read_fill;
59 int event;
60};
61
62struct sysfs_buffer_collection {
63 struct list_head associates;
64};
65
66static inline struct kobject * to_kobj(struct dentry * dentry)
67{
68 struct sysfs_dirent * sd = dentry->d_fsdata;
69 return ((struct kobject *) sd->s_element);
70}
71
72static inline struct attribute * to_attr(struct dentry * dentry)
73{ 107{
74 struct sysfs_dirent * sd = dentry->d_fsdata; 108 return sd->s_flags & SYSFS_TYPE_MASK;
75 return ((struct attribute *) sd->s_element);
76}
77
78static inline struct bin_attribute * to_bin_attr(struct dentry * dentry)
79{
80 struct sysfs_dirent * sd = dentry->d_fsdata;
81 return ((struct bin_attribute *) sd->s_element);
82}
83
84static inline struct kobject *sysfs_get_kobject(struct dentry *dentry)
85{
86 struct kobject * kobj = NULL;
87
88 spin_lock(&dcache_lock);
89 if (!d_unhashed(dentry)) {
90 struct sysfs_dirent * sd = dentry->d_fsdata;
91 if (sd->s_type & SYSFS_KOBJ_LINK) {
92 struct sysfs_symlink * sl = sd->s_element;
93 kobj = kobject_get(sl->target_kobj);
94 } else
95 kobj = kobject_get(sd->s_element);
96 }
97 spin_unlock(&dcache_lock);
98
99 return kobj;
100}
101
102static inline void release_sysfs_dirent(struct sysfs_dirent * sd)
103{
104 if (sd->s_type & SYSFS_KOBJ_LINK) {
105 struct sysfs_symlink * sl = sd->s_element;
106 kfree(sl->link_name);
107 kobject_put(sl->target_kobj);
108 kfree(sl);
109 }
110 kfree(sd->s_iattr);
111 kmem_cache_free(sysfs_dir_cachep, sd);
112} 109}
113 110
114static inline struct sysfs_dirent * sysfs_get(struct sysfs_dirent * sd) 111static inline struct sysfs_dirent * sysfs_get(struct sysfs_dirent * sd)
@@ -122,7 +119,7 @@ static inline struct sysfs_dirent * sysfs_get(struct sysfs_dirent * sd)
122 119
123static inline void sysfs_put(struct sysfs_dirent * sd) 120static inline void sysfs_put(struct sysfs_dirent * sd)
124{ 121{
125 if (atomic_dec_and_test(&sd->s_count)) 122 if (sd && atomic_dec_and_test(&sd->s_count))
126 release_sysfs_dirent(sd); 123 release_sysfs_dirent(sd);
127} 124}
128 125
diff --git a/fs/udf/crc.c b/fs/udf/crc.c
index 1b82a4adc2f..ef2bfaa19d7 100644
--- a/fs/udf/crc.c
+++ b/fs/udf/crc.c
@@ -106,8 +106,8 @@ int main(void)
106{ 106{
107 unsigned short x; 107 unsigned short x;
108 108
109 x = udf_crc16(bytes, sizeof bytes); 109 x = udf_crc(bytes, sizeof bytes);
110 printf("udf_crc16: calculated = %4.4x, correct = %4.4x\n", x, 0x3299U); 110 printf("udf_crc: calculated = %4.4x, correct = %4.4x\n", x, 0x3299U);
111 111
112 return 0; 112 return 0;
113} 113}
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index 8206983f2eb..10f3188738a 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -50,7 +50,7 @@ void udf_free_inode(struct inode * inode)
50 else 50 else
51 UDF_SB_LVIDIU(sb)->numFiles = 51 UDF_SB_LVIDIU(sb)->numFiles =
52 cpu_to_le32(le32_to_cpu(UDF_SB_LVIDIU(sb)->numFiles) - 1); 52 cpu_to_le32(le32_to_cpu(UDF_SB_LVIDIU(sb)->numFiles) - 1);
53 53
54 mark_buffer_dirty(sbi->s_lvidbh); 54 mark_buffer_dirty(sbi->s_lvidbh);
55 } 55 }
56 mutex_unlock(&sbi->s_alloc_mutex); 56 mutex_unlock(&sbi->s_alloc_mutex);
@@ -136,6 +136,13 @@ struct inode * udf_new_inode (struct inode *dir, int mode, int * err)
136 UDF_I_EFE(inode) = 0; 136 UDF_I_EFE(inode) = 0;
137 UDF_I_DATA(inode) = kzalloc(inode->i_sb->s_blocksize - sizeof(struct fileEntry), GFP_KERNEL); 137 UDF_I_DATA(inode) = kzalloc(inode->i_sb->s_blocksize - sizeof(struct fileEntry), GFP_KERNEL);
138 } 138 }
139 if (!UDF_I_DATA(inode))
140 {
141 iput(inode);
142 *err = -ENOMEM;
143 mutex_unlock(&sbi->s_alloc_mutex);
144 return NULL;
145 }
139 if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_AD_IN_ICB)) 146 if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_AD_IN_ICB))
140 UDF_I_ALLOCTYPE(inode) = ICBTAG_FLAG_AD_IN_ICB; 147 UDF_I_ALLOCTYPE(inode) = ICBTAG_FLAG_AD_IN_ICB;
141 else if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD)) 148 else if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD))
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index bf7de0bdbab..5b82e489af7 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -49,6 +49,7 @@ MODULE_LICENSE("GPL");
49static mode_t udf_convert_permissions(struct fileEntry *); 49static mode_t udf_convert_permissions(struct fileEntry *);
50static int udf_update_inode(struct inode *, int); 50static int udf_update_inode(struct inode *, int);
51static void udf_fill_inode(struct inode *, struct buffer_head *); 51static void udf_fill_inode(struct inode *, struct buffer_head *);
52static int udf_alloc_i_data(struct inode *inode, size_t size);
52static struct buffer_head *inode_getblk(struct inode *, sector_t, int *, 53static struct buffer_head *inode_getblk(struct inode *, sector_t, int *,
53 long *, int *); 54 long *, int *);
54static int8_t udf_insert_aext(struct inode *, struct extent_position, 55static int8_t udf_insert_aext(struct inode *, struct extent_position,
@@ -734,7 +735,7 @@ static void udf_split_extents(struct inode *inode, int *c, int offset, int newbl
734 (*c) ++; 735 (*c) ++;
735 (*endnum) ++; 736 (*endnum) ++;
736 } 737 }
737 738
738 laarr[curr].extLocation.logicalBlockNum = newblocknum; 739 laarr[curr].extLocation.logicalBlockNum = newblocknum;
739 if (etype == (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30)) 740 if (etype == (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30))
740 laarr[curr].extLocation.partitionReferenceNum = 741 laarr[curr].extLocation.partitionReferenceNum =
@@ -836,7 +837,7 @@ static void udf_prealloc_extents(struct inode *inode, int c, int lastblock,
836 { 837 {
837 numalloc -= elen; 838 numalloc -= elen;
838 if (*endnum > (i+1)) 839 if (*endnum > (i+1))
839 memmove(&laarr[i], &laarr[i+1], 840 memmove(&laarr[i], &laarr[i+1],
840 sizeof(long_ad) * (*endnum - (i+1))); 841 sizeof(long_ad) * (*endnum - (i+1)));
841 i --; 842 i --;
842 (*endnum) --; 843 (*endnum) --;
@@ -1024,7 +1025,7 @@ void udf_truncate(struct inode * inode)
1024 { 1025 {
1025 block_truncate_page(inode->i_mapping, inode->i_size, udf_get_block); 1026 block_truncate_page(inode->i_mapping, inode->i_size, udf_get_block);
1026 udf_truncate_extents(inode); 1027 udf_truncate_extents(inode);
1027 } 1028 }
1028 1029
1029 inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb); 1030 inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb);
1030 if (IS_SYNC(inode)) 1031 if (IS_SYNC(inode))
@@ -1087,10 +1088,10 @@ __udf_read_inode(struct inode *inode)
1087 { 1088 {
1088 kernel_lb_addr loc; 1089 kernel_lb_addr loc;
1089 ie = (struct indirectEntry *)ibh->b_data; 1090 ie = (struct indirectEntry *)ibh->b_data;
1090 1091
1091 loc = lelb_to_cpu(ie->indirectICB.extLocation); 1092 loc = lelb_to_cpu(ie->indirectICB.extLocation);
1092 1093
1093 if (ie->indirectICB.extLength && 1094 if (ie->indirectICB.extLength &&
1094 (nbh = udf_read_ptagged(inode->i_sb, loc, 0, &ident))) 1095 (nbh = udf_read_ptagged(inode->i_sb, loc, 0, &ident)))
1095 { 1096 {
1096 if (ident == TAG_IDENT_FE || 1097 if (ident == TAG_IDENT_FE ||
@@ -1156,14 +1157,22 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
1156 { 1157 {
1157 UDF_I_EFE(inode) = 1; 1158 UDF_I_EFE(inode) = 1;
1158 UDF_I_USE(inode) = 0; 1159 UDF_I_USE(inode) = 0;
1159 UDF_I_DATA(inode) = kmalloc(inode->i_sb->s_blocksize - sizeof(struct extendedFileEntry), GFP_KERNEL); 1160 if (udf_alloc_i_data(inode, inode->i_sb->s_blocksize - sizeof(struct extendedFileEntry)))
1161 {
1162 make_bad_inode(inode);
1163 return;
1164 }
1160 memcpy(UDF_I_DATA(inode), bh->b_data + sizeof(struct extendedFileEntry), inode->i_sb->s_blocksize - sizeof(struct extendedFileEntry)); 1165 memcpy(UDF_I_DATA(inode), bh->b_data + sizeof(struct extendedFileEntry), inode->i_sb->s_blocksize - sizeof(struct extendedFileEntry));
1161 } 1166 }
1162 else if (le16_to_cpu(fe->descTag.tagIdent) == TAG_IDENT_FE) 1167 else if (le16_to_cpu(fe->descTag.tagIdent) == TAG_IDENT_FE)
1163 { 1168 {
1164 UDF_I_EFE(inode) = 0; 1169 UDF_I_EFE(inode) = 0;
1165 UDF_I_USE(inode) = 0; 1170 UDF_I_USE(inode) = 0;
1166 UDF_I_DATA(inode) = kmalloc(inode->i_sb->s_blocksize - sizeof(struct fileEntry), GFP_KERNEL); 1171 if (udf_alloc_i_data(inode, inode->i_sb->s_blocksize - sizeof(struct fileEntry)))
1172 {
1173 make_bad_inode(inode);
1174 return;
1175 }
1167 memcpy(UDF_I_DATA(inode), bh->b_data + sizeof(struct fileEntry), inode->i_sb->s_blocksize - sizeof(struct fileEntry)); 1176 memcpy(UDF_I_DATA(inode), bh->b_data + sizeof(struct fileEntry), inode->i_sb->s_blocksize - sizeof(struct fileEntry));
1168 } 1177 }
1169 else if (le16_to_cpu(fe->descTag.tagIdent) == TAG_IDENT_USE) 1178 else if (le16_to_cpu(fe->descTag.tagIdent) == TAG_IDENT_USE)
@@ -1173,7 +1182,11 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
1173 UDF_I_LENALLOC(inode) = 1182 UDF_I_LENALLOC(inode) =
1174 le32_to_cpu( 1183 le32_to_cpu(
1175 ((struct unallocSpaceEntry *)bh->b_data)->lengthAllocDescs); 1184 ((struct unallocSpaceEntry *)bh->b_data)->lengthAllocDescs);
1176 UDF_I_DATA(inode) = kmalloc(inode->i_sb->s_blocksize - sizeof(struct unallocSpaceEntry), GFP_KERNEL); 1185 if (udf_alloc_i_data(inode, inode->i_sb->s_blocksize - sizeof(struct unallocSpaceEntry)))
1186 {
1187 make_bad_inode(inode);
1188 return;
1189 }
1177 memcpy(UDF_I_DATA(inode), bh->b_data + sizeof(struct unallocSpaceEntry), inode->i_sb->s_blocksize - sizeof(struct unallocSpaceEntry)); 1190 memcpy(UDF_I_DATA(inode), bh->b_data + sizeof(struct unallocSpaceEntry), inode->i_sb->s_blocksize - sizeof(struct unallocSpaceEntry));
1178 return; 1191 return;
1179 } 1192 }
@@ -1191,7 +1204,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
1191 inode->i_nlink = le16_to_cpu(fe->fileLinkCount); 1204 inode->i_nlink = le16_to_cpu(fe->fileLinkCount);
1192 if (!inode->i_nlink) 1205 if (!inode->i_nlink)
1193 inode->i_nlink = 1; 1206 inode->i_nlink = 1;
1194 1207
1195 inode->i_size = le64_to_cpu(fe->informationLength); 1208 inode->i_size = le64_to_cpu(fe->informationLength);
1196 UDF_I_LENEXTENTS(inode) = inode->i_size; 1209 UDF_I_LENEXTENTS(inode) = inode->i_size;
1197 1210
@@ -1243,7 +1256,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
1243 } 1256 }
1244 else 1257 else
1245 { 1258 {
1246 inode->i_blocks = le64_to_cpu(efe->logicalBlocksRecorded) << 1259 inode->i_blocks = le64_to_cpu(efe->logicalBlocksRecorded) <<
1247 (inode->i_sb->s_blocksize_bits - 9); 1260 (inode->i_sb->s_blocksize_bits - 9);
1248 1261
1249 if ( udf_stamp_to_time(&convtime, &convtime_usec, 1262 if ( udf_stamp_to_time(&convtime, &convtime_usec,
@@ -1374,6 +1387,20 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
1374 } 1387 }
1375} 1388}
1376 1389
1390static int udf_alloc_i_data(struct inode *inode, size_t size)
1391{
1392 UDF_I_DATA(inode) = kmalloc(size, GFP_KERNEL);
1393
1394 if (!UDF_I_DATA(inode))
1395 {
1396 printk(KERN_ERR "udf:udf_alloc_i_data (ino %ld) no free memory\n",
1397 inode->i_ino);
1398 return -ENOMEM;
1399 }
1400
1401 return 0;
1402}
1403
1377static mode_t 1404static mode_t
1378udf_convert_permissions(struct fileEntry *fe) 1405udf_convert_permissions(struct fileEntry *fe)
1379{ 1406{
@@ -2072,7 +2099,7 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos,
2072 mark_buffer_dirty_inode(oepos.bh, inode); 2099 mark_buffer_dirty_inode(oepos.bh, inode);
2073 } 2100 }
2074 } 2101 }
2075 2102
2076 brelse(epos.bh); 2103 brelse(epos.bh);
2077 brelse(oepos.bh); 2104 brelse(oepos.bh);
2078 return (elen >> 30); 2105 return (elen >> 30);
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 22ff6ed55ce..2b3011689e8 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -87,6 +87,7 @@
87#include <linux/smp_lock.h> 87#include <linux/smp_lock.h>
88#include <linux/buffer_head.h> 88#include <linux/buffer_head.h>
89#include <linux/vfs.h> 89#include <linux/vfs.h>
90#include <linux/log2.h>
90 91
91#include "swab.h" 92#include "swab.h"
92#include "util.h" 93#include "util.h"
@@ -854,7 +855,7 @@ magic_found:
854 uspi->s_fmask = fs32_to_cpu(sb, usb1->fs_fmask); 855 uspi->s_fmask = fs32_to_cpu(sb, usb1->fs_fmask);
855 uspi->s_fshift = fs32_to_cpu(sb, usb1->fs_fshift); 856 uspi->s_fshift = fs32_to_cpu(sb, usb1->fs_fshift);
856 857
857 if (uspi->s_fsize & (uspi->s_fsize - 1)) { 858 if (!is_power_of_2(uspi->s_fsize)) {
858 printk(KERN_ERR "ufs_read_super: fragment size %u is not a power of 2\n", 859 printk(KERN_ERR "ufs_read_super: fragment size %u is not a power of 2\n",
859 uspi->s_fsize); 860 uspi->s_fsize);
860 goto failed; 861 goto failed;
@@ -869,7 +870,7 @@ magic_found:
869 uspi->s_fsize); 870 uspi->s_fsize);
870 goto failed; 871 goto failed;
871 } 872 }
872 if (uspi->s_bsize & (uspi->s_bsize - 1)) { 873 if (!is_power_of_2(uspi->s_bsize)) {
873 printk(KERN_ERR "ufs_read_super: block size %u is not a power of 2\n", 874 printk(KERN_ERR "ufs_read_super: block size %u is not a power of 2\n",
874 uspi->s_bsize); 875 uspi->s_bsize);
875 goto failed; 876 goto failed;
diff --git a/fs/utimes.c b/fs/utimes.c
index b3c88952465..83a7e69e706 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -106,7 +106,7 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
106 if (IS_IMMUTABLE(inode)) 106 if (IS_IMMUTABLE(inode))
107 goto dput_and_out; 107 goto dput_and_out;
108 108
109 if (current->fsuid != inode->i_uid) { 109 if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) {
110 if (f) { 110 if (f) {
111 if (!(f->f_mode & FMODE_WRITE)) 111 if (!(f->f_mode & FMODE_WRITE))
112 goto dput_and_out; 112 goto dput_and_out;
diff --git a/fs/xfs/Makefile-linux-2.6 b/fs/xfs/Makefile-linux-2.6
index b49989bb89a..e7a9a83f008 100644
--- a/fs/xfs/Makefile-linux-2.6
+++ b/fs/xfs/Makefile-linux-2.6
@@ -64,6 +64,7 @@ xfs-y += xfs_alloc.o \
64 xfs_dir2_sf.o \ 64 xfs_dir2_sf.o \
65 xfs_error.o \ 65 xfs_error.o \
66 xfs_extfree_item.o \ 66 xfs_extfree_item.o \
67 xfs_filestream.o \
67 xfs_fsops.o \ 68 xfs_fsops.o \
68 xfs_ialloc.o \ 69 xfs_ialloc.o \
69 xfs_ialloc_btree.o \ 70 xfs_ialloc_btree.o \
@@ -77,6 +78,7 @@ xfs-y += xfs_alloc.o \
77 xfs_log.o \ 78 xfs_log.o \
78 xfs_log_recover.o \ 79 xfs_log_recover.o \
79 xfs_mount.o \ 80 xfs_mount.o \
81 xfs_mru_cache.o \
80 xfs_rename.o \ 82 xfs_rename.o \
81 xfs_trans.o \ 83 xfs_trans.o \
82 xfs_trans_ail.o \ 84 xfs_trans_ail.o \
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
index 9ebabdf7829..4b6470cf87f 100644
--- a/fs/xfs/linux-2.6/kmem.h
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -100,25 +100,6 @@ kmem_zone_destroy(kmem_zone_t *zone)
100extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast); 100extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast);
101extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast); 101extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast);
102 102
103/*
104 * Low memory cache shrinkers
105 */
106
107typedef struct shrinker *kmem_shaker_t;
108typedef int (*kmem_shake_func_t)(int, gfp_t);
109
110static inline kmem_shaker_t
111kmem_shake_register(kmem_shake_func_t sfunc)
112{
113 return set_shrinker(DEFAULT_SEEKS, sfunc);
114}
115
116static inline void
117kmem_shake_deregister(kmem_shaker_t shrinker)
118{
119 remove_shrinker(shrinker);
120}
121
122static inline int 103static inline int
123kmem_shake_allow(gfp_t gfp_mask) 104kmem_shake_allow(gfp_t gfp_mask)
124{ 105{
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 7361861e3aa..fd4105d662e 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -108,14 +108,19 @@ xfs_page_trace(
108 108
109/* 109/*
110 * Schedule IO completion handling on a xfsdatad if this was 110 * Schedule IO completion handling on a xfsdatad if this was
111 * the final hold on this ioend. 111 * the final hold on this ioend. If we are asked to wait,
112 * flush the workqueue.
112 */ 113 */
113STATIC void 114STATIC void
114xfs_finish_ioend( 115xfs_finish_ioend(
115 xfs_ioend_t *ioend) 116 xfs_ioend_t *ioend,
117 int wait)
116{ 118{
117 if (atomic_dec_and_test(&ioend->io_remaining)) 119 if (atomic_dec_and_test(&ioend->io_remaining)) {
118 queue_work(xfsdatad_workqueue, &ioend->io_work); 120 queue_work(xfsdatad_workqueue, &ioend->io_work);
121 if (wait)
122 flush_workqueue(xfsdatad_workqueue);
123 }
119} 124}
120 125
121/* 126/*
@@ -156,6 +161,8 @@ xfs_setfilesize(
156 xfs_fsize_t bsize; 161 xfs_fsize_t bsize;
157 162
158 ip = xfs_vtoi(ioend->io_vnode); 163 ip = xfs_vtoi(ioend->io_vnode);
164 if (!ip)
165 return;
159 166
160 ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG); 167 ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
161 ASSERT(ioend->io_type != IOMAP_READ); 168 ASSERT(ioend->io_type != IOMAP_READ);
@@ -334,7 +341,7 @@ xfs_end_bio(
334 bio->bi_end_io = NULL; 341 bio->bi_end_io = NULL;
335 bio_put(bio); 342 bio_put(bio);
336 343
337 xfs_finish_ioend(ioend); 344 xfs_finish_ioend(ioend, 0);
338 return 0; 345 return 0;
339} 346}
340 347
@@ -470,7 +477,7 @@ xfs_submit_ioend(
470 } 477 }
471 if (bio) 478 if (bio)
472 xfs_submit_ioend_bio(ioend, bio); 479 xfs_submit_ioend_bio(ioend, bio);
473 xfs_finish_ioend(ioend); 480 xfs_finish_ioend(ioend, 0);
474 } while ((ioend = next) != NULL); 481 } while ((ioend = next) != NULL);
475} 482}
476 483
@@ -1003,6 +1010,8 @@ xfs_page_state_convert(
1003 if (buffer_unwritten(bh) || buffer_delay(bh) || 1010 if (buffer_unwritten(bh) || buffer_delay(bh) ||
1004 ((buffer_uptodate(bh) || PageUptodate(page)) && 1011 ((buffer_uptodate(bh) || PageUptodate(page)) &&
1005 !buffer_mapped(bh) && (unmapped || startio))) { 1012 !buffer_mapped(bh) && (unmapped || startio))) {
1013 int new_ioend = 0;
1014
1006 /* 1015 /*
1007 * Make sure we don't use a read-only iomap 1016 * Make sure we don't use a read-only iomap
1008 */ 1017 */
@@ -1021,6 +1030,15 @@ xfs_page_state_convert(
1021 } 1030 }
1022 1031
1023 if (!iomap_valid) { 1032 if (!iomap_valid) {
1033 /*
1034 * if we didn't have a valid mapping then we
1035 * need to ensure that we put the new mapping
1036 * in a new ioend structure. This needs to be
1037 * done to ensure that the ioends correctly
1038 * reflect the block mappings at io completion
1039 * for unwritten extent conversion.
1040 */
1041 new_ioend = 1;
1024 if (type == IOMAP_NEW) { 1042 if (type == IOMAP_NEW) {
1025 size = xfs_probe_cluster(inode, 1043 size = xfs_probe_cluster(inode,
1026 page, bh, head, 0); 1044 page, bh, head, 0);
@@ -1040,7 +1058,7 @@ xfs_page_state_convert(
1040 if (startio) { 1058 if (startio) {
1041 xfs_add_to_ioend(inode, bh, offset, 1059 xfs_add_to_ioend(inode, bh, offset,
1042 type, &ioend, 1060 type, &ioend,
1043 !iomap_valid); 1061 new_ioend);
1044 } else { 1062 } else {
1045 set_buffer_dirty(bh); 1063 set_buffer_dirty(bh);
1046 unlock_buffer(bh); 1064 unlock_buffer(bh);
@@ -1416,6 +1434,13 @@ xfs_end_io_direct(
1416 * This is not necessary for synchronous direct I/O, but we do 1434 * This is not necessary for synchronous direct I/O, but we do
1417 * it anyway to keep the code uniform and simpler. 1435 * it anyway to keep the code uniform and simpler.
1418 * 1436 *
1437 * Well, if only it were that simple. Because synchronous direct I/O
1438 * requires extent conversion to occur *before* we return to userspace,
1439 * we have to wait for extent conversion to complete. Look at the
1440 * iocb that has been passed to us to determine if this is AIO or
1441 * not. If it is synchronous, tell xfs_finish_ioend() to kick the
1442 * workqueue and wait for it to complete.
1443 *
1419 * The core direct I/O code might be changed to always call the 1444 * The core direct I/O code might be changed to always call the
1420 * completion handler in the future, in which case all this can 1445 * completion handler in the future, in which case all this can
1421 * go away. 1446 * go away.
@@ -1423,9 +1448,9 @@ xfs_end_io_direct(
1423 ioend->io_offset = offset; 1448 ioend->io_offset = offset;
1424 ioend->io_size = size; 1449 ioend->io_size = size;
1425 if (ioend->io_type == IOMAP_READ) { 1450 if (ioend->io_type == IOMAP_READ) {
1426 xfs_finish_ioend(ioend); 1451 xfs_finish_ioend(ioend, 0);
1427 } else if (private && size > 0) { 1452 } else if (private && size > 0) {
1428 xfs_finish_ioend(ioend); 1453 xfs_finish_ioend(ioend, is_sync_kiocb(iocb));
1429 } else { 1454 } else {
1430 /* 1455 /*
1431 * A direct I/O write ioend starts it's life in unwritten 1456 * A direct I/O write ioend starts it's life in unwritten
@@ -1434,7 +1459,7 @@ xfs_end_io_direct(
1434 * handler. 1459 * handler.
1435 */ 1460 */
1436 INIT_WORK(&ioend->io_work, xfs_end_bio_written); 1461 INIT_WORK(&ioend->io_work, xfs_end_bio_written);
1437 xfs_finish_ioend(ioend); 1462 xfs_finish_ioend(ioend, 0);
1438 } 1463 }
1439 1464
1440 /* 1465 /*
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index fe4f66a5af1..2df63622354 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -35,7 +35,7 @@
35#include <linux/freezer.h> 35#include <linux/freezer.h>
36 36
37static kmem_zone_t *xfs_buf_zone; 37static kmem_zone_t *xfs_buf_zone;
38static kmem_shaker_t xfs_buf_shake; 38static struct shrinker *xfs_buf_shake;
39STATIC int xfsbufd(void *); 39STATIC int xfsbufd(void *);
40STATIC int xfsbufd_wakeup(int, gfp_t); 40STATIC int xfsbufd_wakeup(int, gfp_t);
41STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int); 41STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
@@ -314,7 +314,7 @@ xfs_buf_free(
314 314
315 ASSERT(list_empty(&bp->b_hash_list)); 315 ASSERT(list_empty(&bp->b_hash_list));
316 316
317 if (bp->b_flags & _XBF_PAGE_CACHE) { 317 if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
318 uint i; 318 uint i;
319 319
320 if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1)) 320 if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1))
@@ -323,18 +323,11 @@ xfs_buf_free(
323 for (i = 0; i < bp->b_page_count; i++) { 323 for (i = 0; i < bp->b_page_count; i++) {
324 struct page *page = bp->b_pages[i]; 324 struct page *page = bp->b_pages[i];
325 325
326 ASSERT(!PagePrivate(page)); 326 if (bp->b_flags & _XBF_PAGE_CACHE)
327 ASSERT(!PagePrivate(page));
327 page_cache_release(page); 328 page_cache_release(page);
328 } 329 }
329 _xfs_buf_free_pages(bp); 330 _xfs_buf_free_pages(bp);
330 } else if (bp->b_flags & _XBF_KMEM_ALLOC) {
331 /*
332 * XXX(hch): bp->b_count_desired might be incorrect (see
333 * xfs_buf_associate_memory for details), but fortunately
334 * the Linux version of kmem_free ignores the len argument..
335 */
336 kmem_free(bp->b_addr, bp->b_count_desired);
337 _xfs_buf_free_pages(bp);
338 } 331 }
339 332
340 xfs_buf_deallocate(bp); 333 xfs_buf_deallocate(bp);
@@ -764,43 +757,44 @@ xfs_buf_get_noaddr(
764 size_t len, 757 size_t len,
765 xfs_buftarg_t *target) 758 xfs_buftarg_t *target)
766{ 759{
767 size_t malloc_len = len; 760 unsigned long page_count = PAGE_ALIGN(len) >> PAGE_SHIFT;
761 int error, i;
768 xfs_buf_t *bp; 762 xfs_buf_t *bp;
769 void *data;
770 int error;
771 763
772 bp = xfs_buf_allocate(0); 764 bp = xfs_buf_allocate(0);
773 if (unlikely(bp == NULL)) 765 if (unlikely(bp == NULL))
774 goto fail; 766 goto fail;
775 _xfs_buf_initialize(bp, target, 0, len, 0); 767 _xfs_buf_initialize(bp, target, 0, len, 0);
776 768
777 try_again: 769 error = _xfs_buf_get_pages(bp, page_count, 0);
778 data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL | KM_LARGE); 770 if (error)
779 if (unlikely(data == NULL))
780 goto fail_free_buf; 771 goto fail_free_buf;
781 772
782 /* check whether alignment matches.. */ 773 for (i = 0; i < page_count; i++) {
783 if ((__psunsigned_t)data != 774 bp->b_pages[i] = alloc_page(GFP_KERNEL);
784 ((__psunsigned_t)data & ~target->bt_smask)) { 775 if (!bp->b_pages[i])
785 /* .. else double the size and try again */ 776 goto fail_free_mem;
786 kmem_free(data, malloc_len);
787 malloc_len <<= 1;
788 goto try_again;
789 } 777 }
778 bp->b_flags |= _XBF_PAGES;
790 779
791 error = xfs_buf_associate_memory(bp, data, len); 780 error = _xfs_buf_map_pages(bp, XBF_MAPPED);
792 if (error) 781 if (unlikely(error)) {
782 printk(KERN_WARNING "%s: failed to map pages\n",
783 __FUNCTION__);
793 goto fail_free_mem; 784 goto fail_free_mem;
794 bp->b_flags |= _XBF_KMEM_ALLOC; 785 }
795 786
796 xfs_buf_unlock(bp); 787 xfs_buf_unlock(bp);
797 788
798 XB_TRACE(bp, "no_daddr", data); 789 XB_TRACE(bp, "no_daddr", len);
799 return bp; 790 return bp;
791
800 fail_free_mem: 792 fail_free_mem:
801 kmem_free(data, malloc_len); 793 while (--i >= 0)
794 __free_page(bp->b_pages[i]);
795 _xfs_buf_free_pages(bp);
802 fail_free_buf: 796 fail_free_buf:
803 xfs_buf_free(bp); 797 xfs_buf_deallocate(bp);
804 fail: 798 fail:
805 return NULL; 799 return NULL;
806} 800}
@@ -1453,6 +1447,7 @@ xfs_free_buftarg(
1453 int external) 1447 int external)
1454{ 1448{
1455 xfs_flush_buftarg(btp, 1); 1449 xfs_flush_buftarg(btp, 1);
1450 xfs_blkdev_issue_flush(btp);
1456 if (external) 1451 if (external)
1457 xfs_blkdev_put(btp->bt_bdev); 1452 xfs_blkdev_put(btp->bt_bdev);
1458 xfs_free_bufhash(btp); 1453 xfs_free_bufhash(btp);
@@ -1837,7 +1832,7 @@ xfs_buf_init(void)
1837 if (!xfsdatad_workqueue) 1832 if (!xfsdatad_workqueue)
1838 goto out_destroy_xfslogd_workqueue; 1833 goto out_destroy_xfslogd_workqueue;
1839 1834
1840 xfs_buf_shake = kmem_shake_register(xfsbufd_wakeup); 1835 xfs_buf_shake = set_shrinker(DEFAULT_SEEKS, xfsbufd_wakeup);
1841 if (!xfs_buf_shake) 1836 if (!xfs_buf_shake)
1842 goto out_destroy_xfsdatad_workqueue; 1837 goto out_destroy_xfsdatad_workqueue;
1843 1838
@@ -1859,7 +1854,7 @@ xfs_buf_init(void)
1859void 1854void
1860xfs_buf_terminate(void) 1855xfs_buf_terminate(void)
1861{ 1856{
1862 kmem_shake_deregister(xfs_buf_shake); 1857 remove_shrinker(xfs_buf_shake);
1863 destroy_workqueue(xfsdatad_workqueue); 1858 destroy_workqueue(xfsdatad_workqueue);
1864 destroy_workqueue(xfslogd_workqueue); 1859 destroy_workqueue(xfslogd_workqueue);
1865 kmem_zone_destroy(xfs_buf_zone); 1860 kmem_zone_destroy(xfs_buf_zone);
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index b6241f6201a..b5908a34b15 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -63,7 +63,7 @@ typedef enum {
63 63
64 /* flags used only internally */ 64 /* flags used only internally */
65 _XBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */ 65 _XBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */
66 _XBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc() */ 66 _XBF_PAGES = (1 << 18), /* backed by refcounted pages */
67 _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */ 67 _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */
68 _XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */ 68 _XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */
69} xfs_buf_flags_t; 69} xfs_buf_flags_t;
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 8c43cd2e237..cbcd40c8c2a 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -184,15 +184,6 @@ xfs_file_open(
184} 184}
185 185
186STATIC int 186STATIC int
187xfs_file_close(
188 struct file *filp,
189 fl_owner_t id)
190{
191 return -bhv_vop_close(vn_from_inode(filp->f_path.dentry->d_inode), 0,
192 file_count(filp) > 1 ? L_FALSE : L_TRUE, NULL);
193}
194
195STATIC int
196xfs_file_release( 187xfs_file_release(
197 struct inode *inode, 188 struct inode *inode,
198 struct file *filp) 189 struct file *filp)
@@ -436,7 +427,6 @@ const struct file_operations xfs_file_operations = {
436#endif 427#endif
437 .mmap = xfs_file_mmap, 428 .mmap = xfs_file_mmap,
438 .open = xfs_file_open, 429 .open = xfs_file_open,
439 .flush = xfs_file_close,
440 .release = xfs_file_release, 430 .release = xfs_file_release,
441 .fsync = xfs_file_fsync, 431 .fsync = xfs_file_fsync,
442#ifdef HAVE_FOP_OPEN_EXEC 432#ifdef HAVE_FOP_OPEN_EXEC
@@ -458,7 +448,6 @@ const struct file_operations xfs_invis_file_operations = {
458#endif 448#endif
459 .mmap = xfs_file_mmap, 449 .mmap = xfs_file_mmap,
460 .open = xfs_file_open, 450 .open = xfs_file_open,
461 .flush = xfs_file_close,
462 .release = xfs_file_release, 451 .release = xfs_file_release,
463 .fsync = xfs_file_fsync, 452 .fsync = xfs_file_fsync,
464}; 453};
diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c
index ed3a5e1b4b6..bb72c3d4141 100644
--- a/fs/xfs/linux-2.6/xfs_globals.c
+++ b/fs/xfs/linux-2.6/xfs_globals.c
@@ -46,6 +46,7 @@ xfs_param_t xfs_params = {
46 .inherit_nosym = { 0, 0, 1 }, 46 .inherit_nosym = { 0, 0, 1 },
47 .rotorstep = { 1, 1, 255 }, 47 .rotorstep = { 1, 1, 255 },
48 .inherit_nodfrg = { 0, 1, 1 }, 48 .inherit_nodfrg = { 0, 1, 1 },
49 .fstrm_timer = { 1, 50, 3600*100},
49}; 50};
50 51
51/* 52/*
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index ff5c41ff8d4..5917808abbd 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -1019,7 +1019,7 @@ xfs_ioc_bulkstat(
1019 1019
1020 if (cmd == XFS_IOC_FSINUMBERS) 1020 if (cmd == XFS_IOC_FSINUMBERS)
1021 error = xfs_inumbers(mp, &inlast, &count, 1021 error = xfs_inumbers(mp, &inlast, &count,
1022 bulkreq.ubuffer); 1022 bulkreq.ubuffer, xfs_inumbers_fmt);
1023 else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) 1023 else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE)
1024 error = xfs_bulkstat_single(mp, &inlast, 1024 error = xfs_bulkstat_single(mp, &inlast,
1025 bulkreq.ubuffer, &done); 1025 bulkreq.ubuffer, &done);
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index b83cebc165f..141cf15067c 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -23,10 +23,25 @@
23#include <linux/fs.h> 23#include <linux/fs.h>
24#include <asm/uaccess.h> 24#include <asm/uaccess.h>
25#include "xfs.h" 25#include "xfs.h"
26#include "xfs_types.h"
27#include "xfs_fs.h" 26#include "xfs_fs.h"
27#include "xfs_bit.h"
28#include "xfs_log.h"
29#include "xfs_inum.h"
30#include "xfs_trans.h"
31#include "xfs_sb.h"
32#include "xfs_ag.h"
33#include "xfs_dir2.h"
34#include "xfs_dmapi.h"
35#include "xfs_mount.h"
36#include "xfs_bmap_btree.h"
37#include "xfs_attr_sf.h"
38#include "xfs_dir2_sf.h"
28#include "xfs_vfs.h" 39#include "xfs_vfs.h"
29#include "xfs_vnode.h" 40#include "xfs_vnode.h"
41#include "xfs_dinode.h"
42#include "xfs_inode.h"
43#include "xfs_itable.h"
44#include "xfs_error.h"
30#include "xfs_dfrag.h" 45#include "xfs_dfrag.h"
31 46
32#define _NATIVE_IOC(cmd, type) \ 47#define _NATIVE_IOC(cmd, type) \
@@ -34,6 +49,7 @@
34 49
35#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) 50#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
36#define BROKEN_X86_ALIGNMENT 51#define BROKEN_X86_ALIGNMENT
52#define _PACKED __attribute__((packed))
37/* on ia32 l_start is on a 32-bit boundary */ 53/* on ia32 l_start is on a 32-bit boundary */
38typedef struct xfs_flock64_32 { 54typedef struct xfs_flock64_32 {
39 __s16 l_type; 55 __s16 l_type;
@@ -75,35 +91,276 @@ xfs_ioctl32_flock(
75 return (unsigned long)p; 91 return (unsigned long)p;
76} 92}
77 93
94typedef struct compat_xfs_fsop_geom_v1 {
95 __u32 blocksize; /* filesystem (data) block size */
96 __u32 rtextsize; /* realtime extent size */
97 __u32 agblocks; /* fsblocks in an AG */
98 __u32 agcount; /* number of allocation groups */
99 __u32 logblocks; /* fsblocks in the log */
100 __u32 sectsize; /* (data) sector size, bytes */
101 __u32 inodesize; /* inode size in bytes */
102 __u32 imaxpct; /* max allowed inode space(%) */
103 __u64 datablocks; /* fsblocks in data subvolume */
104 __u64 rtblocks; /* fsblocks in realtime subvol */
105 __u64 rtextents; /* rt extents in realtime subvol*/
106 __u64 logstart; /* starting fsblock of the log */
107 unsigned char uuid[16]; /* unique id of the filesystem */
108 __u32 sunit; /* stripe unit, fsblocks */
109 __u32 swidth; /* stripe width, fsblocks */
110 __s32 version; /* structure version */
111 __u32 flags; /* superblock version flags */
112 __u32 logsectsize; /* log sector size, bytes */
113 __u32 rtsectsize; /* realtime sector size, bytes */
114 __u32 dirblocksize; /* directory block size, bytes */
115} __attribute__((packed)) compat_xfs_fsop_geom_v1_t;
116
117#define XFS_IOC_FSGEOMETRY_V1_32 \
118 _IOR ('X', 100, struct compat_xfs_fsop_geom_v1)
119
120STATIC unsigned long xfs_ioctl32_geom_v1(unsigned long arg)
121{
122 compat_xfs_fsop_geom_v1_t __user *p32 = (void __user *)arg;
123 xfs_fsop_geom_v1_t __user *p = compat_alloc_user_space(sizeof(*p));
124
125 if (copy_in_user(p, p32, sizeof(*p32)))
126 return -EFAULT;
127 return (unsigned long)p;
128}
129
130typedef struct compat_xfs_inogrp {
131 __u64 xi_startino; /* starting inode number */
132 __s32 xi_alloccount; /* # bits set in allocmask */
133 __u64 xi_allocmask; /* mask of allocated inodes */
134} __attribute__((packed)) compat_xfs_inogrp_t;
135
136STATIC int xfs_inumbers_fmt_compat(
137 void __user *ubuffer,
138 const xfs_inogrp_t *buffer,
139 long count,
140 long *written)
141{
142 compat_xfs_inogrp_t *p32 = ubuffer;
143 long i;
144
145 for (i = 0; i < count; i++) {
146 if (put_user(buffer[i].xi_startino, &p32[i].xi_startino) ||
147 put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) ||
148 put_user(buffer[i].xi_allocmask, &p32[i].xi_allocmask))
149 return -EFAULT;
150 }
151 *written = count * sizeof(*p32);
152 return 0;
153}
154
78#else 155#else
79 156
80typedef struct xfs_fsop_bulkreq32 { 157#define xfs_inumbers_fmt_compat xfs_inumbers_fmt
158#define _PACKED
159
160#endif
161
162/* XFS_IOC_FSBULKSTAT and friends */
163
164typedef struct compat_xfs_bstime {
165 __s32 tv_sec; /* seconds */
166 __s32 tv_nsec; /* and nanoseconds */
167} compat_xfs_bstime_t;
168
169STATIC int xfs_bstime_store_compat(
170 compat_xfs_bstime_t __user *p32,
171 const xfs_bstime_t *p)
172{
173 __s32 sec32;
174
175 sec32 = p->tv_sec;
176 if (put_user(sec32, &p32->tv_sec) ||
177 put_user(p->tv_nsec, &p32->tv_nsec))
178 return -EFAULT;
179 return 0;
180}
181
182typedef struct compat_xfs_bstat {
183 __u64 bs_ino; /* inode number */
184 __u16 bs_mode; /* type and mode */
185 __u16 bs_nlink; /* number of links */
186 __u32 bs_uid; /* user id */
187 __u32 bs_gid; /* group id */
188 __u32 bs_rdev; /* device value */
189 __s32 bs_blksize; /* block size */
190 __s64 bs_size; /* file size */
191 compat_xfs_bstime_t bs_atime; /* access time */
192 compat_xfs_bstime_t bs_mtime; /* modify time */
193 compat_xfs_bstime_t bs_ctime; /* inode change time */
194 int64_t bs_blocks; /* number of blocks */
195 __u32 bs_xflags; /* extended flags */
196 __s32 bs_extsize; /* extent size */
197 __s32 bs_extents; /* number of extents */
198 __u32 bs_gen; /* generation count */
199 __u16 bs_projid; /* project id */
200 unsigned char bs_pad[14]; /* pad space, unused */
201 __u32 bs_dmevmask; /* DMIG event mask */
202 __u16 bs_dmstate; /* DMIG state info */
203 __u16 bs_aextents; /* attribute number of extents */
204} _PACKED compat_xfs_bstat_t;
205
206STATIC int xfs_bulkstat_one_fmt_compat(
207 void __user *ubuffer,
208 const xfs_bstat_t *buffer)
209{
210 compat_xfs_bstat_t __user *p32 = ubuffer;
211
212 if (put_user(buffer->bs_ino, &p32->bs_ino) ||
213 put_user(buffer->bs_mode, &p32->bs_mode) ||
214 put_user(buffer->bs_nlink, &p32->bs_nlink) ||
215 put_user(buffer->bs_uid, &p32->bs_uid) ||
216 put_user(buffer->bs_gid, &p32->bs_gid) ||
217 put_user(buffer->bs_rdev, &p32->bs_rdev) ||
218 put_user(buffer->bs_blksize, &p32->bs_blksize) ||
219 put_user(buffer->bs_size, &p32->bs_size) ||
220 xfs_bstime_store_compat(&p32->bs_atime, &buffer->bs_atime) ||
221 xfs_bstime_store_compat(&p32->bs_mtime, &buffer->bs_mtime) ||
222 xfs_bstime_store_compat(&p32->bs_ctime, &buffer->bs_ctime) ||
223 put_user(buffer->bs_blocks, &p32->bs_blocks) ||
224 put_user(buffer->bs_xflags, &p32->bs_xflags) ||
225 put_user(buffer->bs_extsize, &p32->bs_extsize) ||
226 put_user(buffer->bs_extents, &p32->bs_extents) ||
227 put_user(buffer->bs_gen, &p32->bs_gen) ||
228 put_user(buffer->bs_projid, &p32->bs_projid) ||
229 put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) ||
230 put_user(buffer->bs_dmstate, &p32->bs_dmstate) ||
231 put_user(buffer->bs_aextents, &p32->bs_aextents))
232 return -EFAULT;
233 return sizeof(*p32);
234}
235
236
237
238typedef struct compat_xfs_fsop_bulkreq {
81 compat_uptr_t lastip; /* last inode # pointer */ 239 compat_uptr_t lastip; /* last inode # pointer */
82 __s32 icount; /* count of entries in buffer */ 240 __s32 icount; /* count of entries in buffer */
83 compat_uptr_t ubuffer; /* user buffer for inode desc. */ 241 compat_uptr_t ubuffer; /* user buffer for inode desc. */
84 __s32 ocount; /* output count pointer */ 242 compat_uptr_t ocount; /* output count pointer */
85} xfs_fsop_bulkreq32_t; 243} compat_xfs_fsop_bulkreq_t;
86 244
87STATIC unsigned long 245#define XFS_IOC_FSBULKSTAT_32 \
88xfs_ioctl32_bulkstat( 246 _IOWR('X', 101, struct compat_xfs_fsop_bulkreq)
89 unsigned long arg) 247#define XFS_IOC_FSBULKSTAT_SINGLE_32 \
248 _IOWR('X', 102, struct compat_xfs_fsop_bulkreq)
249#define XFS_IOC_FSINUMBERS_32 \
250 _IOWR('X', 103, struct compat_xfs_fsop_bulkreq)
251
252/* copied from xfs_ioctl.c */
253STATIC int
254xfs_ioc_bulkstat_compat(
255 xfs_mount_t *mp,
256 unsigned int cmd,
257 void __user *arg)
90{ 258{
91 xfs_fsop_bulkreq32_t __user *p32 = (void __user *)arg; 259 compat_xfs_fsop_bulkreq_t __user *p32 = (void __user *)arg;
92 xfs_fsop_bulkreq_t __user *p = compat_alloc_user_space(sizeof(*p));
93 u32 addr; 260 u32 addr;
261 xfs_fsop_bulkreq_t bulkreq;
262 int count; /* # of records returned */
263 xfs_ino_t inlast; /* last inode number */
264 int done;
265 int error;
266
267 /* done = 1 if there are more stats to get and if bulkstat */
268 /* should be called again (unused here, but used in dmapi) */
269
270 if (!capable(CAP_SYS_ADMIN))
271 return -EPERM;
272
273 if (XFS_FORCED_SHUTDOWN(mp))
274 return -XFS_ERROR(EIO);
275
276 if (get_user(addr, &p32->lastip))
277 return -EFAULT;
278 bulkreq.lastip = compat_ptr(addr);
279 if (get_user(bulkreq.icount, &p32->icount) ||
280 get_user(addr, &p32->ubuffer))
281 return -EFAULT;
282 bulkreq.ubuffer = compat_ptr(addr);
283 if (get_user(addr, &p32->ocount))
284 return -EFAULT;
285 bulkreq.ocount = compat_ptr(addr);
286
287 if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
288 return -XFS_ERROR(EFAULT);
289
290 if ((count = bulkreq.icount) <= 0)
291 return -XFS_ERROR(EINVAL);
292
293 if (cmd == XFS_IOC_FSINUMBERS)
294 error = xfs_inumbers(mp, &inlast, &count,
295 bulkreq.ubuffer, xfs_inumbers_fmt_compat);
296 else {
297 /* declare a var to get a warning in case the type changes */
298 bulkstat_one_fmt_pf formatter = xfs_bulkstat_one_fmt_compat;
299 error = xfs_bulkstat(mp, &inlast, &count,
300 xfs_bulkstat_one, formatter,
301 sizeof(compat_xfs_bstat_t), bulkreq.ubuffer,
302 BULKSTAT_FG_QUICK, &done);
303 }
304 if (error)
305 return -error;
306
307 if (bulkreq.ocount != NULL) {
308 if (copy_to_user(bulkreq.lastip, &inlast,
309 sizeof(xfs_ino_t)))
310 return -XFS_ERROR(EFAULT);
311
312 if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
313 return -XFS_ERROR(EFAULT);
314 }
315
316 return 0;
317}
318
319
320
321typedef struct compat_xfs_fsop_handlereq {
322 __u32 fd; /* fd for FD_TO_HANDLE */
323 compat_uptr_t path; /* user pathname */
324 __u32 oflags; /* open flags */
325 compat_uptr_t ihandle; /* user supplied handle */
326 __u32 ihandlen; /* user supplied length */
327 compat_uptr_t ohandle; /* user buffer for handle */
328 compat_uptr_t ohandlen; /* user buffer length */
329} compat_xfs_fsop_handlereq_t;
330
331#define XFS_IOC_PATH_TO_FSHANDLE_32 \
332 _IOWR('X', 104, struct compat_xfs_fsop_handlereq)
333#define XFS_IOC_PATH_TO_HANDLE_32 \
334 _IOWR('X', 105, struct compat_xfs_fsop_handlereq)
335#define XFS_IOC_FD_TO_HANDLE_32 \
336 _IOWR('X', 106, struct compat_xfs_fsop_handlereq)
337#define XFS_IOC_OPEN_BY_HANDLE_32 \
338 _IOWR('X', 107, struct compat_xfs_fsop_handlereq)
339#define XFS_IOC_READLINK_BY_HANDLE_32 \
340 _IOWR('X', 108, struct compat_xfs_fsop_handlereq)
341
342STATIC unsigned long xfs_ioctl32_fshandle(unsigned long arg)
343{
344 compat_xfs_fsop_handlereq_t __user *p32 = (void __user *)arg;
345 xfs_fsop_handlereq_t __user *p = compat_alloc_user_space(sizeof(*p));
346 u32 addr;
94 347
95 if (get_user(addr, &p32->lastip) || 348 if (copy_in_user(&p->fd, &p32->fd, sizeof(__u32)) ||
96 put_user(compat_ptr(addr), &p->lastip) || 349 get_user(addr, &p32->path) ||
97 copy_in_user(&p->icount, &p32->icount, sizeof(s32)) || 350 put_user(compat_ptr(addr), &p->path) ||
98 get_user(addr, &p32->ubuffer) || 351 copy_in_user(&p->oflags, &p32->oflags, sizeof(__u32)) ||
99 put_user(compat_ptr(addr), &p->ubuffer) || 352 get_user(addr, &p32->ihandle) ||
100 get_user(addr, &p32->ocount) || 353 put_user(compat_ptr(addr), &p->ihandle) ||
101 put_user(compat_ptr(addr), &p->ocount)) 354 copy_in_user(&p->ihandlen, &p32->ihandlen, sizeof(__u32)) ||
355 get_user(addr, &p32->ohandle) ||
356 put_user(compat_ptr(addr), &p->ohandle) ||
357 get_user(addr, &p32->ohandlen) ||
358 put_user(compat_ptr(addr), &p->ohandlen))
102 return -EFAULT; 359 return -EFAULT;
103 360
104 return (unsigned long)p; 361 return (unsigned long)p;
105} 362}
106#endif 363
107 364
108STATIC long 365STATIC long
109xfs_compat_ioctl( 366xfs_compat_ioctl(
@@ -118,7 +375,6 @@ xfs_compat_ioctl(
118 375
119 switch (cmd) { 376 switch (cmd) {
120 case XFS_IOC_DIOINFO: 377 case XFS_IOC_DIOINFO:
121 case XFS_IOC_FSGEOMETRY_V1:
122 case XFS_IOC_FSGEOMETRY: 378 case XFS_IOC_FSGEOMETRY:
123 case XFS_IOC_GETVERSION: 379 case XFS_IOC_GETVERSION:
124 case XFS_IOC_GETXFLAGS: 380 case XFS_IOC_GETXFLAGS:
@@ -131,12 +387,7 @@ xfs_compat_ioctl(
131 case XFS_IOC_GETBMAPA: 387 case XFS_IOC_GETBMAPA:
132 case XFS_IOC_GETBMAPX: 388 case XFS_IOC_GETBMAPX:
133/* not handled 389/* not handled
134 case XFS_IOC_FD_TO_HANDLE:
135 case XFS_IOC_PATH_TO_HANDLE:
136 case XFS_IOC_PATH_TO_FSHANDLE:
137 case XFS_IOC_OPEN_BY_HANDLE:
138 case XFS_IOC_FSSETDM_BY_HANDLE: 390 case XFS_IOC_FSSETDM_BY_HANDLE:
139 case XFS_IOC_READLINK_BY_HANDLE:
140 case XFS_IOC_ATTRLIST_BY_HANDLE: 391 case XFS_IOC_ATTRLIST_BY_HANDLE:
141 case XFS_IOC_ATTRMULTI_BY_HANDLE: 392 case XFS_IOC_ATTRMULTI_BY_HANDLE:
142*/ 393*/
@@ -166,6 +417,10 @@ xfs_compat_ioctl(
166 arg = xfs_ioctl32_flock(arg); 417 arg = xfs_ioctl32_flock(arg);
167 cmd = _NATIVE_IOC(cmd, struct xfs_flock64); 418 cmd = _NATIVE_IOC(cmd, struct xfs_flock64);
168 break; 419 break;
420 case XFS_IOC_FSGEOMETRY_V1_32:
421 arg = xfs_ioctl32_geom_v1(arg);
422 cmd = _NATIVE_IOC(cmd, struct xfs_fsop_geom_v1);
423 break;
169 424
170#else /* These are handled fine if no alignment issues */ 425#else /* These are handled fine if no alignment issues */
171 case XFS_IOC_ALLOCSP: 426 case XFS_IOC_ALLOCSP:
@@ -176,18 +431,28 @@ xfs_compat_ioctl(
176 case XFS_IOC_FREESP64: 431 case XFS_IOC_FREESP64:
177 case XFS_IOC_RESVSP64: 432 case XFS_IOC_RESVSP64:
178 case XFS_IOC_UNRESVSP64: 433 case XFS_IOC_UNRESVSP64:
434 case XFS_IOC_FSGEOMETRY_V1:
179 break; 435 break;
180 436
181 /* xfs_bstat_t still has wrong u32 vs u64 alignment */ 437 /* xfs_bstat_t still has wrong u32 vs u64 alignment */
182 case XFS_IOC_SWAPEXT: 438 case XFS_IOC_SWAPEXT:
183 break; 439 break;
184 440
185 case XFS_IOC_FSBULKSTAT_SINGLE:
186 case XFS_IOC_FSBULKSTAT:
187 case XFS_IOC_FSINUMBERS:
188 arg = xfs_ioctl32_bulkstat(arg);
189 break;
190#endif 441#endif
442 case XFS_IOC_FSBULKSTAT_32:
443 case XFS_IOC_FSBULKSTAT_SINGLE_32:
444 case XFS_IOC_FSINUMBERS_32:
445 cmd = _NATIVE_IOC(cmd, struct xfs_fsop_bulkreq);
446 return xfs_ioc_bulkstat_compat(XFS_BHVTOI(VNHEAD(vp))->i_mount,
447 cmd, (void*)arg);
448 case XFS_IOC_FD_TO_HANDLE_32:
449 case XFS_IOC_PATH_TO_HANDLE_32:
450 case XFS_IOC_PATH_TO_FSHANDLE_32:
451 case XFS_IOC_OPEN_BY_HANDLE_32:
452 case XFS_IOC_READLINK_BY_HANDLE_32:
453 arg = xfs_ioctl32_fshandle(arg);
454 cmd = _NATIVE_IOC(cmd, struct xfs_fsop_handlereq);
455 break;
191 default: 456 default:
192 return -ENOIOCTLCMD; 457 return -ENOIOCTLCMD;
193 } 458 }
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index af24a457d3a..330c4ba9d40 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -123,6 +123,7 @@
123#define xfs_inherit_nosymlinks xfs_params.inherit_nosym.val 123#define xfs_inherit_nosymlinks xfs_params.inherit_nosym.val
124#define xfs_rotorstep xfs_params.rotorstep.val 124#define xfs_rotorstep xfs_params.rotorstep.val
125#define xfs_inherit_nodefrag xfs_params.inherit_nodfrg.val 125#define xfs_inherit_nodefrag xfs_params.inherit_nodfrg.val
126#define xfs_fstrm_centisecs xfs_params.fstrm_timer.val
126 127
127#define current_cpu() (raw_smp_processor_id()) 128#define current_cpu() (raw_smp_processor_id())
128#define current_pid() (current->pid) 129#define current_pid() (current->pid)
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index bf9a9d5909b..06894cf00b1 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -547,7 +547,8 @@ vfs_sync_worker(
547 547
548 if (!(vfsp->vfs_flag & VFS_RDONLY)) 548 if (!(vfsp->vfs_flag & VFS_RDONLY))
549 error = bhv_vfs_sync(vfsp, SYNC_FSDATA | SYNC_BDFLUSH | \ 549 error = bhv_vfs_sync(vfsp, SYNC_FSDATA | SYNC_BDFLUSH | \
550 SYNC_ATTR | SYNC_REFCACHE, NULL); 550 SYNC_ATTR | SYNC_REFCACHE | SYNC_SUPER,
551 NULL);
551 vfsp->vfs_sync_seq++; 552 vfsp->vfs_sync_seq++;
552 wake_up(&vfsp->vfs_wait_single_sync_task); 553 wake_up(&vfsp->vfs_wait_single_sync_task);
553} 554}
@@ -663,7 +664,7 @@ xfs_fs_sync_super(
663 * occur here so don't bother flushing the buftarg (i.e 664 * occur here so don't bother flushing the buftarg (i.e
664 * SYNC_QUIESCE) because it'll just get dirty again. 665 * SYNC_QUIESCE) because it'll just get dirty again.
665 */ 666 */
666 flags = SYNC_FSDATA | SYNC_DELWRI | SYNC_WAIT | SYNC_IOWAIT; 667 flags = SYNC_DATA_QUIESCE;
667 } else 668 } else
668 flags = SYNC_FSDATA | (wait ? SYNC_WAIT : 0); 669 flags = SYNC_FSDATA | (wait ? SYNC_WAIT : 0);
669 670
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
index cd6eaa44aa2..bb997d75c05 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ b/fs/xfs/linux-2.6/xfs_sysctl.c
@@ -210,6 +210,17 @@ static ctl_table xfs_table[] = {
210 .extra1 = &xfs_params.inherit_nodfrg.min, 210 .extra1 = &xfs_params.inherit_nodfrg.min,
211 .extra2 = &xfs_params.inherit_nodfrg.max 211 .extra2 = &xfs_params.inherit_nodfrg.max
212 }, 212 },
213 {
214 .ctl_name = XFS_FILESTREAM_TIMER,
215 .procname = "filestream_centisecs",
216 .data = &xfs_params.fstrm_timer.val,
217 .maxlen = sizeof(int),
218 .mode = 0644,
219 .proc_handler = &proc_dointvec_minmax,
220 .strategy = &sysctl_intvec,
221 .extra1 = &xfs_params.fstrm_timer.min,
222 .extra2 = &xfs_params.fstrm_timer.max,
223 },
213 /* please keep this the last entry */ 224 /* please keep this the last entry */
214#ifdef CONFIG_PROC_FS 225#ifdef CONFIG_PROC_FS
215 { 226 {
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/linux-2.6/xfs_sysctl.h
index a631fb8cc5a..98b97e399d6 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.h
+++ b/fs/xfs/linux-2.6/xfs_sysctl.h
@@ -47,6 +47,7 @@ typedef struct xfs_param {
47 xfs_sysctl_val_t inherit_nosym; /* Inherit the "nosymlinks" flag. */ 47 xfs_sysctl_val_t inherit_nosym; /* Inherit the "nosymlinks" flag. */
48 xfs_sysctl_val_t rotorstep; /* inode32 AG rotoring control knob */ 48 xfs_sysctl_val_t rotorstep; /* inode32 AG rotoring control knob */
49 xfs_sysctl_val_t inherit_nodfrg;/* Inherit the "nodefrag" inode flag. */ 49 xfs_sysctl_val_t inherit_nodfrg;/* Inherit the "nodefrag" inode flag. */
50 xfs_sysctl_val_t fstrm_timer; /* Filestream dir-AG assoc'n timeout. */
50} xfs_param_t; 51} xfs_param_t;
51 52
52/* 53/*
@@ -86,6 +87,7 @@ enum {
86 XFS_INHERIT_NOSYM = 19, 87 XFS_INHERIT_NOSYM = 19,
87 XFS_ROTORSTEP = 20, 88 XFS_ROTORSTEP = 20,
88 XFS_INHERIT_NODFRG = 21, 89 XFS_INHERIT_NODFRG = 21,
90 XFS_FILESTREAM_TIMER = 22,
89}; 91};
90 92
91extern xfs_param_t xfs_params; 93extern xfs_param_t xfs_params;
diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h
index e2c2ce98ab5..dca3481aaaf 100644
--- a/fs/xfs/linux-2.6/xfs_vfs.h
+++ b/fs/xfs/linux-2.6/xfs_vfs.h
@@ -92,6 +92,21 @@ typedef enum {
92#define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */ 92#define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */
93#define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */ 93#define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */
94#define SYNC_IOWAIT 0x0100 /* wait for all I/O to complete */ 94#define SYNC_IOWAIT 0x0100 /* wait for all I/O to complete */
95#define SYNC_SUPER 0x0200 /* flush superblock to disk */
96
97/*
98 * When remounting a filesystem read-only or freezing the filesystem,
99 * we have two phases to execute. This first phase is syncing the data
100 * before we quiesce the fielsystem, and the second is flushing all the
101 * inodes out after we've waited for all the transactions created by
102 * the first phase to complete. The second phase uses SYNC_INODE_QUIESCE
103 * to ensure that the inodes are written to their location on disk
104 * rather than just existing in transactions in the log. This means
105 * after a quiesce there is no log replay required to write the inodes
106 * to disk (this is the main difference between a sync and a quiesce).
107 */
108#define SYNC_DATA_QUIESCE (SYNC_DELWRI|SYNC_FSDATA|SYNC_WAIT|SYNC_IOWAIT)
109#define SYNC_INODE_QUIESCE (SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT)
95 110
96#define SHUTDOWN_META_IO_ERROR 0x0001 /* write attempt to metadata failed */ 111#define SHUTDOWN_META_IO_ERROR 0x0001 /* write attempt to metadata failed */
97#define SHUTDOWN_LOG_IO_ERROR 0x0002 /* write attempt to the log failed */ 112#define SHUTDOWN_LOG_IO_ERROR 0x0002 /* write attempt to the log failed */
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 013048a9264..5742d65f078 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -129,10 +129,7 @@ typedef enum bhv_vchange {
129 VCHANGE_FLAGS_IOEXCL_COUNT = 4 129 VCHANGE_FLAGS_IOEXCL_COUNT = 4
130} bhv_vchange_t; 130} bhv_vchange_t;
131 131
132typedef enum { L_FALSE, L_TRUE } lastclose_t;
133
134typedef int (*vop_open_t)(bhv_desc_t *, struct cred *); 132typedef int (*vop_open_t)(bhv_desc_t *, struct cred *);
135typedef int (*vop_close_t)(bhv_desc_t *, int, lastclose_t, struct cred *);
136typedef ssize_t (*vop_read_t)(bhv_desc_t *, struct kiocb *, 133typedef ssize_t (*vop_read_t)(bhv_desc_t *, struct kiocb *,
137 const struct iovec *, unsigned int, 134 const struct iovec *, unsigned int,
138 loff_t *, int, struct cred *); 135 loff_t *, int, struct cred *);
@@ -200,7 +197,6 @@ typedef int (*vop_iflush_t)(bhv_desc_t *, int);
200typedef struct bhv_vnodeops { 197typedef struct bhv_vnodeops {
201 bhv_position_t vn_position; /* position within behavior chain */ 198 bhv_position_t vn_position; /* position within behavior chain */
202 vop_open_t vop_open; 199 vop_open_t vop_open;
203 vop_close_t vop_close;
204 vop_read_t vop_read; 200 vop_read_t vop_read;
205 vop_write_t vop_write; 201 vop_write_t vop_write;
206 vop_splice_read_t vop_splice_read; 202 vop_splice_read_t vop_splice_read;
@@ -245,7 +241,6 @@ typedef struct bhv_vnodeops {
245#define VNHEAD(vp) ((vp)->v_bh.bh_first) 241#define VNHEAD(vp) ((vp)->v_bh.bh_first)
246#define VOP(op, vp) (*((bhv_vnodeops_t *)VNHEAD(vp)->bd_ops)->op) 242#define VOP(op, vp) (*((bhv_vnodeops_t *)VNHEAD(vp)->bd_ops)->op)
247#define bhv_vop_open(vp, cr) VOP(vop_open, vp)(VNHEAD(vp),cr) 243#define bhv_vop_open(vp, cr) VOP(vop_open, vp)(VNHEAD(vp),cr)
248#define bhv_vop_close(vp, f,last,cr) VOP(vop_close, vp)(VNHEAD(vp),f,last,cr)
249#define bhv_vop_read(vp,file,iov,segs,offset,ioflags,cr) \ 244#define bhv_vop_read(vp,file,iov,segs,offset,ioflags,cr) \
250 VOP(vop_read, vp)(VNHEAD(vp),file,iov,segs,offset,ioflags,cr) 245 VOP(vop_read, vp)(VNHEAD(vp),file,iov,segs,offset,ioflags,cr)
251#define bhv_vop_write(vp,file,iov,segs,offset,ioflags,cr) \ 246#define bhv_vop_write(vp,file,iov,segs,offset,ioflags,cr) \
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 3e4a8ad8a34..7def4c69934 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -62,10 +62,9 @@ uint ndquot;
62 62
63kmem_zone_t *qm_dqzone; 63kmem_zone_t *qm_dqzone;
64kmem_zone_t *qm_dqtrxzone; 64kmem_zone_t *qm_dqtrxzone;
65static kmem_shaker_t xfs_qm_shaker; 65static struct shrinker *xfs_qm_shaker;
66 66
67static cred_t xfs_zerocr; 67static cred_t xfs_zerocr;
68static xfs_inode_t xfs_zeroino;
69 68
70STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int); 69STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int);
71STATIC void xfs_qm_list_destroy(xfs_dqlist_t *); 70STATIC void xfs_qm_list_destroy(xfs_dqlist_t *);
@@ -150,7 +149,7 @@ xfs_Gqm_init(void)
150 } else 149 } else
151 xqm->qm_dqzone = qm_dqzone; 150 xqm->qm_dqzone = qm_dqzone;
152 151
153 xfs_qm_shaker = kmem_shake_register(xfs_qm_shake); 152 xfs_qm_shaker = set_shrinker(DEFAULT_SEEKS, xfs_qm_shake);
154 153
155 /* 154 /*
156 * The t_dqinfo portion of transactions. 155 * The t_dqinfo portion of transactions.
@@ -182,7 +181,7 @@ xfs_qm_destroy(
182 181
183 ASSERT(xqm != NULL); 182 ASSERT(xqm != NULL);
184 ASSERT(xqm->qm_nrefs == 0); 183 ASSERT(xqm->qm_nrefs == 0);
185 kmem_shake_deregister(xfs_qm_shaker); 184 remove_shrinker(xfs_qm_shaker);
186 hsize = xqm->qm_dqhashmask + 1; 185 hsize = xqm->qm_dqhashmask + 1;
187 for (i = 0; i < hsize; i++) { 186 for (i = 0; i < hsize; i++) {
188 xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i])); 187 xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
@@ -1415,7 +1414,7 @@ xfs_qm_qino_alloc(
1415 return error; 1414 return error;
1416 } 1415 }
1417 1416
1418 if ((error = xfs_dir_ialloc(&tp, &xfs_zeroino, S_IFREG, 1, 0, 1417 if ((error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0,
1419 &xfs_zerocr, 0, 1, ip, &committed))) { 1418 &xfs_zerocr, 0, 1, ip, &committed))) {
1420 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | 1419 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
1421 XFS_TRANS_ABORT); 1420 XFS_TRANS_ABORT);
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index bf0a12040b1..b5a7d92c684 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -38,6 +38,7 @@
38#define XFS_RW_TRACE 1 38#define XFS_RW_TRACE 1
39#define XFS_BUF_TRACE 1 39#define XFS_BUF_TRACE 1
40#define XFS_VNODE_TRACE 1 40#define XFS_VNODE_TRACE 1
41#define XFS_FILESTREAMS_TRACE 1
41#endif 42#endif
42 43
43#include <linux-2.6/xfs_linux.h> 44#include <linux-2.6/xfs_linux.h>
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 9ece7f87ec5..51c09c114a2 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -68,6 +68,7 @@ typedef struct xfs_agf {
68 __be32 agf_flcount; /* count of blocks in freelist */ 68 __be32 agf_flcount; /* count of blocks in freelist */
69 __be32 agf_freeblks; /* total free blocks */ 69 __be32 agf_freeblks; /* total free blocks */
70 __be32 agf_longest; /* longest free space */ 70 __be32 agf_longest; /* longest free space */
71 __be32 agf_btreeblks; /* # of blocks held in AGF btrees */
71} xfs_agf_t; 72} xfs_agf_t;
72 73
73#define XFS_AGF_MAGICNUM 0x00000001 74#define XFS_AGF_MAGICNUM 0x00000001
@@ -81,7 +82,8 @@ typedef struct xfs_agf {
81#define XFS_AGF_FLCOUNT 0x00000100 82#define XFS_AGF_FLCOUNT 0x00000100
82#define XFS_AGF_FREEBLKS 0x00000200 83#define XFS_AGF_FREEBLKS 0x00000200
83#define XFS_AGF_LONGEST 0x00000400 84#define XFS_AGF_LONGEST 0x00000400
84#define XFS_AGF_NUM_BITS 11 85#define XFS_AGF_BTREEBLKS 0x00000800
86#define XFS_AGF_NUM_BITS 12
85#define XFS_AGF_ALL_BITS ((1 << XFS_AGF_NUM_BITS) - 1) 87#define XFS_AGF_ALL_BITS ((1 << XFS_AGF_NUM_BITS) - 1)
86 88
87/* disk block (xfs_daddr_t) in the AG */ 89/* disk block (xfs_daddr_t) in the AG */
@@ -186,12 +188,15 @@ typedef struct xfs_perag
186 __uint32_t pagf_flcount; /* count of blocks in freelist */ 188 __uint32_t pagf_flcount; /* count of blocks in freelist */
187 xfs_extlen_t pagf_freeblks; /* total free blocks */ 189 xfs_extlen_t pagf_freeblks; /* total free blocks */
188 xfs_extlen_t pagf_longest; /* longest free space */ 190 xfs_extlen_t pagf_longest; /* longest free space */
191 __uint32_t pagf_btreeblks; /* # of blocks held in AGF btrees */
189 xfs_agino_t pagi_freecount; /* number of free inodes */ 192 xfs_agino_t pagi_freecount; /* number of free inodes */
193 xfs_agino_t pagi_count; /* number of allocated inodes */
194 int pagb_count; /* pagb slots in use */
190#ifdef __KERNEL__ 195#ifdef __KERNEL__
191 lock_t pagb_lock; /* lock for pagb_list */ 196 lock_t pagb_lock; /* lock for pagb_list */
192#endif 197#endif
193 int pagb_count; /* pagb slots in use */
194 xfs_perag_busy_t *pagb_list; /* unstable blocks */ 198 xfs_perag_busy_t *pagb_list; /* unstable blocks */
199 atomic_t pagf_fstrms; /* # of filestreams active in this AG */
195} xfs_perag_t; 200} xfs_perag_t;
196 201
197#define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels) 202#define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels)
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 8e9a40aa0cd..012a649a19c 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -55,17 +55,17 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
55ktrace_t *xfs_alloc_trace_buf; 55ktrace_t *xfs_alloc_trace_buf;
56 56
57#define TRACE_ALLOC(s,a) \ 57#define TRACE_ALLOC(s,a) \
58 xfs_alloc_trace_alloc(fname, s, a, __LINE__) 58 xfs_alloc_trace_alloc(__FUNCTION__, s, a, __LINE__)
59#define TRACE_FREE(s,a,b,x,f) \ 59#define TRACE_FREE(s,a,b,x,f) \
60 xfs_alloc_trace_free(fname, s, mp, a, b, x, f, __LINE__) 60 xfs_alloc_trace_free(__FUNCTION__, s, mp, a, b, x, f, __LINE__)
61#define TRACE_MODAGF(s,a,f) \ 61#define TRACE_MODAGF(s,a,f) \
62 xfs_alloc_trace_modagf(fname, s, mp, a, f, __LINE__) 62 xfs_alloc_trace_modagf(__FUNCTION__, s, mp, a, f, __LINE__)
63#define TRACE_BUSY(fname,s,ag,agb,l,sl,tp) \ 63#define TRACE_BUSY(__FUNCTION__,s,ag,agb,l,sl,tp) \
64 xfs_alloc_trace_busy(fname, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSY, __LINE__) 64 xfs_alloc_trace_busy(__FUNCTION__, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSY, __LINE__)
65#define TRACE_UNBUSY(fname,s,ag,sl,tp) \ 65#define TRACE_UNBUSY(__FUNCTION__,s,ag,sl,tp) \
66 xfs_alloc_trace_busy(fname, s, mp, ag, -1, -1, sl, tp, XFS_ALLOC_KTRACE_UNBUSY, __LINE__) 66 xfs_alloc_trace_busy(__FUNCTION__, s, mp, ag, -1, -1, sl, tp, XFS_ALLOC_KTRACE_UNBUSY, __LINE__)
67#define TRACE_BUSYSEARCH(fname,s,ag,agb,l,sl,tp) \ 67#define TRACE_BUSYSEARCH(__FUNCTION__,s,ag,agb,l,sl,tp) \
68 xfs_alloc_trace_busy(fname, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSYSEARCH, __LINE__) 68 xfs_alloc_trace_busy(__FUNCTION__, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSYSEARCH, __LINE__)
69#else 69#else
70#define TRACE_ALLOC(s,a) 70#define TRACE_ALLOC(s,a)
71#define TRACE_FREE(s,a,b,x,f) 71#define TRACE_FREE(s,a,b,x,f)
@@ -420,7 +420,7 @@ xfs_alloc_read_agfl(
420 */ 420 */
421STATIC void 421STATIC void
422xfs_alloc_trace_alloc( 422xfs_alloc_trace_alloc(
423 char *name, /* function tag string */ 423 const char *name, /* function tag string */
424 char *str, /* additional string */ 424 char *str, /* additional string */
425 xfs_alloc_arg_t *args, /* allocation argument structure */ 425 xfs_alloc_arg_t *args, /* allocation argument structure */
426 int line) /* source line number */ 426 int line) /* source line number */
@@ -453,7 +453,7 @@ xfs_alloc_trace_alloc(
453 */ 453 */
454STATIC void 454STATIC void
455xfs_alloc_trace_free( 455xfs_alloc_trace_free(
456 char *name, /* function tag string */ 456 const char *name, /* function tag string */
457 char *str, /* additional string */ 457 char *str, /* additional string */
458 xfs_mount_t *mp, /* file system mount point */ 458 xfs_mount_t *mp, /* file system mount point */
459 xfs_agnumber_t agno, /* allocation group number */ 459 xfs_agnumber_t agno, /* allocation group number */
@@ -479,7 +479,7 @@ xfs_alloc_trace_free(
479 */ 479 */
480STATIC void 480STATIC void
481xfs_alloc_trace_modagf( 481xfs_alloc_trace_modagf(
482 char *name, /* function tag string */ 482 const char *name, /* function tag string */
483 char *str, /* additional string */ 483 char *str, /* additional string */
484 xfs_mount_t *mp, /* file system mount point */ 484 xfs_mount_t *mp, /* file system mount point */
485 xfs_agf_t *agf, /* new agf value */ 485 xfs_agf_t *agf, /* new agf value */
@@ -507,7 +507,7 @@ xfs_alloc_trace_modagf(
507 507
508STATIC void 508STATIC void
509xfs_alloc_trace_busy( 509xfs_alloc_trace_busy(
510 char *name, /* function tag string */ 510 const char *name, /* function tag string */
511 char *str, /* additional string */ 511 char *str, /* additional string */
512 xfs_mount_t *mp, /* file system mount point */ 512 xfs_mount_t *mp, /* file system mount point */
513 xfs_agnumber_t agno, /* allocation group number */ 513 xfs_agnumber_t agno, /* allocation group number */
@@ -549,9 +549,6 @@ xfs_alloc_ag_vextent(
549 xfs_alloc_arg_t *args) /* argument structure for allocation */ 549 xfs_alloc_arg_t *args) /* argument structure for allocation */
550{ 550{
551 int error=0; 551 int error=0;
552#ifdef XFS_ALLOC_TRACE
553 static char fname[] = "xfs_alloc_ag_vextent";
554#endif
555 552
556 ASSERT(args->minlen > 0); 553 ASSERT(args->minlen > 0);
557 ASSERT(args->maxlen > 0); 554 ASSERT(args->maxlen > 0);
@@ -635,9 +632,6 @@ xfs_alloc_ag_vextent_exact(
635 xfs_agblock_t fbno; /* start block of found extent */ 632 xfs_agblock_t fbno; /* start block of found extent */
636 xfs_agblock_t fend; /* end block of found extent */ 633 xfs_agblock_t fend; /* end block of found extent */
637 xfs_extlen_t flen; /* length of found extent */ 634 xfs_extlen_t flen; /* length of found extent */
638#ifdef XFS_ALLOC_TRACE
639 static char fname[] = "xfs_alloc_ag_vextent_exact";
640#endif
641 int i; /* success/failure of operation */ 635 int i; /* success/failure of operation */
642 xfs_agblock_t maxend; /* end of maximal extent */ 636 xfs_agblock_t maxend; /* end of maximal extent */
643 xfs_agblock_t minend; /* end of minimal extent */ 637 xfs_agblock_t minend; /* end of minimal extent */
@@ -737,9 +731,6 @@ xfs_alloc_ag_vextent_near(
737 xfs_btree_cur_t *bno_cur_gt; /* cursor for bno btree, right side */ 731 xfs_btree_cur_t *bno_cur_gt; /* cursor for bno btree, right side */
738 xfs_btree_cur_t *bno_cur_lt; /* cursor for bno btree, left side */ 732 xfs_btree_cur_t *bno_cur_lt; /* cursor for bno btree, left side */
739 xfs_btree_cur_t *cnt_cur; /* cursor for count btree */ 733 xfs_btree_cur_t *cnt_cur; /* cursor for count btree */
740#ifdef XFS_ALLOC_TRACE
741 static char fname[] = "xfs_alloc_ag_vextent_near";
742#endif
743 xfs_agblock_t gtbno; /* start bno of right side entry */ 734 xfs_agblock_t gtbno; /* start bno of right side entry */
744 xfs_agblock_t gtbnoa; /* aligned ... */ 735 xfs_agblock_t gtbnoa; /* aligned ... */
745 xfs_extlen_t gtdiff; /* difference to right side entry */ 736 xfs_extlen_t gtdiff; /* difference to right side entry */
@@ -1270,9 +1261,6 @@ xfs_alloc_ag_vextent_size(
1270 int error; /* error result */ 1261 int error; /* error result */
1271 xfs_agblock_t fbno; /* start of found freespace */ 1262 xfs_agblock_t fbno; /* start of found freespace */
1272 xfs_extlen_t flen; /* length of found freespace */ 1263 xfs_extlen_t flen; /* length of found freespace */
1273#ifdef XFS_ALLOC_TRACE
1274 static char fname[] = "xfs_alloc_ag_vextent_size";
1275#endif
1276 int i; /* temp status variable */ 1264 int i; /* temp status variable */
1277 xfs_agblock_t rbno; /* returned block number */ 1265 xfs_agblock_t rbno; /* returned block number */
1278 xfs_extlen_t rlen; /* length of returned extent */ 1266 xfs_extlen_t rlen; /* length of returned extent */
@@ -1427,9 +1415,6 @@ xfs_alloc_ag_vextent_small(
1427 int error; 1415 int error;
1428 xfs_agblock_t fbno; 1416 xfs_agblock_t fbno;
1429 xfs_extlen_t flen; 1417 xfs_extlen_t flen;
1430#ifdef XFS_ALLOC_TRACE
1431 static char fname[] = "xfs_alloc_ag_vextent_small";
1432#endif
1433 int i; 1418 int i;
1434 1419
1435 if ((error = xfs_alloc_decrement(ccur, 0, &i))) 1420 if ((error = xfs_alloc_decrement(ccur, 0, &i)))
@@ -1447,7 +1432,8 @@ xfs_alloc_ag_vextent_small(
1447 else if (args->minlen == 1 && args->alignment == 1 && !args->isfl && 1432 else if (args->minlen == 1 && args->alignment == 1 && !args->isfl &&
1448 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount) 1433 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
1449 > args->minleft)) { 1434 > args->minleft)) {
1450 if ((error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno))) 1435 error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
1436 if (error)
1451 goto error0; 1437 goto error0;
1452 if (fbno != NULLAGBLOCK) { 1438 if (fbno != NULLAGBLOCK) {
1453 if (args->userdata) { 1439 if (args->userdata) {
@@ -1515,9 +1501,6 @@ xfs_free_ag_extent(
1515 xfs_btree_cur_t *bno_cur; /* cursor for by-block btree */ 1501 xfs_btree_cur_t *bno_cur; /* cursor for by-block btree */
1516 xfs_btree_cur_t *cnt_cur; /* cursor for by-size btree */ 1502 xfs_btree_cur_t *cnt_cur; /* cursor for by-size btree */
1517 int error; /* error return value */ 1503 int error; /* error return value */
1518#ifdef XFS_ALLOC_TRACE
1519 static char fname[] = "xfs_free_ag_extent";
1520#endif
1521 xfs_agblock_t gtbno; /* start of right neighbor block */ 1504 xfs_agblock_t gtbno; /* start of right neighbor block */
1522 xfs_extlen_t gtlen; /* length of right neighbor block */ 1505 xfs_extlen_t gtlen; /* length of right neighbor block */
1523 int haveleft; /* have a left neighbor block */ 1506 int haveleft; /* have a left neighbor block */
@@ -1923,7 +1906,8 @@ xfs_alloc_fix_freelist(
1923 while (be32_to_cpu(agf->agf_flcount) > need) { 1906 while (be32_to_cpu(agf->agf_flcount) > need) {
1924 xfs_buf_t *bp; 1907 xfs_buf_t *bp;
1925 1908
1926 if ((error = xfs_alloc_get_freelist(tp, agbp, &bno))) 1909 error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
1910 if (error)
1927 return error; 1911 return error;
1928 if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1))) 1912 if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1)))
1929 return error; 1913 return error;
@@ -1973,8 +1957,9 @@ xfs_alloc_fix_freelist(
1973 * Put each allocated block on the list. 1957 * Put each allocated block on the list.
1974 */ 1958 */
1975 for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) { 1959 for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
1976 if ((error = xfs_alloc_put_freelist(tp, agbp, agflbp, 1960 error = xfs_alloc_put_freelist(tp, agbp,
1977 bno))) 1961 agflbp, bno, 0);
1962 if (error)
1978 return error; 1963 return error;
1979 } 1964 }
1980 } 1965 }
@@ -1991,16 +1976,15 @@ int /* error */
1991xfs_alloc_get_freelist( 1976xfs_alloc_get_freelist(
1992 xfs_trans_t *tp, /* transaction pointer */ 1977 xfs_trans_t *tp, /* transaction pointer */
1993 xfs_buf_t *agbp, /* buffer containing the agf structure */ 1978 xfs_buf_t *agbp, /* buffer containing the agf structure */
1994 xfs_agblock_t *bnop) /* block address retrieved from freelist */ 1979 xfs_agblock_t *bnop, /* block address retrieved from freelist */
1980 int btreeblk) /* destination is a AGF btree */
1995{ 1981{
1996 xfs_agf_t *agf; /* a.g. freespace structure */ 1982 xfs_agf_t *agf; /* a.g. freespace structure */
1997 xfs_agfl_t *agfl; /* a.g. freelist structure */ 1983 xfs_agfl_t *agfl; /* a.g. freelist structure */
1998 xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */ 1984 xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */
1999 xfs_agblock_t bno; /* block number returned */ 1985 xfs_agblock_t bno; /* block number returned */
2000 int error; 1986 int error;
2001#ifdef XFS_ALLOC_TRACE 1987 int logflags;
2002 static char fname[] = "xfs_alloc_get_freelist";
2003#endif
2004 xfs_mount_t *mp; /* mount structure */ 1988 xfs_mount_t *mp; /* mount structure */
2005 xfs_perag_t *pag; /* per allocation group data */ 1989 xfs_perag_t *pag; /* per allocation group data */
2006 1990
@@ -2032,8 +2016,16 @@ xfs_alloc_get_freelist(
2032 be32_add(&agf->agf_flcount, -1); 2016 be32_add(&agf->agf_flcount, -1);
2033 xfs_trans_agflist_delta(tp, -1); 2017 xfs_trans_agflist_delta(tp, -1);
2034 pag->pagf_flcount--; 2018 pag->pagf_flcount--;
2035 TRACE_MODAGF(NULL, agf, XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT); 2019
2036 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT); 2020 logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
2021 if (btreeblk) {
2022 be32_add(&agf->agf_btreeblks, 1);
2023 pag->pagf_btreeblks++;
2024 logflags |= XFS_AGF_BTREEBLKS;
2025 }
2026
2027 TRACE_MODAGF(NULL, agf, logflags);
2028 xfs_alloc_log_agf(tp, agbp, logflags);
2037 *bnop = bno; 2029 *bnop = bno;
2038 2030
2039 /* 2031 /*
@@ -2071,6 +2063,7 @@ xfs_alloc_log_agf(
2071 offsetof(xfs_agf_t, agf_flcount), 2063 offsetof(xfs_agf_t, agf_flcount),
2072 offsetof(xfs_agf_t, agf_freeblks), 2064 offsetof(xfs_agf_t, agf_freeblks),
2073 offsetof(xfs_agf_t, agf_longest), 2065 offsetof(xfs_agf_t, agf_longest),
2066 offsetof(xfs_agf_t, agf_btreeblks),
2074 sizeof(xfs_agf_t) 2067 sizeof(xfs_agf_t)
2075 }; 2068 };
2076 2069
@@ -2106,15 +2099,14 @@ xfs_alloc_put_freelist(
2106 xfs_trans_t *tp, /* transaction pointer */ 2099 xfs_trans_t *tp, /* transaction pointer */
2107 xfs_buf_t *agbp, /* buffer for a.g. freelist header */ 2100 xfs_buf_t *agbp, /* buffer for a.g. freelist header */
2108 xfs_buf_t *agflbp,/* buffer for a.g. free block array */ 2101 xfs_buf_t *agflbp,/* buffer for a.g. free block array */
2109 xfs_agblock_t bno) /* block being freed */ 2102 xfs_agblock_t bno, /* block being freed */
2103 int btreeblk) /* block came from a AGF btree */
2110{ 2104{
2111 xfs_agf_t *agf; /* a.g. freespace structure */ 2105 xfs_agf_t *agf; /* a.g. freespace structure */
2112 xfs_agfl_t *agfl; /* a.g. free block array */ 2106 xfs_agfl_t *agfl; /* a.g. free block array */
2113 __be32 *blockp;/* pointer to array entry */ 2107 __be32 *blockp;/* pointer to array entry */
2114 int error; 2108 int error;
2115#ifdef XFS_ALLOC_TRACE 2109 int logflags;
2116 static char fname[] = "xfs_alloc_put_freelist";
2117#endif
2118 xfs_mount_t *mp; /* mount structure */ 2110 xfs_mount_t *mp; /* mount structure */
2119 xfs_perag_t *pag; /* per allocation group data */ 2111 xfs_perag_t *pag; /* per allocation group data */
2120 2112
@@ -2132,11 +2124,22 @@ xfs_alloc_put_freelist(
2132 be32_add(&agf->agf_flcount, 1); 2124 be32_add(&agf->agf_flcount, 1);
2133 xfs_trans_agflist_delta(tp, 1); 2125 xfs_trans_agflist_delta(tp, 1);
2134 pag->pagf_flcount++; 2126 pag->pagf_flcount++;
2127
2128 logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
2129 if (btreeblk) {
2130 be32_add(&agf->agf_btreeblks, -1);
2131 pag->pagf_btreeblks--;
2132 logflags |= XFS_AGF_BTREEBLKS;
2133 }
2134
2135 TRACE_MODAGF(NULL, agf, logflags);
2136 xfs_alloc_log_agf(tp, agbp, logflags);
2137
2135 ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)); 2138 ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
2136 blockp = &agfl->agfl_bno[be32_to_cpu(agf->agf_fllast)]; 2139 blockp = &agfl->agfl_bno[be32_to_cpu(agf->agf_fllast)];
2137 *blockp = cpu_to_be32(bno); 2140 *blockp = cpu_to_be32(bno);
2138 TRACE_MODAGF(NULL, agf, XFS_AGF_FLLAST | XFS_AGF_FLCOUNT); 2141 TRACE_MODAGF(NULL, agf, logflags);
2139 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLLAST | XFS_AGF_FLCOUNT); 2142 xfs_alloc_log_agf(tp, agbp, logflags);
2140 xfs_trans_log_buf(tp, agflbp, 2143 xfs_trans_log_buf(tp, agflbp,
2141 (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl), 2144 (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl),
2142 (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl + 2145 (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl +
@@ -2196,6 +2199,7 @@ xfs_alloc_read_agf(
2196 pag = &mp->m_perag[agno]; 2199 pag = &mp->m_perag[agno];
2197 if (!pag->pagf_init) { 2200 if (!pag->pagf_init) {
2198 pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks); 2201 pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
2202 pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
2199 pag->pagf_flcount = be32_to_cpu(agf->agf_flcount); 2203 pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
2200 pag->pagf_longest = be32_to_cpu(agf->agf_longest); 2204 pag->pagf_longest = be32_to_cpu(agf->agf_longest);
2201 pag->pagf_levels[XFS_BTNUM_BNOi] = 2205 pag->pagf_levels[XFS_BTNUM_BNOi] =
@@ -2235,9 +2239,6 @@ xfs_alloc_vextent(
2235 xfs_agblock_t agsize; /* allocation group size */ 2239 xfs_agblock_t agsize; /* allocation group size */
2236 int error; 2240 int error;
2237 int flags; /* XFS_ALLOC_FLAG_... locking flags */ 2241 int flags; /* XFS_ALLOC_FLAG_... locking flags */
2238#ifdef XFS_ALLOC_TRACE
2239 static char fname[] = "xfs_alloc_vextent";
2240#endif
2241 xfs_extlen_t minleft;/* minimum left value, temp copy */ 2242 xfs_extlen_t minleft;/* minimum left value, temp copy */
2242 xfs_mount_t *mp; /* mount structure pointer */ 2243 xfs_mount_t *mp; /* mount structure pointer */
2243 xfs_agnumber_t sagno; /* starting allocation group number */ 2244 xfs_agnumber_t sagno; /* starting allocation group number */
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 5a4256120cc..5aec15d0651 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -136,7 +136,8 @@ int /* error */
136xfs_alloc_get_freelist( 136xfs_alloc_get_freelist(
137 struct xfs_trans *tp, /* transaction pointer */ 137 struct xfs_trans *tp, /* transaction pointer */
138 struct xfs_buf *agbp, /* buffer containing the agf structure */ 138 struct xfs_buf *agbp, /* buffer containing the agf structure */
139 xfs_agblock_t *bnop); /* block address retrieved from freelist */ 139 xfs_agblock_t *bnop, /* block address retrieved from freelist */
140 int btreeblk); /* destination is a AGF btree */
140 141
141/* 142/*
142 * Log the given fields from the agf structure. 143 * Log the given fields from the agf structure.
@@ -165,7 +166,8 @@ xfs_alloc_put_freelist(
165 struct xfs_trans *tp, /* transaction pointer */ 166 struct xfs_trans *tp, /* transaction pointer */
166 struct xfs_buf *agbp, /* buffer for a.g. freelist header */ 167 struct xfs_buf *agbp, /* buffer for a.g. freelist header */
167 struct xfs_buf *agflbp,/* buffer for a.g. free block array */ 168 struct xfs_buf *agflbp,/* buffer for a.g. free block array */
168 xfs_agblock_t bno); /* block being freed */ 169 xfs_agblock_t bno, /* block being freed */
170 int btreeblk); /* owner was a AGF btree */
169 171
170/* 172/*
171 * Read in the allocation group header (free/alloc section). 173 * Read in the allocation group header (free/alloc section).
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index 74cadf95d4e..1603ce59585 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -226,8 +226,9 @@ xfs_alloc_delrec(
226 /* 226 /*
227 * Put this buffer/block on the ag's freelist. 227 * Put this buffer/block on the ag's freelist.
228 */ 228 */
229 if ((error = xfs_alloc_put_freelist(cur->bc_tp, 229 error = xfs_alloc_put_freelist(cur->bc_tp,
230 cur->bc_private.a.agbp, NULL, bno))) 230 cur->bc_private.a.agbp, NULL, bno, 1);
231 if (error)
231 return error; 232 return error;
232 /* 233 /*
233 * Since blocks move to the free list without the 234 * Since blocks move to the free list without the
@@ -549,8 +550,9 @@ xfs_alloc_delrec(
549 /* 550 /*
550 * Free the deleting block by putting it on the freelist. 551 * Free the deleting block by putting it on the freelist.
551 */ 552 */
552 if ((error = xfs_alloc_put_freelist(cur->bc_tp, cur->bc_private.a.agbp, 553 error = xfs_alloc_put_freelist(cur->bc_tp,
553 NULL, rbno))) 554 cur->bc_private.a.agbp, NULL, rbno, 1);
555 if (error)
554 return error; 556 return error;
555 /* 557 /*
556 * Since blocks move to the free list without the coordination 558 * Since blocks move to the free list without the coordination
@@ -1320,8 +1322,9 @@ xfs_alloc_newroot(
1320 /* 1322 /*
1321 * Get a buffer from the freelist blocks, for the new root. 1323 * Get a buffer from the freelist blocks, for the new root.
1322 */ 1324 */
1323 if ((error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp, 1325 error = xfs_alloc_get_freelist(cur->bc_tp,
1324 &nbno))) 1326 cur->bc_private.a.agbp, &nbno, 1);
1327 if (error)
1325 return error; 1328 return error;
1326 /* 1329 /*
1327 * None available, we fail. 1330 * None available, we fail.
@@ -1604,8 +1607,9 @@ xfs_alloc_split(
1604 * Allocate the new block from the freelist. 1607 * Allocate the new block from the freelist.
1605 * If we can't do it, we're toast. Give up. 1608 * If we can't do it, we're toast. Give up.
1606 */ 1609 */
1607 if ((error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp, 1610 error = xfs_alloc_get_freelist(cur->bc_tp,
1608 &rbno))) 1611 cur->bc_private.a.agbp, &rbno, 1);
1612 if (error)
1609 return error; 1613 return error;
1610 if (rbno == NULLAGBLOCK) { 1614 if (rbno == NULLAGBLOCK) {
1611 *stat = 0; 1615 *stat = 0;
diff --git a/fs/xfs/xfs_bit.c b/fs/xfs/xfs_bit.c
index 1afe07f67e3..fab0b6d5a41 100644
--- a/fs/xfs/xfs_bit.c
+++ b/fs/xfs/xfs_bit.c
@@ -66,44 +66,6 @@ static const char xfs_highbit[256] = {
66#endif 66#endif
67 67
68/* 68/*
69 * Count of bits set in byte, 0..8.
70 */
71static const char xfs_countbit[256] = {
72 0, 1, 1, 2, 1, 2, 2, 3, /* 00 .. 07 */
73 1, 2, 2, 3, 2, 3, 3, 4, /* 08 .. 0f */
74 1, 2, 2, 3, 2, 3, 3, 4, /* 10 .. 17 */
75 2, 3, 3, 4, 3, 4, 4, 5, /* 18 .. 1f */
76 1, 2, 2, 3, 2, 3, 3, 4, /* 20 .. 27 */
77 2, 3, 3, 4, 3, 4, 4, 5, /* 28 .. 2f */
78 2, 3, 3, 4, 3, 4, 4, 5, /* 30 .. 37 */
79 3, 4, 4, 5, 4, 5, 5, 6, /* 38 .. 3f */
80 1, 2, 2, 3, 2, 3, 3, 4, /* 40 .. 47 */
81 2, 3, 3, 4, 3, 4, 4, 5, /* 48 .. 4f */
82 2, 3, 3, 4, 3, 4, 4, 5, /* 50 .. 57 */
83 3, 4, 4, 5, 4, 5, 5, 6, /* 58 .. 5f */
84 2, 3, 3, 4, 3, 4, 4, 5, /* 60 .. 67 */
85 3, 4, 4, 5, 4, 5, 5, 6, /* 68 .. 6f */
86 3, 4, 4, 5, 4, 5, 5, 6, /* 70 .. 77 */
87 4, 5, 5, 6, 5, 6, 6, 7, /* 78 .. 7f */
88 1, 2, 2, 3, 2, 3, 3, 4, /* 80 .. 87 */
89 2, 3, 3, 4, 3, 4, 4, 5, /* 88 .. 8f */
90 2, 3, 3, 4, 3, 4, 4, 5, /* 90 .. 97 */
91 3, 4, 4, 5, 4, 5, 5, 6, /* 98 .. 9f */
92 2, 3, 3, 4, 3, 4, 4, 5, /* a0 .. a7 */
93 3, 4, 4, 5, 4, 5, 5, 6, /* a8 .. af */
94 3, 4, 4, 5, 4, 5, 5, 6, /* b0 .. b7 */
95 4, 5, 5, 6, 5, 6, 6, 7, /* b8 .. bf */
96 2, 3, 3, 4, 3, 4, 4, 5, /* c0 .. c7 */
97 3, 4, 4, 5, 4, 5, 5, 6, /* c8 .. cf */
98 3, 4, 4, 5, 4, 5, 5, 6, /* d0 .. d7 */
99 4, 5, 5, 6, 5, 6, 6, 7, /* d8 .. df */
100 3, 4, 4, 5, 4, 5, 5, 6, /* e0 .. e7 */
101 4, 5, 5, 6, 5, 6, 6, 7, /* e8 .. ef */
102 4, 5, 5, 6, 5, 6, 6, 7, /* f0 .. f7 */
103 5, 6, 6, 7, 6, 7, 7, 8, /* f8 .. ff */
104};
105
106/*
107 * xfs_highbit32: get high bit set out of 32-bit argument, -1 if none set. 69 * xfs_highbit32: get high bit set out of 32-bit argument, -1 if none set.
108 */ 70 */
109inline int 71inline int
@@ -167,56 +129,21 @@ xfs_highbit64(
167 129
168 130
169/* 131/*
170 * Count the number of bits set in the bitmap starting with bit 132 * Return whether bitmap is empty.
171 * start_bit. Size is the size of the bitmap in words. 133 * Size is number of words in the bitmap, which is padded to word boundary
172 * 134 * Returns 1 for empty, 0 for non-empty.
173 * Do the counting by mapping a byte value to the number of set
174 * bits for that value using the xfs_countbit array, i.e.
175 * xfs_countbit[0] == 0, xfs_countbit[1] == 1, xfs_countbit[2] == 1,
176 * xfs_countbit[3] == 2, etc.
177 */ 135 */
178int 136int
179xfs_count_bits(uint *map, uint size, uint start_bit) 137xfs_bitmap_empty(uint *map, uint size)
180{ 138{
181 register int bits; 139 uint i;
182 register unsigned char *bytep; 140 uint ret = 0;
183 register unsigned char *end_map;
184 int byte_bit;
185
186 bits = 0;
187 end_map = (char*)(map + size);
188 bytep = (char*)(map + (start_bit & ~0x7));
189 byte_bit = start_bit & 0x7;
190
191 /*
192 * If the caller fell off the end of the map, return 0.
193 */
194 if (bytep >= end_map) {
195 return (0);
196 }
197
198 /*
199 * If start_bit is not byte aligned, then process the
200 * first byte separately.
201 */
202 if (byte_bit != 0) {
203 /*
204 * Shift off the bits we don't want to look at,
205 * before indexing into xfs_countbit.
206 */
207 bits += xfs_countbit[(*bytep >> byte_bit)];
208 bytep++;
209 }
210 141
211 /* 142 for (i = 0; i < size; i++) {
212 * Count the bits in each byte until the end of the bitmap. 143 ret |= map[i];
213 */
214 while (bytep < end_map) {
215 bits += xfs_countbit[*bytep];
216 bytep++;
217 } 144 }
218 145
219 return (bits); 146 return (ret == 0);
220} 147}
221 148
222/* 149/*
diff --git a/fs/xfs/xfs_bit.h b/fs/xfs/xfs_bit.h
index 0bbe5681754..082641a9782 100644
--- a/fs/xfs/xfs_bit.h
+++ b/fs/xfs/xfs_bit.h
@@ -55,8 +55,8 @@ extern int xfs_lowbit64(__uint64_t v);
55/* Get high bit set out of 64-bit argument, -1 if none set */ 55/* Get high bit set out of 64-bit argument, -1 if none set */
56extern int xfs_highbit64(__uint64_t); 56extern int xfs_highbit64(__uint64_t);
57 57
58/* Count set bits in map starting with start_bit */ 58/* Return whether bitmap is empty (1 == empty) */
59extern int xfs_count_bits(uint *map, uint size, uint start_bit); 59extern int xfs_bitmap_empty(uint *map, uint size);
60 60
61/* Count continuous one bits in map starting with start_bit */ 61/* Count continuous one bits in map starting with start_bit */
62extern int xfs_contig_bits(uint *map, uint size, uint start_bit); 62extern int xfs_contig_bits(uint *map, uint size, uint start_bit);
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index b1ea26e40aa..94b5c5fe268 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -52,6 +52,7 @@
52#include "xfs_quota.h" 52#include "xfs_quota.h"
53#include "xfs_trans_space.h" 53#include "xfs_trans_space.h"
54#include "xfs_buf_item.h" 54#include "xfs_buf_item.h"
55#include "xfs_filestream.h"
55 56
56 57
57#ifdef DEBUG 58#ifdef DEBUG
@@ -277,7 +278,7 @@ xfs_bmap_isaeof(
277STATIC void 278STATIC void
278xfs_bmap_trace_addentry( 279xfs_bmap_trace_addentry(
279 int opcode, /* operation */ 280 int opcode, /* operation */
280 char *fname, /* function name */ 281 const char *fname, /* function name */
281 char *desc, /* operation description */ 282 char *desc, /* operation description */
282 xfs_inode_t *ip, /* incore inode pointer */ 283 xfs_inode_t *ip, /* incore inode pointer */
283 xfs_extnum_t idx, /* index of entry(ies) */ 284 xfs_extnum_t idx, /* index of entry(ies) */
@@ -291,7 +292,7 @@ xfs_bmap_trace_addentry(
291 */ 292 */
292STATIC void 293STATIC void
293xfs_bmap_trace_delete( 294xfs_bmap_trace_delete(
294 char *fname, /* function name */ 295 const char *fname, /* function name */
295 char *desc, /* operation description */ 296 char *desc, /* operation description */
296 xfs_inode_t *ip, /* incore inode pointer */ 297 xfs_inode_t *ip, /* incore inode pointer */
297 xfs_extnum_t idx, /* index of entry(entries) deleted */ 298 xfs_extnum_t idx, /* index of entry(entries) deleted */
@@ -304,7 +305,7 @@ xfs_bmap_trace_delete(
304 */ 305 */
305STATIC void 306STATIC void
306xfs_bmap_trace_insert( 307xfs_bmap_trace_insert(
307 char *fname, /* function name */ 308 const char *fname, /* function name */
308 char *desc, /* operation description */ 309 char *desc, /* operation description */
309 xfs_inode_t *ip, /* incore inode pointer */ 310 xfs_inode_t *ip, /* incore inode pointer */
310 xfs_extnum_t idx, /* index of entry(entries) inserted */ 311 xfs_extnum_t idx, /* index of entry(entries) inserted */
@@ -318,7 +319,7 @@ xfs_bmap_trace_insert(
318 */ 319 */
319STATIC void 320STATIC void
320xfs_bmap_trace_post_update( 321xfs_bmap_trace_post_update(
321 char *fname, /* function name */ 322 const char *fname, /* function name */
322 char *desc, /* operation description */ 323 char *desc, /* operation description */
323 xfs_inode_t *ip, /* incore inode pointer */ 324 xfs_inode_t *ip, /* incore inode pointer */
324 xfs_extnum_t idx, /* index of entry updated */ 325 xfs_extnum_t idx, /* index of entry updated */
@@ -329,17 +330,25 @@ xfs_bmap_trace_post_update(
329 */ 330 */
330STATIC void 331STATIC void
331xfs_bmap_trace_pre_update( 332xfs_bmap_trace_pre_update(
332 char *fname, /* function name */ 333 const char *fname, /* function name */
333 char *desc, /* operation description */ 334 char *desc, /* operation description */
334 xfs_inode_t *ip, /* incore inode pointer */ 335 xfs_inode_t *ip, /* incore inode pointer */
335 xfs_extnum_t idx, /* index of entry to be updated */ 336 xfs_extnum_t idx, /* index of entry to be updated */
336 int whichfork); /* data or attr fork */ 337 int whichfork); /* data or attr fork */
337 338
339#define XFS_BMAP_TRACE_DELETE(d,ip,i,c,w) \
340 xfs_bmap_trace_delete(__FUNCTION__,d,ip,i,c,w)
341#define XFS_BMAP_TRACE_INSERT(d,ip,i,c,r1,r2,w) \
342 xfs_bmap_trace_insert(__FUNCTION__,d,ip,i,c,r1,r2,w)
343#define XFS_BMAP_TRACE_POST_UPDATE(d,ip,i,w) \
344 xfs_bmap_trace_post_update(__FUNCTION__,d,ip,i,w)
345#define XFS_BMAP_TRACE_PRE_UPDATE(d,ip,i,w) \
346 xfs_bmap_trace_pre_update(__FUNCTION__,d,ip,i,w)
338#else 347#else
339#define xfs_bmap_trace_delete(f,d,ip,i,c,w) 348#define XFS_BMAP_TRACE_DELETE(d,ip,i,c,w)
340#define xfs_bmap_trace_insert(f,d,ip,i,c,r1,r2,w) 349#define XFS_BMAP_TRACE_INSERT(d,ip,i,c,r1,r2,w)
341#define xfs_bmap_trace_post_update(f,d,ip,i,w) 350#define XFS_BMAP_TRACE_POST_UPDATE(d,ip,i,w)
342#define xfs_bmap_trace_pre_update(f,d,ip,i,w) 351#define XFS_BMAP_TRACE_PRE_UPDATE(d,ip,i,w)
343#endif /* XFS_BMAP_TRACE */ 352#endif /* XFS_BMAP_TRACE */
344 353
345/* 354/*
@@ -531,9 +540,6 @@ xfs_bmap_add_extent(
531 xfs_filblks_t da_new; /* new count del alloc blocks used */ 540 xfs_filblks_t da_new; /* new count del alloc blocks used */
532 xfs_filblks_t da_old; /* old count del alloc blocks used */ 541 xfs_filblks_t da_old; /* old count del alloc blocks used */
533 int error; /* error return value */ 542 int error; /* error return value */
534#ifdef XFS_BMAP_TRACE
535 static char fname[] = "xfs_bmap_add_extent";
536#endif
537 xfs_ifork_t *ifp; /* inode fork ptr */ 543 xfs_ifork_t *ifp; /* inode fork ptr */
538 int logflags; /* returned value */ 544 int logflags; /* returned value */
539 xfs_extnum_t nextents; /* number of extents in file now */ 545 xfs_extnum_t nextents; /* number of extents in file now */
@@ -551,8 +557,8 @@ xfs_bmap_add_extent(
551 * already extents in the list. 557 * already extents in the list.
552 */ 558 */
553 if (nextents == 0) { 559 if (nextents == 0) {
554 xfs_bmap_trace_insert(fname, "insert empty", ip, 0, 1, new, 560 XFS_BMAP_TRACE_INSERT("insert empty", ip, 0, 1, new, NULL,
555 NULL, whichfork); 561 whichfork);
556 xfs_iext_insert(ifp, 0, 1, new); 562 xfs_iext_insert(ifp, 0, 1, new);
557 ASSERT(cur == NULL); 563 ASSERT(cur == NULL);
558 ifp->if_lastex = 0; 564 ifp->if_lastex = 0;
@@ -710,9 +716,6 @@ xfs_bmap_add_extent_delay_real(
710 int diff; /* temp value */ 716 int diff; /* temp value */
711 xfs_bmbt_rec_t *ep; /* extent entry for idx */ 717 xfs_bmbt_rec_t *ep; /* extent entry for idx */
712 int error; /* error return value */ 718 int error; /* error return value */
713#ifdef XFS_BMAP_TRACE
714 static char fname[] = "xfs_bmap_add_extent_delay_real";
715#endif
716 int i; /* temp state */ 719 int i; /* temp state */
717 xfs_ifork_t *ifp; /* inode fork pointer */ 720 xfs_ifork_t *ifp; /* inode fork pointer */
718 xfs_fileoff_t new_endoff; /* end offset of new entry */ 721 xfs_fileoff_t new_endoff; /* end offset of new entry */
@@ -808,15 +811,14 @@ xfs_bmap_add_extent_delay_real(
808 * Filling in all of a previously delayed allocation extent. 811 * Filling in all of a previously delayed allocation extent.
809 * The left and right neighbors are both contiguous with new. 812 * The left and right neighbors are both contiguous with new.
810 */ 813 */
811 xfs_bmap_trace_pre_update(fname, "LF|RF|LC|RC", ip, idx - 1, 814 XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|LC|RC", ip, idx - 1,
812 XFS_DATA_FORK); 815 XFS_DATA_FORK);
813 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), 816 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
814 LEFT.br_blockcount + PREV.br_blockcount + 817 LEFT.br_blockcount + PREV.br_blockcount +
815 RIGHT.br_blockcount); 818 RIGHT.br_blockcount);
816 xfs_bmap_trace_post_update(fname, "LF|RF|LC|RC", ip, idx - 1, 819 XFS_BMAP_TRACE_POST_UPDATE("LF|RF|LC|RC", ip, idx - 1,
817 XFS_DATA_FORK);
818 xfs_bmap_trace_delete(fname, "LF|RF|LC|RC", ip, idx, 2,
819 XFS_DATA_FORK); 820 XFS_DATA_FORK);
821 XFS_BMAP_TRACE_DELETE("LF|RF|LC|RC", ip, idx, 2, XFS_DATA_FORK);
820 xfs_iext_remove(ifp, idx, 2); 822 xfs_iext_remove(ifp, idx, 2);
821 ip->i_df.if_lastex = idx - 1; 823 ip->i_df.if_lastex = idx - 1;
822 ip->i_d.di_nextents--; 824 ip->i_d.di_nextents--;
@@ -855,15 +857,14 @@ xfs_bmap_add_extent_delay_real(
855 * Filling in all of a previously delayed allocation extent. 857 * Filling in all of a previously delayed allocation extent.
856 * The left neighbor is contiguous, the right is not. 858 * The left neighbor is contiguous, the right is not.
857 */ 859 */
858 xfs_bmap_trace_pre_update(fname, "LF|RF|LC", ip, idx - 1, 860 XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|LC", ip, idx - 1,
859 XFS_DATA_FORK); 861 XFS_DATA_FORK);
860 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), 862 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
861 LEFT.br_blockcount + PREV.br_blockcount); 863 LEFT.br_blockcount + PREV.br_blockcount);
862 xfs_bmap_trace_post_update(fname, "LF|RF|LC", ip, idx - 1, 864 XFS_BMAP_TRACE_POST_UPDATE("LF|RF|LC", ip, idx - 1,
863 XFS_DATA_FORK); 865 XFS_DATA_FORK);
864 ip->i_df.if_lastex = idx - 1; 866 ip->i_df.if_lastex = idx - 1;
865 xfs_bmap_trace_delete(fname, "LF|RF|LC", ip, idx, 1, 867 XFS_BMAP_TRACE_DELETE("LF|RF|LC", ip, idx, 1, XFS_DATA_FORK);
866 XFS_DATA_FORK);
867 xfs_iext_remove(ifp, idx, 1); 868 xfs_iext_remove(ifp, idx, 1);
868 if (cur == NULL) 869 if (cur == NULL)
869 rval = XFS_ILOG_DEXT; 870 rval = XFS_ILOG_DEXT;
@@ -892,16 +893,13 @@ xfs_bmap_add_extent_delay_real(
892 * Filling in all of a previously delayed allocation extent. 893 * Filling in all of a previously delayed allocation extent.
893 * The right neighbor is contiguous, the left is not. 894 * The right neighbor is contiguous, the left is not.
894 */ 895 */
895 xfs_bmap_trace_pre_update(fname, "LF|RF|RC", ip, idx, 896 XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|RC", ip, idx, XFS_DATA_FORK);
896 XFS_DATA_FORK);
897 xfs_bmbt_set_startblock(ep, new->br_startblock); 897 xfs_bmbt_set_startblock(ep, new->br_startblock);
898 xfs_bmbt_set_blockcount(ep, 898 xfs_bmbt_set_blockcount(ep,
899 PREV.br_blockcount + RIGHT.br_blockcount); 899 PREV.br_blockcount + RIGHT.br_blockcount);
900 xfs_bmap_trace_post_update(fname, "LF|RF|RC", ip, idx, 900 XFS_BMAP_TRACE_POST_UPDATE("LF|RF|RC", ip, idx, XFS_DATA_FORK);
901 XFS_DATA_FORK);
902 ip->i_df.if_lastex = idx; 901 ip->i_df.if_lastex = idx;
903 xfs_bmap_trace_delete(fname, "LF|RF|RC", ip, idx + 1, 1, 902 XFS_BMAP_TRACE_DELETE("LF|RF|RC", ip, idx + 1, 1, XFS_DATA_FORK);
904 XFS_DATA_FORK);
905 xfs_iext_remove(ifp, idx + 1, 1); 903 xfs_iext_remove(ifp, idx + 1, 1);
906 if (cur == NULL) 904 if (cur == NULL)
907 rval = XFS_ILOG_DEXT; 905 rval = XFS_ILOG_DEXT;
@@ -931,11 +929,9 @@ xfs_bmap_add_extent_delay_real(
931 * Neither the left nor right neighbors are contiguous with 929 * Neither the left nor right neighbors are contiguous with
932 * the new one. 930 * the new one.
933 */ 931 */
934 xfs_bmap_trace_pre_update(fname, "LF|RF", ip, idx, 932 XFS_BMAP_TRACE_PRE_UPDATE("LF|RF", ip, idx, XFS_DATA_FORK);
935 XFS_DATA_FORK);
936 xfs_bmbt_set_startblock(ep, new->br_startblock); 933 xfs_bmbt_set_startblock(ep, new->br_startblock);
937 xfs_bmap_trace_post_update(fname, "LF|RF", ip, idx, 934 XFS_BMAP_TRACE_POST_UPDATE("LF|RF", ip, idx, XFS_DATA_FORK);
938 XFS_DATA_FORK);
939 ip->i_df.if_lastex = idx; 935 ip->i_df.if_lastex = idx;
940 ip->i_d.di_nextents++; 936 ip->i_d.di_nextents++;
941 if (cur == NULL) 937 if (cur == NULL)
@@ -963,17 +959,14 @@ xfs_bmap_add_extent_delay_real(
963 * Filling in the first part of a previous delayed allocation. 959 * Filling in the first part of a previous delayed allocation.
964 * The left neighbor is contiguous. 960 * The left neighbor is contiguous.
965 */ 961 */
966 xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx - 1, 962 XFS_BMAP_TRACE_PRE_UPDATE("LF|LC", ip, idx - 1, XFS_DATA_FORK);
967 XFS_DATA_FORK);
968 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), 963 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
969 LEFT.br_blockcount + new->br_blockcount); 964 LEFT.br_blockcount + new->br_blockcount);
970 xfs_bmbt_set_startoff(ep, 965 xfs_bmbt_set_startoff(ep,
971 PREV.br_startoff + new->br_blockcount); 966 PREV.br_startoff + new->br_blockcount);
972 xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx - 1, 967 XFS_BMAP_TRACE_POST_UPDATE("LF|LC", ip, idx - 1, XFS_DATA_FORK);
973 XFS_DATA_FORK);
974 temp = PREV.br_blockcount - new->br_blockcount; 968 temp = PREV.br_blockcount - new->br_blockcount;
975 xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx, 969 XFS_BMAP_TRACE_PRE_UPDATE("LF|LC", ip, idx, XFS_DATA_FORK);
976 XFS_DATA_FORK);
977 xfs_bmbt_set_blockcount(ep, temp); 970 xfs_bmbt_set_blockcount(ep, temp);
978 ip->i_df.if_lastex = idx - 1; 971 ip->i_df.if_lastex = idx - 1;
979 if (cur == NULL) 972 if (cur == NULL)
@@ -995,8 +988,7 @@ xfs_bmap_add_extent_delay_real(
995 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), 988 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
996 STARTBLOCKVAL(PREV.br_startblock)); 989 STARTBLOCKVAL(PREV.br_startblock));
997 xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp)); 990 xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
998 xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx, 991 XFS_BMAP_TRACE_POST_UPDATE("LF|LC", ip, idx, XFS_DATA_FORK);
999 XFS_DATA_FORK);
1000 *dnew = temp; 992 *dnew = temp;
1001 /* DELTA: The boundary between two in-core extents moved. */ 993 /* DELTA: The boundary between two in-core extents moved. */
1002 temp = LEFT.br_startoff; 994 temp = LEFT.br_startoff;
@@ -1009,11 +1001,11 @@ xfs_bmap_add_extent_delay_real(
1009 * Filling in the first part of a previous delayed allocation. 1001 * Filling in the first part of a previous delayed allocation.
1010 * The left neighbor is not contiguous. 1002 * The left neighbor is not contiguous.
1011 */ 1003 */
1012 xfs_bmap_trace_pre_update(fname, "LF", ip, idx, XFS_DATA_FORK); 1004 XFS_BMAP_TRACE_PRE_UPDATE("LF", ip, idx, XFS_DATA_FORK);
1013 xfs_bmbt_set_startoff(ep, new_endoff); 1005 xfs_bmbt_set_startoff(ep, new_endoff);
1014 temp = PREV.br_blockcount - new->br_blockcount; 1006 temp = PREV.br_blockcount - new->br_blockcount;
1015 xfs_bmbt_set_blockcount(ep, temp); 1007 xfs_bmbt_set_blockcount(ep, temp);
1016 xfs_bmap_trace_insert(fname, "LF", ip, idx, 1, new, NULL, 1008 XFS_BMAP_TRACE_INSERT("LF", ip, idx, 1, new, NULL,
1017 XFS_DATA_FORK); 1009 XFS_DATA_FORK);
1018 xfs_iext_insert(ifp, idx, 1, new); 1010 xfs_iext_insert(ifp, idx, 1, new);
1019 ip->i_df.if_lastex = idx; 1011 ip->i_df.if_lastex = idx;
@@ -1046,8 +1038,7 @@ xfs_bmap_add_extent_delay_real(
1046 (cur ? cur->bc_private.b.allocated : 0)); 1038 (cur ? cur->bc_private.b.allocated : 0));
1047 ep = xfs_iext_get_ext(ifp, idx + 1); 1039 ep = xfs_iext_get_ext(ifp, idx + 1);
1048 xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp)); 1040 xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
1049 xfs_bmap_trace_post_update(fname, "LF", ip, idx + 1, 1041 XFS_BMAP_TRACE_POST_UPDATE("LF", ip, idx + 1, XFS_DATA_FORK);
1050 XFS_DATA_FORK);
1051 *dnew = temp; 1042 *dnew = temp;
1052 /* DELTA: One in-core extent is split in two. */ 1043 /* DELTA: One in-core extent is split in two. */
1053 temp = PREV.br_startoff; 1044 temp = PREV.br_startoff;
@@ -1060,17 +1051,14 @@ xfs_bmap_add_extent_delay_real(
1060 * The right neighbor is contiguous with the new allocation. 1051 * The right neighbor is contiguous with the new allocation.
1061 */ 1052 */
1062 temp = PREV.br_blockcount - new->br_blockcount; 1053 temp = PREV.br_blockcount - new->br_blockcount;
1063 xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx, 1054 XFS_BMAP_TRACE_PRE_UPDATE("RF|RC", ip, idx, XFS_DATA_FORK);
1064 XFS_DATA_FORK); 1055 XFS_BMAP_TRACE_PRE_UPDATE("RF|RC", ip, idx + 1, XFS_DATA_FORK);
1065 xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx + 1,
1066 XFS_DATA_FORK);
1067 xfs_bmbt_set_blockcount(ep, temp); 1056 xfs_bmbt_set_blockcount(ep, temp);
1068 xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1), 1057 xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1),
1069 new->br_startoff, new->br_startblock, 1058 new->br_startoff, new->br_startblock,
1070 new->br_blockcount + RIGHT.br_blockcount, 1059 new->br_blockcount + RIGHT.br_blockcount,
1071 RIGHT.br_state); 1060 RIGHT.br_state);
1072 xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx + 1, 1061 XFS_BMAP_TRACE_POST_UPDATE("RF|RC", ip, idx + 1, XFS_DATA_FORK);
1073 XFS_DATA_FORK);
1074 ip->i_df.if_lastex = idx + 1; 1062 ip->i_df.if_lastex = idx + 1;
1075 if (cur == NULL) 1063 if (cur == NULL)
1076 rval = XFS_ILOG_DEXT; 1064 rval = XFS_ILOG_DEXT;
@@ -1091,8 +1079,7 @@ xfs_bmap_add_extent_delay_real(
1091 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), 1079 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
1092 STARTBLOCKVAL(PREV.br_startblock)); 1080 STARTBLOCKVAL(PREV.br_startblock));
1093 xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp)); 1081 xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
1094 xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx, 1082 XFS_BMAP_TRACE_POST_UPDATE("RF|RC", ip, idx, XFS_DATA_FORK);
1095 XFS_DATA_FORK);
1096 *dnew = temp; 1083 *dnew = temp;
1097 /* DELTA: The boundary between two in-core extents moved. */ 1084 /* DELTA: The boundary between two in-core extents moved. */
1098 temp = PREV.br_startoff; 1085 temp = PREV.br_startoff;
@@ -1106,10 +1093,10 @@ xfs_bmap_add_extent_delay_real(
1106 * The right neighbor is not contiguous. 1093 * The right neighbor is not contiguous.
1107 */ 1094 */
1108 temp = PREV.br_blockcount - new->br_blockcount; 1095 temp = PREV.br_blockcount - new->br_blockcount;
1109 xfs_bmap_trace_pre_update(fname, "RF", ip, idx, XFS_DATA_FORK); 1096 XFS_BMAP_TRACE_PRE_UPDATE("RF", ip, idx, XFS_DATA_FORK);
1110 xfs_bmbt_set_blockcount(ep, temp); 1097 xfs_bmbt_set_blockcount(ep, temp);
1111 xfs_bmap_trace_insert(fname, "RF", ip, idx + 1, 1, 1098 XFS_BMAP_TRACE_INSERT("RF", ip, idx + 1, 1, new, NULL,
1112 new, NULL, XFS_DATA_FORK); 1099 XFS_DATA_FORK);
1113 xfs_iext_insert(ifp, idx + 1, 1, new); 1100 xfs_iext_insert(ifp, idx + 1, 1, new);
1114 ip->i_df.if_lastex = idx + 1; 1101 ip->i_df.if_lastex = idx + 1;
1115 ip->i_d.di_nextents++; 1102 ip->i_d.di_nextents++;
@@ -1141,7 +1128,7 @@ xfs_bmap_add_extent_delay_real(
1141 (cur ? cur->bc_private.b.allocated : 0)); 1128 (cur ? cur->bc_private.b.allocated : 0));
1142 ep = xfs_iext_get_ext(ifp, idx); 1129 ep = xfs_iext_get_ext(ifp, idx);
1143 xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp)); 1130 xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
1144 xfs_bmap_trace_post_update(fname, "RF", ip, idx, XFS_DATA_FORK); 1131 XFS_BMAP_TRACE_POST_UPDATE("RF", ip, idx, XFS_DATA_FORK);
1145 *dnew = temp; 1132 *dnew = temp;
1146 /* DELTA: One in-core extent is split in two. */ 1133 /* DELTA: One in-core extent is split in two. */
1147 temp = PREV.br_startoff; 1134 temp = PREV.br_startoff;
@@ -1155,7 +1142,7 @@ xfs_bmap_add_extent_delay_real(
1155 * This case is avoided almost all the time. 1142 * This case is avoided almost all the time.
1156 */ 1143 */
1157 temp = new->br_startoff - PREV.br_startoff; 1144 temp = new->br_startoff - PREV.br_startoff;
1158 xfs_bmap_trace_pre_update(fname, "0", ip, idx, XFS_DATA_FORK); 1145 XFS_BMAP_TRACE_PRE_UPDATE("0", ip, idx, XFS_DATA_FORK);
1159 xfs_bmbt_set_blockcount(ep, temp); 1146 xfs_bmbt_set_blockcount(ep, temp);
1160 r[0] = *new; 1147 r[0] = *new;
1161 r[1].br_state = PREV.br_state; 1148 r[1].br_state = PREV.br_state;
@@ -1163,7 +1150,7 @@ xfs_bmap_add_extent_delay_real(
1163 r[1].br_startoff = new_endoff; 1150 r[1].br_startoff = new_endoff;
1164 temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff; 1151 temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff;
1165 r[1].br_blockcount = temp2; 1152 r[1].br_blockcount = temp2;
1166 xfs_bmap_trace_insert(fname, "0", ip, idx + 1, 2, &r[0], &r[1], 1153 XFS_BMAP_TRACE_INSERT("0", ip, idx + 1, 2, &r[0], &r[1],
1167 XFS_DATA_FORK); 1154 XFS_DATA_FORK);
1168 xfs_iext_insert(ifp, idx + 1, 2, &r[0]); 1155 xfs_iext_insert(ifp, idx + 1, 2, &r[0]);
1169 ip->i_df.if_lastex = idx + 1; 1156 ip->i_df.if_lastex = idx + 1;
@@ -1222,13 +1209,11 @@ xfs_bmap_add_extent_delay_real(
1222 } 1209 }
1223 ep = xfs_iext_get_ext(ifp, idx); 1210 ep = xfs_iext_get_ext(ifp, idx);
1224 xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp)); 1211 xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
1225 xfs_bmap_trace_post_update(fname, "0", ip, idx, XFS_DATA_FORK); 1212 XFS_BMAP_TRACE_POST_UPDATE("0", ip, idx, XFS_DATA_FORK);
1226 xfs_bmap_trace_pre_update(fname, "0", ip, idx + 2, 1213 XFS_BMAP_TRACE_PRE_UPDATE("0", ip, idx + 2, XFS_DATA_FORK);
1227 XFS_DATA_FORK);
1228 xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx + 2), 1214 xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx + 2),
1229 NULLSTARTBLOCK((int)temp2)); 1215 NULLSTARTBLOCK((int)temp2));
1230 xfs_bmap_trace_post_update(fname, "0", ip, idx + 2, 1216 XFS_BMAP_TRACE_POST_UPDATE("0", ip, idx + 2, XFS_DATA_FORK);
1231 XFS_DATA_FORK);
1232 *dnew = temp + temp2; 1217 *dnew = temp + temp2;
1233 /* DELTA: One in-core extent is split in three. */ 1218 /* DELTA: One in-core extent is split in three. */
1234 temp = PREV.br_startoff; 1219 temp = PREV.br_startoff;
@@ -1287,9 +1272,6 @@ xfs_bmap_add_extent_unwritten_real(
1287 xfs_btree_cur_t *cur; /* btree cursor */ 1272 xfs_btree_cur_t *cur; /* btree cursor */
1288 xfs_bmbt_rec_t *ep; /* extent entry for idx */ 1273 xfs_bmbt_rec_t *ep; /* extent entry for idx */
1289 int error; /* error return value */ 1274 int error; /* error return value */
1290#ifdef XFS_BMAP_TRACE
1291 static char fname[] = "xfs_bmap_add_extent_unwritten_real";
1292#endif
1293 int i; /* temp state */ 1275 int i; /* temp state */
1294 xfs_ifork_t *ifp; /* inode fork pointer */ 1276 xfs_ifork_t *ifp; /* inode fork pointer */
1295 xfs_fileoff_t new_endoff; /* end offset of new entry */ 1277 xfs_fileoff_t new_endoff; /* end offset of new entry */
@@ -1390,15 +1372,14 @@ xfs_bmap_add_extent_unwritten_real(
1390 * Setting all of a previous oldext extent to newext. 1372 * Setting all of a previous oldext extent to newext.
1391 * The left and right neighbors are both contiguous with new. 1373 * The left and right neighbors are both contiguous with new.
1392 */ 1374 */
1393 xfs_bmap_trace_pre_update(fname, "LF|RF|LC|RC", ip, idx - 1, 1375 XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|LC|RC", ip, idx - 1,
1394 XFS_DATA_FORK); 1376 XFS_DATA_FORK);
1395 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), 1377 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
1396 LEFT.br_blockcount + PREV.br_blockcount + 1378 LEFT.br_blockcount + PREV.br_blockcount +
1397 RIGHT.br_blockcount); 1379 RIGHT.br_blockcount);
1398 xfs_bmap_trace_post_update(fname, "LF|RF|LC|RC", ip, idx - 1, 1380 XFS_BMAP_TRACE_POST_UPDATE("LF|RF|LC|RC", ip, idx - 1,
1399 XFS_DATA_FORK);
1400 xfs_bmap_trace_delete(fname, "LF|RF|LC|RC", ip, idx, 2,
1401 XFS_DATA_FORK); 1381 XFS_DATA_FORK);
1382 XFS_BMAP_TRACE_DELETE("LF|RF|LC|RC", ip, idx, 2, XFS_DATA_FORK);
1402 xfs_iext_remove(ifp, idx, 2); 1383 xfs_iext_remove(ifp, idx, 2);
1403 ip->i_df.if_lastex = idx - 1; 1384 ip->i_df.if_lastex = idx - 1;
1404 ip->i_d.di_nextents -= 2; 1385 ip->i_d.di_nextents -= 2;
@@ -1441,15 +1422,14 @@ xfs_bmap_add_extent_unwritten_real(
1441 * Setting all of a previous oldext extent to newext. 1422 * Setting all of a previous oldext extent to newext.
1442 * The left neighbor is contiguous, the right is not. 1423 * The left neighbor is contiguous, the right is not.
1443 */ 1424 */
1444 xfs_bmap_trace_pre_update(fname, "LF|RF|LC", ip, idx - 1, 1425 XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|LC", ip, idx - 1,
1445 XFS_DATA_FORK); 1426 XFS_DATA_FORK);
1446 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), 1427 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
1447 LEFT.br_blockcount + PREV.br_blockcount); 1428 LEFT.br_blockcount + PREV.br_blockcount);
1448 xfs_bmap_trace_post_update(fname, "LF|RF|LC", ip, idx - 1, 1429 XFS_BMAP_TRACE_POST_UPDATE("LF|RF|LC", ip, idx - 1,
1449 XFS_DATA_FORK); 1430 XFS_DATA_FORK);
1450 ip->i_df.if_lastex = idx - 1; 1431 ip->i_df.if_lastex = idx - 1;
1451 xfs_bmap_trace_delete(fname, "LF|RF|LC", ip, idx, 1, 1432 XFS_BMAP_TRACE_DELETE("LF|RF|LC", ip, idx, 1, XFS_DATA_FORK);
1452 XFS_DATA_FORK);
1453 xfs_iext_remove(ifp, idx, 1); 1433 xfs_iext_remove(ifp, idx, 1);
1454 ip->i_d.di_nextents--; 1434 ip->i_d.di_nextents--;
1455 if (cur == NULL) 1435 if (cur == NULL)
@@ -1484,16 +1464,15 @@ xfs_bmap_add_extent_unwritten_real(
1484 * Setting all of a previous oldext extent to newext. 1464 * Setting all of a previous oldext extent to newext.
1485 * The right neighbor is contiguous, the left is not. 1465 * The right neighbor is contiguous, the left is not.
1486 */ 1466 */
1487 xfs_bmap_trace_pre_update(fname, "LF|RF|RC", ip, idx, 1467 XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|RC", ip, idx,
1488 XFS_DATA_FORK); 1468 XFS_DATA_FORK);
1489 xfs_bmbt_set_blockcount(ep, 1469 xfs_bmbt_set_blockcount(ep,
1490 PREV.br_blockcount + RIGHT.br_blockcount); 1470 PREV.br_blockcount + RIGHT.br_blockcount);
1491 xfs_bmbt_set_state(ep, newext); 1471 xfs_bmbt_set_state(ep, newext);
1492 xfs_bmap_trace_post_update(fname, "LF|RF|RC", ip, idx, 1472 XFS_BMAP_TRACE_POST_UPDATE("LF|RF|RC", ip, idx,
1493 XFS_DATA_FORK); 1473 XFS_DATA_FORK);
1494 ip->i_df.if_lastex = idx; 1474 ip->i_df.if_lastex = idx;
1495 xfs_bmap_trace_delete(fname, "LF|RF|RC", ip, idx + 1, 1, 1475 XFS_BMAP_TRACE_DELETE("LF|RF|RC", ip, idx + 1, 1, XFS_DATA_FORK);
1496 XFS_DATA_FORK);
1497 xfs_iext_remove(ifp, idx + 1, 1); 1476 xfs_iext_remove(ifp, idx + 1, 1);
1498 ip->i_d.di_nextents--; 1477 ip->i_d.di_nextents--;
1499 if (cur == NULL) 1478 if (cur == NULL)
@@ -1529,10 +1508,10 @@ xfs_bmap_add_extent_unwritten_real(
1529 * Neither the left nor right neighbors are contiguous with 1508 * Neither the left nor right neighbors are contiguous with
1530 * the new one. 1509 * the new one.
1531 */ 1510 */
1532 xfs_bmap_trace_pre_update(fname, "LF|RF", ip, idx, 1511 XFS_BMAP_TRACE_PRE_UPDATE("LF|RF", ip, idx,
1533 XFS_DATA_FORK); 1512 XFS_DATA_FORK);
1534 xfs_bmbt_set_state(ep, newext); 1513 xfs_bmbt_set_state(ep, newext);
1535 xfs_bmap_trace_post_update(fname, "LF|RF", ip, idx, 1514 XFS_BMAP_TRACE_POST_UPDATE("LF|RF", ip, idx,
1536 XFS_DATA_FORK); 1515 XFS_DATA_FORK);
1537 ip->i_df.if_lastex = idx; 1516 ip->i_df.if_lastex = idx;
1538 if (cur == NULL) 1517 if (cur == NULL)
@@ -1559,21 +1538,21 @@ xfs_bmap_add_extent_unwritten_real(
1559 * Setting the first part of a previous oldext extent to newext. 1538 * Setting the first part of a previous oldext extent to newext.
1560 * The left neighbor is contiguous. 1539 * The left neighbor is contiguous.
1561 */ 1540 */
1562 xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx - 1, 1541 XFS_BMAP_TRACE_PRE_UPDATE("LF|LC", ip, idx - 1,
1563 XFS_DATA_FORK); 1542 XFS_DATA_FORK);
1564 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), 1543 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
1565 LEFT.br_blockcount + new->br_blockcount); 1544 LEFT.br_blockcount + new->br_blockcount);
1566 xfs_bmbt_set_startoff(ep, 1545 xfs_bmbt_set_startoff(ep,
1567 PREV.br_startoff + new->br_blockcount); 1546 PREV.br_startoff + new->br_blockcount);
1568 xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx - 1, 1547 XFS_BMAP_TRACE_POST_UPDATE("LF|LC", ip, idx - 1,
1569 XFS_DATA_FORK); 1548 XFS_DATA_FORK);
1570 xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx, 1549 XFS_BMAP_TRACE_PRE_UPDATE("LF|LC", ip, idx,
1571 XFS_DATA_FORK); 1550 XFS_DATA_FORK);
1572 xfs_bmbt_set_startblock(ep, 1551 xfs_bmbt_set_startblock(ep,
1573 new->br_startblock + new->br_blockcount); 1552 new->br_startblock + new->br_blockcount);
1574 xfs_bmbt_set_blockcount(ep, 1553 xfs_bmbt_set_blockcount(ep,
1575 PREV.br_blockcount - new->br_blockcount); 1554 PREV.br_blockcount - new->br_blockcount);
1576 xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx, 1555 XFS_BMAP_TRACE_POST_UPDATE("LF|LC", ip, idx,
1577 XFS_DATA_FORK); 1556 XFS_DATA_FORK);
1578 ip->i_df.if_lastex = idx - 1; 1557 ip->i_df.if_lastex = idx - 1;
1579 if (cur == NULL) 1558 if (cur == NULL)
@@ -1610,15 +1589,15 @@ xfs_bmap_add_extent_unwritten_real(
1610 * Setting the first part of a previous oldext extent to newext. 1589 * Setting the first part of a previous oldext extent to newext.
1611 * The left neighbor is not contiguous. 1590 * The left neighbor is not contiguous.
1612 */ 1591 */
1613 xfs_bmap_trace_pre_update(fname, "LF", ip, idx, XFS_DATA_FORK); 1592 XFS_BMAP_TRACE_PRE_UPDATE("LF", ip, idx, XFS_DATA_FORK);
1614 ASSERT(ep && xfs_bmbt_get_state(ep) == oldext); 1593 ASSERT(ep && xfs_bmbt_get_state(ep) == oldext);
1615 xfs_bmbt_set_startoff(ep, new_endoff); 1594 xfs_bmbt_set_startoff(ep, new_endoff);
1616 xfs_bmbt_set_blockcount(ep, 1595 xfs_bmbt_set_blockcount(ep,
1617 PREV.br_blockcount - new->br_blockcount); 1596 PREV.br_blockcount - new->br_blockcount);
1618 xfs_bmbt_set_startblock(ep, 1597 xfs_bmbt_set_startblock(ep,
1619 new->br_startblock + new->br_blockcount); 1598 new->br_startblock + new->br_blockcount);
1620 xfs_bmap_trace_post_update(fname, "LF", ip, idx, XFS_DATA_FORK); 1599 XFS_BMAP_TRACE_POST_UPDATE("LF", ip, idx, XFS_DATA_FORK);
1621 xfs_bmap_trace_insert(fname, "LF", ip, idx, 1, new, NULL, 1600 XFS_BMAP_TRACE_INSERT("LF", ip, idx, 1, new, NULL,
1622 XFS_DATA_FORK); 1601 XFS_DATA_FORK);
1623 xfs_iext_insert(ifp, idx, 1, new); 1602 xfs_iext_insert(ifp, idx, 1, new);
1624 ip->i_df.if_lastex = idx; 1603 ip->i_df.if_lastex = idx;
@@ -1653,18 +1632,18 @@ xfs_bmap_add_extent_unwritten_real(
1653 * Setting the last part of a previous oldext extent to newext. 1632 * Setting the last part of a previous oldext extent to newext.
1654 * The right neighbor is contiguous with the new allocation. 1633 * The right neighbor is contiguous with the new allocation.
1655 */ 1634 */
1656 xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx, 1635 XFS_BMAP_TRACE_PRE_UPDATE("RF|RC", ip, idx,
1657 XFS_DATA_FORK); 1636 XFS_DATA_FORK);
1658 xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx + 1, 1637 XFS_BMAP_TRACE_PRE_UPDATE("RF|RC", ip, idx + 1,
1659 XFS_DATA_FORK); 1638 XFS_DATA_FORK);
1660 xfs_bmbt_set_blockcount(ep, 1639 xfs_bmbt_set_blockcount(ep,
1661 PREV.br_blockcount - new->br_blockcount); 1640 PREV.br_blockcount - new->br_blockcount);
1662 xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx, 1641 XFS_BMAP_TRACE_POST_UPDATE("RF|RC", ip, idx,
1663 XFS_DATA_FORK); 1642 XFS_DATA_FORK);
1664 xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1), 1643 xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1),
1665 new->br_startoff, new->br_startblock, 1644 new->br_startoff, new->br_startblock,
1666 new->br_blockcount + RIGHT.br_blockcount, newext); 1645 new->br_blockcount + RIGHT.br_blockcount, newext);
1667 xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx + 1, 1646 XFS_BMAP_TRACE_POST_UPDATE("RF|RC", ip, idx + 1,
1668 XFS_DATA_FORK); 1647 XFS_DATA_FORK);
1669 ip->i_df.if_lastex = idx + 1; 1648 ip->i_df.if_lastex = idx + 1;
1670 if (cur == NULL) 1649 if (cur == NULL)
@@ -1700,12 +1679,12 @@ xfs_bmap_add_extent_unwritten_real(
1700 * Setting the last part of a previous oldext extent to newext. 1679 * Setting the last part of a previous oldext extent to newext.
1701 * The right neighbor is not contiguous. 1680 * The right neighbor is not contiguous.
1702 */ 1681 */
1703 xfs_bmap_trace_pre_update(fname, "RF", ip, idx, XFS_DATA_FORK); 1682 XFS_BMAP_TRACE_PRE_UPDATE("RF", ip, idx, XFS_DATA_FORK);
1704 xfs_bmbt_set_blockcount(ep, 1683 xfs_bmbt_set_blockcount(ep,
1705 PREV.br_blockcount - new->br_blockcount); 1684 PREV.br_blockcount - new->br_blockcount);
1706 xfs_bmap_trace_post_update(fname, "RF", ip, idx, XFS_DATA_FORK); 1685 XFS_BMAP_TRACE_POST_UPDATE("RF", ip, idx, XFS_DATA_FORK);
1707 xfs_bmap_trace_insert(fname, "RF", ip, idx + 1, 1, 1686 XFS_BMAP_TRACE_INSERT("RF", ip, idx + 1, 1, new, NULL,
1708 new, NULL, XFS_DATA_FORK); 1687 XFS_DATA_FORK);
1709 xfs_iext_insert(ifp, idx + 1, 1, new); 1688 xfs_iext_insert(ifp, idx + 1, 1, new);
1710 ip->i_df.if_lastex = idx + 1; 1689 ip->i_df.if_lastex = idx + 1;
1711 ip->i_d.di_nextents++; 1690 ip->i_d.di_nextents++;
@@ -1744,17 +1723,17 @@ xfs_bmap_add_extent_unwritten_real(
1744 * newext. Contiguity is impossible here. 1723 * newext. Contiguity is impossible here.
1745 * One extent becomes three extents. 1724 * One extent becomes three extents.
1746 */ 1725 */
1747 xfs_bmap_trace_pre_update(fname, "0", ip, idx, XFS_DATA_FORK); 1726 XFS_BMAP_TRACE_PRE_UPDATE("0", ip, idx, XFS_DATA_FORK);
1748 xfs_bmbt_set_blockcount(ep, 1727 xfs_bmbt_set_blockcount(ep,
1749 new->br_startoff - PREV.br_startoff); 1728 new->br_startoff - PREV.br_startoff);
1750 xfs_bmap_trace_post_update(fname, "0", ip, idx, XFS_DATA_FORK); 1729 XFS_BMAP_TRACE_POST_UPDATE("0", ip, idx, XFS_DATA_FORK);
1751 r[0] = *new; 1730 r[0] = *new;
1752 r[1].br_startoff = new_endoff; 1731 r[1].br_startoff = new_endoff;
1753 r[1].br_blockcount = 1732 r[1].br_blockcount =
1754 PREV.br_startoff + PREV.br_blockcount - new_endoff; 1733 PREV.br_startoff + PREV.br_blockcount - new_endoff;
1755 r[1].br_startblock = new->br_startblock + new->br_blockcount; 1734 r[1].br_startblock = new->br_startblock + new->br_blockcount;
1756 r[1].br_state = oldext; 1735 r[1].br_state = oldext;
1757 xfs_bmap_trace_insert(fname, "0", ip, idx + 1, 2, &r[0], &r[1], 1736 XFS_BMAP_TRACE_INSERT("0", ip, idx + 1, 2, &r[0], &r[1],
1758 XFS_DATA_FORK); 1737 XFS_DATA_FORK);
1759 xfs_iext_insert(ifp, idx + 1, 2, &r[0]); 1738 xfs_iext_insert(ifp, idx + 1, 2, &r[0]);
1760 ip->i_df.if_lastex = idx + 1; 1739 ip->i_df.if_lastex = idx + 1;
@@ -1845,9 +1824,6 @@ xfs_bmap_add_extent_hole_delay(
1845 int rsvd) /* OK to allocate reserved blocks */ 1824 int rsvd) /* OK to allocate reserved blocks */
1846{ 1825{
1847 xfs_bmbt_rec_t *ep; /* extent record for idx */ 1826 xfs_bmbt_rec_t *ep; /* extent record for idx */
1848#ifdef XFS_BMAP_TRACE
1849 static char fname[] = "xfs_bmap_add_extent_hole_delay";
1850#endif
1851 xfs_ifork_t *ifp; /* inode fork pointer */ 1827 xfs_ifork_t *ifp; /* inode fork pointer */
1852 xfs_bmbt_irec_t left; /* left neighbor extent entry */ 1828 xfs_bmbt_irec_t left; /* left neighbor extent entry */
1853 xfs_filblks_t newlen=0; /* new indirect size */ 1829 xfs_filblks_t newlen=0; /* new indirect size */
@@ -1919,7 +1895,7 @@ xfs_bmap_add_extent_hole_delay(
1919 */ 1895 */
1920 temp = left.br_blockcount + new->br_blockcount + 1896 temp = left.br_blockcount + new->br_blockcount +
1921 right.br_blockcount; 1897 right.br_blockcount;
1922 xfs_bmap_trace_pre_update(fname, "LC|RC", ip, idx - 1, 1898 XFS_BMAP_TRACE_PRE_UPDATE("LC|RC", ip, idx - 1,
1923 XFS_DATA_FORK); 1899 XFS_DATA_FORK);
1924 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp); 1900 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp);
1925 oldlen = STARTBLOCKVAL(left.br_startblock) + 1901 oldlen = STARTBLOCKVAL(left.br_startblock) +
@@ -1928,10 +1904,9 @@ xfs_bmap_add_extent_hole_delay(
1928 newlen = xfs_bmap_worst_indlen(ip, temp); 1904 newlen = xfs_bmap_worst_indlen(ip, temp);
1929 xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1), 1905 xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1),
1930 NULLSTARTBLOCK((int)newlen)); 1906 NULLSTARTBLOCK((int)newlen));
1931 xfs_bmap_trace_post_update(fname, "LC|RC", ip, idx - 1, 1907 XFS_BMAP_TRACE_POST_UPDATE("LC|RC", ip, idx - 1,
1932 XFS_DATA_FORK);
1933 xfs_bmap_trace_delete(fname, "LC|RC", ip, idx, 1,
1934 XFS_DATA_FORK); 1908 XFS_DATA_FORK);
1909 XFS_BMAP_TRACE_DELETE("LC|RC", ip, idx, 1, XFS_DATA_FORK);
1935 xfs_iext_remove(ifp, idx, 1); 1910 xfs_iext_remove(ifp, idx, 1);
1936 ip->i_df.if_lastex = idx - 1; 1911 ip->i_df.if_lastex = idx - 1;
1937 /* DELTA: Two in-core extents were replaced by one. */ 1912 /* DELTA: Two in-core extents were replaced by one. */
@@ -1946,7 +1921,7 @@ xfs_bmap_add_extent_hole_delay(
1946 * Merge the new allocation with the left neighbor. 1921 * Merge the new allocation with the left neighbor.
1947 */ 1922 */
1948 temp = left.br_blockcount + new->br_blockcount; 1923 temp = left.br_blockcount + new->br_blockcount;
1949 xfs_bmap_trace_pre_update(fname, "LC", ip, idx - 1, 1924 XFS_BMAP_TRACE_PRE_UPDATE("LC", ip, idx - 1,
1950 XFS_DATA_FORK); 1925 XFS_DATA_FORK);
1951 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp); 1926 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp);
1952 oldlen = STARTBLOCKVAL(left.br_startblock) + 1927 oldlen = STARTBLOCKVAL(left.br_startblock) +
@@ -1954,7 +1929,7 @@ xfs_bmap_add_extent_hole_delay(
1954 newlen = xfs_bmap_worst_indlen(ip, temp); 1929 newlen = xfs_bmap_worst_indlen(ip, temp);
1955 xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1), 1930 xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1),
1956 NULLSTARTBLOCK((int)newlen)); 1931 NULLSTARTBLOCK((int)newlen));
1957 xfs_bmap_trace_post_update(fname, "LC", ip, idx - 1, 1932 XFS_BMAP_TRACE_POST_UPDATE("LC", ip, idx - 1,
1958 XFS_DATA_FORK); 1933 XFS_DATA_FORK);
1959 ip->i_df.if_lastex = idx - 1; 1934 ip->i_df.if_lastex = idx - 1;
1960 /* DELTA: One in-core extent grew into a hole. */ 1935 /* DELTA: One in-core extent grew into a hole. */
@@ -1968,14 +1943,14 @@ xfs_bmap_add_extent_hole_delay(
1968 * on the right. 1943 * on the right.
1969 * Merge the new allocation with the right neighbor. 1944 * Merge the new allocation with the right neighbor.
1970 */ 1945 */
1971 xfs_bmap_trace_pre_update(fname, "RC", ip, idx, XFS_DATA_FORK); 1946 XFS_BMAP_TRACE_PRE_UPDATE("RC", ip, idx, XFS_DATA_FORK);
1972 temp = new->br_blockcount + right.br_blockcount; 1947 temp = new->br_blockcount + right.br_blockcount;
1973 oldlen = STARTBLOCKVAL(new->br_startblock) + 1948 oldlen = STARTBLOCKVAL(new->br_startblock) +
1974 STARTBLOCKVAL(right.br_startblock); 1949 STARTBLOCKVAL(right.br_startblock);
1975 newlen = xfs_bmap_worst_indlen(ip, temp); 1950 newlen = xfs_bmap_worst_indlen(ip, temp);
1976 xfs_bmbt_set_allf(ep, new->br_startoff, 1951 xfs_bmbt_set_allf(ep, new->br_startoff,
1977 NULLSTARTBLOCK((int)newlen), temp, right.br_state); 1952 NULLSTARTBLOCK((int)newlen), temp, right.br_state);
1978 xfs_bmap_trace_post_update(fname, "RC", ip, idx, XFS_DATA_FORK); 1953 XFS_BMAP_TRACE_POST_UPDATE("RC", ip, idx, XFS_DATA_FORK);
1979 ip->i_df.if_lastex = idx; 1954 ip->i_df.if_lastex = idx;
1980 /* DELTA: One in-core extent grew into a hole. */ 1955 /* DELTA: One in-core extent grew into a hole. */
1981 temp2 = temp; 1956 temp2 = temp;
@@ -1989,7 +1964,7 @@ xfs_bmap_add_extent_hole_delay(
1989 * Insert a new entry. 1964 * Insert a new entry.
1990 */ 1965 */
1991 oldlen = newlen = 0; 1966 oldlen = newlen = 0;
1992 xfs_bmap_trace_insert(fname, "0", ip, idx, 1, new, NULL, 1967 XFS_BMAP_TRACE_INSERT("0", ip, idx, 1, new, NULL,
1993 XFS_DATA_FORK); 1968 XFS_DATA_FORK);
1994 xfs_iext_insert(ifp, idx, 1, new); 1969 xfs_iext_insert(ifp, idx, 1, new);
1995 ip->i_df.if_lastex = idx; 1970 ip->i_df.if_lastex = idx;
@@ -2039,9 +2014,6 @@ xfs_bmap_add_extent_hole_real(
2039{ 2014{
2040 xfs_bmbt_rec_t *ep; /* pointer to extent entry ins. point */ 2015 xfs_bmbt_rec_t *ep; /* pointer to extent entry ins. point */
2041 int error; /* error return value */ 2016 int error; /* error return value */
2042#ifdef XFS_BMAP_TRACE
2043 static char fname[] = "xfs_bmap_add_extent_hole_real";
2044#endif
2045 int i; /* temp state */ 2017 int i; /* temp state */
2046 xfs_ifork_t *ifp; /* inode fork pointer */ 2018 xfs_ifork_t *ifp; /* inode fork pointer */
2047 xfs_bmbt_irec_t left; /* left neighbor extent entry */ 2019 xfs_bmbt_irec_t left; /* left neighbor extent entry */
@@ -2118,15 +2090,14 @@ xfs_bmap_add_extent_hole_real(
2118 * left and on the right. 2090 * left and on the right.
2119 * Merge all three into a single extent record. 2091 * Merge all three into a single extent record.
2120 */ 2092 */
2121 xfs_bmap_trace_pre_update(fname, "LC|RC", ip, idx - 1, 2093 XFS_BMAP_TRACE_PRE_UPDATE("LC|RC", ip, idx - 1,
2122 whichfork); 2094 whichfork);
2123 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), 2095 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
2124 left.br_blockcount + new->br_blockcount + 2096 left.br_blockcount + new->br_blockcount +
2125 right.br_blockcount); 2097 right.br_blockcount);
2126 xfs_bmap_trace_post_update(fname, "LC|RC", ip, idx - 1, 2098 XFS_BMAP_TRACE_POST_UPDATE("LC|RC", ip, idx - 1,
2127 whichfork); 2099 whichfork);
2128 xfs_bmap_trace_delete(fname, "LC|RC", ip, 2100 XFS_BMAP_TRACE_DELETE("LC|RC", ip, idx, 1, whichfork);
2129 idx, 1, whichfork);
2130 xfs_iext_remove(ifp, idx, 1); 2101 xfs_iext_remove(ifp, idx, 1);
2131 ifp->if_lastex = idx - 1; 2102 ifp->if_lastex = idx - 1;
2132 XFS_IFORK_NEXT_SET(ip, whichfork, 2103 XFS_IFORK_NEXT_SET(ip, whichfork,
@@ -2168,10 +2139,10 @@ xfs_bmap_add_extent_hole_real(
2168 * on the left. 2139 * on the left.
2169 * Merge the new allocation with the left neighbor. 2140 * Merge the new allocation with the left neighbor.
2170 */ 2141 */
2171 xfs_bmap_trace_pre_update(fname, "LC", ip, idx - 1, whichfork); 2142 XFS_BMAP_TRACE_PRE_UPDATE("LC", ip, idx - 1, whichfork);
2172 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), 2143 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
2173 left.br_blockcount + new->br_blockcount); 2144 left.br_blockcount + new->br_blockcount);
2174 xfs_bmap_trace_post_update(fname, "LC", ip, idx - 1, whichfork); 2145 XFS_BMAP_TRACE_POST_UPDATE("LC", ip, idx - 1, whichfork);
2175 ifp->if_lastex = idx - 1; 2146 ifp->if_lastex = idx - 1;
2176 if (cur == NULL) { 2147 if (cur == NULL) {
2177 rval = XFS_ILOG_FEXT(whichfork); 2148 rval = XFS_ILOG_FEXT(whichfork);
@@ -2202,11 +2173,11 @@ xfs_bmap_add_extent_hole_real(
2202 * on the right. 2173 * on the right.
2203 * Merge the new allocation with the right neighbor. 2174 * Merge the new allocation with the right neighbor.
2204 */ 2175 */
2205 xfs_bmap_trace_pre_update(fname, "RC", ip, idx, whichfork); 2176 XFS_BMAP_TRACE_PRE_UPDATE("RC", ip, idx, whichfork);
2206 xfs_bmbt_set_allf(ep, new->br_startoff, new->br_startblock, 2177 xfs_bmbt_set_allf(ep, new->br_startoff, new->br_startblock,
2207 new->br_blockcount + right.br_blockcount, 2178 new->br_blockcount + right.br_blockcount,
2208 right.br_state); 2179 right.br_state);
2209 xfs_bmap_trace_post_update(fname, "RC", ip, idx, whichfork); 2180 XFS_BMAP_TRACE_POST_UPDATE("RC", ip, idx, whichfork);
2210 ifp->if_lastex = idx; 2181 ifp->if_lastex = idx;
2211 if (cur == NULL) { 2182 if (cur == NULL) {
2212 rval = XFS_ILOG_FEXT(whichfork); 2183 rval = XFS_ILOG_FEXT(whichfork);
@@ -2237,8 +2208,7 @@ xfs_bmap_add_extent_hole_real(
2237 * real allocation. 2208 * real allocation.
2238 * Insert a new entry. 2209 * Insert a new entry.
2239 */ 2210 */
2240 xfs_bmap_trace_insert(fname, "0", ip, idx, 1, new, NULL, 2211 XFS_BMAP_TRACE_INSERT("0", ip, idx, 1, new, NULL, whichfork);
2241 whichfork);
2242 xfs_iext_insert(ifp, idx, 1, new); 2212 xfs_iext_insert(ifp, idx, 1, new);
2243 ifp->if_lastex = idx; 2213 ifp->if_lastex = idx;
2244 XFS_IFORK_NEXT_SET(ip, whichfork, 2214 XFS_IFORK_NEXT_SET(ip, whichfork,
@@ -2605,12 +2575,10 @@ xfs_bmap_rtalloc(
2605 xfs_extlen_t prod = 0; /* product factor for allocators */ 2575 xfs_extlen_t prod = 0; /* product factor for allocators */
2606 xfs_extlen_t ralen = 0; /* realtime allocation length */ 2576 xfs_extlen_t ralen = 0; /* realtime allocation length */
2607 xfs_extlen_t align; /* minimum allocation alignment */ 2577 xfs_extlen_t align; /* minimum allocation alignment */
2608 xfs_rtblock_t rtx; /* realtime extent number */
2609 xfs_rtblock_t rtb; 2578 xfs_rtblock_t rtb;
2610 2579
2611 mp = ap->ip->i_mount; 2580 mp = ap->ip->i_mount;
2612 align = ap->ip->i_d.di_extsize ? 2581 align = xfs_get_extsz_hint(ap->ip);
2613 ap->ip->i_d.di_extsize : mp->m_sb.sb_rextsize;
2614 prod = align / mp->m_sb.sb_rextsize; 2582 prod = align / mp->m_sb.sb_rextsize;
2615 error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp, 2583 error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,
2616 align, 1, ap->eof, 0, 2584 align, 1, ap->eof, 0,
@@ -2644,6 +2612,8 @@ xfs_bmap_rtalloc(
2644 * pick an extent that will space things out in the rt area. 2612 * pick an extent that will space things out in the rt area.
2645 */ 2613 */
2646 if (ap->eof && ap->off == 0) { 2614 if (ap->eof && ap->off == 0) {
2615 xfs_rtblock_t uninitialized_var(rtx); /* realtime extent no */
2616
2647 error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx); 2617 error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx);
2648 if (error) 2618 if (error)
2649 return error; 2619 return error;
@@ -2715,9 +2685,7 @@ xfs_bmap_btalloc(
2715 int error; 2685 int error;
2716 2686
2717 mp = ap->ip->i_mount; 2687 mp = ap->ip->i_mount;
2718 align = (ap->userdata && ap->ip->i_d.di_extsize && 2688 align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0;
2719 (ap->ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)) ?
2720 ap->ip->i_d.di_extsize : 0;
2721 if (unlikely(align)) { 2689 if (unlikely(align)) {
2722 error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp, 2690 error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,
2723 align, 0, ap->eof, 0, ap->conv, 2691 align, 0, ap->eof, 0, ap->conv,
@@ -2727,9 +2695,15 @@ xfs_bmap_btalloc(
2727 } 2695 }
2728 nullfb = ap->firstblock == NULLFSBLOCK; 2696 nullfb = ap->firstblock == NULLFSBLOCK;
2729 fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock); 2697 fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock);
2730 if (nullfb) 2698 if (nullfb) {
2731 ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino); 2699 if (ap->userdata && xfs_inode_is_filestream(ap->ip)) {
2732 else 2700 ag = xfs_filestream_lookup_ag(ap->ip);
2701 ag = (ag != NULLAGNUMBER) ? ag : 0;
2702 ap->rval = XFS_AGB_TO_FSB(mp, ag, 0);
2703 } else {
2704 ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
2705 }
2706 } else
2733 ap->rval = ap->firstblock; 2707 ap->rval = ap->firstblock;
2734 2708
2735 xfs_bmap_adjacent(ap); 2709 xfs_bmap_adjacent(ap);
@@ -2753,13 +2727,22 @@ xfs_bmap_btalloc(
2753 args.firstblock = ap->firstblock; 2727 args.firstblock = ap->firstblock;
2754 blen = 0; 2728 blen = 0;
2755 if (nullfb) { 2729 if (nullfb) {
2756 args.type = XFS_ALLOCTYPE_START_BNO; 2730 if (ap->userdata && xfs_inode_is_filestream(ap->ip))
2731 args.type = XFS_ALLOCTYPE_NEAR_BNO;
2732 else
2733 args.type = XFS_ALLOCTYPE_START_BNO;
2757 args.total = ap->total; 2734 args.total = ap->total;
2735
2758 /* 2736 /*
2759 * Find the longest available space. 2737 * Search for an allocation group with a single extent
2760 * We're going to try for the whole allocation at once. 2738 * large enough for the request.
2739 *
2740 * If one isn't found, then adjust the minimum allocation
2741 * size to the largest space found.
2761 */ 2742 */
2762 startag = ag = XFS_FSB_TO_AGNO(mp, args.fsbno); 2743 startag = ag = XFS_FSB_TO_AGNO(mp, args.fsbno);
2744 if (startag == NULLAGNUMBER)
2745 startag = ag = 0;
2763 notinit = 0; 2746 notinit = 0;
2764 down_read(&mp->m_peraglock); 2747 down_read(&mp->m_peraglock);
2765 while (blen < ap->alen) { 2748 while (blen < ap->alen) {
@@ -2785,6 +2768,35 @@ xfs_bmap_btalloc(
2785 blen = longest; 2768 blen = longest;
2786 } else 2769 } else
2787 notinit = 1; 2770 notinit = 1;
2771
2772 if (xfs_inode_is_filestream(ap->ip)) {
2773 if (blen >= ap->alen)
2774 break;
2775
2776 if (ap->userdata) {
2777 /*
2778 * If startag is an invalid AG, we've
2779 * come here once before and
2780 * xfs_filestream_new_ag picked the
2781 * best currently available.
2782 *
2783 * Don't continue looping, since we
2784 * could loop forever.
2785 */
2786 if (startag == NULLAGNUMBER)
2787 break;
2788
2789 error = xfs_filestream_new_ag(ap, &ag);
2790 if (error) {
2791 up_read(&mp->m_peraglock);
2792 return error;
2793 }
2794
2795 /* loop again to set 'blen'*/
2796 startag = NULLAGNUMBER;
2797 continue;
2798 }
2799 }
2788 if (++ag == mp->m_sb.sb_agcount) 2800 if (++ag == mp->m_sb.sb_agcount)
2789 ag = 0; 2801 ag = 0;
2790 if (ag == startag) 2802 if (ag == startag)
@@ -2809,17 +2821,27 @@ xfs_bmap_btalloc(
2809 */ 2821 */
2810 else 2822 else
2811 args.minlen = ap->alen; 2823 args.minlen = ap->alen;
2824
2825 /*
2826 * set the failure fallback case to look in the selected
2827 * AG as the stream may have moved.
2828 */
2829 if (xfs_inode_is_filestream(ap->ip))
2830 ap->rval = args.fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
2812 } else if (ap->low) { 2831 } else if (ap->low) {
2813 args.type = XFS_ALLOCTYPE_START_BNO; 2832 if (xfs_inode_is_filestream(ap->ip))
2833 args.type = XFS_ALLOCTYPE_FIRST_AG;
2834 else
2835 args.type = XFS_ALLOCTYPE_START_BNO;
2814 args.total = args.minlen = ap->minlen; 2836 args.total = args.minlen = ap->minlen;
2815 } else { 2837 } else {
2816 args.type = XFS_ALLOCTYPE_NEAR_BNO; 2838 args.type = XFS_ALLOCTYPE_NEAR_BNO;
2817 args.total = ap->total; 2839 args.total = ap->total;
2818 args.minlen = ap->minlen; 2840 args.minlen = ap->minlen;
2819 } 2841 }
2820 if (unlikely(ap->userdata && ap->ip->i_d.di_extsize && 2842 /* apply extent size hints if obtained earlier */
2821 (ap->ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE))) { 2843 if (unlikely(align)) {
2822 args.prod = ap->ip->i_d.di_extsize; 2844 args.prod = align;
2823 if ((args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod))) 2845 if ((args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod)))
2824 args.mod = (xfs_extlen_t)(args.prod - args.mod); 2846 args.mod = (xfs_extlen_t)(args.prod - args.mod);
2825 } else if (mp->m_sb.sb_blocksize >= NBPP) { 2847 } else if (mp->m_sb.sb_blocksize >= NBPP) {
@@ -3051,9 +3073,6 @@ xfs_bmap_del_extent(
3051 xfs_bmbt_rec_t *ep; /* current extent entry pointer */ 3073 xfs_bmbt_rec_t *ep; /* current extent entry pointer */
3052 int error; /* error return value */ 3074 int error; /* error return value */
3053 int flags; /* inode logging flags */ 3075 int flags; /* inode logging flags */
3054#ifdef XFS_BMAP_TRACE
3055 static char fname[] = "xfs_bmap_del_extent";
3056#endif
3057 xfs_bmbt_irec_t got; /* current extent entry */ 3076 xfs_bmbt_irec_t got; /* current extent entry */
3058 xfs_fileoff_t got_endoff; /* first offset past got */ 3077 xfs_fileoff_t got_endoff; /* first offset past got */
3059 int i; /* temp state */ 3078 int i; /* temp state */
@@ -3147,7 +3166,7 @@ xfs_bmap_del_extent(
3147 /* 3166 /*
3148 * Matches the whole extent. Delete the entry. 3167 * Matches the whole extent. Delete the entry.
3149 */ 3168 */
3150 xfs_bmap_trace_delete(fname, "3", ip, idx, 1, whichfork); 3169 XFS_BMAP_TRACE_DELETE("3", ip, idx, 1, whichfork);
3151 xfs_iext_remove(ifp, idx, 1); 3170 xfs_iext_remove(ifp, idx, 1);
3152 ifp->if_lastex = idx; 3171 ifp->if_lastex = idx;
3153 if (delay) 3172 if (delay)
@@ -3168,7 +3187,7 @@ xfs_bmap_del_extent(
3168 /* 3187 /*
3169 * Deleting the first part of the extent. 3188 * Deleting the first part of the extent.
3170 */ 3189 */
3171 xfs_bmap_trace_pre_update(fname, "2", ip, idx, whichfork); 3190 XFS_BMAP_TRACE_PRE_UPDATE("2", ip, idx, whichfork);
3172 xfs_bmbt_set_startoff(ep, del_endoff); 3191 xfs_bmbt_set_startoff(ep, del_endoff);
3173 temp = got.br_blockcount - del->br_blockcount; 3192 temp = got.br_blockcount - del->br_blockcount;
3174 xfs_bmbt_set_blockcount(ep, temp); 3193 xfs_bmbt_set_blockcount(ep, temp);
@@ -3177,13 +3196,13 @@ xfs_bmap_del_extent(
3177 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), 3196 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
3178 da_old); 3197 da_old);
3179 xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp)); 3198 xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
3180 xfs_bmap_trace_post_update(fname, "2", ip, idx, 3199 XFS_BMAP_TRACE_POST_UPDATE("2", ip, idx,
3181 whichfork); 3200 whichfork);
3182 da_new = temp; 3201 da_new = temp;
3183 break; 3202 break;
3184 } 3203 }
3185 xfs_bmbt_set_startblock(ep, del_endblock); 3204 xfs_bmbt_set_startblock(ep, del_endblock);
3186 xfs_bmap_trace_post_update(fname, "2", ip, idx, whichfork); 3205 XFS_BMAP_TRACE_POST_UPDATE("2", ip, idx, whichfork);
3187 if (!cur) { 3206 if (!cur) {
3188 flags |= XFS_ILOG_FEXT(whichfork); 3207 flags |= XFS_ILOG_FEXT(whichfork);
3189 break; 3208 break;
@@ -3199,19 +3218,19 @@ xfs_bmap_del_extent(
3199 * Deleting the last part of the extent. 3218 * Deleting the last part of the extent.
3200 */ 3219 */
3201 temp = got.br_blockcount - del->br_blockcount; 3220 temp = got.br_blockcount - del->br_blockcount;
3202 xfs_bmap_trace_pre_update(fname, "1", ip, idx, whichfork); 3221 XFS_BMAP_TRACE_PRE_UPDATE("1", ip, idx, whichfork);
3203 xfs_bmbt_set_blockcount(ep, temp); 3222 xfs_bmbt_set_blockcount(ep, temp);
3204 ifp->if_lastex = idx; 3223 ifp->if_lastex = idx;
3205 if (delay) { 3224 if (delay) {
3206 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), 3225 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
3207 da_old); 3226 da_old);
3208 xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp)); 3227 xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
3209 xfs_bmap_trace_post_update(fname, "1", ip, idx, 3228 XFS_BMAP_TRACE_POST_UPDATE("1", ip, idx,
3210 whichfork); 3229 whichfork);
3211 da_new = temp; 3230 da_new = temp;
3212 break; 3231 break;
3213 } 3232 }
3214 xfs_bmap_trace_post_update(fname, "1", ip, idx, whichfork); 3233 XFS_BMAP_TRACE_POST_UPDATE("1", ip, idx, whichfork);
3215 if (!cur) { 3234 if (!cur) {
3216 flags |= XFS_ILOG_FEXT(whichfork); 3235 flags |= XFS_ILOG_FEXT(whichfork);
3217 break; 3236 break;
@@ -3228,7 +3247,7 @@ xfs_bmap_del_extent(
3228 * Deleting the middle of the extent. 3247 * Deleting the middle of the extent.
3229 */ 3248 */
3230 temp = del->br_startoff - got.br_startoff; 3249 temp = del->br_startoff - got.br_startoff;
3231 xfs_bmap_trace_pre_update(fname, "0", ip, idx, whichfork); 3250 XFS_BMAP_TRACE_PRE_UPDATE("0", ip, idx, whichfork);
3232 xfs_bmbt_set_blockcount(ep, temp); 3251 xfs_bmbt_set_blockcount(ep, temp);
3233 new.br_startoff = del_endoff; 3252 new.br_startoff = del_endoff;
3234 temp2 = got_endoff - del_endoff; 3253 temp2 = got_endoff - del_endoff;
@@ -3315,8 +3334,8 @@ xfs_bmap_del_extent(
3315 } 3334 }
3316 } 3335 }
3317 } 3336 }
3318 xfs_bmap_trace_post_update(fname, "0", ip, idx, whichfork); 3337 XFS_BMAP_TRACE_POST_UPDATE("0", ip, idx, whichfork);
3319 xfs_bmap_trace_insert(fname, "0", ip, idx + 1, 1, &new, NULL, 3338 XFS_BMAP_TRACE_INSERT("0", ip, idx + 1, 1, &new, NULL,
3320 whichfork); 3339 whichfork);
3321 xfs_iext_insert(ifp, idx + 1, 1, &new); 3340 xfs_iext_insert(ifp, idx + 1, 1, &new);
3322 ifp->if_lastex = idx + 1; 3341 ifp->if_lastex = idx + 1;
@@ -3556,9 +3575,6 @@ xfs_bmap_local_to_extents(
3556{ 3575{
3557 int error; /* error return value */ 3576 int error; /* error return value */
3558 int flags; /* logging flags returned */ 3577 int flags; /* logging flags returned */
3559#ifdef XFS_BMAP_TRACE
3560 static char fname[] = "xfs_bmap_local_to_extents";
3561#endif
3562 xfs_ifork_t *ifp; /* inode fork pointer */ 3578 xfs_ifork_t *ifp; /* inode fork pointer */
3563 3579
3564 /* 3580 /*
@@ -3613,7 +3629,7 @@ xfs_bmap_local_to_extents(
3613 xfs_iext_add(ifp, 0, 1); 3629 xfs_iext_add(ifp, 0, 1);
3614 ep = xfs_iext_get_ext(ifp, 0); 3630 ep = xfs_iext_get_ext(ifp, 0);
3615 xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM); 3631 xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM);
3616 xfs_bmap_trace_post_update(fname, "new", ip, 0, whichfork); 3632 XFS_BMAP_TRACE_POST_UPDATE("new", ip, 0, whichfork);
3617 XFS_IFORK_NEXT_SET(ip, whichfork, 1); 3633 XFS_IFORK_NEXT_SET(ip, whichfork, 1);
3618 ip->i_d.di_nblocks = 1; 3634 ip->i_d.di_nblocks = 1;
3619 XFS_TRANS_MOD_DQUOT_BYINO(args.mp, tp, ip, 3635 XFS_TRANS_MOD_DQUOT_BYINO(args.mp, tp, ip,
@@ -3736,7 +3752,7 @@ ktrace_t *xfs_bmap_trace_buf;
3736STATIC void 3752STATIC void
3737xfs_bmap_trace_addentry( 3753xfs_bmap_trace_addentry(
3738 int opcode, /* operation */ 3754 int opcode, /* operation */
3739 char *fname, /* function name */ 3755 const char *fname, /* function name */
3740 char *desc, /* operation description */ 3756 char *desc, /* operation description */
3741 xfs_inode_t *ip, /* incore inode pointer */ 3757 xfs_inode_t *ip, /* incore inode pointer */
3742 xfs_extnum_t idx, /* index of entry(ies) */ 3758 xfs_extnum_t idx, /* index of entry(ies) */
@@ -3795,7 +3811,7 @@ xfs_bmap_trace_addentry(
3795 */ 3811 */
3796STATIC void 3812STATIC void
3797xfs_bmap_trace_delete( 3813xfs_bmap_trace_delete(
3798 char *fname, /* function name */ 3814 const char *fname, /* function name */
3799 char *desc, /* operation description */ 3815 char *desc, /* operation description */
3800 xfs_inode_t *ip, /* incore inode pointer */ 3816 xfs_inode_t *ip, /* incore inode pointer */
3801 xfs_extnum_t idx, /* index of entry(entries) deleted */ 3817 xfs_extnum_t idx, /* index of entry(entries) deleted */
@@ -3817,7 +3833,7 @@ xfs_bmap_trace_delete(
3817 */ 3833 */
3818STATIC void 3834STATIC void
3819xfs_bmap_trace_insert( 3835xfs_bmap_trace_insert(
3820 char *fname, /* function name */ 3836 const char *fname, /* function name */
3821 char *desc, /* operation description */ 3837 char *desc, /* operation description */
3822 xfs_inode_t *ip, /* incore inode pointer */ 3838 xfs_inode_t *ip, /* incore inode pointer */
3823 xfs_extnum_t idx, /* index of entry(entries) inserted */ 3839 xfs_extnum_t idx, /* index of entry(entries) inserted */
@@ -3846,7 +3862,7 @@ xfs_bmap_trace_insert(
3846 */ 3862 */
3847STATIC void 3863STATIC void
3848xfs_bmap_trace_post_update( 3864xfs_bmap_trace_post_update(
3849 char *fname, /* function name */ 3865 const char *fname, /* function name */
3850 char *desc, /* operation description */ 3866 char *desc, /* operation description */
3851 xfs_inode_t *ip, /* incore inode pointer */ 3867 xfs_inode_t *ip, /* incore inode pointer */
3852 xfs_extnum_t idx, /* index of entry updated */ 3868 xfs_extnum_t idx, /* index of entry updated */
@@ -3864,7 +3880,7 @@ xfs_bmap_trace_post_update(
3864 */ 3880 */
3865STATIC void 3881STATIC void
3866xfs_bmap_trace_pre_update( 3882xfs_bmap_trace_pre_update(
3867 char *fname, /* function name */ 3883 const char *fname, /* function name */
3868 char *desc, /* operation description */ 3884 char *desc, /* operation description */
3869 xfs_inode_t *ip, /* incore inode pointer */ 3885 xfs_inode_t *ip, /* incore inode pointer */
3870 xfs_extnum_t idx, /* index of entry to be updated */ 3886 xfs_extnum_t idx, /* index of entry to be updated */
@@ -4481,9 +4497,6 @@ xfs_bmap_read_extents(
4481 xfs_buf_t *bp; /* buffer for "block" */ 4497 xfs_buf_t *bp; /* buffer for "block" */
4482 int error; /* error return value */ 4498 int error; /* error return value */
4483 xfs_exntfmt_t exntf; /* XFS_EXTFMT_NOSTATE, if checking */ 4499 xfs_exntfmt_t exntf; /* XFS_EXTFMT_NOSTATE, if checking */
4484#ifdef XFS_BMAP_TRACE
4485 static char fname[] = "xfs_bmap_read_extents";
4486#endif
4487 xfs_extnum_t i, j; /* index into the extents list */ 4500 xfs_extnum_t i, j; /* index into the extents list */
4488 xfs_ifork_t *ifp; /* fork structure */ 4501 xfs_ifork_t *ifp; /* fork structure */
4489 int level; /* btree level, for checking */ 4502 int level; /* btree level, for checking */
@@ -4600,7 +4613,7 @@ xfs_bmap_read_extents(
4600 } 4613 }
4601 ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); 4614 ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
4602 ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork)); 4615 ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork));
4603 xfs_bmap_trace_exlist(fname, ip, i, whichfork); 4616 XFS_BMAP_TRACE_EXLIST(ip, i, whichfork);
4604 return 0; 4617 return 0;
4605error0: 4618error0:
4606 xfs_trans_brelse(tp, bp); 4619 xfs_trans_brelse(tp, bp);
@@ -4613,7 +4626,7 @@ error0:
4613 */ 4626 */
4614void 4627void
4615xfs_bmap_trace_exlist( 4628xfs_bmap_trace_exlist(
4616 char *fname, /* function name */ 4629 const char *fname, /* function name */
4617 xfs_inode_t *ip, /* incore inode pointer */ 4630 xfs_inode_t *ip, /* incore inode pointer */
4618 xfs_extnum_t cnt, /* count of entries in the list */ 4631 xfs_extnum_t cnt, /* count of entries in the list */
4619 int whichfork) /* data or attr fork */ 4632 int whichfork) /* data or attr fork */
@@ -4628,7 +4641,7 @@ xfs_bmap_trace_exlist(
4628 for (idx = 0; idx < cnt; idx++) { 4641 for (idx = 0; idx < cnt; idx++) {
4629 ep = xfs_iext_get_ext(ifp, idx); 4642 ep = xfs_iext_get_ext(ifp, idx);
4630 xfs_bmbt_get_all(ep, &s); 4643 xfs_bmbt_get_all(ep, &s);
4631 xfs_bmap_trace_insert(fname, "exlist", ip, idx, 1, &s, NULL, 4644 XFS_BMAP_TRACE_INSERT("exlist", ip, idx, 1, &s, NULL,
4632 whichfork); 4645 whichfork);
4633 } 4646 }
4634} 4647}
@@ -4868,12 +4881,7 @@ xfs_bmapi(
4868 xfs_extlen_t extsz; 4881 xfs_extlen_t extsz;
4869 4882
4870 /* Figure out the extent size, adjust alen */ 4883 /* Figure out the extent size, adjust alen */
4871 if (rt) { 4884 extsz = xfs_get_extsz_hint(ip);
4872 if (!(extsz = ip->i_d.di_extsize))
4873 extsz = mp->m_sb.sb_rextsize;
4874 } else {
4875 extsz = ip->i_d.di_extsize;
4876 }
4877 if (extsz) { 4885 if (extsz) {
4878 error = xfs_bmap_extsize_align(mp, 4886 error = xfs_bmap_extsize_align(mp,
4879 &got, &prev, extsz, 4887 &got, &prev, extsz,
@@ -5219,10 +5227,10 @@ xfs_bmapi(
5219 * Else go on to the next record. 5227 * Else go on to the next record.
5220 */ 5228 */
5221 ep = xfs_iext_get_ext(ifp, ++lastx); 5229 ep = xfs_iext_get_ext(ifp, ++lastx);
5222 if (lastx >= nextents) { 5230 prev = got;
5231 if (lastx >= nextents)
5223 eof = 1; 5232 eof = 1;
5224 prev = got; 5233 else
5225 } else
5226 xfs_bmbt_get_all(ep, &got); 5234 xfs_bmbt_get_all(ep, &got);
5227 } 5235 }
5228 ifp->if_lastex = lastx; 5236 ifp->if_lastex = lastx;
@@ -5813,8 +5821,7 @@ xfs_getbmap(
5813 ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) 5821 ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
5814 return XFS_ERROR(EINVAL); 5822 return XFS_ERROR(EINVAL);
5815 if (whichfork == XFS_DATA_FORK) { 5823 if (whichfork == XFS_DATA_FORK) {
5816 if ((ip->i_d.di_extsize && (ip->i_d.di_flags & 5824 if (xfs_get_extsz_hint(ip) ||
5817 (XFS_DIFLAG_REALTIME|XFS_DIFLAG_EXTSIZE))) ||
5818 ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){ 5825 ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
5819 prealloced = 1; 5826 prealloced = 1;
5820 fixlen = XFS_MAXIOFFSET(mp); 5827 fixlen = XFS_MAXIOFFSET(mp);
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 4f24c7e39b3..524b1c9d524 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -144,12 +144,14 @@ extern ktrace_t *xfs_bmap_trace_buf;
144 */ 144 */
145void 145void
146xfs_bmap_trace_exlist( 146xfs_bmap_trace_exlist(
147 char *fname, /* function name */ 147 const char *fname, /* function name */
148 struct xfs_inode *ip, /* incore inode pointer */ 148 struct xfs_inode *ip, /* incore inode pointer */
149 xfs_extnum_t cnt, /* count of entries in list */ 149 xfs_extnum_t cnt, /* count of entries in list */
150 int whichfork); /* data or attr fork */ 150 int whichfork); /* data or attr fork */
151#define XFS_BMAP_TRACE_EXLIST(ip,c,w) \
152 xfs_bmap_trace_exlist(__FUNCTION__,ip,c,w)
151#else 153#else
152#define xfs_bmap_trace_exlist(f,ip,c,w) 154#define XFS_BMAP_TRACE_EXLIST(ip,c,w)
153#endif 155#endif
154 156
155/* 157/*
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 0bf192fea3e..89b891f51cf 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -76,7 +76,7 @@ static char EXIT[] = "exit";
76 */ 76 */
77STATIC void 77STATIC void
78xfs_bmbt_trace_enter( 78xfs_bmbt_trace_enter(
79 char *func, 79 const char *func,
80 xfs_btree_cur_t *cur, 80 xfs_btree_cur_t *cur,
81 char *s, 81 char *s,
82 int type, 82 int type,
@@ -117,7 +117,7 @@ xfs_bmbt_trace_enter(
117 */ 117 */
118STATIC void 118STATIC void
119xfs_bmbt_trace_argbi( 119xfs_bmbt_trace_argbi(
120 char *func, 120 const char *func,
121 xfs_btree_cur_t *cur, 121 xfs_btree_cur_t *cur,
122 xfs_buf_t *b, 122 xfs_buf_t *b,
123 int i, 123 int i,
@@ -134,7 +134,7 @@ xfs_bmbt_trace_argbi(
134 */ 134 */
135STATIC void 135STATIC void
136xfs_bmbt_trace_argbii( 136xfs_bmbt_trace_argbii(
137 char *func, 137 const char *func,
138 xfs_btree_cur_t *cur, 138 xfs_btree_cur_t *cur,
139 xfs_buf_t *b, 139 xfs_buf_t *b,
140 int i0, 140 int i0,
@@ -153,7 +153,7 @@ xfs_bmbt_trace_argbii(
153 */ 153 */
154STATIC void 154STATIC void
155xfs_bmbt_trace_argfffi( 155xfs_bmbt_trace_argfffi(
156 char *func, 156 const char *func,
157 xfs_btree_cur_t *cur, 157 xfs_btree_cur_t *cur,
158 xfs_dfiloff_t o, 158 xfs_dfiloff_t o,
159 xfs_dfsbno_t b, 159 xfs_dfsbno_t b,
@@ -172,7 +172,7 @@ xfs_bmbt_trace_argfffi(
172 */ 172 */
173STATIC void 173STATIC void
174xfs_bmbt_trace_argi( 174xfs_bmbt_trace_argi(
175 char *func, 175 const char *func,
176 xfs_btree_cur_t *cur, 176 xfs_btree_cur_t *cur,
177 int i, 177 int i,
178 int line) 178 int line)
@@ -188,7 +188,7 @@ xfs_bmbt_trace_argi(
188 */ 188 */
189STATIC void 189STATIC void
190xfs_bmbt_trace_argifk( 190xfs_bmbt_trace_argifk(
191 char *func, 191 const char *func,
192 xfs_btree_cur_t *cur, 192 xfs_btree_cur_t *cur,
193 int i, 193 int i,
194 xfs_fsblock_t f, 194 xfs_fsblock_t f,
@@ -206,7 +206,7 @@ xfs_bmbt_trace_argifk(
206 */ 206 */
207STATIC void 207STATIC void
208xfs_bmbt_trace_argifr( 208xfs_bmbt_trace_argifr(
209 char *func, 209 const char *func,
210 xfs_btree_cur_t *cur, 210 xfs_btree_cur_t *cur,
211 int i, 211 int i,
212 xfs_fsblock_t f, 212 xfs_fsblock_t f,
@@ -235,7 +235,7 @@ xfs_bmbt_trace_argifr(
235 */ 235 */
236STATIC void 236STATIC void
237xfs_bmbt_trace_argik( 237xfs_bmbt_trace_argik(
238 char *func, 238 const char *func,
239 xfs_btree_cur_t *cur, 239 xfs_btree_cur_t *cur,
240 int i, 240 int i,
241 xfs_bmbt_key_t *k, 241 xfs_bmbt_key_t *k,
@@ -255,7 +255,7 @@ xfs_bmbt_trace_argik(
255 */ 255 */
256STATIC void 256STATIC void
257xfs_bmbt_trace_cursor( 257xfs_bmbt_trace_cursor(
258 char *func, 258 const char *func,
259 xfs_btree_cur_t *cur, 259 xfs_btree_cur_t *cur,
260 char *s, 260 char *s,
261 int line) 261 int line)
@@ -274,21 +274,21 @@ xfs_bmbt_trace_cursor(
274} 274}
275 275
276#define XFS_BMBT_TRACE_ARGBI(c,b,i) \ 276#define XFS_BMBT_TRACE_ARGBI(c,b,i) \
277 xfs_bmbt_trace_argbi(fname, c, b, i, __LINE__) 277 xfs_bmbt_trace_argbi(__FUNCTION__, c, b, i, __LINE__)
278#define XFS_BMBT_TRACE_ARGBII(c,b,i,j) \ 278#define XFS_BMBT_TRACE_ARGBII(c,b,i,j) \
279 xfs_bmbt_trace_argbii(fname, c, b, i, j, __LINE__) 279 xfs_bmbt_trace_argbii(__FUNCTION__, c, b, i, j, __LINE__)
280#define XFS_BMBT_TRACE_ARGFFFI(c,o,b,i,j) \ 280#define XFS_BMBT_TRACE_ARGFFFI(c,o,b,i,j) \
281 xfs_bmbt_trace_argfffi(fname, c, o, b, i, j, __LINE__) 281 xfs_bmbt_trace_argfffi(__FUNCTION__, c, o, b, i, j, __LINE__)
282#define XFS_BMBT_TRACE_ARGI(c,i) \ 282#define XFS_BMBT_TRACE_ARGI(c,i) \
283 xfs_bmbt_trace_argi(fname, c, i, __LINE__) 283 xfs_bmbt_trace_argi(__FUNCTION__, c, i, __LINE__)
284#define XFS_BMBT_TRACE_ARGIFK(c,i,f,s) \ 284#define XFS_BMBT_TRACE_ARGIFK(c,i,f,s) \
285 xfs_bmbt_trace_argifk(fname, c, i, f, s, __LINE__) 285 xfs_bmbt_trace_argifk(__FUNCTION__, c, i, f, s, __LINE__)
286#define XFS_BMBT_TRACE_ARGIFR(c,i,f,r) \ 286#define XFS_BMBT_TRACE_ARGIFR(c,i,f,r) \
287 xfs_bmbt_trace_argifr(fname, c, i, f, r, __LINE__) 287 xfs_bmbt_trace_argifr(__FUNCTION__, c, i, f, r, __LINE__)
288#define XFS_BMBT_TRACE_ARGIK(c,i,k) \ 288#define XFS_BMBT_TRACE_ARGIK(c,i,k) \
289 xfs_bmbt_trace_argik(fname, c, i, k, __LINE__) 289 xfs_bmbt_trace_argik(__FUNCTION__, c, i, k, __LINE__)
290#define XFS_BMBT_TRACE_CURSOR(c,s) \ 290#define XFS_BMBT_TRACE_CURSOR(c,s) \
291 xfs_bmbt_trace_cursor(fname, c, s, __LINE__) 291 xfs_bmbt_trace_cursor(__FUNCTION__, c, s, __LINE__)
292#else 292#else
293#define XFS_BMBT_TRACE_ARGBI(c,b,i) 293#define XFS_BMBT_TRACE_ARGBI(c,b,i)
294#define XFS_BMBT_TRACE_ARGBII(c,b,i,j) 294#define XFS_BMBT_TRACE_ARGBII(c,b,i,j)
@@ -318,9 +318,6 @@ xfs_bmbt_delrec(
318 xfs_fsblock_t bno; /* fs-relative block number */ 318 xfs_fsblock_t bno; /* fs-relative block number */
319 xfs_buf_t *bp; /* buffer for block */ 319 xfs_buf_t *bp; /* buffer for block */
320 int error; /* error return value */ 320 int error; /* error return value */
321#ifdef XFS_BMBT_TRACE
322 static char fname[] = "xfs_bmbt_delrec";
323#endif
324 int i; /* loop counter */ 321 int i; /* loop counter */
325 int j; /* temp state */ 322 int j; /* temp state */
326 xfs_bmbt_key_t key; /* bmap btree key */ 323 xfs_bmbt_key_t key; /* bmap btree key */
@@ -694,9 +691,6 @@ xfs_bmbt_insrec(
694 xfs_bmbt_block_t *block; /* bmap btree block */ 691 xfs_bmbt_block_t *block; /* bmap btree block */
695 xfs_buf_t *bp; /* buffer for block */ 692 xfs_buf_t *bp; /* buffer for block */
696 int error; /* error return value */ 693 int error; /* error return value */
697#ifdef XFS_BMBT_TRACE
698 static char fname[] = "xfs_bmbt_insrec";
699#endif
700 int i; /* loop index */ 694 int i; /* loop index */
701 xfs_bmbt_key_t key; /* bmap btree key */ 695 xfs_bmbt_key_t key; /* bmap btree key */
702 xfs_bmbt_key_t *kp=NULL; /* pointer to bmap btree key */ 696 xfs_bmbt_key_t *kp=NULL; /* pointer to bmap btree key */
@@ -881,9 +875,6 @@ xfs_bmbt_killroot(
881#ifdef DEBUG 875#ifdef DEBUG
882 int error; 876 int error;
883#endif 877#endif
884#ifdef XFS_BMBT_TRACE
885 static char fname[] = "xfs_bmbt_killroot";
886#endif
887 int i; 878 int i;
888 xfs_bmbt_key_t *kp; 879 xfs_bmbt_key_t *kp;
889 xfs_inode_t *ip; 880 xfs_inode_t *ip;
@@ -973,9 +964,6 @@ xfs_bmbt_log_keys(
973 int kfirst, 964 int kfirst,
974 int klast) 965 int klast)
975{ 966{
976#ifdef XFS_BMBT_TRACE
977 static char fname[] = "xfs_bmbt_log_keys";
978#endif
979 xfs_trans_t *tp; 967 xfs_trans_t *tp;
980 968
981 XFS_BMBT_TRACE_CURSOR(cur, ENTRY); 969 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
@@ -1012,9 +1000,6 @@ xfs_bmbt_log_ptrs(
1012 int pfirst, 1000 int pfirst,
1013 int plast) 1001 int plast)
1014{ 1002{
1015#ifdef XFS_BMBT_TRACE
1016 static char fname[] = "xfs_bmbt_log_ptrs";
1017#endif
1018 xfs_trans_t *tp; 1003 xfs_trans_t *tp;
1019 1004
1020 XFS_BMBT_TRACE_CURSOR(cur, ENTRY); 1005 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
@@ -1055,9 +1040,6 @@ xfs_bmbt_lookup(
1055 xfs_daddr_t d; 1040 xfs_daddr_t d;
1056 xfs_sfiloff_t diff; 1041 xfs_sfiloff_t diff;
1057 int error; /* error return value */ 1042 int error; /* error return value */
1058#ifdef XFS_BMBT_TRACE
1059 static char fname[] = "xfs_bmbt_lookup";
1060#endif
1061 xfs_fsblock_t fsbno=0; 1043 xfs_fsblock_t fsbno=0;
1062 int high; 1044 int high;
1063 int i; 1045 int i;
@@ -1195,9 +1177,6 @@ xfs_bmbt_lshift(
1195 int *stat) /* success/failure */ 1177 int *stat) /* success/failure */
1196{ 1178{
1197 int error; /* error return value */ 1179 int error; /* error return value */
1198#ifdef XFS_BMBT_TRACE
1199 static char fname[] = "xfs_bmbt_lshift";
1200#endif
1201#ifdef DEBUG 1180#ifdef DEBUG
1202 int i; /* loop counter */ 1181 int i; /* loop counter */
1203#endif 1182#endif
@@ -1331,9 +1310,6 @@ xfs_bmbt_rshift(
1331 int *stat) /* success/failure */ 1310 int *stat) /* success/failure */
1332{ 1311{
1333 int error; /* error return value */ 1312 int error; /* error return value */
1334#ifdef XFS_BMBT_TRACE
1335 static char fname[] = "xfs_bmbt_rshift";
1336#endif
1337 int i; /* loop counter */ 1313 int i; /* loop counter */
1338 xfs_bmbt_key_t key; /* bmap btree key */ 1314 xfs_bmbt_key_t key; /* bmap btree key */
1339 xfs_buf_t *lbp; /* left buffer pointer */ 1315 xfs_buf_t *lbp; /* left buffer pointer */
@@ -1492,9 +1468,6 @@ xfs_bmbt_split(
1492{ 1468{
1493 xfs_alloc_arg_t args; /* block allocation args */ 1469 xfs_alloc_arg_t args; /* block allocation args */
1494 int error; /* error return value */ 1470 int error; /* error return value */
1495#ifdef XFS_BMBT_TRACE
1496 static char fname[] = "xfs_bmbt_split";
1497#endif
1498 int i; /* loop counter */ 1471 int i; /* loop counter */
1499 xfs_fsblock_t lbno; /* left sibling block number */ 1472 xfs_fsblock_t lbno; /* left sibling block number */
1500 xfs_buf_t *lbp; /* left buffer pointer */ 1473 xfs_buf_t *lbp; /* left buffer pointer */
@@ -1641,9 +1614,6 @@ xfs_bmbt_updkey(
1641#ifdef DEBUG 1614#ifdef DEBUG
1642 int error; 1615 int error;
1643#endif 1616#endif
1644#ifdef XFS_BMBT_TRACE
1645 static char fname[] = "xfs_bmbt_updkey";
1646#endif
1647 xfs_bmbt_key_t *kp; 1617 xfs_bmbt_key_t *kp;
1648 int ptr; 1618 int ptr;
1649 1619
@@ -1712,9 +1682,6 @@ xfs_bmbt_decrement(
1712 xfs_bmbt_block_t *block; 1682 xfs_bmbt_block_t *block;
1713 xfs_buf_t *bp; 1683 xfs_buf_t *bp;
1714 int error; /* error return value */ 1684 int error; /* error return value */
1715#ifdef XFS_BMBT_TRACE
1716 static char fname[] = "xfs_bmbt_decrement";
1717#endif
1718 xfs_fsblock_t fsbno; 1685 xfs_fsblock_t fsbno;
1719 int lev; 1686 int lev;
1720 xfs_mount_t *mp; 1687 xfs_mount_t *mp;
@@ -1785,9 +1752,6 @@ xfs_bmbt_delete(
1785 int *stat) /* success/failure */ 1752 int *stat) /* success/failure */
1786{ 1753{
1787 int error; /* error return value */ 1754 int error; /* error return value */
1788#ifdef XFS_BMBT_TRACE
1789 static char fname[] = "xfs_bmbt_delete";
1790#endif
1791 int i; 1755 int i;
1792 int level; 1756 int level;
1793 1757
@@ -2000,9 +1964,6 @@ xfs_bmbt_increment(
2000 xfs_bmbt_block_t *block; 1964 xfs_bmbt_block_t *block;
2001 xfs_buf_t *bp; 1965 xfs_buf_t *bp;
2002 int error; /* error return value */ 1966 int error; /* error return value */
2003#ifdef XFS_BMBT_TRACE
2004 static char fname[] = "xfs_bmbt_increment";
2005#endif
2006 xfs_fsblock_t fsbno; 1967 xfs_fsblock_t fsbno;
2007 int lev; 1968 int lev;
2008 xfs_mount_t *mp; 1969 xfs_mount_t *mp;
@@ -2080,9 +2041,6 @@ xfs_bmbt_insert(
2080 int *stat) /* success/failure */ 2041 int *stat) /* success/failure */
2081{ 2042{
2082 int error; /* error return value */ 2043 int error; /* error return value */
2083#ifdef XFS_BMBT_TRACE
2084 static char fname[] = "xfs_bmbt_insert";
2085#endif
2086 int i; 2044 int i;
2087 int level; 2045 int level;
2088 xfs_fsblock_t nbno; 2046 xfs_fsblock_t nbno;
@@ -2142,9 +2100,6 @@ xfs_bmbt_log_block(
2142 int fields) 2100 int fields)
2143{ 2101{
2144 int first; 2102 int first;
2145#ifdef XFS_BMBT_TRACE
2146 static char fname[] = "xfs_bmbt_log_block";
2147#endif
2148 int last; 2103 int last;
2149 xfs_trans_t *tp; 2104 xfs_trans_t *tp;
2150 static const short offsets[] = { 2105 static const short offsets[] = {
@@ -2181,9 +2136,6 @@ xfs_bmbt_log_recs(
2181{ 2136{
2182 xfs_bmbt_block_t *block; 2137 xfs_bmbt_block_t *block;
2183 int first; 2138 int first;
2184#ifdef XFS_BMBT_TRACE
2185 static char fname[] = "xfs_bmbt_log_recs";
2186#endif
2187 int last; 2139 int last;
2188 xfs_bmbt_rec_t *rp; 2140 xfs_bmbt_rec_t *rp;
2189 xfs_trans_t *tp; 2141 xfs_trans_t *tp;
@@ -2245,9 +2197,6 @@ xfs_bmbt_newroot(
2245 xfs_bmbt_key_t *ckp; /* child key pointer */ 2197 xfs_bmbt_key_t *ckp; /* child key pointer */
2246 xfs_bmbt_ptr_t *cpp; /* child ptr pointer */ 2198 xfs_bmbt_ptr_t *cpp; /* child ptr pointer */
2247 int error; /* error return code */ 2199 int error; /* error return code */
2248#ifdef XFS_BMBT_TRACE
2249 static char fname[] = "xfs_bmbt_newroot";
2250#endif
2251#ifdef DEBUG 2200#ifdef DEBUG
2252 int i; /* loop counter */ 2201 int i; /* loop counter */
2253#endif 2202#endif
@@ -2630,9 +2579,6 @@ xfs_bmbt_update(
2630 xfs_bmbt_block_t *block; 2579 xfs_bmbt_block_t *block;
2631 xfs_buf_t *bp; 2580 xfs_buf_t *bp;
2632 int error; 2581 int error;
2633#ifdef XFS_BMBT_TRACE
2634 static char fname[] = "xfs_bmbt_update";
2635#endif
2636 xfs_bmbt_key_t key; 2582 xfs_bmbt_key_t key;
2637 int ptr; 2583 int ptr;
2638 xfs_bmbt_rec_t *rp; 2584 xfs_bmbt_rec_t *rp;
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 4e27d55a1e7..6e40a0a198f 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -444,30 +444,14 @@ xfs_btree_setbuf(
444/* 444/*
445 * Min and max functions for extlen, agblock, fileoff, and filblks types. 445 * Min and max functions for extlen, agblock, fileoff, and filblks types.
446 */ 446 */
447#define XFS_EXTLEN_MIN(a,b) \ 447#define XFS_EXTLEN_MIN(a,b) min_t(xfs_extlen_t, (a), (b))
448 ((xfs_extlen_t)(a) < (xfs_extlen_t)(b) ? \ 448#define XFS_EXTLEN_MAX(a,b) max_t(xfs_extlen_t, (a), (b))
449 (xfs_extlen_t)(a) : (xfs_extlen_t)(b)) 449#define XFS_AGBLOCK_MIN(a,b) min_t(xfs_agblock_t, (a), (b))
450#define XFS_EXTLEN_MAX(a,b) \ 450#define XFS_AGBLOCK_MAX(a,b) max_t(xfs_agblock_t, (a), (b))
451 ((xfs_extlen_t)(a) > (xfs_extlen_t)(b) ? \ 451#define XFS_FILEOFF_MIN(a,b) min_t(xfs_fileoff_t, (a), (b))
452 (xfs_extlen_t)(a) : (xfs_extlen_t)(b)) 452#define XFS_FILEOFF_MAX(a,b) max_t(xfs_fileoff_t, (a), (b))
453#define XFS_AGBLOCK_MIN(a,b) \ 453#define XFS_FILBLKS_MIN(a,b) min_t(xfs_filblks_t, (a), (b))
454 ((xfs_agblock_t)(a) < (xfs_agblock_t)(b) ? \ 454#define XFS_FILBLKS_MAX(a,b) max_t(xfs_filblks_t, (a), (b))
455 (xfs_agblock_t)(a) : (xfs_agblock_t)(b))
456#define XFS_AGBLOCK_MAX(a,b) \
457 ((xfs_agblock_t)(a) > (xfs_agblock_t)(b) ? \
458 (xfs_agblock_t)(a) : (xfs_agblock_t)(b))
459#define XFS_FILEOFF_MIN(a,b) \
460 ((xfs_fileoff_t)(a) < (xfs_fileoff_t)(b) ? \
461 (xfs_fileoff_t)(a) : (xfs_fileoff_t)(b))
462#define XFS_FILEOFF_MAX(a,b) \
463 ((xfs_fileoff_t)(a) > (xfs_fileoff_t)(b) ? \
464 (xfs_fileoff_t)(a) : (xfs_fileoff_t)(b))
465#define XFS_FILBLKS_MIN(a,b) \
466 ((xfs_filblks_t)(a) < (xfs_filblks_t)(b) ? \
467 (xfs_filblks_t)(a) : (xfs_filblks_t)(b))
468#define XFS_FILBLKS_MAX(a,b) \
469 ((xfs_filblks_t)(a) > (xfs_filblks_t)(b) ? \
470 (xfs_filblks_t)(a) : (xfs_filblks_t)(b))
471 455
472#define XFS_FSB_SANITY_CHECK(mp,fsb) \ 456#define XFS_FSB_SANITY_CHECK(mp,fsb) \
473 (XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \ 457 (XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 6c1bddc04e3..b0667cb27d6 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -580,8 +580,8 @@ xfs_buf_item_unlock(
580 * If the buf item isn't tracking any data, free it. 580 * If the buf item isn't tracking any data, free it.
581 * Otherwise, if XFS_BLI_HOLD is set clear it. 581 * Otherwise, if XFS_BLI_HOLD is set clear it.
582 */ 582 */
583 if (xfs_count_bits(bip->bli_format.blf_data_map, 583 if (xfs_bitmap_empty(bip->bli_format.blf_data_map,
584 bip->bli_format.blf_map_size, 0) == 0) { 584 bip->bli_format.blf_map_size)) {
585 xfs_buf_item_relse(bp); 585 xfs_buf_item_relse(bp);
586 } else if (hold) { 586 } else if (hold) {
587 bip->bli_flags &= ~XFS_BLI_HOLD; 587 bip->bli_flags &= ~XFS_BLI_HOLD;
diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h
index 5b7eb81453b..f89196cb08d 100644
--- a/fs/xfs/xfs_clnt.h
+++ b/fs/xfs/xfs_clnt.h
@@ -99,5 +99,7 @@ struct xfs_mount_args {
99 */ 99 */
100#define XFSMNT2_COMPAT_IOSIZE 0x00000001 /* don't report large preferred 100#define XFSMNT2_COMPAT_IOSIZE 0x00000001 /* don't report large preferred
101 * I/O size in stat(2) */ 101 * I/O size in stat(2) */
102#define XFSMNT2_FILESTREAMS 0x00000002 /* enable the filestreams
103 * allocator */
102 104
103#endif /* __XFS_CLNT_H__ */ 105#endif /* __XFS_CLNT_H__ */
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index b33826961c4..fefd0116bac 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -257,6 +257,7 @@ typedef enum xfs_dinode_fmt
257#define XFS_DIFLAG_EXTSIZE_BIT 11 /* inode extent size allocator hint */ 257#define XFS_DIFLAG_EXTSIZE_BIT 11 /* inode extent size allocator hint */
258#define XFS_DIFLAG_EXTSZINHERIT_BIT 12 /* inherit inode extent size */ 258#define XFS_DIFLAG_EXTSZINHERIT_BIT 12 /* inherit inode extent size */
259#define XFS_DIFLAG_NODEFRAG_BIT 13 /* do not reorganize/defragment */ 259#define XFS_DIFLAG_NODEFRAG_BIT 13 /* do not reorganize/defragment */
260#define XFS_DIFLAG_FILESTREAM_BIT 14 /* use filestream allocator */
260#define XFS_DIFLAG_REALTIME (1 << XFS_DIFLAG_REALTIME_BIT) 261#define XFS_DIFLAG_REALTIME (1 << XFS_DIFLAG_REALTIME_BIT)
261#define XFS_DIFLAG_PREALLOC (1 << XFS_DIFLAG_PREALLOC_BIT) 262#define XFS_DIFLAG_PREALLOC (1 << XFS_DIFLAG_PREALLOC_BIT)
262#define XFS_DIFLAG_NEWRTBM (1 << XFS_DIFLAG_NEWRTBM_BIT) 263#define XFS_DIFLAG_NEWRTBM (1 << XFS_DIFLAG_NEWRTBM_BIT)
@@ -271,12 +272,13 @@ typedef enum xfs_dinode_fmt
271#define XFS_DIFLAG_EXTSIZE (1 << XFS_DIFLAG_EXTSIZE_BIT) 272#define XFS_DIFLAG_EXTSIZE (1 << XFS_DIFLAG_EXTSIZE_BIT)
272#define XFS_DIFLAG_EXTSZINHERIT (1 << XFS_DIFLAG_EXTSZINHERIT_BIT) 273#define XFS_DIFLAG_EXTSZINHERIT (1 << XFS_DIFLAG_EXTSZINHERIT_BIT)
273#define XFS_DIFLAG_NODEFRAG (1 << XFS_DIFLAG_NODEFRAG_BIT) 274#define XFS_DIFLAG_NODEFRAG (1 << XFS_DIFLAG_NODEFRAG_BIT)
275#define XFS_DIFLAG_FILESTREAM (1 << XFS_DIFLAG_FILESTREAM_BIT)
274 276
275#define XFS_DIFLAG_ANY \ 277#define XFS_DIFLAG_ANY \
276 (XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \ 278 (XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \
277 XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \ 279 XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \
278 XFS_DIFLAG_NOATIME | XFS_DIFLAG_NODUMP | XFS_DIFLAG_RTINHERIT | \ 280 XFS_DIFLAG_NOATIME | XFS_DIFLAG_NODUMP | XFS_DIFLAG_RTINHERIT | \
279 XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | XFS_DIFLAG_EXTSIZE | \ 281 XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | XFS_DIFLAG_EXTSIZE | \
280 XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG) 282 XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG | XFS_DIFLAG_FILESTREAM)
281 283
282#endif /* __XFS_DINODE_H__ */ 284#endif /* __XFS_DINODE_H__ */
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 8e8e5279334..29e091914df 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -55,9 +55,9 @@ xfs_dir_mount(
55 XFS_MAX_BLOCKSIZE); 55 XFS_MAX_BLOCKSIZE);
56 mp->m_dirblksize = 1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog); 56 mp->m_dirblksize = 1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog);
57 mp->m_dirblkfsbs = 1 << mp->m_sb.sb_dirblklog; 57 mp->m_dirblkfsbs = 1 << mp->m_sb.sb_dirblklog;
58 mp->m_dirdatablk = XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_DATA_FIRSTDB(mp)); 58 mp->m_dirdatablk = xfs_dir2_db_to_da(mp, XFS_DIR2_DATA_FIRSTDB(mp));
59 mp->m_dirleafblk = XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_LEAF_FIRSTDB(mp)); 59 mp->m_dirleafblk = xfs_dir2_db_to_da(mp, XFS_DIR2_LEAF_FIRSTDB(mp));
60 mp->m_dirfreeblk = XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_FREE_FIRSTDB(mp)); 60 mp->m_dirfreeblk = xfs_dir2_db_to_da(mp, XFS_DIR2_FREE_FIRSTDB(mp));
61 mp->m_attr_node_ents = 61 mp->m_attr_node_ents =
62 (mp->m_sb.sb_blocksize - (uint)sizeof(xfs_da_node_hdr_t)) / 62 (mp->m_sb.sb_blocksize - (uint)sizeof(xfs_da_node_hdr_t)) /
63 (uint)sizeof(xfs_da_node_entry_t); 63 (uint)sizeof(xfs_da_node_entry_t);
@@ -554,7 +554,7 @@ xfs_dir2_grow_inode(
554 */ 554 */
555 if (mapp != &map) 555 if (mapp != &map)
556 kmem_free(mapp, sizeof(*mapp) * count); 556 kmem_free(mapp, sizeof(*mapp) * count);
557 *dbp = XFS_DIR2_DA_TO_DB(mp, (xfs_dablk_t)bno); 557 *dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno);
558 /* 558 /*
559 * Update file's size if this is the data space and it grew. 559 * Update file's size if this is the data space and it grew.
560 */ 560 */
@@ -706,7 +706,7 @@ xfs_dir2_shrink_inode(
706 dp = args->dp; 706 dp = args->dp;
707 mp = dp->i_mount; 707 mp = dp->i_mount;
708 tp = args->trans; 708 tp = args->trans;
709 da = XFS_DIR2_DB_TO_DA(mp, db); 709 da = xfs_dir2_db_to_da(mp, db);
710 /* 710 /*
711 * Unmap the fsblock(s). 711 * Unmap the fsblock(s).
712 */ 712 */
@@ -742,7 +742,7 @@ xfs_dir2_shrink_inode(
742 /* 742 /*
743 * If the block isn't the last one in the directory, we're done. 743 * If the block isn't the last one in the directory, we're done.
744 */ 744 */
745 if (dp->i_d.di_size > XFS_DIR2_DB_OFF_TO_BYTE(mp, db + 1, 0)) 745 if (dp->i_d.di_size > xfs_dir2_db_off_to_byte(mp, db + 1, 0))
746 return 0; 746 return 0;
747 bno = da; 747 bno = da;
748 if ((error = xfs_bmap_last_before(tp, dp, &bno, XFS_DATA_FORK))) { 748 if ((error = xfs_bmap_last_before(tp, dp, &bno, XFS_DATA_FORK))) {
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 3accc1dcd6c..e4df1aaae2a 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -115,13 +115,13 @@ xfs_dir2_block_addname(
115 xfs_da_brelse(tp, bp); 115 xfs_da_brelse(tp, bp);
116 return XFS_ERROR(EFSCORRUPTED); 116 return XFS_ERROR(EFSCORRUPTED);
117 } 117 }
118 len = XFS_DIR2_DATA_ENTSIZE(args->namelen); 118 len = xfs_dir2_data_entsize(args->namelen);
119 /* 119 /*
120 * Set up pointers to parts of the block. 120 * Set up pointers to parts of the block.
121 */ 121 */
122 bf = block->hdr.bestfree; 122 bf = block->hdr.bestfree;
123 btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); 123 btp = xfs_dir2_block_tail_p(mp, block);
124 blp = XFS_DIR2_BLOCK_LEAF_P(btp); 124 blp = xfs_dir2_block_leaf_p(btp);
125 /* 125 /*
126 * No stale entries? Need space for entry and new leaf. 126 * No stale entries? Need space for entry and new leaf.
127 */ 127 */
@@ -396,7 +396,7 @@ xfs_dir2_block_addname(
396 * Fill in the leaf entry. 396 * Fill in the leaf entry.
397 */ 397 */
398 blp[mid].hashval = cpu_to_be32(args->hashval); 398 blp[mid].hashval = cpu_to_be32(args->hashval);
399 blp[mid].address = cpu_to_be32(XFS_DIR2_BYTE_TO_DATAPTR(mp, 399 blp[mid].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
400 (char *)dep - (char *)block)); 400 (char *)dep - (char *)block));
401 xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh); 401 xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh);
402 /* 402 /*
@@ -411,7 +411,7 @@ xfs_dir2_block_addname(
411 dep->inumber = cpu_to_be64(args->inumber); 411 dep->inumber = cpu_to_be64(args->inumber);
412 dep->namelen = args->namelen; 412 dep->namelen = args->namelen;
413 memcpy(dep->name, args->name, args->namelen); 413 memcpy(dep->name, args->name, args->namelen);
414 tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); 414 tagp = xfs_dir2_data_entry_tag_p(dep);
415 *tagp = cpu_to_be16((char *)dep - (char *)block); 415 *tagp = cpu_to_be16((char *)dep - (char *)block);
416 /* 416 /*
417 * Clean up the bestfree array and log the header, tail, and entry. 417 * Clean up the bestfree array and log the header, tail, and entry.
@@ -455,7 +455,7 @@ xfs_dir2_block_getdents(
455 /* 455 /*
456 * If the block number in the offset is out of range, we're done. 456 * If the block number in the offset is out of range, we're done.
457 */ 457 */
458 if (XFS_DIR2_DATAPTR_TO_DB(mp, uio->uio_offset) > mp->m_dirdatablk) { 458 if (xfs_dir2_dataptr_to_db(mp, uio->uio_offset) > mp->m_dirdatablk) {
459 *eofp = 1; 459 *eofp = 1;
460 return 0; 460 return 0;
461 } 461 }
@@ -471,15 +471,15 @@ xfs_dir2_block_getdents(
471 * Extract the byte offset we start at from the seek pointer. 471 * Extract the byte offset we start at from the seek pointer.
472 * We'll skip entries before this. 472 * We'll skip entries before this.
473 */ 473 */
474 wantoff = XFS_DIR2_DATAPTR_TO_OFF(mp, uio->uio_offset); 474 wantoff = xfs_dir2_dataptr_to_off(mp, uio->uio_offset);
475 block = bp->data; 475 block = bp->data;
476 xfs_dir2_data_check(dp, bp); 476 xfs_dir2_data_check(dp, bp);
477 /* 477 /*
478 * Set up values for the loop. 478 * Set up values for the loop.
479 */ 479 */
480 btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); 480 btp = xfs_dir2_block_tail_p(mp, block);
481 ptr = (char *)block->u; 481 ptr = (char *)block->u;
482 endptr = (char *)XFS_DIR2_BLOCK_LEAF_P(btp); 482 endptr = (char *)xfs_dir2_block_leaf_p(btp);
483 p.dbp = dbp; 483 p.dbp = dbp;
484 p.put = put; 484 p.put = put;
485 p.uio = uio; 485 p.uio = uio;
@@ -502,7 +502,7 @@ xfs_dir2_block_getdents(
502 /* 502 /*
503 * Bump pointer for the next iteration. 503 * Bump pointer for the next iteration.
504 */ 504 */
505 ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen); 505 ptr += xfs_dir2_data_entsize(dep->namelen);
506 /* 506 /*
507 * The entry is before the desired starting point, skip it. 507 * The entry is before the desired starting point, skip it.
508 */ 508 */
@@ -513,7 +513,7 @@ xfs_dir2_block_getdents(
513 */ 513 */
514 p.namelen = dep->namelen; 514 p.namelen = dep->namelen;
515 515
516 p.cook = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk, 516 p.cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
517 ptr - (char *)block); 517 ptr - (char *)block);
518 p.ino = be64_to_cpu(dep->inumber); 518 p.ino = be64_to_cpu(dep->inumber);
519#if XFS_BIG_INUMS 519#if XFS_BIG_INUMS
@@ -531,7 +531,7 @@ xfs_dir2_block_getdents(
531 */ 531 */
532 if (!p.done) { 532 if (!p.done) {
533 uio->uio_offset = 533 uio->uio_offset =
534 XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk, 534 xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
535 (char *)dep - (char *)block); 535 (char *)dep - (char *)block);
536 xfs_da_brelse(tp, bp); 536 xfs_da_brelse(tp, bp);
537 return error; 537 return error;
@@ -545,7 +545,7 @@ xfs_dir2_block_getdents(
545 *eofp = 1; 545 *eofp = 1;
546 546
547 uio->uio_offset = 547 uio->uio_offset =
548 XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk + 1, 0); 548 xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0);
549 549
550 xfs_da_brelse(tp, bp); 550 xfs_da_brelse(tp, bp);
551 551
@@ -569,8 +569,8 @@ xfs_dir2_block_log_leaf(
569 569
570 mp = tp->t_mountp; 570 mp = tp->t_mountp;
571 block = bp->data; 571 block = bp->data;
572 btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); 572 btp = xfs_dir2_block_tail_p(mp, block);
573 blp = XFS_DIR2_BLOCK_LEAF_P(btp); 573 blp = xfs_dir2_block_leaf_p(btp);
574 xfs_da_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)block), 574 xfs_da_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)block),
575 (uint)((char *)&blp[last + 1] - (char *)block - 1)); 575 (uint)((char *)&blp[last + 1] - (char *)block - 1));
576} 576}
@@ -589,7 +589,7 @@ xfs_dir2_block_log_tail(
589 589
590 mp = tp->t_mountp; 590 mp = tp->t_mountp;
591 block = bp->data; 591 block = bp->data;
592 btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); 592 btp = xfs_dir2_block_tail_p(mp, block);
593 xfs_da_log_buf(tp, bp, (uint)((char *)btp - (char *)block), 593 xfs_da_log_buf(tp, bp, (uint)((char *)btp - (char *)block),
594 (uint)((char *)(btp + 1) - (char *)block - 1)); 594 (uint)((char *)(btp + 1) - (char *)block - 1));
595} 595}
@@ -623,13 +623,13 @@ xfs_dir2_block_lookup(
623 mp = dp->i_mount; 623 mp = dp->i_mount;
624 block = bp->data; 624 block = bp->data;
625 xfs_dir2_data_check(dp, bp); 625 xfs_dir2_data_check(dp, bp);
626 btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); 626 btp = xfs_dir2_block_tail_p(mp, block);
627 blp = XFS_DIR2_BLOCK_LEAF_P(btp); 627 blp = xfs_dir2_block_leaf_p(btp);
628 /* 628 /*
629 * Get the offset from the leaf entry, to point to the data. 629 * Get the offset from the leaf entry, to point to the data.
630 */ 630 */
631 dep = (xfs_dir2_data_entry_t *) 631 dep = (xfs_dir2_data_entry_t *)
632 ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(blp[ent].address))); 632 ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
633 /* 633 /*
634 * Fill in inode number, release the block. 634 * Fill in inode number, release the block.
635 */ 635 */
@@ -675,8 +675,8 @@ xfs_dir2_block_lookup_int(
675 ASSERT(bp != NULL); 675 ASSERT(bp != NULL);
676 block = bp->data; 676 block = bp->data;
677 xfs_dir2_data_check(dp, bp); 677 xfs_dir2_data_check(dp, bp);
678 btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); 678 btp = xfs_dir2_block_tail_p(mp, block);
679 blp = XFS_DIR2_BLOCK_LEAF_P(btp); 679 blp = xfs_dir2_block_leaf_p(btp);
680 /* 680 /*
681 * Loop doing a binary search for our hash value. 681 * Loop doing a binary search for our hash value.
682 * Find our entry, ENOENT if it's not there. 682 * Find our entry, ENOENT if it's not there.
@@ -713,7 +713,7 @@ xfs_dir2_block_lookup_int(
713 * Get pointer to the entry from the leaf. 713 * Get pointer to the entry from the leaf.
714 */ 714 */
715 dep = (xfs_dir2_data_entry_t *) 715 dep = (xfs_dir2_data_entry_t *)
716 ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, addr)); 716 ((char *)block + xfs_dir2_dataptr_to_off(mp, addr));
717 /* 717 /*
718 * Compare, if it's right give back buffer & entry number. 718 * Compare, if it's right give back buffer & entry number.
719 */ 719 */
@@ -768,20 +768,20 @@ xfs_dir2_block_removename(
768 tp = args->trans; 768 tp = args->trans;
769 mp = dp->i_mount; 769 mp = dp->i_mount;
770 block = bp->data; 770 block = bp->data;
771 btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); 771 btp = xfs_dir2_block_tail_p(mp, block);
772 blp = XFS_DIR2_BLOCK_LEAF_P(btp); 772 blp = xfs_dir2_block_leaf_p(btp);
773 /* 773 /*
774 * Point to the data entry using the leaf entry. 774 * Point to the data entry using the leaf entry.
775 */ 775 */
776 dep = (xfs_dir2_data_entry_t *) 776 dep = (xfs_dir2_data_entry_t *)
777 ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(blp[ent].address))); 777 ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
778 /* 778 /*
779 * Mark the data entry's space free. 779 * Mark the data entry's space free.
780 */ 780 */
781 needlog = needscan = 0; 781 needlog = needscan = 0;
782 xfs_dir2_data_make_free(tp, bp, 782 xfs_dir2_data_make_free(tp, bp,
783 (xfs_dir2_data_aoff_t)((char *)dep - (char *)block), 783 (xfs_dir2_data_aoff_t)((char *)dep - (char *)block),
784 XFS_DIR2_DATA_ENTSIZE(dep->namelen), &needlog, &needscan); 784 xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
785 /* 785 /*
786 * Fix up the block tail. 786 * Fix up the block tail.
787 */ 787 */
@@ -843,13 +843,13 @@ xfs_dir2_block_replace(
843 dp = args->dp; 843 dp = args->dp;
844 mp = dp->i_mount; 844 mp = dp->i_mount;
845 block = bp->data; 845 block = bp->data;
846 btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); 846 btp = xfs_dir2_block_tail_p(mp, block);
847 blp = XFS_DIR2_BLOCK_LEAF_P(btp); 847 blp = xfs_dir2_block_leaf_p(btp);
848 /* 848 /*
849 * Point to the data entry we need to change. 849 * Point to the data entry we need to change.
850 */ 850 */
851 dep = (xfs_dir2_data_entry_t *) 851 dep = (xfs_dir2_data_entry_t *)
852 ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(blp[ent].address))); 852 ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
853 ASSERT(be64_to_cpu(dep->inumber) != args->inumber); 853 ASSERT(be64_to_cpu(dep->inumber) != args->inumber);
854 /* 854 /*
855 * Change the inode number to the new value. 855 * Change the inode number to the new value.
@@ -912,7 +912,7 @@ xfs_dir2_leaf_to_block(
912 mp = dp->i_mount; 912 mp = dp->i_mount;
913 leaf = lbp->data; 913 leaf = lbp->data;
914 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC); 914 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC);
915 ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); 915 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
916 /* 916 /*
917 * If there are data blocks other than the first one, take this 917 * If there are data blocks other than the first one, take this
918 * opportunity to remove trailing empty data blocks that may have 918 * opportunity to remove trailing empty data blocks that may have
@@ -920,7 +920,7 @@ xfs_dir2_leaf_to_block(
920 * These will show up in the leaf bests table. 920 * These will show up in the leaf bests table.
921 */ 921 */
922 while (dp->i_d.di_size > mp->m_dirblksize) { 922 while (dp->i_d.di_size > mp->m_dirblksize) {
923 bestsp = XFS_DIR2_LEAF_BESTS_P(ltp); 923 bestsp = xfs_dir2_leaf_bests_p(ltp);
924 if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) == 924 if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) ==
925 mp->m_dirblksize - (uint)sizeof(block->hdr)) { 925 mp->m_dirblksize - (uint)sizeof(block->hdr)) {
926 if ((error = 926 if ((error =
@@ -974,14 +974,14 @@ xfs_dir2_leaf_to_block(
974 /* 974 /*
975 * Initialize the block tail. 975 * Initialize the block tail.
976 */ 976 */
977 btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); 977 btp = xfs_dir2_block_tail_p(mp, block);
978 btp->count = cpu_to_be32(be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale)); 978 btp->count = cpu_to_be32(be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale));
979 btp->stale = 0; 979 btp->stale = 0;
980 xfs_dir2_block_log_tail(tp, dbp); 980 xfs_dir2_block_log_tail(tp, dbp);
981 /* 981 /*
982 * Initialize the block leaf area. We compact out stale entries. 982 * Initialize the block leaf area. We compact out stale entries.
983 */ 983 */
984 lep = XFS_DIR2_BLOCK_LEAF_P(btp); 984 lep = xfs_dir2_block_leaf_p(btp);
985 for (from = to = 0; from < be16_to_cpu(leaf->hdr.count); from++) { 985 for (from = to = 0; from < be16_to_cpu(leaf->hdr.count); from++) {
986 if (be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR) 986 if (be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR)
987 continue; 987 continue;
@@ -1067,7 +1067,7 @@ xfs_dir2_sf_to_block(
1067 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); 1067 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
1068 ASSERT(dp->i_df.if_u1.if_data != NULL); 1068 ASSERT(dp->i_df.if_u1.if_data != NULL);
1069 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 1069 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
1070 ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count)); 1070 ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
1071 /* 1071 /*
1072 * Copy the directory into the stack buffer. 1072 * Copy the directory into the stack buffer.
1073 * Then pitch the incore inode data so we can make extents. 1073 * Then pitch the incore inode data so we can make extents.
@@ -1119,10 +1119,10 @@ xfs_dir2_sf_to_block(
1119 /* 1119 /*
1120 * Fill in the tail. 1120 * Fill in the tail.
1121 */ 1121 */
1122 btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); 1122 btp = xfs_dir2_block_tail_p(mp, block);
1123 btp->count = cpu_to_be32(sfp->hdr.count + 2); /* ., .. */ 1123 btp->count = cpu_to_be32(sfp->hdr.count + 2); /* ., .. */
1124 btp->stale = 0; 1124 btp->stale = 0;
1125 blp = XFS_DIR2_BLOCK_LEAF_P(btp); 1125 blp = xfs_dir2_block_leaf_p(btp);
1126 endoffset = (uint)((char *)blp - (char *)block); 1126 endoffset = (uint)((char *)blp - (char *)block);
1127 /* 1127 /*
1128 * Remove the freespace, we'll manage it. 1128 * Remove the freespace, we'll manage it.
@@ -1138,25 +1138,25 @@ xfs_dir2_sf_to_block(
1138 dep->inumber = cpu_to_be64(dp->i_ino); 1138 dep->inumber = cpu_to_be64(dp->i_ino);
1139 dep->namelen = 1; 1139 dep->namelen = 1;
1140 dep->name[0] = '.'; 1140 dep->name[0] = '.';
1141 tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); 1141 tagp = xfs_dir2_data_entry_tag_p(dep);
1142 *tagp = cpu_to_be16((char *)dep - (char *)block); 1142 *tagp = cpu_to_be16((char *)dep - (char *)block);
1143 xfs_dir2_data_log_entry(tp, bp, dep); 1143 xfs_dir2_data_log_entry(tp, bp, dep);
1144 blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot); 1144 blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot);
1145 blp[0].address = cpu_to_be32(XFS_DIR2_BYTE_TO_DATAPTR(mp, 1145 blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
1146 (char *)dep - (char *)block)); 1146 (char *)dep - (char *)block));
1147 /* 1147 /*
1148 * Create entry for .. 1148 * Create entry for ..
1149 */ 1149 */
1150 dep = (xfs_dir2_data_entry_t *) 1150 dep = (xfs_dir2_data_entry_t *)
1151 ((char *)block + XFS_DIR2_DATA_DOTDOT_OFFSET); 1151 ((char *)block + XFS_DIR2_DATA_DOTDOT_OFFSET);
1152 dep->inumber = cpu_to_be64(XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent)); 1152 dep->inumber = cpu_to_be64(xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent));
1153 dep->namelen = 2; 1153 dep->namelen = 2;
1154 dep->name[0] = dep->name[1] = '.'; 1154 dep->name[0] = dep->name[1] = '.';
1155 tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); 1155 tagp = xfs_dir2_data_entry_tag_p(dep);
1156 *tagp = cpu_to_be16((char *)dep - (char *)block); 1156 *tagp = cpu_to_be16((char *)dep - (char *)block);
1157 xfs_dir2_data_log_entry(tp, bp, dep); 1157 xfs_dir2_data_log_entry(tp, bp, dep);
1158 blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot); 1158 blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot);
1159 blp[1].address = cpu_to_be32(XFS_DIR2_BYTE_TO_DATAPTR(mp, 1159 blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
1160 (char *)dep - (char *)block)); 1160 (char *)dep - (char *)block));
1161 offset = XFS_DIR2_DATA_FIRST_OFFSET; 1161 offset = XFS_DIR2_DATA_FIRST_OFFSET;
1162 /* 1162 /*
@@ -1165,7 +1165,7 @@ xfs_dir2_sf_to_block(
1165 if ((i = 0) == sfp->hdr.count) 1165 if ((i = 0) == sfp->hdr.count)
1166 sfep = NULL; 1166 sfep = NULL;
1167 else 1167 else
1168 sfep = XFS_DIR2_SF_FIRSTENTRY(sfp); 1168 sfep = xfs_dir2_sf_firstentry(sfp);
1169 /* 1169 /*
1170 * Need to preserve the existing offset values in the sf directory. 1170 * Need to preserve the existing offset values in the sf directory.
1171 * Insert holes (unused entries) where necessary. 1171 * Insert holes (unused entries) where necessary.
@@ -1177,7 +1177,7 @@ xfs_dir2_sf_to_block(
1177 if (sfep == NULL) 1177 if (sfep == NULL)
1178 newoffset = endoffset; 1178 newoffset = endoffset;
1179 else 1179 else
1180 newoffset = XFS_DIR2_SF_GET_OFFSET(sfep); 1180 newoffset = xfs_dir2_sf_get_offset(sfep);
1181 /* 1181 /*
1182 * There should be a hole here, make one. 1182 * There should be a hole here, make one.
1183 */ 1183 */
@@ -1186,7 +1186,7 @@ xfs_dir2_sf_to_block(
1186 ((char *)block + offset); 1186 ((char *)block + offset);
1187 dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); 1187 dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
1188 dup->length = cpu_to_be16(newoffset - offset); 1188 dup->length = cpu_to_be16(newoffset - offset);
1189 *XFS_DIR2_DATA_UNUSED_TAG_P(dup) = cpu_to_be16( 1189 *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16(
1190 ((char *)dup - (char *)block)); 1190 ((char *)dup - (char *)block));
1191 xfs_dir2_data_log_unused(tp, bp, dup); 1191 xfs_dir2_data_log_unused(tp, bp, dup);
1192 (void)xfs_dir2_data_freeinsert((xfs_dir2_data_t *)block, 1192 (void)xfs_dir2_data_freeinsert((xfs_dir2_data_t *)block,
@@ -1198,22 +1198,22 @@ xfs_dir2_sf_to_block(
1198 * Copy a real entry. 1198 * Copy a real entry.
1199 */ 1199 */
1200 dep = (xfs_dir2_data_entry_t *)((char *)block + newoffset); 1200 dep = (xfs_dir2_data_entry_t *)((char *)block + newoffset);
1201 dep->inumber = cpu_to_be64(XFS_DIR2_SF_GET_INUMBER(sfp, 1201 dep->inumber = cpu_to_be64(xfs_dir2_sf_get_inumber(sfp,
1202 XFS_DIR2_SF_INUMBERP(sfep))); 1202 xfs_dir2_sf_inumberp(sfep)));
1203 dep->namelen = sfep->namelen; 1203 dep->namelen = sfep->namelen;
1204 memcpy(dep->name, sfep->name, dep->namelen); 1204 memcpy(dep->name, sfep->name, dep->namelen);
1205 tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); 1205 tagp = xfs_dir2_data_entry_tag_p(dep);
1206 *tagp = cpu_to_be16((char *)dep - (char *)block); 1206 *tagp = cpu_to_be16((char *)dep - (char *)block);
1207 xfs_dir2_data_log_entry(tp, bp, dep); 1207 xfs_dir2_data_log_entry(tp, bp, dep);
1208 blp[2 + i].hashval = cpu_to_be32(xfs_da_hashname( 1208 blp[2 + i].hashval = cpu_to_be32(xfs_da_hashname(
1209 (char *)sfep->name, sfep->namelen)); 1209 (char *)sfep->name, sfep->namelen));
1210 blp[2 + i].address = cpu_to_be32(XFS_DIR2_BYTE_TO_DATAPTR(mp, 1210 blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
1211 (char *)dep - (char *)block)); 1211 (char *)dep - (char *)block));
1212 offset = (int)((char *)(tagp + 1) - (char *)block); 1212 offset = (int)((char *)(tagp + 1) - (char *)block);
1213 if (++i == sfp->hdr.count) 1213 if (++i == sfp->hdr.count)
1214 sfep = NULL; 1214 sfep = NULL;
1215 else 1215 else
1216 sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep); 1216 sfep = xfs_dir2_sf_nextentry(sfp, sfep);
1217 } 1217 }
1218 /* Done with the temporary buffer */ 1218 /* Done with the temporary buffer */
1219 kmem_free(buf, buf_len); 1219 kmem_free(buf, buf_len);
diff --git a/fs/xfs/xfs_dir2_block.h b/fs/xfs/xfs_dir2_block.h
index 6722effd0b2..e7c2606161e 100644
--- a/fs/xfs/xfs_dir2_block.h
+++ b/fs/xfs/xfs_dir2_block.h
@@ -60,7 +60,6 @@ typedef struct xfs_dir2_block {
60/* 60/*
61 * Pointer to the leaf header embedded in a data block (1-block format) 61 * Pointer to the leaf header embedded in a data block (1-block format)
62 */ 62 */
63#define XFS_DIR2_BLOCK_TAIL_P(mp,block) xfs_dir2_block_tail_p(mp,block)
64static inline xfs_dir2_block_tail_t * 63static inline xfs_dir2_block_tail_t *
65xfs_dir2_block_tail_p(struct xfs_mount *mp, xfs_dir2_block_t *block) 64xfs_dir2_block_tail_p(struct xfs_mount *mp, xfs_dir2_block_t *block)
66{ 65{
@@ -71,7 +70,6 @@ xfs_dir2_block_tail_p(struct xfs_mount *mp, xfs_dir2_block_t *block)
71/* 70/*
72 * Pointer to the leaf entries embedded in a data block (1-block format) 71 * Pointer to the leaf entries embedded in a data block (1-block format)
73 */ 72 */
74#define XFS_DIR2_BLOCK_LEAF_P(btp) xfs_dir2_block_leaf_p(btp)
75static inline struct xfs_dir2_leaf_entry * 73static inline struct xfs_dir2_leaf_entry *
76xfs_dir2_block_leaf_p(xfs_dir2_block_tail_t *btp) 74xfs_dir2_block_leaf_p(xfs_dir2_block_tail_t *btp)
77{ 75{
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index c211c37ef67..7ebe295bd6d 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -72,8 +72,8 @@ xfs_dir2_data_check(
72 bf = d->hdr.bestfree; 72 bf = d->hdr.bestfree;
73 p = (char *)d->u; 73 p = (char *)d->u;
74 if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { 74 if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) {
75 btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)d); 75 btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d);
76 lep = XFS_DIR2_BLOCK_LEAF_P(btp); 76 lep = xfs_dir2_block_leaf_p(btp);
77 endp = (char *)lep; 77 endp = (char *)lep;
78 } else 78 } else
79 endp = (char *)d + mp->m_dirblksize; 79 endp = (char *)d + mp->m_dirblksize;
@@ -107,7 +107,7 @@ xfs_dir2_data_check(
107 */ 107 */
108 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { 108 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
109 ASSERT(lastfree == 0); 109 ASSERT(lastfree == 0);
110 ASSERT(be16_to_cpu(*XFS_DIR2_DATA_UNUSED_TAG_P(dup)) == 110 ASSERT(be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) ==
111 (char *)dup - (char *)d); 111 (char *)dup - (char *)d);
112 dfp = xfs_dir2_data_freefind(d, dup); 112 dfp = xfs_dir2_data_freefind(d, dup);
113 if (dfp) { 113 if (dfp) {
@@ -131,12 +131,12 @@ xfs_dir2_data_check(
131 dep = (xfs_dir2_data_entry_t *)p; 131 dep = (xfs_dir2_data_entry_t *)p;
132 ASSERT(dep->namelen != 0); 132 ASSERT(dep->namelen != 0);
133 ASSERT(xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)) == 0); 133 ASSERT(xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)) == 0);
134 ASSERT(be16_to_cpu(*XFS_DIR2_DATA_ENTRY_TAG_P(dep)) == 134 ASSERT(be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep)) ==
135 (char *)dep - (char *)d); 135 (char *)dep - (char *)d);
136 count++; 136 count++;
137 lastfree = 0; 137 lastfree = 0;
138 if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { 138 if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) {
139 addr = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk, 139 addr = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
140 (xfs_dir2_data_aoff_t) 140 (xfs_dir2_data_aoff_t)
141 ((char *)dep - (char *)d)); 141 ((char *)dep - (char *)d));
142 hash = xfs_da_hashname((char *)dep->name, dep->namelen); 142 hash = xfs_da_hashname((char *)dep->name, dep->namelen);
@@ -147,7 +147,7 @@ xfs_dir2_data_check(
147 } 147 }
148 ASSERT(i < be32_to_cpu(btp->count)); 148 ASSERT(i < be32_to_cpu(btp->count));
149 } 149 }
150 p += XFS_DIR2_DATA_ENTSIZE(dep->namelen); 150 p += xfs_dir2_data_entsize(dep->namelen);
151 } 151 }
152 /* 152 /*
153 * Need to have seen all the entries and all the bestfree slots. 153 * Need to have seen all the entries and all the bestfree slots.
@@ -346,8 +346,8 @@ xfs_dir2_data_freescan(
346 */ 346 */
347 p = (char *)d->u; 347 p = (char *)d->u;
348 if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { 348 if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) {
349 btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)d); 349 btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d);
350 endp = (char *)XFS_DIR2_BLOCK_LEAF_P(btp); 350 endp = (char *)xfs_dir2_block_leaf_p(btp);
351 } else 351 } else
352 endp = (char *)d + mp->m_dirblksize; 352 endp = (char *)d + mp->m_dirblksize;
353 /* 353 /*
@@ -360,7 +360,7 @@ xfs_dir2_data_freescan(
360 */ 360 */
361 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { 361 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
362 ASSERT((char *)dup - (char *)d == 362 ASSERT((char *)dup - (char *)d ==
363 be16_to_cpu(*XFS_DIR2_DATA_UNUSED_TAG_P(dup))); 363 be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)));
364 xfs_dir2_data_freeinsert(d, dup, loghead); 364 xfs_dir2_data_freeinsert(d, dup, loghead);
365 p += be16_to_cpu(dup->length); 365 p += be16_to_cpu(dup->length);
366 } 366 }
@@ -370,8 +370,8 @@ xfs_dir2_data_freescan(
370 else { 370 else {
371 dep = (xfs_dir2_data_entry_t *)p; 371 dep = (xfs_dir2_data_entry_t *)p;
372 ASSERT((char *)dep - (char *)d == 372 ASSERT((char *)dep - (char *)d ==
373 be16_to_cpu(*XFS_DIR2_DATA_ENTRY_TAG_P(dep))); 373 be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep)));
374 p += XFS_DIR2_DATA_ENTSIZE(dep->namelen); 374 p += xfs_dir2_data_entsize(dep->namelen);
375 } 375 }
376 } 376 }
377} 377}
@@ -402,7 +402,7 @@ xfs_dir2_data_init(
402 /* 402 /*
403 * Get the buffer set up for the block. 403 * Get the buffer set up for the block.
404 */ 404 */
405 error = xfs_da_get_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, blkno), -1, &bp, 405 error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, blkno), -1, &bp,
406 XFS_DATA_FORK); 406 XFS_DATA_FORK);
407 if (error) { 407 if (error) {
408 return error; 408 return error;
@@ -427,7 +427,7 @@ xfs_dir2_data_init(
427 t=mp->m_dirblksize - (uint)sizeof(d->hdr); 427 t=mp->m_dirblksize - (uint)sizeof(d->hdr);
428 d->hdr.bestfree[0].length = cpu_to_be16(t); 428 d->hdr.bestfree[0].length = cpu_to_be16(t);
429 dup->length = cpu_to_be16(t); 429 dup->length = cpu_to_be16(t);
430 *XFS_DIR2_DATA_UNUSED_TAG_P(dup) = cpu_to_be16((char *)dup - (char *)d); 430 *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)d);
431 /* 431 /*
432 * Log it and return it. 432 * Log it and return it.
433 */ 433 */
@@ -452,7 +452,7 @@ xfs_dir2_data_log_entry(
452 ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || 452 ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
453 be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); 453 be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
454 xfs_da_log_buf(tp, bp, (uint)((char *)dep - (char *)d), 454 xfs_da_log_buf(tp, bp, (uint)((char *)dep - (char *)d),
455 (uint)((char *)(XFS_DIR2_DATA_ENTRY_TAG_P(dep) + 1) - 455 (uint)((char *)(xfs_dir2_data_entry_tag_p(dep) + 1) -
456 (char *)d - 1)); 456 (char *)d - 1));
457} 457}
458 458
@@ -497,8 +497,8 @@ xfs_dir2_data_log_unused(
497 * Log the end (tag) of the unused entry. 497 * Log the end (tag) of the unused entry.
498 */ 498 */
499 xfs_da_log_buf(tp, bp, 499 xfs_da_log_buf(tp, bp,
500 (uint)((char *)XFS_DIR2_DATA_UNUSED_TAG_P(dup) - (char *)d), 500 (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)d),
501 (uint)((char *)XFS_DIR2_DATA_UNUSED_TAG_P(dup) - (char *)d + 501 (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)d +
502 sizeof(xfs_dir2_data_off_t) - 1)); 502 sizeof(xfs_dir2_data_off_t) - 1));
503} 503}
504 504
@@ -535,8 +535,8 @@ xfs_dir2_data_make_free(
535 xfs_dir2_block_tail_t *btp; /* block tail */ 535 xfs_dir2_block_tail_t *btp; /* block tail */
536 536
537 ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); 537 ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
538 btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)d); 538 btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d);
539 endptr = (char *)XFS_DIR2_BLOCK_LEAF_P(btp); 539 endptr = (char *)xfs_dir2_block_leaf_p(btp);
540 } 540 }
541 /* 541 /*
542 * If this isn't the start of the block, then back up to 542 * If this isn't the start of the block, then back up to
@@ -587,7 +587,7 @@ xfs_dir2_data_make_free(
587 * Fix up the new big freespace. 587 * Fix up the new big freespace.
588 */ 588 */
589 be16_add(&prevdup->length, len + be16_to_cpu(postdup->length)); 589 be16_add(&prevdup->length, len + be16_to_cpu(postdup->length));
590 *XFS_DIR2_DATA_UNUSED_TAG_P(prevdup) = 590 *xfs_dir2_data_unused_tag_p(prevdup) =
591 cpu_to_be16((char *)prevdup - (char *)d); 591 cpu_to_be16((char *)prevdup - (char *)d);
592 xfs_dir2_data_log_unused(tp, bp, prevdup); 592 xfs_dir2_data_log_unused(tp, bp, prevdup);
593 if (!needscan) { 593 if (!needscan) {
@@ -621,7 +621,7 @@ xfs_dir2_data_make_free(
621 else if (prevdup) { 621 else if (prevdup) {
622 dfp = xfs_dir2_data_freefind(d, prevdup); 622 dfp = xfs_dir2_data_freefind(d, prevdup);
623 be16_add(&prevdup->length, len); 623 be16_add(&prevdup->length, len);
624 *XFS_DIR2_DATA_UNUSED_TAG_P(prevdup) = 624 *xfs_dir2_data_unused_tag_p(prevdup) =
625 cpu_to_be16((char *)prevdup - (char *)d); 625 cpu_to_be16((char *)prevdup - (char *)d);
626 xfs_dir2_data_log_unused(tp, bp, prevdup); 626 xfs_dir2_data_log_unused(tp, bp, prevdup);
627 /* 627 /*
@@ -649,7 +649,7 @@ xfs_dir2_data_make_free(
649 newdup = (xfs_dir2_data_unused_t *)((char *)d + offset); 649 newdup = (xfs_dir2_data_unused_t *)((char *)d + offset);
650 newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); 650 newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
651 newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length)); 651 newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length));
652 *XFS_DIR2_DATA_UNUSED_TAG_P(newdup) = 652 *xfs_dir2_data_unused_tag_p(newdup) =
653 cpu_to_be16((char *)newdup - (char *)d); 653 cpu_to_be16((char *)newdup - (char *)d);
654 xfs_dir2_data_log_unused(tp, bp, newdup); 654 xfs_dir2_data_log_unused(tp, bp, newdup);
655 /* 655 /*
@@ -676,7 +676,7 @@ xfs_dir2_data_make_free(
676 newdup = (xfs_dir2_data_unused_t *)((char *)d + offset); 676 newdup = (xfs_dir2_data_unused_t *)((char *)d + offset);
677 newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); 677 newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
678 newdup->length = cpu_to_be16(len); 678 newdup->length = cpu_to_be16(len);
679 *XFS_DIR2_DATA_UNUSED_TAG_P(newdup) = 679 *xfs_dir2_data_unused_tag_p(newdup) =
680 cpu_to_be16((char *)newdup - (char *)d); 680 cpu_to_be16((char *)newdup - (char *)d);
681 xfs_dir2_data_log_unused(tp, bp, newdup); 681 xfs_dir2_data_log_unused(tp, bp, newdup);
682 (void)xfs_dir2_data_freeinsert(d, newdup, needlogp); 682 (void)xfs_dir2_data_freeinsert(d, newdup, needlogp);
@@ -712,7 +712,7 @@ xfs_dir2_data_use_free(
712 ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG); 712 ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG);
713 ASSERT(offset >= (char *)dup - (char *)d); 713 ASSERT(offset >= (char *)dup - (char *)d);
714 ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)d); 714 ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)d);
715 ASSERT((char *)dup - (char *)d == be16_to_cpu(*XFS_DIR2_DATA_UNUSED_TAG_P(dup))); 715 ASSERT((char *)dup - (char *)d == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)));
716 /* 716 /*
717 * Look up the entry in the bestfree table. 717 * Look up the entry in the bestfree table.
718 */ 718 */
@@ -745,7 +745,7 @@ xfs_dir2_data_use_free(
745 newdup = (xfs_dir2_data_unused_t *)((char *)d + offset + len); 745 newdup = (xfs_dir2_data_unused_t *)((char *)d + offset + len);
746 newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); 746 newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
747 newdup->length = cpu_to_be16(oldlen - len); 747 newdup->length = cpu_to_be16(oldlen - len);
748 *XFS_DIR2_DATA_UNUSED_TAG_P(newdup) = 748 *xfs_dir2_data_unused_tag_p(newdup) =
749 cpu_to_be16((char *)newdup - (char *)d); 749 cpu_to_be16((char *)newdup - (char *)d);
750 xfs_dir2_data_log_unused(tp, bp, newdup); 750 xfs_dir2_data_log_unused(tp, bp, newdup);
751 /* 751 /*
@@ -772,7 +772,7 @@ xfs_dir2_data_use_free(
772 else if (matchback) { 772 else if (matchback) {
773 newdup = dup; 773 newdup = dup;
774 newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup); 774 newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup);
775 *XFS_DIR2_DATA_UNUSED_TAG_P(newdup) = 775 *xfs_dir2_data_unused_tag_p(newdup) =
776 cpu_to_be16((char *)newdup - (char *)d); 776 cpu_to_be16((char *)newdup - (char *)d);
777 xfs_dir2_data_log_unused(tp, bp, newdup); 777 xfs_dir2_data_log_unused(tp, bp, newdup);
778 /* 778 /*
@@ -799,13 +799,13 @@ xfs_dir2_data_use_free(
799 else { 799 else {
800 newdup = dup; 800 newdup = dup;
801 newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup); 801 newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup);
802 *XFS_DIR2_DATA_UNUSED_TAG_P(newdup) = 802 *xfs_dir2_data_unused_tag_p(newdup) =
803 cpu_to_be16((char *)newdup - (char *)d); 803 cpu_to_be16((char *)newdup - (char *)d);
804 xfs_dir2_data_log_unused(tp, bp, newdup); 804 xfs_dir2_data_log_unused(tp, bp, newdup);
805 newdup2 = (xfs_dir2_data_unused_t *)((char *)d + offset + len); 805 newdup2 = (xfs_dir2_data_unused_t *)((char *)d + offset + len);
806 newdup2->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); 806 newdup2->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
807 newdup2->length = cpu_to_be16(oldlen - len - be16_to_cpu(newdup->length)); 807 newdup2->length = cpu_to_be16(oldlen - len - be16_to_cpu(newdup->length));
808 *XFS_DIR2_DATA_UNUSED_TAG_P(newdup2) = 808 *xfs_dir2_data_unused_tag_p(newdup2) =
809 cpu_to_be16((char *)newdup2 - (char *)d); 809 cpu_to_be16((char *)newdup2 - (char *)d);
810 xfs_dir2_data_log_unused(tp, bp, newdup2); 810 xfs_dir2_data_log_unused(tp, bp, newdup2);
811 /* 811 /*
diff --git a/fs/xfs/xfs_dir2_data.h b/fs/xfs/xfs_dir2_data.h
index c94c9099cfb..b816e025273 100644
--- a/fs/xfs/xfs_dir2_data.h
+++ b/fs/xfs/xfs_dir2_data.h
@@ -44,7 +44,7 @@ struct xfs_trans;
44#define XFS_DIR2_DATA_SPACE 0 44#define XFS_DIR2_DATA_SPACE 0
45#define XFS_DIR2_DATA_OFFSET (XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE) 45#define XFS_DIR2_DATA_OFFSET (XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE)
46#define XFS_DIR2_DATA_FIRSTDB(mp) \ 46#define XFS_DIR2_DATA_FIRSTDB(mp) \
47 XFS_DIR2_BYTE_TO_DB(mp, XFS_DIR2_DATA_OFFSET) 47 xfs_dir2_byte_to_db(mp, XFS_DIR2_DATA_OFFSET)
48 48
49/* 49/*
50 * Offsets of . and .. in data space (always block 0) 50 * Offsets of . and .. in data space (always block 0)
@@ -52,9 +52,9 @@ struct xfs_trans;
52#define XFS_DIR2_DATA_DOT_OFFSET \ 52#define XFS_DIR2_DATA_DOT_OFFSET \
53 ((xfs_dir2_data_aoff_t)sizeof(xfs_dir2_data_hdr_t)) 53 ((xfs_dir2_data_aoff_t)sizeof(xfs_dir2_data_hdr_t))
54#define XFS_DIR2_DATA_DOTDOT_OFFSET \ 54#define XFS_DIR2_DATA_DOTDOT_OFFSET \
55 (XFS_DIR2_DATA_DOT_OFFSET + XFS_DIR2_DATA_ENTSIZE(1)) 55 (XFS_DIR2_DATA_DOT_OFFSET + xfs_dir2_data_entsize(1))
56#define XFS_DIR2_DATA_FIRST_OFFSET \ 56#define XFS_DIR2_DATA_FIRST_OFFSET \
57 (XFS_DIR2_DATA_DOTDOT_OFFSET + XFS_DIR2_DATA_ENTSIZE(2)) 57 (XFS_DIR2_DATA_DOTDOT_OFFSET + xfs_dir2_data_entsize(2))
58 58
59/* 59/*
60 * Structures. 60 * Structures.
@@ -123,7 +123,6 @@ typedef struct xfs_dir2_data {
123/* 123/*
124 * Size of a data entry. 124 * Size of a data entry.
125 */ 125 */
126#define XFS_DIR2_DATA_ENTSIZE(n) xfs_dir2_data_entsize(n)
127static inline int xfs_dir2_data_entsize(int n) 126static inline int xfs_dir2_data_entsize(int n)
128{ 127{
129 return (int)roundup(offsetof(xfs_dir2_data_entry_t, name[0]) + (n) + \ 128 return (int)roundup(offsetof(xfs_dir2_data_entry_t, name[0]) + (n) + \
@@ -133,19 +132,16 @@ static inline int xfs_dir2_data_entsize(int n)
133/* 132/*
134 * Pointer to an entry's tag word. 133 * Pointer to an entry's tag word.
135 */ 134 */
136#define XFS_DIR2_DATA_ENTRY_TAG_P(dep) xfs_dir2_data_entry_tag_p(dep)
137static inline __be16 * 135static inline __be16 *
138xfs_dir2_data_entry_tag_p(xfs_dir2_data_entry_t *dep) 136xfs_dir2_data_entry_tag_p(xfs_dir2_data_entry_t *dep)
139{ 137{
140 return (__be16 *)((char *)dep + 138 return (__be16 *)((char *)dep +
141 XFS_DIR2_DATA_ENTSIZE(dep->namelen) - sizeof(__be16)); 139 xfs_dir2_data_entsize(dep->namelen) - sizeof(__be16));
142} 140}
143 141
144/* 142/*
145 * Pointer to a freespace's tag word. 143 * Pointer to a freespace's tag word.
146 */ 144 */
147#define XFS_DIR2_DATA_UNUSED_TAG_P(dup) \
148 xfs_dir2_data_unused_tag_p(dup)
149static inline __be16 * 145static inline __be16 *
150xfs_dir2_data_unused_tag_p(xfs_dir2_data_unused_t *dup) 146xfs_dir2_data_unused_tag_p(xfs_dir2_data_unused_t *dup)
151{ 147{
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index db14ea71459..1b73c9ad646 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -92,7 +92,7 @@ xfs_dir2_block_to_leaf(
92 if ((error = xfs_da_grow_inode(args, &blkno))) { 92 if ((error = xfs_da_grow_inode(args, &blkno))) {
93 return error; 93 return error;
94 } 94 }
95 ldb = XFS_DIR2_DA_TO_DB(mp, blkno); 95 ldb = xfs_dir2_da_to_db(mp, blkno);
96 ASSERT(ldb == XFS_DIR2_LEAF_FIRSTDB(mp)); 96 ASSERT(ldb == XFS_DIR2_LEAF_FIRSTDB(mp));
97 /* 97 /*
98 * Initialize the leaf block, get a buffer for it. 98 * Initialize the leaf block, get a buffer for it.
@@ -104,8 +104,8 @@ xfs_dir2_block_to_leaf(
104 leaf = lbp->data; 104 leaf = lbp->data;
105 block = dbp->data; 105 block = dbp->data;
106 xfs_dir2_data_check(dp, dbp); 106 xfs_dir2_data_check(dp, dbp);
107 btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); 107 btp = xfs_dir2_block_tail_p(mp, block);
108 blp = XFS_DIR2_BLOCK_LEAF_P(btp); 108 blp = xfs_dir2_block_leaf_p(btp);
109 /* 109 /*
110 * Set the counts in the leaf header. 110 * Set the counts in the leaf header.
111 */ 111 */
@@ -137,9 +137,9 @@ xfs_dir2_block_to_leaf(
137 /* 137 /*
138 * Set up leaf tail and bests table. 138 * Set up leaf tail and bests table.
139 */ 139 */
140 ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); 140 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
141 ltp->bestcount = cpu_to_be32(1); 141 ltp->bestcount = cpu_to_be32(1);
142 bestsp = XFS_DIR2_LEAF_BESTS_P(ltp); 142 bestsp = xfs_dir2_leaf_bests_p(ltp);
143 bestsp[0] = block->hdr.bestfree[0].length; 143 bestsp[0] = block->hdr.bestfree[0].length;
144 /* 144 /*
145 * Log the data header and leaf bests table. 145 * Log the data header and leaf bests table.
@@ -209,9 +209,9 @@ xfs_dir2_leaf_addname(
209 */ 209 */
210 index = xfs_dir2_leaf_search_hash(args, lbp); 210 index = xfs_dir2_leaf_search_hash(args, lbp);
211 leaf = lbp->data; 211 leaf = lbp->data;
212 ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); 212 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
213 bestsp = XFS_DIR2_LEAF_BESTS_P(ltp); 213 bestsp = xfs_dir2_leaf_bests_p(ltp);
214 length = XFS_DIR2_DATA_ENTSIZE(args->namelen); 214 length = xfs_dir2_data_entsize(args->namelen);
215 /* 215 /*
216 * See if there are any entries with the same hash value 216 * See if there are any entries with the same hash value
217 * and space in their block for the new entry. 217 * and space in their block for the new entry.
@@ -223,7 +223,7 @@ xfs_dir2_leaf_addname(
223 index++, lep++) { 223 index++, lep++) {
224 if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR) 224 if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
225 continue; 225 continue;
226 i = XFS_DIR2_DATAPTR_TO_DB(mp, be32_to_cpu(lep->address)); 226 i = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
227 ASSERT(i < be32_to_cpu(ltp->bestcount)); 227 ASSERT(i < be32_to_cpu(ltp->bestcount));
228 ASSERT(be16_to_cpu(bestsp[i]) != NULLDATAOFF); 228 ASSERT(be16_to_cpu(bestsp[i]) != NULLDATAOFF);
229 if (be16_to_cpu(bestsp[i]) >= length) { 229 if (be16_to_cpu(bestsp[i]) >= length) {
@@ -378,7 +378,7 @@ xfs_dir2_leaf_addname(
378 */ 378 */
379 else { 379 else {
380 if ((error = 380 if ((error =
381 xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, use_block), 381 xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, use_block),
382 -1, &dbp, XFS_DATA_FORK))) { 382 -1, &dbp, XFS_DATA_FORK))) {
383 xfs_da_brelse(tp, lbp); 383 xfs_da_brelse(tp, lbp);
384 return error; 384 return error;
@@ -407,7 +407,7 @@ xfs_dir2_leaf_addname(
407 dep->inumber = cpu_to_be64(args->inumber); 407 dep->inumber = cpu_to_be64(args->inumber);
408 dep->namelen = args->namelen; 408 dep->namelen = args->namelen;
409 memcpy(dep->name, args->name, dep->namelen); 409 memcpy(dep->name, args->name, dep->namelen);
410 tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); 410 tagp = xfs_dir2_data_entry_tag_p(dep);
411 *tagp = cpu_to_be16((char *)dep - (char *)data); 411 *tagp = cpu_to_be16((char *)dep - (char *)data);
412 /* 412 /*
413 * Need to scan fix up the bestfree table. 413 * Need to scan fix up the bestfree table.
@@ -529,7 +529,7 @@ xfs_dir2_leaf_addname(
529 * Fill in the new leaf entry. 529 * Fill in the new leaf entry.
530 */ 530 */
531 lep->hashval = cpu_to_be32(args->hashval); 531 lep->hashval = cpu_to_be32(args->hashval);
532 lep->address = cpu_to_be32(XFS_DIR2_DB_OFF_TO_DATAPTR(mp, use_block, 532 lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp, use_block,
533 be16_to_cpu(*tagp))); 533 be16_to_cpu(*tagp)));
534 /* 534 /*
535 * Log the leaf fields and give up the buffers. 535 * Log the leaf fields and give up the buffers.
@@ -567,13 +567,13 @@ xfs_dir2_leaf_check(
567 * Should factor in the size of the bests table as well. 567 * Should factor in the size of the bests table as well.
568 * We can deduce a value for that from di_size. 568 * We can deduce a value for that from di_size.
569 */ 569 */
570 ASSERT(be16_to_cpu(leaf->hdr.count) <= XFS_DIR2_MAX_LEAF_ENTS(mp)); 570 ASSERT(be16_to_cpu(leaf->hdr.count) <= xfs_dir2_max_leaf_ents(mp));
571 ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); 571 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
572 /* 572 /*
573 * Leaves and bests don't overlap. 573 * Leaves and bests don't overlap.
574 */ 574 */
575 ASSERT((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] <= 575 ASSERT((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] <=
576 (char *)XFS_DIR2_LEAF_BESTS_P(ltp)); 576 (char *)xfs_dir2_leaf_bests_p(ltp));
577 /* 577 /*
578 * Check hash value order, count stale entries. 578 * Check hash value order, count stale entries.
579 */ 579 */
@@ -815,12 +815,12 @@ xfs_dir2_leaf_getdents(
815 * Inside the loop we keep the main offset value as a byte offset 815 * Inside the loop we keep the main offset value as a byte offset
816 * in the directory file. 816 * in the directory file.
817 */ 817 */
818 curoff = XFS_DIR2_DATAPTR_TO_BYTE(mp, uio->uio_offset); 818 curoff = xfs_dir2_dataptr_to_byte(mp, uio->uio_offset);
819 /* 819 /*
820 * Force this conversion through db so we truncate the offset 820 * Force this conversion through db so we truncate the offset
821 * down to get the start of the data block. 821 * down to get the start of the data block.
822 */ 822 */
823 map_off = XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_BYTE_TO_DB(mp, curoff)); 823 map_off = xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, curoff));
824 /* 824 /*
825 * Loop over directory entries until we reach the end offset. 825 * Loop over directory entries until we reach the end offset.
826 * Get more blocks and readahead as necessary. 826 * Get more blocks and readahead as necessary.
@@ -870,7 +870,7 @@ xfs_dir2_leaf_getdents(
870 */ 870 */
871 if (1 + ra_want > map_blocks && 871 if (1 + ra_want > map_blocks &&
872 map_off < 872 map_off <
873 XFS_DIR2_BYTE_TO_DA(mp, XFS_DIR2_LEAF_OFFSET)) { 873 xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET)) {
874 /* 874 /*
875 * Get more bmaps, fill in after the ones 875 * Get more bmaps, fill in after the ones
876 * we already have in the table. 876 * we already have in the table.
@@ -878,7 +878,7 @@ xfs_dir2_leaf_getdents(
878 nmap = map_size - map_valid; 878 nmap = map_size - map_valid;
879 error = xfs_bmapi(tp, dp, 879 error = xfs_bmapi(tp, dp,
880 map_off, 880 map_off,
881 XFS_DIR2_BYTE_TO_DA(mp, 881 xfs_dir2_byte_to_da(mp,
882 XFS_DIR2_LEAF_OFFSET) - map_off, 882 XFS_DIR2_LEAF_OFFSET) - map_off,
883 XFS_BMAPI_METADATA, NULL, 0, 883 XFS_BMAPI_METADATA, NULL, 0,
884 &map[map_valid], &nmap, NULL, NULL); 884 &map[map_valid], &nmap, NULL, NULL);
@@ -903,7 +903,7 @@ xfs_dir2_leaf_getdents(
903 map[map_valid + nmap - 1].br_blockcount; 903 map[map_valid + nmap - 1].br_blockcount;
904 else 904 else
905 map_off = 905 map_off =
906 XFS_DIR2_BYTE_TO_DA(mp, 906 xfs_dir2_byte_to_da(mp,
907 XFS_DIR2_LEAF_OFFSET); 907 XFS_DIR2_LEAF_OFFSET);
908 /* 908 /*
909 * Look for holes in the mapping, and 909 * Look for holes in the mapping, and
@@ -931,14 +931,14 @@ xfs_dir2_leaf_getdents(
931 * No valid mappings, so no more data blocks. 931 * No valid mappings, so no more data blocks.
932 */ 932 */
933 if (!map_valid) { 933 if (!map_valid) {
934 curoff = XFS_DIR2_DA_TO_BYTE(mp, map_off); 934 curoff = xfs_dir2_da_to_byte(mp, map_off);
935 break; 935 break;
936 } 936 }
937 /* 937 /*
938 * Read the directory block starting at the first 938 * Read the directory block starting at the first
939 * mapping. 939 * mapping.
940 */ 940 */
941 curdb = XFS_DIR2_DA_TO_DB(mp, map->br_startoff); 941 curdb = xfs_dir2_da_to_db(mp, map->br_startoff);
942 error = xfs_da_read_buf(tp, dp, map->br_startoff, 942 error = xfs_da_read_buf(tp, dp, map->br_startoff,
943 map->br_blockcount >= mp->m_dirblkfsbs ? 943 map->br_blockcount >= mp->m_dirblkfsbs ?
944 XFS_FSB_TO_DADDR(mp, map->br_startblock) : 944 XFS_FSB_TO_DADDR(mp, map->br_startblock) :
@@ -1014,7 +1014,7 @@ xfs_dir2_leaf_getdents(
1014 /* 1014 /*
1015 * Having done a read, we need to set a new offset. 1015 * Having done a read, we need to set a new offset.
1016 */ 1016 */
1017 newoff = XFS_DIR2_DB_OFF_TO_BYTE(mp, curdb, 0); 1017 newoff = xfs_dir2_db_off_to_byte(mp, curdb, 0);
1018 /* 1018 /*
1019 * Start of the current block. 1019 * Start of the current block.
1020 */ 1020 */
@@ -1024,7 +1024,7 @@ xfs_dir2_leaf_getdents(
1024 * Make sure we're in the right block. 1024 * Make sure we're in the right block.
1025 */ 1025 */
1026 else if (curoff > newoff) 1026 else if (curoff > newoff)
1027 ASSERT(XFS_DIR2_BYTE_TO_DB(mp, curoff) == 1027 ASSERT(xfs_dir2_byte_to_db(mp, curoff) ==
1028 curdb); 1028 curdb);
1029 data = bp->data; 1029 data = bp->data;
1030 xfs_dir2_data_check(dp, bp); 1030 xfs_dir2_data_check(dp, bp);
@@ -1032,7 +1032,7 @@ xfs_dir2_leaf_getdents(
1032 * Find our position in the block. 1032 * Find our position in the block.
1033 */ 1033 */
1034 ptr = (char *)&data->u; 1034 ptr = (char *)&data->u;
1035 byteoff = XFS_DIR2_BYTE_TO_OFF(mp, curoff); 1035 byteoff = xfs_dir2_byte_to_off(mp, curoff);
1036 /* 1036 /*
1037 * Skip past the header. 1037 * Skip past the header.
1038 */ 1038 */
@@ -1054,15 +1054,15 @@ xfs_dir2_leaf_getdents(
1054 } 1054 }
1055 dep = (xfs_dir2_data_entry_t *)ptr; 1055 dep = (xfs_dir2_data_entry_t *)ptr;
1056 length = 1056 length =
1057 XFS_DIR2_DATA_ENTSIZE(dep->namelen); 1057 xfs_dir2_data_entsize(dep->namelen);
1058 ptr += length; 1058 ptr += length;
1059 } 1059 }
1060 /* 1060 /*
1061 * Now set our real offset. 1061 * Now set our real offset.
1062 */ 1062 */
1063 curoff = 1063 curoff =
1064 XFS_DIR2_DB_OFF_TO_BYTE(mp, 1064 xfs_dir2_db_off_to_byte(mp,
1065 XFS_DIR2_BYTE_TO_DB(mp, curoff), 1065 xfs_dir2_byte_to_db(mp, curoff),
1066 (char *)ptr - (char *)data); 1066 (char *)ptr - (char *)data);
1067 if (ptr >= (char *)data + mp->m_dirblksize) { 1067 if (ptr >= (char *)data + mp->m_dirblksize) {
1068 continue; 1068 continue;
@@ -1091,9 +1091,9 @@ xfs_dir2_leaf_getdents(
1091 1091
1092 p->namelen = dep->namelen; 1092 p->namelen = dep->namelen;
1093 1093
1094 length = XFS_DIR2_DATA_ENTSIZE(p->namelen); 1094 length = xfs_dir2_data_entsize(p->namelen);
1095 1095
1096 p->cook = XFS_DIR2_BYTE_TO_DATAPTR(mp, curoff + length); 1096 p->cook = xfs_dir2_byte_to_dataptr(mp, curoff + length);
1097 1097
1098 p->ino = be64_to_cpu(dep->inumber); 1098 p->ino = be64_to_cpu(dep->inumber);
1099#if XFS_BIG_INUMS 1099#if XFS_BIG_INUMS
@@ -1121,10 +1121,10 @@ xfs_dir2_leaf_getdents(
1121 * All done. Set output offset value to current offset. 1121 * All done. Set output offset value to current offset.
1122 */ 1122 */
1123 *eofp = eof; 1123 *eofp = eof;
1124 if (curoff > XFS_DIR2_DATAPTR_TO_BYTE(mp, XFS_DIR2_MAX_DATAPTR)) 1124 if (curoff > xfs_dir2_dataptr_to_byte(mp, XFS_DIR2_MAX_DATAPTR))
1125 uio->uio_offset = XFS_DIR2_MAX_DATAPTR; 1125 uio->uio_offset = XFS_DIR2_MAX_DATAPTR;
1126 else 1126 else
1127 uio->uio_offset = XFS_DIR2_BYTE_TO_DATAPTR(mp, curoff); 1127 uio->uio_offset = xfs_dir2_byte_to_dataptr(mp, curoff);
1128 kmem_free(map, map_size * sizeof(*map)); 1128 kmem_free(map, map_size * sizeof(*map));
1129 kmem_free(p, sizeof(*p)); 1129 kmem_free(p, sizeof(*p));
1130 if (bp) 1130 if (bp)
@@ -1159,7 +1159,7 @@ xfs_dir2_leaf_init(
1159 /* 1159 /*
1160 * Get the buffer for the block. 1160 * Get the buffer for the block.
1161 */ 1161 */
1162 error = xfs_da_get_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, bno), -1, &bp, 1162 error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, bno), -1, &bp,
1163 XFS_DATA_FORK); 1163 XFS_DATA_FORK);
1164 if (error) { 1164 if (error) {
1165 return error; 1165 return error;
@@ -1181,7 +1181,7 @@ xfs_dir2_leaf_init(
1181 * the block. 1181 * the block.
1182 */ 1182 */
1183 if (magic == XFS_DIR2_LEAF1_MAGIC) { 1183 if (magic == XFS_DIR2_LEAF1_MAGIC) {
1184 ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); 1184 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1185 ltp->bestcount = 0; 1185 ltp->bestcount = 0;
1186 xfs_dir2_leaf_log_tail(tp, bp); 1186 xfs_dir2_leaf_log_tail(tp, bp);
1187 } 1187 }
@@ -1206,9 +1206,9 @@ xfs_dir2_leaf_log_bests(
1206 1206
1207 leaf = bp->data; 1207 leaf = bp->data;
1208 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC); 1208 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC);
1209 ltp = XFS_DIR2_LEAF_TAIL_P(tp->t_mountp, leaf); 1209 ltp = xfs_dir2_leaf_tail_p(tp->t_mountp, leaf);
1210 firstb = XFS_DIR2_LEAF_BESTS_P(ltp) + first; 1210 firstb = xfs_dir2_leaf_bests_p(ltp) + first;
1211 lastb = XFS_DIR2_LEAF_BESTS_P(ltp) + last; 1211 lastb = xfs_dir2_leaf_bests_p(ltp) + last;
1212 xfs_da_log_buf(tp, bp, (uint)((char *)firstb - (char *)leaf), 1212 xfs_da_log_buf(tp, bp, (uint)((char *)firstb - (char *)leaf),
1213 (uint)((char *)lastb - (char *)leaf + sizeof(*lastb) - 1)); 1213 (uint)((char *)lastb - (char *)leaf + sizeof(*lastb) - 1));
1214} 1214}
@@ -1268,7 +1268,7 @@ xfs_dir2_leaf_log_tail(
1268 mp = tp->t_mountp; 1268 mp = tp->t_mountp;
1269 leaf = bp->data; 1269 leaf = bp->data;
1270 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC); 1270 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC);
1271 ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); 1271 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1272 xfs_da_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf), 1272 xfs_da_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf),
1273 (uint)(mp->m_dirblksize - 1)); 1273 (uint)(mp->m_dirblksize - 1));
1274} 1274}
@@ -1312,7 +1312,7 @@ xfs_dir2_leaf_lookup(
1312 */ 1312 */
1313 dep = (xfs_dir2_data_entry_t *) 1313 dep = (xfs_dir2_data_entry_t *)
1314 ((char *)dbp->data + 1314 ((char *)dbp->data +
1315 XFS_DIR2_DATAPTR_TO_OFF(dp->i_mount, be32_to_cpu(lep->address))); 1315 xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address)));
1316 /* 1316 /*
1317 * Return the found inode number. 1317 * Return the found inode number.
1318 */ 1318 */
@@ -1381,7 +1381,7 @@ xfs_dir2_leaf_lookup_int(
1381 /* 1381 /*
1382 * Get the new data block number. 1382 * Get the new data block number.
1383 */ 1383 */
1384 newdb = XFS_DIR2_DATAPTR_TO_DB(mp, be32_to_cpu(lep->address)); 1384 newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
1385 /* 1385 /*
1386 * If it's not the same as the old data block number, 1386 * If it's not the same as the old data block number,
1387 * need to pitch the old one and read the new one. 1387 * need to pitch the old one and read the new one.
@@ -1391,7 +1391,7 @@ xfs_dir2_leaf_lookup_int(
1391 xfs_da_brelse(tp, dbp); 1391 xfs_da_brelse(tp, dbp);
1392 if ((error = 1392 if ((error =
1393 xfs_da_read_buf(tp, dp, 1393 xfs_da_read_buf(tp, dp,
1394 XFS_DIR2_DB_TO_DA(mp, newdb), -1, &dbp, 1394 xfs_dir2_db_to_da(mp, newdb), -1, &dbp,
1395 XFS_DATA_FORK))) { 1395 XFS_DATA_FORK))) {
1396 xfs_da_brelse(tp, lbp); 1396 xfs_da_brelse(tp, lbp);
1397 return error; 1397 return error;
@@ -1404,7 +1404,7 @@ xfs_dir2_leaf_lookup_int(
1404 */ 1404 */
1405 dep = (xfs_dir2_data_entry_t *) 1405 dep = (xfs_dir2_data_entry_t *)
1406 ((char *)dbp->data + 1406 ((char *)dbp->data +
1407 XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(lep->address))); 1407 xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
1408 /* 1408 /*
1409 * If it matches then return it. 1409 * If it matches then return it.
1410 */ 1410 */
@@ -1469,20 +1469,20 @@ xfs_dir2_leaf_removename(
1469 * Point to the leaf entry, use that to point to the data entry. 1469 * Point to the leaf entry, use that to point to the data entry.
1470 */ 1470 */
1471 lep = &leaf->ents[index]; 1471 lep = &leaf->ents[index];
1472 db = XFS_DIR2_DATAPTR_TO_DB(mp, be32_to_cpu(lep->address)); 1472 db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
1473 dep = (xfs_dir2_data_entry_t *) 1473 dep = (xfs_dir2_data_entry_t *)
1474 ((char *)data + XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(lep->address))); 1474 ((char *)data + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
1475 needscan = needlog = 0; 1475 needscan = needlog = 0;
1476 oldbest = be16_to_cpu(data->hdr.bestfree[0].length); 1476 oldbest = be16_to_cpu(data->hdr.bestfree[0].length);
1477 ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); 1477 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1478 bestsp = XFS_DIR2_LEAF_BESTS_P(ltp); 1478 bestsp = xfs_dir2_leaf_bests_p(ltp);
1479 ASSERT(be16_to_cpu(bestsp[db]) == oldbest); 1479 ASSERT(be16_to_cpu(bestsp[db]) == oldbest);
1480 /* 1480 /*
1481 * Mark the former data entry unused. 1481 * Mark the former data entry unused.
1482 */ 1482 */
1483 xfs_dir2_data_make_free(tp, dbp, 1483 xfs_dir2_data_make_free(tp, dbp,
1484 (xfs_dir2_data_aoff_t)((char *)dep - (char *)data), 1484 (xfs_dir2_data_aoff_t)((char *)dep - (char *)data),
1485 XFS_DIR2_DATA_ENTSIZE(dep->namelen), &needlog, &needscan); 1485 xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
1486 /* 1486 /*
1487 * We just mark the leaf entry stale by putting a null in it. 1487 * We just mark the leaf entry stale by putting a null in it.
1488 */ 1488 */
@@ -1602,7 +1602,7 @@ xfs_dir2_leaf_replace(
1602 */ 1602 */
1603 dep = (xfs_dir2_data_entry_t *) 1603 dep = (xfs_dir2_data_entry_t *)
1604 ((char *)dbp->data + 1604 ((char *)dbp->data +
1605 XFS_DIR2_DATAPTR_TO_OFF(dp->i_mount, be32_to_cpu(lep->address))); 1605 xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address)));
1606 ASSERT(args->inumber != be64_to_cpu(dep->inumber)); 1606 ASSERT(args->inumber != be64_to_cpu(dep->inumber));
1607 /* 1607 /*
1608 * Put the new inode number in, log it. 1608 * Put the new inode number in, log it.
@@ -1698,7 +1698,7 @@ xfs_dir2_leaf_trim_data(
1698 /* 1698 /*
1699 * Read the offending data block. We need its buffer. 1699 * Read the offending data block. We need its buffer.
1700 */ 1700 */
1701 if ((error = xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, db), -1, &dbp, 1701 if ((error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, db), -1, &dbp,
1702 XFS_DATA_FORK))) { 1702 XFS_DATA_FORK))) {
1703 return error; 1703 return error;
1704 } 1704 }
@@ -1712,7 +1712,7 @@ xfs_dir2_leaf_trim_data(
1712 */ 1712 */
1713 1713
1714 leaf = lbp->data; 1714 leaf = lbp->data;
1715 ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); 1715 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1716 ASSERT(be16_to_cpu(data->hdr.bestfree[0].length) == 1716 ASSERT(be16_to_cpu(data->hdr.bestfree[0].length) ==
1717 mp->m_dirblksize - (uint)sizeof(data->hdr)); 1717 mp->m_dirblksize - (uint)sizeof(data->hdr));
1718 ASSERT(db == be32_to_cpu(ltp->bestcount) - 1); 1718 ASSERT(db == be32_to_cpu(ltp->bestcount) - 1);
@@ -1727,7 +1727,7 @@ xfs_dir2_leaf_trim_data(
1727 /* 1727 /*
1728 * Eliminate the last bests entry from the table. 1728 * Eliminate the last bests entry from the table.
1729 */ 1729 */
1730 bestsp = XFS_DIR2_LEAF_BESTS_P(ltp); 1730 bestsp = xfs_dir2_leaf_bests_p(ltp);
1731 be32_add(&ltp->bestcount, -1); 1731 be32_add(&ltp->bestcount, -1);
1732 memmove(&bestsp[1], &bestsp[0], be32_to_cpu(ltp->bestcount) * sizeof(*bestsp)); 1732 memmove(&bestsp[1], &bestsp[0], be32_to_cpu(ltp->bestcount) * sizeof(*bestsp));
1733 xfs_dir2_leaf_log_tail(tp, lbp); 1733 xfs_dir2_leaf_log_tail(tp, lbp);
@@ -1838,12 +1838,12 @@ xfs_dir2_node_to_leaf(
1838 /* 1838 /*
1839 * Set up the leaf tail from the freespace block. 1839 * Set up the leaf tail from the freespace block.
1840 */ 1840 */
1841 ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); 1841 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1842 ltp->bestcount = free->hdr.nvalid; 1842 ltp->bestcount = free->hdr.nvalid;
1843 /* 1843 /*
1844 * Set up the leaf bests table. 1844 * Set up the leaf bests table.
1845 */ 1845 */
1846 memcpy(XFS_DIR2_LEAF_BESTS_P(ltp), free->bests, 1846 memcpy(xfs_dir2_leaf_bests_p(ltp), free->bests,
1847 be32_to_cpu(ltp->bestcount) * sizeof(leaf->bests[0])); 1847 be32_to_cpu(ltp->bestcount) * sizeof(leaf->bests[0]));
1848 xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); 1848 xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
1849 xfs_dir2_leaf_log_tail(tp, lbp); 1849 xfs_dir2_leaf_log_tail(tp, lbp);
diff --git a/fs/xfs/xfs_dir2_leaf.h b/fs/xfs/xfs_dir2_leaf.h
index f57ca116241..70c97f3f815 100644
--- a/fs/xfs/xfs_dir2_leaf.h
+++ b/fs/xfs/xfs_dir2_leaf.h
@@ -32,7 +32,7 @@ struct xfs_trans;
32#define XFS_DIR2_LEAF_SPACE 1 32#define XFS_DIR2_LEAF_SPACE 1
33#define XFS_DIR2_LEAF_OFFSET (XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE) 33#define XFS_DIR2_LEAF_OFFSET (XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE)
34#define XFS_DIR2_LEAF_FIRSTDB(mp) \ 34#define XFS_DIR2_LEAF_FIRSTDB(mp) \
35 XFS_DIR2_BYTE_TO_DB(mp, XFS_DIR2_LEAF_OFFSET) 35 xfs_dir2_byte_to_db(mp, XFS_DIR2_LEAF_OFFSET)
36 36
37/* 37/*
38 * Offset in data space of a data entry. 38 * Offset in data space of a data entry.
@@ -82,7 +82,6 @@ typedef struct xfs_dir2_leaf {
82 * DB blocks here are logical directory block numbers, not filesystem blocks. 82 * DB blocks here are logical directory block numbers, not filesystem blocks.
83 */ 83 */
84 84
85#define XFS_DIR2_MAX_LEAF_ENTS(mp) xfs_dir2_max_leaf_ents(mp)
86static inline int xfs_dir2_max_leaf_ents(struct xfs_mount *mp) 85static inline int xfs_dir2_max_leaf_ents(struct xfs_mount *mp)
87{ 86{
88 return (int)(((mp)->m_dirblksize - (uint)sizeof(xfs_dir2_leaf_hdr_t)) / 87 return (int)(((mp)->m_dirblksize - (uint)sizeof(xfs_dir2_leaf_hdr_t)) /
@@ -92,7 +91,6 @@ static inline int xfs_dir2_max_leaf_ents(struct xfs_mount *mp)
92/* 91/*
93 * Get address of the bestcount field in the single-leaf block. 92 * Get address of the bestcount field in the single-leaf block.
94 */ 93 */
95#define XFS_DIR2_LEAF_TAIL_P(mp,lp) xfs_dir2_leaf_tail_p(mp, lp)
96static inline xfs_dir2_leaf_tail_t * 94static inline xfs_dir2_leaf_tail_t *
97xfs_dir2_leaf_tail_p(struct xfs_mount *mp, xfs_dir2_leaf_t *lp) 95xfs_dir2_leaf_tail_p(struct xfs_mount *mp, xfs_dir2_leaf_t *lp)
98{ 96{
@@ -104,7 +102,6 @@ xfs_dir2_leaf_tail_p(struct xfs_mount *mp, xfs_dir2_leaf_t *lp)
104/* 102/*
105 * Get address of the bests array in the single-leaf block. 103 * Get address of the bests array in the single-leaf block.
106 */ 104 */
107#define XFS_DIR2_LEAF_BESTS_P(ltp) xfs_dir2_leaf_bests_p(ltp)
108static inline __be16 * 105static inline __be16 *
109xfs_dir2_leaf_bests_p(xfs_dir2_leaf_tail_t *ltp) 106xfs_dir2_leaf_bests_p(xfs_dir2_leaf_tail_t *ltp)
110{ 107{
@@ -114,7 +111,6 @@ xfs_dir2_leaf_bests_p(xfs_dir2_leaf_tail_t *ltp)
114/* 111/*
115 * Convert dataptr to byte in file space 112 * Convert dataptr to byte in file space
116 */ 113 */
117#define XFS_DIR2_DATAPTR_TO_BYTE(mp,dp) xfs_dir2_dataptr_to_byte(mp, dp)
118static inline xfs_dir2_off_t 114static inline xfs_dir2_off_t
119xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) 115xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
120{ 116{
@@ -124,7 +120,6 @@ xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
124/* 120/*
125 * Convert byte in file space to dataptr. It had better be aligned. 121 * Convert byte in file space to dataptr. It had better be aligned.
126 */ 122 */
127#define XFS_DIR2_BYTE_TO_DATAPTR(mp,by) xfs_dir2_byte_to_dataptr(mp,by)
128static inline xfs_dir2_dataptr_t 123static inline xfs_dir2_dataptr_t
129xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by) 124xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by)
130{ 125{
@@ -134,7 +129,6 @@ xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by)
134/* 129/*
135 * Convert byte in space to (DB) block 130 * Convert byte in space to (DB) block
136 */ 131 */
137#define XFS_DIR2_BYTE_TO_DB(mp,by) xfs_dir2_byte_to_db(mp, by)
138static inline xfs_dir2_db_t 132static inline xfs_dir2_db_t
139xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by) 133xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by)
140{ 134{
@@ -145,17 +139,15 @@ xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by)
145/* 139/*
146 * Convert dataptr to a block number 140 * Convert dataptr to a block number
147 */ 141 */
148#define XFS_DIR2_DATAPTR_TO_DB(mp,dp) xfs_dir2_dataptr_to_db(mp, dp)
149static inline xfs_dir2_db_t 142static inline xfs_dir2_db_t
150xfs_dir2_dataptr_to_db(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) 143xfs_dir2_dataptr_to_db(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
151{ 144{
152 return XFS_DIR2_BYTE_TO_DB(mp, XFS_DIR2_DATAPTR_TO_BYTE(mp, dp)); 145 return xfs_dir2_byte_to_db(mp, xfs_dir2_dataptr_to_byte(mp, dp));
153} 146}
154 147
155/* 148/*
156 * Convert byte in space to offset in a block 149 * Convert byte in space to offset in a block
157 */ 150 */
158#define XFS_DIR2_BYTE_TO_OFF(mp,by) xfs_dir2_byte_to_off(mp, by)
159static inline xfs_dir2_data_aoff_t 151static inline xfs_dir2_data_aoff_t
160xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by) 152xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by)
161{ 153{
@@ -166,18 +158,15 @@ xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by)
166/* 158/*
167 * Convert dataptr to a byte offset in a block 159 * Convert dataptr to a byte offset in a block
168 */ 160 */
169#define XFS_DIR2_DATAPTR_TO_OFF(mp,dp) xfs_dir2_dataptr_to_off(mp, dp)
170static inline xfs_dir2_data_aoff_t 161static inline xfs_dir2_data_aoff_t
171xfs_dir2_dataptr_to_off(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) 162xfs_dir2_dataptr_to_off(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
172{ 163{
173 return XFS_DIR2_BYTE_TO_OFF(mp, XFS_DIR2_DATAPTR_TO_BYTE(mp, dp)); 164 return xfs_dir2_byte_to_off(mp, xfs_dir2_dataptr_to_byte(mp, dp));
174} 165}
175 166
176/* 167/*
177 * Convert block and offset to byte in space 168 * Convert block and offset to byte in space
178 */ 169 */
179#define XFS_DIR2_DB_OFF_TO_BYTE(mp,db,o) \
180 xfs_dir2_db_off_to_byte(mp, db, o)
181static inline xfs_dir2_off_t 170static inline xfs_dir2_off_t
182xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db, 171xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db,
183 xfs_dir2_data_aoff_t o) 172 xfs_dir2_data_aoff_t o)
@@ -189,7 +178,6 @@ xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db,
189/* 178/*
190 * Convert block (DB) to block (dablk) 179 * Convert block (DB) to block (dablk)
191 */ 180 */
192#define XFS_DIR2_DB_TO_DA(mp,db) xfs_dir2_db_to_da(mp, db)
193static inline xfs_dablk_t 181static inline xfs_dablk_t
194xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db) 182xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db)
195{ 183{
@@ -199,29 +187,25 @@ xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db)
199/* 187/*
200 * Convert byte in space to (DA) block 188 * Convert byte in space to (DA) block
201 */ 189 */
202#define XFS_DIR2_BYTE_TO_DA(mp,by) xfs_dir2_byte_to_da(mp, by)
203static inline xfs_dablk_t 190static inline xfs_dablk_t
204xfs_dir2_byte_to_da(struct xfs_mount *mp, xfs_dir2_off_t by) 191xfs_dir2_byte_to_da(struct xfs_mount *mp, xfs_dir2_off_t by)
205{ 192{
206 return XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_BYTE_TO_DB(mp, by)); 193 return xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, by));
207} 194}
208 195
209/* 196/*
210 * Convert block and offset to dataptr 197 * Convert block and offset to dataptr
211 */ 198 */
212#define XFS_DIR2_DB_OFF_TO_DATAPTR(mp,db,o) \
213 xfs_dir2_db_off_to_dataptr(mp, db, o)
214static inline xfs_dir2_dataptr_t 199static inline xfs_dir2_dataptr_t
215xfs_dir2_db_off_to_dataptr(struct xfs_mount *mp, xfs_dir2_db_t db, 200xfs_dir2_db_off_to_dataptr(struct xfs_mount *mp, xfs_dir2_db_t db,
216 xfs_dir2_data_aoff_t o) 201 xfs_dir2_data_aoff_t o)
217{ 202{
218 return XFS_DIR2_BYTE_TO_DATAPTR(mp, XFS_DIR2_DB_OFF_TO_BYTE(mp, db, o)); 203 return xfs_dir2_byte_to_dataptr(mp, xfs_dir2_db_off_to_byte(mp, db, o));
219} 204}
220 205
221/* 206/*
222 * Convert block (dablk) to block (DB) 207 * Convert block (dablk) to block (DB)
223 */ 208 */
224#define XFS_DIR2_DA_TO_DB(mp,da) xfs_dir2_da_to_db(mp, da)
225static inline xfs_dir2_db_t 209static inline xfs_dir2_db_t
226xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da) 210xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da)
227{ 211{
@@ -231,11 +215,10 @@ xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da)
231/* 215/*
232 * Convert block (dablk) to byte offset in space 216 * Convert block (dablk) to byte offset in space
233 */ 217 */
234#define XFS_DIR2_DA_TO_BYTE(mp,da) xfs_dir2_da_to_byte(mp, da)
235static inline xfs_dir2_off_t 218static inline xfs_dir2_off_t
236xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da) 219xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da)
237{ 220{
238 return XFS_DIR2_DB_OFF_TO_BYTE(mp, XFS_DIR2_DA_TO_DB(mp, da), 0); 221 return xfs_dir2_db_off_to_byte(mp, xfs_dir2_da_to_db(mp, da), 0);
239} 222}
240 223
241/* 224/*
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index d083c381993..91c61d9632c 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -136,14 +136,14 @@ xfs_dir2_leaf_to_node(
136 /* 136 /*
137 * Get the buffer for the new freespace block. 137 * Get the buffer for the new freespace block.
138 */ 138 */
139 if ((error = xfs_da_get_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, fdb), -1, &fbp, 139 if ((error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, fdb), -1, &fbp,
140 XFS_DATA_FORK))) { 140 XFS_DATA_FORK))) {
141 return error; 141 return error;
142 } 142 }
143 ASSERT(fbp != NULL); 143 ASSERT(fbp != NULL);
144 free = fbp->data; 144 free = fbp->data;
145 leaf = lbp->data; 145 leaf = lbp->data;
146 ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); 146 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
147 /* 147 /*
148 * Initialize the freespace block header. 148 * Initialize the freespace block header.
149 */ 149 */
@@ -155,7 +155,7 @@ xfs_dir2_leaf_to_node(
155 * Copy freespace entries from the leaf block to the new block. 155 * Copy freespace entries from the leaf block to the new block.
156 * Count active entries. 156 * Count active entries.
157 */ 157 */
158 for (i = n = 0, from = XFS_DIR2_LEAF_BESTS_P(ltp), to = free->bests; 158 for (i = n = 0, from = xfs_dir2_leaf_bests_p(ltp), to = free->bests;
159 i < be32_to_cpu(ltp->bestcount); i++, from++, to++) { 159 i < be32_to_cpu(ltp->bestcount); i++, from++, to++) {
160 if ((off = be16_to_cpu(*from)) != NULLDATAOFF) 160 if ((off = be16_to_cpu(*from)) != NULLDATAOFF)
161 n++; 161 n++;
@@ -215,7 +215,7 @@ xfs_dir2_leafn_add(
215 * a compact. 215 * a compact.
216 */ 216 */
217 217
218 if (be16_to_cpu(leaf->hdr.count) == XFS_DIR2_MAX_LEAF_ENTS(mp)) { 218 if (be16_to_cpu(leaf->hdr.count) == xfs_dir2_max_leaf_ents(mp)) {
219 if (!leaf->hdr.stale) 219 if (!leaf->hdr.stale)
220 return XFS_ERROR(ENOSPC); 220 return XFS_ERROR(ENOSPC);
221 compact = be16_to_cpu(leaf->hdr.stale) > 1; 221 compact = be16_to_cpu(leaf->hdr.stale) > 1;
@@ -327,7 +327,7 @@ xfs_dir2_leafn_add(
327 * Insert the new entry, log everything. 327 * Insert the new entry, log everything.
328 */ 328 */
329 lep->hashval = cpu_to_be32(args->hashval); 329 lep->hashval = cpu_to_be32(args->hashval);
330 lep->address = cpu_to_be32(XFS_DIR2_DB_OFF_TO_DATAPTR(mp, 330 lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp,
331 args->blkno, args->index)); 331 args->blkno, args->index));
332 xfs_dir2_leaf_log_header(tp, bp); 332 xfs_dir2_leaf_log_header(tp, bp);
333 xfs_dir2_leaf_log_ents(tp, bp, lfloglow, lfloghigh); 333 xfs_dir2_leaf_log_ents(tp, bp, lfloglow, lfloghigh);
@@ -352,7 +352,7 @@ xfs_dir2_leafn_check(
352 leaf = bp->data; 352 leaf = bp->data;
353 mp = dp->i_mount; 353 mp = dp->i_mount;
354 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); 354 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
355 ASSERT(be16_to_cpu(leaf->hdr.count) <= XFS_DIR2_MAX_LEAF_ENTS(mp)); 355 ASSERT(be16_to_cpu(leaf->hdr.count) <= xfs_dir2_max_leaf_ents(mp));
356 for (i = stale = 0; i < be16_to_cpu(leaf->hdr.count); i++) { 356 for (i = stale = 0; i < be16_to_cpu(leaf->hdr.count); i++) {
357 if (i + 1 < be16_to_cpu(leaf->hdr.count)) { 357 if (i + 1 < be16_to_cpu(leaf->hdr.count)) {
358 ASSERT(be32_to_cpu(leaf->ents[i].hashval) <= 358 ASSERT(be32_to_cpu(leaf->ents[i].hashval) <=
@@ -440,7 +440,7 @@ xfs_dir2_leafn_lookup_int(
440 if (args->addname) { 440 if (args->addname) {
441 curfdb = curbp ? state->extrablk.blkno : -1; 441 curfdb = curbp ? state->extrablk.blkno : -1;
442 curdb = -1; 442 curdb = -1;
443 length = XFS_DIR2_DATA_ENTSIZE(args->namelen); 443 length = xfs_dir2_data_entsize(args->namelen);
444 if ((free = (curbp ? curbp->data : NULL))) 444 if ((free = (curbp ? curbp->data : NULL)))
445 ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); 445 ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
446 } 446 }
@@ -465,7 +465,7 @@ xfs_dir2_leafn_lookup_int(
465 /* 465 /*
466 * Pull the data block number from the entry. 466 * Pull the data block number from the entry.
467 */ 467 */
468 newdb = XFS_DIR2_DATAPTR_TO_DB(mp, be32_to_cpu(lep->address)); 468 newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
469 /* 469 /*
470 * For addname, we're looking for a place to put the new entry. 470 * For addname, we're looking for a place to put the new entry.
471 * We want to use a data block with an entry of equal 471 * We want to use a data block with an entry of equal
@@ -482,7 +482,7 @@ xfs_dir2_leafn_lookup_int(
482 * Convert the data block to the free block 482 * Convert the data block to the free block
483 * holding its freespace information. 483 * holding its freespace information.
484 */ 484 */
485 newfdb = XFS_DIR2_DB_TO_FDB(mp, newdb); 485 newfdb = xfs_dir2_db_to_fdb(mp, newdb);
486 /* 486 /*
487 * If it's not the one we have in hand, 487 * If it's not the one we have in hand,
488 * read it in. 488 * read it in.
@@ -497,7 +497,7 @@ xfs_dir2_leafn_lookup_int(
497 * Read the free block. 497 * Read the free block.
498 */ 498 */
499 if ((error = xfs_da_read_buf(tp, dp, 499 if ((error = xfs_da_read_buf(tp, dp,
500 XFS_DIR2_DB_TO_DA(mp, 500 xfs_dir2_db_to_da(mp,
501 newfdb), 501 newfdb),
502 -1, &curbp, 502 -1, &curbp,
503 XFS_DATA_FORK))) { 503 XFS_DATA_FORK))) {
@@ -517,7 +517,7 @@ xfs_dir2_leafn_lookup_int(
517 /* 517 /*
518 * Get the index for our entry. 518 * Get the index for our entry.
519 */ 519 */
520 fi = XFS_DIR2_DB_TO_FDINDEX(mp, curdb); 520 fi = xfs_dir2_db_to_fdindex(mp, curdb);
521 /* 521 /*
522 * If it has room, return it. 522 * If it has room, return it.
523 */ 523 */
@@ -561,7 +561,7 @@ xfs_dir2_leafn_lookup_int(
561 */ 561 */
562 if ((error = 562 if ((error =
563 xfs_da_read_buf(tp, dp, 563 xfs_da_read_buf(tp, dp,
564 XFS_DIR2_DB_TO_DA(mp, newdb), -1, 564 xfs_dir2_db_to_da(mp, newdb), -1,
565 &curbp, XFS_DATA_FORK))) { 565 &curbp, XFS_DATA_FORK))) {
566 return error; 566 return error;
567 } 567 }
@@ -573,7 +573,7 @@ xfs_dir2_leafn_lookup_int(
573 */ 573 */
574 dep = (xfs_dir2_data_entry_t *) 574 dep = (xfs_dir2_data_entry_t *)
575 ((char *)curbp->data + 575 ((char *)curbp->data +
576 XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(lep->address))); 576 xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
577 /* 577 /*
578 * Compare the entry, return it if it matches. 578 * Compare the entry, return it if it matches.
579 */ 579 */
@@ -876,9 +876,9 @@ xfs_dir2_leafn_remove(
876 /* 876 /*
877 * Extract the data block and offset from the entry. 877 * Extract the data block and offset from the entry.
878 */ 878 */
879 db = XFS_DIR2_DATAPTR_TO_DB(mp, be32_to_cpu(lep->address)); 879 db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
880 ASSERT(dblk->blkno == db); 880 ASSERT(dblk->blkno == db);
881 off = XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(lep->address)); 881 off = xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address));
882 ASSERT(dblk->index == off); 882 ASSERT(dblk->index == off);
883 /* 883 /*
884 * Kill the leaf entry by marking it stale. 884 * Kill the leaf entry by marking it stale.
@@ -898,7 +898,7 @@ xfs_dir2_leafn_remove(
898 longest = be16_to_cpu(data->hdr.bestfree[0].length); 898 longest = be16_to_cpu(data->hdr.bestfree[0].length);
899 needlog = needscan = 0; 899 needlog = needscan = 0;
900 xfs_dir2_data_make_free(tp, dbp, off, 900 xfs_dir2_data_make_free(tp, dbp, off,
901 XFS_DIR2_DATA_ENTSIZE(dep->namelen), &needlog, &needscan); 901 xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
902 /* 902 /*
903 * Rescan the data block freespaces for bestfree. 903 * Rescan the data block freespaces for bestfree.
904 * Log the data block header if needed. 904 * Log the data block header if needed.
@@ -924,8 +924,8 @@ xfs_dir2_leafn_remove(
924 * Convert the data block number to a free block, 924 * Convert the data block number to a free block,
925 * read in the free block. 925 * read in the free block.
926 */ 926 */
927 fdb = XFS_DIR2_DB_TO_FDB(mp, db); 927 fdb = xfs_dir2_db_to_fdb(mp, db);
928 if ((error = xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, fdb), 928 if ((error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, fdb),
929 -1, &fbp, XFS_DATA_FORK))) { 929 -1, &fbp, XFS_DATA_FORK))) {
930 return error; 930 return error;
931 } 931 }
@@ -937,7 +937,7 @@ xfs_dir2_leafn_remove(
937 /* 937 /*
938 * Calculate which entry we need to fix. 938 * Calculate which entry we need to fix.
939 */ 939 */
940 findex = XFS_DIR2_DB_TO_FDINDEX(mp, db); 940 findex = xfs_dir2_db_to_fdindex(mp, db);
941 longest = be16_to_cpu(data->hdr.bestfree[0].length); 941 longest = be16_to_cpu(data->hdr.bestfree[0].length);
942 /* 942 /*
943 * If the data block is now empty we can get rid of it 943 * If the data block is now empty we can get rid of it
@@ -1073,7 +1073,7 @@ xfs_dir2_leafn_split(
1073 /* 1073 /*
1074 * Initialize the new leaf block. 1074 * Initialize the new leaf block.
1075 */ 1075 */
1076 error = xfs_dir2_leaf_init(args, XFS_DIR2_DA_TO_DB(mp, blkno), 1076 error = xfs_dir2_leaf_init(args, xfs_dir2_da_to_db(mp, blkno),
1077 &newblk->bp, XFS_DIR2_LEAFN_MAGIC); 1077 &newblk->bp, XFS_DIR2_LEAFN_MAGIC);
1078 if (error) { 1078 if (error) {
1079 return error; 1079 return error;
@@ -1385,7 +1385,7 @@ xfs_dir2_node_addname_int(
1385 dp = args->dp; 1385 dp = args->dp;
1386 mp = dp->i_mount; 1386 mp = dp->i_mount;
1387 tp = args->trans; 1387 tp = args->trans;
1388 length = XFS_DIR2_DATA_ENTSIZE(args->namelen); 1388 length = xfs_dir2_data_entsize(args->namelen);
1389 /* 1389 /*
1390 * If we came in with a freespace block that means that lookup 1390 * If we came in with a freespace block that means that lookup
1391 * found an entry with our hash value. This is the freespace 1391 * found an entry with our hash value. This is the freespace
@@ -1438,7 +1438,7 @@ xfs_dir2_node_addname_int(
1438 1438
1439 if ((error = xfs_bmap_last_offset(tp, dp, &fo, XFS_DATA_FORK))) 1439 if ((error = xfs_bmap_last_offset(tp, dp, &fo, XFS_DATA_FORK)))
1440 return error; 1440 return error;
1441 lastfbno = XFS_DIR2_DA_TO_DB(mp, (xfs_dablk_t)fo); 1441 lastfbno = xfs_dir2_da_to_db(mp, (xfs_dablk_t)fo);
1442 fbno = ifbno; 1442 fbno = ifbno;
1443 } 1443 }
1444 /* 1444 /*
@@ -1474,7 +1474,7 @@ xfs_dir2_node_addname_int(
1474 * to avoid it. 1474 * to avoid it.
1475 */ 1475 */
1476 if ((error = xfs_da_read_buf(tp, dp, 1476 if ((error = xfs_da_read_buf(tp, dp,
1477 XFS_DIR2_DB_TO_DA(mp, fbno), -2, &fbp, 1477 xfs_dir2_db_to_da(mp, fbno), -2, &fbp,
1478 XFS_DATA_FORK))) { 1478 XFS_DATA_FORK))) {
1479 return error; 1479 return error;
1480 } 1480 }
@@ -1550,9 +1550,9 @@ xfs_dir2_node_addname_int(
1550 * Get the freespace block corresponding to the data block 1550 * Get the freespace block corresponding to the data block
1551 * that was just allocated. 1551 * that was just allocated.
1552 */ 1552 */
1553 fbno = XFS_DIR2_DB_TO_FDB(mp, dbno); 1553 fbno = xfs_dir2_db_to_fdb(mp, dbno);
1554 if (unlikely(error = xfs_da_read_buf(tp, dp, 1554 if (unlikely(error = xfs_da_read_buf(tp, dp,
1555 XFS_DIR2_DB_TO_DA(mp, fbno), -2, &fbp, 1555 xfs_dir2_db_to_da(mp, fbno), -2, &fbp,
1556 XFS_DATA_FORK))) { 1556 XFS_DATA_FORK))) {
1557 xfs_da_buf_done(dbp); 1557 xfs_da_buf_done(dbp);
1558 return error; 1558 return error;
@@ -1567,14 +1567,14 @@ xfs_dir2_node_addname_int(
1567 return error; 1567 return error;
1568 } 1568 }
1569 1569
1570 if (unlikely(XFS_DIR2_DB_TO_FDB(mp, dbno) != fbno)) { 1570 if (unlikely(xfs_dir2_db_to_fdb(mp, dbno) != fbno)) {
1571 cmn_err(CE_ALERT, 1571 cmn_err(CE_ALERT,
1572 "xfs_dir2_node_addname_int: dir ino " 1572 "xfs_dir2_node_addname_int: dir ino "
1573 "%llu needed freesp block %lld for\n" 1573 "%llu needed freesp block %lld for\n"
1574 " data block %lld, got %lld\n" 1574 " data block %lld, got %lld\n"
1575 " ifbno %llu lastfbno %d\n", 1575 " ifbno %llu lastfbno %d\n",
1576 (unsigned long long)dp->i_ino, 1576 (unsigned long long)dp->i_ino,
1577 (long long)XFS_DIR2_DB_TO_FDB(mp, dbno), 1577 (long long)xfs_dir2_db_to_fdb(mp, dbno),
1578 (long long)dbno, (long long)fbno, 1578 (long long)dbno, (long long)fbno,
1579 (unsigned long long)ifbno, lastfbno); 1579 (unsigned long long)ifbno, lastfbno);
1580 if (fblk) { 1580 if (fblk) {
@@ -1598,7 +1598,7 @@ xfs_dir2_node_addname_int(
1598 * Get a buffer for the new block. 1598 * Get a buffer for the new block.
1599 */ 1599 */
1600 if ((error = xfs_da_get_buf(tp, dp, 1600 if ((error = xfs_da_get_buf(tp, dp,
1601 XFS_DIR2_DB_TO_DA(mp, fbno), 1601 xfs_dir2_db_to_da(mp, fbno),
1602 -1, &fbp, XFS_DATA_FORK))) { 1602 -1, &fbp, XFS_DATA_FORK))) {
1603 return error; 1603 return error;
1604 } 1604 }
@@ -1623,7 +1623,7 @@ xfs_dir2_node_addname_int(
1623 /* 1623 /*
1624 * Set the freespace block index from the data block number. 1624 * Set the freespace block index from the data block number.
1625 */ 1625 */
1626 findex = XFS_DIR2_DB_TO_FDINDEX(mp, dbno); 1626 findex = xfs_dir2_db_to_fdindex(mp, dbno);
1627 /* 1627 /*
1628 * If it's after the end of the current entries in the 1628 * If it's after the end of the current entries in the
1629 * freespace block, extend that table. 1629 * freespace block, extend that table.
@@ -1669,7 +1669,7 @@ xfs_dir2_node_addname_int(
1669 * Read the data block in. 1669 * Read the data block in.
1670 */ 1670 */
1671 if (unlikely( 1671 if (unlikely(
1672 error = xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, dbno), 1672 error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, dbno),
1673 -1, &dbp, XFS_DATA_FORK))) { 1673 -1, &dbp, XFS_DATA_FORK))) {
1674 if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL) 1674 if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL)
1675 xfs_da_buf_done(fbp); 1675 xfs_da_buf_done(fbp);
@@ -1698,7 +1698,7 @@ xfs_dir2_node_addname_int(
1698 dep->inumber = cpu_to_be64(args->inumber); 1698 dep->inumber = cpu_to_be64(args->inumber);
1699 dep->namelen = args->namelen; 1699 dep->namelen = args->namelen;
1700 memcpy(dep->name, args->name, dep->namelen); 1700 memcpy(dep->name, args->name, dep->namelen);
1701 tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); 1701 tagp = xfs_dir2_data_entry_tag_p(dep);
1702 *tagp = cpu_to_be16((char *)dep - (char *)data); 1702 *tagp = cpu_to_be16((char *)dep - (char *)data);
1703 xfs_dir2_data_log_entry(tp, dbp, dep); 1703 xfs_dir2_data_log_entry(tp, dbp, dep);
1704 /* 1704 /*
@@ -1904,7 +1904,7 @@ xfs_dir2_node_replace(
1904 ASSERT(be32_to_cpu(data->hdr.magic) == XFS_DIR2_DATA_MAGIC); 1904 ASSERT(be32_to_cpu(data->hdr.magic) == XFS_DIR2_DATA_MAGIC);
1905 dep = (xfs_dir2_data_entry_t *) 1905 dep = (xfs_dir2_data_entry_t *)
1906 ((char *)data + 1906 ((char *)data +
1907 XFS_DIR2_DATAPTR_TO_OFF(state->mp, be32_to_cpu(lep->address))); 1907 xfs_dir2_dataptr_to_off(state->mp, be32_to_cpu(lep->address)));
1908 ASSERT(inum != be64_to_cpu(dep->inumber)); 1908 ASSERT(inum != be64_to_cpu(dep->inumber));
1909 /* 1909 /*
1910 * Fill in the new inode number and log the entry. 1910 * Fill in the new inode number and log the entry.
@@ -1980,7 +1980,7 @@ xfs_dir2_node_trim_free(
1980 * Blow the block away. 1980 * Blow the block away.
1981 */ 1981 */
1982 if ((error = 1982 if ((error =
1983 xfs_dir2_shrink_inode(args, XFS_DIR2_DA_TO_DB(mp, (xfs_dablk_t)fo), 1983 xfs_dir2_shrink_inode(args, xfs_dir2_da_to_db(mp, (xfs_dablk_t)fo),
1984 bp))) { 1984 bp))) {
1985 /* 1985 /*
1986 * Can't fail with ENOSPC since that only happens with no 1986 * Can't fail with ENOSPC since that only happens with no
diff --git a/fs/xfs/xfs_dir2_node.h b/fs/xfs/xfs_dir2_node.h
index c7c870ee785..dde72db3d69 100644
--- a/fs/xfs/xfs_dir2_node.h
+++ b/fs/xfs/xfs_dir2_node.h
@@ -36,7 +36,7 @@ struct xfs_trans;
36#define XFS_DIR2_FREE_SPACE 2 36#define XFS_DIR2_FREE_SPACE 2
37#define XFS_DIR2_FREE_OFFSET (XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE) 37#define XFS_DIR2_FREE_OFFSET (XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE)
38#define XFS_DIR2_FREE_FIRSTDB(mp) \ 38#define XFS_DIR2_FREE_FIRSTDB(mp) \
39 XFS_DIR2_BYTE_TO_DB(mp, XFS_DIR2_FREE_OFFSET) 39 xfs_dir2_byte_to_db(mp, XFS_DIR2_FREE_OFFSET)
40 40
41#define XFS_DIR2_FREE_MAGIC 0x58443246 /* XD2F */ 41#define XFS_DIR2_FREE_MAGIC 0x58443246 /* XD2F */
42 42
@@ -60,7 +60,6 @@ typedef struct xfs_dir2_free {
60/* 60/*
61 * Convert data space db to the corresponding free db. 61 * Convert data space db to the corresponding free db.
62 */ 62 */
63#define XFS_DIR2_DB_TO_FDB(mp,db) xfs_dir2_db_to_fdb(mp, db)
64static inline xfs_dir2_db_t 63static inline xfs_dir2_db_t
65xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db) 64xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db)
66{ 65{
@@ -70,7 +69,6 @@ xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db)
70/* 69/*
71 * Convert data space db to the corresponding index in a free db. 70 * Convert data space db to the corresponding index in a free db.
72 */ 71 */
73#define XFS_DIR2_DB_TO_FDINDEX(mp,db) xfs_dir2_db_to_fdindex(mp, db)
74static inline int 72static inline int
75xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db) 73xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db)
76{ 74{
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index 0cd77b17bf9..38fc4f22b76 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -89,8 +89,8 @@ xfs_dir2_block_sfsize(
89 mp = dp->i_mount; 89 mp = dp->i_mount;
90 90
91 count = i8count = namelen = 0; 91 count = i8count = namelen = 0;
92 btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); 92 btp = xfs_dir2_block_tail_p(mp, block);
93 blp = XFS_DIR2_BLOCK_LEAF_P(btp); 93 blp = xfs_dir2_block_leaf_p(btp);
94 94
95 /* 95 /*
96 * Iterate over the block's data entries by using the leaf pointers. 96 * Iterate over the block's data entries by using the leaf pointers.
@@ -102,7 +102,7 @@ xfs_dir2_block_sfsize(
102 * Calculate the pointer to the entry at hand. 102 * Calculate the pointer to the entry at hand.
103 */ 103 */
104 dep = (xfs_dir2_data_entry_t *) 104 dep = (xfs_dir2_data_entry_t *)
105 ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, addr)); 105 ((char *)block + xfs_dir2_dataptr_to_off(mp, addr));
106 /* 106 /*
107 * Detect . and .., so we can special-case them. 107 * Detect . and .., so we can special-case them.
108 * . is not included in sf directories. 108 * . is not included in sf directories.
@@ -124,7 +124,7 @@ xfs_dir2_block_sfsize(
124 /* 124 /*
125 * Calculate the new size, see if we should give up yet. 125 * Calculate the new size, see if we should give up yet.
126 */ 126 */
127 size = XFS_DIR2_SF_HDR_SIZE(i8count) + /* header */ 127 size = xfs_dir2_sf_hdr_size(i8count) + /* header */
128 count + /* namelen */ 128 count + /* namelen */
129 count * (uint)sizeof(xfs_dir2_sf_off_t) + /* offset */ 129 count * (uint)sizeof(xfs_dir2_sf_off_t) + /* offset */
130 namelen + /* name */ 130 namelen + /* name */
@@ -139,7 +139,7 @@ xfs_dir2_block_sfsize(
139 */ 139 */
140 sfhp->count = count; 140 sfhp->count = count;
141 sfhp->i8count = i8count; 141 sfhp->i8count = i8count;
142 XFS_DIR2_SF_PUT_INUMBER((xfs_dir2_sf_t *)sfhp, &parent, &sfhp->parent); 142 xfs_dir2_sf_put_inumber((xfs_dir2_sf_t *)sfhp, &parent, &sfhp->parent);
143 return size; 143 return size;
144} 144}
145 145
@@ -199,15 +199,15 @@ xfs_dir2_block_to_sf(
199 * Copy the header into the newly allocate local space. 199 * Copy the header into the newly allocate local space.
200 */ 200 */
201 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 201 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
202 memcpy(sfp, sfhp, XFS_DIR2_SF_HDR_SIZE(sfhp->i8count)); 202 memcpy(sfp, sfhp, xfs_dir2_sf_hdr_size(sfhp->i8count));
203 dp->i_d.di_size = size; 203 dp->i_d.di_size = size;
204 /* 204 /*
205 * Set up to loop over the block's entries. 205 * Set up to loop over the block's entries.
206 */ 206 */
207 btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); 207 btp = xfs_dir2_block_tail_p(mp, block);
208 ptr = (char *)block->u; 208 ptr = (char *)block->u;
209 endptr = (char *)XFS_DIR2_BLOCK_LEAF_P(btp); 209 endptr = (char *)xfs_dir2_block_leaf_p(btp);
210 sfep = XFS_DIR2_SF_FIRSTENTRY(sfp); 210 sfep = xfs_dir2_sf_firstentry(sfp);
211 /* 211 /*
212 * Loop over the active and unused entries. 212 * Loop over the active and unused entries.
213 * Stop when we reach the leaf/tail portion of the block. 213 * Stop when we reach the leaf/tail portion of the block.
@@ -233,22 +233,22 @@ xfs_dir2_block_to_sf(
233 else if (dep->namelen == 2 && 233 else if (dep->namelen == 2 &&
234 dep->name[0] == '.' && dep->name[1] == '.') 234 dep->name[0] == '.' && dep->name[1] == '.')
235 ASSERT(be64_to_cpu(dep->inumber) == 235 ASSERT(be64_to_cpu(dep->inumber) ==
236 XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent)); 236 xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent));
237 /* 237 /*
238 * Normal entry, copy it into shortform. 238 * Normal entry, copy it into shortform.
239 */ 239 */
240 else { 240 else {
241 sfep->namelen = dep->namelen; 241 sfep->namelen = dep->namelen;
242 XFS_DIR2_SF_PUT_OFFSET(sfep, 242 xfs_dir2_sf_put_offset(sfep,
243 (xfs_dir2_data_aoff_t) 243 (xfs_dir2_data_aoff_t)
244 ((char *)dep - (char *)block)); 244 ((char *)dep - (char *)block));
245 memcpy(sfep->name, dep->name, dep->namelen); 245 memcpy(sfep->name, dep->name, dep->namelen);
246 temp = be64_to_cpu(dep->inumber); 246 temp = be64_to_cpu(dep->inumber);
247 XFS_DIR2_SF_PUT_INUMBER(sfp, &temp, 247 xfs_dir2_sf_put_inumber(sfp, &temp,
248 XFS_DIR2_SF_INUMBERP(sfep)); 248 xfs_dir2_sf_inumberp(sfep));
249 sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep); 249 sfep = xfs_dir2_sf_nextentry(sfp, sfep);
250 } 250 }
251 ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen); 251 ptr += xfs_dir2_data_entsize(dep->namelen);
252 } 252 }
253 ASSERT((char *)sfep - (char *)sfp == size); 253 ASSERT((char *)sfep - (char *)sfp == size);
254 xfs_dir2_sf_check(args); 254 xfs_dir2_sf_check(args);
@@ -294,11 +294,11 @@ xfs_dir2_sf_addname(
294 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); 294 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
295 ASSERT(dp->i_df.if_u1.if_data != NULL); 295 ASSERT(dp->i_df.if_u1.if_data != NULL);
296 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 296 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
297 ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count)); 297 ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
298 /* 298 /*
299 * Compute entry (and change in) size. 299 * Compute entry (and change in) size.
300 */ 300 */
301 add_entsize = XFS_DIR2_SF_ENTSIZE_BYNAME(sfp, args->namelen); 301 add_entsize = xfs_dir2_sf_entsize_byname(sfp, args->namelen);
302 incr_isize = add_entsize; 302 incr_isize = add_entsize;
303 objchange = 0; 303 objchange = 0;
304#if XFS_BIG_INUMS 304#if XFS_BIG_INUMS
@@ -392,7 +392,7 @@ xfs_dir2_sf_addname_easy(
392 /* 392 /*
393 * Grow the in-inode space. 393 * Grow the in-inode space.
394 */ 394 */
395 xfs_idata_realloc(dp, XFS_DIR2_SF_ENTSIZE_BYNAME(sfp, args->namelen), 395 xfs_idata_realloc(dp, xfs_dir2_sf_entsize_byname(sfp, args->namelen),
396 XFS_DATA_FORK); 396 XFS_DATA_FORK);
397 /* 397 /*
398 * Need to set up again due to realloc of the inode data. 398 * Need to set up again due to realloc of the inode data.
@@ -403,10 +403,10 @@ xfs_dir2_sf_addname_easy(
403 * Fill in the new entry. 403 * Fill in the new entry.
404 */ 404 */
405 sfep->namelen = args->namelen; 405 sfep->namelen = args->namelen;
406 XFS_DIR2_SF_PUT_OFFSET(sfep, offset); 406 xfs_dir2_sf_put_offset(sfep, offset);
407 memcpy(sfep->name, args->name, sfep->namelen); 407 memcpy(sfep->name, args->name, sfep->namelen);
408 XFS_DIR2_SF_PUT_INUMBER(sfp, &args->inumber, 408 xfs_dir2_sf_put_inumber(sfp, &args->inumber,
409 XFS_DIR2_SF_INUMBERP(sfep)); 409 xfs_dir2_sf_inumberp(sfep));
410 /* 410 /*
411 * Update the header and inode. 411 * Update the header and inode.
412 */ 412 */
@@ -463,14 +463,14 @@ xfs_dir2_sf_addname_hard(
463 * If it's going to end up at the end then oldsfep will point there. 463 * If it's going to end up at the end then oldsfep will point there.
464 */ 464 */
465 for (offset = XFS_DIR2_DATA_FIRST_OFFSET, 465 for (offset = XFS_DIR2_DATA_FIRST_OFFSET,
466 oldsfep = XFS_DIR2_SF_FIRSTENTRY(oldsfp), 466 oldsfep = xfs_dir2_sf_firstentry(oldsfp),
467 add_datasize = XFS_DIR2_DATA_ENTSIZE(args->namelen), 467 add_datasize = xfs_dir2_data_entsize(args->namelen),
468 eof = (char *)oldsfep == &buf[old_isize]; 468 eof = (char *)oldsfep == &buf[old_isize];
469 !eof; 469 !eof;
470 offset = new_offset + XFS_DIR2_DATA_ENTSIZE(oldsfep->namelen), 470 offset = new_offset + xfs_dir2_data_entsize(oldsfep->namelen),
471 oldsfep = XFS_DIR2_SF_NEXTENTRY(oldsfp, oldsfep), 471 oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep),
472 eof = (char *)oldsfep == &buf[old_isize]) { 472 eof = (char *)oldsfep == &buf[old_isize]) {
473 new_offset = XFS_DIR2_SF_GET_OFFSET(oldsfep); 473 new_offset = xfs_dir2_sf_get_offset(oldsfep);
474 if (offset + add_datasize <= new_offset) 474 if (offset + add_datasize <= new_offset)
475 break; 475 break;
476 } 476 }
@@ -495,10 +495,10 @@ xfs_dir2_sf_addname_hard(
495 * Fill in the new entry, and update the header counts. 495 * Fill in the new entry, and update the header counts.
496 */ 496 */
497 sfep->namelen = args->namelen; 497 sfep->namelen = args->namelen;
498 XFS_DIR2_SF_PUT_OFFSET(sfep, offset); 498 xfs_dir2_sf_put_offset(sfep, offset);
499 memcpy(sfep->name, args->name, sfep->namelen); 499 memcpy(sfep->name, args->name, sfep->namelen);
500 XFS_DIR2_SF_PUT_INUMBER(sfp, &args->inumber, 500 xfs_dir2_sf_put_inumber(sfp, &args->inumber,
501 XFS_DIR2_SF_INUMBERP(sfep)); 501 xfs_dir2_sf_inumberp(sfep));
502 sfp->hdr.count++; 502 sfp->hdr.count++;
503#if XFS_BIG_INUMS 503#if XFS_BIG_INUMS
504 if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange) 504 if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange)
@@ -508,7 +508,7 @@ xfs_dir2_sf_addname_hard(
508 * If there's more left to copy, do that. 508 * If there's more left to copy, do that.
509 */ 509 */
510 if (!eof) { 510 if (!eof) {
511 sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep); 511 sfep = xfs_dir2_sf_nextentry(sfp, sfep);
512 memcpy(sfep, oldsfep, old_isize - nbytes); 512 memcpy(sfep, oldsfep, old_isize - nbytes);
513 } 513 }
514 kmem_free(buf, old_isize); 514 kmem_free(buf, old_isize);
@@ -544,9 +544,9 @@ xfs_dir2_sf_addname_pick(
544 mp = dp->i_mount; 544 mp = dp->i_mount;
545 545
546 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 546 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
547 size = XFS_DIR2_DATA_ENTSIZE(args->namelen); 547 size = xfs_dir2_data_entsize(args->namelen);
548 offset = XFS_DIR2_DATA_FIRST_OFFSET; 548 offset = XFS_DIR2_DATA_FIRST_OFFSET;
549 sfep = XFS_DIR2_SF_FIRSTENTRY(sfp); 549 sfep = xfs_dir2_sf_firstentry(sfp);
550 holefit = 0; 550 holefit = 0;
551 /* 551 /*
552 * Loop over sf entries. 552 * Loop over sf entries.
@@ -555,10 +555,10 @@ xfs_dir2_sf_addname_pick(
555 */ 555 */
556 for (i = 0; i < sfp->hdr.count; i++) { 556 for (i = 0; i < sfp->hdr.count; i++) {
557 if (!holefit) 557 if (!holefit)
558 holefit = offset + size <= XFS_DIR2_SF_GET_OFFSET(sfep); 558 holefit = offset + size <= xfs_dir2_sf_get_offset(sfep);
559 offset = XFS_DIR2_SF_GET_OFFSET(sfep) + 559 offset = xfs_dir2_sf_get_offset(sfep) +
560 XFS_DIR2_DATA_ENTSIZE(sfep->namelen); 560 xfs_dir2_data_entsize(sfep->namelen);
561 sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep); 561 sfep = xfs_dir2_sf_nextentry(sfp, sfep);
562 } 562 }
563 /* 563 /*
564 * Calculate data bytes used excluding the new entry, if this 564 * Calculate data bytes used excluding the new entry, if this
@@ -617,18 +617,18 @@ xfs_dir2_sf_check(
617 617
618 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 618 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
619 offset = XFS_DIR2_DATA_FIRST_OFFSET; 619 offset = XFS_DIR2_DATA_FIRST_OFFSET;
620 ino = XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent); 620 ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
621 i8count = ino > XFS_DIR2_MAX_SHORT_INUM; 621 i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
622 622
623 for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp); 623 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
624 i < sfp->hdr.count; 624 i < sfp->hdr.count;
625 i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) { 625 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
626 ASSERT(XFS_DIR2_SF_GET_OFFSET(sfep) >= offset); 626 ASSERT(xfs_dir2_sf_get_offset(sfep) >= offset);
627 ino = XFS_DIR2_SF_GET_INUMBER(sfp, XFS_DIR2_SF_INUMBERP(sfep)); 627 ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep));
628 i8count += ino > XFS_DIR2_MAX_SHORT_INUM; 628 i8count += ino > XFS_DIR2_MAX_SHORT_INUM;
629 offset = 629 offset =
630 XFS_DIR2_SF_GET_OFFSET(sfep) + 630 xfs_dir2_sf_get_offset(sfep) +
631 XFS_DIR2_DATA_ENTSIZE(sfep->namelen); 631 xfs_dir2_data_entsize(sfep->namelen);
632 } 632 }
633 ASSERT(i8count == sfp->hdr.i8count); 633 ASSERT(i8count == sfp->hdr.i8count);
634 ASSERT(XFS_BIG_INUMS || i8count == 0); 634 ASSERT(XFS_BIG_INUMS || i8count == 0);
@@ -671,7 +671,7 @@ xfs_dir2_sf_create(
671 ASSERT(dp->i_df.if_flags & XFS_IFINLINE); 671 ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
672 ASSERT(dp->i_df.if_bytes == 0); 672 ASSERT(dp->i_df.if_bytes == 0);
673 i8count = pino > XFS_DIR2_MAX_SHORT_INUM; 673 i8count = pino > XFS_DIR2_MAX_SHORT_INUM;
674 size = XFS_DIR2_SF_HDR_SIZE(i8count); 674 size = xfs_dir2_sf_hdr_size(i8count);
675 /* 675 /*
676 * Make a buffer for the data. 676 * Make a buffer for the data.
677 */ 677 */
@@ -684,7 +684,7 @@ xfs_dir2_sf_create(
684 /* 684 /*
685 * Now can put in the inode number, since i8count is set. 685 * Now can put in the inode number, since i8count is set.
686 */ 686 */
687 XFS_DIR2_SF_PUT_INUMBER(sfp, &pino, &sfp->hdr.parent); 687 xfs_dir2_sf_put_inumber(sfp, &pino, &sfp->hdr.parent);
688 sfp->hdr.count = 0; 688 sfp->hdr.count = 0;
689 dp->i_d.di_size = size; 689 dp->i_d.di_size = size;
690 xfs_dir2_sf_check(args); 690 xfs_dir2_sf_check(args);
@@ -727,12 +727,12 @@ xfs_dir2_sf_getdents(
727 727
728 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 728 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
729 729
730 ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count)); 730 ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
731 731
732 /* 732 /*
733 * If the block number in the offset is out of range, we're done. 733 * If the block number in the offset is out of range, we're done.
734 */ 734 */
735 if (XFS_DIR2_DATAPTR_TO_DB(mp, dir_offset) > mp->m_dirdatablk) { 735 if (xfs_dir2_dataptr_to_db(mp, dir_offset) > mp->m_dirdatablk) {
736 *eofp = 1; 736 *eofp = 1;
737 return 0; 737 return 0;
738 } 738 }
@@ -747,9 +747,9 @@ xfs_dir2_sf_getdents(
747 * Put . entry unless we're starting past it. 747 * Put . entry unless we're starting past it.
748 */ 748 */
749 if (dir_offset <= 749 if (dir_offset <=
750 XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk, 750 xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
751 XFS_DIR2_DATA_DOT_OFFSET)) { 751 XFS_DIR2_DATA_DOT_OFFSET)) {
752 p.cook = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, 0, 752 p.cook = xfs_dir2_db_off_to_dataptr(mp, 0,
753 XFS_DIR2_DATA_DOTDOT_OFFSET); 753 XFS_DIR2_DATA_DOTDOT_OFFSET);
754 p.ino = dp->i_ino; 754 p.ino = dp->i_ino;
755#if XFS_BIG_INUMS 755#if XFS_BIG_INUMS
@@ -762,7 +762,7 @@ xfs_dir2_sf_getdents(
762 762
763 if (!p.done) { 763 if (!p.done) {
764 uio->uio_offset = 764 uio->uio_offset =
765 XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk, 765 xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
766 XFS_DIR2_DATA_DOT_OFFSET); 766 XFS_DIR2_DATA_DOT_OFFSET);
767 return error; 767 return error;
768 } 768 }
@@ -772,11 +772,11 @@ xfs_dir2_sf_getdents(
772 * Put .. entry unless we're starting past it. 772 * Put .. entry unless we're starting past it.
773 */ 773 */
774 if (dir_offset <= 774 if (dir_offset <=
775 XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk, 775 xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
776 XFS_DIR2_DATA_DOTDOT_OFFSET)) { 776 XFS_DIR2_DATA_DOTDOT_OFFSET)) {
777 p.cook = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk, 777 p.cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
778 XFS_DIR2_DATA_FIRST_OFFSET); 778 XFS_DIR2_DATA_FIRST_OFFSET);
779 p.ino = XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent); 779 p.ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
780#if XFS_BIG_INUMS 780#if XFS_BIG_INUMS
781 p.ino += mp->m_inoadd; 781 p.ino += mp->m_inoadd;
782#endif 782#endif
@@ -787,7 +787,7 @@ xfs_dir2_sf_getdents(
787 787
788 if (!p.done) { 788 if (!p.done) {
789 uio->uio_offset = 789 uio->uio_offset =
790 XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk, 790 xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
791 XFS_DIR2_DATA_DOTDOT_OFFSET); 791 XFS_DIR2_DATA_DOTDOT_OFFSET);
792 return error; 792 return error;
793 } 793 }
@@ -796,23 +796,23 @@ xfs_dir2_sf_getdents(
796 /* 796 /*
797 * Loop while there are more entries and put'ing works. 797 * Loop while there are more entries and put'ing works.
798 */ 798 */
799 for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp); 799 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
800 i < sfp->hdr.count; 800 i < sfp->hdr.count;
801 i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) { 801 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
802 802
803 off = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk, 803 off = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
804 XFS_DIR2_SF_GET_OFFSET(sfep)); 804 xfs_dir2_sf_get_offset(sfep));
805 805
806 if (dir_offset > off) 806 if (dir_offset > off)
807 continue; 807 continue;
808 808
809 p.namelen = sfep->namelen; 809 p.namelen = sfep->namelen;
810 810
811 p.cook = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk, 811 p.cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
812 XFS_DIR2_SF_GET_OFFSET(sfep) + 812 xfs_dir2_sf_get_offset(sfep) +
813 XFS_DIR2_DATA_ENTSIZE(p.namelen)); 813 xfs_dir2_data_entsize(p.namelen));
814 814
815 p.ino = XFS_DIR2_SF_GET_INUMBER(sfp, XFS_DIR2_SF_INUMBERP(sfep)); 815 p.ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep));
816#if XFS_BIG_INUMS 816#if XFS_BIG_INUMS
817 p.ino += mp->m_inoadd; 817 p.ino += mp->m_inoadd;
818#endif 818#endif
@@ -832,7 +832,7 @@ xfs_dir2_sf_getdents(
832 *eofp = 1; 832 *eofp = 1;
833 833
834 uio->uio_offset = 834 uio->uio_offset =
835 XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk + 1, 0); 835 xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0);
836 836
837 return 0; 837 return 0;
838} 838}
@@ -865,7 +865,7 @@ xfs_dir2_sf_lookup(
865 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); 865 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
866 ASSERT(dp->i_df.if_u1.if_data != NULL); 866 ASSERT(dp->i_df.if_u1.if_data != NULL);
867 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 867 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
868 ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count)); 868 ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
869 /* 869 /*
870 * Special case for . 870 * Special case for .
871 */ 871 */
@@ -878,21 +878,21 @@ xfs_dir2_sf_lookup(
878 */ 878 */
879 if (args->namelen == 2 && 879 if (args->namelen == 2 &&
880 args->name[0] == '.' && args->name[1] == '.') { 880 args->name[0] == '.' && args->name[1] == '.') {
881 args->inumber = XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent); 881 args->inumber = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
882 return XFS_ERROR(EEXIST); 882 return XFS_ERROR(EEXIST);
883 } 883 }
884 /* 884 /*
885 * Loop over all the entries trying to match ours. 885 * Loop over all the entries trying to match ours.
886 */ 886 */
887 for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp); 887 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
888 i < sfp->hdr.count; 888 i < sfp->hdr.count;
889 i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) { 889 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
890 if (sfep->namelen == args->namelen && 890 if (sfep->namelen == args->namelen &&
891 sfep->name[0] == args->name[0] && 891 sfep->name[0] == args->name[0] &&
892 memcmp(args->name, sfep->name, args->namelen) == 0) { 892 memcmp(args->name, sfep->name, args->namelen) == 0) {
893 args->inumber = 893 args->inumber =
894 XFS_DIR2_SF_GET_INUMBER(sfp, 894 xfs_dir2_sf_get_inumber(sfp,
895 XFS_DIR2_SF_INUMBERP(sfep)); 895 xfs_dir2_sf_inumberp(sfep));
896 return XFS_ERROR(EEXIST); 896 return XFS_ERROR(EEXIST);
897 } 897 }
898 } 898 }
@@ -934,19 +934,19 @@ xfs_dir2_sf_removename(
934 ASSERT(dp->i_df.if_bytes == oldsize); 934 ASSERT(dp->i_df.if_bytes == oldsize);
935 ASSERT(dp->i_df.if_u1.if_data != NULL); 935 ASSERT(dp->i_df.if_u1.if_data != NULL);
936 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 936 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
937 ASSERT(oldsize >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count)); 937 ASSERT(oldsize >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
938 /* 938 /*
939 * Loop over the old directory entries. 939 * Loop over the old directory entries.
940 * Find the one we're deleting. 940 * Find the one we're deleting.
941 */ 941 */
942 for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp); 942 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
943 i < sfp->hdr.count; 943 i < sfp->hdr.count;
944 i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) { 944 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
945 if (sfep->namelen == args->namelen && 945 if (sfep->namelen == args->namelen &&
946 sfep->name[0] == args->name[0] && 946 sfep->name[0] == args->name[0] &&
947 memcmp(sfep->name, args->name, args->namelen) == 0) { 947 memcmp(sfep->name, args->name, args->namelen) == 0) {
948 ASSERT(XFS_DIR2_SF_GET_INUMBER(sfp, 948 ASSERT(xfs_dir2_sf_get_inumber(sfp,
949 XFS_DIR2_SF_INUMBERP(sfep)) == 949 xfs_dir2_sf_inumberp(sfep)) ==
950 args->inumber); 950 args->inumber);
951 break; 951 break;
952 } 952 }
@@ -961,7 +961,7 @@ xfs_dir2_sf_removename(
961 * Calculate sizes. 961 * Calculate sizes.
962 */ 962 */
963 byteoff = (int)((char *)sfep - (char *)sfp); 963 byteoff = (int)((char *)sfep - (char *)sfp);
964 entsize = XFS_DIR2_SF_ENTSIZE_BYNAME(sfp, args->namelen); 964 entsize = xfs_dir2_sf_entsize_byname(sfp, args->namelen);
965 newsize = oldsize - entsize; 965 newsize = oldsize - entsize;
966 /* 966 /*
967 * Copy the part if any after the removed entry, sliding it down. 967 * Copy the part if any after the removed entry, sliding it down.
@@ -1027,7 +1027,7 @@ xfs_dir2_sf_replace(
1027 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); 1027 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
1028 ASSERT(dp->i_df.if_u1.if_data != NULL); 1028 ASSERT(dp->i_df.if_u1.if_data != NULL);
1029 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 1029 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
1030 ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count)); 1030 ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
1031#if XFS_BIG_INUMS 1031#if XFS_BIG_INUMS
1032 /* 1032 /*
1033 * New inode number is large, and need to convert to 8-byte inodes. 1033 * New inode number is large, and need to convert to 8-byte inodes.
@@ -1067,28 +1067,28 @@ xfs_dir2_sf_replace(
1067 if (args->namelen == 2 && 1067 if (args->namelen == 2 &&
1068 args->name[0] == '.' && args->name[1] == '.') { 1068 args->name[0] == '.' && args->name[1] == '.') {
1069#if XFS_BIG_INUMS || defined(DEBUG) 1069#if XFS_BIG_INUMS || defined(DEBUG)
1070 ino = XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent); 1070 ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
1071 ASSERT(args->inumber != ino); 1071 ASSERT(args->inumber != ino);
1072#endif 1072#endif
1073 XFS_DIR2_SF_PUT_INUMBER(sfp, &args->inumber, &sfp->hdr.parent); 1073 xfs_dir2_sf_put_inumber(sfp, &args->inumber, &sfp->hdr.parent);
1074 } 1074 }
1075 /* 1075 /*
1076 * Normal entry, look for the name. 1076 * Normal entry, look for the name.
1077 */ 1077 */
1078 else { 1078 else {
1079 for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp); 1079 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
1080 i < sfp->hdr.count; 1080 i < sfp->hdr.count;
1081 i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) { 1081 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
1082 if (sfep->namelen == args->namelen && 1082 if (sfep->namelen == args->namelen &&
1083 sfep->name[0] == args->name[0] && 1083 sfep->name[0] == args->name[0] &&
1084 memcmp(args->name, sfep->name, args->namelen) == 0) { 1084 memcmp(args->name, sfep->name, args->namelen) == 0) {
1085#if XFS_BIG_INUMS || defined(DEBUG) 1085#if XFS_BIG_INUMS || defined(DEBUG)
1086 ino = XFS_DIR2_SF_GET_INUMBER(sfp, 1086 ino = xfs_dir2_sf_get_inumber(sfp,
1087 XFS_DIR2_SF_INUMBERP(sfep)); 1087 xfs_dir2_sf_inumberp(sfep));
1088 ASSERT(args->inumber != ino); 1088 ASSERT(args->inumber != ino);
1089#endif 1089#endif
1090 XFS_DIR2_SF_PUT_INUMBER(sfp, &args->inumber, 1090 xfs_dir2_sf_put_inumber(sfp, &args->inumber,
1091 XFS_DIR2_SF_INUMBERP(sfep)); 1091 xfs_dir2_sf_inumberp(sfep));
1092 break; 1092 break;
1093 } 1093 }
1094 } 1094 }
@@ -1189,22 +1189,22 @@ xfs_dir2_sf_toino4(
1189 */ 1189 */
1190 sfp->hdr.count = oldsfp->hdr.count; 1190 sfp->hdr.count = oldsfp->hdr.count;
1191 sfp->hdr.i8count = 0; 1191 sfp->hdr.i8count = 0;
1192 ino = XFS_DIR2_SF_GET_INUMBER(oldsfp, &oldsfp->hdr.parent); 1192 ino = xfs_dir2_sf_get_inumber(oldsfp, &oldsfp->hdr.parent);
1193 XFS_DIR2_SF_PUT_INUMBER(sfp, &ino, &sfp->hdr.parent); 1193 xfs_dir2_sf_put_inumber(sfp, &ino, &sfp->hdr.parent);
1194 /* 1194 /*
1195 * Copy the entries field by field. 1195 * Copy the entries field by field.
1196 */ 1196 */
1197 for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp), 1197 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp),
1198 oldsfep = XFS_DIR2_SF_FIRSTENTRY(oldsfp); 1198 oldsfep = xfs_dir2_sf_firstentry(oldsfp);
1199 i < sfp->hdr.count; 1199 i < sfp->hdr.count;
1200 i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep), 1200 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep),
1201 oldsfep = XFS_DIR2_SF_NEXTENTRY(oldsfp, oldsfep)) { 1201 oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep)) {
1202 sfep->namelen = oldsfep->namelen; 1202 sfep->namelen = oldsfep->namelen;
1203 sfep->offset = oldsfep->offset; 1203 sfep->offset = oldsfep->offset;
1204 memcpy(sfep->name, oldsfep->name, sfep->namelen); 1204 memcpy(sfep->name, oldsfep->name, sfep->namelen);
1205 ino = XFS_DIR2_SF_GET_INUMBER(oldsfp, 1205 ino = xfs_dir2_sf_get_inumber(oldsfp,
1206 XFS_DIR2_SF_INUMBERP(oldsfep)); 1206 xfs_dir2_sf_inumberp(oldsfep));
1207 XFS_DIR2_SF_PUT_INUMBER(sfp, &ino, XFS_DIR2_SF_INUMBERP(sfep)); 1207 xfs_dir2_sf_put_inumber(sfp, &ino, xfs_dir2_sf_inumberp(sfep));
1208 } 1208 }
1209 /* 1209 /*
1210 * Clean up the inode. 1210 * Clean up the inode.
@@ -1266,22 +1266,22 @@ xfs_dir2_sf_toino8(
1266 */ 1266 */
1267 sfp->hdr.count = oldsfp->hdr.count; 1267 sfp->hdr.count = oldsfp->hdr.count;
1268 sfp->hdr.i8count = 1; 1268 sfp->hdr.i8count = 1;
1269 ino = XFS_DIR2_SF_GET_INUMBER(oldsfp, &oldsfp->hdr.parent); 1269 ino = xfs_dir2_sf_get_inumber(oldsfp, &oldsfp->hdr.parent);
1270 XFS_DIR2_SF_PUT_INUMBER(sfp, &ino, &sfp->hdr.parent); 1270 xfs_dir2_sf_put_inumber(sfp, &ino, &sfp->hdr.parent);
1271 /* 1271 /*
1272 * Copy the entries field by field. 1272 * Copy the entries field by field.
1273 */ 1273 */
1274 for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp), 1274 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp),
1275 oldsfep = XFS_DIR2_SF_FIRSTENTRY(oldsfp); 1275 oldsfep = xfs_dir2_sf_firstentry(oldsfp);
1276 i < sfp->hdr.count; 1276 i < sfp->hdr.count;
1277 i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep), 1277 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep),
1278 oldsfep = XFS_DIR2_SF_NEXTENTRY(oldsfp, oldsfep)) { 1278 oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep)) {
1279 sfep->namelen = oldsfep->namelen; 1279 sfep->namelen = oldsfep->namelen;
1280 sfep->offset = oldsfep->offset; 1280 sfep->offset = oldsfep->offset;
1281 memcpy(sfep->name, oldsfep->name, sfep->namelen); 1281 memcpy(sfep->name, oldsfep->name, sfep->namelen);
1282 ino = XFS_DIR2_SF_GET_INUMBER(oldsfp, 1282 ino = xfs_dir2_sf_get_inumber(oldsfp,
1283 XFS_DIR2_SF_INUMBERP(oldsfep)); 1283 xfs_dir2_sf_inumberp(oldsfep));
1284 XFS_DIR2_SF_PUT_INUMBER(sfp, &ino, XFS_DIR2_SF_INUMBERP(sfep)); 1284 xfs_dir2_sf_put_inumber(sfp, &ino, xfs_dir2_sf_inumberp(sfep));
1285 } 1285 }
1286 /* 1286 /*
1287 * Clean up the inode. 1287 * Clean up the inode.
diff --git a/fs/xfs/xfs_dir2_sf.h b/fs/xfs/xfs_dir2_sf.h
index 42f015b7001..11e503209af 100644
--- a/fs/xfs/xfs_dir2_sf.h
+++ b/fs/xfs/xfs_dir2_sf.h
@@ -90,7 +90,6 @@ typedef struct xfs_dir2_sf {
90 xfs_dir2_sf_entry_t list[1]; /* shortform entries */ 90 xfs_dir2_sf_entry_t list[1]; /* shortform entries */
91} xfs_dir2_sf_t; 91} xfs_dir2_sf_t;
92 92
93#define XFS_DIR2_SF_HDR_SIZE(i8count) xfs_dir2_sf_hdr_size(i8count)
94static inline int xfs_dir2_sf_hdr_size(int i8count) 93static inline int xfs_dir2_sf_hdr_size(int i8count)
95{ 94{
96 return ((uint)sizeof(xfs_dir2_sf_hdr_t) - \ 95 return ((uint)sizeof(xfs_dir2_sf_hdr_t) - \
@@ -98,14 +97,11 @@ static inline int xfs_dir2_sf_hdr_size(int i8count)
98 ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t))); 97 ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)));
99} 98}
100 99
101#define XFS_DIR2_SF_INUMBERP(sfep) xfs_dir2_sf_inumberp(sfep)
102static inline xfs_dir2_inou_t *xfs_dir2_sf_inumberp(xfs_dir2_sf_entry_t *sfep) 100static inline xfs_dir2_inou_t *xfs_dir2_sf_inumberp(xfs_dir2_sf_entry_t *sfep)
103{ 101{
104 return (xfs_dir2_inou_t *)&(sfep)->name[(sfep)->namelen]; 102 return (xfs_dir2_inou_t *)&(sfep)->name[(sfep)->namelen];
105} 103}
106 104
107#define XFS_DIR2_SF_GET_INUMBER(sfp, from) \
108 xfs_dir2_sf_get_inumber(sfp, from)
109static inline xfs_intino_t 105static inline xfs_intino_t
110xfs_dir2_sf_get_inumber(xfs_dir2_sf_t *sfp, xfs_dir2_inou_t *from) 106xfs_dir2_sf_get_inumber(xfs_dir2_sf_t *sfp, xfs_dir2_inou_t *from)
111{ 107{
@@ -114,8 +110,6 @@ xfs_dir2_sf_get_inumber(xfs_dir2_sf_t *sfp, xfs_dir2_inou_t *from)
114 (xfs_intino_t)XFS_GET_DIR_INO8((from)->i8)); 110 (xfs_intino_t)XFS_GET_DIR_INO8((from)->i8));
115} 111}
116 112
117#define XFS_DIR2_SF_PUT_INUMBER(sfp,from,to) \
118 xfs_dir2_sf_put_inumber(sfp,from,to)
119static inline void xfs_dir2_sf_put_inumber(xfs_dir2_sf_t *sfp, xfs_ino_t *from, 113static inline void xfs_dir2_sf_put_inumber(xfs_dir2_sf_t *sfp, xfs_ino_t *from,
120 xfs_dir2_inou_t *to) 114 xfs_dir2_inou_t *to)
121{ 115{
@@ -125,24 +119,18 @@ static inline void xfs_dir2_sf_put_inumber(xfs_dir2_sf_t *sfp, xfs_ino_t *from,
125 XFS_PUT_DIR_INO8(*(from), (to)->i8); 119 XFS_PUT_DIR_INO8(*(from), (to)->i8);
126} 120}
127 121
128#define XFS_DIR2_SF_GET_OFFSET(sfep) \
129 xfs_dir2_sf_get_offset(sfep)
130static inline xfs_dir2_data_aoff_t 122static inline xfs_dir2_data_aoff_t
131xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t *sfep) 123xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t *sfep)
132{ 124{
133 return INT_GET_UNALIGNED_16_BE(&(sfep)->offset.i); 125 return INT_GET_UNALIGNED_16_BE(&(sfep)->offset.i);
134} 126}
135 127
136#define XFS_DIR2_SF_PUT_OFFSET(sfep,off) \
137 xfs_dir2_sf_put_offset(sfep,off)
138static inline void 128static inline void
139xfs_dir2_sf_put_offset(xfs_dir2_sf_entry_t *sfep, xfs_dir2_data_aoff_t off) 129xfs_dir2_sf_put_offset(xfs_dir2_sf_entry_t *sfep, xfs_dir2_data_aoff_t off)
140{ 130{
141 INT_SET_UNALIGNED_16_BE(&(sfep)->offset.i, off); 131 INT_SET_UNALIGNED_16_BE(&(sfep)->offset.i, off);
142} 132}
143 133
144#define XFS_DIR2_SF_ENTSIZE_BYNAME(sfp,len) \
145 xfs_dir2_sf_entsize_byname(sfp,len)
146static inline int xfs_dir2_sf_entsize_byname(xfs_dir2_sf_t *sfp, int len) 134static inline int xfs_dir2_sf_entsize_byname(xfs_dir2_sf_t *sfp, int len)
147{ 135{
148 return ((uint)sizeof(xfs_dir2_sf_entry_t) - 1 + (len) - \ 136 return ((uint)sizeof(xfs_dir2_sf_entry_t) - 1 + (len) - \
@@ -150,8 +138,6 @@ static inline int xfs_dir2_sf_entsize_byname(xfs_dir2_sf_t *sfp, int len)
150 ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t))); 138 ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)));
151} 139}
152 140
153#define XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp,sfep) \
154 xfs_dir2_sf_entsize_byentry(sfp,sfep)
155static inline int 141static inline int
156xfs_dir2_sf_entsize_byentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep) 142xfs_dir2_sf_entsize_byentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep)
157{ 143{
@@ -160,19 +146,17 @@ xfs_dir2_sf_entsize_byentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep)
160 ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t))); 146 ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)));
161} 147}
162 148
163#define XFS_DIR2_SF_FIRSTENTRY(sfp) xfs_dir2_sf_firstentry(sfp)
164static inline xfs_dir2_sf_entry_t *xfs_dir2_sf_firstentry(xfs_dir2_sf_t *sfp) 149static inline xfs_dir2_sf_entry_t *xfs_dir2_sf_firstentry(xfs_dir2_sf_t *sfp)
165{ 150{
166 return ((xfs_dir2_sf_entry_t *) \ 151 return ((xfs_dir2_sf_entry_t *) \
167 ((char *)(sfp) + XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count))); 152 ((char *)(sfp) + xfs_dir2_sf_hdr_size(sfp->hdr.i8count)));
168} 153}
169 154
170#define XFS_DIR2_SF_NEXTENTRY(sfp,sfep) xfs_dir2_sf_nextentry(sfp,sfep)
171static inline xfs_dir2_sf_entry_t * 155static inline xfs_dir2_sf_entry_t *
172xfs_dir2_sf_nextentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep) 156xfs_dir2_sf_nextentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep)
173{ 157{
174 return ((xfs_dir2_sf_entry_t *) \ 158 return ((xfs_dir2_sf_entry_t *) \
175 ((char *)(sfep) + XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp,sfep))); 159 ((char *)(sfep) + xfs_dir2_sf_entsize_byentry(sfp,sfep)));
176} 160}
177 161
178/* 162/*
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
new file mode 100644
index 00000000000..ce2278611bb
--- /dev/null
+++ b/fs/xfs/xfs_filestream.c
@@ -0,0 +1,771 @@
1/*
2 * Copyright (c) 2006-2007 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_bmap_btree.h"
20#include "xfs_inum.h"
21#include "xfs_dir2.h"
22#include "xfs_dir2_sf.h"
23#include "xfs_attr_sf.h"
24#include "xfs_dinode.h"
25#include "xfs_inode.h"
26#include "xfs_ag.h"
27#include "xfs_dmapi.h"
28#include "xfs_log.h"
29#include "xfs_trans.h"
30#include "xfs_sb.h"
31#include "xfs_mount.h"
32#include "xfs_bmap.h"
33#include "xfs_alloc.h"
34#include "xfs_utils.h"
35#include "xfs_mru_cache.h"
36#include "xfs_filestream.h"
37
38#ifdef XFS_FILESTREAMS_TRACE
39
40ktrace_t *xfs_filestreams_trace_buf;
41
42STATIC void
43xfs_filestreams_trace(
44 xfs_mount_t *mp, /* mount point */
45 int type, /* type of trace */
46 const char *func, /* source function */
47 int line, /* source line number */
48 __psunsigned_t arg0,
49 __psunsigned_t arg1,
50 __psunsigned_t arg2,
51 __psunsigned_t arg3,
52 __psunsigned_t arg4,
53 __psunsigned_t arg5)
54{
55 ktrace_enter(xfs_filestreams_trace_buf,
56 (void *)(__psint_t)(type | (line << 16)),
57 (void *)func,
58 (void *)(__psunsigned_t)current_pid(),
59 (void *)mp,
60 (void *)(__psunsigned_t)arg0,
61 (void *)(__psunsigned_t)arg1,
62 (void *)(__psunsigned_t)arg2,
63 (void *)(__psunsigned_t)arg3,
64 (void *)(__psunsigned_t)arg4,
65 (void *)(__psunsigned_t)arg5,
66 NULL, NULL, NULL, NULL, NULL, NULL);
67}
68
69#define TRACE0(mp,t) TRACE6(mp,t,0,0,0,0,0,0)
70#define TRACE1(mp,t,a0) TRACE6(mp,t,a0,0,0,0,0,0)
71#define TRACE2(mp,t,a0,a1) TRACE6(mp,t,a0,a1,0,0,0,0)
72#define TRACE3(mp,t,a0,a1,a2) TRACE6(mp,t,a0,a1,a2,0,0,0)
73#define TRACE4(mp,t,a0,a1,a2,a3) TRACE6(mp,t,a0,a1,a2,a3,0,0)
74#define TRACE5(mp,t,a0,a1,a2,a3,a4) TRACE6(mp,t,a0,a1,a2,a3,a4,0)
75#define TRACE6(mp,t,a0,a1,a2,a3,a4,a5) \
76 xfs_filestreams_trace(mp, t, __FUNCTION__, __LINE__, \
77 (__psunsigned_t)a0, (__psunsigned_t)a1, \
78 (__psunsigned_t)a2, (__psunsigned_t)a3, \
79 (__psunsigned_t)a4, (__psunsigned_t)a5)
80
81#define TRACE_AG_SCAN(mp, ag, ag2) \
82 TRACE2(mp, XFS_FSTRM_KTRACE_AGSCAN, ag, ag2);
83#define TRACE_AG_PICK1(mp, max_ag, maxfree) \
84 TRACE2(mp, XFS_FSTRM_KTRACE_AGPICK1, max_ag, maxfree);
85#define TRACE_AG_PICK2(mp, ag, ag2, cnt, free, scan, flag) \
86 TRACE6(mp, XFS_FSTRM_KTRACE_AGPICK2, ag, ag2, \
87 cnt, free, scan, flag)
88#define TRACE_UPDATE(mp, ip, ag, cnt, ag2, cnt2) \
89 TRACE5(mp, XFS_FSTRM_KTRACE_UPDATE, ip, ag, cnt, ag2, cnt2)
90#define TRACE_FREE(mp, ip, pip, ag, cnt) \
91 TRACE4(mp, XFS_FSTRM_KTRACE_FREE, ip, pip, ag, cnt)
92#define TRACE_LOOKUP(mp, ip, pip, ag, cnt) \
93 TRACE4(mp, XFS_FSTRM_KTRACE_ITEM_LOOKUP, ip, pip, ag, cnt)
94#define TRACE_ASSOCIATE(mp, ip, pip, ag, cnt) \
95 TRACE4(mp, XFS_FSTRM_KTRACE_ASSOCIATE, ip, pip, ag, cnt)
96#define TRACE_MOVEAG(mp, ip, pip, oag, ocnt, nag, ncnt) \
97 TRACE6(mp, XFS_FSTRM_KTRACE_MOVEAG, ip, pip, oag, ocnt, nag, ncnt)
98#define TRACE_ORPHAN(mp, ip, ag) \
99 TRACE2(mp, XFS_FSTRM_KTRACE_ORPHAN, ip, ag);
100
101
102#else
103#define TRACE_AG_SCAN(mp, ag, ag2)
104#define TRACE_AG_PICK1(mp, max_ag, maxfree)
105#define TRACE_AG_PICK2(mp, ag, ag2, cnt, free, scan, flag)
106#define TRACE_UPDATE(mp, ip, ag, cnt, ag2, cnt2)
107#define TRACE_FREE(mp, ip, pip, ag, cnt)
108#define TRACE_LOOKUP(mp, ip, pip, ag, cnt)
109#define TRACE_ASSOCIATE(mp, ip, pip, ag, cnt)
110#define TRACE_MOVEAG(mp, ip, pip, oag, ocnt, nag, ncnt)
111#define TRACE_ORPHAN(mp, ip, ag)
112#endif
113
114static kmem_zone_t *item_zone;
115
116/*
117 * Structure for associating a file or a directory with an allocation group.
118 * The parent directory pointer is only needed for files, but since there will
119 * generally be vastly more files than directories in the cache, using the same
120 * data structure simplifies the code with very little memory overhead.
121 */
122typedef struct fstrm_item
123{
124 xfs_agnumber_t ag; /* AG currently in use for the file/directory. */
125 xfs_inode_t *ip; /* inode self-pointer. */
126 xfs_inode_t *pip; /* Parent directory inode pointer. */
127} fstrm_item_t;
128
129
130/*
131 * Scan the AGs starting at startag looking for an AG that isn't in use and has
132 * at least minlen blocks free.
133 */
134static int
135_xfs_filestream_pick_ag(
136 xfs_mount_t *mp,
137 xfs_agnumber_t startag,
138 xfs_agnumber_t *agp,
139 int flags,
140 xfs_extlen_t minlen)
141{
142 int err, trylock, nscan;
143 xfs_extlen_t delta, longest, need, free, minfree, maxfree = 0;
144 xfs_agnumber_t ag, max_ag = NULLAGNUMBER;
145 struct xfs_perag *pag;
146
147 /* 2% of an AG's blocks must be free for it to be chosen. */
148 minfree = mp->m_sb.sb_agblocks / 50;
149
150 ag = startag;
151 *agp = NULLAGNUMBER;
152
153 /* For the first pass, don't sleep trying to init the per-AG. */
154 trylock = XFS_ALLOC_FLAG_TRYLOCK;
155
156 for (nscan = 0; 1; nscan++) {
157
158 TRACE_AG_SCAN(mp, ag, xfs_filestream_peek_ag(mp, ag));
159
160 pag = mp->m_perag + ag;
161
162 if (!pag->pagf_init) {
163 err = xfs_alloc_pagf_init(mp, NULL, ag, trylock);
164 if (err && !trylock)
165 return err;
166 }
167
168 /* Might fail sometimes during the 1st pass with trylock set. */
169 if (!pag->pagf_init)
170 goto next_ag;
171
172 /* Keep track of the AG with the most free blocks. */
173 if (pag->pagf_freeblks > maxfree) {
174 maxfree = pag->pagf_freeblks;
175 max_ag = ag;
176 }
177
178 /*
179 * The AG reference count does two things: it enforces mutual
180 * exclusion when examining the suitability of an AG in this
181 * loop, and it guards against two filestreams being established
182 * in the same AG as each other.
183 */
184 if (xfs_filestream_get_ag(mp, ag) > 1) {
185 xfs_filestream_put_ag(mp, ag);
186 goto next_ag;
187 }
188
189 need = XFS_MIN_FREELIST_PAG(pag, mp);
190 delta = need > pag->pagf_flcount ? need - pag->pagf_flcount : 0;
191 longest = (pag->pagf_longest > delta) ?
192 (pag->pagf_longest - delta) :
193 (pag->pagf_flcount > 0 || pag->pagf_longest > 0);
194
195 if (((minlen && longest >= minlen) ||
196 (!minlen && pag->pagf_freeblks >= minfree)) &&
197 (!pag->pagf_metadata || !(flags & XFS_PICK_USERDATA) ||
198 (flags & XFS_PICK_LOWSPACE))) {
199
200 /* Break out, retaining the reference on the AG. */
201 free = pag->pagf_freeblks;
202 *agp = ag;
203 break;
204 }
205
206 /* Drop the reference on this AG, it's not usable. */
207 xfs_filestream_put_ag(mp, ag);
208next_ag:
209 /* Move to the next AG, wrapping to AG 0 if necessary. */
210 if (++ag >= mp->m_sb.sb_agcount)
211 ag = 0;
212
213 /* If a full pass of the AGs hasn't been done yet, continue. */
214 if (ag != startag)
215 continue;
216
217 /* Allow sleeping in xfs_alloc_pagf_init() on the 2nd pass. */
218 if (trylock != 0) {
219 trylock = 0;
220 continue;
221 }
222
223 /* Finally, if lowspace wasn't set, set it for the 3rd pass. */
224 if (!(flags & XFS_PICK_LOWSPACE)) {
225 flags |= XFS_PICK_LOWSPACE;
226 continue;
227 }
228
229 /*
230 * Take the AG with the most free space, regardless of whether
231 * it's already in use by another filestream.
232 */
233 if (max_ag != NULLAGNUMBER) {
234 xfs_filestream_get_ag(mp, max_ag);
235 TRACE_AG_PICK1(mp, max_ag, maxfree);
236 free = maxfree;
237 *agp = max_ag;
238 break;
239 }
240
241 /* take AG 0 if none matched */
242 TRACE_AG_PICK1(mp, max_ag, maxfree);
243 *agp = 0;
244 return 0;
245 }
246
247 TRACE_AG_PICK2(mp, startag, *agp, xfs_filestream_peek_ag(mp, *agp),
248 free, nscan, flags);
249
250 return 0;
251}
252
253/*
254 * Set the allocation group number for a file or a directory, updating inode
255 * references and per-AG references as appropriate. Must be called with the
256 * m_peraglock held in read mode.
257 */
258static int
259_xfs_filestream_update_ag(
260 xfs_inode_t *ip,
261 xfs_inode_t *pip,
262 xfs_agnumber_t ag)
263{
264 int err = 0;
265 xfs_mount_t *mp;
266 xfs_mru_cache_t *cache;
267 fstrm_item_t *item;
268 xfs_agnumber_t old_ag;
269 xfs_inode_t *old_pip;
270
271 /*
272 * Either ip is a regular file and pip is a directory, or ip is a
273 * directory and pip is NULL.
274 */
275 ASSERT(ip && (((ip->i_d.di_mode & S_IFREG) && pip &&
276 (pip->i_d.di_mode & S_IFDIR)) ||
277 ((ip->i_d.di_mode & S_IFDIR) && !pip)));
278
279 mp = ip->i_mount;
280 cache = mp->m_filestream;
281
282 item = xfs_mru_cache_lookup(cache, ip->i_ino);
283 if (item) {
284 ASSERT(item->ip == ip);
285 old_ag = item->ag;
286 item->ag = ag;
287 old_pip = item->pip;
288 item->pip = pip;
289 xfs_mru_cache_done(cache);
290
291 /*
292 * If the AG has changed, drop the old ref and take a new one,
293 * effectively transferring the reference from old to new AG.
294 */
295 if (ag != old_ag) {
296 xfs_filestream_put_ag(mp, old_ag);
297 xfs_filestream_get_ag(mp, ag);
298 }
299
300 /*
301 * If ip is a file and its pip has changed, drop the old ref and
302 * take a new one.
303 */
304 if (pip && pip != old_pip) {
305 IRELE(old_pip);
306 IHOLD(pip);
307 }
308
309 TRACE_UPDATE(mp, ip, old_ag, xfs_filestream_peek_ag(mp, old_ag),
310 ag, xfs_filestream_peek_ag(mp, ag));
311 return 0;
312 }
313
314 item = kmem_zone_zalloc(item_zone, KM_MAYFAIL);
315 if (!item)
316 return ENOMEM;
317
318 item->ag = ag;
319 item->ip = ip;
320 item->pip = pip;
321
322 err = xfs_mru_cache_insert(cache, ip->i_ino, item);
323 if (err) {
324 kmem_zone_free(item_zone, item);
325 return err;
326 }
327
328 /* Take a reference on the AG. */
329 xfs_filestream_get_ag(mp, ag);
330
331 /*
332 * Take a reference on the inode itself regardless of whether it's a
333 * regular file or a directory.
334 */
335 IHOLD(ip);
336
337 /*
338 * In the case of a regular file, take a reference on the parent inode
339 * as well to ensure it remains in-core.
340 */
341 if (pip)
342 IHOLD(pip);
343
344 TRACE_UPDATE(mp, ip, ag, xfs_filestream_peek_ag(mp, ag),
345 ag, xfs_filestream_peek_ag(mp, ag));
346
347 return 0;
348}
349
350/* xfs_fstrm_free_func(): callback for freeing cached stream items. */
351void
352xfs_fstrm_free_func(
353 xfs_ino_t ino,
354 fstrm_item_t *item)
355{
356 xfs_inode_t *ip = item->ip;
357 int ref;
358
359 ASSERT(ip->i_ino == ino);
360
361 xfs_iflags_clear(ip, XFS_IFILESTREAM);
362
363 /* Drop the reference taken on the AG when the item was added. */
364 ref = xfs_filestream_put_ag(ip->i_mount, item->ag);
365
366 ASSERT(ref >= 0);
367 TRACE_FREE(ip->i_mount, ip, item->pip, item->ag,
368 xfs_filestream_peek_ag(ip->i_mount, item->ag));
369
370 /*
371 * _xfs_filestream_update_ag() always takes a reference on the inode
372 * itself, whether it's a file or a directory. Release it here.
373 * This can result in the inode being freed and so we must
374 * not hold any inode locks when freeing filesstreams objects
375 * otherwise we can deadlock here.
376 */
377 IRELE(ip);
378
379 /*
380 * In the case of a regular file, _xfs_filestream_update_ag() also
381 * takes a ref on the parent inode to keep it in-core. Release that
382 * too.
383 */
384 if (item->pip)
385 IRELE(item->pip);
386
387 /* Finally, free the memory allocated for the item. */
388 kmem_zone_free(item_zone, item);
389}
390
391/*
392 * xfs_filestream_init() is called at xfs initialisation time to set up the
393 * memory zone that will be used for filestream data structure allocation.
394 */
395int
396xfs_filestream_init(void)
397{
398 item_zone = kmem_zone_init(sizeof(fstrm_item_t), "fstrm_item");
399#ifdef XFS_FILESTREAMS_TRACE
400 xfs_filestreams_trace_buf = ktrace_alloc(XFS_FSTRM_KTRACE_SIZE, KM_SLEEP);
401#endif
402 return item_zone ? 0 : -ENOMEM;
403}
404
405/*
406 * xfs_filestream_uninit() is called at xfs termination time to destroy the
407 * memory zone that was used for filestream data structure allocation.
408 */
409void
410xfs_filestream_uninit(void)
411{
412#ifdef XFS_FILESTREAMS_TRACE
413 ktrace_free(xfs_filestreams_trace_buf);
414#endif
415 kmem_zone_destroy(item_zone);
416}
417
418/*
419 * xfs_filestream_mount() is called when a file system is mounted with the
420 * filestream option. It is responsible for allocating the data structures
421 * needed to track the new file system's file streams.
422 */
423int
424xfs_filestream_mount(
425 xfs_mount_t *mp)
426{
427 int err;
428 unsigned int lifetime, grp_count;
429
430 /*
431 * The filestream timer tunable is currently fixed within the range of
432 * one second to four minutes, with five seconds being the default. The
433 * group count is somewhat arbitrary, but it'd be nice to adhere to the
434 * timer tunable to within about 10 percent. This requires at least 10
435 * groups.
436 */
437 lifetime = xfs_fstrm_centisecs * 10;
438 grp_count = 10;
439
440 err = xfs_mru_cache_create(&mp->m_filestream, lifetime, grp_count,
441 (xfs_mru_cache_free_func_t)xfs_fstrm_free_func);
442
443 return err;
444}
445
446/*
447 * xfs_filestream_unmount() is called when a file system that was mounted with
448 * the filestream option is unmounted. It drains the data structures created
449 * to track the file system's file streams and frees all the memory that was
450 * allocated.
451 */
452void
453xfs_filestream_unmount(
454 xfs_mount_t *mp)
455{
456 xfs_mru_cache_destroy(mp->m_filestream);
457}
458
459/*
460 * If the mount point's m_perag array is going to be reallocated, all
461 * outstanding cache entries must be flushed to avoid accessing reference count
462 * addresses that have been freed. The call to xfs_filestream_flush() must be
463 * made inside the block that holds the m_peraglock in write mode to do the
464 * reallocation.
465 */
466void
467xfs_filestream_flush(
468 xfs_mount_t *mp)
469{
470 /* point in time flush, so keep the reaper running */
471 xfs_mru_cache_flush(mp->m_filestream, 1);
472}
473
474/*
475 * Return the AG of the filestream the file or directory belongs to, or
476 * NULLAGNUMBER otherwise.
477 */
478xfs_agnumber_t
479xfs_filestream_lookup_ag(
480 xfs_inode_t *ip)
481{
482 xfs_mru_cache_t *cache;
483 fstrm_item_t *item;
484 xfs_agnumber_t ag;
485 int ref;
486
487 if (!(ip->i_d.di_mode & (S_IFREG | S_IFDIR))) {
488 ASSERT(0);
489 return NULLAGNUMBER;
490 }
491
492 cache = ip->i_mount->m_filestream;
493 item = xfs_mru_cache_lookup(cache, ip->i_ino);
494 if (!item) {
495 TRACE_LOOKUP(ip->i_mount, ip, NULL, NULLAGNUMBER, 0);
496 return NULLAGNUMBER;
497 }
498
499 ASSERT(ip == item->ip);
500 ag = item->ag;
501 ref = xfs_filestream_peek_ag(ip->i_mount, ag);
502 xfs_mru_cache_done(cache);
503
504 TRACE_LOOKUP(ip->i_mount, ip, item->pip, ag, ref);
505 return ag;
506}
507
508/*
509 * xfs_filestream_associate() should only be called to associate a regular file
510 * with its parent directory. Calling it with a child directory isn't
511 * appropriate because filestreams don't apply to entire directory hierarchies.
512 * Creating a file in a child directory of an existing filestream directory
513 * starts a new filestream with its own allocation group association.
514 *
515 * Returns < 0 on error, 0 if successful association occurred, > 0 if
516 * we failed to get an association because of locking issues.
517 */
518int
519xfs_filestream_associate(
520 xfs_inode_t *pip,
521 xfs_inode_t *ip)
522{
523 xfs_mount_t *mp;
524 xfs_mru_cache_t *cache;
525 fstrm_item_t *item;
526 xfs_agnumber_t ag, rotorstep, startag;
527 int err = 0;
528
529 ASSERT(pip->i_d.di_mode & S_IFDIR);
530 ASSERT(ip->i_d.di_mode & S_IFREG);
531 if (!(pip->i_d.di_mode & S_IFDIR) || !(ip->i_d.di_mode & S_IFREG))
532 return -EINVAL;
533
534 mp = pip->i_mount;
535 cache = mp->m_filestream;
536 down_read(&mp->m_peraglock);
537
538 /*
539 * We have a problem, Houston.
540 *
541 * Taking the iolock here violates inode locking order - we already
542 * hold the ilock. Hence if we block getting this lock we may never
543 * wake. Unfortunately, that means if we can't get the lock, we're
544 * screwed in terms of getting a stream association - we can't spin
545 * waiting for the lock because someone else is waiting on the lock we
546 * hold and we cannot drop that as we are in a transaction here.
547 *
548 * Lucky for us, this inversion is rarely a problem because it's a
549 * directory inode that we are trying to lock here and that means the
550 * only place that matters is xfs_sync_inodes() and SYNC_DELWRI is
551 * used. i.e. freeze, remount-ro, quotasync or unmount.
552 *
553 * So, if we can't get the iolock without sleeping then just give up
554 */
555 if (!xfs_ilock_nowait(pip, XFS_IOLOCK_EXCL)) {
556 up_read(&mp->m_peraglock);
557 return 1;
558 }
559
560 /* If the parent directory is already in the cache, use its AG. */
561 item = xfs_mru_cache_lookup(cache, pip->i_ino);
562 if (item) {
563 ASSERT(item->ip == pip);
564 ag = item->ag;
565 xfs_mru_cache_done(cache);
566
567 TRACE_LOOKUP(mp, pip, pip, ag, xfs_filestream_peek_ag(mp, ag));
568 err = _xfs_filestream_update_ag(ip, pip, ag);
569
570 goto exit;
571 }
572
573 /*
574 * Set the starting AG using the rotor for inode32, otherwise
575 * use the directory inode's AG.
576 */
577 if (mp->m_flags & XFS_MOUNT_32BITINODES) {
578 rotorstep = xfs_rotorstep;
579 startag = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount;
580 mp->m_agfrotor = (mp->m_agfrotor + 1) %
581 (mp->m_sb.sb_agcount * rotorstep);
582 } else
583 startag = XFS_INO_TO_AGNO(mp, pip->i_ino);
584
585 /* Pick a new AG for the parent inode starting at startag. */
586 err = _xfs_filestream_pick_ag(mp, startag, &ag, 0, 0);
587 if (err || ag == NULLAGNUMBER)
588 goto exit_did_pick;
589
590 /* Associate the parent inode with the AG. */
591 err = _xfs_filestream_update_ag(pip, NULL, ag);
592 if (err)
593 goto exit_did_pick;
594
595 /* Associate the file inode with the AG. */
596 err = _xfs_filestream_update_ag(ip, pip, ag);
597 if (err)
598 goto exit_did_pick;
599
600 TRACE_ASSOCIATE(mp, ip, pip, ag, xfs_filestream_peek_ag(mp, ag));
601
602exit_did_pick:
603 /*
604 * If _xfs_filestream_pick_ag() returned a valid AG, remove the
605 * reference it took on it, since the file and directory will have taken
606 * their own now if they were successfully cached.
607 */
608 if (ag != NULLAGNUMBER)
609 xfs_filestream_put_ag(mp, ag);
610
611exit:
612 xfs_iunlock(pip, XFS_IOLOCK_EXCL);
613 up_read(&mp->m_peraglock);
614 return -err;
615}
616
617/*
618 * Pick a new allocation group for the current file and its file stream. This
619 * function is called by xfs_bmap_filestreams() with the mount point's per-ag
620 * lock held.
621 */
622int
623xfs_filestream_new_ag(
624 xfs_bmalloca_t *ap,
625 xfs_agnumber_t *agp)
626{
627 int flags, err;
628 xfs_inode_t *ip, *pip = NULL;
629 xfs_mount_t *mp;
630 xfs_mru_cache_t *cache;
631 xfs_extlen_t minlen;
632 fstrm_item_t *dir, *file;
633 xfs_agnumber_t ag = NULLAGNUMBER;
634
635 ip = ap->ip;
636 mp = ip->i_mount;
637 cache = mp->m_filestream;
638 minlen = ap->alen;
639 *agp = NULLAGNUMBER;
640
641 /*
642 * Look for the file in the cache, removing it if it's found. Doing
643 * this allows it to be held across the dir lookup that follows.
644 */
645 file = xfs_mru_cache_remove(cache, ip->i_ino);
646 if (file) {
647 ASSERT(ip == file->ip);
648
649 /* Save the file's parent inode and old AG number for later. */
650 pip = file->pip;
651 ag = file->ag;
652
653 /* Look for the file's directory in the cache. */
654 dir = xfs_mru_cache_lookup(cache, pip->i_ino);
655 if (dir) {
656 ASSERT(pip == dir->ip);
657
658 /*
659 * If the directory has already moved on to a new AG,
660 * use that AG as the new AG for the file. Don't
661 * forget to twiddle the AG refcounts to match the
662 * movement.
663 */
664 if (dir->ag != file->ag) {
665 xfs_filestream_put_ag(mp, file->ag);
666 xfs_filestream_get_ag(mp, dir->ag);
667 *agp = file->ag = dir->ag;
668 }
669
670 xfs_mru_cache_done(cache);
671 }
672
673 /*
674 * Put the file back in the cache. If this fails, the free
675 * function needs to be called to tidy up in the same way as if
676 * the item had simply expired from the cache.
677 */
678 err = xfs_mru_cache_insert(cache, ip->i_ino, file);
679 if (err) {
680 xfs_fstrm_free_func(ip->i_ino, file);
681 return err;
682 }
683
684 /*
685 * If the file's AG was moved to the directory's new AG, there's
686 * nothing more to be done.
687 */
688 if (*agp != NULLAGNUMBER) {
689 TRACE_MOVEAG(mp, ip, pip,
690 ag, xfs_filestream_peek_ag(mp, ag),
691 *agp, xfs_filestream_peek_ag(mp, *agp));
692 return 0;
693 }
694 }
695
696 /*
697 * If the file's parent directory is known, take its iolock in exclusive
698 * mode to prevent two sibling files from racing each other to migrate
699 * themselves and their parent to different AGs.
700 */
701 if (pip)
702 xfs_ilock(pip, XFS_IOLOCK_EXCL);
703
704 /*
705 * A new AG needs to be found for the file. If the file's parent
706 * directory is also known, it will be moved to the new AG as well to
707 * ensure that files created inside it in future use the new AG.
708 */
709 ag = (ag == NULLAGNUMBER) ? 0 : (ag + 1) % mp->m_sb.sb_agcount;
710 flags = (ap->userdata ? XFS_PICK_USERDATA : 0) |
711 (ap->low ? XFS_PICK_LOWSPACE : 0);
712
713 err = _xfs_filestream_pick_ag(mp, ag, agp, flags, minlen);
714 if (err || *agp == NULLAGNUMBER)
715 goto exit;
716
717 /*
718 * If the file wasn't found in the file cache, then its parent directory
719 * inode isn't known. For this to have happened, the file must either
720 * be pre-existing, or it was created long enough ago that its cache
721 * entry has expired. This isn't the sort of usage that the filestreams
722 * allocator is trying to optimise, so there's no point trying to track
723 * its new AG somehow in the filestream data structures.
724 */
725 if (!pip) {
726 TRACE_ORPHAN(mp, ip, *agp);
727 goto exit;
728 }
729
730 /* Associate the parent inode with the AG. */
731 err = _xfs_filestream_update_ag(pip, NULL, *agp);
732 if (err)
733 goto exit;
734
735 /* Associate the file inode with the AG. */
736 err = _xfs_filestream_update_ag(ip, pip, *agp);
737 if (err)
738 goto exit;
739
740 TRACE_MOVEAG(mp, ip, pip, NULLAGNUMBER, 0,
741 *agp, xfs_filestream_peek_ag(mp, *agp));
742
743exit:
744 /*
745 * If _xfs_filestream_pick_ag() returned a valid AG, remove the
746 * reference it took on it, since the file and directory will have taken
747 * their own now if they were successfully cached.
748 */
749 if (*agp != NULLAGNUMBER)
750 xfs_filestream_put_ag(mp, *agp);
751 else
752 *agp = 0;
753
754 if (pip)
755 xfs_iunlock(pip, XFS_IOLOCK_EXCL);
756
757 return err;
758}
759
760/*
761 * Remove an association between an inode and a filestream object.
762 * Typically this is done on last close of an unlinked file.
763 */
764void
765xfs_filestream_deassociate(
766 xfs_inode_t *ip)
767{
768 xfs_mru_cache_t *cache = ip->i_mount->m_filestream;
769
770 xfs_mru_cache_delete(cache, ip->i_ino);
771}
diff --git a/fs/xfs/xfs_filestream.h b/fs/xfs/xfs_filestream.h
new file mode 100644
index 00000000000..f655f7dc334
--- /dev/null
+++ b/fs/xfs/xfs_filestream.h
@@ -0,0 +1,136 @@
1/*
2 * Copyright (c) 2006-2007 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_FILESTREAM_H__
19#define __XFS_FILESTREAM_H__
20
21#ifdef __KERNEL__
22
23struct xfs_mount;
24struct xfs_inode;
25struct xfs_perag;
26struct xfs_bmalloca;
27
28#ifdef XFS_FILESTREAMS_TRACE
29#define XFS_FSTRM_KTRACE_INFO 1
30#define XFS_FSTRM_KTRACE_AGSCAN 2
31#define XFS_FSTRM_KTRACE_AGPICK1 3
32#define XFS_FSTRM_KTRACE_AGPICK2 4
33#define XFS_FSTRM_KTRACE_UPDATE 5
34#define XFS_FSTRM_KTRACE_FREE 6
35#define XFS_FSTRM_KTRACE_ITEM_LOOKUP 7
36#define XFS_FSTRM_KTRACE_ASSOCIATE 8
37#define XFS_FSTRM_KTRACE_MOVEAG 9
38#define XFS_FSTRM_KTRACE_ORPHAN 10
39
40#define XFS_FSTRM_KTRACE_SIZE 16384
41extern ktrace_t *xfs_filestreams_trace_buf;
42
43#endif
44
45/*
46 * Allocation group filestream associations are tracked with per-ag atomic
47 * counters. These counters allow _xfs_filestream_pick_ag() to tell whether a
48 * particular AG already has active filestreams associated with it. The mount
49 * point's m_peraglock is used to protect these counters from per-ag array
50 * re-allocation during a growfs operation. When xfs_growfs_data_private() is
51 * about to reallocate the array, it calls xfs_filestream_flush() with the
52 * m_peraglock held in write mode.
53 *
54 * Since xfs_mru_cache_flush() guarantees that all the free functions for all
55 * the cache elements have finished executing before it returns, it's safe for
56 * the free functions to use the atomic counters without m_peraglock protection.
57 * This allows the implementation of xfs_fstrm_free_func() to be agnostic about
58 * whether it was called with the m_peraglock held in read mode, write mode or
59 * not held at all. The race condition this addresses is the following:
60 *
61 * - The work queue scheduler fires and pulls a filestream directory cache
62 * element off the LRU end of the cache for deletion, then gets pre-empted.
63 * - A growfs operation grabs the m_peraglock in write mode, flushes all the
64 * remaining items from the cache and reallocates the mount point's per-ag
65 * array, resetting all the counters to zero.
66 * - The work queue thread resumes and calls the free function for the element
67 * it started cleaning up earlier. In the process it decrements the
68 * filestreams counter for an AG that now has no references.
69 *
70 * With a shrinkfs feature, the above scenario could panic the system.
71 *
72 * All other uses of the following macros should be protected by either the
73 * m_peraglock held in read mode, or the cache's internal locking exposed by the
74 * interval between a call to xfs_mru_cache_lookup() and a call to
75 * xfs_mru_cache_done(). In addition, the m_peraglock must be held in read mode
76 * when new elements are added to the cache.
77 *
78 * Combined, these locking rules ensure that no associations will ever exist in
79 * the cache that reference per-ag array elements that have since been
80 * reallocated.
81 */
82STATIC_INLINE int
83xfs_filestream_peek_ag(
84 xfs_mount_t *mp,
85 xfs_agnumber_t agno)
86{
87 return atomic_read(&mp->m_perag[agno].pagf_fstrms);
88}
89
90STATIC_INLINE int
91xfs_filestream_get_ag(
92 xfs_mount_t *mp,
93 xfs_agnumber_t agno)
94{
95 return atomic_inc_return(&mp->m_perag[agno].pagf_fstrms);
96}
97
98STATIC_INLINE int
99xfs_filestream_put_ag(
100 xfs_mount_t *mp,
101 xfs_agnumber_t agno)
102{
103 return atomic_dec_return(&mp->m_perag[agno].pagf_fstrms);
104}
105
106/* allocation selection flags */
107typedef enum xfs_fstrm_alloc {
108 XFS_PICK_USERDATA = 1,
109 XFS_PICK_LOWSPACE = 2,
110} xfs_fstrm_alloc_t;
111
112/* prototypes for filestream.c */
113int xfs_filestream_init(void);
114void xfs_filestream_uninit(void);
115int xfs_filestream_mount(struct xfs_mount *mp);
116void xfs_filestream_unmount(struct xfs_mount *mp);
117void xfs_filestream_flush(struct xfs_mount *mp);
118xfs_agnumber_t xfs_filestream_lookup_ag(struct xfs_inode *ip);
119int xfs_filestream_associate(struct xfs_inode *dip, struct xfs_inode *ip);
120void xfs_filestream_deassociate(struct xfs_inode *ip);
121int xfs_filestream_new_ag(struct xfs_bmalloca *ap, xfs_agnumber_t *agp);
122
123
124/* filestreams for the inode? */
125STATIC_INLINE int
126xfs_inode_is_filestream(
127 struct xfs_inode *ip)
128{
129 return (ip->i_mount->m_flags & XFS_MOUNT_FILESTREAMS) ||
130 xfs_iflags_test(ip, XFS_IFILESTREAM) ||
131 (ip->i_d.di_flags & XFS_DIFLAG_FILESTREAM);
132}
133
134#endif /* __KERNEL__ */
135
136#endif /* __XFS_FILESTREAM_H__ */
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 1335449841c..ec3c9c27e0d 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -66,6 +66,7 @@ struct fsxattr {
66#define XFS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */ 66#define XFS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */
67#define XFS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */ 67#define XFS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */
68#define XFS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */ 68#define XFS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */
69#define XFS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */
69#define XFS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */ 70#define XFS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */
70 71
71/* 72/*
@@ -238,6 +239,7 @@ typedef struct xfs_fsop_resblks {
238#define XFS_FSOP_GEOM_FLAGS_LOGV2 0x0100 /* log format version 2 */ 239#define XFS_FSOP_GEOM_FLAGS_LOGV2 0x0100 /* log format version 2 */
239#define XFS_FSOP_GEOM_FLAGS_SECTOR 0x0200 /* sector sizes >1BB */ 240#define XFS_FSOP_GEOM_FLAGS_SECTOR 0x0200 /* sector sizes >1BB */
240#define XFS_FSOP_GEOM_FLAGS_ATTR2 0x0400 /* inline attributes rework */ 241#define XFS_FSOP_GEOM_FLAGS_ATTR2 0x0400 /* inline attributes rework */
242#define XFS_FSOP_GEOM_FLAGS_LAZYSB 0x4000 /* lazy superblock counters */
241 243
242 244
243/* 245/*
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index b599e6be9ec..432e82347ed 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -44,6 +44,7 @@
44#include "xfs_trans_space.h" 44#include "xfs_trans_space.h"
45#include "xfs_rtalloc.h" 45#include "xfs_rtalloc.h"
46#include "xfs_rw.h" 46#include "xfs_rw.h"
47#include "xfs_filestream.h"
47 48
48/* 49/*
49 * File system operations 50 * File system operations
@@ -94,6 +95,8 @@ xfs_fs_geometry(
94 XFS_FSOP_GEOM_FLAGS_DIRV2 : 0) | 95 XFS_FSOP_GEOM_FLAGS_DIRV2 : 0) |
95 (XFS_SB_VERSION_HASSECTOR(&mp->m_sb) ? 96 (XFS_SB_VERSION_HASSECTOR(&mp->m_sb) ?
96 XFS_FSOP_GEOM_FLAGS_SECTOR : 0) | 97 XFS_FSOP_GEOM_FLAGS_SECTOR : 0) |
98 (xfs_sb_version_haslazysbcount(&mp->m_sb) ?
99 XFS_FSOP_GEOM_FLAGS_LAZYSB : 0) |
97 (XFS_SB_VERSION_HASATTR2(&mp->m_sb) ? 100 (XFS_SB_VERSION_HASATTR2(&mp->m_sb) ?
98 XFS_FSOP_GEOM_FLAGS_ATTR2 : 0); 101 XFS_FSOP_GEOM_FLAGS_ATTR2 : 0);
99 geo->logsectsize = XFS_SB_VERSION_HASSECTOR(&mp->m_sb) ? 102 geo->logsectsize = XFS_SB_VERSION_HASSECTOR(&mp->m_sb) ?
@@ -140,6 +143,8 @@ xfs_growfs_data_private(
140 pct = in->imaxpct; 143 pct = in->imaxpct;
141 if (nb < mp->m_sb.sb_dblocks || pct < 0 || pct > 100) 144 if (nb < mp->m_sb.sb_dblocks || pct < 0 || pct > 100)
142 return XFS_ERROR(EINVAL); 145 return XFS_ERROR(EINVAL);
146 if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb)))
147 return error;
143 dpct = pct - mp->m_sb.sb_imax_pct; 148 dpct = pct - mp->m_sb.sb_imax_pct;
144 error = xfs_read_buf(mp, mp->m_ddev_targp, 149 error = xfs_read_buf(mp, mp->m_ddev_targp,
145 XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1), 150 XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1),
@@ -161,6 +166,7 @@ xfs_growfs_data_private(
161 new = nb - mp->m_sb.sb_dblocks; 166 new = nb - mp->m_sb.sb_dblocks;
162 oagcount = mp->m_sb.sb_agcount; 167 oagcount = mp->m_sb.sb_agcount;
163 if (nagcount > oagcount) { 168 if (nagcount > oagcount) {
169 xfs_filestream_flush(mp);
164 down_write(&mp->m_peraglock); 170 down_write(&mp->m_peraglock);
165 mp->m_perag = kmem_realloc(mp->m_perag, 171 mp->m_perag = kmem_realloc(mp->m_perag,
166 sizeof(xfs_perag_t) * nagcount, 172 sizeof(xfs_perag_t) * nagcount,
@@ -173,6 +179,7 @@ xfs_growfs_data_private(
173 up_write(&mp->m_peraglock); 179 up_write(&mp->m_peraglock);
174 } 180 }
175 tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS); 181 tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS);
182 tp->t_flags |= XFS_TRANS_RESERVE;
176 if ((error = xfs_trans_reserve(tp, XFS_GROWFS_SPACE_RES(mp), 183 if ((error = xfs_trans_reserve(tp, XFS_GROWFS_SPACE_RES(mp),
177 XFS_GROWDATA_LOG_RES(mp), 0, 0, 0))) { 184 XFS_GROWDATA_LOG_RES(mp), 0, 0, 0))) {
178 xfs_trans_cancel(tp, 0); 185 xfs_trans_cancel(tp, 0);
@@ -328,6 +335,7 @@ xfs_growfs_data_private(
328 be32_add(&agf->agf_length, new); 335 be32_add(&agf->agf_length, new);
329 ASSERT(be32_to_cpu(agf->agf_length) == 336 ASSERT(be32_to_cpu(agf->agf_length) ==
330 be32_to_cpu(agi->agi_length)); 337 be32_to_cpu(agi->agi_length));
338 xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH);
331 /* 339 /*
332 * Free the new space. 340 * Free the new space.
333 */ 341 */
@@ -494,8 +502,9 @@ xfs_reserve_blocks(
494 unsigned long s; 502 unsigned long s;
495 503
496 /* If inval is null, report current values and return */ 504 /* If inval is null, report current values and return */
497
498 if (inval == (__uint64_t *)NULL) { 505 if (inval == (__uint64_t *)NULL) {
506 if (!outval)
507 return EINVAL;
499 outval->resblks = mp->m_resblks; 508 outval->resblks = mp->m_resblks;
500 outval->resblks_avail = mp->m_resblks_avail; 509 outval->resblks_avail = mp->m_resblks_avail;
501 return 0; 510 return 0;
@@ -558,8 +567,10 @@ retry:
558 } 567 }
559 } 568 }
560out: 569out:
561 outval->resblks = mp->m_resblks; 570 if (outval) {
562 outval->resblks_avail = mp->m_resblks_avail; 571 outval->resblks = mp->m_resblks;
572 outval->resblks_avail = mp->m_resblks_avail;
573 }
563 XFS_SB_UNLOCK(mp, s); 574 XFS_SB_UNLOCK(mp, s);
564 575
565 if (fdblks_delta) { 576 if (fdblks_delta) {
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index b5feb3e7711..f943368c9b9 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -123,6 +123,7 @@ xfs_ialloc_ag_alloc(
123 int blks_per_cluster; /* fs blocks per inode cluster */ 123 int blks_per_cluster; /* fs blocks per inode cluster */
124 xfs_btree_cur_t *cur; /* inode btree cursor */ 124 xfs_btree_cur_t *cur; /* inode btree cursor */
125 xfs_daddr_t d; /* disk addr of buffer */ 125 xfs_daddr_t d; /* disk addr of buffer */
126 xfs_agnumber_t agno;
126 int error; 127 int error;
127 xfs_buf_t *fbuf; /* new free inodes' buffer */ 128 xfs_buf_t *fbuf; /* new free inodes' buffer */
128 xfs_dinode_t *free; /* new free inode structure */ 129 xfs_dinode_t *free; /* new free inode structure */
@@ -302,15 +303,15 @@ xfs_ialloc_ag_alloc(
302 } 303 }
303 be32_add(&agi->agi_count, newlen); 304 be32_add(&agi->agi_count, newlen);
304 be32_add(&agi->agi_freecount, newlen); 305 be32_add(&agi->agi_freecount, newlen);
306 agno = be32_to_cpu(agi->agi_seqno);
305 down_read(&args.mp->m_peraglock); 307 down_read(&args.mp->m_peraglock);
306 args.mp->m_perag[be32_to_cpu(agi->agi_seqno)].pagi_freecount += newlen; 308 args.mp->m_perag[agno].pagi_freecount += newlen;
307 up_read(&args.mp->m_peraglock); 309 up_read(&args.mp->m_peraglock);
308 agi->agi_newino = cpu_to_be32(newino); 310 agi->agi_newino = cpu_to_be32(newino);
309 /* 311 /*
310 * Insert records describing the new inode chunk into the btree. 312 * Insert records describing the new inode chunk into the btree.
311 */ 313 */
312 cur = xfs_btree_init_cursor(args.mp, tp, agbp, 314 cur = xfs_btree_init_cursor(args.mp, tp, agbp, agno,
313 be32_to_cpu(agi->agi_seqno),
314 XFS_BTNUM_INO, (xfs_inode_t *)0, 0); 315 XFS_BTNUM_INO, (xfs_inode_t *)0, 0);
315 for (thisino = newino; 316 for (thisino = newino;
316 thisino < newino + newlen; 317 thisino < newino + newlen;
@@ -1387,6 +1388,7 @@ xfs_ialloc_read_agi(
1387 pag = &mp->m_perag[agno]; 1388 pag = &mp->m_perag[agno];
1388 if (!pag->pagi_init) { 1389 if (!pag->pagi_init) {
1389 pag->pagi_freecount = be32_to_cpu(agi->agi_freecount); 1390 pag->pagi_freecount = be32_to_cpu(agi->agi_freecount);
1391 pag->pagi_count = be32_to_cpu(agi->agi_count);
1390 pag->pagi_init = 1; 1392 pag->pagi_init = 1;
1391 } else { 1393 } else {
1392 /* 1394 /*
@@ -1410,3 +1412,23 @@ xfs_ialloc_read_agi(
1410 *bpp = bp; 1412 *bpp = bp;
1411 return 0; 1413 return 0;
1412} 1414}
1415
1416/*
1417 * Read in the agi to initialise the per-ag data in the mount structure
1418 */
1419int
1420xfs_ialloc_pagi_init(
1421 xfs_mount_t *mp, /* file system mount structure */
1422 xfs_trans_t *tp, /* transaction pointer */
1423 xfs_agnumber_t agno) /* allocation group number */
1424{
1425 xfs_buf_t *bp = NULL;
1426 int error;
1427
1428 error = xfs_ialloc_read_agi(mp, tp, agno, &bp);
1429 if (error)
1430 return error;
1431 if (bp)
1432 xfs_trans_brelse(tp, bp);
1433 return 0;
1434}
diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h
index 7f5debe1acb..97f4040931c 100644
--- a/fs/xfs/xfs_ialloc.h
+++ b/fs/xfs/xfs_ialloc.h
@@ -149,6 +149,16 @@ xfs_ialloc_read_agi(
149 xfs_agnumber_t agno, /* allocation group number */ 149 xfs_agnumber_t agno, /* allocation group number */
150 struct xfs_buf **bpp); /* allocation group hdr buf */ 150 struct xfs_buf **bpp); /* allocation group hdr buf */
151 151
152/*
153 * Read in the allocation group header to initialise the per-ag data
154 * in the mount structure
155 */
156int
157xfs_ialloc_pagi_init(
158 struct xfs_mount *mp, /* file system mount structure */
159 struct xfs_trans *tp, /* transaction pointer */
160 xfs_agnumber_t agno); /* allocation group number */
161
152#endif /* __KERNEL__ */ 162#endif /* __KERNEL__ */
153 163
154#endif /* __XFS_IALLOC_H__ */ 164#endif /* __XFS_IALLOC_H__ */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 3ca5d43b834..cdc4c28926d 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -48,7 +48,9 @@
48#include "xfs_dir2_trace.h" 48#include "xfs_dir2_trace.h"
49#include "xfs_quota.h" 49#include "xfs_quota.h"
50#include "xfs_acl.h" 50#include "xfs_acl.h"
51#include "xfs_filestream.h"
51 52
53#include <linux/log2.h>
52 54
53kmem_zone_t *xfs_ifork_zone; 55kmem_zone_t *xfs_ifork_zone;
54kmem_zone_t *xfs_inode_zone; 56kmem_zone_t *xfs_inode_zone;
@@ -643,8 +645,7 @@ xfs_iformat_extents(
643 ep->l1 = INT_GET(get_unaligned((__uint64_t*)&dp->l1), 645 ep->l1 = INT_GET(get_unaligned((__uint64_t*)&dp->l1),
644 ARCH_CONVERT); 646 ARCH_CONVERT);
645 } 647 }
646 xfs_bmap_trace_exlist("xfs_iformat_extents", ip, nex, 648 XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork);
647 whichfork);
648 if (whichfork != XFS_DATA_FORK || 649 if (whichfork != XFS_DATA_FORK ||
649 XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE) 650 XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE)
650 if (unlikely(xfs_check_nostate_extents( 651 if (unlikely(xfs_check_nostate_extents(
@@ -817,6 +818,8 @@ _xfs_dic2xflags(
817 flags |= XFS_XFLAG_EXTSZINHERIT; 818 flags |= XFS_XFLAG_EXTSZINHERIT;
818 if (di_flags & XFS_DIFLAG_NODEFRAG) 819 if (di_flags & XFS_DIFLAG_NODEFRAG)
819 flags |= XFS_XFLAG_NODEFRAG; 820 flags |= XFS_XFLAG_NODEFRAG;
821 if (di_flags & XFS_DIFLAG_FILESTREAM)
822 flags |= XFS_XFLAG_FILESTREAM;
820 } 823 }
821 824
822 return flags; 825 return flags;
@@ -1074,6 +1077,11 @@ xfs_iread_extents(
1074 * also returns the [locked] bp pointing to the head of the freelist 1077 * also returns the [locked] bp pointing to the head of the freelist
1075 * as ialloc_context. The caller should hold this buffer across 1078 * as ialloc_context. The caller should hold this buffer across
1076 * the commit and pass it back into this routine on the second call. 1079 * the commit and pass it back into this routine on the second call.
1080 *
1081 * If we are allocating quota inodes, we do not have a parent inode
1082 * to attach to or associate with (i.e. pip == NULL) because they
1083 * are not linked into the directory structure - they are attached
1084 * directly to the superblock - and so have no parent.
1077 */ 1085 */
1078int 1086int
1079xfs_ialloc( 1087xfs_ialloc(
@@ -1099,7 +1107,7 @@ xfs_ialloc(
1099 * Call the space management code to pick 1107 * Call the space management code to pick
1100 * the on-disk inode to be allocated. 1108 * the on-disk inode to be allocated.
1101 */ 1109 */
1102 error = xfs_dialloc(tp, pip->i_ino, mode, okalloc, 1110 error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc,
1103 ialloc_context, call_again, &ino); 1111 ialloc_context, call_again, &ino);
1104 if (error != 0) { 1112 if (error != 0) {
1105 return error; 1113 return error;
@@ -1150,10 +1158,10 @@ xfs_ialloc(
1150 /* 1158 /*
1151 * Project ids won't be stored on disk if we are using a version 1 inode. 1159 * Project ids won't be stored on disk if we are using a version 1 inode.
1152 */ 1160 */
1153 if ( (prid != 0) && (ip->i_d.di_version == XFS_DINODE_VERSION_1)) 1161 if ((prid != 0) && (ip->i_d.di_version == XFS_DINODE_VERSION_1))
1154 xfs_bump_ino_vers2(tp, ip); 1162 xfs_bump_ino_vers2(tp, ip);
1155 1163
1156 if (XFS_INHERIT_GID(pip, vp->v_vfsp)) { 1164 if (pip && XFS_INHERIT_GID(pip, vp->v_vfsp)) {
1157 ip->i_d.di_gid = pip->i_d.di_gid; 1165 ip->i_d.di_gid = pip->i_d.di_gid;
1158 if ((pip->i_d.di_mode & S_ISGID) && (mode & S_IFMT) == S_IFDIR) { 1166 if ((pip->i_d.di_mode & S_ISGID) && (mode & S_IFMT) == S_IFDIR) {
1159 ip->i_d.di_mode |= S_ISGID; 1167 ip->i_d.di_mode |= S_ISGID;
@@ -1195,8 +1203,16 @@ xfs_ialloc(
1195 flags |= XFS_ILOG_DEV; 1203 flags |= XFS_ILOG_DEV;
1196 break; 1204 break;
1197 case S_IFREG: 1205 case S_IFREG:
1206 if (pip && xfs_inode_is_filestream(pip)) {
1207 error = xfs_filestream_associate(pip, ip);
1208 if (error < 0)
1209 return -error;
1210 if (!error)
1211 xfs_iflags_set(ip, XFS_IFILESTREAM);
1212 }
1213 /* fall through */
1198 case S_IFDIR: 1214 case S_IFDIR:
1199 if (unlikely(pip->i_d.di_flags & XFS_DIFLAG_ANY)) { 1215 if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
1200 uint di_flags = 0; 1216 uint di_flags = 0;
1201 1217
1202 if ((mode & S_IFMT) == S_IFDIR) { 1218 if ((mode & S_IFMT) == S_IFDIR) {
@@ -1233,6 +1249,8 @@ xfs_ialloc(
1233 if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) && 1249 if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) &&
1234 xfs_inherit_nodefrag) 1250 xfs_inherit_nodefrag)
1235 di_flags |= XFS_DIFLAG_NODEFRAG; 1251 di_flags |= XFS_DIFLAG_NODEFRAG;
1252 if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM)
1253 di_flags |= XFS_DIFLAG_FILESTREAM;
1236 ip->i_d.di_flags |= di_flags; 1254 ip->i_d.di_flags |= di_flags;
1237 } 1255 }
1238 /* FALLTHROUGH */ 1256 /* FALLTHROUGH */
@@ -2875,9 +2893,6 @@ xfs_iextents_copy(
2875 int copied; 2893 int copied;
2876 xfs_bmbt_rec_t *dest_ep; 2894 xfs_bmbt_rec_t *dest_ep;
2877 xfs_bmbt_rec_t *ep; 2895 xfs_bmbt_rec_t *ep;
2878#ifdef XFS_BMAP_TRACE
2879 static char fname[] = "xfs_iextents_copy";
2880#endif
2881 int i; 2896 int i;
2882 xfs_ifork_t *ifp; 2897 xfs_ifork_t *ifp;
2883 int nrecs; 2898 int nrecs;
@@ -2888,7 +2903,7 @@ xfs_iextents_copy(
2888 ASSERT(ifp->if_bytes > 0); 2903 ASSERT(ifp->if_bytes > 0);
2889 2904
2890 nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 2905 nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
2891 xfs_bmap_trace_exlist(fname, ip, nrecs, whichfork); 2906 XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork);
2892 ASSERT(nrecs > 0); 2907 ASSERT(nrecs > 0);
2893 2908
2894 /* 2909 /*
@@ -4184,7 +4199,7 @@ xfs_iext_realloc_direct(
4184 ifp->if_bytes = new_size; 4199 ifp->if_bytes = new_size;
4185 return; 4200 return;
4186 } 4201 }
4187 if ((new_size & (new_size - 1)) != 0) { 4202 if (!is_power_of_2(new_size)){
4188 rnew_size = xfs_iroundup(new_size); 4203 rnew_size = xfs_iroundup(new_size);
4189 } 4204 }
4190 if (rnew_size != ifp->if_real_bytes) { 4205 if (rnew_size != ifp->if_real_bytes) {
@@ -4207,7 +4222,7 @@ xfs_iext_realloc_direct(
4207 */ 4222 */
4208 else { 4223 else {
4209 new_size += ifp->if_bytes; 4224 new_size += ifp->if_bytes;
4210 if ((new_size & (new_size - 1)) != 0) { 4225 if (!is_power_of_2(new_size)) {
4211 rnew_size = xfs_iroundup(new_size); 4226 rnew_size = xfs_iroundup(new_size);
4212 } 4227 }
4213 xfs_iext_inline_to_direct(ifp, rnew_size); 4228 xfs_iext_inline_to_direct(ifp, rnew_size);
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index f75afecef8e..012dfd4a958 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -379,6 +379,7 @@ xfs_iflags_test(xfs_inode_t *ip, unsigned short flags)
379#define XFS_ISTALE 0x0010 /* inode has been staled */ 379#define XFS_ISTALE 0x0010 /* inode has been staled */
380#define XFS_IRECLAIMABLE 0x0020 /* inode can be reclaimed */ 380#define XFS_IRECLAIMABLE 0x0020 /* inode can be reclaimed */
381#define XFS_INEW 0x0040 381#define XFS_INEW 0x0040
382#define XFS_IFILESTREAM 0x0080 /* inode is in a filestream directory */
382 383
383/* 384/*
384 * Flags for inode locking. 385 * Flags for inode locking.
@@ -414,19 +415,22 @@ xfs_iflags_test(xfs_inode_t *ip, unsigned short flags)
414 * gets a lockdep subclass of 1 and the second lock will have a lockdep 415 * gets a lockdep subclass of 1 and the second lock will have a lockdep
415 * subclass of 0. 416 * subclass of 0.
416 * 417 *
417 * XFS_I[O]LOCK_INUMORDER - for locking several inodes at the some time 418 * XFS_LOCK_INUMORDER - for locking several inodes at the some time
418 * with xfs_lock_inodes(). This flag is used as the starting subclass 419 * with xfs_lock_inodes(). This flag is used as the starting subclass
419 * and each subsequent lock acquired will increment the subclass by one. 420 * and each subsequent lock acquired will increment the subclass by one.
420 * So the first lock acquired will have a lockdep subclass of 2, the 421 * So the first lock acquired will have a lockdep subclass of 2, the
421 * second lock will have a lockdep subclass of 3, and so on. 422 * second lock will have a lockdep subclass of 3, and so on. It is
423 * the responsibility of the class builder to shift this to the correct
424 * portion of the lock_mode lockdep mask.
422 */ 425 */
426#define XFS_LOCK_PARENT 1
427#define XFS_LOCK_INUMORDER 2
428
423#define XFS_IOLOCK_SHIFT 16 429#define XFS_IOLOCK_SHIFT 16
424#define XFS_IOLOCK_PARENT (1 << XFS_IOLOCK_SHIFT) 430#define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT)
425#define XFS_IOLOCK_INUMORDER (2 << XFS_IOLOCK_SHIFT)
426 431
427#define XFS_ILOCK_SHIFT 24 432#define XFS_ILOCK_SHIFT 24
428#define XFS_ILOCK_PARENT (1 << XFS_ILOCK_SHIFT) 433#define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT)
429#define XFS_ILOCK_INUMORDER (2 << XFS_ILOCK_SHIFT)
430 434
431#define XFS_IOLOCK_DEP_MASK 0x00ff0000 435#define XFS_IOLOCK_DEP_MASK 0x00ff0000
432#define XFS_ILOCK_DEP_MASK 0xff000000 436#define XFS_ILOCK_DEP_MASK 0xff000000
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 3f2b9f2a7b9..bf57b75acb9 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -451,19 +451,14 @@ xfs_iomap_write_direct(
451 return XFS_ERROR(error); 451 return XFS_ERROR(error);
452 452
453 rt = XFS_IS_REALTIME_INODE(ip); 453 rt = XFS_IS_REALTIME_INODE(ip);
454 if (unlikely(rt)) { 454 extsz = xfs_get_extsz_hint(ip);
455 if (!(extsz = ip->i_d.di_extsize))
456 extsz = mp->m_sb.sb_rextsize;
457 } else {
458 extsz = ip->i_d.di_extsize;
459 }
460 455
461 isize = ip->i_size; 456 isize = ip->i_size;
462 if (io->io_new_size > isize) 457 if (io->io_new_size > isize)
463 isize = io->io_new_size; 458 isize = io->io_new_size;
464 459
465 offset_fsb = XFS_B_TO_FSBT(mp, offset); 460 offset_fsb = XFS_B_TO_FSBT(mp, offset);
466 last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); 461 last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
467 if ((offset + count) > isize) { 462 if ((offset + count) > isize) {
468 error = xfs_iomap_eof_align_last_fsb(mp, io, isize, extsz, 463 error = xfs_iomap_eof_align_last_fsb(mp, io, isize, extsz,
469 &last_fsb); 464 &last_fsb);
@@ -489,13 +484,13 @@ xfs_iomap_write_direct(
489 if (unlikely(rt)) { 484 if (unlikely(rt)) {
490 resrtextents = qblocks = resaligned; 485 resrtextents = qblocks = resaligned;
491 resrtextents /= mp->m_sb.sb_rextsize; 486 resrtextents /= mp->m_sb.sb_rextsize;
492 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); 487 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
493 quota_flag = XFS_QMOPT_RES_RTBLKS; 488 quota_flag = XFS_QMOPT_RES_RTBLKS;
494 } else { 489 } else {
495 resrtextents = 0; 490 resrtextents = 0;
496 resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned); 491 resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
497 quota_flag = XFS_QMOPT_RES_REGBLKS; 492 quota_flag = XFS_QMOPT_RES_REGBLKS;
498 } 493 }
499 494
500 /* 495 /*
501 * Allocate and setup the transaction 496 * Allocate and setup the transaction
@@ -666,13 +661,7 @@ xfs_iomap_write_delay(
666 if (error) 661 if (error)
667 return XFS_ERROR(error); 662 return XFS_ERROR(error);
668 663
669 if (XFS_IS_REALTIME_INODE(ip)) { 664 extsz = xfs_get_extsz_hint(ip);
670 if (!(extsz = ip->i_d.di_extsize))
671 extsz = mp->m_sb.sb_rextsize;
672 } else {
673 extsz = ip->i_d.di_extsize;
674 }
675
676 offset_fsb = XFS_B_TO_FSBT(mp, offset); 665 offset_fsb = XFS_B_TO_FSBT(mp, offset);
677 666
678retry: 667retry:
@@ -788,18 +777,12 @@ xfs_iomap_write_allocate(
788 nimaps = 0; 777 nimaps = 0;
789 while (nimaps == 0) { 778 while (nimaps == 0) {
790 tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE); 779 tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
780 tp->t_flags |= XFS_TRANS_RESERVE;
791 nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK); 781 nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
792 error = xfs_trans_reserve(tp, nres, 782 error = xfs_trans_reserve(tp, nres,
793 XFS_WRITE_LOG_RES(mp), 783 XFS_WRITE_LOG_RES(mp),
794 0, XFS_TRANS_PERM_LOG_RES, 784 0, XFS_TRANS_PERM_LOG_RES,
795 XFS_WRITE_LOG_COUNT); 785 XFS_WRITE_LOG_COUNT);
796 if (error == ENOSPC) {
797 error = xfs_trans_reserve(tp, 0,
798 XFS_WRITE_LOG_RES(mp),
799 0,
800 XFS_TRANS_PERM_LOG_RES,
801 XFS_WRITE_LOG_COUNT);
802 }
803 if (error) { 786 if (error) {
804 xfs_trans_cancel(tp, 0); 787 xfs_trans_cancel(tp, 0);
805 return XFS_ERROR(error); 788 return XFS_ERROR(error);
@@ -917,8 +900,8 @@ xfs_iomap_write_unwritten(
917 * from unwritten to real. Do allocations in a loop until 900 * from unwritten to real. Do allocations in a loop until
918 * we have covered the range passed in. 901 * we have covered the range passed in.
919 */ 902 */
920
921 tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE); 903 tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
904 tp->t_flags |= XFS_TRANS_RESERVE;
922 error = xfs_trans_reserve(tp, resblks, 905 error = xfs_trans_reserve(tp, resblks,
923 XFS_WRITE_LOG_RES(mp), 0, 906 XFS_WRITE_LOG_RES(mp), 0,
924 XFS_TRANS_PERM_LOG_RES, 907 XFS_TRANS_PERM_LOG_RES,
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index e725ddd3de5..4c2454bcc71 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -202,6 +202,16 @@ xfs_bulkstat_one_dinode(
202 return 0; 202 return 0;
203} 203}
204 204
205STATIC int
206xfs_bulkstat_one_fmt(
207 void __user *ubuffer,
208 const xfs_bstat_t *buffer)
209{
210 if (copy_to_user(ubuffer, buffer, sizeof(*buffer)))
211 return -EFAULT;
212 return sizeof(*buffer);
213}
214
205/* 215/*
206 * Return stat information for one inode. 216 * Return stat information for one inode.
207 * Return 0 if ok, else errno. 217 * Return 0 if ok, else errno.
@@ -221,6 +231,7 @@ xfs_bulkstat_one(
221 xfs_bstat_t *buf; /* return buffer */ 231 xfs_bstat_t *buf; /* return buffer */
222 int error = 0; /* error value */ 232 int error = 0; /* error value */
223 xfs_dinode_t *dip; /* dinode inode pointer */ 233 xfs_dinode_t *dip; /* dinode inode pointer */
234 bulkstat_one_fmt_pf formatter = private_data ? : xfs_bulkstat_one_fmt;
224 235
225 dip = (xfs_dinode_t *)dibuff; 236 dip = (xfs_dinode_t *)dibuff;
226 *stat = BULKSTAT_RV_NOTHING; 237 *stat = BULKSTAT_RV_NOTHING;
@@ -243,14 +254,15 @@ xfs_bulkstat_one(
243 xfs_bulkstat_one_dinode(mp, ino, dip, buf); 254 xfs_bulkstat_one_dinode(mp, ino, dip, buf);
244 } 255 }
245 256
246 if (copy_to_user(buffer, buf, sizeof(*buf))) { 257 error = formatter(buffer, buf);
258 if (error < 0) {
247 error = EFAULT; 259 error = EFAULT;
248 goto out_free; 260 goto out_free;
249 } 261 }
250 262
251 *stat = BULKSTAT_RV_DIDONE; 263 *stat = BULKSTAT_RV_DIDONE;
252 if (ubused) 264 if (ubused)
253 *ubused = sizeof(*buf); 265 *ubused = error;
254 266
255 out_free: 267 out_free:
256 kmem_free(buf, sizeof(*buf)); 268 kmem_free(buf, sizeof(*buf));
@@ -748,6 +760,19 @@ xfs_bulkstat_single(
748 return 0; 760 return 0;
749} 761}
750 762
763int
764xfs_inumbers_fmt(
765 void __user *ubuffer, /* buffer to write to */
766 const xfs_inogrp_t *buffer, /* buffer to read from */
767 long count, /* # of elements to read */
768 long *written) /* # of bytes written */
769{
770 if (copy_to_user(ubuffer, buffer, count * sizeof(*buffer)))
771 return -EFAULT;
772 *written = count * sizeof(*buffer);
773 return 0;
774}
775
751/* 776/*
752 * Return inode number table for the filesystem. 777 * Return inode number table for the filesystem.
753 */ 778 */
@@ -756,7 +781,8 @@ xfs_inumbers(
756 xfs_mount_t *mp, /* mount point for filesystem */ 781 xfs_mount_t *mp, /* mount point for filesystem */
757 xfs_ino_t *lastino, /* last inode returned */ 782 xfs_ino_t *lastino, /* last inode returned */
758 int *count, /* size of buffer/count returned */ 783 int *count, /* size of buffer/count returned */
759 xfs_inogrp_t __user *ubuffer)/* buffer with inode descriptions */ 784 void __user *ubuffer,/* buffer with inode descriptions */
785 inumbers_fmt_pf formatter)
760{ 786{
761 xfs_buf_t *agbp; 787 xfs_buf_t *agbp;
762 xfs_agino_t agino; 788 xfs_agino_t agino;
@@ -835,12 +861,12 @@ xfs_inumbers(
835 bufidx++; 861 bufidx++;
836 left--; 862 left--;
837 if (bufidx == bcount) { 863 if (bufidx == bcount) {
838 if (copy_to_user(ubuffer, buffer, 864 long written;
839 bufidx * sizeof(*buffer))) { 865 if (formatter(ubuffer, buffer, bufidx, &written)) {
840 error = XFS_ERROR(EFAULT); 866 error = XFS_ERROR(EFAULT);
841 break; 867 break;
842 } 868 }
843 ubuffer += bufidx; 869 ubuffer += written;
844 *count += bufidx; 870 *count += bufidx;
845 bufidx = 0; 871 bufidx = 0;
846 } 872 }
@@ -862,8 +888,8 @@ xfs_inumbers(
862 } 888 }
863 if (!error) { 889 if (!error) {
864 if (bufidx) { 890 if (bufidx) {
865 if (copy_to_user(ubuffer, buffer, 891 long written;
866 bufidx * sizeof(*buffer))) 892 if (formatter(ubuffer, buffer, bufidx, &written))
867 error = XFS_ERROR(EFAULT); 893 error = XFS_ERROR(EFAULT);
868 else 894 else
869 *count += bufidx; 895 *count += bufidx;
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index f25a28862a1..a1f18fce9b7 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -69,6 +69,10 @@ xfs_bulkstat_single(
69 char __user *buffer, 69 char __user *buffer,
70 int *done); 70 int *done);
71 71
72typedef int (*bulkstat_one_fmt_pf)( /* used size in bytes or negative error */
73 void __user *ubuffer, /* buffer to write to */
74 const xfs_bstat_t *buffer); /* buffer to read from */
75
72int 76int
73xfs_bulkstat_one( 77xfs_bulkstat_one(
74 xfs_mount_t *mp, 78 xfs_mount_t *mp,
@@ -86,11 +90,25 @@ xfs_internal_inum(
86 xfs_mount_t *mp, 90 xfs_mount_t *mp,
87 xfs_ino_t ino); 91 xfs_ino_t ino);
88 92
93typedef int (*inumbers_fmt_pf)(
94 void __user *ubuffer, /* buffer to write to */
95 const xfs_inogrp_t *buffer, /* buffer to read from */
96 long count, /* # of elements to read */
97 long *written); /* # of bytes written */
98
99int
100xfs_inumbers_fmt(
101 void __user *ubuffer, /* buffer to write to */
102 const xfs_inogrp_t *buffer, /* buffer to read from */
103 long count, /* # of elements to read */
104 long *written); /* # of bytes written */
105
89int /* error status */ 106int /* error status */
90xfs_inumbers( 107xfs_inumbers(
91 xfs_mount_t *mp, /* mount point for filesystem */ 108 xfs_mount_t *mp, /* mount point for filesystem */
92 xfs_ino_t *last, /* last inode returned */ 109 xfs_ino_t *last, /* last inode returned */
93 int *count, /* size of buffer/count returned */ 110 int *count, /* size of buffer/count returned */
94 xfs_inogrp_t __user *buffer);/* buffer with inode info */ 111 void __user *buffer, /* buffer with inode info */
112 inumbers_fmt_pf formatter);
95 113
96#endif /* __XFS_ITABLE_H__ */ 114#endif /* __XFS_ITABLE_H__ */
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index c48bf61f17b..9d4c4fbeb3e 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -817,10 +817,8 @@ xfs_log_need_covered(xfs_mount_t *mp)
817 SPLDECL(s); 817 SPLDECL(s);
818 int needed = 0, gen; 818 int needed = 0, gen;
819 xlog_t *log = mp->m_log; 819 xlog_t *log = mp->m_log;
820 bhv_vfs_t *vfsp = XFS_MTOVFS(mp);
821 820
822 if (vfs_test_for_freeze(vfsp) || XFS_FORCED_SHUTDOWN(mp) || 821 if (!xfs_fs_writable(mp))
823 (vfsp->vfs_flag & VFS_RDONLY))
824 return 0; 822 return 0;
825 823
826 s = LOG_LOCK(log); 824 s = LOG_LOCK(log);
@@ -967,14 +965,16 @@ xlog_iodone(xfs_buf_t *bp)
967 } else if (iclog->ic_state & XLOG_STATE_IOERROR) { 965 } else if (iclog->ic_state & XLOG_STATE_IOERROR) {
968 aborted = XFS_LI_ABORTED; 966 aborted = XFS_LI_ABORTED;
969 } 967 }
968
969 /* log I/O is always issued ASYNC */
970 ASSERT(XFS_BUF_ISASYNC(bp));
970 xlog_state_done_syncing(iclog, aborted); 971 xlog_state_done_syncing(iclog, aborted);
971 if (!(XFS_BUF_ISASYNC(bp))) { 972 /*
972 /* 973 * do not reference the buffer (bp) here as we could race
973 * Corresponding psema() will be done in bwrite(). If we don't 974 * with it being freed after writing the unmount record to the
974 * vsema() here, panic. 975 * log.
975 */ 976 */
976 XFS_BUF_V_IODONESEMA(bp); 977
977 }
978} /* xlog_iodone */ 978} /* xlog_iodone */
979 979
980/* 980/*
@@ -1199,11 +1199,18 @@ xlog_alloc_log(xfs_mount_t *mp,
1199 *iclogp = (xlog_in_core_t *) 1199 *iclogp = (xlog_in_core_t *)
1200 kmem_zalloc(sizeof(xlog_in_core_t), KM_SLEEP); 1200 kmem_zalloc(sizeof(xlog_in_core_t), KM_SLEEP);
1201 iclog = *iclogp; 1201 iclog = *iclogp;
1202 iclog->hic_data = (xlog_in_core_2_t *)
1203 kmem_zalloc(iclogsize, KM_SLEEP | KM_LARGE);
1204
1205 iclog->ic_prev = prev_iclog; 1202 iclog->ic_prev = prev_iclog;
1206 prev_iclog = iclog; 1203 prev_iclog = iclog;
1204
1205 bp = xfs_buf_get_noaddr(log->l_iclog_size, mp->m_logdev_targp);
1206 if (!XFS_BUF_CPSEMA(bp))
1207 ASSERT(0);
1208 XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
1209 XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
1210 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
1211 iclog->ic_bp = bp;
1212 iclog->hic_data = bp->b_addr;
1213
1207 log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header); 1214 log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header);
1208 1215
1209 head = &iclog->ic_header; 1216 head = &iclog->ic_header;
@@ -1216,11 +1223,6 @@ xlog_alloc_log(xfs_mount_t *mp,
1216 INT_SET(head->h_fmt, ARCH_CONVERT, XLOG_FMT); 1223 INT_SET(head->h_fmt, ARCH_CONVERT, XLOG_FMT);
1217 memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t)); 1224 memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t));
1218 1225
1219 bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp);
1220 XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
1221 XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
1222 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
1223 iclog->ic_bp = bp;
1224 1226
1225 iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize; 1227 iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize;
1226 iclog->ic_state = XLOG_STATE_ACTIVE; 1228 iclog->ic_state = XLOG_STATE_ACTIVE;
@@ -1432,7 +1434,7 @@ xlog_sync(xlog_t *log,
1432 } else { 1434 } else {
1433 iclog->ic_bwritecnt = 1; 1435 iclog->ic_bwritecnt = 1;
1434 } 1436 }
1435 XFS_BUF_SET_PTR(bp, (xfs_caddr_t) &(iclog->ic_header), count); 1437 XFS_BUF_SET_COUNT(bp, count);
1436 XFS_BUF_SET_FSPRIVATE(bp, iclog); /* save for later */ 1438 XFS_BUF_SET_FSPRIVATE(bp, iclog); /* save for later */
1437 XFS_BUF_ZEROFLAGS(bp); 1439 XFS_BUF_ZEROFLAGS(bp);
1438 XFS_BUF_BUSY(bp); 1440 XFS_BUF_BUSY(bp);
@@ -1528,7 +1530,6 @@ xlog_dealloc_log(xlog_t *log)
1528 } 1530 }
1529#endif 1531#endif
1530 next_iclog = iclog->ic_next; 1532 next_iclog = iclog->ic_next;
1531 kmem_free(iclog->hic_data, log->l_iclog_size);
1532 kmem_free(iclog, sizeof(xlog_in_core_t)); 1533 kmem_free(iclog, sizeof(xlog_in_core_t));
1533 iclog = next_iclog; 1534 iclog = next_iclog;
1534 } 1535 }
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 080fabf61c9..fddbb091a86 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -927,6 +927,14 @@ xlog_find_tail(
927 ASSIGN_ANY_LSN_HOST(log->l_last_sync_lsn, log->l_curr_cycle, 927 ASSIGN_ANY_LSN_HOST(log->l_last_sync_lsn, log->l_curr_cycle,
928 after_umount_blk); 928 after_umount_blk);
929 *tail_blk = after_umount_blk; 929 *tail_blk = after_umount_blk;
930
931 /*
932 * Note that the unmount was clean. If the unmount
933 * was not clean, we need to know this to rebuild the
934 * superblock counters from the perag headers if we
935 * have a filesystem using non-persistent counters.
936 */
937 log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
930 } 938 }
931 } 939 }
932 940
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index a96bde6df96..a66b3980517 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -202,6 +202,27 @@ xfs_mount_free(
202 kmem_free(mp, sizeof(xfs_mount_t)); 202 kmem_free(mp, sizeof(xfs_mount_t));
203} 203}
204 204
205/*
206 * Check size of device based on the (data/realtime) block count.
207 * Note: this check is used by the growfs code as well as mount.
208 */
209int
210xfs_sb_validate_fsb_count(
211 xfs_sb_t *sbp,
212 __uint64_t nblocks)
213{
214 ASSERT(PAGE_SHIFT >= sbp->sb_blocklog);
215 ASSERT(sbp->sb_blocklog >= BBSHIFT);
216
217#if XFS_BIG_BLKNOS /* Limited by ULONG_MAX of page cache index */
218 if (nblocks >> (PAGE_CACHE_SHIFT - sbp->sb_blocklog) > ULONG_MAX)
219 return E2BIG;
220#else /* Limited by UINT_MAX of sectors */
221 if (nblocks << (sbp->sb_blocklog - BBSHIFT) > UINT_MAX)
222 return E2BIG;
223#endif
224 return 0;
225}
205 226
206/* 227/*
207 * Check the validity of the SB found. 228 * Check the validity of the SB found.
@@ -284,18 +305,8 @@ xfs_mount_validate_sb(
284 return XFS_ERROR(EFSCORRUPTED); 305 return XFS_ERROR(EFSCORRUPTED);
285 } 306 }
286 307
287 ASSERT(PAGE_SHIFT >= sbp->sb_blocklog); 308 if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) ||
288 ASSERT(sbp->sb_blocklog >= BBSHIFT); 309 xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) {
289
290#if XFS_BIG_BLKNOS /* Limited by ULONG_MAX of page cache index */
291 if (unlikely(
292 (sbp->sb_dblocks >> (PAGE_SHIFT - sbp->sb_blocklog)) > ULONG_MAX ||
293 (sbp->sb_rblocks >> (PAGE_SHIFT - sbp->sb_blocklog)) > ULONG_MAX)) {
294#else /* Limited by UINT_MAX of sectors */
295 if (unlikely(
296 (sbp->sb_dblocks << (sbp->sb_blocklog - BBSHIFT)) > UINT_MAX ||
297 (sbp->sb_rblocks << (sbp->sb_blocklog - BBSHIFT)) > UINT_MAX)) {
298#endif
299 xfs_fs_mount_cmn_err(flags, 310 xfs_fs_mount_cmn_err(flags,
300 "file system too large to be mounted on this system."); 311 "file system too large to be mounted on this system.");
301 return XFS_ERROR(E2BIG); 312 return XFS_ERROR(E2BIG);
@@ -632,6 +643,64 @@ xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp)
632 sbp->sb_inopblock); 643 sbp->sb_inopblock);
633 mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog; 644 mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
634} 645}
646
647/*
648 * xfs_initialize_perag_data
649 *
650 * Read in each per-ag structure so we can count up the number of
651 * allocated inodes, free inodes and used filesystem blocks as this
652 * information is no longer persistent in the superblock. Once we have
653 * this information, write it into the in-core superblock structure.
654 */
655STATIC int
656xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount)
657{
658 xfs_agnumber_t index;
659 xfs_perag_t *pag;
660 xfs_sb_t *sbp = &mp->m_sb;
661 uint64_t ifree = 0;
662 uint64_t ialloc = 0;
663 uint64_t bfree = 0;
664 uint64_t bfreelst = 0;
665 uint64_t btree = 0;
666 int error;
667 int s;
668
669 for (index = 0; index < agcount; index++) {
670 /*
671 * read the agf, then the agi. This gets us
672 * all the inforamtion we need and populates the
673 * per-ag structures for us.
674 */
675 error = xfs_alloc_pagf_init(mp, NULL, index, 0);
676 if (error)
677 return error;
678
679 error = xfs_ialloc_pagi_init(mp, NULL, index);
680 if (error)
681 return error;
682 pag = &mp->m_perag[index];
683 ifree += pag->pagi_freecount;
684 ialloc += pag->pagi_count;
685 bfree += pag->pagf_freeblks;
686 bfreelst += pag->pagf_flcount;
687 btree += pag->pagf_btreeblks;
688 }
689 /*
690 * Overwrite incore superblock counters with just-read data
691 */
692 s = XFS_SB_LOCK(mp);
693 sbp->sb_ifree = ifree;
694 sbp->sb_icount = ialloc;
695 sbp->sb_fdblocks = bfree + bfreelst + btree;
696 XFS_SB_UNLOCK(mp, s);
697
698 /* Fixup the per-cpu counters as well. */
699 xfs_icsb_reinit_counters(mp);
700
701 return 0;
702}
703
635/* 704/*
636 * xfs_mountfs 705 * xfs_mountfs
637 * 706 *
@@ -656,7 +725,7 @@ xfs_mountfs(
656 bhv_vnode_t *rvp = NULL; 725 bhv_vnode_t *rvp = NULL;
657 int readio_log, writeio_log; 726 int readio_log, writeio_log;
658 xfs_daddr_t d; 727 xfs_daddr_t d;
659 __uint64_t ret64; 728 __uint64_t resblks;
660 __int64_t update_flags; 729 __int64_t update_flags;
661 uint quotamount, quotaflags; 730 uint quotamount, quotaflags;
662 int agno; 731 int agno;
@@ -773,6 +842,7 @@ xfs_mountfs(
773 */ 842 */
774 if ((mfsi_flags & XFS_MFSI_SECOND) == 0 && 843 if ((mfsi_flags & XFS_MFSI_SECOND) == 0 &&
775 (mp->m_flags & XFS_MOUNT_NOUUID) == 0) { 844 (mp->m_flags & XFS_MOUNT_NOUUID) == 0) {
845 __uint64_t ret64;
776 if (xfs_uuid_mount(mp)) { 846 if (xfs_uuid_mount(mp)) {
777 error = XFS_ERROR(EINVAL); 847 error = XFS_ERROR(EINVAL);
778 goto error1; 848 goto error1;
@@ -976,6 +1046,34 @@ xfs_mountfs(
976 } 1046 }
977 1047
978 /* 1048 /*
1049 * Now the log is mounted, we know if it was an unclean shutdown or
1050 * not. If it was, with the first phase of recovery has completed, we
1051 * have consistent AG blocks on disk. We have not recovered EFIs yet,
1052 * but they are recovered transactionally in the second recovery phase
1053 * later.
1054 *
1055 * Hence we can safely re-initialise incore superblock counters from
1056 * the per-ag data. These may not be correct if the filesystem was not
1057 * cleanly unmounted, so we need to wait for recovery to finish before
1058 * doing this.
1059 *
1060 * If the filesystem was cleanly unmounted, then we can trust the
1061 * values in the superblock to be correct and we don't need to do
1062 * anything here.
1063 *
1064 * If we are currently making the filesystem, the initialisation will
1065 * fail as the perag data is in an undefined state.
1066 */
1067
1068 if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
1069 !XFS_LAST_UNMOUNT_WAS_CLEAN(mp) &&
1070 !mp->m_sb.sb_inprogress) {
1071 error = xfs_initialize_perag_data(mp, sbp->sb_agcount);
1072 if (error) {
1073 goto error2;
1074 }
1075 }
1076 /*
979 * Get and sanity-check the root inode. 1077 * Get and sanity-check the root inode.
980 * Save the pointer to it in the mount structure. 1078 * Save the pointer to it in the mount structure.
981 */ 1079 */
@@ -1044,6 +1142,23 @@ xfs_mountfs(
1044 if ((error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags))) 1142 if ((error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags)))
1045 goto error4; 1143 goto error4;
1046 1144
1145 /*
1146 * Now we are mounted, reserve a small amount of unused space for
1147 * privileged transactions. This is needed so that transaction
1148 * space required for critical operations can dip into this pool
1149 * when at ENOSPC. This is needed for operations like create with
1150 * attr, unwritten extent conversion at ENOSPC, etc. Data allocations
1151 * are not allowed to use this reserved space.
1152 *
1153 * We default to 5% or 1024 fsbs of space reserved, whichever is smaller.
1154 * This may drive us straight to ENOSPC on mount, but that implies
1155 * we were already there on the last unmount.
1156 */
1157 resblks = mp->m_sb.sb_dblocks;
1158 do_div(resblks, 20);
1159 resblks = min_t(__uint64_t, resblks, 1024);
1160 xfs_reserve_blocks(mp, &resblks, NULL);
1161
1047 return 0; 1162 return 0;
1048 1163
1049 error4: 1164 error4:
@@ -1083,7 +1198,19 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
1083#if defined(DEBUG) || defined(INDUCE_IO_ERROR) 1198#if defined(DEBUG) || defined(INDUCE_IO_ERROR)
1084 int64_t fsid; 1199 int64_t fsid;
1085#endif 1200#endif
1201 __uint64_t resblks;
1086 1202
1203 /*
1204 * We can potentially deadlock here if we have an inode cluster
1205 * that has been freed has it's buffer still pinned in memory because
1206 * the transaction is still sitting in a iclog. The stale inodes
1207 * on that buffer will have their flush locks held until the
1208 * transaction hits the disk and the callbacks run. the inode
1209 * flush takes the flush lock unconditionally and with nothing to
1210 * push out the iclog we will never get that unlocked. hence we
1211 * need to force the log first.
1212 */
1213 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);
1087 xfs_iflush_all(mp); 1214 xfs_iflush_all(mp);
1088 1215
1089 XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING); 1216 XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);
@@ -1100,10 +1227,26 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
1100 xfs_binval(mp->m_rtdev_targp); 1227 xfs_binval(mp->m_rtdev_targp);
1101 } 1228 }
1102 1229
1103 xfs_unmountfs_writesb(mp); 1230 /*
1231 * Unreserve any blocks we have so that when we unmount we don't account
1232 * the reserved free space as used. This is really only necessary for
1233 * lazy superblock counting because it trusts the incore superblock
1234 * counters to be aboslutely correct on clean unmount.
1235 *
1236 * We don't bother correcting this elsewhere for lazy superblock
1237 * counting because on mount of an unclean filesystem we reconstruct the
1238 * correct counter value and this is irrelevant.
1239 *
1240 * For non-lazy counter filesystems, this doesn't matter at all because
1241 * we only every apply deltas to the superblock and hence the incore
1242 * value does not matter....
1243 */
1244 resblks = 0;
1245 xfs_reserve_blocks(mp, &resblks, NULL);
1104 1246
1247 xfs_log_sbcount(mp, 1);
1248 xfs_unmountfs_writesb(mp);
1105 xfs_unmountfs_wait(mp); /* wait for async bufs */ 1249 xfs_unmountfs_wait(mp); /* wait for async bufs */
1106
1107 xfs_log_unmount(mp); /* Done! No more fs ops. */ 1250 xfs_log_unmount(mp); /* Done! No more fs ops. */
1108 1251
1109 xfs_freesb(mp); 1252 xfs_freesb(mp);
@@ -1150,6 +1293,62 @@ xfs_unmountfs_wait(xfs_mount_t *mp)
1150} 1293}
1151 1294
1152int 1295int
1296xfs_fs_writable(xfs_mount_t *mp)
1297{
1298 bhv_vfs_t *vfsp = XFS_MTOVFS(mp);
1299
1300 return !(vfs_test_for_freeze(vfsp) || XFS_FORCED_SHUTDOWN(mp) ||
1301 (vfsp->vfs_flag & VFS_RDONLY));
1302}
1303
1304/*
1305 * xfs_log_sbcount
1306 *
1307 * Called either periodically to keep the on disk superblock values
1308 * roughly up to date or from unmount to make sure the values are
1309 * correct on a clean unmount.
1310 *
1311 * Note this code can be called during the process of freezing, so
1312 * we may need to use the transaction allocator which does not not
1313 * block when the transaction subsystem is in its frozen state.
1314 */
1315int
1316xfs_log_sbcount(
1317 xfs_mount_t *mp,
1318 uint sync)
1319{
1320 xfs_trans_t *tp;
1321 int error;
1322
1323 if (!xfs_fs_writable(mp))
1324 return 0;
1325
1326 xfs_icsb_sync_counters(mp);
1327
1328 /*
1329 * we don't need to do this if we are updating the superblock
1330 * counters on every modification.
1331 */
1332 if (!xfs_sb_version_haslazysbcount(&mp->m_sb))
1333 return 0;
1334
1335 tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT);
1336 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
1337 XFS_DEFAULT_LOG_COUNT);
1338 if (error) {
1339 xfs_trans_cancel(tp, 0);
1340 return error;
1341 }
1342
1343 xfs_mod_sb(tp, XFS_SB_IFREE | XFS_SB_ICOUNT | XFS_SB_FDBLOCKS);
1344 if (sync)
1345 xfs_trans_set_sync(tp);
1346 xfs_trans_commit(tp, 0);
1347
1348 return 0;
1349}
1350
1351int
1153xfs_unmountfs_writesb(xfs_mount_t *mp) 1352xfs_unmountfs_writesb(xfs_mount_t *mp)
1154{ 1353{
1155 xfs_buf_t *sbp; 1354 xfs_buf_t *sbp;
@@ -1160,16 +1359,15 @@ xfs_unmountfs_writesb(xfs_mount_t *mp)
1160 * skip superblock write if fs is read-only, or 1359 * skip superblock write if fs is read-only, or
1161 * if we are doing a forced umount. 1360 * if we are doing a forced umount.
1162 */ 1361 */
1163 sbp = xfs_getsb(mp, 0);
1164 if (!(XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY || 1362 if (!(XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY ||
1165 XFS_FORCED_SHUTDOWN(mp))) { 1363 XFS_FORCED_SHUTDOWN(mp))) {
1166 1364
1167 xfs_icsb_sync_counters(mp); 1365 sbp = xfs_getsb(mp, 0);
1366 sb = XFS_BUF_TO_SBP(sbp);
1168 1367
1169 /* 1368 /*
1170 * mark shared-readonly if desired 1369 * mark shared-readonly if desired
1171 */ 1370 */
1172 sb = XFS_BUF_TO_SBP(sbp);
1173 if (mp->m_mk_sharedro) { 1371 if (mp->m_mk_sharedro) {
1174 if (!(sb->sb_flags & XFS_SBF_READONLY)) 1372 if (!(sb->sb_flags & XFS_SBF_READONLY))
1175 sb->sb_flags |= XFS_SBF_READONLY; 1373 sb->sb_flags |= XFS_SBF_READONLY;
@@ -1178,6 +1376,7 @@ xfs_unmountfs_writesb(xfs_mount_t *mp)
1178 xfs_fs_cmn_err(CE_NOTE, mp, 1376 xfs_fs_cmn_err(CE_NOTE, mp,
1179 "Unmounting, marking shared read-only"); 1377 "Unmounting, marking shared read-only");
1180 } 1378 }
1379
1181 XFS_BUF_UNDONE(sbp); 1380 XFS_BUF_UNDONE(sbp);
1182 XFS_BUF_UNREAD(sbp); 1381 XFS_BUF_UNREAD(sbp);
1183 XFS_BUF_UNDELAYWRITE(sbp); 1382 XFS_BUF_UNDELAYWRITE(sbp);
@@ -1192,8 +1391,8 @@ xfs_unmountfs_writesb(xfs_mount_t *mp)
1192 mp, sbp, XFS_BUF_ADDR(sbp)); 1391 mp, sbp, XFS_BUF_ADDR(sbp));
1193 if (error && mp->m_mk_sharedro) 1392 if (error && mp->m_mk_sharedro)
1194 xfs_fs_cmn_err(CE_ALERT, mp, "Superblock write error detected while unmounting. Filesystem may not be marked shared readonly"); 1393 xfs_fs_cmn_err(CE_ALERT, mp, "Superblock write error detected while unmounting. Filesystem may not be marked shared readonly");
1394 xfs_buf_relse(sbp);
1195 } 1395 }
1196 xfs_buf_relse(sbp);
1197 return error; 1396 return error;
1198} 1397}
1199 1398
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 82304b94646..76ad7475869 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -66,6 +66,7 @@ struct xfs_bmbt_irec;
66struct xfs_bmap_free; 66struct xfs_bmap_free;
67struct xfs_extdelta; 67struct xfs_extdelta;
68struct xfs_swapext; 68struct xfs_swapext;
69struct xfs_mru_cache;
69 70
70extern struct bhv_vfsops xfs_vfsops; 71extern struct bhv_vfsops xfs_vfsops;
71extern struct bhv_vnodeops xfs_vnodeops; 72extern struct bhv_vnodeops xfs_vnodeops;
@@ -424,17 +425,18 @@ typedef struct xfs_mount {
424 struct notifier_block m_icsb_notifier; /* hotplug cpu notifier */ 425 struct notifier_block m_icsb_notifier; /* hotplug cpu notifier */
425 struct mutex m_icsb_mutex; /* balancer sync lock */ 426 struct mutex m_icsb_mutex; /* balancer sync lock */
426#endif 427#endif
428 struct xfs_mru_cache *m_filestream; /* per-mount filestream data */
427} xfs_mount_t; 429} xfs_mount_t;
428 430
429/* 431/*
430 * Flags for m_flags. 432 * Flags for m_flags.
431 */ 433 */
432#define XFS_MOUNT_WSYNC (1ULL << 0) /* for nfs - all metadata ops 434#define XFS_MOUNT_WSYNC (1ULL << 0) /* for nfs - all metadata ops
433 must be synchronous except 435 must be synchronous except
434 for space allocations */ 436 for space allocations */
435#define XFS_MOUNT_INO64 (1ULL << 1) 437#define XFS_MOUNT_INO64 (1ULL << 1)
436 /* (1ULL << 2) -- currently unused */ 438 /* (1ULL << 2) -- currently unused */
437 /* (1ULL << 3) -- currently unused */ 439#define XFS_MOUNT_WAS_CLEAN (1ULL << 3)
438#define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem 440#define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem
439 operations, typically for 441 operations, typically for
440 disk errors in metadata */ 442 disk errors in metadata */
@@ -463,6 +465,8 @@ typedef struct xfs_mount {
463 * I/O size in stat() */ 465 * I/O size in stat() */
464#define XFS_MOUNT_NO_PERCPU_SB (1ULL << 23) /* don't use per-cpu superblock 466#define XFS_MOUNT_NO_PERCPU_SB (1ULL << 23) /* don't use per-cpu superblock
465 counters */ 467 counters */
468#define XFS_MOUNT_FILESTREAMS (1ULL << 24) /* enable the filestreams
469 allocator */
466 470
467 471
468/* 472/*
@@ -511,6 +515,8 @@ xfs_preferred_iosize(xfs_mount_t *mp)
511 515
512#define XFS_MAXIOFFSET(mp) ((mp)->m_maxioffset) 516#define XFS_MAXIOFFSET(mp) ((mp)->m_maxioffset)
513 517
518#define XFS_LAST_UNMOUNT_WAS_CLEAN(mp) \
519 ((mp)->m_flags & XFS_MOUNT_WAS_CLEAN)
514#define XFS_FORCED_SHUTDOWN(mp) ((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN) 520#define XFS_FORCED_SHUTDOWN(mp) ((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN)
515#define xfs_force_shutdown(m,f) \ 521#define xfs_force_shutdown(m,f) \
516 bhv_vfs_force_shutdown((XFS_MTOVFS(m)), f, __FILE__, __LINE__) 522 bhv_vfs_force_shutdown((XFS_MTOVFS(m)), f, __FILE__, __LINE__)
@@ -602,6 +608,7 @@ typedef struct xfs_mod_sb {
602 608
603extern xfs_mount_t *xfs_mount_init(void); 609extern xfs_mount_t *xfs_mount_init(void);
604extern void xfs_mod_sb(xfs_trans_t *, __int64_t); 610extern void xfs_mod_sb(xfs_trans_t *, __int64_t);
611extern int xfs_log_sbcount(xfs_mount_t *, uint);
605extern void xfs_mount_free(xfs_mount_t *mp, int remove_bhv); 612extern void xfs_mount_free(xfs_mount_t *mp, int remove_bhv);
606extern int xfs_mountfs(struct bhv_vfs *, xfs_mount_t *mp, int); 613extern int xfs_mountfs(struct bhv_vfs *, xfs_mount_t *mp, int);
607extern void xfs_mountfs_check_barriers(xfs_mount_t *mp); 614extern void xfs_mountfs_check_barriers(xfs_mount_t *mp);
@@ -618,12 +625,14 @@ extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *,
618extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int); 625extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
619extern int xfs_readsb(xfs_mount_t *, int); 626extern int xfs_readsb(xfs_mount_t *, int);
620extern void xfs_freesb(xfs_mount_t *); 627extern void xfs_freesb(xfs_mount_t *);
628extern int xfs_fs_writable(xfs_mount_t *);
621extern void xfs_do_force_shutdown(bhv_desc_t *, int, char *, int); 629extern void xfs_do_force_shutdown(bhv_desc_t *, int, char *, int);
622extern int xfs_syncsub(xfs_mount_t *, int, int *); 630extern int xfs_syncsub(xfs_mount_t *, int, int *);
623extern int xfs_sync_inodes(xfs_mount_t *, int, int *); 631extern int xfs_sync_inodes(xfs_mount_t *, int, int *);
624extern xfs_agnumber_t xfs_initialize_perag(struct bhv_vfs *, xfs_mount_t *, 632extern xfs_agnumber_t xfs_initialize_perag(struct bhv_vfs *, xfs_mount_t *,
625 xfs_agnumber_t); 633 xfs_agnumber_t);
626extern void xfs_xlatesb(void *, struct xfs_sb *, int, __int64_t); 634extern void xfs_xlatesb(void *, struct xfs_sb *, int, __int64_t);
635extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t);
627 636
628extern struct xfs_dmops xfs_dmcore_stub; 637extern struct xfs_dmops xfs_dmcore_stub;
629extern struct xfs_qmops xfs_qmcore_stub; 638extern struct xfs_qmops xfs_qmcore_stub;
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
new file mode 100644
index 00000000000..7deb9e3cbbd
--- /dev/null
+++ b/fs/xfs/xfs_mru_cache.c
@@ -0,0 +1,608 @@
1/*
2 * Copyright (c) 2006-2007 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_mru_cache.h"
20
21/*
22 * The MRU Cache data structure consists of a data store, an array of lists and
23 * a lock to protect its internal state. At initialisation time, the client
24 * supplies an element lifetime in milliseconds and a group count, as well as a
25 * function pointer to call when deleting elements. A data structure for
26 * queueing up work in the form of timed callbacks is also included.
27 *
28 * The group count controls how many lists are created, and thereby how finely
29 * the elements are grouped in time. When reaping occurs, all the elements in
30 * all the lists whose time has expired are deleted.
31 *
32 * To give an example of how this works in practice, consider a client that
33 * initialises an MRU Cache with a lifetime of ten seconds and a group count of
34 * five. Five internal lists will be created, each representing a two second
35 * period in time. When the first element is added, time zero for the data
36 * structure is initialised to the current time.
37 *
38 * All the elements added in the first two seconds are appended to the first
39 * list. Elements added in the third second go into the second list, and so on.
40 * If an element is accessed at any point, it is removed from its list and
41 * inserted at the head of the current most-recently-used list.
42 *
43 * The reaper function will have nothing to do until at least twelve seconds
44 * have elapsed since the first element was added. The reason for this is that
45 * if it were called at t=11s, there could be elements in the first list that
46 * have only been inactive for nine seconds, so it still does nothing. If it is
47 * called anywhere between t=12 and t=14 seconds, it will delete all the
48 * elements that remain in the first list. It's therefore possible for elements
49 * to remain in the data store even after they've been inactive for up to
50 * (t + t/g) seconds, where t is the inactive element lifetime and g is the
51 * number of groups.
52 *
53 * The above example assumes that the reaper function gets called at least once
54 * every (t/g) seconds. If it is called less frequently, unused elements will
55 * accumulate in the reap list until the reaper function is eventually called.
56 * The current implementation uses work queue callbacks to carefully time the
57 * reaper function calls, so this should happen rarely, if at all.
58 *
59 * From a design perspective, the primary reason for the choice of a list array
60 * representing discrete time intervals is that it's only practical to reap
61 * expired elements in groups of some appreciable size. This automatically
62 * introduces a granularity to element lifetimes, so there's no point storing an
63 * individual timeout with each element that specifies a more precise reap time.
64 * The bonus is a saving of sizeof(long) bytes of memory per element stored.
65 *
66 * The elements could have been stored in just one list, but an array of
67 * counters or pointers would need to be maintained to allow them to be divided
68 * up into discrete time groups. More critically, the process of touching or
69 * removing an element would involve walking large portions of the entire list,
70 * which would have a detrimental effect on performance. The additional memory
71 * requirement for the array of list heads is minimal.
72 *
73 * When an element is touched or deleted, it needs to be removed from its
74 * current list. Doubly linked lists are used to make the list maintenance
75 * portion of these operations O(1). Since reaper timing can be imprecise,
76 * inserts and lookups can occur when there are no free lists available. When
77 * this happens, all the elements on the LRU list need to be migrated to the end
78 * of the reap list. To keep the list maintenance portion of these operations
79 * O(1) also, list tails need to be accessible without walking the entire list.
80 * This is the reason why doubly linked list heads are used.
81 */
82
83/*
84 * An MRU Cache is a dynamic data structure that stores its elements in a way
85 * that allows efficient lookups, but also groups them into discrete time
86 * intervals based on insertion time. This allows elements to be efficiently
87 * and automatically reaped after a fixed period of inactivity.
88 *
89 * When a client data pointer is stored in the MRU Cache it needs to be added to
90 * both the data store and to one of the lists. It must also be possible to
91 * access each of these entries via the other, i.e. to:
92 *
93 * a) Walk a list, removing the corresponding data store entry for each item.
94 * b) Look up a data store entry, then access its list entry directly.
95 *
96 * To achieve both of these goals, each entry must contain both a list entry and
97 * a key, in addition to the user's data pointer. Note that it's not a good
98 * idea to have the client embed one of these structures at the top of their own
99 * data structure, because inserting the same item more than once would most
100 * likely result in a loop in one of the lists. That's a sure-fire recipe for
101 * an infinite loop in the code.
102 */
103typedef struct xfs_mru_cache_elem
104{
105 struct list_head list_node;
106 unsigned long key;
107 void *value;
108} xfs_mru_cache_elem_t;
109
110static kmem_zone_t *xfs_mru_elem_zone;
111static struct workqueue_struct *xfs_mru_reap_wq;
112
113/*
114 * When inserting, destroying or reaping, it's first necessary to update the
115 * lists relative to a particular time. In the case of destroying, that time
116 * will be well in the future to ensure that all items are moved to the reap
117 * list. In all other cases though, the time will be the current time.
118 *
119 * This function enters a loop, moving the contents of the LRU list to the reap
120 * list again and again until either a) the lists are all empty, or b) time zero
121 * has been advanced sufficiently to be within the immediate element lifetime.
122 *
123 * Case a) above is detected by counting how many groups are migrated and
124 * stopping when they've all been moved. Case b) is detected by monitoring the
125 * time_zero field, which is updated as each group is migrated.
126 *
127 * The return value is the earliest time that more migration could be needed, or
128 * zero if there's no need to schedule more work because the lists are empty.
129 */
130STATIC unsigned long
131_xfs_mru_cache_migrate(
132 xfs_mru_cache_t *mru,
133 unsigned long now)
134{
135 unsigned int grp;
136 unsigned int migrated = 0;
137 struct list_head *lru_list;
138
139 /* Nothing to do if the data store is empty. */
140 if (!mru->time_zero)
141 return 0;
142
143 /* While time zero is older than the time spanned by all the lists. */
144 while (mru->time_zero <= now - mru->grp_count * mru->grp_time) {
145
146 /*
147 * If the LRU list isn't empty, migrate its elements to the tail
148 * of the reap list.
149 */
150 lru_list = mru->lists + mru->lru_grp;
151 if (!list_empty(lru_list))
152 list_splice_init(lru_list, mru->reap_list.prev);
153
154 /*
155 * Advance the LRU group number, freeing the old LRU list to
156 * become the new MRU list; advance time zero accordingly.
157 */
158 mru->lru_grp = (mru->lru_grp + 1) % mru->grp_count;
159 mru->time_zero += mru->grp_time;
160
161 /*
162 * If reaping is so far behind that all the elements on all the
163 * lists have been migrated to the reap list, it's now empty.
164 */
165 if (++migrated == mru->grp_count) {
166 mru->lru_grp = 0;
167 mru->time_zero = 0;
168 return 0;
169 }
170 }
171
172 /* Find the first non-empty list from the LRU end. */
173 for (grp = 0; grp < mru->grp_count; grp++) {
174
175 /* Check the grp'th list from the LRU end. */
176 lru_list = mru->lists + ((mru->lru_grp + grp) % mru->grp_count);
177 if (!list_empty(lru_list))
178 return mru->time_zero +
179 (mru->grp_count + grp) * mru->grp_time;
180 }
181
182 /* All the lists must be empty. */
183 mru->lru_grp = 0;
184 mru->time_zero = 0;
185 return 0;
186}
187
188/*
189 * When inserting or doing a lookup, an element needs to be inserted into the
190 * MRU list. The lists must be migrated first to ensure that they're
191 * up-to-date, otherwise the new element could be given a shorter lifetime in
192 * the cache than it should.
193 */
194STATIC void
195_xfs_mru_cache_list_insert(
196 xfs_mru_cache_t *mru,
197 xfs_mru_cache_elem_t *elem)
198{
199 unsigned int grp = 0;
200 unsigned long now = jiffies;
201
202 /*
203 * If the data store is empty, initialise time zero, leave grp set to
204 * zero and start the work queue timer if necessary. Otherwise, set grp
205 * to the number of group times that have elapsed since time zero.
206 */
207 if (!_xfs_mru_cache_migrate(mru, now)) {
208 mru->time_zero = now;
209 if (!mru->next_reap)
210 mru->next_reap = mru->grp_count * mru->grp_time;
211 } else {
212 grp = (now - mru->time_zero) / mru->grp_time;
213 grp = (mru->lru_grp + grp) % mru->grp_count;
214 }
215
216 /* Insert the element at the tail of the corresponding list. */
217 list_add_tail(&elem->list_node, mru->lists + grp);
218}
219
220/*
221 * When destroying or reaping, all the elements that were migrated to the reap
222 * list need to be deleted. For each element this involves removing it from the
223 * data store, removing it from the reap list, calling the client's free
224 * function and deleting the element from the element zone.
225 */
226STATIC void
227_xfs_mru_cache_clear_reap_list(
228 xfs_mru_cache_t *mru)
229{
230 xfs_mru_cache_elem_t *elem, *next;
231 struct list_head tmp;
232
233 INIT_LIST_HEAD(&tmp);
234 list_for_each_entry_safe(elem, next, &mru->reap_list, list_node) {
235
236 /* Remove the element from the data store. */
237 radix_tree_delete(&mru->store, elem->key);
238
239 /*
240 * remove to temp list so it can be freed without
241 * needing to hold the lock
242 */
243 list_move(&elem->list_node, &tmp);
244 }
245 mutex_spinunlock(&mru->lock, 0);
246
247 list_for_each_entry_safe(elem, next, &tmp, list_node) {
248
249 /* Remove the element from the reap list. */
250 list_del_init(&elem->list_node);
251
252 /* Call the client's free function with the key and value pointer. */
253 mru->free_func(elem->key, elem->value);
254
255 /* Free the element structure. */
256 kmem_zone_free(xfs_mru_elem_zone, elem);
257 }
258
259 mutex_spinlock(&mru->lock);
260}
261
262/*
263 * We fire the reap timer every group expiry interval so
264 * we always have a reaper ready to run. This makes shutdown
265 * and flushing of the reaper easy to do. Hence we need to
266 * keep when the next reap must occur so we can determine
267 * at each interval whether there is anything we need to do.
268 */
269STATIC void
270_xfs_mru_cache_reap(
271 struct work_struct *work)
272{
273 xfs_mru_cache_t *mru = container_of(work, xfs_mru_cache_t, work.work);
274 unsigned long now;
275
276 ASSERT(mru && mru->lists);
277 if (!mru || !mru->lists)
278 return;
279
280 mutex_spinlock(&mru->lock);
281 now = jiffies;
282 if (mru->reap_all ||
283 (mru->next_reap && time_after(now, mru->next_reap))) {
284 if (mru->reap_all)
285 now += mru->grp_count * mru->grp_time * 2;
286 mru->next_reap = _xfs_mru_cache_migrate(mru, now);
287 _xfs_mru_cache_clear_reap_list(mru);
288 }
289
290 /*
291 * the process that triggered the reap_all is responsible
292 * for restating the periodic reap if it is required.
293 */
294 if (!mru->reap_all)
295 queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
296 mru->reap_all = 0;
297 mutex_spinunlock(&mru->lock, 0);
298}
299
300int
301xfs_mru_cache_init(void)
302{
303 xfs_mru_elem_zone = kmem_zone_init(sizeof(xfs_mru_cache_elem_t),
304 "xfs_mru_cache_elem");
305 if (!xfs_mru_elem_zone)
306 return ENOMEM;
307
308 xfs_mru_reap_wq = create_singlethread_workqueue("xfs_mru_cache");
309 if (!xfs_mru_reap_wq) {
310 kmem_zone_destroy(xfs_mru_elem_zone);
311 return ENOMEM;
312 }
313
314 return 0;
315}
316
317void
318xfs_mru_cache_uninit(void)
319{
320 destroy_workqueue(xfs_mru_reap_wq);
321 kmem_zone_destroy(xfs_mru_elem_zone);
322}
323
324/*
325 * To initialise a struct xfs_mru_cache pointer, call xfs_mru_cache_create()
326 * with the address of the pointer, a lifetime value in milliseconds, a group
327 * count and a free function to use when deleting elements. This function
328 * returns 0 if the initialisation was successful.
329 */
330int
331xfs_mru_cache_create(
332 xfs_mru_cache_t **mrup,
333 unsigned int lifetime_ms,
334 unsigned int grp_count,
335 xfs_mru_cache_free_func_t free_func)
336{
337 xfs_mru_cache_t *mru = NULL;
338 int err = 0, grp;
339 unsigned int grp_time;
340
341 if (mrup)
342 *mrup = NULL;
343
344 if (!mrup || !grp_count || !lifetime_ms || !free_func)
345 return EINVAL;
346
347 if (!(grp_time = msecs_to_jiffies(lifetime_ms) / grp_count))
348 return EINVAL;
349
350 if (!(mru = kmem_zalloc(sizeof(*mru), KM_SLEEP)))
351 return ENOMEM;
352
353 /* An extra list is needed to avoid reaping up to a grp_time early. */
354 mru->grp_count = grp_count + 1;
355 mru->lists = kmem_alloc(mru->grp_count * sizeof(*mru->lists), KM_SLEEP);
356
357 if (!mru->lists) {
358 err = ENOMEM;
359 goto exit;
360 }
361
362 for (grp = 0; grp < mru->grp_count; grp++)
363 INIT_LIST_HEAD(mru->lists + grp);
364
365 /*
366 * We use GFP_KERNEL radix tree preload and do inserts under a
367 * spinlock so GFP_ATOMIC is appropriate for the radix tree itself.
368 */
369 INIT_RADIX_TREE(&mru->store, GFP_ATOMIC);
370 INIT_LIST_HEAD(&mru->reap_list);
371 spinlock_init(&mru->lock, "xfs_mru_cache");
372 INIT_DELAYED_WORK(&mru->work, _xfs_mru_cache_reap);
373
374 mru->grp_time = grp_time;
375 mru->free_func = free_func;
376
377 /* start up the reaper event */
378 mru->next_reap = 0;
379 mru->reap_all = 0;
380 queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
381
382 *mrup = mru;
383
384exit:
385 if (err && mru && mru->lists)
386 kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists));
387 if (err && mru)
388 kmem_free(mru, sizeof(*mru));
389
390 return err;
391}
392
393/*
394 * Call xfs_mru_cache_flush() to flush out all cached entries, calling their
395 * free functions as they're deleted. When this function returns, the caller is
396 * guaranteed that all the free functions for all the elements have finished
397 * executing.
398 *
399 * While we are flushing, we stop the periodic reaper event from triggering.
400 * Normally, we want to restart this periodic event, but if we are shutting
401 * down the cache we do not want it restarted. hence the restart parameter
402 * where 0 = do not restart reaper and 1 = restart reaper.
403 */
404void
405xfs_mru_cache_flush(
406 xfs_mru_cache_t *mru,
407 int restart)
408{
409 if (!mru || !mru->lists)
410 return;
411
412 cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work);
413
414 mutex_spinlock(&mru->lock);
415 mru->reap_all = 1;
416 mutex_spinunlock(&mru->lock, 0);
417
418 queue_work(xfs_mru_reap_wq, &mru->work.work);
419 flush_workqueue(xfs_mru_reap_wq);
420
421 mutex_spinlock(&mru->lock);
422 WARN_ON_ONCE(mru->reap_all != 0);
423 mru->reap_all = 0;
424 if (restart)
425 queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
426 mutex_spinunlock(&mru->lock, 0);
427}
428
429void
430xfs_mru_cache_destroy(
431 xfs_mru_cache_t *mru)
432{
433 if (!mru || !mru->lists)
434 return;
435
436 /* we don't want the reaper to restart here */
437 xfs_mru_cache_flush(mru, 0);
438
439 kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists));
440 kmem_free(mru, sizeof(*mru));
441}
442
/*
 * To insert an element, call xfs_mru_cache_insert() with the data store, the
 * element's key and the client data pointer. This function returns 0 on
 * success or ENOMEM if memory for the data element couldn't be allocated.
 *
 * Ordering matters here: radix tree nodes are preloaded with GFP_KERNEL
 * (which may sleep) BEFORE the spinlock is taken, so the insert performed
 * under the lock cannot fail for lack of tree nodes.
 */
int
xfs_mru_cache_insert(
	xfs_mru_cache_t	*mru,
	unsigned long	key,
	void		*value)
{
	xfs_mru_cache_elem_t *elem;

	ASSERT(mru && mru->lists);
	if (!mru || !mru->lists)
		return EINVAL;

	/* KM_SLEEP allocation; NULL check kept as a defensive measure. */
	elem = kmem_zone_zalloc(xfs_mru_elem_zone, KM_SLEEP);
	if (!elem)
		return ENOMEM;

	/* Reserve radix tree nodes while we are still allowed to sleep. */
	if (radix_tree_preload(GFP_KERNEL)) {
		kmem_zone_free(xfs_mru_elem_zone, elem);
		return ENOMEM;
	}

	/* Initialise the element before it becomes visible in the tree. */
	INIT_LIST_HEAD(&elem->list_node);
	elem->key = key;
	elem->value = value;

	mutex_spinlock(&mru->lock);

	radix_tree_insert(&mru->store, key, elem);
	/* End the preload section as soon as the insert is done. */
	radix_tree_preload_end();
	/* Link the element into the current time group's MRU list. */
	_xfs_mru_cache_list_insert(mru, elem);

	mutex_spinunlock(&mru->lock, 0);

	return 0;
}
483
484/*
485 * To remove an element without calling the free function, call
486 * xfs_mru_cache_remove() with the data store and the element's key. On success
487 * the client data pointer for the removed element is returned, otherwise this
488 * function will return a NULL pointer.
489 */
490void *
491xfs_mru_cache_remove(
492 xfs_mru_cache_t *mru,
493 unsigned long key)
494{
495 xfs_mru_cache_elem_t *elem;
496 void *value = NULL;
497
498 ASSERT(mru && mru->lists);
499 if (!mru || !mru->lists)
500 return NULL;
501
502 mutex_spinlock(&mru->lock);
503 elem = radix_tree_delete(&mru->store, key);
504 if (elem) {
505 value = elem->value;
506 list_del(&elem->list_node);
507 }
508
509 mutex_spinunlock(&mru->lock, 0);
510
511 if (elem)
512 kmem_zone_free(xfs_mru_elem_zone, elem);
513
514 return value;
515}
516
517/*
518 * To remove and element and call the free function, call xfs_mru_cache_delete()
519 * with the data store and the element's key.
520 */
521void
522xfs_mru_cache_delete(
523 xfs_mru_cache_t *mru,
524 unsigned long key)
525{
526 void *value = xfs_mru_cache_remove(mru, key);
527
528 if (value)
529 mru->free_func(key, value);
530}
531
/*
 * To look up an element using its key, call xfs_mru_cache_lookup() with the
 * data store and the element's key. If found, the element will be moved to the
 * head of the MRU list to indicate that it's been touched.
 *
 * The internal data structures are protected by a spinlock that is STILL HELD
 * when this function returns. Call xfs_mru_cache_done() to release it. Note
 * that it is not safe to call any function that might sleep in the interim.
 *
 * The implementation could have used reference counting to avoid this
 * restriction, but since most clients simply want to get, set or test a member
 * of the returned data structure, the extra per-element memory isn't warranted.
 *
 * If the element isn't found, this function returns NULL and the spinlock is
 * released. xfs_mru_cache_done() should NOT be called when this occurs.
 */
void *
xfs_mru_cache_lookup(
	xfs_mru_cache_t	*mru,
	unsigned long	key)
{
	xfs_mru_cache_elem_t *elem;

	ASSERT(mru && mru->lists);
	if (!mru || !mru->lists)
		return NULL;

	mutex_spinlock(&mru->lock);
	elem = radix_tree_lookup(&mru->store, key);
	if (elem) {
		/* Hit: re-insert at the current time group to mark it MRU. */
		list_del(&elem->list_node);
		_xfs_mru_cache_list_insert(mru, elem);
	}
	else
		/* Miss: drop the lock here; the hit path returns locked. */
		mutex_spinunlock(&mru->lock, 0);

	return elem ? elem->value : NULL;
}
570
/*
 * To look up an element using its key, but leave its location in the internal
 * lists alone, call xfs_mru_cache_peek(). If the element isn't found, this
 * function returns NULL.
 *
 * See the comments above the declaration of the xfs_mru_cache_lookup() function
 * for important locking information pertaining to this call.
 */
void *
xfs_mru_cache_peek(
	xfs_mru_cache_t	*mru,
	unsigned long	key)
{
	xfs_mru_cache_elem_t *elem;

	ASSERT(mru && mru->lists);
	if (!mru || !mru->lists)
		return NULL;

	mutex_spinlock(&mru->lock);
	elem = radix_tree_lookup(&mru->store, key);
	/* Miss: release the lock. A hit returns with the lock still held. */
	if (!elem)
		mutex_spinunlock(&mru->lock, 0);

	return elem ? elem->value : NULL;
}
597
/*
 * To release the internal data structure spinlock after having performed an
 * xfs_mru_cache_lookup() or an xfs_mru_cache_peek(), call xfs_mru_cache_done()
 * with the data store pointer.
 */
void
xfs_mru_cache_done(
	xfs_mru_cache_t	*mru)
{
	/* Pairs with the lock left held by a successful lookup()/peek(). */
	mutex_spinunlock(&mru->lock, 0);
}
diff --git a/fs/xfs/xfs_mru_cache.h b/fs/xfs/xfs_mru_cache.h
new file mode 100644
index 00000000000..624fd10ee8e
--- /dev/null
+++ b/fs/xfs/xfs_mru_cache.h
@@ -0,0 +1,57 @@
1/*
2 * Copyright (c) 2006-2007 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_MRU_CACHE_H__
19#define __XFS_MRU_CACHE_H__
20
21
/* Function pointer type for callback to free a client's data pointer. */
typedef void (*xfs_mru_cache_free_func_t)(unsigned long, void*);

/*
 * The MRU cache: a radix tree keyed by element key for lookups, plus an
 * array of time-grouped lists used to age elements out via a delayed
 * workqueue item. NOTE(review): field comments below describe intent;
 * all mutable state is presumably guarded by 'lock' — confirm against
 * the .c implementation.
 */
typedef struct xfs_mru_cache
{
	struct radix_tree_root	store;     /* Core storage data structure.  */
	struct list_head	*lists;    /* Array of lists, one per grp.  */
	struct list_head	reap_list; /* Elements overdue for reaping. */
	spinlock_t		lock;      /* Lock to protect this struct.  */
	unsigned int		grp_count; /* Number of discrete groups.    */
	unsigned int		grp_time;  /* Time period spanned by grps.  */
	unsigned int		lru_grp;   /* Group containing time zero.   */
	unsigned long		time_zero; /* Time first element was added. */
	unsigned long		next_reap; /* Time that the reaper should
					      next do something. */
	unsigned int		reap_all;  /* if set, reap all lists */
	xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */
	struct delayed_work	work;      /* Workqueue data for reaping.   */
} xfs_mru_cache_t;

/* Module init/teardown (element zone and reaper workqueue). */
int xfs_mru_cache_init(void);
void xfs_mru_cache_uninit(void);
/* Cache lifecycle. */
int xfs_mru_cache_create(struct xfs_mru_cache **mrup, unsigned int lifetime_ms,
			     unsigned int grp_count,
			     xfs_mru_cache_free_func_t free_func);
void xfs_mru_cache_flush(xfs_mru_cache_t *mru, int restart);
void xfs_mru_cache_destroy(struct xfs_mru_cache *mru);
/* Element operations; see the .c file for locking rules on lookup/peek. */
int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key,
				void *value);
void * xfs_mru_cache_remove(struct xfs_mru_cache *mru, unsigned long key);
void xfs_mru_cache_delete(struct xfs_mru_cache *mru, unsigned long key);
void *xfs_mru_cache_lookup(struct xfs_mru_cache *mru, unsigned long key);
void *xfs_mru_cache_peek(struct xfs_mru_cache *mru, unsigned long key);
void xfs_mru_cache_done(struct xfs_mru_cache *mru);
56
57#endif /* __XFS_MRU_CACHE_H__ */
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index b3a5f07bd07..47082c01872 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1882,11 +1882,13 @@ xfs_growfs_rt(
1882 (nrblocks = in->newblocks) <= sbp->sb_rblocks || 1882 (nrblocks = in->newblocks) <= sbp->sb_rblocks ||
1883 (sbp->sb_rblocks && (in->extsize != sbp->sb_rextsize))) 1883 (sbp->sb_rblocks && (in->extsize != sbp->sb_rextsize)))
1884 return XFS_ERROR(EINVAL); 1884 return XFS_ERROR(EINVAL);
1885 if ((error = xfs_sb_validate_fsb_count(sbp, nrblocks)))
1886 return error;
1885 /* 1887 /*
1886 * Read in the last block of the device, make sure it exists. 1888 * Read in the last block of the device, make sure it exists.
1887 */ 1889 */
1888 error = xfs_read_buf(mp, mp->m_rtdev_targp, 1890 error = xfs_read_buf(mp, mp->m_rtdev_targp,
1889 XFS_FSB_TO_BB(mp, in->newblocks - 1), 1891 XFS_FSB_TO_BB(mp, nrblocks - 1),
1890 XFS_FSB_TO_BB(mp, 1), 0, &bp); 1892 XFS_FSB_TO_BB(mp, 1), 0, &bp);
1891 if (error) 1893 if (error)
1892 return error; 1894 return error;
diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h
index 188b296ff50..fcf28dbded7 100644
--- a/fs/xfs/xfs_rw.h
+++ b/fs/xfs/xfs_rw.h
@@ -72,6 +72,34 @@ xfs_fsb_to_db_io(struct xfs_iocore *io, xfs_fsblock_t fsb)
72} 72}
73 73
74/* 74/*
75 * Flags for xfs_free_eofblocks
76 */
77#define XFS_FREE_EOF_LOCK (1<<0)
78#define XFS_FREE_EOF_NOLOCK (1<<1)
79
80
81/*
82 * helper function to extract extent size hint from inode
83 */
84STATIC_INLINE xfs_extlen_t
85xfs_get_extsz_hint(
86 xfs_inode_t *ip)
87{
88 xfs_extlen_t extsz;
89
90 if (unlikely(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) {
91 extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
92 ? ip->i_d.di_extsize
93 : ip->i_mount->m_sb.sb_rextsize;
94 ASSERT(extsz);
95 } else {
96 extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
97 ? ip->i_d.di_extsize : 0;
98 }
99 return extsz;
100}
101
102/*
75 * Prototypes for functions in xfs_rw.c. 103 * Prototypes for functions in xfs_rw.c.
76 */ 104 */
77extern int xfs_write_clear_setuid(struct xfs_inode *ip); 105extern int xfs_write_clear_setuid(struct xfs_inode *ip);
@@ -91,10 +119,12 @@ extern void xfs_ioerror_alert(char *func, struct xfs_mount *mp,
91extern int xfs_rwlock(bhv_desc_t *bdp, bhv_vrwlock_t write_lock); 119extern int xfs_rwlock(bhv_desc_t *bdp, bhv_vrwlock_t write_lock);
92extern void xfs_rwunlock(bhv_desc_t *bdp, bhv_vrwlock_t write_lock); 120extern void xfs_rwunlock(bhv_desc_t *bdp, bhv_vrwlock_t write_lock);
93extern int xfs_setattr(bhv_desc_t *, bhv_vattr_t *vap, int flags, 121extern int xfs_setattr(bhv_desc_t *, bhv_vattr_t *vap, int flags,
94 cred_t *credp); 122 cred_t *credp);
95extern int xfs_change_file_space(bhv_desc_t *bdp, int cmd, xfs_flock64_t *bf, 123extern int xfs_change_file_space(bhv_desc_t *bdp, int cmd, xfs_flock64_t *bf,
96 xfs_off_t offset, cred_t *credp, int flags); 124 xfs_off_t offset, cred_t *credp, int flags);
97extern int xfs_set_dmattrs(bhv_desc_t *bdp, u_int evmask, u_int16_t state, 125extern int xfs_set_dmattrs(bhv_desc_t *bdp, u_int evmask, u_int16_t state,
98 cred_t *credp); 126 cred_t *credp);
127extern int xfs_free_eofblocks(struct xfs_mount *mp, struct xfs_inode *ip,
128 int flags);
99 129
100#endif /* __XFS_RW_H__ */ 130#endif /* __XFS_RW_H__ */
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index 467854b45c8..ef42537a607 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -74,12 +74,13 @@ struct xfs_mount;
74 */ 74 */
75#define XFS_SB_VERSION2_REALFBITS 0x00ffffff /* Mask: features */ 75#define XFS_SB_VERSION2_REALFBITS 0x00ffffff /* Mask: features */
76#define XFS_SB_VERSION2_RESERVED1BIT 0x00000001 76#define XFS_SB_VERSION2_RESERVED1BIT 0x00000001
77#define XFS_SB_VERSION2_RESERVED2BIT 0x00000002 77#define XFS_SB_VERSION2_LAZYSBCOUNTBIT 0x00000002 /* Superblk counters */
78#define XFS_SB_VERSION2_RESERVED4BIT 0x00000004 78#define XFS_SB_VERSION2_RESERVED4BIT 0x00000004
79#define XFS_SB_VERSION2_ATTR2BIT 0x00000008 /* Inline attr rework */ 79#define XFS_SB_VERSION2_ATTR2BIT 0x00000008 /* Inline attr rework */
80 80
81#define XFS_SB_VERSION2_OKREALFBITS \ 81#define XFS_SB_VERSION2_OKREALFBITS \
82 (XFS_SB_VERSION2_ATTR2BIT) 82 (XFS_SB_VERSION2_LAZYSBCOUNTBIT | \
83 XFS_SB_VERSION2_ATTR2BIT)
83#define XFS_SB_VERSION2_OKSASHFBITS \ 84#define XFS_SB_VERSION2_OKSASHFBITS \
84 (0) 85 (0)
85#define XFS_SB_VERSION2_OKREALBITS \ 86#define XFS_SB_VERSION2_OKREALBITS \
@@ -181,6 +182,9 @@ typedef enum {
181#define XFS_SB_SHARED_VN XFS_SB_MVAL(SHARED_VN) 182#define XFS_SB_SHARED_VN XFS_SB_MVAL(SHARED_VN)
182#define XFS_SB_UNIT XFS_SB_MVAL(UNIT) 183#define XFS_SB_UNIT XFS_SB_MVAL(UNIT)
183#define XFS_SB_WIDTH XFS_SB_MVAL(WIDTH) 184#define XFS_SB_WIDTH XFS_SB_MVAL(WIDTH)
185#define XFS_SB_ICOUNT XFS_SB_MVAL(ICOUNT)
186#define XFS_SB_IFREE XFS_SB_MVAL(IFREE)
187#define XFS_SB_FDBLOCKS XFS_SB_MVAL(FDBLOCKS)
184#define XFS_SB_FEATURES2 XFS_SB_MVAL(FEATURES2) 188#define XFS_SB_FEATURES2 XFS_SB_MVAL(FEATURES2)
185#define XFS_SB_NUM_BITS ((int)XFS_SBS_FIELDCOUNT) 189#define XFS_SB_NUM_BITS ((int)XFS_SBS_FIELDCOUNT)
186#define XFS_SB_ALL_BITS ((1LL << XFS_SB_NUM_BITS) - 1) 190#define XFS_SB_ALL_BITS ((1LL << XFS_SB_NUM_BITS) - 1)
@@ -188,7 +192,7 @@ typedef enum {
188 (XFS_SB_UUID | XFS_SB_ROOTINO | XFS_SB_RBMINO | XFS_SB_RSUMINO | \ 192 (XFS_SB_UUID | XFS_SB_ROOTINO | XFS_SB_RBMINO | XFS_SB_RSUMINO | \
189 XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \ 193 XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \
190 XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \ 194 XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \
191 XFS_SB_FEATURES2) 195 XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2)
192 196
193 197
194/* 198/*
@@ -414,6 +418,12 @@ static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp)
414 * ((sbp)->sb_features2 & XFS_SB_VERSION2_FUNBIT) 418 * ((sbp)->sb_features2 & XFS_SB_VERSION2_FUNBIT)
415 */ 419 */
416 420
421static inline int xfs_sb_version_haslazysbcount(xfs_sb_t *sbp)
422{
423 return (XFS_SB_VERSION_HASMOREBITS(sbp) && \
424 ((sbp)->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT));
425}
426
417#define XFS_SB_VERSION_HASATTR2(sbp) xfs_sb_version_hasattr2(sbp) 427#define XFS_SB_VERSION_HASATTR2(sbp) xfs_sb_version_hasattr2(sbp)
418static inline int xfs_sb_version_hasattr2(xfs_sb_t *sbp) 428static inline int xfs_sb_version_hasattr2(xfs_sb_t *sbp)
419{ 429{
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index cc2d60951e2..356d6627f58 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -427,6 +427,14 @@ undo_blocks:
427 * 427 *
428 * Mark the transaction structure to indicate that the superblock 428 * Mark the transaction structure to indicate that the superblock
429 * needs to be updated before committing. 429 * needs to be updated before committing.
430 *
431 * Because we may not be keeping track of allocated/free inodes and
432 * used filesystem blocks in the superblock, we do not mark the
433 * superblock dirty in this transaction if we modify these fields.
434 * We still need to update the transaction deltas so that they get
435 * applied to the incore superblock, but we don't want them to
436 * cause the superblock to get locked and logged if these are the
437 * only fields in the superblock that the transaction modifies.
430 */ 438 */
431void 439void
432xfs_trans_mod_sb( 440xfs_trans_mod_sb(
@@ -434,13 +442,19 @@ xfs_trans_mod_sb(
434 uint field, 442 uint field,
435 int64_t delta) 443 int64_t delta)
436{ 444{
445 uint32_t flags = (XFS_TRANS_DIRTY|XFS_TRANS_SB_DIRTY);
446 xfs_mount_t *mp = tp->t_mountp;
437 447
438 switch (field) { 448 switch (field) {
439 case XFS_TRANS_SB_ICOUNT: 449 case XFS_TRANS_SB_ICOUNT:
440 tp->t_icount_delta += delta; 450 tp->t_icount_delta += delta;
451 if (xfs_sb_version_haslazysbcount(&mp->m_sb))
452 flags &= ~XFS_TRANS_SB_DIRTY;
441 break; 453 break;
442 case XFS_TRANS_SB_IFREE: 454 case XFS_TRANS_SB_IFREE:
443 tp->t_ifree_delta += delta; 455 tp->t_ifree_delta += delta;
456 if (xfs_sb_version_haslazysbcount(&mp->m_sb))
457 flags &= ~XFS_TRANS_SB_DIRTY;
444 break; 458 break;
445 case XFS_TRANS_SB_FDBLOCKS: 459 case XFS_TRANS_SB_FDBLOCKS:
446 /* 460 /*
@@ -453,6 +467,8 @@ xfs_trans_mod_sb(
453 ASSERT(tp->t_blk_res_used <= tp->t_blk_res); 467 ASSERT(tp->t_blk_res_used <= tp->t_blk_res);
454 } 468 }
455 tp->t_fdblocks_delta += delta; 469 tp->t_fdblocks_delta += delta;
470 if (xfs_sb_version_haslazysbcount(&mp->m_sb))
471 flags &= ~XFS_TRANS_SB_DIRTY;
456 break; 472 break;
457 case XFS_TRANS_SB_RES_FDBLOCKS: 473 case XFS_TRANS_SB_RES_FDBLOCKS:
458 /* 474 /*
@@ -462,6 +478,8 @@ xfs_trans_mod_sb(
462 */ 478 */
463 ASSERT(delta < 0); 479 ASSERT(delta < 0);
464 tp->t_res_fdblocks_delta += delta; 480 tp->t_res_fdblocks_delta += delta;
481 if (xfs_sb_version_haslazysbcount(&mp->m_sb))
482 flags &= ~XFS_TRANS_SB_DIRTY;
465 break; 483 break;
466 case XFS_TRANS_SB_FREXTENTS: 484 case XFS_TRANS_SB_FREXTENTS:
467 /* 485 /*
@@ -515,7 +533,7 @@ xfs_trans_mod_sb(
515 return; 533 return;
516 } 534 }
517 535
518 tp->t_flags |= (XFS_TRANS_SB_DIRTY | XFS_TRANS_DIRTY); 536 tp->t_flags |= flags;
519} 537}
520 538
521/* 539/*
@@ -544,18 +562,23 @@ xfs_trans_apply_sb_deltas(
544 (tp->t_ag_freeblks_delta + tp->t_ag_flist_delta + 562 (tp->t_ag_freeblks_delta + tp->t_ag_flist_delta +
545 tp->t_ag_btree_delta)); 563 tp->t_ag_btree_delta));
546 564
547 if (tp->t_icount_delta != 0) { 565 /*
548 INT_MOD(sbp->sb_icount, ARCH_CONVERT, tp->t_icount_delta); 566 * Only update the superblock counters if we are logging them
549 } 567 */
550 if (tp->t_ifree_delta != 0) { 568 if (!xfs_sb_version_haslazysbcount(&(tp->t_mountp->m_sb))) {
551 INT_MOD(sbp->sb_ifree, ARCH_CONVERT, tp->t_ifree_delta); 569 if (tp->t_icount_delta != 0) {
552 } 570 INT_MOD(sbp->sb_icount, ARCH_CONVERT, tp->t_icount_delta);
571 }
572 if (tp->t_ifree_delta != 0) {
573 INT_MOD(sbp->sb_ifree, ARCH_CONVERT, tp->t_ifree_delta);
574 }
553 575
554 if (tp->t_fdblocks_delta != 0) { 576 if (tp->t_fdblocks_delta != 0) {
555 INT_MOD(sbp->sb_fdblocks, ARCH_CONVERT, tp->t_fdblocks_delta); 577 INT_MOD(sbp->sb_fdblocks, ARCH_CONVERT, tp->t_fdblocks_delta);
556 } 578 }
557 if (tp->t_res_fdblocks_delta != 0) { 579 if (tp->t_res_fdblocks_delta != 0) {
558 INT_MOD(sbp->sb_fdblocks, ARCH_CONVERT, tp->t_res_fdblocks_delta); 580 INT_MOD(sbp->sb_fdblocks, ARCH_CONVERT, tp->t_res_fdblocks_delta);
581 }
559 } 582 }
560 583
561 if (tp->t_frextents_delta != 0) { 584 if (tp->t_frextents_delta != 0) {
@@ -615,11 +638,23 @@ xfs_trans_apply_sb_deltas(
615} 638}
616 639
617/* 640/*
618 * xfs_trans_unreserve_and_mod_sb() is called to release unused 641 * xfs_trans_unreserve_and_mod_sb() is called to release unused reservations
619 * reservations and apply superblock counter changes to the in-core 642 * and apply superblock counter changes to the in-core superblock. The
620 * superblock. 643 * t_res_fdblocks_delta and t_res_frextents_delta fields are explicitly NOT
644 * applied to the in-core superblock. The idea is that that has already been
645 * done.
621 * 646 *
622 * This is done efficiently with a single call to xfs_mod_incore_sb_batch(). 647 * This is done efficiently with a single call to xfs_mod_incore_sb_batch().
648 * However, we have to ensure that we only modify each superblock field only
649 * once because the application of the delta values may not be atomic. That can
650 * lead to ENOSPC races occurring if we have two separate modifcations of the
651 * free space counter to put back the entire reservation and then take away
652 * what we used.
653 *
654 * If we are not logging superblock counters, then the inode allocated/free and
655 * used block counts are not updated in the on disk superblock. In this case,
656 * XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we
657 * still need to update the incore superblock with the changes.
623 */ 658 */
624STATIC void 659STATIC void
625xfs_trans_unreserve_and_mod_sb( 660xfs_trans_unreserve_and_mod_sb(
@@ -627,40 +662,49 @@ xfs_trans_unreserve_and_mod_sb(
627{ 662{
628 xfs_mod_sb_t msb[14]; /* If you add cases, add entries */ 663 xfs_mod_sb_t msb[14]; /* If you add cases, add entries */
629 xfs_mod_sb_t *msbp; 664 xfs_mod_sb_t *msbp;
665 xfs_mount_t *mp = tp->t_mountp;
630 /* REFERENCED */ 666 /* REFERENCED */
631 int error; 667 int error;
632 int rsvd; 668 int rsvd;
669 int64_t blkdelta = 0;
670 int64_t rtxdelta = 0;
633 671
634 msbp = msb; 672 msbp = msb;
635 rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; 673 rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
636 674
637 /* 675 /* calculate free blocks delta */
638 * Release any reserved blocks. Any that were allocated 676 if (tp->t_blk_res > 0)
639 * will be taken back again by fdblocks_delta below. 677 blkdelta = tp->t_blk_res;
640 */ 678
641 if (tp->t_blk_res > 0) { 679 if ((tp->t_fdblocks_delta != 0) &&
680 (xfs_sb_version_haslazysbcount(&mp->m_sb) ||
681 (tp->t_flags & XFS_TRANS_SB_DIRTY)))
682 blkdelta += tp->t_fdblocks_delta;
683
684 if (blkdelta != 0) {
642 msbp->msb_field = XFS_SBS_FDBLOCKS; 685 msbp->msb_field = XFS_SBS_FDBLOCKS;
643 msbp->msb_delta = tp->t_blk_res; 686 msbp->msb_delta = blkdelta;
644 msbp++; 687 msbp++;
645 } 688 }
646 689
647 /* 690 /* calculate free realtime extents delta */
648 * Release any reserved real time extents . Any that were 691 if (tp->t_rtx_res > 0)
649 * allocated will be taken back again by frextents_delta below. 692 rtxdelta = tp->t_rtx_res;
650 */ 693
651 if (tp->t_rtx_res > 0) { 694 if ((tp->t_frextents_delta != 0) &&
695 (tp->t_flags & XFS_TRANS_SB_DIRTY))
696 rtxdelta += tp->t_frextents_delta;
697
698 if (rtxdelta != 0) {
652 msbp->msb_field = XFS_SBS_FREXTENTS; 699 msbp->msb_field = XFS_SBS_FREXTENTS;
653 msbp->msb_delta = tp->t_rtx_res; 700 msbp->msb_delta = rtxdelta;
654 msbp++; 701 msbp++;
655 } 702 }
656 703
657 /* 704 /* apply remaining deltas */
658 * Apply any superblock modifications to the in-core version. 705
659 * The t_res_fdblocks_delta and t_res_frextents_delta fields are 706 if (xfs_sb_version_haslazysbcount(&mp->m_sb) ||
660 * explicitly NOT applied to the in-core superblock. 707 (tp->t_flags & XFS_TRANS_SB_DIRTY)) {
661 * The idea is that that has already been done.
662 */
663 if (tp->t_flags & XFS_TRANS_SB_DIRTY) {
664 if (tp->t_icount_delta != 0) { 708 if (tp->t_icount_delta != 0) {
665 msbp->msb_field = XFS_SBS_ICOUNT; 709 msbp->msb_field = XFS_SBS_ICOUNT;
666 msbp->msb_delta = tp->t_icount_delta; 710 msbp->msb_delta = tp->t_icount_delta;
@@ -671,16 +715,9 @@ xfs_trans_unreserve_and_mod_sb(
671 msbp->msb_delta = tp->t_ifree_delta; 715 msbp->msb_delta = tp->t_ifree_delta;
672 msbp++; 716 msbp++;
673 } 717 }
674 if (tp->t_fdblocks_delta != 0) { 718 }
675 msbp->msb_field = XFS_SBS_FDBLOCKS; 719
676 msbp->msb_delta = tp->t_fdblocks_delta; 720 if (tp->t_flags & XFS_TRANS_SB_DIRTY) {
677 msbp++;
678 }
679 if (tp->t_frextents_delta != 0) {
680 msbp->msb_field = XFS_SBS_FREXTENTS;
681 msbp->msb_delta = tp->t_frextents_delta;
682 msbp++;
683 }
684 if (tp->t_dblocks_delta != 0) { 721 if (tp->t_dblocks_delta != 0) {
685 msbp->msb_field = XFS_SBS_DBLOCKS; 722 msbp->msb_field = XFS_SBS_DBLOCKS;
686 msbp->msb_delta = tp->t_dblocks_delta; 723 msbp->msb_delta = tp->t_dblocks_delta;
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 7dfcc450366..0e26e729023 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -94,7 +94,8 @@ typedef struct xfs_trans_header {
94#define XFS_TRANS_GROWFSRT_ZERO 38 94#define XFS_TRANS_GROWFSRT_ZERO 38
95#define XFS_TRANS_GROWFSRT_FREE 39 95#define XFS_TRANS_GROWFSRT_FREE 39
96#define XFS_TRANS_SWAPEXT 40 96#define XFS_TRANS_SWAPEXT 40
97#define XFS_TRANS_TYPE_MAX 40 97#define XFS_TRANS_SB_COUNT 41
98#define XFS_TRANS_TYPE_MAX 41
98/* new transaction types need to be reflected in xfs_logprint(8) */ 99/* new transaction types need to be reflected in xfs_logprint(8) */
99 100
100 101
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 65c561201cb..11f5ea29a03 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -51,6 +51,8 @@
51#include "xfs_acl.h" 51#include "xfs_acl.h"
52#include "xfs_attr.h" 52#include "xfs_attr.h"
53#include "xfs_clnt.h" 53#include "xfs_clnt.h"
54#include "xfs_mru_cache.h"
55#include "xfs_filestream.h"
54#include "xfs_fsops.h" 56#include "xfs_fsops.h"
55 57
56STATIC int xfs_sync(bhv_desc_t *, int, cred_t *); 58STATIC int xfs_sync(bhv_desc_t *, int, cred_t *);
@@ -81,6 +83,8 @@ xfs_init(void)
81 xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf"); 83 xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
82 xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork"); 84 xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
83 xfs_acl_zone_init(xfs_acl_zone, "xfs_acl"); 85 xfs_acl_zone_init(xfs_acl_zone, "xfs_acl");
86 xfs_mru_cache_init();
87 xfs_filestream_init();
84 88
85 /* 89 /*
86 * The size of the zone allocated buf log item is the maximum 90 * The size of the zone allocated buf log item is the maximum
@@ -164,6 +168,8 @@ xfs_cleanup(void)
164 xfs_cleanup_procfs(); 168 xfs_cleanup_procfs();
165 xfs_sysctl_unregister(); 169 xfs_sysctl_unregister();
166 xfs_refcache_destroy(); 170 xfs_refcache_destroy();
171 xfs_filestream_uninit();
172 xfs_mru_cache_uninit();
167 xfs_acl_zone_destroy(xfs_acl_zone); 173 xfs_acl_zone_destroy(xfs_acl_zone);
168 174
169#ifdef XFS_DIR2_TRACE 175#ifdef XFS_DIR2_TRACE
@@ -320,6 +326,9 @@ xfs_start_flags(
320 else 326 else
321 mp->m_flags &= ~XFS_MOUNT_BARRIER; 327 mp->m_flags &= ~XFS_MOUNT_BARRIER;
322 328
329 if (ap->flags2 & XFSMNT2_FILESTREAMS)
330 mp->m_flags |= XFS_MOUNT_FILESTREAMS;
331
323 return 0; 332 return 0;
324} 333}
325 334
@@ -518,6 +527,9 @@ xfs_mount(
518 if (mp->m_flags & XFS_MOUNT_BARRIER) 527 if (mp->m_flags & XFS_MOUNT_BARRIER)
519 xfs_mountfs_check_barriers(mp); 528 xfs_mountfs_check_barriers(mp);
520 529
530 if ((error = xfs_filestream_mount(mp)))
531 goto error2;
532
521 error = XFS_IOINIT(vfsp, args, flags); 533 error = XFS_IOINIT(vfsp, args, flags);
522 if (error) 534 if (error)
523 goto error2; 535 goto error2;
@@ -575,6 +587,13 @@ xfs_unmount(
575 */ 587 */
576 xfs_refcache_purge_mp(mp); 588 xfs_refcache_purge_mp(mp);
577 589
590 /*
591 * Blow away any referenced inode in the filestreams cache.
592 * This can and will cause log traffic as inodes go inactive
593 * here.
594 */
595 xfs_filestream_unmount(mp);
596
578 XFS_bflush(mp->m_ddev_targp); 597 XFS_bflush(mp->m_ddev_targp);
579 error = xfs_unmount_flush(mp, 0); 598 error = xfs_unmount_flush(mp, 0);
580 if (error) 599 if (error)
@@ -640,7 +659,7 @@ xfs_quiesce_fs(
640 * we can write the unmount record. 659 * we can write the unmount record.
641 */ 660 */
642 do { 661 do {
643 xfs_syncsub(mp, SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT, NULL); 662 xfs_syncsub(mp, SYNC_INODE_QUIESCE, NULL);
644 pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); 663 pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
645 if (!pincount) { 664 if (!pincount) {
646 delay(50); 665 delay(50);
@@ -651,6 +670,30 @@ xfs_quiesce_fs(
651 return 0; 670 return 0;
652} 671}
653 672
673/*
674 * Second stage of a quiesce. The data is already synced, now we have to take
675 * care of the metadata. New transactions are already blocked, so we need to
676 * wait for any remaining transactions to drain out before proceeding.
677 */
678STATIC void
679xfs_attr_quiesce(
680 xfs_mount_t *mp)
681{
682 /* wait for all modifications to complete */
683 while (atomic_read(&mp->m_active_trans) > 0)
684 delay(100);
685
686 /* flush inodes and push all remaining buffers out to disk */
687 xfs_quiesce_fs(mp);
688
689 ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0);
690
691 /* Push the superblock and write an unmount record */
692 xfs_log_sbcount(mp, 1);
693 xfs_log_unmount_write(mp);
694 xfs_unmountfs_writesb(mp);
695}
696
654STATIC int 697STATIC int
655xfs_mntupdate( 698xfs_mntupdate(
656 bhv_desc_t *bdp, 699 bhv_desc_t *bdp,
@@ -670,10 +713,9 @@ xfs_mntupdate(
670 mp->m_flags &= ~XFS_MOUNT_BARRIER; 713 mp->m_flags &= ~XFS_MOUNT_BARRIER;
671 } 714 }
672 } else if (!(vfsp->vfs_flag & VFS_RDONLY)) { /* rw -> ro */ 715 } else if (!(vfsp->vfs_flag & VFS_RDONLY)) { /* rw -> ro */
673 bhv_vfs_sync(vfsp, SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR, NULL); 716 xfs_filestream_flush(mp);
674 xfs_quiesce_fs(mp); 717 bhv_vfs_sync(vfsp, SYNC_DATA_QUIESCE, NULL);
675 xfs_log_unmount_write(mp); 718 xfs_attr_quiesce(mp);
676 xfs_unmountfs_writesb(mp);
677 vfsp->vfs_flag |= VFS_RDONLY; 719 vfsp->vfs_flag |= VFS_RDONLY;
678 } 720 }
679 return 0; 721 return 0;
@@ -887,6 +929,9 @@ xfs_sync(
887{ 929{
888 xfs_mount_t *mp = XFS_BHVTOM(bdp); 930 xfs_mount_t *mp = XFS_BHVTOM(bdp);
889 931
932 if (flags & SYNC_IOWAIT)
933 xfs_filestream_flush(mp);
934
890 return xfs_syncsub(mp, flags, NULL); 935 return xfs_syncsub(mp, flags, NULL);
891} 936}
892 937
@@ -1128,58 +1173,41 @@ xfs_sync_inodes(
1128 * in the inode list. 1173 * in the inode list.
1129 */ 1174 */
1130 1175
1131 if ((flags & SYNC_CLOSE) && (vp != NULL)) { 1176 /*
1132 /* 1177 * If we have to flush data or wait for I/O completion
1133 * This is the shutdown case. We just need to 1178 * we need to drop the ilock that we currently hold.
1134 * flush and invalidate all the pages associated 1179 * If we need to drop the lock, insert a marker if we
1135 * with the inode. Drop the inode lock since 1180 * have not already done so.
1136 * we can't hold it across calls to the buffer 1181 */
1137 * cache. 1182 if ((flags & (SYNC_CLOSE|SYNC_IOWAIT)) ||
1138 * 1183 ((flags & SYNC_DELWRI) && VN_DIRTY(vp))) {
1139 * We don't set the VREMAPPING bit in the vnode 1184 if (mount_locked) {
1140 * here, because we don't hold the vnode lock 1185 IPOINTER_INSERT(ip, mp);
1141 * exclusively. It doesn't really matter, though,
1142 * because we only come here when we're shutting
1143 * down anyway.
1144 */
1145 xfs_iunlock(ip, XFS_ILOCK_SHARED);
1146
1147 if (XFS_FORCED_SHUTDOWN(mp)) {
1148 bhv_vop_toss_pages(vp, 0, -1, FI_REMAPF);
1149 } else {
1150 error = bhv_vop_flushinval_pages(vp, 0, -1, FI_REMAPF);
1151 } 1186 }
1187 xfs_iunlock(ip, XFS_ILOCK_SHARED);
1152 1188
1153 xfs_ilock(ip, XFS_ILOCK_SHARED); 1189 if (flags & SYNC_CLOSE) {
1154 1190 /* Shutdown case. Flush and invalidate. */
1155 } else if ((flags & SYNC_DELWRI) && (vp != NULL)) { 1191 if (XFS_FORCED_SHUTDOWN(mp))
1156 if (VN_DIRTY(vp)) { 1192 bhv_vop_toss_pages(vp, 0, -1, FI_REMAPF);
1157 /* We need to have dropped the lock here, 1193 else
1158 * so insert a marker if we have not already 1194 error = bhv_vop_flushinval_pages(vp, 0,
1159 * done so. 1195 -1, FI_REMAPF);
1160 */ 1196 } else if ((flags & SYNC_DELWRI) && VN_DIRTY(vp)) {
1161 if (mount_locked) {
1162 IPOINTER_INSERT(ip, mp);
1163 }
1164
1165 /*
1166 * Drop the inode lock since we can't hold it
1167 * across calls to the buffer cache.
1168 */
1169 xfs_iunlock(ip, XFS_ILOCK_SHARED);
1170 error = bhv_vop_flush_pages(vp, (xfs_off_t)0, 1197 error = bhv_vop_flush_pages(vp, (xfs_off_t)0,
1171 -1, fflag, FI_NONE); 1198 -1, fflag, FI_NONE);
1172 xfs_ilock(ip, XFS_ILOCK_SHARED);
1173 } 1199 }
1174 1200
1201 /*
1202 * When freezing, we need to wait ensure all I/O (including direct
1203 * I/O) is complete to ensure no further data modification can take
1204 * place after this point
1205 */
1206 if (flags & SYNC_IOWAIT)
1207 vn_iowait(vp);
1208
1209 xfs_ilock(ip, XFS_ILOCK_SHARED);
1175 } 1210 }
1176 /*
1177 * When freezing, we need to wait ensure all I/O (including direct
1178 * I/O) is complete to ensure no further data modification can take
1179 * place after this point
1180 */
1181 if (flags & SYNC_IOWAIT)
1182 vn_iowait(vp);
1183 1211
1184 if (flags & SYNC_BDFLUSH) { 1212 if (flags & SYNC_BDFLUSH) {
1185 if ((flags & SYNC_ATTR) && 1213 if ((flags & SYNC_ATTR) &&
@@ -1514,6 +1542,15 @@ xfs_syncsub(
1514 } 1542 }
1515 1543
1516 /* 1544 /*
1545 * If asked, update the disk superblock with incore counter values if we
1546 * are using non-persistent counters so that they don't get too far out
1547 * of sync if we crash or get a forced shutdown. We don't want to force
1548 * this to disk, just get a transaction into the iclogs....
1549 */
1550 if (flags & SYNC_SUPER)
1551 xfs_log_sbcount(mp, 0);
1552
1553 /*
1517 * Now check to see if the log needs a "dummy" transaction. 1554 * Now check to see if the log needs a "dummy" transaction.
1518 */ 1555 */
1519 1556
@@ -1645,6 +1682,7 @@ xfs_vget(
1645 * in stat(). */ 1682 * in stat(). */
1646#define MNTOPT_ATTR2 "attr2" /* do use attr2 attribute format */ 1683#define MNTOPT_ATTR2 "attr2" /* do use attr2 attribute format */
1647#define MNTOPT_NOATTR2 "noattr2" /* do not use attr2 attribute format */ 1684#define MNTOPT_NOATTR2 "noattr2" /* do not use attr2 attribute format */
1685#define MNTOPT_FILESTREAM "filestreams" /* use filestreams allocator */
1648 1686
1649STATIC unsigned long 1687STATIC unsigned long
1650suffix_strtoul(char *s, char **endp, unsigned int base) 1688suffix_strtoul(char *s, char **endp, unsigned int base)
@@ -1831,6 +1869,8 @@ xfs_parseargs(
1831 args->flags |= XFSMNT_ATTR2; 1869 args->flags |= XFSMNT_ATTR2;
1832 } else if (!strcmp(this_char, MNTOPT_NOATTR2)) { 1870 } else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
1833 args->flags &= ~XFSMNT_ATTR2; 1871 args->flags &= ~XFSMNT_ATTR2;
1872 } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
1873 args->flags2 |= XFSMNT2_FILESTREAMS;
1834 } else if (!strcmp(this_char, "osyncisdsync")) { 1874 } else if (!strcmp(this_char, "osyncisdsync")) {
1835 /* no-op, this is now the default */ 1875 /* no-op, this is now the default */
1836 cmn_err(CE_WARN, 1876 cmn_err(CE_WARN,
@@ -1959,9 +1999,9 @@ xfs_showargs(
1959} 1999}
1960 2000
1961/* 2001/*
1962 * Second stage of a freeze. The data is already frozen, now we have to take 2002 * Second stage of a freeze. The data is already frozen so we only
1963 * care of the metadata. New transactions are already blocked, so we need to 2003 * need to take care of themetadata. Once that's done write a dummy
1964 * wait for any remaining transactions to drain out before proceding. 2004 * record to dirty the log in case of a crash while frozen.
1965 */ 2005 */
1966STATIC void 2006STATIC void
1967xfs_freeze( 2007xfs_freeze(
@@ -1969,18 +2009,7 @@ xfs_freeze(
1969{ 2009{
1970 xfs_mount_t *mp = XFS_BHVTOM(bdp); 2010 xfs_mount_t *mp = XFS_BHVTOM(bdp);
1971 2011
1972 /* wait for all modifications to complete */ 2012 xfs_attr_quiesce(mp);
1973 while (atomic_read(&mp->m_active_trans) > 0)
1974 delay(100);
1975
1976 /* flush inodes and push all remaining buffers out to disk */
1977 xfs_quiesce_fs(mp);
1978
1979 ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0);
1980
1981 /* Push the superblock and write an unmount record */
1982 xfs_log_unmount_write(mp);
1983 xfs_unmountfs_writesb(mp);
1984 xfs_fs_log_dummy(mp); 2013 xfs_fs_log_dummy(mp);
1985} 2014}
1986 2015
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 70bc82f6531..79b522779aa 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -51,6 +51,7 @@
51#include "xfs_refcache.h" 51#include "xfs_refcache.h"
52#include "xfs_trans_space.h" 52#include "xfs_trans_space.h"
53#include "xfs_log_priv.h" 53#include "xfs_log_priv.h"
54#include "xfs_filestream.h"
54 55
55STATIC int 56STATIC int
56xfs_open( 57xfs_open(
@@ -77,36 +78,6 @@ xfs_open(
77 return 0; 78 return 0;
78} 79}
79 80
80STATIC int
81xfs_close(
82 bhv_desc_t *bdp,
83 int flags,
84 lastclose_t lastclose,
85 cred_t *credp)
86{
87 bhv_vnode_t *vp = BHV_TO_VNODE(bdp);
88 xfs_inode_t *ip = XFS_BHVTOI(bdp);
89
90 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
91 return XFS_ERROR(EIO);
92
93 if (lastclose != L_TRUE || !VN_ISREG(vp))
94 return 0;
95
96 /*
97 * If we previously truncated this file and removed old data in
98 * the process, we want to initiate "early" writeout on the last
99 * close. This is an attempt to combat the notorious NULL files
100 * problem which is particularly noticable from a truncate down,
101 * buffered (re-)write (delalloc), followed by a crash. What we
102 * are effectively doing here is significantly reducing the time
103 * window where we'd otherwise be exposed to that problem.
104 */
105 if (VUNTRUNCATE(vp) && VN_DIRTY(vp) && ip->i_delayed_blks > 0)
106 return bhv_vop_flush_pages(vp, 0, -1, XFS_B_ASYNC, FI_NONE);
107 return 0;
108}
109
110/* 81/*
111 * xfs_getattr 82 * xfs_getattr
112 */ 83 */
@@ -183,9 +154,8 @@ xfs_getattr(
183 * realtime extent size or the realtime volume's 154 * realtime extent size or the realtime volume's
184 * extent size. 155 * extent size.
185 */ 156 */
186 vap->va_blocksize = ip->i_d.di_extsize ? 157 vap->va_blocksize =
187 (ip->i_d.di_extsize << mp->m_sb.sb_blocklog) : 158 xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog;
188 (mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog);
189 } 159 }
190 break; 160 break;
191 } 161 }
@@ -814,6 +784,8 @@ xfs_setattr(
814 di_flags |= XFS_DIFLAG_PROJINHERIT; 784 di_flags |= XFS_DIFLAG_PROJINHERIT;
815 if (vap->va_xflags & XFS_XFLAG_NODEFRAG) 785 if (vap->va_xflags & XFS_XFLAG_NODEFRAG)
816 di_flags |= XFS_DIFLAG_NODEFRAG; 786 di_flags |= XFS_DIFLAG_NODEFRAG;
787 if (vap->va_xflags & XFS_XFLAG_FILESTREAM)
788 di_flags |= XFS_DIFLAG_FILESTREAM;
817 if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { 789 if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
818 if (vap->va_xflags & XFS_XFLAG_RTINHERIT) 790 if (vap->va_xflags & XFS_XFLAG_RTINHERIT)
819 di_flags |= XFS_DIFLAG_RTINHERIT; 791 di_flags |= XFS_DIFLAG_RTINHERIT;
@@ -1201,13 +1173,15 @@ xfs_fsync(
1201} 1173}
1202 1174
1203/* 1175/*
1204 * This is called by xfs_inactive to free any blocks beyond eof, 1176 * This is called by xfs_inactive to free any blocks beyond eof
1205 * when the link count isn't zero. 1177 * when the link count isn't zero and by xfs_dm_punch_hole() when
1178 * punching a hole to EOF.
1206 */ 1179 */
1207STATIC int 1180int
1208xfs_inactive_free_eofblocks( 1181xfs_free_eofblocks(
1209 xfs_mount_t *mp, 1182 xfs_mount_t *mp,
1210 xfs_inode_t *ip) 1183 xfs_inode_t *ip,
1184 int flags)
1211{ 1185{
1212 xfs_trans_t *tp; 1186 xfs_trans_t *tp;
1213 int error; 1187 int error;
@@ -1216,6 +1190,7 @@ xfs_inactive_free_eofblocks(
1216 xfs_filblks_t map_len; 1190 xfs_filblks_t map_len;
1217 int nimaps; 1191 int nimaps;
1218 xfs_bmbt_irec_t imap; 1192 xfs_bmbt_irec_t imap;
1193 int use_iolock = (flags & XFS_FREE_EOF_LOCK);
1219 1194
1220 /* 1195 /*
1221 * Figure out if there are any blocks beyond the end 1196 * Figure out if there are any blocks beyond the end
@@ -1256,11 +1231,14 @@ xfs_inactive_free_eofblocks(
1256 * cache and we can't 1231 * cache and we can't
1257 * do that within a transaction. 1232 * do that within a transaction.
1258 */ 1233 */
1259 xfs_ilock(ip, XFS_IOLOCK_EXCL); 1234 if (use_iolock)
1235 xfs_ilock(ip, XFS_IOLOCK_EXCL);
1260 error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 1236 error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE,
1261 ip->i_size); 1237 ip->i_size);
1262 if (error) { 1238 if (error) {
1263 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 1239 xfs_trans_cancel(tp, 0);
1240 if (use_iolock)
1241 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
1264 return error; 1242 return error;
1265 } 1243 }
1266 1244
@@ -1297,7 +1275,8 @@ xfs_inactive_free_eofblocks(
1297 error = xfs_trans_commit(tp, 1275 error = xfs_trans_commit(tp,
1298 XFS_TRANS_RELEASE_LOG_RES); 1276 XFS_TRANS_RELEASE_LOG_RES);
1299 } 1277 }
1300 xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1278 xfs_iunlock(ip, (use_iolock ? (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)
1279 : XFS_ILOCK_EXCL));
1301 } 1280 }
1302 return error; 1281 return error;
1303} 1282}
@@ -1560,6 +1539,31 @@ xfs_release(
1560 if (vp->v_vfsp->vfs_flag & VFS_RDONLY) 1539 if (vp->v_vfsp->vfs_flag & VFS_RDONLY)
1561 return 0; 1540 return 0;
1562 1541
1542 if (!XFS_FORCED_SHUTDOWN(mp)) {
1543 /*
1544 * If we are using filestreams, and we have an unlinked
1545 * file that we are processing the last close on, then nothing
1546 * will be able to reopen and write to this file. Purge this
1547 * inode from the filestreams cache so that it doesn't delay
1548 * teardown of the inode.
1549 */
1550 if ((ip->i_d.di_nlink == 0) && xfs_inode_is_filestream(ip))
1551 xfs_filestream_deassociate(ip);
1552
1553 /*
1554 * If we previously truncated this file and removed old data
1555 * in the process, we want to initiate "early" writeout on
1556 * the last close. This is an attempt to combat the notorious
1557 * NULL files problem which is particularly noticable from a
1558 * truncate down, buffered (re-)write (delalloc), followed by
1559 * a crash. What we are effectively doing here is
1560 * significantly reducing the time window where we'd otherwise
1561 * be exposed to that problem.
1562 */
1563 if (VUNTRUNCATE(vp) && VN_DIRTY(vp) && ip->i_delayed_blks > 0)
1564 bhv_vop_flush_pages(vp, 0, -1, XFS_B_ASYNC, FI_NONE);
1565 }
1566
1563#ifdef HAVE_REFCACHE 1567#ifdef HAVE_REFCACHE
1564 /* If we are in the NFS reference cache then don't do this now */ 1568 /* If we are in the NFS reference cache then don't do this now */
1565 if (ip->i_refcache) 1569 if (ip->i_refcache)
@@ -1573,7 +1577,8 @@ xfs_release(
1573 (ip->i_df.if_flags & XFS_IFEXTENTS)) && 1577 (ip->i_df.if_flags & XFS_IFEXTENTS)) &&
1574 (!(ip->i_d.di_flags & 1578 (!(ip->i_d.di_flags &
1575 (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) { 1579 (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
1576 if ((error = xfs_inactive_free_eofblocks(mp, ip))) 1580 error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK);
1581 if (error)
1577 return error; 1582 return error;
1578 /* Update linux inode block count after free above */ 1583 /* Update linux inode block count after free above */
1579 vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp, 1584 vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp,
@@ -1654,7 +1659,8 @@ xfs_inactive(
1654 (!(ip->i_d.di_flags & 1659 (!(ip->i_d.di_flags &
1655 (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) || 1660 (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
1656 (ip->i_delayed_blks != 0)))) { 1661 (ip->i_delayed_blks != 0)))) {
1657 if ((error = xfs_inactive_free_eofblocks(mp, ip))) 1662 error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK);
1663 if (error)
1658 return VN_INACTIVE_CACHE; 1664 return VN_INACTIVE_CACHE;
1659 /* Update linux inode block count after free above */ 1665 /* Update linux inode block count after free above */
1660 vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp, 1666 vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp,
@@ -1680,6 +1686,7 @@ xfs_inactive(
1680 1686
1681 error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 0); 1687 error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 0);
1682 if (error) { 1688 if (error) {
1689 xfs_trans_cancel(tp, 0);
1683 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 1690 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
1684 return VN_INACTIVE_CACHE; 1691 return VN_INACTIVE_CACHE;
1685 } 1692 }
@@ -2217,9 +2224,9 @@ static inline int
2217xfs_lock_inumorder(int lock_mode, int subclass) 2224xfs_lock_inumorder(int lock_mode, int subclass)
2218{ 2225{
2219 if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) 2226 if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
2220 lock_mode |= (subclass + XFS_IOLOCK_INUMORDER) << XFS_IOLOCK_SHIFT; 2227 lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
2221 if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) 2228 if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
2222 lock_mode |= (subclass + XFS_ILOCK_INUMORDER) << XFS_ILOCK_SHIFT; 2229 lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;
2223 2230
2224 return lock_mode; 2231 return lock_mode;
2225} 2232}
@@ -2546,6 +2553,15 @@ xfs_remove(
2546 */ 2553 */
2547 xfs_refcache_purge_ip(ip); 2554 xfs_refcache_purge_ip(ip);
2548 2555
2556 /*
2557 * If we are using filestreams, kill the stream association.
2558 * If the file is still open it may get a new one but that
2559 * will get killed on last close in xfs_close() so we don't
2560 * have to worry about that.
2561 */
2562 if (link_zero && xfs_inode_is_filestream(ip))
2563 xfs_filestream_deassociate(ip);
2564
2549 vn_trace_exit(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address); 2565 vn_trace_exit(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address);
2550 2566
2551 /* 2567 /*
@@ -4047,22 +4063,16 @@ xfs_alloc_file_space(
4047 if (XFS_FORCED_SHUTDOWN(mp)) 4063 if (XFS_FORCED_SHUTDOWN(mp))
4048 return XFS_ERROR(EIO); 4064 return XFS_ERROR(EIO);
4049 4065
4050 rt = XFS_IS_REALTIME_INODE(ip);
4051 if (unlikely(rt)) {
4052 if (!(extsz = ip->i_d.di_extsize))
4053 extsz = mp->m_sb.sb_rextsize;
4054 } else {
4055 extsz = ip->i_d.di_extsize;
4056 }
4057
4058 if ((error = XFS_QM_DQATTACH(mp, ip, 0))) 4066 if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
4059 return error; 4067 return error;
4060 4068
4061 if (len <= 0) 4069 if (len <= 0)
4062 return XFS_ERROR(EINVAL); 4070 return XFS_ERROR(EINVAL);
4063 4071
4072 rt = XFS_IS_REALTIME_INODE(ip);
4073 extsz = xfs_get_extsz_hint(ip);
4074
4064 count = len; 4075 count = len;
4065 error = 0;
4066 imapp = &imaps[0]; 4076 imapp = &imaps[0];
4067 nimaps = 1; 4077 nimaps = 1;
4068 bmapi_flag = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0); 4078 bmapi_flag = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0);
@@ -4678,7 +4688,6 @@ xfs_change_file_space(
4678bhv_vnodeops_t xfs_vnodeops = { 4688bhv_vnodeops_t xfs_vnodeops = {
4679 BHV_IDENTITY_INIT(VN_BHV_XFS,VNODE_POSITION_XFS), 4689 BHV_IDENTITY_INIT(VN_BHV_XFS,VNODE_POSITION_XFS),
4680 .vop_open = xfs_open, 4690 .vop_open = xfs_open,
4681 .vop_close = xfs_close,
4682 .vop_read = xfs_read, 4691 .vop_read = xfs_read,
4683#ifdef HAVE_SPLICE 4692#ifdef HAVE_SPLICE
4684 .vop_splice_read = xfs_splice_read, 4693 .vop_splice_read = xfs_splice_read,