aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/9p.c359
-rw-r--r--fs/9p/9p.h341
-rw-r--r--fs/9p/Makefile17
-rw-r--r--fs/9p/conv.c693
-rw-r--r--fs/9p/conv.h36
-rw-r--r--fs/9p/debug.h70
-rw-r--r--fs/9p/error.c93
-rw-r--r--fs/9p/error.h178
-rw-r--r--fs/9p/fid.c241
-rw-r--r--fs/9p/fid.h57
-rw-r--r--fs/9p/mux.c475
-rw-r--r--fs/9p/mux.h41
-rw-r--r--fs/9p/trans_fd.c172
-rw-r--r--fs/9p/trans_sock.c290
-rw-r--r--fs/9p/transport.h46
-rw-r--r--fs/9p/v9fs.c452
-rw-r--r--fs/9p/v9fs.h103
-rw-r--r--fs/9p/v9fs_vfs.h53
-rw-r--r--fs/9p/vfs_dentry.c126
-rw-r--r--fs/9p/vfs_dir.c226
-rw-r--r--fs/9p/vfs_file.c401
-rw-r--r--fs/9p/vfs_inode.c1338
-rw-r--r--fs/9p/vfs_super.c280
-rw-r--r--fs/Kconfig88
-rw-r--r--fs/Makefile3
-rw-r--r--fs/affs/inode.c1
-rw-r--r--fs/aio.c38
-rw-r--r--fs/autofs/autofs_i.h3
-rw-r--r--fs/autofs/dirhash.c5
-rw-r--r--fs/autofs/inode.c3
-rw-r--r--fs/bfs/bfs.h1
-rw-r--r--fs/bfs/dir.c25
-rw-r--r--fs/bfs/file.c23
-rw-r--r--fs/bfs/inode.c104
-rw-r--r--fs/binfmt_flat.c2
-rw-r--r--fs/bio.c259
-rw-r--r--fs/buffer.c41
-rw-r--r--fs/cifs/connect.c88
-rw-r--r--fs/cifs/dir.c27
-rw-r--r--fs/compat.c122
-rw-r--r--fs/compat_ioctl.c7
-rw-r--r--fs/cramfs/inode.c43
-rw-r--r--fs/cramfs/uncompress.c1
-rw-r--r--fs/dcache.c16
-rw-r--r--fs/devpts/Makefile1
-rw-r--r--fs/devpts/inode.c21
-rw-r--r--fs/devpts/xattr_security.c47
-rw-r--r--fs/exec.c8
-rw-r--r--fs/ext2/ialloc.c5
-rw-r--r--fs/ext2/inode.c2
-rw-r--r--fs/ext2/super.c59
-rw-r--r--fs/ext2/xattr.h8
-rw-r--r--fs/ext2/xattr_security.c22
-rw-r--r--fs/ext3/ialloc.c5
-rw-r--r--fs/ext3/inode.c2
-rw-r--r--fs/ext3/super.c92
-rw-r--r--fs/ext3/xattr.h11
-rw-r--r--fs/ext3/xattr_security.c22
-rw-r--r--fs/fat/dir.c28
-rw-r--r--fs/fat/inode.c2
-rw-r--r--fs/fcntl.c60
-rw-r--r--fs/file.c387
-rw-r--r--fs/file_table.c41
-rw-r--r--fs/freevxfs/vxfs_super.c2
-rw-r--r--fs/fuse/Makefile7
-rw-r--r--fs/fuse/dev.c877
-rw-r--r--fs/fuse/dir.c982
-rw-r--r--fs/fuse/file.c555
-rw-r--r--fs/fuse/fuse_i.h451
-rw-r--r--fs/fuse/inode.c591
-rw-r--r--fs/hfs/bnode.c21
-rw-r--r--fs/hfs/catalog.c35
-rw-r--r--fs/hfs/dir.c11
-rw-r--r--fs/hfs/hfs.h1
-rw-r--r--fs/hfs/hfs_fs.h8
-rw-r--r--fs/hfs/inode.c2
-rw-r--r--fs/hfs/mdb.c6
-rw-r--r--fs/hfs/super.c68
-rw-r--r--fs/hfs/trans.c116
-rw-r--r--fs/hfsplus/bnode.c21
-rw-r--r--fs/hfsplus/hfsplus_fs.h5
-rw-r--r--fs/hfsplus/options.c26
-rw-r--r--fs/hfsplus/super.c11
-rw-r--r--fs/hostfs/hostfs.h1
-rw-r--r--fs/hostfs/hostfs_kern.c1
-rw-r--r--fs/hpfs/inode.c1
-rw-r--r--fs/inode.c15
-rw-r--r--fs/inotify.c16
-rw-r--r--fs/jbd/checkpoint.c2
-rw-r--r--fs/jbd/commit.c38
-rw-r--r--fs/jbd/journal.c38
-rw-r--r--fs/jbd/revoke.c5
-rw-r--r--fs/jbd/transaction.c42
-rw-r--r--fs/jffs/inode-v23.c4
-rw-r--r--fs/jffs/intrep.c22
-rw-r--r--fs/jffs2/file.c3
-rw-r--r--fs/jfs/inode.c2
-rw-r--r--fs/jfs/jfs_filsys.h3
-rw-r--r--fs/jfs/super.c48
-rw-r--r--fs/lockd/clntproc.c3
-rw-r--r--fs/locks.c8
-rw-r--r--fs/minix/inode.c1
-rw-r--r--fs/namei.c78
-rw-r--r--fs/namespace.c10
-rw-r--r--fs/ncpfs/inode.c2
-rw-r--r--fs/nfs/inode.c2
-rw-r--r--fs/nfs/nfs3proc.c3
-rw-r--r--fs/nfs/nfs4proc.c12
-rw-r--r--fs/nfsd/export.c3
-rw-r--r--fs/nfsd/nfs4idmap.c8
-rw-r--r--fs/nfsd/nfs4recover.c5
-rw-r--r--fs/ntfs/ChangeLog70
-rw-r--r--fs/ntfs/Makefile2
-rw-r--r--fs/ntfs/aops.c294
-rw-r--r--fs/ntfs/attrib.c125
-rw-r--r--fs/ntfs/attrib.h2
-rw-r--r--fs/ntfs/compress.c8
-rw-r--r--fs/ntfs/dir.c3
-rw-r--r--fs/ntfs/file.c9
-rw-r--r--fs/ntfs/index.c1
-rw-r--r--fs/ntfs/inode.c227
-rw-r--r--fs/ntfs/lcnalloc.c39
-rw-r--r--fs/ntfs/lcnalloc.h21
-rw-r--r--fs/ntfs/logfile.c251
-rw-r--r--fs/ntfs/logfile.h8
-rw-r--r--fs/ntfs/malloc.h48
-rw-r--r--fs/ntfs/mft.c4
-rw-r--r--fs/ntfs/runlist.c374
-rw-r--r--fs/ntfs/runlist.h3
-rw-r--r--fs/ntfs/super.c16
-rw-r--r--fs/ntfs/unistr.c3
-rw-r--r--fs/open.c62
-rw-r--r--fs/pipe.c19
-rw-r--r--fs/proc/array.c5
-rw-r--r--fs/proc/base.c192
-rw-r--r--fs/proc/generic.c13
-rw-r--r--fs/proc/inode.c2
-rw-r--r--fs/proc/task_mmu.c357
-rw-r--r--fs/qnx4/inode.c1
-rw-r--r--fs/read_write.c2
-rw-r--r--fs/reiserfs/inode.c2
-rw-r--r--fs/reiserfs/journal.c7
-rw-r--r--fs/reiserfs/super.c3
-rw-r--r--fs/relayfs/Makefile4
-rw-r--r--fs/relayfs/buffers.c189
-rw-r--r--fs/relayfs/buffers.h12
-rw-r--r--fs/relayfs/inode.c609
-rw-r--r--fs/relayfs/relay.c431
-rw-r--r--fs/relayfs/relay.h12
-rw-r--r--fs/select.c23
-rw-r--r--fs/smbfs/inode.c1
-rw-r--r--fs/smbfs/proc.c3
-rw-r--r--fs/sysv/inode.c1
-rw-r--r--fs/udf/inode.c2
-rw-r--r--fs/ufs/balloc.c12
-rw-r--r--fs/ufs/ialloc.c6
-rw-r--r--fs/ufs/inode.c1
-rw-r--r--fs/ufs/truncate.c9
-rw-r--r--fs/umsdos/notes17
-rw-r--r--fs/xattr.c82
-rw-r--r--fs/xfs/Kconfig45
-rw-r--r--fs/xfs/Makefile151
-rw-r--r--fs/xfs/Makefile-linux-2.6152
-rw-r--r--fs/xfs/linux-2.6/kmem.c23
-rw-r--r--fs/xfs/linux-2.6/kmem.h23
-rw-r--r--fs/xfs/linux-2.6/spin.h3
-rw-r--r--fs/xfs/linux-2.6/time.h3
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c259
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.h50
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c123
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h12
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c90
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c18
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.c65
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c15
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h13
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.h7
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c178
-rw-r--r--fs/xfs/linux-2.6/xfs_vfs.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_vfs.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.c251
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.h60
-rw-r--r--fs/xfs/quota/Makefile1
-rw-r--r--fs/xfs/quota/Makefile-linux-2.653
-rw-r--r--fs/xfs/quota/xfs_dquot.c43
-rw-r--r--fs/xfs/quota/xfs_dquot.h16
-rw-r--r--fs/xfs/quota/xfs_dquot_item.c1
-rw-r--r--fs/xfs/quota/xfs_qm.c26
-rw-r--r--fs/xfs/quota/xfs_qm.h2
-rw-r--r--fs/xfs/quota/xfs_qm_bhv.c44
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c16
-rw-r--r--fs/xfs/support/debug.c1
-rw-r--r--fs/xfs/support/ktrace.c2
-rw-r--r--fs/xfs/xfs_acl.c6
-rw-r--r--fs/xfs/xfs_arch.h22
-rw-r--r--fs/xfs/xfs_bmap.c12
-rw-r--r--fs/xfs/xfs_bmap_btree.c8
-rw-r--r--fs/xfs/xfs_bmap_btree.h12
-rw-r--r--fs/xfs/xfs_buf_item.c4
-rw-r--r--fs/xfs/xfs_dir_leaf.h6
-rw-r--r--fs/xfs/xfs_dmapi.h2
-rw-r--r--fs/xfs/xfs_extfree_item.c2
-rw-r--r--fs/xfs/xfs_iget.c35
-rw-r--r--fs/xfs/xfs_inode.c3
-rw-r--r--fs/xfs/xfs_inode_item.c13
-rw-r--r--fs/xfs/xfs_iomap.c22
-rw-r--r--fs/xfs/xfs_log.c215
-rw-r--r--fs/xfs/xfs_log.h38
-rw-r--r--fs/xfs/xfs_log_priv.h78
-rw-r--r--fs/xfs/xfs_log_recover.c2
-rw-r--r--fs/xfs/xfs_qmops.c78
-rw-r--r--fs/xfs/xfs_quota.h17
-rw-r--r--fs/xfs/xfs_trans.c3
-rw-r--r--fs/xfs/xfs_trans.h2
-rw-r--r--fs/xfs/xfs_trans_ail.c2
-rw-r--r--fs/xfs/xfs_trans_buf.c23
-rw-r--r--fs/xfs/xfs_vfsops.c62
-rw-r--r--fs/xfs/xfs_vnodeops.c92
219 files changed, 15415 insertions, 2666 deletions
diff --git a/fs/9p/9p.c b/fs/9p/9p.c
new file mode 100644
index 000000000000..e847f504a47c
--- /dev/null
+++ b/fs/9p/9p.c
@@ -0,0 +1,359 @@
1/*
2 * linux/fs/9p/9p.c
3 *
4 * This file contains functions 9P2000 functions
5 *
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to:
21 * Free Software Foundation
22 * 51 Franklin Street, Fifth Floor
23 * Boston, MA 02111-1301 USA
24 *
25 */
26
27#include <linux/config.h>
28#include <linux/module.h>
29#include <linux/errno.h>
30#include <linux/fs.h>
31#include <linux/idr.h>
32
33#include "debug.h"
34#include "v9fs.h"
35#include "9p.h"
36#include "mux.h"
37
38/**
39 * v9fs_t_version - negotiate protocol parameters with sever
40 * @v9ses: 9P2000 session information
41 * @msize: requested max size packet
42 * @version: requested version.extension string
43 * @fcall: pointer to response fcall pointer
44 *
45 */
46
47int
48v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
49 char *version, struct v9fs_fcall **fcall)
50{
51 struct v9fs_fcall msg;
52
53 dprintk(DEBUG_9P, "msize: %d version: %s\n", msize, version);
54 msg.id = TVERSION;
55 msg.params.tversion.msize = msize;
56 msg.params.tversion.version = version;
57
58 return v9fs_mux_rpc(v9ses, &msg, fcall);
59}
60
61/**
62 * v9fs_t_attach - mount the server
63 * @v9ses: 9P2000 session information
64 * @uname: user name doing the attach
65 * @aname: remote name being attached to
66 * @fid: mount fid to attatch to root node
67 * @afid: authentication fid (in this case result key)
68 * @fcall: pointer to response fcall pointer
69 *
70 */
71
72int
73v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname,
74 u32 fid, u32 afid, struct v9fs_fcall **fcall)
75{
76 struct v9fs_fcall msg;
77
78 dprintk(DEBUG_9P, "uname '%s' aname '%s' fid %d afid %d\n", uname,
79 aname, fid, afid);
80 msg.id = TATTACH;
81 msg.params.tattach.fid = fid;
82 msg.params.tattach.afid = afid;
83 msg.params.tattach.uname = uname;
84 msg.params.tattach.aname = aname;
85
86 return v9fs_mux_rpc(v9ses, &msg, fcall);
87}
88
89/**
90 * v9fs_t_clunk - release a fid (finish a transaction)
91 * @v9ses: 9P2000 session information
92 * @fid: fid to release
93 * @fcall: pointer to response fcall pointer
94 *
95 */
96
97int
98v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid,
99 struct v9fs_fcall **fcall)
100{
101 struct v9fs_fcall msg;
102
103 dprintk(DEBUG_9P, "fid %d\n", fid);
104 msg.id = TCLUNK;
105 msg.params.tclunk.fid = fid;
106
107 return v9fs_mux_rpc(v9ses, &msg, fcall);
108}
109
110/**
111 * v9fs_v9fs_t_flush - flush a pending transaction
112 * @v9ses: 9P2000 session information
113 * @tag: tid to release
114 *
115 */
116
117int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 tag)
118{
119 struct v9fs_fcall msg;
120
121 dprintk(DEBUG_9P, "oldtag %d\n", tag);
122 msg.id = TFLUSH;
123 msg.params.tflush.oldtag = tag;
124 return v9fs_mux_rpc(v9ses, &msg, NULL);
125}
126
127/**
128 * v9fs_t_stat - read a file's meta-data
129 * @v9ses: 9P2000 session information
130 * @fid: fid pointing to file or directory to get info about
131 * @fcall: pointer to response fcall
132 *
133 */
134
135int
136v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **fcall)
137{
138 struct v9fs_fcall msg;
139
140 dprintk(DEBUG_9P, "fid %d\n", fid);
141 if (fcall)
142 *fcall = NULL;
143
144 msg.id = TSTAT;
145 msg.params.tstat.fid = fid;
146 return v9fs_mux_rpc(v9ses, &msg, fcall);
147}
148
149/**
150 * v9fs_t_wstat - write a file's meta-data
151 * @v9ses: 9P2000 session information
152 * @fid: fid pointing to file or directory to write info about
153 * @stat: metadata
154 * @fcall: pointer to response fcall
155 *
156 */
157
158int
159v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid,
160 struct v9fs_stat *stat, struct v9fs_fcall **fcall)
161{
162 struct v9fs_fcall msg;
163
164 dprintk(DEBUG_9P, "fid %d length %d\n", fid, (int)stat->length);
165 msg.id = TWSTAT;
166 msg.params.twstat.fid = fid;
167 msg.params.twstat.stat = stat;
168
169 return v9fs_mux_rpc(v9ses, &msg, fcall);
170}
171
172/**
173 * v9fs_t_walk - walk a fid to a new file or directory
174 * @v9ses: 9P2000 session information
175 * @fid: fid to walk
176 * @newfid: new fid (for clone operations)
177 * @name: path to walk fid to
178 * @fcall: pointer to response fcall
179 *
180 */
181
182/* TODO: support multiple walk */
183
184int
185v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid,
186 char *name, struct v9fs_fcall **fcall)
187{
188 struct v9fs_fcall msg;
189
190 dprintk(DEBUG_9P, "fid %d newfid %d wname '%s'\n", fid, newfid, name);
191 msg.id = TWALK;
192 msg.params.twalk.fid = fid;
193 msg.params.twalk.newfid = newfid;
194
195 if (name) {
196 msg.params.twalk.nwname = 1;
197 msg.params.twalk.wnames = &name;
198 } else {
199 msg.params.twalk.nwname = 0;
200 }
201
202 return v9fs_mux_rpc(v9ses, &msg, fcall);
203}
204
205/**
206 * v9fs_t_open - open a file
207 *
208 * @v9ses - 9P2000 session information
209 * @fid - fid to open
210 * @mode - mode to open file (R, RW, etc)
211 * @fcall - pointer to response fcall
212 *
213 */
214
215int
216v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode,
217 struct v9fs_fcall **fcall)
218{
219 struct v9fs_fcall msg;
220 long errorno = -1;
221
222 dprintk(DEBUG_9P, "fid %d mode %d\n", fid, mode);
223 msg.id = TOPEN;
224 msg.params.topen.fid = fid;
225 msg.params.topen.mode = mode;
226
227 errorno = v9fs_mux_rpc(v9ses, &msg, fcall);
228
229 return errorno;
230}
231
232/**
233 * v9fs_t_remove - remove a file or directory
234 * @v9ses: 9P2000 session information
235 * @fid: fid to remove
236 * @fcall: pointer to response fcall
237 *
238 */
239
240int
241v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid,
242 struct v9fs_fcall **fcall)
243{
244 struct v9fs_fcall msg;
245
246 dprintk(DEBUG_9P, "fid %d\n", fid);
247 msg.id = TREMOVE;
248 msg.params.tremove.fid = fid;
249 return v9fs_mux_rpc(v9ses, &msg, fcall);
250}
251
252/**
253 * v9fs_t_create - create a file or directory
254 * @v9ses: 9P2000 session information
255 * @fid: fid to create
256 * @name: name of the file or directory to create
257 * @perm: permissions to create with
258 * @mode: mode to open file (R, RW, etc)
259 * @fcall: pointer to response fcall
260 *
261 */
262
263int
264v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name,
265 u32 perm, u8 mode, struct v9fs_fcall **fcall)
266{
267 struct v9fs_fcall msg;
268
269 dprintk(DEBUG_9P, "fid %d name '%s' perm %x mode %d\n",
270 fid, name, perm, mode);
271
272 msg.id = TCREATE;
273 msg.params.tcreate.fid = fid;
274 msg.params.tcreate.name = name;
275 msg.params.tcreate.perm = perm;
276 msg.params.tcreate.mode = mode;
277
278 return v9fs_mux_rpc(v9ses, &msg, fcall);
279}
280
281/**
282 * v9fs_t_read - read data
283 * @v9ses: 9P2000 session information
284 * @fid: fid to read from
285 * @offset: offset to start read at
286 * @count: how many bytes to read
287 * @fcall: pointer to response fcall (with data)
288 *
289 */
290
291int
292v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, u64 offset,
293 u32 count, struct v9fs_fcall **fcall)
294{
295 struct v9fs_fcall msg;
296 struct v9fs_fcall *rc = NULL;
297 long errorno = -1;
298
299 dprintk(DEBUG_9P, "fid %d offset 0x%lx count 0x%x\n", fid,
300 (long unsigned int)offset, count);
301 msg.id = TREAD;
302 msg.params.tread.fid = fid;
303 msg.params.tread.offset = offset;
304 msg.params.tread.count = count;
305 errorno = v9fs_mux_rpc(v9ses, &msg, &rc);
306
307 if (!errorno) {
308 errorno = rc->params.rread.count;
309 dump_data(rc->params.rread.data, rc->params.rread.count);
310 }
311
312 if (fcall)
313 *fcall = rc;
314 else
315 kfree(rc);
316
317 return errorno;
318}
319
320/**
321 * v9fs_t_write - write data
322 * @v9ses: 9P2000 session information
323 * @fid: fid to write to
324 * @offset: offset to start write at
325 * @count: how many bytes to write
326 * @fcall: pointer to response fcall
327 *
328 */
329
330int
331v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid,
332 u64 offset, u32 count, void *data, struct v9fs_fcall **fcall)
333{
334 struct v9fs_fcall msg;
335 struct v9fs_fcall *rc = NULL;
336 long errorno = -1;
337
338 dprintk(DEBUG_9P, "fid %d offset 0x%llx count 0x%x\n", fid,
339 (unsigned long long)offset, count);
340 dump_data(data, count);
341
342 msg.id = TWRITE;
343 msg.params.twrite.fid = fid;
344 msg.params.twrite.offset = offset;
345 msg.params.twrite.count = count;
346 msg.params.twrite.data = data;
347
348 errorno = v9fs_mux_rpc(v9ses, &msg, &rc);
349
350 if (!errorno)
351 errorno = rc->params.rwrite.count;
352
353 if (fcall)
354 *fcall = rc;
355 else
356 kfree(rc);
357
358 return errorno;
359}
diff --git a/fs/9p/9p.h b/fs/9p/9p.h
new file mode 100644
index 000000000000..f55424216be2
--- /dev/null
+++ b/fs/9p/9p.h
@@ -0,0 +1,341 @@
1/*
2 * linux/fs/9p/9p.h
3 *
4 * 9P protocol definitions.
5 *
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to:
21 * Free Software Foundation
22 * 51 Franklin Street, Fifth Floor
23 * Boston, MA 02111-1301 USA
24 *
25 */
26
27/* Message Types */
28enum {
29 TVERSION = 100,
30 RVERSION,
31 TAUTH = 102,
32 RAUTH,
33 TATTACH = 104,
34 RATTACH,
35 TERROR = 106,
36 RERROR,
37 TFLUSH = 108,
38 RFLUSH,
39 TWALK = 110,
40 RWALK,
41 TOPEN = 112,
42 ROPEN,
43 TCREATE = 114,
44 RCREATE,
45 TREAD = 116,
46 RREAD,
47 TWRITE = 118,
48 RWRITE,
49 TCLUNK = 120,
50 RCLUNK,
51 TREMOVE = 122,
52 RREMOVE,
53 TSTAT = 124,
54 RSTAT,
55 TWSTAT = 126,
56 RWSTAT,
57};
58
59/* modes */
60enum {
61 V9FS_OREAD = 0x00,
62 V9FS_OWRITE = 0x01,
63 V9FS_ORDWR = 0x02,
64 V9FS_OEXEC = 0x03,
65 V9FS_OEXCL = 0x04,
66 V9FS_OTRUNC = 0x10,
67 V9FS_OREXEC = 0x20,
68 V9FS_ORCLOSE = 0x40,
69 V9FS_OAPPEND = 0x80,
70};
71
72/* permissions */
73enum {
74 V9FS_DMDIR = 0x80000000,
75 V9FS_DMAPPEND = 0x40000000,
76 V9FS_DMEXCL = 0x20000000,
77 V9FS_DMMOUNT = 0x10000000,
78 V9FS_DMAUTH = 0x08000000,
79 V9FS_DMTMP = 0x04000000,
80 V9FS_DMSYMLINK = 0x02000000,
81 V9FS_DMLINK = 0x01000000,
82 /* 9P2000.u extensions */
83 V9FS_DMDEVICE = 0x00800000,
84 V9FS_DMNAMEDPIPE = 0x00200000,
85 V9FS_DMSOCKET = 0x00100000,
86 V9FS_DMSETUID = 0x00080000,
87 V9FS_DMSETGID = 0x00040000,
88};
89
90/* qid.types */
91enum {
92 V9FS_QTDIR = 0x80,
93 V9FS_QTAPPEND = 0x40,
94 V9FS_QTEXCL = 0x20,
95 V9FS_QTMOUNT = 0x10,
96 V9FS_QTAUTH = 0x08,
97 V9FS_QTTMP = 0x04,
98 V9FS_QTSYMLINK = 0x02,
99 V9FS_QTLINK = 0x01,
100 V9FS_QTFILE = 0x00,
101};
102
103/* ample room for Twrite/Rread header (iounit) */
104#define V9FS_IOHDRSZ 24
105
106/* qids are the unique ID for a file (like an inode */
107struct v9fs_qid {
108 u8 type;
109 u32 version;
110 u64 path;
111};
112
113/* Plan 9 file metadata (stat) structure */
114struct v9fs_stat {
115 u16 size;
116 u16 type;
117 u32 dev;
118 struct v9fs_qid qid;
119 u32 mode;
120 u32 atime;
121 u32 mtime;
122 u64 length;
123 char *name;
124 char *uid;
125 char *gid;
126 char *muid;
127 char *extension; /* 9p2000.u extensions */
128 u32 n_uid; /* 9p2000.u extensions */
129 u32 n_gid; /* 9p2000.u extensions */
130 u32 n_muid; /* 9p2000.u extensions */
131 char data[0];
132};
133
134/* Structures for Protocol Operations */
135
136struct Tversion {
137 u32 msize;
138 char *version;
139};
140
141struct Rversion {
142 u32 msize;
143 char *version;
144};
145
146struct Tauth {
147 u32 afid;
148 char *uname;
149 char *aname;
150};
151
152struct Rauth {
153 struct v9fs_qid qid;
154};
155
156struct Rerror {
157 char *error;
158 u32 errno; /* 9p2000.u extension */
159};
160
161struct Tflush {
162 u32 oldtag;
163};
164
165struct Rflush {
166};
167
168struct Tattach {
169 u32 fid;
170 u32 afid;
171 char *uname;
172 char *aname;
173};
174
175struct Rattach {
176 struct v9fs_qid qid;
177};
178
179struct Twalk {
180 u32 fid;
181 u32 newfid;
182 u32 nwname;
183 char **wnames;
184};
185
186struct Rwalk {
187 u32 nwqid;
188 struct v9fs_qid *wqids;
189};
190
191struct Topen {
192 u32 fid;
193 u8 mode;
194};
195
196struct Ropen {
197 struct v9fs_qid qid;
198 u32 iounit;
199};
200
201struct Tcreate {
202 u32 fid;
203 char *name;
204 u32 perm;
205 u8 mode;
206};
207
208struct Rcreate {
209 struct v9fs_qid qid;
210 u32 iounit;
211};
212
213struct Tread {
214 u32 fid;
215 u64 offset;
216 u32 count;
217};
218
219struct Rread {
220 u32 count;
221 u8 *data;
222};
223
224struct Twrite {
225 u32 fid;
226 u64 offset;
227 u32 count;
228 u8 *data;
229};
230
231struct Rwrite {
232 u32 count;
233};
234
235struct Tclunk {
236 u32 fid;
237};
238
239struct Rclunk {
240};
241
242struct Tremove {
243 u32 fid;
244};
245
246struct Rremove {
247};
248
249struct Tstat {
250 u32 fid;
251};
252
253struct Rstat {
254 struct v9fs_stat *stat;
255};
256
257struct Twstat {
258 u32 fid;
259 struct v9fs_stat *stat;
260};
261
262struct Rwstat {
263};
264
265/*
266 * fcall is the primary packet structure
267 *
268 */
269
270struct v9fs_fcall {
271 u32 size;
272 u8 id;
273 u16 tag;
274
275 union {
276 struct Tversion tversion;
277 struct Rversion rversion;
278 struct Tauth tauth;
279 struct Rauth rauth;
280 struct Rerror rerror;
281 struct Tflush tflush;
282 struct Rflush rflush;
283 struct Tattach tattach;
284 struct Rattach rattach;
285 struct Twalk twalk;
286 struct Rwalk rwalk;
287 struct Topen topen;
288 struct Ropen ropen;
289 struct Tcreate tcreate;
290 struct Rcreate rcreate;
291 struct Tread tread;
292 struct Rread rread;
293 struct Twrite twrite;
294 struct Rwrite rwrite;
295 struct Tclunk tclunk;
296 struct Rclunk rclunk;
297 struct Tremove tremove;
298 struct Rremove rremove;
299 struct Tstat tstat;
300 struct Rstat rstat;
301 struct Twstat twstat;
302 struct Rwstat rwstat;
303 } params;
304};
305
306#define FCALL_ERROR(fcall) (fcall ? fcall->params.rerror.error : "")
307
308int v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
309 char *version, struct v9fs_fcall **rcall);
310
311int v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname,
312 u32 fid, u32 afid, struct v9fs_fcall **rcall);
313
314int v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid,
315 struct v9fs_fcall **rcall);
316
317int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 oldtag);
318
319int v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid,
320 struct v9fs_fcall **rcall);
321
322int v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid,
323 struct v9fs_stat *stat, struct v9fs_fcall **rcall);
324
325int v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid,
326 char *name, struct v9fs_fcall **rcall);
327
328int v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode,
329 struct v9fs_fcall **rcall);
330
331int v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid,
332 struct v9fs_fcall **rcall);
333
334int v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name,
335 u32 perm, u8 mode, struct v9fs_fcall **rcall);
336
337int v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid,
338 u64 offset, u32 count, struct v9fs_fcall **rcall);
339
340int v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset,
341 u32 count, void *data, struct v9fs_fcall **rcall);
diff --git a/fs/9p/Makefile b/fs/9p/Makefile
new file mode 100644
index 000000000000..e4e4ffe5a7dc
--- /dev/null
+++ b/fs/9p/Makefile
@@ -0,0 +1,17 @@
1obj-$(CONFIG_9P_FS) := 9p2000.o
2
39p2000-objs := \
4 vfs_super.o \
5 vfs_inode.o \
6 vfs_file.o \
7 vfs_dir.o \
8 vfs_dentry.o \
9 error.o \
10 mux.o \
11 trans_fd.o \
12 trans_sock.o \
13 9p.o \
14 conv.o \
15 v9fs.o \
16 fid.o
17
diff --git a/fs/9p/conv.c b/fs/9p/conv.c
new file mode 100644
index 000000000000..1554731bd653
--- /dev/null
+++ b/fs/9p/conv.c
@@ -0,0 +1,693 @@
1/*
2 * linux/fs/9p/conv.c
3 *
4 * 9P protocol conversion functions
5 *
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to:
21 * Free Software Foundation
22 * 51 Franklin Street, Fifth Floor
23 * Boston, MA 02111-1301 USA
24 *
25 */
26
27#include <linux/config.h>
28#include <linux/module.h>
29#include <linux/errno.h>
30#include <linux/fs.h>
31#include <linux/idr.h>
32
33#include "debug.h"
34#include "v9fs.h"
35#include "9p.h"
36#include "conv.h"
37
38/*
39 * Buffer to help with string parsing
40 */
41struct cbuf {
42 unsigned char *sp;
43 unsigned char *p;
44 unsigned char *ep;
45};
46
47static inline void buf_init(struct cbuf *buf, void *data, int datalen)
48{
49 buf->sp = buf->p = data;
50 buf->ep = data + datalen;
51}
52
53static inline int buf_check_overflow(struct cbuf *buf)
54{
55 return buf->p > buf->ep;
56}
57
58static inline void buf_check_size(struct cbuf *buf, int len)
59{
60 if (buf->p+len > buf->ep) {
61 if (buf->p < buf->ep) {
62 eprintk(KERN_ERR, "buffer overflow\n");
63 buf->p = buf->ep + 1;
64 }
65 }
66}
67
68static inline void *buf_alloc(struct cbuf *buf, int len)
69{
70 void *ret = NULL;
71
72 buf_check_size(buf, len);
73 ret = buf->p;
74 buf->p += len;
75
76 return ret;
77}
78
79static inline void buf_put_int8(struct cbuf *buf, u8 val)
80{
81 buf_check_size(buf, 1);
82
83 buf->p[0] = val;
84 buf->p++;
85}
86
87static inline void buf_put_int16(struct cbuf *buf, u16 val)
88{
89 buf_check_size(buf, 2);
90
91 *(__le16 *) buf->p = cpu_to_le16(val);
92 buf->p += 2;
93}
94
95static inline void buf_put_int32(struct cbuf *buf, u32 val)
96{
97 buf_check_size(buf, 4);
98
99 *(__le32 *)buf->p = cpu_to_le32(val);
100 buf->p += 4;
101}
102
103static inline void buf_put_int64(struct cbuf *buf, u64 val)
104{
105 buf_check_size(buf, 8);
106
107 *(__le64 *)buf->p = cpu_to_le64(val);
108 buf->p += 8;
109}
110
111static inline void buf_put_stringn(struct cbuf *buf, const char *s, u16 slen)
112{
113 buf_check_size(buf, slen + 2);
114
115 buf_put_int16(buf, slen);
116 memcpy(buf->p, s, slen);
117 buf->p += slen;
118}
119
120static inline void buf_put_string(struct cbuf *buf, const char *s)
121{
122 buf_put_stringn(buf, s, strlen(s));
123}
124
125static inline void buf_put_data(struct cbuf *buf, void *data, u32 datalen)
126{
127 buf_check_size(buf, datalen);
128
129 memcpy(buf->p, data, datalen);
130 buf->p += datalen;
131}
132
133static inline u8 buf_get_int8(struct cbuf *buf)
134{
135 u8 ret = 0;
136
137 buf_check_size(buf, 1);
138 ret = buf->p[0];
139
140 buf->p++;
141
142 return ret;
143}
144
145static inline u16 buf_get_int16(struct cbuf *buf)
146{
147 u16 ret = 0;
148
149 buf_check_size(buf, 2);
150 ret = le16_to_cpu(*(__le16 *)buf->p);
151
152 buf->p += 2;
153
154 return ret;
155}
156
157static inline u32 buf_get_int32(struct cbuf *buf)
158{
159 u32 ret = 0;
160
161 buf_check_size(buf, 4);
162 ret = le32_to_cpu(*(__le32 *)buf->p);
163
164 buf->p += 4;
165
166 return ret;
167}
168
169static inline u64 buf_get_int64(struct cbuf *buf)
170{
171 u64 ret = 0;
172
173 buf_check_size(buf, 8);
174 ret = le64_to_cpu(*(__le64 *)buf->p);
175
176 buf->p += 8;
177
178 return ret;
179}
180
181static inline int
182buf_get_string(struct cbuf *buf, char *data, unsigned int datalen)
183{
184
185 u16 len = buf_get_int16(buf);
186 buf_check_size(buf, len);
187 if (len + 1 > datalen)
188 return 0;
189
190 memcpy(data, buf->p, len);
191 data[len] = 0;
192 buf->p += len;
193
194 return len + 1;
195}
196
197static inline char *buf_get_stringb(struct cbuf *buf, struct cbuf *sbuf)
198{
199 char *ret = NULL;
200 int n = buf_get_string(buf, sbuf->p, sbuf->ep - sbuf->p);
201
202 if (n > 0) {
203 ret = sbuf->p;
204 sbuf->p += n;
205 }
206
207 return ret;
208}
209
210static inline int buf_get_data(struct cbuf *buf, void *data, int datalen)
211{
212 buf_check_size(buf, datalen);
213
214 memcpy(data, buf->p, datalen);
215 buf->p += datalen;
216
217 return datalen;
218}
219
220static inline void *buf_get_datab(struct cbuf *buf, struct cbuf *dbuf,
221 int datalen)
222{
223 char *ret = NULL;
224 int n = 0;
225
226 buf_check_size(dbuf, datalen);
227
228 n = buf_get_data(buf, dbuf->p, datalen);
229
230 if (n > 0) {
231 ret = dbuf->p;
232 dbuf->p += n;
233 }
234
235 return ret;
236}
237
238/**
239 * v9fs_size_stat - calculate the size of a variable length stat struct
240 * @v9ses: session information
241 * @stat: metadata (stat) structure
242 *
243 */
244
245static int v9fs_size_stat(struct v9fs_session_info *v9ses,
246 struct v9fs_stat *stat)
247{
248 int size = 0;
249
250 if (stat == NULL) {
251 eprintk(KERN_ERR, "v9fs_size_stat: got a NULL stat pointer\n");
252 return 0;
253 }
254
255 size = /* 2 + *//* size[2] */
256 2 + /* type[2] */
257 4 + /* dev[4] */
258 1 + /* qid.type[1] */
259 4 + /* qid.vers[4] */
260 8 + /* qid.path[8] */
261 4 + /* mode[4] */
262 4 + /* atime[4] */
263 4 + /* mtime[4] */
264 8 + /* length[8] */
265 8; /* minimum sum of string lengths */
266
267 if (stat->name)
268 size += strlen(stat->name);
269 if (stat->uid)
270 size += strlen(stat->uid);
271 if (stat->gid)
272 size += strlen(stat->gid);
273 if (stat->muid)
274 size += strlen(stat->muid);
275
276 if (v9ses->extended) {
277 size += 4 + /* n_uid[4] */
278 4 + /* n_gid[4] */
279 4 + /* n_muid[4] */
280 2; /* string length of extension[4] */
281 if (stat->extension)
282 size += strlen(stat->extension);
283 }
284
285 return size;
286}
287
288/**
289 * serialize_stat - safely format a stat structure for transmission
290 * @v9ses: session info
291 * @stat: metadata (stat) structure
292 * @bufp: buffer to serialize structure into
293 *
294 */
295
296static int
297serialize_stat(struct v9fs_session_info *v9ses, struct v9fs_stat *stat,
298 struct cbuf *bufp)
299{
300 buf_put_int16(bufp, stat->size);
301 buf_put_int16(bufp, stat->type);
302 buf_put_int32(bufp, stat->dev);
303 buf_put_int8(bufp, stat->qid.type);
304 buf_put_int32(bufp, stat->qid.version);
305 buf_put_int64(bufp, stat->qid.path);
306 buf_put_int32(bufp, stat->mode);
307 buf_put_int32(bufp, stat->atime);
308 buf_put_int32(bufp, stat->mtime);
309 buf_put_int64(bufp, stat->length);
310
311 buf_put_string(bufp, stat->name);
312 buf_put_string(bufp, stat->uid);
313 buf_put_string(bufp, stat->gid);
314 buf_put_string(bufp, stat->muid);
315
316 if (v9ses->extended) {
317 buf_put_string(bufp, stat->extension);
318 buf_put_int32(bufp, stat->n_uid);
319 buf_put_int32(bufp, stat->n_gid);
320 buf_put_int32(bufp, stat->n_muid);
321 }
322
323 if (buf_check_overflow(bufp))
324 return 0;
325
326 return stat->size;
327}
328
329/**
330 * deserialize_stat - safely decode a recieved metadata (stat) structure
331 * @v9ses: session info
332 * @bufp: buffer to deserialize
333 * @stat: metadata (stat) structure
334 * @dbufp: buffer to deserialize variable strings into
335 *
336 */
337
338static inline int
339deserialize_stat(struct v9fs_session_info *v9ses, struct cbuf *bufp,
340 struct v9fs_stat *stat, struct cbuf *dbufp)
341{
342
343 stat->size = buf_get_int16(bufp);
344 stat->type = buf_get_int16(bufp);
345 stat->dev = buf_get_int32(bufp);
346 stat->qid.type = buf_get_int8(bufp);
347 stat->qid.version = buf_get_int32(bufp);
348 stat->qid.path = buf_get_int64(bufp);
349 stat->mode = buf_get_int32(bufp);
350 stat->atime = buf_get_int32(bufp);
351 stat->mtime = buf_get_int32(bufp);
352 stat->length = buf_get_int64(bufp);
353 stat->name = buf_get_stringb(bufp, dbufp);
354 stat->uid = buf_get_stringb(bufp, dbufp);
355 stat->gid = buf_get_stringb(bufp, dbufp);
356 stat->muid = buf_get_stringb(bufp, dbufp);
357
358 if (v9ses->extended) {
359 stat->extension = buf_get_stringb(bufp, dbufp);
360 stat->n_uid = buf_get_int32(bufp);
361 stat->n_gid = buf_get_int32(bufp);
362 stat->n_muid = buf_get_int32(bufp);
363 }
364
365 if (buf_check_overflow(bufp) || buf_check_overflow(dbufp))
366 return 0;
367
368 return stat->size + 2;
369}
370
371/**
372 * deserialize_statb - wrapper for decoding a received metadata structure
373 * @v9ses: session info
374 * @bufp: buffer to deserialize
375 * @dbufp: buffer to deserialize variable strings into
376 *
377 */
378
379static inline struct v9fs_stat *deserialize_statb(struct v9fs_session_info
380 *v9ses, struct cbuf *bufp,
381 struct cbuf *dbufp)
382{
383 struct v9fs_stat *ret = buf_alloc(dbufp, sizeof(struct v9fs_stat));
384
385 if (ret) {
386 int n = deserialize_stat(v9ses, bufp, ret, dbufp);
387 if (n <= 0)
388 return NULL;
389 }
390
391 return ret;
392}
393
394/**
395 * v9fs_deserialize_stat - decode a received metadata structure
396 * @v9ses: session info
397 * @buf: buffer to deserialize
398 * @buflen: length of received buffer
399 * @stat: metadata structure to decode into
400 * @statlen: length of destination metadata structure
401 *
402 */
403
404int
405v9fs_deserialize_stat(struct v9fs_session_info *v9ses, void *buf,
406 u32 buflen, struct v9fs_stat *stat, u32 statlen)
407{
408 struct cbuf buffer;
409 struct cbuf *bufp = &buffer;
410 struct cbuf dbuffer;
411 struct cbuf *dbufp = &dbuffer;
412
413 buf_init(bufp, buf, buflen);
414 buf_init(dbufp, (char *)stat + sizeof(struct v9fs_stat),
415 statlen - sizeof(struct v9fs_stat));
416
417 return deserialize_stat(v9ses, bufp, stat, dbufp);
418}
419
420static inline int
421v9fs_size_fcall(struct v9fs_session_info *v9ses, struct v9fs_fcall *fcall)
422{
423 int size = 4 + 1 + 2; /* size[4] msg[1] tag[2] */
424 int i = 0;
425
426 switch (fcall->id) {
427 default:
428 eprintk(KERN_ERR, "bad msg type %d\n", fcall->id);
429 return 0;
430 case TVERSION: /* msize[4] version[s] */
431 size += 4 + 2 + strlen(fcall->params.tversion.version);
432 break;
433 case TAUTH: /* afid[4] uname[s] aname[s] */
434 size += 4 + 2 + strlen(fcall->params.tauth.uname) +
435 2 + strlen(fcall->params.tauth.aname);
436 break;
437 case TFLUSH: /* oldtag[2] */
438 size += 2;
439 break;
440 case TATTACH: /* fid[4] afid[4] uname[s] aname[s] */
441 size += 4 + 4 + 2 + strlen(fcall->params.tattach.uname) +
442 2 + strlen(fcall->params.tattach.aname);
443 break;
444 case TWALK: /* fid[4] newfid[4] nwname[2] nwname*(wname[s]) */
445 size += 4 + 4 + 2;
446 /* now compute total for the array of names */
447 for (i = 0; i < fcall->params.twalk.nwname; i++)
448 size += 2 + strlen(fcall->params.twalk.wnames[i]);
449 break;
450 case TOPEN: /* fid[4] mode[1] */
451 size += 4 + 1;
452 break;
453 case TCREATE: /* fid[4] name[s] perm[4] mode[1] */
454 size += 4 + 2 + strlen(fcall->params.tcreate.name) + 4 + 1;
455 break;
456 case TREAD: /* fid[4] offset[8] count[4] */
457 size += 4 + 8 + 4;
458 break;
459 case TWRITE: /* fid[4] offset[8] count[4] data[count] */
460 size += 4 + 8 + 4 + fcall->params.twrite.count;
461 break;
462 case TCLUNK: /* fid[4] */
463 size += 4;
464 break;
465 case TREMOVE: /* fid[4] */
466 size += 4;
467 break;
468 case TSTAT: /* fid[4] */
469 size += 4;
470 break;
471 case TWSTAT: /* fid[4] stat[n] */
472 fcall->params.twstat.stat->size =
473 v9fs_size_stat(v9ses, fcall->params.twstat.stat);
474 size += 4 + 2 + 2 + fcall->params.twstat.stat->size;
475 }
476 return size;
477}
478
479/*
480 * v9fs_serialize_fcall - marshall fcall struct into a packet
481 * @v9ses: session information
482 * @fcall: structure to convert
483 * @data: buffer to serialize fcall into
484 * @datalen: length of buffer to serialize fcall into
485 *
486 */
487
488int
489v9fs_serialize_fcall(struct v9fs_session_info *v9ses, struct v9fs_fcall *fcall,
490 void *data, u32 datalen)
491{
492 int i = 0;
493 struct v9fs_stat *stat = NULL;
494 struct cbuf buffer;
495 struct cbuf *bufp = &buffer;
496
497 buf_init(bufp, data, datalen);
498
499 if (!fcall) {
500 eprintk(KERN_ERR, "no fcall\n");
501 return -EINVAL;
502 }
503
504 fcall->size = v9fs_size_fcall(v9ses, fcall);
505
506 buf_put_int32(bufp, fcall->size);
507 buf_put_int8(bufp, fcall->id);
508 buf_put_int16(bufp, fcall->tag);
509
510 dprintk(DEBUG_CONV, "size %d id %d tag %d\n", fcall->size, fcall->id,
511 fcall->tag);
512
513 /* now encode it */
514 switch (fcall->id) {
515 default:
516 eprintk(KERN_ERR, "bad msg type: %d\n", fcall->id);
517 return -EPROTO;
518 case TVERSION:
519 buf_put_int32(bufp, fcall->params.tversion.msize);
520 buf_put_string(bufp, fcall->params.tversion.version);
521 break;
522 case TAUTH:
523 buf_put_int32(bufp, fcall->params.tauth.afid);
524 buf_put_string(bufp, fcall->params.tauth.uname);
525 buf_put_string(bufp, fcall->params.tauth.aname);
526 break;
527 case TFLUSH:
528 buf_put_int16(bufp, fcall->params.tflush.oldtag);
529 break;
530 case TATTACH:
531 buf_put_int32(bufp, fcall->params.tattach.fid);
532 buf_put_int32(bufp, fcall->params.tattach.afid);
533 buf_put_string(bufp, fcall->params.tattach.uname);
534 buf_put_string(bufp, fcall->params.tattach.aname);
535 break;
536 case TWALK:
537 buf_put_int32(bufp, fcall->params.twalk.fid);
538 buf_put_int32(bufp, fcall->params.twalk.newfid);
539 buf_put_int16(bufp, fcall->params.twalk.nwname);
540 for (i = 0; i < fcall->params.twalk.nwname; i++)
541 buf_put_string(bufp, fcall->params.twalk.wnames[i]);
542 break;
543 case TOPEN:
544 buf_put_int32(bufp, fcall->params.topen.fid);
545 buf_put_int8(bufp, fcall->params.topen.mode);
546 break;
547 case TCREATE:
548 buf_put_int32(bufp, fcall->params.tcreate.fid);
549 buf_put_string(bufp, fcall->params.tcreate.name);
550 buf_put_int32(bufp, fcall->params.tcreate.perm);
551 buf_put_int8(bufp, fcall->params.tcreate.mode);
552 break;
553 case TREAD:
554 buf_put_int32(bufp, fcall->params.tread.fid);
555 buf_put_int64(bufp, fcall->params.tread.offset);
556 buf_put_int32(bufp, fcall->params.tread.count);
557 break;
558 case TWRITE:
559 buf_put_int32(bufp, fcall->params.twrite.fid);
560 buf_put_int64(bufp, fcall->params.twrite.offset);
561 buf_put_int32(bufp, fcall->params.twrite.count);
562 buf_put_data(bufp, fcall->params.twrite.data,
563 fcall->params.twrite.count);
564 break;
565 case TCLUNK:
566 buf_put_int32(bufp, fcall->params.tclunk.fid);
567 break;
568 case TREMOVE:
569 buf_put_int32(bufp, fcall->params.tremove.fid);
570 break;
571 case TSTAT:
572 buf_put_int32(bufp, fcall->params.tstat.fid);
573 break;
574 case TWSTAT:
575 buf_put_int32(bufp, fcall->params.twstat.fid);
576 stat = fcall->params.twstat.stat;
577
578 buf_put_int16(bufp, stat->size + 2);
579 serialize_stat(v9ses, stat, bufp);
580 break;
581 }
582
583 if (buf_check_overflow(bufp))
584 return -EIO;
585
586 return fcall->size;
587}
588
589/**
590 * deserialize_fcall - unmarshal a response
591 * @v9ses: session information
592 * @msgsize: size of rcall message
593 * @buf: recieved buffer
594 * @buflen: length of received buffer
595 * @rcall: fcall structure to populate
596 * @rcalllen: length of fcall structure to populate
597 *
598 */
599
600int
601v9fs_deserialize_fcall(struct v9fs_session_info *v9ses, u32 msgsize,
602 void *buf, u32 buflen, struct v9fs_fcall *rcall,
603 int rcalllen)
604{
605
606 struct cbuf buffer;
607 struct cbuf *bufp = &buffer;
608 struct cbuf dbuffer;
609 struct cbuf *dbufp = &dbuffer;
610 int i = 0;
611
612 buf_init(bufp, buf, buflen);
613 buf_init(dbufp, (char *)rcall + sizeof(struct v9fs_fcall),
614 rcalllen - sizeof(struct v9fs_fcall));
615
616 rcall->size = msgsize;
617 rcall->id = buf_get_int8(bufp);
618 rcall->tag = buf_get_int16(bufp);
619
620 dprintk(DEBUG_CONV, "size %d id %d tag %d\n", rcall->size, rcall->id,
621 rcall->tag);
622 switch (rcall->id) {
623 default:
624 eprintk(KERN_ERR, "unknown message type: %d\n", rcall->id);
625 return -EPROTO;
626 case RVERSION:
627 rcall->params.rversion.msize = buf_get_int32(bufp);
628 rcall->params.rversion.version = buf_get_stringb(bufp, dbufp);
629 break;
630 case RFLUSH:
631 break;
632 case RATTACH:
633 rcall->params.rattach.qid.type = buf_get_int8(bufp);
634 rcall->params.rattach.qid.version = buf_get_int32(bufp);
635 rcall->params.rattach.qid.path = buf_get_int64(bufp);
636 break;
637 case RWALK:
638 rcall->params.rwalk.nwqid = buf_get_int16(bufp);
639 rcall->params.rwalk.wqids = buf_alloc(bufp,
640 rcall->params.rwalk.nwqid * sizeof(struct v9fs_qid));
641 if (rcall->params.rwalk.wqids)
642 for (i = 0; i < rcall->params.rwalk.nwqid; i++) {
643 rcall->params.rwalk.wqids[i].type =
644 buf_get_int8(bufp);
645 rcall->params.rwalk.wqids[i].version =
646 buf_get_int16(bufp);
647 rcall->params.rwalk.wqids[i].path =
648 buf_get_int64(bufp);
649 }
650 break;
651 case ROPEN:
652 rcall->params.ropen.qid.type = buf_get_int8(bufp);
653 rcall->params.ropen.qid.version = buf_get_int32(bufp);
654 rcall->params.ropen.qid.path = buf_get_int64(bufp);
655 rcall->params.ropen.iounit = buf_get_int32(bufp);
656 break;
657 case RCREATE:
658 rcall->params.rcreate.qid.type = buf_get_int8(bufp);
659 rcall->params.rcreate.qid.version = buf_get_int32(bufp);
660 rcall->params.rcreate.qid.path = buf_get_int64(bufp);
661 rcall->params.rcreate.iounit = buf_get_int32(bufp);
662 break;
663 case RREAD:
664 rcall->params.rread.count = buf_get_int32(bufp);
665 rcall->params.rread.data = buf_get_datab(bufp, dbufp,
666 rcall->params.rread.count);
667 break;
668 case RWRITE:
669 rcall->params.rwrite.count = buf_get_int32(bufp);
670 break;
671 case RCLUNK:
672 break;
673 case RREMOVE:
674 break;
675 case RSTAT:
676 buf_get_int16(bufp);
677 rcall->params.rstat.stat =
678 deserialize_statb(v9ses, bufp, dbufp);
679 break;
680 case RWSTAT:
681 break;
682 case RERROR:
683 rcall->params.rerror.error = buf_get_stringb(bufp, dbufp);
684 if (v9ses->extended)
685 rcall->params.rerror.errno = buf_get_int16(bufp);
686 break;
687 }
688
689 if (buf_check_overflow(bufp) || buf_check_overflow(dbufp))
690 return -EIO;
691
692 return rcall->size;
693}
diff --git a/fs/9p/conv.h b/fs/9p/conv.h
new file mode 100644
index 000000000000..ee849613c61a
--- /dev/null
+++ b/fs/9p/conv.h
@@ -0,0 +1,36 @@
1/*
2 * linux/fs/9p/conv.h
3 *
4 * 9P protocol conversion definitions
5 *
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to:
21 * Free Software Foundation
22 * 51 Franklin Street, Fifth Floor
23 * Boston, MA 02111-1301 USA
24 *
25 */
26
27int v9fs_deserialize_stat(struct v9fs_session_info *, void *buf,
28 u32 buflen, struct v9fs_stat *stat, u32 statlen);
29int v9fs_serialize_fcall(struct v9fs_session_info *, struct v9fs_fcall *tcall,
30 void *buf, u32 buflen);
31int v9fs_deserialize_fcall(struct v9fs_session_info *, u32 msglen,
32 void *buf, u32 buflen, struct v9fs_fcall *rcall,
33 int rcalllen);
34
35/* this one is actually in error.c right now */
36int v9fs_errstr2errno(char *errstr);
diff --git a/fs/9p/debug.h b/fs/9p/debug.h
new file mode 100644
index 000000000000..4445f06919d9
--- /dev/null
+++ b/fs/9p/debug.h
@@ -0,0 +1,70 @@
1/*
2 * linux/fs/9p/debug.h - V9FS Debug Definitions
3 *
4 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
5 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to:
19 * Free Software Foundation
20 * 51 Franklin Street, Fifth Floor
21 * Boston, MA 02111-1301 USA
22 *
23 */
24
25#define DEBUG_ERROR (1<<0)
26#define DEBUG_CURRENT (1<<1)
27#define DEBUG_9P (1<<2)
28#define DEBUG_VFS (1<<3)
29#define DEBUG_CONV (1<<4)
30#define DEBUG_MUX (1<<5)
31#define DEBUG_TRANS (1<<6)
32#define DEBUG_SLABS (1<<7)
33
34#define DEBUG_DUMP_PKT 0
35
36extern int v9fs_debug_level;
37
38#define dprintk(level, format, arg...) \
39do { \
40 if((v9fs_debug_level & level)==level) \
41 printk(KERN_NOTICE "-- %s (%d): " \
42 format , __FUNCTION__, current->pid , ## arg); \
43} while(0)
44
45#define eprintk(level, format, arg...) \
46do { \
47 printk(level "v9fs: %s (%d): " \
48 format , __FUNCTION__, current->pid , ## arg); \
49} while(0)
50
51#if DEBUG_DUMP_PKT
52static inline void dump_data(const unsigned char *data, unsigned int datalen)
53{
54 int i, j;
55 int len = datalen;
56
57 printk(KERN_DEBUG "data ");
58 for (i = 0; i < len; i += 4) {
59 for (j = 0; (j < 4) && (i + j < len); j++)
60 printk(KERN_DEBUG "%02x", data[i + j]);
61 printk(KERN_DEBUG " ");
62 }
63 printk(KERN_DEBUG "\n");
64}
65#else /* DEBUG_DUMP_PKT */
66static inline void dump_data(const unsigned char *data, unsigned int datalen)
67{
68
69}
70#endif /* DEBUG_DUMP_PKT */
diff --git a/fs/9p/error.c b/fs/9p/error.c
new file mode 100644
index 000000000000..fee5d19179c5
--- /dev/null
+++ b/fs/9p/error.c
@@ -0,0 +1,93 @@
1/*
2 * linux/fs/9p/error.c
3 *
4 * Error string handling
5 *
6 * Plan 9 uses error strings, Unix uses error numbers. These functions
7 * try to help manage that and provide for dynamically adding error
8 * mappings.
9 *
10 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
11 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to:
25 * Free Software Foundation
26 * 51 Franklin Street, Fifth Floor
27 * Boston, MA 02111-1301 USA
28 *
29 */
30
31#include <linux/config.h>
32#include <linux/module.h>
33
34#include <linux/list.h>
35#include <linux/jhash.h>
36
37#include "debug.h"
38#include "error.h"
39
40/**
41 * v9fs_error_init - preload
42 * @errstr: error string
43 *
44 */
45
46int v9fs_error_init(void)
47{
48 struct errormap *c;
49 int bucket;
50
51 /* initialize hash table */
52 for (bucket = 0; bucket < ERRHASHSZ; bucket++)
53 INIT_HLIST_HEAD(&hash_errmap[bucket]);
54
55 /* load initial error map into hash table */
56 for (c = errmap; c->name != NULL; c++) {
57 bucket = jhash(c->name, strlen(c->name), 0) % ERRHASHSZ;
58 INIT_HLIST_NODE(&c->list);
59 hlist_add_head(&c->list, &hash_errmap[bucket]);
60 }
61
62 return 1;
63}
64
65/**
66 * errstr2errno - convert error string to error number
67 * @errstr: error string
68 *
69 */
70
71int v9fs_errstr2errno(char *errstr)
72{
73 int errno = 0;
74 struct hlist_node *p = NULL;
75 struct errormap *c = NULL;
76 int bucket = jhash(errstr, strlen(errstr), 0) % ERRHASHSZ;
77
78 hlist_for_each_entry(c, p, &hash_errmap[bucket], list) {
79 if (!strcmp(c->name, errstr)) {
80 errno = c->val;
81 break;
82 }
83 }
84
85 if (errno == 0) {
86 /* TODO: if error isn't found, add it dynamically */
87 printk(KERN_ERR "%s: errstr :%s: not found\n", __FUNCTION__,
88 errstr);
89 errno = 1;
90 }
91
92 return -errno;
93}
diff --git a/fs/9p/error.h b/fs/9p/error.h
new file mode 100644
index 000000000000..78f89acf7c9a
--- /dev/null
+++ b/fs/9p/error.h
@@ -0,0 +1,178 @@
1/*
2 * linux/fs/9p/error.h
3 *
4 * Huge Nasty Error Table
5 *
6 * Plan 9 uses error strings, Unix uses error numbers. This table tries to
7 * match UNIX strings and Plan 9 strings to unix error numbers. It is used
8 * to preload the dynamic error table which can also track user-specific error
9 * strings.
10 *
11 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
12 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
13 *
14 * This program is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation; either version 2 of the License, or
17 * (at your option) any later version.
18 *
19 * This program is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
23 *
24 * You should have received a copy of the GNU General Public License
25 * along with this program; if not, write to:
26 * Free Software Foundation
27 * 51 Franklin Street, Fifth Floor
28 * Boston, MA 02111-1301 USA
29 *
30 */
31
32#include <linux/errno.h>
33#include <asm/errno.h>
34
35struct errormap {
36 char *name;
37 int val;
38
39 struct hlist_node list;
40};
41
42#define ERRHASHSZ 32
43static struct hlist_head hash_errmap[ERRHASHSZ];
44
45/* FixMe - reduce to a reasonable size */
46static struct errormap errmap[] = {
47 {"Operation not permitted", EPERM},
48 {"wstat prohibited", EPERM},
49 {"No such file or directory", ENOENT},
50 {"directory entry not found", ENOENT},
51 {"file not found", ENOENT},
52 {"Interrupted system call", EINTR},
53 {"Input/output error", EIO},
54 {"No such device or address", ENXIO},
55 {"Argument list too long", E2BIG},
56 {"Bad file descriptor", EBADF},
57 {"Resource temporarily unavailable", EAGAIN},
58 {"Cannot allocate memory", ENOMEM},
59 {"Permission denied", EACCES},
60 {"Bad address", EFAULT},
61 {"Block device required", ENOTBLK},
62 {"Device or resource busy", EBUSY},
63 {"File exists", EEXIST},
64 {"Invalid cross-device link", EXDEV},
65 {"No such device", ENODEV},
66 {"Not a directory", ENOTDIR},
67 {"Is a directory", EISDIR},
68 {"Invalid argument", EINVAL},
69 {"Too many open files in system", ENFILE},
70 {"Too many open files", EMFILE},
71 {"Text file busy", ETXTBSY},
72 {"File too large", EFBIG},
73 {"No space left on device", ENOSPC},
74 {"Illegal seek", ESPIPE},
75 {"Read-only file system", EROFS},
76 {"Too many links", EMLINK},
77 {"Broken pipe", EPIPE},
78 {"Numerical argument out of domain", EDOM},
79 {"Numerical result out of range", ERANGE},
80 {"Resource deadlock avoided", EDEADLK},
81 {"File name too long", ENAMETOOLONG},
82 {"No locks available", ENOLCK},
83 {"Function not implemented", ENOSYS},
84 {"Directory not empty", ENOTEMPTY},
85 {"Too many levels of symbolic links", ELOOP},
86 {"No message of desired type", ENOMSG},
87 {"Identifier removed", EIDRM},
88 {"No data available", ENODATA},
89 {"Machine is not on the network", ENONET},
90 {"Package not installed", ENOPKG},
91 {"Object is remote", EREMOTE},
92 {"Link has been severed", ENOLINK},
93 {"Communication error on send", ECOMM},
94 {"Protocol error", EPROTO},
95 {"Bad message", EBADMSG},
96 {"File descriptor in bad state", EBADFD},
97 {"Streams pipe error", ESTRPIPE},
98 {"Too many users", EUSERS},
99 {"Socket operation on non-socket", ENOTSOCK},
100 {"Message too long", EMSGSIZE},
101 {"Protocol not available", ENOPROTOOPT},
102 {"Protocol not supported", EPROTONOSUPPORT},
103 {"Socket type not supported", ESOCKTNOSUPPORT},
104 {"Operation not supported", EOPNOTSUPP},
105 {"Protocol family not supported", EPFNOSUPPORT},
106 {"Network is down", ENETDOWN},
107 {"Network is unreachable", ENETUNREACH},
108 {"Network dropped connection on reset", ENETRESET},
109 {"Software caused connection abort", ECONNABORTED},
110 {"Connection reset by peer", ECONNRESET},
111 {"No buffer space available", ENOBUFS},
112 {"Transport endpoint is already connected", EISCONN},
113 {"Transport endpoint is not connected", ENOTCONN},
114 {"Cannot send after transport endpoint shutdown", ESHUTDOWN},
115 {"Connection timed out", ETIMEDOUT},
116 {"Connection refused", ECONNREFUSED},
117 {"Host is down", EHOSTDOWN},
118 {"No route to host", EHOSTUNREACH},
119 {"Operation already in progress", EALREADY},
120 {"Operation now in progress", EINPROGRESS},
121 {"Is a named type file", EISNAM},
122 {"Remote I/O error", EREMOTEIO},
123 {"Disk quota exceeded", EDQUOT},
124/* errors from fossil, vacfs, and u9fs */
125 {"fid unknown or out of range", EBADF},
126 {"permission denied", EACCES},
127 {"file does not exist", ENOENT},
128 {"authentication failed", ECONNREFUSED},
129 {"bad offset in directory read", ESPIPE},
130 {"bad use of fid", EBADF},
131 {"wstat can't convert between files and directories", EPERM},
132 {"directory is not empty", ENOTEMPTY},
133 {"file exists", EEXIST},
134 {"file already exists", EEXIST},
135 {"file or directory already exists", EEXIST},
136 {"fid already in use", EBADF},
137 {"file in use", ETXTBSY},
138 {"i/o error", EIO},
139 {"file already open for I/O", ETXTBSY},
140 {"illegal mode", EINVAL},
141 {"illegal name", ENAMETOOLONG},
142 {"not a directory", ENOTDIR},
143 {"not a member of proposed group", EPERM},
144 {"not owner", EACCES},
145 {"only owner can change group in wstat", EACCES},
146 {"read only file system", EROFS},
147 {"no access to special file", EPERM},
148 {"i/o count too large", EIO},
149 {"unknown group", EINVAL},
150 {"unknown user", EINVAL},
151 {"bogus wstat buffer", EPROTO},
152 {"exclusive use file already open", EAGAIN},
153 {"corrupted directory entry", EIO},
154 {"corrupted file entry", EIO},
155 {"corrupted block label", EIO},
156 {"corrupted meta data", EIO},
157 {"illegal offset", EINVAL},
158 {"illegal path element", ENOENT},
159 {"root of file system is corrupted", EIO},
160 {"corrupted super block", EIO},
161 {"protocol botch", EPROTO},
162 {"file system is full", ENOSPC},
163 {"file is in use", EAGAIN},
164 {"directory entry is not allocated", ENOENT},
165 {"file is read only", EROFS},
166 {"file has been removed", EIDRM},
167 {"only support truncation to zero length", EPERM},
168 {"cannot remove root", EPERM},
169 {"file too big", EFBIG},
170 {"venti i/o error", EIO},
171 /* these are not errors */
172 {"u9fs rhostsauth: no authentication required", 0},
173 {"u9fs authnone: no authentication required", 0},
174 {NULL, -1}
175};
176
177extern int v9fs_error_init(void);
178extern int v9fs_errstr2errno(char *errstr);
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
new file mode 100644
index 000000000000..821c9c4d76aa
--- /dev/null
+++ b/fs/9p/fid.c
@@ -0,0 +1,241 @@
1/*
2 * V9FS FID Management
3 *
4 * Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to:
18 * Free Software Foundation
19 * 51 Franklin Street, Fifth Floor
20 * Boston, MA 02111-1301 USA
21 *
22 */
23
24#include <linux/config.h>
25#include <linux/module.h>
26#include <linux/errno.h>
27#include <linux/fs.h>
28#include <linux/idr.h>
29
30#include "debug.h"
31#include "v9fs.h"
32#include "9p.h"
33#include "v9fs_vfs.h"
34#include "transport.h"
35#include "mux.h"
36#include "conv.h"
37#include "fid.h"
38
39/**
40 * v9fs_fid_insert - add a fid to a dentry
41 * @fid: fid to add
42 * @dentry: dentry that it is being added to
43 *
44 */
45
46static int v9fs_fid_insert(struct v9fs_fid *fid, struct dentry *dentry)
47{
48 struct list_head *fid_list = (struct list_head *)dentry->d_fsdata;
49 dprintk(DEBUG_9P, "fid %d (%p) dentry %s (%p)\n", fid->fid, fid,
50 dentry->d_iname, dentry);
51 if (dentry->d_fsdata == NULL) {
52 dentry->d_fsdata =
53 kmalloc(sizeof(struct list_head), GFP_KERNEL);
54 if (dentry->d_fsdata == NULL) {
55 dprintk(DEBUG_ERROR, "Out of memory\n");
56 return -ENOMEM;
57 }
58 fid_list = (struct list_head *)dentry->d_fsdata;
59 INIT_LIST_HEAD(fid_list); /* Initialize list head */
60 }
61
62 fid->uid = current->uid;
63 fid->pid = current->pid;
64 list_add(&fid->list, fid_list);
65 return 0;
66}
67
68/**
69 * v9fs_fid_create - allocate a FID structure
70 * @dentry - dentry to link newly created fid to
71 *
72 */
73
74struct v9fs_fid *v9fs_fid_create(struct dentry *dentry)
75{
76 struct v9fs_fid *new;
77
78 new = kmalloc(sizeof(struct v9fs_fid), GFP_KERNEL);
79 if (new == NULL) {
80 dprintk(DEBUG_ERROR, "Out of Memory\n");
81 return ERR_PTR(-ENOMEM);
82 }
83
84 new->fid = -1;
85 new->fidopen = 0;
86 new->fidcreate = 0;
87 new->fidclunked = 0;
88 new->iounit = 0;
89
90 if (v9fs_fid_insert(new, dentry) == 0)
91 return new;
92 else {
93 dprintk(DEBUG_ERROR, "Problems inserting to dentry\n");
94 kfree(new);
95 return NULL;
96 }
97}
98
99/**
100 * v9fs_fid_destroy - deallocate a FID structure
101 * @fid: fid to destroy
102 *
103 */
104
105void v9fs_fid_destroy(struct v9fs_fid *fid)
106{
107 list_del(&fid->list);
108 kfree(fid);
109}
110
111/**
112 * v9fs_fid_lookup - retrieve the right fid from a particular dentry
113 * @dentry: dentry to look for fid in
114 * @type: intent of lookup (operation or traversal)
115 *
116 * search list of fids associated with a dentry for a fid with a matching
117 * thread id or uid. If that fails, look up the dentry's parents to see if you
118 * can find a matching fid.
119 *
120 */
121
122struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry, int type)
123{
124 struct list_head *fid_list = (struct list_head *)dentry->d_fsdata;
125 struct v9fs_fid *current_fid = NULL;
126 struct v9fs_fid *temp = NULL;
127 struct v9fs_fid *return_fid = NULL;
128 int found_parent = 0;
129 int found_user = 0;
130
131 dprintk(DEBUG_9P, " dentry: %s (%p) type %d\n", dentry->d_iname, dentry,
132 type);
133
134 if (fid_list && !list_empty(fid_list)) {
135 list_for_each_entry_safe(current_fid, temp, fid_list, list) {
136 if (current_fid->uid == current->uid) {
137 if (return_fid == NULL) {
138 if ((type == FID_OP)
139 || (!current_fid->fidopen)) {
140 return_fid = current_fid;
141 found_user = 1;
142 }
143 }
144 }
145 if (current_fid->pid == current->real_parent->pid) {
146 if ((return_fid == NULL) || (found_parent)
147 || (found_user)) {
148 if ((type == FID_OP)
149 || (!current_fid->fidopen)) {
150 return_fid = current_fid;
151 found_parent = 1;
152 found_user = 0;
153 }
154 }
155 }
156 if (current_fid->pid == current->pid) {
157 if ((type == FID_OP) ||
158 (!current_fid->fidopen)) {
159 return_fid = current_fid;
160 found_parent = 0;
161 found_user = 0;
162 }
163 }
164 }
165 }
166
167 /* we are at the root but didn't match */
168 if ((!return_fid) && (dentry->d_parent == dentry)) {
169 /* TODO: clone attach with new uid */
170 return_fid = current_fid;
171 }
172
173 if (!return_fid) {
174 struct dentry *par = current->fs->pwd->d_parent;
175 int count = 1;
176 while (par != NULL) {
177 if (par == dentry)
178 break;
179 count++;
180 if (par == par->d_parent) {
181 dprintk(DEBUG_ERROR,
182 "got to root without finding dentry\n");
183 break;
184 }
185 par = par->d_parent;
186 }
187
188/* XXX - there may be some duplication we can get rid of */
189 if (par == dentry) {
190 /* we need to fid_lookup the starting point */
191 int fidnum = -1;
192 int oldfid = -1;
193 int result = -1;
194 struct v9fs_session_info *v9ses =
195 v9fs_inode2v9ses(current->fs->pwd->d_inode);
196
197 current_fid =
198 v9fs_fid_lookup(current->fs->pwd, FID_WALK);
199 if (current_fid == NULL) {
200 dprintk(DEBUG_ERROR,
201 "process cwd doesn't have a fid\n");
202 return return_fid;
203 }
204 oldfid = current_fid->fid;
205 par = current->fs->pwd;
206 /* TODO: take advantage of multiwalk */
207
208 fidnum = v9fs_get_idpool(&v9ses->fidpool);
209 if (fidnum < 0) {
210 dprintk(DEBUG_ERROR,
211 "could not get a new fid num\n");
212 return return_fid;
213 }
214
215 while (par != dentry) {
216 result =
217 v9fs_t_walk(v9ses, oldfid, fidnum, "..",
218 NULL);
219 if (result < 0) {
220 dprintk(DEBUG_ERROR,
221 "problem walking to parent\n");
222
223 break;
224 }
225 oldfid = fidnum;
226 if (par == par->d_parent) {
227 dprintk(DEBUG_ERROR,
228 "can't find dentry\n");
229 break;
230 }
231 par = par->d_parent;
232 }
233 if (par == dentry) {
234 return_fid = v9fs_fid_create(dentry);
235 return_fid->fid = fidnum;
236 }
237 }
238 }
239
240 return return_fid;
241}
diff --git a/fs/9p/fid.h b/fs/9p/fid.h
new file mode 100644
index 000000000000..7db478ccca36
--- /dev/null
+++ b/fs/9p/fid.h
@@ -0,0 +1,57 @@
1/*
2 * V9FS FID Management
3 *
4 * Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to:
18 * Free Software Foundation
19 * 51 Franklin Street, Fifth Floor
20 * Boston, MA 02111-1301 USA
21 *
22 */
23
24#include <linux/list.h>
25
26#define FID_OP 0
27#define FID_WALK 1
28
29struct v9fs_fid {
30 struct list_head list; /* list of fids associated with a dentry */
31 struct list_head active; /* XXX - debug */
32
33 u32 fid;
34 unsigned char fidopen; /* set when fid is opened */
35 unsigned char fidcreate; /* set when fid was just created */
36 unsigned char fidclunked; /* set when fid has already been clunked */
37
38 struct v9fs_qid qid;
39 u32 iounit;
40
41 /* readdir stuff */
42 int rdir_fpos;
43 loff_t rdir_pos;
44 struct v9fs_fcall *rdir_fcall;
45
46 /* management stuff */
47 pid_t pid; /* thread associated with this fid */
48 uid_t uid; /* user associated with this fid */
49
50 /* private data */
51 struct file *filp; /* backpointer to File struct for open files */
52 struct v9fs_session_info *v9ses; /* session info for this FID */
53};
54
55struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry, int type);
56void v9fs_fid_destroy(struct v9fs_fid *fid);
57struct v9fs_fid *v9fs_fid_create(struct dentry *);
diff --git a/fs/9p/mux.c b/fs/9p/mux.c
new file mode 100644
index 000000000000..8835b576f744
--- /dev/null
+++ b/fs/9p/mux.c
@@ -0,0 +1,475 @@
1/*
2 * linux/fs/9p/mux.c
3 *
4 * Protocol Multiplexer
5 *
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 * Copyright (C) 2004 by Latchesar Ionkov <lucho@ionkov.net>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to:
21 * Free Software Foundation
22 * 51 Franklin Street, Fifth Floor
23 * Boston, MA 02111-1301 USA
24 *
25 */
26
27#include <linux/config.h>
28#include <linux/module.h>
29#include <linux/errno.h>
30#include <linux/fs.h>
31#include <linux/kthread.h>
32#include <linux/idr.h>
33
34#include "debug.h"
35#include "v9fs.h"
36#include "9p.h"
37#include "transport.h"
38#include "conv.h"
39#include "mux.h"
40
41/**
42 * dprintcond - print condition of session info
43 * @v9ses: session info structure
44 * @req: RPC request structure
45 *
46 */
47
48static inline int
49dprintcond(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req)
50{
51 dprintk(DEBUG_MUX, "condition: %d, %p\n", v9ses->transport->status,
52 req->rcall);
53 return 0;
54}
55
56/**
57 * xread - force read of a certain number of bytes
58 * @v9ses: session info structure
59 * @ptr: pointer to buffer
60 * @sz: number of bytes to read
61 *
62 * Chuck Cranor CS-533 project1
63 */
64
65static int xread(struct v9fs_session_info *v9ses, void *ptr, unsigned long sz)
66{
67 int rd = 0;
68 int ret = 0;
69 while (rd < sz) {
70 ret = v9ses->transport->read(v9ses->transport, ptr, sz - rd);
71 if (ret <= 0) {
72 dprintk(DEBUG_ERROR, "xread errno %d\n", ret);
73 return ret;
74 }
75 rd += ret;
76 ptr += ret;
77 }
78 return (rd);
79}
80
81/**
82 * read_message - read a full 9P2000 fcall packet
83 * @v9ses: session info structure
84 * @rcall: fcall structure to read into
85 * @rcalllen: size of fcall buffer
86 *
87 */
88
89static int
90read_message(struct v9fs_session_info *v9ses,
91 struct v9fs_fcall *rcall, int rcalllen)
92{
93 unsigned char buf[4];
94 void *data;
95 int size = 0;
96 int res = 0;
97
98 res = xread(v9ses, buf, sizeof(buf));
99 if (res < 0) {
100 dprintk(DEBUG_ERROR,
101 "Reading of count field failed returned: %d\n", res);
102 return res;
103 }
104
105 if (res < 4) {
106 dprintk(DEBUG_ERROR,
107 "Reading of count field failed returned: %d\n", res);
108 return -EIO;
109 }
110
111 size = buf[0] | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24);
112 dprintk(DEBUG_MUX, "got a packet count: %d\n", size);
113
114 /* adjust for the four bytes of size */
115 size -= 4;
116
117 if (size > v9ses->maxdata) {
118 dprintk(DEBUG_ERROR, "packet too big: %d\n", size);
119 return -E2BIG;
120 }
121
122 data = kmalloc(size, GFP_KERNEL);
123 if (!data) {
124 eprintk(KERN_WARNING, "out of memory\n");
125 return -ENOMEM;
126 }
127
128 res = xread(v9ses, data, size);
129 if (res < size) {
130 dprintk(DEBUG_ERROR, "Reading of fcall failed returned: %d\n",
131 res);
132 kfree(data);
133 return res;
134 }
135
136 /* we now have an in-memory string that is the reply.
137 * deserialize it. There is very little to go wrong at this point
138 * save for v9fs_alloc errors.
139 */
140 res = v9fs_deserialize_fcall(v9ses, size, data, v9ses->maxdata,
141 rcall, rcalllen);
142
143 kfree(data);
144
145 if (res < 0)
146 return res;
147
148 return 0;
149}
150
151/**
152 * v9fs_recv - receive an RPC response for a particular tag
153 * @v9ses: session info structure
154 * @req: RPC request structure
155 *
156 */
157
158static int v9fs_recv(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req)
159{
160 int ret = 0;
161
162 dprintk(DEBUG_MUX, "waiting for response: %d\n", req->tcall->tag);
163 ret = wait_event_interruptible(v9ses->read_wait,
164 ((v9ses->transport->status != Connected) ||
165 (req->rcall != 0) || (req->err < 0) ||
166 dprintcond(v9ses, req)));
167
168 dprintk(DEBUG_MUX, "got it: rcall %p\n", req->rcall);
169
170 spin_lock(&v9ses->muxlock);
171 list_del(&req->next);
172 spin_unlock(&v9ses->muxlock);
173
174 if (req->err < 0)
175 return req->err;
176
177 if (v9ses->transport->status == Disconnected)
178 return -ECONNRESET;
179
180 return ret;
181}
182
183/**
184 * v9fs_send - send a 9P request
185 * @v9ses: session info structure
186 * @req: RPC request to send
187 *
188 */
189
190static int v9fs_send(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req)
191{
192 int ret = -1;
193 void *data = NULL;
194 struct v9fs_fcall *tcall = req->tcall;
195
196 data = kmalloc(v9ses->maxdata + V9FS_IOHDRSZ, GFP_KERNEL);
197 if (!data)
198 return -ENOMEM;
199
200 tcall->size = 0; /* enforce size recalculation */
201 ret =
202 v9fs_serialize_fcall(v9ses, tcall, data,
203 v9ses->maxdata + V9FS_IOHDRSZ);
204 if (ret < 0)
205 goto free_data;
206
207 spin_lock(&v9ses->muxlock);
208 list_add(&req->next, &v9ses->mux_fcalls);
209 spin_unlock(&v9ses->muxlock);
210
211 dprintk(DEBUG_MUX, "sending message: tag %d size %d\n", tcall->tag,
212 tcall->size);
213 ret = v9ses->transport->write(v9ses->transport, data, tcall->size);
214
215 if (ret != tcall->size) {
216 spin_lock(&v9ses->muxlock);
217 list_del(&req->next);
218 kfree(req->rcall);
219
220 spin_unlock(&v9ses->muxlock);
221 if (ret >= 0)
222 ret = -EREMOTEIO;
223 } else
224 ret = 0;
225
226 free_data:
227 kfree(data);
228 return ret;
229}
230
231/**
232 * v9fs_mux_rpc - send a request, receive a response
233 * @v9ses: session info structure
234 * @tcall: fcall to send
235 * @rcall: buffer to place response into
236 *
237 */
238
239long
240v9fs_mux_rpc(struct v9fs_session_info *v9ses, struct v9fs_fcall *tcall,
241 struct v9fs_fcall **rcall)
242{
243 int tid = -1;
244 struct v9fs_fcall *fcall = NULL;
245 struct v9fs_rpcreq req;
246 int ret = -1;
247
248 if (!v9ses)
249 return -EINVAL;
250
251 if (!v9ses->transport || v9ses->transport->status != Connected)
252 return -EIO;
253
254 if (rcall)
255 *rcall = NULL;
256
257 if (tcall->id != TVERSION) {
258 tid = v9fs_get_idpool(&v9ses->tidpool);
259 if (tid < 0)
260 return -ENOMEM;
261 }
262
263 tcall->tag = tid;
264
265 req.tcall = tcall;
266 req.err = 0;
267 req.rcall = NULL;
268
269 ret = v9fs_send(v9ses, &req);
270
271 if (ret < 0) {
272 if (tcall->id != TVERSION)
273 v9fs_put_idpool(tid, &v9ses->tidpool);
274 dprintk(DEBUG_MUX, "error %d\n", ret);
275 return ret;
276 }
277
278 ret = v9fs_recv(v9ses, &req);
279
280 fcall = req.rcall;
281
282 dprintk(DEBUG_MUX, "received: tag=%x, ret=%d\n", tcall->tag, ret);
283 if (ret == -ERESTARTSYS) {
284 if (v9ses->transport->status != Disconnected
285 && tcall->id != TFLUSH) {
286 unsigned long flags;
287
288 dprintk(DEBUG_MUX, "flushing the tag: %d\n",
289 tcall->tag);
290 clear_thread_flag(TIF_SIGPENDING);
291 v9fs_t_flush(v9ses, tcall->tag);
292 spin_lock_irqsave(&current->sighand->siglock, flags);
293 recalc_sigpending();
294 spin_unlock_irqrestore(&current->sighand->siglock,
295 flags);
296 dprintk(DEBUG_MUX, "flushing done\n");
297 }
298
299 goto release_req;
300 } else if (ret < 0)
301 goto release_req;
302
303 if (!fcall)
304 ret = -EIO;
305 else {
306 if (fcall->id == RERROR) {
307 ret = v9fs_errstr2errno(fcall->params.rerror.error);
308 if (ret == 0) { /* string match failed */
309 if (fcall->params.rerror.errno)
310 ret = -(fcall->params.rerror.errno);
311 else
312 ret = -ESERVERFAULT;
313 }
314 } else if (fcall->id != tcall->id + 1) {
315 dprintk(DEBUG_ERROR,
316 "fcall mismatch: expected %d, got %d\n",
317 tcall->id + 1, fcall->id);
318 ret = -EIO;
319 }
320 }
321
322 release_req:
323 if (tcall->id != TVERSION)
324 v9fs_put_idpool(tid, &v9ses->tidpool);
325 if (rcall)
326 *rcall = fcall;
327 else
328 kfree(fcall);
329
330 return ret;
331}
332
333/**
334 * v9fs_mux_cancel_requests - cancels all pending requests
335 *
336 * @v9ses: session info structure
337 * @err: error code to return to the requests
338 */
339void v9fs_mux_cancel_requests(struct v9fs_session_info *v9ses, int err)
340{
341 struct v9fs_rpcreq *rptr;
342 struct v9fs_rpcreq *rreq;
343
344 dprintk(DEBUG_MUX, " %d\n", err);
345 spin_lock(&v9ses->muxlock);
346 list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) {
347 rreq->err = err;
348 }
349 spin_unlock(&v9ses->muxlock);
350 wake_up_all(&v9ses->read_wait);
351}
352
353/**
354 * v9fs_recvproc - kproc to handle demultiplexing responses
355 * @data: session info structure
356 *
357 */
358
359static int v9fs_recvproc(void *data)
360{
361 struct v9fs_session_info *v9ses = (struct v9fs_session_info *)data;
362 struct v9fs_fcall *rcall = NULL;
363 struct v9fs_rpcreq *rptr;
364 struct v9fs_rpcreq *req;
365 struct v9fs_rpcreq *rreq;
366 int err = 0;
367
368 allow_signal(SIGKILL);
369 set_current_state(TASK_INTERRUPTIBLE);
370 complete(&v9ses->proccmpl);
371 while (!kthread_should_stop() && err >= 0) {
372 req = rptr = rreq = NULL;
373
374 rcall = kmalloc(v9ses->maxdata + V9FS_IOHDRSZ, GFP_KERNEL);
375 if (!rcall) {
376 eprintk(KERN_ERR, "no memory for buffers\n");
377 break;
378 }
379
380 err = read_message(v9ses, rcall, v9ses->maxdata + V9FS_IOHDRSZ);
381 spin_lock(&v9ses->muxlock);
382 if (err < 0) {
383 list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) {
384 rreq->err = err;
385 }
386 if(err != -ERESTARTSYS)
387 eprintk(KERN_ERR,
388 "Transport error while reading message %d\n", err);
389 } else {
390 list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) {
391 if (rreq->tcall->tag == rcall->tag) {
392 req = rreq;
393 req->rcall = rcall;
394 break;
395 }
396 }
397 }
398
399 if (req && (req->tcall->id == TFLUSH)) {
400 struct v9fs_rpcreq *treq = NULL;
401 list_for_each_entry_safe(treq, rptr, &v9ses->mux_fcalls, next) {
402 if (treq->tcall->tag ==
403 req->tcall->params.tflush.oldtag) {
404 list_del(&rptr->next);
405 kfree(treq->rcall);
406 break;
407 }
408 }
409 }
410
411 spin_unlock(&v9ses->muxlock);
412
413 if (!req) {
414 if (err >= 0)
415 dprintk(DEBUG_ERROR,
416 "unexpected response: id %d tag %d\n",
417 rcall->id, rcall->tag);
418
419 kfree(rcall);
420 }
421
422 wake_up_all(&v9ses->read_wait);
423 set_current_state(TASK_INTERRUPTIBLE);
424 }
425
426 v9ses->transport->close(v9ses->transport);
427
428 /* Inform all pending processes about the failure */
429 wake_up_all(&v9ses->read_wait);
430
431 if (signal_pending(current))
432 complete(&v9ses->proccmpl);
433
434 dprintk(DEBUG_MUX, "recvproc: end\n");
435 v9ses->recvproc = NULL;
436
437 return err >= 0;
438}
439
440/**
441 * v9fs_mux_init - initialize multiplexer (spawn kproc)
442 * @v9ses: session info structure
443 * @dev_name: mount device information (to create unique kproc)
444 *
445 */
446
447int v9fs_mux_init(struct v9fs_session_info *v9ses, const char *dev_name)
448{
449 char procname[60];
450
451 strncpy(procname, dev_name, sizeof(procname));
452 procname[sizeof(procname) - 1] = 0;
453
454 init_waitqueue_head(&v9ses->read_wait);
455 init_completion(&v9ses->fcread);
456 init_completion(&v9ses->proccmpl);
457 spin_lock_init(&v9ses->muxlock);
458 INIT_LIST_HEAD(&v9ses->mux_fcalls);
459 v9ses->recvproc = NULL;
460 v9ses->curfcall = NULL;
461
462 v9ses->recvproc = kthread_create(v9fs_recvproc, v9ses,
463 "v9fs_recvproc %s", procname);
464
465 if (IS_ERR(v9ses->recvproc)) {
466 eprintk(KERN_ERR, "cannot create receiving thread\n");
467 v9fs_session_close(v9ses);
468 return -ECONNABORTED;
469 }
470
471 wake_up_process(v9ses->recvproc);
472 wait_for_completion(&v9ses->proccmpl);
473
474 return 0;
475}
diff --git a/fs/9p/mux.h b/fs/9p/mux.h
new file mode 100644
index 000000000000..4994cb10badf
--- /dev/null
+++ b/fs/9p/mux.h
@@ -0,0 +1,41 @@
1/*
2 * linux/fs/9p/mux.h
3 *
4 * Multiplexer Definitions
5 *
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to:
20 * Free Software Foundation
21 * 51 Franklin Street, Fifth Floor
22 * Boston, MA 02111-1301 USA
23 *
24 */
25
26/* structure to manage each RPC transaction */
27
28struct v9fs_rpcreq {
29 struct v9fs_fcall *tcall;
30 struct v9fs_fcall *rcall;
31 int err; /* error code if response failed */
32
33 /* XXX - could we put scatter/gather buffers here? */
34
35 struct list_head next;
36};
37
38int v9fs_mux_init(struct v9fs_session_info *v9ses, const char *dev_name);
39long v9fs_mux_rpc(struct v9fs_session_info *v9ses,
40 struct v9fs_fcall *tcall, struct v9fs_fcall **rcall);
41void v9fs_mux_cancel_requests(struct v9fs_session_info *v9ses, int err);
diff --git a/fs/9p/trans_fd.c b/fs/9p/trans_fd.c
new file mode 100644
index 000000000000..63b58ce98ff4
--- /dev/null
+++ b/fs/9p/trans_fd.c
@@ -0,0 +1,172 @@
1/*
2 * linux/fs/9p/trans_fd.c
3 *
4 * File Descriptor Transport Layer
5 *
6 * Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to:
20 * Free Software Foundation
21 * 51 Franklin Street, Fifth Floor
22 * Boston, MA 02111-1301 USA
23 *
24 */
25
26#include <linux/config.h>
27#include <linux/module.h>
28#include <linux/net.h>
29#include <linux/ipv6.h>
30#include <linux/errno.h>
31#include <linux/kernel.h>
32#include <linux/un.h>
33#include <asm/uaccess.h>
34#include <linux/inet.h>
35#include <linux/idr.h>
36#include <linux/file.h>
37
38#include "debug.h"
39#include "v9fs.h"
40#include "transport.h"
41
42struct v9fs_trans_fd {
43 struct file *in_file;
44 struct file *out_file;
45};
46
47/**
48 * v9fs_fd_recv - receive from a socket
49 * @v9ses: session information
50 * @v: buffer to receive data into
51 * @len: size of receive buffer
52 *
53 */
54
55static int v9fs_fd_recv(struct v9fs_transport *trans, void *v, int len)
56{
57 struct v9fs_trans_fd *ts = trans ? trans->priv : NULL;
58
59 if (!trans || trans->status != Connected || !ts)
60 return -EIO;
61
62 return kernel_read(ts->in_file, ts->in_file->f_pos, v, len);
63}
64
65/**
66 * v9fs_fd_send - send to a socket
67 * @v9ses: session information
68 * @v: buffer to send data from
69 * @len: size of send buffer
70 *
71 */
72
73static int v9fs_fd_send(struct v9fs_transport *trans, void *v, int len)
74{
75 struct v9fs_trans_fd *ts = trans ? trans->priv : NULL;
76 mm_segment_t oldfs = get_fs();
77 int ret = 0;
78
79 if (!trans || trans->status != Connected || !ts)
80 return -EIO;
81
82 set_fs(get_ds());
83 /* The cast to a user pointer is valid due to the set_fs() */
84 ret = vfs_write(ts->out_file, (void __user *)v, len, &ts->out_file->f_pos);
85 set_fs(oldfs);
86
87 return ret;
88}
89
90/**
91 * v9fs_fd_init - initialize file descriptor transport
92 * @v9ses: session information
93 * @addr: address of server to mount
94 * @data: mount options
95 *
96 */
97
98static int
99v9fs_fd_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
100{
101 struct v9fs_trans_fd *ts = NULL;
102 struct v9fs_transport *trans = v9ses->transport;
103
104 if((v9ses->wfdno == ~0) || (v9ses->rfdno == ~0)) {
105 printk(KERN_ERR "v9fs: Insufficient options for proto=fd\n");
106 return -ENOPROTOOPT;
107 }
108
109 sema_init(&trans->writelock, 1);
110 sema_init(&trans->readlock, 1);
111
112 ts = kmalloc(sizeof(struct v9fs_trans_fd), GFP_KERNEL);
113
114 if (!ts)
115 return -ENOMEM;
116
117 ts->in_file = fget( v9ses->rfdno );
118 ts->out_file = fget( v9ses->wfdno );
119
120 if (!ts->in_file || !ts->out_file) {
121 if (ts->in_file)
122 fput(ts->in_file);
123
124 if (ts->out_file)
125 fput(ts->out_file);
126
127 kfree(ts);
128 return -EIO;
129 }
130
131 trans->priv = ts;
132 trans->status = Connected;
133
134 return 0;
135}
136
137
138/**
139 * v9fs_fd_close - shutdown file descriptor
140 * @trans: private socket structure
141 *
142 */
143
144static void v9fs_fd_close(struct v9fs_transport *trans)
145{
146 struct v9fs_trans_fd *ts;
147
148 if (!trans)
149 return;
150
151 trans->status = Disconnected;
152 ts = trans->priv;
153
154 if (!ts)
155 return;
156
157 if (ts->in_file)
158 fput(ts->in_file);
159
160 if (ts->out_file)
161 fput(ts->out_file);
162
163 kfree(ts);
164}
165
166struct v9fs_transport v9fs_trans_fd = {
167 .init = v9fs_fd_init,
168 .write = v9fs_fd_send,
169 .read = v9fs_fd_recv,
170 .close = v9fs_fd_close,
171};
172
diff --git a/fs/9p/trans_sock.c b/fs/9p/trans_sock.c
new file mode 100644
index 000000000000..01e26f0013ac
--- /dev/null
+++ b/fs/9p/trans_sock.c
@@ -0,0 +1,290 @@
1/*
2 * linux/fs/9p/trans_socket.c
3 *
4 * Socket Transport Layer
5 *
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 * Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com>
8 * Copyright (C) 1995, 1996 by Olaf Kirch <okir@monad.swb.de>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to:
22 * Free Software Foundation
23 * 51 Franklin Street, Fifth Floor
24 * Boston, MA 02111-1301 USA
25 *
26 */
27
28#include <linux/config.h>
29#include <linux/module.h>
30#include <linux/net.h>
31#include <linux/ipv6.h>
32#include <linux/errno.h>
33#include <linux/kernel.h>
34#include <linux/un.h>
35#include <asm/uaccess.h>
36#include <linux/inet.h>
37#include <linux/idr.h>
38
39#include "debug.h"
40#include "v9fs.h"
41#include "transport.h"
42
43#define V9FS_PORT 564
44
45struct v9fs_trans_sock {
46 struct socket *s;
47};
48
49/**
50 * v9fs_sock_recv - receive from a socket
51 * @v9ses: session information
52 * @v: buffer to receive data into
53 * @len: size of receive buffer
54 *
55 */
56
57static int v9fs_sock_recv(struct v9fs_transport *trans, void *v, int len)
58{
59 struct msghdr msg;
60 struct kvec iov;
61 int result;
62 mm_segment_t oldfs;
63 struct v9fs_trans_sock *ts = trans ? trans->priv : NULL;
64
65 if (trans->status == Disconnected)
66 return -EREMOTEIO;
67
68 result = -EINVAL;
69
70 oldfs = get_fs();
71 set_fs(get_ds());
72
73 iov.iov_base = v;
74 iov.iov_len = len;
75 msg.msg_name = NULL;
76 msg.msg_namelen = 0;
77 msg.msg_iovlen = 1;
78 msg.msg_control = NULL;
79 msg.msg_controllen = 0;
80 msg.msg_namelen = 0;
81 msg.msg_flags = MSG_NOSIGNAL;
82
83 result = kernel_recvmsg(ts->s, &msg, &iov, 1, len, 0);
84
85 dprintk(DEBUG_TRANS, "socket state %d\n", ts->s->state);
86 set_fs(oldfs);
87
88 if (result <= 0) {
89 if (result != -ERESTARTSYS)
90 trans->status = Disconnected;
91 }
92
93 return result;
94}
95
96/**
97 * v9fs_sock_send - send to a socket
98 * @v9ses: session information
99 * @v: buffer to send data from
100 * @len: size of send buffer
101 *
102 */
103
104static int v9fs_sock_send(struct v9fs_transport *trans, void *v, int len)
105{
106 struct kvec iov;
107 struct msghdr msg;
108 int result = -1;
109 mm_segment_t oldfs;
110 struct v9fs_trans_sock *ts = trans ? trans->priv : NULL;
111
112 dprintk(DEBUG_TRANS, "Sending packet size %d (%x)\n", len, len);
113 dump_data(v, len);
114
115 down(&trans->writelock);
116
117 oldfs = get_fs();
118 set_fs(get_ds());
119 iov.iov_base = v;
120 iov.iov_len = len;
121 msg.msg_name = NULL;
122 msg.msg_namelen = 0;
123 msg.msg_iovlen = 1;
124 msg.msg_control = NULL;
125 msg.msg_controllen = 0;
126 msg.msg_namelen = 0;
127 msg.msg_flags = MSG_NOSIGNAL;
128 result = kernel_sendmsg(ts->s, &msg, &iov, 1, len);
129 set_fs(oldfs);
130
131 if (result < 0) {
132 if (result != -ERESTARTSYS)
133 trans->status = Disconnected;
134 }
135
136 up(&trans->writelock);
137 return result;
138}
139
140/**
141 * v9fs_tcp_init - initialize TCP socket
142 * @v9ses: session information
143 * @addr: address of server to mount
144 * @data: mount options
145 *
146 */
147
148static int
149v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
150{
151 struct socket *csocket = NULL;
152 struct sockaddr_in sin_server;
153 int rc = 0;
154 struct v9fs_trans_sock *ts = NULL;
155 struct v9fs_transport *trans = v9ses->transport;
156
157 sema_init(&trans->writelock, 1);
158 sema_init(&trans->readlock, 1);
159
160 ts = kmalloc(sizeof(struct v9fs_trans_sock), GFP_KERNEL);
161
162 if (!ts)
163 return -ENOMEM;
164
165 trans->priv = ts;
166 ts->s = NULL;
167
168 if (!addr)
169 return -EINVAL;
170
171 dprintk(DEBUG_TRANS, "Connecting to %s\n", addr);
172
173 sin_server.sin_family = AF_INET;
174 sin_server.sin_addr.s_addr = in_aton(addr);
175 sin_server.sin_port = htons(v9ses->port);
176 sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket);
177 rc = csocket->ops->connect(csocket,
178 (struct sockaddr *)&sin_server,
179 sizeof(struct sockaddr_in), 0);
180 if (rc < 0) {
181 eprintk(KERN_ERR,
182 "v9fs_trans_tcp: problem connecting socket to %s\n",
183 addr);
184 return rc;
185 }
186 csocket->sk->sk_allocation = GFP_NOIO;
187 ts->s = csocket;
188 trans->status = Connected;
189
190 return 0;
191}
192
193/**
194 * v9fs_unix_init - initialize UNIX domain socket
195 * @v9ses: session information
196 * @dev_name: path to named pipe
197 * @data: mount options
198 *
199 */
200
201static int
202v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name,
203 char *data)
204{
205 int rc;
206 struct socket *csocket;
207 struct sockaddr_un sun_server;
208 struct v9fs_transport *trans;
209 struct v9fs_trans_sock *ts;
210
211 rc = 0;
212 csocket = NULL;
213 trans = v9ses->transport;
214
215 if (strlen(dev_name) > UNIX_PATH_MAX) {
216 eprintk(KERN_ERR, "v9fs_trans_unix: address too long: %s\n",
217 dev_name);
218 return -ENOMEM;
219 }
220
221 ts = kmalloc(sizeof(struct v9fs_trans_sock), GFP_KERNEL);
222 if (!ts)
223 return -ENOMEM;
224
225 trans->priv = ts;
226 ts->s = NULL;
227
228 sema_init(&trans->writelock, 1);
229 sema_init(&trans->readlock, 1);
230
231 sun_server.sun_family = PF_UNIX;
232 strcpy(sun_server.sun_path, dev_name);
233 sock_create_kern(PF_UNIX, SOCK_STREAM, 0, &csocket);
234 rc = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server,
235 sizeof(struct sockaddr_un) - 1, 0); /* -1 *is* important */
236 if (rc < 0) {
237 eprintk(KERN_ERR,
238 "v9fs_trans_unix: problem connecting socket: %s: %d\n",
239 dev_name, rc);
240 return rc;
241 }
242 csocket->sk->sk_allocation = GFP_NOIO;
243 ts->s = csocket;
244 trans->status = Connected;
245
246 return 0;
247}
248
249/**
250 * v9fs_sock_close - shutdown socket
251 * @trans: private socket structure
252 *
253 */
254
255static void v9fs_sock_close(struct v9fs_transport *trans)
256{
257 struct v9fs_trans_sock *ts;
258
259 if (!trans)
260 return;
261
262 ts = trans->priv;
263
264 if ((ts) && (ts->s)) {
265 dprintk(DEBUG_TRANS, "closing the socket %p\n", ts->s);
266 sock_release(ts->s);
267 ts->s = NULL;
268 trans->status = Disconnected;
269 dprintk(DEBUG_TRANS, "socket closed\n");
270 }
271
272 if (ts)
273 kfree(ts);
274
275 trans->priv = NULL;
276}
277
278struct v9fs_transport v9fs_trans_tcp = {
279 .init = v9fs_tcp_init,
280 .write = v9fs_sock_send,
281 .read = v9fs_sock_recv,
282 .close = v9fs_sock_close,
283};
284
285struct v9fs_transport v9fs_trans_unix = {
286 .init = v9fs_unix_init,
287 .write = v9fs_sock_send,
288 .read = v9fs_sock_recv,
289 .close = v9fs_sock_close,
290};
diff --git a/fs/9p/transport.h b/fs/9p/transport.h
new file mode 100644
index 000000000000..9e9cd418efd5
--- /dev/null
+++ b/fs/9p/transport.h
@@ -0,0 +1,46 @@
1/*
2 * linux/fs/9p/transport.h
3 *
4 * Transport Definition
5 *
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to:
20 * Free Software Foundation
21 * 51 Franklin Street, Fifth Floor
22 * Boston, MA 02111-1301 USA
23 *
24 */
25
26enum v9fs_transport_status {
27 Connected,
28 Disconnected,
29 Hung,
30};
31
32struct v9fs_transport {
33 enum v9fs_transport_status status;
34 struct semaphore writelock;
35 struct semaphore readlock;
36 void *priv;
37
38 int (*init) (struct v9fs_session_info *, const char *, char *);
39 int (*write) (struct v9fs_transport *, void *, int);
40 int (*read) (struct v9fs_transport *, void *, int);
41 void (*close) (struct v9fs_transport *);
42};
43
44extern struct v9fs_transport v9fs_trans_tcp;
45extern struct v9fs_transport v9fs_trans_unix;
46extern struct v9fs_transport v9fs_trans_fd;
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
new file mode 100644
index 000000000000..13bdbbab4387
--- /dev/null
+++ b/fs/9p/v9fs.c
@@ -0,0 +1,452 @@
1/*
2 * linux/fs/9p/v9fs.c
3 *
4 * This file contains functions assisting in mapping VFS to 9P2000
5 *
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to:
21 * Free Software Foundation
22 * 51 Franklin Street, Fifth Floor
23 * Boston, MA 02111-1301 USA
24 *
25 */
26
27#include <linux/config.h>
28#include <linux/module.h>
29#include <linux/errno.h>
30#include <linux/fs.h>
31#include <linux/parser.h>
32#include <linux/idr.h>
33
34#include "debug.h"
35#include "v9fs.h"
36#include "9p.h"
37#include "v9fs_vfs.h"
38#include "transport.h"
39#include "mux.h"
40#include "conv.h"
41
42/* TODO: sysfs or debugfs interface */
43int v9fs_debug_level = 0; /* feature-rific global debug level */
44
45/*
46 * Option Parsing (code inspired by NFS code)
47 *
48 */
49
50enum {
51 /* Options that take integer arguments */
52 Opt_port, Opt_msize, Opt_uid, Opt_gid, Opt_afid, Opt_debug,
53 Opt_rfdno, Opt_wfdno,
54 /* String options */
55 Opt_name, Opt_remotename,
56 /* Options that take no arguments */
57 Opt_legacy, Opt_nodevmap, Opt_unix, Opt_tcp, Opt_fd,
58 /* Error token */
59 Opt_err
60};
61
62static match_table_t tokens = {
63 {Opt_port, "port=%u"},
64 {Opt_msize, "msize=%u"},
65 {Opt_uid, "uid=%u"},
66 {Opt_gid, "gid=%u"},
67 {Opt_afid, "afid=%u"},
68 {Opt_rfdno, "rfdno=%u"},
69 {Opt_wfdno, "wfdno=%u"},
70 {Opt_debug, "debug=%u"},
71 {Opt_name, "name=%s"},
72 {Opt_remotename, "aname=%s"},
73 {Opt_unix, "proto=unix"},
74 {Opt_tcp, "proto=tcp"},
75 {Opt_fd, "proto=fd"},
76 {Opt_tcp, "tcp"},
77 {Opt_unix, "unix"},
78 {Opt_fd, "fd"},
79 {Opt_legacy, "noextend"},
80 {Opt_nodevmap, "nodevmap"},
81 {Opt_err, NULL}
82};
83
84/*
85 * Parse option string.
86 */
87
88/**
89 * v9fs_parse_options - parse mount options into session structure
90 * @options: options string passed from mount
91 * @v9ses: existing v9fs session information
92 *
93 */
94
95static void v9fs_parse_options(char *options, struct v9fs_session_info *v9ses)
96{
97 char *p;
98 substring_t args[MAX_OPT_ARGS];
99 int option;
100 int ret;
101
102 /* setup defaults */
103 v9ses->port = V9FS_PORT;
104 v9ses->maxdata = 9000;
105 v9ses->proto = PROTO_TCP;
106 v9ses->extended = 1;
107 v9ses->afid = ~0;
108 v9ses->debug = 0;
109 v9ses->rfdno = ~0;
110 v9ses->wfdno = ~0;
111
112 if (!options)
113 return;
114
115 while ((p = strsep(&options, ",")) != NULL) {
116 int token;
117 if (!*p)
118 continue;
119 token = match_token(p, tokens, args);
120 if (token < Opt_name) {
121 if ((ret = match_int(&args[0], &option)) < 0) {
122 dprintk(DEBUG_ERROR,
123 "integer field, but no integer?\n");
124 continue;
125 }
126
127 }
128 switch (token) {
129 case Opt_port:
130 v9ses->port = option;
131 break;
132 case Opt_msize:
133 v9ses->maxdata = option;
134 break;
135 case Opt_uid:
136 v9ses->uid = option;
137 break;
138 case Opt_gid:
139 v9ses->gid = option;
140 break;
141 case Opt_afid:
142 v9ses->afid = option;
143 break;
144 case Opt_rfdno:
145 v9ses->rfdno = option;
146 break;
147 case Opt_wfdno:
148 v9ses->wfdno = option;
149 break;
150 case Opt_debug:
151 v9ses->debug = option;
152 break;
153 case Opt_tcp:
154 v9ses->proto = PROTO_TCP;
155 break;
156 case Opt_unix:
157 v9ses->proto = PROTO_UNIX;
158 break;
159 case Opt_fd:
160 v9ses->proto = PROTO_FD;
161 break;
162 case Opt_name:
163 match_strcpy(v9ses->name, &args[0]);
164 break;
165 case Opt_remotename:
166 match_strcpy(v9ses->remotename, &args[0]);
167 break;
168 case Opt_legacy:
169 v9ses->extended = 0;
170 break;
171 case Opt_nodevmap:
172 v9ses->nodev = 1;
173 break;
174 default:
175 continue;
176 }
177 }
178}
179
180/**
181 * v9fs_inode2v9ses - safely extract v9fs session info from super block
182 * @inode: inode to extract information from
183 *
184 * Paranoid function to extract v9ses information from superblock,
185 * if anything is missing it will report an error.
186 *
187 */
188
189struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode)
190{
191 return (inode->i_sb->s_fs_info);
192}
193
194/**
195 * v9fs_get_idpool - allocate numeric id from pool
196 * @p - pool to allocate from
197 *
198 * XXX - This seems to be an awful generic function, should it be in idr.c with
199 * the lock included in struct idr?
200 */
201
202int v9fs_get_idpool(struct v9fs_idpool *p)
203{
204 int i = 0;
205 int error;
206
207retry:
208 if (idr_pre_get(&p->pool, GFP_KERNEL) == 0)
209 return 0;
210
211 if (down_interruptible(&p->lock) == -EINTR) {
212 eprintk(KERN_WARNING, "Interrupted while locking\n");
213 return -1;
214 }
215
216 error = idr_get_new(&p->pool, NULL, &i);
217 up(&p->lock);
218
219 if (error == -EAGAIN)
220 goto retry;
221 else if (error)
222 return -1;
223
224 return i;
225}
226
227/**
228 * v9fs_put_idpool - release numeric id from pool
229 * @p - pool to allocate from
230 *
231 * XXX - This seems to be an awful generic function, should it be in idr.c with
232 * the lock included in struct idr?
233 */
234
235void v9fs_put_idpool(int id, struct v9fs_idpool *p)
236{
237 if (down_interruptible(&p->lock) == -EINTR) {
238 eprintk(KERN_WARNING, "Interrupted while locking\n");
239 return;
240 }
241 idr_remove(&p->pool, id);
242 up(&p->lock);
243}
244
245/**
246 * v9fs_session_init - initialize session
247 * @v9ses: session information structure
248 * @dev_name: device being mounted
249 * @data: options
250 *
251 */
252
253int
254v9fs_session_init(struct v9fs_session_info *v9ses,
255 const char *dev_name, char *data)
256{
257 struct v9fs_fcall *fcall = NULL;
258 struct v9fs_transport *trans_proto;
259 int n = 0;
260 int newfid = -1;
261 int retval = -EINVAL;
262
263 v9ses->name = __getname();
264 if (!v9ses->name)
265 return -ENOMEM;
266
267 v9ses->remotename = __getname();
268 if (!v9ses->remotename) {
269 putname(v9ses->name);
270 return -ENOMEM;
271 }
272
273 strcpy(v9ses->name, V9FS_DEFUSER);
274 strcpy(v9ses->remotename, V9FS_DEFANAME);
275
276 v9fs_parse_options(data, v9ses);
277
278 /* set global debug level */
279 v9fs_debug_level = v9ses->debug;
280
281 /* id pools that are session-dependent: FIDs and TIDs */
282 idr_init(&v9ses->fidpool.pool);
283 init_MUTEX(&v9ses->fidpool.lock);
284 idr_init(&v9ses->tidpool.pool);
285 init_MUTEX(&v9ses->tidpool.lock);
286
287
288 switch (v9ses->proto) {
289 case PROTO_TCP:
290 trans_proto = &v9fs_trans_tcp;
291 break;
292 case PROTO_UNIX:
293 trans_proto = &v9fs_trans_unix;
294 *v9ses->remotename = 0;
295 break;
296 case PROTO_FD:
297 trans_proto = &v9fs_trans_fd;
298 *v9ses->remotename = 0;
299 break;
300 default:
301 printk(KERN_ERR "v9fs: Bad mount protocol %d\n", v9ses->proto);
302 retval = -ENOPROTOOPT;
303 goto SessCleanUp;
304 };
305
306 v9ses->transport = trans_proto;
307
308 if ((retval = v9ses->transport->init(v9ses, dev_name, data)) < 0) {
309 eprintk(KERN_ERR, "problem initializing transport\n");
310 goto SessCleanUp;
311 }
312
313 v9ses->inprogress = 0;
314 v9ses->shutdown = 0;
315 v9ses->session_hung = 0;
316
317 if ((retval = v9fs_mux_init(v9ses, dev_name)) < 0) {
318 dprintk(DEBUG_ERROR, "problem initializing mux\n");
319 goto SessCleanUp;
320 }
321
322 if (v9ses->afid == ~0) {
323 if (v9ses->extended)
324 retval =
325 v9fs_t_version(v9ses, v9ses->maxdata, "9P2000.u",
326 &fcall);
327 else
328 retval = v9fs_t_version(v9ses, v9ses->maxdata, "9P2000",
329 &fcall);
330
331 if (retval < 0) {
332 dprintk(DEBUG_ERROR, "v9fs_t_version failed\n");
333 goto FreeFcall;
334 }
335
336 /* Really should check for 9P1 and report error */
337 if (!strcmp(fcall->params.rversion.version, "9P2000.u")) {
338 dprintk(DEBUG_9P, "9P2000 UNIX extensions enabled\n");
339 v9ses->extended = 1;
340 } else {
341 dprintk(DEBUG_9P, "9P2000 legacy mode enabled\n");
342 v9ses->extended = 0;
343 }
344
345 n = fcall->params.rversion.msize;
346 kfree(fcall);
347
348 if (n < v9ses->maxdata)
349 v9ses->maxdata = n;
350 }
351
352 newfid = v9fs_get_idpool(&v9ses->fidpool);
353 if (newfid < 0) {
354 eprintk(KERN_WARNING, "couldn't allocate FID\n");
355 retval = -ENOMEM;
356 goto SessCleanUp;
357 }
358 /* it is a little bit ugly, but we have to prevent newfid */
359 /* being the same as afid, so if it is, get a new fid */
360 if (v9ses->afid != ~0 && newfid == v9ses->afid) {
361 newfid = v9fs_get_idpool(&v9ses->fidpool);
362 if (newfid < 0) {
363 eprintk(KERN_WARNING, "couldn't allocate FID\n");
364 retval = -ENOMEM;
365 goto SessCleanUp;
366 }
367 }
368
369 if ((retval =
370 v9fs_t_attach(v9ses, v9ses->name, v9ses->remotename, newfid,
371 v9ses->afid, NULL))
372 < 0) {
373 dprintk(DEBUG_ERROR, "cannot attach\n");
374 goto SessCleanUp;
375 }
376
377 if (v9ses->afid != ~0) {
378 if (v9fs_t_clunk(v9ses, v9ses->afid, NULL))
379 dprintk(DEBUG_ERROR, "clunk failed\n");
380 }
381
382 return newfid;
383
384 FreeFcall:
385 kfree(fcall);
386
387 SessCleanUp:
388 v9fs_session_close(v9ses);
389 return retval;
390}
391
392/**
393 * v9fs_session_close - shutdown a session
394 * @v9ses: session information structure
395 *
396 */
397
398void v9fs_session_close(struct v9fs_session_info *v9ses)
399{
400 if (v9ses->recvproc) {
401 send_sig(SIGKILL, v9ses->recvproc, 1);
402 wait_for_completion(&v9ses->proccmpl);
403 }
404
405 if (v9ses->transport)
406 v9ses->transport->close(v9ses->transport);
407
408 putname(v9ses->name);
409 putname(v9ses->remotename);
410}
411
412/**
413 * v9fs_session_cancel - mark transport as disconnected
414 * and cancel all pending requests.
415 */
416void v9fs_session_cancel(struct v9fs_session_info *v9ses) {
417 v9ses->transport->status = Disconnected;
418 v9fs_mux_cancel_requests(v9ses, -EIO);
419}
420
421extern int v9fs_error_init(void);
422
423/**
424 * v9fs_init - Initialize module
425 *
426 */
427
428static int __init init_v9fs(void)
429{
430 v9fs_error_init();
431
432 printk(KERN_INFO "Installing v9fs 9P2000 file system support\n");
433
434 return register_filesystem(&v9fs_fs_type);
435}
436
437/**
438 * v9fs_init - shutdown module
439 *
440 */
441
442static void __exit exit_v9fs(void)
443{
444 unregister_filesystem(&v9fs_fs_type);
445}
446
447module_init(init_v9fs)
448module_exit(exit_v9fs)
449
450MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>");
451MODULE_AUTHOR("Ron Minnich <rminnich@lanl.gov>");
452MODULE_LICENSE("GPL");
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
new file mode 100644
index 000000000000..45dcef42bdd6
--- /dev/null
+++ b/fs/9p/v9fs.h
@@ -0,0 +1,103 @@
1/*
2 * V9FS definitions.
3 *
4 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
5 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to:
19 * Free Software Foundation
20 * 51 Franklin Street, Fifth Floor
21 * Boston, MA 02111-1301 USA
22 *
23 */
24
25/*
26 * Idpool structure provides lock and id management
27 *
28 */
29
30struct v9fs_idpool {
31 struct semaphore lock;
32 struct idr pool;
33};
34
35/*
36 * Session structure provides information for an opened session
37 *
38 */
39
40struct v9fs_session_info {
41 /* options */
42 unsigned int maxdata;
43 unsigned char extended; /* set to 1 if we are using UNIX extensions */
44 unsigned char nodev; /* set to 1 if no disable device mapping */
45 unsigned short port; /* port to connect to */
46 unsigned short debug; /* debug level */
47 unsigned short proto; /* protocol to use */
48 unsigned int afid; /* authentication fid */
49 unsigned int rfdno; /* read file descriptor number */
50 unsigned int wfdno; /* write file descriptor number */
51
52
53 char *name; /* user name to mount as */
54 char *remotename; /* name of remote hierarchy being mounted */
55 unsigned int uid; /* default uid/muid for legacy support */
56 unsigned int gid; /* default gid for legacy support */
57
58 /* book keeping */
59 struct v9fs_idpool fidpool; /* The FID pool for file descriptors */
60 struct v9fs_idpool tidpool; /* The TID pool for transactions ids */
61
62 /* transport information */
63 struct v9fs_transport *transport;
64
65 int inprogress; /* session in progress => true */
66 int shutdown; /* session shutting down. no more attaches. */
67 unsigned char session_hung;
68
69 /* mux private data */
70 struct v9fs_fcall *curfcall;
71 wait_queue_head_t read_wait;
72 struct completion fcread;
73 struct completion proccmpl;
74 struct task_struct *recvproc;
75
76 spinlock_t muxlock;
77 struct list_head mux_fcalls;
78};
79
80/* possible values of ->proto */
81enum {
82 PROTO_TCP,
83 PROTO_UNIX,
84 PROTO_FD,
85};
86
87int v9fs_session_init(struct v9fs_session_info *, const char *, char *);
88struct v9fs_session_info *v9fs_inode2v9ses(struct inode *);
89void v9fs_session_close(struct v9fs_session_info *v9ses);
90int v9fs_get_idpool(struct v9fs_idpool *p);
91void v9fs_put_idpool(int id, struct v9fs_idpool *p);
92void v9fs_session_cancel(struct v9fs_session_info *v9ses);
93
94#define V9FS_MAGIC 0x01021997
95
96/* other default globals */
97#define V9FS_PORT 564
98#define V9FS_DEFUSER "nobody"
99#define V9FS_DEFANAME ""
100
101/* inital pool sizes for fids and tags */
102#define V9FS_START_FIDS 8192
103#define V9FS_START_TIDS 256
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
new file mode 100644
index 000000000000..2f2cea7ee3e7
--- /dev/null
+++ b/fs/9p/v9fs_vfs.h
@@ -0,0 +1,53 @@
1/*
2 * V9FS VFS extensions.
3 *
4 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
5 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to:
19 * Free Software Foundation
20 * 51 Franklin Street, Fifth Floor
21 * Boston, MA 02111-1301 USA
22 *
23 */
24
25/* plan9 semantics are that created files are implicitly opened.
26 * But linux semantics are that you call create, then open.
27 * the plan9 approach is superior as it provides an atomic
28 * open.
29 * we track the create fid here. When the file is opened, if fidopen is
30 * non-zero, we use the fid and can skip some steps.
31 * there may be a better way to do this, but I don't know it.
32 * one BAD way is to clunk the fid on create, then open it again:
33 * you lose the atomicity of file open
34 */
35
36/* special case:
37 * unlink calls remove, which is an implicit clunk. So we have to track
38 * that kind of thing so that we don't try to clunk a dead fid.
39 */
40
41extern struct file_system_type v9fs_fs_type;
42extern struct file_operations v9fs_file_operations;
43extern struct file_operations v9fs_dir_operations;
44extern struct dentry_operations v9fs_dentry_operations;
45
46struct inode *v9fs_get_inode(struct super_block *sb, int mode);
47ino_t v9fs_qid2ino(struct v9fs_qid *qid);
48void v9fs_mistat2inode(struct v9fs_stat *, struct inode *,
49 struct super_block *);
50int v9fs_dir_release(struct inode *inode, struct file *filp);
51int v9fs_file_open(struct inode *inode, struct file *file);
52void v9fs_inode2mistat(struct inode *inode, struct v9fs_stat *mistat);
53void v9fs_dentry_release(struct dentry *);
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
new file mode 100644
index 000000000000..306c96741f81
--- /dev/null
+++ b/fs/9p/vfs_dentry.c
@@ -0,0 +1,126 @@
1/*
2 * linux/fs/9p/vfs_dentry.c
3 *
4 * This file contians vfs dentry ops for the 9P2000 protocol.
5 *
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to:
21 * Free Software Foundation
22 * 51 Franklin Street, Fifth Floor
23 * Boston, MA 02111-1301 USA
24 *
25 */
26
27#include <linux/module.h>
28#include <linux/errno.h>
29#include <linux/fs.h>
30#include <linux/file.h>
31#include <linux/pagemap.h>
32#include <linux/stat.h>
33#include <linux/string.h>
34#include <linux/smp_lock.h>
35#include <linux/inet.h>
36#include <linux/namei.h>
37#include <linux/idr.h>
38
39#include "debug.h"
40#include "v9fs.h"
41#include "9p.h"
42#include "v9fs_vfs.h"
43#include "conv.h"
44#include "fid.h"
45
46/**
47 * v9fs_dentry_validate - VFS dcache hook to validate cache
48 * @dentry: dentry that is being validated
49 * @nd: path data
50 *
51 * dcache really shouldn't be used for 9P2000 as at all due to
52 * potential attached semantics to directory traversal (walk).
53 *
54 * FUTURE: look into how to use dcache to allow multi-stage
55 * walks in Plan 9 & potential for better dcache operation which
56 * would remain valid for Plan 9 semantics. Older versions
57 * had validation via stat for those interested. However, since
58 * stat has the same approximate overhead as walk there really
59 * is no difference. The only improvement would be from a
60 * time-decay cache like NFS has and that undermines the
61 * synchronous nature of 9P2000.
62 *
63 */
64
65static int v9fs_dentry_validate(struct dentry *dentry, struct nameidata *nd)
66{
67 struct dentry *dc = current->fs->pwd;
68
69 dprintk(DEBUG_VFS, "dentry: %s (%p)\n", dentry->d_iname, dentry);
70 if (v9fs_fid_lookup(dentry, FID_OP)) {
71 dprintk(DEBUG_VFS, "VALID\n");
72 return 1;
73 }
74
75 while (dc != NULL) {
76 if (dc == dentry) {
77 dprintk(DEBUG_VFS, "VALID\n");
78 return 1;
79 }
80 if (dc == dc->d_parent)
81 break;
82
83 dc = dc->d_parent;
84 }
85
86 dprintk(DEBUG_VFS, "INVALID\n");
87 return 0;
88}
89
90/**
91 * v9fs_dentry_release - called when dentry is going to be freed
92 * @dentry: dentry that is being release
93 *
94 */
95
96void v9fs_dentry_release(struct dentry *dentry)
97{
98 dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
99
100 if (dentry->d_fsdata != NULL) {
101 struct list_head *fid_list = dentry->d_fsdata;
102 struct v9fs_fid *temp = NULL;
103 struct v9fs_fid *current_fid = NULL;
104 struct v9fs_fcall *fcall = NULL;
105
106 list_for_each_entry_safe(current_fid, temp, fid_list, list) {
107 if (v9fs_t_clunk
108 (current_fid->v9ses, current_fid->fid, &fcall))
109 dprintk(DEBUG_ERROR, "clunk failed: %s\n",
110 FCALL_ERROR(fcall));
111
112 v9fs_put_idpool(current_fid->fid,
113 &current_fid->v9ses->fidpool);
114
115 kfree(fcall);
116 v9fs_fid_destroy(current_fid);
117 }
118
119 kfree(dentry->d_fsdata); /* free the list_head */
120 }
121}
122
123struct dentry_operations v9fs_dentry_operations = {
124 .d_revalidate = v9fs_dentry_validate,
125 .d_release = v9fs_dentry_release,
126};
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
new file mode 100644
index 000000000000..c478a7384186
--- /dev/null
+++ b/fs/9p/vfs_dir.c
@@ -0,0 +1,226 @@
1/*
2 * linux/fs/9p/vfs_dir.c
3 *
4 * This file contains vfs directory ops for the 9P2000 protocol.
5 *
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to:
21 * Free Software Foundation
22 * 51 Franklin Street, Fifth Floor
23 * Boston, MA 02111-1301 USA
24 *
25 */
26
27#include <linux/module.h>
28#include <linux/errno.h>
29#include <linux/fs.h>
30#include <linux/file.h>
31#include <linux/stat.h>
32#include <linux/string.h>
33#include <linux/smp_lock.h>
34#include <linux/inet.h>
35#include <linux/idr.h>
36
37#include "debug.h"
38#include "v9fs.h"
39#include "9p.h"
40#include "v9fs_vfs.h"
41#include "conv.h"
42#include "fid.h"
43
44/**
45 * dt_type - return file type
46 * @mistat: mistat structure
47 *
48 */
49
50static inline int dt_type(struct v9fs_stat *mistat)
51{
52 unsigned long perm = mistat->mode;
53 int rettype = DT_REG;
54
55 if (perm & V9FS_DMDIR)
56 rettype = DT_DIR;
57 if (perm & V9FS_DMSYMLINK)
58 rettype = DT_LNK;
59
60 return rettype;
61}
62
63/**
64 * v9fs_dir_readdir - read a directory
65 * @filep: opened file structure
66 * @dirent: directory structure ???
67 * @filldir: function to populate directory structure ???
68 *
69 */
70
71static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
72{
73 struct v9fs_fcall *fcall = NULL;
74 struct inode *inode = filp->f_dentry->d_inode;
75 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
76 struct v9fs_fid *file = filp->private_data;
77 unsigned int i, n;
78 int fid = -1;
79 int ret = 0;
80 struct v9fs_stat *mi = NULL;
81 int over = 0;
82
83 dprintk(DEBUG_VFS, "name %s\n", filp->f_dentry->d_name.name);
84
85 fid = file->fid;
86
87 mi = kmalloc(v9ses->maxdata, GFP_KERNEL);
88 if (!mi)
89 return -ENOMEM;
90
91 if (file->rdir_fcall && (filp->f_pos != file->rdir_pos)) {
92 kfree(file->rdir_fcall);
93 file->rdir_fcall = NULL;
94 }
95
96 if (file->rdir_fcall) {
97 n = file->rdir_fcall->params.rread.count;
98 i = file->rdir_fpos;
99 while (i < n) {
100 int s = v9fs_deserialize_stat(v9ses,
101 file->rdir_fcall->params.rread.data + i,
102 n - i, mi, v9ses->maxdata);
103
104 if (s == 0) {
105 dprintk(DEBUG_ERROR,
106 "error while deserializing mistat\n");
107 ret = -EIO;
108 goto FreeStructs;
109 }
110
111 over = filldir(dirent, mi->name, strlen(mi->name),
112 filp->f_pos, v9fs_qid2ino(&mi->qid),
113 dt_type(mi));
114
115 if (over) {
116 file->rdir_fpos = i;
117 file->rdir_pos = filp->f_pos;
118 break;
119 }
120
121 i += s;
122 filp->f_pos += s;
123 }
124
125 if (!over) {
126 kfree(file->rdir_fcall);
127 file->rdir_fcall = NULL;
128 }
129 }
130
131 while (!over) {
132 ret = v9fs_t_read(v9ses, fid, filp->f_pos,
133 v9ses->maxdata-V9FS_IOHDRSZ, &fcall);
134 if (ret < 0) {
135 dprintk(DEBUG_ERROR, "error while reading: %d: %p\n",
136 ret, fcall);
137 goto FreeStructs;
138 } else if (ret == 0)
139 break;
140
141 n = ret;
142 i = 0;
143 while (i < n) {
144 int s = v9fs_deserialize_stat(v9ses,
145 fcall->params.rread.data + i, n - i, mi,
146 v9ses->maxdata);
147
148 if (s == 0) {
149 dprintk(DEBUG_ERROR,
150 "error while deserializing mistat\n");
151 return -EIO;
152 }
153
154 over = filldir(dirent, mi->name, strlen(mi->name),
155 filp->f_pos, v9fs_qid2ino(&mi->qid),
156 dt_type(mi));
157
158 if (over) {
159 file->rdir_fcall = fcall;
160 file->rdir_fpos = i;
161 file->rdir_pos = filp->f_pos;
162 fcall = NULL;
163 break;
164 }
165
166 i += s;
167 filp->f_pos += s;
168 }
169
170 kfree(fcall);
171 }
172
173 FreeStructs:
174 kfree(fcall);
175 kfree(mi);
176 return ret;
177}
178
179/**
180 * v9fs_dir_release - close a directory
181 * @inode: inode of the directory
182 * @filp: file pointer to a directory
183 *
184 */
185
186int v9fs_dir_release(struct inode *inode, struct file *filp)
187{
188 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
189 struct v9fs_fid *fid = filp->private_data;
190 int fidnum = -1;
191
192 dprintk(DEBUG_VFS, "inode: %p filp: %p fid: %d\n", inode, filp,
193 fid->fid);
194 fidnum = fid->fid;
195
196 filemap_fdatawrite(inode->i_mapping);
197 filemap_fdatawait(inode->i_mapping);
198
199 if (fidnum >= 0) {
200 fid->fidopen--;
201 dprintk(DEBUG_VFS, "fidopen: %d v9f->fid: %d\n", fid->fidopen,
202 fid->fid);
203
204 if (fid->fidopen == 0) {
205 if (v9fs_t_clunk(v9ses, fidnum, NULL))
206 dprintk(DEBUG_ERROR, "clunk failed\n");
207
208 v9fs_put_idpool(fid->fid, &v9ses->fidpool);
209 }
210
211 kfree(fid->rdir_fcall);
212
213 filp->private_data = NULL;
214 v9fs_fid_destroy(fid);
215 }
216
217 d_drop(filp->f_dentry);
218 return 0;
219}
220
221struct file_operations v9fs_dir_operations = {
222 .read = generic_read_dir,
223 .readdir = v9fs_dir_readdir,
224 .open = v9fs_file_open,
225 .release = v9fs_dir_release,
226};
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
new file mode 100644
index 000000000000..1f8ae7d580ab
--- /dev/null
+++ b/fs/9p/vfs_file.c
@@ -0,0 +1,401 @@
1/*
2 * linux/fs/9p/vfs_file.c
3 *
4 * This file contians vfs file ops for 9P2000.
5 *
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to:
21 * Free Software Foundation
22 * 51 Franklin Street, Fifth Floor
23 * Boston, MA 02111-1301 USA
24 *
25 */
26
27#include <linux/module.h>
28#include <linux/errno.h>
29#include <linux/fs.h>
30#include <linux/file.h>
31#include <linux/stat.h>
32#include <linux/string.h>
33#include <linux/smp_lock.h>
34#include <linux/inet.h>
35#include <linux/version.h>
36#include <linux/list.h>
37#include <asm/uaccess.h>
38#include <linux/idr.h>
39
40#include "debug.h"
41#include "v9fs.h"
42#include "9p.h"
43#include "v9fs_vfs.h"
44#include "fid.h"
45
46/**
47 * v9fs_file_open - open a file (or directory)
48 * @inode: inode to be opened
49 * @file: file being opened
50 *
51 */
52
53int v9fs_file_open(struct inode *inode, struct file *file)
54{
55 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
56 struct v9fs_fid *v9fid = v9fs_fid_lookup(file->f_dentry, FID_WALK);
57 struct v9fs_fid *v9newfid = NULL;
58 struct v9fs_fcall *fcall = NULL;
59 int open_mode = 0;
60 unsigned int iounit = 0;
61 int newfid = -1;
62 long result = -1;
63
64 dprintk(DEBUG_VFS, "inode: %p file: %p v9fid= %p\n", inode, file,
65 v9fid);
66
67 if (!v9fid) {
68 struct dentry *dentry = file->f_dentry;
69 dprintk(DEBUG_ERROR, "Couldn't resolve fid from dentry\n");
70
71 /* XXX - some duplication from lookup, generalize later */
72 /* basically vfs_lookup is too heavy weight */
73 v9fid = v9fs_fid_lookup(file->f_dentry, FID_OP);
74 if (!v9fid)
75 return -EBADF;
76
77 v9fid = v9fs_fid_lookup(dentry->d_parent, FID_WALK);
78 if (!v9fid)
79 return -EBADF;
80
81 newfid = v9fs_get_idpool(&v9ses->fidpool);
82 if (newfid < 0) {
83 eprintk(KERN_WARNING, "newfid fails!\n");
84 return -ENOSPC;
85 }
86
87 result =
88 v9fs_t_walk(v9ses, v9fid->fid, newfid,
89 (char *)file->f_dentry->d_name.name, NULL);
90 if (result < 0) {
91 v9fs_put_idpool(newfid, &v9ses->fidpool);
92 dprintk(DEBUG_ERROR, "rewalk didn't work\n");
93 return -EBADF;
94 }
95
96 v9fid = v9fs_fid_create(dentry);
97 if (v9fid == NULL) {
98 dprintk(DEBUG_ERROR, "couldn't insert\n");
99 return -ENOMEM;
100 }
101 v9fid->fid = newfid;
102 }
103
104 if (v9fid->fidcreate) {
105 /* create case */
106 newfid = v9fid->fid;
107 iounit = v9fid->iounit;
108 v9fid->fidcreate = 0;
109 } else {
110 if (!S_ISDIR(inode->i_mode))
111 newfid = v9fid->fid;
112 else {
113 newfid = v9fs_get_idpool(&v9ses->fidpool);
114 if (newfid < 0) {
115 eprintk(KERN_WARNING, "allocation failed\n");
116 return -ENOSPC;
117 }
118 /* This would be a somewhat critical clone */
119 result =
120 v9fs_t_walk(v9ses, v9fid->fid, newfid, NULL,
121 &fcall);
122 if (result < 0) {
123 dprintk(DEBUG_ERROR, "clone error: %s\n",
124 FCALL_ERROR(fcall));
125 kfree(fcall);
126 return result;
127 }
128
129 v9newfid = v9fs_fid_create(file->f_dentry);
130 v9newfid->fid = newfid;
131 v9newfid->qid = v9fid->qid;
132 v9newfid->iounit = v9fid->iounit;
133 v9newfid->fidopen = 0;
134 v9newfid->fidclunked = 0;
135 v9newfid->v9ses = v9ses;
136 v9fid = v9newfid;
137 kfree(fcall);
138 }
139
140 /* TODO: do special things for O_EXCL, O_NOFOLLOW, O_SYNC */
141 /* translate open mode appropriately */
142 open_mode = file->f_flags & 0x3;
143
144 if (file->f_flags & O_EXCL)
145 open_mode |= V9FS_OEXCL;
146
147 if (v9ses->extended) {
148 if (file->f_flags & O_TRUNC)
149 open_mode |= V9FS_OTRUNC;
150
151 if (file->f_flags & O_APPEND)
152 open_mode |= V9FS_OAPPEND;
153 }
154
155 result = v9fs_t_open(v9ses, newfid, open_mode, &fcall);
156 if (result < 0) {
157 dprintk(DEBUG_ERROR,
158 "open failed, open_mode 0x%x: %s\n", open_mode,
159 FCALL_ERROR(fcall));
160 kfree(fcall);
161 return result;
162 }
163
164 iounit = fcall->params.ropen.iounit;
165 kfree(fcall);
166 }
167
168
169 file->private_data = v9fid;
170
171 v9fid->rdir_pos = 0;
172 v9fid->rdir_fcall = NULL;
173 v9fid->fidopen = 1;
174 v9fid->filp = file;
175 v9fid->iounit = iounit;
176
177 return 0;
178}
179
180/**
181 * v9fs_file_lock - lock a file (or directory)
182 * @inode: inode to be opened
183 * @file: file being opened
184 *
185 * XXX - this looks like a local only lock, we should extend into 9P
186 * by using open exclusive
187 */
188
189static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
190{
191 int res = 0;
192 struct inode *inode = filp->f_dentry->d_inode;
193
194 dprintk(DEBUG_VFS, "filp: %p lock: %p\n", filp, fl);
195
196 /* No mandatory locks */
197 if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
198 return -ENOLCK;
199
200 if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) {
201 filemap_fdatawrite(inode->i_mapping);
202 filemap_fdatawait(inode->i_mapping);
203 invalidate_inode_pages(&inode->i_data);
204 }
205
206 return res;
207}
208
209/**
210 * v9fs_read - read from a file (internal)
211 * @filep: file pointer to read
212 * @data: data buffer to read data into
213 * @count: size of buffer
214 * @offset: offset at which to read data
215 *
216 */
217
218static ssize_t
219v9fs_read(struct file *filp, char *buffer, size_t count, loff_t * offset)
220{
221 struct inode *inode = filp->f_dentry->d_inode;
222 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
223 struct v9fs_fid *v9f = filp->private_data;
224 struct v9fs_fcall *fcall = NULL;
225 int fid = v9f->fid;
226 int rsize = 0;
227 int result = 0;
228 int total = 0;
229
230 dprintk(DEBUG_VFS, "\n");
231
232 rsize = v9ses->maxdata - V9FS_IOHDRSZ;
233 if (v9f->iounit != 0 && rsize > v9f->iounit)
234 rsize = v9f->iounit;
235
236 do {
237 if (count < rsize)
238 rsize = count;
239
240 result = v9fs_t_read(v9ses, fid, *offset, rsize, &fcall);
241
242 if (result < 0) {
243 printk(KERN_ERR "9P2000: v9fs_t_read returned %d\n",
244 result);
245
246 kfree(fcall);
247 return total;
248 } else
249 *offset += result;
250
251 /* XXX - extra copy */
252 memcpy(buffer, fcall->params.rread.data, result);
253 count -= result;
254 buffer += result;
255 total += result;
256
257 kfree(fcall);
258
259 if (result < rsize)
260 break;
261 } while (count);
262
263 return total;
264}
265
266/**
267 * v9fs_file_read - read from a file
268 * @filep: file pointer to read
269 * @data: data buffer to read data into
270 * @count: size of buffer
271 * @offset: offset at which to read data
272 *
273 */
274
275static ssize_t
276v9fs_file_read(struct file *filp, char __user * data, size_t count,
277 loff_t * offset)
278{
279 int retval = -1;
280 int ret = 0;
281 char *buffer;
282
283 buffer = kmalloc(count, GFP_KERNEL);
284 if (!buffer)
285 return -ENOMEM;
286
287 retval = v9fs_read(filp, buffer, count, offset);
288 if (retval > 0) {
289 if ((ret = copy_to_user(data, buffer, retval)) != 0) {
290 dprintk(DEBUG_ERROR, "Problem copying to user %d\n",
291 ret);
292 retval = ret;
293 }
294 }
295
296 kfree(buffer);
297
298 return retval;
299}
300
301/**
302 * v9fs_write - write to a file
303 * @filep: file pointer to write
304 * @data: data buffer to write data from
305 * @count: size of buffer
306 * @offset: offset at which to write data
307 *
308 */
309
310static ssize_t
311v9fs_write(struct file *filp, char *buffer, size_t count, loff_t * offset)
312{
313 struct inode *inode = filp->f_dentry->d_inode;
314 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
315 struct v9fs_fid *v9fid = filp->private_data;
316 struct v9fs_fcall *fcall;
317 int fid = v9fid->fid;
318 int result = -EIO;
319 int rsize = 0;
320 int total = 0;
321
322 dprintk(DEBUG_VFS, "data %p count %d offset %x\n", buffer, (int)count,
323 (int)*offset);
324 rsize = v9ses->maxdata - V9FS_IOHDRSZ;
325 if (v9fid->iounit != 0 && rsize > v9fid->iounit)
326 rsize = v9fid->iounit;
327
328 dump_data(buffer, count);
329
330 do {
331 if (count < rsize)
332 rsize = count;
333
334 result =
335 v9fs_t_write(v9ses, fid, *offset, rsize, buffer, &fcall);
336 if (result < 0) {
337 eprintk(KERN_ERR, "error while writing: %s(%d)\n",
338 FCALL_ERROR(fcall), result);
339 kfree(fcall);
340 return result;
341 } else
342 *offset += result;
343
344 kfree(fcall);
345
346 if (result != rsize) {
347 eprintk(KERN_ERR,
348 "short write: v9fs_t_write returned %d\n",
349 result);
350 break;
351 }
352
353 count -= result;
354 buffer += result;
355 total += result;
356 } while (count);
357
358 return total;
359}
360
361/**
362 * v9fs_file_write - write to a file
363 * @filep: file pointer to write
364 * @data: data buffer to write data from
365 * @count: size of buffer
366 * @offset: offset at which to write data
367 *
368 */
369
370static ssize_t
371v9fs_file_write(struct file *filp, const char __user * data,
372 size_t count, loff_t * offset)
373{
374 int ret = -1;
375 char *buffer;
376
377 buffer = kmalloc(count, GFP_KERNEL);
378 if (buffer == NULL)
379 return -ENOMEM;
380
381 ret = copy_from_user(buffer, data, count);
382 if (ret) {
383 dprintk(DEBUG_ERROR, "Problem copying from user\n");
384 ret = -EFAULT;
385 } else {
386 ret = v9fs_write(filp, buffer, count, offset);
387 }
388
389 kfree(buffer);
390
391 return ret;
392}
393
394struct file_operations v9fs_file_operations = {
395 .llseek = generic_file_llseek,
396 .read = v9fs_file_read,
397 .write = v9fs_file_write,
398 .open = v9fs_file_open,
399 .release = v9fs_dir_release,
400 .lock = v9fs_file_lock,
401};
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
new file mode 100644
index 000000000000..0c13fc600049
--- /dev/null
+++ b/fs/9p/vfs_inode.c
@@ -0,0 +1,1338 @@
1/*
2 * linux/fs/9p/vfs_inode.c
3 *
4 * This file contains vfs inode ops for the 9P2000 protocol.
5 *
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to:
21 * Free Software Foundation
22 * 51 Franklin Street, Fifth Floor
23 * Boston, MA 02111-1301 USA
24 *
25 */
26
27#include <linux/module.h>
28#include <linux/errno.h>
29#include <linux/fs.h>
30#include <linux/file.h>
31#include <linux/pagemap.h>
32#include <linux/stat.h>
33#include <linux/string.h>
34#include <linux/smp_lock.h>
35#include <linux/inet.h>
36#include <linux/namei.h>
37#include <linux/idr.h>
38
39#include "debug.h"
40#include "v9fs.h"
41#include "9p.h"
42#include "v9fs_vfs.h"
43#include "conv.h"
44#include "fid.h"
45
46static struct inode_operations v9fs_dir_inode_operations;
47static struct inode_operations v9fs_dir_inode_operations_ext;
48static struct inode_operations v9fs_file_inode_operations;
49static struct inode_operations v9fs_symlink_inode_operations;
50
51/**
52 * unixmode2p9mode - convert unix mode bits to plan 9
53 * @v9ses: v9fs session information
54 * @mode: mode to convert
55 *
56 */
57
58static int unixmode2p9mode(struct v9fs_session_info *v9ses, int mode)
59{
60 int res;
61 res = mode & 0777;
62 if (S_ISDIR(mode))
63 res |= V9FS_DMDIR;
64 if (v9ses->extended) {
65 if (S_ISLNK(mode))
66 res |= V9FS_DMSYMLINK;
67 if (v9ses->nodev == 0) {
68 if (S_ISSOCK(mode))
69 res |= V9FS_DMSOCKET;
70 if (S_ISFIFO(mode))
71 res |= V9FS_DMNAMEDPIPE;
72 if (S_ISBLK(mode))
73 res |= V9FS_DMDEVICE;
74 if (S_ISCHR(mode))
75 res |= V9FS_DMDEVICE;
76 }
77
78 if ((mode & S_ISUID) == S_ISUID)
79 res |= V9FS_DMSETUID;
80 if ((mode & S_ISGID) == S_ISGID)
81 res |= V9FS_DMSETGID;
82 if ((mode & V9FS_DMLINK))
83 res |= V9FS_DMLINK;
84 }
85
86 return res;
87}
88
89/**
90 * p9mode2unixmode- convert plan9 mode bits to unix mode bits
91 * @v9ses: v9fs session information
92 * @mode: mode to convert
93 *
94 */
95
96static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
97{
98 int res;
99
100 res = mode & 0777;
101
102 if ((mode & V9FS_DMDIR) == V9FS_DMDIR)
103 res |= S_IFDIR;
104 else if ((mode & V9FS_DMSYMLINK) && (v9ses->extended))
105 res |= S_IFLNK;
106 else if ((mode & V9FS_DMSOCKET) && (v9ses->extended)
107 && (v9ses->nodev == 0))
108 res |= S_IFSOCK;
109 else if ((mode & V9FS_DMNAMEDPIPE) && (v9ses->extended)
110 && (v9ses->nodev == 0))
111 res |= S_IFIFO;
112 else if ((mode & V9FS_DMDEVICE) && (v9ses->extended)
113 && (v9ses->nodev == 0))
114 res |= S_IFBLK;
115 else
116 res |= S_IFREG;
117
118 if (v9ses->extended) {
119 if ((mode & V9FS_DMSETUID) == V9FS_DMSETUID)
120 res |= S_ISUID;
121
122 if ((mode & V9FS_DMSETGID) == V9FS_DMSETGID)
123 res |= S_ISGID;
124 }
125
126 return res;
127}
128
129/**
130 * v9fs_blank_mistat - helper function to setup a 9P stat structure
131 * @v9ses: 9P session info (for determining extended mode)
132 * @mistat: structure to initialize
133 *
134 */
135
136static void
137v9fs_blank_mistat(struct v9fs_session_info *v9ses, struct v9fs_stat *mistat)
138{
139 mistat->type = ~0;
140 mistat->dev = ~0;
141 mistat->qid.type = ~0;
142 mistat->qid.version = ~0;
143 *((long long *)&mistat->qid.path) = ~0;
144 mistat->mode = ~0;
145 mistat->atime = ~0;
146 mistat->mtime = ~0;
147 mistat->length = ~0;
148 mistat->name = mistat->data;
149 mistat->uid = mistat->data;
150 mistat->gid = mistat->data;
151 mistat->muid = mistat->data;
152 if (v9ses->extended) {
153 mistat->n_uid = ~0;
154 mistat->n_gid = ~0;
155 mistat->n_muid = ~0;
156 mistat->extension = mistat->data;
157 }
158 *mistat->data = 0;
159}
160
161/**
162 * v9fs_mistat2unix - convert mistat to unix stat
163 * @mistat: Plan 9 metadata (mistat) structure
164 * @buf: unix metadata (stat) structure to populate
165 * @sb: superblock
166 *
167 */
168
169static void
170v9fs_mistat2unix(struct v9fs_stat *mistat, struct stat *buf,
171 struct super_block *sb)
172{
173 struct v9fs_session_info *v9ses = sb ? sb->s_fs_info : NULL;
174
175 buf->st_nlink = 1;
176
177 buf->st_atime = mistat->atime;
178 buf->st_mtime = mistat->mtime;
179 buf->st_ctime = mistat->mtime;
180
181 buf->st_uid = (unsigned short)-1;
182 buf->st_gid = (unsigned short)-1;
183
184 if (v9ses && v9ses->extended) {
185 /* TODO: string to uid mapping via user-space daemon */
186 if (mistat->n_uid != -1)
187 sscanf(mistat->uid, "%x", (unsigned int *)&buf->st_uid);
188
189 if (mistat->n_gid != -1)
190 sscanf(mistat->gid, "%x", (unsigned int *)&buf->st_gid);
191 }
192
193 if (buf->st_uid == (unsigned short)-1)
194 buf->st_uid = v9ses->uid;
195 if (buf->st_gid == (unsigned short)-1)
196 buf->st_gid = v9ses->gid;
197
198 buf->st_mode = p9mode2unixmode(v9ses, mistat->mode);
199 if ((S_ISBLK(buf->st_mode)) || (S_ISCHR(buf->st_mode))) {
200 char type = 0;
201 int major = -1;
202 int minor = -1;
203 sscanf(mistat->extension, "%c %u %u", &type, &major, &minor);
204 switch (type) {
205 case 'c':
206 buf->st_mode &= ~S_IFBLK;
207 buf->st_mode |= S_IFCHR;
208 break;
209 case 'b':
210 break;
211 default:
212 dprintk(DEBUG_ERROR, "Unknown special type %c (%s)\n",
213 type, mistat->extension);
214 };
215 buf->st_rdev = MKDEV(major, minor);
216 } else
217 buf->st_rdev = 0;
218
219 buf->st_size = mistat->length;
220
221 buf->st_blksize = sb->s_blocksize;
222 buf->st_blocks =
223 (buf->st_size + buf->st_blksize - 1) >> sb->s_blocksize_bits;
224}
225
226/**
227 * v9fs_get_inode - helper function to setup an inode
228 * @sb: superblock
229 * @mode: mode to setup inode with
230 *
231 */
232
233struct inode *v9fs_get_inode(struct super_block *sb, int mode)
234{
235 struct inode *inode = NULL;
236 struct v9fs_session_info *v9ses = sb->s_fs_info;
237
238 dprintk(DEBUG_VFS, "super block: %p mode: %o\n", sb, mode);
239
240 inode = new_inode(sb);
241 if (inode) {
242 inode->i_mode = mode;
243 inode->i_uid = current->fsuid;
244 inode->i_gid = current->fsgid;
245 inode->i_blksize = sb->s_blocksize;
246 inode->i_blocks = 0;
247 inode->i_rdev = 0;
248 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
249
250 switch (mode & S_IFMT) {
251 case S_IFIFO:
252 case S_IFBLK:
253 case S_IFCHR:
254 case S_IFSOCK:
255 if(!v9ses->extended) {
256 dprintk(DEBUG_ERROR, "special files without extended mode\n");
257 return ERR_PTR(-EINVAL);
258 }
259 init_special_inode(inode, inode->i_mode,
260 inode->i_rdev);
261 break;
262 case S_IFREG:
263 inode->i_op = &v9fs_file_inode_operations;
264 inode->i_fop = &v9fs_file_operations;
265 break;
266 case S_IFLNK:
267 if(!v9ses->extended) {
268 dprintk(DEBUG_ERROR, "extended modes used w/o 9P2000.u\n");
269 return ERR_PTR(-EINVAL);
270 }
271 inode->i_op = &v9fs_symlink_inode_operations;
272 break;
273 case S_IFDIR:
274 inode->i_nlink++;
275 if(v9ses->extended)
276 inode->i_op = &v9fs_dir_inode_operations_ext;
277 else
278 inode->i_op = &v9fs_dir_inode_operations;
279 inode->i_fop = &v9fs_dir_operations;
280 break;
281 default:
282 dprintk(DEBUG_ERROR, "BAD mode 0x%x S_IFMT 0x%x\n",
283 mode, mode & S_IFMT);
284 return ERR_PTR(-EINVAL);
285 }
286 } else {
287 eprintk(KERN_WARNING, "Problem allocating inode\n");
288 return ERR_PTR(-ENOMEM);
289 }
290 return inode;
291}
292
293/**
294 * v9fs_create - helper function to create files and directories
295 * @dir: directory inode file is being created in
296 * @file_dentry: dentry file is being created in
297 * @perm: permissions file is being created with
298 * @open_mode: resulting open mode for file
299 *
300 */
301
302static int
303v9fs_create(struct inode *dir,
304 struct dentry *file_dentry,
305 unsigned int perm, unsigned int open_mode)
306{
307 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
308 struct super_block *sb = dir->i_sb;
309 struct v9fs_fid *dirfid =
310 v9fs_fid_lookup(file_dentry->d_parent, FID_WALK);
311 struct v9fs_fid *fid = NULL;
312 struct inode *file_inode = NULL;
313 struct v9fs_fcall *fcall = NULL;
314 struct v9fs_qid qid;
315 struct stat newstat;
316 int dirfidnum = -1;
317 long newfid = -1;
318 int result = 0;
319 unsigned int iounit = 0;
320
321 perm = unixmode2p9mode(v9ses, perm);
322
323 dprintk(DEBUG_VFS, "dir: %p dentry: %p perm: %o mode: %o\n", dir,
324 file_dentry, perm, open_mode);
325
326 if (!dirfid)
327 return -EBADF;
328
329 dirfidnum = dirfid->fid;
330 if (dirfidnum < 0) {
331 dprintk(DEBUG_ERROR, "No fid for the directory #%lu\n",
332 dir->i_ino);
333 return -EBADF;
334 }
335
336 if (file_dentry->d_inode) {
337 dprintk(DEBUG_ERROR,
338 "Odd. There is an inode for dir %lu, name :%s:\n",
339 dir->i_ino, file_dentry->d_name.name);
340 return -EEXIST;
341 }
342
343 newfid = v9fs_get_idpool(&v9ses->fidpool);
344 if (newfid < 0) {
345 eprintk(KERN_WARNING, "no free fids available\n");
346 return -ENOSPC;
347 }
348
349 result = v9fs_t_walk(v9ses, dirfidnum, newfid, NULL, &fcall);
350 if (result < 0) {
351 dprintk(DEBUG_ERROR, "clone error: %s\n", FCALL_ERROR(fcall));
352 v9fs_put_idpool(newfid, &v9ses->fidpool);
353 newfid = 0;
354 goto CleanUpFid;
355 }
356
357 kfree(fcall);
358
359 result = v9fs_t_create(v9ses, newfid, (char *)file_dentry->d_name.name,
360 perm, open_mode, &fcall);
361 if (result < 0) {
362 dprintk(DEBUG_ERROR, "create fails: %s(%d)\n",
363 FCALL_ERROR(fcall), result);
364
365 goto CleanUpFid;
366 }
367
368 iounit = fcall->params.rcreate.iounit;
369 qid = fcall->params.rcreate.qid;
370 kfree(fcall);
371
372 fid = v9fs_fid_create(file_dentry);
373 if (!fid) {
374 result = -ENOMEM;
375 goto CleanUpFid;
376 }
377
378 fid->fid = newfid;
379 fid->fidopen = 0;
380 fid->fidcreate = 1;
381 fid->qid = qid;
382 fid->iounit = iounit;
383 fid->rdir_pos = 0;
384 fid->rdir_fcall = NULL;
385 fid->v9ses = v9ses;
386
387 if ((perm & V9FS_DMSYMLINK) || (perm & V9FS_DMLINK) ||
388 (perm & V9FS_DMNAMEDPIPE) || (perm & V9FS_DMSOCKET) ||
389 (perm & V9FS_DMDEVICE))
390 return 0;
391
392 result = v9fs_t_stat(v9ses, newfid, &fcall);
393 if (result < 0) {
394 dprintk(DEBUG_ERROR, "stat error: %s(%d)\n", FCALL_ERROR(fcall),
395 result);
396 goto CleanUpFid;
397 }
398
399 v9fs_mistat2unix(fcall->params.rstat.stat, &newstat, sb);
400
401 file_inode = v9fs_get_inode(sb, newstat.st_mode);
402 if ((!file_inode) || IS_ERR(file_inode)) {
403 dprintk(DEBUG_ERROR, "create inode failed\n");
404 result = -EBADF;
405 goto CleanUpFid;
406 }
407
408 v9fs_mistat2inode(fcall->params.rstat.stat, file_inode, sb);
409 kfree(fcall);
410 d_instantiate(file_dentry, file_inode);
411
412 if (perm & V9FS_DMDIR) {
413 if (v9fs_t_clunk(v9ses, newfid, &fcall))
414 dprintk(DEBUG_ERROR, "clunk for mkdir failed: %s\n",
415 FCALL_ERROR(fcall));
416
417 v9fs_put_idpool(newfid, &v9ses->fidpool);
418 kfree(fcall);
419 fid->fidopen = 0;
420 fid->fidcreate = 0;
421 d_drop(file_dentry);
422 }
423
424 return 0;
425
426 CleanUpFid:
427 kfree(fcall);
428
429 if (newfid) {
430 if (v9fs_t_clunk(v9ses, newfid, &fcall))
431 dprintk(DEBUG_ERROR, "clunk failed: %s\n",
432 FCALL_ERROR(fcall));
433
434 v9fs_put_idpool(newfid, &v9ses->fidpool);
435 kfree(fcall);
436 }
437 return result;
438}
439
440/**
441 * v9fs_remove - helper function to remove files and directories
442 * @dir: directory inode that is being deleted
443 * @file: dentry that is being deleted
444 * @rmdir: removing a directory
445 *
446 */
447
448static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
449{
450 struct v9fs_fcall *fcall = NULL;
451 struct super_block *sb = NULL;
452 struct v9fs_session_info *v9ses = NULL;
453 struct v9fs_fid *v9fid = NULL;
454 struct inode *file_inode = NULL;
455 int fid = -1;
456 int result = 0;
457
458 dprintk(DEBUG_VFS, "inode: %p dentry: %p rmdir: %d\n", dir, file,
459 rmdir);
460
461 file_inode = file->d_inode;
462 sb = file_inode->i_sb;
463 v9ses = v9fs_inode2v9ses(file_inode);
464 v9fid = v9fs_fid_lookup(file, FID_OP);
465
466 if (!v9fid) {
467 dprintk(DEBUG_ERROR,
468 "no v9fs_fid\n");
469 return -EBADF;
470 }
471
472 fid = v9fid->fid;
473 if (fid < 0) {
474 dprintk(DEBUG_ERROR, "inode #%lu, no fid!\n",
475 file_inode->i_ino);
476 return -EBADF;
477 }
478
479 result = v9fs_t_remove(v9ses, fid, &fcall);
480 if (result < 0)
481 dprintk(DEBUG_ERROR, "remove of file fails: %s(%d)\n",
482 FCALL_ERROR(fcall), result);
483 else {
484 v9fs_put_idpool(fid, &v9ses->fidpool);
485 v9fs_fid_destroy(v9fid);
486 }
487
488 kfree(fcall);
489 return result;
490}
491
492/**
493 * v9fs_vfs_create - VFS hook to create files
494 * @inode: directory inode that is being deleted
495 * @dentry: dentry that is being deleted
496 * @perm: create permissions
497 * @nd: path information
498 *
499 */
500
501static int
502v9fs_vfs_create(struct inode *inode, struct dentry *dentry, int perm,
503 struct nameidata *nd)
504{
505 return v9fs_create(inode, dentry, perm, O_RDWR);
506}
507
508/**
509 * v9fs_vfs_mkdir - VFS mkdir hook to create a directory
510 * @inode: inode that is being unlinked
511 * @dentry: dentry that is being unlinked
512 * @mode: mode for new directory
513 *
514 */
515
516static int v9fs_vfs_mkdir(struct inode *inode, struct dentry *dentry, int mode)
517{
518 return v9fs_create(inode, dentry, mode | S_IFDIR, O_RDONLY);
519}
520
521/**
522 * v9fs_vfs_lookup - VFS lookup hook to "walk" to a new inode
523 * @dir: inode that is being walked from
524 * @dentry: dentry that is being walked to?
525 * @nameidata: path data
526 *
527 */
528
529static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
530 struct nameidata *nameidata)
531{
532 struct super_block *sb;
533 struct v9fs_session_info *v9ses;
534 struct v9fs_fid *dirfid;
535 struct v9fs_fid *fid;
536 struct inode *inode;
537 struct v9fs_fcall *fcall = NULL;
538 struct stat newstat;
539 int dirfidnum = -1;
540 int newfid = -1;
541 int result = 0;
542
543 dprintk(DEBUG_VFS, "dir: %p dentry: (%s) %p nameidata: %p\n",
544 dir, dentry->d_iname, dentry, nameidata);
545
546 sb = dir->i_sb;
547 v9ses = v9fs_inode2v9ses(dir);
548 dirfid = v9fs_fid_lookup(dentry->d_parent, FID_WALK);
549
550 if (!dirfid) {
551 dprintk(DEBUG_ERROR, "no dirfid\n");
552 return ERR_PTR(-EINVAL);
553 }
554
555 dirfidnum = dirfid->fid;
556
557 if (dirfidnum < 0) {
558 dprintk(DEBUG_ERROR, "no dirfid for inode %p, #%lu\n",
559 dir, dir->i_ino);
560 return ERR_PTR(-EBADF);
561 }
562
563 newfid = v9fs_get_idpool(&v9ses->fidpool);
564 if (newfid < 0) {
565 eprintk(KERN_WARNING, "newfid fails!\n");
566 return ERR_PTR(-ENOSPC);
567 }
568
569 result =
570 v9fs_t_walk(v9ses, dirfidnum, newfid, (char *)dentry->d_name.name,
571 NULL);
572 if (result < 0) {
573 v9fs_put_idpool(newfid, &v9ses->fidpool);
574 if (result == -ENOENT) {
575 d_add(dentry, NULL);
576 dprintk(DEBUG_ERROR,
577 "Return negative dentry %p count %d\n",
578 dentry, atomic_read(&dentry->d_count));
579 return NULL;
580 }
581 dprintk(DEBUG_ERROR, "walk error:%d\n", result);
582 goto FreeFcall;
583 }
584
585 result = v9fs_t_stat(v9ses, newfid, &fcall);
586 if (result < 0) {
587 dprintk(DEBUG_ERROR, "stat error\n");
588 goto FreeFcall;
589 }
590
591 v9fs_mistat2unix(fcall->params.rstat.stat, &newstat, sb);
592 inode = v9fs_get_inode(sb, newstat.st_mode);
593
594 if (IS_ERR(inode) && (PTR_ERR(inode) == -ENOSPC)) {
595 eprintk(KERN_WARNING, "inode alloc failes, returns %ld\n",
596 PTR_ERR(inode));
597
598 result = -ENOSPC;
599 goto FreeFcall;
600 }
601
602 inode->i_ino = v9fs_qid2ino(&fcall->params.rstat.stat->qid);
603
604 fid = v9fs_fid_create(dentry);
605 if (fid == NULL) {
606 dprintk(DEBUG_ERROR, "couldn't insert\n");
607 result = -ENOMEM;
608 goto FreeFcall;
609 }
610
611 fid->fid = newfid;
612 fid->fidopen = 0;
613 fid->v9ses = v9ses;
614 fid->qid = fcall->params.rstat.stat->qid;
615
616 dentry->d_op = &v9fs_dentry_operations;
617 v9fs_mistat2inode(fcall->params.rstat.stat, inode, inode->i_sb);
618
619 d_add(dentry, inode);
620 kfree(fcall);
621
622 return NULL;
623
624 FreeFcall:
625 kfree(fcall);
626 return ERR_PTR(result);
627}
628
629/**
630 * v9fs_vfs_unlink - VFS unlink hook to delete an inode
631 * @i: inode that is being unlinked
632 * @d: dentry that is being unlinked
633 *
634 */
635
636static int v9fs_vfs_unlink(struct inode *i, struct dentry *d)
637{
638 return v9fs_remove(i, d, 0);
639}
640
641/**
642 * v9fs_vfs_rmdir - VFS unlink hook to delete a directory
643 * @i: inode that is being unlinked
644 * @d: dentry that is being unlinked
645 *
646 */
647
648static int v9fs_vfs_rmdir(struct inode *i, struct dentry *d)
649{
650 return v9fs_remove(i, d, 1);
651}
652
653/**
654 * v9fs_vfs_rename - VFS hook to rename an inode
655 * @old_dir: old dir inode
656 * @old_dentry: old dentry
657 * @new_dir: new dir inode
658 * @new_dentry: new dentry
659 *
660 */
661
662static int
663v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
664 struct inode *new_dir, struct dentry *new_dentry)
665{
666 struct inode *old_inode = old_dentry->d_inode;
667 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(old_inode);
668 struct v9fs_fid *oldfid = v9fs_fid_lookup(old_dentry, FID_WALK);
669 struct v9fs_fid *olddirfid =
670 v9fs_fid_lookup(old_dentry->d_parent, FID_WALK);
671 struct v9fs_fid *newdirfid =
672 v9fs_fid_lookup(new_dentry->d_parent, FID_WALK);
673 struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
674 struct v9fs_fcall *fcall = NULL;
675 int fid = -1;
676 int olddirfidnum = -1;
677 int newdirfidnum = -1;
678 int retval = 0;
679
680 dprintk(DEBUG_VFS, "\n");
681
682 if (!mistat)
683 return -ENOMEM;
684
685 if ((!oldfid) || (!olddirfid) || (!newdirfid)) {
686 dprintk(DEBUG_ERROR, "problem with arguments\n");
687 return -EBADF;
688 }
689
690 /* 9P can only handle file rename in the same directory */
691 if (memcmp(&olddirfid->qid, &newdirfid->qid, sizeof(newdirfid->qid))) {
692 dprintk(DEBUG_ERROR, "old dir and new dir are different\n");
693 retval = -EPERM;
694 goto FreeFcallnBail;
695 }
696
697 fid = oldfid->fid;
698 olddirfidnum = olddirfid->fid;
699 newdirfidnum = newdirfid->fid;
700
701 if (fid < 0) {
702 dprintk(DEBUG_ERROR, "no fid for old file #%lu\n",
703 old_inode->i_ino);
704 retval = -EBADF;
705 goto FreeFcallnBail;
706 }
707
708 v9fs_blank_mistat(v9ses, mistat);
709
710 strcpy(mistat->data + 1, v9ses->name);
711 mistat->name = mistat->data + 1 + strlen(v9ses->name);
712
713 if (new_dentry->d_name.len >
714 (v9ses->maxdata - strlen(v9ses->name) - sizeof(struct v9fs_stat))) {
715 dprintk(DEBUG_ERROR, "new name too long\n");
716 goto FreeFcallnBail;
717 }
718
719 strcpy(mistat->name, new_dentry->d_name.name);
720 retval = v9fs_t_wstat(v9ses, fid, mistat, &fcall);
721
722 FreeFcallnBail:
723 kfree(mistat);
724
725 if (retval < 0)
726 dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
727 FCALL_ERROR(fcall));
728
729 kfree(fcall);
730 return retval;
731}
732
733/**
734 * v9fs_vfs_getattr - retreive file metadata
735 * @mnt - mount information
736 * @dentry - file to get attributes on
737 * @stat - metadata structure to populate
738 *
739 */
740
741static int
742v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
743 struct kstat *stat)
744{
745 struct v9fs_fcall *fcall = NULL;
746 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode);
747 struct v9fs_fid *fid = v9fs_fid_lookup(dentry, FID_OP);
748 int err = -EPERM;
749
750 dprintk(DEBUG_VFS, "dentry: %p\n", dentry);
751 if (!fid) {
752 dprintk(DEBUG_ERROR,
753 "couldn't find fid associated with dentry\n");
754 return -EBADF;
755 }
756
757 err = v9fs_t_stat(v9ses, fid->fid, &fcall);
758
759 if (err < 0)
760 dprintk(DEBUG_ERROR, "stat error\n");
761 else {
762 v9fs_mistat2inode(fcall->params.rstat.stat, dentry->d_inode,
763 dentry->d_inode->i_sb);
764 generic_fillattr(dentry->d_inode, stat);
765 }
766
767 kfree(fcall);
768 return err;
769}
770
771/**
772 * v9fs_vfs_setattr - set file metadata
773 * @dentry: file whose metadata to set
774 * @iattr: metadata assignment structure
775 *
776 */
777
778static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
779{
780 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode);
781 struct v9fs_fid *fid = v9fs_fid_lookup(dentry, FID_OP);
782 struct v9fs_fcall *fcall = NULL;
783 struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
784 int res = -EPERM;
785
786 dprintk(DEBUG_VFS, "\n");
787
788 if (!mistat)
789 return -ENOMEM;
790
791 if (!fid) {
792 dprintk(DEBUG_ERROR,
793 "Couldn't find fid associated with dentry\n");
794 return -EBADF;
795 }
796
797 v9fs_blank_mistat(v9ses, mistat);
798 if (iattr->ia_valid & ATTR_MODE)
799 mistat->mode = unixmode2p9mode(v9ses, iattr->ia_mode);
800
801 if (iattr->ia_valid & ATTR_MTIME)
802 mistat->mtime = iattr->ia_mtime.tv_sec;
803
804 if (iattr->ia_valid & ATTR_ATIME)
805 mistat->atime = iattr->ia_atime.tv_sec;
806
807 if (iattr->ia_valid & ATTR_SIZE)
808 mistat->length = iattr->ia_size;
809
810 if (v9ses->extended) {
811 char *ptr = mistat->data+1;
812
813 if (iattr->ia_valid & ATTR_UID) {
814 mistat->uid = ptr;
815 ptr += 1+sprintf(ptr, "%08x", iattr->ia_uid);
816 mistat->n_uid = iattr->ia_uid;
817 }
818
819 if (iattr->ia_valid & ATTR_GID) {
820 mistat->gid = ptr;
821 ptr += 1+sprintf(ptr, "%08x", iattr->ia_gid);
822 mistat->n_gid = iattr->ia_gid;
823 }
824 }
825
826 res = v9fs_t_wstat(v9ses, fid->fid, mistat, &fcall);
827
828 if (res < 0)
829 dprintk(DEBUG_ERROR, "wstat error: %s\n", FCALL_ERROR(fcall));
830
831 kfree(mistat);
832 kfree(fcall);
833
834 if (res >= 0)
835 res = inode_setattr(dentry->d_inode, iattr);
836
837 return res;
838}
839
840/**
841 * v9fs_mistat2inode - populate an inode structure with mistat info
842 * @mistat: Plan 9 metadata (mistat) structure
843 * @inode: inode to populate
844 * @sb: superblock of filesystem
845 *
846 */
847
848void
849v9fs_mistat2inode(struct v9fs_stat *mistat, struct inode *inode,
850 struct super_block *sb)
851{
852 struct v9fs_session_info *v9ses = sb->s_fs_info;
853
854 inode->i_nlink = 1;
855
856 inode->i_atime.tv_sec = mistat->atime;
857 inode->i_mtime.tv_sec = mistat->mtime;
858 inode->i_ctime.tv_sec = mistat->mtime;
859
860 inode->i_uid = -1;
861 inode->i_gid = -1;
862
863 if (v9ses->extended) {
864 /* TODO: string to uid mapping via user-space daemon */
865 inode->i_uid = mistat->n_uid;
866 inode->i_gid = mistat->n_gid;
867
868 if (mistat->n_uid == -1)
869 sscanf(mistat->uid, "%x", &inode->i_uid);
870
871 if (mistat->n_gid == -1)
872 sscanf(mistat->gid, "%x", &inode->i_gid);
873 }
874
875 if (inode->i_uid == -1)
876 inode->i_uid = v9ses->uid;
877 if (inode->i_gid == -1)
878 inode->i_gid = v9ses->gid;
879
880 inode->i_mode = p9mode2unixmode(v9ses, mistat->mode);
881 if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode))) {
882 char type = 0;
883 int major = -1;
884 int minor = -1;
885 sscanf(mistat->extension, "%c %u %u", &type, &major, &minor);
886 switch (type) {
887 case 'c':
888 inode->i_mode &= ~S_IFBLK;
889 inode->i_mode |= S_IFCHR;
890 break;
891 case 'b':
892 break;
893 default:
894 dprintk(DEBUG_ERROR, "Unknown special type %c (%s)\n",
895 type, mistat->extension);
896 };
897 inode->i_rdev = MKDEV(major, minor);
898 } else
899 inode->i_rdev = 0;
900
901 inode->i_size = mistat->length;
902
903 inode->i_blksize = sb->s_blocksize;
904 inode->i_blocks =
905 (inode->i_size + inode->i_blksize - 1) >> sb->s_blocksize_bits;
906}
907
908/**
909 * v9fs_qid2ino - convert qid into inode number
910 * @qid: qid to hash
911 *
912 * BUG: potential for inode number collisions?
913 */
914
915ino_t v9fs_qid2ino(struct v9fs_qid *qid)
916{
917 u64 path = qid->path + 2;
918 ino_t i = 0;
919
920 if (sizeof(ino_t) == sizeof(path))
921 memcpy(&i, &path, sizeof(ino_t));
922 else
923 i = (ino_t) (path ^ (path >> 32));
924
925 return i;
926}
927
928/**
929 * v9fs_vfs_symlink - helper function to create symlinks
930 * @dir: directory inode containing symlink
931 * @dentry: dentry for symlink
932 * @symname: symlink data
933 *
934 * See 9P2000.u RFC for more information
935 *
936 */
937
938static int
939v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
940{
941 int retval = -EPERM;
942 struct v9fs_fid *newfid;
943 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
944 struct v9fs_fcall *fcall = NULL;
945 struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
946
947 dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
948 symname);
949
950 if (!mistat)
951 return -ENOMEM;
952
953 if (!v9ses->extended) {
954 dprintk(DEBUG_ERROR, "not extended\n");
955 goto FreeFcall;
956 }
957
958 /* issue a create */
959 retval = v9fs_create(dir, dentry, S_IFLNK, 0);
960 if (retval != 0)
961 goto FreeFcall;
962
963 newfid = v9fs_fid_lookup(dentry, FID_OP);
964
965 /* issue a twstat */
966 v9fs_blank_mistat(v9ses, mistat);
967 strcpy(mistat->data + 1, symname);
968 mistat->extension = mistat->data + 1;
969 retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall);
970 if (retval < 0) {
971 dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
972 FCALL_ERROR(fcall));
973 goto FreeFcall;
974 }
975
976 kfree(fcall);
977
978 if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) {
979 dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n",
980 FCALL_ERROR(fcall));
981 goto FreeFcall;
982 }
983
984 d_drop(dentry); /* FID - will this also clunk? */
985
986 FreeFcall:
987 kfree(mistat);
988 kfree(fcall);
989
990 return retval;
991}
992
993/**
994 * v9fs_readlink - read a symlink's location (internal version)
995 * @dentry: dentry for symlink
996 * @buffer: buffer to load symlink location into
997 * @buflen: length of buffer
998 *
999 */
1000
1001static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
1002{
1003 int retval = -EPERM;
1004
1005 struct v9fs_fcall *fcall = NULL;
1006 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode);
1007 struct v9fs_fid *fid = v9fs_fid_lookup(dentry, FID_OP);
1008
1009 if (!fid) {
1010 dprintk(DEBUG_ERROR, "could not resolve fid from dentry\n");
1011 retval = -EBADF;
1012 goto FreeFcall;
1013 }
1014
1015 if (!v9ses->extended) {
1016 retval = -EBADF;
1017 dprintk(DEBUG_ERROR, "not extended\n");
1018 goto FreeFcall;
1019 }
1020
1021 dprintk(DEBUG_VFS, " %s\n", dentry->d_name.name);
1022 retval = v9fs_t_stat(v9ses, fid->fid, &fcall);
1023
1024 if (retval < 0) {
1025 dprintk(DEBUG_ERROR, "stat error\n");
1026 goto FreeFcall;
1027 }
1028
1029 if (!fcall)
1030 return -EIO;
1031
1032 if (!(fcall->params.rstat.stat->mode & V9FS_DMSYMLINK)) {
1033 retval = -EINVAL;
1034 goto FreeFcall;
1035 }
1036
1037 /* copy extension buffer into buffer */
1038 if (strlen(fcall->params.rstat.stat->extension) < buflen)
1039 buflen = strlen(fcall->params.rstat.stat->extension);
1040
1041 memcpy(buffer, fcall->params.rstat.stat->extension, buflen + 1);
1042
1043 retval = buflen;
1044
1045 FreeFcall:
1046 kfree(fcall);
1047
1048 return retval;
1049}
1050
1051/**
1052 * v9fs_vfs_readlink - read a symlink's location
1053 * @dentry: dentry for symlink
1054 * @buf: buffer to load symlink location into
1055 * @buflen: length of buffer
1056 *
1057 */
1058
1059static int v9fs_vfs_readlink(struct dentry *dentry, char __user * buffer,
1060 int buflen)
1061{
1062 int retval;
1063 int ret;
1064 char *link = __getname();
1065
1066 if (strlen(link) < buflen)
1067 buflen = strlen(link);
1068
1069 dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
1070
1071 retval = v9fs_readlink(dentry, link, buflen);
1072
1073 if (retval > 0) {
1074 if ((ret = copy_to_user(buffer, link, retval)) != 0) {
1075 dprintk(DEBUG_ERROR, "problem copying to user: %d\n",
1076 ret);
1077 retval = ret;
1078 }
1079 }
1080
1081 putname(link);
1082 return retval;
1083}
1084
1085/**
1086 * v9fs_vfs_follow_link - follow a symlink path
1087 * @dentry: dentry for symlink
1088 * @nd: nameidata
1089 *
1090 */
1091
1092static void *v9fs_vfs_follow_link(struct dentry *dentry, struct nameidata *nd)
1093{
1094 int len = 0;
1095 char *link = __getname();
1096
1097 dprintk(DEBUG_VFS, "%s n", dentry->d_name.name);
1098
1099 if (!link)
1100 link = ERR_PTR(-ENOMEM);
1101 else {
1102 len = v9fs_readlink(dentry, link, strlen(link));
1103
1104 if (len < 0) {
1105 putname(link);
1106 link = ERR_PTR(len);
1107 } else
1108 link[len] = 0;
1109 }
1110 nd_set_link(nd, link);
1111
1112 return NULL;
1113}
1114
1115/**
1116 * v9fs_vfs_put_link - release a symlink path
1117 * @dentry: dentry for symlink
1118 * @nd: nameidata
1119 *
1120 */
1121
1122static void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
1123{
1124 char *s = nd_get_link(nd);
1125
1126 dprintk(DEBUG_VFS, " %s %s\n", dentry->d_name.name, s);
1127 if (!IS_ERR(s))
1128 putname(s);
1129}
1130
1131/**
1132 * v9fs_vfs_link - create a hardlink
1133 * @old_dentry: dentry for file to link to
1134 * @dir: inode destination for new link
1135 * @dentry: dentry for link
1136 *
1137 */
1138
1139/* XXX - lots of code dup'd from symlink and creates,
1140 * figure out a better reuse strategy
1141 */
1142
1143static int
1144v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
1145 struct dentry *dentry)
1146{
1147 int retval = -EPERM;
1148 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
1149 struct v9fs_fcall *fcall = NULL;
1150 struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
1151 struct v9fs_fid *oldfid = v9fs_fid_lookup(old_dentry, FID_OP);
1152 struct v9fs_fid *newfid = NULL;
1153 char *symname = __getname();
1154
1155 dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
1156 old_dentry->d_name.name);
1157
1158 if (!v9ses->extended) {
1159 dprintk(DEBUG_ERROR, "not extended\n");
1160 goto FreeMem;
1161 }
1162
1163 /* get fid of old_dentry */
1164 sprintf(symname, "hardlink(%d)\n", oldfid->fid);
1165
1166 /* issue a create */
1167 retval = v9fs_create(dir, dentry, V9FS_DMLINK, 0);
1168 if (retval != 0)
1169 goto FreeMem;
1170
1171 newfid = v9fs_fid_lookup(dentry, FID_OP);
1172 if (!newfid) {
1173 dprintk(DEBUG_ERROR, "couldn't resolve fid from dentry\n");
1174 goto FreeMem;
1175 }
1176
1177 /* issue a twstat */
1178 v9fs_blank_mistat(v9ses, mistat);
1179 strcpy(mistat->data + 1, symname);
1180 mistat->extension = mistat->data + 1;
1181 retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall);
1182 if (retval < 0) {
1183 dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
1184 FCALL_ERROR(fcall));
1185 goto FreeMem;
1186 }
1187
1188 kfree(fcall);
1189
1190 if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) {
1191 dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n",
1192 FCALL_ERROR(fcall));
1193 goto FreeMem;
1194 }
1195
1196 d_drop(dentry); /* FID - will this also clunk? */
1197
1198 kfree(fcall);
1199 fcall = NULL;
1200
1201 FreeMem:
1202 kfree(mistat);
1203 kfree(fcall);
1204 putname(symname);
1205 return retval;
1206}
1207
1208/**
1209 * v9fs_vfs_mknod - create a special file
1210 * @dir: inode destination for new link
1211 * @dentry: dentry for file
1212 * @mode: mode for creation
1213 * @dev_t: device associated with special file
1214 *
1215 */
1216
1217static int
1218v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
1219{
1220 int retval = -EPERM;
1221 struct v9fs_fid *newfid;
1222 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
1223 struct v9fs_fcall *fcall = NULL;
1224 struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
1225 char *symname = __getname();
1226
1227 dprintk(DEBUG_VFS, " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
1228 dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev));
1229
1230 if (!mistat)
1231 return -ENOMEM;
1232
1233 if (!new_valid_dev(rdev)) {
1234 retval = -EINVAL;
1235 goto FreeMem;
1236 }
1237
1238 if (!v9ses->extended) {
1239 dprintk(DEBUG_ERROR, "not extended\n");
1240 goto FreeMem;
1241 }
1242
1243 /* issue a create */
1244 retval = v9fs_create(dir, dentry, mode, 0);
1245
1246 if (retval != 0)
1247 goto FreeMem;
1248
1249 newfid = v9fs_fid_lookup(dentry, FID_OP);
1250 if (!newfid) {
1251 dprintk(DEBUG_ERROR, "coudn't resove fid from dentry\n");
1252 retval = -EINVAL;
1253 goto FreeMem;
1254 }
1255
1256 /* build extension */
1257 if (S_ISBLK(mode))
1258 sprintf(symname, "b %u %u", MAJOR(rdev), MINOR(rdev));
1259 else if (S_ISCHR(mode))
1260 sprintf(symname, "c %u %u", MAJOR(rdev), MINOR(rdev));
1261 else if (S_ISFIFO(mode))
1262 ; /* DO NOTHING */
1263 else {
1264 retval = -EINVAL;
1265 goto FreeMem;
1266 }
1267
1268 if (!S_ISFIFO(mode)) {
1269 /* issue a twstat */
1270 v9fs_blank_mistat(v9ses, mistat);
1271 strcpy(mistat->data + 1, symname);
1272 mistat->extension = mistat->data + 1;
1273 retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall);
1274 if (retval < 0) {
1275 dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
1276 FCALL_ERROR(fcall));
1277 goto FreeMem;
1278 }
1279 }
1280
1281 /* need to update dcache so we show up */
1282 kfree(fcall);
1283
1284 if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) {
1285 dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n",
1286 FCALL_ERROR(fcall));
1287 goto FreeMem;
1288 }
1289
1290 d_drop(dentry); /* FID - will this also clunk? */
1291
1292 FreeMem:
1293 kfree(mistat);
1294 kfree(fcall);
1295 putname(symname);
1296
1297 return retval;
1298}
1299
1300static struct inode_operations v9fs_dir_inode_operations_ext = {
1301 .create = v9fs_vfs_create,
1302 .lookup = v9fs_vfs_lookup,
1303 .symlink = v9fs_vfs_symlink,
1304 .link = v9fs_vfs_link,
1305 .unlink = v9fs_vfs_unlink,
1306 .mkdir = v9fs_vfs_mkdir,
1307 .rmdir = v9fs_vfs_rmdir,
1308 .mknod = v9fs_vfs_mknod,
1309 .rename = v9fs_vfs_rename,
1310 .readlink = v9fs_vfs_readlink,
1311 .getattr = v9fs_vfs_getattr,
1312 .setattr = v9fs_vfs_setattr,
1313};
1314
1315static struct inode_operations v9fs_dir_inode_operations = {
1316 .create = v9fs_vfs_create,
1317 .lookup = v9fs_vfs_lookup,
1318 .unlink = v9fs_vfs_unlink,
1319 .mkdir = v9fs_vfs_mkdir,
1320 .rmdir = v9fs_vfs_rmdir,
1321 .mknod = v9fs_vfs_mknod,
1322 .rename = v9fs_vfs_rename,
1323 .getattr = v9fs_vfs_getattr,
1324 .setattr = v9fs_vfs_setattr,
1325};
1326
1327static struct inode_operations v9fs_file_inode_operations = {
1328 .getattr = v9fs_vfs_getattr,
1329 .setattr = v9fs_vfs_setattr,
1330};
1331
1332static struct inode_operations v9fs_symlink_inode_operations = {
1333 .readlink = v9fs_vfs_readlink,
1334 .follow_link = v9fs_vfs_follow_link,
1335 .put_link = v9fs_vfs_put_link,
1336 .getattr = v9fs_vfs_getattr,
1337 .setattr = v9fs_vfs_setattr,
1338};
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
new file mode 100644
index 000000000000..868f350b2c5f
--- /dev/null
+++ b/fs/9p/vfs_super.c
@@ -0,0 +1,280 @@
1/*
2 * linux/fs/9p/vfs_super.c
3 *
4 * This file contians superblock ops for 9P2000. It is intended that
5 * you mount this file system on directories.
6 *
7 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
8 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to:
22 * Free Software Foundation
23 * 51 Franklin Street, Fifth Floor
24 * Boston, MA 02111-1301 USA
25 *
26 */
27
28#include <linux/kernel.h>
29#include <linux/config.h>
30#include <linux/module.h>
31#include <linux/errno.h>
32#include <linux/fs.h>
33#include <linux/file.h>
34#include <linux/stat.h>
35#include <linux/string.h>
36#include <linux/smp_lock.h>
37#include <linux/inet.h>
38#include <linux/pagemap.h>
39#include <linux/seq_file.h>
40#include <linux/mount.h>
41#include <linux/idr.h>
42
43#include "debug.h"
44#include "v9fs.h"
45#include "9p.h"
46#include "v9fs_vfs.h"
47#include "conv.h"
48#include "fid.h"
49
50static void v9fs_clear_inode(struct inode *);
51static struct super_operations v9fs_super_ops;
52
53/**
54 * v9fs_clear_inode - release an inode
55 * @inode: inode to release
56 *
57 */
58
59static void v9fs_clear_inode(struct inode *inode)
60{
61 filemap_fdatawrite(inode->i_mapping);
62}
63
64/**
65 * v9fs_set_super - set the superblock
66 * @s: super block
67 * @data: file system specific data
68 *
69 */
70
71static int v9fs_set_super(struct super_block *s, void *data)
72{
73 s->s_fs_info = data;
74 return set_anon_super(s, data);
75}
76
77/**
78 * v9fs_fill_super - populate superblock with info
79 * @sb: superblock
80 * @v9ses: session information
81 *
82 */
83
84static void
85v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
86 int flags)
87{
88 sb->s_maxbytes = MAX_LFS_FILESIZE;
89 sb->s_blocksize_bits = fls(v9ses->maxdata - 1);
90 sb->s_blocksize = 1 << sb->s_blocksize_bits;
91 sb->s_magic = V9FS_MAGIC;
92 sb->s_op = &v9fs_super_ops;
93
94 sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC |
95 MS_NODIRATIME | MS_NOATIME;
96}
97
98/**
99 * v9fs_get_sb - mount a superblock
100 * @fs_type: file system type
101 * @flags: mount flags
102 * @dev_name: device name that was mounted
103 * @data: mount options
104 *
105 */
106
107static struct super_block *v9fs_get_sb(struct file_system_type
108 *fs_type, int flags,
109 const char *dev_name, void *data)
110{
111 struct super_block *sb = NULL;
112 struct v9fs_fcall *fcall = NULL;
113 struct inode *inode = NULL;
114 struct dentry *root = NULL;
115 struct v9fs_session_info *v9ses = NULL;
116 struct v9fs_fid *root_fid = NULL;
117 int mode = S_IRWXUGO | S_ISVTX;
118 uid_t uid = current->fsuid;
119 gid_t gid = current->fsgid;
120 int stat_result = 0;
121 int newfid = 0;
122 int retval = 0;
123
124 dprintk(DEBUG_VFS, " \n");
125
126 v9ses = kcalloc(1, sizeof(struct v9fs_session_info), GFP_KERNEL);
127 if (!v9ses)
128 return ERR_PTR(-ENOMEM);
129
130 if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) {
131 dprintk(DEBUG_ERROR, "problem initiating session\n");
132 retval = newfid;
133 goto free_session;
134 }
135
136 sb = sget(fs_type, NULL, v9fs_set_super, v9ses);
137
138 v9fs_fill_super(sb, v9ses, flags);
139
140 inode = v9fs_get_inode(sb, S_IFDIR | mode);
141 if (IS_ERR(inode)) {
142 retval = PTR_ERR(inode);
143 goto put_back_sb;
144 }
145
146 inode->i_uid = uid;
147 inode->i_gid = gid;
148
149 root = d_alloc_root(inode);
150
151 if (!root) {
152 retval = -ENOMEM;
153 goto release_inode;
154 }
155
156 sb->s_root = root;
157
158 /* Setup the Root Inode */
159 root_fid = v9fs_fid_create(root);
160 if (root_fid == NULL) {
161 retval = -ENOMEM;
162 goto release_dentry;
163 }
164
165 root_fid->fidopen = 0;
166 root_fid->v9ses = v9ses;
167
168 stat_result = v9fs_t_stat(v9ses, newfid, &fcall);
169 if (stat_result < 0) {
170 dprintk(DEBUG_ERROR, "stat error\n");
171 v9fs_t_clunk(v9ses, newfid, NULL);
172 v9fs_put_idpool(newfid, &v9ses->fidpool);
173 } else {
174 root_fid->fid = newfid;
175 root_fid->qid = fcall->params.rstat.stat->qid;
176 root->d_inode->i_ino =
177 v9fs_qid2ino(&fcall->params.rstat.stat->qid);
178 v9fs_mistat2inode(fcall->params.rstat.stat, root->d_inode, sb);
179 }
180
181 kfree(fcall);
182
183 if (stat_result < 0) {
184 retval = stat_result;
185 goto release_dentry;
186 }
187
188 return sb;
189
190 release_dentry:
191 dput(sb->s_root);
192
193 release_inode:
194 iput(inode);
195
196 put_back_sb:
197 up_write(&sb->s_umount);
198 deactivate_super(sb);
199 v9fs_session_close(v9ses);
200
201 free_session:
202 kfree(v9ses);
203
204 return ERR_PTR(retval);
205}
206
207/**
208 * v9fs_kill_super - Kill Superblock
209 * @s: superblock
210 *
211 */
212
213static void v9fs_kill_super(struct super_block *s)
214{
215 struct v9fs_session_info *v9ses = s->s_fs_info;
216
217 dprintk(DEBUG_VFS, " %p\n", s);
218
219 v9fs_dentry_release(s->s_root); /* clunk root */
220
221 kill_anon_super(s);
222
223 v9fs_session_close(v9ses);
224 kfree(v9ses);
225 dprintk(DEBUG_VFS, "exiting kill_super\n");
226}
227
228/**
229 * v9fs_show_options - Show mount options in /proc/mounts
230 * @m: seq_file to write to
231 * @mnt: mount descriptor
232 *
233 */
234
235static int v9fs_show_options(struct seq_file *m, struct vfsmount *mnt)
236{
237 struct v9fs_session_info *v9ses = mnt->mnt_sb->s_fs_info;
238
239 if (v9ses->debug != 0)
240 seq_printf(m, ",debug=%u", v9ses->debug);
241 if (v9ses->port != V9FS_PORT)
242 seq_printf(m, ",port=%u", v9ses->port);
243 if (v9ses->maxdata != 9000)
244 seq_printf(m, ",msize=%u", v9ses->maxdata);
245 if (v9ses->afid != ~0)
246 seq_printf(m, ",afid=%u", v9ses->afid);
247 if (v9ses->proto == PROTO_UNIX)
248 seq_puts(m, ",proto=unix");
249 if (v9ses->extended == 0)
250 seq_puts(m, ",noextend");
251 if (v9ses->nodev == 1)
252 seq_puts(m, ",nodevmap");
253 seq_printf(m, ",name=%s", v9ses->name);
254 seq_printf(m, ",aname=%s", v9ses->remotename);
255 seq_printf(m, ",uid=%u", v9ses->uid);
256 seq_printf(m, ",gid=%u", v9ses->gid);
257 return 0;
258}
259
260static void
261v9fs_umount_begin(struct super_block *sb)
262{
263 struct v9fs_session_info *v9ses = sb->s_fs_info;
264
265 v9fs_session_cancel(v9ses);
266}
267
268static struct super_operations v9fs_super_ops = {
269 .statfs = simple_statfs,
270 .clear_inode = v9fs_clear_inode,
271 .show_options = v9fs_show_options,
272 .umount_begin = v9fs_umount_begin,
273};
274
275struct file_system_type v9fs_fs_type = {
276 .name = "9P",
277 .get_sb = v9fs_get_sb,
278 .kill_sb = v9fs_kill_super,
279 .owner = THIS_MODULE,
280};
diff --git a/fs/Kconfig b/fs/Kconfig
index e54be7058359..068ccea2f184 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -382,10 +382,8 @@ config QUOTA
382 usage (also called disk quotas). Currently, it works for the 382 usage (also called disk quotas). Currently, it works for the
383 ext2, ext3, and reiserfs file system. ext3 also supports journalled 383 ext2, ext3, and reiserfs file system. ext3 also supports journalled
384 quotas for which you don't need to run quotacheck(8) after an unclean 384 quotas for which you don't need to run quotacheck(8) after an unclean
385 shutdown. You need additional software in order to use quota support 385 shutdown.
386 (you can download sources from 386 For further details, read the Quota mini-HOWTO, available from
387 <http://www.sf.net/projects/linuxquota/>). For further details, read
388 the Quota mini-HOWTO, available from
389 <http://www.tldp.org/docs.html#howto>, or the documentation provided 387 <http://www.tldp.org/docs.html#howto>, or the documentation provided
390 with the quota tools. Probably the quota support is only useful for 388 with the quota tools. Probably the quota support is only useful for
391 multi user systems. If unsure, say N. 389 multi user systems. If unsure, say N.
@@ -403,8 +401,7 @@ config QFMT_V2
403 depends on QUOTA 401 depends on QUOTA
404 help 402 help
405 This quota format allows using quotas with 32-bit UIDs/GIDs. If you 403 This quota format allows using quotas with 32-bit UIDs/GIDs. If you
406 need this functionality say Y here. Note that you will need recent 404 need this functionality say Y here.
407 quota utilities (>= 3.01) for new quota format with this kernel.
408 405
409config QUOTACTL 406config QUOTACTL
410 bool 407 bool
@@ -465,6 +462,19 @@ config AUTOFS4_FS
465 local network, you probably do not need an automounter, and can say 462 local network, you probably do not need an automounter, and can say
466 N here. 463 N here.
467 464
465config FUSE_FS
466 tristate "Filesystem in Userspace support"
467 help
468 With FUSE it is possible to implement a fully functional filesystem
469 in a userspace program.
470
471 There's also companion library: libfuse. This library along with
472 utilities is available from the FUSE homepage:
473 <http://fuse.sourceforge.net/>
474
475 If you want to develop a userspace FS, or if you want to use
476 a filesystem based on FUSE, answer Y or M.
477
468menu "CD-ROM/DVD Filesystems" 478menu "CD-ROM/DVD Filesystems"
469 479
470config ISO9660_FS 480config ISO9660_FS
@@ -783,28 +793,6 @@ config SYSFS
783 793
784 Designers of embedded systems may wish to say N here to conserve space. 794 Designers of embedded systems may wish to say N here to conserve space.
785 795
786config DEVPTS_FS_XATTR
787 bool "/dev/pts Extended Attributes"
788 depends on UNIX98_PTYS
789 help
790 Extended attributes are name:value pairs associated with inodes by
791 the kernel or by users (see the attr(5) manual page, or visit
792 <http://acl.bestbits.at/> for details).
793
794 If unsure, say N.
795
796config DEVPTS_FS_SECURITY
797 bool "/dev/pts Security Labels"
798 depends on DEVPTS_FS_XATTR
799 help
800 Security labels support alternative access control models
801 implemented by security modules like SELinux. This option
802 enables an extended attribute handler for file security
803 labels in the /dev/pts filesystem.
804
805 If you are not using a security module that requires using
806 extended attributes for file security labels, say N.
807
808config TMPFS 796config TMPFS
809 bool "Virtual memory file system support (former shm fs)" 797 bool "Virtual memory file system support (former shm fs)"
810 help 798 help
@@ -817,27 +805,6 @@ config TMPFS
817 805
818 See <file:Documentation/filesystems/tmpfs.txt> for details. 806 See <file:Documentation/filesystems/tmpfs.txt> for details.
819 807
820config TMPFS_XATTR
821 bool "tmpfs Extended Attributes"
822 depends on TMPFS
823 help
824 Extended attributes are name:value pairs associated with inodes by
825 the kernel or by users (see the attr(5) manual page, or visit
826 <http://acl.bestbits.at/> for details).
827
828 If unsure, say N.
829
830config TMPFS_SECURITY
831 bool "tmpfs Security Labels"
832 depends on TMPFS_XATTR
833 help
834 Security labels support alternative access control models
835 implemented by security modules like SELinux. This option
836 enables an extended attribute handler for file security
837 labels in the tmpfs filesystem.
838 If you are not using a security module that requires using
839 extended attributes for file security labels, say N.
840
841config HUGETLBFS 808config HUGETLBFS
842 bool "HugeTLB file system support" 809 bool "HugeTLB file system support"
843 depends X86 || IA64 || PPC64 || SPARC64 || SUPERH || X86_64 || BROKEN 810 depends X86 || IA64 || PPC64 || SPARC64 || SUPERH || X86_64 || BROKEN
@@ -859,6 +826,18 @@ config RAMFS
859 To compile this as a module, choose M here: the module will be called 826 To compile this as a module, choose M here: the module will be called
860 ramfs. 827 ramfs.
861 828
829config RELAYFS_FS
830 tristate "Relayfs file system support"
831 ---help---
832 Relayfs is a high-speed data relay filesystem designed to provide
833 an efficient mechanism for tools and facilities to relay large
834 amounts of data from kernel space to user space.
835
836 To compile this code as a module, choose M here: the module will be
837 called relayfs.
838
839 If unsure, say N.
840
862endmenu 841endmenu
863 842
864menu "Miscellaneous filesystems" 843menu "Miscellaneous filesystems"
@@ -1737,6 +1716,17 @@ config AFS_FS
1737config RXRPC 1716config RXRPC
1738 tristate 1717 tristate
1739 1718
1719config 9P_FS
1720 tristate "Plan 9 Resource Sharing Support (9P2000) (Experimental)"
1721 depends on INET && EXPERIMENTAL
1722 help
1723 If you say Y here, you will get experimental support for
1724 Plan 9 resource sharing via the 9P2000 protocol.
1725
1726 See <http://v9fs.sf.net> for more information.
1727
1728 If unsure, say N.
1729
1740endmenu 1730endmenu
1741 1731
1742menu "Partition Types" 1732menu "Partition Types"
diff --git a/fs/Makefile b/fs/Makefile
index cf95eb894fd5..1972da186272 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -89,10 +89,13 @@ obj-$(CONFIG_QNX4FS_FS) += qnx4/
89obj-$(CONFIG_AUTOFS_FS) += autofs/ 89obj-$(CONFIG_AUTOFS_FS) += autofs/
90obj-$(CONFIG_AUTOFS4_FS) += autofs4/ 90obj-$(CONFIG_AUTOFS4_FS) += autofs4/
91obj-$(CONFIG_ADFS_FS) += adfs/ 91obj-$(CONFIG_ADFS_FS) += adfs/
92obj-$(CONFIG_FUSE_FS) += fuse/
92obj-$(CONFIG_UDF_FS) += udf/ 93obj-$(CONFIG_UDF_FS) += udf/
94obj-$(CONFIG_RELAYFS_FS) += relayfs/
93obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/ 95obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/
94obj-$(CONFIG_JFS_FS) += jfs/ 96obj-$(CONFIG_JFS_FS) += jfs/
95obj-$(CONFIG_XFS_FS) += xfs/ 97obj-$(CONFIG_XFS_FS) += xfs/
98obj-$(CONFIG_9P_FS) += 9p/
96obj-$(CONFIG_AFS_FS) += afs/ 99obj-$(CONFIG_AFS_FS) += afs/
97obj-$(CONFIG_BEFS_FS) += befs/ 100obj-$(CONFIG_BEFS_FS) += befs/
98obj-$(CONFIG_HOSTFS) += hostfs/ 101obj-$(CONFIG_HOSTFS) += hostfs/
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 7aa6f2004536..9ebe881c6786 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -255,6 +255,7 @@ void
255affs_delete_inode(struct inode *inode) 255affs_delete_inode(struct inode *inode)
256{ 256{
257 pr_debug("AFFS: delete_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink); 257 pr_debug("AFFS: delete_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
258 truncate_inode_pages(&inode->i_data, 0);
258 inode->i_size = 0; 259 inode->i_size = 0;
259 if (S_ISREG(inode->i_mode)) 260 if (S_ISREG(inode->i_mode))
260 affs_truncate(inode); 261 affs_truncate(inode);
diff --git a/fs/aio.c b/fs/aio.c
index 06d7d4390fe7..38f62680fd63 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -29,6 +29,7 @@
29#include <linux/highmem.h> 29#include <linux/highmem.h>
30#include <linux/workqueue.h> 30#include <linux/workqueue.h>
31#include <linux/security.h> 31#include <linux/security.h>
32#include <linux/rcuref.h>
32 33
33#include <asm/kmap_types.h> 34#include <asm/kmap_types.h>
34#include <asm/uaccess.h> 35#include <asm/uaccess.h>
@@ -499,7 +500,7 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
499 /* Must be done under the lock to serialise against cancellation. 500 /* Must be done under the lock to serialise against cancellation.
500 * Call this aio_fput as it duplicates fput via the fput_work. 501 * Call this aio_fput as it duplicates fput via the fput_work.
501 */ 502 */
502 if (unlikely(atomic_dec_and_test(&req->ki_filp->f_count))) { 503 if (unlikely(rcuref_dec_and_test(&req->ki_filp->f_count))) {
503 get_ioctx(ctx); 504 get_ioctx(ctx);
504 spin_lock(&fput_lock); 505 spin_lock(&fput_lock);
505 list_add(&req->ki_list, &fput_head); 506 list_add(&req->ki_list, &fput_head);
@@ -546,6 +547,24 @@ struct kioctx *lookup_ioctx(unsigned long ctx_id)
546 return ioctx; 547 return ioctx;
547} 548}
548 549
550static int lock_kiocb_action(void *param)
551{
552 schedule();
553 return 0;
554}
555
556static inline void lock_kiocb(struct kiocb *iocb)
557{
558 wait_on_bit_lock(&iocb->ki_flags, KIF_LOCKED, lock_kiocb_action,
559 TASK_UNINTERRUPTIBLE);
560}
561
562static inline void unlock_kiocb(struct kiocb *iocb)
563{
564 kiocbClearLocked(iocb);
565 wake_up_bit(&iocb->ki_flags, KIF_LOCKED);
566}
567
549/* 568/*
550 * use_mm 569 * use_mm
551 * Makes the calling kernel thread take on the specified 570 * Makes the calling kernel thread take on the specified
@@ -567,6 +586,10 @@ static void use_mm(struct mm_struct *mm)
567 atomic_inc(&mm->mm_count); 586 atomic_inc(&mm->mm_count);
568 tsk->mm = mm; 587 tsk->mm = mm;
569 tsk->active_mm = mm; 588 tsk->active_mm = mm;
589 /*
590 * Note that on UML this *requires* PF_BORROWED_MM to be set, otherwise
591 * it won't work. Update it accordingly if you change it here
592 */
570 activate_mm(active_mm, mm); 593 activate_mm(active_mm, mm);
571 task_unlock(tsk); 594 task_unlock(tsk);
572 595
@@ -782,7 +805,9 @@ static int __aio_run_iocbs(struct kioctx *ctx)
782 * Hold an extra reference while retrying i/o. 805 * Hold an extra reference while retrying i/o.
783 */ 806 */
784 iocb->ki_users++; /* grab extra reference */ 807 iocb->ki_users++; /* grab extra reference */
808 lock_kiocb(iocb);
785 aio_run_iocb(iocb); 809 aio_run_iocb(iocb);
810 unlock_kiocb(iocb);
786 if (__aio_put_req(ctx, iocb)) /* drop extra ref */ 811 if (__aio_put_req(ctx, iocb)) /* drop extra ref */
787 put_ioctx(ctx); 812 put_ioctx(ctx);
788 } 813 }
@@ -1523,10 +1548,9 @@ int fastcall io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1523 goto out_put_req; 1548 goto out_put_req;
1524 1549
1525 spin_lock_irq(&ctx->ctx_lock); 1550 spin_lock_irq(&ctx->ctx_lock);
1526 if (likely(list_empty(&ctx->run_list))) { 1551 aio_run_iocb(req);
1527 aio_run_iocb(req); 1552 unlock_kiocb(req);
1528 } else { 1553 if (!list_empty(&ctx->run_list)) {
1529 list_add_tail(&req->ki_run_list, &ctx->run_list);
1530 /* drain the run list */ 1554 /* drain the run list */
1531 while (__aio_run_iocbs(ctx)) 1555 while (__aio_run_iocbs(ctx))
1532 ; 1556 ;
@@ -1657,6 +1681,7 @@ asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb,
1657 if (NULL != cancel) { 1681 if (NULL != cancel) {
1658 struct io_event tmp; 1682 struct io_event tmp;
1659 pr_debug("calling cancel\n"); 1683 pr_debug("calling cancel\n");
1684 lock_kiocb(kiocb);
1660 memset(&tmp, 0, sizeof(tmp)); 1685 memset(&tmp, 0, sizeof(tmp));
1661 tmp.obj = (u64)(unsigned long)kiocb->ki_obj.user; 1686 tmp.obj = (u64)(unsigned long)kiocb->ki_obj.user;
1662 tmp.data = kiocb->ki_user_data; 1687 tmp.data = kiocb->ki_user_data;
@@ -1668,8 +1693,9 @@ asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb,
1668 if (copy_to_user(result, &tmp, sizeof(tmp))) 1693 if (copy_to_user(result, &tmp, sizeof(tmp)))
1669 ret = -EFAULT; 1694 ret = -EFAULT;
1670 } 1695 }
1696 unlock_kiocb(kiocb);
1671 } else 1697 } else
1672 printk(KERN_DEBUG "iocb has no cancel operation\n"); 1698 ret = -EINVAL;
1673 1699
1674 put_ioctx(ctx); 1700 put_ioctx(ctx);
1675 1701
diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
index 6171431272dc..990c28da5aec 100644
--- a/fs/autofs/autofs_i.h
+++ b/fs/autofs/autofs_i.h
@@ -105,6 +105,7 @@ struct autofs_sb_info {
105 struct file *pipe; 105 struct file *pipe;
106 pid_t oz_pgrp; 106 pid_t oz_pgrp;
107 int catatonic; 107 int catatonic;
108 struct super_block *sb;
108 unsigned long exp_timeout; 109 unsigned long exp_timeout;
109 ino_t next_dir_ino; 110 ino_t next_dir_ino;
110 struct autofs_wait_queue *queues; /* Wait queue pointer */ 111 struct autofs_wait_queue *queues; /* Wait queue pointer */
@@ -134,7 +135,7 @@ void autofs_hash_insert(struct autofs_dirhash *,struct autofs_dir_ent *);
134void autofs_hash_delete(struct autofs_dir_ent *); 135void autofs_hash_delete(struct autofs_dir_ent *);
135struct autofs_dir_ent *autofs_hash_enum(const struct autofs_dirhash *,off_t *,struct autofs_dir_ent *); 136struct autofs_dir_ent *autofs_hash_enum(const struct autofs_dirhash *,off_t *,struct autofs_dir_ent *);
136void autofs_hash_dputall(struct autofs_dirhash *); 137void autofs_hash_dputall(struct autofs_dirhash *);
137void autofs_hash_nuke(struct autofs_dirhash *); 138void autofs_hash_nuke(struct autofs_sb_info *);
138 139
139/* Expiration-handling functions */ 140/* Expiration-handling functions */
140 141
diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c
index 448143fd0796..5ccfcf26310d 100644
--- a/fs/autofs/dirhash.c
+++ b/fs/autofs/dirhash.c
@@ -232,13 +232,13 @@ void autofs_hash_dputall(struct autofs_dirhash *dh)
232 232
233/* Delete everything. This is used on filesystem destruction, so we 233/* Delete everything. This is used on filesystem destruction, so we
234 make no attempt to keep the pointers valid */ 234 make no attempt to keep the pointers valid */
235void autofs_hash_nuke(struct autofs_dirhash *dh) 235void autofs_hash_nuke(struct autofs_sb_info *sbi)
236{ 236{
237 int i; 237 int i;
238 struct autofs_dir_ent *ent, *nent; 238 struct autofs_dir_ent *ent, *nent;
239 239
240 for ( i = 0 ; i < AUTOFS_HASH_SIZE ; i++ ) { 240 for ( i = 0 ; i < AUTOFS_HASH_SIZE ; i++ ) {
241 for ( ent = dh->h[i] ; ent ; ent = nent ) { 241 for ( ent = sbi->dirhash.h[i] ; ent ; ent = nent ) {
242 nent = ent->next; 242 nent = ent->next;
243 if ( ent->dentry ) 243 if ( ent->dentry )
244 dput(ent->dentry); 244 dput(ent->dentry);
@@ -246,4 +246,5 @@ void autofs_hash_nuke(struct autofs_dirhash *dh)
246 kfree(ent); 246 kfree(ent);
247 } 247 }
248 } 248 }
249 shrink_dcache_sb(sbi->sb);
249} 250}
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index 4888c1fabbf7..65e5ed42190e 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -27,7 +27,7 @@ static void autofs_put_super(struct super_block *sb)
27 if ( !sbi->catatonic ) 27 if ( !sbi->catatonic )
28 autofs_catatonic_mode(sbi); /* Free wait queues, close pipe */ 28 autofs_catatonic_mode(sbi); /* Free wait queues, close pipe */
29 29
30 autofs_hash_nuke(&sbi->dirhash); 30 autofs_hash_nuke(sbi);
31 for ( n = 0 ; n < AUTOFS_MAX_SYMLINKS ; n++ ) { 31 for ( n = 0 ; n < AUTOFS_MAX_SYMLINKS ; n++ ) {
32 if ( test_bit(n, sbi->symlink_bitmap) ) 32 if ( test_bit(n, sbi->symlink_bitmap) )
33 kfree(sbi->symlink[n].data); 33 kfree(sbi->symlink[n].data);
@@ -148,6 +148,7 @@ int autofs_fill_super(struct super_block *s, void *data, int silent)
148 s->s_magic = AUTOFS_SUPER_MAGIC; 148 s->s_magic = AUTOFS_SUPER_MAGIC;
149 s->s_op = &autofs_sops; 149 s->s_op = &autofs_sops;
150 s->s_time_gran = 1; 150 s->s_time_gran = 1;
151 sbi->sb = s;
151 152
152 root_inode = iget(s, AUTOFS_ROOT_INO); 153 root_inode = iget(s, AUTOFS_ROOT_INO);
153 root = d_alloc_root(root_inode); 154 root = d_alloc_root(root_inode);
diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h
index 1020dbc88bec..1fbc53f14aba 100644
--- a/fs/bfs/bfs.h
+++ b/fs/bfs/bfs.h
@@ -20,7 +20,6 @@ struct bfs_sb_info {
20 unsigned long si_lasti; 20 unsigned long si_lasti;
21 unsigned long * si_imap; 21 unsigned long * si_imap;
22 struct buffer_head * si_sbh; /* buffer header w/superblock */ 22 struct buffer_head * si_sbh; /* buffer header w/superblock */
23 struct bfs_super_block * si_bfs_sb; /* superblock in si_sbh->b_data */
24}; 23};
25 24
26/* 25/*
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 5a1e5ce057ff..e240c335eb23 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -2,6 +2,7 @@
2 * fs/bfs/dir.c 2 * fs/bfs/dir.c
3 * BFS directory operations. 3 * BFS directory operations.
4 * Copyright (C) 1999,2000 Tigran Aivazian <tigran@veritas.com> 4 * Copyright (C) 1999,2000 Tigran Aivazian <tigran@veritas.com>
5 * Made endianness-clean by Andrew Stribblehill <ads@wompom.org> 2005
5 */ 6 */
6 7
7#include <linux/time.h> 8#include <linux/time.h>
@@ -20,9 +21,9 @@
20#define dprintf(x...) 21#define dprintf(x...)
21#endif 22#endif
22 23
23static int bfs_add_entry(struct inode * dir, const char * name, int namelen, int ino); 24static int bfs_add_entry(struct inode * dir, const unsigned char * name, int namelen, int ino);
24static struct buffer_head * bfs_find_entry(struct inode * dir, 25static struct buffer_head * bfs_find_entry(struct inode * dir,
25 const char * name, int namelen, struct bfs_dirent ** res_dir); 26 const unsigned char * name, int namelen, struct bfs_dirent ** res_dir);
26 27
27static int bfs_readdir(struct file * f, void * dirent, filldir_t filldir) 28static int bfs_readdir(struct file * f, void * dirent, filldir_t filldir)
28{ 29{
@@ -53,7 +54,7 @@ static int bfs_readdir(struct file * f, void * dirent, filldir_t filldir)
53 de = (struct bfs_dirent *)(bh->b_data + offset); 54 de = (struct bfs_dirent *)(bh->b_data + offset);
54 if (de->ino) { 55 if (de->ino) {
55 int size = strnlen(de->name, BFS_NAMELEN); 56 int size = strnlen(de->name, BFS_NAMELEN);
56 if (filldir(dirent, de->name, size, f->f_pos, de->ino, DT_UNKNOWN) < 0) { 57 if (filldir(dirent, de->name, size, f->f_pos, le16_to_cpu(de->ino), DT_UNKNOWN) < 0) {
57 brelse(bh); 58 brelse(bh);
58 unlock_kernel(); 59 unlock_kernel();
59 return 0; 60 return 0;
@@ -107,7 +108,7 @@ static int bfs_create(struct inode * dir, struct dentry * dentry, int mode,
107 inode->i_mapping->a_ops = &bfs_aops; 108 inode->i_mapping->a_ops = &bfs_aops;
108 inode->i_mode = mode; 109 inode->i_mode = mode;
109 inode->i_ino = ino; 110 inode->i_ino = ino;
110 BFS_I(inode)->i_dsk_ino = ino; 111 BFS_I(inode)->i_dsk_ino = cpu_to_le16(ino);
111 BFS_I(inode)->i_sblock = 0; 112 BFS_I(inode)->i_sblock = 0;
112 BFS_I(inode)->i_eblock = 0; 113 BFS_I(inode)->i_eblock = 0;
113 insert_inode_hash(inode); 114 insert_inode_hash(inode);
@@ -139,7 +140,7 @@ static struct dentry * bfs_lookup(struct inode * dir, struct dentry * dentry, st
139 lock_kernel(); 140 lock_kernel();
140 bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de); 141 bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de);
141 if (bh) { 142 if (bh) {
142 unsigned long ino = le32_to_cpu(de->ino); 143 unsigned long ino = (unsigned long)le16_to_cpu(de->ino);
143 brelse(bh); 144 brelse(bh);
144 inode = iget(dir->i_sb, ino); 145 inode = iget(dir->i_sb, ino);
145 if (!inode) { 146 if (!inode) {
@@ -183,7 +184,7 @@ static int bfs_unlink(struct inode * dir, struct dentry * dentry)
183 inode = dentry->d_inode; 184 inode = dentry->d_inode;
184 lock_kernel(); 185 lock_kernel();
185 bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de); 186 bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de);
186 if (!bh || de->ino != inode->i_ino) 187 if (!bh || le16_to_cpu(de->ino) != inode->i_ino)
187 goto out_brelse; 188 goto out_brelse;
188 189
189 if (!inode->i_nlink) { 190 if (!inode->i_nlink) {
@@ -224,7 +225,7 @@ static int bfs_rename(struct inode * old_dir, struct dentry * old_dentry,
224 old_dentry->d_name.name, 225 old_dentry->d_name.name,
225 old_dentry->d_name.len, &old_de); 226 old_dentry->d_name.len, &old_de);
226 227
227 if (!old_bh || old_de->ino != old_inode->i_ino) 228 if (!old_bh || le16_to_cpu(old_de->ino) != old_inode->i_ino)
228 goto end_rename; 229 goto end_rename;
229 230
230 error = -EPERM; 231 error = -EPERM;
@@ -270,7 +271,7 @@ struct inode_operations bfs_dir_inops = {
270 .rename = bfs_rename, 271 .rename = bfs_rename,
271}; 272};
272 273
273static int bfs_add_entry(struct inode * dir, const char * name, int namelen, int ino) 274static int bfs_add_entry(struct inode * dir, const unsigned char * name, int namelen, int ino)
274{ 275{
275 struct buffer_head * bh; 276 struct buffer_head * bh;
276 struct bfs_dirent * de; 277 struct bfs_dirent * de;
@@ -304,7 +305,7 @@ static int bfs_add_entry(struct inode * dir, const char * name, int namelen, int
304 } 305 }
305 dir->i_mtime = CURRENT_TIME_SEC; 306 dir->i_mtime = CURRENT_TIME_SEC;
306 mark_inode_dirty(dir); 307 mark_inode_dirty(dir);
307 de->ino = ino; 308 de->ino = cpu_to_le16((u16)ino);
308 for (i=0; i<BFS_NAMELEN; i++) 309 for (i=0; i<BFS_NAMELEN; i++)
309 de->name[i] = (i < namelen) ? name[i] : 0; 310 de->name[i] = (i < namelen) ? name[i] : 0;
310 mark_buffer_dirty(bh); 311 mark_buffer_dirty(bh);
@@ -317,7 +318,7 @@ static int bfs_add_entry(struct inode * dir, const char * name, int namelen, int
317 return -ENOSPC; 318 return -ENOSPC;
318} 319}
319 320
320static inline int bfs_namecmp(int len, const char * name, const char * buffer) 321static inline int bfs_namecmp(int len, const unsigned char * name, const char * buffer)
321{ 322{
322 if (len < BFS_NAMELEN && buffer[len]) 323 if (len < BFS_NAMELEN && buffer[len])
323 return 0; 324 return 0;
@@ -325,7 +326,7 @@ static inline int bfs_namecmp(int len, const char * name, const char * buffer)
325} 326}
326 327
327static struct buffer_head * bfs_find_entry(struct inode * dir, 328static struct buffer_head * bfs_find_entry(struct inode * dir,
328 const char * name, int namelen, struct bfs_dirent ** res_dir) 329 const unsigned char * name, int namelen, struct bfs_dirent ** res_dir)
329{ 330{
330 unsigned long block, offset; 331 unsigned long block, offset;
331 struct buffer_head * bh; 332 struct buffer_head * bh;
@@ -346,7 +347,7 @@ static struct buffer_head * bfs_find_entry(struct inode * dir,
346 } 347 }
347 de = (struct bfs_dirent *)(bh->b_data + offset); 348 de = (struct bfs_dirent *)(bh->b_data + offset);
348 offset += BFS_DIRENT_SIZE; 349 offset += BFS_DIRENT_SIZE;
349 if (de->ino && bfs_namecmp(namelen, name, de->name)) { 350 if (le16_to_cpu(de->ino) && bfs_namecmp(namelen, name, de->name)) {
350 *res_dir = de; 351 *res_dir = de;
351 return bh; 352 return bh;
352 } 353 }
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index 747fd1ea55e0..807723b65daf 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -40,8 +40,8 @@ static int bfs_move_block(unsigned long from, unsigned long to, struct super_blo
40 return 0; 40 return 0;
41} 41}
42 42
43static int bfs_move_blocks(struct super_block *sb, unsigned long start, unsigned long end, 43static int bfs_move_blocks(struct super_block *sb, unsigned long start,
44 unsigned long where) 44 unsigned long end, unsigned long where)
45{ 45{
46 unsigned long i; 46 unsigned long i;
47 47
@@ -57,20 +57,21 @@ static int bfs_move_blocks(struct super_block *sb, unsigned long start, unsigned
57static int bfs_get_block(struct inode * inode, sector_t block, 57static int bfs_get_block(struct inode * inode, sector_t block,
58 struct buffer_head * bh_result, int create) 58 struct buffer_head * bh_result, int create)
59{ 59{
60 long phys; 60 unsigned long phys;
61 int err; 61 int err;
62 struct super_block *sb = inode->i_sb; 62 struct super_block *sb = inode->i_sb;
63 struct bfs_sb_info *info = BFS_SB(sb); 63 struct bfs_sb_info *info = BFS_SB(sb);
64 struct bfs_inode_info *bi = BFS_I(inode); 64 struct bfs_inode_info *bi = BFS_I(inode);
65 struct buffer_head *sbh = info->si_sbh; 65 struct buffer_head *sbh = info->si_sbh;
66 66
67 if (block < 0 || block > info->si_blocks) 67 if (block > info->si_blocks)
68 return -EIO; 68 return -EIO;
69 69
70 phys = bi->i_sblock + block; 70 phys = bi->i_sblock + block;
71 if (!create) { 71 if (!create) {
72 if (phys <= bi->i_eblock) { 72 if (phys <= bi->i_eblock) {
73 dprintf("c=%d, b=%08lx, phys=%08lx (granted)\n", create, block, phys); 73 dprintf("c=%d, b=%08lx, phys=%09lx (granted)\n",
74 create, (unsigned long)block, phys);
74 map_bh(bh_result, sb, phys); 75 map_bh(bh_result, sb, phys);
75 } 76 }
76 return 0; 77 return 0;
@@ -80,7 +81,7 @@ static int bfs_get_block(struct inode * inode, sector_t block,
80 of blocks allocated for this file, we can grant it */ 81 of blocks allocated for this file, we can grant it */
81 if (inode->i_size && phys <= bi->i_eblock) { 82 if (inode->i_size && phys <= bi->i_eblock) {
82 dprintf("c=%d, b=%08lx, phys=%08lx (interim block granted)\n", 83 dprintf("c=%d, b=%08lx, phys=%08lx (interim block granted)\n",
83 create, block, phys); 84 create, (unsigned long)block, phys);
84 map_bh(bh_result, sb, phys); 85 map_bh(bh_result, sb, phys);
85 return 0; 86 return 0;
86 } 87 }
@@ -88,11 +89,12 @@ static int bfs_get_block(struct inode * inode, sector_t block,
88 /* the rest has to be protected against itself */ 89 /* the rest has to be protected against itself */
89 lock_kernel(); 90 lock_kernel();
90 91
91 /* if the last data block for this file is the last allocated block, we can 92 /* if the last data block for this file is the last allocated
92 extend the file trivially, without moving it anywhere */ 93 block, we can extend the file trivially, without moving it
94 anywhere */
93 if (bi->i_eblock == info->si_lf_eblk) { 95 if (bi->i_eblock == info->si_lf_eblk) {
94 dprintf("c=%d, b=%08lx, phys=%08lx (simple extension)\n", 96 dprintf("c=%d, b=%08lx, phys=%08lx (simple extension)\n",
95 create, block, phys); 97 create, (unsigned long)block, phys);
96 map_bh(bh_result, sb, phys); 98 map_bh(bh_result, sb, phys);
97 info->si_freeb -= phys - bi->i_eblock; 99 info->si_freeb -= phys - bi->i_eblock;
98 info->si_lf_eblk = bi->i_eblock = phys; 100 info->si_lf_eblk = bi->i_eblock = phys;
@@ -114,7 +116,8 @@ static int bfs_get_block(struct inode * inode, sector_t block,
114 } else 116 } else
115 err = 0; 117 err = 0;
116 118
117 dprintf("c=%d, b=%08lx, phys=%08lx (moved)\n", create, block, phys); 119 dprintf("c=%d, b=%08lx, phys=%08lx (moved)\n",
120 create, (unsigned long)block, phys);
118 bi->i_sblock = phys; 121 bi->i_sblock = phys;
119 phys += block; 122 phys += block;
120 info->si_lf_eblk = bi->i_eblock = phys; 123 info->si_lf_eblk = bi->i_eblock = phys;
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 64e0fb33fc0c..c7b39aa279d7 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -3,6 +3,8 @@
3 * BFS superblock and inode operations. 3 * BFS superblock and inode operations.
4 * Copyright (C) 1999,2000 Tigran Aivazian <tigran@veritas.com> 4 * Copyright (C) 1999,2000 Tigran Aivazian <tigran@veritas.com>
5 * From fs/minix, Copyright (C) 1991, 1992 Linus Torvalds. 5 * From fs/minix, Copyright (C) 1991, 1992 Linus Torvalds.
6 *
7 * Made endianness-clean by Andrew Stribblehill <ads@wompom.org>, 2005.
6 */ 8 */
7 9
8#include <linux/module.h> 10#include <linux/module.h>
@@ -54,46 +56,50 @@ static void bfs_read_inode(struct inode * inode)
54 off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK; 56 off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK;
55 di = (struct bfs_inode *)bh->b_data + off; 57 di = (struct bfs_inode *)bh->b_data + off;
56 58
57 inode->i_mode = 0x0000FFFF & di->i_mode; 59 inode->i_mode = 0x0000FFFF & le32_to_cpu(di->i_mode);
58 if (di->i_vtype == BFS_VDIR) { 60 if (le32_to_cpu(di->i_vtype) == BFS_VDIR) {
59 inode->i_mode |= S_IFDIR; 61 inode->i_mode |= S_IFDIR;
60 inode->i_op = &bfs_dir_inops; 62 inode->i_op = &bfs_dir_inops;
61 inode->i_fop = &bfs_dir_operations; 63 inode->i_fop = &bfs_dir_operations;
62 } else if (di->i_vtype == BFS_VREG) { 64 } else if (le32_to_cpu(di->i_vtype) == BFS_VREG) {
63 inode->i_mode |= S_IFREG; 65 inode->i_mode |= S_IFREG;
64 inode->i_op = &bfs_file_inops; 66 inode->i_op = &bfs_file_inops;
65 inode->i_fop = &bfs_file_operations; 67 inode->i_fop = &bfs_file_operations;
66 inode->i_mapping->a_ops = &bfs_aops; 68 inode->i_mapping->a_ops = &bfs_aops;
67 } 69 }
68 70
69 inode->i_uid = di->i_uid; 71 BFS_I(inode)->i_sblock = le32_to_cpu(di->i_sblock);
70 inode->i_gid = di->i_gid; 72 BFS_I(inode)->i_eblock = le32_to_cpu(di->i_eblock);
71 inode->i_nlink = di->i_nlink; 73 inode->i_uid = le32_to_cpu(di->i_uid);
74 inode->i_gid = le32_to_cpu(di->i_gid);
75 inode->i_nlink = le32_to_cpu(di->i_nlink);
72 inode->i_size = BFS_FILESIZE(di); 76 inode->i_size = BFS_FILESIZE(di);
73 inode->i_blocks = BFS_FILEBLOCKS(di); 77 inode->i_blocks = BFS_FILEBLOCKS(di);
78 if (inode->i_size || inode->i_blocks) dprintf("Registered inode with %lld size, %ld blocks\n", inode->i_size, inode->i_blocks);
74 inode->i_blksize = PAGE_SIZE; 79 inode->i_blksize = PAGE_SIZE;
75 inode->i_atime.tv_sec = di->i_atime; 80 inode->i_atime.tv_sec = le32_to_cpu(di->i_atime);
76 inode->i_mtime.tv_sec = di->i_mtime; 81 inode->i_mtime.tv_sec = le32_to_cpu(di->i_mtime);
77 inode->i_ctime.tv_sec = di->i_ctime; 82 inode->i_ctime.tv_sec = le32_to_cpu(di->i_ctime);
78 inode->i_atime.tv_nsec = 0; 83 inode->i_atime.tv_nsec = 0;
79 inode->i_mtime.tv_nsec = 0; 84 inode->i_mtime.tv_nsec = 0;
80 inode->i_ctime.tv_nsec = 0; 85 inode->i_ctime.tv_nsec = 0;
81 BFS_I(inode)->i_dsk_ino = di->i_ino; /* can be 0 so we store a copy */ 86 BFS_I(inode)->i_dsk_ino = le16_to_cpu(di->i_ino); /* can be 0 so we store a copy */
82 BFS_I(inode)->i_sblock = di->i_sblock;
83 BFS_I(inode)->i_eblock = di->i_eblock;
84 87
85 brelse(bh); 88 brelse(bh);
86} 89}
87 90
88static int bfs_write_inode(struct inode * inode, int unused) 91static int bfs_write_inode(struct inode * inode, int unused)
89{ 92{
90 unsigned long ino = inode->i_ino; 93 unsigned int ino = (u16)inode->i_ino;
94 unsigned long i_sblock;
91 struct bfs_inode * di; 95 struct bfs_inode * di;
92 struct buffer_head * bh; 96 struct buffer_head * bh;
93 int block, off; 97 int block, off;
94 98
99 dprintf("ino=%08x\n", ino);
100
95 if (ino < BFS_ROOT_INO || ino > BFS_SB(inode->i_sb)->si_lasti) { 101 if (ino < BFS_ROOT_INO || ino > BFS_SB(inode->i_sb)->si_lasti) {
96 printf("Bad inode number %s:%08lx\n", inode->i_sb->s_id, ino); 102 printf("Bad inode number %s:%08x\n", inode->i_sb->s_id, ino);
97 return -EIO; 103 return -EIO;
98 } 104 }
99 105
@@ -101,7 +107,7 @@ static int bfs_write_inode(struct inode * inode, int unused)
101 block = (ino - BFS_ROOT_INO)/BFS_INODES_PER_BLOCK + 1; 107 block = (ino - BFS_ROOT_INO)/BFS_INODES_PER_BLOCK + 1;
102 bh = sb_bread(inode->i_sb, block); 108 bh = sb_bread(inode->i_sb, block);
103 if (!bh) { 109 if (!bh) {
104 printf("Unable to read inode %s:%08lx\n", inode->i_sb->s_id, ino); 110 printf("Unable to read inode %s:%08x\n", inode->i_sb->s_id, ino);
105 unlock_kernel(); 111 unlock_kernel();
106 return -EIO; 112 return -EIO;
107 } 113 }
@@ -109,24 +115,26 @@ static int bfs_write_inode(struct inode * inode, int unused)
109 off = (ino - BFS_ROOT_INO)%BFS_INODES_PER_BLOCK; 115 off = (ino - BFS_ROOT_INO)%BFS_INODES_PER_BLOCK;
110 di = (struct bfs_inode *)bh->b_data + off; 116 di = (struct bfs_inode *)bh->b_data + off;
111 117
112 if (inode->i_ino == BFS_ROOT_INO) 118 if (ino == BFS_ROOT_INO)
113 di->i_vtype = BFS_VDIR; 119 di->i_vtype = cpu_to_le32(BFS_VDIR);
114 else 120 else
115 di->i_vtype = BFS_VREG; 121 di->i_vtype = cpu_to_le32(BFS_VREG);
116 122
117 di->i_ino = inode->i_ino; 123 di->i_ino = cpu_to_le16(ino);
118 di->i_mode = inode->i_mode; 124 di->i_mode = cpu_to_le32(inode->i_mode);
119 di->i_uid = inode->i_uid; 125 di->i_uid = cpu_to_le32(inode->i_uid);
120 di->i_gid = inode->i_gid; 126 di->i_gid = cpu_to_le32(inode->i_gid);
121 di->i_nlink = inode->i_nlink; 127 di->i_nlink = cpu_to_le32(inode->i_nlink);
122 di->i_atime = inode->i_atime.tv_sec; 128 di->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
123 di->i_mtime = inode->i_mtime.tv_sec; 129 di->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
124 di->i_ctime = inode->i_ctime.tv_sec; 130 di->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
125 di->i_sblock = BFS_I(inode)->i_sblock; 131 i_sblock = BFS_I(inode)->i_sblock;
126 di->i_eblock = BFS_I(inode)->i_eblock; 132 di->i_sblock = cpu_to_le32(i_sblock);
127 di->i_eoffset = di->i_sblock * BFS_BSIZE + inode->i_size - 1; 133 di->i_eblock = cpu_to_le32(BFS_I(inode)->i_eblock);
134 di->i_eoffset = cpu_to_le32(i_sblock * BFS_BSIZE + inode->i_size - 1);
128 135
129 mark_buffer_dirty(bh); 136 mark_buffer_dirty(bh);
137 dprintf("Written ino=%d into %d:%d\n",le16_to_cpu(di->i_ino),block,off);
130 brelse(bh); 138 brelse(bh);
131 unlock_kernel(); 139 unlock_kernel();
132 return 0; 140 return 0;
@@ -140,11 +148,14 @@ static void bfs_delete_inode(struct inode * inode)
140 int block, off; 148 int block, off;
141 struct super_block * s = inode->i_sb; 149 struct super_block * s = inode->i_sb;
142 struct bfs_sb_info * info = BFS_SB(s); 150 struct bfs_sb_info * info = BFS_SB(s);
151 struct bfs_inode_info * bi = BFS_I(inode);
143 152
144 dprintf("ino=%08lx\n", inode->i_ino); 153 dprintf("ino=%08lx\n", ino);
145 154
146 if (inode->i_ino < BFS_ROOT_INO || inode->i_ino > info->si_lasti) { 155 truncate_inode_pages(&inode->i_data, 0);
147 printf("invalid ino=%08lx\n", inode->i_ino); 156
157 if (ino < BFS_ROOT_INO || ino > info->si_lasti) {
158 printf("invalid ino=%08lx\n", ino);
148 return; 159 return;
149 } 160 }
150 161
@@ -160,13 +171,13 @@ static void bfs_delete_inode(struct inode * inode)
160 return; 171 return;
161 } 172 }
162 off = (ino - BFS_ROOT_INO)%BFS_INODES_PER_BLOCK; 173 off = (ino - BFS_ROOT_INO)%BFS_INODES_PER_BLOCK;
163 di = (struct bfs_inode *)bh->b_data + off; 174 di = (struct bfs_inode *) bh->b_data + off;
164 if (di->i_ino) { 175 if (bi->i_dsk_ino) {
165 info->si_freeb += BFS_FILEBLOCKS(di); 176 info->si_freeb += 1 + bi->i_eblock - bi->i_sblock;
166 info->si_freei++; 177 info->si_freei++;
167 clear_bit(di->i_ino, info->si_imap); 178 clear_bit(ino, info->si_imap);
168 dump_imap("delete_inode", s); 179 dump_imap("delete_inode", s);
169 } 180 }
170 di->i_ino = 0; 181 di->i_ino = 0;
171 di->i_sblock = 0; 182 di->i_sblock = 0;
172 mark_buffer_dirty(bh); 183 mark_buffer_dirty(bh);
@@ -272,14 +283,14 @@ static struct super_operations bfs_sops = {
272 283
273void dump_imap(const char *prefix, struct super_block * s) 284void dump_imap(const char *prefix, struct super_block * s)
274{ 285{
275#if 0 286#ifdef DEBUG
276 int i; 287 int i;
277 char *tmpbuf = (char *)get_zeroed_page(GFP_KERNEL); 288 char *tmpbuf = (char *)get_zeroed_page(GFP_KERNEL);
278 289
279 if (!tmpbuf) 290 if (!tmpbuf)
280 return; 291 return;
281 for (i=BFS_SB(s)->si_lasti; i>=0; i--) { 292 for (i=BFS_SB(s)->si_lasti; i>=0; i--) {
282 if (i>PAGE_SIZE-100) break; 293 if (i > PAGE_SIZE-100) break;
283 if (test_bit(i, BFS_SB(s)->si_imap)) 294 if (test_bit(i, BFS_SB(s)->si_imap))
284 strcat(tmpbuf, "1"); 295 strcat(tmpbuf, "1");
285 else 296 else
@@ -295,7 +306,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
295 struct buffer_head * bh; 306 struct buffer_head * bh;
296 struct bfs_super_block * bfs_sb; 307 struct bfs_super_block * bfs_sb;
297 struct inode * inode; 308 struct inode * inode;
298 int i, imap_len; 309 unsigned i, imap_len;
299 struct bfs_sb_info * info; 310 struct bfs_sb_info * info;
300 311
301 info = kmalloc(sizeof(*info), GFP_KERNEL); 312 info = kmalloc(sizeof(*info), GFP_KERNEL);
@@ -310,19 +321,18 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
310 if(!bh) 321 if(!bh)
311 goto out; 322 goto out;
312 bfs_sb = (struct bfs_super_block *)bh->b_data; 323 bfs_sb = (struct bfs_super_block *)bh->b_data;
313 if (bfs_sb->s_magic != BFS_MAGIC) { 324 if (le32_to_cpu(bfs_sb->s_magic) != BFS_MAGIC) {
314 if (!silent) 325 if (!silent)
315 printf("No BFS filesystem on %s (magic=%08x)\n", 326 printf("No BFS filesystem on %s (magic=%08x)\n",
316 s->s_id, bfs_sb->s_magic); 327 s->s_id, le32_to_cpu(bfs_sb->s_magic));
317 goto out; 328 goto out;
318 } 329 }
319 if (BFS_UNCLEAN(bfs_sb, s) && !silent) 330 if (BFS_UNCLEAN(bfs_sb, s) && !silent)
320 printf("%s is unclean, continuing\n", s->s_id); 331 printf("%s is unclean, continuing\n", s->s_id);
321 332
322 s->s_magic = BFS_MAGIC; 333 s->s_magic = BFS_MAGIC;
323 info->si_bfs_sb = bfs_sb;
324 info->si_sbh = bh; 334 info->si_sbh = bh;
325 info->si_lasti = (bfs_sb->s_start - BFS_BSIZE)/sizeof(struct bfs_inode) 335 info->si_lasti = (le32_to_cpu(bfs_sb->s_start) - BFS_BSIZE)/sizeof(struct bfs_inode)
326 + BFS_ROOT_INO - 1; 336 + BFS_ROOT_INO - 1;
327 337
328 imap_len = info->si_lasti/8 + 1; 338 imap_len = info->si_lasti/8 + 1;
@@ -346,8 +356,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
346 goto out; 356 goto out;
347 } 357 }
348 358
349 info->si_blocks = (bfs_sb->s_end + 1)>>BFS_BSIZE_BITS; /* for statfs(2) */ 359 info->si_blocks = (le32_to_cpu(bfs_sb->s_end) + 1)>>BFS_BSIZE_BITS; /* for statfs(2) */
350 info->si_freeb = (bfs_sb->s_end + 1 - bfs_sb->s_start)>>BFS_BSIZE_BITS; 360 info->si_freeb = (le32_to_cpu(bfs_sb->s_end) + 1 - cpu_to_le32(bfs_sb->s_start))>>BFS_BSIZE_BITS;
351 info->si_freei = 0; 361 info->si_freei = 0;
352 info->si_lf_eblk = 0; 362 info->si_lf_eblk = 0;
353 info->si_lf_sblk = 0; 363 info->si_lf_sblk = 0;
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index c8998dc66882..7974efa107bc 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -520,7 +520,7 @@ static int load_flat_file(struct linux_binprm * bprm,
520 DBG_FLT("BINFMT_FLAT: ROM mapping of file (we hope)\n"); 520 DBG_FLT("BINFMT_FLAT: ROM mapping of file (we hope)\n");
521 521
522 down_write(&current->mm->mmap_sem); 522 down_write(&current->mm->mmap_sem);
523 textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC, MAP_SHARED, 0); 523 textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC, MAP_PRIVATE, 0);
524 up_write(&current->mm->mmap_sem); 524 up_write(&current->mm->mmap_sem);
525 if (!textpos || textpos >= (unsigned long) -4096) { 525 if (!textpos || textpos >= (unsigned long) -4096) {
526 if (!textpos) 526 if (!textpos)
diff --git a/fs/bio.c b/fs/bio.c
index 1f2d4649b188..83a349574567 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -25,6 +25,7 @@
25#include <linux/module.h> 25#include <linux/module.h>
26#include <linux/mempool.h> 26#include <linux/mempool.h>
27#include <linux/workqueue.h> 27#include <linux/workqueue.h>
28#include <scsi/sg.h> /* for struct sg_iovec */
28 29
29#define BIO_POOL_SIZE 256 30#define BIO_POOL_SIZE 256
30 31
@@ -104,18 +105,22 @@ static inline struct bio_vec *bvec_alloc_bs(unsigned int __nocast gfp_mask, int
104 return bvl; 105 return bvl;
105} 106}
106 107
107/* 108void bio_free(struct bio *bio, struct bio_set *bio_set)
108 * default destructor for a bio allocated with bio_alloc_bioset()
109 */
110static void bio_destructor(struct bio *bio)
111{ 109{
112 const int pool_idx = BIO_POOL_IDX(bio); 110 const int pool_idx = BIO_POOL_IDX(bio);
113 struct bio_set *bs = bio->bi_set;
114 111
115 BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS); 112 BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS);
116 113
117 mempool_free(bio->bi_io_vec, bs->bvec_pools[pool_idx]); 114 mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]);
118 mempool_free(bio, bs->bio_pool); 115 mempool_free(bio, bio_set->bio_pool);
116}
117
118/*
119 * default destructor for a bio allocated with bio_alloc_bioset()
120 */
121static void bio_fs_destructor(struct bio *bio)
122{
123 bio_free(bio, fs_bio_set);
119} 124}
120 125
121inline void bio_init(struct bio *bio) 126inline void bio_init(struct bio *bio)
@@ -171,8 +176,6 @@ struct bio *bio_alloc_bioset(unsigned int __nocast gfp_mask, int nr_iovecs, stru
171 bio->bi_max_vecs = bvec_slabs[idx].nr_vecs; 176 bio->bi_max_vecs = bvec_slabs[idx].nr_vecs;
172 } 177 }
173 bio->bi_io_vec = bvl; 178 bio->bi_io_vec = bvl;
174 bio->bi_destructor = bio_destructor;
175 bio->bi_set = bs;
176 } 179 }
177out: 180out:
178 return bio; 181 return bio;
@@ -180,7 +183,12 @@ out:
180 183
181struct bio *bio_alloc(unsigned int __nocast gfp_mask, int nr_iovecs) 184struct bio *bio_alloc(unsigned int __nocast gfp_mask, int nr_iovecs)
182{ 185{
183 return bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); 186 struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set);
187
188 if (bio)
189 bio->bi_destructor = bio_fs_destructor;
190
191 return bio;
184} 192}
185 193
186void zero_fill_bio(struct bio *bio) 194void zero_fill_bio(struct bio *bio)
@@ -273,8 +281,10 @@ struct bio *bio_clone(struct bio *bio, unsigned int __nocast gfp_mask)
273{ 281{
274 struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set); 282 struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set);
275 283
276 if (b) 284 if (b) {
285 b->bi_destructor = bio_fs_destructor;
277 __bio_clone(b, bio); 286 __bio_clone(b, bio);
287 }
278 288
279 return b; 289 return b;
280} 290}
@@ -546,22 +556,34 @@ out_bmd:
546 return ERR_PTR(ret); 556 return ERR_PTR(ret);
547} 557}
548 558
549static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev, 559static struct bio *__bio_map_user_iov(request_queue_t *q,
550 unsigned long uaddr, unsigned int len, 560 struct block_device *bdev,
551 int write_to_vm) 561 struct sg_iovec *iov, int iov_count,
562 int write_to_vm)
552{ 563{
553 unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; 564 int i, j;
554 unsigned long start = uaddr >> PAGE_SHIFT; 565 int nr_pages = 0;
555 const int nr_pages = end - start;
556 int ret, offset, i;
557 struct page **pages; 566 struct page **pages;
558 struct bio *bio; 567 struct bio *bio;
568 int cur_page = 0;
569 int ret, offset;
559 570
560 /* 571 for (i = 0; i < iov_count; i++) {
561 * transfer and buffer must be aligned to at least hardsector 572 unsigned long uaddr = (unsigned long)iov[i].iov_base;
562 * size for now, in the future we can relax this restriction 573 unsigned long len = iov[i].iov_len;
563 */ 574 unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
564 if ((uaddr & queue_dma_alignment(q)) || (len & queue_dma_alignment(q))) 575 unsigned long start = uaddr >> PAGE_SHIFT;
576
577 nr_pages += end - start;
578 /*
579 * transfer and buffer must be aligned to at least hardsector
580 * size for now, in the future we can relax this restriction
581 */
582 if ((uaddr & queue_dma_alignment(q)) || (len & queue_dma_alignment(q)))
583 return ERR_PTR(-EINVAL);
584 }
585
586 if (!nr_pages)
565 return ERR_PTR(-EINVAL); 587 return ERR_PTR(-EINVAL);
566 588
567 bio = bio_alloc(GFP_KERNEL, nr_pages); 589 bio = bio_alloc(GFP_KERNEL, nr_pages);
@@ -573,42 +595,54 @@ static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev,
573 if (!pages) 595 if (!pages)
574 goto out; 596 goto out;
575 597
576 down_read(&current->mm->mmap_sem); 598 memset(pages, 0, nr_pages * sizeof(struct page *));
577 ret = get_user_pages(current, current->mm, uaddr, nr_pages, 599
578 write_to_vm, 0, pages, NULL); 600 for (i = 0; i < iov_count; i++) {
579 up_read(&current->mm->mmap_sem); 601 unsigned long uaddr = (unsigned long)iov[i].iov_base;
580 602 unsigned long len = iov[i].iov_len;
581 if (ret < nr_pages) 603 unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
582 goto out; 604 unsigned long start = uaddr >> PAGE_SHIFT;
583 605 const int local_nr_pages = end - start;
584 bio->bi_bdev = bdev; 606 const int page_limit = cur_page + local_nr_pages;
585 607
586 offset = uaddr & ~PAGE_MASK; 608 down_read(&current->mm->mmap_sem);
587 for (i = 0; i < nr_pages; i++) { 609 ret = get_user_pages(current, current->mm, uaddr,
588 unsigned int bytes = PAGE_SIZE - offset; 610 local_nr_pages,
589 611 write_to_vm, 0, &pages[cur_page], NULL);
590 if (len <= 0) 612 up_read(&current->mm->mmap_sem);
591 break; 613
592 614 if (ret < local_nr_pages)
593 if (bytes > len) 615 goto out_unmap;
594 bytes = len; 616
617
618 offset = uaddr & ~PAGE_MASK;
619 for (j = cur_page; j < page_limit; j++) {
620 unsigned int bytes = PAGE_SIZE - offset;
621
622 if (len <= 0)
623 break;
624
625 if (bytes > len)
626 bytes = len;
627
628 /*
629 * sorry...
630 */
631 if (__bio_add_page(q, bio, pages[j], bytes, offset) < bytes)
632 break;
633
634 len -= bytes;
635 offset = 0;
636 }
595 637
638 cur_page = j;
596 /* 639 /*
597 * sorry... 640 * release the pages we didn't map into the bio, if any
598 */ 641 */
599 if (__bio_add_page(q, bio, pages[i], bytes, offset) < bytes) 642 while (j < page_limit)
600 break; 643 page_cache_release(pages[j++]);
601
602 len -= bytes;
603 offset = 0;
604 } 644 }
605 645
606 /*
607 * release the pages we didn't map into the bio, if any
608 */
609 while (i < nr_pages)
610 page_cache_release(pages[i++]);
611
612 kfree(pages); 646 kfree(pages);
613 647
614 /* 648 /*
@@ -617,9 +651,17 @@ static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev,
617 if (!write_to_vm) 651 if (!write_to_vm)
618 bio->bi_rw |= (1 << BIO_RW); 652 bio->bi_rw |= (1 << BIO_RW);
619 653
654 bio->bi_bdev = bdev;
620 bio->bi_flags |= (1 << BIO_USER_MAPPED); 655 bio->bi_flags |= (1 << BIO_USER_MAPPED);
621 return bio; 656 return bio;
622out: 657
658 out_unmap:
659 for (i = 0; i < nr_pages; i++) {
660 if(!pages[i])
661 break;
662 page_cache_release(pages[i]);
663 }
664 out:
623 kfree(pages); 665 kfree(pages);
624 bio_put(bio); 666 bio_put(bio);
625 return ERR_PTR(ret); 667 return ERR_PTR(ret);
@@ -639,9 +681,33 @@ out:
639struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev, 681struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev,
640 unsigned long uaddr, unsigned int len, int write_to_vm) 682 unsigned long uaddr, unsigned int len, int write_to_vm)
641{ 683{
684 struct sg_iovec iov;
685
686 iov.iov_base = (void __user *)uaddr;
687 iov.iov_len = len;
688
689 return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm);
690}
691
692/**
693 * bio_map_user_iov - map user sg_iovec table into bio
694 * @q: the request_queue_t for the bio
695 * @bdev: destination block device
696 * @iov: the iovec.
697 * @iov_count: number of elements in the iovec
698 * @write_to_vm: bool indicating writing to pages or not
699 *
700 * Map the user space address into a bio suitable for io to a block
701 * device. Returns an error pointer in case of error.
702 */
703struct bio *bio_map_user_iov(request_queue_t *q, struct block_device *bdev,
704 struct sg_iovec *iov, int iov_count,
705 int write_to_vm)
706{
642 struct bio *bio; 707 struct bio *bio;
708 int len = 0, i;
643 709
644 bio = __bio_map_user(q, bdev, uaddr, len, write_to_vm); 710 bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm);
645 711
646 if (IS_ERR(bio)) 712 if (IS_ERR(bio))
647 return bio; 713 return bio;
@@ -654,6 +720,9 @@ struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev,
654 */ 720 */
655 bio_get(bio); 721 bio_get(bio);
656 722
723 for (i = 0; i < iov_count; i++)
724 len += iov[i].iov_len;
725
657 if (bio->bi_size == len) 726 if (bio->bi_size == len)
658 return bio; 727 return bio;
659 728
@@ -698,6 +767,82 @@ void bio_unmap_user(struct bio *bio)
698 bio_put(bio); 767 bio_put(bio);
699} 768}
700 769
770static int bio_map_kern_endio(struct bio *bio, unsigned int bytes_done, int err)
771{
772 if (bio->bi_size)
773 return 1;
774
775 bio_put(bio);
776 return 0;
777}
778
779
780static struct bio *__bio_map_kern(request_queue_t *q, void *data,
781 unsigned int len, unsigned int gfp_mask)
782{
783 unsigned long kaddr = (unsigned long)data;
784 unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
785 unsigned long start = kaddr >> PAGE_SHIFT;
786 const int nr_pages = end - start;
787 int offset, i;
788 struct bio *bio;
789
790 bio = bio_alloc(gfp_mask, nr_pages);
791 if (!bio)
792 return ERR_PTR(-ENOMEM);
793
794 offset = offset_in_page(kaddr);
795 for (i = 0; i < nr_pages; i++) {
796 unsigned int bytes = PAGE_SIZE - offset;
797
798 if (len <= 0)
799 break;
800
801 if (bytes > len)
802 bytes = len;
803
804 if (__bio_add_page(q, bio, virt_to_page(data), bytes,
805 offset) < bytes)
806 break;
807
808 data += bytes;
809 len -= bytes;
810 offset = 0;
811 }
812
813 bio->bi_end_io = bio_map_kern_endio;
814 return bio;
815}
816
817/**
818 * bio_map_kern - map kernel address into bio
819 * @q: the request_queue_t for the bio
820 * @data: pointer to buffer to map
821 * @len: length in bytes
822 * @gfp_mask: allocation flags for bio allocation
823 *
824 * Map the kernel address into a bio suitable for io to a block
825 * device. Returns an error pointer in case of error.
826 */
827struct bio *bio_map_kern(request_queue_t *q, void *data, unsigned int len,
828 unsigned int gfp_mask)
829{
830 struct bio *bio;
831
832 bio = __bio_map_kern(q, data, len, gfp_mask);
833 if (IS_ERR(bio))
834 return bio;
835
836 if (bio->bi_size == len)
837 return bio;
838
839 /*
840 * Don't support partial mappings.
841 */
842 bio_put(bio);
843 return ERR_PTR(-EINVAL);
844}
845
701/* 846/*
702 * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions 847 * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
703 * for performing direct-IO in BIOs. 848 * for performing direct-IO in BIOs.
@@ -1075,6 +1220,7 @@ subsys_initcall(init_bio);
1075 1220
1076EXPORT_SYMBOL(bio_alloc); 1221EXPORT_SYMBOL(bio_alloc);
1077EXPORT_SYMBOL(bio_put); 1222EXPORT_SYMBOL(bio_put);
1223EXPORT_SYMBOL(bio_free);
1078EXPORT_SYMBOL(bio_endio); 1224EXPORT_SYMBOL(bio_endio);
1079EXPORT_SYMBOL(bio_init); 1225EXPORT_SYMBOL(bio_init);
1080EXPORT_SYMBOL(__bio_clone); 1226EXPORT_SYMBOL(__bio_clone);
@@ -1085,6 +1231,7 @@ EXPORT_SYMBOL(bio_add_page);
1085EXPORT_SYMBOL(bio_get_nr_vecs); 1231EXPORT_SYMBOL(bio_get_nr_vecs);
1086EXPORT_SYMBOL(bio_map_user); 1232EXPORT_SYMBOL(bio_map_user);
1087EXPORT_SYMBOL(bio_unmap_user); 1233EXPORT_SYMBOL(bio_unmap_user);
1234EXPORT_SYMBOL(bio_map_kern);
1088EXPORT_SYMBOL(bio_pair_release); 1235EXPORT_SYMBOL(bio_pair_release);
1089EXPORT_SYMBOL(bio_split); 1236EXPORT_SYMBOL(bio_split);
1090EXPORT_SYMBOL(bio_split_pool); 1237EXPORT_SYMBOL(bio_split_pool);
diff --git a/fs/buffer.c b/fs/buffer.c
index 6a25d7df89b1..6cbfceabd95d 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -40,6 +40,7 @@
40#include <linux/cpu.h> 40#include <linux/cpu.h>
41#include <linux/bitops.h> 41#include <linux/bitops.h>
42#include <linux/mpage.h> 42#include <linux/mpage.h>
43#include <linux/bit_spinlock.h>
43 44
44static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); 45static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
45static void invalidate_bh_lrus(void); 46static void invalidate_bh_lrus(void);
@@ -917,8 +918,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
917 * contents - it is a noop if I/O is still in 918 * contents - it is a noop if I/O is still in
918 * flight on potentially older contents. 919 * flight on potentially older contents.
919 */ 920 */
920 wait_on_buffer(bh); 921 ll_rw_block(SWRITE, 1, &bh);
921 ll_rw_block(WRITE, 1, &bh);
922 brelse(bh); 922 brelse(bh);
923 spin_lock(lock); 923 spin_lock(lock);
924 } 924 }
@@ -2793,21 +2793,22 @@ int submit_bh(int rw, struct buffer_head * bh)
2793 2793
2794/** 2794/**
2795 * ll_rw_block: low-level access to block devices (DEPRECATED) 2795 * ll_rw_block: low-level access to block devices (DEPRECATED)
2796 * @rw: whether to %READ or %WRITE or maybe %READA (readahead) 2796 * @rw: whether to %READ or %WRITE or %SWRITE or maybe %READA (readahead)
2797 * @nr: number of &struct buffer_heads in the array 2797 * @nr: number of &struct buffer_heads in the array
2798 * @bhs: array of pointers to &struct buffer_head 2798 * @bhs: array of pointers to &struct buffer_head
2799 * 2799 *
2800 * ll_rw_block() takes an array of pointers to &struct buffer_heads, 2800 * ll_rw_block() takes an array of pointers to &struct buffer_heads, and
2801 * and requests an I/O operation on them, either a %READ or a %WRITE. 2801 * requests an I/O operation on them, either a %READ or a %WRITE. The third
2802 * The third %READA option is described in the documentation for 2802 * %SWRITE is like %WRITE only we make sure that the *current* data in buffers
2803 * generic_make_request() which ll_rw_block() calls. 2803 * are sent to disk. The fourth %READA option is described in the documentation
2804 * for generic_make_request() which ll_rw_block() calls.
2804 * 2805 *
2805 * This function drops any buffer that it cannot get a lock on (with the 2806 * This function drops any buffer that it cannot get a lock on (with the
2806 * BH_Lock state bit), any buffer that appears to be clean when doing a 2807 * BH_Lock state bit) unless SWRITE is required, any buffer that appears to be
2807 * write request, and any buffer that appears to be up-to-date when doing 2808 * clean when doing a write request, and any buffer that appears to be
2808 * read request. Further it marks as clean buffers that are processed for 2809 * up-to-date when doing read request. Further it marks as clean buffers that
2809 * writing (the buffer cache won't assume that they are actually clean until 2810 * are processed for writing (the buffer cache won't assume that they are
2810 * the buffer gets unlocked). 2811 * actually clean until the buffer gets unlocked).
2811 * 2812 *
2812 * ll_rw_block sets b_end_io to simple completion handler that marks 2813 * ll_rw_block sets b_end_io to simple completion handler that marks
2813 * the buffer up-to-date (if approriate), unlocks the buffer and wakes 2814 * the buffer up-to-date (if approriate), unlocks the buffer and wakes
@@ -2823,11 +2824,13 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
2823 for (i = 0; i < nr; i++) { 2824 for (i = 0; i < nr; i++) {
2824 struct buffer_head *bh = bhs[i]; 2825 struct buffer_head *bh = bhs[i];
2825 2826
2826 if (test_set_buffer_locked(bh)) 2827 if (rw == SWRITE)
2828 lock_buffer(bh);
2829 else if (test_set_buffer_locked(bh))
2827 continue; 2830 continue;
2828 2831
2829 get_bh(bh); 2832 get_bh(bh);
2830 if (rw == WRITE) { 2833 if (rw == WRITE || rw == SWRITE) {
2831 if (test_clear_buffer_dirty(bh)) { 2834 if (test_clear_buffer_dirty(bh)) {
2832 bh->b_end_io = end_buffer_write_sync; 2835 bh->b_end_io = end_buffer_write_sync;
2833 submit_bh(WRITE, bh); 2836 submit_bh(WRITE, bh);
@@ -3046,10 +3049,9 @@ struct buffer_head *alloc_buffer_head(unsigned int __nocast gfp_flags)
3046{ 3049{
3047 struct buffer_head *ret = kmem_cache_alloc(bh_cachep, gfp_flags); 3050 struct buffer_head *ret = kmem_cache_alloc(bh_cachep, gfp_flags);
3048 if (ret) { 3051 if (ret) {
3049 preempt_disable(); 3052 get_cpu_var(bh_accounting).nr++;
3050 __get_cpu_var(bh_accounting).nr++;
3051 recalc_bh_state(); 3053 recalc_bh_state();
3052 preempt_enable(); 3054 put_cpu_var(bh_accounting);
3053 } 3055 }
3054 return ret; 3056 return ret;
3055} 3057}
@@ -3059,10 +3061,9 @@ void free_buffer_head(struct buffer_head *bh)
3059{ 3061{
3060 BUG_ON(!list_empty(&bh->b_assoc_buffers)); 3062 BUG_ON(!list_empty(&bh->b_assoc_buffers));
3061 kmem_cache_free(bh_cachep, bh); 3063 kmem_cache_free(bh_cachep, bh);
3062 preempt_disable(); 3064 get_cpu_var(bh_accounting).nr--;
3063 __get_cpu_var(bh_accounting).nr--;
3064 recalc_bh_state(); 3065 recalc_bh_state();
3065 preempt_enable(); 3066 put_cpu_var(bh_accounting);
3066} 3067}
3067EXPORT_SYMBOL(free_buffer_head); 3068EXPORT_SYMBOL(free_buffer_head);
3068 3069
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index e568cc47a7f9..2335f14a1583 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -836,7 +836,7 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
836 /* go from value to value + temp_len condensing 836 /* go from value to value + temp_len condensing
837 double commas to singles. Note that this ends up 837 double commas to singles. Note that this ends up
838 allocating a few bytes too many, which is ok */ 838 allocating a few bytes too many, which is ok */
839 vol->password = kcalloc(1, temp_len, GFP_KERNEL); 839 vol->password = kzalloc(temp_len, GFP_KERNEL);
840 if(vol->password == NULL) { 840 if(vol->password == NULL) {
841 printk("CIFS: no memory for pass\n"); 841 printk("CIFS: no memory for pass\n");
842 return 1; 842 return 1;
@@ -851,7 +851,7 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
851 } 851 }
852 vol->password[j] = 0; 852 vol->password[j] = 0;
853 } else { 853 } else {
854 vol->password = kcalloc(1, temp_len+1, GFP_KERNEL); 854 vol->password = kzalloc(temp_len+1, GFP_KERNEL);
855 if(vol->password == NULL) { 855 if(vol->password == NULL) {
856 printk("CIFS: no memory for pass\n"); 856 printk("CIFS: no memory for pass\n");
857 return 1; 857 return 1;
@@ -1317,7 +1317,7 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket,
1317 sessinit is sent but no second negprot */ 1317 sessinit is sent but no second negprot */
1318 struct rfc1002_session_packet * ses_init_buf; 1318 struct rfc1002_session_packet * ses_init_buf;
1319 struct smb_hdr * smb_buf; 1319 struct smb_hdr * smb_buf;
1320 ses_init_buf = kcalloc(1, sizeof(struct rfc1002_session_packet), GFP_KERNEL); 1320 ses_init_buf = kzalloc(sizeof(struct rfc1002_session_packet), GFP_KERNEL);
1321 if(ses_init_buf) { 1321 if(ses_init_buf) {
1322 ses_init_buf->trailer.session_req.called_len = 32; 1322 ses_init_buf->trailer.session_req.called_len = 32;
1323 rfc1002mangle(ses_init_buf->trailer.session_req.called_name, 1323 rfc1002mangle(ses_init_buf->trailer.session_req.called_name,
@@ -1964,7 +1964,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
1964/* We look for obvious messed up bcc or strings in response so we do not go off 1964/* We look for obvious messed up bcc or strings in response so we do not go off
1965 the end since (at least) WIN2K and Windows XP have a major bug in not null 1965 the end since (at least) WIN2K and Windows XP have a major bug in not null
1966 terminating last Unicode string in response */ 1966 terminating last Unicode string in response */
1967 ses->serverOS = kcalloc(1, 2 * (len + 1), GFP_KERNEL); 1967 ses->serverOS = kzalloc(2 * (len + 1), GFP_KERNEL);
1968 if(ses->serverOS == NULL) 1968 if(ses->serverOS == NULL)
1969 goto sesssetup_nomem; 1969 goto sesssetup_nomem;
1970 cifs_strfromUCS_le(ses->serverOS, 1970 cifs_strfromUCS_le(ses->serverOS,
@@ -1976,7 +1976,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
1976 if (remaining_words > 0) { 1976 if (remaining_words > 0) {
1977 len = UniStrnlen((wchar_t *)bcc_ptr, 1977 len = UniStrnlen((wchar_t *)bcc_ptr,
1978 remaining_words-1); 1978 remaining_words-1);
1979 ses->serverNOS = kcalloc(1, 2 * (len + 1),GFP_KERNEL); 1979 ses->serverNOS = kzalloc(2 * (len + 1),GFP_KERNEL);
1980 if(ses->serverNOS == NULL) 1980 if(ses->serverNOS == NULL)
1981 goto sesssetup_nomem; 1981 goto sesssetup_nomem;
1982 cifs_strfromUCS_le(ses->serverNOS, 1982 cifs_strfromUCS_le(ses->serverNOS,
@@ -1994,7 +1994,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
1994 len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); 1994 len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words);
1995 /* last string is not always null terminated (for e.g. for Windows XP & 2000) */ 1995 /* last string is not always null terminated (for e.g. for Windows XP & 2000) */
1996 ses->serverDomain = 1996 ses->serverDomain =
1997 kcalloc(1, 2*(len+1),GFP_KERNEL); 1997 kzalloc(2*(len+1),GFP_KERNEL);
1998 if(ses->serverDomain == NULL) 1998 if(ses->serverDomain == NULL)
1999 goto sesssetup_nomem; 1999 goto sesssetup_nomem;
2000 cifs_strfromUCS_le(ses->serverDomain, 2000 cifs_strfromUCS_le(ses->serverDomain,
@@ -2005,22 +2005,22 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2005 } /* else no more room so create dummy domain string */ 2005 } /* else no more room so create dummy domain string */
2006 else 2006 else
2007 ses->serverDomain = 2007 ses->serverDomain =
2008 kcalloc(1, 2, GFP_KERNEL); 2008 kzalloc(2, GFP_KERNEL);
2009 } else { /* no room so create dummy domain and NOS string */ 2009 } else { /* no room so create dummy domain and NOS string */
2010 /* if these kcallocs fail not much we 2010 /* if these kcallocs fail not much we
2011 can do, but better to not fail the 2011 can do, but better to not fail the
2012 sesssetup itself */ 2012 sesssetup itself */
2013 ses->serverDomain = 2013 ses->serverDomain =
2014 kcalloc(1, 2, GFP_KERNEL); 2014 kzalloc(2, GFP_KERNEL);
2015 ses->serverNOS = 2015 ses->serverNOS =
2016 kcalloc(1, 2, GFP_KERNEL); 2016 kzalloc(2, GFP_KERNEL);
2017 } 2017 }
2018 } else { /* ASCII */ 2018 } else { /* ASCII */
2019 len = strnlen(bcc_ptr, 1024); 2019 len = strnlen(bcc_ptr, 1024);
2020 if (((long) bcc_ptr + len) - (long) 2020 if (((long) bcc_ptr + len) - (long)
2021 pByteArea(smb_buffer_response) 2021 pByteArea(smb_buffer_response)
2022 <= BCC(smb_buffer_response)) { 2022 <= BCC(smb_buffer_response)) {
2023 ses->serverOS = kcalloc(1, len + 1,GFP_KERNEL); 2023 ses->serverOS = kzalloc(len + 1,GFP_KERNEL);
2024 if(ses->serverOS == NULL) 2024 if(ses->serverOS == NULL)
2025 goto sesssetup_nomem; 2025 goto sesssetup_nomem;
2026 strncpy(ses->serverOS,bcc_ptr, len); 2026 strncpy(ses->serverOS,bcc_ptr, len);
@@ -2030,7 +2030,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2030 bcc_ptr++; 2030 bcc_ptr++;
2031 2031
2032 len = strnlen(bcc_ptr, 1024); 2032 len = strnlen(bcc_ptr, 1024);
2033 ses->serverNOS = kcalloc(1, len + 1,GFP_KERNEL); 2033 ses->serverNOS = kzalloc(len + 1,GFP_KERNEL);
2034 if(ses->serverNOS == NULL) 2034 if(ses->serverNOS == NULL)
2035 goto sesssetup_nomem; 2035 goto sesssetup_nomem;
2036 strncpy(ses->serverNOS, bcc_ptr, len); 2036 strncpy(ses->serverNOS, bcc_ptr, len);
@@ -2039,7 +2039,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2039 bcc_ptr++; 2039 bcc_ptr++;
2040 2040
2041 len = strnlen(bcc_ptr, 1024); 2041 len = strnlen(bcc_ptr, 1024);
2042 ses->serverDomain = kcalloc(1, len + 1,GFP_KERNEL); 2042 ses->serverDomain = kzalloc(len + 1,GFP_KERNEL);
2043 if(ses->serverDomain == NULL) 2043 if(ses->serverDomain == NULL)
2044 goto sesssetup_nomem; 2044 goto sesssetup_nomem;
2045 strncpy(ses->serverDomain, bcc_ptr, len); 2045 strncpy(ses->serverDomain, bcc_ptr, len);
@@ -2240,7 +2240,7 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2240 the end since (at least) WIN2K and Windows XP have a major bug in not null 2240 the end since (at least) WIN2K and Windows XP have a major bug in not null
2241 terminating last Unicode string in response */ 2241 terminating last Unicode string in response */
2242 ses->serverOS = 2242 ses->serverOS =
2243 kcalloc(1, 2 * (len + 1), GFP_KERNEL); 2243 kzalloc(2 * (len + 1), GFP_KERNEL);
2244 cifs_strfromUCS_le(ses->serverOS, 2244 cifs_strfromUCS_le(ses->serverOS,
2245 (wchar_t *) 2245 (wchar_t *)
2246 bcc_ptr, len, 2246 bcc_ptr, len,
@@ -2254,7 +2254,7 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2254 remaining_words 2254 remaining_words
2255 - 1); 2255 - 1);
2256 ses->serverNOS = 2256 ses->serverNOS =
2257 kcalloc(1, 2 * (len + 1), 2257 kzalloc(2 * (len + 1),
2258 GFP_KERNEL); 2258 GFP_KERNEL);
2259 cifs_strfromUCS_le(ses->serverNOS, 2259 cifs_strfromUCS_le(ses->serverNOS,
2260 (wchar_t *)bcc_ptr, 2260 (wchar_t *)bcc_ptr,
@@ -2267,7 +2267,7 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2267 if (remaining_words > 0) { 2267 if (remaining_words > 0) {
2268 len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); 2268 len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words);
2269 /* last string is not always null terminated (for e.g. for Windows XP & 2000) */ 2269 /* last string is not always null terminated (for e.g. for Windows XP & 2000) */
2270 ses->serverDomain = kcalloc(1, 2*(len+1),GFP_KERNEL); 2270 ses->serverDomain = kzalloc(2*(len+1),GFP_KERNEL);
2271 cifs_strfromUCS_le(ses->serverDomain, 2271 cifs_strfromUCS_le(ses->serverDomain,
2272 (wchar_t *)bcc_ptr, 2272 (wchar_t *)bcc_ptr,
2273 len, 2273 len,
@@ -2278,10 +2278,10 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2278 } /* else no more room so create dummy domain string */ 2278 } /* else no more room so create dummy domain string */
2279 else 2279 else
2280 ses->serverDomain = 2280 ses->serverDomain =
2281 kcalloc(1, 2,GFP_KERNEL); 2281 kzalloc(2,GFP_KERNEL);
2282 } else { /* no room so create dummy domain and NOS string */ 2282 } else { /* no room so create dummy domain and NOS string */
2283 ses->serverDomain = kcalloc(1, 2, GFP_KERNEL); 2283 ses->serverDomain = kzalloc(2, GFP_KERNEL);
2284 ses->serverNOS = kcalloc(1, 2, GFP_KERNEL); 2284 ses->serverNOS = kzalloc(2, GFP_KERNEL);
2285 } 2285 }
2286 } else { /* ASCII */ 2286 } else { /* ASCII */
2287 2287
@@ -2289,7 +2289,7 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2289 if (((long) bcc_ptr + len) - (long) 2289 if (((long) bcc_ptr + len) - (long)
2290 pByteArea(smb_buffer_response) 2290 pByteArea(smb_buffer_response)
2291 <= BCC(smb_buffer_response)) { 2291 <= BCC(smb_buffer_response)) {
2292 ses->serverOS = kcalloc(1, len + 1, GFP_KERNEL); 2292 ses->serverOS = kzalloc(len + 1, GFP_KERNEL);
2293 strncpy(ses->serverOS, bcc_ptr, len); 2293 strncpy(ses->serverOS, bcc_ptr, len);
2294 2294
2295 bcc_ptr += len; 2295 bcc_ptr += len;
@@ -2297,14 +2297,14 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2297 bcc_ptr++; 2297 bcc_ptr++;
2298 2298
2299 len = strnlen(bcc_ptr, 1024); 2299 len = strnlen(bcc_ptr, 1024);
2300 ses->serverNOS = kcalloc(1, len + 1,GFP_KERNEL); 2300 ses->serverNOS = kzalloc(len + 1,GFP_KERNEL);
2301 strncpy(ses->serverNOS, bcc_ptr, len); 2301 strncpy(ses->serverNOS, bcc_ptr, len);
2302 bcc_ptr += len; 2302 bcc_ptr += len;
2303 bcc_ptr[0] = 0; 2303 bcc_ptr[0] = 0;
2304 bcc_ptr++; 2304 bcc_ptr++;
2305 2305
2306 len = strnlen(bcc_ptr, 1024); 2306 len = strnlen(bcc_ptr, 1024);
2307 ses->serverDomain = kcalloc(1, len + 1, GFP_KERNEL); 2307 ses->serverDomain = kzalloc(len + 1, GFP_KERNEL);
2308 strncpy(ses->serverDomain, bcc_ptr, len); 2308 strncpy(ses->serverDomain, bcc_ptr, len);
2309 bcc_ptr += len; 2309 bcc_ptr += len;
2310 bcc_ptr[0] = 0; 2310 bcc_ptr[0] = 0;
@@ -2554,7 +2554,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2554 the end since (at least) WIN2K and Windows XP have a major bug in not null 2554 the end since (at least) WIN2K and Windows XP have a major bug in not null
2555 terminating last Unicode string in response */ 2555 terminating last Unicode string in response */
2556 ses->serverOS = 2556 ses->serverOS =
2557 kcalloc(1, 2 * (len + 1), GFP_KERNEL); 2557 kzalloc(2 * (len + 1), GFP_KERNEL);
2558 cifs_strfromUCS_le(ses->serverOS, 2558 cifs_strfromUCS_le(ses->serverOS,
2559 (wchar_t *) 2559 (wchar_t *)
2560 bcc_ptr, len, 2560 bcc_ptr, len,
@@ -2569,7 +2569,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2569 remaining_words 2569 remaining_words
2570 - 1); 2570 - 1);
2571 ses->serverNOS = 2571 ses->serverNOS =
2572 kcalloc(1, 2 * (len + 1), 2572 kzalloc(2 * (len + 1),
2573 GFP_KERNEL); 2573 GFP_KERNEL);
2574 cifs_strfromUCS_le(ses-> 2574 cifs_strfromUCS_le(ses->
2575 serverNOS, 2575 serverNOS,
@@ -2586,7 +2586,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2586 len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); 2586 len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words);
2587 /* last string is not always null terminated (for e.g. for Windows XP & 2000) */ 2587 /* last string is not always null terminated (for e.g. for Windows XP & 2000) */
2588 ses->serverDomain = 2588 ses->serverDomain =
2589 kcalloc(1, 2 * 2589 kzalloc(2 *
2590 (len + 2590 (len +
2591 1), 2591 1),
2592 GFP_KERNEL); 2592 GFP_KERNEL);
@@ -2612,13 +2612,13 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2612 } /* else no more room so create dummy domain string */ 2612 } /* else no more room so create dummy domain string */
2613 else 2613 else
2614 ses->serverDomain = 2614 ses->serverDomain =
2615 kcalloc(1, 2, 2615 kzalloc(2,
2616 GFP_KERNEL); 2616 GFP_KERNEL);
2617 } else { /* no room so create dummy domain and NOS string */ 2617 } else { /* no room so create dummy domain and NOS string */
2618 ses->serverDomain = 2618 ses->serverDomain =
2619 kcalloc(1, 2, GFP_KERNEL); 2619 kzalloc(2, GFP_KERNEL);
2620 ses->serverNOS = 2620 ses->serverNOS =
2621 kcalloc(1, 2, GFP_KERNEL); 2621 kzalloc(2, GFP_KERNEL);
2622 } 2622 }
2623 } else { /* ASCII */ 2623 } else { /* ASCII */
2624 len = strnlen(bcc_ptr, 1024); 2624 len = strnlen(bcc_ptr, 1024);
@@ -2626,7 +2626,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2626 pByteArea(smb_buffer_response) 2626 pByteArea(smb_buffer_response)
2627 <= BCC(smb_buffer_response)) { 2627 <= BCC(smb_buffer_response)) {
2628 ses->serverOS = 2628 ses->serverOS =
2629 kcalloc(1, len + 1, 2629 kzalloc(len + 1,
2630 GFP_KERNEL); 2630 GFP_KERNEL);
2631 strncpy(ses->serverOS, 2631 strncpy(ses->serverOS,
2632 bcc_ptr, len); 2632 bcc_ptr, len);
@@ -2637,7 +2637,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2637 2637
2638 len = strnlen(bcc_ptr, 1024); 2638 len = strnlen(bcc_ptr, 1024);
2639 ses->serverNOS = 2639 ses->serverNOS =
2640 kcalloc(1, len + 1, 2640 kzalloc(len + 1,
2641 GFP_KERNEL); 2641 GFP_KERNEL);
2642 strncpy(ses->serverNOS, bcc_ptr, len); 2642 strncpy(ses->serverNOS, bcc_ptr, len);
2643 bcc_ptr += len; 2643 bcc_ptr += len;
@@ -2646,7 +2646,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2646 2646
2647 len = strnlen(bcc_ptr, 1024); 2647 len = strnlen(bcc_ptr, 1024);
2648 ses->serverDomain = 2648 ses->serverDomain =
2649 kcalloc(1, len + 1, 2649 kzalloc(len + 1,
2650 GFP_KERNEL); 2650 GFP_KERNEL);
2651 strncpy(ses->serverDomain, bcc_ptr, len); 2651 strncpy(ses->serverDomain, bcc_ptr, len);
2652 bcc_ptr += len; 2652 bcc_ptr += len;
@@ -2948,7 +2948,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2948 the end since (at least) WIN2K and Windows XP have a major bug in not null 2948 the end since (at least) WIN2K and Windows XP have a major bug in not null
2949 terminating last Unicode string in response */ 2949 terminating last Unicode string in response */
2950 ses->serverOS = 2950 ses->serverOS =
2951 kcalloc(1, 2 * (len + 1), GFP_KERNEL); 2951 kzalloc(2 * (len + 1), GFP_KERNEL);
2952 cifs_strfromUCS_le(ses->serverOS, 2952 cifs_strfromUCS_le(ses->serverOS,
2953 (wchar_t *) 2953 (wchar_t *)
2954 bcc_ptr, len, 2954 bcc_ptr, len,
@@ -2963,7 +2963,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2963 remaining_words 2963 remaining_words
2964 - 1); 2964 - 1);
2965 ses->serverNOS = 2965 ses->serverNOS =
2966 kcalloc(1, 2 * (len + 1), 2966 kzalloc(2 * (len + 1),
2967 GFP_KERNEL); 2967 GFP_KERNEL);
2968 cifs_strfromUCS_le(ses-> 2968 cifs_strfromUCS_le(ses->
2969 serverNOS, 2969 serverNOS,
@@ -2979,7 +2979,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2979 len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); 2979 len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words);
2980 /* last string not always null terminated (e.g. for Windows XP & 2000) */ 2980 /* last string not always null terminated (e.g. for Windows XP & 2000) */
2981 ses->serverDomain = 2981 ses->serverDomain =
2982 kcalloc(1, 2 * 2982 kzalloc(2 *
2983 (len + 2983 (len +
2984 1), 2984 1),
2985 GFP_KERNEL); 2985 GFP_KERNEL);
@@ -3004,17 +3004,17 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
3004 = 0; 3004 = 0;
3005 } /* else no more room so create dummy domain string */ 3005 } /* else no more room so create dummy domain string */
3006 else 3006 else
3007 ses->serverDomain = kcalloc(1, 2,GFP_KERNEL); 3007 ses->serverDomain = kzalloc(2,GFP_KERNEL);
3008 } else { /* no room so create dummy domain and NOS string */ 3008 } else { /* no room so create dummy domain and NOS string */
3009 ses->serverDomain = kcalloc(1, 2, GFP_KERNEL); 3009 ses->serverDomain = kzalloc(2, GFP_KERNEL);
3010 ses->serverNOS = kcalloc(1, 2, GFP_KERNEL); 3010 ses->serverNOS = kzalloc(2, GFP_KERNEL);
3011 } 3011 }
3012 } else { /* ASCII */ 3012 } else { /* ASCII */
3013 len = strnlen(bcc_ptr, 1024); 3013 len = strnlen(bcc_ptr, 1024);
3014 if (((long) bcc_ptr + len) - 3014 if (((long) bcc_ptr + len) -
3015 (long) pByteArea(smb_buffer_response) 3015 (long) pByteArea(smb_buffer_response)
3016 <= BCC(smb_buffer_response)) { 3016 <= BCC(smb_buffer_response)) {
3017 ses->serverOS = kcalloc(1, len + 1,GFP_KERNEL); 3017 ses->serverOS = kzalloc(len + 1,GFP_KERNEL);
3018 strncpy(ses->serverOS,bcc_ptr, len); 3018 strncpy(ses->serverOS,bcc_ptr, len);
3019 3019
3020 bcc_ptr += len; 3020 bcc_ptr += len;
@@ -3022,14 +3022,14 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
3022 bcc_ptr++; 3022 bcc_ptr++;
3023 3023
3024 len = strnlen(bcc_ptr, 1024); 3024 len = strnlen(bcc_ptr, 1024);
3025 ses->serverNOS = kcalloc(1, len+1,GFP_KERNEL); 3025 ses->serverNOS = kzalloc(len+1,GFP_KERNEL);
3026 strncpy(ses->serverNOS, bcc_ptr, len); 3026 strncpy(ses->serverNOS, bcc_ptr, len);
3027 bcc_ptr += len; 3027 bcc_ptr += len;
3028 bcc_ptr[0] = 0; 3028 bcc_ptr[0] = 0;
3029 bcc_ptr++; 3029 bcc_ptr++;
3030 3030
3031 len = strnlen(bcc_ptr, 1024); 3031 len = strnlen(bcc_ptr, 1024);
3032 ses->serverDomain = kcalloc(1, len+1,GFP_KERNEL); 3032 ses->serverDomain = kzalloc(len+1,GFP_KERNEL);
3033 strncpy(ses->serverDomain, bcc_ptr, len); 3033 strncpy(ses->serverDomain, bcc_ptr, len);
3034 bcc_ptr += len; 3034 bcc_ptr += len;
3035 bcc_ptr[0] = 0; 3035 bcc_ptr[0] = 0;
@@ -3141,7 +3141,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
3141 if(tcon->nativeFileSystem) 3141 if(tcon->nativeFileSystem)
3142 kfree(tcon->nativeFileSystem); 3142 kfree(tcon->nativeFileSystem);
3143 tcon->nativeFileSystem = 3143 tcon->nativeFileSystem =
3144 kcalloc(1, length + 2, GFP_KERNEL); 3144 kzalloc(length + 2, GFP_KERNEL);
3145 cifs_strfromUCS_le(tcon->nativeFileSystem, 3145 cifs_strfromUCS_le(tcon->nativeFileSystem,
3146 (wchar_t *) bcc_ptr, 3146 (wchar_t *) bcc_ptr,
3147 length, nls_codepage); 3147 length, nls_codepage);
@@ -3159,7 +3159,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
3159 if(tcon->nativeFileSystem) 3159 if(tcon->nativeFileSystem)
3160 kfree(tcon->nativeFileSystem); 3160 kfree(tcon->nativeFileSystem);
3161 tcon->nativeFileSystem = 3161 tcon->nativeFileSystem =
3162 kcalloc(1, length + 1, GFP_KERNEL); 3162 kzalloc(length + 1, GFP_KERNEL);
3163 strncpy(tcon->nativeFileSystem, bcc_ptr, 3163 strncpy(tcon->nativeFileSystem, bcc_ptr,
3164 length); 3164 length);
3165 } 3165 }
@@ -3215,10 +3215,8 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
3215 } 3215 }
3216 3216
3217 cifs_sb->tcon = NULL; 3217 cifs_sb->tcon = NULL;
3218 if (ses) { 3218 if (ses)
3219 set_current_state(TASK_INTERRUPTIBLE); 3219 schedule_timeout_interruptible(msecs_to_jiffies(500));
3220 schedule_timeout(HZ / 2);
3221 }
3222 if (ses) 3220 if (ses)
3223 sesInfoFree(ses); 3221 sesInfoFree(ses);
3224 3222
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 3f3538d4a1fa..d335269bd91c 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -145,24 +145,23 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
145 return -ENOMEM; 145 return -ENOMEM;
146 } 146 }
147 147
148 if(nd) { 148 if(nd && (nd->flags & LOOKUP_OPEN)) {
149 if ((nd->intent.open.flags & O_ACCMODE) == O_RDONLY) 149 int oflags = nd->intent.open.flags;
150 desiredAccess = GENERIC_READ; 150
151 else if ((nd->intent.open.flags & O_ACCMODE) == O_WRONLY) { 151 desiredAccess = 0;
152 desiredAccess = GENERIC_WRITE; 152 if (oflags & FMODE_READ)
153 write_only = TRUE; 153 desiredAccess |= GENERIC_READ;
154 } else if ((nd->intent.open.flags & O_ACCMODE) == O_RDWR) { 154 if (oflags & FMODE_WRITE) {
155 /* GENERIC_ALL is too much permission to request */ 155 desiredAccess |= GENERIC_WRITE;
156 /* can cause unnecessary access denied on create */ 156 if (!(oflags & FMODE_READ))
157 /* desiredAccess = GENERIC_ALL; */ 157 write_only = TRUE;
158 desiredAccess = GENERIC_READ | GENERIC_WRITE;
159 } 158 }
160 159
161 if((nd->intent.open.flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) 160 if((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
162 disposition = FILE_CREATE; 161 disposition = FILE_CREATE;
163 else if((nd->intent.open.flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC)) 162 else if((oflags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
164 disposition = FILE_OVERWRITE_IF; 163 disposition = FILE_OVERWRITE_IF;
165 else if((nd->intent.open.flags & O_CREAT) == O_CREAT) 164 else if((oflags & O_CREAT) == O_CREAT)
166 disposition = FILE_OPEN_IF; 165 disposition = FILE_OPEN_IF;
167 else { 166 else {
168 cFYI(1,("Create flag not set in create function")); 167 cFYI(1,("Create flag not set in create function"));
diff --git a/fs/compat.c b/fs/compat.c
index 6b06b6bae35e..ac3fb9ed8eea 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -310,96 +310,6 @@ static int __init init_sys32_ioctl(void)
310 310
311__initcall(init_sys32_ioctl); 311__initcall(init_sys32_ioctl);
312 312
313int register_ioctl32_conversion(unsigned int cmd,
314 ioctl_trans_handler_t handler)
315{
316 struct ioctl_trans *t;
317 struct ioctl_trans *new_t;
318 unsigned long hash = ioctl32_hash(cmd);
319
320 new_t = kmalloc(sizeof(*new_t), GFP_KERNEL);
321 if (!new_t)
322 return -ENOMEM;
323
324 down_write(&ioctl32_sem);
325 for (t = ioctl32_hash_table[hash]; t; t = t->next) {
326 if (t->cmd == cmd) {
327 printk(KERN_ERR "Trying to register duplicated ioctl32 "
328 "handler %x\n", cmd);
329 up_write(&ioctl32_sem);
330 kfree(new_t);
331 return -EINVAL;
332 }
333 }
334 new_t->next = NULL;
335 new_t->cmd = cmd;
336 new_t->handler = handler;
337 ioctl32_insert_translation(new_t);
338
339 up_write(&ioctl32_sem);
340 return 0;
341}
342EXPORT_SYMBOL(register_ioctl32_conversion);
343
344static inline int builtin_ioctl(struct ioctl_trans *t)
345{
346 return t >= ioctl_start && t < (ioctl_start + ioctl_table_size);
347}
348
349/* Problem:
350 This function cannot unregister duplicate ioctls, because they are not
351 unique.
352 When they happen we need to extend the prototype to pass the handler too. */
353
354int unregister_ioctl32_conversion(unsigned int cmd)
355{
356 unsigned long hash = ioctl32_hash(cmd);
357 struct ioctl_trans *t, *t1;
358
359 down_write(&ioctl32_sem);
360
361 t = ioctl32_hash_table[hash];
362 if (!t) {
363 up_write(&ioctl32_sem);
364 return -EINVAL;
365 }
366
367 if (t->cmd == cmd) {
368 if (builtin_ioctl(t)) {
369 printk("%p tried to unregister builtin ioctl %x\n",
370 __builtin_return_address(0), cmd);
371 } else {
372 ioctl32_hash_table[hash] = t->next;
373 up_write(&ioctl32_sem);
374 kfree(t);
375 return 0;
376 }
377 }
378 while (t->next) {
379 t1 = t->next;
380 if (t1->cmd == cmd) {
381 if (builtin_ioctl(t1)) {
382 printk("%p tried to unregister builtin "
383 "ioctl %x\n",
384 __builtin_return_address(0), cmd);
385 goto out;
386 } else {
387 t->next = t1->next;
388 up_write(&ioctl32_sem);
389 kfree(t1);
390 return 0;
391 }
392 }
393 t = t1;
394 }
395 printk(KERN_ERR "Trying to free unknown 32bit ioctl handler %x\n",
396 cmd);
397out:
398 up_write(&ioctl32_sem);
399 return -EINVAL;
400}
401EXPORT_SYMBOL(unregister_ioctl32_conversion);
402
403static void compat_ioctl_error(struct file *filp, unsigned int fd, 313static void compat_ioctl_error(struct file *filp, unsigned int fd,
404 unsigned int cmd, unsigned long arg) 314 unsigned int cmd, unsigned long arg)
405{ 315{
@@ -720,14 +630,14 @@ compat_sys_io_submit(aio_context_t ctx_id, int nr, u32 __user *iocb)
720struct compat_ncp_mount_data { 630struct compat_ncp_mount_data {
721 compat_int_t version; 631 compat_int_t version;
722 compat_uint_t ncp_fd; 632 compat_uint_t ncp_fd;
723 compat_uid_t mounted_uid; 633 __compat_uid_t mounted_uid;
724 compat_pid_t wdog_pid; 634 compat_pid_t wdog_pid;
725 unsigned char mounted_vol[NCP_VOLNAME_LEN + 1]; 635 unsigned char mounted_vol[NCP_VOLNAME_LEN + 1];
726 compat_uint_t time_out; 636 compat_uint_t time_out;
727 compat_uint_t retry_count; 637 compat_uint_t retry_count;
728 compat_uint_t flags; 638 compat_uint_t flags;
729 compat_uid_t uid; 639 __compat_uid_t uid;
730 compat_gid_t gid; 640 __compat_gid_t gid;
731 compat_mode_t file_mode; 641 compat_mode_t file_mode;
732 compat_mode_t dir_mode; 642 compat_mode_t dir_mode;
733}; 643};
@@ -784,9 +694,9 @@ static void *do_ncp_super_data_conv(void *raw_data)
784 694
785struct compat_smb_mount_data { 695struct compat_smb_mount_data {
786 compat_int_t version; 696 compat_int_t version;
787 compat_uid_t mounted_uid; 697 __compat_uid_t mounted_uid;
788 compat_uid_t uid; 698 __compat_uid_t uid;
789 compat_gid_t gid; 699 __compat_gid_t gid;
790 compat_mode_t file_mode; 700 compat_mode_t file_mode;
791 compat_mode_t dir_mode; 701 compat_mode_t dir_mode;
792}; 702};
@@ -1365,6 +1275,16 @@ out:
1365} 1275}
1366 1276
1367/* 1277/*
1278 * Exactly like fs/open.c:sys_open(), except that it doesn't set the
1279 * O_LARGEFILE flag.
1280 */
1281asmlinkage long
1282compat_sys_open(const char __user *filename, int flags, int mode)
1283{
1284 return do_sys_open(filename, flags, mode);
1285}
1286
1287/*
1368 * compat_count() counts the number of arguments/envelopes. It is basically 1288 * compat_count() counts the number of arguments/envelopes. It is basically
1369 * a copy of count() from fs/exec.c, except that it works with 32 bit argv 1289 * a copy of count() from fs/exec.c, except that it works with 32 bit argv
1370 * and envp pointers. 1290 * and envp pointers.
@@ -1699,6 +1619,7 @@ compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp
1699 char *bits; 1619 char *bits;
1700 long timeout; 1620 long timeout;
1701 int size, max_fdset, ret = -EINVAL; 1621 int size, max_fdset, ret = -EINVAL;
1622 struct fdtable *fdt;
1702 1623
1703 timeout = MAX_SCHEDULE_TIMEOUT; 1624 timeout = MAX_SCHEDULE_TIMEOUT;
1704 if (tvp) { 1625 if (tvp) {
@@ -1724,7 +1645,10 @@ compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp
1724 goto out_nofds; 1645 goto out_nofds;
1725 1646
1726 /* max_fdset can increase, so grab it once to avoid race */ 1647 /* max_fdset can increase, so grab it once to avoid race */
1727 max_fdset = current->files->max_fdset; 1648 rcu_read_lock();
1649 fdt = files_fdtable(current->files);
1650 max_fdset = fdt->max_fdset;
1651 rcu_read_unlock();
1728 if (n > max_fdset) 1652 if (n > max_fdset)
1729 n = max_fdset; 1653 n = max_fdset;
1730 1654
@@ -1808,8 +1732,8 @@ struct compat_nfsctl_export {
1808 compat_dev_t ex32_dev; 1732 compat_dev_t ex32_dev;
1809 compat_ino_t ex32_ino; 1733 compat_ino_t ex32_ino;
1810 compat_int_t ex32_flags; 1734 compat_int_t ex32_flags;
1811 compat_uid_t ex32_anon_uid; 1735 __compat_uid_t ex32_anon_uid;
1812 compat_gid_t ex32_anon_gid; 1736 __compat_gid_t ex32_anon_gid;
1813}; 1737};
1814 1738
1815struct compat_nfsctl_fdparm { 1739struct compat_nfsctl_fdparm {
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 155e612635f1..e28a74203f3b 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -798,13 +798,16 @@ static int routing_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
798 r = (void *) &r4; 798 r = (void *) &r4;
799 } 799 }
800 800
801 if (ret) 801 if (ret) {
802 return -EFAULT; 802 ret = -EFAULT;
803 goto out;
804 }
803 805
804 set_fs (KERNEL_DS); 806 set_fs (KERNEL_DS);
805 ret = sys_ioctl (fd, cmd, (unsigned long) r); 807 ret = sys_ioctl (fd, cmd, (unsigned long) r);
806 set_fs (old_fs); 808 set_fs (old_fs);
807 809
810out:
808 if (mysock) 811 if (mysock)
809 sockfd_put(mysock); 812 sockfd_put(mysock);
810 813
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 6c285efa2004..7fe85415ae7c 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -39,12 +39,47 @@ static DECLARE_MUTEX(read_mutex);
39#define CRAMINO(x) ((x)->offset?(x)->offset<<2:1) 39#define CRAMINO(x) ((x)->offset?(x)->offset<<2:1)
40#define OFFSET(x) ((x)->i_ino) 40#define OFFSET(x) ((x)->i_ino)
41 41
42static struct inode *get_cramfs_inode(struct super_block *sb, struct cramfs_inode * cramfs_inode) 42
43static int cramfs_iget5_test(struct inode *inode, void *opaque)
44{
45 struct cramfs_inode *cramfs_inode = opaque;
46
47 if (inode->i_ino != CRAMINO(cramfs_inode))
48 return 0; /* does not match */
49
50 if (inode->i_ino != 1)
51 return 1;
52
53 /* all empty directories, char, block, pipe, and sock, share inode #1 */
54
55 if ((inode->i_mode != cramfs_inode->mode) ||
56 (inode->i_gid != cramfs_inode->gid) ||
57 (inode->i_uid != cramfs_inode->uid))
58 return 0; /* does not match */
59
60 if ((S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) &&
61 (inode->i_rdev != old_decode_dev(cramfs_inode->size)))
62 return 0; /* does not match */
63
64 return 1; /* matches */
65}
66
67static int cramfs_iget5_set(struct inode *inode, void *opaque)
68{
69 struct cramfs_inode *cramfs_inode = opaque;
70 inode->i_ino = CRAMINO(cramfs_inode);
71 return 0;
72}
73
74static struct inode *get_cramfs_inode(struct super_block *sb,
75 struct cramfs_inode * cramfs_inode)
43{ 76{
44 struct inode * inode = new_inode(sb); 77 struct inode *inode = iget5_locked(sb, CRAMINO(cramfs_inode),
78 cramfs_iget5_test, cramfs_iget5_set,
79 cramfs_inode);
45 static struct timespec zerotime; 80 static struct timespec zerotime;
46 81
47 if (inode) { 82 if (inode && (inode->i_state & I_NEW)) {
48 inode->i_mode = cramfs_inode->mode; 83 inode->i_mode = cramfs_inode->mode;
49 inode->i_uid = cramfs_inode->uid; 84 inode->i_uid = cramfs_inode->uid;
50 inode->i_size = cramfs_inode->size; 85 inode->i_size = cramfs_inode->size;
@@ -58,7 +93,6 @@ static struct inode *get_cramfs_inode(struct super_block *sb, struct cramfs_inod
58 but it's the best we can do without reading the directory 93 but it's the best we can do without reading the directory
59 contents. 1 yields the right result in GNU find, even 94 contents. 1 yields the right result in GNU find, even
60 without -noleaf option. */ 95 without -noleaf option. */
61 insert_inode_hash(inode);
62 if (S_ISREG(inode->i_mode)) { 96 if (S_ISREG(inode->i_mode)) {
63 inode->i_fop = &generic_ro_fops; 97 inode->i_fop = &generic_ro_fops;
64 inode->i_data.a_ops = &cramfs_aops; 98 inode->i_data.a_ops = &cramfs_aops;
@@ -74,6 +108,7 @@ static struct inode *get_cramfs_inode(struct super_block *sb, struct cramfs_inod
74 init_special_inode(inode, inode->i_mode, 108 init_special_inode(inode, inode->i_mode,
75 old_decode_dev(cramfs_inode->size)); 109 old_decode_dev(cramfs_inode->size));
76 } 110 }
111 unlock_new_inode(inode);
77 } 112 }
78 return inode; 113 return inode;
79} 114}
diff --git a/fs/cramfs/uncompress.c b/fs/cramfs/uncompress.c
index 5034365b06a8..8def89f2c438 100644
--- a/fs/cramfs/uncompress.c
+++ b/fs/cramfs/uncompress.c
@@ -19,6 +19,7 @@
19#include <linux/errno.h> 19#include <linux/errno.h>
20#include <linux/vmalloc.h> 20#include <linux/vmalloc.h>
21#include <linux/zlib.h> 21#include <linux/zlib.h>
22#include <linux/cramfs_fs.h>
22 23
23static z_stream stream; 24static z_stream stream;
24static int initialized; 25static int initialized;
diff --git a/fs/dcache.c b/fs/dcache.c
index a15a2e1f5520..7376b61269fb 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -337,12 +337,10 @@ struct dentry * d_find_alias(struct inode *inode)
337 */ 337 */
338void d_prune_aliases(struct inode *inode) 338void d_prune_aliases(struct inode *inode)
339{ 339{
340 struct list_head *tmp, *head = &inode->i_dentry; 340 struct dentry *dentry;
341restart: 341restart:
342 spin_lock(&dcache_lock); 342 spin_lock(&dcache_lock);
343 tmp = head; 343 list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
344 while ((tmp = tmp->next) != head) {
345 struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
346 spin_lock(&dentry->d_lock); 344 spin_lock(&dentry->d_lock);
347 if (!atomic_read(&dentry->d_count)) { 345 if (!atomic_read(&dentry->d_count)) {
348 __dget_locked(dentry); 346 __dget_locked(dentry);
@@ -463,10 +461,7 @@ void shrink_dcache_sb(struct super_block * sb)
463 * superblock to the most recent end of the unused list. 461 * superblock to the most recent end of the unused list.
464 */ 462 */
465 spin_lock(&dcache_lock); 463 spin_lock(&dcache_lock);
466 next = dentry_unused.next; 464 list_for_each_safe(tmp, next, &dentry_unused) {
467 while (next != &dentry_unused) {
468 tmp = next;
469 next = tmp->next;
470 dentry = list_entry(tmp, struct dentry, d_lru); 465 dentry = list_entry(tmp, struct dentry, d_lru);
471 if (dentry->d_sb != sb) 466 if (dentry->d_sb != sb)
472 continue; 467 continue;
@@ -478,10 +473,7 @@ void shrink_dcache_sb(struct super_block * sb)
478 * Pass two ... free the dentries for this superblock. 473 * Pass two ... free the dentries for this superblock.
479 */ 474 */
480repeat: 475repeat:
481 next = dentry_unused.next; 476 list_for_each_safe(tmp, next, &dentry_unused) {
482 while (next != &dentry_unused) {
483 tmp = next;
484 next = tmp->next;
485 dentry = list_entry(tmp, struct dentry, d_lru); 477 dentry = list_entry(tmp, struct dentry, d_lru);
486 if (dentry->d_sb != sb) 478 if (dentry->d_sb != sb)
487 continue; 479 continue;
diff --git a/fs/devpts/Makefile b/fs/devpts/Makefile
index 5800df2e50c8..236696efcbac 100644
--- a/fs/devpts/Makefile
+++ b/fs/devpts/Makefile
@@ -5,4 +5,3 @@
5obj-$(CONFIG_UNIX98_PTYS) += devpts.o 5obj-$(CONFIG_UNIX98_PTYS) += devpts.o
6 6
7devpts-$(CONFIG_UNIX98_PTYS) := inode.o 7devpts-$(CONFIG_UNIX98_PTYS) := inode.o
8devpts-$(CONFIG_DEVPTS_FS_SECURITY) += xattr_security.o
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 1571c8d6c232..f2be44d4491f 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -18,28 +18,9 @@
18#include <linux/mount.h> 18#include <linux/mount.h>
19#include <linux/tty.h> 19#include <linux/tty.h>
20#include <linux/devpts_fs.h> 20#include <linux/devpts_fs.h>
21#include <linux/xattr.h>
22 21
23#define DEVPTS_SUPER_MAGIC 0x1cd1 22#define DEVPTS_SUPER_MAGIC 0x1cd1
24 23
25extern struct xattr_handler devpts_xattr_security_handler;
26
27static struct xattr_handler *devpts_xattr_handlers[] = {
28#ifdef CONFIG_DEVPTS_FS_SECURITY
29 &devpts_xattr_security_handler,
30#endif
31 NULL
32};
33
34static struct inode_operations devpts_file_inode_operations = {
35#ifdef CONFIG_DEVPTS_FS_XATTR
36 .setxattr = generic_setxattr,
37 .getxattr = generic_getxattr,
38 .listxattr = generic_listxattr,
39 .removexattr = generic_removexattr,
40#endif
41};
42
43static struct vfsmount *devpts_mnt; 24static struct vfsmount *devpts_mnt;
44static struct dentry *devpts_root; 25static struct dentry *devpts_root;
45 26
@@ -102,7 +83,6 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
102 s->s_blocksize_bits = 10; 83 s->s_blocksize_bits = 10;
103 s->s_magic = DEVPTS_SUPER_MAGIC; 84 s->s_magic = DEVPTS_SUPER_MAGIC;
104 s->s_op = &devpts_sops; 85 s->s_op = &devpts_sops;
105 s->s_xattr = devpts_xattr_handlers;
106 s->s_time_gran = 1; 86 s->s_time_gran = 1;
107 87
108 inode = new_inode(s); 88 inode = new_inode(s);
@@ -175,7 +155,6 @@ int devpts_pty_new(struct tty_struct *tty)
175 inode->i_gid = config.setgid ? config.gid : current->fsgid; 155 inode->i_gid = config.setgid ? config.gid : current->fsgid;
176 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 156 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
177 init_special_inode(inode, S_IFCHR|config.mode, device); 157 init_special_inode(inode, S_IFCHR|config.mode, device);
178 inode->i_op = &devpts_file_inode_operations;
179 inode->u.generic_ip = tty; 158 inode->u.generic_ip = tty;
180 159
181 dentry = get_node(number); 160 dentry = get_node(number);
diff --git a/fs/devpts/xattr_security.c b/fs/devpts/xattr_security.c
deleted file mode 100644
index 864cb5c79baa..000000000000
--- a/fs/devpts/xattr_security.c
+++ /dev/null
@@ -1,47 +0,0 @@
1/*
2 * Security xattr support for devpts.
3 *
4 * Author: Stephen Smalley <sds@epoch.ncsc.mil>
5 * Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License as published by the Free
9 * Software Foundation; either version 2 of the License, or (at your option)
10 * any later version.
11 */
12#include <linux/string.h>
13#include <linux/fs.h>
14#include <linux/security.h>
15#include <linux/xattr.h>
16
17static size_t
18devpts_xattr_security_list(struct inode *inode, char *list, size_t list_len,
19 const char *name, size_t name_len)
20{
21 return security_inode_listsecurity(inode, list, list_len);
22}
23
24static int
25devpts_xattr_security_get(struct inode *inode, const char *name,
26 void *buffer, size_t size)
27{
28 if (strcmp(name, "") == 0)
29 return -EINVAL;
30 return security_inode_getsecurity(inode, name, buffer, size);
31}
32
33static int
34devpts_xattr_security_set(struct inode *inode, const char *name,
35 const void *value, size_t size, int flags)
36{
37 if (strcmp(name, "") == 0)
38 return -EINVAL;
39 return security_inode_setsecurity(inode, name, value, size, flags);
40}
41
42struct xattr_handler devpts_xattr_security_handler = {
43 .prefix = XATTR_SECURITY_PREFIX,
44 .list = devpts_xattr_security_list,
45 .get = devpts_xattr_security_get,
46 .set = devpts_xattr_security_set,
47};
diff --git a/fs/exec.c b/fs/exec.c
index 222ab1c572d8..14dd03907ccb 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -798,6 +798,7 @@ no_thread_group:
798static inline void flush_old_files(struct files_struct * files) 798static inline void flush_old_files(struct files_struct * files)
799{ 799{
800 long j = -1; 800 long j = -1;
801 struct fdtable *fdt;
801 802
802 spin_lock(&files->file_lock); 803 spin_lock(&files->file_lock);
803 for (;;) { 804 for (;;) {
@@ -805,12 +806,13 @@ static inline void flush_old_files(struct files_struct * files)
805 806
806 j++; 807 j++;
807 i = j * __NFDBITS; 808 i = j * __NFDBITS;
808 if (i >= files->max_fds || i >= files->max_fdset) 809 fdt = files_fdtable(files);
810 if (i >= fdt->max_fds || i >= fdt->max_fdset)
809 break; 811 break;
810 set = files->close_on_exec->fds_bits[j]; 812 set = fdt->close_on_exec->fds_bits[j];
811 if (!set) 813 if (!set)
812 continue; 814 continue;
813 files->close_on_exec->fds_bits[j] = 0; 815 fdt->close_on_exec->fds_bits[j] = 0;
814 spin_unlock(&files->file_lock); 816 spin_unlock(&files->file_lock);
815 for ( ; set ; i++,set >>= 1) { 817 for ( ; set ; i++,set >>= 1) {
816 if (set & 1) { 818 if (set & 1) {
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 161f156d98c8..c8d07030c897 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -615,6 +615,11 @@ got:
615 DQUOT_DROP(inode); 615 DQUOT_DROP(inode);
616 goto fail2; 616 goto fail2;
617 } 617 }
618 err = ext2_init_security(inode,dir);
619 if (err) {
620 DQUOT_FREE_INODE(inode);
621 goto fail2;
622 }
618 mark_inode_dirty(inode); 623 mark_inode_dirty(inode);
619 ext2_debug("allocating inode %lu\n", inode->i_ino); 624 ext2_debug("allocating inode %lu\n", inode->i_ino);
620 ext2_preread_inode(inode); 625 ext2_preread_inode(inode);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 53dceb0c6593..fdba4d1d3c60 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -71,6 +71,8 @@ void ext2_put_inode(struct inode *inode)
71 */ 71 */
72void ext2_delete_inode (struct inode * inode) 72void ext2_delete_inode (struct inode * inode)
73{ 73{
74 truncate_inode_pages(&inode->i_data, 0);
75
74 if (is_bad_inode(inode)) 76 if (is_bad_inode(inode))
75 goto no_delete; 77 goto no_delete;
76 EXT2_I(inode)->i_dtime = get_seconds(); 78 EXT2_I(inode)->i_dtime = get_seconds();
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index dcfe331dc4c4..3c0c7c6a5b44 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -19,6 +19,7 @@
19#include <linux/config.h> 19#include <linux/config.h>
20#include <linux/module.h> 20#include <linux/module.h>
21#include <linux/string.h> 21#include <linux/string.h>
22#include <linux/fs.h>
22#include <linux/slab.h> 23#include <linux/slab.h>
23#include <linux/init.h> 24#include <linux/init.h>
24#include <linux/blkdev.h> 25#include <linux/blkdev.h>
@@ -27,6 +28,8 @@
27#include <linux/buffer_head.h> 28#include <linux/buffer_head.h>
28#include <linux/smp_lock.h> 29#include <linux/smp_lock.h>
29#include <linux/vfs.h> 30#include <linux/vfs.h>
31#include <linux/seq_file.h>
32#include <linux/mount.h>
30#include <asm/uaccess.h> 33#include <asm/uaccess.h>
31#include "ext2.h" 34#include "ext2.h"
32#include "xattr.h" 35#include "xattr.h"
@@ -201,6 +204,26 @@ static void ext2_clear_inode(struct inode *inode)
201#endif 204#endif
202} 205}
203 206
207static int ext2_show_options(struct seq_file *seq, struct vfsmount *vfs)
208{
209 struct ext2_sb_info *sbi = EXT2_SB(vfs->mnt_sb);
210
211 if (sbi->s_mount_opt & EXT2_MOUNT_GRPID)
212 seq_puts(seq, ",grpid");
213 else
214 seq_puts(seq, ",nogrpid");
215
216#if defined(CONFIG_QUOTA)
217 if (sbi->s_mount_opt & EXT2_MOUNT_USRQUOTA)
218 seq_puts(seq, ",usrquota");
219
220 if (sbi->s_mount_opt & EXT2_MOUNT_GRPQUOTA)
221 seq_puts(seq, ",grpquota");
222#endif
223
224 return 0;
225}
226
204#ifdef CONFIG_QUOTA 227#ifdef CONFIG_QUOTA
205static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off); 228static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off);
206static ssize_t ext2_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off); 229static ssize_t ext2_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off);
@@ -218,6 +241,7 @@ static struct super_operations ext2_sops = {
218 .statfs = ext2_statfs, 241 .statfs = ext2_statfs,
219 .remount_fs = ext2_remount, 242 .remount_fs = ext2_remount,
220 .clear_inode = ext2_clear_inode, 243 .clear_inode = ext2_clear_inode,
244 .show_options = ext2_show_options,
221#ifdef CONFIG_QUOTA 245#ifdef CONFIG_QUOTA
222 .quota_read = ext2_quota_read, 246 .quota_read = ext2_quota_read,
223 .quota_write = ext2_quota_write, 247 .quota_write = ext2_quota_write,
@@ -256,10 +280,11 @@ static unsigned long get_sb_block(void **data)
256 280
257enum { 281enum {
258 Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, 282 Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
259 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, 283 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic,
260 Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, Opt_nobh, 284 Opt_err_ro, Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug,
261 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_xip, 285 Opt_oldalloc, Opt_orlov, Opt_nobh, Opt_user_xattr, Opt_nouser_xattr,
262 Opt_ignore, Opt_err, 286 Opt_acl, Opt_noacl, Opt_xip, Opt_ignore, Opt_err, Opt_quota,
287 Opt_usrquota, Opt_grpquota
263}; 288};
264 289
265static match_table_t tokens = { 290static match_table_t tokens = {
@@ -288,10 +313,10 @@ static match_table_t tokens = {
288 {Opt_acl, "acl"}, 313 {Opt_acl, "acl"},
289 {Opt_noacl, "noacl"}, 314 {Opt_noacl, "noacl"},
290 {Opt_xip, "xip"}, 315 {Opt_xip, "xip"},
291 {Opt_ignore, "grpquota"}, 316 {Opt_grpquota, "grpquota"},
292 {Opt_ignore, "noquota"}, 317 {Opt_ignore, "noquota"},
293 {Opt_ignore, "quota"}, 318 {Opt_quota, "quota"},
294 {Opt_ignore, "usrquota"}, 319 {Opt_usrquota, "usrquota"},
295 {Opt_err, NULL} 320 {Opt_err, NULL}
296}; 321};
297 322
@@ -406,6 +431,26 @@ static int parse_options (char * options,
406 printk("EXT2 xip option not supported\n"); 431 printk("EXT2 xip option not supported\n");
407#endif 432#endif
408 break; 433 break;
434
435#if defined(CONFIG_QUOTA)
436 case Opt_quota:
437 case Opt_usrquota:
438 set_opt(sbi->s_mount_opt, USRQUOTA);
439 break;
440
441 case Opt_grpquota:
442 set_opt(sbi->s_mount_opt, GRPQUOTA);
443 break;
444#else
445 case Opt_quota:
446 case Opt_usrquota:
447 case Opt_grpquota:
448 printk(KERN_ERR
449 "EXT2-fs: quota operations not supported.\n");
450
451 break;
452#endif
453
409 case Opt_ignore: 454 case Opt_ignore:
410 break; 455 break;
411 default: 456 default:
diff --git a/fs/ext2/xattr.h b/fs/ext2/xattr.h
index 5f3bfde3b810..67cfeb66e897 100644
--- a/fs/ext2/xattr.h
+++ b/fs/ext2/xattr.h
@@ -116,3 +116,11 @@ exit_ext2_xattr(void)
116 116
117# endif /* CONFIG_EXT2_FS_XATTR */ 117# endif /* CONFIG_EXT2_FS_XATTR */
118 118
119#ifdef CONFIG_EXT2_FS_SECURITY
120extern int ext2_init_security(struct inode *inode, struct inode *dir);
121#else
122static inline int ext2_init_security(struct inode *inode, struct inode *dir)
123{
124 return 0;
125}
126#endif
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
index 6a6c59fbe599..a26612798471 100644
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -8,6 +8,7 @@
8#include <linux/fs.h> 8#include <linux/fs.h>
9#include <linux/smp_lock.h> 9#include <linux/smp_lock.h>
10#include <linux/ext2_fs.h> 10#include <linux/ext2_fs.h>
11#include <linux/security.h>
11#include "xattr.h" 12#include "xattr.h"
12 13
13static size_t 14static size_t
@@ -45,6 +46,27 @@ ext2_xattr_security_set(struct inode *inode, const char *name,
45 value, size, flags); 46 value, size, flags);
46} 47}
47 48
49int
50ext2_init_security(struct inode *inode, struct inode *dir)
51{
52 int err;
53 size_t len;
54 void *value;
55 char *name;
56
57 err = security_inode_init_security(inode, dir, &name, &value, &len);
58 if (err) {
59 if (err == -EOPNOTSUPP)
60 return 0;
61 return err;
62 }
63 err = ext2_xattr_set(inode, EXT2_XATTR_INDEX_SECURITY,
64 name, value, len, 0);
65 kfree(name);
66 kfree(value);
67 return err;
68}
69
48struct xattr_handler ext2_xattr_security_handler = { 70struct xattr_handler ext2_xattr_security_handler = {
49 .prefix = XATTR_SECURITY_PREFIX, 71 .prefix = XATTR_SECURITY_PREFIX,
50 .list = ext2_xattr_security_list, 72 .list = ext2_xattr_security_list,
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 6981bd014ede..96552769d039 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -607,6 +607,11 @@ got:
607 DQUOT_DROP(inode); 607 DQUOT_DROP(inode);
608 goto fail2; 608 goto fail2;
609 } 609 }
610 err = ext3_init_security(handle,inode, dir);
611 if (err) {
612 DQUOT_FREE_INODE(inode);
613 goto fail2;
614 }
610 err = ext3_mark_inode_dirty(handle, inode); 615 err = ext3_mark_inode_dirty(handle, inode);
611 if (err) { 616 if (err) {
612 ext3_std_error(sb, err); 617 ext3_std_error(sb, err);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 9989fdcf4d5a..b5177c90d6f1 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -187,6 +187,8 @@ void ext3_delete_inode (struct inode * inode)
187{ 187{
188 handle_t *handle; 188 handle_t *handle;
189 189
190 truncate_inode_pages(&inode->i_data, 0);
191
190 if (is_bad_inode(inode)) 192 if (is_bad_inode(inode))
191 goto no_delete; 193 goto no_delete;
192 194
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 3c3c6e399fb3..a93c3609025d 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -35,6 +35,7 @@
35#include <linux/mount.h> 35#include <linux/mount.h>
36#include <linux/namei.h> 36#include <linux/namei.h>
37#include <linux/quotaops.h> 37#include <linux/quotaops.h>
38#include <linux/seq_file.h>
38#include <asm/uaccess.h> 39#include <asm/uaccess.h>
39#include "xattr.h" 40#include "xattr.h"
40#include "acl.h" 41#include "acl.h"
@@ -509,8 +510,41 @@ static void ext3_clear_inode(struct inode *inode)
509 kfree(rsv); 510 kfree(rsv);
510} 511}
511 512
512#ifdef CONFIG_QUOTA 513static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
514{
515 struct ext3_sb_info *sbi = EXT3_SB(vfs->mnt_sb);
516
517 if (sbi->s_mount_opt & EXT3_MOUNT_JOURNAL_DATA)
518 seq_puts(seq, ",data=journal");
519
520 if (sbi->s_mount_opt & EXT3_MOUNT_ORDERED_DATA)
521 seq_puts(seq, ",data=ordered");
522
523 if (sbi->s_mount_opt & EXT3_MOUNT_WRITEBACK_DATA)
524 seq_puts(seq, ",data=writeback");
525
526#if defined(CONFIG_QUOTA)
527 if (sbi->s_jquota_fmt)
528 seq_printf(seq, ",jqfmt=%s",
529 (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold": "vfsv0");
530
531 if (sbi->s_qf_names[USRQUOTA])
532 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
533
534 if (sbi->s_qf_names[GRPQUOTA])
535 seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);
513 536
537 if (sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA)
538 seq_puts(seq, ",usrquota");
539
540 if (sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA)
541 seq_puts(seq, ",grpquota");
542#endif
543
544 return 0;
545}
546
547#ifdef CONFIG_QUOTA
514#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group") 548#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group")
515#define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) 549#define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
516 550
@@ -569,6 +603,7 @@ static struct super_operations ext3_sops = {
569 .statfs = ext3_statfs, 603 .statfs = ext3_statfs,
570 .remount_fs = ext3_remount, 604 .remount_fs = ext3_remount,
571 .clear_inode = ext3_clear_inode, 605 .clear_inode = ext3_clear_inode,
606 .show_options = ext3_show_options,
572#ifdef CONFIG_QUOTA 607#ifdef CONFIG_QUOTA
573 .quota_read = ext3_quota_read, 608 .quota_read = ext3_quota_read,
574 .quota_write = ext3_quota_write, 609 .quota_write = ext3_quota_write,
@@ -590,7 +625,8 @@ enum {
590 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 625 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
591 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 626 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
592 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, 627 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
593 Opt_ignore, Opt_barrier, Opt_err, Opt_resize, 628 Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
629 Opt_grpquota
594}; 630};
595 631
596static match_table_t tokens = { 632static match_table_t tokens = {
@@ -634,10 +670,10 @@ static match_table_t tokens = {
634 {Opt_grpjquota, "grpjquota=%s"}, 670 {Opt_grpjquota, "grpjquota=%s"},
635 {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, 671 {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
636 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, 672 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
637 {Opt_quota, "grpquota"}, 673 {Opt_grpquota, "grpquota"},
638 {Opt_noquota, "noquota"}, 674 {Opt_noquota, "noquota"},
639 {Opt_quota, "quota"}, 675 {Opt_quota, "quota"},
640 {Opt_quota, "usrquota"}, 676 {Opt_usrquota, "usrquota"},
641 {Opt_barrier, "barrier=%u"}, 677 {Opt_barrier, "barrier=%u"},
642 {Opt_err, NULL}, 678 {Opt_err, NULL},
643 {Opt_resize, "resize"}, 679 {Opt_resize, "resize"},
@@ -903,7 +939,13 @@ clear_qf_name:
903 sbi->s_jquota_fmt = QFMT_VFS_V0; 939 sbi->s_jquota_fmt = QFMT_VFS_V0;
904 break; 940 break;
905 case Opt_quota: 941 case Opt_quota:
942 case Opt_usrquota:
906 set_opt(sbi->s_mount_opt, QUOTA); 943 set_opt(sbi->s_mount_opt, QUOTA);
944 set_opt(sbi->s_mount_opt, USRQUOTA);
945 break;
946 case Opt_grpquota:
947 set_opt(sbi->s_mount_opt, QUOTA);
948 set_opt(sbi->s_mount_opt, GRPQUOTA);
907 break; 949 break;
908 case Opt_noquota: 950 case Opt_noquota:
909 if (sb_any_quota_enabled(sb)) { 951 if (sb_any_quota_enabled(sb)) {
@@ -912,8 +954,13 @@ clear_qf_name:
912 return 0; 954 return 0;
913 } 955 }
914 clear_opt(sbi->s_mount_opt, QUOTA); 956 clear_opt(sbi->s_mount_opt, QUOTA);
957 clear_opt(sbi->s_mount_opt, USRQUOTA);
958 clear_opt(sbi->s_mount_opt, GRPQUOTA);
915 break; 959 break;
916#else 960#else
961 case Opt_quota:
962 case Opt_usrquota:
963 case Opt_grpquota:
917 case Opt_usrjquota: 964 case Opt_usrjquota:
918 case Opt_grpjquota: 965 case Opt_grpjquota:
919 case Opt_offusrjquota: 966 case Opt_offusrjquota:
@@ -924,7 +971,6 @@ clear_qf_name:
924 "EXT3-fs: journalled quota options not " 971 "EXT3-fs: journalled quota options not "
925 "supported.\n"); 972 "supported.\n");
926 break; 973 break;
927 case Opt_quota:
928 case Opt_noquota: 974 case Opt_noquota:
929 break; 975 break;
930#endif 976#endif
@@ -962,14 +1008,38 @@ clear_qf_name:
962 } 1008 }
963 } 1009 }
964#ifdef CONFIG_QUOTA 1010#ifdef CONFIG_QUOTA
965 if (!sbi->s_jquota_fmt && (sbi->s_qf_names[USRQUOTA] || 1011 if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
966 sbi->s_qf_names[GRPQUOTA])) { 1012 if ((sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA) &&
967 printk(KERN_ERR 1013 sbi->s_qf_names[USRQUOTA])
968 "EXT3-fs: journalled quota format not specified.\n"); 1014 clear_opt(sbi->s_mount_opt, USRQUOTA);
969 return 0; 1015
1016 if ((sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA) &&
1017 sbi->s_qf_names[GRPQUOTA])
1018 clear_opt(sbi->s_mount_opt, GRPQUOTA);
1019
1020 if ((sbi->s_qf_names[USRQUOTA] &&
1021 (sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA)) ||
1022 (sbi->s_qf_names[GRPQUOTA] &&
1023 (sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA))) {
1024 printk(KERN_ERR "EXT3-fs: old and new quota "
1025 "format mixing.\n");
1026 return 0;
1027 }
1028
1029 if (!sbi->s_jquota_fmt) {
1030 printk(KERN_ERR "EXT3-fs: journalled quota format "
1031 "not specified.\n");
1032 return 0;
1033 }
1034 } else {
1035 if (sbi->s_jquota_fmt) {
1036 printk(KERN_ERR "EXT3-fs: journalled quota format "
1037 "specified with no journalling "
1038 "enabled.\n");
1039 return 0;
1040 }
970 } 1041 }
971#endif 1042#endif
972
973 return 1; 1043 return 1;
974} 1044}
975 1045
diff --git a/fs/ext3/xattr.h b/fs/ext3/xattr.h
index eb31a69e82dc..2ceae38f3d49 100644
--- a/fs/ext3/xattr.h
+++ b/fs/ext3/xattr.h
@@ -133,3 +133,14 @@ exit_ext3_xattr(void)
133#define ext3_xattr_handlers NULL 133#define ext3_xattr_handlers NULL
134 134
135# endif /* CONFIG_EXT3_FS_XATTR */ 135# endif /* CONFIG_EXT3_FS_XATTR */
136
137#ifdef CONFIG_EXT3_FS_SECURITY
138extern int ext3_init_security(handle_t *handle, struct inode *inode,
139 struct inode *dir);
140#else
141static inline int ext3_init_security(handle_t *handle, struct inode *inode,
142 struct inode *dir)
143{
144 return 0;
145}
146#endif
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c
index ddc1c41750e1..b9c40c15647b 100644
--- a/fs/ext3/xattr_security.c
+++ b/fs/ext3/xattr_security.c
@@ -9,6 +9,7 @@
9#include <linux/smp_lock.h> 9#include <linux/smp_lock.h>
10#include <linux/ext3_jbd.h> 10#include <linux/ext3_jbd.h>
11#include <linux/ext3_fs.h> 11#include <linux/ext3_fs.h>
12#include <linux/security.h>
12#include "xattr.h" 13#include "xattr.h"
13 14
14static size_t 15static size_t
@@ -47,6 +48,27 @@ ext3_xattr_security_set(struct inode *inode, const char *name,
47 value, size, flags); 48 value, size, flags);
48} 49}
49 50
51int
52ext3_init_security(handle_t *handle, struct inode *inode, struct inode *dir)
53{
54 int err;
55 size_t len;
56 void *value;
57 char *name;
58
59 err = security_inode_init_security(inode, dir, &name, &value, &len);
60 if (err) {
61 if (err == -EOPNOTSUPP)
62 return 0;
63 return err;
64 }
65 err = ext3_xattr_set_handle(handle, inode, EXT3_XATTR_INDEX_SECURITY,
66 name, value, len, 0);
67 kfree(name);
68 kfree(value);
69 return err;
70}
71
50struct xattr_handler ext3_xattr_security_handler = { 72struct xattr_handler ext3_xattr_security_handler = {
51 .prefix = XATTR_SECURITY_PREFIX, 73 .prefix = XATTR_SECURITY_PREFIX,
52 .list = ext3_xattr_security_list, 74 .list = ext3_xattr_security_list,
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index e5ae1b720dde..895049b2ac9c 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -30,6 +30,29 @@ static inline loff_t fat_make_i_pos(struct super_block *sb,
30 | (de - (struct msdos_dir_entry *)bh->b_data); 30 | (de - (struct msdos_dir_entry *)bh->b_data);
31} 31}
32 32
33static inline void fat_dir_readahead(struct inode *dir, sector_t iblock,
34 sector_t phys)
35{
36 struct super_block *sb = dir->i_sb;
37 struct msdos_sb_info *sbi = MSDOS_SB(sb);
38 struct buffer_head *bh;
39 int sec;
40
41 /* This is not a first sector of cluster, or sec_per_clus == 1 */
42 if ((iblock & (sbi->sec_per_clus - 1)) || sbi->sec_per_clus == 1)
43 return;
44 /* root dir of FAT12/FAT16 */
45 if ((sbi->fat_bits != 32) && (dir->i_ino == MSDOS_ROOT_INO))
46 return;
47
48 bh = sb_getblk(sb, phys);
49 if (bh && !buffer_uptodate(bh)) {
50 for (sec = 0; sec < sbi->sec_per_clus; sec++)
51 sb_breadahead(sb, phys + sec);
52 }
53 brelse(bh);
54}
55
33/* Returns the inode number of the directory entry at offset pos. If bh is 56/* Returns the inode number of the directory entry at offset pos. If bh is
34 non-NULL, it is brelse'd before. Pos is incremented. The buffer header is 57 non-NULL, it is brelse'd before. Pos is incremented. The buffer header is
35 returned in bh. 58 returned in bh.
@@ -58,6 +81,8 @@ next:
58 if (err || !phys) 81 if (err || !phys)
59 return -1; /* beyond EOF or error */ 82 return -1; /* beyond EOF or error */
60 83
84 fat_dir_readahead(dir, iblock, phys);
85
61 *bh = sb_bread(sb, phys); 86 *bh = sb_bread(sb, phys);
62 if (*bh == NULL) { 87 if (*bh == NULL) {
63 printk(KERN_ERR "FAT: Directory bread(block %llu) failed\n", 88 printk(KERN_ERR "FAT: Directory bread(block %llu) failed\n",
@@ -635,8 +660,7 @@ RecEnd:
635EODir: 660EODir:
636 filp->f_pos = cpos; 661 filp->f_pos = cpos;
637FillFailed: 662FillFailed:
638 if (bh) 663 brelse(bh);
639 brelse(bh);
640 if (unicode) 664 if (unicode)
641 free_page((unsigned long)unicode); 665 free_page((unsigned long)unicode);
642out: 666out:
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 96ae85b67eba..a7cbe68e2259 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -335,6 +335,8 @@ EXPORT_SYMBOL(fat_build_inode);
335 335
336static void fat_delete_inode(struct inode *inode) 336static void fat_delete_inode(struct inode *inode)
337{ 337{
338 truncate_inode_pages(&inode->i_data, 0);
339
338 if (!is_bad_inode(inode)) { 340 if (!is_bad_inode(inode)) {
339 inode->i_size = 0; 341 inode->i_size = 0;
340 fat_truncate(inode); 342 fat_truncate(inode);
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 6fbc9d8fcc36..863b46e0d78a 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -16,6 +16,7 @@
16#include <linux/security.h> 16#include <linux/security.h>
17#include <linux/ptrace.h> 17#include <linux/ptrace.h>
18#include <linux/signal.h> 18#include <linux/signal.h>
19#include <linux/rcupdate.h>
19 20
20#include <asm/poll.h> 21#include <asm/poll.h>
21#include <asm/siginfo.h> 22#include <asm/siginfo.h>
@@ -24,21 +25,25 @@
24void fastcall set_close_on_exec(unsigned int fd, int flag) 25void fastcall set_close_on_exec(unsigned int fd, int flag)
25{ 26{
26 struct files_struct *files = current->files; 27 struct files_struct *files = current->files;
28 struct fdtable *fdt;
27 spin_lock(&files->file_lock); 29 spin_lock(&files->file_lock);
30 fdt = files_fdtable(files);
28 if (flag) 31 if (flag)
29 FD_SET(fd, files->close_on_exec); 32 FD_SET(fd, fdt->close_on_exec);
30 else 33 else
31 FD_CLR(fd, files->close_on_exec); 34 FD_CLR(fd, fdt->close_on_exec);
32 spin_unlock(&files->file_lock); 35 spin_unlock(&files->file_lock);
33} 36}
34 37
35static inline int get_close_on_exec(unsigned int fd) 38static inline int get_close_on_exec(unsigned int fd)
36{ 39{
37 struct files_struct *files = current->files; 40 struct files_struct *files = current->files;
41 struct fdtable *fdt;
38 int res; 42 int res;
39 spin_lock(&files->file_lock); 43 rcu_read_lock();
40 res = FD_ISSET(fd, files->close_on_exec); 44 fdt = files_fdtable(files);
41 spin_unlock(&files->file_lock); 45 res = FD_ISSET(fd, fdt->close_on_exec);
46 rcu_read_unlock();
42 return res; 47 return res;
43} 48}
44 49
@@ -54,24 +59,26 @@ static int locate_fd(struct files_struct *files,
54 unsigned int newfd; 59 unsigned int newfd;
55 unsigned int start; 60 unsigned int start;
56 int error; 61 int error;
62 struct fdtable *fdt;
57 63
58 error = -EINVAL; 64 error = -EINVAL;
59 if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) 65 if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
60 goto out; 66 goto out;
61 67
62repeat: 68repeat:
69 fdt = files_fdtable(files);
63 /* 70 /*
64 * Someone might have closed fd's in the range 71 * Someone might have closed fd's in the range
65 * orig_start..files->next_fd 72 * orig_start..fdt->next_fd
66 */ 73 */
67 start = orig_start; 74 start = orig_start;
68 if (start < files->next_fd) 75 if (start < fdt->next_fd)
69 start = files->next_fd; 76 start = fdt->next_fd;
70 77
71 newfd = start; 78 newfd = start;
72 if (start < files->max_fdset) { 79 if (start < fdt->max_fdset) {
73 newfd = find_next_zero_bit(files->open_fds->fds_bits, 80 newfd = find_next_zero_bit(fdt->open_fds->fds_bits,
74 files->max_fdset, start); 81 fdt->max_fdset, start);
75 } 82 }
76 83
77 error = -EMFILE; 84 error = -EMFILE;
@@ -89,9 +96,15 @@ repeat:
89 if (error) 96 if (error)
90 goto repeat; 97 goto repeat;
91 98
92 if (start <= files->next_fd) 99 /*
93 files->next_fd = newfd + 1; 100 * We reacquired files_lock, so we are safe as long as
94 101 * we reacquire the fdtable pointer and use it while holding
102 * the lock, no one can free it during that time.
103 */
104 fdt = files_fdtable(files);
105 if (start <= fdt->next_fd)
106 fdt->next_fd = newfd + 1;
107
95 error = newfd; 108 error = newfd;
96 109
97out: 110out:
@@ -101,13 +114,16 @@ out:
101static int dupfd(struct file *file, unsigned int start) 114static int dupfd(struct file *file, unsigned int start)
102{ 115{
103 struct files_struct * files = current->files; 116 struct files_struct * files = current->files;
117 struct fdtable *fdt;
104 int fd; 118 int fd;
105 119
106 spin_lock(&files->file_lock); 120 spin_lock(&files->file_lock);
107 fd = locate_fd(files, file, start); 121 fd = locate_fd(files, file, start);
108 if (fd >= 0) { 122 if (fd >= 0) {
109 FD_SET(fd, files->open_fds); 123 /* locate_fd() may have expanded fdtable, load the ptr */
110 FD_CLR(fd, files->close_on_exec); 124 fdt = files_fdtable(files);
125 FD_SET(fd, fdt->open_fds);
126 FD_CLR(fd, fdt->close_on_exec);
111 spin_unlock(&files->file_lock); 127 spin_unlock(&files->file_lock);
112 fd_install(fd, file); 128 fd_install(fd, file);
113 } else { 129 } else {
@@ -123,6 +139,7 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
123 int err = -EBADF; 139 int err = -EBADF;
124 struct file * file, *tofree; 140 struct file * file, *tofree;
125 struct files_struct * files = current->files; 141 struct files_struct * files = current->files;
142 struct fdtable *fdt;
126 143
127 spin_lock(&files->file_lock); 144 spin_lock(&files->file_lock);
128 if (!(file = fcheck(oldfd))) 145 if (!(file = fcheck(oldfd)))
@@ -148,13 +165,14 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
148 165
149 /* Yes. It's a race. In user space. Nothing sane to do */ 166 /* Yes. It's a race. In user space. Nothing sane to do */
150 err = -EBUSY; 167 err = -EBUSY;
151 tofree = files->fd[newfd]; 168 fdt = files_fdtable(files);
152 if (!tofree && FD_ISSET(newfd, files->open_fds)) 169 tofree = fdt->fd[newfd];
170 if (!tofree && FD_ISSET(newfd, fdt->open_fds))
153 goto out_fput; 171 goto out_fput;
154 172
155 files->fd[newfd] = file; 173 rcu_assign_pointer(fdt->fd[newfd], file);
156 FD_SET(newfd, files->open_fds); 174 FD_SET(newfd, fdt->open_fds);
157 FD_CLR(newfd, files->close_on_exec); 175 FD_CLR(newfd, fdt->close_on_exec);
158 spin_unlock(&files->file_lock); 176 spin_unlock(&files->file_lock);
159 177
160 if (tofree) 178 if (tofree)
diff --git a/fs/file.c b/fs/file.c
index 92b5f25985d2..2127a7b9dc3a 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -13,6 +13,25 @@
13#include <linux/vmalloc.h> 13#include <linux/vmalloc.h>
14#include <linux/file.h> 14#include <linux/file.h>
15#include <linux/bitops.h> 15#include <linux/bitops.h>
16#include <linux/interrupt.h>
17#include <linux/spinlock.h>
18#include <linux/rcupdate.h>
19#include <linux/workqueue.h>
20
21struct fdtable_defer {
22 spinlock_t lock;
23 struct work_struct wq;
24 struct timer_list timer;
25 struct fdtable *next;
26};
27
28/*
29 * We use this list to defer free fdtables that have vmalloced
30 * sets/arrays. By keeping a per-cpu list, we avoid having to embed
31 * the work_struct in fdtable itself which avoids a 64 byte (i386) increase in
32 * this per-task structure.
33 */
34static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list);
16 35
17 36
18/* 37/*
@@ -48,82 +67,143 @@ void free_fd_array(struct file **array, int num)
48 vfree(array); 67 vfree(array);
49} 68}
50 69
51/* 70static void __free_fdtable(struct fdtable *fdt)
52 * Expand the fd array in the files_struct. Called with the files 71{
53 * spinlock held for write. 72 int fdset_size, fdarray_size;
54 */
55 73
56static int expand_fd_array(struct files_struct *files, int nr) 74 fdset_size = fdt->max_fdset / 8;
57 __releases(files->file_lock) 75 fdarray_size = fdt->max_fds * sizeof(struct file *);
58 __acquires(files->file_lock) 76 free_fdset(fdt->open_fds, fdset_size);
77 free_fdset(fdt->close_on_exec, fdset_size);
78 free_fd_array(fdt->fd, fdarray_size);
79 kfree(fdt);
80}
81
82static void fdtable_timer(unsigned long data)
59{ 83{
60 struct file **new_fds; 84 struct fdtable_defer *fddef = (struct fdtable_defer *)data;
61 int error, nfds;
62 85
63 86 spin_lock(&fddef->lock);
64 error = -EMFILE; 87 /*
65 if (files->max_fds >= NR_OPEN || nr >= NR_OPEN) 88 * If someone already emptied the queue return.
89 */
90 if (!fddef->next)
66 goto out; 91 goto out;
92 if (!schedule_work(&fddef->wq))
93 mod_timer(&fddef->timer, 5);
94out:
95 spin_unlock(&fddef->lock);
96}
67 97
68 nfds = files->max_fds; 98static void free_fdtable_work(struct fdtable_defer *f)
69 spin_unlock(&files->file_lock); 99{
100 struct fdtable *fdt;
70 101
71 /* 102 spin_lock_bh(&f->lock);
72 * Expand to the max in easy steps, and keep expanding it until 103 fdt = f->next;
73 * we have enough for the requested fd array size. 104 f->next = NULL;
74 */ 105 spin_unlock_bh(&f->lock);
106 while(fdt) {
107 struct fdtable *next = fdt->next;
108 __free_fdtable(fdt);
109 fdt = next;
110 }
111}
75 112
76 do { 113static void free_fdtable_rcu(struct rcu_head *rcu)
77#if NR_OPEN_DEFAULT < 256 114{
78 if (nfds < 256) 115 struct fdtable *fdt = container_of(rcu, struct fdtable, rcu);
79 nfds = 256; 116 int fdset_size, fdarray_size;
80 else 117 struct fdtable_defer *fddef;
81#endif
82 if (nfds < (PAGE_SIZE / sizeof(struct file *)))
83 nfds = PAGE_SIZE / sizeof(struct file *);
84 else {
85 nfds = nfds * 2;
86 if (nfds > NR_OPEN)
87 nfds = NR_OPEN;
88 }
89 } while (nfds <= nr);
90 118
91 error = -ENOMEM; 119 BUG_ON(!fdt);
92 new_fds = alloc_fd_array(nfds); 120 fdset_size = fdt->max_fdset / 8;
93 spin_lock(&files->file_lock); 121 fdarray_size = fdt->max_fds * sizeof(struct file *);
94 if (!new_fds)
95 goto out;
96 122
97 /* Copy the existing array and install the new pointer */ 123 if (fdt->free_files) {
98 124 /*
99 if (nfds > files->max_fds) { 125 * The this fdtable was embedded in the files structure
100 struct file **old_fds; 126 * and the files structure itself was getting destroyed.
101 int i; 127 * It is now safe to free the files structure.
102 128 */
103 old_fds = xchg(&files->fd, new_fds); 129 kmem_cache_free(files_cachep, fdt->free_files);
104 i = xchg(&files->max_fds, nfds); 130 return;
105 131 }
106 /* Don't copy/clear the array if we are creating a new 132 if (fdt->max_fdset <= __FD_SETSIZE && fdt->max_fds <= NR_OPEN_DEFAULT) {
107 fd array for fork() */ 133 /*
108 if (i) { 134 * The fdtable was embedded
109 memcpy(new_fds, old_fds, i * sizeof(struct file *)); 135 */
110 /* clear the remainder of the array */ 136 return;
111 memset(&new_fds[i], 0, 137 }
112 (nfds-i) * sizeof(struct file *)); 138 if (fdset_size <= PAGE_SIZE && fdarray_size <= PAGE_SIZE) {
113 139 kfree(fdt->open_fds);
114 spin_unlock(&files->file_lock); 140 kfree(fdt->close_on_exec);
115 free_fd_array(old_fds, i); 141 kfree(fdt->fd);
116 spin_lock(&files->file_lock); 142 kfree(fdt);
117 }
118 } else { 143 } else {
119 /* Somebody expanded the array while we slept ... */ 144 fddef = &get_cpu_var(fdtable_defer_list);
120 spin_unlock(&files->file_lock); 145 spin_lock(&fddef->lock);
121 free_fd_array(new_fds, nfds); 146 fdt->next = fddef->next;
122 spin_lock(&files->file_lock); 147 fddef->next = fdt;
148 /*
149 * vmallocs are handled from the workqueue context.
150 * If the per-cpu workqueue is running, then we
151 * defer work scheduling through a timer.
152 */
153 if (!schedule_work(&fddef->wq))
154 mod_timer(&fddef->timer, 5);
155 spin_unlock(&fddef->lock);
156 put_cpu_var(fdtable_defer_list);
123 } 157 }
124 error = 0; 158}
125out: 159
126 return error; 160void free_fdtable(struct fdtable *fdt)
161{
162 if (fdt->free_files || fdt->max_fdset > __FD_SETSIZE ||
163 fdt->max_fds > NR_OPEN_DEFAULT)
164 call_rcu(&fdt->rcu, free_fdtable_rcu);
165}
166
167/*
168 * Expand the fdset in the files_struct. Called with the files spinlock
169 * held for write.
170 */
171static void copy_fdtable(struct fdtable *nfdt, struct fdtable *fdt)
172{
173 int i;
174 int count;
175
176 BUG_ON(nfdt->max_fdset < fdt->max_fdset);
177 BUG_ON(nfdt->max_fds < fdt->max_fds);
178 /* Copy the existing tables and install the new pointers */
179
180 i = fdt->max_fdset / (sizeof(unsigned long) * 8);
181 count = (nfdt->max_fdset - fdt->max_fdset) / 8;
182
183 /*
184 * Don't copy the entire array if the current fdset is
185 * not yet initialised.
186 */
187 if (i) {
188 memcpy (nfdt->open_fds, fdt->open_fds,
189 fdt->max_fdset/8);
190 memcpy (nfdt->close_on_exec, fdt->close_on_exec,
191 fdt->max_fdset/8);
192 memset (&nfdt->open_fds->fds_bits[i], 0, count);
193 memset (&nfdt->close_on_exec->fds_bits[i], 0, count);
194 }
195
196 /* Don't copy/clear the array if we are creating a new
197 fd array for fork() */
198 if (fdt->max_fds) {
199 memcpy(nfdt->fd, fdt->fd,
200 fdt->max_fds * sizeof(struct file *));
201 /* clear the remainder of the array */
202 memset(&nfdt->fd[fdt->max_fds], 0,
203 (nfdt->max_fds - fdt->max_fds) *
204 sizeof(struct file *));
205 }
206 nfdt->next_fd = fdt->next_fd;
127} 207}
128 208
129/* 209/*
@@ -154,26 +234,21 @@ void free_fdset(fd_set *array, int num)
154 vfree(array); 234 vfree(array);
155} 235}
156 236
157/* 237static struct fdtable *alloc_fdtable(int nr)
158 * Expand the fdset in the files_struct. Called with the files spinlock
159 * held for write.
160 */
161static int expand_fdset(struct files_struct *files, int nr)
162 __releases(file->file_lock)
163 __acquires(file->file_lock)
164{ 238{
165 fd_set *new_openset = NULL, *new_execset = NULL; 239 struct fdtable *fdt = NULL;
166 int error, nfds = 0; 240 int nfds = 0;
167 241 fd_set *new_openset = NULL, *new_execset = NULL;
168 error = -EMFILE; 242 struct file **new_fds;
169 if (files->max_fdset >= NR_OPEN || nr >= NR_OPEN)
170 goto out;
171 243
172 nfds = files->max_fdset; 244 fdt = kmalloc(sizeof(*fdt), GFP_KERNEL);
173 spin_unlock(&files->file_lock); 245 if (!fdt)
246 goto out;
247 memset(fdt, 0, sizeof(*fdt));
174 248
175 /* Expand to the max in easy steps */ 249 nfds = __FD_SETSIZE;
176 do { 250 /* Expand to the max in easy steps */
251 do {
177 if (nfds < (PAGE_SIZE * 8)) 252 if (nfds < (PAGE_SIZE * 8))
178 nfds = PAGE_SIZE * 8; 253 nfds = PAGE_SIZE * 8;
179 else { 254 else {
@@ -183,49 +258,88 @@ static int expand_fdset(struct files_struct *files, int nr)
183 } 258 }
184 } while (nfds <= nr); 259 } while (nfds <= nr);
185 260
186 error = -ENOMEM; 261 new_openset = alloc_fdset(nfds);
187 new_openset = alloc_fdset(nfds); 262 new_execset = alloc_fdset(nfds);
188 new_execset = alloc_fdset(nfds); 263 if (!new_openset || !new_execset)
189 spin_lock(&files->file_lock); 264 goto out;
190 if (!new_openset || !new_execset) 265 fdt->open_fds = new_openset;
266 fdt->close_on_exec = new_execset;
267 fdt->max_fdset = nfds;
268
269 nfds = NR_OPEN_DEFAULT;
270 /*
271 * Expand to the max in easy steps, and keep expanding it until
272 * we have enough for the requested fd array size.
273 */
274 do {
275#if NR_OPEN_DEFAULT < 256
276 if (nfds < 256)
277 nfds = 256;
278 else
279#endif
280 if (nfds < (PAGE_SIZE / sizeof(struct file *)))
281 nfds = PAGE_SIZE / sizeof(struct file *);
282 else {
283 nfds = nfds * 2;
284 if (nfds > NR_OPEN)
285 nfds = NR_OPEN;
286 }
287 } while (nfds <= nr);
288 new_fds = alloc_fd_array(nfds);
289 if (!new_fds)
290 goto out;
291 fdt->fd = new_fds;
292 fdt->max_fds = nfds;
293 fdt->free_files = NULL;
294 return fdt;
295out:
296 if (new_openset)
297 free_fdset(new_openset, nfds);
298 if (new_execset)
299 free_fdset(new_execset, nfds);
300 kfree(fdt);
301 return NULL;
302}
303
304/*
305 * Expands the file descriptor table - it will allocate a new fdtable and
306 * both fd array and fdset. It is expected to be called with the
307 * files_lock held.
308 */
309static int expand_fdtable(struct files_struct *files, int nr)
310 __releases(files->file_lock)
311 __acquires(files->file_lock)
312{
313 int error = 0;
314 struct fdtable *fdt;
315 struct fdtable *nfdt = NULL;
316
317 spin_unlock(&files->file_lock);
318 nfdt = alloc_fdtable(nr);
319 if (!nfdt) {
320 error = -ENOMEM;
321 spin_lock(&files->file_lock);
191 goto out; 322 goto out;
323 }
192 324
193 error = 0; 325 spin_lock(&files->file_lock);
194 326 fdt = files_fdtable(files);
195 /* Copy the existing tables and install the new pointers */ 327 /*
196 if (nfds > files->max_fdset) { 328 * Check again since another task may have expanded the
197 int i = files->max_fdset / (sizeof(unsigned long) * 8); 329 * fd table while we dropped the lock
198 int count = (nfds - files->max_fdset) / 8; 330 */
199 331 if (nr >= fdt->max_fds || nr >= fdt->max_fdset) {
200 /* 332 copy_fdtable(nfdt, fdt);
201 * Don't copy the entire array if the current fdset is 333 } else {
202 * not yet initialised. 334 /* Somebody expanded while we dropped file_lock */
203 */
204 if (i) {
205 memcpy (new_openset, files->open_fds, files->max_fdset/8);
206 memcpy (new_execset, files->close_on_exec, files->max_fdset/8);
207 memset (&new_openset->fds_bits[i], 0, count);
208 memset (&new_execset->fds_bits[i], 0, count);
209 }
210
211 nfds = xchg(&files->max_fdset, nfds);
212 new_openset = xchg(&files->open_fds, new_openset);
213 new_execset = xchg(&files->close_on_exec, new_execset);
214 spin_unlock(&files->file_lock); 335 spin_unlock(&files->file_lock);
215 free_fdset (new_openset, nfds); 336 __free_fdtable(nfdt);
216 free_fdset (new_execset, nfds);
217 spin_lock(&files->file_lock); 337 spin_lock(&files->file_lock);
218 return 0; 338 goto out;
219 } 339 }
220 /* Somebody expanded the array while we slept ... */ 340 rcu_assign_pointer(files->fdt, nfdt);
221 341 free_fdtable(fdt);
222out: 342out:
223 spin_unlock(&files->file_lock);
224 if (new_openset)
225 free_fdset(new_openset, nfds);
226 if (new_execset)
227 free_fdset(new_execset, nfds);
228 spin_lock(&files->file_lock);
229 return error; 343 return error;
230} 344}
231 345
@@ -237,18 +351,39 @@ out:
237int expand_files(struct files_struct *files, int nr) 351int expand_files(struct files_struct *files, int nr)
238{ 352{
239 int err, expand = 0; 353 int err, expand = 0;
354 struct fdtable *fdt;
240 355
241 if (nr >= files->max_fdset) { 356 fdt = files_fdtable(files);
242 expand = 1; 357 if (nr >= fdt->max_fdset || nr >= fdt->max_fds) {
243 if ((err = expand_fdset(files, nr))) 358 if (fdt->max_fdset >= NR_OPEN ||
359 fdt->max_fds >= NR_OPEN || nr >= NR_OPEN) {
360 err = -EMFILE;
244 goto out; 361 goto out;
245 } 362 }
246 if (nr >= files->max_fds) {
247 expand = 1; 363 expand = 1;
248 if ((err = expand_fd_array(files, nr))) 364 if ((err = expand_fdtable(files, nr)))
249 goto out; 365 goto out;
250 } 366 }
251 err = expand; 367 err = expand;
252out: 368out:
253 return err; 369 return err;
254} 370}
371
372static void __devinit fdtable_defer_list_init(int cpu)
373{
374 struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu);
375 spin_lock_init(&fddef->lock);
376 INIT_WORK(&fddef->wq, (void (*)(void *))free_fdtable_work, fddef);
377 init_timer(&fddef->timer);
378 fddef->timer.data = (unsigned long)fddef;
379 fddef->timer.function = fdtable_timer;
380 fddef->next = NULL;
381}
382
383void __init files_defer_init(void)
384{
385 int i;
386 /* Really early - can't use for_each_cpu */
387 for (i = 0; i < NR_CPUS; i++)
388 fdtable_defer_list_init(i);
389}
diff --git a/fs/file_table.c b/fs/file_table.c
index 1d3de78e6bc9..86ec8ae985b4 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -14,6 +14,7 @@
14#include <linux/fs.h> 14#include <linux/fs.h>
15#include <linux/security.h> 15#include <linux/security.h>
16#include <linux/eventpoll.h> 16#include <linux/eventpoll.h>
17#include <linux/rcupdate.h>
17#include <linux/mount.h> 18#include <linux/mount.h>
18#include <linux/cdev.h> 19#include <linux/cdev.h>
19#include <linux/fsnotify.h> 20#include <linux/fsnotify.h>
@@ -53,11 +54,17 @@ void filp_dtor(void * objp, struct kmem_cache_s *cachep, unsigned long dflags)
53 spin_unlock_irqrestore(&filp_count_lock, flags); 54 spin_unlock_irqrestore(&filp_count_lock, flags);
54} 55}
55 56
56static inline void file_free(struct file *f) 57static inline void file_free_rcu(struct rcu_head *head)
57{ 58{
59 struct file *f = container_of(head, struct file, f_rcuhead);
58 kmem_cache_free(filp_cachep, f); 60 kmem_cache_free(filp_cachep, f);
59} 61}
60 62
63static inline void file_free(struct file *f)
64{
65 call_rcu(&f->f_rcuhead, file_free_rcu);
66}
67
61/* Find an unused file structure and return a pointer to it. 68/* Find an unused file structure and return a pointer to it.
62 * Returns NULL, if there are no more free file structures or 69 * Returns NULL, if there are no more free file structures or
63 * we run out of memory. 70 * we run out of memory.
@@ -89,7 +96,6 @@ struct file *get_empty_filp(void)
89 rwlock_init(&f->f_owner.lock); 96 rwlock_init(&f->f_owner.lock);
90 /* f->f_version: 0 */ 97 /* f->f_version: 0 */
91 INIT_LIST_HEAD(&f->f_list); 98 INIT_LIST_HEAD(&f->f_list);
92 f->f_maxcount = INT_MAX;
93 return f; 99 return f;
94 100
95over: 101over:
@@ -111,7 +117,7 @@ EXPORT_SYMBOL(get_empty_filp);
111 117
112void fastcall fput(struct file *file) 118void fastcall fput(struct file *file)
113{ 119{
114 if (atomic_dec_and_test(&file->f_count)) 120 if (rcuref_dec_and_test(&file->f_count))
115 __fput(file); 121 __fput(file);
116} 122}
117 123
@@ -157,11 +163,17 @@ struct file fastcall *fget(unsigned int fd)
157 struct file *file; 163 struct file *file;
158 struct files_struct *files = current->files; 164 struct files_struct *files = current->files;
159 165
160 spin_lock(&files->file_lock); 166 rcu_read_lock();
161 file = fcheck_files(files, fd); 167 file = fcheck_files(files, fd);
162 if (file) 168 if (file) {
163 get_file(file); 169 if (!rcuref_inc_lf(&file->f_count)) {
164 spin_unlock(&files->file_lock); 170 /* File object ref couldn't be taken */
171 rcu_read_unlock();
172 return NULL;
173 }
174 }
175 rcu_read_unlock();
176
165 return file; 177 return file;
166} 178}
167 179
@@ -183,21 +195,25 @@ struct file fastcall *fget_light(unsigned int fd, int *fput_needed)
183 if (likely((atomic_read(&files->count) == 1))) { 195 if (likely((atomic_read(&files->count) == 1))) {
184 file = fcheck_files(files, fd); 196 file = fcheck_files(files, fd);
185 } else { 197 } else {
186 spin_lock(&files->file_lock); 198 rcu_read_lock();
187 file = fcheck_files(files, fd); 199 file = fcheck_files(files, fd);
188 if (file) { 200 if (file) {
189 get_file(file); 201 if (rcuref_inc_lf(&file->f_count))
190 *fput_needed = 1; 202 *fput_needed = 1;
203 else
204 /* Didn't get the reference, someone's freed */
205 file = NULL;
191 } 206 }
192 spin_unlock(&files->file_lock); 207 rcu_read_unlock();
193 } 208 }
209
194 return file; 210 return file;
195} 211}
196 212
197 213
198void put_filp(struct file *file) 214void put_filp(struct file *file)
199{ 215{
200 if (atomic_dec_and_test(&file->f_count)) { 216 if (rcuref_dec_and_test(&file->f_count)) {
201 security_file_free(file); 217 security_file_free(file);
202 file_kill(file); 218 file_kill(file);
203 file_free(file); 219 file_free(file);
@@ -258,4 +274,5 @@ void __init files_init(unsigned long mempages)
258 files_stat.max_files = n; 274 files_stat.max_files = n;
259 if (files_stat.max_files < NR_FILE) 275 if (files_stat.max_files < NR_FILE)
260 files_stat.max_files = NR_FILE; 276 files_stat.max_files = NR_FILE;
277 files_defer_init();
261} 278}
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index 27f66d3e8a04..6aa6fbe4f8ee 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -155,7 +155,7 @@ static int vxfs_fill_super(struct super_block *sbp, void *dp, int silent)
155 155
156 sbp->s_flags |= MS_RDONLY; 156 sbp->s_flags |= MS_RDONLY;
157 157
158 infp = kcalloc(1, sizeof(*infp), GFP_KERNEL); 158 infp = kzalloc(sizeof(*infp), GFP_KERNEL);
159 if (!infp) { 159 if (!infp) {
160 printk(KERN_WARNING "vxfs: unable to allocate incore superblock\n"); 160 printk(KERN_WARNING "vxfs: unable to allocate incore superblock\n");
161 return -ENOMEM; 161 return -ENOMEM;
diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
new file mode 100644
index 000000000000..c3e1f760cac9
--- /dev/null
+++ b/fs/fuse/Makefile
@@ -0,0 +1,7 @@
1#
2# Makefile for the FUSE filesystem.
3#
4
5obj-$(CONFIG_FUSE_FS) += fuse.o
6
7fuse-objs := dev.o dir.o file.o inode.o
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
new file mode 100644
index 000000000000..d4c869c6d01b
--- /dev/null
+++ b/fs/fuse/dev.c
@@ -0,0 +1,877 @@
1/*
2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu>
4
5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING.
7*/
8
9#include "fuse_i.h"
10
11#include <linux/init.h>
12#include <linux/module.h>
13#include <linux/poll.h>
14#include <linux/uio.h>
15#include <linux/miscdevice.h>
16#include <linux/pagemap.h>
17#include <linux/file.h>
18#include <linux/slab.h>
19
20MODULE_ALIAS_MISCDEV(FUSE_MINOR);
21
22static kmem_cache_t *fuse_req_cachep;
23
24static inline struct fuse_conn *fuse_get_conn(struct file *file)
25{
26 struct fuse_conn *fc;
27 spin_lock(&fuse_lock);
28 fc = file->private_data;
29 if (fc && !fc->mounted)
30 fc = NULL;
31 spin_unlock(&fuse_lock);
32 return fc;
33}
34
35static inline void fuse_request_init(struct fuse_req *req)
36{
37 memset(req, 0, sizeof(*req));
38 INIT_LIST_HEAD(&req->list);
39 init_waitqueue_head(&req->waitq);
40 atomic_set(&req->count, 1);
41}
42
43struct fuse_req *fuse_request_alloc(void)
44{
45 struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, SLAB_KERNEL);
46 if (req)
47 fuse_request_init(req);
48 return req;
49}
50
51void fuse_request_free(struct fuse_req *req)
52{
53 kmem_cache_free(fuse_req_cachep, req);
54}
55
56static inline void block_sigs(sigset_t *oldset)
57{
58 sigset_t mask;
59
60 siginitsetinv(&mask, sigmask(SIGKILL));
61 sigprocmask(SIG_BLOCK, &mask, oldset);
62}
63
64static inline void restore_sigs(sigset_t *oldset)
65{
66 sigprocmask(SIG_SETMASK, oldset, NULL);
67}
68
69void fuse_reset_request(struct fuse_req *req)
70{
71 int preallocated = req->preallocated;
72 BUG_ON(atomic_read(&req->count) != 1);
73 fuse_request_init(req);
74 req->preallocated = preallocated;
75}
76
77static void __fuse_get_request(struct fuse_req *req)
78{
79 atomic_inc(&req->count);
80}
81
82/* Must be called with > 1 refcount */
83static void __fuse_put_request(struct fuse_req *req)
84{
85 BUG_ON(atomic_read(&req->count) < 2);
86 atomic_dec(&req->count);
87}
88
89static struct fuse_req *do_get_request(struct fuse_conn *fc)
90{
91 struct fuse_req *req;
92
93 spin_lock(&fuse_lock);
94 BUG_ON(list_empty(&fc->unused_list));
95 req = list_entry(fc->unused_list.next, struct fuse_req, list);
96 list_del_init(&req->list);
97 spin_unlock(&fuse_lock);
98 fuse_request_init(req);
99 req->preallocated = 1;
100 req->in.h.uid = current->fsuid;
101 req->in.h.gid = current->fsgid;
102 req->in.h.pid = current->pid;
103 return req;
104}
105
106/* This can return NULL, but only in case it's interrupted by a SIGKILL */
107struct fuse_req *fuse_get_request(struct fuse_conn *fc)
108{
109 int intr;
110 sigset_t oldset;
111
112 block_sigs(&oldset);
113 intr = down_interruptible(&fc->outstanding_sem);
114 restore_sigs(&oldset);
115 return intr ? NULL : do_get_request(fc);
116}
117
118static void fuse_putback_request(struct fuse_conn *fc, struct fuse_req *req)
119{
120 spin_lock(&fuse_lock);
121 if (req->preallocated)
122 list_add(&req->list, &fc->unused_list);
123 else
124 fuse_request_free(req);
125
126 /* If we are in debt decrease that first */
127 if (fc->outstanding_debt)
128 fc->outstanding_debt--;
129 else
130 up(&fc->outstanding_sem);
131 spin_unlock(&fuse_lock);
132}
133
134void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
135{
136 if (atomic_dec_and_test(&req->count))
137 fuse_putback_request(fc, req);
138}
139
140void fuse_release_background(struct fuse_req *req)
141{
142 iput(req->inode);
143 iput(req->inode2);
144 if (req->file)
145 fput(req->file);
146 spin_lock(&fuse_lock);
147 list_del(&req->bg_entry);
148 spin_unlock(&fuse_lock);
149}
150
151/*
152 * This function is called when a request is finished. Either a reply
153 * has arrived or it was interrupted (and not yet sent) or some error
154 * occured during communication with userspace, or the device file was
155 * closed. It decreases the referece count for the request. In case
156 * of a background request the referece to the stored objects are
157 * released. The requester thread is woken up (if still waiting), and
158 * finally the request is either freed or put on the unused_list
159 *
160 * Called with fuse_lock, unlocks it
161 */
162static void request_end(struct fuse_conn *fc, struct fuse_req *req)
163{
164 int putback;
165 req->finished = 1;
166 putback = atomic_dec_and_test(&req->count);
167 spin_unlock(&fuse_lock);
168 if (req->background) {
169 down_read(&fc->sbput_sem);
170 if (fc->mounted)
171 fuse_release_background(req);
172 up_read(&fc->sbput_sem);
173 }
174 wake_up(&req->waitq);
175 if (req->in.h.opcode == FUSE_INIT) {
176 int i;
177
178 if (req->misc.init_in_out.major != FUSE_KERNEL_VERSION)
179 fc->conn_error = 1;
180
181 /* After INIT reply is received other requests can go
182 out. So do (FUSE_MAX_OUTSTANDING - 1) number of
183 up()s on outstanding_sem. The last up() is done in
184 fuse_putback_request() */
185 for (i = 1; i < FUSE_MAX_OUTSTANDING; i++)
186 up(&fc->outstanding_sem);
187 }
188 if (putback)
189 fuse_putback_request(fc, req);
190}
191
192/*
193 * Unfortunately request interruption not just solves the deadlock
194 * problem, it causes problems too. These stem from the fact, that an
195 * interrupted request is continued to be processed in userspace,
196 * while all the locks and object references (inode and file) held
197 * during the operation are released.
198 *
199 * To release the locks is exactly why there's a need to interrupt the
200 * request, so there's not a lot that can be done about this, except
201 * introduce additional locking in userspace.
202 *
203 * More important is to keep inode and file references until userspace
204 * has replied, otherwise FORGET and RELEASE could be sent while the
205 * inode/file is still used by the filesystem.
206 *
207 * For this reason the concept of "background" request is introduced.
208 * An interrupted request is backgrounded if it has been already sent
209 * to userspace. Backgrounding involves getting an extra reference to
210 * inode(s) or file used in the request, and adding the request to
211 * fc->background list. When a reply is received for a background
212 * request, the object references are released, and the request is
213 * removed from the list. If the filesystem is unmounted while there
214 * are still background requests, the list is walked and references
215 * are released as if a reply was received.
216 *
217 * There's one more use for a background request. The RELEASE message is
218 * always sent as background, since it doesn't return an error or
219 * data.
220 */
221static void background_request(struct fuse_conn *fc, struct fuse_req *req)
222{
223 req->background = 1;
224 list_add(&req->bg_entry, &fc->background);
225 if (req->inode)
226 req->inode = igrab(req->inode);
227 if (req->inode2)
228 req->inode2 = igrab(req->inode2);
229 if (req->file)
230 get_file(req->file);
231}
232
233/* Called with fuse_lock held. Releases, and then reacquires it. */
234static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
235{
236 sigset_t oldset;
237
238 spin_unlock(&fuse_lock);
239 block_sigs(&oldset);
240 wait_event_interruptible(req->waitq, req->finished);
241 restore_sigs(&oldset);
242 spin_lock(&fuse_lock);
243 if (req->finished)
244 return;
245
246 req->out.h.error = -EINTR;
247 req->interrupted = 1;
248 if (req->locked) {
249 /* This is uninterruptible sleep, because data is
250 being copied to/from the buffers of req. During
251 locked state, there mustn't be any filesystem
252 operation (e.g. page fault), since that could lead
253 to deadlock */
254 spin_unlock(&fuse_lock);
255 wait_event(req->waitq, !req->locked);
256 spin_lock(&fuse_lock);
257 }
258 if (!req->sent && !list_empty(&req->list)) {
259 list_del(&req->list);
260 __fuse_put_request(req);
261 } else if (!req->finished && req->sent)
262 background_request(fc, req);
263}
264
265static unsigned len_args(unsigned numargs, struct fuse_arg *args)
266{
267 unsigned nbytes = 0;
268 unsigned i;
269
270 for (i = 0; i < numargs; i++)
271 nbytes += args[i].size;
272
273 return nbytes;
274}
275
276static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
277{
278 fc->reqctr++;
279 /* zero is special */
280 if (fc->reqctr == 0)
281 fc->reqctr = 1;
282 req->in.h.unique = fc->reqctr;
283 req->in.h.len = sizeof(struct fuse_in_header) +
284 len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
285 if (!req->preallocated) {
286 /* If request is not preallocated (either FORGET or
287 RELEASE), then still decrease outstanding_sem, so
288 user can't open infinite number of files while not
289 processing the RELEASE requests. However for
290 efficiency do it without blocking, so if down()
291 would block, just increase the debt instead */
292 if (down_trylock(&fc->outstanding_sem))
293 fc->outstanding_debt++;
294 }
295 list_add_tail(&req->list, &fc->pending);
296 wake_up(&fc->waitq);
297}
298
299/*
300 * This can only be interrupted by a SIGKILL
301 */
302void request_send(struct fuse_conn *fc, struct fuse_req *req)
303{
304 req->isreply = 1;
305 spin_lock(&fuse_lock);
306 if (!fc->connected)
307 req->out.h.error = -ENOTCONN;
308 else if (fc->conn_error)
309 req->out.h.error = -ECONNREFUSED;
310 else {
311 queue_request(fc, req);
312 /* acquire extra reference, since request is still needed
313 after request_end() */
314 __fuse_get_request(req);
315
316 request_wait_answer(fc, req);
317 }
318 spin_unlock(&fuse_lock);
319}
320
321static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
322{
323 spin_lock(&fuse_lock);
324 if (fc->connected) {
325 queue_request(fc, req);
326 spin_unlock(&fuse_lock);
327 } else {
328 req->out.h.error = -ENOTCONN;
329 request_end(fc, req);
330 }
331}
332
333void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
334{
335 req->isreply = 0;
336 request_send_nowait(fc, req);
337}
338
339void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
340{
341 req->isreply = 1;
342 spin_lock(&fuse_lock);
343 background_request(fc, req);
344 spin_unlock(&fuse_lock);
345 request_send_nowait(fc, req);
346}
347
348void fuse_send_init(struct fuse_conn *fc)
349{
350 /* This is called from fuse_read_super() so there's guaranteed
351 to be a request available */
352 struct fuse_req *req = do_get_request(fc);
353 struct fuse_init_in_out *arg = &req->misc.init_in_out;
354 arg->major = FUSE_KERNEL_VERSION;
355 arg->minor = FUSE_KERNEL_MINOR_VERSION;
356 req->in.h.opcode = FUSE_INIT;
357 req->in.numargs = 1;
358 req->in.args[0].size = sizeof(*arg);
359 req->in.args[0].value = arg;
360 req->out.numargs = 1;
361 req->out.args[0].size = sizeof(*arg);
362 req->out.args[0].value = arg;
363 request_send_background(fc, req);
364}
365
366/*
367 * Lock the request. Up to the next unlock_request() there mustn't be
368 * anything that could cause a page-fault. If the request was already
369 * interrupted bail out.
370 */
371static inline int lock_request(struct fuse_req *req)
372{
373 int err = 0;
374 if (req) {
375 spin_lock(&fuse_lock);
376 if (req->interrupted)
377 err = -ENOENT;
378 else
379 req->locked = 1;
380 spin_unlock(&fuse_lock);
381 }
382 return err;
383}
384
385/*
386 * Unlock request. If it was interrupted during being locked, the
387 * requester thread is currently waiting for it to be unlocked, so
388 * wake it up.
389 */
390static inline void unlock_request(struct fuse_req *req)
391{
392 if (req) {
393 spin_lock(&fuse_lock);
394 req->locked = 0;
395 if (req->interrupted)
396 wake_up(&req->waitq);
397 spin_unlock(&fuse_lock);
398 }
399}
400
401struct fuse_copy_state {
402 int write;
403 struct fuse_req *req;
404 const struct iovec *iov;
405 unsigned long nr_segs;
406 unsigned long seglen;
407 unsigned long addr;
408 struct page *pg;
409 void *mapaddr;
410 void *buf;
411 unsigned len;
412};
413
414static void fuse_copy_init(struct fuse_copy_state *cs, int write,
415 struct fuse_req *req, const struct iovec *iov,
416 unsigned long nr_segs)
417{
418 memset(cs, 0, sizeof(*cs));
419 cs->write = write;
420 cs->req = req;
421 cs->iov = iov;
422 cs->nr_segs = nr_segs;
423}
424
425/* Unmap and put previous page of userspace buffer */
426static inline void fuse_copy_finish(struct fuse_copy_state *cs)
427{
428 if (cs->mapaddr) {
429 kunmap_atomic(cs->mapaddr, KM_USER0);
430 if (cs->write) {
431 flush_dcache_page(cs->pg);
432 set_page_dirty_lock(cs->pg);
433 }
434 put_page(cs->pg);
435 cs->mapaddr = NULL;
436 }
437}
438
439/*
440 * Get another pagefull of userspace buffer, and map it to kernel
441 * address space, and lock request
442 */
443static int fuse_copy_fill(struct fuse_copy_state *cs)
444{
445 unsigned long offset;
446 int err;
447
448 unlock_request(cs->req);
449 fuse_copy_finish(cs);
450 if (!cs->seglen) {
451 BUG_ON(!cs->nr_segs);
452 cs->seglen = cs->iov[0].iov_len;
453 cs->addr = (unsigned long) cs->iov[0].iov_base;
454 cs->iov ++;
455 cs->nr_segs --;
456 }
457 down_read(&current->mm->mmap_sem);
458 err = get_user_pages(current, current->mm, cs->addr, 1, cs->write, 0,
459 &cs->pg, NULL);
460 up_read(&current->mm->mmap_sem);
461 if (err < 0)
462 return err;
463 BUG_ON(err != 1);
464 offset = cs->addr % PAGE_SIZE;
465 cs->mapaddr = kmap_atomic(cs->pg, KM_USER0);
466 cs->buf = cs->mapaddr + offset;
467 cs->len = min(PAGE_SIZE - offset, cs->seglen);
468 cs->seglen -= cs->len;
469 cs->addr += cs->len;
470
471 return lock_request(cs->req);
472}
473
474/* Do as much copy to/from userspace buffer as we can */
475static inline int fuse_copy_do(struct fuse_copy_state *cs, void **val,
476 unsigned *size)
477{
478 unsigned ncpy = min(*size, cs->len);
479 if (val) {
480 if (cs->write)
481 memcpy(cs->buf, *val, ncpy);
482 else
483 memcpy(*val, cs->buf, ncpy);
484 *val += ncpy;
485 }
486 *size -= ncpy;
487 cs->len -= ncpy;
488 cs->buf += ncpy;
489 return ncpy;
490}
491
492/*
493 * Copy a page in the request to/from the userspace buffer. Must be
494 * done atomically
495 */
496static inline int fuse_copy_page(struct fuse_copy_state *cs, struct page *page,
497 unsigned offset, unsigned count, int zeroing)
498{
499 if (page && zeroing && count < PAGE_SIZE) {
500 void *mapaddr = kmap_atomic(page, KM_USER1);
501 memset(mapaddr, 0, PAGE_SIZE);
502 kunmap_atomic(mapaddr, KM_USER1);
503 }
504 while (count) {
505 int err;
506 if (!cs->len && (err = fuse_copy_fill(cs)))
507 return err;
508 if (page) {
509 void *mapaddr = kmap_atomic(page, KM_USER1);
510 void *buf = mapaddr + offset;
511 offset += fuse_copy_do(cs, &buf, &count);
512 kunmap_atomic(mapaddr, KM_USER1);
513 } else
514 offset += fuse_copy_do(cs, NULL, &count);
515 }
516 if (page && !cs->write)
517 flush_dcache_page(page);
518 return 0;
519}
520
521/* Copy pages in the request to/from userspace buffer */
522static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
523 int zeroing)
524{
525 unsigned i;
526 struct fuse_req *req = cs->req;
527 unsigned offset = req->page_offset;
528 unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);
529
530 for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
531 struct page *page = req->pages[i];
532 int err = fuse_copy_page(cs, page, offset, count, zeroing);
533 if (err)
534 return err;
535
536 nbytes -= count;
537 count = min(nbytes, (unsigned) PAGE_SIZE);
538 offset = 0;
539 }
540 return 0;
541}
542
543/* Copy a single argument in the request to/from userspace buffer */
544static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
545{
546 while (size) {
547 int err;
548 if (!cs->len && (err = fuse_copy_fill(cs)))
549 return err;
550 fuse_copy_do(cs, &val, &size);
551 }
552 return 0;
553}
554
555/* Copy request arguments to/from userspace buffer */
556static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
557 unsigned argpages, struct fuse_arg *args,
558 int zeroing)
559{
560 int err = 0;
561 unsigned i;
562
563 for (i = 0; !err && i < numargs; i++) {
564 struct fuse_arg *arg = &args[i];
565 if (i == numargs - 1 && argpages)
566 err = fuse_copy_pages(cs, arg->size, zeroing);
567 else
568 err = fuse_copy_one(cs, arg->value, arg->size);
569 }
570 return err;
571}
572
573/* Wait until a request is available on the pending list */
574static void request_wait(struct fuse_conn *fc)
575{
576 DECLARE_WAITQUEUE(wait, current);
577
578 add_wait_queue_exclusive(&fc->waitq, &wait);
579 while (fc->mounted && list_empty(&fc->pending)) {
580 set_current_state(TASK_INTERRUPTIBLE);
581 if (signal_pending(current))
582 break;
583
584 spin_unlock(&fuse_lock);
585 schedule();
586 spin_lock(&fuse_lock);
587 }
588 set_current_state(TASK_RUNNING);
589 remove_wait_queue(&fc->waitq, &wait);
590}
591
592/*
593 * Read a single request into the userspace filesystem's buffer. This
594 * function waits until a request is available, then removes it from
595 * the pending list and copies request data to userspace buffer. If
596 * no reply is needed (FORGET) or request has been interrupted or
597 * there was an error during the copying then it's finished by calling
598 * request_end(). Otherwise add it to the processing list, and set
599 * the 'sent' flag.
600 */
601static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
602 unsigned long nr_segs, loff_t *off)
603{
604 int err;
605 struct fuse_conn *fc;
606 struct fuse_req *req;
607 struct fuse_in *in;
608 struct fuse_copy_state cs;
609 unsigned reqsize;
610
611 spin_lock(&fuse_lock);
612 fc = file->private_data;
613 err = -EPERM;
614 if (!fc)
615 goto err_unlock;
616 request_wait(fc);
617 err = -ENODEV;
618 if (!fc->mounted)
619 goto err_unlock;
620 err = -ERESTARTSYS;
621 if (list_empty(&fc->pending))
622 goto err_unlock;
623
624 req = list_entry(fc->pending.next, struct fuse_req, list);
625 list_del_init(&req->list);
626 spin_unlock(&fuse_lock);
627
628 in = &req->in;
629 reqsize = req->in.h.len;
630 fuse_copy_init(&cs, 1, req, iov, nr_segs);
631 err = -EINVAL;
632 if (iov_length(iov, nr_segs) >= reqsize) {
633 err = fuse_copy_one(&cs, &in->h, sizeof(in->h));
634 if (!err)
635 err = fuse_copy_args(&cs, in->numargs, in->argpages,
636 (struct fuse_arg *) in->args, 0);
637 }
638 fuse_copy_finish(&cs);
639
640 spin_lock(&fuse_lock);
641 req->locked = 0;
642 if (!err && req->interrupted)
643 err = -ENOENT;
644 if (err) {
645 if (!req->interrupted)
646 req->out.h.error = -EIO;
647 request_end(fc, req);
648 return err;
649 }
650 if (!req->isreply)
651 request_end(fc, req);
652 else {
653 req->sent = 1;
654 list_add_tail(&req->list, &fc->processing);
655 spin_unlock(&fuse_lock);
656 }
657 return reqsize;
658
659 err_unlock:
660 spin_unlock(&fuse_lock);
661 return err;
662}
663
664static ssize_t fuse_dev_read(struct file *file, char __user *buf,
665 size_t nbytes, loff_t *off)
666{
667 struct iovec iov;
668 iov.iov_len = nbytes;
669 iov.iov_base = buf;
670 return fuse_dev_readv(file, &iov, 1, off);
671}
672
673/* Look up request on processing list by unique ID */
674static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
675{
676 struct list_head *entry;
677
678 list_for_each(entry, &fc->processing) {
679 struct fuse_req *req;
680 req = list_entry(entry, struct fuse_req, list);
681 if (req->in.h.unique == unique)
682 return req;
683 }
684 return NULL;
685}
686
687static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
688 unsigned nbytes)
689{
690 unsigned reqsize = sizeof(struct fuse_out_header);
691
692 if (out->h.error)
693 return nbytes != reqsize ? -EINVAL : 0;
694
695 reqsize += len_args(out->numargs, out->args);
696
697 if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
698 return -EINVAL;
699 else if (reqsize > nbytes) {
700 struct fuse_arg *lastarg = &out->args[out->numargs-1];
701 unsigned diffsize = reqsize - nbytes;
702 if (diffsize > lastarg->size)
703 return -EINVAL;
704 lastarg->size -= diffsize;
705 }
706 return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
707 out->page_zeroing);
708}
709
710/*
711 * Write a single reply to a request. First the header is copied from
712 * the write buffer. The request is then searched on the processing
713 * list by the unique ID found in the header. If found, then remove
714 * it from the list and copy the rest of the buffer to the request.
715 * The request is finished by calling request_end()
716 */
717static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
718 unsigned long nr_segs, loff_t *off)
719{
720 int err;
721 unsigned nbytes = iov_length(iov, nr_segs);
722 struct fuse_req *req;
723 struct fuse_out_header oh;
724 struct fuse_copy_state cs;
725 struct fuse_conn *fc = fuse_get_conn(file);
726 if (!fc)
727 return -ENODEV;
728
729 fuse_copy_init(&cs, 0, NULL, iov, nr_segs);
730 if (nbytes < sizeof(struct fuse_out_header))
731 return -EINVAL;
732
733 err = fuse_copy_one(&cs, &oh, sizeof(oh));
734 if (err)
735 goto err_finish;
736 err = -EINVAL;
737 if (!oh.unique || oh.error <= -1000 || oh.error > 0 ||
738 oh.len != nbytes)
739 goto err_finish;
740
741 spin_lock(&fuse_lock);
742 req = request_find(fc, oh.unique);
743 err = -EINVAL;
744 if (!req)
745 goto err_unlock;
746
747 list_del_init(&req->list);
748 if (req->interrupted) {
749 request_end(fc, req);
750 fuse_copy_finish(&cs);
751 return -ENOENT;
752 }
753 req->out.h = oh;
754 req->locked = 1;
755 cs.req = req;
756 spin_unlock(&fuse_lock);
757
758 err = copy_out_args(&cs, &req->out, nbytes);
759 fuse_copy_finish(&cs);
760
761 spin_lock(&fuse_lock);
762 req->locked = 0;
763 if (!err) {
764 if (req->interrupted)
765 err = -ENOENT;
766 } else if (!req->interrupted)
767 req->out.h.error = -EIO;
768 request_end(fc, req);
769
770 return err ? err : nbytes;
771
772 err_unlock:
773 spin_unlock(&fuse_lock);
774 err_finish:
775 fuse_copy_finish(&cs);
776 return err;
777}
778
779static ssize_t fuse_dev_write(struct file *file, const char __user *buf,
780 size_t nbytes, loff_t *off)
781{
782 struct iovec iov;
783 iov.iov_len = nbytes;
784 iov.iov_base = (char __user *) buf;
785 return fuse_dev_writev(file, &iov, 1, off);
786}
787
788static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
789{
790 struct fuse_conn *fc = fuse_get_conn(file);
791 unsigned mask = POLLOUT | POLLWRNORM;
792
793 if (!fc)
794 return -ENODEV;
795
796 poll_wait(file, &fc->waitq, wait);
797
798 spin_lock(&fuse_lock);
799 if (!list_empty(&fc->pending))
800 mask |= POLLIN | POLLRDNORM;
801 spin_unlock(&fuse_lock);
802
803 return mask;
804}
805
806/* Abort all requests on the given list (pending or processing) */
807static void end_requests(struct fuse_conn *fc, struct list_head *head)
808{
809 while (!list_empty(head)) {
810 struct fuse_req *req;
811 req = list_entry(head->next, struct fuse_req, list);
812 list_del_init(&req->list);
813 req->out.h.error = -ECONNABORTED;
814 request_end(fc, req);
815 spin_lock(&fuse_lock);
816 }
817}
818
819static int fuse_dev_release(struct inode *inode, struct file *file)
820{
821 struct fuse_conn *fc;
822
823 spin_lock(&fuse_lock);
824 fc = file->private_data;
825 if (fc) {
826 fc->connected = 0;
827 end_requests(fc, &fc->pending);
828 end_requests(fc, &fc->processing);
829 fuse_release_conn(fc);
830 }
831 spin_unlock(&fuse_lock);
832 return 0;
833}
834
835struct file_operations fuse_dev_operations = {
836 .owner = THIS_MODULE,
837 .llseek = no_llseek,
838 .read = fuse_dev_read,
839 .readv = fuse_dev_readv,
840 .write = fuse_dev_write,
841 .writev = fuse_dev_writev,
842 .poll = fuse_dev_poll,
843 .release = fuse_dev_release,
844};
845
846static struct miscdevice fuse_miscdevice = {
847 .minor = FUSE_MINOR,
848 .name = "fuse",
849 .fops = &fuse_dev_operations,
850};
851
852int __init fuse_dev_init(void)
853{
854 int err = -ENOMEM;
855 fuse_req_cachep = kmem_cache_create("fuse_request",
856 sizeof(struct fuse_req),
857 0, 0, NULL, NULL);
858 if (!fuse_req_cachep)
859 goto out;
860
861 err = misc_register(&fuse_miscdevice);
862 if (err)
863 goto out_cache_clean;
864
865 return 0;
866
867 out_cache_clean:
868 kmem_cache_destroy(fuse_req_cachep);
869 out:
870 return err;
871}
872
873void fuse_dev_cleanup(void)
874{
875 misc_deregister(&fuse_miscdevice);
876 kmem_cache_destroy(fuse_req_cachep);
877}
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
new file mode 100644
index 000000000000..e79e49b3eec7
--- /dev/null
+++ b/fs/fuse/dir.c
@@ -0,0 +1,982 @@
1/*
2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu>
4
5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING.
7*/
8
9#include "fuse_i.h"
10
11#include <linux/pagemap.h>
12#include <linux/file.h>
13#include <linux/gfp.h>
14#include <linux/sched.h>
15#include <linux/namei.h>
16
17static inline unsigned long time_to_jiffies(unsigned long sec,
18 unsigned long nsec)
19{
20 struct timespec ts = {sec, nsec};
21 return jiffies + timespec_to_jiffies(&ts);
22}
23
24static void fuse_lookup_init(struct fuse_req *req, struct inode *dir,
25 struct dentry *entry,
26 struct fuse_entry_out *outarg)
27{
28 req->in.h.opcode = FUSE_LOOKUP;
29 req->in.h.nodeid = get_node_id(dir);
30 req->inode = dir;
31 req->in.numargs = 1;
32 req->in.args[0].size = entry->d_name.len + 1;
33 req->in.args[0].value = entry->d_name.name;
34 req->out.numargs = 1;
35 req->out.args[0].size = sizeof(struct fuse_entry_out);
36 req->out.args[0].value = outarg;
37}
38
39static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
40{
41 if (!entry->d_inode || is_bad_inode(entry->d_inode))
42 return 0;
43 else if (time_after(jiffies, entry->d_time)) {
44 int err;
45 struct fuse_entry_out outarg;
46 struct inode *inode = entry->d_inode;
47 struct fuse_inode *fi = get_fuse_inode(inode);
48 struct fuse_conn *fc = get_fuse_conn(inode);
49 struct fuse_req *req = fuse_get_request(fc);
50 if (!req)
51 return 0;
52
53 fuse_lookup_init(req, entry->d_parent->d_inode, entry, &outarg);
54 request_send(fc, req);
55 err = req->out.h.error;
56 if (!err) {
57 if (outarg.nodeid != get_node_id(inode)) {
58 fuse_send_forget(fc, req, outarg.nodeid, 1);
59 return 0;
60 }
61 fi->nlookup ++;
62 }
63 fuse_put_request(fc, req);
64 if (err || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
65 return 0;
66
67 fuse_change_attributes(inode, &outarg.attr);
68 entry->d_time = time_to_jiffies(outarg.entry_valid,
69 outarg.entry_valid_nsec);
70 fi->i_time = time_to_jiffies(outarg.attr_valid,
71 outarg.attr_valid_nsec);
72 }
73 return 1;
74}
75
76static struct dentry_operations fuse_dentry_operations = {
77 .d_revalidate = fuse_dentry_revalidate,
78};
79
80static int fuse_lookup_iget(struct inode *dir, struct dentry *entry,
81 struct inode **inodep)
82{
83 int err;
84 struct fuse_entry_out outarg;
85 struct inode *inode = NULL;
86 struct fuse_conn *fc = get_fuse_conn(dir);
87 struct fuse_req *req;
88
89 if (entry->d_name.len > FUSE_NAME_MAX)
90 return -ENAMETOOLONG;
91
92 req = fuse_get_request(fc);
93 if (!req)
94 return -EINTR;
95
96 fuse_lookup_init(req, dir, entry, &outarg);
97 request_send(fc, req);
98 err = req->out.h.error;
99 if (!err) {
100 inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
101 &outarg.attr);
102 if (!inode) {
103 fuse_send_forget(fc, req, outarg.nodeid, 1);
104 return -ENOMEM;
105 }
106 }
107 fuse_put_request(fc, req);
108 if (err && err != -ENOENT)
109 return err;
110
111 if (inode) {
112 struct fuse_inode *fi = get_fuse_inode(inode);
113 entry->d_time = time_to_jiffies(outarg.entry_valid,
114 outarg.entry_valid_nsec);
115 fi->i_time = time_to_jiffies(outarg.attr_valid,
116 outarg.attr_valid_nsec);
117 }
118
119 entry->d_op = &fuse_dentry_operations;
120 *inodep = inode;
121 return 0;
122}
123
124void fuse_invalidate_attr(struct inode *inode)
125{
126 get_fuse_inode(inode)->i_time = jiffies - 1;
127}
128
129static void fuse_invalidate_entry(struct dentry *entry)
130{
131 d_invalidate(entry);
132 entry->d_time = jiffies - 1;
133}
134
135static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
136 struct inode *dir, struct dentry *entry,
137 int mode)
138{
139 struct fuse_entry_out outarg;
140 struct inode *inode;
141 struct fuse_inode *fi;
142 int err;
143
144 req->in.h.nodeid = get_node_id(dir);
145 req->inode = dir;
146 req->out.numargs = 1;
147 req->out.args[0].size = sizeof(outarg);
148 req->out.args[0].value = &outarg;
149 request_send(fc, req);
150 err = req->out.h.error;
151 if (err) {
152 fuse_put_request(fc, req);
153 return err;
154 }
155 inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
156 &outarg.attr);
157 if (!inode) {
158 fuse_send_forget(fc, req, outarg.nodeid, 1);
159 return -ENOMEM;
160 }
161 fuse_put_request(fc, req);
162
163 /* Don't allow userspace to do really stupid things... */
164 if ((inode->i_mode ^ mode) & S_IFMT) {
165 iput(inode);
166 return -EIO;
167 }
168
169 entry->d_time = time_to_jiffies(outarg.entry_valid,
170 outarg.entry_valid_nsec);
171
172 fi = get_fuse_inode(inode);
173 fi->i_time = time_to_jiffies(outarg.attr_valid,
174 outarg.attr_valid_nsec);
175
176 d_instantiate(entry, inode);
177 fuse_invalidate_attr(dir);
178 return 0;
179}
180
181static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode,
182 dev_t rdev)
183{
184 struct fuse_mknod_in inarg;
185 struct fuse_conn *fc = get_fuse_conn(dir);
186 struct fuse_req *req = fuse_get_request(fc);
187 if (!req)
188 return -EINTR;
189
190 memset(&inarg, 0, sizeof(inarg));
191 inarg.mode = mode;
192 inarg.rdev = new_encode_dev(rdev);
193 req->in.h.opcode = FUSE_MKNOD;
194 req->in.numargs = 2;
195 req->in.args[0].size = sizeof(inarg);
196 req->in.args[0].value = &inarg;
197 req->in.args[1].size = entry->d_name.len + 1;
198 req->in.args[1].value = entry->d_name.name;
199 return create_new_entry(fc, req, dir, entry, mode);
200}
201
202static int fuse_create(struct inode *dir, struct dentry *entry, int mode,
203 struct nameidata *nd)
204{
205 return fuse_mknod(dir, entry, mode, 0);
206}
207
208static int fuse_mkdir(struct inode *dir, struct dentry *entry, int mode)
209{
210 struct fuse_mkdir_in inarg;
211 struct fuse_conn *fc = get_fuse_conn(dir);
212 struct fuse_req *req = fuse_get_request(fc);
213 if (!req)
214 return -EINTR;
215
216 memset(&inarg, 0, sizeof(inarg));
217 inarg.mode = mode;
218 req->in.h.opcode = FUSE_MKDIR;
219 req->in.numargs = 2;
220 req->in.args[0].size = sizeof(inarg);
221 req->in.args[0].value = &inarg;
222 req->in.args[1].size = entry->d_name.len + 1;
223 req->in.args[1].value = entry->d_name.name;
224 return create_new_entry(fc, req, dir, entry, S_IFDIR);
225}
226
227static int fuse_symlink(struct inode *dir, struct dentry *entry,
228 const char *link)
229{
230 struct fuse_conn *fc = get_fuse_conn(dir);
231 unsigned len = strlen(link) + 1;
232 struct fuse_req *req;
233
234 if (len > FUSE_SYMLINK_MAX)
235 return -ENAMETOOLONG;
236
237 req = fuse_get_request(fc);
238 if (!req)
239 return -EINTR;
240
241 req->in.h.opcode = FUSE_SYMLINK;
242 req->in.numargs = 2;
243 req->in.args[0].size = entry->d_name.len + 1;
244 req->in.args[0].value = entry->d_name.name;
245 req->in.args[1].size = len;
246 req->in.args[1].value = link;
247 return create_new_entry(fc, req, dir, entry, S_IFLNK);
248}
249
250static int fuse_unlink(struct inode *dir, struct dentry *entry)
251{
252 int err;
253 struct fuse_conn *fc = get_fuse_conn(dir);
254 struct fuse_req *req = fuse_get_request(fc);
255 if (!req)
256 return -EINTR;
257
258 req->in.h.opcode = FUSE_UNLINK;
259 req->in.h.nodeid = get_node_id(dir);
260 req->inode = dir;
261 req->in.numargs = 1;
262 req->in.args[0].size = entry->d_name.len + 1;
263 req->in.args[0].value = entry->d_name.name;
264 request_send(fc, req);
265 err = req->out.h.error;
266 fuse_put_request(fc, req);
267 if (!err) {
268 struct inode *inode = entry->d_inode;
269
270 /* Set nlink to zero so the inode can be cleared, if
271 the inode does have more links this will be
272 discovered at the next lookup/getattr */
273 inode->i_nlink = 0;
274 fuse_invalidate_attr(inode);
275 fuse_invalidate_attr(dir);
276 } else if (err == -EINTR)
277 fuse_invalidate_entry(entry);
278 return err;
279}
280
281static int fuse_rmdir(struct inode *dir, struct dentry *entry)
282{
283 int err;
284 struct fuse_conn *fc = get_fuse_conn(dir);
285 struct fuse_req *req = fuse_get_request(fc);
286 if (!req)
287 return -EINTR;
288
289 req->in.h.opcode = FUSE_RMDIR;
290 req->in.h.nodeid = get_node_id(dir);
291 req->inode = dir;
292 req->in.numargs = 1;
293 req->in.args[0].size = entry->d_name.len + 1;
294 req->in.args[0].value = entry->d_name.name;
295 request_send(fc, req);
296 err = req->out.h.error;
297 fuse_put_request(fc, req);
298 if (!err) {
299 entry->d_inode->i_nlink = 0;
300 fuse_invalidate_attr(dir);
301 } else if (err == -EINTR)
302 fuse_invalidate_entry(entry);
303 return err;
304}
305
306static int fuse_rename(struct inode *olddir, struct dentry *oldent,
307 struct inode *newdir, struct dentry *newent)
308{
309 int err;
310 struct fuse_rename_in inarg;
311 struct fuse_conn *fc = get_fuse_conn(olddir);
312 struct fuse_req *req = fuse_get_request(fc);
313 if (!req)
314 return -EINTR;
315
316 memset(&inarg, 0, sizeof(inarg));
317 inarg.newdir = get_node_id(newdir);
318 req->in.h.opcode = FUSE_RENAME;
319 req->in.h.nodeid = get_node_id(olddir);
320 req->inode = olddir;
321 req->inode2 = newdir;
322 req->in.numargs = 3;
323 req->in.args[0].size = sizeof(inarg);
324 req->in.args[0].value = &inarg;
325 req->in.args[1].size = oldent->d_name.len + 1;
326 req->in.args[1].value = oldent->d_name.name;
327 req->in.args[2].size = newent->d_name.len + 1;
328 req->in.args[2].value = newent->d_name.name;
329 request_send(fc, req);
330 err = req->out.h.error;
331 fuse_put_request(fc, req);
332 if (!err) {
333 fuse_invalidate_attr(olddir);
334 if (olddir != newdir)
335 fuse_invalidate_attr(newdir);
336 } else if (err == -EINTR) {
337 /* If request was interrupted, DEITY only knows if the
338 rename actually took place. If the invalidation
339 fails (e.g. some process has CWD under the renamed
340 directory), then there can be inconsistency between
341 the dcache and the real filesystem. Tough luck. */
342 fuse_invalidate_entry(oldent);
343 if (newent->d_inode)
344 fuse_invalidate_entry(newent);
345 }
346
347 return err;
348}
349
350static int fuse_link(struct dentry *entry, struct inode *newdir,
351 struct dentry *newent)
352{
353 int err;
354 struct fuse_link_in inarg;
355 struct inode *inode = entry->d_inode;
356 struct fuse_conn *fc = get_fuse_conn(inode);
357 struct fuse_req *req = fuse_get_request(fc);
358 if (!req)
359 return -EINTR;
360
361 memset(&inarg, 0, sizeof(inarg));
362 inarg.oldnodeid = get_node_id(inode);
363 req->in.h.opcode = FUSE_LINK;
364 req->inode2 = inode;
365 req->in.numargs = 2;
366 req->in.args[0].size = sizeof(inarg);
367 req->in.args[0].value = &inarg;
368 req->in.args[1].size = newent->d_name.len + 1;
369 req->in.args[1].value = newent->d_name.name;
370 err = create_new_entry(fc, req, newdir, newent, inode->i_mode);
371 /* Contrary to "normal" filesystems it can happen that link
372 makes two "logical" inodes point to the same "physical"
373 inode. We invalidate the attributes of the old one, so it
374 will reflect changes in the backing inode (link count,
375 etc.)
376 */
377 if (!err || err == -EINTR)
378 fuse_invalidate_attr(inode);
379 return err;
380}
381
382int fuse_do_getattr(struct inode *inode)
383{
384 int err;
385 struct fuse_attr_out arg;
386 struct fuse_conn *fc = get_fuse_conn(inode);
387 struct fuse_req *req = fuse_get_request(fc);
388 if (!req)
389 return -EINTR;
390
391 req->in.h.opcode = FUSE_GETATTR;
392 req->in.h.nodeid = get_node_id(inode);
393 req->inode = inode;
394 req->out.numargs = 1;
395 req->out.args[0].size = sizeof(arg);
396 req->out.args[0].value = &arg;
397 request_send(fc, req);
398 err = req->out.h.error;
399 fuse_put_request(fc, req);
400 if (!err) {
401 if ((inode->i_mode ^ arg.attr.mode) & S_IFMT) {
402 make_bad_inode(inode);
403 err = -EIO;
404 } else {
405 struct fuse_inode *fi = get_fuse_inode(inode);
406 fuse_change_attributes(inode, &arg.attr);
407 fi->i_time = time_to_jiffies(arg.attr_valid,
408 arg.attr_valid_nsec);
409 }
410 }
411 return err;
412}
413
414/*
415 * Calling into a user-controlled filesystem gives the filesystem
416 * daemon ptrace-like capabilities over the requester process. This
417 * means, that the filesystem daemon is able to record the exact
418 * filesystem operations performed, and can also control the behavior
419 * of the requester process in otherwise impossible ways. For example
420 * it can delay the operation for arbitrary length of time allowing
421 * DoS against the requester.
422 *
423 * For this reason only those processes can call into the filesystem,
424 * for which the owner of the mount has ptrace privilege. This
425 * excludes processes started by other users, suid or sgid processes.
426 */
427static int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task)
428{
429 if (fc->flags & FUSE_ALLOW_OTHER)
430 return 1;
431
432 if (task->euid == fc->user_id &&
433 task->suid == fc->user_id &&
434 task->uid == fc->user_id &&
435 task->egid == fc->group_id &&
436 task->sgid == fc->group_id &&
437 task->gid == fc->group_id)
438 return 1;
439
440 return 0;
441}
442
443static int fuse_revalidate(struct dentry *entry)
444{
445 struct inode *inode = entry->d_inode;
446 struct fuse_inode *fi = get_fuse_inode(inode);
447 struct fuse_conn *fc = get_fuse_conn(inode);
448
449 if (!fuse_allow_task(fc, current))
450 return -EACCES;
451 if (get_node_id(inode) != FUSE_ROOT_ID &&
452 time_before_eq(jiffies, fi->i_time))
453 return 0;
454
455 return fuse_do_getattr(inode);
456}
457
458static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd)
459{
460 struct fuse_conn *fc = get_fuse_conn(inode);
461
462 if (!fuse_allow_task(fc, current))
463 return -EACCES;
464 else if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
465 int err = generic_permission(inode, mask, NULL);
466
467 /* If permission is denied, try to refresh file
468 attributes. This is also needed, because the root
469 node will at first have no permissions */
470 if (err == -EACCES) {
471 err = fuse_do_getattr(inode);
472 if (!err)
473 err = generic_permission(inode, mask, NULL);
474 }
475
476 /* FIXME: Need some mechanism to revoke permissions:
477 currently if the filesystem suddenly changes the
478 file mode, we will not be informed about it, and
479 continue to allow access to the file/directory.
480
481 This is actually not so grave, since the user can
482 simply keep access to the file/directory anyway by
483 keeping it open... */
484
485 return err;
486 } else {
487 int mode = inode->i_mode;
488 if ((mask & MAY_WRITE) && IS_RDONLY(inode) &&
489 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
490 return -EROFS;
491 if ((mask & MAY_EXEC) && !S_ISDIR(mode) && !(mode & S_IXUGO))
492 return -EACCES;
493 return 0;
494 }
495}
496
497static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
498 void *dstbuf, filldir_t filldir)
499{
500 while (nbytes >= FUSE_NAME_OFFSET) {
501 struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
502 size_t reclen = FUSE_DIRENT_SIZE(dirent);
503 int over;
504 if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
505 return -EIO;
506 if (reclen > nbytes)
507 break;
508
509 over = filldir(dstbuf, dirent->name, dirent->namelen,
510 file->f_pos, dirent->ino, dirent->type);
511 if (over)
512 break;
513
514 buf += reclen;
515 nbytes -= reclen;
516 file->f_pos = dirent->off;
517 }
518
519 return 0;
520}
521
522static inline size_t fuse_send_readdir(struct fuse_req *req, struct file *file,
523 struct inode *inode, loff_t pos,
524 size_t count)
525{
526 return fuse_send_read_common(req, file, inode, pos, count, 1);
527}
528
529static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
530{
531 int err;
532 size_t nbytes;
533 struct page *page;
534 struct inode *inode = file->f_dentry->d_inode;
535 struct fuse_conn *fc = get_fuse_conn(inode);
536 struct fuse_req *req = fuse_get_request(fc);
537 if (!req)
538 return -EINTR;
539
540 page = alloc_page(GFP_KERNEL);
541 if (!page) {
542 fuse_put_request(fc, req);
543 return -ENOMEM;
544 }
545 req->num_pages = 1;
546 req->pages[0] = page;
547 nbytes = fuse_send_readdir(req, file, inode, file->f_pos, PAGE_SIZE);
548 err = req->out.h.error;
549 fuse_put_request(fc, req);
550 if (!err)
551 err = parse_dirfile(page_address(page), nbytes, file, dstbuf,
552 filldir);
553
554 __free_page(page);
555 fuse_invalidate_attr(inode); /* atime changed */
556 return err;
557}
558
559static char *read_link(struct dentry *dentry)
560{
561 struct inode *inode = dentry->d_inode;
562 struct fuse_conn *fc = get_fuse_conn(inode);
563 struct fuse_req *req = fuse_get_request(fc);
564 char *link;
565
566 if (!req)
567 return ERR_PTR(-EINTR);
568
569 link = (char *) __get_free_page(GFP_KERNEL);
570 if (!link) {
571 link = ERR_PTR(-ENOMEM);
572 goto out;
573 }
574 req->in.h.opcode = FUSE_READLINK;
575 req->in.h.nodeid = get_node_id(inode);
576 req->inode = inode;
577 req->out.argvar = 1;
578 req->out.numargs = 1;
579 req->out.args[0].size = PAGE_SIZE - 1;
580 req->out.args[0].value = link;
581 request_send(fc, req);
582 if (req->out.h.error) {
583 free_page((unsigned long) link);
584 link = ERR_PTR(req->out.h.error);
585 } else
586 link[req->out.args[0].size] = '\0';
587 out:
588 fuse_put_request(fc, req);
589 fuse_invalidate_attr(inode); /* atime changed */
590 return link;
591}
592
593static void free_link(char *link)
594{
595 if (!IS_ERR(link))
596 free_page((unsigned long) link);
597}
598
599static void *fuse_follow_link(struct dentry *dentry, struct nameidata *nd)
600{
601 nd_set_link(nd, read_link(dentry));
602 return NULL;
603}
604
605static void fuse_put_link(struct dentry *dentry, struct nameidata *nd, void *c)
606{
607 free_link(nd_get_link(nd));
608}
609
610static int fuse_dir_open(struct inode *inode, struct file *file)
611{
612 return fuse_open_common(inode, file, 1);
613}
614
615static int fuse_dir_release(struct inode *inode, struct file *file)
616{
617 return fuse_release_common(inode, file, 1);
618}
619
620static int fuse_dir_fsync(struct file *file, struct dentry *de, int datasync)
621{
622 /* nfsd can call this with no file */
623 return file ? fuse_fsync_common(file, de, datasync, 1) : 0;
624}
625
626static unsigned iattr_to_fattr(struct iattr *iattr, struct fuse_attr *fattr)
627{
628 unsigned ivalid = iattr->ia_valid;
629 unsigned fvalid = 0;
630
631 memset(fattr, 0, sizeof(*fattr));
632
633 if (ivalid & ATTR_MODE)
634 fvalid |= FATTR_MODE, fattr->mode = iattr->ia_mode;
635 if (ivalid & ATTR_UID)
636 fvalid |= FATTR_UID, fattr->uid = iattr->ia_uid;
637 if (ivalid & ATTR_GID)
638 fvalid |= FATTR_GID, fattr->gid = iattr->ia_gid;
639 if (ivalid & ATTR_SIZE)
640 fvalid |= FATTR_SIZE, fattr->size = iattr->ia_size;
641 /* You can only _set_ these together (they may change by themselves) */
642 if ((ivalid & (ATTR_ATIME | ATTR_MTIME)) == (ATTR_ATIME | ATTR_MTIME)) {
643 fvalid |= FATTR_ATIME | FATTR_MTIME;
644 fattr->atime = iattr->ia_atime.tv_sec;
645 fattr->mtime = iattr->ia_mtime.tv_sec;
646 }
647
648 return fvalid;
649}
650
651static int fuse_setattr(struct dentry *entry, struct iattr *attr)
652{
653 struct inode *inode = entry->d_inode;
654 struct fuse_conn *fc = get_fuse_conn(inode);
655 struct fuse_inode *fi = get_fuse_inode(inode);
656 struct fuse_req *req;
657 struct fuse_setattr_in inarg;
658 struct fuse_attr_out outarg;
659 int err;
660 int is_truncate = 0;
661
662 if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
663 err = inode_change_ok(inode, attr);
664 if (err)
665 return err;
666 }
667
668 if (attr->ia_valid & ATTR_SIZE) {
669 unsigned long limit;
670 is_truncate = 1;
671 limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
672 if (limit != RLIM_INFINITY && attr->ia_size > (loff_t) limit) {
673 send_sig(SIGXFSZ, current, 0);
674 return -EFBIG;
675 }
676 }
677
678 req = fuse_get_request(fc);
679 if (!req)
680 return -EINTR;
681
682 memset(&inarg, 0, sizeof(inarg));
683 inarg.valid = iattr_to_fattr(attr, &inarg.attr);
684 req->in.h.opcode = FUSE_SETATTR;
685 req->in.h.nodeid = get_node_id(inode);
686 req->inode = inode;
687 req->in.numargs = 1;
688 req->in.args[0].size = sizeof(inarg);
689 req->in.args[0].value = &inarg;
690 req->out.numargs = 1;
691 req->out.args[0].size = sizeof(outarg);
692 req->out.args[0].value = &outarg;
693 request_send(fc, req);
694 err = req->out.h.error;
695 fuse_put_request(fc, req);
696 if (!err) {
697 if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
698 make_bad_inode(inode);
699 err = -EIO;
700 } else {
701 if (is_truncate) {
702 loff_t origsize = i_size_read(inode);
703 i_size_write(inode, outarg.attr.size);
704 if (origsize > outarg.attr.size)
705 vmtruncate(inode, outarg.attr.size);
706 }
707 fuse_change_attributes(inode, &outarg.attr);
708 fi->i_time = time_to_jiffies(outarg.attr_valid,
709 outarg.attr_valid_nsec);
710 }
711 } else if (err == -EINTR)
712 fuse_invalidate_attr(inode);
713
714 return err;
715}
716
717static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
718 struct kstat *stat)
719{
720 struct inode *inode = entry->d_inode;
721 int err = fuse_revalidate(entry);
722 if (!err)
723 generic_fillattr(inode, stat);
724
725 return err;
726}
727
728static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
729 struct nameidata *nd)
730{
731 struct inode *inode;
732 int err = fuse_lookup_iget(dir, entry, &inode);
733 if (err)
734 return ERR_PTR(err);
735 if (inode && S_ISDIR(inode->i_mode)) {
736 /* Don't allow creating an alias to a directory */
737 struct dentry *alias = d_find_alias(inode);
738 if (alias && !(alias->d_flags & DCACHE_DISCONNECTED)) {
739 dput(alias);
740 iput(inode);
741 return ERR_PTR(-EIO);
742 }
743 }
744 return d_splice_alias(inode, entry);
745}
746
747static int fuse_setxattr(struct dentry *entry, const char *name,
748 const void *value, size_t size, int flags)
749{
750 struct inode *inode = entry->d_inode;
751 struct fuse_conn *fc = get_fuse_conn(inode);
752 struct fuse_req *req;
753 struct fuse_setxattr_in inarg;
754 int err;
755
756 if (size > FUSE_XATTR_SIZE_MAX)
757 return -E2BIG;
758
759 if (fc->no_setxattr)
760 return -EOPNOTSUPP;
761
762 req = fuse_get_request(fc);
763 if (!req)
764 return -EINTR;
765
766 memset(&inarg, 0, sizeof(inarg));
767 inarg.size = size;
768 inarg.flags = flags;
769 req->in.h.opcode = FUSE_SETXATTR;
770 req->in.h.nodeid = get_node_id(inode);
771 req->inode = inode;
772 req->in.numargs = 3;
773 req->in.args[0].size = sizeof(inarg);
774 req->in.args[0].value = &inarg;
775 req->in.args[1].size = strlen(name) + 1;
776 req->in.args[1].value = name;
777 req->in.args[2].size = size;
778 req->in.args[2].value = value;
779 request_send(fc, req);
780 err = req->out.h.error;
781 fuse_put_request(fc, req);
782 if (err == -ENOSYS) {
783 fc->no_setxattr = 1;
784 err = -EOPNOTSUPP;
785 }
786 return err;
787}
788
789static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
790 void *value, size_t size)
791{
792 struct inode *inode = entry->d_inode;
793 struct fuse_conn *fc = get_fuse_conn(inode);
794 struct fuse_req *req;
795 struct fuse_getxattr_in inarg;
796 struct fuse_getxattr_out outarg;
797 ssize_t ret;
798
799 if (fc->no_getxattr)
800 return -EOPNOTSUPP;
801
802 req = fuse_get_request(fc);
803 if (!req)
804 return -EINTR;
805
806 memset(&inarg, 0, sizeof(inarg));
807 inarg.size = size;
808 req->in.h.opcode = FUSE_GETXATTR;
809 req->in.h.nodeid = get_node_id(inode);
810 req->inode = inode;
811 req->in.numargs = 2;
812 req->in.args[0].size = sizeof(inarg);
813 req->in.args[0].value = &inarg;
814 req->in.args[1].size = strlen(name) + 1;
815 req->in.args[1].value = name;
816 /* This is really two different operations rolled into one */
817 req->out.numargs = 1;
818 if (size) {
819 req->out.argvar = 1;
820 req->out.args[0].size = size;
821 req->out.args[0].value = value;
822 } else {
823 req->out.args[0].size = sizeof(outarg);
824 req->out.args[0].value = &outarg;
825 }
826 request_send(fc, req);
827 ret = req->out.h.error;
828 if (!ret)
829 ret = size ? req->out.args[0].size : outarg.size;
830 else {
831 if (ret == -ENOSYS) {
832 fc->no_getxattr = 1;
833 ret = -EOPNOTSUPP;
834 }
835 }
836 fuse_put_request(fc, req);
837 return ret;
838}
839
840static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
841{
842 struct inode *inode = entry->d_inode;
843 struct fuse_conn *fc = get_fuse_conn(inode);
844 struct fuse_req *req;
845 struct fuse_getxattr_in inarg;
846 struct fuse_getxattr_out outarg;
847 ssize_t ret;
848
849 if (fc->no_listxattr)
850 return -EOPNOTSUPP;
851
852 req = fuse_get_request(fc);
853 if (!req)
854 return -EINTR;
855
856 memset(&inarg, 0, sizeof(inarg));
857 inarg.size = size;
858 req->in.h.opcode = FUSE_LISTXATTR;
859 req->in.h.nodeid = get_node_id(inode);
860 req->inode = inode;
861 req->in.numargs = 1;
862 req->in.args[0].size = sizeof(inarg);
863 req->in.args[0].value = &inarg;
864 /* This is really two different operations rolled into one */
865 req->out.numargs = 1;
866 if (size) {
867 req->out.argvar = 1;
868 req->out.args[0].size = size;
869 req->out.args[0].value = list;
870 } else {
871 req->out.args[0].size = sizeof(outarg);
872 req->out.args[0].value = &outarg;
873 }
874 request_send(fc, req);
875 ret = req->out.h.error;
876 if (!ret)
877 ret = size ? req->out.args[0].size : outarg.size;
878 else {
879 if (ret == -ENOSYS) {
880 fc->no_listxattr = 1;
881 ret = -EOPNOTSUPP;
882 }
883 }
884 fuse_put_request(fc, req);
885 return ret;
886}
887
888static int fuse_removexattr(struct dentry *entry, const char *name)
889{
890 struct inode *inode = entry->d_inode;
891 struct fuse_conn *fc = get_fuse_conn(inode);
892 struct fuse_req *req;
893 int err;
894
895 if (fc->no_removexattr)
896 return -EOPNOTSUPP;
897
898 req = fuse_get_request(fc);
899 if (!req)
900 return -EINTR;
901
902 req->in.h.opcode = FUSE_REMOVEXATTR;
903 req->in.h.nodeid = get_node_id(inode);
904 req->inode = inode;
905 req->in.numargs = 1;
906 req->in.args[0].size = strlen(name) + 1;
907 req->in.args[0].value = name;
908 request_send(fc, req);
909 err = req->out.h.error;
910 fuse_put_request(fc, req);
911 if (err == -ENOSYS) {
912 fc->no_removexattr = 1;
913 err = -EOPNOTSUPP;
914 }
915 return err;
916}
917
918static struct inode_operations fuse_dir_inode_operations = {
919 .lookup = fuse_lookup,
920 .mkdir = fuse_mkdir,
921 .symlink = fuse_symlink,
922 .unlink = fuse_unlink,
923 .rmdir = fuse_rmdir,
924 .rename = fuse_rename,
925 .link = fuse_link,
926 .setattr = fuse_setattr,
927 .create = fuse_create,
928 .mknod = fuse_mknod,
929 .permission = fuse_permission,
930 .getattr = fuse_getattr,
931 .setxattr = fuse_setxattr,
932 .getxattr = fuse_getxattr,
933 .listxattr = fuse_listxattr,
934 .removexattr = fuse_removexattr,
935};
936
937static struct file_operations fuse_dir_operations = {
938 .llseek = generic_file_llseek,
939 .read = generic_read_dir,
940 .readdir = fuse_readdir,
941 .open = fuse_dir_open,
942 .release = fuse_dir_release,
943 .fsync = fuse_dir_fsync,
944};
945
946static struct inode_operations fuse_common_inode_operations = {
947 .setattr = fuse_setattr,
948 .permission = fuse_permission,
949 .getattr = fuse_getattr,
950 .setxattr = fuse_setxattr,
951 .getxattr = fuse_getxattr,
952 .listxattr = fuse_listxattr,
953 .removexattr = fuse_removexattr,
954};
955
956static struct inode_operations fuse_symlink_inode_operations = {
957 .setattr = fuse_setattr,
958 .follow_link = fuse_follow_link,
959 .put_link = fuse_put_link,
960 .readlink = generic_readlink,
961 .getattr = fuse_getattr,
962 .setxattr = fuse_setxattr,
963 .getxattr = fuse_getxattr,
964 .listxattr = fuse_listxattr,
965 .removexattr = fuse_removexattr,
966};
967
968void fuse_init_common(struct inode *inode)
969{
970 inode->i_op = &fuse_common_inode_operations;
971}
972
973void fuse_init_dir(struct inode *inode)
974{
975 inode->i_op = &fuse_dir_inode_operations;
976 inode->i_fop = &fuse_dir_operations;
977}
978
979void fuse_init_symlink(struct inode *inode)
980{
981 inode->i_op = &fuse_symlink_inode_operations;
982}
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
new file mode 100644
index 000000000000..6454022b0536
--- /dev/null
+++ b/fs/fuse/file.c
@@ -0,0 +1,555 @@
1/*
2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu>
4
5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING.
7*/
8
9#include "fuse_i.h"
10
11#include <linux/pagemap.h>
12#include <linux/slab.h>
13#include <linux/kernel.h>
14
15static struct file_operations fuse_direct_io_file_operations;
16
17int fuse_open_common(struct inode *inode, struct file *file, int isdir)
18{
19 struct fuse_conn *fc = get_fuse_conn(inode);
20 struct fuse_req *req;
21 struct fuse_open_in inarg;
22 struct fuse_open_out outarg;
23 struct fuse_file *ff;
24 int err;
25
26 err = generic_file_open(inode, file);
27 if (err)
28 return err;
29
30 /* If opening the root node, no lookup has been performed on
31 it, so the attributes must be refreshed */
32 if (get_node_id(inode) == FUSE_ROOT_ID) {
33 int err = fuse_do_getattr(inode);
34 if (err)
35 return err;
36 }
37
38 req = fuse_get_request(fc);
39 if (!req)
40 return -EINTR;
41
42 err = -ENOMEM;
43 ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL);
44 if (!ff)
45 goto out_put_request;
46
47 ff->release_req = fuse_request_alloc();
48 if (!ff->release_req) {
49 kfree(ff);
50 goto out_put_request;
51 }
52
53 memset(&inarg, 0, sizeof(inarg));
54 inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
55 req->in.h.opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
56 req->in.h.nodeid = get_node_id(inode);
57 req->inode = inode;
58 req->in.numargs = 1;
59 req->in.args[0].size = sizeof(inarg);
60 req->in.args[0].value = &inarg;
61 req->out.numargs = 1;
62 req->out.args[0].size = sizeof(outarg);
63 req->out.args[0].value = &outarg;
64 request_send(fc, req);
65 err = req->out.h.error;
66 if (err) {
67 fuse_request_free(ff->release_req);
68 kfree(ff);
69 } else {
70 if (!isdir && (outarg.open_flags & FOPEN_DIRECT_IO))
71 file->f_op = &fuse_direct_io_file_operations;
72 if (!(outarg.open_flags & FOPEN_KEEP_CACHE))
73 invalidate_inode_pages(inode->i_mapping);
74 ff->fh = outarg.fh;
75 file->private_data = ff;
76 }
77
78 out_put_request:
79 fuse_put_request(fc, req);
80 return err;
81}
82
83int fuse_release_common(struct inode *inode, struct file *file, int isdir)
84{
85 struct fuse_conn *fc = get_fuse_conn(inode);
86 struct fuse_file *ff = file->private_data;
87 struct fuse_req *req = ff->release_req;
88 struct fuse_release_in *inarg = &req->misc.release_in;
89
90 inarg->fh = ff->fh;
91 inarg->flags = file->f_flags & ~O_EXCL;
92 req->in.h.opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE;
93 req->in.h.nodeid = get_node_id(inode);
94 req->inode = inode;
95 req->in.numargs = 1;
96 req->in.args[0].size = sizeof(struct fuse_release_in);
97 req->in.args[0].value = inarg;
98 request_send_background(fc, req);
99 kfree(ff);
100
101 /* Return value is ignored by VFS */
102 return 0;
103}
104
105static int fuse_open(struct inode *inode, struct file *file)
106{
107 return fuse_open_common(inode, file, 0);
108}
109
110static int fuse_release(struct inode *inode, struct file *file)
111{
112 return fuse_release_common(inode, file, 0);
113}
114
115static int fuse_flush(struct file *file)
116{
117 struct inode *inode = file->f_dentry->d_inode;
118 struct fuse_conn *fc = get_fuse_conn(inode);
119 struct fuse_file *ff = file->private_data;
120 struct fuse_req *req;
121 struct fuse_flush_in inarg;
122 int err;
123
124 if (fc->no_flush)
125 return 0;
126
127 req = fuse_get_request(fc);
128 if (!req)
129 return -EINTR;
130
131 memset(&inarg, 0, sizeof(inarg));
132 inarg.fh = ff->fh;
133 req->in.h.opcode = FUSE_FLUSH;
134 req->in.h.nodeid = get_node_id(inode);
135 req->inode = inode;
136 req->file = file;
137 req->in.numargs = 1;
138 req->in.args[0].size = sizeof(inarg);
139 req->in.args[0].value = &inarg;
140 request_send(fc, req);
141 err = req->out.h.error;
142 fuse_put_request(fc, req);
143 if (err == -ENOSYS) {
144 fc->no_flush = 1;
145 err = 0;
146 }
147 return err;
148}
149
150int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
151 int isdir)
152{
153 struct inode *inode = de->d_inode;
154 struct fuse_conn *fc = get_fuse_conn(inode);
155 struct fuse_file *ff = file->private_data;
156 struct fuse_req *req;
157 struct fuse_fsync_in inarg;
158 int err;
159
160 if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
161 return 0;
162
163 req = fuse_get_request(fc);
164 if (!req)
165 return -EINTR;
166
167 memset(&inarg, 0, sizeof(inarg));
168 inarg.fh = ff->fh;
169 inarg.fsync_flags = datasync ? 1 : 0;
170 req->in.h.opcode = isdir ? FUSE_FSYNCDIR : FUSE_FSYNC;
171 req->in.h.nodeid = get_node_id(inode);
172 req->inode = inode;
173 req->file = file;
174 req->in.numargs = 1;
175 req->in.args[0].size = sizeof(inarg);
176 req->in.args[0].value = &inarg;
177 request_send(fc, req);
178 err = req->out.h.error;
179 fuse_put_request(fc, req);
180 if (err == -ENOSYS) {
181 if (isdir)
182 fc->no_fsyncdir = 1;
183 else
184 fc->no_fsync = 1;
185 err = 0;
186 }
187 return err;
188}
189
190static int fuse_fsync(struct file *file, struct dentry *de, int datasync)
191{
192 return fuse_fsync_common(file, de, datasync, 0);
193}
194
195size_t fuse_send_read_common(struct fuse_req *req, struct file *file,
196 struct inode *inode, loff_t pos, size_t count,
197 int isdir)
198{
199 struct fuse_conn *fc = get_fuse_conn(inode);
200 struct fuse_file *ff = file->private_data;
201 struct fuse_read_in inarg;
202
203 memset(&inarg, 0, sizeof(struct fuse_read_in));
204 inarg.fh = ff->fh;
205 inarg.offset = pos;
206 inarg.size = count;
207 req->in.h.opcode = isdir ? FUSE_READDIR : FUSE_READ;
208 req->in.h.nodeid = get_node_id(inode);
209 req->inode = inode;
210 req->file = file;
211 req->in.numargs = 1;
212 req->in.args[0].size = sizeof(struct fuse_read_in);
213 req->in.args[0].value = &inarg;
214 req->out.argpages = 1;
215 req->out.argvar = 1;
216 req->out.numargs = 1;
217 req->out.args[0].size = count;
218 request_send(fc, req);
219 return req->out.args[0].size;
220}
221
222static inline size_t fuse_send_read(struct fuse_req *req, struct file *file,
223 struct inode *inode, loff_t pos,
224 size_t count)
225{
226 return fuse_send_read_common(req, file, inode, pos, count, 0);
227}
228
229static int fuse_readpage(struct file *file, struct page *page)
230{
231 struct inode *inode = page->mapping->host;
232 struct fuse_conn *fc = get_fuse_conn(inode);
233 loff_t pos = (loff_t) page->index << PAGE_CACHE_SHIFT;
234 struct fuse_req *req = fuse_get_request(fc);
235 int err = -EINTR;
236 if (!req)
237 goto out;
238
239 req->out.page_zeroing = 1;
240 req->num_pages = 1;
241 req->pages[0] = page;
242 fuse_send_read(req, file, inode, pos, PAGE_CACHE_SIZE);
243 err = req->out.h.error;
244 fuse_put_request(fc, req);
245 if (!err)
246 SetPageUptodate(page);
247 fuse_invalidate_attr(inode); /* atime changed */
248 out:
249 unlock_page(page);
250 return err;
251}
252
253static int fuse_send_readpages(struct fuse_req *req, struct file *file,
254 struct inode *inode)
255{
256 loff_t pos = (loff_t) req->pages[0]->index << PAGE_CACHE_SHIFT;
257 size_t count = req->num_pages << PAGE_CACHE_SHIFT;
258 unsigned i;
259 req->out.page_zeroing = 1;
260 fuse_send_read(req, file, inode, pos, count);
261 for (i = 0; i < req->num_pages; i++) {
262 struct page *page = req->pages[i];
263 if (!req->out.h.error)
264 SetPageUptodate(page);
265 unlock_page(page);
266 }
267 return req->out.h.error;
268}
269
270struct fuse_readpages_data {
271 struct fuse_req *req;
272 struct file *file;
273 struct inode *inode;
274};
275
276static int fuse_readpages_fill(void *_data, struct page *page)
277{
278 struct fuse_readpages_data *data = _data;
279 struct fuse_req *req = data->req;
280 struct inode *inode = data->inode;
281 struct fuse_conn *fc = get_fuse_conn(inode);
282
283 if (req->num_pages &&
284 (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
285 (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
286 req->pages[req->num_pages - 1]->index + 1 != page->index)) {
287 int err = fuse_send_readpages(req, data->file, inode);
288 if (err) {
289 unlock_page(page);
290 return err;
291 }
292 fuse_reset_request(req);
293 }
294 req->pages[req->num_pages] = page;
295 req->num_pages ++;
296 return 0;
297}
298
299static int fuse_readpages(struct file *file, struct address_space *mapping,
300 struct list_head *pages, unsigned nr_pages)
301{
302 struct inode *inode = mapping->host;
303 struct fuse_conn *fc = get_fuse_conn(inode);
304 struct fuse_readpages_data data;
305 int err;
306 data.file = file;
307 data.inode = inode;
308 data.req = fuse_get_request(fc);
309 if (!data.req)
310 return -EINTR;
311
312 err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
313 if (!err && data.req->num_pages)
314 err = fuse_send_readpages(data.req, file, inode);
315 fuse_put_request(fc, data.req);
316 fuse_invalidate_attr(inode); /* atime changed */
317 return err;
318}
319
320static size_t fuse_send_write(struct fuse_req *req, struct file *file,
321 struct inode *inode, loff_t pos, size_t count)
322{
323 struct fuse_conn *fc = get_fuse_conn(inode);
324 struct fuse_file *ff = file->private_data;
325 struct fuse_write_in inarg;
326 struct fuse_write_out outarg;
327
328 memset(&inarg, 0, sizeof(struct fuse_write_in));
329 inarg.fh = ff->fh;
330 inarg.offset = pos;
331 inarg.size = count;
332 req->in.h.opcode = FUSE_WRITE;
333 req->in.h.nodeid = get_node_id(inode);
334 req->inode = inode;
335 req->file = file;
336 req->in.argpages = 1;
337 req->in.numargs = 2;
338 req->in.args[0].size = sizeof(struct fuse_write_in);
339 req->in.args[0].value = &inarg;
340 req->in.args[1].size = count;
341 req->out.numargs = 1;
342 req->out.args[0].size = sizeof(struct fuse_write_out);
343 req->out.args[0].value = &outarg;
344 request_send(fc, req);
345 return outarg.size;
346}
347
348static int fuse_prepare_write(struct file *file, struct page *page,
349 unsigned offset, unsigned to)
350{
351 /* No op */
352 return 0;
353}
354
355static int fuse_commit_write(struct file *file, struct page *page,
356 unsigned offset, unsigned to)
357{
358 int err;
359 size_t nres;
360 unsigned count = to - offset;
361 struct inode *inode = page->mapping->host;
362 struct fuse_conn *fc = get_fuse_conn(inode);
363 loff_t pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + offset;
364 struct fuse_req *req = fuse_get_request(fc);
365 if (!req)
366 return -EINTR;
367
368 req->num_pages = 1;
369 req->pages[0] = page;
370 req->page_offset = offset;
371 nres = fuse_send_write(req, file, inode, pos, count);
372 err = req->out.h.error;
373 fuse_put_request(fc, req);
374 if (!err && nres != count)
375 err = -EIO;
376 if (!err) {
377 pos += count;
378 if (pos > i_size_read(inode))
379 i_size_write(inode, pos);
380
381 if (offset == 0 && to == PAGE_CACHE_SIZE) {
382 clear_page_dirty(page);
383 SetPageUptodate(page);
384 }
385 }
386 fuse_invalidate_attr(inode);
387 return err;
388}
389
390static void fuse_release_user_pages(struct fuse_req *req, int write)
391{
392 unsigned i;
393
394 for (i = 0; i < req->num_pages; i++) {
395 struct page *page = req->pages[i];
396 if (write)
397 set_page_dirty_lock(page);
398 put_page(page);
399 }
400}
401
402static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf,
403 unsigned nbytes, int write)
404{
405 unsigned long user_addr = (unsigned long) buf;
406 unsigned offset = user_addr & ~PAGE_MASK;
407 int npages;
408
409 /* This doesn't work with nfsd */
410 if (!current->mm)
411 return -EPERM;
412
413 nbytes = min(nbytes, (unsigned) FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT);
414 npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
415 npages = min(npages, FUSE_MAX_PAGES_PER_REQ);
416 down_read(&current->mm->mmap_sem);
417 npages = get_user_pages(current, current->mm, user_addr, npages, write,
418 0, req->pages, NULL);
419 up_read(&current->mm->mmap_sem);
420 if (npages < 0)
421 return npages;
422
423 req->num_pages = npages;
424 req->page_offset = offset;
425 return 0;
426}
427
428static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
429 size_t count, loff_t *ppos, int write)
430{
431 struct inode *inode = file->f_dentry->d_inode;
432 struct fuse_conn *fc = get_fuse_conn(inode);
433 size_t nmax = write ? fc->max_write : fc->max_read;
434 loff_t pos = *ppos;
435 ssize_t res = 0;
436 struct fuse_req *req = fuse_get_request(fc);
437 if (!req)
438 return -EINTR;
439
440 while (count) {
441 size_t tmp;
442 size_t nres;
443 size_t nbytes = min(count, nmax);
444 int err = fuse_get_user_pages(req, buf, nbytes, !write);
445 if (err) {
446 res = err;
447 break;
448 }
449 tmp = (req->num_pages << PAGE_SHIFT) - req->page_offset;
450 nbytes = min(nbytes, tmp);
451 if (write)
452 nres = fuse_send_write(req, file, inode, pos, nbytes);
453 else
454 nres = fuse_send_read(req, file, inode, pos, nbytes);
455 fuse_release_user_pages(req, !write);
456 if (req->out.h.error) {
457 if (!res)
458 res = req->out.h.error;
459 break;
460 } else if (nres > nbytes) {
461 res = -EIO;
462 break;
463 }
464 count -= nres;
465 res += nres;
466 pos += nres;
467 buf += nres;
468 if (nres != nbytes)
469 break;
470 if (count)
471 fuse_reset_request(req);
472 }
473 fuse_put_request(fc, req);
474 if (res > 0) {
475 if (write && pos > i_size_read(inode))
476 i_size_write(inode, pos);
477 *ppos = pos;
478 }
479 fuse_invalidate_attr(inode);
480
481 return res;
482}
483
484static ssize_t fuse_direct_read(struct file *file, char __user *buf,
485 size_t count, loff_t *ppos)
486{
487 return fuse_direct_io(file, buf, count, ppos, 0);
488}
489
490static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
491 size_t count, loff_t *ppos)
492{
493 struct inode *inode = file->f_dentry->d_inode;
494 ssize_t res;
495 /* Don't allow parallel writes to the same file */
496 down(&inode->i_sem);
497 res = fuse_direct_io(file, buf, count, ppos, 1);
498 up(&inode->i_sem);
499 return res;
500}
501
502static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
503{
504 if ((vma->vm_flags & VM_SHARED)) {
505 if ((vma->vm_flags & VM_WRITE))
506 return -ENODEV;
507 else
508 vma->vm_flags &= ~VM_MAYWRITE;
509 }
510 return generic_file_mmap(file, vma);
511}
512
513static int fuse_set_page_dirty(struct page *page)
514{
515 printk("fuse_set_page_dirty: should not happen\n");
516 dump_stack();
517 return 0;
518}
519
520static struct file_operations fuse_file_operations = {
521 .llseek = generic_file_llseek,
522 .read = generic_file_read,
523 .write = generic_file_write,
524 .mmap = fuse_file_mmap,
525 .open = fuse_open,
526 .flush = fuse_flush,
527 .release = fuse_release,
528 .fsync = fuse_fsync,
529 .sendfile = generic_file_sendfile,
530};
531
532static struct file_operations fuse_direct_io_file_operations = {
533 .llseek = generic_file_llseek,
534 .read = fuse_direct_read,
535 .write = fuse_direct_write,
536 .open = fuse_open,
537 .flush = fuse_flush,
538 .release = fuse_release,
539 .fsync = fuse_fsync,
540 /* no mmap and sendfile */
541};
542
543static struct address_space_operations fuse_file_aops = {
544 .readpage = fuse_readpage,
545 .prepare_write = fuse_prepare_write,
546 .commit_write = fuse_commit_write,
547 .readpages = fuse_readpages,
548 .set_page_dirty = fuse_set_page_dirty,
549};
550
551void fuse_init_file_inode(struct inode *inode)
552{
553 inode->i_fop = &fuse_file_operations;
554 inode->i_data.a_ops = &fuse_file_aops;
555}
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
new file mode 100644
index 000000000000..24d761518d86
--- /dev/null
+++ b/fs/fuse/fuse_i.h
@@ -0,0 +1,451 @@
1/*
2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu>
4
5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING.
7*/
8
9#include <linux/fuse.h>
10#include <linux/fs.h>
11#include <linux/wait.h>
12#include <linux/list.h>
13#include <linux/spinlock.h>
14#include <linux/mm.h>
15#include <linux/backing-dev.h>
16#include <asm/semaphore.h>
17
18/** Max number of pages that can be used in a single read request */
19#define FUSE_MAX_PAGES_PER_REQ 32
20
21/** If more requests are outstanding, then the operation will block */
22#define FUSE_MAX_OUTSTANDING 10
23
24/** If the FUSE_DEFAULT_PERMISSIONS flag is given, the filesystem
25 module will check permissions based on the file mode. Otherwise no
26 permission checking is done in the kernel */
27#define FUSE_DEFAULT_PERMISSIONS (1 << 0)
28
29/** If the FUSE_ALLOW_OTHER flag is given, then not only the user
30 doing the mount will be allowed to access the filesystem */
31#define FUSE_ALLOW_OTHER (1 << 1)
32
33
34/** FUSE inode */
35struct fuse_inode {
36 /** Inode data */
37 struct inode inode;
38
39 /** Unique ID, which identifies the inode between userspace
40 * and kernel */
41 u64 nodeid;
42
43 /** Number of lookups on this inode */
44 u64 nlookup;
45
46 /** The request used for sending the FORGET message */
47 struct fuse_req *forget_req;
48
49 /** Time in jiffies until the file attributes are valid */
50 unsigned long i_time;
51};
52
53/** FUSE specific file data */
54struct fuse_file {
55 /** Request reserved for flush and release */
56 struct fuse_req *release_req;
57
58 /** File handle used by userspace */
59 u64 fh;
60};
61
62/** One input argument of a request */
63struct fuse_in_arg {
64 unsigned size;
65 const void *value;
66};
67
68/** The request input */
69struct fuse_in {
70 /** The request header */
71 struct fuse_in_header h;
72
73 /** True if the data for the last argument is in req->pages */
74 unsigned argpages:1;
75
76 /** Number of arguments */
77 unsigned numargs;
78
79 /** Array of arguments */
80 struct fuse_in_arg args[3];
81};
82
83/** One output argument of a request */
84struct fuse_arg {
85 unsigned size;
86 void *value;
87};
88
89/** The request output */
90struct fuse_out {
91 /** Header returned from userspace */
92 struct fuse_out_header h;
93
94 /** Last argument is variable length (can be shorter than
95 arg->size) */
96 unsigned argvar:1;
97
98 /** Last argument is a list of pages to copy data to */
99 unsigned argpages:1;
100
101 /** Zero partially or not copied pages */
102 unsigned page_zeroing:1;
103
104 /** Number or arguments */
105 unsigned numargs;
106
107 /** Array of arguments */
108 struct fuse_arg args[3];
109};
110
111struct fuse_req;
112struct fuse_conn;
113
114/**
115 * A request to the client
116 */
117struct fuse_req {
118 /** This can be on either unused_list, pending or processing
119 lists in fuse_conn */
120 struct list_head list;
121
122 /** Entry on the background list */
123 struct list_head bg_entry;
124
125 /** refcount */
126 atomic_t count;
127
128 /** True if the request has reply */
129 unsigned isreply:1;
130
131 /** The request is preallocated */
132 unsigned preallocated:1;
133
134 /** The request was interrupted */
135 unsigned interrupted:1;
136
137 /** Request is sent in the background */
138 unsigned background:1;
139
140 /** Data is being copied to/from the request */
141 unsigned locked:1;
142
143 /** Request has been sent to userspace */
144 unsigned sent:1;
145
146 /** The request is finished */
147 unsigned finished:1;
148
149 /** The request input */
150 struct fuse_in in;
151
152 /** The request output */
153 struct fuse_out out;
154
155 /** Used to wake up the task waiting for completion of request*/
156 wait_queue_head_t waitq;
157
158 /** Data for asynchronous requests */
159 union {
160 struct fuse_forget_in forget_in;
161 struct fuse_release_in release_in;
162 struct fuse_init_in_out init_in_out;
163 } misc;
164
165 /** page vector */
166 struct page *pages[FUSE_MAX_PAGES_PER_REQ];
167
168 /** number of pages in vector */
169 unsigned num_pages;
170
171 /** offset of data on first page */
172 unsigned page_offset;
173
174 /** Inode used in the request */
175 struct inode *inode;
176
177 /** Second inode used in the request (or NULL) */
178 struct inode *inode2;
179
180 /** File used in the request (or NULL) */
181 struct file *file;
182};
183
184/**
185 * A Fuse connection.
186 *
187 * This structure is created, when the filesystem is mounted, and is
188 * destroyed, when the client device is closed and the filesystem is
189 * unmounted.
190 */
191struct fuse_conn {
192 /** Reference count */
193 int count;
194
195 /** The user id for this mount */
196 uid_t user_id;
197
198 /** The group id for this mount */
199 gid_t group_id;
200
201 /** The fuse mount flags for this mount */
202 unsigned flags;
203
204 /** Maximum read size */
205 unsigned max_read;
206
207 /** Maximum write size */
208 unsigned max_write;
209
210 /** Readers of the connection are waiting on this */
211 wait_queue_head_t waitq;
212
213 /** The list of pending requests */
214 struct list_head pending;
215
216 /** The list of requests being processed */
217 struct list_head processing;
218
219 /** Requests put in the background (RELEASE or any other
220 interrupted request) */
221 struct list_head background;
222
223 /** Controls the maximum number of outstanding requests */
224 struct semaphore outstanding_sem;
225
226 /** This counts the number of outstanding requests if
227 outstanding_sem would go negative */
228 unsigned outstanding_debt;
229
230 /** RW semaphore for exclusion with fuse_put_super() */
231 struct rw_semaphore sbput_sem;
232
233 /** The list of unused requests */
234 struct list_head unused_list;
235
236 /** The next unique request id */
237 u64 reqctr;
238
239 /** Mount is active */
240 unsigned mounted : 1;
241
242 /** Connection established */
243 unsigned connected : 1;
244
245 /** Connection failed (version mismatch) */
246 unsigned conn_error : 1;
247
248 /** Is fsync not implemented by fs? */
249 unsigned no_fsync : 1;
250
251 /** Is fsyncdir not implemented by fs? */
252 unsigned no_fsyncdir : 1;
253
254 /** Is flush not implemented by fs? */
255 unsigned no_flush : 1;
256
257 /** Is setxattr not implemented by fs? */
258 unsigned no_setxattr : 1;
259
260 /** Is getxattr not implemented by fs? */
261 unsigned no_getxattr : 1;
262
263 /** Is listxattr not implemented by fs? */
264 unsigned no_listxattr : 1;
265
266 /** Is removexattr not implemented by fs? */
267 unsigned no_removexattr : 1;
268
269 /** Backing dev info */
270 struct backing_dev_info bdi;
271};
272
273static inline struct fuse_conn **get_fuse_conn_super_p(struct super_block *sb)
274{
275 return (struct fuse_conn **) &sb->s_fs_info;
276}
277
278static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
279{
280 return *get_fuse_conn_super_p(sb);
281}
282
283static inline struct fuse_conn *get_fuse_conn(struct inode *inode)
284{
285 return get_fuse_conn_super(inode->i_sb);
286}
287
288static inline struct fuse_inode *get_fuse_inode(struct inode *inode)
289{
290 return container_of(inode, struct fuse_inode, inode);
291}
292
293static inline u64 get_node_id(struct inode *inode)
294{
295 return get_fuse_inode(inode)->nodeid;
296}
297
298/** Device operations */
299extern struct file_operations fuse_dev_operations;
300
301/**
302 * This is the single global spinlock which protects FUSE's structures
303 *
304 * The following data is protected by this lock:
305 *
306 * - the private_data field of the device file
307 * - the s_fs_info field of the super block
308 * - unused_list, pending, processing lists in fuse_conn
309 * - background list in fuse_conn
310 * - the unique request ID counter reqctr in fuse_conn
311 * - the sb (super_block) field in fuse_conn
312 * - the file (device file) field in fuse_conn
313 */
314extern spinlock_t fuse_lock;
315
316/**
317 * Get a filled in inode
318 */
319struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
320 int generation, struct fuse_attr *attr);
321
322/**
323 * Send FORGET command
324 */
325void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
326 unsigned long nodeid, u64 nlookup);
327
328/**
329 * Send READ or READDIR request
330 */
331size_t fuse_send_read_common(struct fuse_req *req, struct file *file,
332 struct inode *inode, loff_t pos, size_t count,
333 int isdir);
334
335/**
336 * Send OPEN or OPENDIR request
337 */
338int fuse_open_common(struct inode *inode, struct file *file, int isdir);
339
340/**
341 * Send RELEASE or RELEASEDIR request
342 */
343int fuse_release_common(struct inode *inode, struct file *file, int isdir);
344
345/**
346 * Send FSYNC or FSYNCDIR request
347 */
348int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
349 int isdir);
350
351/**
352 * Initialise file operations on a regular file
353 */
354void fuse_init_file_inode(struct inode *inode);
355
356/**
357 * Initialise inode operations on regular files and special files
358 */
359void fuse_init_common(struct inode *inode);
360
361/**
362 * Initialise inode and file operations on a directory
363 */
364void fuse_init_dir(struct inode *inode);
365
366/**
367 * Initialise inode operations on a symlink
368 */
369void fuse_init_symlink(struct inode *inode);
370
371/**
372 * Change attributes of an inode
373 */
374void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr);
375
376/**
377 * Check if the connection can be released, and if yes, then free the
378 * connection structure
379 */
380void fuse_release_conn(struct fuse_conn *fc);
381
382/**
383 * Initialize the client device
384 */
385int fuse_dev_init(void);
386
387/**
388 * Cleanup the client device
389 */
390void fuse_dev_cleanup(void);
391
392/**
393 * Allocate a request
394 */
395struct fuse_req *fuse_request_alloc(void);
396
397/**
398 * Free a request
399 */
400void fuse_request_free(struct fuse_req *req);
401
402/**
403 * Reinitialize a request, the preallocated flag is left unmodified
404 */
405void fuse_reset_request(struct fuse_req *req);
406
407/**
408 * Reserve a preallocated request
409 */
410struct fuse_req *fuse_get_request(struct fuse_conn *fc);
411
412/**
413 * Decrement reference count of a request. If count goes to zero put
414 * on unused list (preallocated) or free reqest (not preallocated).
415 */
416void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req);
417
418/**
419 * Send a request (synchronous)
420 */
421void request_send(struct fuse_conn *fc, struct fuse_req *req);
422
423/**
424 * Send a request with no reply
425 */
426void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req);
427
428/**
429 * Send a request in the background
430 */
431void request_send_background(struct fuse_conn *fc, struct fuse_req *req);
432
433/**
434 * Release inodes and file assiciated with background request
435 */
436void fuse_release_background(struct fuse_req *req);
437
438/**
439 * Get the attributes of a file
440 */
441int fuse_do_getattr(struct inode *inode);
442
443/**
444 * Invalidate inode attributes
445 */
446void fuse_invalidate_attr(struct inode *inode);
447
448/**
449 * Send the INIT message
450 */
451void fuse_send_init(struct fuse_conn *fc);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
new file mode 100644
index 000000000000..e69a546844d0
--- /dev/null
+++ b/fs/fuse/inode.c
@@ -0,0 +1,591 @@
1/*
2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu>
4
5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING.
7*/
8
9#include "fuse_i.h"
10
11#include <linux/pagemap.h>
12#include <linux/slab.h>
13#include <linux/file.h>
14#include <linux/mount.h>
15#include <linux/seq_file.h>
16#include <linux/init.h>
17#include <linux/module.h>
18#include <linux/parser.h>
19#include <linux/statfs.h>
20
21MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
22MODULE_DESCRIPTION("Filesystem in Userspace");
23MODULE_LICENSE("GPL");
24
25spinlock_t fuse_lock;
26static kmem_cache_t *fuse_inode_cachep;
27
28#define FUSE_SUPER_MAGIC 0x65735546
29
30struct fuse_mount_data {
31 int fd;
32 unsigned rootmode;
33 unsigned user_id;
34 unsigned group_id;
35 unsigned fd_present : 1;
36 unsigned rootmode_present : 1;
37 unsigned user_id_present : 1;
38 unsigned group_id_present : 1;
39 unsigned flags;
40 unsigned max_read;
41};
42
43static struct inode *fuse_alloc_inode(struct super_block *sb)
44{
45 struct inode *inode;
46 struct fuse_inode *fi;
47
48 inode = kmem_cache_alloc(fuse_inode_cachep, SLAB_KERNEL);
49 if (!inode)
50 return NULL;
51
52 fi = get_fuse_inode(inode);
53 fi->i_time = jiffies - 1;
54 fi->nodeid = 0;
55 fi->nlookup = 0;
56 fi->forget_req = fuse_request_alloc();
57 if (!fi->forget_req) {
58 kmem_cache_free(fuse_inode_cachep, inode);
59 return NULL;
60 }
61
62 return inode;
63}
64
65static void fuse_destroy_inode(struct inode *inode)
66{
67 struct fuse_inode *fi = get_fuse_inode(inode);
68 if (fi->forget_req)
69 fuse_request_free(fi->forget_req);
70 kmem_cache_free(fuse_inode_cachep, inode);
71}
72
73static void fuse_read_inode(struct inode *inode)
74{
75 /* No op */
76}
77
78void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
79 unsigned long nodeid, u64 nlookup)
80{
81 struct fuse_forget_in *inarg = &req->misc.forget_in;
82 inarg->nlookup = nlookup;
83 req->in.h.opcode = FUSE_FORGET;
84 req->in.h.nodeid = nodeid;
85 req->in.numargs = 1;
86 req->in.args[0].size = sizeof(struct fuse_forget_in);
87 req->in.args[0].value = inarg;
88 request_send_noreply(fc, req);
89}
90
91static void fuse_clear_inode(struct inode *inode)
92{
93 if (inode->i_sb->s_flags & MS_ACTIVE) {
94 struct fuse_conn *fc = get_fuse_conn(inode);
95 struct fuse_inode *fi = get_fuse_inode(inode);
96 fuse_send_forget(fc, fi->forget_req, fi->nodeid, fi->nlookup);
97 fi->forget_req = NULL;
98 }
99}
100
101void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr)
102{
103 if (S_ISREG(inode->i_mode) && i_size_read(inode) != attr->size)
104 invalidate_inode_pages(inode->i_mapping);
105
106 inode->i_ino = attr->ino;
107 inode->i_mode = (inode->i_mode & S_IFMT) + (attr->mode & 07777);
108 inode->i_nlink = attr->nlink;
109 inode->i_uid = attr->uid;
110 inode->i_gid = attr->gid;
111 i_size_write(inode, attr->size);
112 inode->i_blksize = PAGE_CACHE_SIZE;
113 inode->i_blocks = attr->blocks;
114 inode->i_atime.tv_sec = attr->atime;
115 inode->i_atime.tv_nsec = attr->atimensec;
116 inode->i_mtime.tv_sec = attr->mtime;
117 inode->i_mtime.tv_nsec = attr->mtimensec;
118 inode->i_ctime.tv_sec = attr->ctime;
119 inode->i_ctime.tv_nsec = attr->ctimensec;
120}
121
122static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
123{
124 inode->i_mode = attr->mode & S_IFMT;
125 i_size_write(inode, attr->size);
126 if (S_ISREG(inode->i_mode)) {
127 fuse_init_common(inode);
128 fuse_init_file_inode(inode);
129 } else if (S_ISDIR(inode->i_mode))
130 fuse_init_dir(inode);
131 else if (S_ISLNK(inode->i_mode))
132 fuse_init_symlink(inode);
133 else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
134 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
135 fuse_init_common(inode);
136 init_special_inode(inode, inode->i_mode,
137 new_decode_dev(attr->rdev));
138 } else {
139 /* Don't let user create weird files */
140 inode->i_mode = S_IFREG;
141 fuse_init_common(inode);
142 fuse_init_file_inode(inode);
143 }
144}
145
146static int fuse_inode_eq(struct inode *inode, void *_nodeidp)
147{
148 unsigned long nodeid = *(unsigned long *) _nodeidp;
149 if (get_node_id(inode) == nodeid)
150 return 1;
151 else
152 return 0;
153}
154
155static int fuse_inode_set(struct inode *inode, void *_nodeidp)
156{
157 unsigned long nodeid = *(unsigned long *) _nodeidp;
158 get_fuse_inode(inode)->nodeid = nodeid;
159 return 0;
160}
161
162struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
163 int generation, struct fuse_attr *attr)
164{
165 struct inode *inode;
166 struct fuse_inode *fi;
167 struct fuse_conn *fc = get_fuse_conn_super(sb);
168 int retried = 0;
169
170 retry:
171 inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &nodeid);
172 if (!inode)
173 return NULL;
174
175 if ((inode->i_state & I_NEW)) {
176 inode->i_flags |= S_NOATIME|S_NOCMTIME;
177 inode->i_generation = generation;
178 inode->i_data.backing_dev_info = &fc->bdi;
179 fuse_init_inode(inode, attr);
180 unlock_new_inode(inode);
181 } else if ((inode->i_mode ^ attr->mode) & S_IFMT) {
182 BUG_ON(retried);
183 /* Inode has changed type, any I/O on the old should fail */
184 make_bad_inode(inode);
185 iput(inode);
186 retried = 1;
187 goto retry;
188 }
189
190 fi = get_fuse_inode(inode);
191 fi->nlookup ++;
192 fuse_change_attributes(inode, attr);
193 return inode;
194}
195
196static void fuse_put_super(struct super_block *sb)
197{
198 struct fuse_conn *fc = get_fuse_conn_super(sb);
199
200 down_write(&fc->sbput_sem);
201 while (!list_empty(&fc->background))
202 fuse_release_background(list_entry(fc->background.next,
203 struct fuse_req, bg_entry));
204
205 spin_lock(&fuse_lock);
206 fc->mounted = 0;
207 fc->user_id = 0;
208 fc->group_id = 0;
209 fc->flags = 0;
210 /* Flush all readers on this fs */
211 wake_up_all(&fc->waitq);
212 up_write(&fc->sbput_sem);
213 fuse_release_conn(fc);
214 spin_unlock(&fuse_lock);
215}
216
217static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr)
218{
219 stbuf->f_type = FUSE_SUPER_MAGIC;
220 stbuf->f_bsize = attr->bsize;
221 stbuf->f_blocks = attr->blocks;
222 stbuf->f_bfree = attr->bfree;
223 stbuf->f_bavail = attr->bavail;
224 stbuf->f_files = attr->files;
225 stbuf->f_ffree = attr->ffree;
226 stbuf->f_namelen = attr->namelen;
227 /* fsid is left zero */
228}
229
230static int fuse_statfs(struct super_block *sb, struct kstatfs *buf)
231{
232 struct fuse_conn *fc = get_fuse_conn_super(sb);
233 struct fuse_req *req;
234 struct fuse_statfs_out outarg;
235 int err;
236
237 req = fuse_get_request(fc);
238 if (!req)
239 return -EINTR;
240
241 req->in.numargs = 0;
242 req->in.h.opcode = FUSE_STATFS;
243 req->out.numargs = 1;
244 req->out.args[0].size = sizeof(outarg);
245 req->out.args[0].value = &outarg;
246 request_send(fc, req);
247 err = req->out.h.error;
248 if (!err)
249 convert_fuse_statfs(buf, &outarg.st);
250 fuse_put_request(fc, req);
251 return err;
252}
253
254enum {
255 OPT_FD,
256 OPT_ROOTMODE,
257 OPT_USER_ID,
258 OPT_GROUP_ID,
259 OPT_DEFAULT_PERMISSIONS,
260 OPT_ALLOW_OTHER,
261 OPT_MAX_READ,
262 OPT_ERR
263};
264
265static match_table_t tokens = {
266 {OPT_FD, "fd=%u"},
267 {OPT_ROOTMODE, "rootmode=%o"},
268 {OPT_USER_ID, "user_id=%u"},
269 {OPT_GROUP_ID, "group_id=%u"},
270 {OPT_DEFAULT_PERMISSIONS, "default_permissions"},
271 {OPT_ALLOW_OTHER, "allow_other"},
272 {OPT_MAX_READ, "max_read=%u"},
273 {OPT_ERR, NULL}
274};
275
276static int parse_fuse_opt(char *opt, struct fuse_mount_data *d)
277{
278 char *p;
279 memset(d, 0, sizeof(struct fuse_mount_data));
280 d->max_read = ~0;
281
282 while ((p = strsep(&opt, ",")) != NULL) {
283 int token;
284 int value;
285 substring_t args[MAX_OPT_ARGS];
286 if (!*p)
287 continue;
288
289 token = match_token(p, tokens, args);
290 switch (token) {
291 case OPT_FD:
292 if (match_int(&args[0], &value))
293 return 0;
294 d->fd = value;
295 d->fd_present = 1;
296 break;
297
298 case OPT_ROOTMODE:
299 if (match_octal(&args[0], &value))
300 return 0;
301 d->rootmode = value;
302 d->rootmode_present = 1;
303 break;
304
305 case OPT_USER_ID:
306 if (match_int(&args[0], &value))
307 return 0;
308 d->user_id = value;
309 d->user_id_present = 1;
310 break;
311
312 case OPT_GROUP_ID:
313 if (match_int(&args[0], &value))
314 return 0;
315 d->group_id = value;
316 d->group_id_present = 1;
317 break;
318
319 case OPT_DEFAULT_PERMISSIONS:
320 d->flags |= FUSE_DEFAULT_PERMISSIONS;
321 break;
322
323 case OPT_ALLOW_OTHER:
324 d->flags |= FUSE_ALLOW_OTHER;
325 break;
326
327 case OPT_MAX_READ:
328 if (match_int(&args[0], &value))
329 return 0;
330 d->max_read = value;
331 break;
332
333 default:
334 return 0;
335 }
336 }
337
338 if (!d->fd_present || !d->rootmode_present ||
339 !d->user_id_present || !d->group_id_present)
340 return 0;
341
342 return 1;
343}
344
345static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
346{
347 struct fuse_conn *fc = get_fuse_conn_super(mnt->mnt_sb);
348
349 seq_printf(m, ",user_id=%u", fc->user_id);
350 seq_printf(m, ",group_id=%u", fc->group_id);
351 if (fc->flags & FUSE_DEFAULT_PERMISSIONS)
352 seq_puts(m, ",default_permissions");
353 if (fc->flags & FUSE_ALLOW_OTHER)
354 seq_puts(m, ",allow_other");
355 if (fc->max_read != ~0)
356 seq_printf(m, ",max_read=%u", fc->max_read);
357 return 0;
358}
359
360static void free_conn(struct fuse_conn *fc)
361{
362 while (!list_empty(&fc->unused_list)) {
363 struct fuse_req *req;
364 req = list_entry(fc->unused_list.next, struct fuse_req, list);
365 list_del(&req->list);
366 fuse_request_free(req);
367 }
368 kfree(fc);
369}
370
371/* Must be called with the fuse lock held */
372void fuse_release_conn(struct fuse_conn *fc)
373{
374 fc->count--;
375 if (!fc->count)
376 free_conn(fc);
377}
378
379static struct fuse_conn *new_conn(void)
380{
381 struct fuse_conn *fc;
382
383 fc = kmalloc(sizeof(*fc), GFP_KERNEL);
384 if (fc != NULL) {
385 int i;
386 memset(fc, 0, sizeof(*fc));
387 init_waitqueue_head(&fc->waitq);
388 INIT_LIST_HEAD(&fc->pending);
389 INIT_LIST_HEAD(&fc->processing);
390 INIT_LIST_HEAD(&fc->unused_list);
391 INIT_LIST_HEAD(&fc->background);
392 sema_init(&fc->outstanding_sem, 0);
393 init_rwsem(&fc->sbput_sem);
394 for (i = 0; i < FUSE_MAX_OUTSTANDING; i++) {
395 struct fuse_req *req = fuse_request_alloc();
396 if (!req) {
397 free_conn(fc);
398 return NULL;
399 }
400 list_add(&req->list, &fc->unused_list);
401 }
402 fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
403 fc->bdi.unplug_io_fn = default_unplug_io_fn;
404 fc->reqctr = 0;
405 }
406 return fc;
407}
408
409static struct fuse_conn *get_conn(struct file *file, struct super_block *sb)
410{
411 struct fuse_conn *fc;
412
413 if (file->f_op != &fuse_dev_operations)
414 return ERR_PTR(-EINVAL);
415 fc = new_conn();
416 if (fc == NULL)
417 return ERR_PTR(-ENOMEM);
418 spin_lock(&fuse_lock);
419 if (file->private_data) {
420 free_conn(fc);
421 fc = ERR_PTR(-EINVAL);
422 } else {
423 file->private_data = fc;
424 *get_fuse_conn_super_p(sb) = fc;
425 fc->mounted = 1;
426 fc->connected = 1;
427 fc->count = 2;
428 }
429 spin_unlock(&fuse_lock);
430 return fc;
431}
432
433static struct inode *get_root_inode(struct super_block *sb, unsigned mode)
434{
435 struct fuse_attr attr;
436 memset(&attr, 0, sizeof(attr));
437
438 attr.mode = mode;
439 attr.ino = FUSE_ROOT_ID;
440 return fuse_iget(sb, 1, 0, &attr);
441}
442
443static struct super_operations fuse_super_operations = {
444 .alloc_inode = fuse_alloc_inode,
445 .destroy_inode = fuse_destroy_inode,
446 .read_inode = fuse_read_inode,
447 .clear_inode = fuse_clear_inode,
448 .put_super = fuse_put_super,
449 .statfs = fuse_statfs,
450 .show_options = fuse_show_options,
451};
452
453static int fuse_fill_super(struct super_block *sb, void *data, int silent)
454{
455 struct fuse_conn *fc;
456 struct inode *root;
457 struct fuse_mount_data d;
458 struct file *file;
459 int err;
460
461 if (!parse_fuse_opt((char *) data, &d))
462 return -EINVAL;
463
464 sb->s_blocksize = PAGE_CACHE_SIZE;
465 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
466 sb->s_magic = FUSE_SUPER_MAGIC;
467 sb->s_op = &fuse_super_operations;
468 sb->s_maxbytes = MAX_LFS_FILESIZE;
469
470 file = fget(d.fd);
471 if (!file)
472 return -EINVAL;
473
474 fc = get_conn(file, sb);
475 fput(file);
476 if (IS_ERR(fc))
477 return PTR_ERR(fc);
478
479 fc->flags = d.flags;
480 fc->user_id = d.user_id;
481 fc->group_id = d.group_id;
482 fc->max_read = d.max_read;
483 if (fc->max_read / PAGE_CACHE_SIZE < fc->bdi.ra_pages)
484 fc->bdi.ra_pages = fc->max_read / PAGE_CACHE_SIZE;
485 fc->max_write = FUSE_MAX_IN / 2;
486
487 err = -ENOMEM;
488 root = get_root_inode(sb, d.rootmode);
489 if (root == NULL)
490 goto err;
491
492 sb->s_root = d_alloc_root(root);
493 if (!sb->s_root) {
494 iput(root);
495 goto err;
496 }
497 fuse_send_init(fc);
498 return 0;
499
500 err:
501 spin_lock(&fuse_lock);
502 fuse_release_conn(fc);
503 spin_unlock(&fuse_lock);
504 return err;
505}
506
507static struct super_block *fuse_get_sb(struct file_system_type *fs_type,
508 int flags, const char *dev_name,
509 void *raw_data)
510{
511 return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super);
512}
513
514static struct file_system_type fuse_fs_type = {
515 .owner = THIS_MODULE,
516 .name = "fuse",
517 .get_sb = fuse_get_sb,
518 .kill_sb = kill_anon_super,
519};
520
521static void fuse_inode_init_once(void *foo, kmem_cache_t *cachep,
522 unsigned long flags)
523{
524 struct inode * inode = foo;
525
526 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
527 SLAB_CTOR_CONSTRUCTOR)
528 inode_init_once(inode);
529}
530
531static int __init fuse_fs_init(void)
532{
533 int err;
534
535 err = register_filesystem(&fuse_fs_type);
536 if (err)
537 printk("fuse: failed to register filesystem\n");
538 else {
539 fuse_inode_cachep = kmem_cache_create("fuse_inode",
540 sizeof(struct fuse_inode),
541 0, SLAB_HWCACHE_ALIGN,
542 fuse_inode_init_once, NULL);
543 if (!fuse_inode_cachep) {
544 unregister_filesystem(&fuse_fs_type);
545 err = -ENOMEM;
546 }
547 }
548
549 return err;
550}
551
552static void fuse_fs_cleanup(void)
553{
554 unregister_filesystem(&fuse_fs_type);
555 kmem_cache_destroy(fuse_inode_cachep);
556}
557
558static int __init fuse_init(void)
559{
560 int res;
561
562 printk("fuse init (API version %i.%i)\n",
563 FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION);
564
565 spin_lock_init(&fuse_lock);
566 res = fuse_fs_init();
567 if (res)
568 goto err;
569
570 res = fuse_dev_init();
571 if (res)
572 goto err_fs_cleanup;
573
574 return 0;
575
576 err_fs_cleanup:
577 fuse_fs_cleanup();
578 err:
579 return res;
580}
581
582static void __exit fuse_exit(void)
583{
584 printk(KERN_DEBUG "fuse exit\n");
585
586 fuse_fs_cleanup();
587 fuse_dev_cleanup();
588}
589
590module_init(fuse_init);
591module_exit(fuse_exit);
diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c
index a096c5a56664..3d5cdc6847c0 100644
--- a/fs/hfs/bnode.c
+++ b/fs/hfs/bnode.c
@@ -13,8 +13,6 @@
13 13
14#include "btree.h" 14#include "btree.h"
15 15
16#define REF_PAGES 0
17
18void hfs_bnode_read(struct hfs_bnode *node, void *buf, 16void hfs_bnode_read(struct hfs_bnode *node, void *buf,
19 int off, int len) 17 int off, int len)
20{ 18{
@@ -289,9 +287,7 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid)
289 page_cache_release(page); 287 page_cache_release(page);
290 goto fail; 288 goto fail;
291 } 289 }
292#if !REF_PAGES
293 page_cache_release(page); 290 page_cache_release(page);
294#endif
295 node->page[i] = page; 291 node->page[i] = page;
296 } 292 }
297 293
@@ -449,13 +445,6 @@ void hfs_bnode_get(struct hfs_bnode *node)
449{ 445{
450 if (node) { 446 if (node) {
451 atomic_inc(&node->refcnt); 447 atomic_inc(&node->refcnt);
452#if REF_PAGES
453 {
454 int i;
455 for (i = 0; i < node->tree->pages_per_bnode; i++)
456 get_page(node->page[i]);
457 }
458#endif
459 dprint(DBG_BNODE_REFS, "get_node(%d:%d): %d\n", 448 dprint(DBG_BNODE_REFS, "get_node(%d:%d): %d\n",
460 node->tree->cnid, node->this, atomic_read(&node->refcnt)); 449 node->tree->cnid, node->this, atomic_read(&node->refcnt));
461 } 450 }
@@ -472,20 +461,12 @@ void hfs_bnode_put(struct hfs_bnode *node)
472 node->tree->cnid, node->this, atomic_read(&node->refcnt)); 461 node->tree->cnid, node->this, atomic_read(&node->refcnt));
473 if (!atomic_read(&node->refcnt)) 462 if (!atomic_read(&node->refcnt))
474 BUG(); 463 BUG();
475 if (!atomic_dec_and_lock(&node->refcnt, &tree->hash_lock)) { 464 if (!atomic_dec_and_lock(&node->refcnt, &tree->hash_lock))
476#if REF_PAGES
477 for (i = 0; i < tree->pages_per_bnode; i++)
478 put_page(node->page[i]);
479#endif
480 return; 465 return;
481 }
482 for (i = 0; i < tree->pages_per_bnode; i++) { 466 for (i = 0; i < tree->pages_per_bnode; i++) {
483 if (!node->page[i]) 467 if (!node->page[i])
484 continue; 468 continue;
485 mark_page_accessed(node->page[i]); 469 mark_page_accessed(node->page[i]);
486#if REF_PAGES
487 put_page(node->page[i]);
488#endif
489 } 470 }
490 471
491 if (test_bit(HFS_BNODE_DELETED, &node->flags)) { 472 if (test_bit(HFS_BNODE_DELETED, &node->flags)) {
diff --git a/fs/hfs/catalog.c b/fs/hfs/catalog.c
index 65dedefcabfc..2fcd679f0238 100644
--- a/fs/hfs/catalog.c
+++ b/fs/hfs/catalog.c
@@ -20,12 +20,12 @@
20 * 20 *
21 * Given the ID of the parent and the name build a search key. 21 * Given the ID of the parent and the name build a search key.
22 */ 22 */
23void hfs_cat_build_key(btree_key *key, u32 parent, struct qstr *name) 23void hfs_cat_build_key(struct super_block *sb, btree_key *key, u32 parent, struct qstr *name)
24{ 24{
25 key->cat.reserved = 0; 25 key->cat.reserved = 0;
26 key->cat.ParID = cpu_to_be32(parent); 26 key->cat.ParID = cpu_to_be32(parent);
27 if (name) { 27 if (name) {
28 hfs_triv2mac(&key->cat.CName, name); 28 hfs_asc2mac(sb, &key->cat.CName, name);
29 key->key_len = 6 + key->cat.CName.len; 29 key->key_len = 6 + key->cat.CName.len;
30 } else { 30 } else {
31 memset(&key->cat.CName, 0, sizeof(struct hfs_name)); 31 memset(&key->cat.CName, 0, sizeof(struct hfs_name));
@@ -62,13 +62,14 @@ static int hfs_cat_build_record(hfs_cat_rec *rec, u32 cnid, struct inode *inode)
62 } 62 }
63} 63}
64 64
65static int hfs_cat_build_thread(hfs_cat_rec *rec, int type, 65static int hfs_cat_build_thread(struct super_block *sb,
66 hfs_cat_rec *rec, int type,
66 u32 parentid, struct qstr *name) 67 u32 parentid, struct qstr *name)
67{ 68{
68 rec->type = type; 69 rec->type = type;
69 memset(rec->thread.reserved, 0, sizeof(rec->thread.reserved)); 70 memset(rec->thread.reserved, 0, sizeof(rec->thread.reserved));
70 rec->thread.ParID = cpu_to_be32(parentid); 71 rec->thread.ParID = cpu_to_be32(parentid);
71 hfs_triv2mac(&rec->thread.CName, name); 72 hfs_asc2mac(sb, &rec->thread.CName, name);
72 return sizeof(struct hfs_cat_thread); 73 return sizeof(struct hfs_cat_thread);
73} 74}
74 75
@@ -93,8 +94,8 @@ int hfs_cat_create(u32 cnid, struct inode *dir, struct qstr *str, struct inode *
93 sb = dir->i_sb; 94 sb = dir->i_sb;
94 hfs_find_init(HFS_SB(sb)->cat_tree, &fd); 95 hfs_find_init(HFS_SB(sb)->cat_tree, &fd);
95 96
96 hfs_cat_build_key(fd.search_key, cnid, NULL); 97 hfs_cat_build_key(sb, fd.search_key, cnid, NULL);
97 entry_size = hfs_cat_build_thread(&entry, S_ISDIR(inode->i_mode) ? 98 entry_size = hfs_cat_build_thread(sb, &entry, S_ISDIR(inode->i_mode) ?
98 HFS_CDR_THD : HFS_CDR_FTH, 99 HFS_CDR_THD : HFS_CDR_FTH,
99 dir->i_ino, str); 100 dir->i_ino, str);
100 err = hfs_brec_find(&fd); 101 err = hfs_brec_find(&fd);
@@ -107,7 +108,7 @@ int hfs_cat_create(u32 cnid, struct inode *dir, struct qstr *str, struct inode *
107 if (err) 108 if (err)
108 goto err2; 109 goto err2;
109 110
110 hfs_cat_build_key(fd.search_key, dir->i_ino, str); 111 hfs_cat_build_key(sb, fd.search_key, dir->i_ino, str);
111 entry_size = hfs_cat_build_record(&entry, cnid, inode); 112 entry_size = hfs_cat_build_record(&entry, cnid, inode);
112 err = hfs_brec_find(&fd); 113 err = hfs_brec_find(&fd);
113 if (err != -ENOENT) { 114 if (err != -ENOENT) {
@@ -127,7 +128,7 @@ int hfs_cat_create(u32 cnid, struct inode *dir, struct qstr *str, struct inode *
127 return 0; 128 return 0;
128 129
129err1: 130err1:
130 hfs_cat_build_key(fd.search_key, cnid, NULL); 131 hfs_cat_build_key(sb, fd.search_key, cnid, NULL);
131 if (!hfs_brec_find(&fd)) 132 if (!hfs_brec_find(&fd))
132 hfs_brec_remove(&fd); 133 hfs_brec_remove(&fd);
133err2: 134err2:
@@ -176,7 +177,7 @@ int hfs_cat_find_brec(struct super_block *sb, u32 cnid,
176 hfs_cat_rec rec; 177 hfs_cat_rec rec;
177 int res, len, type; 178 int res, len, type;
178 179
179 hfs_cat_build_key(fd->search_key, cnid, NULL); 180 hfs_cat_build_key(sb, fd->search_key, cnid, NULL);
180 res = hfs_brec_read(fd, &rec, sizeof(rec)); 181 res = hfs_brec_read(fd, &rec, sizeof(rec));
181 if (res) 182 if (res)
182 return res; 183 return res;
@@ -211,7 +212,7 @@ int hfs_cat_delete(u32 cnid, struct inode *dir, struct qstr *str)
211 sb = dir->i_sb; 212 sb = dir->i_sb;
212 hfs_find_init(HFS_SB(sb)->cat_tree, &fd); 213 hfs_find_init(HFS_SB(sb)->cat_tree, &fd);
213 214
214 hfs_cat_build_key(fd.search_key, dir->i_ino, str); 215 hfs_cat_build_key(sb, fd.search_key, dir->i_ino, str);
215 res = hfs_brec_find(&fd); 216 res = hfs_brec_find(&fd);
216 if (res) 217 if (res)
217 goto out; 218 goto out;
@@ -239,7 +240,7 @@ int hfs_cat_delete(u32 cnid, struct inode *dir, struct qstr *str)
239 if (res) 240 if (res)
240 goto out; 241 goto out;
241 242
242 hfs_cat_build_key(fd.search_key, cnid, NULL); 243 hfs_cat_build_key(sb, fd.search_key, cnid, NULL);
243 res = hfs_brec_find(&fd); 244 res = hfs_brec_find(&fd);
244 if (!res) { 245 if (!res) {
245 res = hfs_brec_remove(&fd); 246 res = hfs_brec_remove(&fd);
@@ -280,7 +281,7 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, struct qstr *src_name,
280 dst_fd = src_fd; 281 dst_fd = src_fd;
281 282
282 /* find the old dir entry and read the data */ 283 /* find the old dir entry and read the data */
283 hfs_cat_build_key(src_fd.search_key, src_dir->i_ino, src_name); 284 hfs_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name);
284 err = hfs_brec_find(&src_fd); 285 err = hfs_brec_find(&src_fd);
285 if (err) 286 if (err)
286 goto out; 287 goto out;
@@ -289,7 +290,7 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, struct qstr *src_name,
289 src_fd.entrylength); 290 src_fd.entrylength);
290 291
291 /* create new dir entry with the data from the old entry */ 292 /* create new dir entry with the data from the old entry */
292 hfs_cat_build_key(dst_fd.search_key, dst_dir->i_ino, dst_name); 293 hfs_cat_build_key(sb, dst_fd.search_key, dst_dir->i_ino, dst_name);
293 err = hfs_brec_find(&dst_fd); 294 err = hfs_brec_find(&dst_fd);
294 if (err != -ENOENT) { 295 if (err != -ENOENT) {
295 if (!err) 296 if (!err)
@@ -305,7 +306,7 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, struct qstr *src_name,
305 mark_inode_dirty(dst_dir); 306 mark_inode_dirty(dst_dir);
306 307
307 /* finally remove the old entry */ 308 /* finally remove the old entry */
308 hfs_cat_build_key(src_fd.search_key, src_dir->i_ino, src_name); 309 hfs_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name);
309 err = hfs_brec_find(&src_fd); 310 err = hfs_brec_find(&src_fd);
310 if (err) 311 if (err)
311 goto out; 312 goto out;
@@ -321,7 +322,7 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, struct qstr *src_name,
321 goto out; 322 goto out;
322 323
323 /* remove old thread entry */ 324 /* remove old thread entry */
324 hfs_cat_build_key(src_fd.search_key, cnid, NULL); 325 hfs_cat_build_key(sb, src_fd.search_key, cnid, NULL);
325 err = hfs_brec_find(&src_fd); 326 err = hfs_brec_find(&src_fd);
326 if (err) 327 if (err)
327 goto out; 328 goto out;
@@ -330,8 +331,8 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, struct qstr *src_name,
330 goto out; 331 goto out;
331 332
332 /* create new thread entry */ 333 /* create new thread entry */
333 hfs_cat_build_key(dst_fd.search_key, cnid, NULL); 334 hfs_cat_build_key(sb, dst_fd.search_key, cnid, NULL);
334 entry_size = hfs_cat_build_thread(&entry, type == HFS_CDR_FIL ? HFS_CDR_FTH : HFS_CDR_THD, 335 entry_size = hfs_cat_build_thread(sb, &entry, type == HFS_CDR_FIL ? HFS_CDR_FTH : HFS_CDR_THD,
335 dst_dir->i_ino, dst_name); 336 dst_dir->i_ino, dst_name);
336 err = hfs_brec_find(&dst_fd); 337 err = hfs_brec_find(&dst_fd);
337 if (err != -ENOENT) { 338 if (err != -ENOENT) {
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index c55998262aed..e1f24befba58 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -28,7 +28,7 @@ static struct dentry *hfs_lookup(struct inode *dir, struct dentry *dentry,
28 dentry->d_op = &hfs_dentry_operations; 28 dentry->d_op = &hfs_dentry_operations;
29 29
30 hfs_find_init(HFS_SB(dir->i_sb)->cat_tree, &fd); 30 hfs_find_init(HFS_SB(dir->i_sb)->cat_tree, &fd);
31 hfs_cat_build_key(fd.search_key, dir->i_ino, &dentry->d_name); 31 hfs_cat_build_key(dir->i_sb, fd.search_key, dir->i_ino, &dentry->d_name);
32 res = hfs_brec_read(&fd, &rec, sizeof(rec)); 32 res = hfs_brec_read(&fd, &rec, sizeof(rec));
33 if (res) { 33 if (res) {
34 hfs_find_exit(&fd); 34 hfs_find_exit(&fd);
@@ -56,7 +56,7 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
56 struct inode *inode = filp->f_dentry->d_inode; 56 struct inode *inode = filp->f_dentry->d_inode;
57 struct super_block *sb = inode->i_sb; 57 struct super_block *sb = inode->i_sb;
58 int len, err; 58 int len, err;
59 char strbuf[HFS_NAMELEN + 1]; 59 char strbuf[HFS_MAX_NAMELEN];
60 union hfs_cat_rec entry; 60 union hfs_cat_rec entry;
61 struct hfs_find_data fd; 61 struct hfs_find_data fd;
62 struct hfs_readdir_data *rd; 62 struct hfs_readdir_data *rd;
@@ -66,7 +66,7 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
66 return 0; 66 return 0;
67 67
68 hfs_find_init(HFS_SB(sb)->cat_tree, &fd); 68 hfs_find_init(HFS_SB(sb)->cat_tree, &fd);
69 hfs_cat_build_key(fd.search_key, inode->i_ino, NULL); 69 hfs_cat_build_key(sb, fd.search_key, inode->i_ino, NULL);
70 err = hfs_brec_find(&fd); 70 err = hfs_brec_find(&fd);
71 if (err) 71 if (err)
72 goto out; 72 goto out;
@@ -111,7 +111,7 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
111 } 111 }
112 hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength); 112 hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength);
113 type = entry.type; 113 type = entry.type;
114 len = hfs_mac2triv(strbuf, &fd.key->cat.CName); 114 len = hfs_mac2asc(sb, strbuf, &fd.key->cat.CName);
115 if (type == HFS_CDR_DIR) { 115 if (type == HFS_CDR_DIR) {
116 if (fd.entrylength < sizeof(struct hfs_cat_dir)) { 116 if (fd.entrylength < sizeof(struct hfs_cat_dir)) {
117 printk("HFS: small dir entry\n"); 117 printk("HFS: small dir entry\n");
@@ -307,7 +307,8 @@ static int hfs_rename(struct inode *old_dir, struct dentry *old_dentry,
307 old_dir, &old_dentry->d_name, 307 old_dir, &old_dentry->d_name,
308 new_dir, &new_dentry->d_name); 308 new_dir, &new_dentry->d_name);
309 if (!res) 309 if (!res)
310 hfs_cat_build_key((btree_key *)&HFS_I(old_dentry->d_inode)->cat_key, 310 hfs_cat_build_key(old_dir->i_sb,
311 (btree_key *)&HFS_I(old_dentry->d_inode)->cat_key,
311 new_dir->i_ino, &new_dentry->d_name); 312 new_dir->i_ino, &new_dentry->d_name);
312 return res; 313 return res;
313} 314}
diff --git a/fs/hfs/hfs.h b/fs/hfs/hfs.h
index df6b33adee3b..88099ab1a180 100644
--- a/fs/hfs/hfs.h
+++ b/fs/hfs/hfs.h
@@ -25,6 +25,7 @@
25#define HFS_SECTOR_SIZE 512 /* size of an HFS sector */ 25#define HFS_SECTOR_SIZE 512 /* size of an HFS sector */
26#define HFS_SECTOR_SIZE_BITS 9 /* log_2(HFS_SECTOR_SIZE) */ 26#define HFS_SECTOR_SIZE_BITS 9 /* log_2(HFS_SECTOR_SIZE) */
27#define HFS_NAMELEN 31 /* maximum length of an HFS filename */ 27#define HFS_NAMELEN 31 /* maximum length of an HFS filename */
28#define HFS_MAX_NAMELEN 128
28#define HFS_MAX_VALENCE 32767U 29#define HFS_MAX_VALENCE 32767U
29 30
30/* Meanings of the drAtrb field of the MDB, 31/* Meanings of the drAtrb field of the MDB,
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index 0dc8ef8e14de..aae019aadf88 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -141,6 +141,8 @@ struct hfs_sb_info {
141 141
142 int session, part; 142 int session, part;
143 143
144 struct nls_table *nls_io, *nls_disk;
145
144 struct semaphore bitmap_lock; 146 struct semaphore bitmap_lock;
145 147
146 unsigned long flags; 148 unsigned long flags;
@@ -168,7 +170,7 @@ extern int hfs_cat_create(u32, struct inode *, struct qstr *, struct inode *);
168extern int hfs_cat_delete(u32, struct inode *, struct qstr *); 170extern int hfs_cat_delete(u32, struct inode *, struct qstr *);
169extern int hfs_cat_move(u32, struct inode *, struct qstr *, 171extern int hfs_cat_move(u32, struct inode *, struct qstr *,
170 struct inode *, struct qstr *); 172 struct inode *, struct qstr *);
171extern void hfs_cat_build_key(btree_key *, u32, struct qstr *); 173extern void hfs_cat_build_key(struct super_block *, btree_key *, u32, struct qstr *);
172 174
173/* dir.c */ 175/* dir.c */
174extern struct file_operations hfs_dir_operations; 176extern struct file_operations hfs_dir_operations;
@@ -222,8 +224,8 @@ extern int hfs_strcmp(const unsigned char *, unsigned int,
222extern int hfs_compare_dentry(struct dentry *, struct qstr *, struct qstr *); 224extern int hfs_compare_dentry(struct dentry *, struct qstr *, struct qstr *);
223 225
224/* trans.c */ 226/* trans.c */
225extern void hfs_triv2mac(struct hfs_name *, struct qstr *); 227extern void hfs_asc2mac(struct super_block *, struct hfs_name *, struct qstr *);
226extern int hfs_mac2triv(char *, const struct hfs_name *); 228extern int hfs_mac2asc(struct super_block *, char *, const struct hfs_name *);
227 229
228extern struct timezone sys_tz; 230extern struct timezone sys_tz;
229 231
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 751912326094..f1570b9f9de3 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -160,7 +160,7 @@ struct inode *hfs_new_inode(struct inode *dir, struct qstr *name, int mode)
160 160
161 init_MUTEX(&HFS_I(inode)->extents_lock); 161 init_MUTEX(&HFS_I(inode)->extents_lock);
162 INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list); 162 INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list);
163 hfs_cat_build_key((btree_key *)&HFS_I(inode)->cat_key, dir->i_ino, name); 163 hfs_cat_build_key(sb, (btree_key *)&HFS_I(inode)->cat_key, dir->i_ino, name);
164 inode->i_ino = HFS_SB(sb)->next_id++; 164 inode->i_ino = HFS_SB(sb)->next_id++;
165 inode->i_mode = mode; 165 inode->i_mode = mode;
166 inode->i_uid = current->fsuid; 166 inode->i_uid = current->fsuid;
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
index 217e32f37e0b..0a473f79c89f 100644
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -10,6 +10,7 @@
10 10
11#include <linux/cdrom.h> 11#include <linux/cdrom.h>
12#include <linux/genhd.h> 12#include <linux/genhd.h>
13#include <linux/nls.h>
13 14
14#include "hfs_fs.h" 15#include "hfs_fs.h"
15#include "btree.h" 16#include "btree.h"
@@ -343,6 +344,11 @@ void hfs_mdb_put(struct super_block *sb)
343 brelse(HFS_SB(sb)->mdb_bh); 344 brelse(HFS_SB(sb)->mdb_bh);
344 brelse(HFS_SB(sb)->alt_mdb_bh); 345 brelse(HFS_SB(sb)->alt_mdb_bh);
345 346
347 if (HFS_SB(sb)->nls_io)
348 unload_nls(HFS_SB(sb)->nls_io);
349 if (HFS_SB(sb)->nls_disk)
350 unload_nls(HFS_SB(sb)->nls_disk);
351
346 kfree(HFS_SB(sb)); 352 kfree(HFS_SB(sb));
347 sb->s_fs_info = NULL; 353 sb->s_fs_info = NULL;
348} 354}
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index ab783f6afa3b..c5074aeafcae 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -15,8 +15,11 @@
15#include <linux/config.h> 15#include <linux/config.h>
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/blkdev.h> 17#include <linux/blkdev.h>
18#include <linux/mount.h>
18#include <linux/init.h> 19#include <linux/init.h>
20#include <linux/nls.h>
19#include <linux/parser.h> 21#include <linux/parser.h>
22#include <linux/seq_file.h>
20#include <linux/vfs.h> 23#include <linux/vfs.h>
21 24
22#include "hfs_fs.h" 25#include "hfs_fs.h"
@@ -111,6 +114,32 @@ static int hfs_remount(struct super_block *sb, int *flags, char *data)
111 return 0; 114 return 0;
112} 115}
113 116
117static int hfs_show_options(struct seq_file *seq, struct vfsmount *mnt)
118{
119 struct hfs_sb_info *sbi = HFS_SB(mnt->mnt_sb);
120
121 if (sbi->s_creator != cpu_to_be32(0x3f3f3f3f))
122 seq_printf(seq, ",creator=%.4s", (char *)&sbi->s_creator);
123 if (sbi->s_type != cpu_to_be32(0x3f3f3f3f))
124 seq_printf(seq, ",type=%.4s", (char *)&sbi->s_type);
125 seq_printf(seq, ",uid=%u,gid=%u", sbi->s_uid, sbi->s_gid);
126 if (sbi->s_file_umask != 0133)
127 seq_printf(seq, ",file_umask=%o", sbi->s_file_umask);
128 if (sbi->s_dir_umask != 0022)
129 seq_printf(seq, ",dir_umask=%o", sbi->s_dir_umask);
130 if (sbi->part >= 0)
131 seq_printf(seq, ",part=%u", sbi->part);
132 if (sbi->session >= 0)
133 seq_printf(seq, ",session=%u", sbi->session);
134 if (sbi->nls_disk)
135 seq_printf(seq, ",codepage=%s", sbi->nls_disk->charset);
136 if (sbi->nls_io)
137 seq_printf(seq, ",iocharset=%s", sbi->nls_io->charset);
138 if (sbi->s_quiet)
139 seq_printf(seq, ",quiet");
140 return 0;
141}
142
114static struct inode *hfs_alloc_inode(struct super_block *sb) 143static struct inode *hfs_alloc_inode(struct super_block *sb)
115{ 144{
116 struct hfs_inode_info *i; 145 struct hfs_inode_info *i;
@@ -133,11 +162,13 @@ static struct super_operations hfs_super_operations = {
133 .write_super = hfs_write_super, 162 .write_super = hfs_write_super,
134 .statfs = hfs_statfs, 163 .statfs = hfs_statfs,
135 .remount_fs = hfs_remount, 164 .remount_fs = hfs_remount,
165 .show_options = hfs_show_options,
136}; 166};
137 167
138enum { 168enum {
139 opt_uid, opt_gid, opt_umask, opt_file_umask, opt_dir_umask, 169 opt_uid, opt_gid, opt_umask, opt_file_umask, opt_dir_umask,
140 opt_part, opt_session, opt_type, opt_creator, opt_quiet, 170 opt_part, opt_session, opt_type, opt_creator, opt_quiet,
171 opt_codepage, opt_iocharset,
141 opt_err 172 opt_err
142}; 173};
143 174
@@ -152,6 +183,8 @@ static match_table_t tokens = {
152 { opt_type, "type=%s" }, 183 { opt_type, "type=%s" },
153 { opt_creator, "creator=%s" }, 184 { opt_creator, "creator=%s" },
154 { opt_quiet, "quiet" }, 185 { opt_quiet, "quiet" },
186 { opt_codepage, "codepage=%s" },
187 { opt_iocharset, "iocharset=%s" },
155 { opt_err, NULL } 188 { opt_err, NULL }
156}; 189};
157 190
@@ -257,11 +290,46 @@ static int parse_options(char *options, struct hfs_sb_info *hsb)
257 case opt_quiet: 290 case opt_quiet:
258 hsb->s_quiet = 1; 291 hsb->s_quiet = 1;
259 break; 292 break;
293 case opt_codepage:
294 if (hsb->nls_disk) {
295 printk("HFS+-fs: unable to change codepage\n");
296 return 0;
297 }
298 p = match_strdup(&args[0]);
299 hsb->nls_disk = load_nls(p);
300 if (!hsb->nls_disk) {
301 printk("HFS+-fs: unable to load codepage \"%s\"\n", p);
302 kfree(p);
303 return 0;
304 }
305 kfree(p);
306 break;
307 case opt_iocharset:
308 if (hsb->nls_io) {
309 printk("HFS: unable to change iocharset\n");
310 return 0;
311 }
312 p = match_strdup(&args[0]);
313 hsb->nls_io = load_nls(p);
314 if (!hsb->nls_io) {
315 printk("HFS: unable to load iocharset \"%s\"\n", p);
316 kfree(p);
317 return 0;
318 }
319 kfree(p);
320 break;
260 default: 321 default:
261 return 0; 322 return 0;
262 } 323 }
263 } 324 }
264 325
326 if (hsb->nls_disk && !hsb->nls_io) {
327 hsb->nls_io = load_nls_default();
328 if (!hsb->nls_io) {
329 printk("HFS: unable to load default iocharset\n");
330 return 0;
331 }
332 }
265 hsb->s_dir_umask &= 0777; 333 hsb->s_dir_umask &= 0777;
266 hsb->s_file_umask &= 0577; 334 hsb->s_file_umask &= 0577;
267 335
diff --git a/fs/hfs/trans.c b/fs/hfs/trans.c
index fb9720abbadd..e673a88b8ae7 100644
--- a/fs/hfs/trans.c
+++ b/fs/hfs/trans.c
@@ -9,12 +9,15 @@
9 * with ':' vs. '/' as the path-element separator. 9 * with ':' vs. '/' as the path-element separator.
10 */ 10 */
11 11
12#include <linux/types.h>
13#include <linux/nls.h>
14
12#include "hfs_fs.h" 15#include "hfs_fs.h"
13 16
14/*================ Global functions ================*/ 17/*================ Global functions ================*/
15 18
16/* 19/*
17 * hfs_mac2triv() 20 * hfs_mac2asc()
18 * 21 *
19 * Given a 'Pascal String' (a string preceded by a length byte) in 22 * Given a 'Pascal String' (a string preceded by a length byte) in
20 * the Macintosh character set produce the corresponding filename using 23 * the Macintosh character set produce the corresponding filename using
@@ -27,23 +30,58 @@
27 * by ':' which never appears in HFS filenames. All other characters 30 * by ':' which never appears in HFS filenames. All other characters
28 * are passed unchanged from input to output. 31 * are passed unchanged from input to output.
29 */ 32 */
30int hfs_mac2triv(char *out, const struct hfs_name *in) 33int hfs_mac2asc(struct super_block *sb, char *out, const struct hfs_name *in)
31{ 34{
32 const char *p; 35 struct nls_table *nls_disk = HFS_SB(sb)->nls_disk;
33 char c; 36 struct nls_table *nls_io = HFS_SB(sb)->nls_io;
34 int i, len; 37 const char *src;
38 char *dst;
39 int srclen, dstlen, size;
40
41 src = in->name;
42 srclen = in->len;
43 dst = out;
44 dstlen = HFS_MAX_NAMELEN;
45 if (nls_io) {
46 wchar_t ch;
35 47
36 len = in->len; 48 while (srclen > 0) {
37 p = in->name; 49 if (nls_disk) {
38 for (i = 0; i < len; i++) { 50 size = nls_disk->char2uni(src, srclen, &ch);
39 c = *p++; 51 if (size <= 0) {
40 *out++ = c == '/' ? ':' : c; 52 ch = '?';
53 size = 1;
54 }
55 src += size;
56 srclen -= size;
57 } else {
58 ch = *src++;
59 srclen--;
60 }
61 if (ch == '/')
62 ch = ':';
63 size = nls_io->uni2char(ch, dst, dstlen);
64 if (size < 0) {
65 if (size == -ENAMETOOLONG)
66 goto out;
67 *dst = '?';
68 size = 1;
69 }
70 dst += size;
71 dstlen -= size;
72 }
73 } else {
74 char ch;
75
76 while (--srclen >= 0)
77 *dst++ = (ch = *src++) == '/' ? ':' : ch;
41 } 78 }
42 return i; 79out:
80 return dst - out;
43} 81}
44 82
45/* 83/*
46 * hfs_triv2mac() 84 * hfs_asc2mac()
47 * 85 *
48 * Given an ASCII string (not null-terminated) and its length, 86 * Given an ASCII string (not null-terminated) and its length,
49 * generate the corresponding filename in the Macintosh character set 87 * generate the corresponding filename in the Macintosh character set
@@ -54,19 +92,57 @@ int hfs_mac2triv(char *out, const struct hfs_name *in)
54 * This routine is a inverse to hfs_mac2triv(). 92 * This routine is a inverse to hfs_mac2triv().
55 * A ':' is replaced by a '/'. 93 * A ':' is replaced by a '/'.
56 */ 94 */
57void hfs_triv2mac(struct hfs_name *out, struct qstr *in) 95void hfs_asc2mac(struct super_block *sb, struct hfs_name *out, struct qstr *in)
58{ 96{
97 struct nls_table *nls_disk = HFS_SB(sb)->nls_disk;
98 struct nls_table *nls_io = HFS_SB(sb)->nls_io;
59 const char *src; 99 const char *src;
60 char *dst, c; 100 char *dst;
61 int i, len; 101 int srclen, dstlen, size;
62 102
63 out->len = len = min((unsigned int)HFS_NAMELEN, in->len);
64 src = in->name; 103 src = in->name;
104 srclen = in->len;
65 dst = out->name; 105 dst = out->name;
66 for (i = 0; i < len; i++) { 106 dstlen = HFS_NAMELEN;
67 c = *src++; 107 if (nls_io) {
68 *dst++ = c == ':' ? '/' : c; 108 wchar_t ch;
109
110 while (srclen > 0) {
111 size = nls_io->char2uni(src, srclen, &ch);
112 if (size < 0) {
113 ch = '?';
114 size = 1;
115 }
116 src += size;
117 srclen -= size;
118 if (ch == ':')
119 ch = '/';
120 if (nls_disk) {
121 size = nls_disk->uni2char(ch, dst, dstlen);
122 if (size < 0) {
123 if (size == -ENAMETOOLONG)
124 goto out;
125 *dst = '?';
126 size = 1;
127 }
128 dst += size;
129 dstlen -= size;
130 } else {
131 *dst++ = ch > 0xff ? '?' : ch;
132 dstlen--;
133 }
134 }
135 } else {
136 char ch;
137
138 if (dstlen > srclen)
139 dstlen = srclen;
140 while (--dstlen >= 0)
141 *dst++ = (ch = *src++) == ':' ? '/' : ch;
69 } 142 }
70 for (; i < HFS_NAMELEN; i++) 143out:
144 out->len = dst - (char *)out->name;
145 dstlen = HFS_NAMELEN - out->len;
146 while (--dstlen >= 0)
71 *dst++ = 0; 147 *dst++ = 0;
72} 148}
diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c
index 8868d3b766fd..b85abc6e6f83 100644
--- a/fs/hfsplus/bnode.c
+++ b/fs/hfsplus/bnode.c
@@ -18,8 +18,6 @@
18#include "hfsplus_fs.h" 18#include "hfsplus_fs.h"
19#include "hfsplus_raw.h" 19#include "hfsplus_raw.h"
20 20
21#define REF_PAGES 0
22
23/* Copy a specified range of bytes from the raw data of a node */ 21/* Copy a specified range of bytes from the raw data of a node */
24void hfs_bnode_read(struct hfs_bnode *node, void *buf, int off, int len) 22void hfs_bnode_read(struct hfs_bnode *node, void *buf, int off, int len)
25{ 23{
@@ -450,9 +448,7 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid)
450 page_cache_release(page); 448 page_cache_release(page);
451 goto fail; 449 goto fail;
452 } 450 }
453#if !REF_PAGES
454 page_cache_release(page); 451 page_cache_release(page);
455#endif
456 node->page[i] = page; 452 node->page[i] = page;
457 } 453 }
458 454
@@ -612,13 +608,6 @@ void hfs_bnode_get(struct hfs_bnode *node)
612{ 608{
613 if (node) { 609 if (node) {
614 atomic_inc(&node->refcnt); 610 atomic_inc(&node->refcnt);
615#if REF_PAGES
616 {
617 int i;
618 for (i = 0; i < node->tree->pages_per_bnode; i++)
619 get_page(node->page[i]);
620 }
621#endif
622 dprint(DBG_BNODE_REFS, "get_node(%d:%d): %d\n", 611 dprint(DBG_BNODE_REFS, "get_node(%d:%d): %d\n",
623 node->tree->cnid, node->this, atomic_read(&node->refcnt)); 612 node->tree->cnid, node->this, atomic_read(&node->refcnt));
624 } 613 }
@@ -635,20 +624,12 @@ void hfs_bnode_put(struct hfs_bnode *node)
635 node->tree->cnid, node->this, atomic_read(&node->refcnt)); 624 node->tree->cnid, node->this, atomic_read(&node->refcnt));
636 if (!atomic_read(&node->refcnt)) 625 if (!atomic_read(&node->refcnt))
637 BUG(); 626 BUG();
638 if (!atomic_dec_and_lock(&node->refcnt, &tree->hash_lock)) { 627 if (!atomic_dec_and_lock(&node->refcnt, &tree->hash_lock))
639#if REF_PAGES
640 for (i = 0; i < tree->pages_per_bnode; i++)
641 put_page(node->page[i]);
642#endif
643 return; 628 return;
644 }
645 for (i = 0; i < tree->pages_per_bnode; i++) { 629 for (i = 0; i < tree->pages_per_bnode; i++) {
646 if (!node->page[i]) 630 if (!node->page[i])
647 continue; 631 continue;
648 mark_page_accessed(node->page[i]); 632 mark_page_accessed(node->page[i]);
649#if REF_PAGES
650 put_page(node->page[i]);
651#endif
652 } 633 }
653 634
654 if (test_bit(HFS_BNODE_DELETED, &node->flags)) { 635 if (test_bit(HFS_BNODE_DELETED, &node->flags)) {
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 533094a570df..2bc0cdd30e56 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -343,8 +343,9 @@ ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name,
343ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size); 343ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size);
344 344
345/* options.c */ 345/* options.c */
346int parse_options(char *, struct hfsplus_sb_info *); 346int hfsplus_parse_options(char *, struct hfsplus_sb_info *);
347void fill_defaults(struct hfsplus_sb_info *); 347void hfsplus_fill_defaults(struct hfsplus_sb_info *);
348int hfsplus_show_options(struct seq_file *, struct vfsmount *);
348 349
349/* tables.c */ 350/* tables.c */
350extern u16 hfsplus_case_fold_table[]; 351extern u16 hfsplus_case_fold_table[];
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c
index 1cca0102c98d..cca0818aa4ca 100644
--- a/fs/hfsplus/options.c
+++ b/fs/hfsplus/options.c
@@ -13,6 +13,8 @@
13#include <linux/sched.h> 13#include <linux/sched.h>
14#include <linux/parser.h> 14#include <linux/parser.h>
15#include <linux/nls.h> 15#include <linux/nls.h>
16#include <linux/mount.h>
17#include <linux/seq_file.h>
16#include "hfsplus_fs.h" 18#include "hfsplus_fs.h"
17 19
18enum { 20enum {
@@ -38,7 +40,7 @@ static match_table_t tokens = {
38}; 40};
39 41
40/* Initialize an options object to reasonable defaults */ 42/* Initialize an options object to reasonable defaults */
41void fill_defaults(struct hfsplus_sb_info *opts) 43void hfsplus_fill_defaults(struct hfsplus_sb_info *opts)
42{ 44{
43 if (!opts) 45 if (!opts)
44 return; 46 return;
@@ -63,7 +65,7 @@ static inline int match_fourchar(substring_t *arg, u32 *result)
63 65
64/* Parse options from mount. Returns 0 on failure */ 66/* Parse options from mount. Returns 0 on failure */
65/* input is the options passed to mount() as a string */ 67/* input is the options passed to mount() as a string */
66int parse_options(char *input, struct hfsplus_sb_info *sbi) 68int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi)
67{ 69{
68 char *p; 70 char *p;
69 substring_t args[MAX_OPT_ARGS]; 71 substring_t args[MAX_OPT_ARGS];
@@ -160,3 +162,23 @@ done:
160 162
161 return 1; 163 return 1;
162} 164}
165
166int hfsplus_show_options(struct seq_file *seq, struct vfsmount *mnt)
167{
168 struct hfsplus_sb_info *sbi = &HFSPLUS_SB(mnt->mnt_sb);
169
170 if (sbi->creator != HFSPLUS_DEF_CR_TYPE)
171 seq_printf(seq, ",creator=%.4s", (char *)&sbi->creator);
172 if (sbi->type != HFSPLUS_DEF_CR_TYPE)
173 seq_printf(seq, ",type=%.4s", (char *)&sbi->type);
174 seq_printf(seq, ",umask=%o,uid=%u,gid=%u", sbi->umask, sbi->uid, sbi->gid);
175 if (sbi->part >= 0)
176 seq_printf(seq, ",part=%u", sbi->part);
177 if (sbi->session >= 0)
178 seq_printf(seq, ",session=%u", sbi->session);
179 if (sbi->nls)
180 seq_printf(seq, ",nls=%s", sbi->nls->charset);
181 if (sbi->flags & HFSPLUS_SB_NODECOMPOSE)
182 seq_printf(seq, ",nodecompose");
183 return 0;
184}
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index d55ad67b8e42..fd0f0f050e1d 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -217,8 +217,7 @@ static void hfsplus_put_super(struct super_block *sb)
217 vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_UNMNT); 217 vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_UNMNT);
218 vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_INCNSTNT); 218 vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_INCNSTNT);
219 mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh); 219 mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh);
220 ll_rw_block(WRITE, 1, &HFSPLUS_SB(sb).s_vhbh); 220 sync_dirty_buffer(HFSPLUS_SB(sb).s_vhbh);
221 wait_on_buffer(HFSPLUS_SB(sb).s_vhbh);
222 } 221 }
223 222
224 hfs_btree_close(HFSPLUS_SB(sb).cat_tree); 223 hfs_btree_close(HFSPLUS_SB(sb).cat_tree);
@@ -277,6 +276,7 @@ static struct super_operations hfsplus_sops = {
277 .write_super = hfsplus_write_super, 276 .write_super = hfsplus_write_super,
278 .statfs = hfsplus_statfs, 277 .statfs = hfsplus_statfs,
279 .remount_fs = hfsplus_remount, 278 .remount_fs = hfsplus_remount,
279 .show_options = hfsplus_show_options,
280}; 280};
281 281
282static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) 282static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
@@ -297,8 +297,8 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
297 memset(sbi, 0, sizeof(HFSPLUS_SB(sb))); 297 memset(sbi, 0, sizeof(HFSPLUS_SB(sb)));
298 sb->s_fs_info = sbi; 298 sb->s_fs_info = sbi;
299 INIT_HLIST_HEAD(&sbi->rsrc_inodes); 299 INIT_HLIST_HEAD(&sbi->rsrc_inodes);
300 fill_defaults(sbi); 300 hfsplus_fill_defaults(sbi);
301 if (!parse_options(data, sbi)) { 301 if (!hfsplus_parse_options(data, sbi)) {
302 if (!silent) 302 if (!silent)
303 printk("HFS+-fs: unable to parse mount options\n"); 303 printk("HFS+-fs: unable to parse mount options\n");
304 err = -EINVAL; 304 err = -EINVAL;
@@ -415,8 +415,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
415 vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT); 415 vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT);
416 vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT); 416 vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT);
417 mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh); 417 mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh);
418 ll_rw_block(WRITE, 1, &HFSPLUS_SB(sb).s_vhbh); 418 sync_dirty_buffer(HFSPLUS_SB(sb).s_vhbh);
419 wait_on_buffer(HFSPLUS_SB(sb).s_vhbh);
420 419
421 if (!HFSPLUS_SB(sb).hidden_dir) { 420 if (!HFSPLUS_SB(sb).hidden_dir) {
422 printk("HFS+: create hidden dir...\n"); 421 printk("HFS+: create hidden dir...\n");
diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index 67bca0d4a33b..cca3fb693f99 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -49,7 +49,6 @@ struct hostfs_iattr {
49 struct timespec ia_atime; 49 struct timespec ia_atime;
50 struct timespec ia_mtime; 50 struct timespec ia_mtime;
51 struct timespec ia_ctime; 51 struct timespec ia_ctime;
52 unsigned int ia_attr_flags;
53}; 52};
54 53
55extern int stat_file(const char *path, unsigned long long *inode_out, 54extern int stat_file(const char *path, unsigned long long *inode_out,
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index b2d18200a003..59c5062cd63f 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -284,6 +284,7 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb)
284 284
285static void hostfs_delete_inode(struct inode *inode) 285static void hostfs_delete_inode(struct inode *inode)
286{ 286{
287 truncate_inode_pages(&inode->i_data, 0);
287 if(HOSTFS_I(inode)->fd != -1) { 288 if(HOSTFS_I(inode)->fd != -1) {
288 close_file(&HOSTFS_I(inode)->fd); 289 close_file(&HOSTFS_I(inode)->fd);
289 HOSTFS_I(inode)->fd = -1; 290 HOSTFS_I(inode)->fd = -1;
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 38b1741fa539..e3d17e9ea6c1 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -284,6 +284,7 @@ void hpfs_write_if_changed(struct inode *inode)
284 284
285void hpfs_delete_inode(struct inode *inode) 285void hpfs_delete_inode(struct inode *inode)
286{ 286{
287 truncate_inode_pages(&inode->i_data, 0);
287 lock_kernel(); 288 lock_kernel();
288 hpfs_remove_fnode(inode->i_sb, inode->i_ino); 289 hpfs_remove_fnode(inode->i_sb, inode->i_ino);
289 unlock_kernel(); 290 unlock_kernel();
diff --git a/fs/inode.c b/fs/inode.c
index e57f1724db3e..f80a79ff156b 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1034,19 +1034,21 @@ void generic_delete_inode(struct inode *inode)
1034 inodes_stat.nr_inodes--; 1034 inodes_stat.nr_inodes--;
1035 spin_unlock(&inode_lock); 1035 spin_unlock(&inode_lock);
1036 1036
1037 if (inode->i_data.nrpages)
1038 truncate_inode_pages(&inode->i_data, 0);
1039
1040 security_inode_delete(inode); 1037 security_inode_delete(inode);
1041 1038
1042 if (op->delete_inode) { 1039 if (op->delete_inode) {
1043 void (*delete)(struct inode *) = op->delete_inode; 1040 void (*delete)(struct inode *) = op->delete_inode;
1044 if (!is_bad_inode(inode)) 1041 if (!is_bad_inode(inode))
1045 DQUOT_INIT(inode); 1042 DQUOT_INIT(inode);
1046 /* s_op->delete_inode internally recalls clear_inode() */ 1043 /* Filesystems implementing their own
1044 * s_op->delete_inode are required to call
1045 * truncate_inode_pages and clear_inode()
1046 * internally */
1047 delete(inode); 1047 delete(inode);
1048 } else 1048 } else {
1049 truncate_inode_pages(&inode->i_data, 0);
1049 clear_inode(inode); 1050 clear_inode(inode);
1051 }
1050 spin_lock(&inode_lock); 1052 spin_lock(&inode_lock);
1051 hlist_del_init(&inode->i_hash); 1053 hlist_del_init(&inode->i_hash);
1052 spin_unlock(&inode_lock); 1054 spin_unlock(&inode_lock);
@@ -1195,9 +1197,6 @@ void update_atime(struct inode *inode)
1195 if (!timespec_equal(&inode->i_atime, &now)) { 1197 if (!timespec_equal(&inode->i_atime, &now)) {
1196 inode->i_atime = now; 1198 inode->i_atime = now;
1197 mark_inode_dirty_sync(inode); 1199 mark_inode_dirty_sync(inode);
1198 } else {
1199 if (!timespec_equal(&inode->i_atime, &now))
1200 inode->i_atime = now;
1201 } 1200 }
1202} 1201}
1203 1202
diff --git a/fs/inotify.c b/fs/inotify.c
index 2e4e2a57708c..a37e9fb1da58 100644
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -37,6 +37,7 @@
37#include <asm/ioctls.h> 37#include <asm/ioctls.h>
38 38
39static atomic_t inotify_cookie; 39static atomic_t inotify_cookie;
40static atomic_t inotify_watches;
40 41
41static kmem_cache_t *watch_cachep; 42static kmem_cache_t *watch_cachep;
42static kmem_cache_t *event_cachep; 43static kmem_cache_t *event_cachep;
@@ -422,6 +423,7 @@ static struct inotify_watch *create_watch(struct inotify_device *dev,
422 get_inotify_watch(watch); 423 get_inotify_watch(watch);
423 424
424 atomic_inc(&dev->user->inotify_watches); 425 atomic_inc(&dev->user->inotify_watches);
426 atomic_inc(&inotify_watches);
425 427
426 return watch; 428 return watch;
427} 429}
@@ -454,6 +456,7 @@ static void remove_watch_no_event(struct inotify_watch *watch,
454 list_del(&watch->d_list); 456 list_del(&watch->d_list);
455 457
456 atomic_dec(&dev->user->inotify_watches); 458 atomic_dec(&dev->user->inotify_watches);
459 atomic_dec(&inotify_watches);
457 idr_remove(&dev->idr, watch->wd); 460 idr_remove(&dev->idr, watch->wd);
458 put_inotify_watch(watch); 461 put_inotify_watch(watch);
459} 462}
@@ -532,6 +535,9 @@ void inotify_dentry_parent_queue_event(struct dentry *dentry, u32 mask,
532 struct dentry *parent; 535 struct dentry *parent;
533 struct inode *inode; 536 struct inode *inode;
534 537
538 if (!atomic_read (&inotify_watches))
539 return;
540
535 spin_lock(&dentry->d_lock); 541 spin_lock(&dentry->d_lock);
536 parent = dentry->d_parent; 542 parent = dentry->d_parent;
537 inode = parent->d_inode; 543 inode = parent->d_inode;
@@ -925,6 +931,7 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
925 struct nameidata nd; 931 struct nameidata nd;
926 struct file *filp; 932 struct file *filp;
927 int ret, fput_needed; 933 int ret, fput_needed;
934 int mask_add = 0;
928 935
929 filp = fget_light(fd, &fput_needed); 936 filp = fget_light(fd, &fput_needed);
930 if (unlikely(!filp)) 937 if (unlikely(!filp))
@@ -947,6 +954,9 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
947 down(&inode->inotify_sem); 954 down(&inode->inotify_sem);
948 down(&dev->sem); 955 down(&dev->sem);
949 956
957 if (mask & IN_MASK_ADD)
958 mask_add = 1;
959
950 /* don't let user-space set invalid bits: we don't want flags set */ 960 /* don't let user-space set invalid bits: we don't want flags set */
951 mask &= IN_ALL_EVENTS; 961 mask &= IN_ALL_EVENTS;
952 if (unlikely(!mask)) { 962 if (unlikely(!mask)) {
@@ -960,7 +970,10 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
960 */ 970 */
961 old = inode_find_dev(inode, dev); 971 old = inode_find_dev(inode, dev);
962 if (unlikely(old)) { 972 if (unlikely(old)) {
963 old->mask = mask; 973 if (mask_add)
974 old->mask |= mask;
975 else
976 old->mask = mask;
964 ret = old->wd; 977 ret = old->wd;
965 goto out; 978 goto out;
966 } 979 }
@@ -1043,6 +1056,7 @@ static int __init inotify_setup(void)
1043 inotify_max_user_watches = 8192; 1056 inotify_max_user_watches = 8192;
1044 1057
1045 atomic_set(&inotify_cookie, 0); 1058 atomic_set(&inotify_cookie, 0);
1059 atomic_set(&inotify_watches, 0);
1046 1060
1047 watch_cachep = kmem_cache_create("inotify_watch_cache", 1061 watch_cachep = kmem_cache_create("inotify_watch_cache",
1048 sizeof(struct inotify_watch), 1062 sizeof(struct inotify_watch),
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index 5a97e346bd95..014a51fd00d7 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -204,7 +204,7 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
204 int i; 204 int i;
205 205
206 spin_unlock(&journal->j_list_lock); 206 spin_unlock(&journal->j_list_lock);
207 ll_rw_block(WRITE, *batch_count, bhs); 207 ll_rw_block(SWRITE, *batch_count, bhs);
208 spin_lock(&journal->j_list_lock); 208 spin_lock(&journal->j_list_lock);
209 for (i = 0; i < *batch_count; i++) { 209 for (i = 0; i < *batch_count; i++) {
210 struct buffer_head *bh = bhs[i]; 210 struct buffer_head *bh = bhs[i];
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index dac720c837ab..2a3e310f79ef 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -358,7 +358,7 @@ write_out_data:
358 jbd_debug(2, "submit %d writes\n", 358 jbd_debug(2, "submit %d writes\n",
359 bufs); 359 bufs);
360 spin_unlock(&journal->j_list_lock); 360 spin_unlock(&journal->j_list_lock);
361 ll_rw_block(WRITE, bufs, wbuf); 361 ll_rw_block(SWRITE, bufs, wbuf);
362 journal_brelse_array(wbuf, bufs); 362 journal_brelse_array(wbuf, bufs);
363 bufs = 0; 363 bufs = 0;
364 goto write_out_data; 364 goto write_out_data;
@@ -381,7 +381,7 @@ write_out_data:
381 381
382 if (bufs) { 382 if (bufs) {
383 spin_unlock(&journal->j_list_lock); 383 spin_unlock(&journal->j_list_lock);
384 ll_rw_block(WRITE, bufs, wbuf); 384 ll_rw_block(SWRITE, bufs, wbuf);
385 journal_brelse_array(wbuf, bufs); 385 journal_brelse_array(wbuf, bufs);
386 spin_lock(&journal->j_list_lock); 386 spin_lock(&journal->j_list_lock);
387 } 387 }
@@ -720,11 +720,17 @@ wait_for_iobuf:
720 J_ASSERT(commit_transaction->t_log_list == NULL); 720 J_ASSERT(commit_transaction->t_log_list == NULL);
721 721
722restart_loop: 722restart_loop:
723 /*
724 * As there are other places (journal_unmap_buffer()) adding buffers
725 * to this list we have to be careful and hold the j_list_lock.
726 */
727 spin_lock(&journal->j_list_lock);
723 while (commit_transaction->t_forget) { 728 while (commit_transaction->t_forget) {
724 transaction_t *cp_transaction; 729 transaction_t *cp_transaction;
725 struct buffer_head *bh; 730 struct buffer_head *bh;
726 731
727 jh = commit_transaction->t_forget; 732 jh = commit_transaction->t_forget;
733 spin_unlock(&journal->j_list_lock);
728 bh = jh2bh(jh); 734 bh = jh2bh(jh);
729 jbd_lock_bh_state(bh); 735 jbd_lock_bh_state(bh);
730 J_ASSERT_JH(jh, jh->b_transaction == commit_transaction || 736 J_ASSERT_JH(jh, jh->b_transaction == commit_transaction ||
@@ -792,9 +798,25 @@ restart_loop:
792 journal_remove_journal_head(bh); /* needs a brelse */ 798 journal_remove_journal_head(bh); /* needs a brelse */
793 release_buffer_page(bh); 799 release_buffer_page(bh);
794 } 800 }
801 cond_resched_lock(&journal->j_list_lock);
802 }
803 spin_unlock(&journal->j_list_lock);
804 /*
805 * This is a bit sleazy. We borrow j_list_lock to protect
806 * journal->j_committing_transaction in __journal_remove_checkpoint.
807 * Really, __journal_remove_checkpoint should be using j_state_lock but
808 * it's a bit hassle to hold that across __journal_remove_checkpoint
809 */
810 spin_lock(&journal->j_state_lock);
811 spin_lock(&journal->j_list_lock);
812 /*
813 * Now recheck if some buffers did not get attached to the transaction
814 * while the lock was dropped...
815 */
816 if (commit_transaction->t_forget) {
795 spin_unlock(&journal->j_list_lock); 817 spin_unlock(&journal->j_list_lock);
796 if (cond_resched()) 818 spin_unlock(&journal->j_state_lock);
797 goto restart_loop; 819 goto restart_loop;
798 } 820 }
799 821
800 /* Done with this transaction! */ 822 /* Done with this transaction! */
@@ -803,14 +825,6 @@ restart_loop:
803 825
804 J_ASSERT(commit_transaction->t_state == T_COMMIT); 826 J_ASSERT(commit_transaction->t_state == T_COMMIT);
805 827
806 /*
807 * This is a bit sleazy. We borrow j_list_lock to protect
808 * journal->j_committing_transaction in __journal_remove_checkpoint.
809 * Really, __jornal_remove_checkpoint should be using j_state_lock but
810 * it's a bit hassle to hold that across __journal_remove_checkpoint
811 */
812 spin_lock(&journal->j_state_lock);
813 spin_lock(&journal->j_list_lock);
814 commit_transaction->t_state = T_FINISHED; 828 commit_transaction->t_state = T_FINISHED;
815 J_ASSERT(commit_transaction == journal->j_committing_transaction); 829 J_ASSERT(commit_transaction == journal->j_committing_transaction);
816 journal->j_commit_sequence = commit_transaction->t_tid; 830 journal->j_commit_sequence = commit_transaction->t_tid;
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 5e7b43949517..7ae2c4fe506b 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -65,7 +65,6 @@ EXPORT_SYMBOL(journal_set_features);
65EXPORT_SYMBOL(journal_create); 65EXPORT_SYMBOL(journal_create);
66EXPORT_SYMBOL(journal_load); 66EXPORT_SYMBOL(journal_load);
67EXPORT_SYMBOL(journal_destroy); 67EXPORT_SYMBOL(journal_destroy);
68EXPORT_SYMBOL(journal_recover);
69EXPORT_SYMBOL(journal_update_superblock); 68EXPORT_SYMBOL(journal_update_superblock);
70EXPORT_SYMBOL(journal_abort); 69EXPORT_SYMBOL(journal_abort);
71EXPORT_SYMBOL(journal_errno); 70EXPORT_SYMBOL(journal_errno);
@@ -81,6 +80,7 @@ EXPORT_SYMBOL(journal_try_to_free_buffers);
81EXPORT_SYMBOL(journal_force_commit); 80EXPORT_SYMBOL(journal_force_commit);
82 81
83static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); 82static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
83static void __journal_abort_soft (journal_t *journal, int errno);
84 84
85/* 85/*
86 * Helper function used to manage commit timeouts 86 * Helper function used to manage commit timeouts
@@ -93,16 +93,6 @@ static void commit_timeout(unsigned long __data)
93 wake_up_process(p); 93 wake_up_process(p);
94} 94}
95 95
96/* Static check for data structure consistency. There's no code
97 * invoked --- we'll just get a linker failure if things aren't right.
98 */
99void __journal_internal_check(void)
100{
101 extern void journal_bad_superblock_size(void);
102 if (sizeof(struct journal_superblock_s) != 1024)
103 journal_bad_superblock_size();
104}
105
106/* 96/*
107 * kjournald: The main thread function used to manage a logging device 97 * kjournald: The main thread function used to manage a logging device
108 * journal. 98 * journal.
@@ -119,16 +109,12 @@ void __journal_internal_check(void)
119 * known as checkpointing, and this thread is responsible for that job. 109 * known as checkpointing, and this thread is responsible for that job.
120 */ 110 */
121 111
122journal_t *current_journal; // AKPM: debug 112static int kjournald(void *arg)
123
124int kjournald(void *arg)
125{ 113{
126 journal_t *journal = (journal_t *) arg; 114 journal_t *journal = (journal_t *) arg;
127 transaction_t *transaction; 115 transaction_t *transaction;
128 struct timer_list timer; 116 struct timer_list timer;
129 117
130 current_journal = journal;
131
132 daemonize("kjournald"); 118 daemonize("kjournald");
133 119
134 /* Set up an interval timer which can be used to trigger a 120 /* Set up an interval timer which can be used to trigger a
@@ -193,6 +179,8 @@ loop:
193 if (transaction && time_after_eq(jiffies, 179 if (transaction && time_after_eq(jiffies,
194 transaction->t_expires)) 180 transaction->t_expires))
195 should_sleep = 0; 181 should_sleep = 0;
182 if (journal->j_flags & JFS_UNMOUNT)
183 should_sleep = 0;
196 if (should_sleep) { 184 if (should_sleep) {
197 spin_unlock(&journal->j_state_lock); 185 spin_unlock(&journal->j_state_lock);
198 schedule(); 186 schedule();
@@ -969,7 +957,7 @@ void journal_update_superblock(journal_t *journal, int wait)
969 if (wait) 957 if (wait)
970 sync_dirty_buffer(bh); 958 sync_dirty_buffer(bh);
971 else 959 else
972 ll_rw_block(WRITE, 1, &bh); 960 ll_rw_block(SWRITE, 1, &bh);
973 961
974out: 962out:
975 /* If we have just flushed the log (by marking s_start==0), then 963 /* If we have just flushed the log (by marking s_start==0), then
@@ -1439,7 +1427,7 @@ int journal_wipe(journal_t *journal, int write)
1439 * device this journal is present. 1427 * device this journal is present.
1440 */ 1428 */
1441 1429
1442const char *journal_dev_name(journal_t *journal, char *buffer) 1430static const char *journal_dev_name(journal_t *journal, char *buffer)
1443{ 1431{
1444 struct block_device *bdev; 1432 struct block_device *bdev;
1445 1433
@@ -1485,7 +1473,7 @@ void __journal_abort_hard(journal_t *journal)
1485 1473
1486/* Soft abort: record the abort error status in the journal superblock, 1474/* Soft abort: record the abort error status in the journal superblock,
1487 * but don't do any other IO. */ 1475 * but don't do any other IO. */
1488void __journal_abort_soft (journal_t *journal, int errno) 1476static void __journal_abort_soft (journal_t *journal, int errno)
1489{ 1477{
1490 if (journal->j_flags & JFS_ABORT) 1478 if (journal->j_flags & JFS_ABORT)
1491 return; 1479 return;
@@ -1880,7 +1868,7 @@ EXPORT_SYMBOL(journal_enable_debug);
1880 1868
1881static struct proc_dir_entry *proc_jbd_debug; 1869static struct proc_dir_entry *proc_jbd_debug;
1882 1870
1883int read_jbd_debug(char *page, char **start, off_t off, 1871static int read_jbd_debug(char *page, char **start, off_t off,
1884 int count, int *eof, void *data) 1872 int count, int *eof, void *data)
1885{ 1873{
1886 int ret; 1874 int ret;
@@ -1890,7 +1878,7 @@ int read_jbd_debug(char *page, char **start, off_t off,
1890 return ret; 1878 return ret;
1891} 1879}
1892 1880
1893int write_jbd_debug(struct file *file, const char __user *buffer, 1881static int write_jbd_debug(struct file *file, const char __user *buffer,
1894 unsigned long count, void *data) 1882 unsigned long count, void *data)
1895{ 1883{
1896 char buf[32]; 1884 char buf[32];
@@ -1979,6 +1967,14 @@ static int __init journal_init(void)
1979{ 1967{
1980 int ret; 1968 int ret;
1981 1969
1970/* Static check for data structure consistency. There's no code
1971 * invoked --- we'll just get a linker failure if things aren't right.
1972 */
1973 extern void journal_bad_superblock_size(void);
1974 if (sizeof(struct journal_superblock_s) != 1024)
1975 journal_bad_superblock_size();
1976
1977
1982 ret = journal_init_caches(); 1978 ret = journal_init_caches();
1983 if (ret != 0) 1979 if (ret != 0)
1984 journal_destroy_caches(); 1980 journal_destroy_caches();
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index d327a598f861..a56144183462 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -116,7 +116,8 @@ static inline int hash(journal_t *journal, unsigned long block)
116 (block << (hash_shift - 12))) & (table->hash_size - 1); 116 (block << (hash_shift - 12))) & (table->hash_size - 1);
117} 117}
118 118
119int insert_revoke_hash(journal_t *journal, unsigned long blocknr, tid_t seq) 119static int insert_revoke_hash(journal_t *journal, unsigned long blocknr,
120 tid_t seq)
120{ 121{
121 struct list_head *hash_list; 122 struct list_head *hash_list;
122 struct jbd_revoke_record_s *record; 123 struct jbd_revoke_record_s *record;
@@ -613,7 +614,7 @@ static void flush_descriptor(journal_t *journal,
613 set_buffer_jwrite(bh); 614 set_buffer_jwrite(bh);
614 BUFFER_TRACE(bh, "write"); 615 BUFFER_TRACE(bh, "write");
615 set_buffer_dirty(bh); 616 set_buffer_dirty(bh);
616 ll_rw_block(WRITE, 1, &bh); 617 ll_rw_block(SWRITE, 1, &bh);
617} 618}
618#endif 619#endif
619 620
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 77b7662b840b..49bbc2be3d72 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -490,23 +490,21 @@ void journal_unlock_updates (journal_t *journal)
490 */ 490 */
491static void jbd_unexpected_dirty_buffer(struct journal_head *jh) 491static void jbd_unexpected_dirty_buffer(struct journal_head *jh)
492{ 492{
493 struct buffer_head *bh = jh2bh(jh);
494 int jlist; 493 int jlist;
495 494
496 if (buffer_dirty(bh)) { 495 /* If this buffer is one which might reasonably be dirty
497 /* If this buffer is one which might reasonably be dirty 496 * --- ie. data, or not part of this journal --- then
498 * --- ie. data, or not part of this journal --- then 497 * we're OK to leave it alone, but otherwise we need to
499 * we're OK to leave it alone, but otherwise we need to 498 * move the dirty bit to the journal's own internal
500 * move the dirty bit to the journal's own internal 499 * JBDDirty bit. */
501 * JBDDirty bit. */ 500 jlist = jh->b_jlist;
502 jlist = jh->b_jlist; 501
503 502 if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
504 if (jlist == BJ_Metadata || jlist == BJ_Reserved || 503 jlist == BJ_Shadow || jlist == BJ_Forget) {
505 jlist == BJ_Shadow || jlist == BJ_Forget) { 504 struct buffer_head *bh = jh2bh(jh);
506 if (test_clear_buffer_dirty(jh2bh(jh))) { 505
507 set_bit(BH_JBDDirty, &jh2bh(jh)->b_state); 506 if (test_clear_buffer_dirty(bh))
508 } 507 set_buffer_jbddirty(bh);
509 }
510 } 508 }
511} 509}
512 510
@@ -574,9 +572,14 @@ repeat:
574 if (jh->b_next_transaction) 572 if (jh->b_next_transaction)
575 J_ASSERT_JH(jh, jh->b_next_transaction == 573 J_ASSERT_JH(jh, jh->b_next_transaction ==
576 transaction); 574 transaction);
577 JBUFFER_TRACE(jh, "Unexpected dirty buffer"); 575 }
578 jbd_unexpected_dirty_buffer(jh); 576 /*
579 } 577 * In any case we need to clean the dirty flag and we must
578 * do it under the buffer lock to be sure we don't race
579 * with running write-out.
580 */
581 JBUFFER_TRACE(jh, "Unexpected dirty buffer");
582 jbd_unexpected_dirty_buffer(jh);
580 } 583 }
581 584
582 unlock_buffer(bh); 585 unlock_buffer(bh);
@@ -1337,8 +1340,7 @@ int journal_stop(handle_t *handle)
1337 if (handle->h_sync) { 1340 if (handle->h_sync) {
1338 do { 1341 do {
1339 old_handle_count = transaction->t_handle_count; 1342 old_handle_count = transaction->t_handle_count;
1340 set_current_state(TASK_UNINTERRUPTIBLE); 1343 schedule_timeout_uninterruptible(1);
1341 schedule_timeout(1);
1342 } while (old_handle_count != transaction->t_handle_count); 1344 } while (old_handle_count != transaction->t_handle_count);
1343 } 1345 }
1344 1346
diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c
index bfbeb4c86e03..3dcc6d2162cb 100644
--- a/fs/jffs/inode-v23.c
+++ b/fs/jffs/inode-v23.c
@@ -1629,9 +1629,6 @@ static int jffs_fsync(struct file *f, struct dentry *d, int datasync)
1629} 1629}
1630 1630
1631 1631
1632extern int generic_file_open(struct inode *, struct file *) __attribute__((weak));
1633extern loff_t generic_file_llseek(struct file *, loff_t, int) __attribute__((weak));
1634
1635static struct file_operations jffs_file_operations = 1632static struct file_operations jffs_file_operations =
1636{ 1633{
1637 .open = generic_file_open, 1634 .open = generic_file_open,
@@ -1747,6 +1744,7 @@ jffs_delete_inode(struct inode *inode)
1747 D3(printk("jffs_delete_inode(): inode->i_ino == %lu\n", 1744 D3(printk("jffs_delete_inode(): inode->i_ino == %lu\n",
1748 inode->i_ino)); 1745 inode->i_ino));
1749 1746
1747 truncate_inode_pages(&inode->i_data, 0);
1750 lock_kernel(); 1748 lock_kernel();
1751 inode->i_size = 0; 1749 inode->i_size = 0;
1752 inode->i_blocks = 0; 1750 inode->i_blocks = 0;
diff --git a/fs/jffs/intrep.c b/fs/jffs/intrep.c
index 456d7e6e29c2..27f199e94cfc 100644
--- a/fs/jffs/intrep.c
+++ b/fs/jffs/intrep.c
@@ -1701,12 +1701,10 @@ jffs_find_file(struct jffs_control *c, __u32 ino)
1701{ 1701{
1702 struct jffs_file *f; 1702 struct jffs_file *f;
1703 int i = ino % c->hash_len; 1703 int i = ino % c->hash_len;
1704 struct list_head *tmp;
1705 1704
1706 D3(printk("jffs_find_file(): ino: %u\n", ino)); 1705 D3(printk("jffs_find_file(): ino: %u\n", ino));
1707 1706
1708 for (tmp = c->hash[i].next; tmp != &c->hash[i]; tmp = tmp->next) { 1707 list_for_each_entry(f, &c->hash[i], hash) {
1709 f = list_entry(tmp, struct jffs_file, hash);
1710 if (ino != f->ino) 1708 if (ino != f->ino)
1711 continue; 1709 continue;
1712 D3(printk("jffs_find_file(): Found file with ino " 1710 D3(printk("jffs_find_file(): Found file with ino "
@@ -2102,13 +2100,12 @@ jffs_foreach_file(struct jffs_control *c, int (*func)(struct jffs_file *))
2102 int result = 0; 2100 int result = 0;
2103 2101
2104 for (pos = 0; pos < c->hash_len; pos++) { 2102 for (pos = 0; pos < c->hash_len; pos++) {
2105 struct list_head *p, *next; 2103 struct jffs_file *f, *next;
2106 for (p = c->hash[pos].next; p != &c->hash[pos]; p = next) { 2104
2107 /* We need a reference to the next file in the 2105 /* We must do _safe, because 'func' might remove the
2108 list because `func' might remove the current 2106 current file 'f' from the list. */
2109 file `f'. */ 2107 list_for_each_entry_safe(f, next, &c->hash[pos], hash) {
2110 next = p->next; 2108 r = func(f);
2111 r = func(list_entry(p, struct jffs_file, hash));
2112 if (r < 0) 2109 if (r < 0)
2113 return r; 2110 return r;
2114 result += r; 2111 result += r;
@@ -2613,9 +2610,8 @@ jffs_print_hash_table(struct jffs_control *c)
2613 2610
2614 printk("JFFS: Dumping the file system's hash table...\n"); 2611 printk("JFFS: Dumping the file system's hash table...\n");
2615 for (i = 0; i < c->hash_len; i++) { 2612 for (i = 0; i < c->hash_len; i++) {
2616 struct list_head *p; 2613 struct jffs_file *f;
2617 for (p = c->hash[i].next; p != &c->hash[i]; p = p->next) { 2614 list_for_each_entry(f, &c->hash[i], hash) {
2618 struct jffs_file *f=list_entry(p,struct jffs_file,hash);
2619 printk("*** c->hash[%u]: \"%s\" " 2615 printk("*** c->hash[%u]: \"%s\" "
2620 "(ino: %u, pino: %u)\n", 2616 "(ino: %u, pino: %u)\n",
2621 i, (f->name ? f->name : ""), 2617 i, (f->name ? f->name : ""),
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index bd9ed9b0247b..8279bf0133ff 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -21,9 +21,6 @@
21#include <linux/jffs2.h> 21#include <linux/jffs2.h>
22#include "nodelist.h" 22#include "nodelist.h"
23 23
24extern int generic_file_open(struct inode *, struct file *) __attribute__((weak));
25extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) __attribute__((weak));
26
27static int jffs2_commit_write (struct file *filp, struct page *pg, 24static int jffs2_commit_write (struct file *filp, struct page *pg,
28 unsigned start, unsigned end); 25 unsigned start, unsigned end);
29static int jffs2_prepare_write (struct file *filp, struct page *pg, 26static int jffs2_prepare_write (struct file *filp, struct page *pg,
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 37da3e33e750..0ec62d5310db 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -131,6 +131,8 @@ void jfs_delete_inode(struct inode *inode)
131 if (!is_bad_inode(inode) && 131 if (!is_bad_inode(inode) &&
132 (JFS_IP(inode)->fileset == cpu_to_le32(FILESYSTEM_I))) { 132 (JFS_IP(inode)->fileset == cpu_to_le32(FILESYSTEM_I))) {
133 133
134 truncate_inode_pages(&inode->i_data, 0);
135
134 if (test_cflag(COMMIT_Freewmap, inode)) 136 if (test_cflag(COMMIT_Freewmap, inode))
135 jfs_free_zero_link(inode); 137 jfs_free_zero_link(inode);
136 138
diff --git a/fs/jfs/jfs_filsys.h b/fs/jfs/jfs_filsys.h
index 86ccac80f0ab..72a5588faeca 100644
--- a/fs/jfs/jfs_filsys.h
+++ b/fs/jfs/jfs_filsys.h
@@ -37,6 +37,9 @@
37#define JFS_ERR_CONTINUE 0x00000004 /* continue */ 37#define JFS_ERR_CONTINUE 0x00000004 /* continue */
38#define JFS_ERR_PANIC 0x00000008 /* panic */ 38#define JFS_ERR_PANIC 0x00000008 /* panic */
39 39
40#define JFS_USRQUOTA 0x00000010
41#define JFS_GRPQUOTA 0x00000020
42
40/* platform option (conditional compilation) */ 43/* platform option (conditional compilation) */
41#define JFS_AIX 0x80000000 /* AIX support */ 44#define JFS_AIX 0x80000000 /* AIX support */
42/* POSIX name/directory support */ 45/* POSIX name/directory support */
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 9ff89720f93b..71bc34b96b2b 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -23,9 +23,11 @@
23#include <linux/parser.h> 23#include <linux/parser.h>
24#include <linux/completion.h> 24#include <linux/completion.h>
25#include <linux/vfs.h> 25#include <linux/vfs.h>
26#include <linux/mount.h>
26#include <linux/moduleparam.h> 27#include <linux/moduleparam.h>
27#include <linux/posix_acl.h> 28#include <linux/posix_acl.h>
28#include <asm/uaccess.h> 29#include <asm/uaccess.h>
30#include <linux/seq_file.h>
29 31
30#include "jfs_incore.h" 32#include "jfs_incore.h"
31#include "jfs_filsys.h" 33#include "jfs_filsys.h"
@@ -192,7 +194,8 @@ static void jfs_put_super(struct super_block *sb)
192 194
193enum { 195enum {
194 Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize, 196 Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize,
195 Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, 197 Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, Opt_quota,
198 Opt_usrquota, Opt_grpquota
196}; 199};
197 200
198static match_table_t tokens = { 201static match_table_t tokens = {
@@ -204,8 +207,8 @@ static match_table_t tokens = {
204 {Opt_errors, "errors=%s"}, 207 {Opt_errors, "errors=%s"},
205 {Opt_ignore, "noquota"}, 208 {Opt_ignore, "noquota"},
206 {Opt_ignore, "quota"}, 209 {Opt_ignore, "quota"},
207 {Opt_ignore, "usrquota"}, 210 {Opt_usrquota, "usrquota"},
208 {Opt_ignore, "grpquota"}, 211 {Opt_grpquota, "grpquota"},
209 {Opt_err, NULL} 212 {Opt_err, NULL}
210}; 213};
211 214
@@ -293,6 +296,24 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
293 } 296 }
294 break; 297 break;
295 } 298 }
299
300#if defined(CONFIG_QUOTA)
301 case Opt_quota:
302 case Opt_usrquota:
303 *flag |= JFS_USRQUOTA;
304 break;
305 case Opt_grpquota:
306 *flag |= JFS_GRPQUOTA;
307 break;
308#else
309 case Opt_usrquota:
310 case Opt_grpquota:
311 case Opt_quota:
312 printk(KERN_ERR
313 "JFS: quota operations not supported\n");
314 break;
315#endif
316
296 default: 317 default:
297 printk("jfs: Unrecognized mount option \"%s\" " 318 printk("jfs: Unrecognized mount option \"%s\" "
298 " or missing value\n", p); 319 " or missing value\n", p);
@@ -539,6 +560,26 @@ static int jfs_sync_fs(struct super_block *sb, int wait)
539 return 0; 560 return 0;
540} 561}
541 562
563static int jfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
564{
565 struct jfs_sb_info *sbi = JFS_SBI(vfs->mnt_sb);
566
567 if (sbi->flag & JFS_NOINTEGRITY)
568 seq_puts(seq, ",nointegrity");
569 else
570 seq_puts(seq, ",integrity");
571
572#if defined(CONFIG_QUOTA)
573 if (sbi->flag & JFS_USRQUOTA)
574 seq_puts(seq, ",usrquota");
575
576 if (sbi->flag & JFS_GRPQUOTA)
577 seq_puts(seq, ",grpquota");
578#endif
579
580 return 0;
581}
582
542static struct super_operations jfs_super_operations = { 583static struct super_operations jfs_super_operations = {
543 .alloc_inode = jfs_alloc_inode, 584 .alloc_inode = jfs_alloc_inode,
544 .destroy_inode = jfs_destroy_inode, 585 .destroy_inode = jfs_destroy_inode,
@@ -552,6 +593,7 @@ static struct super_operations jfs_super_operations = {
552 .unlockfs = jfs_unlockfs, 593 .unlockfs = jfs_unlockfs,
553 .statfs = jfs_statfs, 594 .statfs = jfs_statfs,
554 .remount_fs = jfs_remount, 595 .remount_fs = jfs_remount,
596 .show_options = jfs_show_options
555}; 597};
556 598
557static struct export_operations jfs_export_operations = { 599static struct export_operations jfs_export_operations = {
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 14b3ce87fa29..87332f30141b 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -299,8 +299,7 @@ nlmclnt_alloc_call(void)
299 return call; 299 return call;
300 } 300 }
301 printk("nlmclnt_alloc_call: failed, waiting for memory\n"); 301 printk("nlmclnt_alloc_call: failed, waiting for memory\n");
302 current->state = TASK_INTERRUPTIBLE; 302 schedule_timeout_interruptible(5*HZ);
303 schedule_timeout(5*HZ);
304 } 303 }
305 return NULL; 304 return NULL;
306} 305}
diff --git a/fs/locks.c b/fs/locks.c
index 11956b6179ff..c2c09b4798d6 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2198,21 +2198,23 @@ void steal_locks(fl_owner_t from)
2198{ 2198{
2199 struct files_struct *files = current->files; 2199 struct files_struct *files = current->files;
2200 int i, j; 2200 int i, j;
2201 struct fdtable *fdt;
2201 2202
2202 if (from == files) 2203 if (from == files)
2203 return; 2204 return;
2204 2205
2205 lock_kernel(); 2206 lock_kernel();
2206 j = 0; 2207 j = 0;
2208 fdt = files_fdtable(files);
2207 for (;;) { 2209 for (;;) {
2208 unsigned long set; 2210 unsigned long set;
2209 i = j * __NFDBITS; 2211 i = j * __NFDBITS;
2210 if (i >= files->max_fdset || i >= files->max_fds) 2212 if (i >= fdt->max_fdset || i >= fdt->max_fds)
2211 break; 2213 break;
2212 set = files->open_fds->fds_bits[j++]; 2214 set = fdt->open_fds->fds_bits[j++];
2213 while (set) { 2215 while (set) {
2214 if (set & 1) { 2216 if (set & 1) {
2215 struct file *file = files->fd[i]; 2217 struct file *file = fdt->fd[i];
2216 if (file) 2218 if (file)
2217 __steal_locks(file, from); 2219 __steal_locks(file, from);
2218 } 2220 }
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 3f18c21198d7..790cc0d0e970 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -24,6 +24,7 @@ static int minix_remount (struct super_block * sb, int * flags, char * data);
24 24
25static void minix_delete_inode(struct inode *inode) 25static void minix_delete_inode(struct inode *inode)
26{ 26{
27 truncate_inode_pages(&inode->i_data, 0);
27 inode->i_size = 0; 28 inode->i_size = 0;
28 minix_truncate(inode); 29 minix_truncate(inode);
29 minix_free_inode(inode); 30 minix_free_inode(inode);
diff --git a/fs/namei.c b/fs/namei.c
index 6ec1f0fefc5b..21d85f1ac839 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -525,6 +525,22 @@ static inline int __do_follow_link(struct path *path, struct nameidata *nd)
525 return error; 525 return error;
526} 526}
527 527
528static inline void dput_path(struct path *path, struct nameidata *nd)
529{
530 dput(path->dentry);
531 if (path->mnt != nd->mnt)
532 mntput(path->mnt);
533}
534
535static inline void path_to_nameidata(struct path *path, struct nameidata *nd)
536{
537 dput(nd->dentry);
538 if (nd->mnt != path->mnt)
539 mntput(nd->mnt);
540 nd->mnt = path->mnt;
541 nd->dentry = path->dentry;
542}
543
528/* 544/*
529 * This limits recursive symlink follows to 8, while 545 * This limits recursive symlink follows to 8, while
530 * limiting consecutive symlinks to 40. 546 * limiting consecutive symlinks to 40.
@@ -552,9 +568,7 @@ static inline int do_follow_link(struct path *path, struct nameidata *nd)
552 nd->depth--; 568 nd->depth--;
553 return err; 569 return err;
554loop: 570loop:
555 dput(path->dentry); 571 dput_path(path, nd);
556 if (path->mnt != nd->mnt)
557 mntput(path->mnt);
558 path_release(nd); 572 path_release(nd);
559 return err; 573 return err;
560} 574}
@@ -813,13 +827,8 @@ static fastcall int __link_path_walk(const char * name, struct nameidata *nd)
813 err = -ENOTDIR; 827 err = -ENOTDIR;
814 if (!inode->i_op) 828 if (!inode->i_op)
815 break; 829 break;
816 } else { 830 } else
817 dput(nd->dentry); 831 path_to_nameidata(&next, nd);
818 if (nd->mnt != next.mnt)
819 mntput(nd->mnt);
820 nd->mnt = next.mnt;
821 nd->dentry = next.dentry;
822 }
823 err = -ENOTDIR; 832 err = -ENOTDIR;
824 if (!inode->i_op->lookup) 833 if (!inode->i_op->lookup)
825 break; 834 break;
@@ -859,13 +868,8 @@ last_component:
859 if (err) 868 if (err)
860 goto return_err; 869 goto return_err;
861 inode = nd->dentry->d_inode; 870 inode = nd->dentry->d_inode;
862 } else { 871 } else
863 dput(nd->dentry); 872 path_to_nameidata(&next, nd);
864 if (nd->mnt != next.mnt)
865 mntput(nd->mnt);
866 nd->mnt = next.mnt;
867 nd->dentry = next.dentry;
868 }
869 err = -ENOENT; 873 err = -ENOENT;
870 if (!inode) 874 if (!inode)
871 break; 875 break;
@@ -901,9 +905,7 @@ return_reval:
901return_base: 905return_base:
902 return 0; 906 return 0;
903out_dput: 907out_dput:
904 dput(next.dentry); 908 dput_path(&next, nd);
905 if (nd->mnt != next.mnt)
906 mntput(next.mnt);
907 break; 909 break;
908 } 910 }
909 path_release(nd); 911 path_release(nd);
@@ -1314,10 +1316,8 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
1314 return error; 1316 return error;
1315 DQUOT_INIT(dir); 1317 DQUOT_INIT(dir);
1316 error = dir->i_op->create(dir, dentry, mode, nd); 1318 error = dir->i_op->create(dir, dentry, mode, nd);
1317 if (!error) { 1319 if (!error)
1318 fsnotify_create(dir, dentry->d_name.name); 1320 fsnotify_create(dir, dentry->d_name.name);
1319 security_inode_post_create(dir, dentry, mode);
1320 }
1321 return error; 1321 return error;
1322} 1322}
1323 1323
@@ -1507,11 +1507,7 @@ do_last:
1507 if (path.dentry->d_inode->i_op && path.dentry->d_inode->i_op->follow_link) 1507 if (path.dentry->d_inode->i_op && path.dentry->d_inode->i_op->follow_link)
1508 goto do_link; 1508 goto do_link;
1509 1509
1510 dput(nd->dentry); 1510 path_to_nameidata(&path, nd);
1511 nd->dentry = path.dentry;
1512 if (nd->mnt != path.mnt)
1513 mntput(nd->mnt);
1514 nd->mnt = path.mnt;
1515 error = -EISDIR; 1511 error = -EISDIR;
1516 if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode)) 1512 if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode))
1517 goto exit; 1513 goto exit;
@@ -1522,9 +1518,7 @@ ok:
1522 return 0; 1518 return 0;
1523 1519
1524exit_dput: 1520exit_dput:
1525 dput(path.dentry); 1521 dput_path(&path, nd);
1526 if (nd->mnt != path.mnt)
1527 mntput(path.mnt);
1528exit: 1522exit:
1529 path_release(nd); 1523 path_release(nd);
1530 return error; 1524 return error;
@@ -1639,10 +1633,8 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
1639 1633
1640 DQUOT_INIT(dir); 1634 DQUOT_INIT(dir);
1641 error = dir->i_op->mknod(dir, dentry, mode, dev); 1635 error = dir->i_op->mknod(dir, dentry, mode, dev);
1642 if (!error) { 1636 if (!error)
1643 fsnotify_create(dir, dentry->d_name.name); 1637 fsnotify_create(dir, dentry->d_name.name);
1644 security_inode_post_mknod(dir, dentry, mode, dev);
1645 }
1646 return error; 1638 return error;
1647} 1639}
1648 1640
@@ -1712,10 +1704,8 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1712 1704
1713 DQUOT_INIT(dir); 1705 DQUOT_INIT(dir);
1714 error = dir->i_op->mkdir(dir, dentry, mode); 1706 error = dir->i_op->mkdir(dir, dentry, mode);
1715 if (!error) { 1707 if (!error)
1716 fsnotify_mkdir(dir, dentry->d_name.name); 1708 fsnotify_mkdir(dir, dentry->d_name.name);
1717 security_inode_post_mkdir(dir,dentry, mode);
1718 }
1719 return error; 1709 return error;
1720} 1710}
1721 1711
@@ -1951,10 +1941,8 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, i
1951 1941
1952 DQUOT_INIT(dir); 1942 DQUOT_INIT(dir);
1953 error = dir->i_op->symlink(dir, dentry, oldname); 1943 error = dir->i_op->symlink(dir, dentry, oldname);
1954 if (!error) { 1944 if (!error)
1955 fsnotify_create(dir, dentry->d_name.name); 1945 fsnotify_create(dir, dentry->d_name.name);
1956 security_inode_post_symlink(dir, dentry, oldname);
1957 }
1958 return error; 1946 return error;
1959} 1947}
1960 1948
@@ -2024,10 +2012,8 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
2024 DQUOT_INIT(dir); 2012 DQUOT_INIT(dir);
2025 error = dir->i_op->link(old_dentry, dir, new_dentry); 2013 error = dir->i_op->link(old_dentry, dir, new_dentry);
2026 up(&old_dentry->d_inode->i_sem); 2014 up(&old_dentry->d_inode->i_sem);
2027 if (!error) { 2015 if (!error)
2028 fsnotify_create(dir, new_dentry->d_name.name); 2016 fsnotify_create(dir, new_dentry->d_name.name);
2029 security_inode_post_link(old_dentry, dir, new_dentry);
2030 }
2031 return error; 2017 return error;
2032} 2018}
2033 2019
@@ -2146,11 +2132,8 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
2146 d_rehash(new_dentry); 2132 d_rehash(new_dentry);
2147 dput(new_dentry); 2133 dput(new_dentry);
2148 } 2134 }
2149 if (!error) { 2135 if (!error)
2150 d_move(old_dentry,new_dentry); 2136 d_move(old_dentry,new_dentry);
2151 security_inode_post_rename(old_dir, old_dentry,
2152 new_dir, new_dentry);
2153 }
2154 return error; 2137 return error;
2155} 2138}
2156 2139
@@ -2176,7 +2159,6 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
2176 /* The following d_move() should become unconditional */ 2159 /* The following d_move() should become unconditional */
2177 if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME)) 2160 if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME))
2178 d_move(old_dentry, new_dentry); 2161 d_move(old_dentry, new_dentry);
2179 security_inode_post_rename(old_dir, old_dentry, new_dir, new_dentry);
2180 } 2162 }
2181 if (target) 2163 if (target)
2182 up(&target->i_sem); 2164 up(&target->i_sem);
diff --git a/fs/namespace.c b/fs/namespace.c
index 79bd8a46e1e7..2fa9fdf7d6f5 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -40,7 +40,7 @@ static inline int sysfs_init(void)
40 __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); 40 __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
41 41
42static struct list_head *mount_hashtable; 42static struct list_head *mount_hashtable;
43static int hash_mask, hash_bits; 43static int hash_mask __read_mostly, hash_bits __read_mostly;
44static kmem_cache_t *mnt_cache; 44static kmem_cache_t *mnt_cache;
45 45
46static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) 46static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
@@ -537,7 +537,6 @@ lives_below_in_same_fs(struct dentry *d, struct dentry *dentry)
537static struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry) 537static struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry)
538{ 538{
539 struct vfsmount *res, *p, *q, *r, *s; 539 struct vfsmount *res, *p, *q, *r, *s;
540 struct list_head *h;
541 struct nameidata nd; 540 struct nameidata nd;
542 541
543 res = q = clone_mnt(mnt, dentry); 542 res = q = clone_mnt(mnt, dentry);
@@ -546,8 +545,7 @@ static struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry)
546 q->mnt_mountpoint = mnt->mnt_mountpoint; 545 q->mnt_mountpoint = mnt->mnt_mountpoint;
547 546
548 p = mnt; 547 p = mnt;
549 for (h = mnt->mnt_mounts.next; h != &mnt->mnt_mounts; h = h->next) { 548 list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
550 r = list_entry(h, struct vfsmount, mnt_child);
551 if (!lives_below_in_same_fs(r->mnt_mountpoint, dentry)) 549 if (!lives_below_in_same_fs(r->mnt_mountpoint, dentry))
552 continue; 550 continue;
553 551
@@ -1334,8 +1332,12 @@ asmlinkage long sys_pivot_root(const char __user *new_root, const char __user *p
1334 error = -EINVAL; 1332 error = -EINVAL;
1335 if (user_nd.mnt->mnt_root != user_nd.dentry) 1333 if (user_nd.mnt->mnt_root != user_nd.dentry)
1336 goto out2; /* not a mountpoint */ 1334 goto out2; /* not a mountpoint */
1335 if (user_nd.mnt->mnt_parent == user_nd.mnt)
1336 goto out2; /* not attached */
1337 if (new_nd.mnt->mnt_root != new_nd.dentry) 1337 if (new_nd.mnt->mnt_root != new_nd.dentry)
1338 goto out2; /* not a mountpoint */ 1338 goto out2; /* not a mountpoint */
1339 if (new_nd.mnt->mnt_parent == new_nd.mnt)
1340 goto out2; /* not attached */
1339 tmp = old_nd.mnt; /* make sure we can reach put_old from new_root */ 1341 tmp = old_nd.mnt; /* make sure we can reach put_old from new_root */
1340 spin_lock(&vfsmount_lock); 1342 spin_lock(&vfsmount_lock);
1341 if (tmp != new_nd.mnt) { 1343 if (tmp != new_nd.mnt) {
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 44795d2f4b30..8c8839203cd5 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -286,6 +286,8 @@ ncp_iget(struct super_block *sb, struct ncp_entry_info *info)
286static void 286static void
287ncp_delete_inode(struct inode *inode) 287ncp_delete_inode(struct inode *inode)
288{ 288{
289 truncate_inode_pages(&inode->i_data, 0);
290
289 if (S_ISDIR(inode->i_mode)) { 291 if (S_ISDIR(inode->i_mode)) {
290 DDPRINTK("ncp_delete_inode: put directory %ld\n", inode->i_ino); 292 DDPRINTK("ncp_delete_inode: put directory %ld\n", inode->i_ino);
291 } 293 }
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 541b418327c8..6922469d6fc5 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -146,6 +146,8 @@ nfs_delete_inode(struct inode * inode)
146{ 146{
147 dprintk("NFS: delete_inode(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); 147 dprintk("NFS: delete_inode(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
148 148
149 truncate_inode_pages(&inode->i_data, 0);
150
149 nfs_wb_all(inode); 151 nfs_wb_all(inode);
150 /* 152 /*
151 * The following should never happen... 153 * The following should never happen...
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 2681485cf2d0..edc95514046d 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -34,8 +34,7 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
34 res = rpc_call_sync(clnt, msg, flags); 34 res = rpc_call_sync(clnt, msg, flags);
35 if (res != -EJUKEBOX) 35 if (res != -EJUKEBOX)
36 break; 36 break;
37 set_current_state(TASK_INTERRUPTIBLE); 37 schedule_timeout_interruptible(NFS_JUKEBOX_RETRY_TIME);
38 schedule_timeout(NFS_JUKEBOX_RETRY_TIME);
39 res = -ERESTARTSYS; 38 res = -ERESTARTSYS;
40 } while (!signalled()); 39 } while (!signalled());
41 rpc_clnt_sigunmask(clnt, &oldset); 40 rpc_clnt_sigunmask(clnt, &oldset);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 0c5a308e4963..9701ca8c9428 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2418,14 +2418,11 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
2418 *timeout = NFS4_POLL_RETRY_MAX; 2418 *timeout = NFS4_POLL_RETRY_MAX;
2419 rpc_clnt_sigmask(clnt, &oldset); 2419 rpc_clnt_sigmask(clnt, &oldset);
2420 if (clnt->cl_intr) { 2420 if (clnt->cl_intr) {
2421 set_current_state(TASK_INTERRUPTIBLE); 2421 schedule_timeout_interruptible(*timeout);
2422 schedule_timeout(*timeout);
2423 if (signalled()) 2422 if (signalled())
2424 res = -ERESTARTSYS; 2423 res = -ERESTARTSYS;
2425 } else { 2424 } else
2426 set_current_state(TASK_UNINTERRUPTIBLE); 2425 schedule_timeout_uninterruptible(*timeout);
2427 schedule_timeout(*timeout);
2428 }
2429 rpc_clnt_sigunmask(clnt, &oldset); 2426 rpc_clnt_sigunmask(clnt, &oldset);
2430 *timeout <<= 1; 2427 *timeout <<= 1;
2431 return res; 2428 return res;
@@ -2578,8 +2575,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4
2578static unsigned long 2575static unsigned long
2579nfs4_set_lock_task_retry(unsigned long timeout) 2576nfs4_set_lock_task_retry(unsigned long timeout)
2580{ 2577{
2581 current->state = TASK_INTERRUPTIBLE; 2578 schedule_timeout_interruptible(timeout);
2582 schedule_timeout(timeout);
2583 timeout <<= 1; 2579 timeout <<= 1;
2584 if (timeout > NFS4_LOCK_MAXTIMEOUT) 2580 if (timeout > NFS4_LOCK_MAXTIMEOUT)
2585 return NFS4_LOCK_MAXTIMEOUT; 2581 return NFS4_LOCK_MAXTIMEOUT;
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 9a11aa39e2e4..057aff745506 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -26,6 +26,7 @@
26#include <linux/namei.h> 26#include <linux/namei.h>
27#include <linux/mount.h> 27#include <linux/mount.h>
28#include <linux/hash.h> 28#include <linux/hash.h>
29#include <linux/module.h>
29 30
30#include <linux/sunrpc/svc.h> 31#include <linux/sunrpc/svc.h>
31#include <linux/nfsd/nfsd.h> 32#include <linux/nfsd/nfsd.h>
@@ -221,6 +222,7 @@ static int expkey_show(struct seq_file *m,
221} 222}
222 223
223struct cache_detail svc_expkey_cache = { 224struct cache_detail svc_expkey_cache = {
225 .owner = THIS_MODULE,
224 .hash_size = EXPKEY_HASHMAX, 226 .hash_size = EXPKEY_HASHMAX,
225 .hash_table = expkey_table, 227 .hash_table = expkey_table,
226 .name = "nfsd.fh", 228 .name = "nfsd.fh",
@@ -456,6 +458,7 @@ static int svc_export_show(struct seq_file *m,
456 return 0; 458 return 0;
457} 459}
458struct cache_detail svc_export_cache = { 460struct cache_detail svc_export_cache = {
461 .owner = THIS_MODULE,
459 .hash_size = EXPORT_HASHMAX, 462 .hash_size = EXPORT_HASHMAX,
460 .hash_table = export_table, 463 .hash_table = export_table,
461 .name = "nfsd.export", 464 .name = "nfsd.export",
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 5605a26efc57..13369650cdf9 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -187,6 +187,7 @@ static int idtoname_parse(struct cache_detail *, char *, int);
187static struct ent *idtoname_lookup(struct ent *, int); 187static struct ent *idtoname_lookup(struct ent *, int);
188 188
189static struct cache_detail idtoname_cache = { 189static struct cache_detail idtoname_cache = {
190 .owner = THIS_MODULE,
190 .hash_size = ENT_HASHMAX, 191 .hash_size = ENT_HASHMAX,
191 .hash_table = idtoname_table, 192 .hash_table = idtoname_table,
192 .name = "nfs4.idtoname", 193 .name = "nfs4.idtoname",
@@ -320,6 +321,7 @@ static struct ent *nametoid_lookup(struct ent *, int);
320static int nametoid_parse(struct cache_detail *, char *, int); 321static int nametoid_parse(struct cache_detail *, char *, int);
321 322
322static struct cache_detail nametoid_cache = { 323static struct cache_detail nametoid_cache = {
324 .owner = THIS_MODULE,
323 .hash_size = ENT_HASHMAX, 325 .hash_size = ENT_HASHMAX,
324 .hash_table = nametoid_table, 326 .hash_table = nametoid_table,
325 .name = "nfs4.nametoid", 327 .name = "nfs4.nametoid",
@@ -404,8 +406,10 @@ nfsd_idmap_init(void)
404void 406void
405nfsd_idmap_shutdown(void) 407nfsd_idmap_shutdown(void)
406{ 408{
407 cache_unregister(&idtoname_cache); 409 if (cache_unregister(&idtoname_cache))
408 cache_unregister(&nametoid_cache); 410 printk(KERN_ERR "nfsd: failed to unregister idtoname cache\n");
411 if (cache_unregister(&nametoid_cache))
412 printk(KERN_ERR "nfsd: failed to unregister nametoid cache\n");
409} 413}
410 414
411/* 415/*
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 57ed50fe7f85..954cf893d50c 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -93,7 +93,7 @@ nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname)
93 93
94 dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n", 94 dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n",
95 clname->len, clname->data); 95 clname->len, clname->data);
96 tfm = crypto_alloc_tfm("md5", 0); 96 tfm = crypto_alloc_tfm("md5", CRYPTO_TFM_REQ_MAY_SLEEP);
97 if (tfm == NULL) 97 if (tfm == NULL)
98 goto out; 98 goto out;
99 cksum.len = crypto_tfm_alg_digestsize(tfm); 99 cksum.len = crypto_tfm_alg_digestsize(tfm);
@@ -114,8 +114,7 @@ nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname)
114 kfree(cksum.data); 114 kfree(cksum.data);
115 status = nfs_ok; 115 status = nfs_ok;
116out: 116out:
117 if (tfm) 117 crypto_free_tfm(tfm);
118 crypto_free_tfm(tfm);
119 return status; 118 return status;
120} 119}
121 120
diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 9eecc9939dfe..e4fd6134244d 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -22,6 +22,76 @@ ToDo/Notes:
22 - Enable the code for setting the NT4 compatibility flag when we start 22 - Enable the code for setting the NT4 compatibility flag when we start
23 making NTFS 1.2 specific modifications. 23 making NTFS 1.2 specific modifications.
24 24
252.1.24 - Lots of bug fixes and support more clean journal states.
26
27 - Support journals ($LogFile) which have been modified by chkdsk. This
28 means users can boot into Windows after we marked the volume dirty.
29 The Windows boot will run chkdsk and then reboot. The user can then
30 immediately boot into Linux rather than having to do a full Windows
31 boot first before rebooting into Linux and we will recognize such a
32 journal and empty it as it is clean by definition.
33 - Support journals ($LogFile) with only one restart page as well as
34 journals with two different restart pages. We sanity check both and
35 either use the only sane one or the more recent one of the two in the
36 case that both are valid.
37 - Modify fs/ntfs/malloc.h::ntfs_malloc_nofs() to do the kmalloc() based
38 allocations with __GFP_HIGHMEM, analogous to how the vmalloc() based
39 allocations are done.
40 - Add fs/ntfs/malloc.h::ntfs_malloc_nofs_nofail() which is analogous to
41 ntfs_malloc_nofs() but it performs allocations with __GFP_NOFAIL and
42 hence cannot fail.
43 - Use ntfs_malloc_nofs_nofail() in the two critical regions in
44 fs/ntfs/runlist.c::ntfs_runlists_merge(). This means we no longer
45 need to panic() if the allocation fails as it now cannot fail.
46 - Fix two nasty runlist merging bugs that had gone unnoticed so far.
47 Thanks to Stefano Picerno for the bug report.
48 - Remove two bogus BUG_ON()s from fs/ntfs/mft.c.
49 - Fix handling of valid but empty mapping pairs array in
50 fs/ntfs/runlist.c::ntfs_mapping_pairs_decompress().
51 - Report unrepresentable inodes during ntfs_readdir() as KERN_WARNING
52 messages and include the inode number. Thanks to Yura Pakhuchiy for
53 pointing this out.
54 - Change ntfs_rl_truncate_nolock() to throw away the runlist if the new
55 length is zero.
56 - Add runlist.[hc]::ntfs_rl_punch_nolock() which punches a caller
57 specified hole into a runlist.
58 - Fix a bug in fs/ntfs/index.c::ntfs_index_lookup(). When the returned
59 index entry is in the index root, we forgot to set the @ir pointer in
60 the index context. Thanks to Yura Pakhuchiy for finding this bug.
61 - Remove bogus setting of PageError in ntfs_read_compressed_block().
62 - Add fs/ntfs/attrib.[hc]::ntfs_resident_attr_value_resize().
63 - Fix a bug in ntfs_map_runlist_nolock() where we forgot to protect
64 access to the allocated size in the ntfs inode with the size lock.
65 - Fix ntfs_attr_vcn_to_lcn_nolock() and ntfs_attr_find_vcn_nolock() to
66 return LCN_ENOENT when there is no runlist and the allocated size is
67 zero.
68 - Fix load_attribute_list() to handle the case of a NULL runlist.
69 - Fix handling of sparse attributes in ntfs_attr_make_non_resident().
70 - Add BUG() checks to ntfs_attr_make_non_resident() and ntfs_attr_set()
71 to ensure that these functions are never called for compressed or
72 encrypted attributes.
73 - Fix cluster (de)allocators to work when the runlist is NULL and more
74 importantly to take a locked runlist rather than them locking it
75 which leads to lock reversal.
76 - Truncate {a,c,m}time to the ntfs supported time granularity when
77 updating the times in the inode in ntfs_setattr().
78 - Fixup handling of sparse, compressed, and encrypted attributes in
79 fs/ntfs/inode.c::ntfs_read_locked_{,attr_,index_}inode(),
80 fs/ntfs/aops.c::ntfs_{read,write}page().
81 - Make ntfs_write_block() not instantiate sparse blocks if they contain
82 only zeroes.
83 - Optimize fs/ntfs/aops.c::ntfs_write_block() by extending the page
84 lock protection over the buffer submission for i/o which allows the
85 removal of the get_bh()/put_bh() pairs for each buffer.
86 - Fix fs/ntfs/aops.c::ntfs_{read,write}_block() to handle the case
87 where a concurrent truncate has truncated the runlist under our feet.
88 - Fix page_has_buffers()/page_buffers() handling in fs/ntfs/aops.c.
89 - In fs/ntfs/aops.c::ntfs_end_buffer_async_read(), use a bit spin lock
90 in the first buffer head instead of a driver global spin lock to
91 improve scalability.
92 - Minor fix to error handling and error message display in
93 fs/ntfs/aops.c::ntfs_prepare_nonresident_write().
94
252.1.23 - Implement extension of resident files and make writing safe as well as 952.1.23 - Implement extension of resident files and make writing safe as well as
26 many bug fixes, cleanups, and enhancements... 96 many bug fixes, cleanups, and enhancements...
27 97
diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile
index f083f27d8b69..894b2b876d35 100644
--- a/fs/ntfs/Makefile
+++ b/fs/ntfs/Makefile
@@ -6,7 +6,7 @@ ntfs-objs := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \
6 index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \ 6 index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \
7 unistr.o upcase.o 7 unistr.o upcase.o
8 8
9EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.23\" 9EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.24\"
10 10
11ifeq ($(CONFIG_NTFS_DEBUG),y) 11ifeq ($(CONFIG_NTFS_DEBUG),y)
12EXTRA_CFLAGS += -DDEBUG 12EXTRA_CFLAGS += -DDEBUG
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 78adad7a988d..b6cc8cf24626 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -27,6 +27,7 @@
27#include <linux/swap.h> 27#include <linux/swap.h>
28#include <linux/buffer_head.h> 28#include <linux/buffer_head.h>
29#include <linux/writeback.h> 29#include <linux/writeback.h>
30#include <linux/bit_spinlock.h>
30 31
31#include "aops.h" 32#include "aops.h"
32#include "attrib.h" 33#include "attrib.h"
@@ -55,9 +56,8 @@
55 */ 56 */
56static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) 57static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
57{ 58{
58 static DEFINE_SPINLOCK(page_uptodate_lock);
59 unsigned long flags; 59 unsigned long flags;
60 struct buffer_head *tmp; 60 struct buffer_head *first, *tmp;
61 struct page *page; 61 struct page *page;
62 ntfs_inode *ni; 62 ntfs_inode *ni;
63 int page_uptodate = 1; 63 int page_uptodate = 1;
@@ -89,11 +89,13 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
89 } 89 }
90 } else { 90 } else {
91 clear_buffer_uptodate(bh); 91 clear_buffer_uptodate(bh);
92 SetPageError(page);
92 ntfs_error(ni->vol->sb, "Buffer I/O error, logical block %llu.", 93 ntfs_error(ni->vol->sb, "Buffer I/O error, logical block %llu.",
93 (unsigned long long)bh->b_blocknr); 94 (unsigned long long)bh->b_blocknr);
94 SetPageError(page);
95 } 95 }
96 spin_lock_irqsave(&page_uptodate_lock, flags); 96 first = page_buffers(page);
97 local_irq_save(flags);
98 bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
97 clear_buffer_async_read(bh); 99 clear_buffer_async_read(bh);
98 unlock_buffer(bh); 100 unlock_buffer(bh);
99 tmp = bh; 101 tmp = bh;
@@ -108,7 +110,8 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
108 } 110 }
109 tmp = tmp->b_this_page; 111 tmp = tmp->b_this_page;
110 } while (tmp != bh); 112 } while (tmp != bh);
111 spin_unlock_irqrestore(&page_uptodate_lock, flags); 113 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
114 local_irq_restore(flags);
112 /* 115 /*
113 * If none of the buffers had errors then we can set the page uptodate, 116 * If none of the buffers had errors then we can set the page uptodate,
114 * but we first have to perform the post read mst fixups, if the 117 * but we first have to perform the post read mst fixups, if the
@@ -141,7 +144,8 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
141 unlock_page(page); 144 unlock_page(page);
142 return; 145 return;
143still_busy: 146still_busy:
144 spin_unlock_irqrestore(&page_uptodate_lock, flags); 147 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
148 local_irq_restore(flags);
145 return; 149 return;
146} 150}
147 151
@@ -185,13 +189,15 @@ static int ntfs_read_block(struct page *page)
185 blocksize_bits = VFS_I(ni)->i_blkbits; 189 blocksize_bits = VFS_I(ni)->i_blkbits;
186 blocksize = 1 << blocksize_bits; 190 blocksize = 1 << blocksize_bits;
187 191
188 if (!page_has_buffers(page)) 192 if (!page_has_buffers(page)) {
189 create_empty_buffers(page, blocksize, 0); 193 create_empty_buffers(page, blocksize, 0);
190 bh = head = page_buffers(page); 194 if (unlikely(!page_has_buffers(page))) {
191 if (unlikely(!bh)) { 195 unlock_page(page);
192 unlock_page(page); 196 return -ENOMEM;
193 return -ENOMEM; 197 }
194 } 198 }
199 bh = head = page_buffers(page);
200 BUG_ON(!bh);
195 201
196 iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits); 202 iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
197 read_lock_irqsave(&ni->size_lock, flags); 203 read_lock_irqsave(&ni->size_lock, flags);
@@ -204,6 +210,7 @@ static int ntfs_read_block(struct page *page)
204 nr = i = 0; 210 nr = i = 0;
205 do { 211 do {
206 u8 *kaddr; 212 u8 *kaddr;
213 int err;
207 214
208 if (unlikely(buffer_uptodate(bh))) 215 if (unlikely(buffer_uptodate(bh)))
209 continue; 216 continue;
@@ -211,6 +218,7 @@ static int ntfs_read_block(struct page *page)
211 arr[nr++] = bh; 218 arr[nr++] = bh;
212 continue; 219 continue;
213 } 220 }
221 err = 0;
214 bh->b_bdev = vol->sb->s_bdev; 222 bh->b_bdev = vol->sb->s_bdev;
215 /* Is the block within the allowed limits? */ 223 /* Is the block within the allowed limits? */
216 if (iblock < lblock) { 224 if (iblock < lblock) {
@@ -252,7 +260,6 @@ lock_retry_remap:
252 goto handle_hole; 260 goto handle_hole;
253 /* If first try and runlist unmapped, map and retry. */ 261 /* If first try and runlist unmapped, map and retry. */
254 if (!is_retry && lcn == LCN_RL_NOT_MAPPED) { 262 if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
255 int err;
256 is_retry = TRUE; 263 is_retry = TRUE;
257 /* 264 /*
258 * Attempt to map runlist, dropping lock for 265 * Attempt to map runlist, dropping lock for
@@ -263,20 +270,30 @@ lock_retry_remap:
263 if (likely(!err)) 270 if (likely(!err))
264 goto lock_retry_remap; 271 goto lock_retry_remap;
265 rl = NULL; 272 rl = NULL;
266 lcn = err;
267 } else if (!rl) 273 } else if (!rl)
268 up_read(&ni->runlist.lock); 274 up_read(&ni->runlist.lock);
275 /*
276 * If buffer is outside the runlist, treat it as a
277 * hole. This can happen due to concurrent truncate
278 * for example.
279 */
280 if (err == -ENOENT || lcn == LCN_ENOENT) {
281 err = 0;
282 goto handle_hole;
283 }
269 /* Hard error, zero out region. */ 284 /* Hard error, zero out region. */
285 if (!err)
286 err = -EIO;
270 bh->b_blocknr = -1; 287 bh->b_blocknr = -1;
271 SetPageError(page); 288 SetPageError(page);
272 ntfs_error(vol->sb, "Failed to read from inode 0x%lx, " 289 ntfs_error(vol->sb, "Failed to read from inode 0x%lx, "
273 "attribute type 0x%x, vcn 0x%llx, " 290 "attribute type 0x%x, vcn 0x%llx, "
274 "offset 0x%x because its location on " 291 "offset 0x%x because its location on "
275 "disk could not be determined%s " 292 "disk could not be determined%s "
276 "(error code %lli).", ni->mft_no, 293 "(error code %i).", ni->mft_no,
277 ni->type, (unsigned long long)vcn, 294 ni->type, (unsigned long long)vcn,
278 vcn_ofs, is_retry ? " even after " 295 vcn_ofs, is_retry ? " even after "
279 "retrying" : "", (long long)lcn); 296 "retrying" : "", err);
280 } 297 }
281 /* 298 /*
282 * Either iblock was outside lblock limits or 299 * Either iblock was outside lblock limits or
@@ -289,9 +306,10 @@ handle_hole:
289handle_zblock: 306handle_zblock:
290 kaddr = kmap_atomic(page, KM_USER0); 307 kaddr = kmap_atomic(page, KM_USER0);
291 memset(kaddr + i * blocksize, 0, blocksize); 308 memset(kaddr + i * blocksize, 0, blocksize);
292 flush_dcache_page(page);
293 kunmap_atomic(kaddr, KM_USER0); 309 kunmap_atomic(kaddr, KM_USER0);
294 set_buffer_uptodate(bh); 310 flush_dcache_page(page);
311 if (likely(!err))
312 set_buffer_uptodate(bh);
295 } while (i++, iblock++, (bh = bh->b_this_page) != head); 313 } while (i++, iblock++, (bh = bh->b_this_page) != head);
296 314
297 /* Release the lock if we took it. */ 315 /* Release the lock if we took it. */
@@ -367,31 +385,38 @@ retry_readpage:
367 return 0; 385 return 0;
368 } 386 }
369 ni = NTFS_I(page->mapping->host); 387 ni = NTFS_I(page->mapping->host);
370 388 /*
389 * Only $DATA attributes can be encrypted and only unnamed $DATA
390 * attributes can be compressed. Index root can have the flags set but
391 * this means to create compressed/encrypted files, not that the
392 * attribute is compressed/encrypted.
393 */
394 if (ni->type != AT_INDEX_ROOT) {
395 /* If attribute is encrypted, deny access, just like NT4. */
396 if (NInoEncrypted(ni)) {
397 BUG_ON(ni->type != AT_DATA);
398 err = -EACCES;
399 goto err_out;
400 }
401 /* Compressed data streams are handled in compress.c. */
402 if (NInoNonResident(ni) && NInoCompressed(ni)) {
403 BUG_ON(ni->type != AT_DATA);
404 BUG_ON(ni->name_len);
405 return ntfs_read_compressed_block(page);
406 }
407 }
371 /* NInoNonResident() == NInoIndexAllocPresent() */ 408 /* NInoNonResident() == NInoIndexAllocPresent() */
372 if (NInoNonResident(ni)) { 409 if (NInoNonResident(ni)) {
373 /* 410 /* Normal, non-resident data stream. */
374 * Only unnamed $DATA attributes can be compressed or
375 * encrypted.
376 */
377 if (ni->type == AT_DATA && !ni->name_len) {
378 /* If file is encrypted, deny access, just like NT4. */
379 if (NInoEncrypted(ni)) {
380 err = -EACCES;
381 goto err_out;
382 }
383 /* Compressed data streams are handled in compress.c. */
384 if (NInoCompressed(ni))
385 return ntfs_read_compressed_block(page);
386 }
387 /* Normal data stream. */
388 return ntfs_read_block(page); 411 return ntfs_read_block(page);
389 } 412 }
390 /* 413 /*
391 * Attribute is resident, implying it is not compressed or encrypted. 414 * Attribute is resident, implying it is not compressed or encrypted.
392 * This also means the attribute is smaller than an mft record and 415 * This also means the attribute is smaller than an mft record and
393 * hence smaller than a page, so can simply zero out any pages with 416 * hence smaller than a page, so can simply zero out any pages with
394 * index above 0. 417 * index above 0. Note the attribute can actually be marked compressed
418 * but if it is resident the actual data is not compressed so we are
419 * ok to ignore the compressed flag here.
395 */ 420 */
396 if (unlikely(page->index > 0)) { 421 if (unlikely(page->index > 0)) {
397 kaddr = kmap_atomic(page, KM_USER0); 422 kaddr = kmap_atomic(page, KM_USER0);
@@ -511,19 +536,21 @@ static int ntfs_write_block(struct page *page, struct writeback_control *wbc)
511 BUG_ON(!PageUptodate(page)); 536 BUG_ON(!PageUptodate(page));
512 create_empty_buffers(page, blocksize, 537 create_empty_buffers(page, blocksize,
513 (1 << BH_Uptodate) | (1 << BH_Dirty)); 538 (1 << BH_Uptodate) | (1 << BH_Dirty));
539 if (unlikely(!page_has_buffers(page))) {
540 ntfs_warning(vol->sb, "Error allocating page "
541 "buffers. Redirtying page so we try "
542 "again later.");
543 /*
544 * Put the page back on mapping->dirty_pages, but leave
545 * its buffers' dirty state as-is.
546 */
547 redirty_page_for_writepage(wbc, page);
548 unlock_page(page);
549 return 0;
550 }
514 } 551 }
515 bh = head = page_buffers(page); 552 bh = head = page_buffers(page);
516 if (unlikely(!bh)) { 553 BUG_ON(!bh);
517 ntfs_warning(vol->sb, "Error allocating page buffers. "
518 "Redirtying page so we try again later.");
519 /*
520 * Put the page back on mapping->dirty_pages, but leave its
521 * buffer's dirty state as-is.
522 */
523 redirty_page_for_writepage(wbc, page);
524 unlock_page(page);
525 return 0;
526 }
527 554
528 /* NOTE: Different naming scheme to ntfs_read_block()! */ 555 /* NOTE: Different naming scheme to ntfs_read_block()! */
529 556
@@ -670,6 +697,27 @@ lock_retry_remap:
670 } 697 }
671 /* It is a hole, need to instantiate it. */ 698 /* It is a hole, need to instantiate it. */
672 if (lcn == LCN_HOLE) { 699 if (lcn == LCN_HOLE) {
700 u8 *kaddr;
701 unsigned long *bpos, *bend;
702
703 /* Check if the buffer is zero. */
704 kaddr = kmap_atomic(page, KM_USER0);
705 bpos = (unsigned long *)(kaddr + bh_offset(bh));
706 bend = (unsigned long *)((u8*)bpos + blocksize);
707 do {
708 if (unlikely(*bpos))
709 break;
710 } while (likely(++bpos < bend));
711 kunmap_atomic(kaddr, KM_USER0);
712 if (bpos == bend) {
713 /*
714 * Buffer is zero and sparse, no need to write
715 * it.
716 */
717 bh->b_blocknr = -1;
718 clear_buffer_dirty(bh);
719 continue;
720 }
673 // TODO: Instantiate the hole. 721 // TODO: Instantiate the hole.
674 // clear_buffer_new(bh); 722 // clear_buffer_new(bh);
675 // unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); 723 // unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
@@ -690,20 +738,37 @@ lock_retry_remap:
690 if (likely(!err)) 738 if (likely(!err))
691 goto lock_retry_remap; 739 goto lock_retry_remap;
692 rl = NULL; 740 rl = NULL;
693 lcn = err;
694 } else if (!rl) 741 } else if (!rl)
695 up_read(&ni->runlist.lock); 742 up_read(&ni->runlist.lock);
743 /*
744 * If buffer is outside the runlist, truncate has cut it out
745 * of the runlist. Just clean and clear the buffer and set it
746 * uptodate so it can get discarded by the VM.
747 */
748 if (err == -ENOENT || lcn == LCN_ENOENT) {
749 u8 *kaddr;
750
751 bh->b_blocknr = -1;
752 clear_buffer_dirty(bh);
753 kaddr = kmap_atomic(page, KM_USER0);
754 memset(kaddr + bh_offset(bh), 0, blocksize);
755 kunmap_atomic(kaddr, KM_USER0);
756 flush_dcache_page(page);
757 set_buffer_uptodate(bh);
758 err = 0;
759 continue;
760 }
696 /* Failed to map the buffer, even after retrying. */ 761 /* Failed to map the buffer, even after retrying. */
762 if (!err)
763 err = -EIO;
697 bh->b_blocknr = -1; 764 bh->b_blocknr = -1;
698 ntfs_error(vol->sb, "Failed to write to inode 0x%lx, " 765 ntfs_error(vol->sb, "Failed to write to inode 0x%lx, "
699 "attribute type 0x%x, vcn 0x%llx, offset 0x%x " 766 "attribute type 0x%x, vcn 0x%llx, offset 0x%x "
700 "because its location on disk could not be " 767 "because its location on disk could not be "
701 "determined%s (error code %lli).", ni->mft_no, 768 "determined%s (error code %i).", ni->mft_no,
702 ni->type, (unsigned long long)vcn, 769 ni->type, (unsigned long long)vcn,
703 vcn_ofs, is_retry ? " even after " 770 vcn_ofs, is_retry ? " even after "
704 "retrying" : "", (long long)lcn); 771 "retrying" : "", err);
705 if (!err)
706 err = -EIO;
707 break; 772 break;
708 } while (block++, (bh = bh->b_this_page) != head); 773 } while (block++, (bh = bh->b_this_page) != head);
709 774
@@ -714,7 +779,7 @@ lock_retry_remap:
714 /* For the error case, need to reset bh to the beginning. */ 779 /* For the error case, need to reset bh to the beginning. */
715 bh = head; 780 bh = head;
716 781
717 /* Just an optimization, so ->readpage() isn't called later. */ 782 /* Just an optimization, so ->readpage() is not called later. */
718 if (unlikely(!PageUptodate(page))) { 783 if (unlikely(!PageUptodate(page))) {
719 int uptodate = 1; 784 int uptodate = 1;
720 do { 785 do {
@@ -730,7 +795,6 @@ lock_retry_remap:
730 795
731 /* Setup all mapped, dirty buffers for async write i/o. */ 796 /* Setup all mapped, dirty buffers for async write i/o. */
732 do { 797 do {
733 get_bh(bh);
734 if (buffer_mapped(bh) && buffer_dirty(bh)) { 798 if (buffer_mapped(bh) && buffer_dirty(bh)) {
735 lock_buffer(bh); 799 lock_buffer(bh);
736 if (test_clear_buffer_dirty(bh)) { 800 if (test_clear_buffer_dirty(bh)) {
@@ -768,14 +832,8 @@ lock_retry_remap:
768 832
769 BUG_ON(PageWriteback(page)); 833 BUG_ON(PageWriteback(page));
770 set_page_writeback(page); /* Keeps try_to_free_buffers() away. */ 834 set_page_writeback(page); /* Keeps try_to_free_buffers() away. */
771 unlock_page(page);
772 835
773 /* 836 /* Submit the prepared buffers for i/o. */
774 * Submit the prepared buffers for i/o. Note the page is unlocked,
775 * and the async write i/o completion handler can end_page_writeback()
776 * at any time after the *first* submit_bh(). So the buffers can then
777 * disappear...
778 */
779 need_end_writeback = TRUE; 837 need_end_writeback = TRUE;
780 do { 838 do {
781 struct buffer_head *next = bh->b_this_page; 839 struct buffer_head *next = bh->b_this_page;
@@ -783,9 +841,9 @@ lock_retry_remap:
783 submit_bh(WRITE, bh); 841 submit_bh(WRITE, bh);
784 need_end_writeback = FALSE; 842 need_end_writeback = FALSE;
785 } 843 }
786 put_bh(bh);
787 bh = next; 844 bh = next;
788 } while (bh != head); 845 } while (bh != head);
846 unlock_page(page);
789 847
790 /* If no i/o was started, need to end_page_writeback(). */ 848 /* If no i/o was started, need to end_page_writeback(). */
791 if (unlikely(need_end_writeback)) 849 if (unlikely(need_end_writeback))
@@ -860,7 +918,6 @@ static int ntfs_write_mst_block(struct page *page,
860 sync = (wbc->sync_mode == WB_SYNC_ALL); 918 sync = (wbc->sync_mode == WB_SYNC_ALL);
861 919
862 /* Make sure we have mapped buffers. */ 920 /* Make sure we have mapped buffers. */
863 BUG_ON(!page_has_buffers(page));
864 bh = head = page_buffers(page); 921 bh = head = page_buffers(page);
865 BUG_ON(!bh); 922 BUG_ON(!bh);
866 923
@@ -1280,38 +1337,42 @@ retry_writepage:
1280 ntfs_debug("Write outside i_size - truncated?"); 1337 ntfs_debug("Write outside i_size - truncated?");
1281 return 0; 1338 return 0;
1282 } 1339 }
1340 /*
1341 * Only $DATA attributes can be encrypted and only unnamed $DATA
1342 * attributes can be compressed. Index root can have the flags set but
1343 * this means to create compressed/encrypted files, not that the
1344 * attribute is compressed/encrypted.
1345 */
1346 if (ni->type != AT_INDEX_ROOT) {
1347 /* If file is encrypted, deny access, just like NT4. */
1348 if (NInoEncrypted(ni)) {
1349 unlock_page(page);
1350 BUG_ON(ni->type != AT_DATA);
1351 ntfs_debug("Denying write access to encrypted "
1352 "file.");
1353 return -EACCES;
1354 }
1355 /* Compressed data streams are handled in compress.c. */
1356 if (NInoNonResident(ni) && NInoCompressed(ni)) {
1357 BUG_ON(ni->type != AT_DATA);
1358 BUG_ON(ni->name_len);
1359 // TODO: Implement and replace this with
1360 // return ntfs_write_compressed_block(page);
1361 unlock_page(page);
1362 ntfs_error(vi->i_sb, "Writing to compressed files is "
1363 "not supported yet. Sorry.");
1364 return -EOPNOTSUPP;
1365 }
1366 // TODO: Implement and remove this check.
1367 if (NInoNonResident(ni) && NInoSparse(ni)) {
1368 unlock_page(page);
1369 ntfs_error(vi->i_sb, "Writing to sparse files is not "
1370 "supported yet. Sorry.");
1371 return -EOPNOTSUPP;
1372 }
1373 }
1283 /* NInoNonResident() == NInoIndexAllocPresent() */ 1374 /* NInoNonResident() == NInoIndexAllocPresent() */
1284 if (NInoNonResident(ni)) { 1375 if (NInoNonResident(ni)) {
1285 /*
1286 * Only unnamed $DATA attributes can be compressed, encrypted,
1287 * and/or sparse.
1288 */
1289 if (ni->type == AT_DATA && !ni->name_len) {
1290 /* If file is encrypted, deny access, just like NT4. */
1291 if (NInoEncrypted(ni)) {
1292 unlock_page(page);
1293 ntfs_debug("Denying write access to encrypted "
1294 "file.");
1295 return -EACCES;
1296 }
1297 /* Compressed data streams are handled in compress.c. */
1298 if (NInoCompressed(ni)) {
1299 // TODO: Implement and replace this check with
1300 // return ntfs_write_compressed_block(page);
1301 unlock_page(page);
1302 ntfs_error(vi->i_sb, "Writing to compressed "
1303 "files is not supported yet. "
1304 "Sorry.");
1305 return -EOPNOTSUPP;
1306 }
1307 // TODO: Implement and remove this check.
1308 if (NInoSparse(ni)) {
1309 unlock_page(page);
1310 ntfs_error(vi->i_sb, "Writing to sparse files "
1311 "is not supported yet. Sorry.");
1312 return -EOPNOTSUPP;
1313 }
1314 }
1315 /* We have to zero every time due to mmap-at-end-of-file. */ 1376 /* We have to zero every time due to mmap-at-end-of-file. */
1316 if (page->index >= (i_size >> PAGE_CACHE_SHIFT)) { 1377 if (page->index >= (i_size >> PAGE_CACHE_SHIFT)) {
1317 /* The page straddles i_size. */ 1378 /* The page straddles i_size. */
@@ -1324,14 +1385,16 @@ retry_writepage:
1324 /* Handle mst protected attributes. */ 1385 /* Handle mst protected attributes. */
1325 if (NInoMstProtected(ni)) 1386 if (NInoMstProtected(ni))
1326 return ntfs_write_mst_block(page, wbc); 1387 return ntfs_write_mst_block(page, wbc);
1327 /* Normal data stream. */ 1388 /* Normal, non-resident data stream. */
1328 return ntfs_write_block(page, wbc); 1389 return ntfs_write_block(page, wbc);
1329 } 1390 }
1330 /* 1391 /*
1331 * Attribute is resident, implying it is not compressed, encrypted, 1392 * Attribute is resident, implying it is not compressed, encrypted, or
1332 * sparse, or mst protected. This also means the attribute is smaller 1393 * mst protected. This also means the attribute is smaller than an mft
1333 * than an mft record and hence smaller than a page, so can simply 1394 * record and hence smaller than a page, so can simply return error on
1334 * return error on any pages with index above 0. 1395 * any pages with index above 0. Note the attribute can actually be
1396 * marked compressed but if it is resident the actual data is not
1397 * compressed so we are ok to ignore the compressed flag here.
1335 */ 1398 */
1336 BUG_ON(page_has_buffers(page)); 1399 BUG_ON(page_has_buffers(page));
1337 BUG_ON(!PageUptodate(page)); 1400 BUG_ON(!PageUptodate(page));
@@ -1380,30 +1443,14 @@ retry_writepage:
1380 BUG_ON(PageWriteback(page)); 1443 BUG_ON(PageWriteback(page));
1381 set_page_writeback(page); 1444 set_page_writeback(page);
1382 unlock_page(page); 1445 unlock_page(page);
1383
1384 /* 1446 /*
1385 * Here, we don't need to zero the out of bounds area everytime because 1447 * Here, we do not need to zero the out of bounds area everytime
1386 * the below memcpy() already takes care of the mmap-at-end-of-file 1448 * because the below memcpy() already takes care of the
1387 * requirements. If the file is converted to a non-resident one, then 1449 * mmap-at-end-of-file requirements. If the file is converted to a
1388 * the code path use is switched to the non-resident one where the 1450 * non-resident one, then the code path use is switched to the
1389 * zeroing happens on each ntfs_writepage() invocation. 1451 * non-resident one where the zeroing happens on each ntfs_writepage()
1390 * 1452 * invocation.
1391 * The above also applies nicely when i_size is decreased.
1392 *
1393 * When i_size is increased, the memory between the old and new i_size
1394 * _must_ be zeroed (or overwritten with new data). Otherwise we will
1395 * expose data to userspace/disk which should never have been exposed.
1396 *
1397 * FIXME: Ensure that i_size increases do the zeroing/overwriting and
1398 * if we cannot guarantee that, then enable the zeroing below. If the
1399 * zeroing below is enabled, we MUST move the unlock_page() from above
1400 * to after the kunmap_atomic(), i.e. just before the
1401 * end_page_writeback().
1402 * UPDATE: ntfs_prepare/commit_write() do the zeroing on i_size
1403 * increases for resident attributes so those are ok.
1404 * TODO: ntfs_truncate(), others?
1405 */ 1453 */
1406
1407 attr_len = le32_to_cpu(ctx->attr->data.resident.value_length); 1454 attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
1408 i_size = i_size_read(vi); 1455 i_size = i_size_read(vi);
1409 if (unlikely(attr_len > i_size)) { 1456 if (unlikely(attr_len > i_size)) {
@@ -1681,27 +1728,25 @@ lock_retry_remap:
1681 if (likely(!err)) 1728 if (likely(!err))
1682 goto lock_retry_remap; 1729 goto lock_retry_remap;
1683 rl = NULL; 1730 rl = NULL;
1684 lcn = err;
1685 } else if (!rl) 1731 } else if (!rl)
1686 up_read(&ni->runlist.lock); 1732 up_read(&ni->runlist.lock);
1687 /* 1733 /*
1688 * Failed to map the buffer, even after 1734 * Failed to map the buffer, even after
1689 * retrying. 1735 * retrying.
1690 */ 1736 */
1737 if (!err)
1738 err = -EIO;
1691 bh->b_blocknr = -1; 1739 bh->b_blocknr = -1;
1692 ntfs_error(vol->sb, "Failed to write to inode " 1740 ntfs_error(vol->sb, "Failed to write to inode "
1693 "0x%lx, attribute type 0x%x, " 1741 "0x%lx, attribute type 0x%x, "
1694 "vcn 0x%llx, offset 0x%x " 1742 "vcn 0x%llx, offset 0x%x "
1695 "because its location on disk " 1743 "because its location on disk "
1696 "could not be determined%s " 1744 "could not be determined%s "
1697 "(error code %lli).", 1745 "(error code %i).",
1698 ni->mft_no, ni->type, 1746 ni->mft_no, ni->type,
1699 (unsigned long long)vcn, 1747 (unsigned long long)vcn,
1700 vcn_ofs, is_retry ? " even " 1748 vcn_ofs, is_retry ? " even "
1701 "after retrying" : "", 1749 "after retrying" : "", err);
1702 (long long)lcn);
1703 if (!err)
1704 err = -EIO;
1705 goto err_out; 1750 goto err_out;
1706 } 1751 }
1707 /* We now have a successful remap, i.e. lcn >= 0. */ 1752 /* We now have a successful remap, i.e. lcn >= 0. */
@@ -2357,6 +2402,7 @@ void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs) {
2357 buffers_to_free = bh; 2402 buffers_to_free = bh;
2358 } 2403 }
2359 bh = head = page_buffers(page); 2404 bh = head = page_buffers(page);
2405 BUG_ON(!bh);
2360 do { 2406 do {
2361 bh_ofs = bh_offset(bh); 2407 bh_ofs = bh_offset(bh);
2362 if (bh_ofs + bh_size <= ofs) 2408 if (bh_ofs + bh_size <= ofs)
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index cd0f9e740b14..3f9a4ff42ee5 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -43,6 +43,9 @@
43 * which is not an error as such. This is -ENOENT. It means that @vcn is out 43 * which is not an error as such. This is -ENOENT. It means that @vcn is out
44 * of bounds of the runlist. 44 * of bounds of the runlist.
45 * 45 *
46 * Note the runlist can be NULL after this function returns if @vcn is zero and
47 * the attribute has zero allocated size, i.e. there simply is no runlist.
48 *
46 * Locking: - The runlist must be locked for writing. 49 * Locking: - The runlist must be locked for writing.
47 * - This function modifies the runlist. 50 * - This function modifies the runlist.
48 */ 51 */
@@ -54,6 +57,7 @@ int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn)
54 ATTR_RECORD *a; 57 ATTR_RECORD *a;
55 ntfs_attr_search_ctx *ctx; 58 ntfs_attr_search_ctx *ctx;
56 runlist_element *rl; 59 runlist_element *rl;
60 unsigned long flags;
57 int err = 0; 61 int err = 0;
58 62
59 ntfs_debug("Mapping runlist part containing vcn 0x%llx.", 63 ntfs_debug("Mapping runlist part containing vcn 0x%llx.",
@@ -85,8 +89,11 @@ int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn)
85 * ntfs_mapping_pairs_decompress() fails. 89 * ntfs_mapping_pairs_decompress() fails.
86 */ 90 */
87 end_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn) + 1; 91 end_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn) + 1;
88 if (unlikely(!a->data.non_resident.lowest_vcn && end_vcn <= 1)) 92 if (unlikely(!a->data.non_resident.lowest_vcn && end_vcn <= 1)) {
93 read_lock_irqsave(&ni->size_lock, flags);
89 end_vcn = ni->allocated_size >> ni->vol->cluster_size_bits; 94 end_vcn = ni->allocated_size >> ni->vol->cluster_size_bits;
95 read_unlock_irqrestore(&ni->size_lock, flags);
96 }
90 if (unlikely(vcn >= end_vcn)) { 97 if (unlikely(vcn >= end_vcn)) {
91 err = -ENOENT; 98 err = -ENOENT;
92 goto err_out; 99 goto err_out;
@@ -165,6 +172,7 @@ LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn,
165 const BOOL write_locked) 172 const BOOL write_locked)
166{ 173{
167 LCN lcn; 174 LCN lcn;
175 unsigned long flags;
168 BOOL is_retry = FALSE; 176 BOOL is_retry = FALSE;
169 177
170 ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, %s_locked.", 178 ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, %s_locked.",
@@ -173,6 +181,14 @@ LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn,
173 BUG_ON(!ni); 181 BUG_ON(!ni);
174 BUG_ON(!NInoNonResident(ni)); 182 BUG_ON(!NInoNonResident(ni));
175 BUG_ON(vcn < 0); 183 BUG_ON(vcn < 0);
184 if (!ni->runlist.rl) {
185 read_lock_irqsave(&ni->size_lock, flags);
186 if (!ni->allocated_size) {
187 read_unlock_irqrestore(&ni->size_lock, flags);
188 return LCN_ENOENT;
189 }
190 read_unlock_irqrestore(&ni->size_lock, flags);
191 }
176retry_remap: 192retry_remap:
177 /* Convert vcn to lcn. If that fails map the runlist and retry once. */ 193 /* Convert vcn to lcn. If that fails map the runlist and retry once. */
178 lcn = ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn); 194 lcn = ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn);
@@ -255,6 +271,7 @@ retry_remap:
255runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni, const VCN vcn, 271runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni, const VCN vcn,
256 const BOOL write_locked) 272 const BOOL write_locked)
257{ 273{
274 unsigned long flags;
258 runlist_element *rl; 275 runlist_element *rl;
259 int err = 0; 276 int err = 0;
260 BOOL is_retry = FALSE; 277 BOOL is_retry = FALSE;
@@ -265,6 +282,14 @@ runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni, const VCN vcn,
265 BUG_ON(!ni); 282 BUG_ON(!ni);
266 BUG_ON(!NInoNonResident(ni)); 283 BUG_ON(!NInoNonResident(ni));
267 BUG_ON(vcn < 0); 284 BUG_ON(vcn < 0);
285 if (!ni->runlist.rl) {
286 read_lock_irqsave(&ni->size_lock, flags);
287 if (!ni->allocated_size) {
288 read_unlock_irqrestore(&ni->size_lock, flags);
289 return ERR_PTR(-ENOENT);
290 }
291 read_unlock_irqrestore(&ni->size_lock, flags);
292 }
268retry_remap: 293retry_remap:
269 rl = ni->runlist.rl; 294 rl = ni->runlist.rl;
270 if (likely(rl && vcn >= rl[0].vcn)) { 295 if (likely(rl && vcn >= rl[0].vcn)) {
@@ -528,6 +553,11 @@ int load_attribute_list(ntfs_volume *vol, runlist *runlist, u8 *al_start,
528 block_size_bits = sb->s_blocksize_bits; 553 block_size_bits = sb->s_blocksize_bits;
529 down_read(&runlist->lock); 554 down_read(&runlist->lock);
530 rl = runlist->rl; 555 rl = runlist->rl;
556 if (!rl) {
557 ntfs_error(sb, "Cannot read attribute list since runlist is "
558 "missing.");
559 goto err_out;
560 }
531 /* Read all clusters specified by the runlist one run at a time. */ 561 /* Read all clusters specified by the runlist one run at a time. */
532 while (rl->length) { 562 while (rl->length) {
533 lcn = ntfs_rl_vcn_to_lcn(rl, rl->vcn); 563 lcn = ntfs_rl_vcn_to_lcn(rl, rl->vcn);
@@ -1247,6 +1277,46 @@ int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size)
1247} 1277}
1248 1278
1249/** 1279/**
1280 * ntfs_resident_attr_value_resize - resize the value of a resident attribute
1281 * @m: mft record containing attribute record
1282 * @a: attribute record whose value to resize
1283 * @new_size: new size in bytes to which to resize the attribute value of @a
1284 *
1285 * Resize the value of the attribute @a in the mft record @m to @new_size bytes.
1286 * If the value is made bigger, the newly allocated space is cleared.
1287 *
1288 * Return 0 on success and -errno on error. The following error codes are
1289 * defined:
1290 * -ENOSPC - Not enough space in the mft record @m to perform the resize.
1291 *
1292 * Note: On error, no modifications have been performed whatsoever.
1293 *
1294 * Warning: If you make a record smaller without having copied all the data you
1295 * are interested in the data may be overwritten.
1296 */
1297int ntfs_resident_attr_value_resize(MFT_RECORD *m, ATTR_RECORD *a,
1298 const u32 new_size)
1299{
1300 u32 old_size;
1301
1302 /* Resize the resident part of the attribute record. */
1303 if (ntfs_attr_record_resize(m, a,
1304 le16_to_cpu(a->data.resident.value_offset) + new_size))
1305 return -ENOSPC;
1306 /*
1307 * The resize succeeded! If we made the attribute value bigger, clear
1308 * the area between the old size and @new_size.
1309 */
1310 old_size = le32_to_cpu(a->data.resident.value_length);
1311 if (new_size > old_size)
1312 memset((u8*)a + le16_to_cpu(a->data.resident.value_offset) +
1313 old_size, 0, new_size - old_size);
1314 /* Finally update the length of the attribute value. */
1315 a->data.resident.value_length = cpu_to_le32(new_size);
1316 return 0;
1317}
1318
1319/**
1250 * ntfs_attr_make_non_resident - convert a resident to a non-resident attribute 1320 * ntfs_attr_make_non_resident - convert a resident to a non-resident attribute
1251 * @ni: ntfs inode describing the attribute to convert 1321 * @ni: ntfs inode describing the attribute to convert
1252 * 1322 *
@@ -1302,6 +1372,12 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
1302 return err; 1372 return err;
1303 } 1373 }
1304 /* 1374 /*
1375 * FIXME: Compressed and encrypted attributes are not supported when
1376 * writing and we should never have gotten here for them.
1377 */
1378 BUG_ON(NInoCompressed(ni));
1379 BUG_ON(NInoEncrypted(ni));
1380 /*
1305 * The size needs to be aligned to a cluster boundary for allocation 1381 * The size needs to be aligned to a cluster boundary for allocation
1306 * purposes. 1382 * purposes.
1307 */ 1383 */
@@ -1377,10 +1453,15 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
1377 BUG_ON(a->non_resident); 1453 BUG_ON(a->non_resident);
1378 /* 1454 /*
1379 * Calculate new offsets for the name and the mapping pairs array. 1455 * Calculate new offsets for the name and the mapping pairs array.
1380 * We assume the attribute is not compressed or sparse.
1381 */ 1456 */
1382 name_ofs = (offsetof(ATTR_REC, 1457 if (NInoSparse(ni) || NInoCompressed(ni))
1383 data.non_resident.compressed_size) + 7) & ~7; 1458 name_ofs = (offsetof(ATTR_REC,
1459 data.non_resident.compressed_size) +
1460 sizeof(a->data.non_resident.compressed_size) +
1461 7) & ~7;
1462 else
1463 name_ofs = (offsetof(ATTR_REC,
1464 data.non_resident.compressed_size) + 7) & ~7;
1384 mp_ofs = (name_ofs + a->name_length * sizeof(ntfschar) + 7) & ~7; 1465 mp_ofs = (name_ofs + a->name_length * sizeof(ntfschar) + 7) & ~7;
1385 /* 1466 /*
1386 * Determine the size of the resident part of the now non-resident 1467 * Determine the size of the resident part of the now non-resident
@@ -1419,24 +1500,23 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
1419 memmove((u8*)a + name_ofs, (u8*)a + le16_to_cpu(a->name_offset), 1500 memmove((u8*)a + name_ofs, (u8*)a + le16_to_cpu(a->name_offset),
1420 a->name_length * sizeof(ntfschar)); 1501 a->name_length * sizeof(ntfschar));
1421 a->name_offset = cpu_to_le16(name_ofs); 1502 a->name_offset = cpu_to_le16(name_ofs);
1422 /*
1423 * FIXME: For now just clear all of these as we do not support them
1424 * when writing.
1425 */
1426 a->flags &= cpu_to_le16(0xffff & ~le16_to_cpu(ATTR_IS_SPARSE |
1427 ATTR_IS_ENCRYPTED | ATTR_COMPRESSION_MASK));
1428 /* Setup the fields specific to non-resident attributes. */ 1503 /* Setup the fields specific to non-resident attributes. */
1429 a->data.non_resident.lowest_vcn = 0; 1504 a->data.non_resident.lowest_vcn = 0;
1430 a->data.non_resident.highest_vcn = cpu_to_sle64((new_size - 1) >> 1505 a->data.non_resident.highest_vcn = cpu_to_sle64((new_size - 1) >>
1431 vol->cluster_size_bits); 1506 vol->cluster_size_bits);
1432 a->data.non_resident.mapping_pairs_offset = cpu_to_le16(mp_ofs); 1507 a->data.non_resident.mapping_pairs_offset = cpu_to_le16(mp_ofs);
1433 a->data.non_resident.compression_unit = 0;
1434 memset(&a->data.non_resident.reserved, 0, 1508 memset(&a->data.non_resident.reserved, 0,
1435 sizeof(a->data.non_resident.reserved)); 1509 sizeof(a->data.non_resident.reserved));
1436 a->data.non_resident.allocated_size = cpu_to_sle64(new_size); 1510 a->data.non_resident.allocated_size = cpu_to_sle64(new_size);
1437 a->data.non_resident.data_size = 1511 a->data.non_resident.data_size =
1438 a->data.non_resident.initialized_size = 1512 a->data.non_resident.initialized_size =
1439 cpu_to_sle64(attr_size); 1513 cpu_to_sle64(attr_size);
1514 if (NInoSparse(ni) || NInoCompressed(ni)) {
1515 a->data.non_resident.compression_unit = 4;
1516 a->data.non_resident.compressed_size =
1517 a->data.non_resident.allocated_size;
1518 } else
1519 a->data.non_resident.compression_unit = 0;
1440 /* Generate the mapping pairs array into the attribute record. */ 1520 /* Generate the mapping pairs array into the attribute record. */
1441 err = ntfs_mapping_pairs_build(vol, (u8*)a + mp_ofs, 1521 err = ntfs_mapping_pairs_build(vol, (u8*)a + mp_ofs,
1442 arec_size - mp_ofs, rl, 0, -1, NULL); 1522 arec_size - mp_ofs, rl, 0, -1, NULL);
@@ -1446,16 +1526,19 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
1446 goto undo_err_out; 1526 goto undo_err_out;
1447 } 1527 }
1448 /* Setup the in-memory attribute structure to be non-resident. */ 1528 /* Setup the in-memory attribute structure to be non-resident. */
1449 /*
1450 * FIXME: For now just clear all of these as we do not support them
1451 * when writing.
1452 */
1453 NInoClearSparse(ni);
1454 NInoClearEncrypted(ni);
1455 NInoClearCompressed(ni);
1456 ni->runlist.rl = rl; 1529 ni->runlist.rl = rl;
1457 write_lock_irqsave(&ni->size_lock, flags); 1530 write_lock_irqsave(&ni->size_lock, flags);
1458 ni->allocated_size = new_size; 1531 ni->allocated_size = new_size;
1532 if (NInoSparse(ni) || NInoCompressed(ni)) {
1533 ni->itype.compressed.size = ni->allocated_size;
1534 ni->itype.compressed.block_size = 1U <<
1535 (a->data.non_resident.compression_unit +
1536 vol->cluster_size_bits);
1537 ni->itype.compressed.block_size_bits =
1538 ffs(ni->itype.compressed.block_size) - 1;
1539 ni->itype.compressed.block_clusters = 1U <<
1540 a->data.non_resident.compression_unit;
1541 }
1459 write_unlock_irqrestore(&ni->size_lock, flags); 1542 write_unlock_irqrestore(&ni->size_lock, flags);
1460 /* 1543 /*
1461 * This needs to be last since the address space operations ->readpage 1544 * This needs to be last since the address space operations ->readpage
@@ -1603,6 +1686,12 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
1603 BUG_ON(cnt < 0); 1686 BUG_ON(cnt < 0);
1604 if (!cnt) 1687 if (!cnt)
1605 goto done; 1688 goto done;
1689 /*
1690 * FIXME: Compressed and encrypted attributes are not supported when
1691 * writing and we should never have gotten here for them.
1692 */
1693 BUG_ON(NInoCompressed(ni));
1694 BUG_ON(NInoEncrypted(ni));
1606 mapping = VFS_I(ni)->i_mapping; 1695 mapping = VFS_I(ni)->i_mapping;
1607 /* Work out the starting index and page offset. */ 1696 /* Work out the starting index and page offset. */
1608 idx = ofs >> PAGE_CACHE_SHIFT; 1697 idx = ofs >> PAGE_CACHE_SHIFT;
diff --git a/fs/ntfs/attrib.h b/fs/ntfs/attrib.h
index 0e4ac6d3c0e7..0618ed6fd7b3 100644
--- a/fs/ntfs/attrib.h
+++ b/fs/ntfs/attrib.h
@@ -99,6 +99,8 @@ extern int ntfs_attr_can_be_resident(const ntfs_volume *vol,
99 const ATTR_TYPE type); 99 const ATTR_TYPE type);
100 100
101extern int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size); 101extern int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size);
102extern int ntfs_resident_attr_value_resize(MFT_RECORD *m, ATTR_RECORD *a,
103 const u32 new_size);
102 104
103extern int ntfs_attr_make_non_resident(ntfs_inode *ni); 105extern int ntfs_attr_make_non_resident(ntfs_inode *ni);
104 106
diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c
index 6d265cfd49aa..25d24106f893 100644
--- a/fs/ntfs/compress.c
+++ b/fs/ntfs/compress.c
@@ -539,7 +539,6 @@ int ntfs_read_compressed_block(struct page *page)
539 if (unlikely(!pages || !bhs)) { 539 if (unlikely(!pages || !bhs)) {
540 kfree(bhs); 540 kfree(bhs);
541 kfree(pages); 541 kfree(pages);
542 SetPageError(page);
543 unlock_page(page); 542 unlock_page(page);
544 ntfs_error(vol->sb, "Failed to allocate internal buffers."); 543 ntfs_error(vol->sb, "Failed to allocate internal buffers.");
545 return -ENOMEM; 544 return -ENOMEM;
@@ -871,9 +870,6 @@ lock_retry_remap:
871 for (; prev_cur_page < cur_page; prev_cur_page++) { 870 for (; prev_cur_page < cur_page; prev_cur_page++) {
872 page = pages[prev_cur_page]; 871 page = pages[prev_cur_page];
873 if (page) { 872 if (page) {
874 if (prev_cur_page == xpage &&
875 !xpage_done)
876 SetPageError(page);
877 flush_dcache_page(page); 873 flush_dcache_page(page);
878 kunmap(page); 874 kunmap(page);
879 unlock_page(page); 875 unlock_page(page);
@@ -904,8 +900,6 @@ lock_retry_remap:
904 "Terminating them with extreme " 900 "Terminating them with extreme "
905 "prejudice. Inode 0x%lx, page index " 901 "prejudice. Inode 0x%lx, page index "
906 "0x%lx.", ni->mft_no, page->index); 902 "0x%lx.", ni->mft_no, page->index);
907 if (cur_page == xpage && !xpage_done)
908 SetPageError(page);
909 flush_dcache_page(page); 903 flush_dcache_page(page);
910 kunmap(page); 904 kunmap(page);
911 unlock_page(page); 905 unlock_page(page);
@@ -953,8 +947,6 @@ err_out:
953 for (i = cur_page; i < max_page; i++) { 947 for (i = cur_page; i < max_page; i++) {
954 page = pages[i]; 948 page = pages[i];
955 if (page) { 949 if (page) {
956 if (i == xpage && !xpage_done)
957 SetPageError(page);
958 flush_dcache_page(page); 950 flush_dcache_page(page);
959 kunmap(page); 951 kunmap(page);
960 unlock_page(page); 952 unlock_page(page);
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index 46779471c542..795c3d1930f5 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -1051,7 +1051,8 @@ static inline int ntfs_filldir(ntfs_volume *vol, loff_t fpos,
1051 ie->key.file_name.file_name_length, &name, 1051 ie->key.file_name.file_name_length, &name,
1052 NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1); 1052 NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1);
1053 if (name_len <= 0) { 1053 if (name_len <= 0) {
1054 ntfs_debug("Skipping unrepresentable file."); 1054 ntfs_warning(vol->sb, "Skipping unrepresentable inode 0x%llx.",
1055 (long long)MREF_LE(ie->data.dir.indexed_file));
1055 return 0; 1056 return 0;
1056 } 1057 }
1057 if (ie->key.file_name.file_attributes & 1058 if (ie->key.file_name.file_attributes &
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index e0f530ce6b99..be9fd1dd423d 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * file.c - NTFS kernel file operations. Part of the Linux-NTFS project. 2 * file.c - NTFS kernel file operations. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2001-2004 Anton Altaparmakov 4 * Copyright (c) 2001-2005 Anton Altaparmakov
5 * 5 *
6 * This program/include file is free software; you can redistribute it and/or 6 * This program/include file is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as published 7 * modify it under the terms of the GNU General Public License as published
@@ -94,6 +94,11 @@ static int ntfs_file_fsync(struct file *filp, struct dentry *dentry,
94 if (!datasync || !NInoNonResident(NTFS_I(vi))) 94 if (!datasync || !NInoNonResident(NTFS_I(vi)))
95 ret = ntfs_write_inode(vi, 1); 95 ret = ntfs_write_inode(vi, 1);
96 write_inode_now(vi, !datasync); 96 write_inode_now(vi, !datasync);
97 /*
98 * NOTE: If we were to use mapping->private_list (see ext2 and
99 * fs/buffer.c) for dirty blocks then we could optimize the below to be
100 * sync_mapping_buffers(vi->i_mapping).
101 */
97 err = sync_blockdev(vi->i_sb->s_bdev); 102 err = sync_blockdev(vi->i_sb->s_bdev);
98 if (unlikely(err && !ret)) 103 if (unlikely(err && !ret))
99 ret = err; 104 ret = err;
diff --git a/fs/ntfs/index.c b/fs/ntfs/index.c
index 11fd5307d780..8f2d5727546f 100644
--- a/fs/ntfs/index.c
+++ b/fs/ntfs/index.c
@@ -205,6 +205,7 @@ int ntfs_index_lookup(const void *key, const int key_len,
205 &ie->key, key_len)) { 205 &ie->key, key_len)) {
206ir_done: 206ir_done:
207 ictx->is_in_root = TRUE; 207 ictx->is_in_root = TRUE;
208 ictx->ir = ir;
208 ictx->actx = actx; 209 ictx->actx = actx;
209 ictx->base_ni = base_ni; 210 ictx->base_ni = base_ni;
210 ictx->ia = NULL; 211 ictx->ia = NULL;
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 886214a77f90..dc4bbe3acf5c 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -1013,41 +1013,50 @@ skip_large_dir_stuff:
1013 } 1013 }
1014 a = ctx->attr; 1014 a = ctx->attr;
1015 /* Setup the state. */ 1015 /* Setup the state. */
1016 if (a->non_resident) { 1016 if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_SPARSE)) {
1017 NInoSetNonResident(ni); 1017 if (a->flags & ATTR_COMPRESSION_MASK) {
1018 if (a->flags & (ATTR_COMPRESSION_MASK | 1018 NInoSetCompressed(ni);
1019 ATTR_IS_SPARSE)) { 1019 if (vol->cluster_size > 4096) {
1020 if (a->flags & ATTR_COMPRESSION_MASK) { 1020 ntfs_error(vi->i_sb, "Found "
1021 NInoSetCompressed(ni);
1022 if (vol->cluster_size > 4096) {
1023 ntfs_error(vi->i_sb, "Found "
1024 "compressed data but " 1021 "compressed data but "
1025 "compression is " 1022 "compression is "
1026 "disabled due to " 1023 "disabled due to "
1027 "cluster size (%i) > " 1024 "cluster size (%i) > "
1028 "4kiB.", 1025 "4kiB.",
1029 vol->cluster_size); 1026 vol->cluster_size);
1030 goto unm_err_out; 1027 goto unm_err_out;
1031 } 1028 }
1032 if ((a->flags & ATTR_COMPRESSION_MASK) 1029 if ((a->flags & ATTR_COMPRESSION_MASK)
1033 != ATTR_IS_COMPRESSED) { 1030 != ATTR_IS_COMPRESSED) {
1034 ntfs_error(vi->i_sb, "Found " 1031 ntfs_error(vi->i_sb, "Found unknown "
1035 "unknown compression " 1032 "compression method "
1036 "method or corrupt " 1033 "or corrupt file.");
1037 "file."); 1034 goto unm_err_out;
1038 goto unm_err_out;
1039 }
1040 } 1035 }
1041 if (a->flags & ATTR_IS_SPARSE) 1036 }
1042 NInoSetSparse(ni); 1037 if (a->flags & ATTR_IS_SPARSE)
1038 NInoSetSparse(ni);
1039 }
1040 if (a->flags & ATTR_IS_ENCRYPTED) {
1041 if (NInoCompressed(ni)) {
1042 ntfs_error(vi->i_sb, "Found encrypted and "
1043 "compressed data.");
1044 goto unm_err_out;
1045 }
1046 NInoSetEncrypted(ni);
1047 }
1048 if (a->non_resident) {
1049 NInoSetNonResident(ni);
1050 if (NInoCompressed(ni) || NInoSparse(ni)) {
1043 if (a->data.non_resident.compression_unit != 1051 if (a->data.non_resident.compression_unit !=
1044 4) { 1052 4) {
1045 ntfs_error(vi->i_sb, "Found " 1053 ntfs_error(vi->i_sb, "Found "
1046 "nonstandard compression unit " 1054 "nonstandard "
1047 "(%u instead of 4). Cannot " 1055 "compression unit (%u "
1048 "handle this.", 1056 "instead of 4). "
1049 a->data.non_resident. 1057 "Cannot handle this.",
1050 compression_unit); 1058 a->data.non_resident.
1059 compression_unit);
1051 err = -EOPNOTSUPP; 1060 err = -EOPNOTSUPP;
1052 goto unm_err_out; 1061 goto unm_err_out;
1053 } 1062 }
@@ -1065,14 +1074,6 @@ skip_large_dir_stuff:
1065 a->data.non_resident. 1074 a->data.non_resident.
1066 compressed_size); 1075 compressed_size);
1067 } 1076 }
1068 if (a->flags & ATTR_IS_ENCRYPTED) {
1069 if (a->flags & ATTR_COMPRESSION_MASK) {
1070 ntfs_error(vi->i_sb, "Found encrypted "
1071 "and compressed data.");
1072 goto unm_err_out;
1073 }
1074 NInoSetEncrypted(ni);
1075 }
1076 if (a->data.non_resident.lowest_vcn) { 1077 if (a->data.non_resident.lowest_vcn) {
1077 ntfs_error(vi->i_sb, "First extent of $DATA " 1078 ntfs_error(vi->i_sb, "First extent of $DATA "
1078 "attribute has non zero " 1079 "attribute has non zero "
@@ -1212,6 +1213,75 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
1212 if (unlikely(err)) 1213 if (unlikely(err))
1213 goto unm_err_out; 1214 goto unm_err_out;
1214 a = ctx->attr; 1215 a = ctx->attr;
1216 if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_SPARSE)) {
1217 if (a->flags & ATTR_COMPRESSION_MASK) {
1218 NInoSetCompressed(ni);
1219 if ((ni->type != AT_DATA) || (ni->type == AT_DATA &&
1220 ni->name_len)) {
1221 ntfs_error(vi->i_sb, "Found compressed "
1222 "non-data or named data "
1223 "attribute. Please report "
1224 "you saw this message to "
1225 "linux-ntfs-dev@lists."
1226 "sourceforge.net");
1227 goto unm_err_out;
1228 }
1229 if (vol->cluster_size > 4096) {
1230 ntfs_error(vi->i_sb, "Found compressed "
1231 "attribute but compression is "
1232 "disabled due to cluster size "
1233 "(%i) > 4kiB.",
1234 vol->cluster_size);
1235 goto unm_err_out;
1236 }
1237 if ((a->flags & ATTR_COMPRESSION_MASK) !=
1238 ATTR_IS_COMPRESSED) {
1239 ntfs_error(vi->i_sb, "Found unknown "
1240 "compression method.");
1241 goto unm_err_out;
1242 }
1243 }
1244 /*
1245 * The encryption flag set in an index root just means to
1246 * compress all files.
1247 */
1248 if (NInoMstProtected(ni) && ni->type != AT_INDEX_ROOT) {
1249 ntfs_error(vi->i_sb, "Found mst protected attribute "
1250 "but the attribute is %s. Please "
1251 "report you saw this message to "
1252 "linux-ntfs-dev@lists.sourceforge.net",
1253 NInoCompressed(ni) ? "compressed" :
1254 "sparse");
1255 goto unm_err_out;
1256 }
1257 if (a->flags & ATTR_IS_SPARSE)
1258 NInoSetSparse(ni);
1259 }
1260 if (a->flags & ATTR_IS_ENCRYPTED) {
1261 if (NInoCompressed(ni)) {
1262 ntfs_error(vi->i_sb, "Found encrypted and compressed "
1263 "data.");
1264 goto unm_err_out;
1265 }
1266 /*
1267 * The encryption flag set in an index root just means to
1268 * encrypt all files.
1269 */
1270 if (NInoMstProtected(ni) && ni->type != AT_INDEX_ROOT) {
1271 ntfs_error(vi->i_sb, "Found mst protected attribute "
1272 "but the attribute is encrypted. "
1273 "Please report you saw this message "
1274 "to linux-ntfs-dev@lists.sourceforge."
1275 "net");
1276 goto unm_err_out;
1277 }
1278 if (ni->type != AT_DATA) {
1279 ntfs_error(vi->i_sb, "Found encrypted non-data "
1280 "attribute.");
1281 goto unm_err_out;
1282 }
1283 NInoSetEncrypted(ni);
1284 }
1215 if (!a->non_resident) { 1285 if (!a->non_resident) {
1216 /* Ensure the attribute name is placed before the value. */ 1286 /* Ensure the attribute name is placed before the value. */
1217 if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >= 1287 if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
@@ -1220,11 +1290,10 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
1220 "the attribute value."); 1290 "the attribute value.");
1221 goto unm_err_out; 1291 goto unm_err_out;
1222 } 1292 }
1223 if (NInoMstProtected(ni) || a->flags) { 1293 if (NInoMstProtected(ni)) {
1224 ntfs_error(vi->i_sb, "Found mst protected attribute " 1294 ntfs_error(vi->i_sb, "Found mst protected attribute "
1225 "or attribute with non-zero flags but " 1295 "but the attribute is resident. "
1226 "the attribute is resident. Please " 1296 "Please report you saw this message to "
1227 "report you saw this message to "
1228 "linux-ntfs-dev@lists.sourceforge.net"); 1297 "linux-ntfs-dev@lists.sourceforge.net");
1229 goto unm_err_out; 1298 goto unm_err_out;
1230 } 1299 }
@@ -1250,50 +1319,8 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
1250 "the mapping pairs array."); 1319 "the mapping pairs array.");
1251 goto unm_err_out; 1320 goto unm_err_out;
1252 } 1321 }
1253 if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_SPARSE)) { 1322 if ((NInoCompressed(ni) || NInoSparse(ni)) &&
1254 if (a->flags & ATTR_COMPRESSION_MASK) { 1323 ni->type != AT_INDEX_ROOT) {
1255 NInoSetCompressed(ni);
1256 if ((ni->type != AT_DATA) || (ni->type ==
1257 AT_DATA && ni->name_len)) {
1258 ntfs_error(vi->i_sb, "Found compressed "
1259 "non-data or named "
1260 "data attribute. "
1261 "Please report you "
1262 "saw this message to "
1263 "linux-ntfs-dev@lists."
1264 "sourceforge.net");
1265 goto unm_err_out;
1266 }
1267 if (vol->cluster_size > 4096) {
1268 ntfs_error(vi->i_sb, "Found compressed "
1269 "attribute but "
1270 "compression is "
1271 "disabled due to "
1272 "cluster size (%i) > "
1273 "4kiB.",
1274 vol->cluster_size);
1275 goto unm_err_out;
1276 }
1277 if ((a->flags & ATTR_COMPRESSION_MASK) !=
1278 ATTR_IS_COMPRESSED) {
1279 ntfs_error(vi->i_sb, "Found unknown "
1280 "compression method.");
1281 goto unm_err_out;
1282 }
1283 }
1284 if (NInoMstProtected(ni)) {
1285 ntfs_error(vi->i_sb, "Found mst protected "
1286 "attribute but the attribute "
1287 "is %s. Please report you "
1288 "saw this message to "
1289 "linux-ntfs-dev@lists."
1290 "sourceforge.net",
1291 NInoCompressed(ni) ?
1292 "compressed" : "sparse");
1293 goto unm_err_out;
1294 }
1295 if (a->flags & ATTR_IS_SPARSE)
1296 NInoSetSparse(ni);
1297 if (a->data.non_resident.compression_unit != 4) { 1324 if (a->data.non_resident.compression_unit != 4) {
1298 ntfs_error(vi->i_sb, "Found nonstandard " 1325 ntfs_error(vi->i_sb, "Found nonstandard "
1299 "compression unit (%u instead " 1326 "compression unit (%u instead "
@@ -1313,23 +1340,6 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
1313 ni->itype.compressed.size = sle64_to_cpu( 1340 ni->itype.compressed.size = sle64_to_cpu(
1314 a->data.non_resident.compressed_size); 1341 a->data.non_resident.compressed_size);
1315 } 1342 }
1316 if (a->flags & ATTR_IS_ENCRYPTED) {
1317 if (a->flags & ATTR_COMPRESSION_MASK) {
1318 ntfs_error(vi->i_sb, "Found encrypted and "
1319 "compressed data.");
1320 goto unm_err_out;
1321 }
1322 if (NInoMstProtected(ni)) {
1323 ntfs_error(vi->i_sb, "Found mst protected "
1324 "attribute but the attribute "
1325 "is encrypted. Please report "
1326 "you saw this message to "
1327 "linux-ntfs-dev@lists."
1328 "sourceforge.net");
1329 goto unm_err_out;
1330 }
1331 NInoSetEncrypted(ni);
1332 }
1333 if (a->data.non_resident.lowest_vcn) { 1343 if (a->data.non_resident.lowest_vcn) {
1334 ntfs_error(vi->i_sb, "First extent of attribute has " 1344 ntfs_error(vi->i_sb, "First extent of attribute has "
1335 "non-zero lowest_vcn."); 1345 "non-zero lowest_vcn.");
@@ -1348,12 +1358,12 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
1348 vi->i_mapping->a_ops = &ntfs_mst_aops; 1358 vi->i_mapping->a_ops = &ntfs_mst_aops;
1349 else 1359 else
1350 vi->i_mapping->a_ops = &ntfs_aops; 1360 vi->i_mapping->a_ops = &ntfs_aops;
1351 if (NInoCompressed(ni) || NInoSparse(ni)) 1361 if ((NInoCompressed(ni) || NInoSparse(ni)) && ni->type != AT_INDEX_ROOT)
1352 vi->i_blocks = ni->itype.compressed.size >> 9; 1362 vi->i_blocks = ni->itype.compressed.size >> 9;
1353 else 1363 else
1354 vi->i_blocks = ni->allocated_size >> 9; 1364 vi->i_blocks = ni->allocated_size >> 9;
1355 /* 1365 /*
1356 * Make sure the base inode doesn't go away and attach it to the 1366 * Make sure the base inode does not go away and attach it to the
1357 * attribute inode. 1367 * attribute inode.
1358 */ 1368 */
1359 igrab(base_vi); 1369 igrab(base_vi);
@@ -1480,7 +1490,10 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
1480 "after the attribute value."); 1490 "after the attribute value.");
1481 goto unm_err_out; 1491 goto unm_err_out;
1482 } 1492 }
1483 /* Compressed/encrypted/sparse index root is not allowed. */ 1493 /*
1494 * Compressed/encrypted/sparse index root is not allowed, except for
1495 * directories of course but those are not dealt with here.
1496 */
1484 if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_ENCRYPTED | 1497 if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_ENCRYPTED |
1485 ATTR_IS_SPARSE)) { 1498 ATTR_IS_SPARSE)) {
1486 ntfs_error(vi->i_sb, "Found compressed/encrypted/sparse index " 1499 ntfs_error(vi->i_sb, "Found compressed/encrypted/sparse index "
@@ -2430,16 +2443,18 @@ int ntfs_setattr(struct dentry *dentry, struct iattr *attr)
2430 * We skipped the truncate but must still update 2443 * We skipped the truncate but must still update
2431 * timestamps. 2444 * timestamps.
2432 */ 2445 */
2433 ia_valid |= ATTR_MTIME|ATTR_CTIME; 2446 ia_valid |= ATTR_MTIME | ATTR_CTIME;
2434 } 2447 }
2435 } 2448 }
2436
2437 if (ia_valid & ATTR_ATIME) 2449 if (ia_valid & ATTR_ATIME)
2438 vi->i_atime = attr->ia_atime; 2450 vi->i_atime = timespec_trunc(attr->ia_atime,
2451 vi->i_sb->s_time_gran);
2439 if (ia_valid & ATTR_MTIME) 2452 if (ia_valid & ATTR_MTIME)
2440 vi->i_mtime = attr->ia_mtime; 2453 vi->i_mtime = timespec_trunc(attr->ia_mtime,
2454 vi->i_sb->s_time_gran);
2441 if (ia_valid & ATTR_CTIME) 2455 if (ia_valid & ATTR_CTIME)
2442 vi->i_ctime = attr->ia_ctime; 2456 vi->i_ctime = timespec_trunc(attr->ia_ctime,
2457 vi->i_sb->s_time_gran);
2443 mark_inode_dirty(vi); 2458 mark_inode_dirty(vi);
2444out: 2459out:
2445 return err; 2460 return err;
diff --git a/fs/ntfs/lcnalloc.c b/fs/ntfs/lcnalloc.c
index a4bc07616e5d..7b5934290685 100644
--- a/fs/ntfs/lcnalloc.c
+++ b/fs/ntfs/lcnalloc.c
@@ -54,6 +54,8 @@ int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol,
54 int ret = 0; 54 int ret = 0;
55 55
56 ntfs_debug("Entering."); 56 ntfs_debug("Entering.");
57 if (!rl)
58 return 0;
57 for (; rl->length; rl++) { 59 for (; rl->length; rl++) {
58 int err; 60 int err;
59 61
@@ -163,17 +165,9 @@ runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn,
163 BUG_ON(zone < FIRST_ZONE); 165 BUG_ON(zone < FIRST_ZONE);
164 BUG_ON(zone > LAST_ZONE); 166 BUG_ON(zone > LAST_ZONE);
165 167
166 /* Return empty runlist if @count == 0 */ 168 /* Return NULL if @count is zero. */
167 // FIXME: Do we want to just return NULL instead? (AIA) 169 if (!count)
168 if (!count) { 170 return NULL;
169 rl = ntfs_malloc_nofs(PAGE_SIZE);
170 if (!rl)
171 return ERR_PTR(-ENOMEM);
172 rl[0].vcn = start_vcn;
173 rl[0].lcn = LCN_RL_NOT_MAPPED;
174 rl[0].length = 0;
175 return rl;
176 }
177 /* Take the lcnbmp lock for writing. */ 171 /* Take the lcnbmp lock for writing. */
178 down_write(&vol->lcnbmp_lock); 172 down_write(&vol->lcnbmp_lock);
179 /* 173 /*
@@ -788,7 +782,8 @@ out:
788 * @vi: vfs inode whose runlist describes the clusters to free 782 * @vi: vfs inode whose runlist describes the clusters to free
789 * @start_vcn: vcn in the runlist of @vi at which to start freeing clusters 783 * @start_vcn: vcn in the runlist of @vi at which to start freeing clusters
790 * @count: number of clusters to free or -1 for all clusters 784 * @count: number of clusters to free or -1 for all clusters
791 * @is_rollback: if TRUE this is a rollback operation 785 * @write_locked: true if the runlist is locked for writing
786 * @is_rollback: true if this is a rollback operation
792 * 787 *
793 * Free @count clusters starting at the cluster @start_vcn in the runlist 788 * Free @count clusters starting at the cluster @start_vcn in the runlist
794 * described by the vfs inode @vi. 789 * described by the vfs inode @vi.
@@ -806,17 +801,17 @@ out:
806 * Return the number of deallocated clusters (not counting sparse ones) on 801 * Return the number of deallocated clusters (not counting sparse ones) on
807 * success and -errno on error. 802 * success and -errno on error.
808 * 803 *
809 * Locking: - The runlist described by @vi must be unlocked on entry and is 804 * Locking: - The runlist described by @vi must be locked on entry and is
810 * unlocked on return. 805 * locked on return. Note if the runlist is locked for reading the
811 * - This function takes the runlist lock of @vi for reading and 806 * lock may be dropped and reacquired. Note the runlist may be
812 * sometimes for writing and sometimes modifies the runlist. 807 * modified when needed runlist fragments need to be mapped.
813 * - The volume lcn bitmap must be unlocked on entry and is unlocked 808 * - The volume lcn bitmap must be unlocked on entry and is unlocked
814 * on return. 809 * on return.
815 * - This function takes the volume lcn bitmap lock for writing and 810 * - This function takes the volume lcn bitmap lock for writing and
816 * modifies the bitmap contents. 811 * modifies the bitmap contents.
817 */ 812 */
818s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count, 813s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
819 const BOOL is_rollback) 814 const BOOL write_locked, const BOOL is_rollback)
820{ 815{
821 s64 delta, to_free, total_freed, real_freed; 816 s64 delta, to_free, total_freed, real_freed;
822 ntfs_inode *ni; 817 ntfs_inode *ni;
@@ -848,8 +843,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
848 843
849 total_freed = real_freed = 0; 844 total_freed = real_freed = 0;
850 845
851 down_read(&ni->runlist.lock); 846 rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, write_locked);
852 rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, FALSE);
853 if (IS_ERR(rl)) { 847 if (IS_ERR(rl)) {
854 if (!is_rollback) 848 if (!is_rollback)
855 ntfs_error(vol->sb, "Failed to find first runlist " 849 ntfs_error(vol->sb, "Failed to find first runlist "
@@ -903,7 +897,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
903 897
904 /* Attempt to map runlist. */ 898 /* Attempt to map runlist. */
905 vcn = rl->vcn; 899 vcn = rl->vcn;
906 rl = ntfs_attr_find_vcn_nolock(ni, vcn, FALSE); 900 rl = ntfs_attr_find_vcn_nolock(ni, vcn, write_locked);
907 if (IS_ERR(rl)) { 901 if (IS_ERR(rl)) {
908 err = PTR_ERR(rl); 902 err = PTR_ERR(rl);
909 if (!is_rollback) 903 if (!is_rollback)
@@ -950,7 +944,6 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
950 /* Update the total done clusters. */ 944 /* Update the total done clusters. */
951 total_freed += to_free; 945 total_freed += to_free;
952 } 946 }
953 up_read(&ni->runlist.lock);
954 if (likely(!is_rollback)) 947 if (likely(!is_rollback))
955 up_write(&vol->lcnbmp_lock); 948 up_write(&vol->lcnbmp_lock);
956 949
@@ -960,7 +953,6 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
960 ntfs_debug("Done."); 953 ntfs_debug("Done.");
961 return real_freed; 954 return real_freed;
962err_out: 955err_out:
963 up_read(&ni->runlist.lock);
964 if (is_rollback) 956 if (is_rollback)
965 return err; 957 return err;
966 /* If no real clusters were freed, no need to rollback. */ 958 /* If no real clusters were freed, no need to rollback. */
@@ -973,7 +965,8 @@ err_out:
973 * If rollback fails, set the volume errors flag, emit an error 965 * If rollback fails, set the volume errors flag, emit an error
974 * message, and return the error code. 966 * message, and return the error code.
975 */ 967 */
976 delta = __ntfs_cluster_free(vi, start_vcn, total_freed, TRUE); 968 delta = __ntfs_cluster_free(vi, start_vcn, total_freed, write_locked,
969 TRUE);
977 if (delta < 0) { 970 if (delta < 0) {
978 ntfs_error(vol->sb, "Failed to rollback (error %i). Leaving " 971 ntfs_error(vol->sb, "Failed to rollback (error %i). Leaving "
979 "inconsistent metadata! Unmount and run " 972 "inconsistent metadata! Unmount and run "
diff --git a/fs/ntfs/lcnalloc.h b/fs/ntfs/lcnalloc.h
index 4cac1c024af6..e4d7fb98d685 100644
--- a/fs/ntfs/lcnalloc.h
+++ b/fs/ntfs/lcnalloc.h
@@ -43,13 +43,14 @@ extern runlist_element *ntfs_cluster_alloc(ntfs_volume *vol,
43 const NTFS_CLUSTER_ALLOCATION_ZONES zone); 43 const NTFS_CLUSTER_ALLOCATION_ZONES zone);
44 44
45extern s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, 45extern s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn,
46 s64 count, const BOOL is_rollback); 46 s64 count, const BOOL write_locked, const BOOL is_rollback);
47 47
48/** 48/**
49 * ntfs_cluster_free - free clusters on an ntfs volume 49 * ntfs_cluster_free - free clusters on an ntfs volume
50 * @vi: vfs inode whose runlist describes the clusters to free 50 * @vi: vfs inode whose runlist describes the clusters to free
51 * @start_vcn: vcn in the runlist of @vi at which to start freeing clusters 51 * @start_vcn: vcn in the runlist of @vi at which to start freeing clusters
52 * @count: number of clusters to free or -1 for all clusters 52 * @count: number of clusters to free or -1 for all clusters
53 * @write_locked: true if the runlist is locked for writing
53 * 54 *
54 * Free @count clusters starting at the cluster @start_vcn in the runlist 55 * Free @count clusters starting at the cluster @start_vcn in the runlist
55 * described by the vfs inode @vi. 56 * described by the vfs inode @vi.
@@ -64,19 +65,19 @@ extern s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn,
64 * Return the number of deallocated clusters (not counting sparse ones) on 65 * Return the number of deallocated clusters (not counting sparse ones) on
65 * success and -errno on error. 66 * success and -errno on error.
66 * 67 *
67 * Locking: - The runlist described by @vi must be unlocked on entry and is 68 * Locking: - The runlist described by @vi must be locked on entry and is
68 * unlocked on return. 69 * locked on return. Note if the runlist is locked for reading the
69 * - This function takes the runlist lock of @vi for reading and 70 * lock may be dropped and reacquired. Note the runlist may be
70 * sometimes for writing and sometimes modifies the runlist. 71 * modified when needed runlist fragments need to be mapped.
71 * - The volume lcn bitmap must be unlocked on entry and is unlocked 72 * - The volume lcn bitmap must be unlocked on entry and is unlocked
72 * on return. 73 * on return.
73 * - This function takes the volume lcn bitmap lock for writing and 74 * - This function takes the volume lcn bitmap lock for writing and
74 * modifies the bitmap contents. 75 * modifies the bitmap contents.
75 */ 76 */
76static inline s64 ntfs_cluster_free(struct inode *vi, const VCN start_vcn, 77static inline s64 ntfs_cluster_free(struct inode *vi, const VCN start_vcn,
77 s64 count) 78 s64 count, const BOOL write_locked)
78{ 79{
79 return __ntfs_cluster_free(vi, start_vcn, count, FALSE); 80 return __ntfs_cluster_free(vi, start_vcn, count, write_locked, FALSE);
80} 81}
81 82
82extern int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol, 83extern int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol,
@@ -93,8 +94,10 @@ extern int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol,
93 * 94 *
94 * Return 0 on success and -errno on error. 95 * Return 0 on success and -errno on error.
95 * 96 *
96 * Locking: This function takes the volume lcn bitmap lock for writing and 97 * Locking: - This function takes the volume lcn bitmap lock for writing and
97 * modifies the bitmap contents. 98 * modifies the bitmap contents.
99 * - The caller must have locked the runlist @rl for reading or
100 * writing.
98 */ 101 */
99static inline int ntfs_cluster_free_from_rl(ntfs_volume *vol, 102static inline int ntfs_cluster_free_from_rl(ntfs_volume *vol,
100 const runlist_element *rl) 103 const runlist_element *rl)
diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c
index 8edb8e20fb08..0173e95500d9 100644
--- a/fs/ntfs/logfile.c
+++ b/fs/ntfs/logfile.c
@@ -121,7 +121,7 @@ static BOOL ntfs_check_restart_page_header(struct inode *vi,
121 */ 121 */
122 if (!ntfs_is_chkd_record(rp->magic) && sle64_to_cpu(rp->chkdsk_lsn)) { 122 if (!ntfs_is_chkd_record(rp->magic) && sle64_to_cpu(rp->chkdsk_lsn)) {
123 ntfs_error(vi->i_sb, "$LogFile restart page is not modified " 123 ntfs_error(vi->i_sb, "$LogFile restart page is not modified "
124 "chkdsk but a chkdsk LSN is specified."); 124 "by chkdsk but a chkdsk LSN is specified.");
125 return FALSE; 125 return FALSE;
126 } 126 }
127 ntfs_debug("Done."); 127 ntfs_debug("Done.");
@@ -312,10 +312,12 @@ err_out:
312 * @vi: $LogFile inode to which the restart page belongs 312 * @vi: $LogFile inode to which the restart page belongs
313 * @rp: restart page to check 313 * @rp: restart page to check
314 * @pos: position in @vi at which the restart page resides 314 * @pos: position in @vi at which the restart page resides
315 * @wrp: copy of the multi sector transfer deprotected restart page 315 * @wrp: [OUT] copy of the multi sector transfer deprotected restart page
316 * @lsn: [OUT] set to the current logfile lsn on success
316 * 317 *
317 * Check the restart page @rp for consistency and return TRUE if it is 318 * Check the restart page @rp for consistency and return 0 if it is consistent
318 * consistent and FALSE otherwise. 319 * and -errno otherwise. The restart page may have been modified by chkdsk in
320 * which case its magic is CHKD instead of RSTR.
319 * 321 *
320 * This function only needs NTFS_BLOCK_SIZE bytes in @rp, i.e. it does not 322 * This function only needs NTFS_BLOCK_SIZE bytes in @rp, i.e. it does not
321 * require the full restart page. 323 * require the full restart page.
@@ -323,25 +325,33 @@ err_out:
323 * If @wrp is not NULL, on success, *@wrp will point to a buffer containing a 325 * If @wrp is not NULL, on success, *@wrp will point to a buffer containing a
324 * copy of the complete multi sector transfer deprotected page. On failure, 326 * copy of the complete multi sector transfer deprotected page. On failure,
325 * *@wrp is undefined. 327 * *@wrp is undefined.
328 *
329 * Simillarly, if @lsn is not NULL, on succes *@lsn will be set to the current
330 * logfile lsn according to this restart page. On failure, *@lsn is undefined.
331 *
332 * The following error codes are defined:
333 * -EINVAL - The restart page is inconsistent.
334 * -ENOMEM - Not enough memory to load the restart page.
335 * -EIO - Failed to reading from $LogFile.
326 */ 336 */
327static BOOL ntfs_check_and_load_restart_page(struct inode *vi, 337static int ntfs_check_and_load_restart_page(struct inode *vi,
328 RESTART_PAGE_HEADER *rp, s64 pos, RESTART_PAGE_HEADER **wrp) 338 RESTART_PAGE_HEADER *rp, s64 pos, RESTART_PAGE_HEADER **wrp,
339 LSN *lsn)
329{ 340{
330 RESTART_AREA *ra; 341 RESTART_AREA *ra;
331 RESTART_PAGE_HEADER *trp; 342 RESTART_PAGE_HEADER *trp;
332 int size; 343 int size, err;
333 BOOL ret;
334 344
335 ntfs_debug("Entering."); 345 ntfs_debug("Entering.");
336 /* Check the restart page header for consistency. */ 346 /* Check the restart page header for consistency. */
337 if (!ntfs_check_restart_page_header(vi, rp, pos)) { 347 if (!ntfs_check_restart_page_header(vi, rp, pos)) {
338 /* Error output already done inside the function. */ 348 /* Error output already done inside the function. */
339 return FALSE; 349 return -EINVAL;
340 } 350 }
341 /* Check the restart area for consistency. */ 351 /* Check the restart area for consistency. */
342 if (!ntfs_check_restart_area(vi, rp)) { 352 if (!ntfs_check_restart_area(vi, rp)) {
343 /* Error output already done inside the function. */ 353 /* Error output already done inside the function. */
344 return FALSE; 354 return -EINVAL;
345 } 355 }
346 ra = (RESTART_AREA*)((u8*)rp + le16_to_cpu(rp->restart_area_offset)); 356 ra = (RESTART_AREA*)((u8*)rp + le16_to_cpu(rp->restart_area_offset));
347 /* 357 /*
@@ -352,7 +362,7 @@ static BOOL ntfs_check_and_load_restart_page(struct inode *vi,
352 if (!trp) { 362 if (!trp) {
353 ntfs_error(vi->i_sb, "Failed to allocate memory for $LogFile " 363 ntfs_error(vi->i_sb, "Failed to allocate memory for $LogFile "
354 "restart page buffer."); 364 "restart page buffer.");
355 return FALSE; 365 return -ENOMEM;
356 } 366 }
357 /* 367 /*
358 * Read the whole of the restart page into the buffer. If it fits 368 * Read the whole of the restart page into the buffer. If it fits
@@ -379,6 +389,9 @@ static BOOL ntfs_check_and_load_restart_page(struct inode *vi,
379 if (IS_ERR(page)) { 389 if (IS_ERR(page)) {
380 ntfs_error(vi->i_sb, "Error mapping $LogFile " 390 ntfs_error(vi->i_sb, "Error mapping $LogFile "
381 "page (index %lu).", idx); 391 "page (index %lu).", idx);
392 err = PTR_ERR(page);
393 if (err != -EIO && err != -ENOMEM)
394 err = -EIO;
382 goto err_out; 395 goto err_out;
383 } 396 }
384 size = min_t(int, to_read, PAGE_CACHE_SIZE); 397 size = min_t(int, to_read, PAGE_CACHE_SIZE);
@@ -392,29 +405,57 @@ static BOOL ntfs_check_and_load_restart_page(struct inode *vi,
392 /* Perform the multi sector transfer deprotection on the buffer. */ 405 /* Perform the multi sector transfer deprotection on the buffer. */
393 if (post_read_mst_fixup((NTFS_RECORD*)trp, 406 if (post_read_mst_fixup((NTFS_RECORD*)trp,
394 le32_to_cpu(rp->system_page_size))) { 407 le32_to_cpu(rp->system_page_size))) {
395 ntfs_error(vi->i_sb, "Multi sector transfer error detected in " 408 /*
396 "$LogFile restart page."); 409 * A multi sector tranfer error was detected. We only need to
397 goto err_out; 410 * abort if the restart page contents exceed the multi sector
411 * transfer fixup of the first sector.
412 */
413 if (le16_to_cpu(rp->restart_area_offset) +
414 le16_to_cpu(ra->restart_area_length) >
415 NTFS_BLOCK_SIZE - sizeof(u16)) {
416 ntfs_error(vi->i_sb, "Multi sector transfer error "
417 "detected in $LogFile restart page.");
418 err = -EINVAL;
419 goto err_out;
420 }
421 }
422 /*
423 * If the restart page is modified by chkdsk or there are no active
424 * logfile clients, the logfile is consistent. Otherwise, need to
425 * check the log client records for consistency, too.
426 */
427 err = 0;
428 if (ntfs_is_rstr_record(rp->magic) &&
429 ra->client_in_use_list != LOGFILE_NO_CLIENT) {
430 if (!ntfs_check_log_client_array(vi, trp)) {
431 err = -EINVAL;
432 goto err_out;
433 }
434 }
435 if (lsn) {
436 if (ntfs_is_rstr_record(rp->magic))
437 *lsn = sle64_to_cpu(ra->current_lsn);
438 else /* if (ntfs_is_chkd_record(rp->magic)) */
439 *lsn = sle64_to_cpu(rp->chkdsk_lsn);
398 } 440 }
399 /* Check the log client records for consistency. */
400 ret = ntfs_check_log_client_array(vi, trp);
401 if (ret && wrp)
402 *wrp = trp;
403 else
404 ntfs_free(trp);
405 ntfs_debug("Done."); 441 ntfs_debug("Done.");
406 return ret; 442 if (wrp)
443 *wrp = trp;
444 else {
407err_out: 445err_out:
408 ntfs_free(trp); 446 ntfs_free(trp);
409 return FALSE; 447 }
448 return err;
410} 449}
411 450
412/** 451/**
413 * ntfs_check_logfile - check the journal for consistency 452 * ntfs_check_logfile - check the journal for consistency
414 * @log_vi: struct inode of loaded journal $LogFile to check 453 * @log_vi: struct inode of loaded journal $LogFile to check
454 * @rp: [OUT] on success this is a copy of the current restart page
415 * 455 *
416 * Check the $LogFile journal for consistency and return TRUE if it is 456 * Check the $LogFile journal for consistency and return TRUE if it is
417 * consistent and FALSE if not. 457 * consistent and FALSE if not. On success, the current restart page is
458 * returned in *@rp. Caller must call ntfs_free(*@rp) when finished with it.
418 * 459 *
419 * At present we only check the two restart pages and ignore the log record 460 * At present we only check the two restart pages and ignore the log record
420 * pages. 461 * pages.
@@ -424,19 +465,18 @@ err_out:
424 * if the $LogFile was created on a system with a different page size to ours 465 * if the $LogFile was created on a system with a different page size to ours
425 * yet and mst deprotection would fail if our page size is smaller. 466 * yet and mst deprotection would fail if our page size is smaller.
426 */ 467 */
427BOOL ntfs_check_logfile(struct inode *log_vi) 468BOOL ntfs_check_logfile(struct inode *log_vi, RESTART_PAGE_HEADER **rp)
428{ 469{
429 s64 size, pos, rstr1_pos, rstr2_pos; 470 s64 size, pos;
471 LSN rstr1_lsn, rstr2_lsn;
430 ntfs_volume *vol = NTFS_SB(log_vi->i_sb); 472 ntfs_volume *vol = NTFS_SB(log_vi->i_sb);
431 struct address_space *mapping = log_vi->i_mapping; 473 struct address_space *mapping = log_vi->i_mapping;
432 struct page *page = NULL; 474 struct page *page = NULL;
433 u8 *kaddr = NULL; 475 u8 *kaddr = NULL;
434 RESTART_PAGE_HEADER *rstr1_ph = NULL; 476 RESTART_PAGE_HEADER *rstr1_ph = NULL;
435 RESTART_PAGE_HEADER *rstr2_ph = NULL; 477 RESTART_PAGE_HEADER *rstr2_ph = NULL;
436 int log_page_size, log_page_mask, ofs; 478 int log_page_size, log_page_mask, err;
437 BOOL logfile_is_empty = TRUE; 479 BOOL logfile_is_empty = TRUE;
438 BOOL rstr1_found = FALSE;
439 BOOL rstr2_found = FALSE;
440 u8 log_page_bits; 480 u8 log_page_bits;
441 481
442 ntfs_debug("Entering."); 482 ntfs_debug("Entering.");
@@ -491,7 +531,7 @@ BOOL ntfs_check_logfile(struct inode *log_vi)
491 if (IS_ERR(page)) { 531 if (IS_ERR(page)) {
492 ntfs_error(vol->sb, "Error mapping $LogFile " 532 ntfs_error(vol->sb, "Error mapping $LogFile "
493 "page (index %lu).", idx); 533 "page (index %lu).", idx);
494 return FALSE; 534 goto err_out;
495 } 535 }
496 } 536 }
497 kaddr = (u8*)page_address(page) + (pos & ~PAGE_CACHE_MASK); 537 kaddr = (u8*)page_address(page) + (pos & ~PAGE_CACHE_MASK);
@@ -510,99 +550,95 @@ BOOL ntfs_check_logfile(struct inode *log_vi)
510 */ 550 */
511 if (ntfs_is_rcrd_recordp((le32*)kaddr)) 551 if (ntfs_is_rcrd_recordp((le32*)kaddr))
512 break; 552 break;
513 /* 553 /* If not a (modified by chkdsk) restart page, continue. */
514 * A modified by chkdsk restart page means we cannot handle 554 if (!ntfs_is_rstr_recordp((le32*)kaddr) &&
515 * this log file. 555 !ntfs_is_chkd_recordp((le32*)kaddr)) {
516 */
517 if (ntfs_is_chkd_recordp((le32*)kaddr)) {
518 ntfs_error(vol->sb, "$LogFile has been modified by "
519 "chkdsk. Mount this volume in "
520 "Windows.");
521 goto err_out;
522 }
523 /* If not a restart page, continue. */
524 if (!ntfs_is_rstr_recordp((le32*)kaddr)) {
525 /* Skip to the minimum page size for the next one. */
526 if (!pos) 556 if (!pos)
527 pos = NTFS_BLOCK_SIZE >> 1; 557 pos = NTFS_BLOCK_SIZE >> 1;
528 continue; 558 continue;
529 } 559 }
530 /* We now know we have a restart page. */
531 if (!pos) {
532 rstr1_found = TRUE;
533 rstr1_pos = pos;
534 } else {
535 if (rstr2_found) {
536 ntfs_error(vol->sb, "Found more than two "
537 "restart pages in $LogFile.");
538 goto err_out;
539 }
540 rstr2_found = TRUE;
541 rstr2_pos = pos;
542 }
543 /* 560 /*
544 * Check the restart page for consistency and get a copy of the 561 * Check the (modified by chkdsk) restart page for consistency
545 * complete multi sector transfer deprotected restart page. 562 * and get a copy of the complete multi sector transfer
563 * deprotected restart page.
546 */ 564 */
547 if (!ntfs_check_and_load_restart_page(log_vi, 565 err = ntfs_check_and_load_restart_page(log_vi,
548 (RESTART_PAGE_HEADER*)kaddr, pos, 566 (RESTART_PAGE_HEADER*)kaddr, pos,
549 !pos ? &rstr1_ph : &rstr2_ph)) { 567 !rstr1_ph ? &rstr1_ph : &rstr2_ph,
550 /* Error output already done inside the function. */ 568 !rstr1_ph ? &rstr1_lsn : &rstr2_lsn);
551 goto err_out; 569 if (!err) {
570 /*
571 * If we have now found the first (modified by chkdsk)
572 * restart page, continue looking for the second one.
573 */
574 if (!pos) {
575 pos = NTFS_BLOCK_SIZE >> 1;
576 continue;
577 }
578 /*
579 * We have now found the second (modified by chkdsk)
580 * restart page, so we can stop looking.
581 */
582 break;
552 } 583 }
553 /* 584 /*
554 * We have a valid restart page. The next one must be after 585 * Error output already done inside the function. Note, we do
555 * a whole system page size as specified by the valid restart 586 * not abort if the restart page was invalid as we might still
556 * page. 587 * find a valid one further in the file.
557 */ 588 */
589 if (err != -EINVAL) {
590 ntfs_unmap_page(page);
591 goto err_out;
592 }
593 /* Continue looking. */
558 if (!pos) 594 if (!pos)
559 pos = le32_to_cpu(rstr1_ph->system_page_size) >> 1; 595 pos = NTFS_BLOCK_SIZE >> 1;
560 } 596 }
561 if (page) { 597 if (page)
562 ntfs_unmap_page(page); 598 ntfs_unmap_page(page);
563 page = NULL;
564 }
565 if (logfile_is_empty) { 599 if (logfile_is_empty) {
566 NVolSetLogFileEmpty(vol); 600 NVolSetLogFileEmpty(vol);
567is_empty: 601is_empty:
568 ntfs_debug("Done. ($LogFile is empty.)"); 602 ntfs_debug("Done. ($LogFile is empty.)");
569 return TRUE; 603 return TRUE;
570 } 604 }
571 if (!rstr1_found || !rstr2_found) { 605 if (!rstr1_ph) {
572 ntfs_error(vol->sb, "Did not find two restart pages in " 606 BUG_ON(rstr2_ph);
573 "$LogFile."); 607 ntfs_error(vol->sb, "Did not find any restart pages in "
574 goto err_out; 608 "$LogFile and it was not empty.");
609 return FALSE;
610 }
611 /* If both restart pages were found, use the more recent one. */
612 if (rstr2_ph) {
613 /*
614 * If the second restart area is more recent, switch to it.
615 * Otherwise just throw it away.
616 */
617 if (rstr2_lsn > rstr1_lsn) {
618 ntfs_free(rstr1_ph);
619 rstr1_ph = rstr2_ph;
620 /* rstr1_lsn = rstr2_lsn; */
621 } else
622 ntfs_free(rstr2_ph);
623 rstr2_ph = NULL;
575 } 624 }
576 /*
577 * The two restart areas must be identical except for the update
578 * sequence number.
579 */
580 ofs = le16_to_cpu(rstr1_ph->usa_ofs);
581 if (memcmp(rstr1_ph, rstr2_ph, ofs) || (ofs += sizeof(u16),
582 memcmp((u8*)rstr1_ph + ofs, (u8*)rstr2_ph + ofs,
583 le32_to_cpu(rstr1_ph->system_page_size) - ofs))) {
584 ntfs_error(vol->sb, "The two restart pages in $LogFile do not "
585 "match.");
586 goto err_out;
587 }
588 ntfs_free(rstr1_ph);
589 ntfs_free(rstr2_ph);
590 /* All consistency checks passed. */ 625 /* All consistency checks passed. */
626 if (rp)
627 *rp = rstr1_ph;
628 else
629 ntfs_free(rstr1_ph);
591 ntfs_debug("Done."); 630 ntfs_debug("Done.");
592 return TRUE; 631 return TRUE;
593err_out: 632err_out:
594 if (page)
595 ntfs_unmap_page(page);
596 if (rstr1_ph) 633 if (rstr1_ph)
597 ntfs_free(rstr1_ph); 634 ntfs_free(rstr1_ph);
598 if (rstr2_ph)
599 ntfs_free(rstr2_ph);
600 return FALSE; 635 return FALSE;
601} 636}
602 637
603/** 638/**
604 * ntfs_is_logfile_clean - check in the journal if the volume is clean 639 * ntfs_is_logfile_clean - check in the journal if the volume is clean
605 * @log_vi: struct inode of loaded journal $LogFile to check 640 * @log_vi: struct inode of loaded journal $LogFile to check
641 * @rp: copy of the current restart page
606 * 642 *
607 * Analyze the $LogFile journal and return TRUE if it indicates the volume was 643 * Analyze the $LogFile journal and return TRUE if it indicates the volume was
608 * shutdown cleanly and FALSE if not. 644 * shutdown cleanly and FALSE if not.
@@ -619,11 +655,9 @@ err_out:
619 * is empty this function requires that NVolLogFileEmpty() is true otherwise an 655 * is empty this function requires that NVolLogFileEmpty() is true otherwise an
620 * empty volume will be reported as dirty. 656 * empty volume will be reported as dirty.
621 */ 657 */
622BOOL ntfs_is_logfile_clean(struct inode *log_vi) 658BOOL ntfs_is_logfile_clean(struct inode *log_vi, const RESTART_PAGE_HEADER *rp)
623{ 659{
624 ntfs_volume *vol = NTFS_SB(log_vi->i_sb); 660 ntfs_volume *vol = NTFS_SB(log_vi->i_sb);
625 struct page *page;
626 RESTART_PAGE_HEADER *rp;
627 RESTART_AREA *ra; 661 RESTART_AREA *ra;
628 662
629 ntfs_debug("Entering."); 663 ntfs_debug("Entering.");
@@ -632,24 +666,15 @@ BOOL ntfs_is_logfile_clean(struct inode *log_vi)
632 ntfs_debug("Done. ($LogFile is empty.)"); 666 ntfs_debug("Done. ($LogFile is empty.)");
633 return TRUE; 667 return TRUE;
634 } 668 }
635 /* 669 BUG_ON(!rp);
636 * Read the first restart page. It will be possibly incomplete and 670 if (!ntfs_is_rstr_record(rp->magic) &&
637 * will not be multi sector transfer deprotected but we only need the 671 !ntfs_is_chkd_record(rp->magic)) {
638 * first NTFS_BLOCK_SIZE bytes so it does not matter. 672 ntfs_error(vol->sb, "Restart page buffer is invalid. This is "
639 */ 673 "probably a bug in that the $LogFile should "
640 page = ntfs_map_page(log_vi->i_mapping, 0); 674 "have been consistency checked before calling "
641 if (IS_ERR(page)) { 675 "this function.");
642 ntfs_error(vol->sb, "Error mapping $LogFile page (index 0).");
643 return FALSE; 676 return FALSE;
644 } 677 }
645 rp = (RESTART_PAGE_HEADER*)page_address(page);
646 if (!ntfs_is_rstr_record(rp->magic)) {
647 ntfs_error(vol->sb, "No restart page found at offset zero in "
648 "$LogFile. This is probably a bug in that "
649 "the $LogFile should have been consistency "
650 "checked before calling this function.");
651 goto err_out;
652 }
653 ra = (RESTART_AREA*)((u8*)rp + le16_to_cpu(rp->restart_area_offset)); 678 ra = (RESTART_AREA*)((u8*)rp + le16_to_cpu(rp->restart_area_offset));
654 /* 679 /*
655 * If the $LogFile has active clients, i.e. it is open, and we do not 680 * If the $LogFile has active clients, i.e. it is open, and we do not
@@ -659,15 +684,11 @@ BOOL ntfs_is_logfile_clean(struct inode *log_vi)
659 if (ra->client_in_use_list != LOGFILE_NO_CLIENT && 684 if (ra->client_in_use_list != LOGFILE_NO_CLIENT &&
660 !(ra->flags & RESTART_VOLUME_IS_CLEAN)) { 685 !(ra->flags & RESTART_VOLUME_IS_CLEAN)) {
661 ntfs_debug("Done. $LogFile indicates a dirty shutdown."); 686 ntfs_debug("Done. $LogFile indicates a dirty shutdown.");
662 goto err_out; 687 return FALSE;
663 } 688 }
664 ntfs_unmap_page(page);
665 /* $LogFile indicates a clean shutdown. */ 689 /* $LogFile indicates a clean shutdown. */
666 ntfs_debug("Done. $LogFile indicates a clean shutdown."); 690 ntfs_debug("Done. $LogFile indicates a clean shutdown.");
667 return TRUE; 691 return TRUE;
668err_out:
669 ntfs_unmap_page(page);
670 return FALSE;
671} 692}
672 693
673/** 694/**
diff --git a/fs/ntfs/logfile.h b/fs/ntfs/logfile.h
index 4ee4378de061..42388f95ea6d 100644
--- a/fs/ntfs/logfile.h
+++ b/fs/ntfs/logfile.h
@@ -2,7 +2,7 @@
2 * logfile.h - Defines for NTFS kernel journal ($LogFile) handling. Part of 2 * logfile.h - Defines for NTFS kernel journal ($LogFile) handling. Part of
3 * the Linux-NTFS project. 3 * the Linux-NTFS project.
4 * 4 *
5 * Copyright (c) 2000-2004 Anton Altaparmakov 5 * Copyright (c) 2000-2005 Anton Altaparmakov
6 * 6 *
7 * This program/include file is free software; you can redistribute it and/or 7 * This program/include file is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as published 8 * modify it under the terms of the GNU General Public License as published
@@ -296,9 +296,11 @@ typedef struct {
296/* sizeof() = 160 (0xa0) bytes */ 296/* sizeof() = 160 (0xa0) bytes */
297} __attribute__ ((__packed__)) LOG_CLIENT_RECORD; 297} __attribute__ ((__packed__)) LOG_CLIENT_RECORD;
298 298
299extern BOOL ntfs_check_logfile(struct inode *log_vi); 299extern BOOL ntfs_check_logfile(struct inode *log_vi,
300 RESTART_PAGE_HEADER **rp);
300 301
301extern BOOL ntfs_is_logfile_clean(struct inode *log_vi); 302extern BOOL ntfs_is_logfile_clean(struct inode *log_vi,
303 const RESTART_PAGE_HEADER *rp);
302 304
303extern BOOL ntfs_empty_logfile(struct inode *log_vi); 305extern BOOL ntfs_empty_logfile(struct inode *log_vi);
304 306
diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h
index fac5944df6d8..9994e019a3cf 100644
--- a/fs/ntfs/malloc.h
+++ b/fs/ntfs/malloc.h
@@ -27,27 +27,63 @@
27#include <linux/highmem.h> 27#include <linux/highmem.h>
28 28
29/** 29/**
30 * ntfs_malloc_nofs - allocate memory in multiples of pages 30 * __ntfs_malloc - allocate memory in multiples of pages
31 * @size number of bytes to allocate 31 * @size: number of bytes to allocate
32 * @gfp_mask: extra flags for the allocator
33 *
34 * Internal function. You probably want ntfs_malloc_nofs()...
32 * 35 *
33 * Allocates @size bytes of memory, rounded up to multiples of PAGE_SIZE and 36 * Allocates @size bytes of memory, rounded up to multiples of PAGE_SIZE and
34 * returns a pointer to the allocated memory. 37 * returns a pointer to the allocated memory.
35 * 38 *
36 * If there was insufficient memory to complete the request, return NULL. 39 * If there was insufficient memory to complete the request, return NULL.
40 * Depending on @gfp_mask the allocation may be guaranteed to succeed.
37 */ 41 */
38static inline void *ntfs_malloc_nofs(unsigned long size) 42static inline void *__ntfs_malloc(unsigned long size,
43 unsigned int __nocast gfp_mask)
39{ 44{
40 if (likely(size <= PAGE_SIZE)) { 45 if (likely(size <= PAGE_SIZE)) {
41 BUG_ON(!size); 46 BUG_ON(!size);
42 /* kmalloc() has per-CPU caches so is faster for now. */ 47 /* kmalloc() has per-CPU caches so is faster for now. */
43 return kmalloc(PAGE_SIZE, GFP_NOFS); 48 return kmalloc(PAGE_SIZE, gfp_mask);
44 /* return (void *)__get_free_page(GFP_NOFS | __GFP_HIGHMEM); */ 49 /* return (void *)__get_free_page(gfp_mask); */
45 } 50 }
46 if (likely(size >> PAGE_SHIFT < num_physpages)) 51 if (likely(size >> PAGE_SHIFT < num_physpages))
47 return __vmalloc(size, GFP_NOFS | __GFP_HIGHMEM, PAGE_KERNEL); 52 return __vmalloc(size, gfp_mask, PAGE_KERNEL);
48 return NULL; 53 return NULL;
49} 54}
50 55
56/**
57 * ntfs_malloc_nofs - allocate memory in multiples of pages
58 * @size: number of bytes to allocate
59 *
60 * Allocates @size bytes of memory, rounded up to multiples of PAGE_SIZE and
61 * returns a pointer to the allocated memory.
62 *
63 * If there was insufficient memory to complete the request, return NULL.
64 */
65static inline void *ntfs_malloc_nofs(unsigned long size)
66{
67 return __ntfs_malloc(size, GFP_NOFS | __GFP_HIGHMEM);
68}
69
70/**
71 * ntfs_malloc_nofs_nofail - allocate memory in multiples of pages
72 * @size: number of bytes to allocate
73 *
74 * Allocates @size bytes of memory, rounded up to multiples of PAGE_SIZE and
75 * returns a pointer to the allocated memory.
76 *
77 * This function guarantees that the allocation will succeed. It will sleep
78 * for as long as it takes to complete the allocation.
79 *
80 * If there was insufficient memory to complete the request, return NULL.
81 */
82static inline void *ntfs_malloc_nofs_nofail(unsigned long size)
83{
84 return __ntfs_malloc(size, GFP_NOFS | __GFP_HIGHMEM | __GFP_NOFAIL);
85}
86
51static inline void ntfs_free(void *addr) 87static inline void ntfs_free(void *addr)
52{ 88{
53 if (likely(((unsigned long)addr < VMALLOC_START) || 89 if (likely(((unsigned long)addr < VMALLOC_START) ||
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 317f7c679fd3..2c32b84385a8 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -511,7 +511,6 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no,
511 } while (bh); 511 } while (bh);
512 tail->b_this_page = head; 512 tail->b_this_page = head;
513 attach_page_buffers(page, head); 513 attach_page_buffers(page, head);
514 BUG_ON(!page_has_buffers(page));
515 } 514 }
516 bh = head = page_buffers(page); 515 bh = head = page_buffers(page);
517 BUG_ON(!bh); 516 BUG_ON(!bh);
@@ -692,7 +691,6 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync)
692 */ 691 */
693 if (!NInoTestClearDirty(ni)) 692 if (!NInoTestClearDirty(ni))
694 goto done; 693 goto done;
695 BUG_ON(!page_has_buffers(page));
696 bh = head = page_buffers(page); 694 bh = head = page_buffers(page);
697 BUG_ON(!bh); 695 BUG_ON(!bh);
698 rl = NULL; 696 rl = NULL;
@@ -1955,7 +1953,7 @@ restore_undo_alloc:
1955 a = ctx->attr; 1953 a = ctx->attr;
1956 a->data.non_resident.highest_vcn = cpu_to_sle64(old_last_vcn - 1); 1954 a->data.non_resident.highest_vcn = cpu_to_sle64(old_last_vcn - 1);
1957undo_alloc: 1955undo_alloc:
1958 if (ntfs_cluster_free(vol->mft_ino, old_last_vcn, -1) < 0) { 1956 if (ntfs_cluster_free(vol->mft_ino, old_last_vcn, -1, TRUE) < 0) {
1959 ntfs_error(vol->sb, "Failed to free clusters from mft data " 1957 ntfs_error(vol->sb, "Failed to free clusters from mft data "
1960 "attribute.%s", es); 1958 "attribute.%s", es);
1961 NVolSetErrors(vol); 1959 NVolSetErrors(vol);
diff --git a/fs/ntfs/runlist.c b/fs/ntfs/runlist.c
index 758855b0414e..f5b2ac929081 100644
--- a/fs/ntfs/runlist.c
+++ b/fs/ntfs/runlist.c
@@ -35,7 +35,7 @@ static inline void ntfs_rl_mm(runlist_element *base, int dst, int src,
35 int size) 35 int size)
36{ 36{
37 if (likely((dst != src) && (size > 0))) 37 if (likely((dst != src) && (size > 0)))
38 memmove(base + dst, base + src, size * sizeof (*base)); 38 memmove(base + dst, base + src, size * sizeof(*base));
39} 39}
40 40
41/** 41/**
@@ -95,6 +95,51 @@ static inline runlist_element *ntfs_rl_realloc(runlist_element *rl,
95} 95}
96 96
97/** 97/**
98 * ntfs_rl_realloc_nofail - Reallocate memory for runlists
99 * @rl: original runlist
100 * @old_size: number of runlist elements in the original runlist @rl
101 * @new_size: number of runlist elements we need space for
102 *
103 * As the runlists grow, more memory will be required. To prevent the
104 * kernel having to allocate and reallocate large numbers of small bits of
105 * memory, this function returns an entire page of memory.
106 *
107 * This function guarantees that the allocation will succeed. It will sleep
108 * for as long as it takes to complete the allocation.
109 *
110 * It is up to the caller to serialize access to the runlist @rl.
111 *
112 * N.B. If the new allocation doesn't require a different number of pages in
113 * memory, the function will return the original pointer.
114 *
115 * On success, return a pointer to the newly allocated, or recycled, memory.
116 * On error, return -errno. The following error codes are defined:
117 * -ENOMEM - Not enough memory to allocate runlist array.
118 * -EINVAL - Invalid parameters were passed in.
119 */
120static inline runlist_element *ntfs_rl_realloc_nofail(runlist_element *rl,
121 int old_size, int new_size)
122{
123 runlist_element *new_rl;
124
125 old_size = PAGE_ALIGN(old_size * sizeof(*rl));
126 new_size = PAGE_ALIGN(new_size * sizeof(*rl));
127 if (old_size == new_size)
128 return rl;
129
130 new_rl = ntfs_malloc_nofs_nofail(new_size);
131 BUG_ON(!new_rl);
132
133 if (likely(rl != NULL)) {
134 if (unlikely(old_size > new_size))
135 old_size = new_size;
136 memcpy(new_rl, rl, old_size);
137 ntfs_free(rl);
138 }
139 return new_rl;
140}
141
142/**
98 * ntfs_are_rl_mergeable - test if two runlists can be joined together 143 * ntfs_are_rl_mergeable - test if two runlists can be joined together
99 * @dst: original runlist 144 * @dst: original runlist
100 * @src: new runlist to test for mergeability with @dst 145 * @src: new runlist to test for mergeability with @dst
@@ -497,6 +542,7 @@ runlist_element *ntfs_runlists_merge(runlist_element *drl,
497 /* Scan to the end of the source runlist. */ 542 /* Scan to the end of the source runlist. */
498 for (dend = 0; likely(drl[dend].length); dend++) 543 for (dend = 0; likely(drl[dend].length); dend++)
499 ; 544 ;
545 dend++;
500 drl = ntfs_rl_realloc(drl, dend, dend + 1); 546 drl = ntfs_rl_realloc(drl, dend, dend + 1);
501 if (IS_ERR(drl)) 547 if (IS_ERR(drl))
502 return drl; 548 return drl;
@@ -566,8 +612,8 @@ runlist_element *ntfs_runlists_merge(runlist_element *drl,
566 ((drl[dins].vcn + drl[dins].length) <= /* End of hole */ 612 ((drl[dins].vcn + drl[dins].length) <= /* End of hole */
567 (srl[send - 1].vcn + srl[send - 1].length))); 613 (srl[send - 1].vcn + srl[send - 1].length)));
568 614
569 /* Or we'll lose an end marker */ 615 /* Or we will lose an end marker. */
570 if (start && finish && (drl[dins].length == 0)) 616 if (finish && !drl[dins].length)
571 ss++; 617 ss++;
572 if (marker && (drl[dins].vcn + drl[dins].length > srl[send - 1].vcn)) 618 if (marker && (drl[dins].vcn + drl[dins].length > srl[send - 1].vcn))
573 finish = FALSE; 619 finish = FALSE;
@@ -621,11 +667,8 @@ runlist_element *ntfs_runlists_merge(runlist_element *drl,
621 if (drl[ds].lcn != LCN_RL_NOT_MAPPED) { 667 if (drl[ds].lcn != LCN_RL_NOT_MAPPED) {
622 /* Add an unmapped runlist element. */ 668 /* Add an unmapped runlist element. */
623 if (!slots) { 669 if (!slots) {
624 /* FIXME/TODO: We need to have the 670 drl = ntfs_rl_realloc_nofail(drl, ds,
625 * extra memory already! (AIA) */ 671 ds + 2);
626 drl = ntfs_rl_realloc(drl, ds, ds + 2);
627 if (!drl)
628 goto critical_error;
629 slots = 2; 672 slots = 2;
630 } 673 }
631 ds++; 674 ds++;
@@ -640,13 +683,8 @@ runlist_element *ntfs_runlists_merge(runlist_element *drl,
640 drl[ds].length = marker_vcn - drl[ds].vcn; 683 drl[ds].length = marker_vcn - drl[ds].vcn;
641 /* Finally add the ENOENT terminator. */ 684 /* Finally add the ENOENT terminator. */
642 ds++; 685 ds++;
643 if (!slots) { 686 if (!slots)
644 /* FIXME/TODO: We need to have the extra 687 drl = ntfs_rl_realloc_nofail(drl, ds, ds + 1);
645 * memory already! (AIA) */
646 drl = ntfs_rl_realloc(drl, ds, ds + 1);
647 if (!drl)
648 goto critical_error;
649 }
650 drl[ds].vcn = marker_vcn; 688 drl[ds].vcn = marker_vcn;
651 drl[ds].lcn = LCN_ENOENT; 689 drl[ds].lcn = LCN_ENOENT;
652 drl[ds].length = (s64)0; 690 drl[ds].length = (s64)0;
@@ -659,11 +697,6 @@ finished:
659 ntfs_debug("Merged runlist:"); 697 ntfs_debug("Merged runlist:");
660 ntfs_debug_dump_runlist(drl); 698 ntfs_debug_dump_runlist(drl);
661 return drl; 699 return drl;
662
663critical_error:
664 /* Critical error! We cannot afford to fail here. */
665 ntfs_error(NULL, "Critical error! Not enough memory.");
666 panic("NTFS: Cannot continue.");
667} 700}
668 701
669/** 702/**
@@ -727,6 +760,9 @@ runlist_element *ntfs_mapping_pairs_decompress(const ntfs_volume *vol,
727 ntfs_error(vol->sb, "Corrupt attribute."); 760 ntfs_error(vol->sb, "Corrupt attribute.");
728 return ERR_PTR(-EIO); 761 return ERR_PTR(-EIO);
729 } 762 }
763 /* If the mapping pairs array is valid but empty, nothing to do. */
764 if (!vcn && !*buf)
765 return old_rl;
730 /* Current position in runlist array. */ 766 /* Current position in runlist array. */
731 rlpos = 0; 767 rlpos = 0;
732 /* Allocate first page and set current runlist size to one page. */ 768 /* Allocate first page and set current runlist size to one page. */
@@ -1419,6 +1455,7 @@ err_out:
1419 1455
1420/** 1456/**
1421 * ntfs_rl_truncate_nolock - truncate a runlist starting at a specified vcn 1457 * ntfs_rl_truncate_nolock - truncate a runlist starting at a specified vcn
1458 * @vol: ntfs volume (needed for error output)
1422 * @runlist: runlist to truncate 1459 * @runlist: runlist to truncate
1423 * @new_length: the new length of the runlist in VCNs 1460 * @new_length: the new length of the runlist in VCNs
1424 * 1461 *
@@ -1426,12 +1463,16 @@ err_out:
1426 * holding the runlist elements to a length of @new_length VCNs. 1463 * holding the runlist elements to a length of @new_length VCNs.
1427 * 1464 *
1428 * If @new_length lies within the runlist, the runlist elements with VCNs of 1465 * If @new_length lies within the runlist, the runlist elements with VCNs of
1429 * @new_length and above are discarded. 1466 * @new_length and above are discarded. As a special case if @new_length is
1467 * zero, the runlist is discarded and set to NULL.
1430 * 1468 *
1431 * If @new_length lies beyond the runlist, a sparse runlist element is added to 1469 * If @new_length lies beyond the runlist, a sparse runlist element is added to
1432 * the end of the runlist @runlist or if the last runlist element is a sparse 1470 * the end of the runlist @runlist or if the last runlist element is a sparse
1433 * one already, this is extended. 1471 * one already, this is extended.
1434 * 1472 *
1473 * Note, no checking is done for unmapped runlist elements. It is assumed that
1474 * the caller has mapped any elements that need to be mapped already.
1475 *
1435 * Return 0 on success and -errno on error. 1476 * Return 0 on success and -errno on error.
1436 * 1477 *
1437 * Locking: The caller must hold @runlist->lock for writing. 1478 * Locking: The caller must hold @runlist->lock for writing.
@@ -1446,6 +1487,13 @@ int ntfs_rl_truncate_nolock(const ntfs_volume *vol, runlist *const runlist,
1446 BUG_ON(!runlist); 1487 BUG_ON(!runlist);
1447 BUG_ON(new_length < 0); 1488 BUG_ON(new_length < 0);
1448 rl = runlist->rl; 1489 rl = runlist->rl;
1490 if (!new_length) {
1491 ntfs_debug("Freeing runlist.");
1492 runlist->rl = NULL;
1493 if (rl)
1494 ntfs_free(rl);
1495 return 0;
1496 }
1449 if (unlikely(!rl)) { 1497 if (unlikely(!rl)) {
1450 /* 1498 /*
1451 * Create a runlist consisting of a sparse runlist element of 1499 * Create a runlist consisting of a sparse runlist element of
@@ -1553,4 +1601,288 @@ int ntfs_rl_truncate_nolock(const ntfs_volume *vol, runlist *const runlist,
1553 return 0; 1601 return 0;
1554} 1602}
1555 1603
1604/**
1605 * ntfs_rl_punch_nolock - punch a hole into a runlist
1606 * @vol: ntfs volume (needed for error output)
1607 * @runlist: runlist to punch a hole into
1608 * @start: starting VCN of the hole to be created
1609 * @length: size of the hole to be created in units of clusters
1610 *
1611 * Punch a hole into the runlist @runlist starting at VCN @start and of size
1612 * @length clusters.
1613 *
1614 * Return 0 on success and -errno on error, in which case @runlist has not been
1615 * modified.
1616 *
1617 * If @start and/or @start + @length are outside the runlist return error code
1618 * -ENOENT.
1619 *
1620 * If the runlist contains unmapped or error elements between @start and @start
1621 * + @length return error code -EINVAL.
1622 *
1623 * Locking: The caller must hold @runlist->lock for writing.
1624 */
1625int ntfs_rl_punch_nolock(const ntfs_volume *vol, runlist *const runlist,
1626 const VCN start, const s64 length)
1627{
1628 const VCN end = start + length;
1629 s64 delta;
1630 runlist_element *rl, *rl_end, *rl_real_end, *trl;
1631 int old_size;
1632 BOOL lcn_fixup = FALSE;
1633
1634 ntfs_debug("Entering for start 0x%llx, length 0x%llx.",
1635 (long long)start, (long long)length);
1636 BUG_ON(!runlist);
1637 BUG_ON(start < 0);
1638 BUG_ON(length < 0);
1639 BUG_ON(end < 0);
1640 rl = runlist->rl;
1641 if (unlikely(!rl)) {
1642 if (likely(!start && !length))
1643 return 0;
1644 return -ENOENT;
1645 }
1646 /* Find @start in the runlist. */
1647 while (likely(rl->length && start >= rl[1].vcn))
1648 rl++;
1649 rl_end = rl;
1650 /* Find @end in the runlist. */
1651 while (likely(rl_end->length && end >= rl_end[1].vcn)) {
1652 /* Verify there are no unmapped or error elements. */
1653 if (unlikely(rl_end->lcn < LCN_HOLE))
1654 return -EINVAL;
1655 rl_end++;
1656 }
1657 /* Check the last element. */
1658 if (unlikely(rl_end->length && rl_end->lcn < LCN_HOLE))
1659 return -EINVAL;
1660 /* This covers @start being out of bounds, too. */
1661 if (!rl_end->length && end > rl_end->vcn)
1662 return -ENOENT;
1663 if (!length)
1664 return 0;
1665 if (!rl->length)
1666 return -ENOENT;
1667 rl_real_end = rl_end;
1668 /* Determine the runlist size. */
1669 while (likely(rl_real_end->length))
1670 rl_real_end++;
1671 old_size = rl_real_end - runlist->rl + 1;
1672 /* If @start is in a hole simply extend the hole. */
1673 if (rl->lcn == LCN_HOLE) {
1674 /*
1675 * If both @start and @end are in the same sparse run, we are
1676 * done.
1677 */
1678 if (end <= rl[1].vcn) {
1679 ntfs_debug("Done (requested hole is already sparse).");
1680 return 0;
1681 }
1682extend_hole:
1683 /* Extend the hole. */
1684 rl->length = end - rl->vcn;
1685 /* If @end is in a hole, merge it with the current one. */
1686 if (rl_end->lcn == LCN_HOLE) {
1687 rl_end++;
1688 rl->length = rl_end->vcn - rl->vcn;
1689 }
1690 /* We have done the hole. Now deal with the remaining tail. */
1691 rl++;
1692 /* Cut out all runlist elements up to @end. */
1693 if (rl < rl_end)
1694 memmove(rl, rl_end, (rl_real_end - rl_end + 1) *
1695 sizeof(*rl));
1696 /* Adjust the beginning of the tail if necessary. */
1697 if (end > rl->vcn) {
1698 s64 delta = end - rl->vcn;
1699 rl->vcn = end;
1700 rl->length -= delta;
1701 /* Only adjust the lcn if it is real. */
1702 if (rl->lcn >= 0)
1703 rl->lcn += delta;
1704 }
1705shrink_allocation:
1706 /* Reallocate memory if the allocation changed. */
1707 if (rl < rl_end) {
1708 rl = ntfs_rl_realloc(runlist->rl, old_size,
1709 old_size - (rl_end - rl));
1710 if (IS_ERR(rl))
1711 ntfs_warning(vol->sb, "Failed to shrink "
1712 "runlist buffer. This just "
1713 "wastes a bit of memory "
1714 "temporarily so we ignore it "
1715 "and return success.");
1716 else
1717 runlist->rl = rl;
1718 }
1719 ntfs_debug("Done (extend hole).");
1720 return 0;
1721 }
1722 /*
1723 * If @start is at the beginning of a run things are easier as there is
1724 * no need to split the first run.
1725 */
1726 if (start == rl->vcn) {
1727 /*
1728 * @start is at the beginning of a run.
1729 *
1730 * If the previous run is sparse, extend its hole.
1731 *
1732 * If @end is not in the same run, switch the run to be sparse
1733 * and extend the newly created hole.
1734 *
1735 * Thus both of these cases reduce the problem to the above
1736 * case of "@start is in a hole".
1737 */
1738 if (rl > runlist->rl && (rl - 1)->lcn == LCN_HOLE) {
1739 rl--;
1740 goto extend_hole;
1741 }
1742 if (end >= rl[1].vcn) {
1743 rl->lcn = LCN_HOLE;
1744 goto extend_hole;
1745 }
1746 /*
1747 * The final case is when @end is in the same run as @start.
1748 * For this need to split the run into two. One run for the
1749 * sparse region between the beginning of the old run, i.e.
1750 * @start, and @end and one for the remaining non-sparse
1751 * region, i.e. between @end and the end of the old run.
1752 */
1753 trl = ntfs_rl_realloc(runlist->rl, old_size, old_size + 1);
1754 if (IS_ERR(trl))
1755 goto enomem_out;
1756 old_size++;
1757 if (runlist->rl != trl) {
1758 rl = trl + (rl - runlist->rl);
1759 rl_end = trl + (rl_end - runlist->rl);
1760 rl_real_end = trl + (rl_real_end - runlist->rl);
1761 runlist->rl = trl;
1762 }
1763split_end:
1764 /* Shift all the runs up by one. */
1765 memmove(rl + 1, rl, (rl_real_end - rl + 1) * sizeof(*rl));
1766 /* Finally, setup the two split runs. */
1767 rl->lcn = LCN_HOLE;
1768 rl->length = length;
1769 rl++;
1770 rl->vcn += length;
1771 /* Only adjust the lcn if it is real. */
1772 if (rl->lcn >= 0 || lcn_fixup)
1773 rl->lcn += length;
1774 rl->length -= length;
1775 ntfs_debug("Done (split one).");
1776 return 0;
1777 }
1778 /*
1779 * @start is neither in a hole nor at the beginning of a run.
1780 *
1781 * If @end is in a hole, things are easier as simply truncating the run
1782 * @start is in to end at @start - 1, deleting all runs after that up
1783 * to @end, and finally extending the beginning of the run @end is in
1784 * to be @start is all that is needed.
1785 */
1786 if (rl_end->lcn == LCN_HOLE) {
1787 /* Truncate the run containing @start. */
1788 rl->length = start - rl->vcn;
1789 rl++;
1790 /* Cut out all runlist elements up to @end. */
1791 if (rl < rl_end)
1792 memmove(rl, rl_end, (rl_real_end - rl_end + 1) *
1793 sizeof(*rl));
1794 /* Extend the beginning of the run @end is in to be @start. */
1795 rl->vcn = start;
1796 rl->length = rl[1].vcn - start;
1797 goto shrink_allocation;
1798 }
1799 /*
1800 * If @end is not in a hole there are still two cases to distinguish.
1801 * Either @end is or is not in the same run as @start.
1802 *
1803 * The second case is easier as it can be reduced to an already solved
1804 * problem by truncating the run @start is in to end at @start - 1.
1805 * Then, if @end is in the next run need to split the run into a sparse
1806 * run followed by a non-sparse run (already covered above) and if @end
1807 * is not in the next run switching it to be sparse, again reduces the
1808 * problem to the already covered case of "@start is in a hole".
1809 */
1810 if (end >= rl[1].vcn) {
1811 /*
1812 * If @end is not in the next run, reduce the problem to the
1813 * case of "@start is in a hole".
1814 */
1815 if (rl[1].length && end >= rl[2].vcn) {
1816 /* Truncate the run containing @start. */
1817 rl->length = start - rl->vcn;
1818 rl++;
1819 rl->vcn = start;
1820 rl->lcn = LCN_HOLE;
1821 goto extend_hole;
1822 }
1823 trl = ntfs_rl_realloc(runlist->rl, old_size, old_size + 1);
1824 if (IS_ERR(trl))
1825 goto enomem_out;
1826 old_size++;
1827 if (runlist->rl != trl) {
1828 rl = trl + (rl - runlist->rl);
1829 rl_end = trl + (rl_end - runlist->rl);
1830 rl_real_end = trl + (rl_real_end - runlist->rl);
1831 runlist->rl = trl;
1832 }
1833 /* Truncate the run containing @start. */
1834 rl->length = start - rl->vcn;
1835 rl++;
1836 /*
1837 * @end is in the next run, reduce the problem to the case
1838 * where "@start is at the beginning of a run and @end is in
1839 * the same run as @start".
1840 */
1841 delta = rl->vcn - start;
1842 rl->vcn = start;
1843 if (rl->lcn >= 0) {
1844 rl->lcn -= delta;
1845 /* Need this in case the lcn just became negative. */
1846 lcn_fixup = TRUE;
1847 }
1848 rl->length += delta;
1849 goto split_end;
1850 }
1851 /*
1852 * The first case from above, i.e. @end is in the same run as @start.
1853 * We need to split the run into three. One run for the non-sparse
1854 * region between the beginning of the old run and @start, one for the
1855 * sparse region between @start and @end, and one for the remaining
1856 * non-sparse region, i.e. between @end and the end of the old run.
1857 */
1858 trl = ntfs_rl_realloc(runlist->rl, old_size, old_size + 2);
1859 if (IS_ERR(trl))
1860 goto enomem_out;
1861 old_size += 2;
1862 if (runlist->rl != trl) {
1863 rl = trl + (rl - runlist->rl);
1864 rl_end = trl + (rl_end - runlist->rl);
1865 rl_real_end = trl + (rl_real_end - runlist->rl);
1866 runlist->rl = trl;
1867 }
1868 /* Shift all the runs up by two. */
1869 memmove(rl + 2, rl, (rl_real_end - rl + 1) * sizeof(*rl));
1870 /* Finally, setup the three split runs. */
1871 rl->length = start - rl->vcn;
1872 rl++;
1873 rl->vcn = start;
1874 rl->lcn = LCN_HOLE;
1875 rl->length = length;
1876 rl++;
1877 delta = end - rl->vcn;
1878 rl->vcn = end;
1879 rl->lcn += delta;
1880 rl->length -= delta;
1881 ntfs_debug("Done (split both).");
1882 return 0;
1883enomem_out:
1884 ntfs_error(vol->sb, "Not enough memory to extend runlist buffer.");
1885 return -ENOMEM;
1886}
1887
1556#endif /* NTFS_RW */ 1888#endif /* NTFS_RW */
diff --git a/fs/ntfs/runlist.h b/fs/ntfs/runlist.h
index aa0ee6540e7c..47728fbb610b 100644
--- a/fs/ntfs/runlist.h
+++ b/fs/ntfs/runlist.h
@@ -94,6 +94,9 @@ extern int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst,
94extern int ntfs_rl_truncate_nolock(const ntfs_volume *vol, 94extern int ntfs_rl_truncate_nolock(const ntfs_volume *vol,
95 runlist *const runlist, const s64 new_length); 95 runlist *const runlist, const s64 new_length);
96 96
97int ntfs_rl_punch_nolock(const ntfs_volume *vol, runlist *const runlist,
98 const VCN start, const s64 length);
99
97#endif /* NTFS_RW */ 100#endif /* NTFS_RW */
98 101
99#endif /* _LINUX_NTFS_RUNLIST_H */ 102#endif /* _LINUX_NTFS_RUNLIST_H */
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 41aa8eb6755b..b2b392961268 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -1133,7 +1133,8 @@ mft_unmap_out:
1133 * 1133 *
1134 * Return TRUE on success or FALSE on error. 1134 * Return TRUE on success or FALSE on error.
1135 */ 1135 */
1136static BOOL load_and_check_logfile(ntfs_volume *vol) 1136static BOOL load_and_check_logfile(ntfs_volume *vol,
1137 RESTART_PAGE_HEADER **rp)
1137{ 1138{
1138 struct inode *tmp_ino; 1139 struct inode *tmp_ino;
1139 1140
@@ -1145,7 +1146,7 @@ static BOOL load_and_check_logfile(ntfs_volume *vol)
1145 /* Caller will display error message. */ 1146 /* Caller will display error message. */
1146 return FALSE; 1147 return FALSE;
1147 } 1148 }
1148 if (!ntfs_check_logfile(tmp_ino)) { 1149 if (!ntfs_check_logfile(tmp_ino, rp)) {
1149 iput(tmp_ino); 1150 iput(tmp_ino);
1150 /* ntfs_check_logfile() will have displayed error output. */ 1151 /* ntfs_check_logfile() will have displayed error output. */
1151 return FALSE; 1152 return FALSE;
@@ -1689,6 +1690,7 @@ static BOOL load_system_files(ntfs_volume *vol)
1689 VOLUME_INFORMATION *vi; 1690 VOLUME_INFORMATION *vi;
1690 ntfs_attr_search_ctx *ctx; 1691 ntfs_attr_search_ctx *ctx;
1691#ifdef NTFS_RW 1692#ifdef NTFS_RW
1693 RESTART_PAGE_HEADER *rp;
1692 int err; 1694 int err;
1693#endif /* NTFS_RW */ 1695#endif /* NTFS_RW */
1694 1696
@@ -1841,8 +1843,9 @@ get_ctx_vol_failed:
1841 * Get the inode for the logfile, check it and determine if the volume 1843 * Get the inode for the logfile, check it and determine if the volume
1842 * was shutdown cleanly. 1844 * was shutdown cleanly.
1843 */ 1845 */
1844 if (!load_and_check_logfile(vol) || 1846 rp = NULL;
1845 !ntfs_is_logfile_clean(vol->logfile_ino)) { 1847 if (!load_and_check_logfile(vol, &rp) ||
1848 !ntfs_is_logfile_clean(vol->logfile_ino, rp)) {
1846 static const char *es1a = "Failed to load $LogFile"; 1849 static const char *es1a = "Failed to load $LogFile";
1847 static const char *es1b = "$LogFile is not clean"; 1850 static const char *es1b = "$LogFile is not clean";
1848 static const char *es2 = ". Mount in Windows."; 1851 static const char *es2 = ". Mount in Windows.";
@@ -1857,6 +1860,10 @@ get_ctx_vol_failed:
1857 "continue nor on_errors=" 1860 "continue nor on_errors="
1858 "remount-ro was specified%s", 1861 "remount-ro was specified%s",
1859 es1, es2); 1862 es1, es2);
1863 if (vol->logfile_ino) {
1864 BUG_ON(!rp);
1865 ntfs_free(rp);
1866 }
1860 goto iput_logfile_err_out; 1867 goto iput_logfile_err_out;
1861 } 1868 }
1862 sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; 1869 sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
@@ -1867,6 +1874,7 @@ get_ctx_vol_failed:
1867 /* This will prevent a read-write remount. */ 1874 /* This will prevent a read-write remount. */
1868 NVolSetErrors(vol); 1875 NVolSetErrors(vol);
1869 } 1876 }
1877 ntfs_free(rp);
1870#endif /* NTFS_RW */ 1878#endif /* NTFS_RW */
1871 /* Get the root directory inode so we can do path lookups. */ 1879 /* Get the root directory inode so we can do path lookups. */
1872 vol->root_ino = ntfs_iget(sb, FILE_root); 1880 vol->root_ino = ntfs_iget(sb, FILE_root);
diff --git a/fs/ntfs/unistr.c b/fs/ntfs/unistr.c
index 19c42e231b44..a389a5a16c84 100644
--- a/fs/ntfs/unistr.c
+++ b/fs/ntfs/unistr.c
@@ -372,7 +372,8 @@ retry: wc = nls->uni2char(le16_to_cpu(ins[i]), ns + o,
372 return -EINVAL; 372 return -EINVAL;
373conversion_err: 373conversion_err:
374 ntfs_error(vol->sb, "Unicode name contains characters that cannot be " 374 ntfs_error(vol->sb, "Unicode name contains characters that cannot be "
375 "converted to character set %s.", nls->charset); 375 "converted to character set %s. You might want to "
376 "try to use the mount option nls=utf8.", nls->charset);
376 if (ns != *outs) 377 if (ns != *outs)
377 kfree(ns); 378 kfree(ns);
378 if (wc != -ENAMETOOLONG) 379 if (wc != -ENAMETOOLONG)
diff --git a/fs/open.c b/fs/open.c
index 32bf05e2996d..2fac58c51910 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -24,6 +24,7 @@
24#include <linux/personality.h> 24#include <linux/personality.h>
25#include <linux/pagemap.h> 25#include <linux/pagemap.h>
26#include <linux/syscalls.h> 26#include <linux/syscalls.h>
27#include <linux/rcupdate.h>
27 28
28#include <asm/unistd.h> 29#include <asm/unistd.h>
29 30
@@ -842,14 +843,16 @@ int get_unused_fd(void)
842{ 843{
843 struct files_struct * files = current->files; 844 struct files_struct * files = current->files;
844 int fd, error; 845 int fd, error;
846 struct fdtable *fdt;
845 847
846 error = -EMFILE; 848 error = -EMFILE;
847 spin_lock(&files->file_lock); 849 spin_lock(&files->file_lock);
848 850
849repeat: 851repeat:
850 fd = find_next_zero_bit(files->open_fds->fds_bits, 852 fdt = files_fdtable(files);
851 files->max_fdset, 853 fd = find_next_zero_bit(fdt->open_fds->fds_bits,
852 files->next_fd); 854 fdt->max_fdset,
855 fdt->next_fd);
853 856
854 /* 857 /*
855 * N.B. For clone tasks sharing a files structure, this test 858 * N.B. For clone tasks sharing a files structure, this test
@@ -872,14 +875,14 @@ repeat:
872 goto repeat; 875 goto repeat;
873 } 876 }
874 877
875 FD_SET(fd, files->open_fds); 878 FD_SET(fd, fdt->open_fds);
876 FD_CLR(fd, files->close_on_exec); 879 FD_CLR(fd, fdt->close_on_exec);
877 files->next_fd = fd + 1; 880 fdt->next_fd = fd + 1;
878#if 1 881#if 1
879 /* Sanity check */ 882 /* Sanity check */
880 if (files->fd[fd] != NULL) { 883 if (fdt->fd[fd] != NULL) {
881 printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd); 884 printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd);
882 files->fd[fd] = NULL; 885 fdt->fd[fd] = NULL;
883 } 886 }
884#endif 887#endif
885 error = fd; 888 error = fd;
@@ -893,9 +896,10 @@ EXPORT_SYMBOL(get_unused_fd);
893 896
894static inline void __put_unused_fd(struct files_struct *files, unsigned int fd) 897static inline void __put_unused_fd(struct files_struct *files, unsigned int fd)
895{ 898{
896 __FD_CLR(fd, files->open_fds); 899 struct fdtable *fdt = files_fdtable(files);
897 if (fd < files->next_fd) 900 __FD_CLR(fd, fdt->open_fds);
898 files->next_fd = fd; 901 if (fd < fdt->next_fd)
902 fdt->next_fd = fd;
899} 903}
900 904
901void fastcall put_unused_fd(unsigned int fd) 905void fastcall put_unused_fd(unsigned int fd)
@@ -924,25 +928,21 @@ EXPORT_SYMBOL(put_unused_fd);
924void fastcall fd_install(unsigned int fd, struct file * file) 928void fastcall fd_install(unsigned int fd, struct file * file)
925{ 929{
926 struct files_struct *files = current->files; 930 struct files_struct *files = current->files;
931 struct fdtable *fdt;
927 spin_lock(&files->file_lock); 932 spin_lock(&files->file_lock);
928 if (unlikely(files->fd[fd] != NULL)) 933 fdt = files_fdtable(files);
929 BUG(); 934 BUG_ON(fdt->fd[fd] != NULL);
930 files->fd[fd] = file; 935 rcu_assign_pointer(fdt->fd[fd], file);
931 spin_unlock(&files->file_lock); 936 spin_unlock(&files->file_lock);
932} 937}
933 938
934EXPORT_SYMBOL(fd_install); 939EXPORT_SYMBOL(fd_install);
935 940
936asmlinkage long sys_open(const char __user * filename, int flags, int mode) 941long do_sys_open(const char __user *filename, int flags, int mode)
937{ 942{
938 char * tmp; 943 char *tmp = getname(filename);
939 int fd; 944 int fd = PTR_ERR(tmp);
940 945
941 if (force_o_largefile())
942 flags |= O_LARGEFILE;
943
944 tmp = getname(filename);
945 fd = PTR_ERR(tmp);
946 if (!IS_ERR(tmp)) { 946 if (!IS_ERR(tmp)) {
947 fd = get_unused_fd(); 947 fd = get_unused_fd();
948 if (fd >= 0) { 948 if (fd >= 0) {
@@ -959,6 +959,14 @@ asmlinkage long sys_open(const char __user * filename, int flags, int mode)
959 } 959 }
960 return fd; 960 return fd;
961} 961}
962
963asmlinkage long sys_open(const char __user *filename, int flags, int mode)
964{
965 if (force_o_largefile())
966 flags |= O_LARGEFILE;
967
968 return do_sys_open(filename, flags, mode);
969}
962EXPORT_SYMBOL_GPL(sys_open); 970EXPORT_SYMBOL_GPL(sys_open);
963 971
964#ifndef __alpha__ 972#ifndef __alpha__
@@ -1007,15 +1015,17 @@ asmlinkage long sys_close(unsigned int fd)
1007{ 1015{
1008 struct file * filp; 1016 struct file * filp;
1009 struct files_struct *files = current->files; 1017 struct files_struct *files = current->files;
1018 struct fdtable *fdt;
1010 1019
1011 spin_lock(&files->file_lock); 1020 spin_lock(&files->file_lock);
1012 if (fd >= files->max_fds) 1021 fdt = files_fdtable(files);
1022 if (fd >= fdt->max_fds)
1013 goto out_unlock; 1023 goto out_unlock;
1014 filp = files->fd[fd]; 1024 filp = fdt->fd[fd];
1015 if (!filp) 1025 if (!filp)
1016 goto out_unlock; 1026 goto out_unlock;
1017 files->fd[fd] = NULL; 1027 rcu_assign_pointer(fdt->fd[fd], NULL);
1018 FD_CLR(fd, files->close_on_exec); 1028 FD_CLR(fd, fdt->close_on_exec);
1019 __put_unused_fd(files, fd); 1029 __put_unused_fd(files, fd);
1020 spin_unlock(&files->file_lock); 1030 spin_unlock(&files->file_lock);
1021 return filp_close(filp, files); 1031 return filp_close(filp, files);
diff --git a/fs/pipe.c b/fs/pipe.c
index 25aa09f9d09d..66aa0b938d6a 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -39,7 +39,11 @@ void pipe_wait(struct inode * inode)
39{ 39{
40 DEFINE_WAIT(wait); 40 DEFINE_WAIT(wait);
41 41
42 prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE); 42 /*
43 * Pipes are system-local resources, so sleeping on them
44 * is considered a noninteractive wait:
45 */
46 prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE|TASK_NONINTERACTIVE);
43 up(PIPE_SEM(*inode)); 47 up(PIPE_SEM(*inode));
44 schedule(); 48 schedule();
45 finish_wait(PIPE_WAIT(*inode), &wait); 49 finish_wait(PIPE_WAIT(*inode), &wait);
@@ -415,6 +419,10 @@ pipe_poll(struct file *filp, poll_table *wait)
415 419
416 if (filp->f_mode & FMODE_WRITE) { 420 if (filp->f_mode & FMODE_WRITE) {
417 mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0; 421 mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0;
422 /*
423 * Most Unices do not set POLLERR for FIFOs but on Linux they
424 * behave exactly like pipes for poll().
425 */
418 if (!PIPE_READERS(*inode)) 426 if (!PIPE_READERS(*inode))
419 mask |= POLLERR; 427 mask |= POLLERR;
420 } 428 }
@@ -422,9 +430,6 @@ pipe_poll(struct file *filp, poll_table *wait)
422 return mask; 430 return mask;
423} 431}
424 432
425/* FIXME: most Unices do not set POLLERR for fifos */
426#define fifo_poll pipe_poll
427
428static int 433static int
429pipe_release(struct inode *inode, int decr, int decw) 434pipe_release(struct inode *inode, int decr, int decw)
430{ 435{
@@ -568,7 +573,7 @@ struct file_operations read_fifo_fops = {
568 .read = pipe_read, 573 .read = pipe_read,
569 .readv = pipe_readv, 574 .readv = pipe_readv,
570 .write = bad_pipe_w, 575 .write = bad_pipe_w,
571 .poll = fifo_poll, 576 .poll = pipe_poll,
572 .ioctl = pipe_ioctl, 577 .ioctl = pipe_ioctl,
573 .open = pipe_read_open, 578 .open = pipe_read_open,
574 .release = pipe_read_release, 579 .release = pipe_read_release,
@@ -580,7 +585,7 @@ struct file_operations write_fifo_fops = {
580 .read = bad_pipe_r, 585 .read = bad_pipe_r,
581 .write = pipe_write, 586 .write = pipe_write,
582 .writev = pipe_writev, 587 .writev = pipe_writev,
583 .poll = fifo_poll, 588 .poll = pipe_poll,
584 .ioctl = pipe_ioctl, 589 .ioctl = pipe_ioctl,
585 .open = pipe_write_open, 590 .open = pipe_write_open,
586 .release = pipe_write_release, 591 .release = pipe_write_release,
@@ -593,7 +598,7 @@ struct file_operations rdwr_fifo_fops = {
593 .readv = pipe_readv, 598 .readv = pipe_readv,
594 .write = pipe_write, 599 .write = pipe_write,
595 .writev = pipe_writev, 600 .writev = pipe_writev,
596 .poll = fifo_poll, 601 .poll = pipe_poll,
597 .ioctl = pipe_ioctl, 602 .ioctl = pipe_ioctl,
598 .open = pipe_rdwr_open, 603 .open = pipe_rdwr_open,
599 .release = pipe_rdwr_release, 604 .release = pipe_rdwr_release,
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 37668fe998ad..d88d518d30f6 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -159,6 +159,7 @@ static inline char * task_state(struct task_struct *p, char *buffer)
159{ 159{
160 struct group_info *group_info; 160 struct group_info *group_info;
161 int g; 161 int g;
162 struct fdtable *fdt = NULL;
162 163
163 read_lock(&tasklist_lock); 164 read_lock(&tasklist_lock);
164 buffer += sprintf(buffer, 165 buffer += sprintf(buffer,
@@ -179,10 +180,12 @@ static inline char * task_state(struct task_struct *p, char *buffer)
179 p->gid, p->egid, p->sgid, p->fsgid); 180 p->gid, p->egid, p->sgid, p->fsgid);
180 read_unlock(&tasklist_lock); 181 read_unlock(&tasklist_lock);
181 task_lock(p); 182 task_lock(p);
183 if (p->files)
184 fdt = files_fdtable(p->files);
182 buffer += sprintf(buffer, 185 buffer += sprintf(buffer,
183 "FDSize:\t%d\n" 186 "FDSize:\t%d\n"
184 "Groups:\t", 187 "Groups:\t",
185 p->files ? p->files->max_fds : 0); 188 fdt ? fdt->max_fds : 0);
186 189
187 group_info = p->group_info; 190 group_info = p->group_info;
188 get_group_info(group_info); 191 get_group_info(group_info);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 491f2d9f89ac..23db452ab428 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -11,6 +11,40 @@
11 * go into icache. We cache the reference to task_struct upon lookup too. 11 * go into icache. We cache the reference to task_struct upon lookup too.
12 * Eventually it should become a filesystem in its own. We don't use the 12 * Eventually it should become a filesystem in its own. We don't use the
13 * rest of procfs anymore. 13 * rest of procfs anymore.
14 *
15 *
16 * Changelog:
17 * 17-Jan-2005
18 * Allan Bezerra
19 * Bruna Moreira <bruna.moreira@indt.org.br>
20 * Edjard Mota <edjard.mota@indt.org.br>
21 * Ilias Biris <ilias.biris@indt.org.br>
22 * Mauricio Lin <mauricio.lin@indt.org.br>
23 *
24 * Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
25 *
26 * A new process specific entry (smaps) included in /proc. It shows the
27 * size of rss for each memory area. The maps entry lacks information
28 * about physical memory size (rss) for each mapped file, i.e.,
29 * rss information for executables and library files.
30 * This additional information is useful for any tools that need to know
31 * about physical memory consumption for a process specific library.
32 *
33 * Changelog:
34 * 21-Feb-2005
35 * Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
36 * Pud inclusion in the page table walking.
37 *
38 * ChangeLog:
39 * 10-Mar-2005
40 * 10LE Instituto Nokia de Tecnologia - INdT:
41 * A better way to walks through the page table as suggested by Hugh Dickins.
42 *
43 * Simo Piiroinen <simo.piiroinen@nokia.com>:
44 * Smaps information related to shared, private, clean and dirty pages.
45 *
46 * Paul Mundt <paul.mundt@nokia.com>:
47 * Overall revision about smaps.
14 */ 48 */
15 49
16#include <asm/uaccess.h> 50#include <asm/uaccess.h>
@@ -28,6 +62,7 @@
28#include <linux/namespace.h> 62#include <linux/namespace.h>
29#include <linux/mm.h> 63#include <linux/mm.h>
30#include <linux/smp_lock.h> 64#include <linux/smp_lock.h>
65#include <linux/rcupdate.h>
31#include <linux/kallsyms.h> 66#include <linux/kallsyms.h>
32#include <linux/mount.h> 67#include <linux/mount.h>
33#include <linux/security.h> 68#include <linux/security.h>
@@ -65,8 +100,10 @@ enum pid_directory_inos {
65 PROC_TGID_STAT, 100 PROC_TGID_STAT,
66 PROC_TGID_STATM, 101 PROC_TGID_STATM,
67 PROC_TGID_MAPS, 102 PROC_TGID_MAPS,
103 PROC_TGID_NUMA_MAPS,
68 PROC_TGID_MOUNTS, 104 PROC_TGID_MOUNTS,
69 PROC_TGID_WCHAN, 105 PROC_TGID_WCHAN,
106 PROC_TGID_SMAPS,
70#ifdef CONFIG_SCHEDSTATS 107#ifdef CONFIG_SCHEDSTATS
71 PROC_TGID_SCHEDSTAT, 108 PROC_TGID_SCHEDSTAT,
72#endif 109#endif
@@ -83,7 +120,6 @@ enum pid_directory_inos {
83#ifdef CONFIG_AUDITSYSCALL 120#ifdef CONFIG_AUDITSYSCALL
84 PROC_TGID_LOGINUID, 121 PROC_TGID_LOGINUID,
85#endif 122#endif
86 PROC_TGID_FD_DIR,
87 PROC_TGID_OOM_SCORE, 123 PROC_TGID_OOM_SCORE,
88 PROC_TGID_OOM_ADJUST, 124 PROC_TGID_OOM_ADJUST,
89 PROC_TID_INO, 125 PROC_TID_INO,
@@ -102,8 +138,10 @@ enum pid_directory_inos {
102 PROC_TID_STAT, 138 PROC_TID_STAT,
103 PROC_TID_STATM, 139 PROC_TID_STATM,
104 PROC_TID_MAPS, 140 PROC_TID_MAPS,
141 PROC_TID_NUMA_MAPS,
105 PROC_TID_MOUNTS, 142 PROC_TID_MOUNTS,
106 PROC_TID_WCHAN, 143 PROC_TID_WCHAN,
144 PROC_TID_SMAPS,
107#ifdef CONFIG_SCHEDSTATS 145#ifdef CONFIG_SCHEDSTATS
108 PROC_TID_SCHEDSTAT, 146 PROC_TID_SCHEDSTAT,
109#endif 147#endif
@@ -120,9 +158,11 @@ enum pid_directory_inos {
120#ifdef CONFIG_AUDITSYSCALL 158#ifdef CONFIG_AUDITSYSCALL
121 PROC_TID_LOGINUID, 159 PROC_TID_LOGINUID,
122#endif 160#endif
123 PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */
124 PROC_TID_OOM_SCORE, 161 PROC_TID_OOM_SCORE,
125 PROC_TID_OOM_ADJUST, 162 PROC_TID_OOM_ADJUST,
163
164 /* Add new entries before this */
165 PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */
126}; 166};
127 167
128struct pid_entry { 168struct pid_entry {
@@ -144,6 +184,9 @@ static struct pid_entry tgid_base_stuff[] = {
144 E(PROC_TGID_STAT, "stat", S_IFREG|S_IRUGO), 184 E(PROC_TGID_STAT, "stat", S_IFREG|S_IRUGO),
145 E(PROC_TGID_STATM, "statm", S_IFREG|S_IRUGO), 185 E(PROC_TGID_STATM, "statm", S_IFREG|S_IRUGO),
146 E(PROC_TGID_MAPS, "maps", S_IFREG|S_IRUGO), 186 E(PROC_TGID_MAPS, "maps", S_IFREG|S_IRUGO),
187#ifdef CONFIG_NUMA
188 E(PROC_TGID_NUMA_MAPS, "numa_maps", S_IFREG|S_IRUGO),
189#endif
147 E(PROC_TGID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), 190 E(PROC_TGID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR),
148#ifdef CONFIG_SECCOMP 191#ifdef CONFIG_SECCOMP
149 E(PROC_TGID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR), 192 E(PROC_TGID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR),
@@ -152,6 +195,7 @@ static struct pid_entry tgid_base_stuff[] = {
152 E(PROC_TGID_ROOT, "root", S_IFLNK|S_IRWXUGO), 195 E(PROC_TGID_ROOT, "root", S_IFLNK|S_IRWXUGO),
153 E(PROC_TGID_EXE, "exe", S_IFLNK|S_IRWXUGO), 196 E(PROC_TGID_EXE, "exe", S_IFLNK|S_IRWXUGO),
154 E(PROC_TGID_MOUNTS, "mounts", S_IFREG|S_IRUGO), 197 E(PROC_TGID_MOUNTS, "mounts", S_IFREG|S_IRUGO),
198 E(PROC_TGID_SMAPS, "smaps", S_IFREG|S_IRUGO),
155#ifdef CONFIG_SECURITY 199#ifdef CONFIG_SECURITY
156 E(PROC_TGID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), 200 E(PROC_TGID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO),
157#endif 201#endif
@@ -180,6 +224,9 @@ static struct pid_entry tid_base_stuff[] = {
180 E(PROC_TID_STAT, "stat", S_IFREG|S_IRUGO), 224 E(PROC_TID_STAT, "stat", S_IFREG|S_IRUGO),
181 E(PROC_TID_STATM, "statm", S_IFREG|S_IRUGO), 225 E(PROC_TID_STATM, "statm", S_IFREG|S_IRUGO),
182 E(PROC_TID_MAPS, "maps", S_IFREG|S_IRUGO), 226 E(PROC_TID_MAPS, "maps", S_IFREG|S_IRUGO),
227#ifdef CONFIG_NUMA
228 E(PROC_TID_NUMA_MAPS, "numa_maps", S_IFREG|S_IRUGO),
229#endif
183 E(PROC_TID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), 230 E(PROC_TID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR),
184#ifdef CONFIG_SECCOMP 231#ifdef CONFIG_SECCOMP
185 E(PROC_TID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR), 232 E(PROC_TID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR),
@@ -188,6 +235,7 @@ static struct pid_entry tid_base_stuff[] = {
188 E(PROC_TID_ROOT, "root", S_IFLNK|S_IRWXUGO), 235 E(PROC_TID_ROOT, "root", S_IFLNK|S_IRWXUGO),
189 E(PROC_TID_EXE, "exe", S_IFLNK|S_IRWXUGO), 236 E(PROC_TID_EXE, "exe", S_IFLNK|S_IRWXUGO),
190 E(PROC_TID_MOUNTS, "mounts", S_IFREG|S_IRUGO), 237 E(PROC_TID_MOUNTS, "mounts", S_IFREG|S_IRUGO),
238 E(PROC_TID_SMAPS, "smaps", S_IFREG|S_IRUGO),
191#ifdef CONFIG_SECURITY 239#ifdef CONFIG_SECURITY
192 E(PROC_TID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), 240 E(PROC_TID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO),
193#endif 241#endif
@@ -236,30 +284,36 @@ static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsm
236 284
237 files = get_files_struct(task); 285 files = get_files_struct(task);
238 if (files) { 286 if (files) {
239 spin_lock(&files->file_lock); 287 rcu_read_lock();
240 file = fcheck_files(files, fd); 288 file = fcheck_files(files, fd);
241 if (file) { 289 if (file) {
242 *mnt = mntget(file->f_vfsmnt); 290 *mnt = mntget(file->f_vfsmnt);
243 *dentry = dget(file->f_dentry); 291 *dentry = dget(file->f_dentry);
244 spin_unlock(&files->file_lock); 292 rcu_read_unlock();
245 put_files_struct(files); 293 put_files_struct(files);
246 return 0; 294 return 0;
247 } 295 }
248 spin_unlock(&files->file_lock); 296 rcu_read_unlock();
249 put_files_struct(files); 297 put_files_struct(files);
250 } 298 }
251 return -ENOENT; 299 return -ENOENT;
252} 300}
253 301
254static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) 302static struct fs_struct *get_fs_struct(struct task_struct *task)
255{ 303{
256 struct fs_struct *fs; 304 struct fs_struct *fs;
257 int result = -ENOENT; 305 task_lock(task);
258 task_lock(proc_task(inode)); 306 fs = task->fs;
259 fs = proc_task(inode)->fs;
260 if(fs) 307 if(fs)
261 atomic_inc(&fs->count); 308 atomic_inc(&fs->count);
262 task_unlock(proc_task(inode)); 309 task_unlock(task);
310 return fs;
311}
312
313static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
314{
315 struct fs_struct *fs = get_fs_struct(proc_task(inode));
316 int result = -ENOENT;
263 if (fs) { 317 if (fs) {
264 read_lock(&fs->lock); 318 read_lock(&fs->lock);
265 *mnt = mntget(fs->pwdmnt); 319 *mnt = mntget(fs->pwdmnt);
@@ -273,13 +327,8 @@ static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfs
273 327
274static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) 328static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
275{ 329{
276 struct fs_struct *fs; 330 struct fs_struct *fs = get_fs_struct(proc_task(inode));
277 int result = -ENOENT; 331 int result = -ENOENT;
278 task_lock(proc_task(inode));
279 fs = proc_task(inode)->fs;
280 if(fs)
281 atomic_inc(&fs->count);
282 task_unlock(proc_task(inode));
283 if (fs) { 332 if (fs) {
284 read_lock(&fs->lock); 333 read_lock(&fs->lock);
285 *mnt = mntget(fs->rootmnt); 334 *mnt = mntget(fs->rootmnt);
@@ -298,33 +347,6 @@ static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vf
298 (task->state == TASK_STOPPED || task->state == TASK_TRACED) && \ 347 (task->state == TASK_STOPPED || task->state == TASK_TRACED) && \
299 security_ptrace(current,task) == 0)) 348 security_ptrace(current,task) == 0))
300 349
301static int may_ptrace_attach(struct task_struct *task)
302{
303 int retval = 0;
304
305 task_lock(task);
306
307 if (!task->mm)
308 goto out;
309 if (((current->uid != task->euid) ||
310 (current->uid != task->suid) ||
311 (current->uid != task->uid) ||
312 (current->gid != task->egid) ||
313 (current->gid != task->sgid) ||
314 (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
315 goto out;
316 rmb();
317 if (task->mm->dumpable != 1 && !capable(CAP_SYS_PTRACE))
318 goto out;
319 if (security_ptrace(current, task))
320 goto out;
321
322 retval = 1;
323out:
324 task_unlock(task);
325 return retval;
326}
327
328static int proc_pid_environ(struct task_struct *task, char * buffer) 350static int proc_pid_environ(struct task_struct *task, char * buffer)
329{ 351{
330 int res = 0; 352 int res = 0;
@@ -334,7 +356,7 @@ static int proc_pid_environ(struct task_struct *task, char * buffer)
334 if (len > PAGE_SIZE) 356 if (len > PAGE_SIZE)
335 len = PAGE_SIZE; 357 len = PAGE_SIZE;
336 res = access_process_vm(task, mm->env_start, buffer, len, 0); 358 res = access_process_vm(task, mm->env_start, buffer, len, 0);
337 if (!may_ptrace_attach(task)) 359 if (!ptrace_may_attach(task))
338 res = -ESRCH; 360 res = -ESRCH;
339 mmput(mm); 361 mmput(mm);
340 } 362 }
@@ -515,6 +537,46 @@ static struct file_operations proc_maps_operations = {
515 .release = seq_release, 537 .release = seq_release,
516}; 538};
517 539
540#ifdef CONFIG_NUMA
541extern struct seq_operations proc_pid_numa_maps_op;
542static int numa_maps_open(struct inode *inode, struct file *file)
543{
544 struct task_struct *task = proc_task(inode);
545 int ret = seq_open(file, &proc_pid_numa_maps_op);
546 if (!ret) {
547 struct seq_file *m = file->private_data;
548 m->private = task;
549 }
550 return ret;
551}
552
553static struct file_operations proc_numa_maps_operations = {
554 .open = numa_maps_open,
555 .read = seq_read,
556 .llseek = seq_lseek,
557 .release = seq_release,
558};
559#endif
560
561extern struct seq_operations proc_pid_smaps_op;
562static int smaps_open(struct inode *inode, struct file *file)
563{
564 struct task_struct *task = proc_task(inode);
565 int ret = seq_open(file, &proc_pid_smaps_op);
566 if (!ret) {
567 struct seq_file *m = file->private_data;
568 m->private = task;
569 }
570 return ret;
571}
572
573static struct file_operations proc_smaps_operations = {
574 .open = smaps_open,
575 .read = seq_read,
576 .llseek = seq_lseek,
577 .release = seq_release,
578};
579
518extern struct seq_operations mounts_op; 580extern struct seq_operations mounts_op;
519static int mounts_open(struct inode *inode, struct file *file) 581static int mounts_open(struct inode *inode, struct file *file)
520{ 582{
@@ -597,7 +659,7 @@ static ssize_t mem_read(struct file * file, char __user * buf,
597 int ret = -ESRCH; 659 int ret = -ESRCH;
598 struct mm_struct *mm; 660 struct mm_struct *mm;
599 661
600 if (!MAY_PTRACE(task) || !may_ptrace_attach(task)) 662 if (!MAY_PTRACE(task) || !ptrace_may_attach(task))
601 goto out; 663 goto out;
602 664
603 ret = -ENOMEM; 665 ret = -ENOMEM;
@@ -623,7 +685,7 @@ static ssize_t mem_read(struct file * file, char __user * buf,
623 685
624 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; 686 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
625 retval = access_process_vm(task, src, page, this_len, 0); 687 retval = access_process_vm(task, src, page, this_len, 0);
626 if (!retval || !MAY_PTRACE(task) || !may_ptrace_attach(task)) { 688 if (!retval || !MAY_PTRACE(task) || !ptrace_may_attach(task)) {
627 if (!ret) 689 if (!ret)
628 ret = -EIO; 690 ret = -EIO;
629 break; 691 break;
@@ -661,7 +723,7 @@ static ssize_t mem_write(struct file * file, const char * buf,
661 struct task_struct *task = proc_task(file->f_dentry->d_inode); 723 struct task_struct *task = proc_task(file->f_dentry->d_inode);
662 unsigned long dst = *ppos; 724 unsigned long dst = *ppos;
663 725
664 if (!MAY_PTRACE(task) || !may_ptrace_attach(task)) 726 if (!MAY_PTRACE(task) || !ptrace_may_attach(task))
665 return -ESRCH; 727 return -ESRCH;
666 728
667 page = (char *)__get_free_page(GFP_USER); 729 page = (char *)__get_free_page(GFP_USER);
@@ -978,6 +1040,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
978 int retval; 1040 int retval;
979 char buf[NUMBUF]; 1041 char buf[NUMBUF];
980 struct files_struct * files; 1042 struct files_struct * files;
1043 struct fdtable *fdt;
981 1044
982 retval = -ENOENT; 1045 retval = -ENOENT;
983 if (!pid_alive(p)) 1046 if (!pid_alive(p))
@@ -1000,15 +1063,16 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
1000 files = get_files_struct(p); 1063 files = get_files_struct(p);
1001 if (!files) 1064 if (!files)
1002 goto out; 1065 goto out;
1003 spin_lock(&files->file_lock); 1066 rcu_read_lock();
1067 fdt = files_fdtable(files);
1004 for (fd = filp->f_pos-2; 1068 for (fd = filp->f_pos-2;
1005 fd < files->max_fds; 1069 fd < fdt->max_fds;
1006 fd++, filp->f_pos++) { 1070 fd++, filp->f_pos++) {
1007 unsigned int i,j; 1071 unsigned int i,j;
1008 1072
1009 if (!fcheck_files(files, fd)) 1073 if (!fcheck_files(files, fd))
1010 continue; 1074 continue;
1011 spin_unlock(&files->file_lock); 1075 rcu_read_unlock();
1012 1076
1013 j = NUMBUF; 1077 j = NUMBUF;
1014 i = fd; 1078 i = fd;
@@ -1020,12 +1084,12 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
1020 1084
1021 ino = fake_ino(tid, PROC_TID_FD_DIR + fd); 1085 ino = fake_ino(tid, PROC_TID_FD_DIR + fd);
1022 if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) { 1086 if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) {
1023 spin_lock(&files->file_lock); 1087 rcu_read_lock();
1024 break; 1088 break;
1025 } 1089 }
1026 spin_lock(&files->file_lock); 1090 rcu_read_lock();
1027 } 1091 }
1028 spin_unlock(&files->file_lock); 1092 rcu_read_unlock();
1029 put_files_struct(files); 1093 put_files_struct(files);
1030 } 1094 }
1031out: 1095out:
@@ -1200,9 +1264,9 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
1200 1264
1201 files = get_files_struct(task); 1265 files = get_files_struct(task);
1202 if (files) { 1266 if (files) {
1203 spin_lock(&files->file_lock); 1267 rcu_read_lock();
1204 if (fcheck_files(files, fd)) { 1268 if (fcheck_files(files, fd)) {
1205 spin_unlock(&files->file_lock); 1269 rcu_read_unlock();
1206 put_files_struct(files); 1270 put_files_struct(files);
1207 if (task_dumpable(task)) { 1271 if (task_dumpable(task)) {
1208 inode->i_uid = task->euid; 1272 inode->i_uid = task->euid;
@@ -1214,7 +1278,7 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
1214 security_task_to_inode(task, inode); 1278 security_task_to_inode(task, inode);
1215 return 1; 1279 return 1;
1216 } 1280 }
1217 spin_unlock(&files->file_lock); 1281 rcu_read_unlock();
1218 put_files_struct(files); 1282 put_files_struct(files);
1219 } 1283 }
1220 d_drop(dentry); 1284 d_drop(dentry);
@@ -1306,7 +1370,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
1306 if (!files) 1370 if (!files)
1307 goto out_unlock; 1371 goto out_unlock;
1308 inode->i_mode = S_IFLNK; 1372 inode->i_mode = S_IFLNK;
1309 spin_lock(&files->file_lock); 1373 rcu_read_lock();
1310 file = fcheck_files(files, fd); 1374 file = fcheck_files(files, fd);
1311 if (!file) 1375 if (!file)
1312 goto out_unlock2; 1376 goto out_unlock2;
@@ -1314,7 +1378,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
1314 inode->i_mode |= S_IRUSR | S_IXUSR; 1378 inode->i_mode |= S_IRUSR | S_IXUSR;
1315 if (file->f_mode & 2) 1379 if (file->f_mode & 2)
1316 inode->i_mode |= S_IWUSR | S_IXUSR; 1380 inode->i_mode |= S_IWUSR | S_IXUSR;
1317 spin_unlock(&files->file_lock); 1381 rcu_read_unlock();
1318 put_files_struct(files); 1382 put_files_struct(files);
1319 inode->i_op = &proc_pid_link_inode_operations; 1383 inode->i_op = &proc_pid_link_inode_operations;
1320 inode->i_size = 64; 1384 inode->i_size = 64;
@@ -1324,7 +1388,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
1324 return NULL; 1388 return NULL;
1325 1389
1326out_unlock2: 1390out_unlock2:
1327 spin_unlock(&files->file_lock); 1391 rcu_read_unlock();
1328 put_files_struct(files); 1392 put_files_struct(files);
1329out_unlock: 1393out_unlock:
1330 iput(inode); 1394 iput(inode);
@@ -1524,6 +1588,12 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
1524 case PROC_TGID_MAPS: 1588 case PROC_TGID_MAPS:
1525 inode->i_fop = &proc_maps_operations; 1589 inode->i_fop = &proc_maps_operations;
1526 break; 1590 break;
1591#ifdef CONFIG_NUMA
1592 case PROC_TID_NUMA_MAPS:
1593 case PROC_TGID_NUMA_MAPS:
1594 inode->i_fop = &proc_numa_maps_operations;
1595 break;
1596#endif
1527 case PROC_TID_MEM: 1597 case PROC_TID_MEM:
1528 case PROC_TGID_MEM: 1598 case PROC_TGID_MEM:
1529 inode->i_op = &proc_mem_inode_operations; 1599 inode->i_op = &proc_mem_inode_operations;
@@ -1539,6 +1609,10 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
1539 case PROC_TGID_MOUNTS: 1609 case PROC_TGID_MOUNTS:
1540 inode->i_fop = &proc_mounts_operations; 1610 inode->i_fop = &proc_mounts_operations;
1541 break; 1611 break;
1612 case PROC_TID_SMAPS:
1613 case PROC_TGID_SMAPS:
1614 inode->i_fop = &proc_smaps_operations;
1615 break;
1542#ifdef CONFIG_SECURITY 1616#ifdef CONFIG_SECURITY
1543 case PROC_TID_ATTR: 1617 case PROC_TID_ATTR:
1544 inode->i_nlink = 2; 1618 inode->i_nlink = 2;
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index abe8920313fb..8a8c34461d48 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -249,6 +249,18 @@ out:
249 return error; 249 return error;
250} 250}
251 251
252static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry,
253 struct kstat *stat)
254{
255 struct inode *inode = dentry->d_inode;
256 struct proc_dir_entry *de = PROC_I(inode)->pde;
257 if (de && de->nlink)
258 inode->i_nlink = de->nlink;
259
260 generic_fillattr(inode, stat);
261 return 0;
262}
263
252static struct inode_operations proc_file_inode_operations = { 264static struct inode_operations proc_file_inode_operations = {
253 .setattr = proc_notify_change, 265 .setattr = proc_notify_change,
254}; 266};
@@ -475,6 +487,7 @@ static struct file_operations proc_dir_operations = {
475 */ 487 */
476static struct inode_operations proc_dir_inode_operations = { 488static struct inode_operations proc_dir_inode_operations = {
477 .lookup = proc_lookup, 489 .lookup = proc_lookup,
490 .getattr = proc_getattr,
478 .setattr = proc_notify_change, 491 .setattr = proc_notify_change,
479}; 492};
480 493
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 133c28685105..effa6c0c467a 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -60,6 +60,8 @@ static void proc_delete_inode(struct inode *inode)
60 struct proc_dir_entry *de; 60 struct proc_dir_entry *de;
61 struct task_struct *tsk; 61 struct task_struct *tsk;
62 62
63 truncate_inode_pages(&inode->i_data, 0);
64
63 /* Let go of any associated process */ 65 /* Let go of any associated process */
64 tsk = PROC_I(inode)->task; 66 tsk = PROC_I(inode)->task;
65 if (tsk) 67 if (tsk)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 28b4a0253a92..c7ef3e48e35b 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -2,8 +2,13 @@
2#include <linux/hugetlb.h> 2#include <linux/hugetlb.h>
3#include <linux/mount.h> 3#include <linux/mount.h>
4#include <linux/seq_file.h> 4#include <linux/seq_file.h>
5#include <linux/highmem.h>
6#include <linux/pagemap.h>
7#include <linux/mempolicy.h>
8
5#include <asm/elf.h> 9#include <asm/elf.h>
6#include <asm/uaccess.h> 10#include <asm/uaccess.h>
11#include <asm/tlbflush.h>
7#include "internal.h" 12#include "internal.h"
8 13
9char *task_mem(struct mm_struct *mm, char *buffer) 14char *task_mem(struct mm_struct *mm, char *buffer)
@@ -87,49 +92,58 @@ static void pad_len_spaces(struct seq_file *m, int len)
87 seq_printf(m, "%*c", len, ' '); 92 seq_printf(m, "%*c", len, ' ');
88} 93}
89 94
90static int show_map(struct seq_file *m, void *v) 95struct mem_size_stats
96{
97 unsigned long resident;
98 unsigned long shared_clean;
99 unsigned long shared_dirty;
100 unsigned long private_clean;
101 unsigned long private_dirty;
102};
103
104static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss)
91{ 105{
92 struct task_struct *task = m->private; 106 struct task_struct *task = m->private;
93 struct vm_area_struct *map = v; 107 struct vm_area_struct *vma = v;
94 struct mm_struct *mm = map->vm_mm; 108 struct mm_struct *mm = vma->vm_mm;
95 struct file *file = map->vm_file; 109 struct file *file = vma->vm_file;
96 int flags = map->vm_flags; 110 int flags = vma->vm_flags;
97 unsigned long ino = 0; 111 unsigned long ino = 0;
98 dev_t dev = 0; 112 dev_t dev = 0;
99 int len; 113 int len;
100 114
101 if (file) { 115 if (file) {
102 struct inode *inode = map->vm_file->f_dentry->d_inode; 116 struct inode *inode = vma->vm_file->f_dentry->d_inode;
103 dev = inode->i_sb->s_dev; 117 dev = inode->i_sb->s_dev;
104 ino = inode->i_ino; 118 ino = inode->i_ino;
105 } 119 }
106 120
107 seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n", 121 seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n",
108 map->vm_start, 122 vma->vm_start,
109 map->vm_end, 123 vma->vm_end,
110 flags & VM_READ ? 'r' : '-', 124 flags & VM_READ ? 'r' : '-',
111 flags & VM_WRITE ? 'w' : '-', 125 flags & VM_WRITE ? 'w' : '-',
112 flags & VM_EXEC ? 'x' : '-', 126 flags & VM_EXEC ? 'x' : '-',
113 flags & VM_MAYSHARE ? 's' : 'p', 127 flags & VM_MAYSHARE ? 's' : 'p',
114 map->vm_pgoff << PAGE_SHIFT, 128 vma->vm_pgoff << PAGE_SHIFT,
115 MAJOR(dev), MINOR(dev), ino, &len); 129 MAJOR(dev), MINOR(dev), ino, &len);
116 130
117 /* 131 /*
118 * Print the dentry name for named mappings, and a 132 * Print the dentry name for named mappings, and a
119 * special [heap] marker for the heap: 133 * special [heap] marker for the heap:
120 */ 134 */
121 if (map->vm_file) { 135 if (file) {
122 pad_len_spaces(m, len); 136 pad_len_spaces(m, len);
123 seq_path(m, file->f_vfsmnt, file->f_dentry, ""); 137 seq_path(m, file->f_vfsmnt, file->f_dentry, "\n");
124 } else { 138 } else {
125 if (mm) { 139 if (mm) {
126 if (map->vm_start <= mm->start_brk && 140 if (vma->vm_start <= mm->start_brk &&
127 map->vm_end >= mm->brk) { 141 vma->vm_end >= mm->brk) {
128 pad_len_spaces(m, len); 142 pad_len_spaces(m, len);
129 seq_puts(m, "[heap]"); 143 seq_puts(m, "[heap]");
130 } else { 144 } else {
131 if (map->vm_start <= mm->start_stack && 145 if (vma->vm_start <= mm->start_stack &&
132 map->vm_end >= mm->start_stack) { 146 vma->vm_end >= mm->start_stack) {
133 147
134 pad_len_spaces(m, len); 148 pad_len_spaces(m, len);
135 seq_puts(m, "[stack]"); 149 seq_puts(m, "[stack]");
@@ -141,24 +155,146 @@ static int show_map(struct seq_file *m, void *v)
141 } 155 }
142 } 156 }
143 seq_putc(m, '\n'); 157 seq_putc(m, '\n');
144 if (m->count < m->size) /* map is copied successfully */ 158
145 m->version = (map != get_gate_vma(task))? map->vm_start: 0; 159 if (mss)
160 seq_printf(m,
161 "Size: %8lu kB\n"
162 "Rss: %8lu kB\n"
163 "Shared_Clean: %8lu kB\n"
164 "Shared_Dirty: %8lu kB\n"
165 "Private_Clean: %8lu kB\n"
166 "Private_Dirty: %8lu kB\n",
167 (vma->vm_end - vma->vm_start) >> 10,
168 mss->resident >> 10,
169 mss->shared_clean >> 10,
170 mss->shared_dirty >> 10,
171 mss->private_clean >> 10,
172 mss->private_dirty >> 10);
173
174 if (m->count < m->size) /* vma is copied successfully */
175 m->version = (vma != get_gate_vma(task))? vma->vm_start: 0;
146 return 0; 176 return 0;
147} 177}
148 178
179static int show_map(struct seq_file *m, void *v)
180{
181 return show_map_internal(m, v, 0);
182}
183
184static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
185 unsigned long addr, unsigned long end,
186 struct mem_size_stats *mss)
187{
188 pte_t *pte, ptent;
189 unsigned long pfn;
190 struct page *page;
191
192 pte = pte_offset_map(pmd, addr);
193 do {
194 ptent = *pte;
195 if (pte_none(ptent) || !pte_present(ptent))
196 continue;
197
198 mss->resident += PAGE_SIZE;
199 pfn = pte_pfn(ptent);
200 if (!pfn_valid(pfn))
201 continue;
202
203 page = pfn_to_page(pfn);
204 if (page_count(page) >= 2) {
205 if (pte_dirty(ptent))
206 mss->shared_dirty += PAGE_SIZE;
207 else
208 mss->shared_clean += PAGE_SIZE;
209 } else {
210 if (pte_dirty(ptent))
211 mss->private_dirty += PAGE_SIZE;
212 else
213 mss->private_clean += PAGE_SIZE;
214 }
215 } while (pte++, addr += PAGE_SIZE, addr != end);
216 pte_unmap(pte - 1);
217 cond_resched_lock(&vma->vm_mm->page_table_lock);
218}
219
220static inline void smaps_pmd_range(struct vm_area_struct *vma, pud_t *pud,
221 unsigned long addr, unsigned long end,
222 struct mem_size_stats *mss)
223{
224 pmd_t *pmd;
225 unsigned long next;
226
227 pmd = pmd_offset(pud, addr);
228 do {
229 next = pmd_addr_end(addr, end);
230 if (pmd_none_or_clear_bad(pmd))
231 continue;
232 smaps_pte_range(vma, pmd, addr, next, mss);
233 } while (pmd++, addr = next, addr != end);
234}
235
236static inline void smaps_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
237 unsigned long addr, unsigned long end,
238 struct mem_size_stats *mss)
239{
240 pud_t *pud;
241 unsigned long next;
242
243 pud = pud_offset(pgd, addr);
244 do {
245 next = pud_addr_end(addr, end);
246 if (pud_none_or_clear_bad(pud))
247 continue;
248 smaps_pmd_range(vma, pud, addr, next, mss);
249 } while (pud++, addr = next, addr != end);
250}
251
252static inline void smaps_pgd_range(struct vm_area_struct *vma,
253 unsigned long addr, unsigned long end,
254 struct mem_size_stats *mss)
255{
256 pgd_t *pgd;
257 unsigned long next;
258
259 pgd = pgd_offset(vma->vm_mm, addr);
260 do {
261 next = pgd_addr_end(addr, end);
262 if (pgd_none_or_clear_bad(pgd))
263 continue;
264 smaps_pud_range(vma, pgd, addr, next, mss);
265 } while (pgd++, addr = next, addr != end);
266}
267
268static int show_smap(struct seq_file *m, void *v)
269{
270 struct vm_area_struct *vma = v;
271 struct mm_struct *mm = vma->vm_mm;
272 struct mem_size_stats mss;
273
274 memset(&mss, 0, sizeof mss);
275
276 if (mm) {
277 spin_lock(&mm->page_table_lock);
278 smaps_pgd_range(vma, vma->vm_start, vma->vm_end, &mss);
279 spin_unlock(&mm->page_table_lock);
280 }
281
282 return show_map_internal(m, v, &mss);
283}
284
149static void *m_start(struct seq_file *m, loff_t *pos) 285static void *m_start(struct seq_file *m, loff_t *pos)
150{ 286{
151 struct task_struct *task = m->private; 287 struct task_struct *task = m->private;
152 unsigned long last_addr = m->version; 288 unsigned long last_addr = m->version;
153 struct mm_struct *mm; 289 struct mm_struct *mm;
154 struct vm_area_struct *map, *tail_map; 290 struct vm_area_struct *vma, *tail_vma;
155 loff_t l = *pos; 291 loff_t l = *pos;
156 292
157 /* 293 /*
158 * We remember last_addr rather than next_addr to hit with 294 * We remember last_addr rather than next_addr to hit with
159 * mmap_cache most of the time. We have zero last_addr at 295 * mmap_cache most of the time. We have zero last_addr at
160 * the begining and also after lseek. We will have -1 last_addr 296 * the beginning and also after lseek. We will have -1 last_addr
161 * after the end of the maps. 297 * after the end of the vmas.
162 */ 298 */
163 299
164 if (last_addr == -1UL) 300 if (last_addr == -1UL)
@@ -168,47 +304,47 @@ static void *m_start(struct seq_file *m, loff_t *pos)
168 if (!mm) 304 if (!mm)
169 return NULL; 305 return NULL;
170 306
171 tail_map = get_gate_vma(task); 307 tail_vma = get_gate_vma(task);
172 down_read(&mm->mmap_sem); 308 down_read(&mm->mmap_sem);
173 309
174 /* Start with last addr hint */ 310 /* Start with last addr hint */
175 if (last_addr && (map = find_vma(mm, last_addr))) { 311 if (last_addr && (vma = find_vma(mm, last_addr))) {
176 map = map->vm_next; 312 vma = vma->vm_next;
177 goto out; 313 goto out;
178 } 314 }
179 315
180 /* 316 /*
181 * Check the map index is within the range and do 317 * Check the vma index is within the range and do
182 * sequential scan until m_index. 318 * sequential scan until m_index.
183 */ 319 */
184 map = NULL; 320 vma = NULL;
185 if ((unsigned long)l < mm->map_count) { 321 if ((unsigned long)l < mm->map_count) {
186 map = mm->mmap; 322 vma = mm->mmap;
187 while (l-- && map) 323 while (l-- && vma)
188 map = map->vm_next; 324 vma = vma->vm_next;
189 goto out; 325 goto out;
190 } 326 }
191 327
192 if (l != mm->map_count) 328 if (l != mm->map_count)
193 tail_map = NULL; /* After gate map */ 329 tail_vma = NULL; /* After gate vma */
194 330
195out: 331out:
196 if (map) 332 if (vma)
197 return map; 333 return vma;
198 334
199 /* End of maps has reached */ 335 /* End of vmas has been reached */
200 m->version = (tail_map != NULL)? 0: -1UL; 336 m->version = (tail_vma != NULL)? 0: -1UL;
201 up_read(&mm->mmap_sem); 337 up_read(&mm->mmap_sem);
202 mmput(mm); 338 mmput(mm);
203 return tail_map; 339 return tail_vma;
204} 340}
205 341
206static void m_stop(struct seq_file *m, void *v) 342static void m_stop(struct seq_file *m, void *v)
207{ 343{
208 struct task_struct *task = m->private; 344 struct task_struct *task = m->private;
209 struct vm_area_struct *map = v; 345 struct vm_area_struct *vma = v;
210 if (map && map != get_gate_vma(task)) { 346 if (vma && vma != get_gate_vma(task)) {
211 struct mm_struct *mm = map->vm_mm; 347 struct mm_struct *mm = vma->vm_mm;
212 up_read(&mm->mmap_sem); 348 up_read(&mm->mmap_sem);
213 mmput(mm); 349 mmput(mm);
214 } 350 }
@@ -217,14 +353,14 @@ static void m_stop(struct seq_file *m, void *v)
217static void *m_next(struct seq_file *m, void *v, loff_t *pos) 353static void *m_next(struct seq_file *m, void *v, loff_t *pos)
218{ 354{
219 struct task_struct *task = m->private; 355 struct task_struct *task = m->private;
220 struct vm_area_struct *map = v; 356 struct vm_area_struct *vma = v;
221 struct vm_area_struct *tail_map = get_gate_vma(task); 357 struct vm_area_struct *tail_vma = get_gate_vma(task);
222 358
223 (*pos)++; 359 (*pos)++;
224 if (map && (map != tail_map) && map->vm_next) 360 if (vma && (vma != tail_vma) && vma->vm_next)
225 return map->vm_next; 361 return vma->vm_next;
226 m_stop(m, v); 362 m_stop(m, v);
227 return (map != tail_map)? tail_map: NULL; 363 return (vma != tail_vma)? tail_vma: NULL;
228} 364}
229 365
230struct seq_operations proc_pid_maps_op = { 366struct seq_operations proc_pid_maps_op = {
@@ -233,3 +369,140 @@ struct seq_operations proc_pid_maps_op = {
233 .stop = m_stop, 369 .stop = m_stop,
234 .show = show_map 370 .show = show_map
235}; 371};
372
373struct seq_operations proc_pid_smaps_op = {
374 .start = m_start,
375 .next = m_next,
376 .stop = m_stop,
377 .show = show_smap
378};
379
380#ifdef CONFIG_NUMA
381
382struct numa_maps {
383 unsigned long pages;
384 unsigned long anon;
385 unsigned long mapped;
386 unsigned long mapcount_max;
387 unsigned long node[MAX_NUMNODES];
388};
389
390/*
391 * Calculate numa node maps for a vma
392 */
393static struct numa_maps *get_numa_maps(const struct vm_area_struct *vma)
394{
395 struct page *page;
396 unsigned long vaddr;
397 struct mm_struct *mm = vma->vm_mm;
398 int i;
399 struct numa_maps *md = kmalloc(sizeof(struct numa_maps), GFP_KERNEL);
400
401 if (!md)
402 return NULL;
403 md->pages = 0;
404 md->anon = 0;
405 md->mapped = 0;
406 md->mapcount_max = 0;
407 for_each_node(i)
408 md->node[i] =0;
409
410 spin_lock(&mm->page_table_lock);
411 for (vaddr = vma->vm_start; vaddr < vma->vm_end; vaddr += PAGE_SIZE) {
412 page = follow_page(mm, vaddr, 0);
413 if (page) {
414 int count = page_mapcount(page);
415
416 if (count)
417 md->mapped++;
418 if (count > md->mapcount_max)
419 md->mapcount_max = count;
420 md->pages++;
421 if (PageAnon(page))
422 md->anon++;
423 md->node[page_to_nid(page)]++;
424 }
425 }
426 spin_unlock(&mm->page_table_lock);
427 return md;
428}
429
430static int show_numa_map(struct seq_file *m, void *v)
431{
432 struct task_struct *task = m->private;
433 struct vm_area_struct *vma = v;
434 struct mempolicy *pol;
435 struct numa_maps *md;
436 struct zone **z;
437 int n;
438 int first;
439
440 if (!vma->vm_mm)
441 return 0;
442
443 md = get_numa_maps(vma);
444 if (!md)
445 return 0;
446
447 seq_printf(m, "%08lx", vma->vm_start);
448 pol = get_vma_policy(task, vma, vma->vm_start);
449 /* Print policy */
450 switch (pol->policy) {
451 case MPOL_PREFERRED:
452 seq_printf(m, " prefer=%d", pol->v.preferred_node);
453 break;
454 case MPOL_BIND:
455 seq_printf(m, " bind={");
456 first = 1;
457 for (z = pol->v.zonelist->zones; *z; z++) {
458
459 if (!first)
460 seq_putc(m, ',');
461 else
462 first = 0;
463 seq_printf(m, "%d/%s", (*z)->zone_pgdat->node_id,
464 (*z)->name);
465 }
466 seq_putc(m, '}');
467 break;
468 case MPOL_INTERLEAVE:
469 seq_printf(m, " interleave={");
470 first = 1;
471 for_each_node(n) {
472 if (test_bit(n, pol->v.nodes)) {
473 if (!first)
474 seq_putc(m,',');
475 else
476 first = 0;
477 seq_printf(m, "%d",n);
478 }
479 }
480 seq_putc(m, '}');
481 break;
482 default:
483 seq_printf(m," default");
484 break;
485 }
486 seq_printf(m, " MaxRef=%lu Pages=%lu Mapped=%lu",
487 md->mapcount_max, md->pages, md->mapped);
488 if (md->anon)
489 seq_printf(m," Anon=%lu",md->anon);
490
491 for_each_online_node(n) {
492 if (md->node[n])
493 seq_printf(m, " N%d=%lu", n, md->node[n]);
494 }
495 seq_putc(m, '\n');
496 kfree(md);
497 if (m->count < m->size) /* vma is copied successfully */
498 m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0;
499 return 0;
500}
501
502struct seq_operations proc_pid_numa_maps_op = {
503 .start = m_start,
504 .next = m_next,
505 .stop = m_stop,
506 .show = show_numa_map
507};
508#endif
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index b79162a35478..80f32911c0cb 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -63,6 +63,7 @@ int qnx4_sync_inode(struct inode *inode)
63static void qnx4_delete_inode(struct inode *inode) 63static void qnx4_delete_inode(struct inode *inode)
64{ 64{
65 QNX4DEBUG(("qnx4: deleting inode [%lu]\n", (unsigned long) inode->i_ino)); 65 QNX4DEBUG(("qnx4: deleting inode [%lu]\n", (unsigned long) inode->i_ino));
66 truncate_inode_pages(&inode->i_data, 0);
66 inode->i_size = 0; 67 inode->i_size = 0;
67 qnx4_truncate(inode); 68 qnx4_truncate(inode);
68 lock_kernel(); 69 lock_kernel();
diff --git a/fs/read_write.c b/fs/read_write.c
index 563abd09b5c8..b60324aaa2b6 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -188,7 +188,7 @@ int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count
188 struct inode *inode; 188 struct inode *inode;
189 loff_t pos; 189 loff_t pos;
190 190
191 if (unlikely(count > file->f_maxcount)) 191 if (unlikely(count > INT_MAX))
192 goto Einval; 192 goto Einval;
193 pos = *ppos; 193 pos = *ppos;
194 if (unlikely((pos < 0) || (loff_t) (pos + count) < 0)) 194 if (unlikely((pos < 0) || (loff_t) (pos + count) < 0))
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index ff291c973a56..1a8a1bf2154d 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -33,6 +33,8 @@ void reiserfs_delete_inode(struct inode *inode)
33 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb); 33 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb);
34 struct reiserfs_transaction_handle th; 34 struct reiserfs_transaction_handle th;
35 35
36 truncate_inode_pages(&inode->i_data, 0);
37
36 reiserfs_write_lock(inode->i_sb); 38 reiserfs_write_lock(inode->i_sb);
37 39
38 /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */ 40 /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index ca7989b04be3..4b15761434bc 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -1034,7 +1034,7 @@ static int flush_commit_list(struct super_block *s,
1034 SB_ONDISK_JOURNAL_SIZE(s); 1034 SB_ONDISK_JOURNAL_SIZE(s);
1035 tbh = journal_find_get_block(s, bn); 1035 tbh = journal_find_get_block(s, bn);
1036 if (buffer_dirty(tbh)) /* redundant, ll_rw_block() checks */ 1036 if (buffer_dirty(tbh)) /* redundant, ll_rw_block() checks */
1037 ll_rw_block(WRITE, 1, &tbh); 1037 ll_rw_block(SWRITE, 1, &tbh);
1038 put_bh(tbh); 1038 put_bh(tbh);
1039 } 1039 }
1040 atomic_dec(&journal->j_async_throttle); 1040 atomic_dec(&journal->j_async_throttle);
@@ -2172,7 +2172,7 @@ static int journal_read_transaction(struct super_block *p_s_sb,
2172 /* flush out the real blocks */ 2172 /* flush out the real blocks */
2173 for (i = 0; i < get_desc_trans_len(desc); i++) { 2173 for (i = 0; i < get_desc_trans_len(desc); i++) {
2174 set_buffer_dirty(real_blocks[i]); 2174 set_buffer_dirty(real_blocks[i]);
2175 ll_rw_block(WRITE, 1, real_blocks + i); 2175 ll_rw_block(SWRITE, 1, real_blocks + i);
2176 } 2176 }
2177 for (i = 0; i < get_desc_trans_len(desc); i++) { 2177 for (i = 0; i < get_desc_trans_len(desc); i++) {
2178 wait_on_buffer(real_blocks[i]); 2178 wait_on_buffer(real_blocks[i]);
@@ -2868,8 +2868,7 @@ static void let_transaction_grow(struct super_block *sb, unsigned long trans_id)
2868 struct reiserfs_journal *journal = SB_JOURNAL(sb); 2868 struct reiserfs_journal *journal = SB_JOURNAL(sb);
2869 unsigned long bcount = journal->j_bcount; 2869 unsigned long bcount = journal->j_bcount;
2870 while (1) { 2870 while (1) {
2871 set_current_state(TASK_UNINTERRUPTIBLE); 2871 schedule_timeout_uninterruptible(1);
2872 schedule_timeout(1);
2873 journal->j_current_jl->j_state |= LIST_COMMIT_PENDING; 2872 journal->j_current_jl->j_state |= LIST_COMMIT_PENDING;
2874 while ((atomic_read(&journal->j_wcount) > 0 || 2873 while ((atomic_read(&journal->j_wcount) > 0 ||
2875 atomic_read(&journal->j_jlock)) && 2874 atomic_read(&journal->j_jlock)) &&
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 6951c35755be..44b02fc02ebe 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1934,8 +1934,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1934 if (SB_AP_BITMAP(s)) 1934 if (SB_AP_BITMAP(s))
1935 brelse(SB_AP_BITMAP(s)[j].bh); 1935 brelse(SB_AP_BITMAP(s)[j].bh);
1936 } 1936 }
1937 if (SB_AP_BITMAP(s)) 1937 vfree(SB_AP_BITMAP(s));
1938 vfree(SB_AP_BITMAP(s));
1939 } 1938 }
1940 if (SB_BUFFER_WITH_SB(s)) 1939 if (SB_BUFFER_WITH_SB(s))
1941 brelse(SB_BUFFER_WITH_SB(s)); 1940 brelse(SB_BUFFER_WITH_SB(s));
diff --git a/fs/relayfs/Makefile b/fs/relayfs/Makefile
new file mode 100644
index 000000000000..e76e182cdb38
--- /dev/null
+++ b/fs/relayfs/Makefile
@@ -0,0 +1,4 @@
1obj-$(CONFIG_RELAYFS_FS) += relayfs.o
2
3relayfs-y := relay.o inode.o buffers.o
4
diff --git a/fs/relayfs/buffers.c b/fs/relayfs/buffers.c
new file mode 100644
index 000000000000..2aa8e2719999
--- /dev/null
+++ b/fs/relayfs/buffers.c
@@ -0,0 +1,189 @@
1/*
2 * RelayFS buffer management code.
3 *
4 * Copyright (C) 2002-2005 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp
5 * Copyright (C) 1999-2005 - Karim Yaghmour (karim@opersys.com)
6 *
7 * This file is released under the GPL.
8 */
9
10#include <linux/module.h>
11#include <linux/vmalloc.h>
12#include <linux/mm.h>
13#include <linux/relayfs_fs.h>
14#include "relay.h"
15#include "buffers.h"
16
17/*
18 * close() vm_op implementation for relayfs file mapping.
19 */
20static void relay_file_mmap_close(struct vm_area_struct *vma)
21{
22 struct rchan_buf *buf = vma->vm_private_data;
23 buf->chan->cb->buf_unmapped(buf, vma->vm_file);
24}
25
26/*
27 * nopage() vm_op implementation for relayfs file mapping.
28 */
29static struct page *relay_buf_nopage(struct vm_area_struct *vma,
30 unsigned long address,
31 int *type)
32{
33 struct page *page;
34 struct rchan_buf *buf = vma->vm_private_data;
35 unsigned long offset = address - vma->vm_start;
36
37 if (address > vma->vm_end)
38 return NOPAGE_SIGBUS; /* Disallow mremap */
39 if (!buf)
40 return NOPAGE_OOM;
41
42 page = vmalloc_to_page(buf->start + offset);
43 if (!page)
44 return NOPAGE_OOM;
45 get_page(page);
46
47 if (type)
48 *type = VM_FAULT_MINOR;
49
50 return page;
51}
52
53/*
54 * vm_ops for relay file mappings.
55 */
56static struct vm_operations_struct relay_file_mmap_ops = {
57 .nopage = relay_buf_nopage,
58 .close = relay_file_mmap_close,
59};
60
61/**
62 * relay_mmap_buf: - mmap channel buffer to process address space
63 * @buf: relay channel buffer
64 * @vma: vm_area_struct describing memory to be mapped
65 *
66 * Returns 0 if ok, negative on error
67 *
68 * Caller should already have grabbed mmap_sem.
69 */
70int relay_mmap_buf(struct rchan_buf *buf, struct vm_area_struct *vma)
71{
72 unsigned long length = vma->vm_end - vma->vm_start;
73 struct file *filp = vma->vm_file;
74
75 if (!buf)
76 return -EBADF;
77
78 if (length != (unsigned long)buf->chan->alloc_size)
79 return -EINVAL;
80
81 vma->vm_ops = &relay_file_mmap_ops;
82 vma->vm_private_data = buf;
83 buf->chan->cb->buf_mapped(buf, filp);
84
85 return 0;
86}
87
88/**
89 * relay_alloc_buf - allocate a channel buffer
90 * @buf: the buffer struct
91 * @size: total size of the buffer
92 *
93 * Returns a pointer to the resulting buffer, NULL if unsuccessful
94 */
95static void *relay_alloc_buf(struct rchan_buf *buf, unsigned long size)
96{
97 void *mem;
98 unsigned int i, j, n_pages;
99
100 size = PAGE_ALIGN(size);
101 n_pages = size >> PAGE_SHIFT;
102
103 buf->page_array = kcalloc(n_pages, sizeof(struct page *), GFP_KERNEL);
104 if (!buf->page_array)
105 return NULL;
106
107 for (i = 0; i < n_pages; i++) {
108 buf->page_array[i] = alloc_page(GFP_KERNEL);
109 if (unlikely(!buf->page_array[i]))
110 goto depopulate;
111 }
112 mem = vmap(buf->page_array, n_pages, GFP_KERNEL, PAGE_KERNEL);
113 if (!mem)
114 goto depopulate;
115
116 memset(mem, 0, size);
117 buf->page_count = n_pages;
118 return mem;
119
120depopulate:
121 for (j = 0; j < i; j++)
122 __free_page(buf->page_array[j]);
123 kfree(buf->page_array);
124 return NULL;
125}
126
127/**
128 * relay_create_buf - allocate and initialize a channel buffer
129 * @alloc_size: size of the buffer to allocate
130 * @n_subbufs: number of sub-buffers in the channel
131 *
132 * Returns channel buffer if successful, NULL otherwise
133 */
134struct rchan_buf *relay_create_buf(struct rchan *chan)
135{
136 struct rchan_buf *buf = kcalloc(1, sizeof(struct rchan_buf), GFP_KERNEL);
137 if (!buf)
138 return NULL;
139
140 buf->padding = kmalloc(chan->n_subbufs * sizeof(size_t *), GFP_KERNEL);
141 if (!buf->padding)
142 goto free_buf;
143
144 buf->start = relay_alloc_buf(buf, chan->alloc_size);
145 if (!buf->start)
146 goto free_buf;
147
148 buf->chan = chan;
149 kref_get(&buf->chan->kref);
150 return buf;
151
152free_buf:
153 kfree(buf->padding);
154 kfree(buf);
155 return NULL;
156}
157
158/**
159 * relay_destroy_buf - destroy an rchan_buf struct and associated buffer
160 * @buf: the buffer struct
161 */
162void relay_destroy_buf(struct rchan_buf *buf)
163{
164 struct rchan *chan = buf->chan;
165 unsigned int i;
166
167 if (likely(buf->start)) {
168 vunmap(buf->start);
169 for (i = 0; i < buf->page_count; i++)
170 __free_page(buf->page_array[i]);
171 kfree(buf->page_array);
172 }
173 kfree(buf->padding);
174 kfree(buf);
175 kref_put(&chan->kref, relay_destroy_channel);
176}
177
178/**
179 * relay_remove_buf - remove a channel buffer
180 *
181 * Removes the file from the relayfs fileystem, which also frees the
182 * rchan_buf_struct and the channel buffer. Should only be called from
183 * kref_put().
184 */
185void relay_remove_buf(struct kref *kref)
186{
187 struct rchan_buf *buf = container_of(kref, struct rchan_buf, kref);
188 relayfs_remove(buf->dentry);
189}
diff --git a/fs/relayfs/buffers.h b/fs/relayfs/buffers.h
new file mode 100644
index 000000000000..37a12493f641
--- /dev/null
+++ b/fs/relayfs/buffers.h
@@ -0,0 +1,12 @@
1#ifndef _BUFFERS_H
2#define _BUFFERS_H
3
4/* This inspired by rtai/shmem */
5#define FIX_SIZE(x) (((x) - 1) & PAGE_MASK) + PAGE_SIZE
6
7extern int relay_mmap_buf(struct rchan_buf *buf, struct vm_area_struct *vma);
8extern struct rchan_buf *relay_create_buf(struct rchan *chan);
9extern void relay_destroy_buf(struct rchan_buf *buf);
10extern void relay_remove_buf(struct kref *kref);
11
12#endif/* _BUFFERS_H */
diff --git a/fs/relayfs/inode.c b/fs/relayfs/inode.c
new file mode 100644
index 000000000000..0f7f88d067ad
--- /dev/null
+++ b/fs/relayfs/inode.c
@@ -0,0 +1,609 @@
1/*
2 * VFS-related code for RelayFS, a high-speed data relay filesystem.
3 *
4 * Copyright (C) 2003-2005 - Tom Zanussi <zanussi@us.ibm.com>, IBM Corp
5 * Copyright (C) 2003-2005 - Karim Yaghmour <karim@opersys.com>
6 *
7 * Based on ramfs, Copyright (C) 2002 - Linus Torvalds
8 *
9 * This file is released under the GPL.
10 */
11
12#include <linux/module.h>
13#include <linux/fs.h>
14#include <linux/mount.h>
15#include <linux/pagemap.h>
16#include <linux/init.h>
17#include <linux/string.h>
18#include <linux/backing-dev.h>
19#include <linux/namei.h>
20#include <linux/poll.h>
21#include <linux/relayfs_fs.h>
22#include "relay.h"
23#include "buffers.h"
24
25#define RELAYFS_MAGIC 0xF0B4A981
26
27static struct vfsmount * relayfs_mount;
28static int relayfs_mount_count;
29static kmem_cache_t * relayfs_inode_cachep;
30
31static struct backing_dev_info relayfs_backing_dev_info = {
32 .ra_pages = 0, /* No readahead */
33 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
34};
35
36static struct inode *relayfs_get_inode(struct super_block *sb, int mode,
37 struct rchan *chan)
38{
39 struct rchan_buf *buf = NULL;
40 struct inode *inode;
41
42 if (S_ISREG(mode)) {
43 BUG_ON(!chan);
44 buf = relay_create_buf(chan);
45 if (!buf)
46 return NULL;
47 }
48
49 inode = new_inode(sb);
50 if (!inode) {
51 relay_destroy_buf(buf);
52 return NULL;
53 }
54
55 inode->i_mode = mode;
56 inode->i_uid = 0;
57 inode->i_gid = 0;
58 inode->i_blksize = PAGE_CACHE_SIZE;
59 inode->i_blocks = 0;
60 inode->i_mapping->backing_dev_info = &relayfs_backing_dev_info;
61 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
62 switch (mode & S_IFMT) {
63 case S_IFREG:
64 inode->i_fop = &relayfs_file_operations;
65 RELAYFS_I(inode)->buf = buf;
66 break;
67 case S_IFDIR:
68 inode->i_op = &simple_dir_inode_operations;
69 inode->i_fop = &simple_dir_operations;
70
71 /* directory inodes start off with i_nlink == 2 (for "." entry) */
72 inode->i_nlink++;
73 break;
74 default:
75 break;
76 }
77
78 return inode;
79}
80
81/**
82 * relayfs_create_entry - create a relayfs directory or file
83 * @name: the name of the file to create
84 * @parent: parent directory
85 * @mode: mode
86 * @chan: relay channel associated with the file
87 *
88 * Returns the new dentry, NULL on failure
89 *
90 * Creates a file or directory with the specifed permissions.
91 */
92static struct dentry *relayfs_create_entry(const char *name,
93 struct dentry *parent,
94 int mode,
95 struct rchan *chan)
96{
97 struct dentry *d;
98 struct inode *inode;
99 int error = 0;
100
101 BUG_ON(!name || !(S_ISREG(mode) || S_ISDIR(mode)));
102
103 error = simple_pin_fs("relayfs", &relayfs_mount, &relayfs_mount_count);
104 if (error) {
105 printk(KERN_ERR "Couldn't mount relayfs: errcode %d\n", error);
106 return NULL;
107 }
108
109 if (!parent && relayfs_mount && relayfs_mount->mnt_sb)
110 parent = relayfs_mount->mnt_sb->s_root;
111
112 if (!parent) {
113 simple_release_fs(&relayfs_mount, &relayfs_mount_count);
114 return NULL;
115 }
116
117 parent = dget(parent);
118 down(&parent->d_inode->i_sem);
119 d = lookup_one_len(name, parent, strlen(name));
120 if (IS_ERR(d)) {
121 d = NULL;
122 goto release_mount;
123 }
124
125 if (d->d_inode) {
126 d = NULL;
127 goto release_mount;
128 }
129
130 inode = relayfs_get_inode(parent->d_inode->i_sb, mode, chan);
131 if (!inode) {
132 d = NULL;
133 goto release_mount;
134 }
135
136 d_instantiate(d, inode);
137 dget(d); /* Extra count - pin the dentry in core */
138
139 if (S_ISDIR(mode))
140 parent->d_inode->i_nlink++;
141
142 goto exit;
143
144release_mount:
145 simple_release_fs(&relayfs_mount, &relayfs_mount_count);
146
147exit:
148 up(&parent->d_inode->i_sem);
149 dput(parent);
150 return d;
151}
152
153/**
154 * relayfs_create_file - create a file in the relay filesystem
155 * @name: the name of the file to create
156 * @parent: parent directory
157 * @mode: mode, if not specied the default perms are used
158 * @chan: channel associated with the file
159 *
160 * Returns file dentry if successful, NULL otherwise.
161 *
162 * The file will be created user r on behalf of current user.
163 */
164struct dentry *relayfs_create_file(const char *name, struct dentry *parent,
165 int mode, struct rchan *chan)
166{
167 if (!mode)
168 mode = S_IRUSR;
169 mode = (mode & S_IALLUGO) | S_IFREG;
170
171 return relayfs_create_entry(name, parent, mode, chan);
172}
173
174/**
175 * relayfs_create_dir - create a directory in the relay filesystem
176 * @name: the name of the directory to create
177 * @parent: parent directory, NULL if parent should be fs root
178 *
179 * Returns directory dentry if successful, NULL otherwise.
180 *
181 * The directory will be created world rwx on behalf of current user.
182 */
183struct dentry *relayfs_create_dir(const char *name, struct dentry *parent)
184{
185 int mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
186 return relayfs_create_entry(name, parent, mode, NULL);
187}
188
189/**
190 * relayfs_remove - remove a file or directory in the relay filesystem
191 * @dentry: file or directory dentry
192 *
193 * Returns 0 if successful, negative otherwise.
194 */
195int relayfs_remove(struct dentry *dentry)
196{
197 struct dentry *parent;
198 int error = 0;
199
200 if (!dentry)
201 return -EINVAL;
202 parent = dentry->d_parent;
203 if (!parent)
204 return -EINVAL;
205
206 parent = dget(parent);
207 down(&parent->d_inode->i_sem);
208 if (dentry->d_inode) {
209 if (S_ISDIR(dentry->d_inode->i_mode))
210 error = simple_rmdir(parent->d_inode, dentry);
211 else
212 error = simple_unlink(parent->d_inode, dentry);
213 if (!error)
214 d_delete(dentry);
215 }
216 if (!error)
217 dput(dentry);
218 up(&parent->d_inode->i_sem);
219 dput(parent);
220
221 if (!error)
222 simple_release_fs(&relayfs_mount, &relayfs_mount_count);
223
224 return error;
225}
226
227/**
228 * relayfs_remove_dir - remove a directory in the relay filesystem
229 * @dentry: directory dentry
230 *
231 * Returns 0 if successful, negative otherwise.
232 */
233int relayfs_remove_dir(struct dentry *dentry)
234{
235 return relayfs_remove(dentry);
236}
237
238/**
239 * relayfs_open - open file op for relayfs files
240 * @inode: the inode
241 * @filp: the file
242 *
243 * Increments the channel buffer refcount.
244 */
245static int relayfs_open(struct inode *inode, struct file *filp)
246{
247 struct rchan_buf *buf = RELAYFS_I(inode)->buf;
248 kref_get(&buf->kref);
249
250 return 0;
251}
252
253/**
254 * relayfs_mmap - mmap file op for relayfs files
255 * @filp: the file
256 * @vma: the vma describing what to map
257 *
258 * Calls upon relay_mmap_buf to map the file into user space.
259 */
260static int relayfs_mmap(struct file *filp, struct vm_area_struct *vma)
261{
262 struct inode *inode = filp->f_dentry->d_inode;
263 return relay_mmap_buf(RELAYFS_I(inode)->buf, vma);
264}
265
266/**
267 * relayfs_poll - poll file op for relayfs files
268 * @filp: the file
269 * @wait: poll table
270 *
271 * Poll implemention.
272 */
273static unsigned int relayfs_poll(struct file *filp, poll_table *wait)
274{
275 unsigned int mask = 0;
276 struct inode *inode = filp->f_dentry->d_inode;
277 struct rchan_buf *buf = RELAYFS_I(inode)->buf;
278
279 if (buf->finalized)
280 return POLLERR;
281
282 if (filp->f_mode & FMODE_READ) {
283 poll_wait(filp, &buf->read_wait, wait);
284 if (!relay_buf_empty(buf))
285 mask |= POLLIN | POLLRDNORM;
286 }
287
288 return mask;
289}
290
291/**
292 * relayfs_release - release file op for relayfs files
293 * @inode: the inode
294 * @filp: the file
295 *
296 * Decrements the channel refcount, as the filesystem is
297 * no longer using it.
298 */
299static int relayfs_release(struct inode *inode, struct file *filp)
300{
301 struct rchan_buf *buf = RELAYFS_I(inode)->buf;
302 kref_put(&buf->kref, relay_remove_buf);
303
304 return 0;
305}
306
307/**
308 * relayfs_read_consume - update the consumed count for the buffer
309 */
310static void relayfs_read_consume(struct rchan_buf *buf,
311 size_t read_pos,
312 size_t bytes_consumed)
313{
314 size_t subbuf_size = buf->chan->subbuf_size;
315 size_t n_subbufs = buf->chan->n_subbufs;
316 size_t read_subbuf;
317
318 if (buf->bytes_consumed + bytes_consumed > subbuf_size) {
319 relay_subbufs_consumed(buf->chan, buf->cpu, 1);
320 buf->bytes_consumed = 0;
321 }
322
323 buf->bytes_consumed += bytes_consumed;
324 read_subbuf = read_pos / buf->chan->subbuf_size;
325 if (buf->bytes_consumed + buf->padding[read_subbuf] == subbuf_size) {
326 if ((read_subbuf == buf->subbufs_produced % n_subbufs) &&
327 (buf->offset == subbuf_size))
328 return;
329 relay_subbufs_consumed(buf->chan, buf->cpu, 1);
330 buf->bytes_consumed = 0;
331 }
332}
333
334/**
335 * relayfs_read_avail - boolean, are there unconsumed bytes available?
336 */
337static int relayfs_read_avail(struct rchan_buf *buf, size_t read_pos)
338{
339 size_t bytes_produced, bytes_consumed, write_offset;
340 size_t subbuf_size = buf->chan->subbuf_size;
341 size_t n_subbufs = buf->chan->n_subbufs;
342 size_t produced = buf->subbufs_produced % n_subbufs;
343 size_t consumed = buf->subbufs_consumed % n_subbufs;
344
345 write_offset = buf->offset > subbuf_size ? subbuf_size : buf->offset;
346
347 if (consumed > produced) {
348 if ((produced > n_subbufs) &&
349 (produced + n_subbufs - consumed <= n_subbufs))
350 produced += n_subbufs;
351 } else if (consumed == produced) {
352 if (buf->offset > subbuf_size) {
353 produced += n_subbufs;
354 if (buf->subbufs_produced == buf->subbufs_consumed)
355 consumed += n_subbufs;
356 }
357 }
358
359 if (buf->offset > subbuf_size)
360 bytes_produced = (produced - 1) * subbuf_size + write_offset;
361 else
362 bytes_produced = produced * subbuf_size + write_offset;
363 bytes_consumed = consumed * subbuf_size + buf->bytes_consumed;
364
365 if (bytes_produced == bytes_consumed)
366 return 0;
367
368 relayfs_read_consume(buf, read_pos, 0);
369
370 return 1;
371}
372
373/**
374 * relayfs_read_subbuf_avail - return bytes available in sub-buffer
375 */
376static size_t relayfs_read_subbuf_avail(size_t read_pos,
377 struct rchan_buf *buf)
378{
379 size_t padding, avail = 0;
380 size_t read_subbuf, read_offset, write_subbuf, write_offset;
381 size_t subbuf_size = buf->chan->subbuf_size;
382
383 write_subbuf = (buf->data - buf->start) / subbuf_size;
384 write_offset = buf->offset > subbuf_size ? subbuf_size : buf->offset;
385 read_subbuf = read_pos / subbuf_size;
386 read_offset = read_pos % subbuf_size;
387 padding = buf->padding[read_subbuf];
388
389 if (read_subbuf == write_subbuf) {
390 if (read_offset + padding < write_offset)
391 avail = write_offset - (read_offset + padding);
392 } else
393 avail = (subbuf_size - padding) - read_offset;
394
395 return avail;
396}
397
398/**
399 * relayfs_read_start_pos - find the first available byte to read
400 *
401 * If the read_pos is in the middle of padding, return the
402 * position of the first actually available byte, otherwise
403 * return the original value.
404 */
405static size_t relayfs_read_start_pos(size_t read_pos,
406 struct rchan_buf *buf)
407{
408 size_t read_subbuf, padding, padding_start, padding_end;
409 size_t subbuf_size = buf->chan->subbuf_size;
410 size_t n_subbufs = buf->chan->n_subbufs;
411
412 read_subbuf = read_pos / subbuf_size;
413 padding = buf->padding[read_subbuf];
414 padding_start = (read_subbuf + 1) * subbuf_size - padding;
415 padding_end = (read_subbuf + 1) * subbuf_size;
416 if (read_pos >= padding_start && read_pos < padding_end) {
417 read_subbuf = (read_subbuf + 1) % n_subbufs;
418 read_pos = read_subbuf * subbuf_size;
419 }
420
421 return read_pos;
422}
423
424/**
425 * relayfs_read_end_pos - return the new read position
426 */
427static size_t relayfs_read_end_pos(struct rchan_buf *buf,
428 size_t read_pos,
429 size_t count)
430{
431 size_t read_subbuf, padding, end_pos;
432 size_t subbuf_size = buf->chan->subbuf_size;
433 size_t n_subbufs = buf->chan->n_subbufs;
434
435 read_subbuf = read_pos / subbuf_size;
436 padding = buf->padding[read_subbuf];
437 if (read_pos % subbuf_size + count + padding == subbuf_size)
438 end_pos = (read_subbuf + 1) * subbuf_size;
439 else
440 end_pos = read_pos + count;
441 if (end_pos >= subbuf_size * n_subbufs)
442 end_pos = 0;
443
444 return end_pos;
445}
446
447/**
448 * relayfs_read - read file op for relayfs files
449 * @filp: the file
450 * @buffer: the userspace buffer
451 * @count: number of bytes to read
452 * @ppos: position to read from
453 *
454 * Reads count bytes or the number of bytes available in the
455 * current sub-buffer being read, whichever is smaller.
456 */
457static ssize_t relayfs_read(struct file *filp,
458 char __user *buffer,
459 size_t count,
460 loff_t *ppos)
461{
462 struct inode *inode = filp->f_dentry->d_inode;
463 struct rchan_buf *buf = RELAYFS_I(inode)->buf;
464 size_t read_start, avail;
465 ssize_t ret = 0;
466 void *from;
467
468 down(&inode->i_sem);
469 if(!relayfs_read_avail(buf, *ppos))
470 goto out;
471
472 read_start = relayfs_read_start_pos(*ppos, buf);
473 avail = relayfs_read_subbuf_avail(read_start, buf);
474 if (!avail)
475 goto out;
476
477 from = buf->start + read_start;
478 ret = count = min(count, avail);
479 if (copy_to_user(buffer, from, count)) {
480 ret = -EFAULT;
481 goto out;
482 }
483 relayfs_read_consume(buf, read_start, count);
484 *ppos = relayfs_read_end_pos(buf, read_start, count);
485out:
486 up(&inode->i_sem);
487 return ret;
488}
489
490/**
491 * relayfs alloc_inode() implementation
492 */
493static struct inode *relayfs_alloc_inode(struct super_block *sb)
494{
495 struct relayfs_inode_info *p = kmem_cache_alloc(relayfs_inode_cachep, SLAB_KERNEL);
496 if (!p)
497 return NULL;
498 p->buf = NULL;
499
500 return &p->vfs_inode;
501}
502
503/**
504 * relayfs destroy_inode() implementation
505 */
506static void relayfs_destroy_inode(struct inode *inode)
507{
508 if (RELAYFS_I(inode)->buf)
509 relay_destroy_buf(RELAYFS_I(inode)->buf);
510
511 kmem_cache_free(relayfs_inode_cachep, RELAYFS_I(inode));
512}
513
514static void init_once(void *p, kmem_cache_t *cachep, unsigned long flags)
515{
516 struct relayfs_inode_info *i = p;
517 if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR)
518 inode_init_once(&i->vfs_inode);
519}
520
521struct file_operations relayfs_file_operations = {
522 .open = relayfs_open,
523 .poll = relayfs_poll,
524 .mmap = relayfs_mmap,
525 .read = relayfs_read,
526 .llseek = no_llseek,
527 .release = relayfs_release,
528};
529
530static struct super_operations relayfs_ops = {
531 .statfs = simple_statfs,
532 .drop_inode = generic_delete_inode,
533 .alloc_inode = relayfs_alloc_inode,
534 .destroy_inode = relayfs_destroy_inode,
535};
536
537static int relayfs_fill_super(struct super_block * sb, void * data, int silent)
538{
539 struct inode *inode;
540 struct dentry *root;
541 int mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
542
543 sb->s_blocksize = PAGE_CACHE_SIZE;
544 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
545 sb->s_magic = RELAYFS_MAGIC;
546 sb->s_op = &relayfs_ops;
547 inode = relayfs_get_inode(sb, mode, NULL);
548
549 if (!inode)
550 return -ENOMEM;
551
552 root = d_alloc_root(inode);
553 if (!root) {
554 iput(inode);
555 return -ENOMEM;
556 }
557 sb->s_root = root;
558
559 return 0;
560}
561
562static struct super_block * relayfs_get_sb(struct file_system_type *fs_type,
563 int flags, const char *dev_name,
564 void *data)
565{
566 return get_sb_single(fs_type, flags, data, relayfs_fill_super);
567}
568
569static struct file_system_type relayfs_fs_type = {
570 .owner = THIS_MODULE,
571 .name = "relayfs",
572 .get_sb = relayfs_get_sb,
573 .kill_sb = kill_litter_super,
574};
575
576static int __init init_relayfs_fs(void)
577{
578 int err;
579
580 relayfs_inode_cachep = kmem_cache_create("relayfs_inode_cache",
581 sizeof(struct relayfs_inode_info), 0,
582 0, init_once, NULL);
583 if (!relayfs_inode_cachep)
584 return -ENOMEM;
585
586 err = register_filesystem(&relayfs_fs_type);
587 if (err)
588 kmem_cache_destroy(relayfs_inode_cachep);
589
590 return err;
591}
592
593static void __exit exit_relayfs_fs(void)
594{
595 unregister_filesystem(&relayfs_fs_type);
596 kmem_cache_destroy(relayfs_inode_cachep);
597}
598
599module_init(init_relayfs_fs)
600module_exit(exit_relayfs_fs)
601
602EXPORT_SYMBOL_GPL(relayfs_file_operations);
603EXPORT_SYMBOL_GPL(relayfs_create_dir);
604EXPORT_SYMBOL_GPL(relayfs_remove_dir);
605
606MODULE_AUTHOR("Tom Zanussi <zanussi@us.ibm.com> and Karim Yaghmour <karim@opersys.com>");
607MODULE_DESCRIPTION("Relay Filesystem");
608MODULE_LICENSE("GPL");
609
diff --git a/fs/relayfs/relay.c b/fs/relayfs/relay.c
new file mode 100644
index 000000000000..16446a15c96d
--- /dev/null
+++ b/fs/relayfs/relay.c
@@ -0,0 +1,431 @@
1/*
2 * Public API and common code for RelayFS.
3 *
4 * See Documentation/filesystems/relayfs.txt for an overview of relayfs.
5 *
6 * Copyright (C) 2002-2005 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp
7 * Copyright (C) 1999-2005 - Karim Yaghmour (karim@opersys.com)
8 *
9 * This file is released under the GPL.
10 */
11
12#include <linux/errno.h>
13#include <linux/stddef.h>
14#include <linux/slab.h>
15#include <linux/module.h>
16#include <linux/string.h>
17#include <linux/relayfs_fs.h>
18#include "relay.h"
19#include "buffers.h"
20
21/**
22 * relay_buf_empty - boolean, is the channel buffer empty?
23 * @buf: channel buffer
24 *
25 * Returns 1 if the buffer is empty, 0 otherwise.
26 */
27int relay_buf_empty(struct rchan_buf *buf)
28{
29 return (buf->subbufs_produced - buf->subbufs_consumed) ? 0 : 1;
30}
31
32/**
33 * relay_buf_full - boolean, is the channel buffer full?
34 * @buf: channel buffer
35 *
36 * Returns 1 if the buffer is full, 0 otherwise.
37 */
38int relay_buf_full(struct rchan_buf *buf)
39{
40 size_t ready = buf->subbufs_produced - buf->subbufs_consumed;
41 return (ready >= buf->chan->n_subbufs) ? 1 : 0;
42}
43
44/*
45 * High-level relayfs kernel API and associated functions.
46 */
47
48/*
49 * rchan_callback implementations defining default channel behavior. Used
50 * in place of corresponding NULL values in client callback struct.
51 */
52
53/*
54 * subbuf_start() default callback. Does nothing.
55 */
56static int subbuf_start_default_callback (struct rchan_buf *buf,
57 void *subbuf,
58 void *prev_subbuf,
59 size_t prev_padding)
60{
61 if (relay_buf_full(buf))
62 return 0;
63
64 return 1;
65}
66
67/*
68 * buf_mapped() default callback. Does nothing.
69 */
70static void buf_mapped_default_callback(struct rchan_buf *buf,
71 struct file *filp)
72{
73}
74
75/*
76 * buf_unmapped() default callback. Does nothing.
77 */
78static void buf_unmapped_default_callback(struct rchan_buf *buf,
79 struct file *filp)
80{
81}
82
83/* relay channel default callbacks */
84static struct rchan_callbacks default_channel_callbacks = {
85 .subbuf_start = subbuf_start_default_callback,
86 .buf_mapped = buf_mapped_default_callback,
87 .buf_unmapped = buf_unmapped_default_callback,
88};
89
90/**
91 * wakeup_readers - wake up readers waiting on a channel
92 * @private: the channel buffer
93 *
94 * This is the work function used to defer reader waking. The
95 * reason waking is deferred is that calling directly from write
96 * causes problems if you're writing from say the scheduler.
97 */
98static void wakeup_readers(void *private)
99{
100 struct rchan_buf *buf = private;
101 wake_up_interruptible(&buf->read_wait);
102}
103
104/**
105 * __relay_reset - reset a channel buffer
106 * @buf: the channel buffer
107 * @init: 1 if this is a first-time initialization
108 *
109 * See relay_reset for description of effect.
110 */
111static inline void __relay_reset(struct rchan_buf *buf, unsigned int init)
112{
113 size_t i;
114
115 if (init) {
116 init_waitqueue_head(&buf->read_wait);
117 kref_init(&buf->kref);
118 INIT_WORK(&buf->wake_readers, NULL, NULL);
119 } else {
120 cancel_delayed_work(&buf->wake_readers);
121 flush_scheduled_work();
122 }
123
124 buf->subbufs_produced = 0;
125 buf->subbufs_consumed = 0;
126 buf->bytes_consumed = 0;
127 buf->finalized = 0;
128 buf->data = buf->start;
129 buf->offset = 0;
130
131 for (i = 0; i < buf->chan->n_subbufs; i++)
132 buf->padding[i] = 0;
133
134 buf->chan->cb->subbuf_start(buf, buf->data, NULL, 0);
135}
136
137/**
138 * relay_reset - reset the channel
139 * @chan: the channel
140 *
141 * This has the effect of erasing all data from all channel buffers
142 * and restarting the channel in its initial state. The buffers
143 * are not freed, so any mappings are still in effect.
144 *
145 * NOTE: Care should be taken that the channel isn't actually
146 * being used by anything when this call is made.
147 */
148void relay_reset(struct rchan *chan)
149{
150 unsigned int i;
151
152 if (!chan)
153 return;
154
155 for (i = 0; i < NR_CPUS; i++) {
156 if (!chan->buf[i])
157 continue;
158 __relay_reset(chan->buf[i], 0);
159 }
160}
161
162/**
163 * relay_open_buf - create a new channel buffer in relayfs
164 *
165 * Internal - used by relay_open().
166 */
167static struct rchan_buf *relay_open_buf(struct rchan *chan,
168 const char *filename,
169 struct dentry *parent)
170{
171 struct rchan_buf *buf;
172 struct dentry *dentry;
173
174 /* Create file in fs */
175 dentry = relayfs_create_file(filename, parent, S_IRUSR, chan);
176 if (!dentry)
177 return NULL;
178
179 buf = RELAYFS_I(dentry->d_inode)->buf;
180 buf->dentry = dentry;
181 __relay_reset(buf, 1);
182
183 return buf;
184}
185
186/**
187 * relay_close_buf - close a channel buffer
188 * @buf: channel buffer
189 *
190 * Marks the buffer finalized and restores the default callbacks.
191 * The channel buffer and channel buffer data structure are then freed
192 * automatically when the last reference is given up.
193 */
194static inline void relay_close_buf(struct rchan_buf *buf)
195{
196 buf->finalized = 1;
197 buf->chan->cb = &default_channel_callbacks;
198 cancel_delayed_work(&buf->wake_readers);
199 flush_scheduled_work();
200 kref_put(&buf->kref, relay_remove_buf);
201}
202
203static inline void setup_callbacks(struct rchan *chan,
204 struct rchan_callbacks *cb)
205{
206 if (!cb) {
207 chan->cb = &default_channel_callbacks;
208 return;
209 }
210
211 if (!cb->subbuf_start)
212 cb->subbuf_start = subbuf_start_default_callback;
213 if (!cb->buf_mapped)
214 cb->buf_mapped = buf_mapped_default_callback;
215 if (!cb->buf_unmapped)
216 cb->buf_unmapped = buf_unmapped_default_callback;
217 chan->cb = cb;
218}
219
220/**
221 * relay_open - create a new relayfs channel
222 * @base_filename: base name of files to create
223 * @parent: dentry of parent directory, NULL for root directory
224 * @subbuf_size: size of sub-buffers
225 * @n_subbufs: number of sub-buffers
226 * @cb: client callback functions
227 *
228 * Returns channel pointer if successful, NULL otherwise.
229 *
230 * Creates a channel buffer for each cpu using the sizes and
231 * attributes specified. The created channel buffer files
232 * will be named base_filename0...base_filenameN-1. File
233 * permissions will be S_IRUSR.
234 */
235struct rchan *relay_open(const char *base_filename,
236 struct dentry *parent,
237 size_t subbuf_size,
238 size_t n_subbufs,
239 struct rchan_callbacks *cb)
240{
241 unsigned int i;
242 struct rchan *chan;
243 char *tmpname;
244
245 if (!base_filename)
246 return NULL;
247
248 if (!(subbuf_size && n_subbufs))
249 return NULL;
250
251 chan = kcalloc(1, sizeof(struct rchan), GFP_KERNEL);
252 if (!chan)
253 return NULL;
254
255 chan->version = RELAYFS_CHANNEL_VERSION;
256 chan->n_subbufs = n_subbufs;
257 chan->subbuf_size = subbuf_size;
258 chan->alloc_size = FIX_SIZE(subbuf_size * n_subbufs);
259 setup_callbacks(chan, cb);
260 kref_init(&chan->kref);
261
262 tmpname = kmalloc(NAME_MAX + 1, GFP_KERNEL);
263 if (!tmpname)
264 goto free_chan;
265
266 for_each_online_cpu(i) {
267 sprintf(tmpname, "%s%d", base_filename, i);
268 chan->buf[i] = relay_open_buf(chan, tmpname, parent);
269 chan->buf[i]->cpu = i;
270 if (!chan->buf[i])
271 goto free_bufs;
272 }
273
274 kfree(tmpname);
275 return chan;
276
277free_bufs:
278 for (i = 0; i < NR_CPUS; i++) {
279 if (!chan->buf[i])
280 break;
281 relay_close_buf(chan->buf[i]);
282 }
283 kfree(tmpname);
284
285free_chan:
286 kref_put(&chan->kref, relay_destroy_channel);
287 return NULL;
288}
289
290/**
291 * relay_switch_subbuf - switch to a new sub-buffer
292 * @buf: channel buffer
293 * @length: size of current event
294 *
295 * Returns either the length passed in or 0 if full.
296
297 * Performs sub-buffer-switch tasks such as invoking callbacks,
298 * updating padding counts, waking up readers, etc.
299 */
300size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length)
301{
302 void *old, *new;
303 size_t old_subbuf, new_subbuf;
304
305 if (unlikely(length > buf->chan->subbuf_size))
306 goto toobig;
307
308 if (buf->offset != buf->chan->subbuf_size + 1) {
309 buf->prev_padding = buf->chan->subbuf_size - buf->offset;
310 old_subbuf = buf->subbufs_produced % buf->chan->n_subbufs;
311 buf->padding[old_subbuf] = buf->prev_padding;
312 buf->subbufs_produced++;
313 if (waitqueue_active(&buf->read_wait)) {
314 PREPARE_WORK(&buf->wake_readers, wakeup_readers, buf);
315 schedule_delayed_work(&buf->wake_readers, 1);
316 }
317 }
318
319 old = buf->data;
320 new_subbuf = buf->subbufs_produced % buf->chan->n_subbufs;
321 new = buf->start + new_subbuf * buf->chan->subbuf_size;
322 buf->offset = 0;
323 if (!buf->chan->cb->subbuf_start(buf, new, old, buf->prev_padding)) {
324 buf->offset = buf->chan->subbuf_size + 1;
325 return 0;
326 }
327 buf->data = new;
328 buf->padding[new_subbuf] = 0;
329
330 if (unlikely(length + buf->offset > buf->chan->subbuf_size))
331 goto toobig;
332
333 return length;
334
335toobig:
336 printk(KERN_WARNING "relayfs: event too large (%Zd)\n", length);
337 WARN_ON(1);
338 return 0;
339}
340
341/**
342 * relay_subbufs_consumed - update the buffer's sub-buffers-consumed count
343 * @chan: the channel
344 * @cpu: the cpu associated with the channel buffer to update
345 * @subbufs_consumed: number of sub-buffers to add to current buf's count
346 *
347 * Adds to the channel buffer's consumed sub-buffer count.
348 * subbufs_consumed should be the number of sub-buffers newly consumed,
349 * not the total consumed.
350 *
351 * NOTE: kernel clients don't need to call this function if the channel
352 * mode is 'overwrite'.
353 */
354void relay_subbufs_consumed(struct rchan *chan,
355 unsigned int cpu,
356 size_t subbufs_consumed)
357{
358 struct rchan_buf *buf;
359
360 if (!chan)
361 return;
362
363 if (cpu >= NR_CPUS || !chan->buf[cpu])
364 return;
365
366 buf = chan->buf[cpu];
367 buf->subbufs_consumed += subbufs_consumed;
368 if (buf->subbufs_consumed > buf->subbufs_produced)
369 buf->subbufs_consumed = buf->subbufs_produced;
370}
371
372/**
373 * relay_destroy_channel - free the channel struct
374 *
375 * Should only be called from kref_put().
376 */
377void relay_destroy_channel(struct kref *kref)
378{
379 struct rchan *chan = container_of(kref, struct rchan, kref);
380 kfree(chan);
381}
382
383/**
384 * relay_close - close the channel
385 * @chan: the channel
386 *
387 * Closes all channel buffers and frees the channel.
388 */
389void relay_close(struct rchan *chan)
390{
391 unsigned int i;
392
393 if (!chan)
394 return;
395
396 for (i = 0; i < NR_CPUS; i++) {
397 if (!chan->buf[i])
398 continue;
399 relay_close_buf(chan->buf[i]);
400 }
401
402 kref_put(&chan->kref, relay_destroy_channel);
403}
404
405/**
406 * relay_flush - close the channel
407 * @chan: the channel
408 *
409 * Flushes all channel buffers i.e. forces buffer switch.
410 */
411void relay_flush(struct rchan *chan)
412{
413 unsigned int i;
414
415 if (!chan)
416 return;
417
418 for (i = 0; i < NR_CPUS; i++) {
419 if (!chan->buf[i])
420 continue;
421 relay_switch_subbuf(chan->buf[i], 0);
422 }
423}
424
425EXPORT_SYMBOL_GPL(relay_open);
426EXPORT_SYMBOL_GPL(relay_close);
427EXPORT_SYMBOL_GPL(relay_flush);
428EXPORT_SYMBOL_GPL(relay_reset);
429EXPORT_SYMBOL_GPL(relay_subbufs_consumed);
430EXPORT_SYMBOL_GPL(relay_switch_subbuf);
431EXPORT_SYMBOL_GPL(relay_buf_full);
diff --git a/fs/relayfs/relay.h b/fs/relayfs/relay.h
new file mode 100644
index 000000000000..703503fa22b6
--- /dev/null
+++ b/fs/relayfs/relay.h
@@ -0,0 +1,12 @@
1#ifndef _RELAY_H
2#define _RELAY_H
3
4struct dentry *relayfs_create_file(const char *name,
5 struct dentry *parent,
6 int mode,
7 struct rchan *chan);
8extern int relayfs_remove(struct dentry *dentry);
9extern int relay_buf_empty(struct rchan_buf *buf);
10extern void relay_destroy_channel(struct kref *kref);
11
12#endif /* _RELAY_H */
diff --git a/fs/select.c b/fs/select.c
index b80e7eb0ac0d..f10a10317d54 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -22,6 +22,7 @@
22#include <linux/personality.h> /* for STICKY_TIMEOUTS */ 22#include <linux/personality.h> /* for STICKY_TIMEOUTS */
23#include <linux/file.h> 23#include <linux/file.h>
24#include <linux/fs.h> 24#include <linux/fs.h>
25#include <linux/rcupdate.h>
25 26
26#include <asm/uaccess.h> 27#include <asm/uaccess.h>
27 28
@@ -132,11 +133,13 @@ static int max_select_fd(unsigned long n, fd_set_bits *fds)
132 unsigned long *open_fds; 133 unsigned long *open_fds;
133 unsigned long set; 134 unsigned long set;
134 int max; 135 int max;
136 struct fdtable *fdt;
135 137
136 /* handle last in-complete long-word first */ 138 /* handle last in-complete long-word first */
137 set = ~(~0UL << (n & (__NFDBITS-1))); 139 set = ~(~0UL << (n & (__NFDBITS-1)));
138 n /= __NFDBITS; 140 n /= __NFDBITS;
139 open_fds = current->files->open_fds->fds_bits+n; 141 fdt = files_fdtable(current->files);
142 open_fds = fdt->open_fds->fds_bits+n;
140 max = 0; 143 max = 0;
141 if (set) { 144 if (set) {
142 set &= BITS(fds, n); 145 set &= BITS(fds, n);
@@ -183,9 +186,9 @@ int do_select(int n, fd_set_bits *fds, long *timeout)
183 int retval, i; 186 int retval, i;
184 long __timeout = *timeout; 187 long __timeout = *timeout;
185 188
186 spin_lock(&current->files->file_lock); 189 rcu_read_lock();
187 retval = max_select_fd(n, fds); 190 retval = max_select_fd(n, fds);
188 spin_unlock(&current->files->file_lock); 191 rcu_read_unlock();
189 192
190 if (retval < 0) 193 if (retval < 0)
191 return retval; 194 return retval;
@@ -299,6 +302,7 @@ sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s
299 char *bits; 302 char *bits;
300 long timeout; 303 long timeout;
301 int ret, size, max_fdset; 304 int ret, size, max_fdset;
305 struct fdtable *fdt;
302 306
303 timeout = MAX_SCHEDULE_TIMEOUT; 307 timeout = MAX_SCHEDULE_TIMEOUT;
304 if (tvp) { 308 if (tvp) {
@@ -326,7 +330,10 @@ sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s
326 goto out_nofds; 330 goto out_nofds;
327 331
328 /* max_fdset can increase, so grab it once to avoid race */ 332 /* max_fdset can increase, so grab it once to avoid race */
329 max_fdset = current->files->max_fdset; 333 rcu_read_lock();
334 fdt = files_fdtable(current->files);
335 max_fdset = fdt->max_fdset;
336 rcu_read_unlock();
330 if (n > max_fdset) 337 if (n > max_fdset)
331 n = max_fdset; 338 n = max_fdset;
332 339
@@ -464,9 +471,15 @@ asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long ti
464 unsigned int i; 471 unsigned int i;
465 struct poll_list *head; 472 struct poll_list *head;
466 struct poll_list *walk; 473 struct poll_list *walk;
474 struct fdtable *fdt;
475 int max_fdset;
467 476
468 /* Do a sanity check on nfds ... */ 477 /* Do a sanity check on nfds ... */
469 if (nfds > current->files->max_fdset && nfds > OPEN_MAX) 478 rcu_read_lock();
479 fdt = files_fdtable(current->files);
480 max_fdset = fdt->max_fdset;
481 rcu_read_unlock();
482 if (nfds > max_fdset && nfds > OPEN_MAX)
470 return -EINVAL; 483 return -EINVAL;
471 484
472 if (timeout) { 485 if (timeout) {
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 4765aaac9fd2..10b994428fef 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -331,6 +331,7 @@ static void
331smb_delete_inode(struct inode *ino) 331smb_delete_inode(struct inode *ino)
332{ 332{
333 DEBUG1("ino=%ld\n", ino->i_ino); 333 DEBUG1("ino=%ld\n", ino->i_ino);
334 truncate_inode_pages(&ino->i_data, 0);
334 lock_kernel(); 335 lock_kernel();
335 if (smb_close(ino)) 336 if (smb_close(ino))
336 PARANOIA("could not close inode %ld\n", ino->i_ino); 337 PARANOIA("could not close inode %ld\n", ino->i_ino);
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c
index 220babe91efd..38ab558835c4 100644
--- a/fs/smbfs/proc.c
+++ b/fs/smbfs/proc.c
@@ -2397,8 +2397,7 @@ smb_proc_readdir_long(struct file *filp, void *dirent, filldir_t filldir,
2397 if (req->rq_rcls == ERRSRV && req->rq_err == ERRerror) { 2397 if (req->rq_rcls == ERRSRV && req->rq_err == ERRerror) {
2398 /* a damn Win95 bug - sometimes it clags if you 2398 /* a damn Win95 bug - sometimes it clags if you
2399 ask it too fast */ 2399 ask it too fast */
2400 current->state = TASK_INTERRUPTIBLE; 2400 schedule_timeout_interruptible(msecs_to_jiffies(200));
2401 schedule_timeout(HZ/5);
2402 continue; 2401 continue;
2403 } 2402 }
2404 2403
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 0530077d9dd8..fa33eceb0011 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -292,6 +292,7 @@ int sysv_sync_inode(struct inode * inode)
292 292
293static void sysv_delete_inode(struct inode *inode) 293static void sysv_delete_inode(struct inode *inode)
294{ 294{
295 truncate_inode_pages(&inode->i_data, 0);
295 inode->i_size = 0; 296 inode->i_size = 0;
296 sysv_truncate(inode); 297 sysv_truncate(inode);
297 lock_kernel(); 298 lock_kernel();
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 3d68de39fad6..b83890beaaac 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -87,6 +87,8 @@ static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int);
87 */ 87 */
88void udf_delete_inode(struct inode * inode) 88void udf_delete_inode(struct inode * inode)
89{ 89{
90 truncate_inode_pages(&inode->i_data, 0);
91
90 if (is_bad_inode(inode)) 92 if (is_bad_inode(inode))
91 goto no_delete; 93 goto no_delete;
92 94
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index 997640c99c7d..faf1512173eb 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -114,8 +114,7 @@ void ufs_free_fragments (struct inode * inode, unsigned fragment, unsigned count
114 ubh_mark_buffer_dirty (USPI_UBH); 114 ubh_mark_buffer_dirty (USPI_UBH);
115 ubh_mark_buffer_dirty (UCPI_UBH); 115 ubh_mark_buffer_dirty (UCPI_UBH);
116 if (sb->s_flags & MS_SYNCHRONOUS) { 116 if (sb->s_flags & MS_SYNCHRONOUS) {
117 ubh_wait_on_buffer (UCPI_UBH); 117 ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **)&ucpi);
118 ubh_ll_rw_block (WRITE, 1, (struct ufs_buffer_head **)&ucpi);
119 ubh_wait_on_buffer (UCPI_UBH); 118 ubh_wait_on_buffer (UCPI_UBH);
120 } 119 }
121 sb->s_dirt = 1; 120 sb->s_dirt = 1;
@@ -200,8 +199,7 @@ do_more:
200 ubh_mark_buffer_dirty (USPI_UBH); 199 ubh_mark_buffer_dirty (USPI_UBH);
201 ubh_mark_buffer_dirty (UCPI_UBH); 200 ubh_mark_buffer_dirty (UCPI_UBH);
202 if (sb->s_flags & MS_SYNCHRONOUS) { 201 if (sb->s_flags & MS_SYNCHRONOUS) {
203 ubh_wait_on_buffer (UCPI_UBH); 202 ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **)&ucpi);
204 ubh_ll_rw_block (WRITE, 1, (struct ufs_buffer_head **)&ucpi);
205 ubh_wait_on_buffer (UCPI_UBH); 203 ubh_wait_on_buffer (UCPI_UBH);
206 } 204 }
207 205
@@ -459,8 +457,7 @@ ufs_add_fragments (struct inode * inode, unsigned fragment,
459 ubh_mark_buffer_dirty (USPI_UBH); 457 ubh_mark_buffer_dirty (USPI_UBH);
460 ubh_mark_buffer_dirty (UCPI_UBH); 458 ubh_mark_buffer_dirty (UCPI_UBH);
461 if (sb->s_flags & MS_SYNCHRONOUS) { 459 if (sb->s_flags & MS_SYNCHRONOUS) {
462 ubh_wait_on_buffer (UCPI_UBH); 460 ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **)&ucpi);
463 ubh_ll_rw_block (WRITE, 1, (struct ufs_buffer_head **)&ucpi);
464 ubh_wait_on_buffer (UCPI_UBH); 461 ubh_wait_on_buffer (UCPI_UBH);
465 } 462 }
466 sb->s_dirt = 1; 463 sb->s_dirt = 1;
@@ -585,8 +582,7 @@ succed:
585 ubh_mark_buffer_dirty (USPI_UBH); 582 ubh_mark_buffer_dirty (USPI_UBH);
586 ubh_mark_buffer_dirty (UCPI_UBH); 583 ubh_mark_buffer_dirty (UCPI_UBH);
587 if (sb->s_flags & MS_SYNCHRONOUS) { 584 if (sb->s_flags & MS_SYNCHRONOUS) {
588 ubh_wait_on_buffer (UCPI_UBH); 585 ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **)&ucpi);
589 ubh_ll_rw_block (WRITE, 1, (struct ufs_buffer_head **)&ucpi);
590 ubh_wait_on_buffer (UCPI_UBH); 586 ubh_wait_on_buffer (UCPI_UBH);
591 } 587 }
592 sb->s_dirt = 1; 588 sb->s_dirt = 1;
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 61a6b1542fc5..0938945b9cbc 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -124,8 +124,7 @@ void ufs_free_inode (struct inode * inode)
124 ubh_mark_buffer_dirty (USPI_UBH); 124 ubh_mark_buffer_dirty (USPI_UBH);
125 ubh_mark_buffer_dirty (UCPI_UBH); 125 ubh_mark_buffer_dirty (UCPI_UBH);
126 if (sb->s_flags & MS_SYNCHRONOUS) { 126 if (sb->s_flags & MS_SYNCHRONOUS) {
127 ubh_wait_on_buffer (UCPI_UBH); 127 ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **) &ucpi);
128 ubh_ll_rw_block (WRITE, 1, (struct ufs_buffer_head **) &ucpi);
129 ubh_wait_on_buffer (UCPI_UBH); 128 ubh_wait_on_buffer (UCPI_UBH);
130 } 129 }
131 130
@@ -249,8 +248,7 @@ cg_found:
249 ubh_mark_buffer_dirty (USPI_UBH); 248 ubh_mark_buffer_dirty (USPI_UBH);
250 ubh_mark_buffer_dirty (UCPI_UBH); 249 ubh_mark_buffer_dirty (UCPI_UBH);
251 if (sb->s_flags & MS_SYNCHRONOUS) { 250 if (sb->s_flags & MS_SYNCHRONOUS) {
252 ubh_wait_on_buffer (UCPI_UBH); 251 ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **) &ucpi);
253 ubh_ll_rw_block (WRITE, 1, (struct ufs_buffer_head **) &ucpi);
254 ubh_wait_on_buffer (UCPI_UBH); 252 ubh_wait_on_buffer (UCPI_UBH);
255 } 253 }
256 sb->s_dirt = 1; 254 sb->s_dirt = 1;
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 718627ca8b5c..55f4aa16e3fc 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -804,6 +804,7 @@ int ufs_sync_inode (struct inode *inode)
804 804
805void ufs_delete_inode (struct inode * inode) 805void ufs_delete_inode (struct inode * inode)
806{ 806{
807 truncate_inode_pages(&inode->i_data, 0);
807 /*UFS_I(inode)->i_dtime = CURRENT_TIME;*/ 808 /*UFS_I(inode)->i_dtime = CURRENT_TIME;*/
808 lock_kernel(); 809 lock_kernel();
809 mark_inode_dirty(inode); 810 mark_inode_dirty(inode);
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index e312bf8bad9f..61d2e35012a4 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -285,8 +285,7 @@ next:;
285 } 285 }
286 } 286 }
287 if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) { 287 if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) {
288 ubh_wait_on_buffer (ind_ubh); 288 ubh_ll_rw_block (SWRITE, 1, &ind_ubh);
289 ubh_ll_rw_block (WRITE, 1, &ind_ubh);
290 ubh_wait_on_buffer (ind_ubh); 289 ubh_wait_on_buffer (ind_ubh);
291 } 290 }
292 ubh_brelse (ind_ubh); 291 ubh_brelse (ind_ubh);
@@ -353,8 +352,7 @@ static int ufs_trunc_dindirect (struct inode *inode, unsigned offset, __fs32 *p)
353 } 352 }
354 } 353 }
355 if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) { 354 if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) {
356 ubh_wait_on_buffer (dind_bh); 355 ubh_ll_rw_block (SWRITE, 1, &dind_bh);
357 ubh_ll_rw_block (WRITE, 1, &dind_bh);
358 ubh_wait_on_buffer (dind_bh); 356 ubh_wait_on_buffer (dind_bh);
359 } 357 }
360 ubh_brelse (dind_bh); 358 ubh_brelse (dind_bh);
@@ -418,8 +416,7 @@ static int ufs_trunc_tindirect (struct inode * inode)
418 } 416 }
419 } 417 }
420 if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) { 418 if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) {
421 ubh_wait_on_buffer (tind_bh); 419 ubh_ll_rw_block (SWRITE, 1, &tind_bh);
422 ubh_ll_rw_block (WRITE, 1, &tind_bh);
423 ubh_wait_on_buffer (tind_bh); 420 ubh_wait_on_buffer (tind_bh);
424 } 421 }
425 ubh_brelse (tind_bh); 422 ubh_brelse (tind_bh);
diff --git a/fs/umsdos/notes b/fs/umsdos/notes
deleted file mode 100644
index 3c47d1f4fc47..000000000000
--- a/fs/umsdos/notes
+++ /dev/null
@@ -1,17 +0,0 @@
1This file contain idea and things I don't want to forget
2
3Possible bug in fs/read_write.c
4Function sys_readdir()
5
6 There is a call the verify_area that does not take in account
7 the count parameter. I guess it should read
8
9 error = verify_area(VERIFY_WRITE, dirent, count*sizeof (*dirent));
10
11 instead of
12
13 error = verify_area(VERIFY_WRITE, dirent, sizeof (*dirent));
14
15 Of course, now , count is always 1
16
17
diff --git a/fs/xattr.c b/fs/xattr.c
index 6acd5c63da91..3f9c64bea151 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -51,20 +51,29 @@ setxattr(struct dentry *d, char __user *name, void __user *value,
51 } 51 }
52 } 52 }
53 53
54 down(&d->d_inode->i_sem);
55 error = security_inode_setxattr(d, kname, kvalue, size, flags);
56 if (error)
57 goto out;
54 error = -EOPNOTSUPP; 58 error = -EOPNOTSUPP;
55 if (d->d_inode->i_op && d->d_inode->i_op->setxattr) { 59 if (d->d_inode->i_op && d->d_inode->i_op->setxattr) {
56 down(&d->d_inode->i_sem); 60 error = d->d_inode->i_op->setxattr(d, kname, kvalue,
57 error = security_inode_setxattr(d, kname, kvalue, size, flags); 61 size, flags);
58 if (error)
59 goto out;
60 error = d->d_inode->i_op->setxattr(d, kname, kvalue, size, flags);
61 if (!error) { 62 if (!error) {
62 fsnotify_xattr(d); 63 fsnotify_xattr(d);
63 security_inode_post_setxattr(d, kname, kvalue, size, flags); 64 security_inode_post_setxattr(d, kname, kvalue,
65 size, flags);
64 } 66 }
65out: 67 } else if (!strncmp(kname, XATTR_SECURITY_PREFIX,
66 up(&d->d_inode->i_sem); 68 sizeof XATTR_SECURITY_PREFIX - 1)) {
69 const char *suffix = kname + sizeof XATTR_SECURITY_PREFIX - 1;
70 error = security_inode_setsecurity(d->d_inode, suffix, kvalue,
71 size, flags);
72 if (!error)
73 fsnotify_xattr(d);
67 } 74 }
75out:
76 up(&d->d_inode->i_sem);
68 if (kvalue) 77 if (kvalue)
69 kfree(kvalue); 78 kfree(kvalue);
70 return error; 79 return error;
@@ -139,20 +148,25 @@ getxattr(struct dentry *d, char __user *name, void __user *value, size_t size)
139 return -ENOMEM; 148 return -ENOMEM;
140 } 149 }
141 150
151 error = security_inode_getxattr(d, kname);
152 if (error)
153 goto out;
142 error = -EOPNOTSUPP; 154 error = -EOPNOTSUPP;
143 if (d->d_inode->i_op && d->d_inode->i_op->getxattr) { 155 if (d->d_inode->i_op && d->d_inode->i_op->getxattr)
144 error = security_inode_getxattr(d, kname);
145 if (error)
146 goto out;
147 error = d->d_inode->i_op->getxattr(d, kname, kvalue, size); 156 error = d->d_inode->i_op->getxattr(d, kname, kvalue, size);
148 if (error > 0) { 157 else if (!strncmp(kname, XATTR_SECURITY_PREFIX,
149 if (size && copy_to_user(value, kvalue, error)) 158 sizeof XATTR_SECURITY_PREFIX - 1)) {
150 error = -EFAULT; 159 const char *suffix = kname + sizeof XATTR_SECURITY_PREFIX - 1;
151 } else if (error == -ERANGE && size >= XATTR_SIZE_MAX) { 160 error = security_inode_getsecurity(d->d_inode, suffix, kvalue,
152 /* The file system tried to returned a value bigger 161 size);
153 than XATTR_SIZE_MAX bytes. Not possible. */ 162 }
154 error = -E2BIG; 163 if (error > 0) {
155 } 164 if (size && copy_to_user(value, kvalue, error))
165 error = -EFAULT;
166 } else if (error == -ERANGE && size >= XATTR_SIZE_MAX) {
167 /* The file system tried to returned a value bigger
168 than XATTR_SIZE_MAX bytes. Not possible. */
169 error = -E2BIG;
156 } 170 }
157out: 171out:
158 if (kvalue) 172 if (kvalue)
@@ -221,20 +235,24 @@ listxattr(struct dentry *d, char __user *list, size_t size)
221 return -ENOMEM; 235 return -ENOMEM;
222 } 236 }
223 237
238 error = security_inode_listxattr(d);
239 if (error)
240 goto out;
224 error = -EOPNOTSUPP; 241 error = -EOPNOTSUPP;
225 if (d->d_inode->i_op && d->d_inode->i_op->listxattr) { 242 if (d->d_inode->i_op && d->d_inode->i_op->listxattr) {
226 error = security_inode_listxattr(d);
227 if (error)
228 goto out;
229 error = d->d_inode->i_op->listxattr(d, klist, size); 243 error = d->d_inode->i_op->listxattr(d, klist, size);
230 if (error > 0) { 244 } else {
231 if (size && copy_to_user(list, klist, error)) 245 error = security_inode_listsecurity(d->d_inode, klist, size);
232 error = -EFAULT; 246 if (size && error >= size)
233 } else if (error == -ERANGE && size >= XATTR_LIST_MAX) { 247 error = -ERANGE;
234 /* The file system tried to returned a list bigger 248 }
235 than XATTR_LIST_MAX bytes. Not possible. */ 249 if (error > 0) {
236 error = -E2BIG; 250 if (size && copy_to_user(list, klist, error))
237 } 251 error = -EFAULT;
252 } else if (error == -ERANGE && size >= XATTR_LIST_MAX) {
253 /* The file system tried to returned a list bigger
254 than XATTR_LIST_MAX bytes. Not possible. */
255 error = -E2BIG;
238 } 256 }
239out: 257out:
240 if (klist) 258 if (klist)
@@ -307,6 +325,8 @@ removexattr(struct dentry *d, char __user *name)
307 down(&d->d_inode->i_sem); 325 down(&d->d_inode->i_sem);
308 error = d->d_inode->i_op->removexattr(d, kname); 326 error = d->d_inode->i_op->removexattr(d, kname);
309 up(&d->d_inode->i_sem); 327 up(&d->d_inode->i_sem);
328 if (!error)
329 fsnotify_xattr(d);
310 } 330 }
311out: 331out:
312 return error; 332 return error;
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index c92306f0fdc5..8e8f32dabe53 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -1,5 +1,3 @@
1menu "XFS support"
2
3config XFS_FS 1config XFS_FS
4 tristate "XFS filesystem support" 2 tristate "XFS filesystem support"
5 select EXPORTFS if NFSD!=n 3 select EXPORTFS if NFSD!=n
@@ -22,27 +20,11 @@ config XFS_FS
22 20
23config XFS_EXPORT 21config XFS_EXPORT
24 bool 22 bool
25 default y if XFS_FS && EXPORTFS 23 depends on XFS_FS && EXPORTFS
26 24 default y
27config XFS_RT
28 bool "Realtime support (EXPERIMENTAL)"
29 depends on XFS_FS && EXPERIMENTAL
30 help
31 If you say Y here you will be able to mount and use XFS filesystems
32 which contain a realtime subvolume. The realtime subvolume is a
33 separate area of disk space where only file data is stored. The
34 realtime subvolume is designed to provide very deterministic
35 data rates suitable for media streaming applications.
36
37 See the xfs man page in section 5 for a bit more information.
38
39 This feature is unsupported at this time, is not yet fully
40 functional, and may cause serious problems.
41
42 If unsure, say N.
43 25
44config XFS_QUOTA 26config XFS_QUOTA
45 bool "Quota support" 27 tristate "XFS Quota support"
46 depends on XFS_FS 28 depends on XFS_FS
47 help 29 help
48 If you say Y here, you will be able to set limits for disk usage on 30 If you say Y here, you will be able to set limits for disk usage on
@@ -59,7 +41,7 @@ config XFS_QUOTA
59 they are completely independent subsystems. 41 they are completely independent subsystems.
60 42
61config XFS_SECURITY 43config XFS_SECURITY
62 bool "Security Label support" 44 bool "XFS Security Label support"
63 depends on XFS_FS 45 depends on XFS_FS
64 help 46 help
65 Security labels support alternative access control models 47 Security labels support alternative access control models
@@ -71,7 +53,7 @@ config XFS_SECURITY
71 extended attributes for inode security labels, say N. 53 extended attributes for inode security labels, say N.
72 54
73config XFS_POSIX_ACL 55config XFS_POSIX_ACL
74 bool "POSIX ACL support" 56 bool "XFS POSIX ACL support"
75 depends on XFS_FS 57 depends on XFS_FS
76 help 58 help
77 POSIX Access Control Lists (ACLs) support permissions for users and 59 POSIX Access Control Lists (ACLs) support permissions for users and
@@ -82,4 +64,19 @@ config XFS_POSIX_ACL
82 64
83 If you don't know what Access Control Lists are, say N. 65 If you don't know what Access Control Lists are, say N.
84 66
85endmenu 67config XFS_RT
68 bool "XFS Realtime support (EXPERIMENTAL)"
69 depends on XFS_FS && EXPERIMENTAL
70 help
71 If you say Y here you will be able to mount and use XFS filesystems
72 which contain a realtime subvolume. The realtime subvolume is a
73 separate area of disk space where only file data is stored. The
74 realtime subvolume is designed to provide very deterministic
75 data rates suitable for media streaming applications.
76
77 See the xfs man page in section 5 for a bit more information.
78
79 This feature is unsupported at this time, is not yet fully
80 functional, and may cause serious problems.
81
82 If unsure, say N.
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index d3ff78354638..49e3e7e5e3dc 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -1,150 +1 @@
1# include $(TOPDIR)/fs/xfs/Makefile-linux-$(VERSION).$(PATCHLEVEL)
2# Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
3#
4# This program is free software; you can redistribute it and/or modify it
5# under the terms of version 2 of the GNU General Public License as
6# published by the Free Software Foundation.
7#
8# This program is distributed in the hope that it would be useful, but
9# WITHOUT ANY WARRANTY; without even the implied warranty of
10# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11#
12# Further, this software is distributed without any warranty that it is
13# free of the rightful claim of any third person regarding infringement
14# or the like. Any license provided herein, whether implied or
15# otherwise, applies only to this software file. Patent licenses, if
16# any, provided herein do not apply to combinations of this program with
17# other software, or any other product whatsoever.
18#
19# You should have received a copy of the GNU General Public License along
20# with this program; if not, write the Free Software Foundation, Inc., 59
21# Temple Place - Suite 330, Boston MA 02111-1307, USA.
22#
23# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24# Mountain View, CA 94043, or:
25#
26# http://www.sgi.com
27#
28# For further information regarding this notice, see:
29#
30# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31#
32
33EXTRA_CFLAGS += -Ifs/xfs -Ifs/xfs/linux-2.6 -funsigned-char
34
35ifeq ($(CONFIG_XFS_DEBUG),y)
36 EXTRA_CFLAGS += -g -DSTATIC="" -DDEBUG
37 EXTRA_CFLAGS += -DPAGEBUF_LOCK_TRACKING
38endif
39ifeq ($(CONFIG_XFS_TRACE),y)
40 EXTRA_CFLAGS += -DXFS_ALLOC_TRACE
41 EXTRA_CFLAGS += -DXFS_ATTR_TRACE
42 EXTRA_CFLAGS += -DXFS_BLI_TRACE
43 EXTRA_CFLAGS += -DXFS_BMAP_TRACE
44 EXTRA_CFLAGS += -DXFS_BMBT_TRACE
45 EXTRA_CFLAGS += -DXFS_DIR_TRACE
46 EXTRA_CFLAGS += -DXFS_DIR2_TRACE
47 EXTRA_CFLAGS += -DXFS_DQUOT_TRACE
48 EXTRA_CFLAGS += -DXFS_ILOCK_TRACE
49 EXTRA_CFLAGS += -DXFS_LOG_TRACE
50 EXTRA_CFLAGS += -DXFS_RW_TRACE
51 EXTRA_CFLAGS += -DPAGEBUF_TRACE
52 EXTRA_CFLAGS += -DXFS_VNODE_TRACE
53endif
54
55obj-$(CONFIG_XFS_FS) += xfs.o
56
57xfs-$(CONFIG_XFS_QUOTA) += $(addprefix quota/, \
58 xfs_dquot.o \
59 xfs_dquot_item.o \
60 xfs_trans_dquot.o \
61 xfs_qm_syscalls.o \
62 xfs_qm_bhv.o \
63 xfs_qm.o)
64ifeq ($(CONFIG_XFS_QUOTA),y)
65xfs-$(CONFIG_PROC_FS) += quota/xfs_qm_stats.o
66endif
67
68xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o
69xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o
70xfs-$(CONFIG_PROC_FS) += linux-2.6/xfs_stats.o
71xfs-$(CONFIG_SYSCTL) += linux-2.6/xfs_sysctl.o
72xfs-$(CONFIG_COMPAT) += linux-2.6/xfs_ioctl32.o
73xfs-$(CONFIG_XFS_EXPORT) += linux-2.6/xfs_export.o
74
75
76xfs-y += xfs_alloc.o \
77 xfs_alloc_btree.o \
78 xfs_attr.o \
79 xfs_attr_leaf.o \
80 xfs_behavior.o \
81 xfs_bit.o \
82 xfs_bmap.o \
83 xfs_bmap_btree.o \
84 xfs_btree.o \
85 xfs_buf_item.o \
86 xfs_da_btree.o \
87 xfs_dir.o \
88 xfs_dir2.o \
89 xfs_dir2_block.o \
90 xfs_dir2_data.o \
91 xfs_dir2_leaf.o \
92 xfs_dir2_node.o \
93 xfs_dir2_sf.o \
94 xfs_dir_leaf.o \
95 xfs_error.o \
96 xfs_extfree_item.o \
97 xfs_fsops.o \
98 xfs_ialloc.o \
99 xfs_ialloc_btree.o \
100 xfs_iget.o \
101 xfs_inode.o \
102 xfs_inode_item.o \
103 xfs_iocore.o \
104 xfs_iomap.o \
105 xfs_itable.o \
106 xfs_dfrag.o \
107 xfs_log.o \
108 xfs_log_recover.o \
109 xfs_macros.o \
110 xfs_mount.o \
111 xfs_rename.o \
112 xfs_trans.o \
113 xfs_trans_ail.o \
114 xfs_trans_buf.o \
115 xfs_trans_extfree.o \
116 xfs_trans_inode.o \
117 xfs_trans_item.o \
118 xfs_utils.o \
119 xfs_vfsops.o \
120 xfs_vnodeops.o \
121 xfs_rw.o \
122 xfs_dmops.o \
123 xfs_qmops.o
124
125xfs-$(CONFIG_XFS_TRACE) += xfs_dir2_trace.o
126
127# Objects in linux-2.6/
128xfs-y += $(addprefix linux-2.6/, \
129 kmem.o \
130 xfs_aops.o \
131 xfs_buf.o \
132 xfs_file.o \
133 xfs_fs_subr.o \
134 xfs_globals.o \
135 xfs_ioctl.o \
136 xfs_iops.o \
137 xfs_lrw.o \
138 xfs_super.o \
139 xfs_vfs.o \
140 xfs_vnode.o)
141
142# Objects in support/
143xfs-y += $(addprefix support/, \
144 debug.o \
145 move.o \
146 qsort.o \
147 uuid.o)
148
149xfs-$(CONFIG_XFS_TRACE) += support/ktrace.o
150
diff --git a/fs/xfs/Makefile-linux-2.6 b/fs/xfs/Makefile-linux-2.6
new file mode 100644
index 000000000000..d8c87fa21ad1
--- /dev/null
+++ b/fs/xfs/Makefile-linux-2.6
@@ -0,0 +1,152 @@
1#
2# Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved.
3#
4# This program is free software; you can redistribute it and/or modify it
5# under the terms of version 2 of the GNU General Public License as
6# published by the Free Software Foundation.
7#
8# This program is distributed in the hope that it would be useful, but
9# WITHOUT ANY WARRANTY; without even the implied warranty of
10# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11#
12# Further, this software is distributed without any warranty that it is
13# free of the rightful claim of any third person regarding infringement
14# or the like. Any license provided herein, whether implied or
15# otherwise, applies only to this software file. Patent licenses, if
16# any, provided herein do not apply to combinations of this program with
17# other software, or any other product whatsoever.
18#
19# You should have received a copy of the GNU General Public License along
20# with this program; if not, write the Free Software Foundation, Inc., 59
21# Temple Place - Suite 330, Boston MA 02111-1307, USA.
22#
23# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24# Mountain View, CA 94043, or:
25#
26# http://www.sgi.com
27#
28# For further information regarding this notice, see:
29#
30# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31#
32
33EXTRA_CFLAGS += -Ifs/xfs -Ifs/xfs/linux-2.6 -funsigned-char
34
35XFS_LINUX := linux-2.6
36
37ifeq ($(CONFIG_XFS_DEBUG),y)
38 EXTRA_CFLAGS += -g -DSTATIC="" -DDEBUG
39 EXTRA_CFLAGS += -DPAGEBUF_LOCK_TRACKING
40endif
41ifeq ($(CONFIG_XFS_TRACE),y)
42 EXTRA_CFLAGS += -DXFS_ALLOC_TRACE
43 EXTRA_CFLAGS += -DXFS_ATTR_TRACE
44 EXTRA_CFLAGS += -DXFS_BLI_TRACE
45 EXTRA_CFLAGS += -DXFS_BMAP_TRACE
46 EXTRA_CFLAGS += -DXFS_BMBT_TRACE
47 EXTRA_CFLAGS += -DXFS_DIR_TRACE
48 EXTRA_CFLAGS += -DXFS_DIR2_TRACE
49 EXTRA_CFLAGS += -DXFS_DQUOT_TRACE
50 EXTRA_CFLAGS += -DXFS_ILOCK_TRACE
51 EXTRA_CFLAGS += -DXFS_LOG_TRACE
52 EXTRA_CFLAGS += -DXFS_RW_TRACE
53 EXTRA_CFLAGS += -DPAGEBUF_TRACE
54 EXTRA_CFLAGS += -DXFS_VNODE_TRACE
55endif
56
57obj-$(CONFIG_XFS_FS) += xfs.o
58
59xfs-$(CONFIG_XFS_QUOTA) += $(addprefix quota/, \
60 xfs_dquot.o \
61 xfs_dquot_item.o \
62 xfs_trans_dquot.o \
63 xfs_qm_syscalls.o \
64 xfs_qm_bhv.o \
65 xfs_qm.o)
66
67ifeq ($(CONFIG_XFS_QUOTA),y)
68xfs-$(CONFIG_PROC_FS) += quota/xfs_qm_stats.o
69endif
70
71xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o
72xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o
73xfs-$(CONFIG_PROC_FS) += $(XFS_LINUX)/xfs_stats.o
74xfs-$(CONFIG_SYSCTL) += $(XFS_LINUX)/xfs_sysctl.o
75xfs-$(CONFIG_COMPAT) += $(XFS_LINUX)/xfs_ioctl32.o
76xfs-$(CONFIG_XFS_EXPORT) += $(XFS_LINUX)/xfs_export.o
77
78
79xfs-y += xfs_alloc.o \
80 xfs_alloc_btree.o \
81 xfs_attr.o \
82 xfs_attr_leaf.o \
83 xfs_behavior.o \
84 xfs_bit.o \
85 xfs_bmap.o \
86 xfs_bmap_btree.o \
87 xfs_btree.o \
88 xfs_buf_item.o \
89 xfs_da_btree.o \
90 xfs_dir.o \
91 xfs_dir2.o \
92 xfs_dir2_block.o \
93 xfs_dir2_data.o \
94 xfs_dir2_leaf.o \
95 xfs_dir2_node.o \
96 xfs_dir2_sf.o \
97 xfs_dir_leaf.o \
98 xfs_error.o \
99 xfs_extfree_item.o \
100 xfs_fsops.o \
101 xfs_ialloc.o \
102 xfs_ialloc_btree.o \
103 xfs_iget.o \
104 xfs_inode.o \
105 xfs_inode_item.o \
106 xfs_iocore.o \
107 xfs_iomap.o \
108 xfs_itable.o \
109 xfs_dfrag.o \
110 xfs_log.o \
111 xfs_log_recover.o \
112 xfs_macros.o \
113 xfs_mount.o \
114 xfs_rename.o \
115 xfs_trans.o \
116 xfs_trans_ail.o \
117 xfs_trans_buf.o \
118 xfs_trans_extfree.o \
119 xfs_trans_inode.o \
120 xfs_trans_item.o \
121 xfs_utils.o \
122 xfs_vfsops.o \
123 xfs_vnodeops.o \
124 xfs_rw.o \
125 xfs_dmops.o \
126 xfs_qmops.o
127
128xfs-$(CONFIG_XFS_TRACE) += xfs_dir2_trace.o
129
130# Objects in linux/
131xfs-y += $(addprefix $(XFS_LINUX)/, \
132 kmem.o \
133 xfs_aops.o \
134 xfs_buf.o \
135 xfs_file.o \
136 xfs_fs_subr.o \
137 xfs_globals.o \
138 xfs_ioctl.o \
139 xfs_iops.o \
140 xfs_lrw.o \
141 xfs_super.o \
142 xfs_vfs.o \
143 xfs_vnode.o)
144
145# Objects in support/
146xfs-y += $(addprefix support/, \
147 debug.o \
148 move.o \
149 uuid.o)
150
151xfs-$(CONFIG_XFS_TRACE) += support/ktrace.o
152
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
index 364ea8c386b1..4b184559f231 100644
--- a/fs/xfs/linux-2.6/kmem.c
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -45,11 +45,11 @@
45 45
46 46
47void * 47void *
48kmem_alloc(size_t size, int flags) 48kmem_alloc(size_t size, unsigned int __nocast flags)
49{ 49{
50 int retries = 0; 50 int retries = 0;
51 int lflags = kmem_flags_convert(flags); 51 unsigned int lflags = kmem_flags_convert(flags);
52 void *ptr; 52 void *ptr;
53 53
54 do { 54 do {
55 if (size < MAX_SLAB_SIZE || retries > MAX_VMALLOCS) 55 if (size < MAX_SLAB_SIZE || retries > MAX_VMALLOCS)
@@ -67,7 +67,7 @@ kmem_alloc(size_t size, int flags)
67} 67}
68 68
69void * 69void *
70kmem_zalloc(size_t size, int flags) 70kmem_zalloc(size_t size, unsigned int __nocast flags)
71{ 71{
72 void *ptr; 72 void *ptr;
73 73
@@ -89,7 +89,8 @@ kmem_free(void *ptr, size_t size)
89} 89}
90 90
91void * 91void *
92kmem_realloc(void *ptr, size_t newsize, size_t oldsize, int flags) 92kmem_realloc(void *ptr, size_t newsize, size_t oldsize,
93 unsigned int __nocast flags)
93{ 94{
94 void *new; 95 void *new;
95 96
@@ -104,11 +105,11 @@ kmem_realloc(void *ptr, size_t newsize, size_t oldsize, int flags)
104} 105}
105 106
106void * 107void *
107kmem_zone_alloc(kmem_zone_t *zone, int flags) 108kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags)
108{ 109{
109 int retries = 0; 110 int retries = 0;
110 int lflags = kmem_flags_convert(flags); 111 unsigned int lflags = kmem_flags_convert(flags);
111 void *ptr; 112 void *ptr;
112 113
113 do { 114 do {
114 ptr = kmem_cache_alloc(zone, lflags); 115 ptr = kmem_cache_alloc(zone, lflags);
@@ -123,7 +124,7 @@ kmem_zone_alloc(kmem_zone_t *zone, int flags)
123} 124}
124 125
125void * 126void *
126kmem_zone_zalloc(kmem_zone_t *zone, int flags) 127kmem_zone_zalloc(kmem_zone_t *zone, unsigned int __nocast flags)
127{ 128{
128 void *ptr; 129 void *ptr;
129 130
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
index 1397b669b059..109fcf27e256 100644
--- a/fs/xfs/linux-2.6/kmem.h
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -39,10 +39,10 @@
39/* 39/*
40 * memory management routines 40 * memory management routines
41 */ 41 */
42#define KM_SLEEP 0x0001 42#define KM_SLEEP 0x0001u
43#define KM_NOSLEEP 0x0002 43#define KM_NOSLEEP 0x0002u
44#define KM_NOFS 0x0004 44#define KM_NOFS 0x0004u
45#define KM_MAYFAIL 0x0008 45#define KM_MAYFAIL 0x0008u
46 46
47#define kmem_zone kmem_cache_s 47#define kmem_zone kmem_cache_s
48#define kmem_zone_t kmem_cache_t 48#define kmem_zone_t kmem_cache_t
@@ -81,9 +81,9 @@ typedef unsigned long xfs_pflags_t;
81 *(NSTATEP) = *(OSTATEP); \ 81 *(NSTATEP) = *(OSTATEP); \
82} while (0) 82} while (0)
83 83
84static __inline unsigned int kmem_flags_convert(int flags) 84static __inline unsigned int kmem_flags_convert(unsigned int __nocast flags)
85{ 85{
86 int lflags = __GFP_NOWARN; /* we'll report problems, if need be */ 86 unsigned int lflags = __GFP_NOWARN; /* we'll report problems, if need be */
87 87
88#ifdef DEBUG 88#ifdef DEBUG
89 if (unlikely(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL))) { 89 if (unlikely(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL))) {
@@ -125,12 +125,13 @@ kmem_zone_destroy(kmem_zone_t *zone)
125 BUG(); 125 BUG();
126} 126}
127 127
128extern void *kmem_zone_zalloc(kmem_zone_t *, int); 128extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast);
129extern void *kmem_zone_alloc(kmem_zone_t *, int); 129extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast);
130 130
131extern void *kmem_alloc(size_t, int); 131extern void *kmem_alloc(size_t, unsigned int __nocast);
132extern void *kmem_realloc(void *, size_t, size_t, int); 132extern void *kmem_realloc(void *, size_t, size_t,
133extern void *kmem_zalloc(size_t, int); 133 unsigned int __nocast);
134extern void *kmem_zalloc(size_t, unsigned int __nocast);
134extern void kmem_free(void *, size_t); 135extern void kmem_free(void *, size_t);
135 136
136typedef struct shrinker *kmem_shaker_t; 137typedef struct shrinker *kmem_shaker_t;
diff --git a/fs/xfs/linux-2.6/spin.h b/fs/xfs/linux-2.6/spin.h
index bcf60a0b8df0..0039504069a5 100644
--- a/fs/xfs/linux-2.6/spin.h
+++ b/fs/xfs/linux-2.6/spin.h
@@ -45,6 +45,9 @@
45typedef spinlock_t lock_t; 45typedef spinlock_t lock_t;
46 46
47#define SPLDECL(s) unsigned long s 47#define SPLDECL(s) unsigned long s
48#ifndef DEFINE_SPINLOCK
49#define DEFINE_SPINLOCK(s) spinlock_t s = SPIN_LOCK_UNLOCKED
50#endif
48 51
49#define spinlock_init(lock, name) spin_lock_init(lock) 52#define spinlock_init(lock, name) spin_lock_init(lock)
50#define spinlock_destroy(lock) 53#define spinlock_destroy(lock)
diff --git a/fs/xfs/linux-2.6/time.h b/fs/xfs/linux-2.6/time.h
index 6c6fd0faa8e1..b0d2873ab274 100644
--- a/fs/xfs/linux-2.6/time.h
+++ b/fs/xfs/linux-2.6/time.h
@@ -39,8 +39,7 @@ typedef struct timespec timespec_t;
39 39
40static inline void delay(long ticks) 40static inline void delay(long ticks)
41{ 41{
42 set_current_state(TASK_UNINTERRUPTIBLE); 42 schedule_timeout_uninterruptible(ticks);
43 schedule_timeout(ticks);
44} 43}
45 44
46static inline void nanotime(struct timespec *tvp) 45static inline void nanotime(struct timespec *tvp)
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index a3a4b5aaf5d9..c6c077978fe3 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -104,66 +104,114 @@ xfs_page_trace(
104#define xfs_page_trace(tag, inode, page, mask) 104#define xfs_page_trace(tag, inode, page, mask)
105#endif 105#endif
106 106
107void 107/*
108linvfs_unwritten_done( 108 * Schedule IO completion handling on a xfsdatad if this was
109 struct buffer_head *bh, 109 * the final hold on this ioend.
110 int uptodate) 110 */
111STATIC void
112xfs_finish_ioend(
113 xfs_ioend_t *ioend)
111{ 114{
112 xfs_buf_t *pb = (xfs_buf_t *)bh->b_private; 115 if (atomic_dec_and_test(&ioend->io_remaining))
116 queue_work(xfsdatad_workqueue, &ioend->io_work);
117}
113 118
114 ASSERT(buffer_unwritten(bh)); 119STATIC void
115 bh->b_end_io = NULL; 120xfs_destroy_ioend(
116 clear_buffer_unwritten(bh); 121 xfs_ioend_t *ioend)
117 if (!uptodate) 122{
118 pagebuf_ioerror(pb, EIO); 123 vn_iowake(ioend->io_vnode);
119 if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) { 124 mempool_free(ioend, xfs_ioend_pool);
120 pagebuf_iodone(pb, 1, 1);
121 }
122 end_buffer_async_write(bh, uptodate);
123} 125}
124 126
125/* 127/*
126 * Issue transactions to convert a buffer range from unwritten 128 * Issue transactions to convert a buffer range from unwritten
127 * to written extents (buffered IO). 129 * to written extents.
128 */ 130 */
129STATIC void 131STATIC void
130linvfs_unwritten_convert( 132xfs_end_bio_unwritten(
131 xfs_buf_t *bp) 133 void *data)
132{ 134{
133 vnode_t *vp = XFS_BUF_FSPRIVATE(bp, vnode_t *); 135 xfs_ioend_t *ioend = data;
134 int error; 136 vnode_t *vp = ioend->io_vnode;
137 xfs_off_t offset = ioend->io_offset;
138 size_t size = ioend->io_size;
139 struct buffer_head *bh, *next;
140 int error;
141
142 if (ioend->io_uptodate)
143 VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error);
144
145 /* ioend->io_buffer_head is only non-NULL for buffered I/O */
146 for (bh = ioend->io_buffer_head; bh; bh = next) {
147 next = bh->b_private;
148
149 bh->b_end_io = NULL;
150 clear_buffer_unwritten(bh);
151 end_buffer_async_write(bh, ioend->io_uptodate);
152 }
135 153
136 BUG_ON(atomic_read(&bp->pb_hold) < 1); 154 xfs_destroy_ioend(ioend);
137 VOP_BMAP(vp, XFS_BUF_OFFSET(bp), XFS_BUF_SIZE(bp),
138 BMAPI_UNWRITTEN, NULL, NULL, error);
139 XFS_BUF_SET_FSPRIVATE(bp, NULL);
140 XFS_BUF_CLR_IODONE_FUNC(bp);
141 XFS_BUF_UNDATAIO(bp);
142 iput(LINVFS_GET_IP(vp));
143 pagebuf_iodone(bp, 0, 0);
144} 155}
145 156
146/* 157/*
147 * Issue transactions to convert a buffer range from unwritten 158 * Allocate and initialise an IO completion structure.
148 * to written extents (direct IO). 159 * We need to track unwritten extent write completion here initially.
160 * We'll need to extend this for updating the ondisk inode size later
161 * (vs. incore size).
149 */ 162 */
150STATIC void 163STATIC xfs_ioend_t *
151linvfs_unwritten_convert_direct( 164xfs_alloc_ioend(
152 struct kiocb *iocb, 165 struct inode *inode)
153 loff_t offset,
154 ssize_t size,
155 void *private)
156{ 166{
157 struct inode *inode = iocb->ki_filp->f_dentry->d_inode; 167 xfs_ioend_t *ioend;
158 ASSERT(!private || inode == (struct inode *)private);
159 168
160 /* private indicates an unwritten extent lay beneath this IO */ 169 ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS);
161 if (private && size > 0) {
162 vnode_t *vp = LINVFS_GET_VP(inode);
163 int error;
164 170
165 VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error); 171 /*
166 } 172 * Set the count to 1 initially, which will prevent an I/O
173 * completion callback from happening before we have started
174 * all the I/O from calling the completion routine too early.
175 */
176 atomic_set(&ioend->io_remaining, 1);
177 ioend->io_uptodate = 1; /* cleared if any I/O fails */
178 ioend->io_vnode = LINVFS_GET_VP(inode);
179 ioend->io_buffer_head = NULL;
180 atomic_inc(&ioend->io_vnode->v_iocount);
181 ioend->io_offset = 0;
182 ioend->io_size = 0;
183
184 INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend);
185
186 return ioend;
187}
188
189void
190linvfs_unwritten_done(
191 struct buffer_head *bh,
192 int uptodate)
193{
194 xfs_ioend_t *ioend = bh->b_private;
195 static spinlock_t unwritten_done_lock = SPIN_LOCK_UNLOCKED;
196 unsigned long flags;
197
198 ASSERT(buffer_unwritten(bh));
199 bh->b_end_io = NULL;
200
201 if (!uptodate)
202 ioend->io_uptodate = 0;
203
204 /*
205 * Deep magic here. We reuse b_private in the buffer_heads to build
206 * a chain for completing the I/O from user context after we've issued
207 * a transaction to convert the unwritten extent.
208 */
209 spin_lock_irqsave(&unwritten_done_lock, flags);
210 bh->b_private = ioend->io_buffer_head;
211 ioend->io_buffer_head = bh;
212 spin_unlock_irqrestore(&unwritten_done_lock, flags);
213
214 xfs_finish_ioend(ioend);
167} 215}
168 216
169STATIC int 217STATIC int
@@ -255,7 +303,7 @@ xfs_probe_unwritten_page(
255 struct address_space *mapping, 303 struct address_space *mapping,
256 pgoff_t index, 304 pgoff_t index,
257 xfs_iomap_t *iomapp, 305 xfs_iomap_t *iomapp,
258 xfs_buf_t *pb, 306 xfs_ioend_t *ioend,
259 unsigned long max_offset, 307 unsigned long max_offset,
260 unsigned long *fsbs, 308 unsigned long *fsbs,
261 unsigned int bbits) 309 unsigned int bbits)
@@ -283,7 +331,7 @@ xfs_probe_unwritten_page(
283 break; 331 break;
284 xfs_map_at_offset(page, bh, p_offset, bbits, iomapp); 332 xfs_map_at_offset(page, bh, p_offset, bbits, iomapp);
285 set_buffer_unwritten_io(bh); 333 set_buffer_unwritten_io(bh);
286 bh->b_private = pb; 334 bh->b_private = ioend;
287 p_offset += bh->b_size; 335 p_offset += bh->b_size;
288 (*fsbs)++; 336 (*fsbs)++;
289 } while ((bh = bh->b_this_page) != head); 337 } while ((bh = bh->b_this_page) != head);
@@ -434,34 +482,15 @@ xfs_map_unwritten(
434{ 482{
435 struct buffer_head *bh = curr; 483 struct buffer_head *bh = curr;
436 xfs_iomap_t *tmp; 484 xfs_iomap_t *tmp;
437 xfs_buf_t *pb; 485 xfs_ioend_t *ioend;
438 loff_t offset, size; 486 loff_t offset;
439 unsigned long nblocks = 0; 487 unsigned long nblocks = 0;
440 488
441 offset = start_page->index; 489 offset = start_page->index;
442 offset <<= PAGE_CACHE_SHIFT; 490 offset <<= PAGE_CACHE_SHIFT;
443 offset += p_offset; 491 offset += p_offset;
444 492
445 /* get an "empty" pagebuf to manage IO completion 493 ioend = xfs_alloc_ioend(inode);
446 * Proper values will be set before returning */
447 pb = pagebuf_lookup(iomapp->iomap_target, 0, 0, 0);
448 if (!pb)
449 return -EAGAIN;
450
451 /* Take a reference to the inode to prevent it from
452 * being reclaimed while we have outstanding unwritten
453 * extent IO on it.
454 */
455 if ((igrab(inode)) != inode) {
456 pagebuf_free(pb);
457 return -EAGAIN;
458 }
459
460 /* Set the count to 1 initially, this will stop an I/O
461 * completion callout which happens before we have started
462 * all the I/O from calling pagebuf_iodone too early.
463 */
464 atomic_set(&pb->pb_io_remaining, 1);
465 494
466 /* First map forwards in the page consecutive buffers 495 /* First map forwards in the page consecutive buffers
467 * covering this unwritten extent 496 * covering this unwritten extent
@@ -474,12 +503,12 @@ xfs_map_unwritten(
474 break; 503 break;
475 xfs_map_at_offset(start_page, bh, p_offset, block_bits, iomapp); 504 xfs_map_at_offset(start_page, bh, p_offset, block_bits, iomapp);
476 set_buffer_unwritten_io(bh); 505 set_buffer_unwritten_io(bh);
477 bh->b_private = pb; 506 bh->b_private = ioend;
478 p_offset += bh->b_size; 507 p_offset += bh->b_size;
479 nblocks++; 508 nblocks++;
480 } while ((bh = bh->b_this_page) != head); 509 } while ((bh = bh->b_this_page) != head);
481 510
482 atomic_add(nblocks, &pb->pb_io_remaining); 511 atomic_add(nblocks, &ioend->io_remaining);
483 512
484 /* If we reached the end of the page, map forwards in any 513 /* If we reached the end of the page, map forwards in any
485 * following pages which are also covered by this extent. 514 * following pages which are also covered by this extent.
@@ -496,13 +525,13 @@ xfs_map_unwritten(
496 tloff = min(tlast, tloff); 525 tloff = min(tlast, tloff);
497 for (tindex = start_page->index + 1; tindex < tloff; tindex++) { 526 for (tindex = start_page->index + 1; tindex < tloff; tindex++) {
498 page = xfs_probe_unwritten_page(mapping, 527 page = xfs_probe_unwritten_page(mapping,
499 tindex, iomapp, pb, 528 tindex, iomapp, ioend,
500 PAGE_CACHE_SIZE, &bs, bbits); 529 PAGE_CACHE_SIZE, &bs, bbits);
501 if (!page) 530 if (!page)
502 break; 531 break;
503 nblocks += bs; 532 nblocks += bs;
504 atomic_add(bs, &pb->pb_io_remaining); 533 atomic_add(bs, &ioend->io_remaining);
505 xfs_convert_page(inode, page, iomapp, wbc, pb, 534 xfs_convert_page(inode, page, iomapp, wbc, ioend,
506 startio, all_bh); 535 startio, all_bh);
507 /* stop if converting the next page might add 536 /* stop if converting the next page might add
508 * enough blocks that the corresponding byte 537 * enough blocks that the corresponding byte
@@ -514,12 +543,12 @@ xfs_map_unwritten(
514 if (tindex == tlast && 543 if (tindex == tlast &&
515 (pg_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)))) { 544 (pg_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)))) {
516 page = xfs_probe_unwritten_page(mapping, 545 page = xfs_probe_unwritten_page(mapping,
517 tindex, iomapp, pb, 546 tindex, iomapp, ioend,
518 pg_offset, &bs, bbits); 547 pg_offset, &bs, bbits);
519 if (page) { 548 if (page) {
520 nblocks += bs; 549 nblocks += bs;
521 atomic_add(bs, &pb->pb_io_remaining); 550 atomic_add(bs, &ioend->io_remaining);
522 xfs_convert_page(inode, page, iomapp, wbc, pb, 551 xfs_convert_page(inode, page, iomapp, wbc, ioend,
523 startio, all_bh); 552 startio, all_bh);
524 if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits)) 553 if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits))
525 goto enough; 554 goto enough;
@@ -528,21 +557,9 @@ xfs_map_unwritten(
528 } 557 }
529 558
530enough: 559enough:
531 size = nblocks; /* NB: using 64bit number here */ 560 ioend->io_size = (xfs_off_t)nblocks << block_bits;
532 size <<= block_bits; /* convert fsb's to byte range */ 561 ioend->io_offset = offset;
533 562 xfs_finish_ioend(ioend);
534 XFS_BUF_DATAIO(pb);
535 XFS_BUF_ASYNC(pb);
536 XFS_BUF_SET_SIZE(pb, size);
537 XFS_BUF_SET_COUNT(pb, size);
538 XFS_BUF_SET_OFFSET(pb, offset);
539 XFS_BUF_SET_FSPRIVATE(pb, LINVFS_GET_VP(inode));
540 XFS_BUF_SET_IODONE_FUNC(pb, linvfs_unwritten_convert);
541
542 if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {
543 pagebuf_iodone(pb, 1, 1);
544 }
545
546 return 0; 563 return 0;
547} 564}
548 565
@@ -787,7 +804,7 @@ xfs_page_state_convert(
787 continue; 804 continue;
788 if (!iomp) { 805 if (!iomp) {
789 err = xfs_map_blocks(inode, offset, len, &iomap, 806 err = xfs_map_blocks(inode, offset, len, &iomap,
790 BMAPI_READ|BMAPI_IGNSTATE); 807 BMAPI_WRITE|BMAPI_IGNSTATE);
791 if (err) { 808 if (err) {
792 goto error; 809 goto error;
793 } 810 }
@@ -1028,6 +1045,44 @@ linvfs_get_blocks_direct(
1028 create, 1, BMAPI_WRITE|BMAPI_DIRECT); 1045 create, 1, BMAPI_WRITE|BMAPI_DIRECT);
1029} 1046}
1030 1047
1048STATIC void
1049linvfs_end_io_direct(
1050 struct kiocb *iocb,
1051 loff_t offset,
1052 ssize_t size,
1053 void *private)
1054{
1055 xfs_ioend_t *ioend = iocb->private;
1056
1057 /*
1058 * Non-NULL private data means we need to issue a transaction to
1059 * convert a range from unwritten to written extents. This needs
1060 * to happen from process contect but aio+dio I/O completion
1061 * happens from irq context so we need to defer it to a workqueue.
1062 * This is not nessecary for synchronous direct I/O, but we do
1063 * it anyway to keep the code uniform and simpler.
1064 *
1065 * The core direct I/O code might be changed to always call the
1066 * completion handler in the future, in which case all this can
1067 * go away.
1068 */
1069 if (private && size > 0) {
1070 ioend->io_offset = offset;
1071 ioend->io_size = size;
1072 xfs_finish_ioend(ioend);
1073 } else {
1074 ASSERT(size >= 0);
1075 xfs_destroy_ioend(ioend);
1076 }
1077
1078 /*
1079 * blockdev_direct_IO can return an error even afer the I/O
1080 * completion handler was called. Thus we need to protect
1081 * against double-freeing.
1082 */
1083 iocb->private = NULL;
1084}
1085
1031STATIC ssize_t 1086STATIC ssize_t
1032linvfs_direct_IO( 1087linvfs_direct_IO(
1033 int rw, 1088 int rw,
@@ -1042,16 +1097,23 @@ linvfs_direct_IO(
1042 xfs_iomap_t iomap; 1097 xfs_iomap_t iomap;
1043 int maps = 1; 1098 int maps = 1;
1044 int error; 1099 int error;
1100 ssize_t ret;
1045 1101
1046 VOP_BMAP(vp, offset, 0, BMAPI_DEVICE, &iomap, &maps, error); 1102 VOP_BMAP(vp, offset, 0, BMAPI_DEVICE, &iomap, &maps, error);
1047 if (error) 1103 if (error)
1048 return -error; 1104 return -error;
1049 1105
1050 return blockdev_direct_IO_own_locking(rw, iocb, inode, 1106 iocb->private = xfs_alloc_ioend(inode);
1107
1108 ret = blockdev_direct_IO_own_locking(rw, iocb, inode,
1051 iomap.iomap_target->pbr_bdev, 1109 iomap.iomap_target->pbr_bdev,
1052 iov, offset, nr_segs, 1110 iov, offset, nr_segs,
1053 linvfs_get_blocks_direct, 1111 linvfs_get_blocks_direct,
1054 linvfs_unwritten_convert_direct); 1112 linvfs_end_io_direct);
1113
1114 if (unlikely(ret <= 0 && iocb->private))
1115 xfs_destroy_ioend(iocb->private);
1116 return ret;
1055} 1117}
1056 1118
1057 1119
@@ -1202,6 +1264,16 @@ out_unlock:
1202 return error; 1264 return error;
1203} 1265}
1204 1266
1267STATIC int
1268linvfs_invalidate_page(
1269 struct page *page,
1270 unsigned long offset)
1271{
1272 xfs_page_trace(XFS_INVALIDPAGE_ENTER,
1273 page->mapping->host, page, offset);
1274 return block_invalidatepage(page, offset);
1275}
1276
1205/* 1277/*
1206 * Called to move a page into cleanable state - and from there 1278 * Called to move a page into cleanable state - and from there
1207 * to be released. Possibly the page is already clean. We always 1279 * to be released. Possibly the page is already clean. We always
@@ -1279,6 +1351,7 @@ struct address_space_operations linvfs_aops = {
1279 .writepage = linvfs_writepage, 1351 .writepage = linvfs_writepage,
1280 .sync_page = block_sync_page, 1352 .sync_page = block_sync_page,
1281 .releasepage = linvfs_release_page, 1353 .releasepage = linvfs_release_page,
1354 .invalidatepage = linvfs_invalidate_page,
1282 .prepare_write = linvfs_prepare_write, 1355 .prepare_write = linvfs_prepare_write,
1283 .commit_write = generic_commit_write, 1356 .commit_write = generic_commit_write,
1284 .bmap = linvfs_bmap, 1357 .bmap = linvfs_bmap,
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
new file mode 100644
index 000000000000..2fa62974a04d
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -0,0 +1,50 @@
1/*
2 * Copyright (c) 2005 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32#ifndef __XFS_AOPS_H__
33#define __XFS_AOPS_H__
34
35extern struct workqueue_struct *xfsdatad_workqueue;
36extern mempool_t *xfs_ioend_pool;
37
38typedef void (*xfs_ioend_func_t)(void *);
39
40typedef struct xfs_ioend {
41 unsigned int io_uptodate; /* I/O status register */
42 atomic_t io_remaining; /* hold count */
43 struct vnode *io_vnode; /* file being written to */
44 struct buffer_head *io_buffer_head;/* buffer linked list head */
45 size_t io_size; /* size of the extent */
46 xfs_off_t io_offset; /* offset in the file */
47 struct work_struct io_work; /* xfsdatad work queue */
48} xfs_ioend_t;
49
50#endif /* __XFS_IOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index df0cba239dd5..e82cf72ac599 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. 2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved.
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify it 4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as 5 * under the terms of version 2 of the GNU General Public License as
@@ -54,6 +54,7 @@
54#include <linux/percpu.h> 54#include <linux/percpu.h>
55#include <linux/blkdev.h> 55#include <linux/blkdev.h>
56#include <linux/hash.h> 56#include <linux/hash.h>
57#include <linux/kthread.h>
57 58
58#include "xfs_linux.h" 59#include "xfs_linux.h"
59 60
@@ -67,7 +68,7 @@ STATIC int xfsbufd_wakeup(int, unsigned int);
67STATIC void pagebuf_delwri_queue(xfs_buf_t *, int); 68STATIC void pagebuf_delwri_queue(xfs_buf_t *, int);
68 69
69STATIC struct workqueue_struct *xfslogd_workqueue; 70STATIC struct workqueue_struct *xfslogd_workqueue;
70STATIC struct workqueue_struct *xfsdatad_workqueue; 71struct workqueue_struct *xfsdatad_workqueue;
71 72
72/* 73/*
73 * Pagebuf debugging 74 * Pagebuf debugging
@@ -590,8 +591,10 @@ found:
590 PB_SET_OWNER(pb); 591 PB_SET_OWNER(pb);
591 } 592 }
592 593
593 if (pb->pb_flags & PBF_STALE) 594 if (pb->pb_flags & PBF_STALE) {
595 ASSERT((pb->pb_flags & _PBF_DELWRI_Q) == 0);
594 pb->pb_flags &= PBF_MAPPED; 596 pb->pb_flags &= PBF_MAPPED;
597 }
595 PB_TRACE(pb, "got_lock", 0); 598 PB_TRACE(pb, "got_lock", 0);
596 XFS_STATS_INC(pb_get_locked); 599 XFS_STATS_INC(pb_get_locked);
597 return (pb); 600 return (pb);
@@ -700,25 +703,6 @@ xfs_buf_read_flags(
700} 703}
701 704
702/* 705/*
703 * Create a skeletal pagebuf (no pages associated with it).
704 */
705xfs_buf_t *
706pagebuf_lookup(
707 xfs_buftarg_t *target,
708 loff_t ioff,
709 size_t isize,
710 page_buf_flags_t flags)
711{
712 xfs_buf_t *pb;
713
714 pb = pagebuf_allocate(flags);
715 if (pb) {
716 _pagebuf_initialize(pb, target, ioff, isize, flags);
717 }
718 return pb;
719}
720
721/*
722 * If we are not low on memory then do the readahead in a deadlock 706 * If we are not low on memory then do the readahead in a deadlock
723 * safe manner. 707 * safe manner.
724 */ 708 */
@@ -913,22 +897,23 @@ pagebuf_rele(
913 do_free = 0; 897 do_free = 0;
914 } 898 }
915 899
916 if (pb->pb_flags & PBF_DELWRI) { 900 if (pb->pb_flags & PBF_FS_MANAGED) {
917 pb->pb_flags |= PBF_ASYNC;
918 atomic_inc(&pb->pb_hold);
919 pagebuf_delwri_queue(pb, 0);
920 do_free = 0;
921 } else if (pb->pb_flags & PBF_FS_MANAGED) {
922 do_free = 0; 901 do_free = 0;
923 } 902 }
924 903
925 if (do_free) { 904 if (do_free) {
905 ASSERT((pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)) == 0);
926 list_del_init(&pb->pb_hash_list); 906 list_del_init(&pb->pb_hash_list);
927 spin_unlock(&hash->bh_lock); 907 spin_unlock(&hash->bh_lock);
928 pagebuf_free(pb); 908 pagebuf_free(pb);
929 } else { 909 } else {
930 spin_unlock(&hash->bh_lock); 910 spin_unlock(&hash->bh_lock);
931 } 911 }
912 } else {
913 /*
914 * Catch reference count leaks
915 */
916 ASSERT(atomic_read(&pb->pb_hold) >= 0);
932 } 917 }
933} 918}
934 919
@@ -1006,13 +991,24 @@ pagebuf_lock(
1006 * pagebuf_unlock 991 * pagebuf_unlock
1007 * 992 *
1008 * pagebuf_unlock releases the lock on the buffer object created by 993 * pagebuf_unlock releases the lock on the buffer object created by
1009 * pagebuf_lock or pagebuf_cond_lock (not any 994 * pagebuf_lock or pagebuf_cond_lock (not any pinning of underlying pages
1010 * pinning of underlying pages created by pagebuf_pin). 995 * created by pagebuf_pin).
996 *
997 * If the buffer is marked delwri but is not queued, do so before we
998 * unlock the buffer as we need to set flags correctly. We also need to
999 * take a reference for the delwri queue because the unlocker is going to
1000 * drop their's and they don't know we just queued it.
1011 */ 1001 */
1012void 1002void
1013pagebuf_unlock( /* unlock buffer */ 1003pagebuf_unlock( /* unlock buffer */
1014 xfs_buf_t *pb) /* buffer to unlock */ 1004 xfs_buf_t *pb) /* buffer to unlock */
1015{ 1005{
1006 if ((pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)) == PBF_DELWRI) {
1007 atomic_inc(&pb->pb_hold);
1008 pb->pb_flags |= PBF_ASYNC;
1009 pagebuf_delwri_queue(pb, 0);
1010 }
1011
1016 PB_CLEAR_OWNER(pb); 1012 PB_CLEAR_OWNER(pb);
1017 up(&pb->pb_sema); 1013 up(&pb->pb_sema);
1018 PB_TRACE(pb, "unlock", 0); 1014 PB_TRACE(pb, "unlock", 0);
@@ -1249,8 +1245,8 @@ bio_end_io_pagebuf(
1249 int error) 1245 int error)
1250{ 1246{
1251 xfs_buf_t *pb = (xfs_buf_t *)bio->bi_private; 1247 xfs_buf_t *pb = (xfs_buf_t *)bio->bi_private;
1252 unsigned int i, blocksize = pb->pb_target->pbr_bsize; 1248 unsigned int blocksize = pb->pb_target->pbr_bsize;
1253 struct bio_vec *bvec = bio->bi_io_vec; 1249 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1254 1250
1255 if (bio->bi_size) 1251 if (bio->bi_size)
1256 return 1; 1252 return 1;
@@ -1258,10 +1254,12 @@ bio_end_io_pagebuf(
1258 if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) 1254 if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
1259 pb->pb_error = EIO; 1255 pb->pb_error = EIO;
1260 1256
1261 for (i = 0; i < bio->bi_vcnt; i++, bvec++) { 1257 do {
1262 struct page *page = bvec->bv_page; 1258 struct page *page = bvec->bv_page;
1263 1259
1264 if (pb->pb_error) { 1260 if (unlikely(pb->pb_error)) {
1261 if (pb->pb_flags & PBF_READ)
1262 ClearPageUptodate(page);
1265 SetPageError(page); 1263 SetPageError(page);
1266 } else if (blocksize == PAGE_CACHE_SIZE) { 1264 } else if (blocksize == PAGE_CACHE_SIZE) {
1267 SetPageUptodate(page); 1265 SetPageUptodate(page);
@@ -1270,10 +1268,13 @@ bio_end_io_pagebuf(
1270 set_page_region(page, bvec->bv_offset, bvec->bv_len); 1268 set_page_region(page, bvec->bv_offset, bvec->bv_len);
1271 } 1269 }
1272 1270
1271 if (--bvec >= bio->bi_io_vec)
1272 prefetchw(&bvec->bv_page->flags);
1273
1273 if (_pagebuf_iolocked(pb)) { 1274 if (_pagebuf_iolocked(pb)) {
1274 unlock_page(page); 1275 unlock_page(page);
1275 } 1276 }
1276 } 1277 } while (bvec >= bio->bi_io_vec);
1277 1278
1278 _pagebuf_iodone(pb, 1); 1279 _pagebuf_iodone(pb, 1);
1279 bio_put(bio); 1280 bio_put(bio);
@@ -1511,6 +1512,11 @@ again:
1511 ASSERT(btp == bp->pb_target); 1512 ASSERT(btp == bp->pb_target);
1512 if (!(bp->pb_flags & PBF_FS_MANAGED)) { 1513 if (!(bp->pb_flags & PBF_FS_MANAGED)) {
1513 spin_unlock(&hash->bh_lock); 1514 spin_unlock(&hash->bh_lock);
1515 /*
1516 * Catch superblock reference count leaks
1517 * immediately
1518 */
1519 BUG_ON(bp->pb_bn == 0);
1514 delay(100); 1520 delay(100);
1515 goto again; 1521 goto again;
1516 } 1522 }
@@ -1686,17 +1692,20 @@ pagebuf_delwri_queue(
1686 int unlock) 1692 int unlock)
1687{ 1693{
1688 PB_TRACE(pb, "delwri_q", (long)unlock); 1694 PB_TRACE(pb, "delwri_q", (long)unlock);
1689 ASSERT(pb->pb_flags & PBF_DELWRI); 1695 ASSERT((pb->pb_flags & (PBF_DELWRI|PBF_ASYNC)) ==
1696 (PBF_DELWRI|PBF_ASYNC));
1690 1697
1691 spin_lock(&pbd_delwrite_lock); 1698 spin_lock(&pbd_delwrite_lock);
1692 /* If already in the queue, dequeue and place at tail */ 1699 /* If already in the queue, dequeue and place at tail */
1693 if (!list_empty(&pb->pb_list)) { 1700 if (!list_empty(&pb->pb_list)) {
1701 ASSERT(pb->pb_flags & _PBF_DELWRI_Q);
1694 if (unlock) { 1702 if (unlock) {
1695 atomic_dec(&pb->pb_hold); 1703 atomic_dec(&pb->pb_hold);
1696 } 1704 }
1697 list_del(&pb->pb_list); 1705 list_del(&pb->pb_list);
1698 } 1706 }
1699 1707
1708 pb->pb_flags |= _PBF_DELWRI_Q;
1700 list_add_tail(&pb->pb_list, &pbd_delwrite_queue); 1709 list_add_tail(&pb->pb_list, &pbd_delwrite_queue);
1701 pb->pb_queuetime = jiffies; 1710 pb->pb_queuetime = jiffies;
1702 spin_unlock(&pbd_delwrite_lock); 1711 spin_unlock(&pbd_delwrite_lock);
@@ -1713,10 +1722,11 @@ pagebuf_delwri_dequeue(
1713 1722
1714 spin_lock(&pbd_delwrite_lock); 1723 spin_lock(&pbd_delwrite_lock);
1715 if ((pb->pb_flags & PBF_DELWRI) && !list_empty(&pb->pb_list)) { 1724 if ((pb->pb_flags & PBF_DELWRI) && !list_empty(&pb->pb_list)) {
1725 ASSERT(pb->pb_flags & _PBF_DELWRI_Q);
1716 list_del_init(&pb->pb_list); 1726 list_del_init(&pb->pb_list);
1717 dequeued = 1; 1727 dequeued = 1;
1718 } 1728 }
1719 pb->pb_flags &= ~PBF_DELWRI; 1729 pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q);
1720 spin_unlock(&pbd_delwrite_lock); 1730 spin_unlock(&pbd_delwrite_lock);
1721 1731
1722 if (dequeued) 1732 if (dequeued)
@@ -1733,9 +1743,7 @@ pagebuf_runall_queues(
1733} 1743}
1734 1744
1735/* Defines for pagebuf daemon */ 1745/* Defines for pagebuf daemon */
1736STATIC DECLARE_COMPLETION(xfsbufd_done);
1737STATIC struct task_struct *xfsbufd_task; 1746STATIC struct task_struct *xfsbufd_task;
1738STATIC int xfsbufd_active;
1739STATIC int xfsbufd_force_flush; 1747STATIC int xfsbufd_force_flush;
1740STATIC int xfsbufd_force_sleep; 1748STATIC int xfsbufd_force_sleep;
1741 1749
@@ -1761,14 +1769,8 @@ xfsbufd(
1761 xfs_buftarg_t *target; 1769 xfs_buftarg_t *target;
1762 xfs_buf_t *pb, *n; 1770 xfs_buf_t *pb, *n;
1763 1771
1764 /* Set up the thread */
1765 daemonize("xfsbufd");
1766 current->flags |= PF_MEMALLOC; 1772 current->flags |= PF_MEMALLOC;
1767 1773
1768 xfsbufd_task = current;
1769 xfsbufd_active = 1;
1770 barrier();
1771
1772 INIT_LIST_HEAD(&tmp); 1774 INIT_LIST_HEAD(&tmp);
1773 do { 1775 do {
1774 if (unlikely(freezing(current))) { 1776 if (unlikely(freezing(current))) {
@@ -1778,10 +1780,10 @@ xfsbufd(
1778 xfsbufd_force_sleep = 0; 1780 xfsbufd_force_sleep = 0;
1779 } 1781 }
1780 1782
1781 set_current_state(TASK_INTERRUPTIBLE); 1783 schedule_timeout_interruptible
1782 schedule_timeout((xfs_buf_timer_centisecs * HZ) / 100); 1784 (xfs_buf_timer_centisecs * msecs_to_jiffies(10));
1783 1785
1784 age = (xfs_buf_age_centisecs * HZ) / 100; 1786 age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
1785 spin_lock(&pbd_delwrite_lock); 1787 spin_lock(&pbd_delwrite_lock);
1786 list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) { 1788 list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) {
1787 PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb)); 1789 PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb));
@@ -1795,7 +1797,7 @@ xfsbufd(
1795 break; 1797 break;
1796 } 1798 }
1797 1799
1798 pb->pb_flags &= ~PBF_DELWRI; 1800 pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q);
1799 pb->pb_flags |= PBF_WRITE; 1801 pb->pb_flags |= PBF_WRITE;
1800 list_move(&pb->pb_list, &tmp); 1802 list_move(&pb->pb_list, &tmp);
1801 } 1803 }
@@ -1816,9 +1818,9 @@ xfsbufd(
1816 purge_addresses(); 1818 purge_addresses();
1817 1819
1818 xfsbufd_force_flush = 0; 1820 xfsbufd_force_flush = 0;
1819 } while (xfsbufd_active); 1821 } while (!kthread_should_stop());
1820 1822
1821 complete_and_exit(&xfsbufd_done, 0); 1823 return 0;
1822} 1824}
1823 1825
1824/* 1826/*
@@ -1845,15 +1847,13 @@ xfs_flush_buftarg(
1845 if (pb->pb_target != target) 1847 if (pb->pb_target != target)
1846 continue; 1848 continue;
1847 1849
1848 ASSERT(pb->pb_flags & PBF_DELWRI); 1850 ASSERT(pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q));
1849 PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb)); 1851 PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb));
1850 if (pagebuf_ispin(pb)) { 1852 if (pagebuf_ispin(pb)) {
1851 pincount++; 1853 pincount++;
1852 continue; 1854 continue;
1853 } 1855 }
1854 1856
1855 pb->pb_flags &= ~PBF_DELWRI;
1856 pb->pb_flags |= PBF_WRITE;
1857 list_move(&pb->pb_list, &tmp); 1857 list_move(&pb->pb_list, &tmp);
1858 } 1858 }
1859 spin_unlock(&pbd_delwrite_lock); 1859 spin_unlock(&pbd_delwrite_lock);
@@ -1862,12 +1862,14 @@ xfs_flush_buftarg(
1862 * Dropped the delayed write list lock, now walk the temporary list 1862 * Dropped the delayed write list lock, now walk the temporary list
1863 */ 1863 */
1864 list_for_each_entry_safe(pb, n, &tmp, pb_list) { 1864 list_for_each_entry_safe(pb, n, &tmp, pb_list) {
1865 pagebuf_lock(pb);
1866 pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q);
1867 pb->pb_flags |= PBF_WRITE;
1865 if (wait) 1868 if (wait)
1866 pb->pb_flags &= ~PBF_ASYNC; 1869 pb->pb_flags &= ~PBF_ASYNC;
1867 else 1870 else
1868 list_del_init(&pb->pb_list); 1871 list_del_init(&pb->pb_list);
1869 1872
1870 pagebuf_lock(pb);
1871 pagebuf_iostrategy(pb); 1873 pagebuf_iostrategy(pb);
1872 } 1874 }
1873 1875
@@ -1901,9 +1903,11 @@ xfs_buf_daemons_start(void)
1901 if (!xfsdatad_workqueue) 1903 if (!xfsdatad_workqueue)
1902 goto out_destroy_xfslogd_workqueue; 1904 goto out_destroy_xfslogd_workqueue;
1903 1905
1904 error = kernel_thread(xfsbufd, NULL, CLONE_FS|CLONE_FILES); 1906 xfsbufd_task = kthread_run(xfsbufd, NULL, "xfsbufd");
1905 if (error < 0) 1907 if (IS_ERR(xfsbufd_task)) {
1908 error = PTR_ERR(xfsbufd_task);
1906 goto out_destroy_xfsdatad_workqueue; 1909 goto out_destroy_xfsdatad_workqueue;
1910 }
1907 return 0; 1911 return 0;
1908 1912
1909 out_destroy_xfsdatad_workqueue: 1913 out_destroy_xfsdatad_workqueue:
@@ -1920,10 +1924,7 @@ xfs_buf_daemons_start(void)
1920STATIC void 1924STATIC void
1921xfs_buf_daemons_stop(void) 1925xfs_buf_daemons_stop(void)
1922{ 1926{
1923 xfsbufd_active = 0; 1927 kthread_stop(xfsbufd_task);
1924 barrier();
1925 wait_for_completion(&xfsbufd_done);
1926
1927 destroy_workqueue(xfslogd_workqueue); 1928 destroy_workqueue(xfslogd_workqueue);
1928 destroy_workqueue(xfsdatad_workqueue); 1929 destroy_workqueue(xfsdatad_workqueue);
1929} 1930}
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 3f8f69a66aea..67c19f799232 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -89,6 +89,7 @@ typedef enum page_buf_flags_e { /* pb_flags values */
89 _PBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */ 89 _PBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */
90 _PBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc() */ 90 _PBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc() */
91 _PBF_RUN_QUEUES = (1 << 19),/* run block device task queue */ 91 _PBF_RUN_QUEUES = (1 << 19),/* run block device task queue */
92 _PBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */
92} page_buf_flags_t; 93} page_buf_flags_t;
93 94
94#define PBF_UPDATE (PBF_READ | PBF_WRITE) 95#define PBF_UPDATE (PBF_READ | PBF_WRITE)
@@ -206,13 +207,6 @@ extern xfs_buf_t *xfs_buf_read_flags( /* allocate and read a buffer */
206#define xfs_buf_read(target, blkno, len, flags) \ 207#define xfs_buf_read(target, blkno, len, flags) \
207 xfs_buf_read_flags((target), (blkno), (len), PBF_LOCK | PBF_MAPPED) 208 xfs_buf_read_flags((target), (blkno), (len), PBF_LOCK | PBF_MAPPED)
208 209
209extern xfs_buf_t *pagebuf_lookup(
210 xfs_buftarg_t *,
211 loff_t, /* starting offset of range */
212 size_t, /* length of range */
213 page_buf_flags_t); /* PBF_READ, PBF_WRITE, */
214 /* PBF_FORCEIO, */
215
216extern xfs_buf_t *pagebuf_get_empty( /* allocate pagebuf struct with */ 210extern xfs_buf_t *pagebuf_get_empty( /* allocate pagebuf struct with */
217 /* no memory or disk address */ 211 /* no memory or disk address */
218 size_t len, 212 size_t len,
@@ -344,8 +338,6 @@ extern void pagebuf_trace(
344 338
345 339
346 340
347
348
349/* These are just for xfs_syncsub... it sets an internal variable 341/* These are just for xfs_syncsub... it sets an internal variable
350 * then passes it to VOP_FLUSH_PAGES or adds the flags to a newly gotten buf_t 342 * then passes it to VOP_FLUSH_PAGES or adds the flags to a newly gotten buf_t
351 */ 343 */
@@ -452,7 +444,7 @@ extern void pagebuf_trace(
452 444
453#define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->pb_addr) 445#define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->pb_addr)
454 446
455extern inline xfs_caddr_t xfs_buf_offset(xfs_buf_t *bp, size_t offset) 447static inline xfs_caddr_t xfs_buf_offset(xfs_buf_t *bp, size_t offset)
456{ 448{
457 if (bp->pb_flags & PBF_MAPPED) 449 if (bp->pb_flags & PBF_MAPPED)
458 return XFS_BUF_PTR(bp) + offset; 450 return XFS_BUF_PTR(bp) + offset;
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index f1ce4323f56e..3881622bcf08 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -311,6 +311,31 @@ linvfs_fsync(
311 311
312#define nextdp(dp) ((struct xfs_dirent *)((char *)(dp) + (dp)->d_reclen)) 312#define nextdp(dp) ((struct xfs_dirent *)((char *)(dp) + (dp)->d_reclen))
313 313
314#ifdef CONFIG_XFS_DMAPI
315
316STATIC struct page *
317linvfs_filemap_nopage(
318 struct vm_area_struct *area,
319 unsigned long address,
320 int *type)
321{
322 struct inode *inode = area->vm_file->f_dentry->d_inode;
323 vnode_t *vp = LINVFS_GET_VP(inode);
324 xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp);
325 int error;
326
327 ASSERT_ALWAYS(vp->v_vfsp->vfs_flag & VFS_DMI);
328
329 error = XFS_SEND_MMAP(mp, area, 0);
330 if (error)
331 return NULL;
332
333 return filemap_nopage(area, address, type);
334}
335
336#endif /* CONFIG_XFS_DMAPI */
337
338
314STATIC int 339STATIC int
315linvfs_readdir( 340linvfs_readdir(
316 struct file *filp, 341 struct file *filp,
@@ -390,14 +415,6 @@ done:
390 return -error; 415 return -error;
391} 416}
392 417
393#ifdef CONFIG_XFS_DMAPI
394STATIC void
395linvfs_mmap_close(
396 struct vm_area_struct *vma)
397{
398 xfs_dm_mm_put(vma);
399}
400#endif /* CONFIG_XFS_DMAPI */
401 418
402STATIC int 419STATIC int
403linvfs_file_mmap( 420linvfs_file_mmap(
@@ -411,16 +428,11 @@ linvfs_file_mmap(
411 428
412 vma->vm_ops = &linvfs_file_vm_ops; 429 vma->vm_ops = &linvfs_file_vm_ops;
413 430
414 if (vp->v_vfsp->vfs_flag & VFS_DMI) {
415 xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp);
416
417 error = -XFS_SEND_MMAP(mp, vma, 0);
418 if (error)
419 return error;
420#ifdef CONFIG_XFS_DMAPI 431#ifdef CONFIG_XFS_DMAPI
432 if (vp->v_vfsp->vfs_flag & VFS_DMI) {
421 vma->vm_ops = &linvfs_dmapi_file_vm_ops; 433 vma->vm_ops = &linvfs_dmapi_file_vm_ops;
422#endif
423 } 434 }
435#endif /* CONFIG_XFS_DMAPI */
424 436
425 VOP_SETATTR(vp, &va, XFS_AT_UPDATIME, NULL, error); 437 VOP_SETATTR(vp, &va, XFS_AT_UPDATIME, NULL, error);
426 if (!error) 438 if (!error)
@@ -474,6 +486,7 @@ linvfs_ioctl_invis(
474 return error; 486 return error;
475} 487}
476 488
489#ifdef CONFIG_XFS_DMAPI
477#ifdef HAVE_VMOP_MPROTECT 490#ifdef HAVE_VMOP_MPROTECT
478STATIC int 491STATIC int
479linvfs_mprotect( 492linvfs_mprotect(
@@ -494,6 +507,7 @@ linvfs_mprotect(
494 return error; 507 return error;
495} 508}
496#endif /* HAVE_VMOP_MPROTECT */ 509#endif /* HAVE_VMOP_MPROTECT */
510#endif /* CONFIG_XFS_DMAPI */
497 511
498#ifdef HAVE_FOP_OPEN_EXEC 512#ifdef HAVE_FOP_OPEN_EXEC
499/* If the user is attempting to execute a file that is offline then 513/* If the user is attempting to execute a file that is offline then
@@ -528,49 +542,10 @@ open_exec_out:
528} 542}
529#endif /* HAVE_FOP_OPEN_EXEC */ 543#endif /* HAVE_FOP_OPEN_EXEC */
530 544
531/*
532 * Temporary workaround to the AIO direct IO write problem.
533 * This code can go and we can revert to do_sync_write once
534 * the writepage(s) rework is merged.
535 */
536STATIC ssize_t
537linvfs_write(
538 struct file *filp,
539 const char __user *buf,
540 size_t len,
541 loff_t *ppos)
542{
543 struct kiocb kiocb;
544 ssize_t ret;
545
546 init_sync_kiocb(&kiocb, filp);
547 kiocb.ki_pos = *ppos;
548 ret = __linvfs_write(&kiocb, buf, 0, len, kiocb.ki_pos);
549 *ppos = kiocb.ki_pos;
550 return ret;
551}
552STATIC ssize_t
553linvfs_write_invis(
554 struct file *filp,
555 const char __user *buf,
556 size_t len,
557 loff_t *ppos)
558{
559 struct kiocb kiocb;
560 ssize_t ret;
561
562 init_sync_kiocb(&kiocb, filp);
563 kiocb.ki_pos = *ppos;
564 ret = __linvfs_write(&kiocb, buf, IO_INVIS, len, kiocb.ki_pos);
565 *ppos = kiocb.ki_pos;
566 return ret;
567}
568
569
570struct file_operations linvfs_file_operations = { 545struct file_operations linvfs_file_operations = {
571 .llseek = generic_file_llseek, 546 .llseek = generic_file_llseek,
572 .read = do_sync_read, 547 .read = do_sync_read,
573 .write = linvfs_write, 548 .write = do_sync_write,
574 .readv = linvfs_readv, 549 .readv = linvfs_readv,
575 .writev = linvfs_writev, 550 .writev = linvfs_writev,
576 .aio_read = linvfs_aio_read, 551 .aio_read = linvfs_aio_read,
@@ -592,7 +567,7 @@ struct file_operations linvfs_file_operations = {
592struct file_operations linvfs_invis_file_operations = { 567struct file_operations linvfs_invis_file_operations = {
593 .llseek = generic_file_llseek, 568 .llseek = generic_file_llseek,
594 .read = do_sync_read, 569 .read = do_sync_read,
595 .write = linvfs_write_invis, 570 .write = do_sync_write,
596 .readv = linvfs_readv_invis, 571 .readv = linvfs_readv_invis,
597 .writev = linvfs_writev_invis, 572 .writev = linvfs_writev_invis,
598 .aio_read = linvfs_aio_read_invis, 573 .aio_read = linvfs_aio_read_invis,
@@ -626,8 +601,7 @@ static struct vm_operations_struct linvfs_file_vm_ops = {
626 601
627#ifdef CONFIG_XFS_DMAPI 602#ifdef CONFIG_XFS_DMAPI
628static struct vm_operations_struct linvfs_dmapi_file_vm_ops = { 603static struct vm_operations_struct linvfs_dmapi_file_vm_ops = {
629 .close = linvfs_mmap_close, 604 .nopage = linvfs_filemap_nopage,
630 .nopage = filemap_nopage,
631 .populate = filemap_populate, 605 .populate = filemap_populate,
632#ifdef HAVE_VMOP_MPROTECT 606#ifdef HAVE_VMOP_MPROTECT
633 .mprotect = linvfs_mprotect, 607 .mprotect = linvfs_mprotect,
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 05a447e51cc0..6a3326bcd8d0 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -141,13 +141,19 @@ xfs_find_handle(
141 return -XFS_ERROR(EINVAL); 141 return -XFS_ERROR(EINVAL);
142 } 142 }
143 143
144 /* we need the vnode */ 144 switch (inode->i_mode & S_IFMT) {
145 vp = LINVFS_GET_VP(inode); 145 case S_IFREG:
146 if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) { 146 case S_IFDIR:
147 case S_IFLNK:
148 break;
149 default:
147 iput(inode); 150 iput(inode);
148 return -XFS_ERROR(EBADF); 151 return -XFS_ERROR(EBADF);
149 } 152 }
150 153
154 /* we need the vnode */
155 vp = LINVFS_GET_VP(inode);
156
151 /* now we can grab the fsid */ 157 /* now we can grab the fsid */
152 memcpy(&handle.ha_fsid, vp->v_vfsp->vfs_altfsid, sizeof(xfs_fsid_t)); 158 memcpy(&handle.ha_fsid, vp->v_vfsp->vfs_altfsid, sizeof(xfs_fsid_t));
153 hsize = sizeof(xfs_fsid_t); 159 hsize = sizeof(xfs_fsid_t);
@@ -386,7 +392,7 @@ xfs_readlink_by_handle(
386 return -error; 392 return -error;
387 393
388 /* Restrict this handle operation to symlinks only. */ 394 /* Restrict this handle operation to symlinks only. */
389 if (vp->v_type != VLNK) { 395 if (!S_ISLNK(inode->i_mode)) {
390 VN_RELE(vp); 396 VN_RELE(vp);
391 return -XFS_ERROR(EINVAL); 397 return -XFS_ERROR(EINVAL);
392 } 398 }
@@ -982,10 +988,10 @@ xfs_ioc_space(
982 if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND)) 988 if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND))
983 return -XFS_ERROR(EPERM); 989 return -XFS_ERROR(EPERM);
984 990
985 if (!(filp->f_flags & FMODE_WRITE)) 991 if (!(filp->f_mode & FMODE_WRITE))
986 return -XFS_ERROR(EBADF); 992 return -XFS_ERROR(EBADF);
987 993
988 if (vp->v_type != VREG) 994 if (!VN_ISREG(vp))
989 return -XFS_ERROR(EINVAL); 995 return -XFS_ERROR(EINVAL);
990 996
991 if (copy_from_user(&bf, arg, sizeof(bf))) 997 if (copy_from_user(&bf, arg, sizeof(bf)))
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index 0f8f1384eb36..4636b7f86f1f 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -47,8 +47,52 @@
47#include "xfs_vnode.h" 47#include "xfs_vnode.h"
48#include "xfs_dfrag.h" 48#include "xfs_dfrag.h"
49 49
50#define _NATIVE_IOC(cmd, type) \
51 _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type))
52
50#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) 53#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
51#define BROKEN_X86_ALIGNMENT 54#define BROKEN_X86_ALIGNMENT
55/* on ia32 l_start is on a 32-bit boundary */
56typedef struct xfs_flock64_32 {
57 __s16 l_type;
58 __s16 l_whence;
59 __s64 l_start __attribute__((packed));
60 /* len == 0 means until end of file */
61 __s64 l_len __attribute__((packed));
62 __s32 l_sysid;
63 __u32 l_pid;
64 __s32 l_pad[4]; /* reserve area */
65} xfs_flock64_32_t;
66
67#define XFS_IOC_ALLOCSP_32 _IOW ('X', 10, struct xfs_flock64_32)
68#define XFS_IOC_FREESP_32 _IOW ('X', 11, struct xfs_flock64_32)
69#define XFS_IOC_ALLOCSP64_32 _IOW ('X', 36, struct xfs_flock64_32)
70#define XFS_IOC_FREESP64_32 _IOW ('X', 37, struct xfs_flock64_32)
71#define XFS_IOC_RESVSP_32 _IOW ('X', 40, struct xfs_flock64_32)
72#define XFS_IOC_UNRESVSP_32 _IOW ('X', 41, struct xfs_flock64_32)
73#define XFS_IOC_RESVSP64_32 _IOW ('X', 42, struct xfs_flock64_32)
74#define XFS_IOC_UNRESVSP64_32 _IOW ('X', 43, struct xfs_flock64_32)
75
76/* just account for different alignment */
77STATIC unsigned long
78xfs_ioctl32_flock(
79 unsigned long arg)
80{
81 xfs_flock64_32_t __user *p32 = (void __user *)arg;
82 xfs_flock64_t __user *p = compat_alloc_user_space(sizeof(*p));
83
84 if (copy_in_user(&p->l_type, &p32->l_type, sizeof(s16)) ||
85 copy_in_user(&p->l_whence, &p32->l_whence, sizeof(s16)) ||
86 copy_in_user(&p->l_start, &p32->l_start, sizeof(s64)) ||
87 copy_in_user(&p->l_len, &p32->l_len, sizeof(s64)) ||
88 copy_in_user(&p->l_sysid, &p32->l_sysid, sizeof(s32)) ||
89 copy_in_user(&p->l_pid, &p32->l_pid, sizeof(u32)) ||
90 copy_in_user(&p->l_pad, &p32->l_pad, 4*sizeof(u32)))
91 return -EFAULT;
92
93 return (unsigned long)p;
94}
95
52#else 96#else
53 97
54typedef struct xfs_fsop_bulkreq32 { 98typedef struct xfs_fsop_bulkreq32 {
@@ -103,7 +147,6 @@ __linvfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg)
103/* not handled 147/* not handled
104 case XFS_IOC_FD_TO_HANDLE: 148 case XFS_IOC_FD_TO_HANDLE:
105 case XFS_IOC_PATH_TO_HANDLE: 149 case XFS_IOC_PATH_TO_HANDLE:
106 case XFS_IOC_PATH_TO_HANDLE:
107 case XFS_IOC_PATH_TO_FSHANDLE: 150 case XFS_IOC_PATH_TO_FSHANDLE:
108 case XFS_IOC_OPEN_BY_HANDLE: 151 case XFS_IOC_OPEN_BY_HANDLE:
109 case XFS_IOC_FSSETDM_BY_HANDLE: 152 case XFS_IOC_FSSETDM_BY_HANDLE:
@@ -124,8 +167,21 @@ __linvfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg)
124 case XFS_IOC_ERROR_CLEARALL: 167 case XFS_IOC_ERROR_CLEARALL:
125 break; 168 break;
126 169
127#ifndef BROKEN_X86_ALIGNMENT 170#ifdef BROKEN_X86_ALIGNMENT
128 /* xfs_flock_t and xfs_bstat_t have wrong u32 vs u64 alignment */ 171 /* xfs_flock_t has wrong u32 vs u64 alignment */
172 case XFS_IOC_ALLOCSP_32:
173 case XFS_IOC_FREESP_32:
174 case XFS_IOC_ALLOCSP64_32:
175 case XFS_IOC_FREESP64_32:
176 case XFS_IOC_RESVSP_32:
177 case XFS_IOC_UNRESVSP_32:
178 case XFS_IOC_RESVSP64_32:
179 case XFS_IOC_UNRESVSP64_32:
180 arg = xfs_ioctl32_flock(arg);
181 cmd = _NATIVE_IOC(cmd, struct xfs_flock64);
182 break;
183
184#else /* These are handled fine if no alignment issues */
129 case XFS_IOC_ALLOCSP: 185 case XFS_IOC_ALLOCSP:
130 case XFS_IOC_FREESP: 186 case XFS_IOC_FREESP:
131 case XFS_IOC_RESVSP: 187 case XFS_IOC_RESVSP:
@@ -134,6 +190,9 @@ __linvfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg)
134 case XFS_IOC_FREESP64: 190 case XFS_IOC_FREESP64:
135 case XFS_IOC_RESVSP64: 191 case XFS_IOC_RESVSP64:
136 case XFS_IOC_UNRESVSP64: 192 case XFS_IOC_UNRESVSP64:
193 break;
194
195 /* xfs_bstat_t still has wrong u32 vs u64 alignment */
137 case XFS_IOC_SWAPEXT: 196 case XFS_IOC_SWAPEXT:
138 break; 197 break;
139 198
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index f252605514eb..77708a8c9f87 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -140,7 +140,6 @@ linvfs_mknod(
140 140
141 memset(&va, 0, sizeof(va)); 141 memset(&va, 0, sizeof(va));
142 va.va_mask = XFS_AT_TYPE|XFS_AT_MODE; 142 va.va_mask = XFS_AT_TYPE|XFS_AT_MODE;
143 va.va_type = IFTOVT(mode);
144 va.va_mode = mode; 143 va.va_mode = mode;
145 144
146 switch (mode & S_IFMT) { 145 switch (mode & S_IFMT) {
@@ -308,14 +307,13 @@ linvfs_symlink(
308 cvp = NULL; 307 cvp = NULL;
309 308
310 memset(&va, 0, sizeof(va)); 309 memset(&va, 0, sizeof(va));
311 va.va_type = VLNK; 310 va.va_mode = S_IFLNK |
312 va.va_mode = irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO; 311 (irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO);
313 va.va_mask = XFS_AT_TYPE|XFS_AT_MODE; 312 va.va_mask = XFS_AT_TYPE|XFS_AT_MODE;
314 313
315 error = 0; 314 error = 0;
316 VOP_SYMLINK(dvp, dentry, &va, (char *)symname, &cvp, NULL, error); 315 VOP_SYMLINK(dvp, dentry, &va, (char *)symname, &cvp, NULL, error);
317 if (!error && cvp) { 316 if (!error && cvp) {
318 ASSERT(cvp->v_type == VLNK);
319 ip = LINVFS_GET_IP(cvp); 317 ip = LINVFS_GET_IP(cvp);
320 d_instantiate(dentry, ip); 318 d_instantiate(dentry, ip);
321 validate_fields(dir); 319 validate_fields(dir);
@@ -425,9 +423,14 @@ linvfs_follow_link(
425 return NULL; 423 return NULL;
426} 424}
427 425
428static void linvfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p) 426STATIC void
427linvfs_put_link(
428 struct dentry *dentry,
429 struct nameidata *nd,
430 void *p)
429{ 431{
430 char *s = nd_get_link(nd); 432 char *s = nd_get_link(nd);
433
431 if (!IS_ERR(s)) 434 if (!IS_ERR(s))
432 kfree(s); 435 kfree(s);
433} 436}
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 42dc5e4662ed..68c5d885ed9c 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -64,7 +64,6 @@
64#include <sema.h> 64#include <sema.h>
65#include <time.h> 65#include <time.h>
66 66
67#include <support/qsort.h>
68#include <support/ktrace.h> 67#include <support/ktrace.h>
69#include <support/debug.h> 68#include <support/debug.h>
70#include <support/move.h> 69#include <support/move.h>
@@ -104,6 +103,7 @@
104#include <xfs_stats.h> 103#include <xfs_stats.h>
105#include <xfs_sysctl.h> 104#include <xfs_sysctl.h>
106#include <xfs_iops.h> 105#include <xfs_iops.h>
106#include <xfs_aops.h>
107#include <xfs_super.h> 107#include <xfs_super.h>
108#include <xfs_globals.h> 108#include <xfs_globals.h>
109#include <xfs_fs_subr.h> 109#include <xfs_fs_subr.h>
@@ -254,11 +254,18 @@ static inline void set_buffer_unwritten_io(struct buffer_head *bh)
254#define MAX(a,b) (max(a,b)) 254#define MAX(a,b) (max(a,b))
255#define howmany(x, y) (((x)+((y)-1))/(y)) 255#define howmany(x, y) (((x)+((y)-1))/(y))
256#define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) 256#define roundup(x, y) ((((x)+((y)-1))/(y))*(y))
257#define qsort(a,n,s,fn) sort(a,n,s,fn,NULL)
257 258
259/*
260 * Various platform dependent calls that don't fit anywhere else
261 */
258#define xfs_stack_trace() dump_stack() 262#define xfs_stack_trace() dump_stack()
259
260#define xfs_itruncate_data(ip, off) \ 263#define xfs_itruncate_data(ip, off) \
261 (-vmtruncate(LINVFS_GET_IP(XFS_ITOV(ip)), (off))) 264 (-vmtruncate(LINVFS_GET_IP(XFS_ITOV(ip)), (off)))
265#define xfs_statvfs_fsid(statp, mp) \
266 ({ u64 id = huge_encode_dev((mp)->m_dev); \
267 __kernel_fsid_t *fsid = &(statp)->f_fsid; \
268 (fsid->val[0] = (u32)id, fsid->val[1] = (u32)(id >> 32)); })
262 269
263 270
264/* Move the kernel do_div definition off to one side */ 271/* Move the kernel do_div definition off to one side */
@@ -371,6 +378,4 @@ static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y)
371 return(x * y); 378 return(x * y);
372} 379}
373 380
374#define qsort(a, n, s, cmp) sort(a, n, s, cmp, NULL)
375
376#endif /* __XFS_LINUX__ */ 381#endif /* __XFS_LINUX__ */
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index acab58c48043..3b5fabe8dae9 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -660,9 +660,6 @@ xfs_write(
660 (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? 660 (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
661 mp->m_rtdev_targp : mp->m_ddev_targp; 661 mp->m_rtdev_targp : mp->m_ddev_targp;
662 662
663 if (ioflags & IO_ISAIO)
664 return XFS_ERROR(-ENOSYS);
665
666 if ((pos & target->pbr_smask) || (count & target->pbr_smask)) 663 if ((pos & target->pbr_smask) || (count & target->pbr_smask))
667 return XFS_ERROR(-EINVAL); 664 return XFS_ERROR(-EINVAL);
668 665
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
index f197a720e394..6294dcdb797c 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ b/fs/xfs/linux-2.6/xfs_lrw.h
@@ -70,9 +70,10 @@ struct xfs_iomap;
70#define XFS_SENDFILE_ENTER 21 70#define XFS_SENDFILE_ENTER 21
71#define XFS_WRITEPAGE_ENTER 22 71#define XFS_WRITEPAGE_ENTER 22
72#define XFS_RELEASEPAGE_ENTER 23 72#define XFS_RELEASEPAGE_ENTER 23
73#define XFS_IOMAP_ALLOC_ENTER 24 73#define XFS_INVALIDPAGE_ENTER 24
74#define XFS_IOMAP_ALLOC_MAP 25 74#define XFS_IOMAP_ALLOC_ENTER 25
75#define XFS_IOMAP_UNWRITTEN 26 75#define XFS_IOMAP_ALLOC_MAP 26
76#define XFS_IOMAP_UNWRITTEN 27
76extern void xfs_rw_enter_trace(int, struct xfs_iocore *, 77extern void xfs_rw_enter_trace(int, struct xfs_iocore *,
77 void *, size_t, loff_t, int); 78 void *, size_t, loff_t, int);
78extern void xfs_inval_cached_trace(struct xfs_iocore *, 79extern void xfs_inval_cached_trace(struct xfs_iocore *,
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index f6dd7de25927..2302454d8d47 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -70,11 +70,15 @@
70#include <linux/namei.h> 70#include <linux/namei.h>
71#include <linux/init.h> 71#include <linux/init.h>
72#include <linux/mount.h> 72#include <linux/mount.h>
73#include <linux/mempool.h>
73#include <linux/writeback.h> 74#include <linux/writeback.h>
75#include <linux/kthread.h>
74 76
75STATIC struct quotactl_ops linvfs_qops; 77STATIC struct quotactl_ops linvfs_qops;
76STATIC struct super_operations linvfs_sops; 78STATIC struct super_operations linvfs_sops;
77STATIC kmem_zone_t *linvfs_inode_zone; 79STATIC kmem_zone_t *xfs_vnode_zone;
80STATIC kmem_zone_t *xfs_ioend_zone;
81mempool_t *xfs_ioend_pool;
78 82
79STATIC struct xfs_mount_args * 83STATIC struct xfs_mount_args *
80xfs_args_allocate( 84xfs_args_allocate(
@@ -138,24 +142,25 @@ STATIC __inline__ void
138xfs_set_inodeops( 142xfs_set_inodeops(
139 struct inode *inode) 143 struct inode *inode)
140{ 144{
141 vnode_t *vp = LINVFS_GET_VP(inode); 145 switch (inode->i_mode & S_IFMT) {
142 146 case S_IFREG:
143 if (vp->v_type == VNON) {
144 vn_mark_bad(vp);
145 } else if (S_ISREG(inode->i_mode)) {
146 inode->i_op = &linvfs_file_inode_operations; 147 inode->i_op = &linvfs_file_inode_operations;
147 inode->i_fop = &linvfs_file_operations; 148 inode->i_fop = &linvfs_file_operations;
148 inode->i_mapping->a_ops = &linvfs_aops; 149 inode->i_mapping->a_ops = &linvfs_aops;
149 } else if (S_ISDIR(inode->i_mode)) { 150 break;
151 case S_IFDIR:
150 inode->i_op = &linvfs_dir_inode_operations; 152 inode->i_op = &linvfs_dir_inode_operations;
151 inode->i_fop = &linvfs_dir_operations; 153 inode->i_fop = &linvfs_dir_operations;
152 } else if (S_ISLNK(inode->i_mode)) { 154 break;
155 case S_IFLNK:
153 inode->i_op = &linvfs_symlink_inode_operations; 156 inode->i_op = &linvfs_symlink_inode_operations;
154 if (inode->i_blocks) 157 if (inode->i_blocks)
155 inode->i_mapping->a_ops = &linvfs_aops; 158 inode->i_mapping->a_ops = &linvfs_aops;
156 } else { 159 break;
160 default:
157 inode->i_op = &linvfs_file_inode_operations; 161 inode->i_op = &linvfs_file_inode_operations;
158 init_special_inode(inode, inode->i_mode, inode->i_rdev); 162 init_special_inode(inode, inode->i_mode, inode->i_rdev);
163 break;
159 } 164 }
160} 165}
161 166
@@ -167,16 +172,23 @@ xfs_revalidate_inode(
167{ 172{
168 struct inode *inode = LINVFS_GET_IP(vp); 173 struct inode *inode = LINVFS_GET_IP(vp);
169 174
170 inode->i_mode = (ip->i_d.di_mode & MODEMASK) | VTTOIF(vp->v_type); 175 inode->i_mode = ip->i_d.di_mode;
171 inode->i_nlink = ip->i_d.di_nlink; 176 inode->i_nlink = ip->i_d.di_nlink;
172 inode->i_uid = ip->i_d.di_uid; 177 inode->i_uid = ip->i_d.di_uid;
173 inode->i_gid = ip->i_d.di_gid; 178 inode->i_gid = ip->i_d.di_gid;
174 if (((1 << vp->v_type) & ((1<<VBLK) | (1<<VCHR))) == 0) { 179
180 switch (inode->i_mode & S_IFMT) {
181 case S_IFBLK:
182 case S_IFCHR:
183 inode->i_rdev =
184 MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
185 sysv_minor(ip->i_df.if_u2.if_rdev));
186 break;
187 default:
175 inode->i_rdev = 0; 188 inode->i_rdev = 0;
176 } else { 189 break;
177 xfs_dev_t dev = ip->i_df.if_u2.if_rdev;
178 inode->i_rdev = MKDEV(sysv_major(dev) & 0x1ff, sysv_minor(dev));
179 } 190 }
191
180 inode->i_blksize = PAGE_CACHE_SIZE; 192 inode->i_blksize = PAGE_CACHE_SIZE;
181 inode->i_generation = ip->i_d.di_gen; 193 inode->i_generation = ip->i_d.di_gen;
182 i_size_write(inode, ip->i_d.di_size); 194 i_size_write(inode, ip->i_d.di_size);
@@ -231,7 +243,6 @@ xfs_initialize_vnode(
231 * finish our work. 243 * finish our work.
232 */ 244 */
233 if (ip->i_d.di_mode != 0 && unlock && (inode->i_state & I_NEW)) { 245 if (ip->i_d.di_mode != 0 && unlock && (inode->i_state & I_NEW)) {
234 vp->v_type = IFTOVT(ip->i_d.di_mode);
235 xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip); 246 xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip);
236 xfs_set_inodeops(inode); 247 xfs_set_inodeops(inode);
237 248
@@ -274,8 +285,7 @@ linvfs_alloc_inode(
274{ 285{
275 vnode_t *vp; 286 vnode_t *vp;
276 287
277 vp = (vnode_t *)kmem_cache_alloc(linvfs_inode_zone, 288 vp = kmem_cache_alloc(xfs_vnode_zone, kmem_flags_convert(KM_SLEEP));
278 kmem_flags_convert(KM_SLEEP));
279 if (!vp) 289 if (!vp)
280 return NULL; 290 return NULL;
281 return LINVFS_GET_IP(vp); 291 return LINVFS_GET_IP(vp);
@@ -285,11 +295,11 @@ STATIC void
285linvfs_destroy_inode( 295linvfs_destroy_inode(
286 struct inode *inode) 296 struct inode *inode)
287{ 297{
288 kmem_cache_free(linvfs_inode_zone, LINVFS_GET_VP(inode)); 298 kmem_zone_free(xfs_vnode_zone, LINVFS_GET_VP(inode));
289} 299}
290 300
291STATIC void 301STATIC void
292init_once( 302linvfs_inode_init_once(
293 void *data, 303 void *data,
294 kmem_cache_t *cachep, 304 kmem_cache_t *cachep,
295 unsigned long flags) 305 unsigned long flags)
@@ -302,21 +312,41 @@ init_once(
302} 312}
303 313
304STATIC int 314STATIC int
305init_inodecache( void ) 315linvfs_init_zones(void)
306{ 316{
307 linvfs_inode_zone = kmem_cache_create("linvfs_icache", 317 xfs_vnode_zone = kmem_cache_create("xfs_vnode",
308 sizeof(vnode_t), 0, SLAB_RECLAIM_ACCOUNT, 318 sizeof(vnode_t), 0, SLAB_RECLAIM_ACCOUNT,
309 init_once, NULL); 319 linvfs_inode_init_once, NULL);
310 if (linvfs_inode_zone == NULL) 320 if (!xfs_vnode_zone)
311 return -ENOMEM; 321 goto out;
322
323 xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
324 if (!xfs_ioend_zone)
325 goto out_destroy_vnode_zone;
326
327 xfs_ioend_pool = mempool_create(4 * MAX_BUF_PER_PAGE,
328 mempool_alloc_slab, mempool_free_slab,
329 xfs_ioend_zone);
330 if (!xfs_ioend_pool)
331 goto out_free_ioend_zone;
332
312 return 0; 333 return 0;
334
335
336 out_free_ioend_zone:
337 kmem_zone_destroy(xfs_ioend_zone);
338 out_destroy_vnode_zone:
339 kmem_zone_destroy(xfs_vnode_zone);
340 out:
341 return -ENOMEM;
313} 342}
314 343
315STATIC void 344STATIC void
316destroy_inodecache( void ) 345linvfs_destroy_zones(void)
317{ 346{
318 if (kmem_cache_destroy(linvfs_inode_zone)) 347 mempool_destroy(xfs_ioend_pool);
319 printk(KERN_WARNING "%s: cache still in use!\n", __FUNCTION__); 348 kmem_zone_destroy(xfs_vnode_zone);
349 kmem_zone_destroy(xfs_ioend_zone);
320} 350}
321 351
322/* 352/*
@@ -354,17 +384,38 @@ linvfs_clear_inode(
354 struct inode *inode) 384 struct inode *inode)
355{ 385{
356 vnode_t *vp = LINVFS_GET_VP(inode); 386 vnode_t *vp = LINVFS_GET_VP(inode);
387 int error, cache;
357 388
358 if (vp) { 389 vn_trace_entry(vp, "clear_inode", (inst_t *)__return_address);
359 vn_rele(vp); 390
360 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); 391 XFS_STATS_INC(vn_rele);
361 /* 392 XFS_STATS_INC(vn_remove);
362 * Do all our cleanup, and remove this vnode. 393 XFS_STATS_INC(vn_reclaim);
363 */ 394 XFS_STATS_DEC(vn_active);
364 vn_remove(vp); 395
396 /*
397 * This can happen because xfs_iget_core calls xfs_idestroy if we
398 * find an inode with di_mode == 0 but without IGET_CREATE set.
399 */
400 if (vp->v_fbhv)
401 VOP_INACTIVE(vp, NULL, cache);
402
403 VN_LOCK(vp);
404 vp->v_flag &= ~VMODIFIED;
405 VN_UNLOCK(vp, 0);
406
407 if (vp->v_fbhv) {
408 VOP_RECLAIM(vp, error);
409 if (error)
410 panic("vn_purge: cannot reclaim");
365 } 411 }
366}
367 412
413 ASSERT(vp->v_fbhv == NULL);
414
415#ifdef XFS_VNODE_TRACE
416 ktrace_free(vp->v_trace);
417#endif
418}
368 419
369/* 420/*
370 * Enqueue a work item to be picked up by the vfs xfssyncd thread. 421 * Enqueue a work item to be picked up by the vfs xfssyncd thread.
@@ -416,7 +467,7 @@ xfs_flush_inode(
416 467
417 igrab(inode); 468 igrab(inode);
418 xfs_syncd_queue_work(vfs, inode, xfs_flush_inode_work); 469 xfs_syncd_queue_work(vfs, inode, xfs_flush_inode_work);
419 delay(HZ/2); 470 delay(msecs_to_jiffies(500));
420} 471}
421 472
422/* 473/*
@@ -441,7 +492,7 @@ xfs_flush_device(
441 492
442 igrab(inode); 493 igrab(inode);
443 xfs_syncd_queue_work(vfs, inode, xfs_flush_device_work); 494 xfs_syncd_queue_work(vfs, inode, xfs_flush_device_work);
444 delay(HZ/2); 495 delay(msecs_to_jiffies(500));
445 xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC); 496 xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
446} 497}
447 498
@@ -466,25 +517,15 @@ xfssyncd(
466{ 517{
467 long timeleft; 518 long timeleft;
468 vfs_t *vfsp = (vfs_t *) arg; 519 vfs_t *vfsp = (vfs_t *) arg;
469 struct list_head tmp;
470 struct vfs_sync_work *work, *n; 520 struct vfs_sync_work *work, *n;
521 LIST_HEAD (tmp);
471 522
472 daemonize("xfssyncd"); 523 timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
473
474 vfsp->vfs_sync_work.w_vfs = vfsp;
475 vfsp->vfs_sync_work.w_syncer = vfs_sync_worker;
476 vfsp->vfs_sync_task = current;
477 wmb();
478 wake_up(&vfsp->vfs_wait_sync_task);
479
480 INIT_LIST_HEAD(&tmp);
481 timeleft = (xfs_syncd_centisecs * HZ) / 100;
482 for (;;) { 524 for (;;) {
483 set_current_state(TASK_INTERRUPTIBLE); 525 timeleft = schedule_timeout_interruptible(timeleft);
484 timeleft = schedule_timeout(timeleft);
485 /* swsusp */ 526 /* swsusp */
486 try_to_freeze(); 527 try_to_freeze();
487 if (vfsp->vfs_flag & VFS_UMOUNT) 528 if (kthread_should_stop())
488 break; 529 break;
489 530
490 spin_lock(&vfsp->vfs_sync_lock); 531 spin_lock(&vfsp->vfs_sync_lock);
@@ -495,7 +536,8 @@ xfssyncd(
495 */ 536 */
496 if (!timeleft || list_empty(&vfsp->vfs_sync_list)) { 537 if (!timeleft || list_empty(&vfsp->vfs_sync_list)) {
497 if (!timeleft) 538 if (!timeleft)
498 timeleft = (xfs_syncd_centisecs * HZ) / 100; 539 timeleft = xfs_syncd_centisecs *
540 msecs_to_jiffies(10);
499 INIT_LIST_HEAD(&vfsp->vfs_sync_work.w_list); 541 INIT_LIST_HEAD(&vfsp->vfs_sync_work.w_list);
500 list_add_tail(&vfsp->vfs_sync_work.w_list, 542 list_add_tail(&vfsp->vfs_sync_work.w_list,
501 &vfsp->vfs_sync_list); 543 &vfsp->vfs_sync_list);
@@ -513,10 +555,6 @@ xfssyncd(
513 } 555 }
514 } 556 }
515 557
516 vfsp->vfs_sync_task = NULL;
517 wmb();
518 wake_up(&vfsp->vfs_wait_sync_task);
519
520 return 0; 558 return 0;
521} 559}
522 560
@@ -524,13 +562,11 @@ STATIC int
524linvfs_start_syncd( 562linvfs_start_syncd(
525 vfs_t *vfsp) 563 vfs_t *vfsp)
526{ 564{
527 int pid; 565 vfsp->vfs_sync_work.w_syncer = vfs_sync_worker;
528 566 vfsp->vfs_sync_work.w_vfs = vfsp;
529 pid = kernel_thread(xfssyncd, (void *) vfsp, 567 vfsp->vfs_sync_task = kthread_run(xfssyncd, vfsp, "xfssyncd");
530 CLONE_VM | CLONE_FS | CLONE_FILES); 568 if (IS_ERR(vfsp->vfs_sync_task))
531 if (pid < 0) 569 return -PTR_ERR(vfsp->vfs_sync_task);
532 return -pid;
533 wait_event(vfsp->vfs_wait_sync_task, vfsp->vfs_sync_task);
534 return 0; 570 return 0;
535} 571}
536 572
@@ -538,11 +574,7 @@ STATIC void
538linvfs_stop_syncd( 574linvfs_stop_syncd(
539 vfs_t *vfsp) 575 vfs_t *vfsp)
540{ 576{
541 vfsp->vfs_flag |= VFS_UMOUNT; 577 kthread_stop(vfsp->vfs_sync_task);
542 wmb();
543
544 wake_up_process(vfsp->vfs_sync_task);
545 wait_event(vfsp->vfs_wait_sync_task, !vfsp->vfs_sync_task);
546} 578}
547 579
548STATIC void 580STATIC void
@@ -866,9 +898,9 @@ init_xfs_fs( void )
866 898
867 ktrace_init(64); 899 ktrace_init(64);
868 900
869 error = init_inodecache(); 901 error = linvfs_init_zones();
870 if (error < 0) 902 if (error < 0)
871 goto undo_inodecache; 903 goto undo_zones;
872 904
873 error = pagebuf_init(); 905 error = pagebuf_init();
874 if (error < 0) 906 if (error < 0)
@@ -889,9 +921,9 @@ undo_register:
889 pagebuf_terminate(); 921 pagebuf_terminate();
890 922
891undo_pagebuf: 923undo_pagebuf:
892 destroy_inodecache(); 924 linvfs_destroy_zones();
893 925
894undo_inodecache: 926undo_zones:
895 return error; 927 return error;
896} 928}
897 929
@@ -903,7 +935,7 @@ exit_xfs_fs( void )
903 unregister_filesystem(&xfs_fs_type); 935 unregister_filesystem(&xfs_fs_type);
904 xfs_cleanup(); 936 xfs_cleanup();
905 pagebuf_terminate(); 937 pagebuf_terminate();
906 destroy_inodecache(); 938 linvfs_destroy_zones();
907 ktrace_uninit(); 939 ktrace_uninit();
908} 940}
909 941
diff --git a/fs/xfs/linux-2.6/xfs_vfs.c b/fs/xfs/linux-2.6/xfs_vfs.c
index 669c61644959..34cc902ec119 100644
--- a/fs/xfs/linux-2.6/xfs_vfs.c
+++ b/fs/xfs/linux-2.6/xfs_vfs.c
@@ -251,7 +251,6 @@ vfs_allocate( void )
251 bhv_head_init(VFS_BHVHEAD(vfsp), "vfs"); 251 bhv_head_init(VFS_BHVHEAD(vfsp), "vfs");
252 INIT_LIST_HEAD(&vfsp->vfs_sync_list); 252 INIT_LIST_HEAD(&vfsp->vfs_sync_list);
253 spin_lock_init(&vfsp->vfs_sync_lock); 253 spin_lock_init(&vfsp->vfs_sync_lock);
254 init_waitqueue_head(&vfsp->vfs_wait_sync_task);
255 init_waitqueue_head(&vfsp->vfs_wait_single_sync_task); 254 init_waitqueue_head(&vfsp->vfs_wait_single_sync_task);
256 return vfsp; 255 return vfsp;
257} 256}
diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h
index 7ee1f714e9ba..f0ab574fb47a 100644
--- a/fs/xfs/linux-2.6/xfs_vfs.h
+++ b/fs/xfs/linux-2.6/xfs_vfs.h
@@ -65,7 +65,6 @@ typedef struct vfs {
65 spinlock_t vfs_sync_lock; /* work item list lock */ 65 spinlock_t vfs_sync_lock; /* work item list lock */
66 int vfs_sync_seq; /* sync thread generation no. */ 66 int vfs_sync_seq; /* sync thread generation no. */
67 wait_queue_head_t vfs_wait_single_sync_task; 67 wait_queue_head_t vfs_wait_single_sync_task;
68 wait_queue_head_t vfs_wait_sync_task;
69} vfs_t; 68} vfs_t;
70 69
71#define vfs_fbhv vfs_bh.bh_first /* 1st on vfs behavior chain */ 70#define vfs_fbhv vfs_bh.bh_first /* 1st on vfs behavior chain */
@@ -96,7 +95,6 @@ typedef enum {
96#define VFS_RDONLY 0x0001 /* read-only vfs */ 95#define VFS_RDONLY 0x0001 /* read-only vfs */
97#define VFS_GRPID 0x0002 /* group-ID assigned from directory */ 96#define VFS_GRPID 0x0002 /* group-ID assigned from directory */
98#define VFS_DMI 0x0004 /* filesystem has the DMI enabled */ 97#define VFS_DMI 0x0004 /* filesystem has the DMI enabled */
99#define VFS_UMOUNT 0x0008 /* unmount in progress */
100#define VFS_END 0x0008 /* max flag */ 98#define VFS_END 0x0008 /* max flag */
101 99
102#define SYNC_ATTR 0x0001 /* sync attributes */ 100#define SYNC_ATTR 0x0001 /* sync attributes */
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
index 250cad54e892..268f45bf6a9a 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -42,93 +42,33 @@ DEFINE_SPINLOCK(vnumber_lock);
42 */ 42 */
43#define NVSYNC 37 43#define NVSYNC 37
44#define vptosync(v) (&vsync[((unsigned long)v) % NVSYNC]) 44#define vptosync(v) (&vsync[((unsigned long)v) % NVSYNC])
45sv_t vsync[NVSYNC]; 45STATIC wait_queue_head_t vsync[NVSYNC];
46
47/*
48 * Translate stat(2) file types to vnode types and vice versa.
49 * Aware of numeric order of S_IFMT and vnode type values.
50 */
51enum vtype iftovt_tab[] = {
52 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
53 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON
54};
55
56u_short vttoif_tab[] = {
57 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFIFO, 0, S_IFSOCK
58};
59 46
60 47
61void 48void
62vn_init(void) 49vn_init(void)
63{ 50{
64 register sv_t *svp; 51 int i;
65 register int i;
66 52
67 for (svp = vsync, i = 0; i < NVSYNC; i++, svp++) 53 for (i = 0; i < NVSYNC; i++)
68 init_sv(svp, SV_DEFAULT, "vsy", i); 54 init_waitqueue_head(&vsync[i]);
69} 55}
70 56
71/* 57void
72 * Clean a vnode of filesystem-specific data and prepare it for reuse. 58vn_iowait(
73 */
74STATIC int
75vn_reclaim(
76 struct vnode *vp) 59 struct vnode *vp)
77{ 60{
78 int error; 61 wait_queue_head_t *wq = vptosync(vp);
79 62
80 XFS_STATS_INC(vn_reclaim); 63 wait_event(*wq, (atomic_read(&vp->v_iocount) == 0));
81 vn_trace_entry(vp, "vn_reclaim", (inst_t *)__return_address);
82
83 /*
84 * Only make the VOP_RECLAIM call if there are behaviors
85 * to call.
86 */
87 if (vp->v_fbhv) {
88 VOP_RECLAIM(vp, error);
89 if (error)
90 return -error;
91 }
92 ASSERT(vp->v_fbhv == NULL);
93
94 VN_LOCK(vp);
95 vp->v_flag &= (VRECLM|VWAIT);
96 VN_UNLOCK(vp, 0);
97
98 vp->v_type = VNON;
99 vp->v_fbhv = NULL;
100
101#ifdef XFS_VNODE_TRACE
102 ktrace_free(vp->v_trace);
103 vp->v_trace = NULL;
104#endif
105
106 return 0;
107}
108
109STATIC void
110vn_wakeup(
111 struct vnode *vp)
112{
113 VN_LOCK(vp);
114 if (vp->v_flag & VWAIT)
115 sv_broadcast(vptosync(vp));
116 vp->v_flag &= ~(VRECLM|VWAIT|VMODIFIED);
117 VN_UNLOCK(vp, 0);
118} 64}
119 65
120int 66void
121vn_wait( 67vn_iowake(
122 struct vnode *vp) 68 struct vnode *vp)
123{ 69{
124 VN_LOCK(vp); 70 if (atomic_dec_and_test(&vp->v_iocount))
125 if (vp->v_flag & (VINACT | VRECLM)) { 71 wake_up(vptosync(vp));
126 vp->v_flag |= VWAIT;
127 sv_wait(vptosync(vp), PINOD, &vp->v_lock, 0);
128 return 1;
129 }
130 VN_UNLOCK(vp, 0);
131 return 0;
132} 72}
133 73
134struct vnode * 74struct vnode *
@@ -154,6 +94,8 @@ vn_initialize(
154 /* Initialize the first behavior and the behavior chain head. */ 94 /* Initialize the first behavior and the behavior chain head. */
155 vn_bhv_head_init(VN_BHV_HEAD(vp), "vnode"); 95 vn_bhv_head_init(VN_BHV_HEAD(vp), "vnode");
156 96
97 atomic_set(&vp->v_iocount, 0);
98
157#ifdef XFS_VNODE_TRACE 99#ifdef XFS_VNODE_TRACE
158 vp->v_trace = ktrace_alloc(VNODE_TRACE_SIZE, KM_SLEEP); 100 vp->v_trace = ktrace_alloc(VNODE_TRACE_SIZE, KM_SLEEP);
159#endif /* XFS_VNODE_TRACE */ 101#endif /* XFS_VNODE_TRACE */
@@ -163,30 +105,6 @@ vn_initialize(
163} 105}
164 106
165/* 107/*
166 * Get a reference on a vnode.
167 */
168vnode_t *
169vn_get(
170 struct vnode *vp,
171 vmap_t *vmap)
172{
173 struct inode *inode;
174
175 XFS_STATS_INC(vn_get);
176 inode = LINVFS_GET_IP(vp);
177 if (inode->i_state & I_FREEING)
178 return NULL;
179
180 inode = ilookup(vmap->v_vfsp->vfs_super, vmap->v_ino);
181 if (!inode) /* Inode not present */
182 return NULL;
183
184 vn_trace_exit(vp, "vn_get", (inst_t *)__return_address);
185
186 return vp;
187}
188
189/*
190 * Revalidate the Linux inode from the vattr. 108 * Revalidate the Linux inode from the vattr.
191 * Note: i_size _not_ updated; we must hold the inode 109 * Note: i_size _not_ updated; we must hold the inode
192 * semaphore when doing that - callers responsibility. 110 * semaphore when doing that - callers responsibility.
@@ -198,7 +116,7 @@ vn_revalidate_core(
198{ 116{
199 struct inode *inode = LINVFS_GET_IP(vp); 117 struct inode *inode = LINVFS_GET_IP(vp);
200 118
201 inode->i_mode = VTTOIF(vap->va_type) | vap->va_mode; 119 inode->i_mode = vap->va_mode;
202 inode->i_nlink = vap->va_nlink; 120 inode->i_nlink = vap->va_nlink;
203 inode->i_uid = vap->va_uid; 121 inode->i_uid = vap->va_uid;
204 inode->i_gid = vap->va_gid; 122 inode->i_gid = vap->va_gid;
@@ -247,71 +165,6 @@ vn_revalidate(
247} 165}
248 166
249/* 167/*
250 * purge a vnode from the cache
251 * At this point the vnode is guaranteed to have no references (vn_count == 0)
252 * The caller has to make sure that there are no ways someone could
253 * get a handle (via vn_get) on the vnode (usually done via a mount/vfs lock).
254 */
255void
256vn_purge(
257 struct vnode *vp,
258 vmap_t *vmap)
259{
260 vn_trace_entry(vp, "vn_purge", (inst_t *)__return_address);
261
262again:
263 /*
264 * Check whether vp has already been reclaimed since our caller
265 * sampled its version while holding a filesystem cache lock that
266 * its VOP_RECLAIM function acquires.
267 */
268 VN_LOCK(vp);
269 if (vp->v_number != vmap->v_number) {
270 VN_UNLOCK(vp, 0);
271 return;
272 }
273
274 /*
275 * If vp is being reclaimed or inactivated, wait until it is inert,
276 * then proceed. Can't assume that vnode is actually reclaimed
277 * just because the reclaimed flag is asserted -- a vn_alloc
278 * reclaim can fail.
279 */
280 if (vp->v_flag & (VINACT | VRECLM)) {
281 ASSERT(vn_count(vp) == 0);
282 vp->v_flag |= VWAIT;
283 sv_wait(vptosync(vp), PINOD, &vp->v_lock, 0);
284 goto again;
285 }
286
287 /*
288 * Another process could have raced in and gotten this vnode...
289 */
290 if (vn_count(vp) > 0) {
291 VN_UNLOCK(vp, 0);
292 return;
293 }
294
295 XFS_STATS_DEC(vn_active);
296 vp->v_flag |= VRECLM;
297 VN_UNLOCK(vp, 0);
298
299 /*
300 * Call VOP_RECLAIM and clean vp. The FSYNC_INVAL flag tells
301 * vp's filesystem to flush and invalidate all cached resources.
302 * When vn_reclaim returns, vp should have no private data,
303 * either in a system cache or attached to v_data.
304 */
305 if (vn_reclaim(vp) != 0)
306 panic("vn_purge: cannot reclaim");
307
308 /*
309 * Wakeup anyone waiting for vp to be reclaimed.
310 */
311 vn_wakeup(vp);
312}
313
314/*
315 * Add a reference to a referenced vnode. 168 * Add a reference to a referenced vnode.
316 */ 169 */
317struct vnode * 170struct vnode *
@@ -330,80 +183,6 @@ vn_hold(
330 return vp; 183 return vp;
331} 184}
332 185
333/*
334 * Call VOP_INACTIVE on last reference.
335 */
336void
337vn_rele(
338 struct vnode *vp)
339{
340 int vcnt;
341 int cache;
342
343 XFS_STATS_INC(vn_rele);
344
345 VN_LOCK(vp);
346
347 vn_trace_entry(vp, "vn_rele", (inst_t *)__return_address);
348 vcnt = vn_count(vp);
349
350 /*
351 * Since we always get called from put_inode we know
352 * that i_count won't be decremented after we
353 * return.
354 */
355 if (!vcnt) {
356 /*
357 * As soon as we turn this on, noone can find us in vn_get
358 * until we turn off VINACT or VRECLM
359 */
360 vp->v_flag |= VINACT;
361 VN_UNLOCK(vp, 0);
362
363 /*
364 * Do not make the VOP_INACTIVE call if there
365 * are no behaviors attached to the vnode to call.
366 */
367 if (vp->v_fbhv)
368 VOP_INACTIVE(vp, NULL, cache);
369
370 VN_LOCK(vp);
371 if (vp->v_flag & VWAIT)
372 sv_broadcast(vptosync(vp));
373
374 vp->v_flag &= ~(VINACT|VWAIT|VRECLM|VMODIFIED);
375 }
376
377 VN_UNLOCK(vp, 0);
378
379 vn_trace_exit(vp, "vn_rele", (inst_t *)__return_address);
380}
381
382/*
383 * Finish the removal of a vnode.
384 */
385void
386vn_remove(
387 struct vnode *vp)
388{
389 vmap_t vmap;
390
391 /* Make sure we don't do this to the same vnode twice */
392 if (!(vp->v_fbhv))
393 return;
394
395 XFS_STATS_INC(vn_remove);
396 vn_trace_exit(vp, "vn_remove", (inst_t *)__return_address);
397
398 /*
399 * After the following purge the vnode
400 * will no longer exist.
401 */
402 VMAP(vp, vmap);
403 vn_purge(vp, &vmap);
404}
405
406
407#ifdef XFS_VNODE_TRACE 186#ifdef XFS_VNODE_TRACE
408 187
409#define KTRACE_ENTER(vp, vk, s, line, ra) \ 188#define KTRACE_ENTER(vp, vk, s, line, ra) \
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index a6e57c647be4..35f306cebb87 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. 2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved.
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify it 4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as 5 * under the terms of version 2 of the GNU General Public License as
@@ -65,10 +65,6 @@ struct vattr;
65struct xfs_iomap; 65struct xfs_iomap;
66struct attrlist_cursor_kern; 66struct attrlist_cursor_kern;
67 67
68/*
69 * Vnode types. VNON means no type.
70 */
71enum vtype { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VFIFO, VBAD, VSOCK };
72 68
73typedef xfs_ino_t vnumber_t; 69typedef xfs_ino_t vnumber_t;
74typedef struct dentry vname_t; 70typedef struct dentry vname_t;
@@ -77,15 +73,14 @@ typedef bhv_head_t vn_bhv_head_t;
77/* 73/*
78 * MP locking protocols: 74 * MP locking protocols:
79 * v_flag, v_vfsp VN_LOCK/VN_UNLOCK 75 * v_flag, v_vfsp VN_LOCK/VN_UNLOCK
80 * v_type read-only or fs-dependent
81 */ 76 */
82typedef struct vnode { 77typedef struct vnode {
83 __u32 v_flag; /* vnode flags (see below) */ 78 __u32 v_flag; /* vnode flags (see below) */
84 enum vtype v_type; /* vnode type */
85 struct vfs *v_vfsp; /* ptr to containing VFS */ 79 struct vfs *v_vfsp; /* ptr to containing VFS */
86 vnumber_t v_number; /* in-core vnode number */ 80 vnumber_t v_number; /* in-core vnode number */
87 vn_bhv_head_t v_bh; /* behavior head */ 81 vn_bhv_head_t v_bh; /* behavior head */
88 spinlock_t v_lock; /* VN_LOCK/VN_UNLOCK */ 82 spinlock_t v_lock; /* VN_LOCK/VN_UNLOCK */
83 atomic_t v_iocount; /* outstanding I/O count */
89#ifdef XFS_VNODE_TRACE 84#ifdef XFS_VNODE_TRACE
90 struct ktrace *v_trace; /* trace header structure */ 85 struct ktrace *v_trace; /* trace header structure */
91#endif 86#endif
@@ -93,6 +88,12 @@ typedef struct vnode {
93 /* inode MUST be last */ 88 /* inode MUST be last */
94} vnode_t; 89} vnode_t;
95 90
91#define VN_ISLNK(vp) S_ISLNK((vp)->v_inode.i_mode)
92#define VN_ISREG(vp) S_ISREG((vp)->v_inode.i_mode)
93#define VN_ISDIR(vp) S_ISDIR((vp)->v_inode.i_mode)
94#define VN_ISCHR(vp) S_ISCHR((vp)->v_inode.i_mode)
95#define VN_ISBLK(vp) S_ISBLK((vp)->v_inode.i_mode)
96
96#define v_fbhv v_bh.bh_first /* first behavior */ 97#define v_fbhv v_bh.bh_first /* first behavior */
97#define v_fops v_bh.bh_first->bd_ops /* first behavior ops */ 98#define v_fops v_bh.bh_first->bd_ops /* first behavior ops */
98 99
@@ -133,22 +134,8 @@ typedef enum {
133#define LINVFS_GET_IP(vp) (&(vp)->v_inode) 134#define LINVFS_GET_IP(vp) (&(vp)->v_inode)
134 135
135/* 136/*
136 * Convert between vnode types and inode formats (since POSIX.1
137 * defines mode word of stat structure in terms of inode formats).
138 */
139extern enum vtype iftovt_tab[];
140extern u_short vttoif_tab[];
141#define IFTOVT(mode) (iftovt_tab[((mode) & S_IFMT) >> 12])
142#define VTTOIF(indx) (vttoif_tab[(int)(indx)])
143#define MAKEIMODE(indx, mode) (int)(VTTOIF(indx) | (mode))
144
145
146/*
147 * Vnode flags. 137 * Vnode flags.
148 */ 138 */
149#define VINACT 0x1 /* vnode is being inactivated */
150#define VRECLM 0x2 /* vnode is being reclaimed */
151#define VWAIT 0x4 /* waiting for VINACT/VRECLM to end */
152#define VMODIFIED 0x8 /* XFS inode state possibly differs */ 139#define VMODIFIED 0x8 /* XFS inode state possibly differs */
153 /* to the Linux inode state. */ 140 /* to the Linux inode state. */
154 141
@@ -408,7 +395,6 @@ typedef struct vnodeops {
408 */ 395 */
409typedef struct vattr { 396typedef struct vattr {
410 int va_mask; /* bit-mask of attributes present */ 397 int va_mask; /* bit-mask of attributes present */
411 enum vtype va_type; /* vnode type (for create) */
412 mode_t va_mode; /* file access mode and type */ 398 mode_t va_mode; /* file access mode and type */
413 xfs_nlink_t va_nlink; /* number of references to file */ 399 xfs_nlink_t va_nlink; /* number of references to file */
414 uid_t va_uid; /* owner user id */ 400 uid_t va_uid; /* owner user id */
@@ -498,27 +484,12 @@ typedef struct vattr {
498 * Check whether mandatory file locking is enabled. 484 * Check whether mandatory file locking is enabled.
499 */ 485 */
500#define MANDLOCK(vp, mode) \ 486#define MANDLOCK(vp, mode) \
501 ((vp)->v_type == VREG && ((mode) & (VSGID|(VEXEC>>3))) == VSGID) 487 (VN_ISREG(vp) && ((mode) & (VSGID|(VEXEC>>3))) == VSGID)
502 488
503extern void vn_init(void); 489extern void vn_init(void);
504extern int vn_wait(struct vnode *);
505extern vnode_t *vn_initialize(struct inode *); 490extern vnode_t *vn_initialize(struct inode *);
506 491
507/* 492/*
508 * Acquiring and invalidating vnodes:
509 *
510 * if (vn_get(vp, version, 0))
511 * ...;
512 * vn_purge(vp, version);
513 *
514 * vn_get and vn_purge must be called with vmap_t arguments, sampled
515 * while a lock that the vnode's VOP_RECLAIM function acquires is
516 * held, to ensure that the vnode sampled with the lock held isn't
517 * recycled (VOP_RECLAIMed) or deallocated between the release of the lock
518 * and the subsequent vn_get or vn_purge.
519 */
520
521/*
522 * vnode_map structures _must_ match vn_epoch and vnode structure sizes. 493 * vnode_map structures _must_ match vn_epoch and vnode structure sizes.
523 */ 494 */
524typedef struct vnode_map { 495typedef struct vnode_map {
@@ -531,11 +502,11 @@ typedef struct vnode_map {
531 (vmap).v_number = (vp)->v_number, \ 502 (vmap).v_number = (vp)->v_number, \
532 (vmap).v_ino = (vp)->v_inode.i_ino; } 503 (vmap).v_ino = (vp)->v_inode.i_ino; }
533 504
534extern void vn_purge(struct vnode *, vmap_t *);
535extern vnode_t *vn_get(struct vnode *, vmap_t *);
536extern int vn_revalidate(struct vnode *); 505extern int vn_revalidate(struct vnode *);
537extern void vn_revalidate_core(struct vnode *, vattr_t *); 506extern void vn_revalidate_core(struct vnode *, vattr_t *);
538extern void vn_remove(struct vnode *); 507
508extern void vn_iowait(struct vnode *vp);
509extern void vn_iowake(struct vnode *vp);
539 510
540static inline int vn_count(struct vnode *vp) 511static inline int vn_count(struct vnode *vp)
541{ 512{
@@ -546,7 +517,6 @@ static inline int vn_count(struct vnode *vp)
546 * Vnode reference counting functions (and macros for compatibility). 517 * Vnode reference counting functions (and macros for compatibility).
547 */ 518 */
548extern vnode_t *vn_hold(struct vnode *); 519extern vnode_t *vn_hold(struct vnode *);
549extern void vn_rele(struct vnode *);
550 520
551#if defined(XFS_VNODE_TRACE) 521#if defined(XFS_VNODE_TRACE)
552#define VN_HOLD(vp) \ 522#define VN_HOLD(vp) \
@@ -560,6 +530,12 @@ extern void vn_rele(struct vnode *);
560#define VN_RELE(vp) (iput(LINVFS_GET_IP(vp))) 530#define VN_RELE(vp) (iput(LINVFS_GET_IP(vp)))
561#endif 531#endif
562 532
533static inline struct vnode *vn_grab(struct vnode *vp)
534{
535 struct inode *inode = igrab(LINVFS_GET_IP(vp));
536 return inode ? LINVFS_GET_VP(inode) : NULL;
537}
538
563/* 539/*
564 * Vname handling macros. 540 * Vname handling macros.
565 */ 541 */
diff --git a/fs/xfs/quota/Makefile b/fs/xfs/quota/Makefile
new file mode 100644
index 000000000000..7a4f725b2824
--- /dev/null
+++ b/fs/xfs/quota/Makefile
@@ -0,0 +1 @@
include $(TOPDIR)/fs/xfs/quota/Makefile-linux-$(VERSION).$(PATCHLEVEL)
diff --git a/fs/xfs/quota/Makefile-linux-2.6 b/fs/xfs/quota/Makefile-linux-2.6
new file mode 100644
index 000000000000..93e60e839355
--- /dev/null
+++ b/fs/xfs/quota/Makefile-linux-2.6
@@ -0,0 +1,53 @@
1#
2# Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
3#
4# This program is free software; you can redistribute it and/or modify it
5# under the terms of version 2 of the GNU General Public License as
6# published by the Free Software Foundation.
7#
8# This program is distributed in the hope that it would be useful, but
9# WITHOUT ANY WARRANTY; without even the implied warranty of
10# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11#
12# Further, this software is distributed without any warranty that it is
13# free of the rightful claim of any third person regarding infringement
14# or the like. Any license provided herein, whether implied or
15# otherwise, applies only to this software file. Patent licenses, if
16# any, provided herein do not apply to combinations of this program with
17# other software, or any other product whatsoever.
18#
19# You should have received a copy of the GNU General Public License along
20# with this program; if not, write the Free Software Foundation, Inc., 59
21# Temple Place - Suite 330, Boston MA 02111-1307, USA.
22#
23# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24# Mountain View, CA 94043, or:
25#
26# http://www.sgi.com
27#
28# For further information regarding this notice, see:
29#
30# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31#
32
33EXTRA_CFLAGS += -I $(TOPDIR)/fs/xfs -I $(TOPDIR)/fs/xfs/linux-2.6
34
35ifeq ($(CONFIG_XFS_DEBUG),y)
36 EXTRA_CFLAGS += -g -DDEBUG
37 #EXTRA_CFLAGS += -DQUOTADEBUG
38endif
39ifeq ($(CONFIG_XFS_TRACE),y)
40 EXTRA_CFLAGS += -DXFS_DQUOT_TRACE
41 EXTRA_CFLAGS += -DXFS_VNODE_TRACE
42endif
43
44xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \
45 xfs_dquot_item.o \
46 xfs_trans_dquot.o \
47 xfs_qm_syscalls.o \
48 xfs_qm_bhv.o \
49 xfs_qm.o
50
51ifeq ($(CONFIG_XFS_QUOTA),y)
52xfs-$(CONFIG_PROC_FS) += xfs_qm_stats.o
53endif
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 46ce1e3ce1d6..e2e8d35fa4d0 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -421,7 +421,7 @@ xfs_qm_init_dquot_blk(
421 */ 421 */
422STATIC int 422STATIC int
423xfs_qm_dqalloc( 423xfs_qm_dqalloc(
424 xfs_trans_t *tp, 424 xfs_trans_t **tpp,
425 xfs_mount_t *mp, 425 xfs_mount_t *mp,
426 xfs_dquot_t *dqp, 426 xfs_dquot_t *dqp,
427 xfs_inode_t *quotip, 427 xfs_inode_t *quotip,
@@ -433,6 +433,7 @@ xfs_qm_dqalloc(
433 xfs_bmbt_irec_t map; 433 xfs_bmbt_irec_t map;
434 int nmaps, error, committed; 434 int nmaps, error, committed;
435 xfs_buf_t *bp; 435 xfs_buf_t *bp;
436 xfs_trans_t *tp = *tpp;
436 437
437 ASSERT(tp != NULL); 438 ASSERT(tp != NULL);
438 xfs_dqtrace_entry(dqp, "DQALLOC"); 439 xfs_dqtrace_entry(dqp, "DQALLOC");
@@ -492,10 +493,32 @@ xfs_qm_dqalloc(
492 xfs_qm_init_dquot_blk(tp, mp, INT_GET(dqp->q_core.d_id, ARCH_CONVERT), 493 xfs_qm_init_dquot_blk(tp, mp, INT_GET(dqp->q_core.d_id, ARCH_CONVERT),
493 dqp->dq_flags & XFS_DQ_ALLTYPES, bp); 494 dqp->dq_flags & XFS_DQ_ALLTYPES, bp);
494 495
495 if ((error = xfs_bmap_finish(&tp, &flist, firstblock, &committed))) { 496 /*
497 * xfs_bmap_finish() may commit the current transaction and
498 * start a second transaction if the freelist is not empty.
499 *
500 * Since we still want to modify this buffer, we need to
501 * ensure that the buffer is not released on commit of
502 * the first transaction and ensure the buffer is added to the
503 * second transaction.
504 *
505 * If there is only one transaction then don't stop the buffer
506 * from being released when it commits later on.
507 */
508
509 xfs_trans_bhold(tp, bp);
510
511 if ((error = xfs_bmap_finish(tpp, &flist, firstblock, &committed))) {
496 goto error1; 512 goto error1;
497 } 513 }
498 514
515 if (committed) {
516 tp = *tpp;
517 xfs_trans_bjoin(tp, bp);
518 } else {
519 xfs_trans_bhold_release(tp, bp);
520 }
521
499 *O_bpp = bp; 522 *O_bpp = bp;
500 return 0; 523 return 0;
501 524
@@ -514,7 +537,7 @@ xfs_qm_dqalloc(
514 */ 537 */
515STATIC int 538STATIC int
516xfs_qm_dqtobp( 539xfs_qm_dqtobp(
517 xfs_trans_t *tp, 540 xfs_trans_t **tpp,
518 xfs_dquot_t *dqp, 541 xfs_dquot_t *dqp,
519 xfs_disk_dquot_t **O_ddpp, 542 xfs_disk_dquot_t **O_ddpp,
520 xfs_buf_t **O_bpp, 543 xfs_buf_t **O_bpp,
@@ -528,6 +551,7 @@ xfs_qm_dqtobp(
528 xfs_disk_dquot_t *ddq; 551 xfs_disk_dquot_t *ddq;
529 xfs_dqid_t id; 552 xfs_dqid_t id;
530 boolean_t newdquot; 553 boolean_t newdquot;
554 xfs_trans_t *tp = (tpp ? *tpp : NULL);
531 555
532 mp = dqp->q_mount; 556 mp = dqp->q_mount;
533 id = INT_GET(dqp->q_core.d_id, ARCH_CONVERT); 557 id = INT_GET(dqp->q_core.d_id, ARCH_CONVERT);
@@ -579,9 +603,10 @@ xfs_qm_dqtobp(
579 return (ENOENT); 603 return (ENOENT);
580 604
581 ASSERT(tp); 605 ASSERT(tp);
582 if ((error = xfs_qm_dqalloc(tp, mp, dqp, quotip, 606 if ((error = xfs_qm_dqalloc(tpp, mp, dqp, quotip,
583 dqp->q_fileoffset, &bp))) 607 dqp->q_fileoffset, &bp)))
584 return (error); 608 return (error);
609 tp = *tpp;
585 newdquot = B_TRUE; 610 newdquot = B_TRUE;
586 } else { 611 } else {
587 /* 612 /*
@@ -645,7 +670,7 @@ xfs_qm_dqtobp(
645/* ARGSUSED */ 670/* ARGSUSED */
646STATIC int 671STATIC int
647xfs_qm_dqread( 672xfs_qm_dqread(
648 xfs_trans_t *tp, 673 xfs_trans_t **tpp,
649 xfs_dqid_t id, 674 xfs_dqid_t id,
650 xfs_dquot_t *dqp, /* dquot to get filled in */ 675 xfs_dquot_t *dqp, /* dquot to get filled in */
651 uint flags) 676 uint flags)
@@ -653,15 +678,19 @@ xfs_qm_dqread(
653 xfs_disk_dquot_t *ddqp; 678 xfs_disk_dquot_t *ddqp;
654 xfs_buf_t *bp; 679 xfs_buf_t *bp;
655 int error; 680 int error;
681 xfs_trans_t *tp;
682
683 ASSERT(tpp);
656 684
657 /* 685 /*
658 * get a pointer to the on-disk dquot and the buffer containing it 686 * get a pointer to the on-disk dquot and the buffer containing it
659 * dqp already knows its own type (GROUP/USER). 687 * dqp already knows its own type (GROUP/USER).
660 */ 688 */
661 xfs_dqtrace_entry(dqp, "DQREAD"); 689 xfs_dqtrace_entry(dqp, "DQREAD");
662 if ((error = xfs_qm_dqtobp(tp, dqp, &ddqp, &bp, flags))) { 690 if ((error = xfs_qm_dqtobp(tpp, dqp, &ddqp, &bp, flags))) {
663 return (error); 691 return (error);
664 } 692 }
693 tp = *tpp;
665 694
666 /* copy everything from disk dquot to the incore dquot */ 695 /* copy everything from disk dquot to the incore dquot */
667 memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t)); 696 memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));
@@ -740,7 +769,7 @@ xfs_qm_idtodq(
740 * Read it from disk; xfs_dqread() takes care of 769 * Read it from disk; xfs_dqread() takes care of
741 * all the necessary initialization of dquot's fields (locks, etc) 770 * all the necessary initialization of dquot's fields (locks, etc)
742 */ 771 */
743 if ((error = xfs_qm_dqread(tp, id, dqp, flags))) { 772 if ((error = xfs_qm_dqread(&tp, id, dqp, flags))) {
744 /* 773 /*
745 * This can happen if quotas got turned off (ESRCH), 774 * This can happen if quotas got turned off (ESRCH),
746 * or if the dquot didn't exist on disk and we ask to 775 * or if the dquot didn't exist on disk and we ask to
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index 39175103c8e0..8ebc87176c78 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. 2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved.
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify it 4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as 5 * under the terms of version 2 of the GNU General Public License as
@@ -113,20 +113,6 @@ typedef struct xfs_dquot {
113 113
114#define XFS_DQHOLD(dqp) ((dqp)->q_nrefs++) 114#define XFS_DQHOLD(dqp) ((dqp)->q_nrefs++)
115 115
116/*
117 * Quota Accounting/Enforcement flags
118 */
119#define XFS_ALL_QUOTA_ACCT \
120 (XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT | XFS_PQUOTA_ACCT)
121#define XFS_ALL_QUOTA_ENFD (XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD)
122#define XFS_ALL_QUOTA_CHKD (XFS_UQUOTA_CHKD | XFS_OQUOTA_CHKD)
123
124#define XFS_IS_QUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT)
125#define XFS_IS_QUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ENFD)
126#define XFS_IS_UQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_UQUOTA_ACCT)
127#define XFS_IS_PQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_PQUOTA_ACCT)
128#define XFS_IS_GQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_GQUOTA_ACCT)
129
130#ifdef DEBUG 116#ifdef DEBUG
131static inline int 117static inline int
132XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp) 118XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp)
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index f5271b7b1e84..e74eaa7dd1bc 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -509,6 +509,7 @@ xfs_qm_qoff_logitem_format(xfs_qoff_logitem_t *qf,
509 509
510 log_vector->i_addr = (xfs_caddr_t)&(qf->qql_format); 510 log_vector->i_addr = (xfs_caddr_t)&(qf->qql_format);
511 log_vector->i_len = sizeof(xfs_qoff_logitem_t); 511 log_vector->i_len = sizeof(xfs_qoff_logitem_t);
512 XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_QUOTAOFF);
512 qf->qql_format.qf_size = 1; 513 qf->qql_format.qf_size = 1;
513} 514}
514 515
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index f665ca8f9e96..efde16e0a913 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. 2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved.
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify it 4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as 5 * under the terms of version 2 of the GNU General Public License as
@@ -365,16 +365,6 @@ xfs_qm_mount_quotas(
365 int error = 0; 365 int error = 0;
366 uint sbf; 366 uint sbf;
367 367
368 /*
369 * If a file system had quotas running earlier, but decided to
370 * mount without -o uquota/pquota/gquota options, revoke the
371 * quotachecked license, and bail out.
372 */
373 if (! XFS_IS_QUOTA_ON(mp) &&
374 (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT)) {
375 mp->m_qflags = 0;
376 goto write_changes;
377 }
378 368
379 /* 369 /*
380 * If quotas on realtime volumes is not supported, we disable 370 * If quotas on realtime volumes is not supported, we disable
@@ -388,11 +378,8 @@ xfs_qm_mount_quotas(
388 goto write_changes; 378 goto write_changes;
389 } 379 }
390 380
391#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
392 cmn_err(CE_NOTE, "Attempting to turn on disk quotas.");
393#endif
394
395 ASSERT(XFS_IS_QUOTA_RUNNING(mp)); 381 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
382
396 /* 383 /*
397 * Allocate the quotainfo structure inside the mount struct, and 384 * Allocate the quotainfo structure inside the mount struct, and
398 * create quotainode(s), and change/rev superblock if necessary. 385 * create quotainode(s), and change/rev superblock if necessary.
@@ -410,19 +397,14 @@ xfs_qm_mount_quotas(
410 */ 397 */
411 if (XFS_QM_NEED_QUOTACHECK(mp) && 398 if (XFS_QM_NEED_QUOTACHECK(mp) &&
412 !(mfsi_flags & XFS_MFSI_NO_QUOTACHECK)) { 399 !(mfsi_flags & XFS_MFSI_NO_QUOTACHECK)) {
413#ifdef DEBUG
414 cmn_err(CE_NOTE, "Doing a quotacheck. Please wait.");
415#endif
416 if ((error = xfs_qm_quotacheck(mp))) { 400 if ((error = xfs_qm_quotacheck(mp))) {
417 /* Quotacheck has failed and quotas have 401 /* Quotacheck has failed and quotas have
418 * been disabled. 402 * been disabled.
419 */ 403 */
420 return XFS_ERROR(error); 404 return XFS_ERROR(error);
421 } 405 }
422#ifdef DEBUG
423 cmn_err(CE_NOTE, "Done quotacheck.");
424#endif
425 } 406 }
407
426 write_changes: 408 write_changes:
427 /* 409 /*
428 * We actually don't have to acquire the SB_LOCK at all. 410 * We actually don't have to acquire the SB_LOCK at all.
@@ -2010,7 +1992,7 @@ xfs_qm_quotacheck(
2010 ASSERT(mp->m_quotainfo != NULL); 1992 ASSERT(mp->m_quotainfo != NULL);
2011 ASSERT(xfs_Gqm != NULL); 1993 ASSERT(xfs_Gqm != NULL);
2012 xfs_qm_destroy_quotainfo(mp); 1994 xfs_qm_destroy_quotainfo(mp);
2013 xfs_mount_reset_sbqflags(mp); 1995 (void)xfs_mount_reset_sbqflags(mp);
2014 } else { 1996 } else {
2015 cmn_err(CE_NOTE, "XFS quotacheck %s: Done.", mp->m_fsname); 1997 cmn_err(CE_NOTE, "XFS quotacheck %s: Done.", mp->m_fsname);
2016 } 1998 }
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index b03eecf3b6cb..0b00b3c67015 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -184,8 +184,6 @@ typedef struct xfs_dquot_acct {
184#define XFS_QM_HOLD(xqm) ((xqm)->qm_nrefs++) 184#define XFS_QM_HOLD(xqm) ((xqm)->qm_nrefs++)
185#define XFS_QM_RELE(xqm) ((xqm)->qm_nrefs--) 185#define XFS_QM_RELE(xqm) ((xqm)->qm_nrefs--)
186 186
187extern void xfs_mount_reset_sbqflags(xfs_mount_t *);
188
189extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); 187extern void xfs_qm_destroy_quotainfo(xfs_mount_t *);
190extern int xfs_qm_mount_quotas(xfs_mount_t *, int); 188extern int xfs_qm_mount_quotas(xfs_mount_t *, int);
191extern void xfs_qm_mount_quotainit(xfs_mount_t *, uint); 189extern void xfs_qm_mount_quotainit(xfs_mount_t *, uint);
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index dc3c37a1e158..8890a18a99d8 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. 2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved.
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify it 4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as 5 * under the terms of version 2 of the GNU General Public License as
@@ -229,48 +229,6 @@ xfs_qm_syncall(
229 return error; 229 return error;
230} 230}
231 231
232/*
233 * Clear the quotaflags in memory and in the superblock.
234 */
235void
236xfs_mount_reset_sbqflags(
237 xfs_mount_t *mp)
238{
239 xfs_trans_t *tp;
240 unsigned long s;
241
242 mp->m_qflags = 0;
243 /*
244 * It is OK to look at sb_qflags here in mount path,
245 * without SB_LOCK.
246 */
247 if (mp->m_sb.sb_qflags == 0)
248 return;
249 s = XFS_SB_LOCK(mp);
250 mp->m_sb.sb_qflags = 0;
251 XFS_SB_UNLOCK(mp, s);
252
253 /*
254 * if the fs is readonly, let the incore superblock run
255 * with quotas off but don't flush the update out to disk
256 */
257 if (XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY)
258 return;
259#ifdef QUOTADEBUG
260 xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes");
261#endif
262 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
263 if (xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
264 XFS_DEFAULT_LOG_COUNT)) {
265 xfs_trans_cancel(tp, 0);
266 xfs_fs_cmn_err(CE_ALERT, mp,
267 "xfs_mount_reset_sbqflags: Superblock update failed!");
268 return;
269 }
270 xfs_mod_sb(tp, XFS_SB_QFLAGS);
271 xfs_trans_commit(tp, 0, NULL);
272}
273
274STATIC int 232STATIC int
275xfs_qm_newmount( 233xfs_qm_newmount(
276 xfs_mount_t *mp, 234 xfs_mount_t *mp,
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 68e98962dbef..15e02e8a9d4f 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -1053,7 +1053,6 @@ xfs_qm_dqrele_all_inodes(
1053 struct xfs_mount *mp, 1053 struct xfs_mount *mp,
1054 uint flags) 1054 uint flags)
1055{ 1055{
1056 vmap_t vmap;
1057 xfs_inode_t *ip, *topino; 1056 xfs_inode_t *ip, *topino;
1058 uint ireclaims; 1057 uint ireclaims;
1059 vnode_t *vp; 1058 vnode_t *vp;
@@ -1061,8 +1060,8 @@ xfs_qm_dqrele_all_inodes(
1061 1060
1062 ASSERT(mp->m_quotainfo); 1061 ASSERT(mp->m_quotainfo);
1063 1062
1064again:
1065 XFS_MOUNT_ILOCK(mp); 1063 XFS_MOUNT_ILOCK(mp);
1064again:
1066 ip = mp->m_inodes; 1065 ip = mp->m_inodes;
1067 if (ip == NULL) { 1066 if (ip == NULL) {
1068 XFS_MOUNT_IUNLOCK(mp); 1067 XFS_MOUNT_IUNLOCK(mp);
@@ -1090,18 +1089,14 @@ again:
1090 } 1089 }
1091 vnode_refd = B_FALSE; 1090 vnode_refd = B_FALSE;
1092 if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) { 1091 if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) {
1093 /*
1094 * Sample vp mapping while holding the mplock, lest
1095 * we come across a non-existent vnode.
1096 */
1097 VMAP(vp, vmap);
1098 ireclaims = mp->m_ireclaims; 1092 ireclaims = mp->m_ireclaims;
1099 topino = mp->m_inodes; 1093 topino = mp->m_inodes;
1100 XFS_MOUNT_IUNLOCK(mp); 1094 vp = vn_grab(vp);
1095 if (!vp)
1096 goto again;
1101 1097
1098 XFS_MOUNT_IUNLOCK(mp);
1102 /* XXX restart limit ? */ 1099 /* XXX restart limit ? */
1103 if ( ! (vp = vn_get(vp, &vmap)))
1104 goto again;
1105 xfs_ilock(ip, XFS_ILOCK_EXCL); 1100 xfs_ilock(ip, XFS_ILOCK_EXCL);
1106 vnode_refd = B_TRUE; 1101 vnode_refd = B_TRUE;
1107 } else { 1102 } else {
@@ -1137,7 +1132,6 @@ again:
1137 */ 1132 */
1138 if (topino != mp->m_inodes || mp->m_ireclaims != ireclaims) { 1133 if (topino != mp->m_inodes || mp->m_ireclaims != ireclaims) {
1139 /* XXX use a sentinel */ 1134 /* XXX use a sentinel */
1140 XFS_MOUNT_IUNLOCK(mp);
1141 goto again; 1135 goto again;
1142 } 1136 }
1143 ip = ip->i_mnext; 1137 ip = ip->i_mnext;
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
index 4ed7b6928cd7..4e1a5ec22fa3 100644
--- a/fs/xfs/support/debug.c
+++ b/fs/xfs/support/debug.c
@@ -31,6 +31,7 @@
31 */ 31 */
32 32
33#include "debug.h" 33#include "debug.h"
34#include "spin.h"
34 35
35#include <asm/page.h> 36#include <asm/page.h>
36#include <linux/sched.h> 37#include <linux/sched.h>
diff --git a/fs/xfs/support/ktrace.c b/fs/xfs/support/ktrace.c
index 3dae14c8c55a..fa8394f9437d 100644
--- a/fs/xfs/support/ktrace.c
+++ b/fs/xfs/support/ktrace.c
@@ -170,7 +170,7 @@ ktrace_enter(
170 void *val14, 170 void *val14,
171 void *val15) 171 void *val15)
172{ 172{
173 static lock_t wrap_lock = SPIN_LOCK_UNLOCKED; 173 static DEFINE_SPINLOCK(wrap_lock);
174 unsigned long flags; 174 unsigned long flags;
175 int index; 175 int index;
176 ktrace_entry_t *ktep; 176 ktrace_entry_t *ktep;
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 8d01dce8c532..92fd1d67f878 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -85,7 +85,7 @@ xfs_acl_vhasacl_default(
85{ 85{
86 int error; 86 int error;
87 87
88 if (vp->v_type != VDIR) 88 if (!VN_ISDIR(vp))
89 return 0; 89 return 0;
90 xfs_acl_get_attr(vp, NULL, _ACL_TYPE_DEFAULT, ATTR_KERNOVAL, &error); 90 xfs_acl_get_attr(vp, NULL, _ACL_TYPE_DEFAULT, ATTR_KERNOVAL, &error);
91 return (error == 0); 91 return (error == 0);
@@ -389,7 +389,7 @@ xfs_acl_allow_set(
389 389
390 if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND)) 390 if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND))
391 return EPERM; 391 return EPERM;
392 if (kind == _ACL_TYPE_DEFAULT && vp->v_type != VDIR) 392 if (kind == _ACL_TYPE_DEFAULT && !VN_ISDIR(vp))
393 return ENOTDIR; 393 return ENOTDIR;
394 if (vp->v_vfsp->vfs_flag & VFS_RDONLY) 394 if (vp->v_vfsp->vfs_flag & VFS_RDONLY)
395 return EROFS; 395 return EROFS;
@@ -750,7 +750,7 @@ xfs_acl_inherit(
750 * If the new file is a directory, its default ACL is a copy of 750 * If the new file is a directory, its default ACL is a copy of
751 * the containing directory's default ACL. 751 * the containing directory's default ACL.
752 */ 752 */
753 if (vp->v_type == VDIR) 753 if (VN_ISDIR(vp))
754 xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error); 754 xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error);
755 if (!error && !basicperms) 755 if (!error && !basicperms)
756 xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error); 756 xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error);
diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h
index ae35189b3d70..5ab0dd885b1b 100644
--- a/fs/xfs/xfs_arch.h
+++ b/fs/xfs/xfs_arch.h
@@ -40,22 +40,28 @@
40 40
41#include <asm/byteorder.h> 41#include <asm/byteorder.h>
42 42
43#ifdef __LITTLE_ENDIAN
44# define __BYTE_ORDER __LITTLE_ENDIAN
45#endif
46#ifdef __BIG_ENDIAN 43#ifdef __BIG_ENDIAN
47# define __BYTE_ORDER __BIG_ENDIAN 44#define XFS_NATIVE_HOST 1
45#else
46#undef XFS_NATIVE_HOST
47#endif
48
49#else /* __KERNEL__ */
50
51#if __BYTE_ORDER == __BIG_ENDIAN
52#define XFS_NATIVE_HOST 1
53#else
54#undef XFS_NATIVE_HOST
48#endif 55#endif
49 56
50#endif /* __KERNEL__ */ 57#endif /* __KERNEL__ */
51 58
52/* do we need conversion? */ 59/* do we need conversion? */
53
54#define ARCH_NOCONVERT 1 60#define ARCH_NOCONVERT 1
55#if __BYTE_ORDER == __LITTLE_ENDIAN 61#ifdef XFS_NATIVE_HOST
56# define ARCH_CONVERT 0
57#else
58# define ARCH_CONVERT ARCH_NOCONVERT 62# define ARCH_CONVERT ARCH_NOCONVERT
63#else
64# define ARCH_CONVERT 0
59#endif 65#endif
60 66
61/* generic swapping macros */ 67/* generic swapping macros */
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 6f5d283888aa..3e76def1283d 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -4754,10 +4754,20 @@ xfs_bmapi(
4754 error = xfs_mod_incore_sb(mp, 4754 error = xfs_mod_incore_sb(mp,
4755 XFS_SBS_FDBLOCKS, 4755 XFS_SBS_FDBLOCKS,
4756 -(alen), rsvd); 4756 -(alen), rsvd);
4757 if (!error) 4757 if (!error) {
4758 error = xfs_mod_incore_sb(mp, 4758 error = xfs_mod_incore_sb(mp,
4759 XFS_SBS_FDBLOCKS, 4759 XFS_SBS_FDBLOCKS,
4760 -(indlen), rsvd); 4760 -(indlen), rsvd);
4761 if (error && rt) {
4762 xfs_mod_incore_sb(ip->i_mount,
4763 XFS_SBS_FREXTENTS,
4764 extsz, rsvd);
4765 } else if (error) {
4766 xfs_mod_incore_sb(ip->i_mount,
4767 XFS_SBS_FDBLOCKS,
4768 alen, rsvd);
4769 }
4770 }
4761 4771
4762 if (error) { 4772 if (error) {
4763 if (XFS_IS_QUOTA_ON(ip->i_mount)) 4773 if (XFS_IS_QUOTA_ON(ip->i_mount))
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 09c413576ba8..09a77b17565b 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -2017,7 +2017,7 @@ xfs_bmbt_get_state(
2017 ext_flag); 2017 ext_flag);
2018} 2018}
2019 2019
2020#if __BYTE_ORDER != __BIG_ENDIAN 2020#ifndef XFS_NATIVE_HOST
2021/* Endian flipping versions of the bmbt extraction functions */ 2021/* Endian flipping versions of the bmbt extraction functions */
2022void 2022void
2023xfs_bmbt_disk_get_all( 2023xfs_bmbt_disk_get_all(
@@ -2087,7 +2087,7 @@ xfs_bmbt_disk_get_state(
2087 return xfs_extent_state(xfs_bmbt_disk_get_blockcount(r), 2087 return xfs_extent_state(xfs_bmbt_disk_get_blockcount(r),
2088 ext_flag); 2088 ext_flag);
2089} 2089}
2090#endif 2090#endif /* XFS_NATIVE_HOST */
2091 2091
2092 2092
2093/* 2093/*
@@ -2531,7 +2531,7 @@ xfs_bmbt_set_allf(
2531#endif /* XFS_BIG_BLKNOS */ 2531#endif /* XFS_BIG_BLKNOS */
2532} 2532}
2533 2533
2534#if __BYTE_ORDER != __BIG_ENDIAN 2534#ifndef XFS_NATIVE_HOST
2535/* 2535/*
2536 * Set all the fields in a bmap extent record from the uncompressed form. 2536 * Set all the fields in a bmap extent record from the uncompressed form.
2537 */ 2537 */
@@ -2617,7 +2617,7 @@ xfs_bmbt_disk_set_allf(
2617 } 2617 }
2618#endif /* XFS_BIG_BLKNOS */ 2618#endif /* XFS_BIG_BLKNOS */
2619} 2619}
2620#endif 2620#endif /* XFS_NATIVE_HOST */
2621 2621
2622/* 2622/*
2623 * Set the blockcount field in a bmap extent record. 2623 * Set the blockcount field in a bmap extent record.
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h
index 0a40cf126c28..2cf4fe45cbcb 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h
@@ -62,7 +62,7 @@ typedef struct xfs_bmdr_block
62 * l1:0-20 are blockcount. 62 * l1:0-20 are blockcount.
63 */ 63 */
64 64
65#if __BYTE_ORDER == __LITTLE_ENDIAN 65#ifndef XFS_NATIVE_HOST
66 66
67#define BMBT_TOTAL_BITLEN 128 /* 128 bits, 16 bytes */ 67#define BMBT_TOTAL_BITLEN 128 /* 128 bits, 16 bytes */
68#define BMBT_EXNTFLAG_BITOFF 0 68#define BMBT_EXNTFLAG_BITOFF 0
@@ -87,7 +87,7 @@ typedef struct xfs_bmdr_block
87#define BMBT_BLOCKCOUNT_BITOFF 64 /* Start of second 64 bit container */ 87#define BMBT_BLOCKCOUNT_BITOFF 64 /* Start of second 64 bit container */
88#define BMBT_BLOCKCOUNT_BITLEN 21 88#define BMBT_BLOCKCOUNT_BITLEN 21
89 89
90#endif 90#endif /* XFS_NATIVE_HOST */
91 91
92 92
93#define BMBT_USE_64 1 93#define BMBT_USE_64 1
@@ -505,7 +505,7 @@ xfs_exntst_t
505xfs_bmbt_get_state( 505xfs_bmbt_get_state(
506 xfs_bmbt_rec_t *r); 506 xfs_bmbt_rec_t *r);
507 507
508#if __BYTE_ORDER != __BIG_ENDIAN 508#ifndef XFS_NATIVE_HOST
509void 509void
510xfs_bmbt_disk_get_all( 510xfs_bmbt_disk_get_all(
511 xfs_bmbt_rec_t *r, 511 xfs_bmbt_rec_t *r,
@@ -538,7 +538,7 @@ xfs_bmbt_disk_get_startoff(
538 xfs_bmbt_get_blockcount(r) 538 xfs_bmbt_get_blockcount(r)
539#define xfs_bmbt_disk_get_startoff(r) \ 539#define xfs_bmbt_disk_get_startoff(r) \
540 xfs_bmbt_get_startoff(r) 540 xfs_bmbt_get_startoff(r)
541#endif 541#endif /* XFS_NATIVE_HOST */
542 542
543int 543int
544xfs_bmbt_increment( 544xfs_bmbt_increment(
@@ -623,7 +623,7 @@ xfs_bmbt_set_state(
623 xfs_bmbt_rec_t *r, 623 xfs_bmbt_rec_t *r,
624 xfs_exntst_t v); 624 xfs_exntst_t v);
625 625
626#if __BYTE_ORDER != __BIG_ENDIAN 626#ifndef XFS_NATIVE_HOST
627void 627void
628xfs_bmbt_disk_set_all( 628xfs_bmbt_disk_set_all(
629 xfs_bmbt_rec_t *r, 629 xfs_bmbt_rec_t *r,
@@ -641,7 +641,7 @@ xfs_bmbt_disk_set_allf(
641 xfs_bmbt_set_all(r, s) 641 xfs_bmbt_set_all(r, s)
642#define xfs_bmbt_disk_set_allf(r, o, b, c, v) \ 642#define xfs_bmbt_disk_set_allf(r, o, b, c, v) \
643 xfs_bmbt_set_allf(r, o, b, c, v) 643 xfs_bmbt_set_allf(r, o, b, c, v)
644#endif 644#endif /* XFS_NATIVE_HOST */
645 645
646void 646void
647xfs_bmbt_to_bmdr( 647xfs_bmbt_to_bmdr(
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 30b8285ad476..a264657acfd9 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -274,6 +274,7 @@ xfs_buf_item_format(
274 ((bip->bli_format.blf_map_size - 1) * sizeof(uint))); 274 ((bip->bli_format.blf_map_size - 1) * sizeof(uint)));
275 vecp->i_addr = (xfs_caddr_t)&bip->bli_format; 275 vecp->i_addr = (xfs_caddr_t)&bip->bli_format;
276 vecp->i_len = base_size; 276 vecp->i_len = base_size;
277 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BFORMAT);
277 vecp++; 278 vecp++;
278 nvecs = 1; 279 nvecs = 1;
279 280
@@ -320,12 +321,14 @@ xfs_buf_item_format(
320 buffer_offset = first_bit * XFS_BLI_CHUNK; 321 buffer_offset = first_bit * XFS_BLI_CHUNK;
321 vecp->i_addr = xfs_buf_offset(bp, buffer_offset); 322 vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
322 vecp->i_len = nbits * XFS_BLI_CHUNK; 323 vecp->i_len = nbits * XFS_BLI_CHUNK;
324 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK);
323 nvecs++; 325 nvecs++;
324 break; 326 break;
325 } else if (next_bit != last_bit + 1) { 327 } else if (next_bit != last_bit + 1) {
326 buffer_offset = first_bit * XFS_BLI_CHUNK; 328 buffer_offset = first_bit * XFS_BLI_CHUNK;
327 vecp->i_addr = xfs_buf_offset(bp, buffer_offset); 329 vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
328 vecp->i_len = nbits * XFS_BLI_CHUNK; 330 vecp->i_len = nbits * XFS_BLI_CHUNK;
331 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK);
329 nvecs++; 332 nvecs++;
330 vecp++; 333 vecp++;
331 first_bit = next_bit; 334 first_bit = next_bit;
@@ -337,6 +340,7 @@ xfs_buf_item_format(
337 buffer_offset = first_bit * XFS_BLI_CHUNK; 340 buffer_offset = first_bit * XFS_BLI_CHUNK;
338 vecp->i_addr = xfs_buf_offset(bp, buffer_offset); 341 vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
339 vecp->i_len = nbits * XFS_BLI_CHUNK; 342 vecp->i_len = nbits * XFS_BLI_CHUNK;
343 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK);
340/* You would think we need to bump the nvecs here too, but we do not 344/* You would think we need to bump the nvecs here too, but we do not
341 * this number is used by recovery, and it gets confused by the boundary 345 * this number is used by recovery, and it gets confused by the boundary
342 * split here 346 * split here
diff --git a/fs/xfs/xfs_dir_leaf.h b/fs/xfs/xfs_dir_leaf.h
index dd423ce1bc8d..480bffc1f29f 100644
--- a/fs/xfs/xfs_dir_leaf.h
+++ b/fs/xfs/xfs_dir_leaf.h
@@ -127,13 +127,13 @@ typedef union {
127 * Watch the order here (endian-ness dependent). 127 * Watch the order here (endian-ness dependent).
128 */ 128 */
129 struct { 129 struct {
130#if __BYTE_ORDER == __LITTLE_ENDIAN 130#ifndef XFS_NATIVE_HOST
131 xfs_dahash_t h; /* hash value */ 131 xfs_dahash_t h; /* hash value */
132 __uint32_t be; /* block and entry */ 132 __uint32_t be; /* block and entry */
133#else /* __BYTE_ORDER == __BIG_ENDIAN */ 133#else
134 __uint32_t be; /* block and entry */ 134 __uint32_t be; /* block and entry */
135 xfs_dahash_t h; /* hash value */ 135 xfs_dahash_t h; /* hash value */
136#endif /* __BYTE_ORDER == __BIG_ENDIAN */ 136#endif /* XFS_NATIVE_HOST */
137 } s; 137 } s;
138} xfs_dircook_t; 138} xfs_dircook_t;
139 139
diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h
index 55c17adaaa37..19e872856f6b 100644
--- a/fs/xfs/xfs_dmapi.h
+++ b/fs/xfs/xfs_dmapi.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. 2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved.
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify it 4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as 5 * under the terms of version 2 of the GNU General Public License as
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index db7cbd1bc857..cc7d1494a45d 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -107,6 +107,7 @@ xfs_efi_item_format(xfs_efi_log_item_t *efip,
107 107
108 log_vector->i_addr = (xfs_caddr_t)&(efip->efi_format); 108 log_vector->i_addr = (xfs_caddr_t)&(efip->efi_format);
109 log_vector->i_len = size; 109 log_vector->i_len = size;
110 XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_EFI_FORMAT);
110 ASSERT(size >= sizeof(xfs_efi_log_format_t)); 111 ASSERT(size >= sizeof(xfs_efi_log_format_t));
111} 112}
112 113
@@ -426,6 +427,7 @@ xfs_efd_item_format(xfs_efd_log_item_t *efdp,
426 427
427 log_vector->i_addr = (xfs_caddr_t)&(efdp->efd_format); 428 log_vector->i_addr = (xfs_caddr_t)&(efdp->efd_format);
428 log_vector->i_len = size; 429 log_vector->i_len = size;
430 XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_EFD_FORMAT);
429 ASSERT(size >= sizeof(xfs_efd_log_format_t)); 431 ASSERT(size >= sizeof(xfs_efd_log_format_t));
430} 432}
431 433
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index d3da00045f26..0d9ae8fb4138 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -30,6 +30,8 @@
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ 30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */ 31 */
32 32
33#include <linux/delay.h>
34
33#include "xfs.h" 35#include "xfs.h"
34 36
35#include "xfs_macros.h" 37#include "xfs_macros.h"
@@ -505,17 +507,15 @@ xfs_iget(
505 vnode_t *vp = NULL; 507 vnode_t *vp = NULL;
506 int error; 508 int error;
507 509
508retry:
509 XFS_STATS_INC(xs_ig_attempts); 510 XFS_STATS_INC(xs_ig_attempts);
510 511
512retry:
511 if ((inode = iget_locked(XFS_MTOVFS(mp)->vfs_super, ino))) { 513 if ((inode = iget_locked(XFS_MTOVFS(mp)->vfs_super, ino))) {
512 bhv_desc_t *bdp; 514 bhv_desc_t *bdp;
513 xfs_inode_t *ip; 515 xfs_inode_t *ip;
514 int newnode;
515 516
516 vp = LINVFS_GET_VP(inode); 517 vp = LINVFS_GET_VP(inode);
517 if (inode->i_state & I_NEW) { 518 if (inode->i_state & I_NEW) {
518inode_allocate:
519 vn_initialize(inode); 519 vn_initialize(inode);
520 error = xfs_iget_core(vp, mp, tp, ino, flags, 520 error = xfs_iget_core(vp, mp, tp, ino, flags,
521 lock_flags, ipp, bno); 521 lock_flags, ipp, bno);
@@ -526,32 +526,25 @@ inode_allocate:
526 iput(inode); 526 iput(inode);
527 } 527 }
528 } else { 528 } else {
529 /* These are true if the inode is in inactive or 529 /*
530 * reclaim. The linux inode is about to go away, 530 * If the inode is not fully constructed due to
531 * wait for that path to finish, and try again. 531 * filehandle mistmatches wait for the inode to go
532 * away and try again.
533 *
534 * iget_locked will call __wait_on_freeing_inode
535 * to wait for the inode to go away.
532 */ 536 */
533 if (vp->v_flag & (VINACT | VRECLM)) { 537 if (is_bad_inode(inode) ||
534 vn_wait(vp); 538 ((bdp = vn_bhv_lookup(VN_BHV_HEAD(vp),
539 &xfs_vnodeops)) == NULL)) {
535 iput(inode); 540 iput(inode);
541 delay(1);
536 goto retry; 542 goto retry;
537 } 543 }
538 544
539 if (is_bad_inode(inode)) {
540 iput(inode);
541 return EIO;
542 }
543
544 bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops);
545 if (bdp == NULL) {
546 XFS_STATS_INC(xs_ig_dup);
547 goto inode_allocate;
548 }
549 ip = XFS_BHVTOI(bdp); 545 ip = XFS_BHVTOI(bdp);
550 if (lock_flags != 0) 546 if (lock_flags != 0)
551 xfs_ilock(ip, lock_flags); 547 xfs_ilock(ip, lock_flags);
552 newnode = (ip->i_d.di_mode == 0);
553 if (newnode)
554 xfs_iocore_inode_reinit(ip);
555 XFS_STATS_INC(xs_ig_found); 548 XFS_STATS_INC(xs_ig_found);
556 *ipp = ip; 549 *ipp = ip;
557 error = 0; 550 error = 0;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 34bdf5909687..db43308aae93 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1128,7 +1128,6 @@ xfs_ialloc(
1128 ASSERT(ip != NULL); 1128 ASSERT(ip != NULL);
1129 1129
1130 vp = XFS_ITOV(ip); 1130 vp = XFS_ITOV(ip);
1131 vp->v_type = IFTOVT(mode);
1132 ip->i_d.di_mode = (__uint16_t)mode; 1131 ip->i_d.di_mode = (__uint16_t)mode;
1133 ip->i_d.di_onlink = 0; 1132 ip->i_d.di_onlink = 0;
1134 ip->i_d.di_nlink = nlink; 1133 ip->i_d.di_nlink = nlink;
@@ -1250,7 +1249,7 @@ xfs_ialloc(
1250 */ 1249 */
1251 xfs_trans_log_inode(tp, ip, flags); 1250 xfs_trans_log_inode(tp, ip, flags);
1252 1251
1253 /* now that we have a v_type we can set Linux inode ops (& unlock) */ 1252 /* now that we have an i_mode we can set Linux inode ops (& unlock) */
1254 VFS_INIT_VNODE(XFS_MTOVFS(tp->t_mountp), vp, XFS_ITOBHV(ip), 1); 1253 VFS_INIT_VNODE(XFS_MTOVFS(tp->t_mountp), vp, XFS_ITOBHV(ip), 1);
1255 1254
1256 *ipp = ip; 1255 *ipp = ip;
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 0eed30f5cb19..50e2cadf9091 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -248,6 +248,7 @@ xfs_inode_item_format(
248 248
249 vecp->i_addr = (xfs_caddr_t)&iip->ili_format; 249 vecp->i_addr = (xfs_caddr_t)&iip->ili_format;
250 vecp->i_len = sizeof(xfs_inode_log_format_t); 250 vecp->i_len = sizeof(xfs_inode_log_format_t);
251 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IFORMAT);
251 vecp++; 252 vecp++;
252 nvecs = 1; 253 nvecs = 1;
253 254
@@ -292,6 +293,7 @@ xfs_inode_item_format(
292 293
293 vecp->i_addr = (xfs_caddr_t)&ip->i_d; 294 vecp->i_addr = (xfs_caddr_t)&ip->i_d;
294 vecp->i_len = sizeof(xfs_dinode_core_t); 295 vecp->i_len = sizeof(xfs_dinode_core_t);
296 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE);
295 vecp++; 297 vecp++;
296 nvecs++; 298 nvecs++;
297 iip->ili_format.ilf_fields |= XFS_ILOG_CORE; 299 iip->ili_format.ilf_fields |= XFS_ILOG_CORE;
@@ -339,7 +341,7 @@ xfs_inode_item_format(
339 nrecs = ip->i_df.if_bytes / 341 nrecs = ip->i_df.if_bytes /
340 (uint)sizeof(xfs_bmbt_rec_t); 342 (uint)sizeof(xfs_bmbt_rec_t);
341 ASSERT(nrecs > 0); 343 ASSERT(nrecs > 0);
342#if __BYTE_ORDER == __BIG_ENDIAN 344#ifdef XFS_NATIVE_HOST
343 if (nrecs == ip->i_d.di_nextents) { 345 if (nrecs == ip->i_d.di_nextents) {
344 /* 346 /*
345 * There are no delayed allocation 347 * There are no delayed allocation
@@ -349,6 +351,7 @@ xfs_inode_item_format(
349 vecp->i_addr = 351 vecp->i_addr =
350 (char *)(ip->i_df.if_u1.if_extents); 352 (char *)(ip->i_df.if_u1.if_extents);
351 vecp->i_len = ip->i_df.if_bytes; 353 vecp->i_len = ip->i_df.if_bytes;
354 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT);
352 } else 355 } else
353#endif 356#endif
354 { 357 {
@@ -367,6 +370,7 @@ xfs_inode_item_format(
367 vecp->i_addr = (xfs_caddr_t)ext_buffer; 370 vecp->i_addr = (xfs_caddr_t)ext_buffer;
368 vecp->i_len = xfs_iextents_copy(ip, ext_buffer, 371 vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
369 XFS_DATA_FORK); 372 XFS_DATA_FORK);
373 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT);
370 } 374 }
371 ASSERT(vecp->i_len <= ip->i_df.if_bytes); 375 ASSERT(vecp->i_len <= ip->i_df.if_bytes);
372 iip->ili_format.ilf_dsize = vecp->i_len; 376 iip->ili_format.ilf_dsize = vecp->i_len;
@@ -384,6 +388,7 @@ xfs_inode_item_format(
384 ASSERT(ip->i_df.if_broot != NULL); 388 ASSERT(ip->i_df.if_broot != NULL);
385 vecp->i_addr = (xfs_caddr_t)ip->i_df.if_broot; 389 vecp->i_addr = (xfs_caddr_t)ip->i_df.if_broot;
386 vecp->i_len = ip->i_df.if_broot_bytes; 390 vecp->i_len = ip->i_df.if_broot_bytes;
391 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IBROOT);
387 vecp++; 392 vecp++;
388 nvecs++; 393 nvecs++;
389 iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes; 394 iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes;
@@ -409,6 +414,7 @@ xfs_inode_item_format(
409 ASSERT((ip->i_df.if_real_bytes == 0) || 414 ASSERT((ip->i_df.if_real_bytes == 0) ||
410 (ip->i_df.if_real_bytes == data_bytes)); 415 (ip->i_df.if_real_bytes == data_bytes));
411 vecp->i_len = (int)data_bytes; 416 vecp->i_len = (int)data_bytes;
417 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ILOCAL);
412 vecp++; 418 vecp++;
413 nvecs++; 419 nvecs++;
414 iip->ili_format.ilf_dsize = (unsigned)data_bytes; 420 iip->ili_format.ilf_dsize = (unsigned)data_bytes;
@@ -467,7 +473,7 @@ xfs_inode_item_format(
467#endif 473#endif
468 ASSERT(nrecs > 0); 474 ASSERT(nrecs > 0);
469 ASSERT(nrecs == ip->i_d.di_anextents); 475 ASSERT(nrecs == ip->i_d.di_anextents);
470#if __BYTE_ORDER == __BIG_ENDIAN 476#ifdef XFS_NATIVE_HOST
471 /* 477 /*
472 * There are not delayed allocation extents 478 * There are not delayed allocation extents
473 * for attributes, so just point at the array. 479 * for attributes, so just point at the array.
@@ -486,6 +492,7 @@ xfs_inode_item_format(
486 vecp->i_len = xfs_iextents_copy(ip, ext_buffer, 492 vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
487 XFS_ATTR_FORK); 493 XFS_ATTR_FORK);
488#endif 494#endif
495 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_EXT);
489 iip->ili_format.ilf_asize = vecp->i_len; 496 iip->ili_format.ilf_asize = vecp->i_len;
490 vecp++; 497 vecp++;
491 nvecs++; 498 nvecs++;
@@ -500,6 +507,7 @@ xfs_inode_item_format(
500 ASSERT(ip->i_afp->if_broot != NULL); 507 ASSERT(ip->i_afp->if_broot != NULL);
501 vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_broot; 508 vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_broot;
502 vecp->i_len = ip->i_afp->if_broot_bytes; 509 vecp->i_len = ip->i_afp->if_broot_bytes;
510 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_BROOT);
503 vecp++; 511 vecp++;
504 nvecs++; 512 nvecs++;
505 iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes; 513 iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes;
@@ -523,6 +531,7 @@ xfs_inode_item_format(
523 ASSERT((ip->i_afp->if_real_bytes == 0) || 531 ASSERT((ip->i_afp->if_real_bytes == 0) ||
524 (ip->i_afp->if_real_bytes == data_bytes)); 532 (ip->i_afp->if_real_bytes == data_bytes));
525 vecp->i_len = (int)data_bytes; 533 vecp->i_len = (int)data_bytes;
534 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_LOCAL);
526 vecp++; 535 vecp++;
527 nvecs++; 536 nvecs++;
528 iip->ili_format.ilf_asize = (unsigned)data_bytes; 537 iip->ili_format.ilf_asize = (unsigned)data_bytes;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 2edd6769e5d3..d0f5be63cddb 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -226,13 +226,12 @@ xfs_iomap(
226 xfs_iomap_enter_trace(XFS_IOMAP_READ_ENTER, io, offset, count); 226 xfs_iomap_enter_trace(XFS_IOMAP_READ_ENTER, io, offset, count);
227 lockmode = XFS_LCK_MAP_SHARED(mp, io); 227 lockmode = XFS_LCK_MAP_SHARED(mp, io);
228 bmapi_flags = XFS_BMAPI_ENTIRE; 228 bmapi_flags = XFS_BMAPI_ENTIRE;
229 if (flags & BMAPI_IGNSTATE)
230 bmapi_flags |= XFS_BMAPI_IGSTATE;
231 break; 229 break;
232 case BMAPI_WRITE: 230 case BMAPI_WRITE:
233 xfs_iomap_enter_trace(XFS_IOMAP_WRITE_ENTER, io, offset, count); 231 xfs_iomap_enter_trace(XFS_IOMAP_WRITE_ENTER, io, offset, count);
234 lockmode = XFS_ILOCK_EXCL|XFS_EXTSIZE_WR; 232 lockmode = XFS_ILOCK_EXCL|XFS_EXTSIZE_WR;
235 bmapi_flags = 0; 233 if (flags & BMAPI_IGNSTATE)
234 bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE;
236 XFS_ILOCK(mp, io, lockmode); 235 XFS_ILOCK(mp, io, lockmode);
237 break; 236 break;
238 case BMAPI_ALLOCATE: 237 case BMAPI_ALLOCATE:
@@ -391,9 +390,9 @@ xfs_iomap_write_direct(
391 xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS], *imapp; 390 xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS], *imapp;
392 xfs_bmap_free_t free_list; 391 xfs_bmap_free_t free_list;
393 int aeof; 392 int aeof;
394 xfs_filblks_t datablocks, qblocks, resblks; 393 xfs_filblks_t qblocks, resblks;
395 int committed; 394 int committed;
396 int numrtextents; 395 int resrtextents;
397 396
398 /* 397 /*
399 * Make sure that the dquots are there. This doesn't hold 398 * Make sure that the dquots are there. This doesn't hold
@@ -434,14 +433,14 @@ xfs_iomap_write_direct(
434 433
435 if (!(extsz = ip->i_d.di_extsize)) 434 if (!(extsz = ip->i_d.di_extsize))
436 extsz = mp->m_sb.sb_rextsize; 435 extsz = mp->m_sb.sb_rextsize;
437 numrtextents = qblocks = (count_fsb + extsz - 1); 436 resrtextents = qblocks = (count_fsb + extsz - 1);
438 do_div(numrtextents, mp->m_sb.sb_rextsize); 437 do_div(resrtextents, mp->m_sb.sb_rextsize);
438 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
439 quota_flag = XFS_QMOPT_RES_RTBLKS; 439 quota_flag = XFS_QMOPT_RES_RTBLKS;
440 datablocks = 0;
441 } else { 440 } else {
442 datablocks = qblocks = count_fsb; 441 resrtextents = 0;
442 resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, count_fsb);
443 quota_flag = XFS_QMOPT_RES_REGBLKS; 443 quota_flag = XFS_QMOPT_RES_REGBLKS;
444 numrtextents = 0;
445 } 444 }
446 445
447 /* 446 /*
@@ -449,9 +448,8 @@ xfs_iomap_write_direct(
449 */ 448 */
450 xfs_iunlock(ip, XFS_ILOCK_EXCL); 449 xfs_iunlock(ip, XFS_ILOCK_EXCL);
451 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); 450 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
452 resblks = XFS_DIOSTRAT_SPACE_RES(mp, datablocks);
453 error = xfs_trans_reserve(tp, resblks, 451 error = xfs_trans_reserve(tp, resblks,
454 XFS_WRITE_LOG_RES(mp), numrtextents, 452 XFS_WRITE_LOG_RES(mp), resrtextents,
455 XFS_TRANS_PERM_LOG_RES, 453 XFS_TRANS_PERM_LOG_RES,
456 XFS_WRITE_LOG_COUNT); 454 XFS_WRITE_LOG_COUNT);
457 455
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 1cd2ac163877..54a6f1142403 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -159,11 +159,15 @@ xfs_buftarg_t *xlog_target;
159void 159void
160xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string) 160xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string)
161{ 161{
162 if (! log->l_grant_trace) { 162 unsigned long cnts;
163 log->l_grant_trace = ktrace_alloc(1024, KM_NOSLEEP); 163
164 if (! log->l_grant_trace) 164 if (!log->l_grant_trace) {
165 log->l_grant_trace = ktrace_alloc(2048, KM_NOSLEEP);
166 if (!log->l_grant_trace)
165 return; 167 return;
166 } 168 }
169 /* ticket counts are 1 byte each */
170 cnts = ((unsigned long)tic->t_ocnt) | ((unsigned long)tic->t_cnt) << 8;
167 171
168 ktrace_enter(log->l_grant_trace, 172 ktrace_enter(log->l_grant_trace,
169 (void *)tic, 173 (void *)tic,
@@ -178,10 +182,10 @@ xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string)
178 (void *)((unsigned long)CYCLE_LSN(log->l_tail_lsn)), 182 (void *)((unsigned long)CYCLE_LSN(log->l_tail_lsn)),
179 (void *)((unsigned long)BLOCK_LSN(log->l_tail_lsn)), 183 (void *)((unsigned long)BLOCK_LSN(log->l_tail_lsn)),
180 (void *)string, 184 (void *)string,
181 (void *)((unsigned long)13), 185 (void *)((unsigned long)tic->t_trans_type),
182 (void *)((unsigned long)14), 186 (void *)cnts,
183 (void *)((unsigned long)15), 187 (void *)((unsigned long)tic->t_curr_res),
184 (void *)((unsigned long)16)); 188 (void *)((unsigned long)tic->t_unit_res));
185} 189}
186 190
187void 191void
@@ -274,9 +278,11 @@ xfs_log_done(xfs_mount_t *mp,
274 * Release ticket if not permanent reservation or a specifc 278 * Release ticket if not permanent reservation or a specifc
275 * request has been made to release a permanent reservation. 279 * request has been made to release a permanent reservation.
276 */ 280 */
281 xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)");
277 xlog_ungrant_log_space(log, ticket); 282 xlog_ungrant_log_space(log, ticket);
278 xlog_state_put_ticket(log, ticket); 283 xlog_state_put_ticket(log, ticket);
279 } else { 284 } else {
285 xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)");
280 xlog_regrant_reserve_log_space(log, ticket); 286 xlog_regrant_reserve_log_space(log, ticket);
281 } 287 }
282 288
@@ -399,7 +405,8 @@ xfs_log_reserve(xfs_mount_t *mp,
399 int cnt, 405 int cnt,
400 xfs_log_ticket_t *ticket, 406 xfs_log_ticket_t *ticket,
401 __uint8_t client, 407 __uint8_t client,
402 uint flags) 408 uint flags,
409 uint t_type)
403{ 410{
404 xlog_t *log = mp->m_log; 411 xlog_t *log = mp->m_log;
405 xlog_ticket_t *internal_ticket; 412 xlog_ticket_t *internal_ticket;
@@ -421,13 +428,19 @@ xfs_log_reserve(xfs_mount_t *mp,
421 if (*ticket != NULL) { 428 if (*ticket != NULL) {
422 ASSERT(flags & XFS_LOG_PERM_RESERV); 429 ASSERT(flags & XFS_LOG_PERM_RESERV);
423 internal_ticket = (xlog_ticket_t *)*ticket; 430 internal_ticket = (xlog_ticket_t *)*ticket;
431 xlog_trace_loggrant(log, internal_ticket, "xfs_log_reserve: existing ticket (permanent trans)");
424 xlog_grant_push_ail(mp, internal_ticket->t_unit_res); 432 xlog_grant_push_ail(mp, internal_ticket->t_unit_res);
425 retval = xlog_regrant_write_log_space(log, internal_ticket); 433 retval = xlog_regrant_write_log_space(log, internal_ticket);
426 } else { 434 } else {
427 /* may sleep if need to allocate more tickets */ 435 /* may sleep if need to allocate more tickets */
428 internal_ticket = xlog_ticket_get(log, unit_bytes, cnt, 436 internal_ticket = xlog_ticket_get(log, unit_bytes, cnt,
429 client, flags); 437 client, flags);
438 internal_ticket->t_trans_type = t_type;
430 *ticket = internal_ticket; 439 *ticket = internal_ticket;
440 xlog_trace_loggrant(log, internal_ticket,
441 (internal_ticket->t_flags & XLOG_TIC_PERM_RESERV) ?
442 "xfs_log_reserve: create new ticket (permanent trans)" :
443 "xfs_log_reserve: create new ticket");
431 xlog_grant_push_ail(mp, 444 xlog_grant_push_ail(mp,
432 (internal_ticket->t_unit_res * 445 (internal_ticket->t_unit_res *
433 internal_ticket->t_cnt)); 446 internal_ticket->t_cnt));
@@ -601,8 +614,9 @@ xfs_log_unmount_write(xfs_mount_t *mp)
601 if (! (XLOG_FORCED_SHUTDOWN(log))) { 614 if (! (XLOG_FORCED_SHUTDOWN(log))) {
602 reg[0].i_addr = (void*)&magic; 615 reg[0].i_addr = (void*)&magic;
603 reg[0].i_len = sizeof(magic); 616 reg[0].i_len = sizeof(magic);
617 XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_UNMOUNT);
604 618
605 error = xfs_log_reserve(mp, 600, 1, &tic, XFS_LOG, 0); 619 error = xfs_log_reserve(mp, 600, 1, &tic, XFS_LOG, 0, 0);
606 if (!error) { 620 if (!error) {
607 /* remove inited flag */ 621 /* remove inited flag */
608 ((xlog_ticket_t *)tic)->t_flags = 0; 622 ((xlog_ticket_t *)tic)->t_flags = 0;
@@ -1272,6 +1286,7 @@ xlog_commit_record(xfs_mount_t *mp,
1272 1286
1273 reg[0].i_addr = NULL; 1287 reg[0].i_addr = NULL;
1274 reg[0].i_len = 0; 1288 reg[0].i_len = 0;
1289 XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_COMMIT);
1275 1290
1276 ASSERT_ALWAYS(iclog); 1291 ASSERT_ALWAYS(iclog);
1277 if ((error = xlog_write(mp, reg, 1, ticket, commitlsnp, 1292 if ((error = xlog_write(mp, reg, 1, ticket, commitlsnp,
@@ -1605,6 +1620,117 @@ xlog_state_finish_copy(xlog_t *log,
1605 1620
1606 1621
1607/* 1622/*
1623 * print out info relating to regions written which consume
1624 * the reservation
1625 */
1626#if defined(XFS_LOG_RES_DEBUG)
1627STATIC void
1628xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
1629{
1630 uint i;
1631 uint ophdr_spc = ticket->t_res_num_ophdrs * (uint)sizeof(xlog_op_header_t);
1632
1633 /* match with XLOG_REG_TYPE_* in xfs_log.h */
1634 static char *res_type_str[XLOG_REG_TYPE_MAX] = {
1635 "bformat",
1636 "bchunk",
1637 "efi_format",
1638 "efd_format",
1639 "iformat",
1640 "icore",
1641 "iext",
1642 "ibroot",
1643 "ilocal",
1644 "iattr_ext",
1645 "iattr_broot",
1646 "iattr_local",
1647 "qformat",
1648 "dquot",
1649 "quotaoff",
1650 "LR header",
1651 "unmount",
1652 "commit",
1653 "trans header"
1654 };
1655 static char *trans_type_str[XFS_TRANS_TYPE_MAX] = {
1656 "SETATTR_NOT_SIZE",
1657 "SETATTR_SIZE",
1658 "INACTIVE",
1659 "CREATE",
1660 "CREATE_TRUNC",
1661 "TRUNCATE_FILE",
1662 "REMOVE",
1663 "LINK",
1664 "RENAME",
1665 "MKDIR",
1666 "RMDIR",
1667 "SYMLINK",
1668 "SET_DMATTRS",
1669 "GROWFS",
1670 "STRAT_WRITE",
1671 "DIOSTRAT",
1672 "WRITE_SYNC",
1673 "WRITEID",
1674 "ADDAFORK",
1675 "ATTRINVAL",
1676 "ATRUNCATE",
1677 "ATTR_SET",
1678 "ATTR_RM",
1679 "ATTR_FLAG",
1680 "CLEAR_AGI_BUCKET",
1681 "QM_SBCHANGE",
1682 "DUMMY1",
1683 "DUMMY2",
1684 "QM_QUOTAOFF",
1685 "QM_DQALLOC",
1686 "QM_SETQLIM",
1687 "QM_DQCLUSTER",
1688 "QM_QINOCREATE",
1689 "QM_QUOTAOFF_END",
1690 "SB_UNIT",
1691 "FSYNC_TS",
1692 "GROWFSRT_ALLOC",
1693 "GROWFSRT_ZERO",
1694 "GROWFSRT_FREE",
1695 "SWAPEXT"
1696 };
1697
1698 xfs_fs_cmn_err(CE_WARN, mp,
1699 "xfs_log_write: reservation summary:\n"
1700 " trans type = %s (%u)\n"
1701 " unit res = %d bytes\n"
1702 " current res = %d bytes\n"
1703 " total reg = %u bytes (o/flow = %u bytes)\n"
1704 " ophdrs = %u (ophdr space = %u bytes)\n"
1705 " ophdr + reg = %u bytes\n"
1706 " num regions = %u\n",
1707 ((ticket->t_trans_type <= 0 ||
1708 ticket->t_trans_type > XFS_TRANS_TYPE_MAX) ?
1709 "bad-trans-type" : trans_type_str[ticket->t_trans_type-1]),
1710 ticket->t_trans_type,
1711 ticket->t_unit_res,
1712 ticket->t_curr_res,
1713 ticket->t_res_arr_sum, ticket->t_res_o_flow,
1714 ticket->t_res_num_ophdrs, ophdr_spc,
1715 ticket->t_res_arr_sum +
1716 ticket->t_res_o_flow + ophdr_spc,
1717 ticket->t_res_num);
1718
1719 for (i = 0; i < ticket->t_res_num; i++) {
1720 uint r_type = ticket->t_res_arr[i].r_type;
1721 cmn_err(CE_WARN,
1722 "region[%u]: %s - %u bytes\n",
1723 i,
1724 ((r_type <= 0 || r_type > XLOG_REG_TYPE_MAX) ?
1725 "bad-rtype" : res_type_str[r_type-1]),
1726 ticket->t_res_arr[i].r_len);
1727 }
1728}
1729#else
1730#define xlog_print_tic_res(mp, ticket)
1731#endif
1732
1733/*
1608 * Write some region out to in-core log 1734 * Write some region out to in-core log
1609 * 1735 *
1610 * This will be called when writing externally provided regions or when 1736 * This will be called when writing externally provided regions or when
@@ -1677,16 +1803,21 @@ xlog_write(xfs_mount_t * mp,
1677 * xlog_op_header_t and may need to be double word aligned. 1803 * xlog_op_header_t and may need to be double word aligned.
1678 */ 1804 */
1679 len = 0; 1805 len = 0;
1680 if (ticket->t_flags & XLOG_TIC_INITED) /* acct for start rec of xact */ 1806 if (ticket->t_flags & XLOG_TIC_INITED) { /* acct for start rec of xact */
1681 len += sizeof(xlog_op_header_t); 1807 len += sizeof(xlog_op_header_t);
1808 XLOG_TIC_ADD_OPHDR(ticket);
1809 }
1682 1810
1683 for (index = 0; index < nentries; index++) { 1811 for (index = 0; index < nentries; index++) {
1684 len += sizeof(xlog_op_header_t); /* each region gets >= 1 */ 1812 len += sizeof(xlog_op_header_t); /* each region gets >= 1 */
1813 XLOG_TIC_ADD_OPHDR(ticket);
1685 len += reg[index].i_len; 1814 len += reg[index].i_len;
1815 XLOG_TIC_ADD_REGION(ticket, reg[index].i_len, reg[index].i_type);
1686 } 1816 }
1687 contwr = *start_lsn = 0; 1817 contwr = *start_lsn = 0;
1688 1818
1689 if (ticket->t_curr_res < len) { 1819 if (ticket->t_curr_res < len) {
1820 xlog_print_tic_res(mp, ticket);
1690#ifdef DEBUG 1821#ifdef DEBUG
1691 xlog_panic( 1822 xlog_panic(
1692 "xfs_log_write: reservation ran out. Need to up reservation"); 1823 "xfs_log_write: reservation ran out. Need to up reservation");
@@ -1790,6 +1921,7 @@ xlog_write(xfs_mount_t * mp,
1790 len += sizeof(xlog_op_header_t); /* from splitting of region */ 1921 len += sizeof(xlog_op_header_t); /* from splitting of region */
1791 /* account for new log op header */ 1922 /* account for new log op header */
1792 ticket->t_curr_res -= sizeof(xlog_op_header_t); 1923 ticket->t_curr_res -= sizeof(xlog_op_header_t);
1924 XLOG_TIC_ADD_OPHDR(ticket);
1793 } 1925 }
1794 xlog_verify_dest_ptr(log, ptr); 1926 xlog_verify_dest_ptr(log, ptr);
1795 1927
@@ -2282,6 +2414,9 @@ restart:
2282 */ 2414 */
2283 if (log_offset == 0) { 2415 if (log_offset == 0) {
2284 ticket->t_curr_res -= log->l_iclog_hsize; 2416 ticket->t_curr_res -= log->l_iclog_hsize;
2417 XLOG_TIC_ADD_REGION(ticket,
2418 log->l_iclog_hsize,
2419 XLOG_REG_TYPE_LRHEADER);
2285 INT_SET(head->h_cycle, ARCH_CONVERT, log->l_curr_cycle); 2420 INT_SET(head->h_cycle, ARCH_CONVERT, log->l_curr_cycle);
2286 ASSIGN_LSN(head->h_lsn, log); 2421 ASSIGN_LSN(head->h_lsn, log);
2287 ASSERT(log->l_curr_block >= 0); 2422 ASSERT(log->l_curr_block >= 0);
@@ -2468,6 +2603,7 @@ xlog_regrant_write_log_space(xlog_t *log,
2468#endif 2603#endif
2469 2604
2470 tic->t_curr_res = tic->t_unit_res; 2605 tic->t_curr_res = tic->t_unit_res;
2606 XLOG_TIC_RESET_RES(tic);
2471 2607
2472 if (tic->t_cnt > 0) 2608 if (tic->t_cnt > 0)
2473 return (0); 2609 return (0);
@@ -2608,6 +2744,7 @@ xlog_regrant_reserve_log_space(xlog_t *log,
2608 XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'w'); 2744 XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'w');
2609 XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'r'); 2745 XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'r');
2610 ticket->t_curr_res = ticket->t_unit_res; 2746 ticket->t_curr_res = ticket->t_unit_res;
2747 XLOG_TIC_RESET_RES(ticket);
2611 xlog_trace_loggrant(log, ticket, 2748 xlog_trace_loggrant(log, ticket,
2612 "xlog_regrant_reserve_log_space: sub current res"); 2749 "xlog_regrant_reserve_log_space: sub current res");
2613 xlog_verify_grant_head(log, 1); 2750 xlog_verify_grant_head(log, 1);
@@ -2624,6 +2761,7 @@ xlog_regrant_reserve_log_space(xlog_t *log,
2624 xlog_verify_grant_head(log, 0); 2761 xlog_verify_grant_head(log, 0);
2625 GRANT_UNLOCK(log, s); 2762 GRANT_UNLOCK(log, s);
2626 ticket->t_curr_res = ticket->t_unit_res; 2763 ticket->t_curr_res = ticket->t_unit_res;
2764 XLOG_TIC_RESET_RES(ticket);
2627} /* xlog_regrant_reserve_log_space */ 2765} /* xlog_regrant_reserve_log_space */
2628 2766
2629 2767
@@ -3179,29 +3317,57 @@ xlog_ticket_get(xlog_t *log,
3179 * and their unit amount is the total amount of space required. 3317 * and their unit amount is the total amount of space required.
3180 * 3318 *
3181 * The following lines of code account for non-transaction data 3319 * The following lines of code account for non-transaction data
3182 * which occupy space in the on-disk log. 3320 * which occupy space in the on-disk log.
3321 *
3322 * Normal form of a transaction is:
3323 * <oph><trans-hdr><start-oph><reg1-oph><reg1><reg2-oph>...<commit-oph>
3324 * and then there are LR hdrs, split-recs and roundoff at end of syncs.
3325 *
3326 * We need to account for all the leadup data and trailer data
3327 * around the transaction data.
3328 * And then we need to account for the worst case in terms of using
3329 * more space.
3330 * The worst case will happen if:
3331 * - the placement of the transaction happens to be such that the
3332 * roundoff is at its maximum
3333 * - the transaction data is synced before the commit record is synced
3334 * i.e. <transaction-data><roundoff> | <commit-rec><roundoff>
3335 * Therefore the commit record is in its own Log Record.
3336 * This can happen as the commit record is called with its
3337 * own region to xlog_write().
3338 * This then means that in the worst case, roundoff can happen for
3339 * the commit-rec as well.
3340 * The commit-rec is smaller than padding in this scenario and so it is
3341 * not added separately.
3183 */ 3342 */
3184 3343
3344 /* for trans header */
3345 unit_bytes += sizeof(xlog_op_header_t);
3346 unit_bytes += sizeof(xfs_trans_header_t);
3347
3185 /* for start-rec */ 3348 /* for start-rec */
3186 unit_bytes += sizeof(xlog_op_header_t); 3349 unit_bytes += sizeof(xlog_op_header_t);
3350
3351 /* for LR headers */
3352 num_headers = ((unit_bytes + log->l_iclog_size-1) >> log->l_iclog_size_log);
3353 unit_bytes += log->l_iclog_hsize * num_headers;
3354
3355 /* for commit-rec LR header - note: padding will subsume the ophdr */
3356 unit_bytes += log->l_iclog_hsize;
3357
3358 /* for split-recs - ophdrs added when data split over LRs */
3359 unit_bytes += sizeof(xlog_op_header_t) * num_headers;
3187 3360
3188 /* for padding */ 3361 /* for roundoff padding for transaction data and one for commit record */
3189 if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) && 3362 if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) &&
3190 log->l_mp->m_sb.sb_logsunit > 1) { 3363 log->l_mp->m_sb.sb_logsunit > 1) {
3191 /* log su roundoff */ 3364 /* log su roundoff */
3192 unit_bytes += log->l_mp->m_sb.sb_logsunit; 3365 unit_bytes += 2*log->l_mp->m_sb.sb_logsunit;
3193 } else { 3366 } else {
3194 /* BB roundoff */ 3367 /* BB roundoff */
3195 unit_bytes += BBSIZE; 3368 unit_bytes += 2*BBSIZE;
3196 } 3369 }
3197 3370
3198 /* for commit-rec */
3199 unit_bytes += sizeof(xlog_op_header_t);
3200
3201 /* for LR headers */
3202 num_headers = ((unit_bytes + log->l_iclog_size-1) >> log->l_iclog_size_log);
3203 unit_bytes += log->l_iclog_hsize * num_headers;
3204
3205 tic->t_unit_res = unit_bytes; 3371 tic->t_unit_res = unit_bytes;
3206 tic->t_curr_res = unit_bytes; 3372 tic->t_curr_res = unit_bytes;
3207 tic->t_cnt = cnt; 3373 tic->t_cnt = cnt;
@@ -3209,10 +3375,13 @@ xlog_ticket_get(xlog_t *log,
3209 tic->t_tid = (xlog_tid_t)((__psint_t)tic & 0xffffffff); 3375 tic->t_tid = (xlog_tid_t)((__psint_t)tic & 0xffffffff);
3210 tic->t_clientid = client; 3376 tic->t_clientid = client;
3211 tic->t_flags = XLOG_TIC_INITED; 3377 tic->t_flags = XLOG_TIC_INITED;
3378 tic->t_trans_type = 0;
3212 if (xflags & XFS_LOG_PERM_RESERV) 3379 if (xflags & XFS_LOG_PERM_RESERV)
3213 tic->t_flags |= XLOG_TIC_PERM_RESERV; 3380 tic->t_flags |= XLOG_TIC_PERM_RESERV;
3214 sv_init(&(tic->t_sema), SV_DEFAULT, "logtick"); 3381 sv_init(&(tic->t_sema), SV_DEFAULT, "logtick");
3215 3382
3383 XLOG_TIC_RESET_RES(tic);
3384
3216 return tic; 3385 return tic;
3217} /* xlog_ticket_get */ 3386} /* xlog_ticket_get */
3218 3387
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 0db122ddda3f..18961119fc65 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -114,9 +114,44 @@ xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
114#define XFS_VOLUME 0x2 114#define XFS_VOLUME 0x2
115#define XFS_LOG 0xaa 115#define XFS_LOG 0xaa
116 116
117
118/* Region types for iovec's i_type */
119#if defined(XFS_LOG_RES_DEBUG)
120#define XLOG_REG_TYPE_BFORMAT 1
121#define XLOG_REG_TYPE_BCHUNK 2
122#define XLOG_REG_TYPE_EFI_FORMAT 3
123#define XLOG_REG_TYPE_EFD_FORMAT 4
124#define XLOG_REG_TYPE_IFORMAT 5
125#define XLOG_REG_TYPE_ICORE 6
126#define XLOG_REG_TYPE_IEXT 7
127#define XLOG_REG_TYPE_IBROOT 8
128#define XLOG_REG_TYPE_ILOCAL 9
129#define XLOG_REG_TYPE_IATTR_EXT 10
130#define XLOG_REG_TYPE_IATTR_BROOT 11
131#define XLOG_REG_TYPE_IATTR_LOCAL 12
132#define XLOG_REG_TYPE_QFORMAT 13
133#define XLOG_REG_TYPE_DQUOT 14
134#define XLOG_REG_TYPE_QUOTAOFF 15
135#define XLOG_REG_TYPE_LRHEADER 16
136#define XLOG_REG_TYPE_UNMOUNT 17
137#define XLOG_REG_TYPE_COMMIT 18
138#define XLOG_REG_TYPE_TRANSHDR 19
139#define XLOG_REG_TYPE_MAX 19
140#endif
141
142#if defined(XFS_LOG_RES_DEBUG)
143#define XLOG_VEC_SET_TYPE(vecp, t) ((vecp)->i_type = (t))
144#else
145#define XLOG_VEC_SET_TYPE(vecp, t)
146#endif
147
148
117typedef struct xfs_log_iovec { 149typedef struct xfs_log_iovec {
118 xfs_caddr_t i_addr; /* beginning address of region */ 150 xfs_caddr_t i_addr; /* beginning address of region */
119 int i_len; /* length in bytes of region */ 151 int i_len; /* length in bytes of region */
152#if defined(XFS_LOG_RES_DEBUG)
153 uint i_type; /* type of region */
154#endif
120} xfs_log_iovec_t; 155} xfs_log_iovec_t;
121 156
122typedef void* xfs_log_ticket_t; 157typedef void* xfs_log_ticket_t;
@@ -159,7 +194,8 @@ int xfs_log_reserve(struct xfs_mount *mp,
159 int count, 194 int count,
160 xfs_log_ticket_t *ticket, 195 xfs_log_ticket_t *ticket,
161 __uint8_t clientid, 196 __uint8_t clientid,
162 uint flags); 197 uint flags,
198 uint t_type);
163int xfs_log_write(struct xfs_mount *mp, 199int xfs_log_write(struct xfs_mount *mp,
164 xfs_log_iovec_t region[], 200 xfs_log_iovec_t region[],
165 int nentries, 201 int nentries,
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 1a1d452f15f9..a884cea82fca 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -112,7 +112,7 @@ struct xfs_mount;
112 * this has endian issues, of course. 112 * this has endian issues, of course.
113 */ 113 */
114 114
115#if __BYTE_ORDER == __LITTLE_ENDIAN 115#ifndef XFS_NATIVE_HOST
116#define GET_CLIENT_ID(i,arch) \ 116#define GET_CLIENT_ID(i,arch) \
117 ((i) & 0xff) 117 ((i) & 0xff)
118#else 118#else
@@ -335,18 +335,66 @@ typedef __uint32_t xlog_tid_t;
335 335
336#define XLOG_COVER_OPS 5 336#define XLOG_COVER_OPS 5
337 337
338
339/* Ticket reservation region accounting */
340#if defined(XFS_LOG_RES_DEBUG)
341#define XLOG_TIC_LEN_MAX 15
342#define XLOG_TIC_RESET_RES(t) ((t)->t_res_num = \
343 (t)->t_res_arr_sum = (t)->t_res_num_ophdrs = 0)
344#define XLOG_TIC_ADD_OPHDR(t) ((t)->t_res_num_ophdrs++)
345#define XLOG_TIC_ADD_REGION(t, len, type) \
346 do { \
347 if ((t)->t_res_num == XLOG_TIC_LEN_MAX) { \
348 /* add to overflow and start again */ \
349 (t)->t_res_o_flow += (t)->t_res_arr_sum; \
350 (t)->t_res_num = 0; \
351 (t)->t_res_arr_sum = 0; \
352 } \
353 (t)->t_res_arr[(t)->t_res_num].r_len = (len); \
354 (t)->t_res_arr[(t)->t_res_num].r_type = (type); \
355 (t)->t_res_arr_sum += (len); \
356 (t)->t_res_num++; \
357 } while (0)
358
359/*
360 * Reservation region
361 * As would be stored in xfs_log_iovec but without the i_addr which
362 * we don't care about.
363 */
364typedef struct xlog_res {
365 uint r_len;
366 uint r_type;
367} xlog_res_t;
368#else
369#define XLOG_TIC_RESET_RES(t)
370#define XLOG_TIC_ADD_OPHDR(t)
371#define XLOG_TIC_ADD_REGION(t, len, type)
372#endif
373
374
338typedef struct xlog_ticket { 375typedef struct xlog_ticket {
339 sv_t t_sema; /* sleep on this semaphore :20 */ 376 sv_t t_sema; /* sleep on this semaphore : 20 */
340 struct xlog_ticket *t_next; /* : 4 */ 377 struct xlog_ticket *t_next; /* :4|8 */
341 struct xlog_ticket *t_prev; /* : 4 */ 378 struct xlog_ticket *t_prev; /* :4|8 */
342 xlog_tid_t t_tid; /* transaction identifier : 4 */ 379 xlog_tid_t t_tid; /* transaction identifier : 4 */
343 int t_curr_res; /* current reservation in bytes : 4 */ 380 int t_curr_res; /* current reservation in bytes : 4 */
344 int t_unit_res; /* unit reservation in bytes : 4 */ 381 int t_unit_res; /* unit reservation in bytes : 4 */
345 __uint8_t t_ocnt; /* original count : 1 */ 382 char t_ocnt; /* original count : 1 */
346 __uint8_t t_cnt; /* current count : 1 */ 383 char t_cnt; /* current count : 1 */
347 __uint8_t t_clientid; /* who does this belong to; : 1 */ 384 char t_clientid; /* who does this belong to; : 1 */
348 __uint8_t t_flags; /* properties of reservation : 1 */ 385 char t_flags; /* properties of reservation : 1 */
386 uint t_trans_type; /* transaction type : 4 */
387
388#if defined (XFS_LOG_RES_DEBUG)
389 /* reservation array fields */
390 uint t_res_num; /* num in array : 4 */
391 xlog_res_t t_res_arr[XLOG_TIC_LEN_MAX]; /* array of res : X */
392 uint t_res_num_ophdrs; /* num op hdrs : 4 */
393 uint t_res_arr_sum; /* array sum : 4 */
394 uint t_res_o_flow; /* sum overflow : 4 */
395#endif
349} xlog_ticket_t; 396} xlog_ticket_t;
397
350#endif 398#endif
351 399
352 400
@@ -366,14 +414,10 @@ typedef struct xlog_op_header {
366#define XLOG_FMT_IRIX_BE 3 414#define XLOG_FMT_IRIX_BE 3
367 415
368/* our fmt */ 416/* our fmt */
369#if __BYTE_ORDER == __LITTLE_ENDIAN 417#ifdef XFS_NATIVE_HOST
370#define XLOG_FMT XLOG_FMT_LINUX_LE
371#else
372#if __BYTE_ORDER == __BIG_ENDIAN
373#define XLOG_FMT XLOG_FMT_LINUX_BE 418#define XLOG_FMT XLOG_FMT_LINUX_BE
374#else 419#else
375#error unknown byte order 420#define XLOG_FMT XLOG_FMT_LINUX_LE
376#endif
377#endif 421#endif
378 422
379typedef struct xlog_rec_header { 423typedef struct xlog_rec_header {
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 0aac28ddb81c..14faabaabf29 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1387,7 +1387,7 @@ xlog_recover_add_to_cont_trans(
1387 old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; 1387 old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
1388 old_len = item->ri_buf[item->ri_cnt-1].i_len; 1388 old_len = item->ri_buf[item->ri_cnt-1].i_len;
1389 1389
1390 ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0); 1390 ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0u);
1391 memcpy(&ptr[old_len], dp, len); /* d, s, l */ 1391 memcpy(&ptr[old_len], dp, len); /* d, s, l */
1392 item->ri_buf[item->ri_cnt-1].i_len += len; 1392 item->ri_buf[item->ri_cnt-1].i_len += len;
1393 item->ri_buf[item->ri_cnt-1].i_addr = ptr; 1393 item->ri_buf[item->ri_cnt-1].i_addr = ptr;
diff --git a/fs/xfs/xfs_qmops.c b/fs/xfs/xfs_qmops.c
index 4f40c92863d5..a6cd6324e946 100644
--- a/fs/xfs/xfs_qmops.c
+++ b/fs/xfs/xfs_qmops.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. 2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved.
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify it 4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as 5 * under the terms of version 2 of the GNU General Public License as
@@ -42,7 +42,8 @@
42#include "xfs_dir2.h" 42#include "xfs_dir2.h"
43#include "xfs_dmapi.h" 43#include "xfs_dmapi.h"
44#include "xfs_mount.h" 44#include "xfs_mount.h"
45 45#include "xfs_quota.h"
46#include "xfs_error.h"
46 47
47STATIC struct xfs_dquot * 48STATIC struct xfs_dquot *
48xfs_dqvopchown_default( 49xfs_dqvopchown_default(
@@ -54,8 +55,79 @@ xfs_dqvopchown_default(
54 return NULL; 55 return NULL;
55} 56}
56 57
58/*
59 * Clear the quotaflags in memory and in the superblock.
60 */
61int
62xfs_mount_reset_sbqflags(xfs_mount_t *mp)
63{
64 int error;
65 xfs_trans_t *tp;
66 unsigned long s;
67
68 mp->m_qflags = 0;
69 /*
70 * It is OK to look at sb_qflags here in mount path,
71 * without SB_LOCK.
72 */
73 if (mp->m_sb.sb_qflags == 0)
74 return 0;
75 s = XFS_SB_LOCK(mp);
76 mp->m_sb.sb_qflags = 0;
77 XFS_SB_UNLOCK(mp, s);
78
79 /*
80 * if the fs is readonly, let the incore superblock run
81 * with quotas off but don't flush the update out to disk
82 */
83 if (XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY)
84 return 0;
85#ifdef QUOTADEBUG
86 xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes");
87#endif
88 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
89 if ((error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
90 XFS_DEFAULT_LOG_COUNT))) {
91 xfs_trans_cancel(tp, 0);
92 xfs_fs_cmn_err(CE_ALERT, mp,
93 "xfs_mount_reset_sbqflags: Superblock update failed!");
94 return error;
95 }
96 xfs_mod_sb(tp, XFS_SB_QFLAGS);
97 error = xfs_trans_commit(tp, 0, NULL);
98 return error;
99}
100
101STATIC int
102xfs_noquota_init(
103 xfs_mount_t *mp,
104 uint *needquotamount,
105 uint *quotaflags)
106{
107 int error = 0;
108
109 *quotaflags = 0;
110 *needquotamount = B_FALSE;
111
112 ASSERT(!XFS_IS_QUOTA_ON(mp));
113
114 /*
115 * If a file system had quotas running earlier, but decided to
116 * mount without -o uquota/pquota/gquota options, revoke the
117 * quotachecked license.
118 */
119 if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) {
120 cmn_err(CE_NOTE,
121 "XFS resetting qflags for filesystem %s",
122 mp->m_fsname);
123
124 error = xfs_mount_reset_sbqflags(mp);
125 }
126 return error;
127}
128
57xfs_qmops_t xfs_qmcore_stub = { 129xfs_qmops_t xfs_qmcore_stub = {
58 .xfs_qminit = (xfs_qminit_t) fs_noerr, 130 .xfs_qminit = (xfs_qminit_t) xfs_noquota_init,
59 .xfs_qmdone = (xfs_qmdone_t) fs_noerr, 131 .xfs_qmdone = (xfs_qmdone_t) fs_noerr,
60 .xfs_qmmount = (xfs_qmmount_t) fs_noerr, 132 .xfs_qmmount = (xfs_qmmount_t) fs_noerr,
61 .xfs_qmunmount = (xfs_qmunmount_t) fs_noerr, 133 .xfs_qmunmount = (xfs_qmunmount_t) fs_noerr,
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index 7134576ae7fa..32cb79752d5d 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. 2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved.
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify it 4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as 5 * under the terms of version 2 of the GNU General Public License as
@@ -160,6 +160,20 @@ typedef struct xfs_qoff_logformat {
160#define XFS_GQUOTA_ACCT 0x0040 /* group quota accounting ON */ 160#define XFS_GQUOTA_ACCT 0x0040 /* group quota accounting ON */
161 161
162/* 162/*
163 * Quota Accounting/Enforcement flags
164 */
165#define XFS_ALL_QUOTA_ACCT \
166 (XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT | XFS_PQUOTA_ACCT)
167#define XFS_ALL_QUOTA_ENFD (XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD)
168#define XFS_ALL_QUOTA_CHKD (XFS_UQUOTA_CHKD | XFS_OQUOTA_CHKD)
169
170#define XFS_IS_QUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT)
171#define XFS_IS_QUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ENFD)
172#define XFS_IS_UQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_UQUOTA_ACCT)
173#define XFS_IS_PQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_PQUOTA_ACCT)
174#define XFS_IS_GQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_GQUOTA_ACCT)
175
176/*
163 * Incore only flags for quotaoff - these bits get cleared when quota(s) 177 * Incore only flags for quotaoff - these bits get cleared when quota(s)
164 * are in the process of getting turned off. These flags are in m_qflags but 178 * are in the process of getting turned off. These flags are in m_qflags but
165 * never in sb_qflags. 179 * never in sb_qflags.
@@ -362,6 +376,7 @@ typedef struct xfs_dqtrxops {
362 f | XFS_QMOPT_RES_REGBLKS) 376 f | XFS_QMOPT_RES_REGBLKS)
363 377
364extern int xfs_qm_dqcheck(xfs_disk_dquot_t *, xfs_dqid_t, uint, uint, char *); 378extern int xfs_qm_dqcheck(xfs_disk_dquot_t *, xfs_dqid_t, uint, uint, char *);
379extern int xfs_mount_reset_sbqflags(struct xfs_mount *);
365 380
366extern struct bhv_vfsops xfs_qmops; 381extern struct bhv_vfsops xfs_qmops;
367 382
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 06dfca531f79..92efe272b83d 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -276,7 +276,7 @@ xfs_trans_reserve(
276 276
277 error = xfs_log_reserve(tp->t_mountp, logspace, logcount, 277 error = xfs_log_reserve(tp->t_mountp, logspace, logcount,
278 &tp->t_ticket, 278 &tp->t_ticket,
279 XFS_TRANSACTION, log_flags); 279 XFS_TRANSACTION, log_flags, tp->t_type);
280 if (error) { 280 if (error) {
281 goto undo_blocks; 281 goto undo_blocks;
282 } 282 }
@@ -1032,6 +1032,7 @@ xfs_trans_fill_vecs(
1032 tp->t_header.th_num_items = nitems; 1032 tp->t_header.th_num_items = nitems;
1033 log_vector->i_addr = (xfs_caddr_t)&tp->t_header; 1033 log_vector->i_addr = (xfs_caddr_t)&tp->t_header;
1034 log_vector->i_len = sizeof(xfs_trans_header_t); 1034 log_vector->i_len = sizeof(xfs_trans_header_t);
1035 XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_TRANSHDR);
1035} 1036}
1036 1037
1037 1038
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index ec541d66fa2a..a263aec8b3a6 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -112,6 +112,7 @@ typedef struct xfs_trans_header {
112#define XFS_TRANS_GROWFSRT_ZERO 38 112#define XFS_TRANS_GROWFSRT_ZERO 38
113#define XFS_TRANS_GROWFSRT_FREE 39 113#define XFS_TRANS_GROWFSRT_FREE 39
114#define XFS_TRANS_SWAPEXT 40 114#define XFS_TRANS_SWAPEXT 40
115#define XFS_TRANS_TYPE_MAX 40
115/* new transaction types need to be reflected in xfs_logprint(8) */ 116/* new transaction types need to be reflected in xfs_logprint(8) */
116 117
117 118
@@ -998,6 +999,7 @@ struct xfs_buf *xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *, int);
998void xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *); 999void xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *);
999void xfs_trans_bjoin(xfs_trans_t *, struct xfs_buf *); 1000void xfs_trans_bjoin(xfs_trans_t *, struct xfs_buf *);
1000void xfs_trans_bhold(xfs_trans_t *, struct xfs_buf *); 1001void xfs_trans_bhold(xfs_trans_t *, struct xfs_buf *);
1002void xfs_trans_bhold_release(xfs_trans_t *, struct xfs_buf *);
1001void xfs_trans_binval(xfs_trans_t *, struct xfs_buf *); 1003void xfs_trans_binval(xfs_trans_t *, struct xfs_buf *);
1002void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *); 1004void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *);
1003void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *); 1005void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *);
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 7bc5eab4c2c1..2a71b4f91bfa 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -379,8 +379,8 @@ xfs_trans_delete_ail(
379 else { 379 else {
380 xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp, 380 xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp,
381 "xfs_trans_delete_ail: attempting to delete a log item that is not in the AIL"); 381 "xfs_trans_delete_ail: attempting to delete a log item that is not in the AIL");
382 xfs_force_shutdown(mp, XFS_CORRUPT_INCORE);
383 AIL_UNLOCK(mp, s); 382 AIL_UNLOCK(mp, s);
383 xfs_force_shutdown(mp, XFS_CORRUPT_INCORE);
384 } 384 }
385 } 385 }
386} 386}
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 144da7a85466..e733293dd7f4 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -714,6 +714,29 @@ xfs_trans_bhold(xfs_trans_t *tp,
714} 714}
715 715
716/* 716/*
717 * Cancel the previous buffer hold request made on this buffer
718 * for this transaction.
719 */
720void
721xfs_trans_bhold_release(xfs_trans_t *tp,
722 xfs_buf_t *bp)
723{
724 xfs_buf_log_item_t *bip;
725
726 ASSERT(XFS_BUF_ISBUSY(bp));
727 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
728 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
729
730 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
731 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
732 ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
733 ASSERT(atomic_read(&bip->bli_refcount) > 0);
734 ASSERT(bip->bli_flags & XFS_BLI_HOLD);
735 bip->bli_flags &= ~XFS_BLI_HOLD;
736 xfs_buf_item_trace("BHOLD RELEASE", bip);
737}
738
739/*
717 * This is called to mark bytes first through last inclusive of the given 740 * This is called to mark bytes first through last inclusive of the given
718 * buffer as needing to be logged when the transaction is committed. 741 * buffer as needing to be logged when the transaction is committed.
719 * The buffer must already be associated with the given transaction. 742 * The buffer must already be associated with the given transaction.
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 42bcc0215203..f1a904e23ade 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -795,7 +795,6 @@ xfs_statvfs(
795 xfs_mount_t *mp; 795 xfs_mount_t *mp;
796 xfs_sb_t *sbp; 796 xfs_sb_t *sbp;
797 unsigned long s; 797 unsigned long s;
798 u64 id;
799 798
800 mp = XFS_BHVTOM(bdp); 799 mp = XFS_BHVTOM(bdp);
801 sbp = &(mp->m_sb); 800 sbp = &(mp->m_sb);
@@ -823,9 +822,7 @@ xfs_statvfs(
823 statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); 822 statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
824 XFS_SB_UNLOCK(mp, s); 823 XFS_SB_UNLOCK(mp, s);
825 824
826 id = huge_encode_dev(mp->m_dev); 825 xfs_statvfs_fsid(statp, mp);
827 statp->f_fsid.val[0] = (u32)id;
828 statp->f_fsid.val[1] = (u32)(id >> 32);
829 statp->f_namelen = MAXNAMELEN - 1; 826 statp->f_namelen = MAXNAMELEN - 1;
830 827
831 return 0; 828 return 0;
@@ -906,7 +903,6 @@ xfs_sync_inodes(
906 xfs_inode_t *ip_next; 903 xfs_inode_t *ip_next;
907 xfs_buf_t *bp; 904 xfs_buf_t *bp;
908 vnode_t *vp = NULL; 905 vnode_t *vp = NULL;
909 vmap_t vmap;
910 int error; 906 int error;
911 int last_error; 907 int last_error;
912 uint64_t fflag; 908 uint64_t fflag;
@@ -1101,48 +1097,21 @@ xfs_sync_inodes(
1101 * lock in xfs_ireclaim() after the inode is pulled from 1097 * lock in xfs_ireclaim() after the inode is pulled from
1102 * the mount list will sleep until we release it here. 1098 * the mount list will sleep until we release it here.
1103 * This keeps the vnode from being freed while we reference 1099 * This keeps the vnode from being freed while we reference
1104 * it. It is also cheaper and simpler than actually doing 1100 * it.
1105 * a vn_get() for every inode we touch here.
1106 */ 1101 */
1107 if (xfs_ilock_nowait(ip, lock_flags) == 0) { 1102 if (xfs_ilock_nowait(ip, lock_flags) == 0) {
1108
1109 if ((flags & SYNC_BDFLUSH) || (vp == NULL)) { 1103 if ((flags & SYNC_BDFLUSH) || (vp == NULL)) {
1110 ip = ip->i_mnext; 1104 ip = ip->i_mnext;
1111 continue; 1105 continue;
1112 } 1106 }
1113 1107
1114 /* 1108 vp = vn_grab(vp);
1115 * We need to unlock the inode list lock in order
1116 * to lock the inode. Insert a marker record into
1117 * the inode list to remember our position, dropping
1118 * the lock is now done inside the IPOINTER_INSERT
1119 * macro.
1120 *
1121 * We also use the inode list lock to protect us
1122 * in taking a snapshot of the vnode version number
1123 * for use in calling vn_get().
1124 */
1125 VMAP(vp, vmap);
1126 IPOINTER_INSERT(ip, mp);
1127
1128 vp = vn_get(vp, &vmap);
1129 if (vp == NULL) { 1109 if (vp == NULL) {
1130 /* 1110 ip = ip->i_mnext;
1131 * The vnode was reclaimed once we let go
1132 * of the inode list lock. Skip to the
1133 * next list entry. Remove the marker.
1134 */
1135
1136 XFS_MOUNT_ILOCK(mp);
1137
1138 mount_locked = B_TRUE;
1139 vnode_refed = B_FALSE;
1140
1141 IPOINTER_REMOVE(ip, mp);
1142
1143 continue; 1111 continue;
1144 } 1112 }
1145 1113
1114 IPOINTER_INSERT(ip, mp);
1146 xfs_ilock(ip, lock_flags); 1115 xfs_ilock(ip, lock_flags);
1147 1116
1148 ASSERT(vp == XFS_ITOV(ip)); 1117 ASSERT(vp == XFS_ITOV(ip));
@@ -1533,7 +1502,10 @@ xfs_syncsub(
1533 * eventually kicked out of the cache. 1502 * eventually kicked out of the cache.
1534 */ 1503 */
1535 if (flags & SYNC_REFCACHE) { 1504 if (flags & SYNC_REFCACHE) {
1536 xfs_refcache_purge_some(mp); 1505 if (flags & SYNC_WAIT)
1506 xfs_refcache_purge_mp(mp);
1507 else
1508 xfs_refcache_purge_some(mp);
1537 } 1509 }
1538 1510
1539 /* 1511 /*
@@ -1649,6 +1621,10 @@ xfs_vget(
1649#define MNTOPT_SWIDTH "swidth" /* data volume stripe width */ 1621#define MNTOPT_SWIDTH "swidth" /* data volume stripe width */
1650#define MNTOPT_NOUUID "nouuid" /* ignore filesystem UUID */ 1622#define MNTOPT_NOUUID "nouuid" /* ignore filesystem UUID */
1651#define MNTOPT_MTPT "mtpt" /* filesystem mount point */ 1623#define MNTOPT_MTPT "mtpt" /* filesystem mount point */
1624#define MNTOPT_GRPID "grpid" /* group-ID from parent directory */
1625#define MNTOPT_NOGRPID "nogrpid" /* group-ID from current process */
1626#define MNTOPT_BSDGROUPS "bsdgroups" /* group-ID from parent directory */
1627#define MNTOPT_SYSVGROUPS "sysvgroups" /* group-ID from current process */
1652#define MNTOPT_ALLOCSIZE "allocsize" /* preferred allocation size */ 1628#define MNTOPT_ALLOCSIZE "allocsize" /* preferred allocation size */
1653#define MNTOPT_IHASHSIZE "ihashsize" /* size of inode hash table */ 1629#define MNTOPT_IHASHSIZE "ihashsize" /* size of inode hash table */
1654#define MNTOPT_NORECOVERY "norecovery" /* don't run XFS recovery */ 1630#define MNTOPT_NORECOVERY "norecovery" /* don't run XFS recovery */
@@ -1769,6 +1745,12 @@ xfs_parseargs(
1769 } 1745 }
1770 args->flags |= XFSMNT_IHASHSIZE; 1746 args->flags |= XFSMNT_IHASHSIZE;
1771 args->ihashsize = simple_strtoul(value, &eov, 10); 1747 args->ihashsize = simple_strtoul(value, &eov, 10);
1748 } else if (!strcmp(this_char, MNTOPT_GRPID) ||
1749 !strcmp(this_char, MNTOPT_BSDGROUPS)) {
1750 vfsp->vfs_flag |= VFS_GRPID;
1751 } else if (!strcmp(this_char, MNTOPT_NOGRPID) ||
1752 !strcmp(this_char, MNTOPT_SYSVGROUPS)) {
1753 vfsp->vfs_flag &= ~VFS_GRPID;
1772 } else if (!strcmp(this_char, MNTOPT_WSYNC)) { 1754 } else if (!strcmp(this_char, MNTOPT_WSYNC)) {
1773 args->flags |= XFSMNT_WSYNC; 1755 args->flags |= XFSMNT_WSYNC;
1774 } else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) { 1756 } else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) {
@@ -1890,6 +1872,7 @@ xfs_showargs(
1890 }; 1872 };
1891 struct proc_xfs_info *xfs_infop; 1873 struct proc_xfs_info *xfs_infop;
1892 struct xfs_mount *mp = XFS_BHVTOM(bhv); 1874 struct xfs_mount *mp = XFS_BHVTOM(bhv);
1875 struct vfs *vfsp = XFS_MTOVFS(mp);
1893 1876
1894 for (xfs_infop = xfs_info; xfs_infop->flag; xfs_infop++) { 1877 for (xfs_infop = xfs_info; xfs_infop->flag; xfs_infop++) {
1895 if (mp->m_flags & xfs_infop->flag) 1878 if (mp->m_flags & xfs_infop->flag)
@@ -1926,7 +1909,10 @@ xfs_showargs(
1926 1909
1927 if (!(mp->m_flags & XFS_MOUNT_32BITINOOPT)) 1910 if (!(mp->m_flags & XFS_MOUNT_32BITINOOPT))
1928 seq_printf(m, "," MNTOPT_64BITINODE); 1911 seq_printf(m, "," MNTOPT_64BITINODE);
1929 1912
1913 if (vfsp->vfs_flag & VFS_GRPID)
1914 seq_printf(m, "," MNTOPT_GRPID);
1915
1930 return 0; 1916 return 0;
1931} 1917}
1932 1918
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 1377c868f3f4..58bfe629b933 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -104,7 +104,7 @@ xfs_open(
104 * If it's a directory with any blocks, read-ahead block 0 104 * If it's a directory with any blocks, read-ahead block 0
105 * as we're almost certain to have the next operation be a read there. 105 * as we're almost certain to have the next operation be a read there.
106 */ 106 */
107 if (vp->v_type == VDIR && ip->i_d.di_nextents > 0) { 107 if (VN_ISDIR(vp) && ip->i_d.di_nextents > 0) {
108 mode = xfs_ilock_map_shared(ip); 108 mode = xfs_ilock_map_shared(ip);
109 if (ip->i_d.di_nextents > 0) 109 if (ip->i_d.di_nextents > 0)
110 (void)xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK); 110 (void)xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK);
@@ -163,18 +163,21 @@ xfs_getattr(
163 /* 163 /*
164 * Copy from in-core inode. 164 * Copy from in-core inode.
165 */ 165 */
166 vap->va_type = vp->v_type; 166 vap->va_mode = ip->i_d.di_mode;
167 vap->va_mode = ip->i_d.di_mode & MODEMASK;
168 vap->va_uid = ip->i_d.di_uid; 167 vap->va_uid = ip->i_d.di_uid;
169 vap->va_gid = ip->i_d.di_gid; 168 vap->va_gid = ip->i_d.di_gid;
170 vap->va_projid = ip->i_d.di_projid; 169 vap->va_projid = ip->i_d.di_projid;
171 170
172 /* 171 /*
173 * Check vnode type block/char vs. everything else. 172 * Check vnode type block/char vs. everything else.
174 * Do it with bitmask because that's faster than looking
175 * for multiple values individually.
176 */ 173 */
177 if (((1 << vp->v_type) & ((1<<VBLK) | (1<<VCHR))) == 0) { 174 switch (ip->i_d.di_mode & S_IFMT) {
175 case S_IFBLK:
176 case S_IFCHR:
177 vap->va_rdev = ip->i_df.if_u2.if_rdev;
178 vap->va_blocksize = BLKDEV_IOSIZE;
179 break;
180 default:
178 vap->va_rdev = 0; 181 vap->va_rdev = 0;
179 182
180 if (!(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) { 183 if (!(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) {
@@ -224,9 +227,7 @@ xfs_getattr(
224 (ip->i_d.di_extsize << mp->m_sb.sb_blocklog) : 227 (ip->i_d.di_extsize << mp->m_sb.sb_blocklog) :
225 (mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog); 228 (mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog);
226 } 229 }
227 } else { 230 break;
228 vap->va_rdev = ip->i_df.if_u2.if_rdev;
229 vap->va_blocksize = BLKDEV_IOSIZE;
230 } 231 }
231 232
232 vap->va_atime.tv_sec = ip->i_d.di_atime.t_sec; 233 vap->va_atime.tv_sec = ip->i_d.di_atime.t_sec;
@@ -468,7 +469,7 @@ xfs_setattr(
468 m |= S_ISGID; 469 m |= S_ISGID;
469#if 0 470#if 0
470 /* Linux allows this, Irix doesn't. */ 471 /* Linux allows this, Irix doesn't. */
471 if ((vap->va_mode & S_ISVTX) && vp->v_type != VDIR) 472 if ((vap->va_mode & S_ISVTX) && !VN_ISDIR(vp))
472 m |= S_ISVTX; 473 m |= S_ISVTX;
473#endif 474#endif
474 if (m && !capable(CAP_FSETID)) 475 if (m && !capable(CAP_FSETID))
@@ -546,10 +547,10 @@ xfs_setattr(
546 goto error_return; 547 goto error_return;
547 } 548 }
548 549
549 if (vp->v_type == VDIR) { 550 if (VN_ISDIR(vp)) {
550 code = XFS_ERROR(EISDIR); 551 code = XFS_ERROR(EISDIR);
551 goto error_return; 552 goto error_return;
552 } else if (vp->v_type != VREG) { 553 } else if (!VN_ISREG(vp)) {
553 code = XFS_ERROR(EINVAL); 554 code = XFS_ERROR(EINVAL);
554 goto error_return; 555 goto error_return;
555 } 556 }
@@ -1567,7 +1568,7 @@ xfs_release(
1567 vp = BHV_TO_VNODE(bdp); 1568 vp = BHV_TO_VNODE(bdp);
1568 ip = XFS_BHVTOI(bdp); 1569 ip = XFS_BHVTOI(bdp);
1569 1570
1570 if ((vp->v_type != VREG) || (ip->i_d.di_mode == 0)) { 1571 if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0)) {
1571 return 0; 1572 return 0;
1572 } 1573 }
1573 1574
@@ -1895,7 +1896,7 @@ xfs_create(
1895 dp = XFS_BHVTOI(dir_bdp); 1896 dp = XFS_BHVTOI(dir_bdp);
1896 mp = dp->i_mount; 1897 mp = dp->i_mount;
1897 1898
1898 dm_di_mode = vap->va_mode|VTTOIF(vap->va_type); 1899 dm_di_mode = vap->va_mode;
1899 namelen = VNAMELEN(dentry); 1900 namelen = VNAMELEN(dentry);
1900 1901
1901 if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) { 1902 if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) {
@@ -1973,8 +1974,7 @@ xfs_create(
1973 (error = XFS_DIR_CANENTER(mp, tp, dp, name, namelen))) 1974 (error = XFS_DIR_CANENTER(mp, tp, dp, name, namelen)))
1974 goto error_return; 1975 goto error_return;
1975 rdev = (vap->va_mask & XFS_AT_RDEV) ? vap->va_rdev : 0; 1976 rdev = (vap->va_mask & XFS_AT_RDEV) ? vap->va_rdev : 0;
1976 error = xfs_dir_ialloc(&tp, dp, 1977 error = xfs_dir_ialloc(&tp, dp, vap->va_mode, 1,
1977 MAKEIMODE(vap->va_type,vap->va_mode), 1,
1978 rdev, credp, prid, resblks > 0, 1978 rdev, credp, prid, resblks > 0,
1979 &ip, &committed); 1979 &ip, &committed);
1980 if (error) { 1980 if (error) {
@@ -2620,7 +2620,7 @@ xfs_link(
2620 vn_trace_entry(src_vp, __FUNCTION__, (inst_t *)__return_address); 2620 vn_trace_entry(src_vp, __FUNCTION__, (inst_t *)__return_address);
2621 2621
2622 target_namelen = VNAMELEN(dentry); 2622 target_namelen = VNAMELEN(dentry);
2623 if (src_vp->v_type == VDIR) 2623 if (VN_ISDIR(src_vp))
2624 return XFS_ERROR(EPERM); 2624 return XFS_ERROR(EPERM);
2625 2625
2626 src_bdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(src_vp), &xfs_vnodeops); 2626 src_bdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(src_vp), &xfs_vnodeops);
@@ -2805,7 +2805,7 @@ xfs_mkdir(
2805 2805
2806 tp = NULL; 2806 tp = NULL;
2807 dp_joined_to_trans = B_FALSE; 2807 dp_joined_to_trans = B_FALSE;
2808 dm_di_mode = vap->va_mode|VTTOIF(vap->va_type); 2808 dm_di_mode = vap->va_mode;
2809 2809
2810 if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) { 2810 if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) {
2811 error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE, 2811 error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE,
@@ -2879,8 +2879,7 @@ xfs_mkdir(
2879 /* 2879 /*
2880 * create the directory inode. 2880 * create the directory inode.
2881 */ 2881 */
2882 error = xfs_dir_ialloc(&tp, dp, 2882 error = xfs_dir_ialloc(&tp, dp, vap->va_mode, 2,
2883 MAKEIMODE(vap->va_type,vap->va_mode), 2,
2884 0, credp, prid, resblks > 0, 2883 0, credp, prid, resblks > 0,
2885 &cdp, NULL); 2884 &cdp, NULL);
2886 if (error) { 2885 if (error) {
@@ -3650,7 +3649,7 @@ xfs_rwlock(
3650 vnode_t *vp; 3649 vnode_t *vp;
3651 3650
3652 vp = BHV_TO_VNODE(bdp); 3651 vp = BHV_TO_VNODE(bdp);
3653 if (vp->v_type == VDIR) 3652 if (VN_ISDIR(vp))
3654 return 1; 3653 return 1;
3655 ip = XFS_BHVTOI(bdp); 3654 ip = XFS_BHVTOI(bdp);
3656 if (locktype == VRWLOCK_WRITE) { 3655 if (locktype == VRWLOCK_WRITE) {
@@ -3681,7 +3680,7 @@ xfs_rwunlock(
3681 vnode_t *vp; 3680 vnode_t *vp;
3682 3681
3683 vp = BHV_TO_VNODE(bdp); 3682 vp = BHV_TO_VNODE(bdp);
3684 if (vp->v_type == VDIR) 3683 if (VN_ISDIR(vp))
3685 return; 3684 return;
3686 ip = XFS_BHVTOI(bdp); 3685 ip = XFS_BHVTOI(bdp);
3687 if (locktype == VRWLOCK_WRITE) { 3686 if (locktype == VRWLOCK_WRITE) {
@@ -3847,51 +3846,10 @@ xfs_reclaim(
3847 return 0; 3846 return 0;
3848 } 3847 }
3849 3848
3850 if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) { 3849 vn_iowait(vp);
3851 if (ip->i_d.di_size > 0) {
3852 /*
3853 * Flush and invalidate any data left around that is
3854 * a part of this file.
3855 *
3856 * Get the inode's i/o lock so that buffers are pushed
3857 * out while holding the proper lock. We can't hold
3858 * the inode lock here since flushing out buffers may
3859 * cause us to try to get the lock in xfs_strategy().
3860 *
3861 * We don't have to call remapf() here, because there
3862 * cannot be any mapped file references to this vnode
3863 * since it is being reclaimed.
3864 */
3865 xfs_ilock(ip, XFS_IOLOCK_EXCL);
3866
3867 /*
3868 * If we hit an IO error, we need to make sure that the
3869 * buffer and page caches of file data for
3870 * the file are tossed away. We don't want to use
3871 * VOP_FLUSHINVAL_PAGES here because we don't want dirty
3872 * pages to stay attached to the vnode, but be
3873 * marked P_BAD. pdflush/vnode_pagebad
3874 * hates that.
3875 */
3876 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
3877 VOP_FLUSHINVAL_PAGES(vp, 0, -1, FI_NONE);
3878 } else {
3879 VOP_TOSS_PAGES(vp, 0, -1, FI_NONE);
3880 }
3881 3850
3882 ASSERT(VN_CACHED(vp) == 0); 3851 ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
3883 ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || 3852 ASSERT(VN_CACHED(vp) == 0);
3884 ip->i_delayed_blks == 0);
3885 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
3886 } else if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
3887 /*
3888 * di_size field may not be quite accurate if we're
3889 * shutting down.
3890 */
3891 VOP_TOSS_PAGES(vp, 0, -1, FI_NONE);
3892 ASSERT(VN_CACHED(vp) == 0);
3893 }
3894 }
3895 3853
3896 /* If we have nothing to flush with this inode then complete the 3854 /* If we have nothing to flush with this inode then complete the
3897 * teardown now, otherwise break the link between the xfs inode 3855 * teardown now, otherwise break the link between the xfs inode
@@ -4567,7 +4525,7 @@ xfs_change_file_space(
4567 /* 4525 /*
4568 * must be a regular file and have write permission 4526 * must be a regular file and have write permission
4569 */ 4527 */
4570 if (vp->v_type != VREG) 4528 if (!VN_ISREG(vp))
4571 return XFS_ERROR(EINVAL); 4529 return XFS_ERROR(EINVAL);
4572 4530
4573 xfs_ilock(ip, XFS_ILOCK_SHARED); 4531 xfs_ilock(ip, XFS_ILOCK_SHARED);