aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/9p.c359
-rw-r--r--fs/9p/9p.h341
-rw-r--r--fs/9p/Makefile17
-rw-r--r--fs/9p/conv.c693
-rw-r--r--fs/9p/conv.h36
-rw-r--r--fs/9p/debug.h70
-rw-r--r--fs/9p/error.c93
-rw-r--r--fs/9p/error.h178
-rw-r--r--fs/9p/fid.c241
-rw-r--r--fs/9p/fid.h57
-rw-r--r--fs/9p/mux.c475
-rw-r--r--fs/9p/mux.h41
-rw-r--r--fs/9p/trans_fd.c172
-rw-r--r--fs/9p/trans_sock.c290
-rw-r--r--fs/9p/transport.h46
-rw-r--r--fs/9p/v9fs.c452
-rw-r--r--fs/9p/v9fs.h103
-rw-r--r--fs/9p/v9fs_vfs.h53
-rw-r--r--fs/9p/vfs_dentry.c126
-rw-r--r--fs/9p/vfs_dir.c226
-rw-r--r--fs/9p/vfs_file.c401
-rw-r--r--fs/9p/vfs_inode.c1338
-rw-r--r--fs/9p/vfs_super.c280
-rw-r--r--fs/Kconfig24
-rw-r--r--fs/Makefile2
-rw-r--r--fs/affs/inode.c1
-rw-r--r--fs/aio.c34
-rw-r--r--fs/autofs/autofs_i.h3
-rw-r--r--fs/autofs/dirhash.c5
-rw-r--r--fs/autofs/inode.c3
-rw-r--r--fs/bfs/bfs.h1
-rw-r--r--fs/bfs/dir.c25
-rw-r--r--fs/bfs/file.c23
-rw-r--r--fs/bfs/inode.c104
-rw-r--r--fs/bio.c2
-rw-r--r--fs/compat.c6
-rw-r--r--fs/compat_ioctl.c7
-rw-r--r--fs/exec.c8
-rw-r--r--fs/ext2/ialloc.c5
-rw-r--r--fs/ext2/inode.c2
-rw-r--r--fs/ext2/xattr.h8
-rw-r--r--fs/ext2/xattr_security.c22
-rw-r--r--fs/ext3/ialloc.c5
-rw-r--r--fs/ext3/inode.c2
-rw-r--r--fs/ext3/xattr.h11
-rw-r--r--fs/ext3/xattr_security.c22
-rw-r--r--fs/fat/inode.c2
-rw-r--r--fs/fcntl.c60
-rw-r--r--fs/file.c387
-rw-r--r--fs/file_table.c40
-rw-r--r--fs/fuse/Makefile7
-rw-r--r--fs/fuse/dev.c877
-rw-r--r--fs/fuse/dir.c982
-rw-r--r--fs/fuse/file.c555
-rw-r--r--fs/fuse/fuse_i.h451
-rw-r--r--fs/fuse/inode.c591
-rw-r--r--fs/hostfs/hostfs_kern.c1
-rw-r--r--fs/hpfs/inode.c1
-rw-r--r--fs/inode.c12
-rw-r--r--fs/jffs/inode-v23.c1
-rw-r--r--fs/jfs/inode.c2
-rw-r--r--fs/locks.c8
-rw-r--r--fs/minix/inode.c1
-rw-r--r--fs/namei.c26
-rw-r--r--fs/ncpfs/inode.c2
-rw-r--r--fs/nfs/inode.c2
-rw-r--r--fs/ntfs/ChangeLog70
-rw-r--r--fs/ntfs/Makefile2
-rw-r--r--fs/ntfs/aops.c293
-rw-r--r--fs/ntfs/attrib.c125
-rw-r--r--fs/ntfs/attrib.h2
-rw-r--r--fs/ntfs/compress.c8
-rw-r--r--fs/ntfs/dir.c3
-rw-r--r--fs/ntfs/file.c9
-rw-r--r--fs/ntfs/index.c1
-rw-r--r--fs/ntfs/inode.c227
-rw-r--r--fs/ntfs/lcnalloc.c39
-rw-r--r--fs/ntfs/lcnalloc.h21
-rw-r--r--fs/ntfs/logfile.c251
-rw-r--r--fs/ntfs/logfile.h8
-rw-r--r--fs/ntfs/malloc.h48
-rw-r--r--fs/ntfs/mft.c4
-rw-r--r--fs/ntfs/runlist.c374
-rw-r--r--fs/ntfs/runlist.h3
-rw-r--r--fs/ntfs/super.c16
-rw-r--r--fs/ntfs/unistr.c3
-rw-r--r--fs/open.c43
-rw-r--r--fs/proc/array.c5
-rw-r--r--fs/proc/base.c33
-rw-r--r--fs/proc/inode.c2
-rw-r--r--fs/qnx4/inode.c1
-rw-r--r--fs/reiserfs/inode.c2
-rw-r--r--fs/select.c23
-rw-r--r--fs/smbfs/inode.c1
-rw-r--r--fs/sysv/inode.c1
-rw-r--r--fs/udf/inode.c2
-rw-r--r--fs/ufs/inode.c1
-rw-r--r--fs/xfs/Makefile-linux-2.615
-rw-r--r--fs/xfs/support/ktrace.c2
99 files changed, 11299 insertions, 760 deletions
diff --git a/fs/9p/9p.c b/fs/9p/9p.c
new file mode 100644
index 000000000000..e847f504a47c
--- /dev/null
+++ b/fs/9p/9p.c
@@ -0,0 +1,359 @@
1/*
2 * linux/fs/9p/9p.c
3 *
4 * This file contains 9P2000 protocol functions
5 *
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to:
21 * Free Software Foundation
22 * 51 Franklin Street, Fifth Floor
23 * Boston, MA 02111-1301 USA
24 *
25 */
26
27#include <linux/config.h>
28#include <linux/module.h>
29#include <linux/errno.h>
30#include <linux/fs.h>
31#include <linux/idr.h>
32
33#include "debug.h"
34#include "v9fs.h"
35#include "9p.h"
36#include "mux.h"
37
38/**
39 * v9fs_t_version - negotiate protocol parameters with sever
40 * @v9ses: 9P2000 session information
41 * @msize: requested max size packet
42 * @version: requested version.extension string
43 * @fcall: pointer to response fcall pointer
44 *
45 */
46
47int
48v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
49 char *version, struct v9fs_fcall **fcall)
50{
51 struct v9fs_fcall msg;
52
53 dprintk(DEBUG_9P, "msize: %d version: %s\n", msize, version);
54 msg.id = TVERSION;
55 msg.params.tversion.msize = msize;
56 msg.params.tversion.version = version;
57
58 return v9fs_mux_rpc(v9ses, &msg, fcall);
59}
60
61/**
62 * v9fs_t_attach - mount the server
63 * @v9ses: 9P2000 session information
64 * @uname: user name doing the attach
65 * @aname: remote name being attached to
66 * @fid: mount fid to attatch to root node
67 * @afid: authentication fid (in this case result key)
68 * @fcall: pointer to response fcall pointer
69 *
70 */
71
72int
73v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname,
74 u32 fid, u32 afid, struct v9fs_fcall **fcall)
75{
76 struct v9fs_fcall msg;
77
78 dprintk(DEBUG_9P, "uname '%s' aname '%s' fid %d afid %d\n", uname,
79 aname, fid, afid);
80 msg.id = TATTACH;
81 msg.params.tattach.fid = fid;
82 msg.params.tattach.afid = afid;
83 msg.params.tattach.uname = uname;
84 msg.params.tattach.aname = aname;
85
86 return v9fs_mux_rpc(v9ses, &msg, fcall);
87}
88
89/**
90 * v9fs_t_clunk - release a fid (finish a transaction)
91 * @v9ses: 9P2000 session information
92 * @fid: fid to release
93 * @fcall: pointer to response fcall pointer
94 *
95 */
96
97int
98v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid,
99 struct v9fs_fcall **fcall)
100{
101 struct v9fs_fcall msg;
102
103 dprintk(DEBUG_9P, "fid %d\n", fid);
104 msg.id = TCLUNK;
105 msg.params.tclunk.fid = fid;
106
107 return v9fs_mux_rpc(v9ses, &msg, fcall);
108}
109
110/**
111 * v9fs_v9fs_t_flush - flush a pending transaction
112 * @v9ses: 9P2000 session information
113 * @tag: tid to release
114 *
115 */
116
117int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 tag)
118{
119 struct v9fs_fcall msg;
120
121 dprintk(DEBUG_9P, "oldtag %d\n", tag);
122 msg.id = TFLUSH;
123 msg.params.tflush.oldtag = tag;
124 return v9fs_mux_rpc(v9ses, &msg, NULL);
125}
126
127/**
128 * v9fs_t_stat - read a file's meta-data
129 * @v9ses: 9P2000 session information
130 * @fid: fid pointing to file or directory to get info about
131 * @fcall: pointer to response fcall
132 *
133 */
134
135int
136v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **fcall)
137{
138 struct v9fs_fcall msg;
139
140 dprintk(DEBUG_9P, "fid %d\n", fid);
141 if (fcall)
142 *fcall = NULL;
143
144 msg.id = TSTAT;
145 msg.params.tstat.fid = fid;
146 return v9fs_mux_rpc(v9ses, &msg, fcall);
147}
148
149/**
150 * v9fs_t_wstat - write a file's meta-data
151 * @v9ses: 9P2000 session information
152 * @fid: fid pointing to file or directory to write info about
153 * @stat: metadata
154 * @fcall: pointer to response fcall
155 *
156 */
157
158int
159v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid,
160 struct v9fs_stat *stat, struct v9fs_fcall **fcall)
161{
162 struct v9fs_fcall msg;
163
164 dprintk(DEBUG_9P, "fid %d length %d\n", fid, (int)stat->length);
165 msg.id = TWSTAT;
166 msg.params.twstat.fid = fid;
167 msg.params.twstat.stat = stat;
168
169 return v9fs_mux_rpc(v9ses, &msg, fcall);
170}
171
172/**
173 * v9fs_t_walk - walk a fid to a new file or directory
174 * @v9ses: 9P2000 session information
175 * @fid: fid to walk
176 * @newfid: new fid (for clone operations)
177 * @name: path to walk fid to
178 * @fcall: pointer to response fcall
179 *
180 */
181
182/* TODO: support multiple walk */
183
184int
185v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid,
186 char *name, struct v9fs_fcall **fcall)
187{
188 struct v9fs_fcall msg;
189
190 dprintk(DEBUG_9P, "fid %d newfid %d wname '%s'\n", fid, newfid, name);
191 msg.id = TWALK;
192 msg.params.twalk.fid = fid;
193 msg.params.twalk.newfid = newfid;
194
195 if (name) {
196 msg.params.twalk.nwname = 1;
197 msg.params.twalk.wnames = &name;
198 } else {
199 msg.params.twalk.nwname = 0;
200 }
201
202 return v9fs_mux_rpc(v9ses, &msg, fcall);
203}
204
205/**
206 * v9fs_t_open - open a file
207 *
208 * @v9ses - 9P2000 session information
209 * @fid - fid to open
210 * @mode - mode to open file (R, RW, etc)
211 * @fcall - pointer to response fcall
212 *
213 */
214
215int
216v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode,
217 struct v9fs_fcall **fcall)
218{
219 struct v9fs_fcall msg;
220 long errorno = -1;
221
222 dprintk(DEBUG_9P, "fid %d mode %d\n", fid, mode);
223 msg.id = TOPEN;
224 msg.params.topen.fid = fid;
225 msg.params.topen.mode = mode;
226
227 errorno = v9fs_mux_rpc(v9ses, &msg, fcall);
228
229 return errorno;
230}
231
232/**
233 * v9fs_t_remove - remove a file or directory
234 * @v9ses: 9P2000 session information
235 * @fid: fid to remove
236 * @fcall: pointer to response fcall
237 *
238 */
239
240int
241v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid,
242 struct v9fs_fcall **fcall)
243{
244 struct v9fs_fcall msg;
245
246 dprintk(DEBUG_9P, "fid %d\n", fid);
247 msg.id = TREMOVE;
248 msg.params.tremove.fid = fid;
249 return v9fs_mux_rpc(v9ses, &msg, fcall);
250}
251
252/**
253 * v9fs_t_create - create a file or directory
254 * @v9ses: 9P2000 session information
255 * @fid: fid to create
256 * @name: name of the file or directory to create
257 * @perm: permissions to create with
258 * @mode: mode to open file (R, RW, etc)
259 * @fcall: pointer to response fcall
260 *
261 */
262
263int
264v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name,
265 u32 perm, u8 mode, struct v9fs_fcall **fcall)
266{
267 struct v9fs_fcall msg;
268
269 dprintk(DEBUG_9P, "fid %d name '%s' perm %x mode %d\n",
270 fid, name, perm, mode);
271
272 msg.id = TCREATE;
273 msg.params.tcreate.fid = fid;
274 msg.params.tcreate.name = name;
275 msg.params.tcreate.perm = perm;
276 msg.params.tcreate.mode = mode;
277
278 return v9fs_mux_rpc(v9ses, &msg, fcall);
279}
280
281/**
282 * v9fs_t_read - read data
283 * @v9ses: 9P2000 session information
284 * @fid: fid to read from
285 * @offset: offset to start read at
286 * @count: how many bytes to read
287 * @fcall: pointer to response fcall (with data)
288 *
289 */
290
291int
292v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, u64 offset,
293 u32 count, struct v9fs_fcall **fcall)
294{
295 struct v9fs_fcall msg;
296 struct v9fs_fcall *rc = NULL;
297 long errorno = -1;
298
299 dprintk(DEBUG_9P, "fid %d offset 0x%lx count 0x%x\n", fid,
300 (long unsigned int)offset, count);
301 msg.id = TREAD;
302 msg.params.tread.fid = fid;
303 msg.params.tread.offset = offset;
304 msg.params.tread.count = count;
305 errorno = v9fs_mux_rpc(v9ses, &msg, &rc);
306
307 if (!errorno) {
308 errorno = rc->params.rread.count;
309 dump_data(rc->params.rread.data, rc->params.rread.count);
310 }
311
312 if (fcall)
313 *fcall = rc;
314 else
315 kfree(rc);
316
317 return errorno;
318}
319
320/**
321 * v9fs_t_write - write data
322 * @v9ses: 9P2000 session information
323 * @fid: fid to write to
324 * @offset: offset to start write at
325 * @count: how many bytes to write
326 * @fcall: pointer to response fcall
327 *
328 */
329
330int
331v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid,
332 u64 offset, u32 count, void *data, struct v9fs_fcall **fcall)
333{
334 struct v9fs_fcall msg;
335 struct v9fs_fcall *rc = NULL;
336 long errorno = -1;
337
338 dprintk(DEBUG_9P, "fid %d offset 0x%llx count 0x%x\n", fid,
339 (unsigned long long)offset, count);
340 dump_data(data, count);
341
342 msg.id = TWRITE;
343 msg.params.twrite.fid = fid;
344 msg.params.twrite.offset = offset;
345 msg.params.twrite.count = count;
346 msg.params.twrite.data = data;
347
348 errorno = v9fs_mux_rpc(v9ses, &msg, &rc);
349
350 if (!errorno)
351 errorno = rc->params.rwrite.count;
352
353 if (fcall)
354 *fcall = rc;
355 else
356 kfree(rc);
357
358 return errorno;
359}
diff --git a/fs/9p/9p.h b/fs/9p/9p.h
new file mode 100644
index 000000000000..f55424216be2
--- /dev/null
+++ b/fs/9p/9p.h
@@ -0,0 +1,341 @@
1/*
2 * linux/fs/9p/9p.h
3 *
4 * 9P protocol definitions.
5 *
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to:
21 * Free Software Foundation
22 * 51 Franklin Street, Fifth Floor
23 * Boston, MA 02111-1301 USA
24 *
25 */
26
/*
 * Message Types
 *
 * Each T-message code is even; the matching R-message code is the
 * following odd value.
 */
enum {
	TVERSION = 100,
	RVERSION = 101,
	TAUTH = 102,
	RAUTH = 103,
	TATTACH = 104,
	RATTACH = 105,
	TERROR = 106,
	RERROR = 107,
	TFLUSH = 108,
	RFLUSH = 109,
	TWALK = 110,
	RWALK = 111,
	TOPEN = 112,
	ROPEN = 113,
	TCREATE = 114,
	RCREATE = 115,
	TREAD = 116,
	RREAD = 117,
	TWRITE = 118,
	RWRITE = 119,
	TCLUNK = 120,
	RCLUNK = 121,
	TREMOVE = 122,
	RREMOVE = 123,
	TSTAT = 124,
	RSTAT = 125,
	TWSTAT = 126,
	RWSTAT = 127,
};
58
/* modes: values 0-3 select the access type, the rest are single-bit flags */
enum {
	V9FS_OREAD = 0,
	V9FS_OWRITE = 1,
	V9FS_ORDWR = 2,
	V9FS_OEXEC = 3,
	V9FS_OEXCL = 1 << 2,
	V9FS_OTRUNC = 1 << 4,
	V9FS_OREXEC = 1 << 5,
	V9FS_ORCLOSE = 1 << 6,
	V9FS_OAPPEND = 1 << 7,
};
71
/* permissions: one bit each, counted down from bit 31 */
enum {
	V9FS_DMDIR = 1U << 31,
	V9FS_DMAPPEND = 1U << 30,
	V9FS_DMEXCL = 1U << 29,
	V9FS_DMMOUNT = 1U << 28,
	V9FS_DMAUTH = 1U << 27,
	V9FS_DMTMP = 1U << 26,
	V9FS_DMSYMLINK = 1U << 25,
	V9FS_DMLINK = 1U << 24,
	/* 9P2000.u extensions */
	V9FS_DMDEVICE = 1U << 23,
	V9FS_DMNAMEDPIPE = 1U << 21,
	V9FS_DMSOCKET = 1U << 20,
	V9FS_DMSETUID = 1U << 19,
	V9FS_DMSETGID = 1U << 18,
};
89
/* qid.types: one bit per flag; a plain file has no bits set */
enum {
	V9FS_QTDIR = 1 << 7,
	V9FS_QTAPPEND = 1 << 6,
	V9FS_QTEXCL = 1 << 5,
	V9FS_QTMOUNT = 1 << 4,
	V9FS_QTAUTH = 1 << 3,
	V9FS_QTTMP = 1 << 2,
	V9FS_QTSYMLINK = 1 << 1,
	V9FS_QTLINK = 1 << 0,
	V9FS_QTFILE = 0,
};
102
103/* ample room for Twrite/Rread header (iounit) */
104#define V9FS_IOHDRSZ 24
105
106/* qids are the unique ID for a file (like an inode) */
107struct v9fs_qid {
108 u8 type; /* V9FS_QT* bits, see the qid.types enum */
109 u32 version;
110 u64 path;
111};
112
113/* Plan 9 file metadata (stat) structure */
114struct v9fs_stat {
115 u16 size; /* encoded length, not counting the size[2] field itself (see v9fs_size_stat) */
116 u16 type;
117 u32 dev;
118 struct v9fs_qid qid;
119 u32 mode;
120 u32 atime;
121 u32 mtime;
122 u64 length;
123 char *name;
124 char *uid;
125 char *gid;
126 char *muid;
127 char *extension; /* 9p2000.u extensions */
128 u32 n_uid; /* 9p2000.u extensions */
129 u32 n_gid; /* 9p2000.u extensions */
130 u32 n_muid; /* 9p2000.u extensions */
131 char data[0]; /* storage right after the struct; v9fs_deserialize_stat() decodes strings into it */
132};
133
134/* Structures for Protocol Operations */
135
136struct Tversion { /* wire: msize[4] version[s] */
137 u32 msize;
138 char *version;
139};
140
141struct Rversion {
142 u32 msize;
143 char *version;
144};
145
146struct Tauth { /* wire: afid[4] uname[s] aname[s] */
147 u32 afid;
148 char *uname;
149 char *aname;
150};
151
152struct Rauth {
153 struct v9fs_qid qid;
154};
155
156struct Rerror {
157 char *error;
158 u32 errno; /* 9p2000.u extension */
159};
160
161struct Tflush { /* wire: oldtag[2] */
162 u32 oldtag; /* written with buf_put_int16(), so only 16 bits go on the wire */
163};
164
165struct Rflush {
166};
167
168struct Tattach { /* wire: fid[4] afid[4] uname[s] aname[s] */
169 u32 fid;
170 u32 afid;
171 char *uname;
172 char *aname;
173};
174
175struct Rattach {
176 struct v9fs_qid qid;
177};
178
179struct Twalk { /* wire: fid[4] newfid[4] nwname[2] nwname*(wname[s]) */
180 u32 fid;
181 u32 newfid;
182 u32 nwname; /* number of entries in wnames */
183 char **wnames;
184};
185
186struct Rwalk {
187 u32 nwqid;
188 struct v9fs_qid *wqids;
189};
190
191struct Topen { /* wire: fid[4] mode[1] */
192 u32 fid;
193 u8 mode;
194};
195
196struct Ropen {
197 struct v9fs_qid qid;
198 u32 iounit;
199};
200
201struct Tcreate { /* wire: fid[4] name[s] perm[4] mode[1] */
202 u32 fid;
203 char *name;
204 u32 perm;
205 u8 mode;
206};
207
208struct Rcreate {
209 struct v9fs_qid qid;
210 u32 iounit;
211};
212
213struct Tread { /* wire: fid[4] offset[8] count[4] */
214 u32 fid;
215 u64 offset;
216 u32 count;
217};
218
219struct Rread {
220 u32 count;
221 u8 *data;
222};
223
224struct Twrite { /* wire: fid[4] offset[8] count[4] data[count] */
225 u32 fid;
226 u64 offset;
227 u32 count;
228 u8 *data;
229};
230
231struct Rwrite {
232 u32 count;
233};
234
235struct Tclunk { /* wire: fid[4] */
236 u32 fid;
237};
238
239struct Rclunk {
240};
241
242struct Tremove { /* wire: fid[4] */
243 u32 fid;
244};
245
246struct Rremove {
247};
248
249struct Tstat { /* wire: fid[4] */
250 u32 fid;
251};
252
253struct Rstat {
254 struct v9fs_stat *stat;
255};
256
257struct Twstat { /* wire: fid[4] stat[n] */
258 u32 fid;
259 struct v9fs_stat *stat;
260};
261
262struct Rwstat {
263};
264
265/*
266 * fcall is the primary packet structure
267 *
268 * The header fields (size, id, tag) are written first by
269 * v9fs_serialize_fcall(); id selects which member of params is valid.
270 */
269
270struct v9fs_fcall {
271 u32 size; /* total encoded size; set from v9fs_size_fcall() when serializing */
272 u8 id; /* message type (TVERSION ... RWSTAT) */
273 u16 tag;
274
275 union {
276 struct Tversion tversion;
277 struct Rversion rversion;
278 struct Tauth tauth;
279 struct Rauth rauth;
280 struct Rerror rerror;
281 struct Tflush tflush;
282 struct Rflush rflush;
283 struct Tattach tattach;
284 struct Rattach rattach;
285 struct Twalk twalk;
286 struct Rwalk rwalk;
287 struct Topen topen;
288 struct Ropen ropen;
289 struct Tcreate tcreate;
290 struct Rcreate rcreate;
291 struct Tread tread;
292 struct Rread rread;
293 struct Twrite twrite;
294 struct Rwrite rwrite;
295 struct Tclunk tclunk;
296 struct Rclunk rclunk;
297 struct Tremove tremove;
298 struct Rremove rremove;
299 struct Tstat tstat;
300 struct Rstat rstat;
301 struct Twstat twstat;
302 struct Rwstat rwstat;
303 } params;
304};
305
/*
 * FCALL_ERROR - error string of an Rerror response, or "" when fcall is NULL.
 * The argument is parenthesized so that expression arguments expand
 * correctly; note that it is still evaluated twice.
 */
#define FCALL_ERROR(fcall) ((fcall) ? (fcall)->params.rerror.error : "")
307
/*
 * 9P request helpers implemented in 9p.c. Each builds one T-message and
 * passes it to v9fs_mux_rpc(). v9fs_t_read() and v9fs_t_write() return the
 * byte count from the response when the rpc returns 0; every helper
 * otherwise returns the rpc result.
 */
308int v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
309 char *version, struct v9fs_fcall **rcall);
310
311int v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname,
312 u32 fid, u32 afid, struct v9fs_fcall **rcall);
313
314int v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid,
315 struct v9fs_fcall **rcall);
316
317int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 oldtag);
318
319int v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid,
320 struct v9fs_fcall **rcall);
321
322int v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid,
323 struct v9fs_stat *stat, struct v9fs_fcall **rcall);
324
325int v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid,
326 char *name, struct v9fs_fcall **rcall);
327
328int v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode,
329 struct v9fs_fcall **rcall);
330
331int v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid,
332 struct v9fs_fcall **rcall);
333
334int v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name,
335 u32 perm, u8 mode, struct v9fs_fcall **rcall);
336
337int v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid,
338 u64 offset, u32 count, struct v9fs_fcall **rcall);
339
340int v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset,
341 u32 count, void *data, struct v9fs_fcall **rcall);
diff --git a/fs/9p/Makefile b/fs/9p/Makefile
new file mode 100644
index 000000000000..e4e4ffe5a7dc
--- /dev/null
+++ b/fs/9p/Makefile
@@ -0,0 +1,17 @@
# 9p2000.o is enabled by CONFIG_9P_FS and is built from the objects
# listed in 9p2000-objs below.
1obj-$(CONFIG_9P_FS) := 9p2000.o
2
39p2000-objs := \
4 vfs_super.o \
5 vfs_inode.o \
6 vfs_file.o \
7 vfs_dir.o \
8 vfs_dentry.o \
9 error.o \
10 mux.o \
11 trans_fd.o \
12 trans_sock.o \
13 9p.o \
14 conv.o \
15 v9fs.o \
16 fid.o
17
diff --git a/fs/9p/conv.c b/fs/9p/conv.c
new file mode 100644
index 000000000000..1554731bd653
--- /dev/null
+++ b/fs/9p/conv.c
@@ -0,0 +1,693 @@
1/*
2 * linux/fs/9p/conv.c
3 *
4 * 9P protocol conversion functions
5 *
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to:
21 * Free Software Foundation
22 * 51 Franklin Street, Fifth Floor
23 * Boston, MA 02111-1301 USA
24 *
25 */
26
27#include <linux/config.h>
28#include <linux/module.h>
29#include <linux/errno.h>
30#include <linux/fs.h>
31#include <linux/idr.h>
32
33#include "debug.h"
34#include "v9fs.h"
35#include "9p.h"
36#include "conv.h"
37
38/*
39 * Bounded cursor over a byte buffer, used to encode and decode 9P data.
40 * A cursor beyond ep marks an overflow (see buf_check_overflow).
40 */
41struct cbuf {
42 unsigned char *sp; /* start of the buffer */
43 unsigned char *p; /* current read/write position */
44 unsigned char *ep; /* end of the buffer: sp + length */
45};
46
47static inline void buf_init(struct cbuf *buf, void *data, int datalen)
48{
49 buf->sp = buf->p = data;
50 buf->ep = data + datalen;
51}
52
53static inline int buf_check_overflow(struct cbuf *buf)
54{
55 return buf->p > buf->ep;
56}
57
58static inline void buf_check_size(struct cbuf *buf, int len)
59{
60 if (buf->p+len > buf->ep) {
61 if (buf->p < buf->ep) {
62 eprintk(KERN_ERR, "buffer overflow\n");
63 buf->p = buf->ep + 1;
64 }
65 }
66}
67
68static inline void *buf_alloc(struct cbuf *buf, int len)
69{
70 void *ret = NULL;
71
72 buf_check_size(buf, len);
73 ret = buf->p;
74 buf->p += len;
75
76 return ret;
77}
78
79static inline void buf_put_int8(struct cbuf *buf, u8 val)
80{
81 buf_check_size(buf, 1);
82
83 buf->p[0] = val;
84 buf->p++;
85}
86
87static inline void buf_put_int16(struct cbuf *buf, u16 val)
88{
89 buf_check_size(buf, 2);
90
91 *(__le16 *) buf->p = cpu_to_le16(val);
92 buf->p += 2;
93}
94
95static inline void buf_put_int32(struct cbuf *buf, u32 val)
96{
97 buf_check_size(buf, 4);
98
99 *(__le32 *)buf->p = cpu_to_le32(val);
100 buf->p += 4;
101}
102
103static inline void buf_put_int64(struct cbuf *buf, u64 val)
104{
105 buf_check_size(buf, 8);
106
107 *(__le64 *)buf->p = cpu_to_le64(val);
108 buf->p += 8;
109}
110
111static inline void buf_put_stringn(struct cbuf *buf, const char *s, u16 slen)
112{
113 buf_check_size(buf, slen + 2);
114
115 buf_put_int16(buf, slen);
116 memcpy(buf->p, s, slen);
117 buf->p += slen;
118}
119
/*
 * buf_put_string - append a NUL-terminated string in length-prefixed form
 *
 * A NULL @s is encoded as an empty string. v9fs_size_stat() accepts NULL
 * stat strings and counts them as zero length, so the encoder must accept
 * them as well rather than pass NULL to strlen().
 */
static inline void buf_put_string(struct cbuf *buf, const char *s)
{
	if (!s)
		s = "";

	buf_put_stringn(buf, s, strlen(s));
}
124
125static inline void buf_put_data(struct cbuf *buf, void *data, u32 datalen)
126{
127 buf_check_size(buf, datalen);
128
129 memcpy(buf->p, data, datalen);
130 buf->p += datalen;
131}
132
133static inline u8 buf_get_int8(struct cbuf *buf)
134{
135 u8 ret = 0;
136
137 buf_check_size(buf, 1);
138 ret = buf->p[0];
139
140 buf->p++;
141
142 return ret;
143}
144
145static inline u16 buf_get_int16(struct cbuf *buf)
146{
147 u16 ret = 0;
148
149 buf_check_size(buf, 2);
150 ret = le16_to_cpu(*(__le16 *)buf->p);
151
152 buf->p += 2;
153
154 return ret;
155}
156
157static inline u32 buf_get_int32(struct cbuf *buf)
158{
159 u32 ret = 0;
160
161 buf_check_size(buf, 4);
162 ret = le32_to_cpu(*(__le32 *)buf->p);
163
164 buf->p += 4;
165
166 return ret;
167}
168
169static inline u64 buf_get_int64(struct cbuf *buf)
170{
171 u64 ret = 0;
172
173 buf_check_size(buf, 8);
174 ret = le64_to_cpu(*(__le64 *)buf->p);
175
176 buf->p += 8;
177
178 return ret;
179}
180
181static inline int
182buf_get_string(struct cbuf *buf, char *data, unsigned int datalen)
183{
184
185 u16 len = buf_get_int16(buf);
186 buf_check_size(buf, len);
187 if (len + 1 > datalen)
188 return 0;
189
190 memcpy(data, buf->p, len);
191 data[len] = 0;
192 buf->p += len;
193
194 return len + 1;
195}
196
197static inline char *buf_get_stringb(struct cbuf *buf, struct cbuf *sbuf)
198{
199 char *ret = NULL;
200 int n = buf_get_string(buf, sbuf->p, sbuf->ep - sbuf->p);
201
202 if (n > 0) {
203 ret = sbuf->p;
204 sbuf->p += n;
205 }
206
207 return ret;
208}
209
210static inline int buf_get_data(struct cbuf *buf, void *data, int datalen)
211{
212 buf_check_size(buf, datalen);
213
214 memcpy(data, buf->p, datalen);
215 buf->p += datalen;
216
217 return datalen;
218}
219
220static inline void *buf_get_datab(struct cbuf *buf, struct cbuf *dbuf,
221 int datalen)
222{
223 char *ret = NULL;
224 int n = 0;
225
226 buf_check_size(dbuf, datalen);
227
228 n = buf_get_data(buf, dbuf->p, datalen);
229
230 if (n > 0) {
231 ret = dbuf->p;
232 dbuf->p += n;
233 }
234
235 return ret;
236}
237
238/**
239 * v9fs_size_stat - calculate the size of a variable length stat struct
240 * @v9ses: session information
241 * @stat: metadata (stat) structure
242 *
243 */
244
245static int v9fs_size_stat(struct v9fs_session_info *v9ses,
246 struct v9fs_stat *stat)
247{
248 int size = 0;
249
250 if (stat == NULL) {
251 eprintk(KERN_ERR, "v9fs_size_stat: got a NULL stat pointer\n");
252 return 0;
253 }
254
255 size = /* 2 + *//* size[2] */
256 2 + /* type[2] */
257 4 + /* dev[4] */
258 1 + /* qid.type[1] */
259 4 + /* qid.vers[4] */
260 8 + /* qid.path[8] */
261 4 + /* mode[4] */
262 4 + /* atime[4] */
263 4 + /* mtime[4] */
264 8 + /* length[8] */
265 8; /* minimum sum of string lengths */
266
267 if (stat->name)
268 size += strlen(stat->name);
269 if (stat->uid)
270 size += strlen(stat->uid);
271 if (stat->gid)
272 size += strlen(stat->gid);
273 if (stat->muid)
274 size += strlen(stat->muid);
275
276 if (v9ses->extended) {
277 size += 4 + /* n_uid[4] */
278 4 + /* n_gid[4] */
279 4 + /* n_muid[4] */
280 2; /* string length of extension[4] */
281 if (stat->extension)
282 size += strlen(stat->extension);
283 }
284
285 return size;
286}
287
288/**
289 * serialize_stat - safely format a stat structure for transmission
290 * @v9ses: session info
291 * @stat: metadata (stat) structure
292 * @bufp: buffer to serialize structure into
293 *
294 */
295
296static int
297serialize_stat(struct v9fs_session_info *v9ses, struct v9fs_stat *stat,
298 struct cbuf *bufp)
299{
300 buf_put_int16(bufp, stat->size);
301 buf_put_int16(bufp, stat->type);
302 buf_put_int32(bufp, stat->dev);
303 buf_put_int8(bufp, stat->qid.type);
304 buf_put_int32(bufp, stat->qid.version);
305 buf_put_int64(bufp, stat->qid.path);
306 buf_put_int32(bufp, stat->mode);
307 buf_put_int32(bufp, stat->atime);
308 buf_put_int32(bufp, stat->mtime);
309 buf_put_int64(bufp, stat->length);
310
311 buf_put_string(bufp, stat->name);
312 buf_put_string(bufp, stat->uid);
313 buf_put_string(bufp, stat->gid);
314 buf_put_string(bufp, stat->muid);
315
316 if (v9ses->extended) {
317 buf_put_string(bufp, stat->extension);
318 buf_put_int32(bufp, stat->n_uid);
319 buf_put_int32(bufp, stat->n_gid);
320 buf_put_int32(bufp, stat->n_muid);
321 }
322
323 if (buf_check_overflow(bufp))
324 return 0;
325
326 return stat->size;
327}
328
329/**
330 * deserialize_stat - safely decode a recieved metadata (stat) structure
331 * @v9ses: session info
332 * @bufp: buffer to deserialize
333 * @stat: metadata (stat) structure
334 * @dbufp: buffer to deserialize variable strings into
335 *
336 */
337
338static inline int
339deserialize_stat(struct v9fs_session_info *v9ses, struct cbuf *bufp,
340 struct v9fs_stat *stat, struct cbuf *dbufp)
341{
342
343 stat->size = buf_get_int16(bufp);
344 stat->type = buf_get_int16(bufp);
345 stat->dev = buf_get_int32(bufp);
346 stat->qid.type = buf_get_int8(bufp);
347 stat->qid.version = buf_get_int32(bufp);
348 stat->qid.path = buf_get_int64(bufp);
349 stat->mode = buf_get_int32(bufp);
350 stat->atime = buf_get_int32(bufp);
351 stat->mtime = buf_get_int32(bufp);
352 stat->length = buf_get_int64(bufp);
353 stat->name = buf_get_stringb(bufp, dbufp);
354 stat->uid = buf_get_stringb(bufp, dbufp);
355 stat->gid = buf_get_stringb(bufp, dbufp);
356 stat->muid = buf_get_stringb(bufp, dbufp);
357
358 if (v9ses->extended) {
359 stat->extension = buf_get_stringb(bufp, dbufp);
360 stat->n_uid = buf_get_int32(bufp);
361 stat->n_gid = buf_get_int32(bufp);
362 stat->n_muid = buf_get_int32(bufp);
363 }
364
365 if (buf_check_overflow(bufp) || buf_check_overflow(dbufp))
366 return 0;
367
368 return stat->size + 2;
369}
370
371/**
372 * deserialize_statb - wrapper for decoding a received metadata structure
373 * @v9ses: session info
374 * @bufp: buffer to deserialize
375 * @dbufp: buffer to deserialize variable strings into
376 *
377 */
378
379static inline struct v9fs_stat *deserialize_statb(struct v9fs_session_info
380 *v9ses, struct cbuf *bufp,
381 struct cbuf *dbufp)
382{
383 struct v9fs_stat *ret = buf_alloc(dbufp, sizeof(struct v9fs_stat));
384
385 if (ret) {
386 int n = deserialize_stat(v9ses, bufp, ret, dbufp);
387 if (n <= 0)
388 return NULL;
389 }
390
391 return ret;
392}
393
394/**
395 * v9fs_deserialize_stat - decode a received metadata structure
396 * @v9ses: session info
397 * @buf: buffer to deserialize
398 * @buflen: length of received buffer
399 * @stat: metadata structure to decode into
400 * @statlen: length of destination metadata structure
401 *
402 */
403
404int
405v9fs_deserialize_stat(struct v9fs_session_info *v9ses, void *buf,
406 u32 buflen, struct v9fs_stat *stat, u32 statlen)
407{
408 struct cbuf buffer;
409 struct cbuf *bufp = &buffer;
410 struct cbuf dbuffer;
411 struct cbuf *dbufp = &dbuffer;
412
413 buf_init(bufp, buf, buflen);
414 buf_init(dbufp, (char *)stat + sizeof(struct v9fs_stat),
415 statlen - sizeof(struct v9fs_stat));
416
417 return deserialize_stat(v9ses, bufp, stat, dbufp);
418}
419
420static inline int
421v9fs_size_fcall(struct v9fs_session_info *v9ses, struct v9fs_fcall *fcall)
422{
423 int size = 4 + 1 + 2; /* size[4] msg[1] tag[2] */
424 int i = 0;
425
426 switch (fcall->id) {
427 default:
428 eprintk(KERN_ERR, "bad msg type %d\n", fcall->id);
429 return 0;
430 case TVERSION: /* msize[4] version[s] */
431 size += 4 + 2 + strlen(fcall->params.tversion.version);
432 break;
433 case TAUTH: /* afid[4] uname[s] aname[s] */
434 size += 4 + 2 + strlen(fcall->params.tauth.uname) +
435 2 + strlen(fcall->params.tauth.aname);
436 break;
437 case TFLUSH: /* oldtag[2] */
438 size += 2;
439 break;
440 case TATTACH: /* fid[4] afid[4] uname[s] aname[s] */
441 size += 4 + 4 + 2 + strlen(fcall->params.tattach.uname) +
442 2 + strlen(fcall->params.tattach.aname);
443 break;
444 case TWALK: /* fid[4] newfid[4] nwname[2] nwname*(wname[s]) */
445 size += 4 + 4 + 2;
446 /* now compute total for the array of names */
447 for (i = 0; i < fcall->params.twalk.nwname; i++)
448 size += 2 + strlen(fcall->params.twalk.wnames[i]);
449 break;
450 case TOPEN: /* fid[4] mode[1] */
451 size += 4 + 1;
452 break;
453 case TCREATE: /* fid[4] name[s] perm[4] mode[1] */
454 size += 4 + 2 + strlen(fcall->params.tcreate.name) + 4 + 1;
455 break;
456 case TREAD: /* fid[4] offset[8] count[4] */
457 size += 4 + 8 + 4;
458 break;
459 case TWRITE: /* fid[4] offset[8] count[4] data[count] */
460 size += 4 + 8 + 4 + fcall->params.twrite.count;
461 break;
462 case TCLUNK: /* fid[4] */
463 size += 4;
464 break;
465 case TREMOVE: /* fid[4] */
466 size += 4;
467 break;
468 case TSTAT: /* fid[4] */
469 size += 4;
470 break;
471 case TWSTAT: /* fid[4] stat[n] */
472 fcall->params.twstat.stat->size =
473 v9fs_size_stat(v9ses, fcall->params.twstat.stat);
474 size += 4 + 2 + 2 + fcall->params.twstat.stat->size;
475 }
476 return size;
477}
478
479/*
480 * v9fs_serialize_fcall - marshall fcall struct into a packet
481 * @v9ses: session information
482 * @fcall: structure to convert
483 * @data: buffer to serialize fcall into
484 * @datalen: length of buffer to serialize fcall into
485 *
486 */
487
488int
489v9fs_serialize_fcall(struct v9fs_session_info *v9ses, struct v9fs_fcall *fcall,
490 void *data, u32 datalen)
491{
492 int i = 0;
493 struct v9fs_stat *stat = NULL;
494 struct cbuf buffer;
495 struct cbuf *bufp = &buffer;
496
497 buf_init(bufp, data, datalen);
498
499 if (!fcall) {
500 eprintk(KERN_ERR, "no fcall\n");
501 return -EINVAL;
502 }
503
504 fcall->size = v9fs_size_fcall(v9ses, fcall);
505
506 buf_put_int32(bufp, fcall->size);
507 buf_put_int8(bufp, fcall->id);
508 buf_put_int16(bufp, fcall->tag);
509
510 dprintk(DEBUG_CONV, "size %d id %d tag %d\n", fcall->size, fcall->id,
511 fcall->tag);
512
513 /* now encode it */
514 switch (fcall->id) {
515 default:
516 eprintk(KERN_ERR, "bad msg type: %d\n", fcall->id);
517 return -EPROTO;
518 case TVERSION:
519 buf_put_int32(bufp, fcall->params.tversion.msize);
520 buf_put_string(bufp, fcall->params.tversion.version);
521 break;
522 case TAUTH:
523 buf_put_int32(bufp, fcall->params.tauth.afid);
524 buf_put_string(bufp, fcall->params.tauth.uname);
525 buf_put_string(bufp, fcall->params.tauth.aname);
526 break;
527 case TFLUSH:
528 buf_put_int16(bufp, fcall->params.tflush.oldtag);
529 break;
530 case TATTACH:
531 buf_put_int32(bufp, fcall->params.tattach.fid);
532 buf_put_int32(bufp, fcall->params.tattach.afid);
533 buf_put_string(bufp, fcall->params.tattach.uname);
534 buf_put_string(bufp, fcall->params.tattach.aname);
535 break;
536 case TWALK:
537 buf_put_int32(bufp, fcall->params.twalk.fid);
538 buf_put_int32(bufp, fcall->params.twalk.newfid);
539 buf_put_int16(bufp, fcall->params.twalk.nwname);
540 for (i = 0; i < fcall->params.twalk.nwname; i++)
541 buf_put_string(bufp, fcall->params.twalk.wnames[i]);
542 break;
543 case TOPEN:
544 buf_put_int32(bufp, fcall->params.topen.fid);
545 buf_put_int8(bufp, fcall->params.topen.mode);
546 break;
547 case TCREATE:
548 buf_put_int32(bufp, fcall->params.tcreate.fid);
549 buf_put_string(bufp, fcall->params.tcreate.name);
550 buf_put_int32(bufp, fcall->params.tcreate.perm);
551 buf_put_int8(bufp, fcall->params.tcreate.mode);
552 break;
553 case TREAD:
554 buf_put_int32(bufp, fcall->params.tread.fid);
555 buf_put_int64(bufp, fcall->params.tread.offset);
556 buf_put_int32(bufp, fcall->params.tread.count);
557 break;
558 case TWRITE:
559 buf_put_int32(bufp, fcall->params.twrite.fid);
560 buf_put_int64(bufp, fcall->params.twrite.offset);
561 buf_put_int32(bufp, fcall->params.twrite.count);
562 buf_put_data(bufp, fcall->params.twrite.data,
563 fcall->params.twrite.count);
564 break;
565 case TCLUNK:
566 buf_put_int32(bufp, fcall->params.tclunk.fid);
567 break;
568 case TREMOVE:
569 buf_put_int32(bufp, fcall->params.tremove.fid);
570 break;
571 case TSTAT:
572 buf_put_int32(bufp, fcall->params.tstat.fid);
573 break;
574 case TWSTAT:
575 buf_put_int32(bufp, fcall->params.twstat.fid);
576 stat = fcall->params.twstat.stat;
577
578 buf_put_int16(bufp, stat->size + 2);
579 serialize_stat(v9ses, stat, bufp);
580 break;
581 }
582
583 if (buf_check_overflow(bufp))
584 return -EIO;
585
586 return fcall->size;
587}
588
589/**
590 * deserialize_fcall - unmarshal a response
591 * @v9ses: session information
592 * @msgsize: size of rcall message
593 * @buf: recieved buffer
594 * @buflen: length of received buffer
595 * @rcall: fcall structure to populate
596 * @rcalllen: length of fcall structure to populate
597 *
598 */
599
600int
601v9fs_deserialize_fcall(struct v9fs_session_info *v9ses, u32 msgsize,
602 void *buf, u32 buflen, struct v9fs_fcall *rcall,
603 int rcalllen)
604{
605
606 struct cbuf buffer;
607 struct cbuf *bufp = &buffer;
608 struct cbuf dbuffer;
609 struct cbuf *dbufp = &dbuffer;
610 int i = 0;
611
612 buf_init(bufp, buf, buflen);
613 buf_init(dbufp, (char *)rcall + sizeof(struct v9fs_fcall),
614 rcalllen - sizeof(struct v9fs_fcall));
615
616 rcall->size = msgsize;
617 rcall->id = buf_get_int8(bufp);
618 rcall->tag = buf_get_int16(bufp);
619
620 dprintk(DEBUG_CONV, "size %d id %d tag %d\n", rcall->size, rcall->id,
621 rcall->tag);
622 switch (rcall->id) {
623 default:
624 eprintk(KERN_ERR, "unknown message type: %d\n", rcall->id);
625 return -EPROTO;
626 case RVERSION:
627 rcall->params.rversion.msize = buf_get_int32(bufp);
628 rcall->params.rversion.version = buf_get_stringb(bufp, dbufp);
629 break;
630 case RFLUSH:
631 break;
632 case RATTACH:
633 rcall->params.rattach.qid.type = buf_get_int8(bufp);
634 rcall->params.rattach.qid.version = buf_get_int32(bufp);
635 rcall->params.rattach.qid.path = buf_get_int64(bufp);
636 break;
637 case RWALK:
638 rcall->params.rwalk.nwqid = buf_get_int16(bufp);
639 rcall->params.rwalk.wqids = buf_alloc(bufp,
640 rcall->params.rwalk.nwqid * sizeof(struct v9fs_qid));
641 if (rcall->params.rwalk.wqids)
642 for (i = 0; i < rcall->params.rwalk.nwqid; i++) {
643 rcall->params.rwalk.wqids[i].type =
644 buf_get_int8(bufp);
645 rcall->params.rwalk.wqids[i].version =
646 buf_get_int16(bufp);
647 rcall->params.rwalk.wqids[i].path =
648 buf_get_int64(bufp);
649 }
650 break;
651 case ROPEN:
652 rcall->params.ropen.qid.type = buf_get_int8(bufp);
653 rcall->params.ropen.qid.version = buf_get_int32(bufp);
654 rcall->params.ropen.qid.path = buf_get_int64(bufp);
655 rcall->params.ropen.iounit = buf_get_int32(bufp);
656 break;
657 case RCREATE:
658 rcall->params.rcreate.qid.type = buf_get_int8(bufp);
659 rcall->params.rcreate.qid.version = buf_get_int32(bufp);
660 rcall->params.rcreate.qid.path = buf_get_int64(bufp);
661 rcall->params.rcreate.iounit = buf_get_int32(bufp);
662 break;
663 case RREAD:
664 rcall->params.rread.count = buf_get_int32(bufp);
665 rcall->params.rread.data = buf_get_datab(bufp, dbufp,
666 rcall->params.rread.count);
667 break;
668 case RWRITE:
669 rcall->params.rwrite.count = buf_get_int32(bufp);
670 break;
671 case RCLUNK:
672 break;
673 case RREMOVE:
674 break;
675 case RSTAT:
676 buf_get_int16(bufp);
677 rcall->params.rstat.stat =
678 deserialize_statb(v9ses, bufp, dbufp);
679 break;
680 case RWSTAT:
681 break;
682 case RERROR:
683 rcall->params.rerror.error = buf_get_stringb(bufp, dbufp);
684 if (v9ses->extended)
685 rcall->params.rerror.errno = buf_get_int16(bufp);
686 break;
687 }
688
689 if (buf_check_overflow(bufp) || buf_check_overflow(dbufp))
690 return -EIO;
691
692 return rcall->size;
693}
diff --git a/fs/9p/conv.h b/fs/9p/conv.h
new file mode 100644
index 000000000000..ee849613c61a
--- /dev/null
+++ b/fs/9p/conv.h
@@ -0,0 +1,36 @@
1/*
2 * linux/fs/9p/conv.h
3 *
4 * 9P protocol conversion definitions
5 *
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to:
21 * Free Software Foundation
22 * 51 Franklin Street, Fifth Floor
23 * Boston, MA 02111-1301 USA
24 *
25 */
26
/* Decode a stat structure from a buffer (definition not shown here). */
27int v9fs_deserialize_stat(struct v9fs_session_info *, void *buf,
28 u32 buflen, struct v9fs_stat *stat, u32 statlen);
/*
 * Encode tcall into buf; returns the message size or a negative errno
 * (-EINVAL, -EPROTO or -EIO).
 */
29int v9fs_serialize_fcall(struct v9fs_session_info *, struct v9fs_fcall *tcall,
30 void *buf, u32 buflen);
/*
 * Decode a received message into rcall; returns msglen or a negative errno
 * (-EPROTO or -EIO).
 */
31int v9fs_deserialize_fcall(struct v9fs_session_info *, u32 msglen,
32 void *buf, u32 buflen, struct v9fs_fcall *rcall,
33 int rcalllen);
34
35/* this one is actually in error.c right now */
36int v9fs_errstr2errno(char *errstr);
diff --git a/fs/9p/debug.h b/fs/9p/debug.h
new file mode 100644
index 000000000000..4445f06919d9
--- /dev/null
+++ b/fs/9p/debug.h
@@ -0,0 +1,70 @@
1/*
2 * linux/fs/9p/debug.h - V9FS Debug Definitions
3 *
4 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
5 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to:
19 * Free Software Foundation
20 * 51 Franklin Street, Fifth Floor
21 * Boston, MA 02111-1301 USA
22 *
23 */
24
/*
 * Debug categories.  Each one is a single bit; dprintk() prints a message
 * only when all bits of its level argument are set in v9fs_debug_level.
 */
25#define DEBUG_ERROR (1<<0)
26#define DEBUG_CURRENT (1<<1)
27#define DEBUG_9P (1<<2)
28#define DEBUG_VFS (1<<3)
29#define DEBUG_CONV (1<<4)
30#define DEBUG_MUX (1<<5)
31#define DEBUG_TRANS (1<<6)
32#define DEBUG_SLABS (1<<7)
33
/* non-zero selects the dump_data() variant that prints packet bytes */
34#define DEBUG_DUMP_PKT 0
35
/* bitmask of enabled DEBUG_* categories; defined outside this header */
36extern int v9fs_debug_level;
37
/*
 * dprintk - print a debug message when its category is enabled
 * @level: one DEBUG_* bit, or several of them or'ed together
 * @format: printk format string (must be a string literal)
 *
 * The message is emitted at KERN_NOTICE, prefixed with the calling function
 * and the current pid, only if every bit of @level is set in
 * v9fs_debug_level.  @level is parenthesized so that an expression such as
 * DEBUG_9P | DEBUG_VFS is tested as a whole; it is evaluated twice.
 */
#define dprintk(level, format, arg...) \
do { \
	if ((v9fs_debug_level & (level)) == (level)) \
		printk(KERN_NOTICE "-- %s (%d): " \
			format , __FUNCTION__, current->pid , ## arg); \
} while (0)
44
/*
 * eprintk - unconditionally log a v9fs message
 * @level: printk log level string (e.g. KERN_ERR)
 * @fmt: printk format string (must be a string literal)
 *
 * The text is prefixed with "v9fs: ", the calling function and current pid.
 */
#define eprintk(level, fmt, args...) \
do { \
	printk(level "v9fs: %s (%d): " fmt, \
	       __FUNCTION__, current->pid, ## args); \
} while (0)
50
#if DEBUG_DUMP_PKT
/*
 * dump_data - hex dump a packet to the kernel log
 * @data: bytes to print
 * @datalen: number of bytes in @data
 *
 * Prints one line: "data " followed by the bytes as two hex digits each,
 * with a space after every group of four.  Only the first printk carries a
 * log level; the others continue the same line, so repeating the level
 * there would put the level marker into the middle of the output.
 */
static inline void dump_data(const unsigned char *data, unsigned int datalen)
{
	int i, j;
	int len = datalen;

	printk(KERN_DEBUG "data ");
	for (i = 0; i < len; i += 4) {
		for (j = 0; (j < 4) && (i + j < len); j++)
			printk("%02x", data[i + j]);
		printk(" ");
	}
	printk("\n");
}
#else				/* DEBUG_DUMP_PKT */
/* packet dumping compiled out: dump_data() does nothing */
static inline void dump_data(const unsigned char *data, unsigned int datalen)
{

}
#endif				/* DEBUG_DUMP_PKT */
diff --git a/fs/9p/error.c b/fs/9p/error.c
new file mode 100644
index 000000000000..fee5d19179c5
--- /dev/null
+++ b/fs/9p/error.c
@@ -0,0 +1,93 @@
1/*
2 * linux/fs/9p/error.c
3 *
4 * Error string handling
5 *
6 * Plan 9 uses error strings, Unix uses error numbers. These functions
7 * try to help manage that and provide for dynamically adding error
8 * mappings.
9 *
10 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
11 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to:
25 * Free Software Foundation
26 * 51 Franklin Street, Fifth Floor
27 * Boston, MA 02111-1301 USA
28 *
29 */
30
31#include <linux/config.h>
32#include <linux/module.h>
33
34#include <linux/list.h>
35#include <linux/jhash.h>
36
37#include "debug.h"
38#include "error.h"
39
40/**
41 * v9fs_error_init - preload
42 * @errstr: error string
43 *
44 */
45
46int v9fs_error_init(void)
47{
48 struct errormap *c;
49 int bucket;
50
51 /* initialize hash table */
52 for (bucket = 0; bucket < ERRHASHSZ; bucket++)
53 INIT_HLIST_HEAD(&hash_errmap[bucket]);
54
55 /* load initial error map into hash table */
56 for (c = errmap; c->name != NULL; c++) {
57 bucket = jhash(c->name, strlen(c->name), 0) % ERRHASHSZ;
58 INIT_HLIST_NODE(&c->list);
59 hlist_add_head(&c->list, &hash_errmap[bucket]);
60 }
61
62 return 1;
63}
64
65/**
66 * errstr2errno - convert error string to error number
67 * @errstr: error string
68 *
69 */
70
71int v9fs_errstr2errno(char *errstr)
72{
73 int errno = 0;
74 struct hlist_node *p = NULL;
75 struct errormap *c = NULL;
76 int bucket = jhash(errstr, strlen(errstr), 0) % ERRHASHSZ;
77
78 hlist_for_each_entry(c, p, &hash_errmap[bucket], list) {
79 if (!strcmp(c->name, errstr)) {
80 errno = c->val;
81 break;
82 }
83 }
84
85 if (errno == 0) {
86 /* TODO: if error isn't found, add it dynamically */
87 printk(KERN_ERR "%s: errstr :%s: not found\n", __FUNCTION__,
88 errstr);
89 errno = 1;
90 }
91
92 return -errno;
93}
diff --git a/fs/9p/error.h b/fs/9p/error.h
new file mode 100644
index 000000000000..78f89acf7c9a
--- /dev/null
+++ b/fs/9p/error.h
@@ -0,0 +1,178 @@
1/*
2 * linux/fs/9p/error.h
3 *
4 * Huge Nasty Error Table
5 *
6 * Plan 9 uses error strings, Unix uses error numbers. This table tries to
7 * match UNIX strings and Plan 9 strings to unix error numbers. It is used
8 * to preload the dynamic error table which can also track user-specific error
9 * strings.
10 *
11 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
12 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
13 *
14 * This program is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation; either version 2 of the License, or
17 * (at your option) any later version.
18 *
19 * This program is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
23 *
24 * You should have received a copy of the GNU General Public License
25 * along with this program; if not, write to:
26 * Free Software Foundation
27 * 51 Franklin Street, Fifth Floor
28 * Boston, MA 02111-1301 USA
29 *
30 */
31
32#include <linux/errno.h>
33#include <asm/errno.h>
34
/* One mapping from an error string to a Unix error number. */
35struct errormap {
36 char *name;	/* error string; a NULL name ends the errmap table */
37 int val;	/* positive errno value for that string */
38
39 struct hlist_node list;	/* link in a hash_errmap bucket */
40};
41
/* number of buckets; the hash of an error string is reduced modulo this */
42#define ERRHASHSZ 32
/* error strings hashed by name, filled by v9fs_error_init() */
43static struct hlist_head hash_errmap[ERRHASHSZ];
44
45/*
 * Initial error string table that v9fs_error_init() loads into
 * hash_errmap.  The table ends at the entry whose name is NULL.
 *
 * NOTE(review): v9fs_errstr2errno() treats a value of 0 as "not found",
 * so the two "not errors" entries at the end currently yield -1.
 *
 * FIXME - reduce to a reasonable size
 */
46static struct errormap errmap[] = {
47 {"Operation not permitted", EPERM},
48 {"wstat prohibited", EPERM},
49 {"No such file or directory", ENOENT},
50 {"directory entry not found", ENOENT},
51 {"file not found", ENOENT},
52 {"Interrupted system call", EINTR},
53 {"Input/output error", EIO},
54 {"No such device or address", ENXIO},
55 {"Argument list too long", E2BIG},
56 {"Bad file descriptor", EBADF},
57 {"Resource temporarily unavailable", EAGAIN},
58 {"Cannot allocate memory", ENOMEM},
59 {"Permission denied", EACCES},
60 {"Bad address", EFAULT},
61 {"Block device required", ENOTBLK},
62 {"Device or resource busy", EBUSY},
63 {"File exists", EEXIST},
64 {"Invalid cross-device link", EXDEV},
65 {"No such device", ENODEV},
66 {"Not a directory", ENOTDIR},
67 {"Is a directory", EISDIR},
68 {"Invalid argument", EINVAL},
69 {"Too many open files in system", ENFILE},
70 {"Too many open files", EMFILE},
71 {"Text file busy", ETXTBSY},
72 {"File too large", EFBIG},
73 {"No space left on device", ENOSPC},
74 {"Illegal seek", ESPIPE},
75 {"Read-only file system", EROFS},
76 {"Too many links", EMLINK},
77 {"Broken pipe", EPIPE},
78 {"Numerical argument out of domain", EDOM},
79 {"Numerical result out of range", ERANGE},
80 {"Resource deadlock avoided", EDEADLK},
81 {"File name too long", ENAMETOOLONG},
82 {"No locks available", ENOLCK},
83 {"Function not implemented", ENOSYS},
84 {"Directory not empty", ENOTEMPTY},
85 {"Too many levels of symbolic links", ELOOP},
86 {"No message of desired type", ENOMSG},
87 {"Identifier removed", EIDRM},
88 {"No data available", ENODATA},
89 {"Machine is not on the network", ENONET},
90 {"Package not installed", ENOPKG},
91 {"Object is remote", EREMOTE},
92 {"Link has been severed", ENOLINK},
93 {"Communication error on send", ECOMM},
94 {"Protocol error", EPROTO},
95 {"Bad message", EBADMSG},
96 {"File descriptor in bad state", EBADFD},
97 {"Streams pipe error", ESTRPIPE},
98 {"Too many users", EUSERS},
99 {"Socket operation on non-socket", ENOTSOCK},
100 {"Message too long", EMSGSIZE},
101 {"Protocol not available", ENOPROTOOPT},
102 {"Protocol not supported", EPROTONOSUPPORT},
103 {"Socket type not supported", ESOCKTNOSUPPORT},
104 {"Operation not supported", EOPNOTSUPP},
105 {"Protocol family not supported", EPFNOSUPPORT},
106 {"Network is down", ENETDOWN},
107 {"Network is unreachable", ENETUNREACH},
108 {"Network dropped connection on reset", ENETRESET},
109 {"Software caused connection abort", ECONNABORTED},
110 {"Connection reset by peer", ECONNRESET},
111 {"No buffer space available", ENOBUFS},
112 {"Transport endpoint is already connected", EISCONN},
113 {"Transport endpoint is not connected", ENOTCONN},
114 {"Cannot send after transport endpoint shutdown", ESHUTDOWN},
115 {"Connection timed out", ETIMEDOUT},
116 {"Connection refused", ECONNREFUSED},
117 {"Host is down", EHOSTDOWN},
118 {"No route to host", EHOSTUNREACH},
119 {"Operation already in progress", EALREADY},
120 {"Operation now in progress", EINPROGRESS},
121 {"Is a named type file", EISNAM},
122 {"Remote I/O error", EREMOTEIO},
123 {"Disk quota exceeded", EDQUOT},
124/* errors from fossil, vacfs, and u9fs */
125 {"fid unknown or out of range", EBADF},
126 {"permission denied", EACCES},
127 {"file does not exist", ENOENT},
128 {"authentication failed", ECONNREFUSED},
129 {"bad offset in directory read", ESPIPE},
130 {"bad use of fid", EBADF},
131 {"wstat can't convert between files and directories", EPERM},
132 {"directory is not empty", ENOTEMPTY},
133 {"file exists", EEXIST},
134 {"file already exists", EEXIST},
135 {"file or directory already exists", EEXIST},
136 {"fid already in use", EBADF},
137 {"file in use", ETXTBSY},
138 {"i/o error", EIO},
139 {"file already open for I/O", ETXTBSY},
140 {"illegal mode", EINVAL},
141 {"illegal name", ENAMETOOLONG},
142 {"not a directory", ENOTDIR},
143 {"not a member of proposed group", EPERM},
144 {"not owner", EACCES},
145 {"only owner can change group in wstat", EACCES},
146 {"read only file system", EROFS},
147 {"no access to special file", EPERM},
148 {"i/o count too large", EIO},
149 {"unknown group", EINVAL},
150 {"unknown user", EINVAL},
151 {"bogus wstat buffer", EPROTO},
152 {"exclusive use file already open", EAGAIN},
153 {"corrupted directory entry", EIO},
154 {"corrupted file entry", EIO},
155 {"corrupted block label", EIO},
156 {"corrupted meta data", EIO},
157 {"illegal offset", EINVAL},
158 {"illegal path element", ENOENT},
159 {"root of file system is corrupted", EIO},
160 {"corrupted super block", EIO},
161 {"protocol botch", EPROTO},
162 {"file system is full", ENOSPC},
163 {"file is in use", EAGAIN},
164 {"directory entry is not allocated", ENOENT},
165 {"file is read only", EROFS},
166 {"file has been removed", EIDRM},
167 {"only support truncation to zero length", EPERM},
168 {"cannot remove root", EPERM},
169 {"file too big", EFBIG},
170 {"venti i/o error", EIO},
171 /* these are not errors */
172 {"u9fs rhostsauth: no authentication required", 0},
173 {"u9fs authnone: no authentication required", 0},
174 {NULL, -1}
175};
176
/* fill hash_errmap from errmap; always returns 1 */
177extern int v9fs_error_init(void);
/* map an error string to a negative error number (-1 when unknown) */
178extern int v9fs_errstr2errno(char *errstr);
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
new file mode 100644
index 000000000000..821c9c4d76aa
--- /dev/null
+++ b/fs/9p/fid.c
@@ -0,0 +1,241 @@
1/*
2 * V9FS FID Management
3 *
4 * Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to:
18 * Free Software Foundation
19 * 51 Franklin Street, Fifth Floor
20 * Boston, MA 02111-1301 USA
21 *
22 */
23
24#include <linux/config.h>
25#include <linux/module.h>
26#include <linux/errno.h>
27#include <linux/fs.h>
28#include <linux/idr.h>
29
30#include "debug.h"
31#include "v9fs.h"
32#include "9p.h"
33#include "v9fs_vfs.h"
34#include "transport.h"
35#include "mux.h"
36#include "conv.h"
37#include "fid.h"
38
39/**
40 * v9fs_fid_insert - add a fid to a dentry
41 * @fid: fid to add
42 * @dentry: dentry that it is being added to
43 *
44 */
45
46static int v9fs_fid_insert(struct v9fs_fid *fid, struct dentry *dentry)
47{
48 struct list_head *fid_list = (struct list_head *)dentry->d_fsdata;
49 dprintk(DEBUG_9P, "fid %d (%p) dentry %s (%p)\n", fid->fid, fid,
50 dentry->d_iname, dentry);
51 if (dentry->d_fsdata == NULL) {
52 dentry->d_fsdata =
53 kmalloc(sizeof(struct list_head), GFP_KERNEL);
54 if (dentry->d_fsdata == NULL) {
55 dprintk(DEBUG_ERROR, "Out of memory\n");
56 return -ENOMEM;
57 }
58 fid_list = (struct list_head *)dentry->d_fsdata;
59 INIT_LIST_HEAD(fid_list); /* Initialize list head */
60 }
61
62 fid->uid = current->uid;
63 fid->pid = current->pid;
64 list_add(&fid->list, fid_list);
65 return 0;
66}
67
68/**
69 * v9fs_fid_create - allocate a FID structure
70 * @dentry - dentry to link newly created fid to
71 *
72 */
73
74struct v9fs_fid *v9fs_fid_create(struct dentry *dentry)
75{
76 struct v9fs_fid *new;
77
78 new = kmalloc(sizeof(struct v9fs_fid), GFP_KERNEL);
79 if (new == NULL) {
80 dprintk(DEBUG_ERROR, "Out of Memory\n");
81 return ERR_PTR(-ENOMEM);
82 }
83
84 new->fid = -1;
85 new->fidopen = 0;
86 new->fidcreate = 0;
87 new->fidclunked = 0;
88 new->iounit = 0;
89
90 if (v9fs_fid_insert(new, dentry) == 0)
91 return new;
92 else {
93 dprintk(DEBUG_ERROR, "Problems inserting to dentry\n");
94 kfree(new);
95 return NULL;
96 }
97}
98
99/**
100 * v9fs_fid_destroy - deallocate a FID structure
101 * @fid: fid to destroy
102 *
103 */
104
105void v9fs_fid_destroy(struct v9fs_fid *fid)
106{
107 list_del(&fid->list);
108 kfree(fid);
109}
110
111/**
112 * v9fs_fid_lookup - retrieve the right fid from a particular dentry
113 * @dentry: dentry to look for fid in
114 * @type: intent of lookup (operation or traversal)
115 *
116 * search list of fids associated with a dentry for a fid with a matching
117 * thread id or uid. If that fails, look up the dentry's parents to see if you
118 * can find a matching fid.
119 *
120 */
121
122struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry, int type)
123{
124 struct list_head *fid_list = (struct list_head *)dentry->d_fsdata;
125 struct v9fs_fid *current_fid = NULL;
126 struct v9fs_fid *temp = NULL;
127 struct v9fs_fid *return_fid = NULL;
128 int found_parent = 0;
129 int found_user = 0;
130
131 dprintk(DEBUG_9P, " dentry: %s (%p) type %d\n", dentry->d_iname, dentry,
132 type);
133
134 if (fid_list && !list_empty(fid_list)) {
135 list_for_each_entry_safe(current_fid, temp, fid_list, list) {
136 if (current_fid->uid == current->uid) {
137 if (return_fid == NULL) {
138 if ((type == FID_OP)
139 || (!current_fid->fidopen)) {
140 return_fid = current_fid;
141 found_user = 1;
142 }
143 }
144 }
145 if (current_fid->pid == current->real_parent->pid) {
146 if ((return_fid == NULL) || (found_parent)
147 || (found_user)) {
148 if ((type == FID_OP)
149 || (!current_fid->fidopen)) {
150 return_fid = current_fid;
151 found_parent = 1;
152 found_user = 0;
153 }
154 }
155 }
156 if (current_fid->pid == current->pid) {
157 if ((type == FID_OP) ||
158 (!current_fid->fidopen)) {
159 return_fid = current_fid;
160 found_parent = 0;
161 found_user = 0;
162 }
163 }
164 }
165 }
166
167 /* we are at the root but didn't match */
168 if ((!return_fid) && (dentry->d_parent == dentry)) {
169 /* TODO: clone attach with new uid */
170 return_fid = current_fid;
171 }
172
173 if (!return_fid) {
174 struct dentry *par = current->fs->pwd->d_parent;
175 int count = 1;
176 while (par != NULL) {
177 if (par == dentry)
178 break;
179 count++;
180 if (par == par->d_parent) {
181 dprintk(DEBUG_ERROR,
182 "got to root without finding dentry\n");
183 break;
184 }
185 par = par->d_parent;
186 }
187
188/* XXX - there may be some duplication we can get rid of */
189 if (par == dentry) {
190 /* we need to fid_lookup the starting point */
191 int fidnum = -1;
192 int oldfid = -1;
193 int result = -1;
194 struct v9fs_session_info *v9ses =
195 v9fs_inode2v9ses(current->fs->pwd->d_inode);
196
197 current_fid =
198 v9fs_fid_lookup(current->fs->pwd, FID_WALK);
199 if (current_fid == NULL) {
200 dprintk(DEBUG_ERROR,
201 "process cwd doesn't have a fid\n");
202 return return_fid;
203 }
204 oldfid = current_fid->fid;
205 par = current->fs->pwd;
206 /* TODO: take advantage of multiwalk */
207
208 fidnum = v9fs_get_idpool(&v9ses->fidpool);
209 if (fidnum < 0) {
210 dprintk(DEBUG_ERROR,
211 "could not get a new fid num\n");
212 return return_fid;
213 }
214
215 while (par != dentry) {
216 result =
217 v9fs_t_walk(v9ses, oldfid, fidnum, "..",
218 NULL);
219 if (result < 0) {
220 dprintk(DEBUG_ERROR,
221 "problem walking to parent\n");
222
223 break;
224 }
225 oldfid = fidnum;
226 if (par == par->d_parent) {
227 dprintk(DEBUG_ERROR,
228 "can't find dentry\n");
229 break;
230 }
231 par = par->d_parent;
232 }
233 if (par == dentry) {
234 return_fid = v9fs_fid_create(dentry);
235 return_fid->fid = fidnum;
236 }
237 }
238 }
239
240 return return_fid;
241}
diff --git a/fs/9p/fid.h b/fs/9p/fid.h
new file mode 100644
index 000000000000..7db478ccca36
--- /dev/null
+++ b/fs/9p/fid.h
@@ -0,0 +1,57 @@
1/*
2 * V9FS FID Management
3 *
4 * Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to:
18 * Free Software Foundation
19 * 51 Franklin Street, Fifth Floor
20 * Boston, MA 02111-1301 USA
21 *
22 */
23
24#include <linux/list.h>
25
/*
 * Lookup intents for v9fs_fid_lookup(): with FID_OP a fid may be returned
 * whether or not it is open, any other value skips fids that are open.
 */
26#define FID_OP 0
27#define FID_WALK 1
28
/*
 * Per-dentry fid bookkeeping.  v9fs_fid_create() sets fid to -1 and clears
 * fidopen, fidcreate, fidclunked and iounit; v9fs_fid_insert() fills in pid
 * and uid from the calling task.  All other fields are set elsewhere.
 */
29struct v9fs_fid {
30 struct list_head list; /* list of fids associated with a dentry */
31 struct list_head active; /* XXX - debug */
32
33 u32 fid;
34 unsigned char fidopen; /* set when fid is opened */
35 unsigned char fidcreate; /* set when fid was just created */
36 unsigned char fidclunked; /* set when fid has already been clunked */
37
38 struct v9fs_qid qid;
39 u32 iounit;
40
41 /* readdir stuff */
42 int rdir_fpos;
43 loff_t rdir_pos;
44 struct v9fs_fcall *rdir_fcall;
45
46 /* management stuff */
47 pid_t pid; /* thread associated with this fid */
48 uid_t uid; /* user associated with this fid */
49
50 /* private data */
51 struct file *filp; /* backpointer to File struct for open files */
52 struct v9fs_session_info *v9ses; /* session info for this FID */
53};
54
/* find (or walk to and create) the fid of a dentry for the calling task */
55struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry, int type);
/* unlink a fid from its list and free it */
56void v9fs_fid_destroy(struct v9fs_fid *fid);
/* allocate a fid and attach it to the given dentry */
57struct v9fs_fid *v9fs_fid_create(struct dentry *);
diff --git a/fs/9p/mux.c b/fs/9p/mux.c
new file mode 100644
index 000000000000..8835b576f744
--- /dev/null
+++ b/fs/9p/mux.c
@@ -0,0 +1,475 @@
1/*
2 * linux/fs/9p/mux.c
3 *
4 * Protocol Multiplexer
5 *
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 * Copyright (C) 2004 by Latchesar Ionkov <lucho@ionkov.net>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to:
21 * Free Software Foundation
22 * 51 Franklin Street, Fifth Floor
23 * Boston, MA 02111-1301 USA
24 *
25 */
26
27#include <linux/config.h>
28#include <linux/module.h>
29#include <linux/errno.h>
30#include <linux/fs.h>
31#include <linux/kthread.h>
32#include <linux/idr.h>
33
34#include "debug.h"
35#include "v9fs.h"
36#include "9p.h"
37#include "transport.h"
38#include "conv.h"
39#include "mux.h"
40
41/**
42 * dprintcond - print condition of session info
43 * @v9ses: session info structure
44 * @req: RPC request structure
45 *
46 */
47
48static inline int
49dprintcond(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req)
50{
51 dprintk(DEBUG_MUX, "condition: %d, %p\n", v9ses->transport->status,
52 req->rcall);
53 return 0;
54}
55
56/**
57 * xread - force read of a certain number of bytes
58 * @v9ses: session info structure
59 * @ptr: pointer to buffer
60 * @sz: number of bytes to read
61 *
62 * Chuck Cranor CS-533 project1
63 */
64
65static int xread(struct v9fs_session_info *v9ses, void *ptr, unsigned long sz)
66{
67 int rd = 0;
68 int ret = 0;
69 while (rd < sz) {
70 ret = v9ses->transport->read(v9ses->transport, ptr, sz - rd);
71 if (ret <= 0) {
72 dprintk(DEBUG_ERROR, "xread errno %d\n", ret);
73 return ret;
74 }
75 rd += ret;
76 ptr += ret;
77 }
78 return (rd);
79}
80
81/**
82 * read_message - read a full 9P2000 fcall packet
83 * @v9ses: session info structure
84 * @rcall: fcall structure to read into
85 * @rcalllen: size of fcall buffer
86 *
87 */
88
89static int
90read_message(struct v9fs_session_info *v9ses,
91 struct v9fs_fcall *rcall, int rcalllen)
92{
93 unsigned char buf[4];
94 void *data;
95 int size = 0;
96 int res = 0;
97
98 res = xread(v9ses, buf, sizeof(buf));
99 if (res < 0) {
100 dprintk(DEBUG_ERROR,
101 "Reading of count field failed returned: %d\n", res);
102 return res;
103 }
104
105 if (res < 4) {
106 dprintk(DEBUG_ERROR,
107 "Reading of count field failed returned: %d\n", res);
108 return -EIO;
109 }
110
111 size = buf[0] | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24);
112 dprintk(DEBUG_MUX, "got a packet count: %d\n", size);
113
114 /* adjust for the four bytes of size */
115 size -= 4;
116
117 if (size > v9ses->maxdata) {
118 dprintk(DEBUG_ERROR, "packet too big: %d\n", size);
119 return -E2BIG;
120 }
121
122 data = kmalloc(size, GFP_KERNEL);
123 if (!data) {
124 eprintk(KERN_WARNING, "out of memory\n");
125 return -ENOMEM;
126 }
127
128 res = xread(v9ses, data, size);
129 if (res < size) {
130 dprintk(DEBUG_ERROR, "Reading of fcall failed returned: %d\n",
131 res);
132 kfree(data);
133 return res;
134 }
135
136 /* we now have an in-memory string that is the reply.
137 * deserialize it. There is very little to go wrong at this point
138 * save for v9fs_alloc errors.
139 */
140 res = v9fs_deserialize_fcall(v9ses, size, data, v9ses->maxdata,
141 rcall, rcalllen);
142
143 kfree(data);
144
145 if (res < 0)
146 return res;
147
148 return 0;
149}
150
151/**
152 * v9fs_recv - receive an RPC response for a particular tag
153 * @v9ses: session info structure
154 * @req: RPC request structure
155 *
156 */
157
158static int v9fs_recv(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req)
159{
160 int ret = 0;
161
162 dprintk(DEBUG_MUX, "waiting for response: %d\n", req->tcall->tag);
163 ret = wait_event_interruptible(v9ses->read_wait,
164 ((v9ses->transport->status != Connected) ||
165 (req->rcall != 0) || (req->err < 0) ||
166 dprintcond(v9ses, req)));
167
168 dprintk(DEBUG_MUX, "got it: rcall %p\n", req->rcall);
169
170 spin_lock(&v9ses->muxlock);
171 list_del(&req->next);
172 spin_unlock(&v9ses->muxlock);
173
174 if (req->err < 0)
175 return req->err;
176
177 if (v9ses->transport->status == Disconnected)
178 return -ECONNRESET;
179
180 return ret;
181}
182
183/**
184 * v9fs_send - send a 9P request
185 * @v9ses: session info structure
186 * @req: RPC request to send
187 *
188 */
189
190static int v9fs_send(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req)
191{
192 int ret = -1;
193 void *data = NULL;
194 struct v9fs_fcall *tcall = req->tcall;
195
196 data = kmalloc(v9ses->maxdata + V9FS_IOHDRSZ, GFP_KERNEL);
197 if (!data)
198 return -ENOMEM;
199
200 tcall->size = 0; /* enforce size recalculation */
201 ret =
202 v9fs_serialize_fcall(v9ses, tcall, data,
203 v9ses->maxdata + V9FS_IOHDRSZ);
204 if (ret < 0)
205 goto free_data;
206
207 spin_lock(&v9ses->muxlock);
208 list_add(&req->next, &v9ses->mux_fcalls);
209 spin_unlock(&v9ses->muxlock);
210
211 dprintk(DEBUG_MUX, "sending message: tag %d size %d\n", tcall->tag,
212 tcall->size);
213 ret = v9ses->transport->write(v9ses->transport, data, tcall->size);
214
215 if (ret != tcall->size) {
216 spin_lock(&v9ses->muxlock);
217 list_del(&req->next);
218 kfree(req->rcall);
219
220 spin_unlock(&v9ses->muxlock);
221 if (ret >= 0)
222 ret = -EREMOTEIO;
223 } else
224 ret = 0;
225
226 free_data:
227 kfree(data);
228 return ret;
229}
230
231/**
232 * v9fs_mux_rpc - send a request, receive a response
233 * @v9ses: session info structure
234 * @tcall: fcall to send
235 * @rcall: buffer to place response into
236 *
237 */
238
239long
240v9fs_mux_rpc(struct v9fs_session_info *v9ses, struct v9fs_fcall *tcall,
241 struct v9fs_fcall **rcall)
242{
243 int tid = -1;
244 struct v9fs_fcall *fcall = NULL;
245 struct v9fs_rpcreq req;
246 int ret = -1;
247
248 if (!v9ses)
249 return -EINVAL;
250
251 if (!v9ses->transport || v9ses->transport->status != Connected)
252 return -EIO;
253
254 if (rcall)
255 *rcall = NULL;
256
257 if (tcall->id != TVERSION) {
258 tid = v9fs_get_idpool(&v9ses->tidpool);
259 if (tid < 0)
260 return -ENOMEM;
261 }
262
263 tcall->tag = tid;
264
265 req.tcall = tcall;
266 req.err = 0;
267 req.rcall = NULL;
268
269 ret = v9fs_send(v9ses, &req);
270
271 if (ret < 0) {
272 if (tcall->id != TVERSION)
273 v9fs_put_idpool(tid, &v9ses->tidpool);
274 dprintk(DEBUG_MUX, "error %d\n", ret);
275 return ret;
276 }
277
278 ret = v9fs_recv(v9ses, &req);
279
280 fcall = req.rcall;
281
282 dprintk(DEBUG_MUX, "received: tag=%x, ret=%d\n", tcall->tag, ret);
283 if (ret == -ERESTARTSYS) {
284 if (v9ses->transport->status != Disconnected
285 && tcall->id != TFLUSH) {
286 unsigned long flags;
287
288 dprintk(DEBUG_MUX, "flushing the tag: %d\n",
289 tcall->tag);
290 clear_thread_flag(TIF_SIGPENDING);
291 v9fs_t_flush(v9ses, tcall->tag);
292 spin_lock_irqsave(&current->sighand->siglock, flags);
293 recalc_sigpending();
294 spin_unlock_irqrestore(&current->sighand->siglock,
295 flags);
296 dprintk(DEBUG_MUX, "flushing done\n");
297 }
298
299 goto release_req;
300 } else if (ret < 0)
301 goto release_req;
302
303 if (!fcall)
304 ret = -EIO;
305 else {
306 if (fcall->id == RERROR) {
307 ret = v9fs_errstr2errno(fcall->params.rerror.error);
308 if (ret == 0) { /* string match failed */
309 if (fcall->params.rerror.errno)
310 ret = -(fcall->params.rerror.errno);
311 else
312 ret = -ESERVERFAULT;
313 }
314 } else if (fcall->id != tcall->id + 1) {
315 dprintk(DEBUG_ERROR,
316 "fcall mismatch: expected %d, got %d\n",
317 tcall->id + 1, fcall->id);
318 ret = -EIO;
319 }
320 }
321
322 release_req:
323 if (tcall->id != TVERSION)
324 v9fs_put_idpool(tid, &v9ses->tidpool);
325 if (rcall)
326 *rcall = fcall;
327 else
328 kfree(fcall);
329
330 return ret;
331}
332
333/**
334 * v9fs_mux_cancel_requests - cancels all pending requests
335 *
336 * @v9ses: session info structure
337 * @err: error code to return to the requests
338 */
339void v9fs_mux_cancel_requests(struct v9fs_session_info *v9ses, int err)
340{
341 struct v9fs_rpcreq *rptr;
342 struct v9fs_rpcreq *rreq;
343
344 dprintk(DEBUG_MUX, " %d\n", err);
345 spin_lock(&v9ses->muxlock);
346 list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) {
347 rreq->err = err;
348 }
349 spin_unlock(&v9ses->muxlock);
350 wake_up_all(&v9ses->read_wait);
351}
352
353/**
354 * v9fs_recvproc - kproc to handle demultiplexing responses
355 * @data: session info structure
356 *
357 */
358
359static int v9fs_recvproc(void *data)
360{
361 struct v9fs_session_info *v9ses = (struct v9fs_session_info *)data;
362 struct v9fs_fcall *rcall = NULL;
363 struct v9fs_rpcreq *rptr;
364 struct v9fs_rpcreq *req;
365 struct v9fs_rpcreq *rreq;
366 int err = 0;
367
368 allow_signal(SIGKILL);
369 set_current_state(TASK_INTERRUPTIBLE);
370 complete(&v9ses->proccmpl);
371 while (!kthread_should_stop() && err >= 0) {
372 req = rptr = rreq = NULL;
373
374 rcall = kmalloc(v9ses->maxdata + V9FS_IOHDRSZ, GFP_KERNEL);
375 if (!rcall) {
376 eprintk(KERN_ERR, "no memory for buffers\n");
377 break;
378 }
379
380 err = read_message(v9ses, rcall, v9ses->maxdata + V9FS_IOHDRSZ);
381 spin_lock(&v9ses->muxlock);
382 if (err < 0) {
383 list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) {
384 rreq->err = err;
385 }
386 if(err != -ERESTARTSYS)
387 eprintk(KERN_ERR,
388 "Transport error while reading message %d\n", err);
389 } else {
390 list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) {
391 if (rreq->tcall->tag == rcall->tag) {
392 req = rreq;
393 req->rcall = rcall;
394 break;
395 }
396 }
397 }
398
399 if (req && (req->tcall->id == TFLUSH)) {
400 struct v9fs_rpcreq *treq = NULL;
401 list_for_each_entry_safe(treq, rptr, &v9ses->mux_fcalls, next) {
402 if (treq->tcall->tag ==
403 req->tcall->params.tflush.oldtag) {
404 list_del(&rptr->next);
405 kfree(treq->rcall);
406 break;
407 }
408 }
409 }
410
411 spin_unlock(&v9ses->muxlock);
412
413 if (!req) {
414 if (err >= 0)
415 dprintk(DEBUG_ERROR,
416 "unexpected response: id %d tag %d\n",
417 rcall->id, rcall->tag);
418
419 kfree(rcall);
420 }
421
422 wake_up_all(&v9ses->read_wait);
423 set_current_state(TASK_INTERRUPTIBLE);
424 }
425
426 v9ses->transport->close(v9ses->transport);
427
428 /* Inform all pending processes about the failure */
429 wake_up_all(&v9ses->read_wait);
430
431 if (signal_pending(current))
432 complete(&v9ses->proccmpl);
433
434 dprintk(DEBUG_MUX, "recvproc: end\n");
435 v9ses->recvproc = NULL;
436
437 return err >= 0;
438}
439
440/**
441 * v9fs_mux_init - initialize multiplexer (spawn kproc)
442 * @v9ses: session info structure
443 * @dev_name: mount device information (to create unique kproc)
444 *
445 */
446
447int v9fs_mux_init(struct v9fs_session_info *v9ses, const char *dev_name)
448{
449 char procname[60];
450
451 strncpy(procname, dev_name, sizeof(procname));
452 procname[sizeof(procname) - 1] = 0;
453
454 init_waitqueue_head(&v9ses->read_wait);
455 init_completion(&v9ses->fcread);
456 init_completion(&v9ses->proccmpl);
457 spin_lock_init(&v9ses->muxlock);
458 INIT_LIST_HEAD(&v9ses->mux_fcalls);
459 v9ses->recvproc = NULL;
460 v9ses->curfcall = NULL;
461
462 v9ses->recvproc = kthread_create(v9fs_recvproc, v9ses,
463 "v9fs_recvproc %s", procname);
464
465 if (IS_ERR(v9ses->recvproc)) {
466 eprintk(KERN_ERR, "cannot create receiving thread\n");
467 v9fs_session_close(v9ses);
468 return -ECONNABORTED;
469 }
470
471 wake_up_process(v9ses->recvproc);
472 wait_for_completion(&v9ses->proccmpl);
473
474 return 0;
475}
diff --git a/fs/9p/mux.h b/fs/9p/mux.h
new file mode 100644
index 000000000000..4994cb10badf
--- /dev/null
+++ b/fs/9p/mux.h
@@ -0,0 +1,41 @@
1/*
2 * linux/fs/9p/mux.h
3 *
4 * Multiplexer Definitions
5 *
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to:
20 * Free Software Foundation
21 * 51 Franklin Street, Fifth Floor
22 * Boston, MA 02111-1301 USA
23 *
24 */
25
26/* structure to manage each RPC transaction */
27
28struct v9fs_rpcreq {
29 struct v9fs_fcall *tcall;
30 struct v9fs_fcall *rcall;
31 int err; /* error code if response failed */
32
33 /* XXX - could we put scatter/gather buffers here? */
34
35 struct list_head next;
36};
37
/* multiplexer entry points */
int v9fs_mux_init(struct v9fs_session_info *v9ses, const char *dev_name);
long v9fs_mux_rpc(struct v9fs_session_info *v9ses, struct v9fs_fcall *tcall,
		  struct v9fs_fcall **rcall);
void v9fs_mux_cancel_requests(struct v9fs_session_info *v9ses, int err);
diff --git a/fs/9p/trans_fd.c b/fs/9p/trans_fd.c
new file mode 100644
index 000000000000..63b58ce98ff4
--- /dev/null
+++ b/fs/9p/trans_fd.c
@@ -0,0 +1,172 @@
1/*
2 * linux/fs/9p/trans_fd.c
3 *
4 * File Descriptor Transport Layer
5 *
6 * Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to:
20 * Free Software Foundation
21 * 51 Franklin Street, Fifth Floor
22 * Boston, MA 02111-1301 USA
23 *
24 */
25
26#include <linux/config.h>
27#include <linux/module.h>
28#include <linux/net.h>
29#include <linux/ipv6.h>
30#include <linux/errno.h>
31#include <linux/kernel.h>
32#include <linux/un.h>
33#include <asm/uaccess.h>
34#include <linux/inet.h>
35#include <linux/idr.h>
36#include <linux/file.h>
37
38#include "debug.h"
39#include "v9fs.h"
40#include "transport.h"
41
/* Private state of the file descriptor transport. */
struct v9fs_trans_fd {
	struct file *in_file;	/* file that replies are read from */
	struct file *out_file;	/* file that requests are written to */
};
46
47/**
48 * v9fs_fd_recv - receive from a socket
49 * @v9ses: session information
50 * @v: buffer to receive data into
51 * @len: size of receive buffer
52 *
53 */
54
55static int v9fs_fd_recv(struct v9fs_transport *trans, void *v, int len)
56{
57 struct v9fs_trans_fd *ts = trans ? trans->priv : NULL;
58
59 if (!trans || trans->status != Connected || !ts)
60 return -EIO;
61
62 return kernel_read(ts->in_file, ts->in_file->f_pos, v, len);
63}
64
65/**
66 * v9fs_fd_send - send to a socket
67 * @v9ses: session information
68 * @v: buffer to send data from
69 * @len: size of send buffer
70 *
71 */
72
73static int v9fs_fd_send(struct v9fs_transport *trans, void *v, int len)
74{
75 struct v9fs_trans_fd *ts = trans ? trans->priv : NULL;
76 mm_segment_t oldfs = get_fs();
77 int ret = 0;
78
79 if (!trans || trans->status != Connected || !ts)
80 return -EIO;
81
82 set_fs(get_ds());
83 /* The cast to a user pointer is valid due to the set_fs() */
84 ret = vfs_write(ts->out_file, (void __user *)v, len, &ts->out_file->f_pos);
85 set_fs(oldfs);
86
87 return ret;
88}
89
90/**
91 * v9fs_fd_init - initialize file descriptor transport
92 * @v9ses: session information
93 * @addr: address of server to mount
94 * @data: mount options
95 *
96 */
97
98static int
99v9fs_fd_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
100{
101 struct v9fs_trans_fd *ts = NULL;
102 struct v9fs_transport *trans = v9ses->transport;
103
104 if((v9ses->wfdno == ~0) || (v9ses->rfdno == ~0)) {
105 printk(KERN_ERR "v9fs: Insufficient options for proto=fd\n");
106 return -ENOPROTOOPT;
107 }
108
109 sema_init(&trans->writelock, 1);
110 sema_init(&trans->readlock, 1);
111
112 ts = kmalloc(sizeof(struct v9fs_trans_fd), GFP_KERNEL);
113
114 if (!ts)
115 return -ENOMEM;
116
117 ts->in_file = fget( v9ses->rfdno );
118 ts->out_file = fget( v9ses->wfdno );
119
120 if (!ts->in_file || !ts->out_file) {
121 if (ts->in_file)
122 fput(ts->in_file);
123
124 if (ts->out_file)
125 fput(ts->out_file);
126
127 kfree(ts);
128 return -EIO;
129 }
130
131 trans->priv = ts;
132 trans->status = Connected;
133
134 return 0;
135}
136
137
138/**
139 * v9fs_fd_close - shutdown file descriptor
140 * @trans: private socket structure
141 *
142 */
143
144static void v9fs_fd_close(struct v9fs_transport *trans)
145{
146 struct v9fs_trans_fd *ts;
147
148 if (!trans)
149 return;
150
151 trans->status = Disconnected;
152 ts = trans->priv;
153
154 if (!ts)
155 return;
156
157 if (ts->in_file)
158 fput(ts->in_file);
159
160 if (ts->out_file)
161 fput(ts->out_file);
162
163 kfree(ts);
164}
165
166struct v9fs_transport v9fs_trans_fd = {
167 .init = v9fs_fd_init,
168 .write = v9fs_fd_send,
169 .read = v9fs_fd_recv,
170 .close = v9fs_fd_close,
171};
172
diff --git a/fs/9p/trans_sock.c b/fs/9p/trans_sock.c
new file mode 100644
index 000000000000..01e26f0013ac
--- /dev/null
+++ b/fs/9p/trans_sock.c
@@ -0,0 +1,290 @@
1/*
2 * linux/fs/9p/trans_socket.c
3 *
4 * Socket Transport Layer
5 *
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 * Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com>
8 * Copyright (C) 1995, 1996 by Olaf Kirch <okir@monad.swb.de>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to:
22 * Free Software Foundation
23 * 51 Franklin Street, Fifth Floor
24 * Boston, MA 02111-1301 USA
25 *
26 */
27
28#include <linux/config.h>
29#include <linux/module.h>
30#include <linux/net.h>
31#include <linux/ipv6.h>
32#include <linux/errno.h>
33#include <linux/kernel.h>
34#include <linux/un.h>
35#include <asm/uaccess.h>
36#include <linux/inet.h>
37#include <linux/idr.h>
38
39#include "debug.h"
40#include "v9fs.h"
41#include "transport.h"
42
#define V9FS_PORT	564	/* port number constant for 9P */
44
/* Private state of the socket based transports. */
struct v9fs_trans_sock {
	struct socket *s;	/* connected socket, NULL when there is none */
};
48
49/**
50 * v9fs_sock_recv - receive from a socket
51 * @v9ses: session information
52 * @v: buffer to receive data into
53 * @len: size of receive buffer
54 *
55 */
56
57static int v9fs_sock_recv(struct v9fs_transport *trans, void *v, int len)
58{
59 struct msghdr msg;
60 struct kvec iov;
61 int result;
62 mm_segment_t oldfs;
63 struct v9fs_trans_sock *ts = trans ? trans->priv : NULL;
64
65 if (trans->status == Disconnected)
66 return -EREMOTEIO;
67
68 result = -EINVAL;
69
70 oldfs = get_fs();
71 set_fs(get_ds());
72
73 iov.iov_base = v;
74 iov.iov_len = len;
75 msg.msg_name = NULL;
76 msg.msg_namelen = 0;
77 msg.msg_iovlen = 1;
78 msg.msg_control = NULL;
79 msg.msg_controllen = 0;
80 msg.msg_namelen = 0;
81 msg.msg_flags = MSG_NOSIGNAL;
82
83 result = kernel_recvmsg(ts->s, &msg, &iov, 1, len, 0);
84
85 dprintk(DEBUG_TRANS, "socket state %d\n", ts->s->state);
86 set_fs(oldfs);
87
88 if (result <= 0) {
89 if (result != -ERESTARTSYS)
90 trans->status = Disconnected;
91 }
92
93 return result;
94}
95
96/**
97 * v9fs_sock_send - send to a socket
98 * @v9ses: session information
99 * @v: buffer to send data from
100 * @len: size of send buffer
101 *
102 */
103
104static int v9fs_sock_send(struct v9fs_transport *trans, void *v, int len)
105{
106 struct kvec iov;
107 struct msghdr msg;
108 int result = -1;
109 mm_segment_t oldfs;
110 struct v9fs_trans_sock *ts = trans ? trans->priv : NULL;
111
112 dprintk(DEBUG_TRANS, "Sending packet size %d (%x)\n", len, len);
113 dump_data(v, len);
114
115 down(&trans->writelock);
116
117 oldfs = get_fs();
118 set_fs(get_ds());
119 iov.iov_base = v;
120 iov.iov_len = len;
121 msg.msg_name = NULL;
122 msg.msg_namelen = 0;
123 msg.msg_iovlen = 1;
124 msg.msg_control = NULL;
125 msg.msg_controllen = 0;
126 msg.msg_namelen = 0;
127 msg.msg_flags = MSG_NOSIGNAL;
128 result = kernel_sendmsg(ts->s, &msg, &iov, 1, len);
129 set_fs(oldfs);
130
131 if (result < 0) {
132 if (result != -ERESTARTSYS)
133 trans->status = Disconnected;
134 }
135
136 up(&trans->writelock);
137 return result;
138}
139
140/**
141 * v9fs_tcp_init - initialize TCP socket
142 * @v9ses: session information
143 * @addr: address of server to mount
144 * @data: mount options
145 *
146 */
147
148static int
149v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
150{
151 struct socket *csocket = NULL;
152 struct sockaddr_in sin_server;
153 int rc = 0;
154 struct v9fs_trans_sock *ts = NULL;
155 struct v9fs_transport *trans = v9ses->transport;
156
157 sema_init(&trans->writelock, 1);
158 sema_init(&trans->readlock, 1);
159
160 ts = kmalloc(sizeof(struct v9fs_trans_sock), GFP_KERNEL);
161
162 if (!ts)
163 return -ENOMEM;
164
165 trans->priv = ts;
166 ts->s = NULL;
167
168 if (!addr)
169 return -EINVAL;
170
171 dprintk(DEBUG_TRANS, "Connecting to %s\n", addr);
172
173 sin_server.sin_family = AF_INET;
174 sin_server.sin_addr.s_addr = in_aton(addr);
175 sin_server.sin_port = htons(v9ses->port);
176 sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket);
177 rc = csocket->ops->connect(csocket,
178 (struct sockaddr *)&sin_server,
179 sizeof(struct sockaddr_in), 0);
180 if (rc < 0) {
181 eprintk(KERN_ERR,
182 "v9fs_trans_tcp: problem connecting socket to %s\n",
183 addr);
184 return rc;
185 }
186 csocket->sk->sk_allocation = GFP_NOIO;
187 ts->s = csocket;
188 trans->status = Connected;
189
190 return 0;
191}
192
193/**
194 * v9fs_unix_init - initialize UNIX domain socket
195 * @v9ses: session information
196 * @dev_name: path to named pipe
197 * @data: mount options
198 *
199 */
200
201static int
202v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name,
203 char *data)
204{
205 int rc;
206 struct socket *csocket;
207 struct sockaddr_un sun_server;
208 struct v9fs_transport *trans;
209 struct v9fs_trans_sock *ts;
210
211 rc = 0;
212 csocket = NULL;
213 trans = v9ses->transport;
214
215 if (strlen(dev_name) > UNIX_PATH_MAX) {
216 eprintk(KERN_ERR, "v9fs_trans_unix: address too long: %s\n",
217 dev_name);
218 return -ENOMEM;
219 }
220
221 ts = kmalloc(sizeof(struct v9fs_trans_sock), GFP_KERNEL);
222 if (!ts)
223 return -ENOMEM;
224
225 trans->priv = ts;
226 ts->s = NULL;
227
228 sema_init(&trans->writelock, 1);
229 sema_init(&trans->readlock, 1);
230
231 sun_server.sun_family = PF_UNIX;
232 strcpy(sun_server.sun_path, dev_name);
233 sock_create_kern(PF_UNIX, SOCK_STREAM, 0, &csocket);
234 rc = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server,
235 sizeof(struct sockaddr_un) - 1, 0); /* -1 *is* important */
236 if (rc < 0) {
237 eprintk(KERN_ERR,
238 "v9fs_trans_unix: problem connecting socket: %s: %d\n",
239 dev_name, rc);
240 return rc;
241 }
242 csocket->sk->sk_allocation = GFP_NOIO;
243 ts->s = csocket;
244 trans->status = Connected;
245
246 return 0;
247}
248
249/**
250 * v9fs_sock_close - shutdown socket
251 * @trans: private socket structure
252 *
253 */
254
255static void v9fs_sock_close(struct v9fs_transport *trans)
256{
257 struct v9fs_trans_sock *ts;
258
259 if (!trans)
260 return;
261
262 ts = trans->priv;
263
264 if ((ts) && (ts->s)) {
265 dprintk(DEBUG_TRANS, "closing the socket %p\n", ts->s);
266 sock_release(ts->s);
267 ts->s = NULL;
268 trans->status = Disconnected;
269 dprintk(DEBUG_TRANS, "socket closed\n");
270 }
271
272 if (ts)
273 kfree(ts);
274
275 trans->priv = NULL;
276}
277
278struct v9fs_transport v9fs_trans_tcp = {
279 .init = v9fs_tcp_init,
280 .write = v9fs_sock_send,
281 .read = v9fs_sock_recv,
282 .close = v9fs_sock_close,
283};
284
285struct v9fs_transport v9fs_trans_unix = {
286 .init = v9fs_unix_init,
287 .write = v9fs_sock_send,
288 .read = v9fs_sock_recv,
289 .close = v9fs_sock_close,
290};
diff --git a/fs/9p/transport.h b/fs/9p/transport.h
new file mode 100644
index 000000000000..9e9cd418efd5
--- /dev/null
+++ b/fs/9p/transport.h
@@ -0,0 +1,46 @@
1/*
2 * linux/fs/9p/transport.h
3 *
4 * Transport Definition
5 *
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to:
20 * Free Software Foundation
21 * 51 Franklin Street, Fifth Floor
22 * Boston, MA 02111-1301 USA
23 *
24 */
25
/* Connection state of a transport. */
enum v9fs_transport_status {
	Connected,
	Disconnected,
	Hung,
};
31
32struct v9fs_transport {
33 enum v9fs_transport_status status;
34 struct semaphore writelock;
35 struct semaphore readlock;
36 void *priv;
37
38 int (*init) (struct v9fs_session_info *, const char *, char *);
39 int (*write) (struct v9fs_transport *, void *, int);
40 int (*read) (struct v9fs_transport *, void *, int);
41 void (*close) (struct v9fs_transport *);
42};
43
/* available transports */
extern struct v9fs_transport v9fs_trans_tcp;
extern struct v9fs_transport v9fs_trans_unix;
extern struct v9fs_transport v9fs_trans_fd;
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
new file mode 100644
index 000000000000..13bdbbab4387
--- /dev/null
+++ b/fs/9p/v9fs.c
@@ -0,0 +1,452 @@
1/*
2 * linux/fs/9p/v9fs.c
3 *
4 * This file contains functions assisting in mapping VFS to 9P2000
5 *
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to:
21 * Free Software Foundation
22 * 51 Franklin Street, Fifth Floor
23 * Boston, MA 02111-1301 USA
24 *
25 */
26
27#include <linux/config.h>
28#include <linux/module.h>
29#include <linux/errno.h>
30#include <linux/fs.h>
31#include <linux/parser.h>
32#include <linux/idr.h>
33
34#include "debug.h"
35#include "v9fs.h"
36#include "9p.h"
37#include "v9fs_vfs.h"
38#include "transport.h"
39#include "mux.h"
40#include "conv.h"
41
/*
 * Global debug level, starts out as 0.
 * TODO: sysfs or debugfs interface
 */
int v9fs_debug_level;
44
/*
 * Option Parsing (code inspired by NFS code)
 *
 * The order of the groups matters: the parser treats every token that
 * sorts before Opt_name as taking an integer argument.
 */

enum {
	/* Options that take integer arguments */
	Opt_port,
	Opt_msize,
	Opt_uid,
	Opt_gid,
	Opt_afid,
	Opt_debug,
	Opt_rfdno,
	Opt_wfdno,
	/* String options */
	Opt_name,
	Opt_remotename,
	/* Options that take no arguments */
	Opt_legacy,
	Opt_nodevmap,
	Opt_unix,
	Opt_tcp,
	Opt_fd,
	/* Error token */
	Opt_err
};
61
62static match_table_t tokens = {
63 {Opt_port, "port=%u"},
64 {Opt_msize, "msize=%u"},
65 {Opt_uid, "uid=%u"},
66 {Opt_gid, "gid=%u"},
67 {Opt_afid, "afid=%u"},
68 {Opt_rfdno, "rfdno=%u"},
69 {Opt_wfdno, "wfdno=%u"},
70 {Opt_debug, "debug=%u"},
71 {Opt_name, "name=%s"},
72 {Opt_remotename, "aname=%s"},
73 {Opt_unix, "proto=unix"},
74 {Opt_tcp, "proto=tcp"},
75 {Opt_fd, "proto=fd"},
76 {Opt_tcp, "tcp"},
77 {Opt_unix, "unix"},
78 {Opt_fd, "fd"},
79 {Opt_legacy, "noextend"},
80 {Opt_nodevmap, "nodevmap"},
81 {Opt_err, NULL}
82};
83
84/*
85 * Parse option string.
86 */
87
88/**
89 * v9fs_parse_options - parse mount options into session structure
90 * @options: options string passed from mount
91 * @v9ses: existing v9fs session information
92 *
93 */
94
95static void v9fs_parse_options(char *options, struct v9fs_session_info *v9ses)
96{
97 char *p;
98 substring_t args[MAX_OPT_ARGS];
99 int option;
100 int ret;
101
102 /* setup defaults */
103 v9ses->port = V9FS_PORT;
104 v9ses->maxdata = 9000;
105 v9ses->proto = PROTO_TCP;
106 v9ses->extended = 1;
107 v9ses->afid = ~0;
108 v9ses->debug = 0;
109 v9ses->rfdno = ~0;
110 v9ses->wfdno = ~0;
111
112 if (!options)
113 return;
114
115 while ((p = strsep(&options, ",")) != NULL) {
116 int token;
117 if (!*p)
118 continue;
119 token = match_token(p, tokens, args);
120 if (token < Opt_name) {
121 if ((ret = match_int(&args[0], &option)) < 0) {
122 dprintk(DEBUG_ERROR,
123 "integer field, but no integer?\n");
124 continue;
125 }
126
127 }
128 switch (token) {
129 case Opt_port:
130 v9ses->port = option;
131 break;
132 case Opt_msize:
133 v9ses->maxdata = option;
134 break;
135 case Opt_uid:
136 v9ses->uid = option;
137 break;
138 case Opt_gid:
139 v9ses->gid = option;
140 break;
141 case Opt_afid:
142 v9ses->afid = option;
143 break;
144 case Opt_rfdno:
145 v9ses->rfdno = option;
146 break;
147 case Opt_wfdno:
148 v9ses->wfdno = option;
149 break;
150 case Opt_debug:
151 v9ses->debug = option;
152 break;
153 case Opt_tcp:
154 v9ses->proto = PROTO_TCP;
155 break;
156 case Opt_unix:
157 v9ses->proto = PROTO_UNIX;
158 break;
159 case Opt_fd:
160 v9ses->proto = PROTO_FD;
161 break;
162 case Opt_name:
163 match_strcpy(v9ses->name, &args[0]);
164 break;
165 case Opt_remotename:
166 match_strcpy(v9ses->remotename, &args[0]);
167 break;
168 case Opt_legacy:
169 v9ses->extended = 0;
170 break;
171 case Opt_nodevmap:
172 v9ses->nodev = 1;
173 break;
174 default:
175 continue;
176 }
177 }
178}
179
180/**
181 * v9fs_inode2v9ses - safely extract v9fs session info from super block
182 * @inode: inode to extract information from
183 *
184 * Paranoid function to extract v9ses information from superblock,
185 * if anything is missing it will report an error.
186 *
187 */
188
189struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode)
190{
191 return (inode->i_sb->s_fs_info);
192}
193
194/**
195 * v9fs_get_idpool - allocate numeric id from pool
196 * @p - pool to allocate from
197 *
198 * XXX - This seems to be an awful generic function, should it be in idr.c with
199 * the lock included in struct idr?
200 */
201
202int v9fs_get_idpool(struct v9fs_idpool *p)
203{
204 int i = 0;
205 int error;
206
207retry:
208 if (idr_pre_get(&p->pool, GFP_KERNEL) == 0)
209 return 0;
210
211 if (down_interruptible(&p->lock) == -EINTR) {
212 eprintk(KERN_WARNING, "Interrupted while locking\n");
213 return -1;
214 }
215
216 error = idr_get_new(&p->pool, NULL, &i);
217 up(&p->lock);
218
219 if (error == -EAGAIN)
220 goto retry;
221 else if (error)
222 return -1;
223
224 return i;
225}
226
227/**
228 * v9fs_put_idpool - release numeric id from pool
229 * @p - pool to allocate from
230 *
231 * XXX - This seems to be an awful generic function, should it be in idr.c with
232 * the lock included in struct idr?
233 */
234
235void v9fs_put_idpool(int id, struct v9fs_idpool *p)
236{
237 if (down_interruptible(&p->lock) == -EINTR) {
238 eprintk(KERN_WARNING, "Interrupted while locking\n");
239 return;
240 }
241 idr_remove(&p->pool, id);
242 up(&p->lock);
243}
244
245/**
246 * v9fs_session_init - initialize session
247 * @v9ses: session information structure
248 * @dev_name: device being mounted
249 * @data: options
250 *
251 */
252
253int
254v9fs_session_init(struct v9fs_session_info *v9ses,
255 const char *dev_name, char *data)
256{
257 struct v9fs_fcall *fcall = NULL;
258 struct v9fs_transport *trans_proto;
259 int n = 0;
260 int newfid = -1;
261 int retval = -EINVAL;
262
263 v9ses->name = __getname();
264 if (!v9ses->name)
265 return -ENOMEM;
266
267 v9ses->remotename = __getname();
268 if (!v9ses->remotename) {
269 putname(v9ses->name);
270 return -ENOMEM;
271 }
272
273 strcpy(v9ses->name, V9FS_DEFUSER);
274 strcpy(v9ses->remotename, V9FS_DEFANAME);
275
276 v9fs_parse_options(data, v9ses);
277
278 /* set global debug level */
279 v9fs_debug_level = v9ses->debug;
280
281 /* id pools that are session-dependent: FIDs and TIDs */
282 idr_init(&v9ses->fidpool.pool);
283 init_MUTEX(&v9ses->fidpool.lock);
284 idr_init(&v9ses->tidpool.pool);
285 init_MUTEX(&v9ses->tidpool.lock);
286
287
288 switch (v9ses->proto) {
289 case PROTO_TCP:
290 trans_proto = &v9fs_trans_tcp;
291 break;
292 case PROTO_UNIX:
293 trans_proto = &v9fs_trans_unix;
294 *v9ses->remotename = 0;
295 break;
296 case PROTO_FD:
297 trans_proto = &v9fs_trans_fd;
298 *v9ses->remotename = 0;
299 break;
300 default:
301 printk(KERN_ERR "v9fs: Bad mount protocol %d\n", v9ses->proto);
302 retval = -ENOPROTOOPT;
303 goto SessCleanUp;
304 };
305
306 v9ses->transport = trans_proto;
307
308 if ((retval = v9ses->transport->init(v9ses, dev_name, data)) < 0) {
309 eprintk(KERN_ERR, "problem initializing transport\n");
310 goto SessCleanUp;
311 }
312
313 v9ses->inprogress = 0;
314 v9ses->shutdown = 0;
315 v9ses->session_hung = 0;
316
317 if ((retval = v9fs_mux_init(v9ses, dev_name)) < 0) {
318 dprintk(DEBUG_ERROR, "problem initializing mux\n");
319 goto SessCleanUp;
320 }
321
322 if (v9ses->afid == ~0) {
323 if (v9ses->extended)
324 retval =
325 v9fs_t_version(v9ses, v9ses->maxdata, "9P2000.u",
326 &fcall);
327 else
328 retval = v9fs_t_version(v9ses, v9ses->maxdata, "9P2000",
329 &fcall);
330
331 if (retval < 0) {
332 dprintk(DEBUG_ERROR, "v9fs_t_version failed\n");
333 goto FreeFcall;
334 }
335
336 /* Really should check for 9P1 and report error */
337 if (!strcmp(fcall->params.rversion.version, "9P2000.u")) {
338 dprintk(DEBUG_9P, "9P2000 UNIX extensions enabled\n");
339 v9ses->extended = 1;
340 } else {
341 dprintk(DEBUG_9P, "9P2000 legacy mode enabled\n");
342 v9ses->extended = 0;
343 }
344
345 n = fcall->params.rversion.msize;
346 kfree(fcall);
347
348 if (n < v9ses->maxdata)
349 v9ses->maxdata = n;
350 }
351
352 newfid = v9fs_get_idpool(&v9ses->fidpool);
353 if (newfid < 0) {
354 eprintk(KERN_WARNING, "couldn't allocate FID\n");
355 retval = -ENOMEM;
356 goto SessCleanUp;
357 }
358 /* it is a little bit ugly, but we have to prevent newfid */
359 /* being the same as afid, so if it is, get a new fid */
360 if (v9ses->afid != ~0 && newfid == v9ses->afid) {
361 newfid = v9fs_get_idpool(&v9ses->fidpool);
362 if (newfid < 0) {
363 eprintk(KERN_WARNING, "couldn't allocate FID\n");
364 retval = -ENOMEM;
365 goto SessCleanUp;
366 }
367 }
368
369 if ((retval =
370 v9fs_t_attach(v9ses, v9ses->name, v9ses->remotename, newfid,
371 v9ses->afid, NULL))
372 < 0) {
373 dprintk(DEBUG_ERROR, "cannot attach\n");
374 goto SessCleanUp;
375 }
376
377 if (v9ses->afid != ~0) {
378 if (v9fs_t_clunk(v9ses, v9ses->afid, NULL))
379 dprintk(DEBUG_ERROR, "clunk failed\n");
380 }
381
382 return newfid;
383
384 FreeFcall:
385 kfree(fcall);
386
387 SessCleanUp:
388 v9fs_session_close(v9ses);
389 return retval;
390}
391
392/**
393 * v9fs_session_close - shutdown a session
394 * @v9ses: session information structure
395 *
396 */
397
398void v9fs_session_close(struct v9fs_session_info *v9ses)
399{
400 if (v9ses->recvproc) {
401 send_sig(SIGKILL, v9ses->recvproc, 1);
402 wait_for_completion(&v9ses->proccmpl);
403 }
404
405 if (v9ses->transport)
406 v9ses->transport->close(v9ses->transport);
407
408 putname(v9ses->name);
409 putname(v9ses->remotename);
410}
411
412/**
413 * v9fs_session_cancel - mark transport as disconnected
414 * and cancel all pending requests.
415 */
416void v9fs_session_cancel(struct v9fs_session_info *v9ses) {
417 v9ses->transport->status = Disconnected;
418 v9fs_mux_cancel_requests(v9ses, -EIO);
419}
420
421extern int v9fs_error_init(void);
422
423/**
424 * v9fs_init - Initialize module
425 *
426 */
427
428static int __init init_v9fs(void)
429{
430 v9fs_error_init();
431
432 printk(KERN_INFO "Installing v9fs 9P2000 file system support\n");
433
434 return register_filesystem(&v9fs_fs_type);
435}
436
437/**
438 * v9fs_init - shutdown module
439 *
440 */
441
442static void __exit exit_v9fs(void)
443{
444 unregister_filesystem(&v9fs_fs_type);
445}
446
447module_init(init_v9fs)
448module_exit(exit_v9fs)
449
450MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>");
451MODULE_AUTHOR("Ron Minnich <rminnich@lanl.gov>");
452MODULE_LICENSE("GPL");
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
new file mode 100644
index 000000000000..45dcef42bdd6
--- /dev/null
+++ b/fs/9p/v9fs.h
@@ -0,0 +1,103 @@
1/*
2 * V9FS definitions.
3 *
4 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
5 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to:
19 * Free Software Foundation
20 * 51 Franklin Street, Fifth Floor
21 * Boston, MA 02111-1301 USA
22 *
23 */
24
25/*
26 * Idpool structure provides lock and id management
27 *
28 */
29
30struct v9fs_idpool {
31 struct semaphore lock; /* held around idr_get_new()/idr_remove() on pool */
32 struct idr pool; /* idr the numeric ids are allocated from */
33};
34
35/*
36 * Session structure provides information for an opened session
37 *
38 */
39
40struct v9fs_session_info {
41 /* options */
42 unsigned int maxdata; /* msize option; lowered to the server's msize after version negotiation */
43 unsigned char extended; /* set to 1 if we are using UNIX extensions */
44 unsigned char nodev; /* set to 1 (nodevmap option) to disable device mapping */
45 unsigned short port; /* port to connect to */
46 unsigned short debug; /* debug level */
47 unsigned short proto; /* protocol to use (PROTO_TCP, PROTO_UNIX or PROTO_FD) */
48 unsigned int afid; /* authentication fid; ~0 appears to mean "none" - TODO confirm */
49 unsigned int rfdno; /* read file descriptor number */
50 unsigned int wfdno; /* write file descriptor number */
51
52
53 char *name; /* user name to mount as */
54 char *remotename; /* name of remote hierarchy being mounted */
55 unsigned int uid; /* default uid/muid for legacy support */
56 unsigned int gid; /* default gid for legacy support */
57
58 /* book keeping */
59 struct v9fs_idpool fidpool; /* The FID pool for file descriptors */
60 struct v9fs_idpool tidpool; /* The TID pool for transactions ids */
61
62 /* transport information */
63 struct v9fs_transport *transport; /* chosen from proto by v9fs_session_init() */
64
65 int inprogress; /* session in progress => true */
66 int shutdown; /* session shutting down. no more attaches. */
67 unsigned char session_hung; /* cleared by v9fs_session_init() */
68
69 /* mux private data */
70 struct v9fs_fcall *curfcall;
71 wait_queue_head_t read_wait;
72 struct completion fcread;
73 struct completion proccmpl; /* waited on by v9fs_session_close() after killing recvproc */
74 struct task_struct *recvproc; /* sent SIGKILL by v9fs_session_close() when set */
75
76 spinlock_t muxlock;
77 struct list_head mux_fcalls;
78};
79
80/* possible values of ->proto */
81enum {
82 PROTO_TCP, /* v9fs_session_init() selects v9fs_trans_tcp */
83 PROTO_UNIX, /* v9fs_session_init() selects v9fs_trans_unix */
84 PROTO_FD, /* v9fs_session_init() selects v9fs_trans_fd */
85};
86
87int v9fs_session_init(struct v9fs_session_info *, const char *, char *);
88struct v9fs_session_info *v9fs_inode2v9ses(struct inode *);
89void v9fs_session_close(struct v9fs_session_info *v9ses);
90int v9fs_get_idpool(struct v9fs_idpool *p);
91void v9fs_put_idpool(int id, struct v9fs_idpool *p);
92void v9fs_session_cancel(struct v9fs_session_info *v9ses);
93
94#define V9FS_MAGIC 0x01021997
95
96/* other default globals */
97#define V9FS_PORT 564
98#define V9FS_DEFUSER "nobody"
99#define V9FS_DEFANAME ""
100
101/* initial pool sizes for fids and tags */
102#define V9FS_START_FIDS 8192
103#define V9FS_START_TIDS 256
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
new file mode 100644
index 000000000000..2f2cea7ee3e7
--- /dev/null
+++ b/fs/9p/v9fs_vfs.h
@@ -0,0 +1,53 @@
1/*
2 * V9FS VFS extensions.
3 *
4 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
5 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to:
19 * Free Software Foundation
20 * 51 Franklin Street, Fifth Floor
21 * Boston, MA 02111-1301 USA
22 *
23 */
24
25/* plan9 semantics are that created files are implicitly opened.
26 * But linux semantics are that you call create, then open.
27 * the plan9 approach is superior as it provides an atomic
28 * open.
29 * we track the create fid here. When the file is opened, if fidopen is
30 * non-zero, we use the fid and can skip some steps.
31 * there may be a better way to do this, but I don't know it.
32 * one BAD way is to clunk the fid on create, then open it again:
33 * you lose the atomicity of file open
34 */
35
36/* special case:
37 * unlink calls remove, which is an implicit clunk. So we have to track
38 * that kind of thing so that we don't try to clunk a dead fid.
39 */
40
41extern struct file_system_type v9fs_fs_type;
42extern struct file_operations v9fs_file_operations;
43extern struct file_operations v9fs_dir_operations;
44extern struct dentry_operations v9fs_dentry_operations;
45
46struct inode *v9fs_get_inode(struct super_block *sb, int mode);
47ino_t v9fs_qid2ino(struct v9fs_qid *qid);
48void v9fs_mistat2inode(struct v9fs_stat *, struct inode *,
49 struct super_block *);
50int v9fs_dir_release(struct inode *inode, struct file *filp);
51int v9fs_file_open(struct inode *inode, struct file *file);
52void v9fs_inode2mistat(struct inode *inode, struct v9fs_stat *mistat);
53void v9fs_dentry_release(struct dentry *);
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
new file mode 100644
index 000000000000..306c96741f81
--- /dev/null
+++ b/fs/9p/vfs_dentry.c
@@ -0,0 +1,126 @@
1/*
2 * linux/fs/9p/vfs_dentry.c
3 *
4 * This file contains vfs dentry ops for the 9P2000 protocol.
5 *
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to:
21 * Free Software Foundation
22 * 51 Franklin Street, Fifth Floor
23 * Boston, MA 02111-1301 USA
24 *
25 */
26
27#include <linux/module.h>
28#include <linux/errno.h>
29#include <linux/fs.h>
30#include <linux/file.h>
31#include <linux/pagemap.h>
32#include <linux/stat.h>
33#include <linux/string.h>
34#include <linux/smp_lock.h>
35#include <linux/inet.h>
36#include <linux/namei.h>
37#include <linux/idr.h>
38
39#include "debug.h"
40#include "v9fs.h"
41#include "9p.h"
42#include "v9fs_vfs.h"
43#include "conv.h"
44#include "fid.h"
45
46/**
47 * v9fs_dentry_validate - VFS dcache hook to validate cache
48 * @dentry: dentry that is being validated
49 * @nd: path data
50 *
51 * dcache really shouldn't be used for 9P2000 as at all due to
52 * potential attached semantics to directory traversal (walk).
53 *
54 * FUTURE: look into how to use dcache to allow multi-stage
55 * walks in Plan 9 & potential for better dcache operation which
56 * would remain valid for Plan 9 semantics. Older versions
57 * had validation via stat for those interested. However, since
58 * stat has the same approximate overhead as walk there really
59 * is no difference. The only improvement would be from a
60 * time-decay cache like NFS has and that undermines the
61 * synchronous nature of 9P2000.
62 *
63 */
64
65static int v9fs_dentry_validate(struct dentry *dentry, struct nameidata *nd)
66{
67 struct dentry *dc = current->fs->pwd;
68
69 dprintk(DEBUG_VFS, "dentry: %s (%p)\n", dentry->d_iname, dentry);
70 if (v9fs_fid_lookup(dentry, FID_OP)) {
71 dprintk(DEBUG_VFS, "VALID\n");
72 return 1;
73 }
74
75 while (dc != NULL) {
76 if (dc == dentry) {
77 dprintk(DEBUG_VFS, "VALID\n");
78 return 1;
79 }
80 if (dc == dc->d_parent)
81 break;
82
83 dc = dc->d_parent;
84 }
85
86 dprintk(DEBUG_VFS, "INVALID\n");
87 return 0;
88}
89
90/**
91 * v9fs_dentry_release - called when dentry is going to be freed
92 * @dentry: dentry that is being release
93 *
94 */
95
96void v9fs_dentry_release(struct dentry *dentry)
97{
98 dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
99
100 if (dentry->d_fsdata != NULL) {
101 struct list_head *fid_list = dentry->d_fsdata;
102 struct v9fs_fid *temp = NULL;
103 struct v9fs_fid *current_fid = NULL;
104 struct v9fs_fcall *fcall = NULL;
105
106 list_for_each_entry_safe(current_fid, temp, fid_list, list) {
107 if (v9fs_t_clunk
108 (current_fid->v9ses, current_fid->fid, &fcall))
109 dprintk(DEBUG_ERROR, "clunk failed: %s\n",
110 FCALL_ERROR(fcall));
111
112 v9fs_put_idpool(current_fid->fid,
113 &current_fid->v9ses->fidpool);
114
115 kfree(fcall);
116 v9fs_fid_destroy(current_fid);
117 }
118
119 kfree(dentry->d_fsdata); /* free the list_head */
120 }
121}
122
123struct dentry_operations v9fs_dentry_operations = {
124 .d_revalidate = v9fs_dentry_validate, /* returns 1 for valid, 0 for invalid */
125 .d_release = v9fs_dentry_release, /* clunks and frees the fids kept in d_fsdata */
126};
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
new file mode 100644
index 000000000000..c478a7384186
--- /dev/null
+++ b/fs/9p/vfs_dir.c
@@ -0,0 +1,226 @@
1/*
2 * linux/fs/9p/vfs_dir.c
3 *
4 * This file contains vfs directory ops for the 9P2000 protocol.
5 *
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to:
21 * Free Software Foundation
22 * 51 Franklin Street, Fifth Floor
23 * Boston, MA 02111-1301 USA
24 *
25 */
26
27#include <linux/module.h>
28#include <linux/errno.h>
29#include <linux/fs.h>
30#include <linux/file.h>
31#include <linux/stat.h>
32#include <linux/string.h>
33#include <linux/smp_lock.h>
34#include <linux/inet.h>
35#include <linux/idr.h>
36
37#include "debug.h"
38#include "v9fs.h"
39#include "9p.h"
40#include "v9fs_vfs.h"
41#include "conv.h"
42#include "fid.h"
43
44/**
45 * dt_type - return file type
46 * @mistat: mistat structure
47 *
48 */
49
50static inline int dt_type(struct v9fs_stat *mistat)
51{
52 unsigned long perm = mistat->mode;
53 int rettype = DT_REG;
54
55 if (perm & V9FS_DMDIR)
56 rettype = DT_DIR;
57 if (perm & V9FS_DMSYMLINK)
58 rettype = DT_LNK;
59
60 return rettype;
61}
62
63/**
64 * v9fs_dir_readdir - read a directory
65 * @filep: opened file structure
66 * @dirent: directory structure ???
67 * @filldir: function to populate directory structure ???
68 *
69 */
70
71static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
72{
73 struct v9fs_fcall *fcall = NULL;
74 struct inode *inode = filp->f_dentry->d_inode;
75 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
76 struct v9fs_fid *file = filp->private_data;
77 unsigned int i, n;
78 int fid = -1;
79 int ret = 0;
80 struct v9fs_stat *mi = NULL;
81 int over = 0;
82
83 dprintk(DEBUG_VFS, "name %s\n", filp->f_dentry->d_name.name);
84
85 fid = file->fid;
86
87 mi = kmalloc(v9ses->maxdata, GFP_KERNEL);
88 if (!mi)
89 return -ENOMEM;
90
91 if (file->rdir_fcall && (filp->f_pos != file->rdir_pos)) {
92 kfree(file->rdir_fcall);
93 file->rdir_fcall = NULL;
94 }
95
96 if (file->rdir_fcall) {
97 n = file->rdir_fcall->params.rread.count;
98 i = file->rdir_fpos;
99 while (i < n) {
100 int s = v9fs_deserialize_stat(v9ses,
101 file->rdir_fcall->params.rread.data + i,
102 n - i, mi, v9ses->maxdata);
103
104 if (s == 0) {
105 dprintk(DEBUG_ERROR,
106 "error while deserializing mistat\n");
107 ret = -EIO;
108 goto FreeStructs;
109 }
110
111 over = filldir(dirent, mi->name, strlen(mi->name),
112 filp->f_pos, v9fs_qid2ino(&mi->qid),
113 dt_type(mi));
114
115 if (over) {
116 file->rdir_fpos = i;
117 file->rdir_pos = filp->f_pos;
118 break;
119 }
120
121 i += s;
122 filp->f_pos += s;
123 }
124
125 if (!over) {
126 kfree(file->rdir_fcall);
127 file->rdir_fcall = NULL;
128 }
129 }
130
131 while (!over) {
132 ret = v9fs_t_read(v9ses, fid, filp->f_pos,
133 v9ses->maxdata-V9FS_IOHDRSZ, &fcall);
134 if (ret < 0) {
135 dprintk(DEBUG_ERROR, "error while reading: %d: %p\n",
136 ret, fcall);
137 goto FreeStructs;
138 } else if (ret == 0)
139 break;
140
141 n = ret;
142 i = 0;
143 while (i < n) {
144 int s = v9fs_deserialize_stat(v9ses,
145 fcall->params.rread.data + i, n - i, mi,
146 v9ses->maxdata);
147
148 if (s == 0) {
149 dprintk(DEBUG_ERROR,
150 "error while deserializing mistat\n");
151 return -EIO;
152 }
153
154 over = filldir(dirent, mi->name, strlen(mi->name),
155 filp->f_pos, v9fs_qid2ino(&mi->qid),
156 dt_type(mi));
157
158 if (over) {
159 file->rdir_fcall = fcall;
160 file->rdir_fpos = i;
161 file->rdir_pos = filp->f_pos;
162 fcall = NULL;
163 break;
164 }
165
166 i += s;
167 filp->f_pos += s;
168 }
169
170 kfree(fcall);
171 }
172
173 FreeStructs:
174 kfree(fcall);
175 kfree(mi);
176 return ret;
177}
178
179/**
180 * v9fs_dir_release - close a directory
181 * @inode: inode of the directory
182 * @filp: file pointer to a directory
183 *
184 */
185
186int v9fs_dir_release(struct inode *inode, struct file *filp)
187{
188 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
189 struct v9fs_fid *fid = filp->private_data;
190 int fidnum = -1;
191
192 dprintk(DEBUG_VFS, "inode: %p filp: %p fid: %d\n", inode, filp,
193 fid->fid);
194 fidnum = fid->fid;
195
196 filemap_fdatawrite(inode->i_mapping);
197 filemap_fdatawait(inode->i_mapping);
198
199 if (fidnum >= 0) {
200 fid->fidopen--;
201 dprintk(DEBUG_VFS, "fidopen: %d v9f->fid: %d\n", fid->fidopen,
202 fid->fid);
203
204 if (fid->fidopen == 0) {
205 if (v9fs_t_clunk(v9ses, fidnum, NULL))
206 dprintk(DEBUG_ERROR, "clunk failed\n");
207
208 v9fs_put_idpool(fid->fid, &v9ses->fidpool);
209 }
210
211 kfree(fid->rdir_fcall);
212
213 filp->private_data = NULL;
214 v9fs_fid_destroy(fid);
215 }
216
217 d_drop(filp->f_dentry);
218 return 0;
219}
220
221struct file_operations v9fs_dir_operations = {
222 .read = generic_read_dir,
223 .readdir = v9fs_dir_readdir,
224 .open = v9fs_file_open, /* same open routine as regular files */
225 .release = v9fs_dir_release,
226};
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
new file mode 100644
index 000000000000..1f8ae7d580ab
--- /dev/null
+++ b/fs/9p/vfs_file.c
@@ -0,0 +1,401 @@
1/*
2 * linux/fs/9p/vfs_file.c
3 *
4 * This file contains vfs file ops for 9P2000.
5 *
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to:
21 * Free Software Foundation
22 * 51 Franklin Street, Fifth Floor
23 * Boston, MA 02111-1301 USA
24 *
25 */
26
27#include <linux/module.h>
28#include <linux/errno.h>
29#include <linux/fs.h>
30#include <linux/file.h>
31#include <linux/stat.h>
32#include <linux/string.h>
33#include <linux/smp_lock.h>
34#include <linux/inet.h>
35#include <linux/version.h>
36#include <linux/list.h>
37#include <asm/uaccess.h>
38#include <linux/idr.h>
39
40#include "debug.h"
41#include "v9fs.h"
42#include "9p.h"
43#include "v9fs_vfs.h"
44#include "fid.h"
45
46/**
47 * v9fs_file_open - open a file (or directory)
48 * @inode: inode to be opened
49 * @file: file being opened
50 *
51 */
52
53int v9fs_file_open(struct inode *inode, struct file *file)
54{
55 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
56 struct v9fs_fid *v9fid = v9fs_fid_lookup(file->f_dentry, FID_WALK);
57 struct v9fs_fid *v9newfid = NULL;
58 struct v9fs_fcall *fcall = NULL;
59 int open_mode = 0;
60 unsigned int iounit = 0;
61 int newfid = -1;
62 long result = -1;
63
64 dprintk(DEBUG_VFS, "inode: %p file: %p v9fid= %p\n", inode, file,
65 v9fid);
66
67 if (!v9fid) {
68 struct dentry *dentry = file->f_dentry;
69 dprintk(DEBUG_ERROR, "Couldn't resolve fid from dentry\n");
70
71 /* XXX - some duplication from lookup, generalize later */
72 /* basically vfs_lookup is too heavy weight */
73 v9fid = v9fs_fid_lookup(file->f_dentry, FID_OP);
74 if (!v9fid)
75 return -EBADF;
76
77 v9fid = v9fs_fid_lookup(dentry->d_parent, FID_WALK);
78 if (!v9fid)
79 return -EBADF;
80
81 newfid = v9fs_get_idpool(&v9ses->fidpool);
82 if (newfid < 0) {
83 eprintk(KERN_WARNING, "newfid fails!\n");
84 return -ENOSPC;
85 }
86
87 result =
88 v9fs_t_walk(v9ses, v9fid->fid, newfid,
89 (char *)file->f_dentry->d_name.name, NULL);
90 if (result < 0) {
91 v9fs_put_idpool(newfid, &v9ses->fidpool);
92 dprintk(DEBUG_ERROR, "rewalk didn't work\n");
93 return -EBADF;
94 }
95
96 v9fid = v9fs_fid_create(dentry);
97 if (v9fid == NULL) {
98 dprintk(DEBUG_ERROR, "couldn't insert\n");
99 return -ENOMEM;
100 }
101 v9fid->fid = newfid;
102 }
103
104 if (v9fid->fidcreate) {
105 /* create case */
106 newfid = v9fid->fid;
107 iounit = v9fid->iounit;
108 v9fid->fidcreate = 0;
109 } else {
110 if (!S_ISDIR(inode->i_mode))
111 newfid = v9fid->fid;
112 else {
113 newfid = v9fs_get_idpool(&v9ses->fidpool);
114 if (newfid < 0) {
115 eprintk(KERN_WARNING, "allocation failed\n");
116 return -ENOSPC;
117 }
118 /* This would be a somewhat critical clone */
119 result =
120 v9fs_t_walk(v9ses, v9fid->fid, newfid, NULL,
121 &fcall);
122 if (result < 0) {
123 dprintk(DEBUG_ERROR, "clone error: %s\n",
124 FCALL_ERROR(fcall));
125 kfree(fcall);
126 return result;
127 }
128
129 v9newfid = v9fs_fid_create(file->f_dentry);
130 v9newfid->fid = newfid;
131 v9newfid->qid = v9fid->qid;
132 v9newfid->iounit = v9fid->iounit;
133 v9newfid->fidopen = 0;
134 v9newfid->fidclunked = 0;
135 v9newfid->v9ses = v9ses;
136 v9fid = v9newfid;
137 kfree(fcall);
138 }
139
140 /* TODO: do special things for O_EXCL, O_NOFOLLOW, O_SYNC */
141 /* translate open mode appropriately */
142 open_mode = file->f_flags & 0x3;
143
144 if (file->f_flags & O_EXCL)
145 open_mode |= V9FS_OEXCL;
146
147 if (v9ses->extended) {
148 if (file->f_flags & O_TRUNC)
149 open_mode |= V9FS_OTRUNC;
150
151 if (file->f_flags & O_APPEND)
152 open_mode |= V9FS_OAPPEND;
153 }
154
155 result = v9fs_t_open(v9ses, newfid, open_mode, &fcall);
156 if (result < 0) {
157 dprintk(DEBUG_ERROR,
158 "open failed, open_mode 0x%x: %s\n", open_mode,
159 FCALL_ERROR(fcall));
160 kfree(fcall);
161 return result;
162 }
163
164 iounit = fcall->params.ropen.iounit;
165 kfree(fcall);
166 }
167
168
169 file->private_data = v9fid;
170
171 v9fid->rdir_pos = 0;
172 v9fid->rdir_fcall = NULL;
173 v9fid->fidopen = 1;
174 v9fid->filp = file;
175 v9fid->iounit = iounit;
176
177 return 0;
178}
179
180/**
181 * v9fs_file_lock - lock a file (or directory)
182 * @inode: inode to be opened
183 * @file: file being opened
184 *
185 * XXX - this looks like a local only lock, we should extend into 9P
186 * by using open exclusive
187 */
188
189static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
190{
191 int res = 0;
192 struct inode *inode = filp->f_dentry->d_inode;
193
194 dprintk(DEBUG_VFS, "filp: %p lock: %p\n", filp, fl);
195
196 /* No mandatory locks */
197 if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
198 return -ENOLCK;
199
200 if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) {
201 filemap_fdatawrite(inode->i_mapping);
202 filemap_fdatawait(inode->i_mapping);
203 invalidate_inode_pages(&inode->i_data);
204 }
205
206 return res;
207}
208
209/**
210 * v9fs_read - read from a file (internal)
211 * @filep: file pointer to read
212 * @data: data buffer to read data into
213 * @count: size of buffer
214 * @offset: offset at which to read data
215 *
216 */
217
218static ssize_t
219v9fs_read(struct file *filp, char *buffer, size_t count, loff_t * offset)
220{
221 struct inode *inode = filp->f_dentry->d_inode;
222 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
223 struct v9fs_fid *v9f = filp->private_data;
224 struct v9fs_fcall *fcall = NULL;
225 int fid = v9f->fid;
226 int rsize = 0;
227 int result = 0;
228 int total = 0;
229
230 dprintk(DEBUG_VFS, "\n");
231
232 rsize = v9ses->maxdata - V9FS_IOHDRSZ;
233 if (v9f->iounit != 0 && rsize > v9f->iounit)
234 rsize = v9f->iounit;
235
236 do {
237 if (count < rsize)
238 rsize = count;
239
240 result = v9fs_t_read(v9ses, fid, *offset, rsize, &fcall);
241
242 if (result < 0) {
243 printk(KERN_ERR "9P2000: v9fs_t_read returned %d\n",
244 result);
245
246 kfree(fcall);
247 return total;
248 } else
249 *offset += result;
250
251 /* XXX - extra copy */
252 memcpy(buffer, fcall->params.rread.data, result);
253 count -= result;
254 buffer += result;
255 total += result;
256
257 kfree(fcall);
258
259 if (result < rsize)
260 break;
261 } while (count);
262
263 return total;
264}
265
266/**
267 * v9fs_file_read - read from a file
268 * @filep: file pointer to read
269 * @data: data buffer to read data into
270 * @count: size of buffer
271 * @offset: offset at which to read data
272 *
273 */
274
275static ssize_t
276v9fs_file_read(struct file *filp, char __user * data, size_t count,
277 loff_t * offset)
278{
279 int retval = -1;
280 int ret = 0;
281 char *buffer;
282
283 buffer = kmalloc(count, GFP_KERNEL);
284 if (!buffer)
285 return -ENOMEM;
286
287 retval = v9fs_read(filp, buffer, count, offset);
288 if (retval > 0) {
289 if ((ret = copy_to_user(data, buffer, retval)) != 0) {
290 dprintk(DEBUG_ERROR, "Problem copying to user %d\n",
291 ret);
292 retval = ret;
293 }
294 }
295
296 kfree(buffer);
297
298 return retval;
299}
300
301/**
302 * v9fs_write - write to a file
303 * @filep: file pointer to write
304 * @data: data buffer to write data from
305 * @count: size of buffer
306 * @offset: offset at which to write data
307 *
308 */
309
310static ssize_t
311v9fs_write(struct file *filp, char *buffer, size_t count, loff_t * offset)
312{
313 struct inode *inode = filp->f_dentry->d_inode;
314 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
315 struct v9fs_fid *v9fid = filp->private_data;
316 struct v9fs_fcall *fcall;
317 int fid = v9fid->fid;
318 int result = -EIO;
319 int rsize = 0;
320 int total = 0;
321
322 dprintk(DEBUG_VFS, "data %p count %d offset %x\n", buffer, (int)count,
323 (int)*offset);
324 rsize = v9ses->maxdata - V9FS_IOHDRSZ;
325 if (v9fid->iounit != 0 && rsize > v9fid->iounit)
326 rsize = v9fid->iounit;
327
328 dump_data(buffer, count);
329
330 do {
331 if (count < rsize)
332 rsize = count;
333
334 result =
335 v9fs_t_write(v9ses, fid, *offset, rsize, buffer, &fcall);
336 if (result < 0) {
337 eprintk(KERN_ERR, "error while writing: %s(%d)\n",
338 FCALL_ERROR(fcall), result);
339 kfree(fcall);
340 return result;
341 } else
342 *offset += result;
343
344 kfree(fcall);
345
346 if (result != rsize) {
347 eprintk(KERN_ERR,
348 "short write: v9fs_t_write returned %d\n",
349 result);
350 break;
351 }
352
353 count -= result;
354 buffer += result;
355 total += result;
356 } while (count);
357
358 return total;
359}
360
361/**
362 * v9fs_file_write - write to a file
363 * @filep: file pointer to write
364 * @data: data buffer to write data from
365 * @count: size of buffer
366 * @offset: offset at which to write data
367 *
368 */
369
370static ssize_t
371v9fs_file_write(struct file *filp, const char __user * data,
372 size_t count, loff_t * offset)
373{
374 int ret = -1;
375 char *buffer;
376
377 buffer = kmalloc(count, GFP_KERNEL);
378 if (buffer == NULL)
379 return -ENOMEM;
380
381 ret = copy_from_user(buffer, data, count);
382 if (ret) {
383 dprintk(DEBUG_ERROR, "Problem copying from user\n");
384 ret = -EFAULT;
385 } else {
386 ret = v9fs_write(filp, buffer, count, offset);
387 }
388
389 kfree(buffer);
390
391 return ret;
392}
393
394struct file_operations v9fs_file_operations = {
395 .llseek = generic_file_llseek,
396 .read = v9fs_file_read,
397 .write = v9fs_file_write,
398 .open = v9fs_file_open,
399 .release = v9fs_dir_release, /* the release routine is shared with directories */
400 .lock = v9fs_file_lock,
401};
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
new file mode 100644
index 000000000000..0c13fc600049
--- /dev/null
+++ b/fs/9p/vfs_inode.c
@@ -0,0 +1,1338 @@
1/*
2 * linux/fs/9p/vfs_inode.c
3 *
4 * This file contains vfs inode ops for the 9P2000 protocol.
5 *
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to:
21 * Free Software Foundation
22 * 51 Franklin Street, Fifth Floor
23 * Boston, MA 02111-1301 USA
24 *
25 */
26
27#include <linux/module.h>
28#include <linux/errno.h>
29#include <linux/fs.h>
30#include <linux/file.h>
31#include <linux/pagemap.h>
32#include <linux/stat.h>
33#include <linux/string.h>
34#include <linux/smp_lock.h>
35#include <linux/inet.h>
36#include <linux/namei.h>
37#include <linux/idr.h>
38
39#include "debug.h"
40#include "v9fs.h"
41#include "9p.h"
42#include "v9fs_vfs.h"
43#include "conv.h"
44#include "fid.h"
45
46static struct inode_operations v9fs_dir_inode_operations;
47static struct inode_operations v9fs_dir_inode_operations_ext;
48static struct inode_operations v9fs_file_inode_operations;
49static struct inode_operations v9fs_symlink_inode_operations;
50
51/**
52 * unixmode2p9mode - convert unix mode bits to plan 9
53 * @v9ses: v9fs session information
54 * @mode: mode to convert
55 *
56 */
57
58static int unixmode2p9mode(struct v9fs_session_info *v9ses, int mode)
59{
60 int res;
61 res = mode & 0777;
62 if (S_ISDIR(mode))
63 res |= V9FS_DMDIR;
64 if (v9ses->extended) {
65 if (S_ISLNK(mode))
66 res |= V9FS_DMSYMLINK;
67 if (v9ses->nodev == 0) {
68 if (S_ISSOCK(mode))
69 res |= V9FS_DMSOCKET;
70 if (S_ISFIFO(mode))
71 res |= V9FS_DMNAMEDPIPE;
72 if (S_ISBLK(mode))
73 res |= V9FS_DMDEVICE;
74 if (S_ISCHR(mode))
75 res |= V9FS_DMDEVICE;
76 }
77
78 if ((mode & S_ISUID) == S_ISUID)
79 res |= V9FS_DMSETUID;
80 if ((mode & S_ISGID) == S_ISGID)
81 res |= V9FS_DMSETGID;
82 if ((mode & V9FS_DMLINK))
83 res |= V9FS_DMLINK;
84 }
85
86 return res;
87}
88
89/**
90 * p9mode2unixmode- convert plan9 mode bits to unix mode bits
91 * @v9ses: v9fs session information
92 * @mode: mode to convert
93 *
94 */
95
96static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
97{
98 int res;
99
100 res = mode & 0777;
101
102 if ((mode & V9FS_DMDIR) == V9FS_DMDIR)
103 res |= S_IFDIR;
104 else if ((mode & V9FS_DMSYMLINK) && (v9ses->extended))
105 res |= S_IFLNK;
106 else if ((mode & V9FS_DMSOCKET) && (v9ses->extended)
107 && (v9ses->nodev == 0))
108 res |= S_IFSOCK;
109 else if ((mode & V9FS_DMNAMEDPIPE) && (v9ses->extended)
110 && (v9ses->nodev == 0))
111 res |= S_IFIFO;
112 else if ((mode & V9FS_DMDEVICE) && (v9ses->extended)
113 && (v9ses->nodev == 0))
114 res |= S_IFBLK;
115 else
116 res |= S_IFREG;
117
118 if (v9ses->extended) {
119 if ((mode & V9FS_DMSETUID) == V9FS_DMSETUID)
120 res |= S_ISUID;
121
122 if ((mode & V9FS_DMSETGID) == V9FS_DMSETGID)
123 res |= S_ISGID;
124 }
125
126 return res;
127}
128
129/**
130 * v9fs_blank_mistat - helper function to setup a 9P stat structure
131 * @v9ses: 9P session info (for determining extended mode)
132 * @mistat: structure to initialize
133 *
134 */
135
136static void
137v9fs_blank_mistat(struct v9fs_session_info *v9ses, struct v9fs_stat *mistat)
138{
139 mistat->type = ~0;
140 mistat->dev = ~0;
141 mistat->qid.type = ~0;
142 mistat->qid.version = ~0;
143 *((long long *)&mistat->qid.path) = ~0;
144 mistat->mode = ~0;
145 mistat->atime = ~0;
146 mistat->mtime = ~0;
147 mistat->length = ~0;
148 mistat->name = mistat->data;
149 mistat->uid = mistat->data;
150 mistat->gid = mistat->data;
151 mistat->muid = mistat->data;
152 if (v9ses->extended) {
153 mistat->n_uid = ~0;
154 mistat->n_gid = ~0;
155 mistat->n_muid = ~0;
156 mistat->extension = mistat->data;
157 }
158 *mistat->data = 0;
159}
160
161/**
162 * v9fs_mistat2unix - convert mistat to unix stat
163 * @mistat: Plan 9 metadata (mistat) structure
164 * @buf: unix metadata (stat) structure to populate
165 * @sb: superblock
166 *
167 */
168
169static void
170v9fs_mistat2unix(struct v9fs_stat *mistat, struct stat *buf,
171 struct super_block *sb)
172{
173 struct v9fs_session_info *v9ses = sb ? sb->s_fs_info : NULL;
174
175 buf->st_nlink = 1;
176
177 buf->st_atime = mistat->atime;
178 buf->st_mtime = mistat->mtime;
179 buf->st_ctime = mistat->mtime;
180
181 buf->st_uid = (unsigned short)-1;
182 buf->st_gid = (unsigned short)-1;
183
184 if (v9ses && v9ses->extended) {
185 /* TODO: string to uid mapping via user-space daemon */
186 if (mistat->n_uid != -1)
187 sscanf(mistat->uid, "%x", (unsigned int *)&buf->st_uid);
188
189 if (mistat->n_gid != -1)
190 sscanf(mistat->gid, "%x", (unsigned int *)&buf->st_gid);
191 }
192
193 if (buf->st_uid == (unsigned short)-1)
194 buf->st_uid = v9ses->uid;
195 if (buf->st_gid == (unsigned short)-1)
196 buf->st_gid = v9ses->gid;
197
198 buf->st_mode = p9mode2unixmode(v9ses, mistat->mode);
199 if ((S_ISBLK(buf->st_mode)) || (S_ISCHR(buf->st_mode))) {
200 char type = 0;
201 int major = -1;
202 int minor = -1;
203 sscanf(mistat->extension, "%c %u %u", &type, &major, &minor);
204 switch (type) {
205 case 'c':
206 buf->st_mode &= ~S_IFBLK;
207 buf->st_mode |= S_IFCHR;
208 break;
209 case 'b':
210 break;
211 default:
212 dprintk(DEBUG_ERROR, "Unknown special type %c (%s)\n",
213 type, mistat->extension);
214 };
215 buf->st_rdev = MKDEV(major, minor);
216 } else
217 buf->st_rdev = 0;
218
219 buf->st_size = mistat->length;
220
221 buf->st_blksize = sb->s_blocksize;
222 buf->st_blocks =
223 (buf->st_size + buf->st_blksize - 1) >> sb->s_blocksize_bits;
224}
225
226/**
227 * v9fs_get_inode - helper function to setup an inode
228 * @sb: superblock
229 * @mode: mode to setup inode with
230 *
231 */
232
233struct inode *v9fs_get_inode(struct super_block *sb, int mode)
234{
235 struct inode *inode = NULL;
236 struct v9fs_session_info *v9ses = sb->s_fs_info;
237
238 dprintk(DEBUG_VFS, "super block: %p mode: %o\n", sb, mode);
239
240 inode = new_inode(sb);
241 if (inode) {
242 inode->i_mode = mode;
243 inode->i_uid = current->fsuid;
244 inode->i_gid = current->fsgid;
245 inode->i_blksize = sb->s_blocksize;
246 inode->i_blocks = 0;
247 inode->i_rdev = 0;
248 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
249
250 switch (mode & S_IFMT) {
251 case S_IFIFO:
252 case S_IFBLK:
253 case S_IFCHR:
254 case S_IFSOCK:
255 if(!v9ses->extended) {
256 dprintk(DEBUG_ERROR, "special files without extended mode\n");
257 return ERR_PTR(-EINVAL);
258 }
259 init_special_inode(inode, inode->i_mode,
260 inode->i_rdev);
261 break;
262 case S_IFREG:
263 inode->i_op = &v9fs_file_inode_operations;
264 inode->i_fop = &v9fs_file_operations;
265 break;
266 case S_IFLNK:
267 if(!v9ses->extended) {
268 dprintk(DEBUG_ERROR, "extended modes used w/o 9P2000.u\n");
269 return ERR_PTR(-EINVAL);
270 }
271 inode->i_op = &v9fs_symlink_inode_operations;
272 break;
273 case S_IFDIR:
274 inode->i_nlink++;
275 if(v9ses->extended)
276 inode->i_op = &v9fs_dir_inode_operations_ext;
277 else
278 inode->i_op = &v9fs_dir_inode_operations;
279 inode->i_fop = &v9fs_dir_operations;
280 break;
281 default:
282 dprintk(DEBUG_ERROR, "BAD mode 0x%x S_IFMT 0x%x\n",
283 mode, mode & S_IFMT);
284 return ERR_PTR(-EINVAL);
285 }
286 } else {
287 eprintk(KERN_WARNING, "Problem allocating inode\n");
288 return ERR_PTR(-ENOMEM);
289 }
290 return inode;
291}
292
293/**
294 * v9fs_create - helper function to create files and directories
295 * @dir: directory inode file is being created in
296 * @file_dentry: dentry file is being created in
297 * @perm: permissions file is being created with
298 * @open_mode: resulting open mode for file
299 *
300 */
301
302static int
303v9fs_create(struct inode *dir,
304 struct dentry *file_dentry,
305 unsigned int perm, unsigned int open_mode)
306{
307 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
308 struct super_block *sb = dir->i_sb;
309 struct v9fs_fid *dirfid =
310 v9fs_fid_lookup(file_dentry->d_parent, FID_WALK);
311 struct v9fs_fid *fid = NULL;
312 struct inode *file_inode = NULL;
313 struct v9fs_fcall *fcall = NULL;
314 struct v9fs_qid qid;
315 struct stat newstat;
316 int dirfidnum = -1;
317 long newfid = -1;
318 int result = 0;
319 unsigned int iounit = 0;
320
321 perm = unixmode2p9mode(v9ses, perm);
322
323 dprintk(DEBUG_VFS, "dir: %p dentry: %p perm: %o mode: %o\n", dir,
324 file_dentry, perm, open_mode);
325
326 if (!dirfid)
327 return -EBADF;
328
329 dirfidnum = dirfid->fid;
330 if (dirfidnum < 0) {
331 dprintk(DEBUG_ERROR, "No fid for the directory #%lu\n",
332 dir->i_ino);
333 return -EBADF;
334 }
335
336 if (file_dentry->d_inode) {
337 dprintk(DEBUG_ERROR,
338 "Odd. There is an inode for dir %lu, name :%s:\n",
339 dir->i_ino, file_dentry->d_name.name);
340 return -EEXIST;
341 }
342
343 newfid = v9fs_get_idpool(&v9ses->fidpool);
344 if (newfid < 0) {
345 eprintk(KERN_WARNING, "no free fids available\n");
346 return -ENOSPC;
347 }
348
349 result = v9fs_t_walk(v9ses, dirfidnum, newfid, NULL, &fcall);
350 if (result < 0) {
351 dprintk(DEBUG_ERROR, "clone error: %s\n", FCALL_ERROR(fcall));
352 v9fs_put_idpool(newfid, &v9ses->fidpool);
353 newfid = 0;
354 goto CleanUpFid;
355 }
356
357 kfree(fcall);
358
359 result = v9fs_t_create(v9ses, newfid, (char *)file_dentry->d_name.name,
360 perm, open_mode, &fcall);
361 if (result < 0) {
362 dprintk(DEBUG_ERROR, "create fails: %s(%d)\n",
363 FCALL_ERROR(fcall), result);
364
365 goto CleanUpFid;
366 }
367
368 iounit = fcall->params.rcreate.iounit;
369 qid = fcall->params.rcreate.qid;
370 kfree(fcall);
371
372 fid = v9fs_fid_create(file_dentry);
373 if (!fid) {
374 result = -ENOMEM;
375 goto CleanUpFid;
376 }
377
378 fid->fid = newfid;
379 fid->fidopen = 0;
380 fid->fidcreate = 1;
381 fid->qid = qid;
382 fid->iounit = iounit;
383 fid->rdir_pos = 0;
384 fid->rdir_fcall = NULL;
385 fid->v9ses = v9ses;
386
387 if ((perm & V9FS_DMSYMLINK) || (perm & V9FS_DMLINK) ||
388 (perm & V9FS_DMNAMEDPIPE) || (perm & V9FS_DMSOCKET) ||
389 (perm & V9FS_DMDEVICE))
390 return 0;
391
392 result = v9fs_t_stat(v9ses, newfid, &fcall);
393 if (result < 0) {
394 dprintk(DEBUG_ERROR, "stat error: %s(%d)\n", FCALL_ERROR(fcall),
395 result);
396 goto CleanUpFid;
397 }
398
399 v9fs_mistat2unix(fcall->params.rstat.stat, &newstat, sb);
400
401 file_inode = v9fs_get_inode(sb, newstat.st_mode);
402 if ((!file_inode) || IS_ERR(file_inode)) {
403 dprintk(DEBUG_ERROR, "create inode failed\n");
404 result = -EBADF;
405 goto CleanUpFid;
406 }
407
408 v9fs_mistat2inode(fcall->params.rstat.stat, file_inode, sb);
409 kfree(fcall);
410 d_instantiate(file_dentry, file_inode);
411
412 if (perm & V9FS_DMDIR) {
413 if (v9fs_t_clunk(v9ses, newfid, &fcall))
414 dprintk(DEBUG_ERROR, "clunk for mkdir failed: %s\n",
415 FCALL_ERROR(fcall));
416
417 v9fs_put_idpool(newfid, &v9ses->fidpool);
418 kfree(fcall);
419 fid->fidopen = 0;
420 fid->fidcreate = 0;
421 d_drop(file_dentry);
422 }
423
424 return 0;
425
426 CleanUpFid:
427 kfree(fcall);
428
429 if (newfid) {
430 if (v9fs_t_clunk(v9ses, newfid, &fcall))
431 dprintk(DEBUG_ERROR, "clunk failed: %s\n",
432 FCALL_ERROR(fcall));
433
434 v9fs_put_idpool(newfid, &v9ses->fidpool);
435 kfree(fcall);
436 }
437 return result;
438}
439
440/**
441 * v9fs_remove - helper function to remove files and directories
442 * @dir: directory inode that is being deleted
443 * @file: dentry that is being deleted
444 * @rmdir: removing a directory
445 *
446 */
447
448static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
449{
450 struct v9fs_fcall *fcall = NULL;
451 struct super_block *sb = NULL;
452 struct v9fs_session_info *v9ses = NULL;
453 struct v9fs_fid *v9fid = NULL;
454 struct inode *file_inode = NULL;
455 int fid = -1;
456 int result = 0;
457
458 dprintk(DEBUG_VFS, "inode: %p dentry: %p rmdir: %d\n", dir, file,
459 rmdir);
460
461 file_inode = file->d_inode;
462 sb = file_inode->i_sb;
463 v9ses = v9fs_inode2v9ses(file_inode);
464 v9fid = v9fs_fid_lookup(file, FID_OP);
465
466 if (!v9fid) {
467 dprintk(DEBUG_ERROR,
468 "no v9fs_fid\n");
469 return -EBADF;
470 }
471
472 fid = v9fid->fid;
473 if (fid < 0) {
474 dprintk(DEBUG_ERROR, "inode #%lu, no fid!\n",
475 file_inode->i_ino);
476 return -EBADF;
477 }
478
479 result = v9fs_t_remove(v9ses, fid, &fcall);
480 if (result < 0)
481 dprintk(DEBUG_ERROR, "remove of file fails: %s(%d)\n",
482 FCALL_ERROR(fcall), result);
483 else {
484 v9fs_put_idpool(fid, &v9ses->fidpool);
485 v9fs_fid_destroy(v9fid);
486 }
487
488 kfree(fcall);
489 return result;
490}
491
492/**
493 * v9fs_vfs_create - VFS hook to create files
494 * @inode: directory inode that is being deleted
495 * @dentry: dentry that is being deleted
496 * @perm: create permissions
497 * @nd: path information
498 *
499 */
500
501static int
502v9fs_vfs_create(struct inode *inode, struct dentry *dentry, int perm,
503 struct nameidata *nd)
504{
505 return v9fs_create(inode, dentry, perm, O_RDWR);
506}
507
508/**
509 * v9fs_vfs_mkdir - VFS mkdir hook to create a directory
510 * @inode: inode that is being unlinked
511 * @dentry: dentry that is being unlinked
512 * @mode: mode for new directory
513 *
514 */
515
516static int v9fs_vfs_mkdir(struct inode *inode, struct dentry *dentry, int mode)
517{
518 return v9fs_create(inode, dentry, mode | S_IFDIR, O_RDONLY);
519}
520
521/**
522 * v9fs_vfs_lookup - VFS lookup hook to "walk" to a new inode
523 * @dir: inode that is being walked from
524 * @dentry: dentry that is being walked to?
525 * @nameidata: path data
526 *
527 */
528
529static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
530 struct nameidata *nameidata)
531{
532 struct super_block *sb;
533 struct v9fs_session_info *v9ses;
534 struct v9fs_fid *dirfid;
535 struct v9fs_fid *fid;
536 struct inode *inode;
537 struct v9fs_fcall *fcall = NULL;
538 struct stat newstat;
539 int dirfidnum = -1;
540 int newfid = -1;
541 int result = 0;
542
543 dprintk(DEBUG_VFS, "dir: %p dentry: (%s) %p nameidata: %p\n",
544 dir, dentry->d_iname, dentry, nameidata);
545
546 sb = dir->i_sb;
547 v9ses = v9fs_inode2v9ses(dir);
548 dirfid = v9fs_fid_lookup(dentry->d_parent, FID_WALK);
549
550 if (!dirfid) {
551 dprintk(DEBUG_ERROR, "no dirfid\n");
552 return ERR_PTR(-EINVAL);
553 }
554
555 dirfidnum = dirfid->fid;
556
557 if (dirfidnum < 0) {
558 dprintk(DEBUG_ERROR, "no dirfid for inode %p, #%lu\n",
559 dir, dir->i_ino);
560 return ERR_PTR(-EBADF);
561 }
562
563 newfid = v9fs_get_idpool(&v9ses->fidpool);
564 if (newfid < 0) {
565 eprintk(KERN_WARNING, "newfid fails!\n");
566 return ERR_PTR(-ENOSPC);
567 }
568
569 result =
570 v9fs_t_walk(v9ses, dirfidnum, newfid, (char *)dentry->d_name.name,
571 NULL);
572 if (result < 0) {
573 v9fs_put_idpool(newfid, &v9ses->fidpool);
574 if (result == -ENOENT) {
575 d_add(dentry, NULL);
576 dprintk(DEBUG_ERROR,
577 "Return negative dentry %p count %d\n",
578 dentry, atomic_read(&dentry->d_count));
579 return NULL;
580 }
581 dprintk(DEBUG_ERROR, "walk error:%d\n", result);
582 goto FreeFcall;
583 }
584
585 result = v9fs_t_stat(v9ses, newfid, &fcall);
586 if (result < 0) {
587 dprintk(DEBUG_ERROR, "stat error\n");
588 goto FreeFcall;
589 }
590
591 v9fs_mistat2unix(fcall->params.rstat.stat, &newstat, sb);
592 inode = v9fs_get_inode(sb, newstat.st_mode);
593
594 if (IS_ERR(inode) && (PTR_ERR(inode) == -ENOSPC)) {
595 eprintk(KERN_WARNING, "inode alloc failes, returns %ld\n",
596 PTR_ERR(inode));
597
598 result = -ENOSPC;
599 goto FreeFcall;
600 }
601
602 inode->i_ino = v9fs_qid2ino(&fcall->params.rstat.stat->qid);
603
604 fid = v9fs_fid_create(dentry);
605 if (fid == NULL) {
606 dprintk(DEBUG_ERROR, "couldn't insert\n");
607 result = -ENOMEM;
608 goto FreeFcall;
609 }
610
611 fid->fid = newfid;
612 fid->fidopen = 0;
613 fid->v9ses = v9ses;
614 fid->qid = fcall->params.rstat.stat->qid;
615
616 dentry->d_op = &v9fs_dentry_operations;
617 v9fs_mistat2inode(fcall->params.rstat.stat, inode, inode->i_sb);
618
619 d_add(dentry, inode);
620 kfree(fcall);
621
622 return NULL;
623
624 FreeFcall:
625 kfree(fcall);
626 return ERR_PTR(result);
627}
628
/**
 * v9fs_vfs_unlink - VFS unlink hook to delete an inode
 * @i: inode of the parent directory
 * @d: dentry that is being unlinked
 *
 * Thin wrapper around v9fs_remove() with the rmdir flag cleared.
 */

static int v9fs_vfs_unlink(struct inode *i, struct dentry *d)
{
	const int is_rmdir = 0;

	return v9fs_remove(i, d, is_rmdir);
}
640
/**
 * v9fs_vfs_rmdir - VFS rmdir hook to delete a directory
 * @i: inode of the parent directory
 * @d: dentry of the directory that is being removed
 *
 * Thin wrapper around v9fs_remove() with the rmdir flag set.
 */

static int v9fs_vfs_rmdir(struct inode *i, struct dentry *d)
{
	const int is_rmdir = 1;

	return v9fs_remove(i, d, is_rmdir);
}
652
653/**
654 * v9fs_vfs_rename - VFS hook to rename an inode
655 * @old_dir: old dir inode
656 * @old_dentry: old dentry
657 * @new_dir: new dir inode
658 * @new_dentry: new dentry
659 *
660 */
661
662static int
663v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
664 struct inode *new_dir, struct dentry *new_dentry)
665{
666 struct inode *old_inode = old_dentry->d_inode;
667 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(old_inode);
668 struct v9fs_fid *oldfid = v9fs_fid_lookup(old_dentry, FID_WALK);
669 struct v9fs_fid *olddirfid =
670 v9fs_fid_lookup(old_dentry->d_parent, FID_WALK);
671 struct v9fs_fid *newdirfid =
672 v9fs_fid_lookup(new_dentry->d_parent, FID_WALK);
673 struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
674 struct v9fs_fcall *fcall = NULL;
675 int fid = -1;
676 int olddirfidnum = -1;
677 int newdirfidnum = -1;
678 int retval = 0;
679
680 dprintk(DEBUG_VFS, "\n");
681
682 if (!mistat)
683 return -ENOMEM;
684
685 if ((!oldfid) || (!olddirfid) || (!newdirfid)) {
686 dprintk(DEBUG_ERROR, "problem with arguments\n");
687 return -EBADF;
688 }
689
690 /* 9P can only handle file rename in the same directory */
691 if (memcmp(&olddirfid->qid, &newdirfid->qid, sizeof(newdirfid->qid))) {
692 dprintk(DEBUG_ERROR, "old dir and new dir are different\n");
693 retval = -EPERM;
694 goto FreeFcallnBail;
695 }
696
697 fid = oldfid->fid;
698 olddirfidnum = olddirfid->fid;
699 newdirfidnum = newdirfid->fid;
700
701 if (fid < 0) {
702 dprintk(DEBUG_ERROR, "no fid for old file #%lu\n",
703 old_inode->i_ino);
704 retval = -EBADF;
705 goto FreeFcallnBail;
706 }
707
708 v9fs_blank_mistat(v9ses, mistat);
709
710 strcpy(mistat->data + 1, v9ses->name);
711 mistat->name = mistat->data + 1 + strlen(v9ses->name);
712
713 if (new_dentry->d_name.len >
714 (v9ses->maxdata - strlen(v9ses->name) - sizeof(struct v9fs_stat))) {
715 dprintk(DEBUG_ERROR, "new name too long\n");
716 goto FreeFcallnBail;
717 }
718
719 strcpy(mistat->name, new_dentry->d_name.name);
720 retval = v9fs_t_wstat(v9ses, fid, mistat, &fcall);
721
722 FreeFcallnBail:
723 kfree(mistat);
724
725 if (retval < 0)
726 dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
727 FCALL_ERROR(fcall));
728
729 kfree(fcall);
730 return retval;
731}
732
733/**
734 * v9fs_vfs_getattr - retreive file metadata
735 * @mnt - mount information
736 * @dentry - file to get attributes on
737 * @stat - metadata structure to populate
738 *
739 */
740
741static int
742v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
743 struct kstat *stat)
744{
745 struct v9fs_fcall *fcall = NULL;
746 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode);
747 struct v9fs_fid *fid = v9fs_fid_lookup(dentry, FID_OP);
748 int err = -EPERM;
749
750 dprintk(DEBUG_VFS, "dentry: %p\n", dentry);
751 if (!fid) {
752 dprintk(DEBUG_ERROR,
753 "couldn't find fid associated with dentry\n");
754 return -EBADF;
755 }
756
757 err = v9fs_t_stat(v9ses, fid->fid, &fcall);
758
759 if (err < 0)
760 dprintk(DEBUG_ERROR, "stat error\n");
761 else {
762 v9fs_mistat2inode(fcall->params.rstat.stat, dentry->d_inode,
763 dentry->d_inode->i_sb);
764 generic_fillattr(dentry->d_inode, stat);
765 }
766
767 kfree(fcall);
768 return err;
769}
770
771/**
772 * v9fs_vfs_setattr - set file metadata
773 * @dentry: file whose metadata to set
774 * @iattr: metadata assignment structure
775 *
776 */
777
778static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
779{
780 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode);
781 struct v9fs_fid *fid = v9fs_fid_lookup(dentry, FID_OP);
782 struct v9fs_fcall *fcall = NULL;
783 struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
784 int res = -EPERM;
785
786 dprintk(DEBUG_VFS, "\n");
787
788 if (!mistat)
789 return -ENOMEM;
790
791 if (!fid) {
792 dprintk(DEBUG_ERROR,
793 "Couldn't find fid associated with dentry\n");
794 return -EBADF;
795 }
796
797 v9fs_blank_mistat(v9ses, mistat);
798 if (iattr->ia_valid & ATTR_MODE)
799 mistat->mode = unixmode2p9mode(v9ses, iattr->ia_mode);
800
801 if (iattr->ia_valid & ATTR_MTIME)
802 mistat->mtime = iattr->ia_mtime.tv_sec;
803
804 if (iattr->ia_valid & ATTR_ATIME)
805 mistat->atime = iattr->ia_atime.tv_sec;
806
807 if (iattr->ia_valid & ATTR_SIZE)
808 mistat->length = iattr->ia_size;
809
810 if (v9ses->extended) {
811 char *ptr = mistat->data+1;
812
813 if (iattr->ia_valid & ATTR_UID) {
814 mistat->uid = ptr;
815 ptr += 1+sprintf(ptr, "%08x", iattr->ia_uid);
816 mistat->n_uid = iattr->ia_uid;
817 }
818
819 if (iattr->ia_valid & ATTR_GID) {
820 mistat->gid = ptr;
821 ptr += 1+sprintf(ptr, "%08x", iattr->ia_gid);
822 mistat->n_gid = iattr->ia_gid;
823 }
824 }
825
826 res = v9fs_t_wstat(v9ses, fid->fid, mistat, &fcall);
827
828 if (res < 0)
829 dprintk(DEBUG_ERROR, "wstat error: %s\n", FCALL_ERROR(fcall));
830
831 kfree(mistat);
832 kfree(fcall);
833
834 if (res >= 0)
835 res = inode_setattr(dentry->d_inode, iattr);
836
837 return res;
838}
839
840/**
841 * v9fs_mistat2inode - populate an inode structure with mistat info
842 * @mistat: Plan 9 metadata (mistat) structure
843 * @inode: inode to populate
844 * @sb: superblock of filesystem
845 *
846 */
847
848void
849v9fs_mistat2inode(struct v9fs_stat *mistat, struct inode *inode,
850 struct super_block *sb)
851{
852 struct v9fs_session_info *v9ses = sb->s_fs_info;
853
854 inode->i_nlink = 1;
855
856 inode->i_atime.tv_sec = mistat->atime;
857 inode->i_mtime.tv_sec = mistat->mtime;
858 inode->i_ctime.tv_sec = mistat->mtime;
859
860 inode->i_uid = -1;
861 inode->i_gid = -1;
862
863 if (v9ses->extended) {
864 /* TODO: string to uid mapping via user-space daemon */
865 inode->i_uid = mistat->n_uid;
866 inode->i_gid = mistat->n_gid;
867
868 if (mistat->n_uid == -1)
869 sscanf(mistat->uid, "%x", &inode->i_uid);
870
871 if (mistat->n_gid == -1)
872 sscanf(mistat->gid, "%x", &inode->i_gid);
873 }
874
875 if (inode->i_uid == -1)
876 inode->i_uid = v9ses->uid;
877 if (inode->i_gid == -1)
878 inode->i_gid = v9ses->gid;
879
880 inode->i_mode = p9mode2unixmode(v9ses, mistat->mode);
881 if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode))) {
882 char type = 0;
883 int major = -1;
884 int minor = -1;
885 sscanf(mistat->extension, "%c %u %u", &type, &major, &minor);
886 switch (type) {
887 case 'c':
888 inode->i_mode &= ~S_IFBLK;
889 inode->i_mode |= S_IFCHR;
890 break;
891 case 'b':
892 break;
893 default:
894 dprintk(DEBUG_ERROR, "Unknown special type %c (%s)\n",
895 type, mistat->extension);
896 };
897 inode->i_rdev = MKDEV(major, minor);
898 } else
899 inode->i_rdev = 0;
900
901 inode->i_size = mistat->length;
902
903 inode->i_blksize = sb->s_blocksize;
904 inode->i_blocks =
905 (inode->i_size + inode->i_blksize - 1) >> sb->s_blocksize_bits;
906}
907
908/**
909 * v9fs_qid2ino - convert qid into inode number
910 * @qid: qid to hash
911 *
912 * BUG: potential for inode number collisions?
913 */
914
915ino_t v9fs_qid2ino(struct v9fs_qid *qid)
916{
917 u64 path = qid->path + 2;
918 ino_t i = 0;
919
920 if (sizeof(ino_t) == sizeof(path))
921 memcpy(&i, &path, sizeof(ino_t));
922 else
923 i = (ino_t) (path ^ (path >> 32));
924
925 return i;
926}
927
928/**
929 * v9fs_vfs_symlink - helper function to create symlinks
930 * @dir: directory inode containing symlink
931 * @dentry: dentry for symlink
932 * @symname: symlink data
933 *
934 * See 9P2000.u RFC for more information
935 *
936 */
937
938static int
939v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
940{
941 int retval = -EPERM;
942 struct v9fs_fid *newfid;
943 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
944 struct v9fs_fcall *fcall = NULL;
945 struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
946
947 dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
948 symname);
949
950 if (!mistat)
951 return -ENOMEM;
952
953 if (!v9ses->extended) {
954 dprintk(DEBUG_ERROR, "not extended\n");
955 goto FreeFcall;
956 }
957
958 /* issue a create */
959 retval = v9fs_create(dir, dentry, S_IFLNK, 0);
960 if (retval != 0)
961 goto FreeFcall;
962
963 newfid = v9fs_fid_lookup(dentry, FID_OP);
964
965 /* issue a twstat */
966 v9fs_blank_mistat(v9ses, mistat);
967 strcpy(mistat->data + 1, symname);
968 mistat->extension = mistat->data + 1;
969 retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall);
970 if (retval < 0) {
971 dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
972 FCALL_ERROR(fcall));
973 goto FreeFcall;
974 }
975
976 kfree(fcall);
977
978 if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) {
979 dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n",
980 FCALL_ERROR(fcall));
981 goto FreeFcall;
982 }
983
984 d_drop(dentry); /* FID - will this also clunk? */
985
986 FreeFcall:
987 kfree(mistat);
988 kfree(fcall);
989
990 return retval;
991}
992
993/**
994 * v9fs_readlink - read a symlink's location (internal version)
995 * @dentry: dentry for symlink
996 * @buffer: buffer to load symlink location into
997 * @buflen: length of buffer
998 *
999 */
1000
1001static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
1002{
1003 int retval = -EPERM;
1004
1005 struct v9fs_fcall *fcall = NULL;
1006 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode);
1007 struct v9fs_fid *fid = v9fs_fid_lookup(dentry, FID_OP);
1008
1009 if (!fid) {
1010 dprintk(DEBUG_ERROR, "could not resolve fid from dentry\n");
1011 retval = -EBADF;
1012 goto FreeFcall;
1013 }
1014
1015 if (!v9ses->extended) {
1016 retval = -EBADF;
1017 dprintk(DEBUG_ERROR, "not extended\n");
1018 goto FreeFcall;
1019 }
1020
1021 dprintk(DEBUG_VFS, " %s\n", dentry->d_name.name);
1022 retval = v9fs_t_stat(v9ses, fid->fid, &fcall);
1023
1024 if (retval < 0) {
1025 dprintk(DEBUG_ERROR, "stat error\n");
1026 goto FreeFcall;
1027 }
1028
1029 if (!fcall)
1030 return -EIO;
1031
1032 if (!(fcall->params.rstat.stat->mode & V9FS_DMSYMLINK)) {
1033 retval = -EINVAL;
1034 goto FreeFcall;
1035 }
1036
1037 /* copy extension buffer into buffer */
1038 if (strlen(fcall->params.rstat.stat->extension) < buflen)
1039 buflen = strlen(fcall->params.rstat.stat->extension);
1040
1041 memcpy(buffer, fcall->params.rstat.stat->extension, buflen + 1);
1042
1043 retval = buflen;
1044
1045 FreeFcall:
1046 kfree(fcall);
1047
1048 return retval;
1049}
1050
1051/**
1052 * v9fs_vfs_readlink - read a symlink's location
1053 * @dentry: dentry for symlink
1054 * @buf: buffer to load symlink location into
1055 * @buflen: length of buffer
1056 *
1057 */
1058
1059static int v9fs_vfs_readlink(struct dentry *dentry, char __user * buffer,
1060 int buflen)
1061{
1062 int retval;
1063 int ret;
1064 char *link = __getname();
1065
1066 if (strlen(link) < buflen)
1067 buflen = strlen(link);
1068
1069 dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
1070
1071 retval = v9fs_readlink(dentry, link, buflen);
1072
1073 if (retval > 0) {
1074 if ((ret = copy_to_user(buffer, link, retval)) != 0) {
1075 dprintk(DEBUG_ERROR, "problem copying to user: %d\n",
1076 ret);
1077 retval = ret;
1078 }
1079 }
1080
1081 putname(link);
1082 return retval;
1083}
1084
1085/**
1086 * v9fs_vfs_follow_link - follow a symlink path
1087 * @dentry: dentry for symlink
1088 * @nd: nameidata
1089 *
1090 */
1091
1092static void *v9fs_vfs_follow_link(struct dentry *dentry, struct nameidata *nd)
1093{
1094 int len = 0;
1095 char *link = __getname();
1096
1097 dprintk(DEBUG_VFS, "%s n", dentry->d_name.name);
1098
1099 if (!link)
1100 link = ERR_PTR(-ENOMEM);
1101 else {
1102 len = v9fs_readlink(dentry, link, strlen(link));
1103
1104 if (len < 0) {
1105 putname(link);
1106 link = ERR_PTR(len);
1107 } else
1108 link[len] = 0;
1109 }
1110 nd_set_link(nd, link);
1111
1112 return NULL;
1113}
1114
1115/**
1116 * v9fs_vfs_put_link - release a symlink path
1117 * @dentry: dentry for symlink
1118 * @nd: nameidata
1119 *
1120 */
1121
1122static void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
1123{
1124 char *s = nd_get_link(nd);
1125
1126 dprintk(DEBUG_VFS, " %s %s\n", dentry->d_name.name, s);
1127 if (!IS_ERR(s))
1128 putname(s);
1129}
1130
1131/**
1132 * v9fs_vfs_link - create a hardlink
1133 * @old_dentry: dentry for file to link to
1134 * @dir: inode destination for new link
1135 * @dentry: dentry for link
1136 *
1137 */
1138
1139/* XXX - lots of code dup'd from symlink and creates,
1140 * figure out a better reuse strategy
1141 */
1142
1143static int
1144v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
1145 struct dentry *dentry)
1146{
1147 int retval = -EPERM;
1148 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
1149 struct v9fs_fcall *fcall = NULL;
1150 struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
1151 struct v9fs_fid *oldfid = v9fs_fid_lookup(old_dentry, FID_OP);
1152 struct v9fs_fid *newfid = NULL;
1153 char *symname = __getname();
1154
1155 dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
1156 old_dentry->d_name.name);
1157
1158 if (!v9ses->extended) {
1159 dprintk(DEBUG_ERROR, "not extended\n");
1160 goto FreeMem;
1161 }
1162
1163 /* get fid of old_dentry */
1164 sprintf(symname, "hardlink(%d)\n", oldfid->fid);
1165
1166 /* issue a create */
1167 retval = v9fs_create(dir, dentry, V9FS_DMLINK, 0);
1168 if (retval != 0)
1169 goto FreeMem;
1170
1171 newfid = v9fs_fid_lookup(dentry, FID_OP);
1172 if (!newfid) {
1173 dprintk(DEBUG_ERROR, "couldn't resolve fid from dentry\n");
1174 goto FreeMem;
1175 }
1176
1177 /* issue a twstat */
1178 v9fs_blank_mistat(v9ses, mistat);
1179 strcpy(mistat->data + 1, symname);
1180 mistat->extension = mistat->data + 1;
1181 retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall);
1182 if (retval < 0) {
1183 dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
1184 FCALL_ERROR(fcall));
1185 goto FreeMem;
1186 }
1187
1188 kfree(fcall);
1189
1190 if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) {
1191 dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n",
1192 FCALL_ERROR(fcall));
1193 goto FreeMem;
1194 }
1195
1196 d_drop(dentry); /* FID - will this also clunk? */
1197
1198 kfree(fcall);
1199 fcall = NULL;
1200
1201 FreeMem:
1202 kfree(mistat);
1203 kfree(fcall);
1204 putname(symname);
1205 return retval;
1206}
1207
1208/**
1209 * v9fs_vfs_mknod - create a special file
1210 * @dir: inode destination for new link
1211 * @dentry: dentry for file
1212 * @mode: mode for creation
1213 * @dev_t: device associated with special file
1214 *
1215 */
1216
1217static int
1218v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
1219{
1220 int retval = -EPERM;
1221 struct v9fs_fid *newfid;
1222 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
1223 struct v9fs_fcall *fcall = NULL;
1224 struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
1225 char *symname = __getname();
1226
1227 dprintk(DEBUG_VFS, " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
1228 dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev));
1229
1230 if (!mistat)
1231 return -ENOMEM;
1232
1233 if (!new_valid_dev(rdev)) {
1234 retval = -EINVAL;
1235 goto FreeMem;
1236 }
1237
1238 if (!v9ses->extended) {
1239 dprintk(DEBUG_ERROR, "not extended\n");
1240 goto FreeMem;
1241 }
1242
1243 /* issue a create */
1244 retval = v9fs_create(dir, dentry, mode, 0);
1245
1246 if (retval != 0)
1247 goto FreeMem;
1248
1249 newfid = v9fs_fid_lookup(dentry, FID_OP);
1250 if (!newfid) {
1251 dprintk(DEBUG_ERROR, "coudn't resove fid from dentry\n");
1252 retval = -EINVAL;
1253 goto FreeMem;
1254 }
1255
1256 /* build extension */
1257 if (S_ISBLK(mode))
1258 sprintf(symname, "b %u %u", MAJOR(rdev), MINOR(rdev));
1259 else if (S_ISCHR(mode))
1260 sprintf(symname, "c %u %u", MAJOR(rdev), MINOR(rdev));
1261 else if (S_ISFIFO(mode))
1262 ; /* DO NOTHING */
1263 else {
1264 retval = -EINVAL;
1265 goto FreeMem;
1266 }
1267
1268 if (!S_ISFIFO(mode)) {
1269 /* issue a twstat */
1270 v9fs_blank_mistat(v9ses, mistat);
1271 strcpy(mistat->data + 1, symname);
1272 mistat->extension = mistat->data + 1;
1273 retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall);
1274 if (retval < 0) {
1275 dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
1276 FCALL_ERROR(fcall));
1277 goto FreeMem;
1278 }
1279 }
1280
1281 /* need to update dcache so we show up */
1282 kfree(fcall);
1283
1284 if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) {
1285 dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n",
1286 FCALL_ERROR(fcall));
1287 goto FreeMem;
1288 }
1289
1290 d_drop(dentry); /* FID - will this also clunk? */
1291
1292 FreeMem:
1293 kfree(mistat);
1294 kfree(fcall);
1295 putname(symname);
1296
1297 return retval;
1298}
1299
1300static struct inode_operations v9fs_dir_inode_operations_ext = {
1301 .create = v9fs_vfs_create,
1302 .lookup = v9fs_vfs_lookup,
1303 .symlink = v9fs_vfs_symlink,
1304 .link = v9fs_vfs_link,
1305 .unlink = v9fs_vfs_unlink,
1306 .mkdir = v9fs_vfs_mkdir,
1307 .rmdir = v9fs_vfs_rmdir,
1308 .mknod = v9fs_vfs_mknod,
1309 .rename = v9fs_vfs_rename,
1310 .readlink = v9fs_vfs_readlink,
1311 .getattr = v9fs_vfs_getattr,
1312 .setattr = v9fs_vfs_setattr,
1313};
1314
1315static struct inode_operations v9fs_dir_inode_operations = {
1316 .create = v9fs_vfs_create,
1317 .lookup = v9fs_vfs_lookup,
1318 .unlink = v9fs_vfs_unlink,
1319 .mkdir = v9fs_vfs_mkdir,
1320 .rmdir = v9fs_vfs_rmdir,
1321 .mknod = v9fs_vfs_mknod,
1322 .rename = v9fs_vfs_rename,
1323 .getattr = v9fs_vfs_getattr,
1324 .setattr = v9fs_vfs_setattr,
1325};
1326
1327static struct inode_operations v9fs_file_inode_operations = {
1328 .getattr = v9fs_vfs_getattr,
1329 .setattr = v9fs_vfs_setattr,
1330};
1331
1332static struct inode_operations v9fs_symlink_inode_operations = {
1333 .readlink = v9fs_vfs_readlink,
1334 .follow_link = v9fs_vfs_follow_link,
1335 .put_link = v9fs_vfs_put_link,
1336 .getattr = v9fs_vfs_getattr,
1337 .setattr = v9fs_vfs_setattr,
1338};
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
new file mode 100644
index 000000000000..868f350b2c5f
--- /dev/null
+++ b/fs/9p/vfs_super.c
@@ -0,0 +1,280 @@
1/*
2 * linux/fs/9p/vfs_super.c
3 *
4 * This file contains superblock ops for 9P2000. It is intended that
5 * you mount this file system on directories.
6 *
7 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
8 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to:
22 * Free Software Foundation
23 * 51 Franklin Street, Fifth Floor
24 * Boston, MA 02111-1301 USA
25 *
26 */
27
28#include <linux/kernel.h>
29#include <linux/config.h>
30#include <linux/module.h>
31#include <linux/errno.h>
32#include <linux/fs.h>
33#include <linux/file.h>
34#include <linux/stat.h>
35#include <linux/string.h>
36#include <linux/smp_lock.h>
37#include <linux/inet.h>
38#include <linux/pagemap.h>
39#include <linux/seq_file.h>
40#include <linux/mount.h>
41#include <linux/idr.h>
42
43#include "debug.h"
44#include "v9fs.h"
45#include "9p.h"
46#include "v9fs_vfs.h"
47#include "conv.h"
48#include "fid.h"
49
50static void v9fs_clear_inode(struct inode *);
51static struct super_operations v9fs_super_ops;
52
53/**
54 * v9fs_clear_inode - release an inode
55 * @inode: inode to release
56 *
57 */
58
59static void v9fs_clear_inode(struct inode *inode)
60{
61 filemap_fdatawrite(inode->i_mapping);
62}
63
64/**
65 * v9fs_set_super - set the superblock
66 * @s: super block
67 * @data: file system specific data
68 *
69 */
70
71static int v9fs_set_super(struct super_block *s, void *data)
72{
73 s->s_fs_info = data;
74 return set_anon_super(s, data);
75}
76
77/**
78 * v9fs_fill_super - populate superblock with info
79 * @sb: superblock
80 * @v9ses: session information
81 *
82 */
83
84static void
85v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
86 int flags)
87{
88 sb->s_maxbytes = MAX_LFS_FILESIZE;
89 sb->s_blocksize_bits = fls(v9ses->maxdata - 1);
90 sb->s_blocksize = 1 << sb->s_blocksize_bits;
91 sb->s_magic = V9FS_MAGIC;
92 sb->s_op = &v9fs_super_ops;
93
94 sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC |
95 MS_NODIRATIME | MS_NOATIME;
96}
97
98/**
99 * v9fs_get_sb - mount a superblock
100 * @fs_type: file system type
101 * @flags: mount flags
102 * @dev_name: device name that was mounted
103 * @data: mount options
104 *
105 */
106
107static struct super_block *v9fs_get_sb(struct file_system_type
108 *fs_type, int flags,
109 const char *dev_name, void *data)
110{
111 struct super_block *sb = NULL;
112 struct v9fs_fcall *fcall = NULL;
113 struct inode *inode = NULL;
114 struct dentry *root = NULL;
115 struct v9fs_session_info *v9ses = NULL;
116 struct v9fs_fid *root_fid = NULL;
117 int mode = S_IRWXUGO | S_ISVTX;
118 uid_t uid = current->fsuid;
119 gid_t gid = current->fsgid;
120 int stat_result = 0;
121 int newfid = 0;
122 int retval = 0;
123
124 dprintk(DEBUG_VFS, " \n");
125
126 v9ses = kcalloc(1, sizeof(struct v9fs_session_info), GFP_KERNEL);
127 if (!v9ses)
128 return ERR_PTR(-ENOMEM);
129
130 if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) {
131 dprintk(DEBUG_ERROR, "problem initiating session\n");
132 retval = newfid;
133 goto free_session;
134 }
135
136 sb = sget(fs_type, NULL, v9fs_set_super, v9ses);
137
138 v9fs_fill_super(sb, v9ses, flags);
139
140 inode = v9fs_get_inode(sb, S_IFDIR | mode);
141 if (IS_ERR(inode)) {
142 retval = PTR_ERR(inode);
143 goto put_back_sb;
144 }
145
146 inode->i_uid = uid;
147 inode->i_gid = gid;
148
149 root = d_alloc_root(inode);
150
151 if (!root) {
152 retval = -ENOMEM;
153 goto release_inode;
154 }
155
156 sb->s_root = root;
157
158 /* Setup the Root Inode */
159 root_fid = v9fs_fid_create(root);
160 if (root_fid == NULL) {
161 retval = -ENOMEM;
162 goto release_dentry;
163 }
164
165 root_fid->fidopen = 0;
166 root_fid->v9ses = v9ses;
167
168 stat_result = v9fs_t_stat(v9ses, newfid, &fcall);
169 if (stat_result < 0) {
170 dprintk(DEBUG_ERROR, "stat error\n");
171 v9fs_t_clunk(v9ses, newfid, NULL);
172 v9fs_put_idpool(newfid, &v9ses->fidpool);
173 } else {
174 root_fid->fid = newfid;
175 root_fid->qid = fcall->params.rstat.stat->qid;
176 root->d_inode->i_ino =
177 v9fs_qid2ino(&fcall->params.rstat.stat->qid);
178 v9fs_mistat2inode(fcall->params.rstat.stat, root->d_inode, sb);
179 }
180
181 kfree(fcall);
182
183 if (stat_result < 0) {
184 retval = stat_result;
185 goto release_dentry;
186 }
187
188 return sb;
189
190 release_dentry:
191 dput(sb->s_root);
192
193 release_inode:
194 iput(inode);
195
196 put_back_sb:
197 up_write(&sb->s_umount);
198 deactivate_super(sb);
199 v9fs_session_close(v9ses);
200
201 free_session:
202 kfree(v9ses);
203
204 return ERR_PTR(retval);
205}
206
207/**
208 * v9fs_kill_super - Kill Superblock
209 * @s: superblock
210 *
211 */
212
213static void v9fs_kill_super(struct super_block *s)
214{
215 struct v9fs_session_info *v9ses = s->s_fs_info;
216
217 dprintk(DEBUG_VFS, " %p\n", s);
218
219 v9fs_dentry_release(s->s_root); /* clunk root */
220
221 kill_anon_super(s);
222
223 v9fs_session_close(v9ses);
224 kfree(v9ses);
225 dprintk(DEBUG_VFS, "exiting kill_super\n");
226}
227
228/**
229 * v9fs_show_options - Show mount options in /proc/mounts
230 * @m: seq_file to write to
231 * @mnt: mount descriptor
232 *
233 */
234
235static int v9fs_show_options(struct seq_file *m, struct vfsmount *mnt)
236{
237 struct v9fs_session_info *v9ses = mnt->mnt_sb->s_fs_info;
238
239 if (v9ses->debug != 0)
240 seq_printf(m, ",debug=%u", v9ses->debug);
241 if (v9ses->port != V9FS_PORT)
242 seq_printf(m, ",port=%u", v9ses->port);
243 if (v9ses->maxdata != 9000)
244 seq_printf(m, ",msize=%u", v9ses->maxdata);
245 if (v9ses->afid != ~0)
246 seq_printf(m, ",afid=%u", v9ses->afid);
247 if (v9ses->proto == PROTO_UNIX)
248 seq_puts(m, ",proto=unix");
249 if (v9ses->extended == 0)
250 seq_puts(m, ",noextend");
251 if (v9ses->nodev == 1)
252 seq_puts(m, ",nodevmap");
253 seq_printf(m, ",name=%s", v9ses->name);
254 seq_printf(m, ",aname=%s", v9ses->remotename);
255 seq_printf(m, ",uid=%u", v9ses->uid);
256 seq_printf(m, ",gid=%u", v9ses->gid);
257 return 0;
258}
259
260static void
261v9fs_umount_begin(struct super_block *sb)
262{
263 struct v9fs_session_info *v9ses = sb->s_fs_info;
264
265 v9fs_session_cancel(v9ses);
266}
267
268static struct super_operations v9fs_super_ops = {
269 .statfs = simple_statfs,
270 .clear_inode = v9fs_clear_inode,
271 .show_options = v9fs_show_options,
272 .umount_begin = v9fs_umount_begin,
273};
274
275struct file_system_type v9fs_fs_type = {
276 .name = "9P",
277 .get_sb = v9fs_get_sb,
278 .kill_sb = v9fs_kill_super,
279 .owner = THIS_MODULE,
280};
diff --git a/fs/Kconfig b/fs/Kconfig
index 5e817902cb3b..068ccea2f184 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -462,6 +462,19 @@ config AUTOFS4_FS
462 local network, you probably do not need an automounter, and can say 462 local network, you probably do not need an automounter, and can say
463 N here. 463 N here.
464 464
465config FUSE_FS
466 tristate "Filesystem in Userspace support"
467 help
468 With FUSE it is possible to implement a fully functional filesystem
469 in a userspace program.
470
471 There's also a companion library: libfuse. This library along with
472 utilities is available from the FUSE homepage:
473 <http://fuse.sourceforge.net/>
474
475 If you want to develop a userspace FS, or if you want to use
476 a filesystem based on FUSE, answer Y or M.
477
465menu "CD-ROM/DVD Filesystems" 478menu "CD-ROM/DVD Filesystems"
466 479
467config ISO9660_FS 480config ISO9660_FS
@@ -1703,6 +1716,17 @@ config AFS_FS
1703config RXRPC 1716config RXRPC
1704 tristate 1717 tristate
1705 1718
1719config 9P_FS
1720 tristate "Plan 9 Resource Sharing Support (9P2000) (Experimental)"
1721 depends on INET && EXPERIMENTAL
1722 help
1723 If you say Y here, you will get experimental support for
1724 Plan 9 resource sharing via the 9P2000 protocol.
1725
1726 See <http://v9fs.sf.net> for more information.
1727
1728 If unsure, say N.
1729
1706endmenu 1730endmenu
1707 1731
1708menu "Partition Types" 1732menu "Partition Types"
diff --git a/fs/Makefile b/fs/Makefile
index 15158309dee4..1972da186272 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -89,11 +89,13 @@ obj-$(CONFIG_QNX4FS_FS) += qnx4/
89obj-$(CONFIG_AUTOFS_FS) += autofs/ 89obj-$(CONFIG_AUTOFS_FS) += autofs/
90obj-$(CONFIG_AUTOFS4_FS) += autofs4/ 90obj-$(CONFIG_AUTOFS4_FS) += autofs4/
91obj-$(CONFIG_ADFS_FS) += adfs/ 91obj-$(CONFIG_ADFS_FS) += adfs/
92obj-$(CONFIG_FUSE_FS) += fuse/
92obj-$(CONFIG_UDF_FS) += udf/ 93obj-$(CONFIG_UDF_FS) += udf/
93obj-$(CONFIG_RELAYFS_FS) += relayfs/ 94obj-$(CONFIG_RELAYFS_FS) += relayfs/
94obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/ 95obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/
95obj-$(CONFIG_JFS_FS) += jfs/ 96obj-$(CONFIG_JFS_FS) += jfs/
96obj-$(CONFIG_XFS_FS) += xfs/ 97obj-$(CONFIG_XFS_FS) += xfs/
98obj-$(CONFIG_9P_FS) += 9p/
97obj-$(CONFIG_AFS_FS) += afs/ 99obj-$(CONFIG_AFS_FS) += afs/
98obj-$(CONFIG_BEFS_FS) += befs/ 100obj-$(CONFIG_BEFS_FS) += befs/
99obj-$(CONFIG_HOSTFS) += hostfs/ 101obj-$(CONFIG_HOSTFS) += hostfs/
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 7aa6f2004536..9ebe881c6786 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -255,6 +255,7 @@ void
255affs_delete_inode(struct inode *inode) 255affs_delete_inode(struct inode *inode)
256{ 256{
257 pr_debug("AFFS: delete_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink); 257 pr_debug("AFFS: delete_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
258 truncate_inode_pages(&inode->i_data, 0);
258 inode->i_size = 0; 259 inode->i_size = 0;
259 if (S_ISREG(inode->i_mode)) 260 if (S_ISREG(inode->i_mode))
260 affs_truncate(inode); 261 affs_truncate(inode);
diff --git a/fs/aio.c b/fs/aio.c
index 4f641abac3c0..38f62680fd63 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -29,6 +29,7 @@
29#include <linux/highmem.h> 29#include <linux/highmem.h>
30#include <linux/workqueue.h> 30#include <linux/workqueue.h>
31#include <linux/security.h> 31#include <linux/security.h>
32#include <linux/rcuref.h>
32 33
33#include <asm/kmap_types.h> 34#include <asm/kmap_types.h>
34#include <asm/uaccess.h> 35#include <asm/uaccess.h>
@@ -499,7 +500,7 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
499 /* Must be done under the lock to serialise against cancellation. 500 /* Must be done under the lock to serialise against cancellation.
500 * Call this aio_fput as it duplicates fput via the fput_work. 501 * Call this aio_fput as it duplicates fput via the fput_work.
501 */ 502 */
502 if (unlikely(atomic_dec_and_test(&req->ki_filp->f_count))) { 503 if (unlikely(rcuref_dec_and_test(&req->ki_filp->f_count))) {
503 get_ioctx(ctx); 504 get_ioctx(ctx);
504 spin_lock(&fput_lock); 505 spin_lock(&fput_lock);
505 list_add(&req->ki_list, &fput_head); 506 list_add(&req->ki_list, &fput_head);
@@ -546,6 +547,24 @@ struct kioctx *lookup_ioctx(unsigned long ctx_id)
546 return ioctx; 547 return ioctx;
547} 548}
548 549
/*
 * Action callback passed to wait_on_bit_lock() by lock_kiocb():
 * give up the CPU, then report 0.  The argument is not used.
 */
static int lock_kiocb_action(void *word)
{
	schedule();

	return 0;
}
555
556static inline void lock_kiocb(struct kiocb *iocb)
557{
558 wait_on_bit_lock(&iocb->ki_flags, KIF_LOCKED, lock_kiocb_action,
559 TASK_UNINTERRUPTIBLE);
560}
561
562static inline void unlock_kiocb(struct kiocb *iocb)
563{
564 kiocbClearLocked(iocb);
565 wake_up_bit(&iocb->ki_flags, KIF_LOCKED);
566}
567
549/* 568/*
550 * use_mm 569 * use_mm
551 * Makes the calling kernel thread take on the specified 570 * Makes the calling kernel thread take on the specified
@@ -786,7 +805,9 @@ static int __aio_run_iocbs(struct kioctx *ctx)
786 * Hold an extra reference while retrying i/o. 805 * Hold an extra reference while retrying i/o.
787 */ 806 */
788 iocb->ki_users++; /* grab extra reference */ 807 iocb->ki_users++; /* grab extra reference */
808 lock_kiocb(iocb);
789 aio_run_iocb(iocb); 809 aio_run_iocb(iocb);
810 unlock_kiocb(iocb);
790 if (__aio_put_req(ctx, iocb)) /* drop extra ref */ 811 if (__aio_put_req(ctx, iocb)) /* drop extra ref */
791 put_ioctx(ctx); 812 put_ioctx(ctx);
792 } 813 }
@@ -1527,10 +1548,9 @@ int fastcall io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1527 goto out_put_req; 1548 goto out_put_req;
1528 1549
1529 spin_lock_irq(&ctx->ctx_lock); 1550 spin_lock_irq(&ctx->ctx_lock);
1530 if (likely(list_empty(&ctx->run_list))) { 1551 aio_run_iocb(req);
1531 aio_run_iocb(req); 1552 unlock_kiocb(req);
1532 } else { 1553 if (!list_empty(&ctx->run_list)) {
1533 list_add_tail(&req->ki_run_list, &ctx->run_list);
1534 /* drain the run list */ 1554 /* drain the run list */
1535 while (__aio_run_iocbs(ctx)) 1555 while (__aio_run_iocbs(ctx))
1536 ; 1556 ;
@@ -1661,6 +1681,7 @@ asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb,
1661 if (NULL != cancel) { 1681 if (NULL != cancel) {
1662 struct io_event tmp; 1682 struct io_event tmp;
1663 pr_debug("calling cancel\n"); 1683 pr_debug("calling cancel\n");
1684 lock_kiocb(kiocb);
1664 memset(&tmp, 0, sizeof(tmp)); 1685 memset(&tmp, 0, sizeof(tmp));
1665 tmp.obj = (u64)(unsigned long)kiocb->ki_obj.user; 1686 tmp.obj = (u64)(unsigned long)kiocb->ki_obj.user;
1666 tmp.data = kiocb->ki_user_data; 1687 tmp.data = kiocb->ki_user_data;
@@ -1672,8 +1693,9 @@ asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb,
1672 if (copy_to_user(result, &tmp, sizeof(tmp))) 1693 if (copy_to_user(result, &tmp, sizeof(tmp)))
1673 ret = -EFAULT; 1694 ret = -EFAULT;
1674 } 1695 }
1696 unlock_kiocb(kiocb);
1675 } else 1697 } else
1676 printk(KERN_DEBUG "iocb has no cancel operation\n"); 1698 ret = -EINVAL;
1677 1699
1678 put_ioctx(ctx); 1700 put_ioctx(ctx);
1679 1701
diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
index 6171431272dc..990c28da5aec 100644
--- a/fs/autofs/autofs_i.h
+++ b/fs/autofs/autofs_i.h
@@ -105,6 +105,7 @@ struct autofs_sb_info {
105 struct file *pipe; 105 struct file *pipe;
106 pid_t oz_pgrp; 106 pid_t oz_pgrp;
107 int catatonic; 107 int catatonic;
108 struct super_block *sb;
108 unsigned long exp_timeout; 109 unsigned long exp_timeout;
109 ino_t next_dir_ino; 110 ino_t next_dir_ino;
110 struct autofs_wait_queue *queues; /* Wait queue pointer */ 111 struct autofs_wait_queue *queues; /* Wait queue pointer */
@@ -134,7 +135,7 @@ void autofs_hash_insert(struct autofs_dirhash *,struct autofs_dir_ent *);
134void autofs_hash_delete(struct autofs_dir_ent *); 135void autofs_hash_delete(struct autofs_dir_ent *);
135struct autofs_dir_ent *autofs_hash_enum(const struct autofs_dirhash *,off_t *,struct autofs_dir_ent *); 136struct autofs_dir_ent *autofs_hash_enum(const struct autofs_dirhash *,off_t *,struct autofs_dir_ent *);
136void autofs_hash_dputall(struct autofs_dirhash *); 137void autofs_hash_dputall(struct autofs_dirhash *);
137void autofs_hash_nuke(struct autofs_dirhash *); 138void autofs_hash_nuke(struct autofs_sb_info *);
138 139
139/* Expiration-handling functions */ 140/* Expiration-handling functions */
140 141
diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c
index 448143fd0796..5ccfcf26310d 100644
--- a/fs/autofs/dirhash.c
+++ b/fs/autofs/dirhash.c
@@ -232,13 +232,13 @@ void autofs_hash_dputall(struct autofs_dirhash *dh)
232 232
233/* Delete everything. This is used on filesystem destruction, so we 233/* Delete everything. This is used on filesystem destruction, so we
234 make no attempt to keep the pointers valid */ 234 make no attempt to keep the pointers valid */
235void autofs_hash_nuke(struct autofs_dirhash *dh) 235void autofs_hash_nuke(struct autofs_sb_info *sbi)
236{ 236{
237 int i; 237 int i;
238 struct autofs_dir_ent *ent, *nent; 238 struct autofs_dir_ent *ent, *nent;
239 239
240 for ( i = 0 ; i < AUTOFS_HASH_SIZE ; i++ ) { 240 for ( i = 0 ; i < AUTOFS_HASH_SIZE ; i++ ) {
241 for ( ent = dh->h[i] ; ent ; ent = nent ) { 241 for ( ent = sbi->dirhash.h[i] ; ent ; ent = nent ) {
242 nent = ent->next; 242 nent = ent->next;
243 if ( ent->dentry ) 243 if ( ent->dentry )
244 dput(ent->dentry); 244 dput(ent->dentry);
@@ -246,4 +246,5 @@ void autofs_hash_nuke(struct autofs_dirhash *dh)
246 kfree(ent); 246 kfree(ent);
247 } 247 }
248 } 248 }
249 shrink_dcache_sb(sbi->sb);
249} 250}
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index 4888c1fabbf7..65e5ed42190e 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -27,7 +27,7 @@ static void autofs_put_super(struct super_block *sb)
27 if ( !sbi->catatonic ) 27 if ( !sbi->catatonic )
28 autofs_catatonic_mode(sbi); /* Free wait queues, close pipe */ 28 autofs_catatonic_mode(sbi); /* Free wait queues, close pipe */
29 29
30 autofs_hash_nuke(&sbi->dirhash); 30 autofs_hash_nuke(sbi);
31 for ( n = 0 ; n < AUTOFS_MAX_SYMLINKS ; n++ ) { 31 for ( n = 0 ; n < AUTOFS_MAX_SYMLINKS ; n++ ) {
32 if ( test_bit(n, sbi->symlink_bitmap) ) 32 if ( test_bit(n, sbi->symlink_bitmap) )
33 kfree(sbi->symlink[n].data); 33 kfree(sbi->symlink[n].data);
@@ -148,6 +148,7 @@ int autofs_fill_super(struct super_block *s, void *data, int silent)
148 s->s_magic = AUTOFS_SUPER_MAGIC; 148 s->s_magic = AUTOFS_SUPER_MAGIC;
149 s->s_op = &autofs_sops; 149 s->s_op = &autofs_sops;
150 s->s_time_gran = 1; 150 s->s_time_gran = 1;
151 sbi->sb = s;
151 152
152 root_inode = iget(s, AUTOFS_ROOT_INO); 153 root_inode = iget(s, AUTOFS_ROOT_INO);
153 root = d_alloc_root(root_inode); 154 root = d_alloc_root(root_inode);
diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h
index 1020dbc88bec..1fbc53f14aba 100644
--- a/fs/bfs/bfs.h
+++ b/fs/bfs/bfs.h
@@ -20,7 +20,6 @@ struct bfs_sb_info {
20 unsigned long si_lasti; 20 unsigned long si_lasti;
21 unsigned long * si_imap; 21 unsigned long * si_imap;
22 struct buffer_head * si_sbh; /* buffer header w/superblock */ 22 struct buffer_head * si_sbh; /* buffer header w/superblock */
23 struct bfs_super_block * si_bfs_sb; /* superblock in si_sbh->b_data */
24}; 23};
25 24
26/* 25/*
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 5a1e5ce057ff..e240c335eb23 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -2,6 +2,7 @@
2 * fs/bfs/dir.c 2 * fs/bfs/dir.c
3 * BFS directory operations. 3 * BFS directory operations.
4 * Copyright (C) 1999,2000 Tigran Aivazian <tigran@veritas.com> 4 * Copyright (C) 1999,2000 Tigran Aivazian <tigran@veritas.com>
5 * Made endianness-clean by Andrew Stribblehill <ads@wompom.org> 2005
5 */ 6 */
6 7
7#include <linux/time.h> 8#include <linux/time.h>
@@ -20,9 +21,9 @@
20#define dprintf(x...) 21#define dprintf(x...)
21#endif 22#endif
22 23
23static int bfs_add_entry(struct inode * dir, const char * name, int namelen, int ino); 24static int bfs_add_entry(struct inode * dir, const unsigned char * name, int namelen, int ino);
24static struct buffer_head * bfs_find_entry(struct inode * dir, 25static struct buffer_head * bfs_find_entry(struct inode * dir,
25 const char * name, int namelen, struct bfs_dirent ** res_dir); 26 const unsigned char * name, int namelen, struct bfs_dirent ** res_dir);
26 27
27static int bfs_readdir(struct file * f, void * dirent, filldir_t filldir) 28static int bfs_readdir(struct file * f, void * dirent, filldir_t filldir)
28{ 29{
@@ -53,7 +54,7 @@ static int bfs_readdir(struct file * f, void * dirent, filldir_t filldir)
53 de = (struct bfs_dirent *)(bh->b_data + offset); 54 de = (struct bfs_dirent *)(bh->b_data + offset);
54 if (de->ino) { 55 if (de->ino) {
55 int size = strnlen(de->name, BFS_NAMELEN); 56 int size = strnlen(de->name, BFS_NAMELEN);
56 if (filldir(dirent, de->name, size, f->f_pos, de->ino, DT_UNKNOWN) < 0) { 57 if (filldir(dirent, de->name, size, f->f_pos, le16_to_cpu(de->ino), DT_UNKNOWN) < 0) {
57 brelse(bh); 58 brelse(bh);
58 unlock_kernel(); 59 unlock_kernel();
59 return 0; 60 return 0;
@@ -107,7 +108,7 @@ static int bfs_create(struct inode * dir, struct dentry * dentry, int mode,
107 inode->i_mapping->a_ops = &bfs_aops; 108 inode->i_mapping->a_ops = &bfs_aops;
108 inode->i_mode = mode; 109 inode->i_mode = mode;
109 inode->i_ino = ino; 110 inode->i_ino = ino;
110 BFS_I(inode)->i_dsk_ino = ino; 111 BFS_I(inode)->i_dsk_ino = cpu_to_le16(ino);
111 BFS_I(inode)->i_sblock = 0; 112 BFS_I(inode)->i_sblock = 0;
112 BFS_I(inode)->i_eblock = 0; 113 BFS_I(inode)->i_eblock = 0;
113 insert_inode_hash(inode); 114 insert_inode_hash(inode);
@@ -139,7 +140,7 @@ static struct dentry * bfs_lookup(struct inode * dir, struct dentry * dentry, st
139 lock_kernel(); 140 lock_kernel();
140 bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de); 141 bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de);
141 if (bh) { 142 if (bh) {
142 unsigned long ino = le32_to_cpu(de->ino); 143 unsigned long ino = (unsigned long)le16_to_cpu(de->ino);
143 brelse(bh); 144 brelse(bh);
144 inode = iget(dir->i_sb, ino); 145 inode = iget(dir->i_sb, ino);
145 if (!inode) { 146 if (!inode) {
@@ -183,7 +184,7 @@ static int bfs_unlink(struct inode * dir, struct dentry * dentry)
183 inode = dentry->d_inode; 184 inode = dentry->d_inode;
184 lock_kernel(); 185 lock_kernel();
185 bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de); 186 bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de);
186 if (!bh || de->ino != inode->i_ino) 187 if (!bh || le16_to_cpu(de->ino) != inode->i_ino)
187 goto out_brelse; 188 goto out_brelse;
188 189
189 if (!inode->i_nlink) { 190 if (!inode->i_nlink) {
@@ -224,7 +225,7 @@ static int bfs_rename(struct inode * old_dir, struct dentry * old_dentry,
224 old_dentry->d_name.name, 225 old_dentry->d_name.name,
225 old_dentry->d_name.len, &old_de); 226 old_dentry->d_name.len, &old_de);
226 227
227 if (!old_bh || old_de->ino != old_inode->i_ino) 228 if (!old_bh || le16_to_cpu(old_de->ino) != old_inode->i_ino)
228 goto end_rename; 229 goto end_rename;
229 230
230 error = -EPERM; 231 error = -EPERM;
@@ -270,7 +271,7 @@ struct inode_operations bfs_dir_inops = {
270 .rename = bfs_rename, 271 .rename = bfs_rename,
271}; 272};
272 273
273static int bfs_add_entry(struct inode * dir, const char * name, int namelen, int ino) 274static int bfs_add_entry(struct inode * dir, const unsigned char * name, int namelen, int ino)
274{ 275{
275 struct buffer_head * bh; 276 struct buffer_head * bh;
276 struct bfs_dirent * de; 277 struct bfs_dirent * de;
@@ -304,7 +305,7 @@ static int bfs_add_entry(struct inode * dir, const char * name, int namelen, int
304 } 305 }
305 dir->i_mtime = CURRENT_TIME_SEC; 306 dir->i_mtime = CURRENT_TIME_SEC;
306 mark_inode_dirty(dir); 307 mark_inode_dirty(dir);
307 de->ino = ino; 308 de->ino = cpu_to_le16((u16)ino);
308 for (i=0; i<BFS_NAMELEN; i++) 309 for (i=0; i<BFS_NAMELEN; i++)
309 de->name[i] = (i < namelen) ? name[i] : 0; 310 de->name[i] = (i < namelen) ? name[i] : 0;
310 mark_buffer_dirty(bh); 311 mark_buffer_dirty(bh);
@@ -317,7 +318,7 @@ static int bfs_add_entry(struct inode * dir, const char * name, int namelen, int
317 return -ENOSPC; 318 return -ENOSPC;
318} 319}
319 320
320static inline int bfs_namecmp(int len, const char * name, const char * buffer) 321static inline int bfs_namecmp(int len, const unsigned char * name, const char * buffer)
321{ 322{
322 if (len < BFS_NAMELEN && buffer[len]) 323 if (len < BFS_NAMELEN && buffer[len])
323 return 0; 324 return 0;
@@ -325,7 +326,7 @@ static inline int bfs_namecmp(int len, const char * name, const char * buffer)
325} 326}
326 327
327static struct buffer_head * bfs_find_entry(struct inode * dir, 328static struct buffer_head * bfs_find_entry(struct inode * dir,
328 const char * name, int namelen, struct bfs_dirent ** res_dir) 329 const unsigned char * name, int namelen, struct bfs_dirent ** res_dir)
329{ 330{
330 unsigned long block, offset; 331 unsigned long block, offset;
331 struct buffer_head * bh; 332 struct buffer_head * bh;
@@ -346,7 +347,7 @@ static struct buffer_head * bfs_find_entry(struct inode * dir,
346 } 347 }
347 de = (struct bfs_dirent *)(bh->b_data + offset); 348 de = (struct bfs_dirent *)(bh->b_data + offset);
348 offset += BFS_DIRENT_SIZE; 349 offset += BFS_DIRENT_SIZE;
349 if (de->ino && bfs_namecmp(namelen, name, de->name)) { 350 if (le16_to_cpu(de->ino) && bfs_namecmp(namelen, name, de->name)) {
350 *res_dir = de; 351 *res_dir = de;
351 return bh; 352 return bh;
352 } 353 }
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index 747fd1ea55e0..807723b65daf 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -40,8 +40,8 @@ static int bfs_move_block(unsigned long from, unsigned long to, struct super_blo
40 return 0; 40 return 0;
41} 41}
42 42
43static int bfs_move_blocks(struct super_block *sb, unsigned long start, unsigned long end, 43static int bfs_move_blocks(struct super_block *sb, unsigned long start,
44 unsigned long where) 44 unsigned long end, unsigned long where)
45{ 45{
46 unsigned long i; 46 unsigned long i;
47 47
@@ -57,20 +57,21 @@ static int bfs_move_blocks(struct super_block *sb, unsigned long start, unsigned
57static int bfs_get_block(struct inode * inode, sector_t block, 57static int bfs_get_block(struct inode * inode, sector_t block,
58 struct buffer_head * bh_result, int create) 58 struct buffer_head * bh_result, int create)
59{ 59{
60 long phys; 60 unsigned long phys;
61 int err; 61 int err;
62 struct super_block *sb = inode->i_sb; 62 struct super_block *sb = inode->i_sb;
63 struct bfs_sb_info *info = BFS_SB(sb); 63 struct bfs_sb_info *info = BFS_SB(sb);
64 struct bfs_inode_info *bi = BFS_I(inode); 64 struct bfs_inode_info *bi = BFS_I(inode);
65 struct buffer_head *sbh = info->si_sbh; 65 struct buffer_head *sbh = info->si_sbh;
66 66
67 if (block < 0 || block > info->si_blocks) 67 if (block > info->si_blocks)
68 return -EIO; 68 return -EIO;
69 69
70 phys = bi->i_sblock + block; 70 phys = bi->i_sblock + block;
71 if (!create) { 71 if (!create) {
72 if (phys <= bi->i_eblock) { 72 if (phys <= bi->i_eblock) {
73 dprintf("c=%d, b=%08lx, phys=%08lx (granted)\n", create, block, phys); 73 dprintf("c=%d, b=%08lx, phys=%09lx (granted)\n",
74 create, (unsigned long)block, phys);
74 map_bh(bh_result, sb, phys); 75 map_bh(bh_result, sb, phys);
75 } 76 }
76 return 0; 77 return 0;
@@ -80,7 +81,7 @@ static int bfs_get_block(struct inode * inode, sector_t block,
80 of blocks allocated for this file, we can grant it */ 81 of blocks allocated for this file, we can grant it */
81 if (inode->i_size && phys <= bi->i_eblock) { 82 if (inode->i_size && phys <= bi->i_eblock) {
82 dprintf("c=%d, b=%08lx, phys=%08lx (interim block granted)\n", 83 dprintf("c=%d, b=%08lx, phys=%08lx (interim block granted)\n",
83 create, block, phys); 84 create, (unsigned long)block, phys);
84 map_bh(bh_result, sb, phys); 85 map_bh(bh_result, sb, phys);
85 return 0; 86 return 0;
86 } 87 }
@@ -88,11 +89,12 @@ static int bfs_get_block(struct inode * inode, sector_t block,
88 /* the rest has to be protected against itself */ 89 /* the rest has to be protected against itself */
89 lock_kernel(); 90 lock_kernel();
90 91
91 /* if the last data block for this file is the last allocated block, we can 92 /* if the last data block for this file is the last allocated
92 extend the file trivially, without moving it anywhere */ 93 block, we can extend the file trivially, without moving it
94 anywhere */
93 if (bi->i_eblock == info->si_lf_eblk) { 95 if (bi->i_eblock == info->si_lf_eblk) {
94 dprintf("c=%d, b=%08lx, phys=%08lx (simple extension)\n", 96 dprintf("c=%d, b=%08lx, phys=%08lx (simple extension)\n",
95 create, block, phys); 97 create, (unsigned long)block, phys);
96 map_bh(bh_result, sb, phys); 98 map_bh(bh_result, sb, phys);
97 info->si_freeb -= phys - bi->i_eblock; 99 info->si_freeb -= phys - bi->i_eblock;
98 info->si_lf_eblk = bi->i_eblock = phys; 100 info->si_lf_eblk = bi->i_eblock = phys;
@@ -114,7 +116,8 @@ static int bfs_get_block(struct inode * inode, sector_t block,
114 } else 116 } else
115 err = 0; 117 err = 0;
116 118
117 dprintf("c=%d, b=%08lx, phys=%08lx (moved)\n", create, block, phys); 119 dprintf("c=%d, b=%08lx, phys=%08lx (moved)\n",
120 create, (unsigned long)block, phys);
118 bi->i_sblock = phys; 121 bi->i_sblock = phys;
119 phys += block; 122 phys += block;
120 info->si_lf_eblk = bi->i_eblock = phys; 123 info->si_lf_eblk = bi->i_eblock = phys;
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 64e0fb33fc0c..c7b39aa279d7 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -3,6 +3,8 @@
3 * BFS superblock and inode operations. 3 * BFS superblock and inode operations.
4 * Copyright (C) 1999,2000 Tigran Aivazian <tigran@veritas.com> 4 * Copyright (C) 1999,2000 Tigran Aivazian <tigran@veritas.com>
5 * From fs/minix, Copyright (C) 1991, 1992 Linus Torvalds. 5 * From fs/minix, Copyright (C) 1991, 1992 Linus Torvalds.
6 *
7 * Made endianness-clean by Andrew Stribblehill <ads@wompom.org>, 2005.
6 */ 8 */
7 9
8#include <linux/module.h> 10#include <linux/module.h>
@@ -54,46 +56,50 @@ static void bfs_read_inode(struct inode * inode)
54 off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK; 56 off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK;
55 di = (struct bfs_inode *)bh->b_data + off; 57 di = (struct bfs_inode *)bh->b_data + off;
56 58
57 inode->i_mode = 0x0000FFFF & di->i_mode; 59 inode->i_mode = 0x0000FFFF & le32_to_cpu(di->i_mode);
58 if (di->i_vtype == BFS_VDIR) { 60 if (le32_to_cpu(di->i_vtype) == BFS_VDIR) {
59 inode->i_mode |= S_IFDIR; 61 inode->i_mode |= S_IFDIR;
60 inode->i_op = &bfs_dir_inops; 62 inode->i_op = &bfs_dir_inops;
61 inode->i_fop = &bfs_dir_operations; 63 inode->i_fop = &bfs_dir_operations;
62 } else if (di->i_vtype == BFS_VREG) { 64 } else if (le32_to_cpu(di->i_vtype) == BFS_VREG) {
63 inode->i_mode |= S_IFREG; 65 inode->i_mode |= S_IFREG;
64 inode->i_op = &bfs_file_inops; 66 inode->i_op = &bfs_file_inops;
65 inode->i_fop = &bfs_file_operations; 67 inode->i_fop = &bfs_file_operations;
66 inode->i_mapping->a_ops = &bfs_aops; 68 inode->i_mapping->a_ops = &bfs_aops;
67 } 69 }
68 70
69 inode->i_uid = di->i_uid; 71 BFS_I(inode)->i_sblock = le32_to_cpu(di->i_sblock);
70 inode->i_gid = di->i_gid; 72 BFS_I(inode)->i_eblock = le32_to_cpu(di->i_eblock);
71 inode->i_nlink = di->i_nlink; 73 inode->i_uid = le32_to_cpu(di->i_uid);
74 inode->i_gid = le32_to_cpu(di->i_gid);
75 inode->i_nlink = le32_to_cpu(di->i_nlink);
72 inode->i_size = BFS_FILESIZE(di); 76 inode->i_size = BFS_FILESIZE(di);
73 inode->i_blocks = BFS_FILEBLOCKS(di); 77 inode->i_blocks = BFS_FILEBLOCKS(di);
78 if (inode->i_size || inode->i_blocks) dprintf("Registered inode with %lld size, %ld blocks\n", inode->i_size, inode->i_blocks);
74 inode->i_blksize = PAGE_SIZE; 79 inode->i_blksize = PAGE_SIZE;
75 inode->i_atime.tv_sec = di->i_atime; 80 inode->i_atime.tv_sec = le32_to_cpu(di->i_atime);
76 inode->i_mtime.tv_sec = di->i_mtime; 81 inode->i_mtime.tv_sec = le32_to_cpu(di->i_mtime);
77 inode->i_ctime.tv_sec = di->i_ctime; 82 inode->i_ctime.tv_sec = le32_to_cpu(di->i_ctime);
78 inode->i_atime.tv_nsec = 0; 83 inode->i_atime.tv_nsec = 0;
79 inode->i_mtime.tv_nsec = 0; 84 inode->i_mtime.tv_nsec = 0;
80 inode->i_ctime.tv_nsec = 0; 85 inode->i_ctime.tv_nsec = 0;
81 BFS_I(inode)->i_dsk_ino = di->i_ino; /* can be 0 so we store a copy */ 86 BFS_I(inode)->i_dsk_ino = le16_to_cpu(di->i_ino); /* can be 0 so we store a copy */
82 BFS_I(inode)->i_sblock = di->i_sblock;
83 BFS_I(inode)->i_eblock = di->i_eblock;
84 87
85 brelse(bh); 88 brelse(bh);
86} 89}
87 90
88static int bfs_write_inode(struct inode * inode, int unused) 91static int bfs_write_inode(struct inode * inode, int unused)
89{ 92{
90 unsigned long ino = inode->i_ino; 93 unsigned int ino = (u16)inode->i_ino;
94 unsigned long i_sblock;
91 struct bfs_inode * di; 95 struct bfs_inode * di;
92 struct buffer_head * bh; 96 struct buffer_head * bh;
93 int block, off; 97 int block, off;
94 98
99 dprintf("ino=%08x\n", ino);
100
95 if (ino < BFS_ROOT_INO || ino > BFS_SB(inode->i_sb)->si_lasti) { 101 if (ino < BFS_ROOT_INO || ino > BFS_SB(inode->i_sb)->si_lasti) {
96 printf("Bad inode number %s:%08lx\n", inode->i_sb->s_id, ino); 102 printf("Bad inode number %s:%08x\n", inode->i_sb->s_id, ino);
97 return -EIO; 103 return -EIO;
98 } 104 }
99 105
@@ -101,7 +107,7 @@ static int bfs_write_inode(struct inode * inode, int unused)
101 block = (ino - BFS_ROOT_INO)/BFS_INODES_PER_BLOCK + 1; 107 block = (ino - BFS_ROOT_INO)/BFS_INODES_PER_BLOCK + 1;
102 bh = sb_bread(inode->i_sb, block); 108 bh = sb_bread(inode->i_sb, block);
103 if (!bh) { 109 if (!bh) {
104 printf("Unable to read inode %s:%08lx\n", inode->i_sb->s_id, ino); 110 printf("Unable to read inode %s:%08x\n", inode->i_sb->s_id, ino);
105 unlock_kernel(); 111 unlock_kernel();
106 return -EIO; 112 return -EIO;
107 } 113 }
@@ -109,24 +115,26 @@ static int bfs_write_inode(struct inode * inode, int unused)
109 off = (ino - BFS_ROOT_INO)%BFS_INODES_PER_BLOCK; 115 off = (ino - BFS_ROOT_INO)%BFS_INODES_PER_BLOCK;
110 di = (struct bfs_inode *)bh->b_data + off; 116 di = (struct bfs_inode *)bh->b_data + off;
111 117
112 if (inode->i_ino == BFS_ROOT_INO) 118 if (ino == BFS_ROOT_INO)
113 di->i_vtype = BFS_VDIR; 119 di->i_vtype = cpu_to_le32(BFS_VDIR);
114 else 120 else
115 di->i_vtype = BFS_VREG; 121 di->i_vtype = cpu_to_le32(BFS_VREG);
116 122
117 di->i_ino = inode->i_ino; 123 di->i_ino = cpu_to_le16(ino);
118 di->i_mode = inode->i_mode; 124 di->i_mode = cpu_to_le32(inode->i_mode);
119 di->i_uid = inode->i_uid; 125 di->i_uid = cpu_to_le32(inode->i_uid);
120 di->i_gid = inode->i_gid; 126 di->i_gid = cpu_to_le32(inode->i_gid);
121 di->i_nlink = inode->i_nlink; 127 di->i_nlink = cpu_to_le32(inode->i_nlink);
122 di->i_atime = inode->i_atime.tv_sec; 128 di->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
123 di->i_mtime = inode->i_mtime.tv_sec; 129 di->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
124 di->i_ctime = inode->i_ctime.tv_sec; 130 di->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
125 di->i_sblock = BFS_I(inode)->i_sblock; 131 i_sblock = BFS_I(inode)->i_sblock;
126 di->i_eblock = BFS_I(inode)->i_eblock; 132 di->i_sblock = cpu_to_le32(i_sblock);
127 di->i_eoffset = di->i_sblock * BFS_BSIZE + inode->i_size - 1; 133 di->i_eblock = cpu_to_le32(BFS_I(inode)->i_eblock);
134 di->i_eoffset = cpu_to_le32(i_sblock * BFS_BSIZE + inode->i_size - 1);
128 135
129 mark_buffer_dirty(bh); 136 mark_buffer_dirty(bh);
137 dprintf("Written ino=%d into %d:%d\n",le16_to_cpu(di->i_ino),block,off);
130 brelse(bh); 138 brelse(bh);
131 unlock_kernel(); 139 unlock_kernel();
132 return 0; 140 return 0;
@@ -140,11 +148,14 @@ static void bfs_delete_inode(struct inode * inode)
140 int block, off; 148 int block, off;
141 struct super_block * s = inode->i_sb; 149 struct super_block * s = inode->i_sb;
142 struct bfs_sb_info * info = BFS_SB(s); 150 struct bfs_sb_info * info = BFS_SB(s);
151 struct bfs_inode_info * bi = BFS_I(inode);
143 152
144 dprintf("ino=%08lx\n", inode->i_ino); 153 dprintf("ino=%08lx\n", ino);
145 154
146 if (inode->i_ino < BFS_ROOT_INO || inode->i_ino > info->si_lasti) { 155 truncate_inode_pages(&inode->i_data, 0);
147 printf("invalid ino=%08lx\n", inode->i_ino); 156
157 if (ino < BFS_ROOT_INO || ino > info->si_lasti) {
158 printf("invalid ino=%08lx\n", ino);
148 return; 159 return;
149 } 160 }
150 161
@@ -160,13 +171,13 @@ static void bfs_delete_inode(struct inode * inode)
160 return; 171 return;
161 } 172 }
162 off = (ino - BFS_ROOT_INO)%BFS_INODES_PER_BLOCK; 173 off = (ino - BFS_ROOT_INO)%BFS_INODES_PER_BLOCK;
163 di = (struct bfs_inode *)bh->b_data + off; 174 di = (struct bfs_inode *) bh->b_data + off;
164 if (di->i_ino) { 175 if (bi->i_dsk_ino) {
165 info->si_freeb += BFS_FILEBLOCKS(di); 176 info->si_freeb += 1 + bi->i_eblock - bi->i_sblock;
166 info->si_freei++; 177 info->si_freei++;
167 clear_bit(di->i_ino, info->si_imap); 178 clear_bit(ino, info->si_imap);
168 dump_imap("delete_inode", s); 179 dump_imap("delete_inode", s);
169 } 180 }
170 di->i_ino = 0; 181 di->i_ino = 0;
171 di->i_sblock = 0; 182 di->i_sblock = 0;
172 mark_buffer_dirty(bh); 183 mark_buffer_dirty(bh);
@@ -272,14 +283,14 @@ static struct super_operations bfs_sops = {
272 283
273void dump_imap(const char *prefix, struct super_block * s) 284void dump_imap(const char *prefix, struct super_block * s)
274{ 285{
275#if 0 286#ifdef DEBUG
276 int i; 287 int i;
277 char *tmpbuf = (char *)get_zeroed_page(GFP_KERNEL); 288 char *tmpbuf = (char *)get_zeroed_page(GFP_KERNEL);
278 289
279 if (!tmpbuf) 290 if (!tmpbuf)
280 return; 291 return;
281 for (i=BFS_SB(s)->si_lasti; i>=0; i--) { 292 for (i=BFS_SB(s)->si_lasti; i>=0; i--) {
282 if (i>PAGE_SIZE-100) break; 293 if (i > PAGE_SIZE-100) break;
283 if (test_bit(i, BFS_SB(s)->si_imap)) 294 if (test_bit(i, BFS_SB(s)->si_imap))
284 strcat(tmpbuf, "1"); 295 strcat(tmpbuf, "1");
285 else 296 else
@@ -295,7 +306,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
295 struct buffer_head * bh; 306 struct buffer_head * bh;
296 struct bfs_super_block * bfs_sb; 307 struct bfs_super_block * bfs_sb;
297 struct inode * inode; 308 struct inode * inode;
298 int i, imap_len; 309 unsigned i, imap_len;
299 struct bfs_sb_info * info; 310 struct bfs_sb_info * info;
300 311
301 info = kmalloc(sizeof(*info), GFP_KERNEL); 312 info = kmalloc(sizeof(*info), GFP_KERNEL);
@@ -310,19 +321,18 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
310 if(!bh) 321 if(!bh)
311 goto out; 322 goto out;
312 bfs_sb = (struct bfs_super_block *)bh->b_data; 323 bfs_sb = (struct bfs_super_block *)bh->b_data;
313 if (bfs_sb->s_magic != BFS_MAGIC) { 324 if (le32_to_cpu(bfs_sb->s_magic) != BFS_MAGIC) {
314 if (!silent) 325 if (!silent)
315 printf("No BFS filesystem on %s (magic=%08x)\n", 326 printf("No BFS filesystem on %s (magic=%08x)\n",
316 s->s_id, bfs_sb->s_magic); 327 s->s_id, le32_to_cpu(bfs_sb->s_magic));
317 goto out; 328 goto out;
318 } 329 }
319 if (BFS_UNCLEAN(bfs_sb, s) && !silent) 330 if (BFS_UNCLEAN(bfs_sb, s) && !silent)
320 printf("%s is unclean, continuing\n", s->s_id); 331 printf("%s is unclean, continuing\n", s->s_id);
321 332
322 s->s_magic = BFS_MAGIC; 333 s->s_magic = BFS_MAGIC;
323 info->si_bfs_sb = bfs_sb;
324 info->si_sbh = bh; 334 info->si_sbh = bh;
325 info->si_lasti = (bfs_sb->s_start - BFS_BSIZE)/sizeof(struct bfs_inode) 335 info->si_lasti = (le32_to_cpu(bfs_sb->s_start) - BFS_BSIZE)/sizeof(struct bfs_inode)
326 + BFS_ROOT_INO - 1; 336 + BFS_ROOT_INO - 1;
327 337
328 imap_len = info->si_lasti/8 + 1; 338 imap_len = info->si_lasti/8 + 1;
@@ -346,8 +356,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
346 goto out; 356 goto out;
347 } 357 }
348 358
349 info->si_blocks = (bfs_sb->s_end + 1)>>BFS_BSIZE_BITS; /* for statfs(2) */ 359 info->si_blocks = (le32_to_cpu(bfs_sb->s_end) + 1)>>BFS_BSIZE_BITS; /* for statfs(2) */
350 info->si_freeb = (bfs_sb->s_end + 1 - bfs_sb->s_start)>>BFS_BSIZE_BITS; 360 info->si_freeb = (le32_to_cpu(bfs_sb->s_end) + 1 - cpu_to_le32(bfs_sb->s_start))>>BFS_BSIZE_BITS;
351 info->si_freei = 0; 361 info->si_freei = 0;
352 info->si_lf_eblk = 0; 362 info->si_lf_eblk = 0;
353 info->si_lf_sblk = 0; 363 info->si_lf_sblk = 0;
diff --git a/fs/bio.c b/fs/bio.c
index a7d4fd3a3299..83a349574567 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -683,7 +683,7 @@ struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev,
683{ 683{
684 struct sg_iovec iov; 684 struct sg_iovec iov;
685 685
686 iov.iov_base = (__user void *)uaddr; 686 iov.iov_base = (void __user *)uaddr;
687 iov.iov_len = len; 687 iov.iov_len = len;
688 688
689 return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm); 689 return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm);
diff --git a/fs/compat.c b/fs/compat.c
index 8c665705c6a0..ac3fb9ed8eea 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1619,6 +1619,7 @@ compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp
1619 char *bits; 1619 char *bits;
1620 long timeout; 1620 long timeout;
1621 int size, max_fdset, ret = -EINVAL; 1621 int size, max_fdset, ret = -EINVAL;
1622 struct fdtable *fdt;
1622 1623
1623 timeout = MAX_SCHEDULE_TIMEOUT; 1624 timeout = MAX_SCHEDULE_TIMEOUT;
1624 if (tvp) { 1625 if (tvp) {
@@ -1644,7 +1645,10 @@ compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp
1644 goto out_nofds; 1645 goto out_nofds;
1645 1646
1646 /* max_fdset can increase, so grab it once to avoid race */ 1647 /* max_fdset can increase, so grab it once to avoid race */
1647 max_fdset = current->files->max_fdset; 1648 rcu_read_lock();
1649 fdt = files_fdtable(current->files);
1650 max_fdset = fdt->max_fdset;
1651 rcu_read_unlock();
1648 if (n > max_fdset) 1652 if (n > max_fdset)
1649 n = max_fdset; 1653 n = max_fdset;
1650 1654
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 155e612635f1..e28a74203f3b 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -798,13 +798,16 @@ static int routing_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
798 r = (void *) &r4; 798 r = (void *) &r4;
799 } 799 }
800 800
801 if (ret) 801 if (ret) {
802 return -EFAULT; 802 ret = -EFAULT;
803 goto out;
804 }
803 805
804 set_fs (KERNEL_DS); 806 set_fs (KERNEL_DS);
805 ret = sys_ioctl (fd, cmd, (unsigned long) r); 807 ret = sys_ioctl (fd, cmd, (unsigned long) r);
806 set_fs (old_fs); 808 set_fs (old_fs);
807 809
810out:
808 if (mysock) 811 if (mysock)
809 sockfd_put(mysock); 812 sockfd_put(mysock);
810 813
diff --git a/fs/exec.c b/fs/exec.c
index 222ab1c572d8..14dd03907ccb 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -798,6 +798,7 @@ no_thread_group:
798static inline void flush_old_files(struct files_struct * files) 798static inline void flush_old_files(struct files_struct * files)
799{ 799{
800 long j = -1; 800 long j = -1;
801 struct fdtable *fdt;
801 802
802 spin_lock(&files->file_lock); 803 spin_lock(&files->file_lock);
803 for (;;) { 804 for (;;) {
@@ -805,12 +806,13 @@ static inline void flush_old_files(struct files_struct * files)
805 806
806 j++; 807 j++;
807 i = j * __NFDBITS; 808 i = j * __NFDBITS;
808 if (i >= files->max_fds || i >= files->max_fdset) 809 fdt = files_fdtable(files);
810 if (i >= fdt->max_fds || i >= fdt->max_fdset)
809 break; 811 break;
810 set = files->close_on_exec->fds_bits[j]; 812 set = fdt->close_on_exec->fds_bits[j];
811 if (!set) 813 if (!set)
812 continue; 814 continue;
813 files->close_on_exec->fds_bits[j] = 0; 815 fdt->close_on_exec->fds_bits[j] = 0;
814 spin_unlock(&files->file_lock); 816 spin_unlock(&files->file_lock);
815 for ( ; set ; i++,set >>= 1) { 817 for ( ; set ; i++,set >>= 1) {
816 if (set & 1) { 818 if (set & 1) {
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 161f156d98c8..c8d07030c897 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -615,6 +615,11 @@ got:
615 DQUOT_DROP(inode); 615 DQUOT_DROP(inode);
616 goto fail2; 616 goto fail2;
617 } 617 }
618 err = ext2_init_security(inode,dir);
619 if (err) {
620 DQUOT_FREE_INODE(inode);
621 goto fail2;
622 }
618 mark_inode_dirty(inode); 623 mark_inode_dirty(inode);
619 ext2_debug("allocating inode %lu\n", inode->i_ino); 624 ext2_debug("allocating inode %lu\n", inode->i_ino);
620 ext2_preread_inode(inode); 625 ext2_preread_inode(inode);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 53dceb0c6593..fdba4d1d3c60 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -71,6 +71,8 @@ void ext2_put_inode(struct inode *inode)
71 */ 71 */
72void ext2_delete_inode (struct inode * inode) 72void ext2_delete_inode (struct inode * inode)
73{ 73{
74 truncate_inode_pages(&inode->i_data, 0);
75
74 if (is_bad_inode(inode)) 76 if (is_bad_inode(inode))
75 goto no_delete; 77 goto no_delete;
76 EXT2_I(inode)->i_dtime = get_seconds(); 78 EXT2_I(inode)->i_dtime = get_seconds();
diff --git a/fs/ext2/xattr.h b/fs/ext2/xattr.h
index 5f3bfde3b810..67cfeb66e897 100644
--- a/fs/ext2/xattr.h
+++ b/fs/ext2/xattr.h
@@ -116,3 +116,11 @@ exit_ext2_xattr(void)
116 116
117# endif /* CONFIG_EXT2_FS_XATTR */ 117# endif /* CONFIG_EXT2_FS_XATTR */
118 118
119#ifdef CONFIG_EXT2_FS_SECURITY
120extern int ext2_init_security(struct inode *inode, struct inode *dir);
121#else
122static inline int ext2_init_security(struct inode *inode, struct inode *dir)
123{
124 return 0;
125}
126#endif
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
index 6a6c59fbe599..a26612798471 100644
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -8,6 +8,7 @@
8#include <linux/fs.h> 8#include <linux/fs.h>
9#include <linux/smp_lock.h> 9#include <linux/smp_lock.h>
10#include <linux/ext2_fs.h> 10#include <linux/ext2_fs.h>
11#include <linux/security.h>
11#include "xattr.h" 12#include "xattr.h"
12 13
13static size_t 14static size_t
@@ -45,6 +46,27 @@ ext2_xattr_security_set(struct inode *inode, const char *name,
45 value, size, flags); 46 value, size, flags);
46} 47}
47 48
49int
50ext2_init_security(struct inode *inode, struct inode *dir)
51{
52 int err;
53 size_t len;
54 void *value;
55 char *name;
56
57 err = security_inode_init_security(inode, dir, &name, &value, &len);
58 if (err) {
59 if (err == -EOPNOTSUPP)
60 return 0;
61 return err;
62 }
63 err = ext2_xattr_set(inode, EXT2_XATTR_INDEX_SECURITY,
64 name, value, len, 0);
65 kfree(name);
66 kfree(value);
67 return err;
68}
69
48struct xattr_handler ext2_xattr_security_handler = { 70struct xattr_handler ext2_xattr_security_handler = {
49 .prefix = XATTR_SECURITY_PREFIX, 71 .prefix = XATTR_SECURITY_PREFIX,
50 .list = ext2_xattr_security_list, 72 .list = ext2_xattr_security_list,
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 6981bd014ede..96552769d039 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -607,6 +607,11 @@ got:
607 DQUOT_DROP(inode); 607 DQUOT_DROP(inode);
608 goto fail2; 608 goto fail2;
609 } 609 }
610 err = ext3_init_security(handle,inode, dir);
611 if (err) {
612 DQUOT_FREE_INODE(inode);
613 goto fail2;
614 }
610 err = ext3_mark_inode_dirty(handle, inode); 615 err = ext3_mark_inode_dirty(handle, inode);
611 if (err) { 616 if (err) {
612 ext3_std_error(sb, err); 617 ext3_std_error(sb, err);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 9989fdcf4d5a..b5177c90d6f1 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -187,6 +187,8 @@ void ext3_delete_inode (struct inode * inode)
187{ 187{
188 handle_t *handle; 188 handle_t *handle;
189 189
190 truncate_inode_pages(&inode->i_data, 0);
191
190 if (is_bad_inode(inode)) 192 if (is_bad_inode(inode))
191 goto no_delete; 193 goto no_delete;
192 194
diff --git a/fs/ext3/xattr.h b/fs/ext3/xattr.h
index eb31a69e82dc..2ceae38f3d49 100644
--- a/fs/ext3/xattr.h
+++ b/fs/ext3/xattr.h
@@ -133,3 +133,14 @@ exit_ext3_xattr(void)
133#define ext3_xattr_handlers NULL 133#define ext3_xattr_handlers NULL
134 134
135# endif /* CONFIG_EXT3_FS_XATTR */ 135# endif /* CONFIG_EXT3_FS_XATTR */
136
137#ifdef CONFIG_EXT3_FS_SECURITY
138extern int ext3_init_security(handle_t *handle, struct inode *inode,
139 struct inode *dir);
140#else
141static inline int ext3_init_security(handle_t *handle, struct inode *inode,
142 struct inode *dir)
143{
144 return 0;
145}
146#endif
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c
index ddc1c41750e1..b9c40c15647b 100644
--- a/fs/ext3/xattr_security.c
+++ b/fs/ext3/xattr_security.c
@@ -9,6 +9,7 @@
9#include <linux/smp_lock.h> 9#include <linux/smp_lock.h>
10#include <linux/ext3_jbd.h> 10#include <linux/ext3_jbd.h>
11#include <linux/ext3_fs.h> 11#include <linux/ext3_fs.h>
12#include <linux/security.h>
12#include "xattr.h" 13#include "xattr.h"
13 14
14static size_t 15static size_t
@@ -47,6 +48,27 @@ ext3_xattr_security_set(struct inode *inode, const char *name,
47 value, size, flags); 48 value, size, flags);
48} 49}
49 50
51int
52ext3_init_security(handle_t *handle, struct inode *inode, struct inode *dir)
53{
54 int err;
55 size_t len;
56 void *value;
57 char *name;
58
59 err = security_inode_init_security(inode, dir, &name, &value, &len);
60 if (err) {
61 if (err == -EOPNOTSUPP)
62 return 0;
63 return err;
64 }
65 err = ext3_xattr_set_handle(handle, inode, EXT3_XATTR_INDEX_SECURITY,
66 name, value, len, 0);
67 kfree(name);
68 kfree(value);
69 return err;
70}
71
50struct xattr_handler ext3_xattr_security_handler = { 72struct xattr_handler ext3_xattr_security_handler = {
51 .prefix = XATTR_SECURITY_PREFIX, 73 .prefix = XATTR_SECURITY_PREFIX,
52 .list = ext3_xattr_security_list, 74 .list = ext3_xattr_security_list,
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 96ae85b67eba..a7cbe68e2259 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -335,6 +335,8 @@ EXPORT_SYMBOL(fat_build_inode);
335 335
336static void fat_delete_inode(struct inode *inode) 336static void fat_delete_inode(struct inode *inode)
337{ 337{
338 truncate_inode_pages(&inode->i_data, 0);
339
338 if (!is_bad_inode(inode)) { 340 if (!is_bad_inode(inode)) {
339 inode->i_size = 0; 341 inode->i_size = 0;
340 fat_truncate(inode); 342 fat_truncate(inode);
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 6fbc9d8fcc36..863b46e0d78a 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -16,6 +16,7 @@
16#include <linux/security.h> 16#include <linux/security.h>
17#include <linux/ptrace.h> 17#include <linux/ptrace.h>
18#include <linux/signal.h> 18#include <linux/signal.h>
19#include <linux/rcupdate.h>
19 20
20#include <asm/poll.h> 21#include <asm/poll.h>
21#include <asm/siginfo.h> 22#include <asm/siginfo.h>
@@ -24,21 +25,25 @@
24void fastcall set_close_on_exec(unsigned int fd, int flag) 25void fastcall set_close_on_exec(unsigned int fd, int flag)
25{ 26{
26 struct files_struct *files = current->files; 27 struct files_struct *files = current->files;
28 struct fdtable *fdt;
27 spin_lock(&files->file_lock); 29 spin_lock(&files->file_lock);
30 fdt = files_fdtable(files);
28 if (flag) 31 if (flag)
29 FD_SET(fd, files->close_on_exec); 32 FD_SET(fd, fdt->close_on_exec);
30 else 33 else
31 FD_CLR(fd, files->close_on_exec); 34 FD_CLR(fd, fdt->close_on_exec);
32 spin_unlock(&files->file_lock); 35 spin_unlock(&files->file_lock);
33} 36}
34 37
35static inline int get_close_on_exec(unsigned int fd) 38static inline int get_close_on_exec(unsigned int fd)
36{ 39{
37 struct files_struct *files = current->files; 40 struct files_struct *files = current->files;
41 struct fdtable *fdt;
38 int res; 42 int res;
39 spin_lock(&files->file_lock); 43 rcu_read_lock();
40 res = FD_ISSET(fd, files->close_on_exec); 44 fdt = files_fdtable(files);
41 spin_unlock(&files->file_lock); 45 res = FD_ISSET(fd, fdt->close_on_exec);
46 rcu_read_unlock();
42 return res; 47 return res;
43} 48}
44 49
@@ -54,24 +59,26 @@ static int locate_fd(struct files_struct *files,
54 unsigned int newfd; 59 unsigned int newfd;
55 unsigned int start; 60 unsigned int start;
56 int error; 61 int error;
62 struct fdtable *fdt;
57 63
58 error = -EINVAL; 64 error = -EINVAL;
59 if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) 65 if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
60 goto out; 66 goto out;
61 67
62repeat: 68repeat:
69 fdt = files_fdtable(files);
63 /* 70 /*
64 * Someone might have closed fd's in the range 71 * Someone might have closed fd's in the range
65 * orig_start..files->next_fd 72 * orig_start..fdt->next_fd
66 */ 73 */
67 start = orig_start; 74 start = orig_start;
68 if (start < files->next_fd) 75 if (start < fdt->next_fd)
69 start = files->next_fd; 76 start = fdt->next_fd;
70 77
71 newfd = start; 78 newfd = start;
72 if (start < files->max_fdset) { 79 if (start < fdt->max_fdset) {
73 newfd = find_next_zero_bit(files->open_fds->fds_bits, 80 newfd = find_next_zero_bit(fdt->open_fds->fds_bits,
74 files->max_fdset, start); 81 fdt->max_fdset, start);
75 } 82 }
76 83
77 error = -EMFILE; 84 error = -EMFILE;
@@ -89,9 +96,15 @@ repeat:
89 if (error) 96 if (error)
90 goto repeat; 97 goto repeat;
91 98
92 if (start <= files->next_fd) 99 /*
93 files->next_fd = newfd + 1; 100 * We reacquired files_lock, so we are safe as long as
94 101 * we reacquire the fdtable pointer and use it while holding
102 * the lock, no one can free it during that time.
103 */
104 fdt = files_fdtable(files);
105 if (start <= fdt->next_fd)
106 fdt->next_fd = newfd + 1;
107
95 error = newfd; 108 error = newfd;
96 109
97out: 110out:
@@ -101,13 +114,16 @@ out:
101static int dupfd(struct file *file, unsigned int start) 114static int dupfd(struct file *file, unsigned int start)
102{ 115{
103 struct files_struct * files = current->files; 116 struct files_struct * files = current->files;
117 struct fdtable *fdt;
104 int fd; 118 int fd;
105 119
106 spin_lock(&files->file_lock); 120 spin_lock(&files->file_lock);
107 fd = locate_fd(files, file, start); 121 fd = locate_fd(files, file, start);
108 if (fd >= 0) { 122 if (fd >= 0) {
109 FD_SET(fd, files->open_fds); 123 /* locate_fd() may have expanded fdtable, load the ptr */
110 FD_CLR(fd, files->close_on_exec); 124 fdt = files_fdtable(files);
125 FD_SET(fd, fdt->open_fds);
126 FD_CLR(fd, fdt->close_on_exec);
111 spin_unlock(&files->file_lock); 127 spin_unlock(&files->file_lock);
112 fd_install(fd, file); 128 fd_install(fd, file);
113 } else { 129 } else {
@@ -123,6 +139,7 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
123 int err = -EBADF; 139 int err = -EBADF;
124 struct file * file, *tofree; 140 struct file * file, *tofree;
125 struct files_struct * files = current->files; 141 struct files_struct * files = current->files;
142 struct fdtable *fdt;
126 143
127 spin_lock(&files->file_lock); 144 spin_lock(&files->file_lock);
128 if (!(file = fcheck(oldfd))) 145 if (!(file = fcheck(oldfd)))
@@ -148,13 +165,14 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
148 165
149 /* Yes. It's a race. In user space. Nothing sane to do */ 166 /* Yes. It's a race. In user space. Nothing sane to do */
150 err = -EBUSY; 167 err = -EBUSY;
151 tofree = files->fd[newfd]; 168 fdt = files_fdtable(files);
152 if (!tofree && FD_ISSET(newfd, files->open_fds)) 169 tofree = fdt->fd[newfd];
170 if (!tofree && FD_ISSET(newfd, fdt->open_fds))
153 goto out_fput; 171 goto out_fput;
154 172
155 files->fd[newfd] = file; 173 rcu_assign_pointer(fdt->fd[newfd], file);
156 FD_SET(newfd, files->open_fds); 174 FD_SET(newfd, fdt->open_fds);
157 FD_CLR(newfd, files->close_on_exec); 175 FD_CLR(newfd, fdt->close_on_exec);
158 spin_unlock(&files->file_lock); 176 spin_unlock(&files->file_lock);
159 177
160 if (tofree) 178 if (tofree)
diff --git a/fs/file.c b/fs/file.c
index 92b5f25985d2..2127a7b9dc3a 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -13,6 +13,25 @@
13#include <linux/vmalloc.h> 13#include <linux/vmalloc.h>
14#include <linux/file.h> 14#include <linux/file.h>
15#include <linux/bitops.h> 15#include <linux/bitops.h>
16#include <linux/interrupt.h>
17#include <linux/spinlock.h>
18#include <linux/rcupdate.h>
19#include <linux/workqueue.h>
20
21struct fdtable_defer {
22 spinlock_t lock;
23 struct work_struct wq;
24 struct timer_list timer;
25 struct fdtable *next;
26};
27
28/*
29 * We use this list to defer free fdtables that have vmalloced
30 * sets/arrays. By keeping a per-cpu list, we avoid having to embed
31 * the work_struct in fdtable itself which avoids a 64 byte (i386) increase in
32 * this per-task structure.
33 */
34static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list);
16 35
17 36
18/* 37/*
@@ -48,82 +67,143 @@ void free_fd_array(struct file **array, int num)
48 vfree(array); 67 vfree(array);
49} 68}
50 69
51/* 70static void __free_fdtable(struct fdtable *fdt)
52 * Expand the fd array in the files_struct. Called with the files 71{
53 * spinlock held for write. 72 int fdset_size, fdarray_size;
54 */
55 73
56static int expand_fd_array(struct files_struct *files, int nr) 74 fdset_size = fdt->max_fdset / 8;
57 __releases(files->file_lock) 75 fdarray_size = fdt->max_fds * sizeof(struct file *);
58 __acquires(files->file_lock) 76 free_fdset(fdt->open_fds, fdset_size);
77 free_fdset(fdt->close_on_exec, fdset_size);
78 free_fd_array(fdt->fd, fdarray_size);
79 kfree(fdt);
80}
81
82static void fdtable_timer(unsigned long data)
59{ 83{
60 struct file **new_fds; 84 struct fdtable_defer *fddef = (struct fdtable_defer *)data;
61 int error, nfds;
62 85
63 86 spin_lock(&fddef->lock);
64 error = -EMFILE; 87 /*
65 if (files->max_fds >= NR_OPEN || nr >= NR_OPEN) 88 * If someone already emptied the queue return.
89 */
90 if (!fddef->next)
66 goto out; 91 goto out;
92 if (!schedule_work(&fddef->wq))
93 mod_timer(&fddef->timer, 5);
94out:
95 spin_unlock(&fddef->lock);
96}
67 97
68 nfds = files->max_fds; 98static void free_fdtable_work(struct fdtable_defer *f)
69 spin_unlock(&files->file_lock); 99{
100 struct fdtable *fdt;
70 101
71 /* 102 spin_lock_bh(&f->lock);
72 * Expand to the max in easy steps, and keep expanding it until 103 fdt = f->next;
73 * we have enough for the requested fd array size. 104 f->next = NULL;
74 */ 105 spin_unlock_bh(&f->lock);
106 while(fdt) {
107 struct fdtable *next = fdt->next;
108 __free_fdtable(fdt);
109 fdt = next;
110 }
111}
75 112
76 do { 113static void free_fdtable_rcu(struct rcu_head *rcu)
77#if NR_OPEN_DEFAULT < 256 114{
78 if (nfds < 256) 115 struct fdtable *fdt = container_of(rcu, struct fdtable, rcu);
79 nfds = 256; 116 int fdset_size, fdarray_size;
80 else 117 struct fdtable_defer *fddef;
81#endif
82 if (nfds < (PAGE_SIZE / sizeof(struct file *)))
83 nfds = PAGE_SIZE / sizeof(struct file *);
84 else {
85 nfds = nfds * 2;
86 if (nfds > NR_OPEN)
87 nfds = NR_OPEN;
88 }
89 } while (nfds <= nr);
90 118
91 error = -ENOMEM; 119 BUG_ON(!fdt);
92 new_fds = alloc_fd_array(nfds); 120 fdset_size = fdt->max_fdset / 8;
93 spin_lock(&files->file_lock); 121 fdarray_size = fdt->max_fds * sizeof(struct file *);
94 if (!new_fds)
95 goto out;
96 122
97 /* Copy the existing array and install the new pointer */ 123 if (fdt->free_files) {
98 124 /*
99 if (nfds > files->max_fds) { 125 * The this fdtable was embedded in the files structure
100 struct file **old_fds; 126 * and the files structure itself was getting destroyed.
101 int i; 127 * It is now safe to free the files structure.
102 128 */
103 old_fds = xchg(&files->fd, new_fds); 129 kmem_cache_free(files_cachep, fdt->free_files);
104 i = xchg(&files->max_fds, nfds); 130 return;
105 131 }
106 /* Don't copy/clear the array if we are creating a new 132 if (fdt->max_fdset <= __FD_SETSIZE && fdt->max_fds <= NR_OPEN_DEFAULT) {
107 fd array for fork() */ 133 /*
108 if (i) { 134 * The fdtable was embedded
109 memcpy(new_fds, old_fds, i * sizeof(struct file *)); 135 */
110 /* clear the remainder of the array */ 136 return;
111 memset(&new_fds[i], 0, 137 }
112 (nfds-i) * sizeof(struct file *)); 138 if (fdset_size <= PAGE_SIZE && fdarray_size <= PAGE_SIZE) {
113 139 kfree(fdt->open_fds);
114 spin_unlock(&files->file_lock); 140 kfree(fdt->close_on_exec);
115 free_fd_array(old_fds, i); 141 kfree(fdt->fd);
116 spin_lock(&files->file_lock); 142 kfree(fdt);
117 }
118 } else { 143 } else {
119 /* Somebody expanded the array while we slept ... */ 144 fddef = &get_cpu_var(fdtable_defer_list);
120 spin_unlock(&files->file_lock); 145 spin_lock(&fddef->lock);
121 free_fd_array(new_fds, nfds); 146 fdt->next = fddef->next;
122 spin_lock(&files->file_lock); 147 fddef->next = fdt;
148 /*
149 * vmallocs are handled from the workqueue context.
150 * If the per-cpu workqueue is running, then we
151 * defer work scheduling through a timer.
152 */
153 if (!schedule_work(&fddef->wq))
154 mod_timer(&fddef->timer, 5);
155 spin_unlock(&fddef->lock);
156 put_cpu_var(fdtable_defer_list);
123 } 157 }
124 error = 0; 158}
125out: 159
126 return error; 160void free_fdtable(struct fdtable *fdt)
161{
162 if (fdt->free_files || fdt->max_fdset > __FD_SETSIZE ||
163 fdt->max_fds > NR_OPEN_DEFAULT)
164 call_rcu(&fdt->rcu, free_fdtable_rcu);
165}
166
167/*
168 * Expand the fdset in the files_struct. Called with the files spinlock
169 * held for write.
170 */
171static void copy_fdtable(struct fdtable *nfdt, struct fdtable *fdt)
172{
173 int i;
174 int count;
175
176 BUG_ON(nfdt->max_fdset < fdt->max_fdset);
177 BUG_ON(nfdt->max_fds < fdt->max_fds);
178 /* Copy the existing tables and install the new pointers */
179
180 i = fdt->max_fdset / (sizeof(unsigned long) * 8);
181 count = (nfdt->max_fdset - fdt->max_fdset) / 8;
182
183 /*
184 * Don't copy the entire array if the current fdset is
185 * not yet initialised.
186 */
187 if (i) {
188 memcpy (nfdt->open_fds, fdt->open_fds,
189 fdt->max_fdset/8);
190 memcpy (nfdt->close_on_exec, fdt->close_on_exec,
191 fdt->max_fdset/8);
192 memset (&nfdt->open_fds->fds_bits[i], 0, count);
193 memset (&nfdt->close_on_exec->fds_bits[i], 0, count);
194 }
195
196 /* Don't copy/clear the array if we are creating a new
197 fd array for fork() */
198 if (fdt->max_fds) {
199 memcpy(nfdt->fd, fdt->fd,
200 fdt->max_fds * sizeof(struct file *));
201 /* clear the remainder of the array */
202 memset(&nfdt->fd[fdt->max_fds], 0,
203 (nfdt->max_fds - fdt->max_fds) *
204 sizeof(struct file *));
205 }
206 nfdt->next_fd = fdt->next_fd;
127} 207}
128 208
129/* 209/*
@@ -154,26 +234,21 @@ void free_fdset(fd_set *array, int num)
154 vfree(array); 234 vfree(array);
155} 235}
156 236
157/* 237static struct fdtable *alloc_fdtable(int nr)
158 * Expand the fdset in the files_struct. Called with the files spinlock
159 * held for write.
160 */
161static int expand_fdset(struct files_struct *files, int nr)
162 __releases(file->file_lock)
163 __acquires(file->file_lock)
164{ 238{
165 fd_set *new_openset = NULL, *new_execset = NULL; 239 struct fdtable *fdt = NULL;
166 int error, nfds = 0; 240 int nfds = 0;
167 241 fd_set *new_openset = NULL, *new_execset = NULL;
168 error = -EMFILE; 242 struct file **new_fds;
169 if (files->max_fdset >= NR_OPEN || nr >= NR_OPEN)
170 goto out;
171 243
172 nfds = files->max_fdset; 244 fdt = kmalloc(sizeof(*fdt), GFP_KERNEL);
173 spin_unlock(&files->file_lock); 245 if (!fdt)
246 goto out;
247 memset(fdt, 0, sizeof(*fdt));
174 248
175 /* Expand to the max in easy steps */ 249 nfds = __FD_SETSIZE;
176 do { 250 /* Expand to the max in easy steps */
251 do {
177 if (nfds < (PAGE_SIZE * 8)) 252 if (nfds < (PAGE_SIZE * 8))
178 nfds = PAGE_SIZE * 8; 253 nfds = PAGE_SIZE * 8;
179 else { 254 else {
@@ -183,49 +258,88 @@ static int expand_fdset(struct files_struct *files, int nr)
183 } 258 }
184 } while (nfds <= nr); 259 } while (nfds <= nr);
185 260
186 error = -ENOMEM; 261 new_openset = alloc_fdset(nfds);
187 new_openset = alloc_fdset(nfds); 262 new_execset = alloc_fdset(nfds);
188 new_execset = alloc_fdset(nfds); 263 if (!new_openset || !new_execset)
189 spin_lock(&files->file_lock); 264 goto out;
190 if (!new_openset || !new_execset) 265 fdt->open_fds = new_openset;
266 fdt->close_on_exec = new_execset;
267 fdt->max_fdset = nfds;
268
269 nfds = NR_OPEN_DEFAULT;
270 /*
271 * Expand to the max in easy steps, and keep expanding it until
272 * we have enough for the requested fd array size.
273 */
274 do {
275#if NR_OPEN_DEFAULT < 256
276 if (nfds < 256)
277 nfds = 256;
278 else
279#endif
280 if (nfds < (PAGE_SIZE / sizeof(struct file *)))
281 nfds = PAGE_SIZE / sizeof(struct file *);
282 else {
283 nfds = nfds * 2;
284 if (nfds > NR_OPEN)
285 nfds = NR_OPEN;
286 }
287 } while (nfds <= nr);
288 new_fds = alloc_fd_array(nfds);
289 if (!new_fds)
290 goto out;
291 fdt->fd = new_fds;
292 fdt->max_fds = nfds;
293 fdt->free_files = NULL;
294 return fdt;
295out:
296 if (new_openset)
297 free_fdset(new_openset, nfds);
298 if (new_execset)
299 free_fdset(new_execset, nfds);
300 kfree(fdt);
301 return NULL;
302}
303
304/*
305 * Expands the file descriptor table - it will allocate a new fdtable and
306 * both fd array and fdset. It is expected to be called with the
307 * files_lock held.
308 */
309static int expand_fdtable(struct files_struct *files, int nr)
310 __releases(files->file_lock)
311 __acquires(files->file_lock)
312{
313 int error = 0;
314 struct fdtable *fdt;
315 struct fdtable *nfdt = NULL;
316
317 spin_unlock(&files->file_lock);
318 nfdt = alloc_fdtable(nr);
319 if (!nfdt) {
320 error = -ENOMEM;
321 spin_lock(&files->file_lock);
191 goto out; 322 goto out;
323 }
192 324
193 error = 0; 325 spin_lock(&files->file_lock);
194 326 fdt = files_fdtable(files);
195 /* Copy the existing tables and install the new pointers */ 327 /*
196 if (nfds > files->max_fdset) { 328 * Check again since another task may have expanded the
197 int i = files->max_fdset / (sizeof(unsigned long) * 8); 329 * fd table while we dropped the lock
198 int count = (nfds - files->max_fdset) / 8; 330 */
199 331 if (nr >= fdt->max_fds || nr >= fdt->max_fdset) {
200 /* 332 copy_fdtable(nfdt, fdt);
201 * Don't copy the entire array if the current fdset is 333 } else {
202 * not yet initialised. 334 /* Somebody expanded while we dropped file_lock */
203 */
204 if (i) {
205 memcpy (new_openset, files->open_fds, files->max_fdset/8);
206 memcpy (new_execset, files->close_on_exec, files->max_fdset/8);
207 memset (&new_openset->fds_bits[i], 0, count);
208 memset (&new_execset->fds_bits[i], 0, count);
209 }
210
211 nfds = xchg(&files->max_fdset, nfds);
212 new_openset = xchg(&files->open_fds, new_openset);
213 new_execset = xchg(&files->close_on_exec, new_execset);
214 spin_unlock(&files->file_lock); 335 spin_unlock(&files->file_lock);
215 free_fdset (new_openset, nfds); 336 __free_fdtable(nfdt);
216 free_fdset (new_execset, nfds);
217 spin_lock(&files->file_lock); 337 spin_lock(&files->file_lock);
218 return 0; 338 goto out;
219 } 339 }
220 /* Somebody expanded the array while we slept ... */ 340 rcu_assign_pointer(files->fdt, nfdt);
221 341 free_fdtable(fdt);
222out: 342out:
223 spin_unlock(&files->file_lock);
224 if (new_openset)
225 free_fdset(new_openset, nfds);
226 if (new_execset)
227 free_fdset(new_execset, nfds);
228 spin_lock(&files->file_lock);
229 return error; 343 return error;
230} 344}
231 345
@@ -237,18 +351,39 @@ out:
237int expand_files(struct files_struct *files, int nr) 351int expand_files(struct files_struct *files, int nr)
238{ 352{
239 int err, expand = 0; 353 int err, expand = 0;
354 struct fdtable *fdt;
240 355
241 if (nr >= files->max_fdset) { 356 fdt = files_fdtable(files);
242 expand = 1; 357 if (nr >= fdt->max_fdset || nr >= fdt->max_fds) {
243 if ((err = expand_fdset(files, nr))) 358 if (fdt->max_fdset >= NR_OPEN ||
359 fdt->max_fds >= NR_OPEN || nr >= NR_OPEN) {
360 err = -EMFILE;
244 goto out; 361 goto out;
245 } 362 }
246 if (nr >= files->max_fds) {
247 expand = 1; 363 expand = 1;
248 if ((err = expand_fd_array(files, nr))) 364 if ((err = expand_fdtable(files, nr)))
249 goto out; 365 goto out;
250 } 366 }
251 err = expand; 367 err = expand;
252out: 368out:
253 return err; 369 return err;
254} 370}
371
372static void __devinit fdtable_defer_list_init(int cpu)
373{
374 struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu);
375 spin_lock_init(&fddef->lock);
376 INIT_WORK(&fddef->wq, (void (*)(void *))free_fdtable_work, fddef);
377 init_timer(&fddef->timer);
378 fddef->timer.data = (unsigned long)fddef;
379 fddef->timer.function = fdtable_timer;
380 fddef->next = NULL;
381}
382
383void __init files_defer_init(void)
384{
385 int i;
386 /* Really early - can't use for_each_cpu */
387 for (i = 0; i < NR_CPUS; i++)
388 fdtable_defer_list_init(i);
389}
diff --git a/fs/file_table.c b/fs/file_table.c
index 43e9e1737de2..86ec8ae985b4 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -14,6 +14,7 @@
14#include <linux/fs.h> 14#include <linux/fs.h>
15#include <linux/security.h> 15#include <linux/security.h>
16#include <linux/eventpoll.h> 16#include <linux/eventpoll.h>
17#include <linux/rcupdate.h>
17#include <linux/mount.h> 18#include <linux/mount.h>
18#include <linux/cdev.h> 19#include <linux/cdev.h>
19#include <linux/fsnotify.h> 20#include <linux/fsnotify.h>
@@ -53,11 +54,17 @@ void filp_dtor(void * objp, struct kmem_cache_s *cachep, unsigned long dflags)
53 spin_unlock_irqrestore(&filp_count_lock, flags); 54 spin_unlock_irqrestore(&filp_count_lock, flags);
54} 55}
55 56
56static inline void file_free(struct file *f) 57static inline void file_free_rcu(struct rcu_head *head)
57{ 58{
59 struct file *f = container_of(head, struct file, f_rcuhead);
58 kmem_cache_free(filp_cachep, f); 60 kmem_cache_free(filp_cachep, f);
59} 61}
60 62
63static inline void file_free(struct file *f)
64{
65 call_rcu(&f->f_rcuhead, file_free_rcu);
66}
67
61/* Find an unused file structure and return a pointer to it. 68/* Find an unused file structure and return a pointer to it.
62 * Returns NULL, if there are no more free file structures or 69 * Returns NULL, if there are no more free file structures or
63 * we run out of memory. 70 * we run out of memory.
@@ -110,7 +117,7 @@ EXPORT_SYMBOL(get_empty_filp);
110 117
111void fastcall fput(struct file *file) 118void fastcall fput(struct file *file)
112{ 119{
113 if (atomic_dec_and_test(&file->f_count)) 120 if (rcuref_dec_and_test(&file->f_count))
114 __fput(file); 121 __fput(file);
115} 122}
116 123
@@ -156,11 +163,17 @@ struct file fastcall *fget(unsigned int fd)
156 struct file *file; 163 struct file *file;
157 struct files_struct *files = current->files; 164 struct files_struct *files = current->files;
158 165
159 spin_lock(&files->file_lock); 166 rcu_read_lock();
160 file = fcheck_files(files, fd); 167 file = fcheck_files(files, fd);
161 if (file) 168 if (file) {
162 get_file(file); 169 if (!rcuref_inc_lf(&file->f_count)) {
163 spin_unlock(&files->file_lock); 170 /* File object ref couldn't be taken */
171 rcu_read_unlock();
172 return NULL;
173 }
174 }
175 rcu_read_unlock();
176
164 return file; 177 return file;
165} 178}
166 179
@@ -182,21 +195,25 @@ struct file fastcall *fget_light(unsigned int fd, int *fput_needed)
182 if (likely((atomic_read(&files->count) == 1))) { 195 if (likely((atomic_read(&files->count) == 1))) {
183 file = fcheck_files(files, fd); 196 file = fcheck_files(files, fd);
184 } else { 197 } else {
185 spin_lock(&files->file_lock); 198 rcu_read_lock();
186 file = fcheck_files(files, fd); 199 file = fcheck_files(files, fd);
187 if (file) { 200 if (file) {
188 get_file(file); 201 if (rcuref_inc_lf(&file->f_count))
189 *fput_needed = 1; 202 *fput_needed = 1;
203 else
204 /* Didn't get the reference, someone's freed */
205 file = NULL;
190 } 206 }
191 spin_unlock(&files->file_lock); 207 rcu_read_unlock();
192 } 208 }
209
193 return file; 210 return file;
194} 211}
195 212
196 213
197void put_filp(struct file *file) 214void put_filp(struct file *file)
198{ 215{
199 if (atomic_dec_and_test(&file->f_count)) { 216 if (rcuref_dec_and_test(&file->f_count)) {
200 security_file_free(file); 217 security_file_free(file);
201 file_kill(file); 218 file_kill(file);
202 file_free(file); 219 file_free(file);
@@ -257,4 +274,5 @@ void __init files_init(unsigned long mempages)
257 files_stat.max_files = n; 274 files_stat.max_files = n;
258 if (files_stat.max_files < NR_FILE) 275 if (files_stat.max_files < NR_FILE)
259 files_stat.max_files = NR_FILE; 276 files_stat.max_files = NR_FILE;
277 files_defer_init();
260} 278}
diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
new file mode 100644
index 000000000000..c3e1f760cac9
--- /dev/null
+++ b/fs/fuse/Makefile
@@ -0,0 +1,7 @@
1#
2# Makefile for the FUSE filesystem.
3#
4
5obj-$(CONFIG_FUSE_FS) += fuse.o
6
7fuse-objs := dev.o dir.o file.o inode.o
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
new file mode 100644
index 000000000000..d4c869c6d01b
--- /dev/null
+++ b/fs/fuse/dev.c
@@ -0,0 +1,877 @@
1/*
2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu>
4
5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING.
7*/
8
9#include "fuse_i.h"
10
11#include <linux/init.h>
12#include <linux/module.h>
13#include <linux/poll.h>
14#include <linux/uio.h>
15#include <linux/miscdevice.h>
16#include <linux/pagemap.h>
17#include <linux/file.h>
18#include <linux/slab.h>
19
20MODULE_ALIAS_MISCDEV(FUSE_MINOR);
21
22static kmem_cache_t *fuse_req_cachep;
23
24static inline struct fuse_conn *fuse_get_conn(struct file *file)
25{
26 struct fuse_conn *fc;
27 spin_lock(&fuse_lock);
28 fc = file->private_data;
29 if (fc && !fc->mounted)
30 fc = NULL;
31 spin_unlock(&fuse_lock);
32 return fc;
33}
34
35static inline void fuse_request_init(struct fuse_req *req)
36{
37 memset(req, 0, sizeof(*req));
38 INIT_LIST_HEAD(&req->list);
39 init_waitqueue_head(&req->waitq);
40 atomic_set(&req->count, 1);
41}
42
43struct fuse_req *fuse_request_alloc(void)
44{
45 struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, SLAB_KERNEL);
46 if (req)
47 fuse_request_init(req);
48 return req;
49}
50
51void fuse_request_free(struct fuse_req *req)
52{
53 kmem_cache_free(fuse_req_cachep, req);
54}
55
56static inline void block_sigs(sigset_t *oldset)
57{
58 sigset_t mask;
59
60 siginitsetinv(&mask, sigmask(SIGKILL));
61 sigprocmask(SIG_BLOCK, &mask, oldset);
62}
63
64static inline void restore_sigs(sigset_t *oldset)
65{
66 sigprocmask(SIG_SETMASK, oldset, NULL);
67}
68
69void fuse_reset_request(struct fuse_req *req)
70{
71 int preallocated = req->preallocated;
72 BUG_ON(atomic_read(&req->count) != 1);
73 fuse_request_init(req);
74 req->preallocated = preallocated;
75}
76
77static void __fuse_get_request(struct fuse_req *req)
78{
79 atomic_inc(&req->count);
80}
81
82/* Must be called with > 1 refcount */
83static void __fuse_put_request(struct fuse_req *req)
84{
85 BUG_ON(atomic_read(&req->count) < 2);
86 atomic_dec(&req->count);
87}
88
89static struct fuse_req *do_get_request(struct fuse_conn *fc)
90{
91 struct fuse_req *req;
92
93 spin_lock(&fuse_lock);
94 BUG_ON(list_empty(&fc->unused_list));
95 req = list_entry(fc->unused_list.next, struct fuse_req, list);
96 list_del_init(&req->list);
97 spin_unlock(&fuse_lock);
98 fuse_request_init(req);
99 req->preallocated = 1;
100 req->in.h.uid = current->fsuid;
101 req->in.h.gid = current->fsgid;
102 req->in.h.pid = current->pid;
103 return req;
104}
105
106/* This can return NULL, but only in case it's interrupted by a SIGKILL */
107struct fuse_req *fuse_get_request(struct fuse_conn *fc)
108{
109 int intr;
110 sigset_t oldset;
111
112 block_sigs(&oldset);
113 intr = down_interruptible(&fc->outstanding_sem);
114 restore_sigs(&oldset);
115 return intr ? NULL : do_get_request(fc);
116}
117
118static void fuse_putback_request(struct fuse_conn *fc, struct fuse_req *req)
119{
120 spin_lock(&fuse_lock);
121 if (req->preallocated)
122 list_add(&req->list, &fc->unused_list);
123 else
124 fuse_request_free(req);
125
126 /* If we are in debt decrease that first */
127 if (fc->outstanding_debt)
128 fc->outstanding_debt--;
129 else
130 up(&fc->outstanding_sem);
131 spin_unlock(&fuse_lock);
132}
133
134void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
135{
136 if (atomic_dec_and_test(&req->count))
137 fuse_putback_request(fc, req);
138}
139
140void fuse_release_background(struct fuse_req *req)
141{
142 iput(req->inode);
143 iput(req->inode2);
144 if (req->file)
145 fput(req->file);
146 spin_lock(&fuse_lock);
147 list_del(&req->bg_entry);
148 spin_unlock(&fuse_lock);
149}
150
151/*
152 * This function is called when a request is finished. Either a reply
153 * has arrived or it was interrupted (and not yet sent) or some error
154 * occured during communication with userspace, or the device file was
155 * closed. It decreases the referece count for the request. In case
156 * of a background request the referece to the stored objects are
157 * released. The requester thread is woken up (if still waiting), and
158 * finally the request is either freed or put on the unused_list
159 *
160 * Called with fuse_lock, unlocks it
161 */
162static void request_end(struct fuse_conn *fc, struct fuse_req *req)
163{
164 int putback;
165 req->finished = 1;
166 putback = atomic_dec_and_test(&req->count);
167 spin_unlock(&fuse_lock);
168 if (req->background) {
169 down_read(&fc->sbput_sem);
170 if (fc->mounted)
171 fuse_release_background(req);
172 up_read(&fc->sbput_sem);
173 }
174 wake_up(&req->waitq);
175 if (req->in.h.opcode == FUSE_INIT) {
176 int i;
177
178 if (req->misc.init_in_out.major != FUSE_KERNEL_VERSION)
179 fc->conn_error = 1;
180
181 /* After INIT reply is received other requests can go
182 out. So do (FUSE_MAX_OUTSTANDING - 1) number of
183 up()s on outstanding_sem. The last up() is done in
184 fuse_putback_request() */
185 for (i = 1; i < FUSE_MAX_OUTSTANDING; i++)
186 up(&fc->outstanding_sem);
187 }
188 if (putback)
189 fuse_putback_request(fc, req);
190}
191
192/*
193 * Unfortunately request interruption not just solves the deadlock
194 * problem, it causes problems too. These stem from the fact, that an
195 * interrupted request is continued to be processed in userspace,
196 * while all the locks and object references (inode and file) held
197 * during the operation are released.
198 *
199 * To release the locks is exactly why there's a need to interrupt the
200 * request, so there's not a lot that can be done about this, except
201 * introduce additional locking in userspace.
202 *
203 * More important is to keep inode and file references until userspace
204 * has replied, otherwise FORGET and RELEASE could be sent while the
205 * inode/file is still used by the filesystem.
206 *
207 * For this reason the concept of "background" request is introduced.
208 * An interrupted request is backgrounded if it has been already sent
209 * to userspace. Backgrounding involves getting an extra reference to
210 * inode(s) or file used in the request, and adding the request to
211 * fc->background list. When a reply is received for a background
212 * request, the object references are released, and the request is
213 * removed from the list. If the filesystem is unmounted while there
214 * are still background requests, the list is walked and references
215 * are released as if a reply was received.
216 *
217 * There's one more use for a background request. The RELEASE message is
218 * always sent as background, since it doesn't return an error or
219 * data.
220 */
221static void background_request(struct fuse_conn *fc, struct fuse_req *req)
222{
223 req->background = 1;
224 list_add(&req->bg_entry, &fc->background);
225 if (req->inode)
226 req->inode = igrab(req->inode);
227 if (req->inode2)
228 req->inode2 = igrab(req->inode2);
229 if (req->file)
230 get_file(req->file);
231}
232
233/* Called with fuse_lock held. Releases, and then reacquires it. */
234static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
235{
236 sigset_t oldset;
237
238 spin_unlock(&fuse_lock);
239 block_sigs(&oldset);
240 wait_event_interruptible(req->waitq, req->finished);
241 restore_sigs(&oldset);
242 spin_lock(&fuse_lock);
243 if (req->finished)
244 return;
245
246 req->out.h.error = -EINTR;
247 req->interrupted = 1;
248 if (req->locked) {
249 /* This is uninterruptible sleep, because data is
250 being copied to/from the buffers of req. During
251 locked state, there mustn't be any filesystem
252 operation (e.g. page fault), since that could lead
253 to deadlock */
254 spin_unlock(&fuse_lock);
255 wait_event(req->waitq, !req->locked);
256 spin_lock(&fuse_lock);
257 }
258 if (!req->sent && !list_empty(&req->list)) {
259 list_del(&req->list);
260 __fuse_put_request(req);
261 } else if (!req->finished && req->sent)
262 background_request(fc, req);
263}
264
265static unsigned len_args(unsigned numargs, struct fuse_arg *args)
266{
267 unsigned nbytes = 0;
268 unsigned i;
269
270 for (i = 0; i < numargs; i++)
271 nbytes += args[i].size;
272
273 return nbytes;
274}
275
276static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
277{
278 fc->reqctr++;
279 /* zero is special */
280 if (fc->reqctr == 0)
281 fc->reqctr = 1;
282 req->in.h.unique = fc->reqctr;
283 req->in.h.len = sizeof(struct fuse_in_header) +
284 len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
285 if (!req->preallocated) {
286 /* If request is not preallocated (either FORGET or
287 RELEASE), then still decrease outstanding_sem, so
288 user can't open infinite number of files while not
289 processing the RELEASE requests. However for
290 efficiency do it without blocking, so if down()
291 would block, just increase the debt instead */
292 if (down_trylock(&fc->outstanding_sem))
293 fc->outstanding_debt++;
294 }
295 list_add_tail(&req->list, &fc->pending);
296 wake_up(&fc->waitq);
297}
298
/*
 * Send a request and wait for the answer.  If the connection is gone
 * or in error, the request is failed with -ENOTCONN or -ECONNREFUSED
 * instead of being queued.
 *
 * This can only be interrupted by a SIGKILL
 */
void request_send(struct fuse_conn *fc, struct fuse_req *req)
{
	req->isreply = 1;

	spin_lock(&fuse_lock);
	if (!fc->connected) {
		req->out.h.error = -ENOTCONN;
	} else if (fc->conn_error) {
		req->out.h.error = -ECONNREFUSED;
	} else {
		queue_request(fc, req);
		/* acquire extra reference, since request is still needed
		   after request_end() */
		__fuse_get_request(req);

		request_wait_answer(fc, req);
	}
	spin_unlock(&fuse_lock);
}
320
321static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
322{
323 spin_lock(&fuse_lock);
324 if (fc->connected) {
325 queue_request(fc, req);
326 spin_unlock(&fuse_lock);
327 } else {
328 req->out.h.error = -ENOTCONN;
329 request_end(fc, req);
330 }
331}
332
333void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
334{
335 req->isreply = 0;
336 request_send_nowait(fc, req);
337}
338
339void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
340{
341 req->isreply = 1;
342 spin_lock(&fuse_lock);
343 background_request(fc, req);
344 spin_unlock(&fuse_lock);
345 request_send_nowait(fc, req);
346}
347
348void fuse_send_init(struct fuse_conn *fc)
349{
350 /* This is called from fuse_read_super() so there's guaranteed
351 to be a request available */
352 struct fuse_req *req = do_get_request(fc);
353 struct fuse_init_in_out *arg = &req->misc.init_in_out;
354 arg->major = FUSE_KERNEL_VERSION;
355 arg->minor = FUSE_KERNEL_MINOR_VERSION;
356 req->in.h.opcode = FUSE_INIT;
357 req->in.numargs = 1;
358 req->in.args[0].size = sizeof(*arg);
359 req->in.args[0].value = arg;
360 req->out.numargs = 1;
361 req->out.args[0].size = sizeof(*arg);
362 req->out.args[0].value = arg;
363 request_send_background(fc, req);
364}
365
366/*
367 * Lock the request. Up to the next unlock_request() there mustn't be
368 * anything that could cause a page-fault. If the request was already
369 * interrupted bail out.
370 */
371static inline int lock_request(struct fuse_req *req)
372{
373 int err = 0;
374 if (req) {
375 spin_lock(&fuse_lock);
376 if (req->interrupted)
377 err = -ENOENT;
378 else
379 req->locked = 1;
380 spin_unlock(&fuse_lock);
381 }
382 return err;
383}
384
385/*
386 * Unlock request. If it was interrupted during being locked, the
387 * requester thread is currently waiting for it to be unlocked, so
388 * wake it up.
389 */
390static inline void unlock_request(struct fuse_req *req)
391{
392 if (req) {
393 spin_lock(&fuse_lock);
394 req->locked = 0;
395 if (req->interrupted)
396 wake_up(&req->waitq);
397 spin_unlock(&fuse_lock);
398 }
399}
400
/* Cursor for copying between a request and a userspace iovec. */
struct fuse_copy_state {
	int write;			/* non-zero: copy towards the user buffer */
	struct fuse_req *req;		/* request (un)locked around each refill, may be NULL */
	const struct iovec *iov;	/* next segment not yet started */
	unsigned long nr_segs;		/* segments left at 'iov' */
	unsigned long seglen;		/* bytes left in the current segment */
	unsigned long addr;		/* user address of the next byte */
	struct page *pg;		/* user page currently pinned */
	void *mapaddr;			/* kernel mapping of 'pg', NULL if none */
	void *buf;			/* position within the mapped page */
	unsigned len;			/* bytes left at 'buf' */
};
413
414static void fuse_copy_init(struct fuse_copy_state *cs, int write,
415 struct fuse_req *req, const struct iovec *iov,
416 unsigned long nr_segs)
417{
418 memset(cs, 0, sizeof(*cs));
419 cs->write = write;
420 cs->req = req;
421 cs->iov = iov;
422 cs->nr_segs = nr_segs;
423}
424
425/* Unmap and put previous page of userspace buffer */
426static inline void fuse_copy_finish(struct fuse_copy_state *cs)
427{
428 if (cs->mapaddr) {
429 kunmap_atomic(cs->mapaddr, KM_USER0);
430 if (cs->write) {
431 flush_dcache_page(cs->pg);
432 set_page_dirty_lock(cs->pg);
433 }
434 put_page(cs->pg);
435 cs->mapaddr = NULL;
436 }
437}
438
439/*
440 * Get another pagefull of userspace buffer, and map it to kernel
441 * address space, and lock request
442 */
443static int fuse_copy_fill(struct fuse_copy_state *cs)
444{
445 unsigned long offset;
446 int err;
447
448 unlock_request(cs->req);
449 fuse_copy_finish(cs);
450 if (!cs->seglen) {
451 BUG_ON(!cs->nr_segs);
452 cs->seglen = cs->iov[0].iov_len;
453 cs->addr = (unsigned long) cs->iov[0].iov_base;
454 cs->iov ++;
455 cs->nr_segs --;
456 }
457 down_read(&current->mm->mmap_sem);
458 err = get_user_pages(current, current->mm, cs->addr, 1, cs->write, 0,
459 &cs->pg, NULL);
460 up_read(&current->mm->mmap_sem);
461 if (err < 0)
462 return err;
463 BUG_ON(err != 1);
464 offset = cs->addr % PAGE_SIZE;
465 cs->mapaddr = kmap_atomic(cs->pg, KM_USER0);
466 cs->buf = cs->mapaddr + offset;
467 cs->len = min(PAGE_SIZE - offset, cs->seglen);
468 cs->seglen -= cs->len;
469 cs->addr += cs->len;
470
471 return lock_request(cs->req);
472}
473
474/* Do as much copy to/from userspace buffer as we can */
475static inline int fuse_copy_do(struct fuse_copy_state *cs, void **val,
476 unsigned *size)
477{
478 unsigned ncpy = min(*size, cs->len);
479 if (val) {
480 if (cs->write)
481 memcpy(cs->buf, *val, ncpy);
482 else
483 memcpy(*val, cs->buf, ncpy);
484 *val += ncpy;
485 }
486 *size -= ncpy;
487 cs->len -= ncpy;
488 cs->buf += ncpy;
489 return ncpy;
490}
491
492/*
493 * Copy a page in the request to/from the userspace buffer. Must be
494 * done atomically
495 */
496static inline int fuse_copy_page(struct fuse_copy_state *cs, struct page *page,
497 unsigned offset, unsigned count, int zeroing)
498{
499 if (page && zeroing && count < PAGE_SIZE) {
500 void *mapaddr = kmap_atomic(page, KM_USER1);
501 memset(mapaddr, 0, PAGE_SIZE);
502 kunmap_atomic(mapaddr, KM_USER1);
503 }
504 while (count) {
505 int err;
506 if (!cs->len && (err = fuse_copy_fill(cs)))
507 return err;
508 if (page) {
509 void *mapaddr = kmap_atomic(page, KM_USER1);
510 void *buf = mapaddr + offset;
511 offset += fuse_copy_do(cs, &buf, &count);
512 kunmap_atomic(mapaddr, KM_USER1);
513 } else
514 offset += fuse_copy_do(cs, NULL, &count);
515 }
516 if (page && !cs->write)
517 flush_dcache_page(page);
518 return 0;
519}
520
521/* Copy pages in the request to/from userspace buffer */
522static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
523 int zeroing)
524{
525 unsigned i;
526 struct fuse_req *req = cs->req;
527 unsigned offset = req->page_offset;
528 unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);
529
530 for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
531 struct page *page = req->pages[i];
532 int err = fuse_copy_page(cs, page, offset, count, zeroing);
533 if (err)
534 return err;
535
536 nbytes -= count;
537 count = min(nbytes, (unsigned) PAGE_SIZE);
538 offset = 0;
539 }
540 return 0;
541}
542
543/* Copy a single argument in the request to/from userspace buffer */
544static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
545{
546 while (size) {
547 int err;
548 if (!cs->len && (err = fuse_copy_fill(cs)))
549 return err;
550 fuse_copy_do(cs, &val, &size);
551 }
552 return 0;
553}
554
555/* Copy request arguments to/from userspace buffer */
556static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
557 unsigned argpages, struct fuse_arg *args,
558 int zeroing)
559{
560 int err = 0;
561 unsigned i;
562
563 for (i = 0; !err && i < numargs; i++) {
564 struct fuse_arg *arg = &args[i];
565 if (i == numargs - 1 && argpages)
566 err = fuse_copy_pages(cs, arg->size, zeroing);
567 else
568 err = fuse_copy_one(cs, arg->value, arg->size);
569 }
570 return err;
571}
572
573/* Wait until a request is available on the pending list */
574static void request_wait(struct fuse_conn *fc)
575{
576 DECLARE_WAITQUEUE(wait, current);
577
578 add_wait_queue_exclusive(&fc->waitq, &wait);
579 while (fc->mounted && list_empty(&fc->pending)) {
580 set_current_state(TASK_INTERRUPTIBLE);
581 if (signal_pending(current))
582 break;
583
584 spin_unlock(&fuse_lock);
585 schedule();
586 spin_lock(&fuse_lock);
587 }
588 set_current_state(TASK_RUNNING);
589 remove_wait_queue(&fc->waitq, &wait);
590}
591
592/*
593 * Read a single request into the userspace filesystem's buffer. This
594 * function waits until a request is available, then removes it from
595 * the pending list and copies request data to userspace buffer. If
596 * no reply is needed (FORGET) or request has been interrupted or
597 * there was an error during the copying then it's finished by calling
598 * request_end(). Otherwise add it to the processing list, and set
599 * the 'sent' flag.
600 */
601static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
602 unsigned long nr_segs, loff_t *off)
603{
604 int err;
605 struct fuse_conn *fc;
606 struct fuse_req *req;
607 struct fuse_in *in;
608 struct fuse_copy_state cs;
609 unsigned reqsize;
610
611 spin_lock(&fuse_lock);
612 fc = file->private_data;
613 err = -EPERM;
614 if (!fc)
615 goto err_unlock;
616 request_wait(fc);
617 err = -ENODEV;
618 if (!fc->mounted)
619 goto err_unlock;
620 err = -ERESTARTSYS;
621 if (list_empty(&fc->pending))
622 goto err_unlock;
623
624 req = list_entry(fc->pending.next, struct fuse_req, list);
625 list_del_init(&req->list);
626 spin_unlock(&fuse_lock);
627
628 in = &req->in;
629 reqsize = req->in.h.len;
630 fuse_copy_init(&cs, 1, req, iov, nr_segs);
631 err = -EINVAL;
632 if (iov_length(iov, nr_segs) >= reqsize) {
633 err = fuse_copy_one(&cs, &in->h, sizeof(in->h));
634 if (!err)
635 err = fuse_copy_args(&cs, in->numargs, in->argpages,
636 (struct fuse_arg *) in->args, 0);
637 }
638 fuse_copy_finish(&cs);
639
640 spin_lock(&fuse_lock);
641 req->locked = 0;
642 if (!err && req->interrupted)
643 err = -ENOENT;
644 if (err) {
645 if (!req->interrupted)
646 req->out.h.error = -EIO;
647 request_end(fc, req);
648 return err;
649 }
650 if (!req->isreply)
651 request_end(fc, req);
652 else {
653 req->sent = 1;
654 list_add_tail(&req->list, &fc->processing);
655 spin_unlock(&fuse_lock);
656 }
657 return reqsize;
658
659 err_unlock:
660 spin_unlock(&fuse_lock);
661 return err;
662}
663
664static ssize_t fuse_dev_read(struct file *file, char __user *buf,
665 size_t nbytes, loff_t *off)
666{
667 struct iovec iov;
668 iov.iov_len = nbytes;
669 iov.iov_base = buf;
670 return fuse_dev_readv(file, &iov, 1, off);
671}
672
673/* Look up request on processing list by unique ID */
674static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
675{
676 struct list_head *entry;
677
678 list_for_each(entry, &fc->processing) {
679 struct fuse_req *req;
680 req = list_entry(entry, struct fuse_req, list);
681 if (req->in.h.unique == unique)
682 return req;
683 }
684 return NULL;
685}
686
687static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
688 unsigned nbytes)
689{
690 unsigned reqsize = sizeof(struct fuse_out_header);
691
692 if (out->h.error)
693 return nbytes != reqsize ? -EINVAL : 0;
694
695 reqsize += len_args(out->numargs, out->args);
696
697 if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
698 return -EINVAL;
699 else if (reqsize > nbytes) {
700 struct fuse_arg *lastarg = &out->args[out->numargs-1];
701 unsigned diffsize = reqsize - nbytes;
702 if (diffsize > lastarg->size)
703 return -EINVAL;
704 lastarg->size -= diffsize;
705 }
706 return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
707 out->page_zeroing);
708}
709
710/*
711 * Write a single reply to a request. First the header is copied from
712 * the write buffer. The request is then searched on the processing
713 * list by the unique ID found in the header. If found, then remove
714 * it from the list and copy the rest of the buffer to the request.
715 * The request is finished by calling request_end()
716 */
717static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
718 unsigned long nr_segs, loff_t *off)
719{
720 int err;
721 unsigned nbytes = iov_length(iov, nr_segs);
722 struct fuse_req *req;
723 struct fuse_out_header oh;
724 struct fuse_copy_state cs;
725 struct fuse_conn *fc = fuse_get_conn(file);
726 if (!fc)
727 return -ENODEV;
728
729 fuse_copy_init(&cs, 0, NULL, iov, nr_segs);
730 if (nbytes < sizeof(struct fuse_out_header))
731 return -EINVAL;
732
733 err = fuse_copy_one(&cs, &oh, sizeof(oh));
734 if (err)
735 goto err_finish;
736 err = -EINVAL;
737 if (!oh.unique || oh.error <= -1000 || oh.error > 0 ||
738 oh.len != nbytes)
739 goto err_finish;
740
741 spin_lock(&fuse_lock);
742 req = request_find(fc, oh.unique);
743 err = -EINVAL;
744 if (!req)
745 goto err_unlock;
746
747 list_del_init(&req->list);
748 if (req->interrupted) {
749 request_end(fc, req);
750 fuse_copy_finish(&cs);
751 return -ENOENT;
752 }
753 req->out.h = oh;
754 req->locked = 1;
755 cs.req = req;
756 spin_unlock(&fuse_lock);
757
758 err = copy_out_args(&cs, &req->out, nbytes);
759 fuse_copy_finish(&cs);
760
761 spin_lock(&fuse_lock);
762 req->locked = 0;
763 if (!err) {
764 if (req->interrupted)
765 err = -ENOENT;
766 } else if (!req->interrupted)
767 req->out.h.error = -EIO;
768 request_end(fc, req);
769
770 return err ? err : nbytes;
771
772 err_unlock:
773 spin_unlock(&fuse_lock);
774 err_finish:
775 fuse_copy_finish(&cs);
776 return err;
777}
778
779static ssize_t fuse_dev_write(struct file *file, const char __user *buf,
780 size_t nbytes, loff_t *off)
781{
782 struct iovec iov;
783 iov.iov_len = nbytes;
784 iov.iov_base = (char __user *) buf;
785 return fuse_dev_writev(file, &iov, 1, off);
786}
787
788static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
789{
790 struct fuse_conn *fc = fuse_get_conn(file);
791 unsigned mask = POLLOUT | POLLWRNORM;
792
793 if (!fc)
794 return -ENODEV;
795
796 poll_wait(file, &fc->waitq, wait);
797
798 spin_lock(&fuse_lock);
799 if (!list_empty(&fc->pending))
800 mask |= POLLIN | POLLRDNORM;
801 spin_unlock(&fuse_lock);
802
803 return mask;
804}
805
806/* Abort all requests on the given list (pending or processing) */
807static void end_requests(struct fuse_conn *fc, struct list_head *head)
808{
809 while (!list_empty(head)) {
810 struct fuse_req *req;
811 req = list_entry(head->next, struct fuse_req, list);
812 list_del_init(&req->list);
813 req->out.h.error = -ECONNABORTED;
814 request_end(fc, req);
815 spin_lock(&fuse_lock);
816 }
817}
818
819static int fuse_dev_release(struct inode *inode, struct file *file)
820{
821 struct fuse_conn *fc;
822
823 spin_lock(&fuse_lock);
824 fc = file->private_data;
825 if (fc) {
826 fc->connected = 0;
827 end_requests(fc, &fc->pending);
828 end_requests(fc, &fc->processing);
829 fuse_release_conn(fc);
830 }
831 spin_unlock(&fuse_lock);
832 return 0;
833}
834
835struct file_operations fuse_dev_operations = {
836 .owner = THIS_MODULE,
837 .llseek = no_llseek,
838 .read = fuse_dev_read,
839 .readv = fuse_dev_readv,
840 .write = fuse_dev_write,
841 .writev = fuse_dev_writev,
842 .poll = fuse_dev_poll,
843 .release = fuse_dev_release,
844};
845
846static struct miscdevice fuse_miscdevice = {
847 .minor = FUSE_MINOR,
848 .name = "fuse",
849 .fops = &fuse_dev_operations,
850};
851
852int __init fuse_dev_init(void)
853{
854 int err = -ENOMEM;
855 fuse_req_cachep = kmem_cache_create("fuse_request",
856 sizeof(struct fuse_req),
857 0, 0, NULL, NULL);
858 if (!fuse_req_cachep)
859 goto out;
860
861 err = misc_register(&fuse_miscdevice);
862 if (err)
863 goto out_cache_clean;
864
865 return 0;
866
867 out_cache_clean:
868 kmem_cache_destroy(fuse_req_cachep);
869 out:
870 return err;
871}
872
873void fuse_dev_cleanup(void)
874{
875 misc_deregister(&fuse_miscdevice);
876 kmem_cache_destroy(fuse_req_cachep);
877}
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
new file mode 100644
index 000000000000..e79e49b3eec7
--- /dev/null
+++ b/fs/fuse/dir.c
@@ -0,0 +1,982 @@
1/*
2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu>
4
5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING.
7*/
8
9#include "fuse_i.h"
10
11#include <linux/pagemap.h>
12#include <linux/file.h>
13#include <linux/gfp.h>
14#include <linux/sched.h>
15#include <linux/namei.h>
16
17static inline unsigned long time_to_jiffies(unsigned long sec,
18 unsigned long nsec)
19{
20 struct timespec ts = {sec, nsec};
21 return jiffies + timespec_to_jiffies(&ts);
22}
23
24static void fuse_lookup_init(struct fuse_req *req, struct inode *dir,
25 struct dentry *entry,
26 struct fuse_entry_out *outarg)
27{
28 req->in.h.opcode = FUSE_LOOKUP;
29 req->in.h.nodeid = get_node_id(dir);
30 req->inode = dir;
31 req->in.numargs = 1;
32 req->in.args[0].size = entry->d_name.len + 1;
33 req->in.args[0].value = entry->d_name.name;
34 req->out.numargs = 1;
35 req->out.args[0].size = sizeof(struct fuse_entry_out);
36 req->out.args[0].value = outarg;
37}
38
39static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
40{
41 if (!entry->d_inode || is_bad_inode(entry->d_inode))
42 return 0;
43 else if (time_after(jiffies, entry->d_time)) {
44 int err;
45 struct fuse_entry_out outarg;
46 struct inode *inode = entry->d_inode;
47 struct fuse_inode *fi = get_fuse_inode(inode);
48 struct fuse_conn *fc = get_fuse_conn(inode);
49 struct fuse_req *req = fuse_get_request(fc);
50 if (!req)
51 return 0;
52
53 fuse_lookup_init(req, entry->d_parent->d_inode, entry, &outarg);
54 request_send(fc, req);
55 err = req->out.h.error;
56 if (!err) {
57 if (outarg.nodeid != get_node_id(inode)) {
58 fuse_send_forget(fc, req, outarg.nodeid, 1);
59 return 0;
60 }
61 fi->nlookup ++;
62 }
63 fuse_put_request(fc, req);
64 if (err || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
65 return 0;
66
67 fuse_change_attributes(inode, &outarg.attr);
68 entry->d_time = time_to_jiffies(outarg.entry_valid,
69 outarg.entry_valid_nsec);
70 fi->i_time = time_to_jiffies(outarg.attr_valid,
71 outarg.attr_valid_nsec);
72 }
73 return 1;
74}
75
76static struct dentry_operations fuse_dentry_operations = {
77 .d_revalidate = fuse_dentry_revalidate,
78};
79
80static int fuse_lookup_iget(struct inode *dir, struct dentry *entry,
81 struct inode **inodep)
82{
83 int err;
84 struct fuse_entry_out outarg;
85 struct inode *inode = NULL;
86 struct fuse_conn *fc = get_fuse_conn(dir);
87 struct fuse_req *req;
88
89 if (entry->d_name.len > FUSE_NAME_MAX)
90 return -ENAMETOOLONG;
91
92 req = fuse_get_request(fc);
93 if (!req)
94 return -EINTR;
95
96 fuse_lookup_init(req, dir, entry, &outarg);
97 request_send(fc, req);
98 err = req->out.h.error;
99 if (!err) {
100 inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
101 &outarg.attr);
102 if (!inode) {
103 fuse_send_forget(fc, req, outarg.nodeid, 1);
104 return -ENOMEM;
105 }
106 }
107 fuse_put_request(fc, req);
108 if (err && err != -ENOENT)
109 return err;
110
111 if (inode) {
112 struct fuse_inode *fi = get_fuse_inode(inode);
113 entry->d_time = time_to_jiffies(outarg.entry_valid,
114 outarg.entry_valid_nsec);
115 fi->i_time = time_to_jiffies(outarg.attr_valid,
116 outarg.attr_valid_nsec);
117 }
118
119 entry->d_op = &fuse_dentry_operations;
120 *inodep = inode;
121 return 0;
122}
123
124void fuse_invalidate_attr(struct inode *inode)
125{
126 get_fuse_inode(inode)->i_time = jiffies - 1;
127}
128
129static void fuse_invalidate_entry(struct dentry *entry)
130{
131 d_invalidate(entry);
132 entry->d_time = jiffies - 1;
133}
134
135static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
136 struct inode *dir, struct dentry *entry,
137 int mode)
138{
139 struct fuse_entry_out outarg;
140 struct inode *inode;
141 struct fuse_inode *fi;
142 int err;
143
144 req->in.h.nodeid = get_node_id(dir);
145 req->inode = dir;
146 req->out.numargs = 1;
147 req->out.args[0].size = sizeof(outarg);
148 req->out.args[0].value = &outarg;
149 request_send(fc, req);
150 err = req->out.h.error;
151 if (err) {
152 fuse_put_request(fc, req);
153 return err;
154 }
155 inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
156 &outarg.attr);
157 if (!inode) {
158 fuse_send_forget(fc, req, outarg.nodeid, 1);
159 return -ENOMEM;
160 }
161 fuse_put_request(fc, req);
162
163 /* Don't allow userspace to do really stupid things... */
164 if ((inode->i_mode ^ mode) & S_IFMT) {
165 iput(inode);
166 return -EIO;
167 }
168
169 entry->d_time = time_to_jiffies(outarg.entry_valid,
170 outarg.entry_valid_nsec);
171
172 fi = get_fuse_inode(inode);
173 fi->i_time = time_to_jiffies(outarg.attr_valid,
174 outarg.attr_valid_nsec);
175
176 d_instantiate(entry, inode);
177 fuse_invalidate_attr(dir);
178 return 0;
179}
180
181static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode,
182 dev_t rdev)
183{
184 struct fuse_mknod_in inarg;
185 struct fuse_conn *fc = get_fuse_conn(dir);
186 struct fuse_req *req = fuse_get_request(fc);
187 if (!req)
188 return -EINTR;
189
190 memset(&inarg, 0, sizeof(inarg));
191 inarg.mode = mode;
192 inarg.rdev = new_encode_dev(rdev);
193 req->in.h.opcode = FUSE_MKNOD;
194 req->in.numargs = 2;
195 req->in.args[0].size = sizeof(inarg);
196 req->in.args[0].value = &inarg;
197 req->in.args[1].size = entry->d_name.len + 1;
198 req->in.args[1].value = entry->d_name.name;
199 return create_new_entry(fc, req, dir, entry, mode);
200}
201
202static int fuse_create(struct inode *dir, struct dentry *entry, int mode,
203 struct nameidata *nd)
204{
205 return fuse_mknod(dir, entry, mode, 0);
206}
207
208static int fuse_mkdir(struct inode *dir, struct dentry *entry, int mode)
209{
210 struct fuse_mkdir_in inarg;
211 struct fuse_conn *fc = get_fuse_conn(dir);
212 struct fuse_req *req = fuse_get_request(fc);
213 if (!req)
214 return -EINTR;
215
216 memset(&inarg, 0, sizeof(inarg));
217 inarg.mode = mode;
218 req->in.h.opcode = FUSE_MKDIR;
219 req->in.numargs = 2;
220 req->in.args[0].size = sizeof(inarg);
221 req->in.args[0].value = &inarg;
222 req->in.args[1].size = entry->d_name.len + 1;
223 req->in.args[1].value = entry->d_name.name;
224 return create_new_entry(fc, req, dir, entry, S_IFDIR);
225}
226
227static int fuse_symlink(struct inode *dir, struct dentry *entry,
228 const char *link)
229{
230 struct fuse_conn *fc = get_fuse_conn(dir);
231 unsigned len = strlen(link) + 1;
232 struct fuse_req *req;
233
234 if (len > FUSE_SYMLINK_MAX)
235 return -ENAMETOOLONG;
236
237 req = fuse_get_request(fc);
238 if (!req)
239 return -EINTR;
240
241 req->in.h.opcode = FUSE_SYMLINK;
242 req->in.numargs = 2;
243 req->in.args[0].size = entry->d_name.len + 1;
244 req->in.args[0].value = entry->d_name.name;
245 req->in.args[1].size = len;
246 req->in.args[1].value = link;
247 return create_new_entry(fc, req, dir, entry, S_IFLNK);
248}
249
250static int fuse_unlink(struct inode *dir, struct dentry *entry)
251{
252 int err;
253 struct fuse_conn *fc = get_fuse_conn(dir);
254 struct fuse_req *req = fuse_get_request(fc);
255 if (!req)
256 return -EINTR;
257
258 req->in.h.opcode = FUSE_UNLINK;
259 req->in.h.nodeid = get_node_id(dir);
260 req->inode = dir;
261 req->in.numargs = 1;
262 req->in.args[0].size = entry->d_name.len + 1;
263 req->in.args[0].value = entry->d_name.name;
264 request_send(fc, req);
265 err = req->out.h.error;
266 fuse_put_request(fc, req);
267 if (!err) {
268 struct inode *inode = entry->d_inode;
269
270 /* Set nlink to zero so the inode can be cleared, if
271 the inode does have more links this will be
272 discovered at the next lookup/getattr */
273 inode->i_nlink = 0;
274 fuse_invalidate_attr(inode);
275 fuse_invalidate_attr(dir);
276 } else if (err == -EINTR)
277 fuse_invalidate_entry(entry);
278 return err;
279}
280
281static int fuse_rmdir(struct inode *dir, struct dentry *entry)
282{
283 int err;
284 struct fuse_conn *fc = get_fuse_conn(dir);
285 struct fuse_req *req = fuse_get_request(fc);
286 if (!req)
287 return -EINTR;
288
289 req->in.h.opcode = FUSE_RMDIR;
290 req->in.h.nodeid = get_node_id(dir);
291 req->inode = dir;
292 req->in.numargs = 1;
293 req->in.args[0].size = entry->d_name.len + 1;
294 req->in.args[0].value = entry->d_name.name;
295 request_send(fc, req);
296 err = req->out.h.error;
297 fuse_put_request(fc, req);
298 if (!err) {
299 entry->d_inode->i_nlink = 0;
300 fuse_invalidate_attr(dir);
301 } else if (err == -EINTR)
302 fuse_invalidate_entry(entry);
303 return err;
304}
305
306static int fuse_rename(struct inode *olddir, struct dentry *oldent,
307 struct inode *newdir, struct dentry *newent)
308{
309 int err;
310 struct fuse_rename_in inarg;
311 struct fuse_conn *fc = get_fuse_conn(olddir);
312 struct fuse_req *req = fuse_get_request(fc);
313 if (!req)
314 return -EINTR;
315
316 memset(&inarg, 0, sizeof(inarg));
317 inarg.newdir = get_node_id(newdir);
318 req->in.h.opcode = FUSE_RENAME;
319 req->in.h.nodeid = get_node_id(olddir);
320 req->inode = olddir;
321 req->inode2 = newdir;
322 req->in.numargs = 3;
323 req->in.args[0].size = sizeof(inarg);
324 req->in.args[0].value = &inarg;
325 req->in.args[1].size = oldent->d_name.len + 1;
326 req->in.args[1].value = oldent->d_name.name;
327 req->in.args[2].size = newent->d_name.len + 1;
328 req->in.args[2].value = newent->d_name.name;
329 request_send(fc, req);
330 err = req->out.h.error;
331 fuse_put_request(fc, req);
332 if (!err) {
333 fuse_invalidate_attr(olddir);
334 if (olddir != newdir)
335 fuse_invalidate_attr(newdir);
336 } else if (err == -EINTR) {
337 /* If request was interrupted, DEITY only knows if the
338 rename actually took place. If the invalidation
339 fails (e.g. some process has CWD under the renamed
340 directory), then there can be inconsistency between
341 the dcache and the real filesystem. Tough luck. */
342 fuse_invalidate_entry(oldent);
343 if (newent->d_inode)
344 fuse_invalidate_entry(newent);
345 }
346
347 return err;
348}
349
350static int fuse_link(struct dentry *entry, struct inode *newdir,
351 struct dentry *newent)
352{
353 int err;
354 struct fuse_link_in inarg;
355 struct inode *inode = entry->d_inode;
356 struct fuse_conn *fc = get_fuse_conn(inode);
357 struct fuse_req *req = fuse_get_request(fc);
358 if (!req)
359 return -EINTR;
360
361 memset(&inarg, 0, sizeof(inarg));
362 inarg.oldnodeid = get_node_id(inode);
363 req->in.h.opcode = FUSE_LINK;
364 req->inode2 = inode;
365 req->in.numargs = 2;
366 req->in.args[0].size = sizeof(inarg);
367 req->in.args[0].value = &inarg;
368 req->in.args[1].size = newent->d_name.len + 1;
369 req->in.args[1].value = newent->d_name.name;
370 err = create_new_entry(fc, req, newdir, newent, inode->i_mode);
371 /* Contrary to "normal" filesystems it can happen that link
372 makes two "logical" inodes point to the same "physical"
373 inode. We invalidate the attributes of the old one, so it
374 will reflect changes in the backing inode (link count,
375 etc.)
376 */
377 if (!err || err == -EINTR)
378 fuse_invalidate_attr(inode);
379 return err;
380}
381
382int fuse_do_getattr(struct inode *inode)
383{
384 int err;
385 struct fuse_attr_out arg;
386 struct fuse_conn *fc = get_fuse_conn(inode);
387 struct fuse_req *req = fuse_get_request(fc);
388 if (!req)
389 return -EINTR;
390
391 req->in.h.opcode = FUSE_GETATTR;
392 req->in.h.nodeid = get_node_id(inode);
393 req->inode = inode;
394 req->out.numargs = 1;
395 req->out.args[0].size = sizeof(arg);
396 req->out.args[0].value = &arg;
397 request_send(fc, req);
398 err = req->out.h.error;
399 fuse_put_request(fc, req);
400 if (!err) {
401 if ((inode->i_mode ^ arg.attr.mode) & S_IFMT) {
402 make_bad_inode(inode);
403 err = -EIO;
404 } else {
405 struct fuse_inode *fi = get_fuse_inode(inode);
406 fuse_change_attributes(inode, &arg.attr);
407 fi->i_time = time_to_jiffies(arg.attr_valid,
408 arg.attr_valid_nsec);
409 }
410 }
411 return err;
412}
413
414/*
415 * Calling into a user-controlled filesystem gives the filesystem
416 * daemon ptrace-like capabilities over the requester process. This
417 * means, that the filesystem daemon is able to record the exact
418 * filesystem operations performed, and can also control the behavior
419 * of the requester process in otherwise impossible ways. For example
420 * it can delay the operation for arbitrary length of time allowing
421 * DoS against the requester.
422 *
423 * For this reason only those processes can call into the filesystem,
424 * for which the owner of the mount has ptrace privilege. This
425 * excludes processes started by other users, suid or sgid processes.
426 */
427static int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task)
428{
429 if (fc->flags & FUSE_ALLOW_OTHER)
430 return 1;
431
432 if (task->euid == fc->user_id &&
433 task->suid == fc->user_id &&
434 task->uid == fc->user_id &&
435 task->egid == fc->group_id &&
436 task->sgid == fc->group_id &&
437 task->gid == fc->group_id)
438 return 1;
439
440 return 0;
441}
442
443static int fuse_revalidate(struct dentry *entry)
444{
445 struct inode *inode = entry->d_inode;
446 struct fuse_inode *fi = get_fuse_inode(inode);
447 struct fuse_conn *fc = get_fuse_conn(inode);
448
449 if (!fuse_allow_task(fc, current))
450 return -EACCES;
451 if (get_node_id(inode) != FUSE_ROOT_ID &&
452 time_before_eq(jiffies, fi->i_time))
453 return 0;
454
455 return fuse_do_getattr(inode);
456}
457
458static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd)
459{
460 struct fuse_conn *fc = get_fuse_conn(inode);
461
462 if (!fuse_allow_task(fc, current))
463 return -EACCES;
464 else if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
465 int err = generic_permission(inode, mask, NULL);
466
467 /* If permission is denied, try to refresh file
468 attributes. This is also needed, because the root
469 node will at first have no permissions */
470 if (err == -EACCES) {
471 err = fuse_do_getattr(inode);
472 if (!err)
473 err = generic_permission(inode, mask, NULL);
474 }
475
476 /* FIXME: Need some mechanism to revoke permissions:
477 currently if the filesystem suddenly changes the
478 file mode, we will not be informed about it, and
479 continue to allow access to the file/directory.
480
481 This is actually not so grave, since the user can
482 simply keep access to the file/directory anyway by
483 keeping it open... */
484
485 return err;
486 } else {
487 int mode = inode->i_mode;
488 if ((mask & MAY_WRITE) && IS_RDONLY(inode) &&
489 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
490 return -EROFS;
491 if ((mask & MAY_EXEC) && !S_ISDIR(mode) && !(mode & S_IXUGO))
492 return -EACCES;
493 return 0;
494 }
495}
496
497static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
498 void *dstbuf, filldir_t filldir)
499{
500 while (nbytes >= FUSE_NAME_OFFSET) {
501 struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
502 size_t reclen = FUSE_DIRENT_SIZE(dirent);
503 int over;
504 if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
505 return -EIO;
506 if (reclen > nbytes)
507 break;
508
509 over = filldir(dstbuf, dirent->name, dirent->namelen,
510 file->f_pos, dirent->ino, dirent->type);
511 if (over)
512 break;
513
514 buf += reclen;
515 nbytes -= reclen;
516 file->f_pos = dirent->off;
517 }
518
519 return 0;
520}
521
522static inline size_t fuse_send_readdir(struct fuse_req *req, struct file *file,
523 struct inode *inode, loff_t pos,
524 size_t count)
525{
526 return fuse_send_read_common(req, file, inode, pos, count, 1);
527}
528
529static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
530{
531 int err;
532 size_t nbytes;
533 struct page *page;
534 struct inode *inode = file->f_dentry->d_inode;
535 struct fuse_conn *fc = get_fuse_conn(inode);
536 struct fuse_req *req = fuse_get_request(fc);
537 if (!req)
538 return -EINTR;
539
540 page = alloc_page(GFP_KERNEL);
541 if (!page) {
542 fuse_put_request(fc, req);
543 return -ENOMEM;
544 }
545 req->num_pages = 1;
546 req->pages[0] = page;
547 nbytes = fuse_send_readdir(req, file, inode, file->f_pos, PAGE_SIZE);
548 err = req->out.h.error;
549 fuse_put_request(fc, req);
550 if (!err)
551 err = parse_dirfile(page_address(page), nbytes, file, dstbuf,
552 filldir);
553
554 __free_page(page);
555 fuse_invalidate_attr(inode); /* atime changed */
556 return err;
557}
558
559static char *read_link(struct dentry *dentry)
560{
561 struct inode *inode = dentry->d_inode;
562 struct fuse_conn *fc = get_fuse_conn(inode);
563 struct fuse_req *req = fuse_get_request(fc);
564 char *link;
565
566 if (!req)
567 return ERR_PTR(-EINTR);
568
569 link = (char *) __get_free_page(GFP_KERNEL);
570 if (!link) {
571 link = ERR_PTR(-ENOMEM);
572 goto out;
573 }
574 req->in.h.opcode = FUSE_READLINK;
575 req->in.h.nodeid = get_node_id(inode);
576 req->inode = inode;
577 req->out.argvar = 1;
578 req->out.numargs = 1;
579 req->out.args[0].size = PAGE_SIZE - 1;
580 req->out.args[0].value = link;
581 request_send(fc, req);
582 if (req->out.h.error) {
583 free_page((unsigned long) link);
584 link = ERR_PTR(req->out.h.error);
585 } else
586 link[req->out.args[0].size] = '\0';
587 out:
588 fuse_put_request(fc, req);
589 fuse_invalidate_attr(inode); /* atime changed */
590 return link;
591}
592
/*
 * Release a string returned by read_link().  Error pointers are
 * ignored, since no page is attached to them.
 */
static void free_link(char *link)
{
	if (IS_ERR(link))
		return;

	free_page((unsigned long) link);
}
598
599static void *fuse_follow_link(struct dentry *dentry, struct nameidata *nd)
600{
601 nd_set_link(nd, read_link(dentry));
602 return NULL;
603}
604
/* ->put_link(): free the link stored in @nd by fuse_follow_link(). */
static void fuse_put_link(struct dentry *dentry, struct nameidata *nd, void *c)
{
	char *target = nd_get_link(nd);

	free_link(target);
}
609
/* ->open() for directories: fuse_open_common() with isdir set. */
static int fuse_dir_open(struct inode *inode, struct file *file)
{
	const int isdir = 1;

	return fuse_open_common(inode, file, isdir);
}
614
/*
 * ->release() for directories: fuse_release_common() with its last
 * argument set to 1 (presumably the directory flag - confirm).
 */
static int fuse_dir_release(struct inode *inode, struct file *file)
{
	int err = fuse_release_common(inode, file, 1);

	return err;
}
619
/*
 * ->fsync() for directories.  Returns 0 without doing anything when
 * @file is NULL, otherwise the result of fuse_fsync_common().
 */
static int fuse_dir_fsync(struct file *file, struct dentry *de, int datasync)
{
	/* nfsd can call this with no file */
	if (!file)
		return 0;

	return fuse_fsync_common(file, de, datasync, 1);
}
625
626static unsigned iattr_to_fattr(struct iattr *iattr, struct fuse_attr *fattr)
627{
628 unsigned ivalid = iattr->ia_valid;
629 unsigned fvalid = 0;
630
631 memset(fattr, 0, sizeof(*fattr));
632
633 if (ivalid & ATTR_MODE)
634 fvalid |= FATTR_MODE, fattr->mode = iattr->ia_mode;
635 if (ivalid & ATTR_UID)
636 fvalid |= FATTR_UID, fattr->uid = iattr->ia_uid;
637 if (ivalid & ATTR_GID)
638 fvalid |= FATTR_GID, fattr->gid = iattr->ia_gid;
639 if (ivalid & ATTR_SIZE)
640 fvalid |= FATTR_SIZE, fattr->size = iattr->ia_size;
641 /* You can only _set_ these together (they may change by themselves) */
642 if ((ivalid & (ATTR_ATIME | ATTR_MTIME)) == (ATTR_ATIME | ATTR_MTIME)) {
643 fvalid |= FATTR_ATIME | FATTR_MTIME;
644 fattr->atime = iattr->ia_atime.tv_sec;
645 fattr->mtime = iattr->ia_mtime.tv_sec;
646 }
647
648 return fvalid;
649}
650
651static int fuse_setattr(struct dentry *entry, struct iattr *attr)
652{
653 struct inode *inode = entry->d_inode;
654 struct fuse_conn *fc = get_fuse_conn(inode);
655 struct fuse_inode *fi = get_fuse_inode(inode);
656 struct fuse_req *req;
657 struct fuse_setattr_in inarg;
658 struct fuse_attr_out outarg;
659 int err;
660 int is_truncate = 0;
661
662 if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
663 err = inode_change_ok(inode, attr);
664 if (err)
665 return err;
666 }
667
668 if (attr->ia_valid & ATTR_SIZE) {
669 unsigned long limit;
670 is_truncate = 1;
671 limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
672 if (limit != RLIM_INFINITY && attr->ia_size > (loff_t) limit) {
673 send_sig(SIGXFSZ, current, 0);
674 return -EFBIG;
675 }
676 }
677
678 req = fuse_get_request(fc);
679 if (!req)
680 return -EINTR;
681
682 memset(&inarg, 0, sizeof(inarg));
683 inarg.valid = iattr_to_fattr(attr, &inarg.attr);
684 req->in.h.opcode = FUSE_SETATTR;
685 req->in.h.nodeid = get_node_id(inode);
686 req->inode = inode;
687 req->in.numargs = 1;
688 req->in.args[0].size = sizeof(inarg);
689 req->in.args[0].value = &inarg;
690 req->out.numargs = 1;
691 req->out.args[0].size = sizeof(outarg);
692 req->out.args[0].value = &outarg;
693 request_send(fc, req);
694 err = req->out.h.error;
695 fuse_put_request(fc, req);
696 if (!err) {
697 if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
698 make_bad_inode(inode);
699 err = -EIO;
700 } else {
701 if (is_truncate) {
702 loff_t origsize = i_size_read(inode);
703 i_size_write(inode, outarg.attr.size);
704 if (origsize > outarg.attr.size)
705 vmtruncate(inode, outarg.attr.size);
706 }
707 fuse_change_attributes(inode, &outarg.attr);
708 fi->i_time = time_to_jiffies(outarg.attr_valid,
709 outarg.attr_valid_nsec);
710 }
711 } else if (err == -EINTR)
712 fuse_invalidate_attr(inode);
713
714 return err;
715}
716
717static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
718 struct kstat *stat)
719{
720 struct inode *inode = entry->d_inode;
721 int err = fuse_revalidate(entry);
722 if (!err)
723 generic_fillattr(inode, stat);
724
725 return err;
726}
727
728static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
729 struct nameidata *nd)
730{
731 struct inode *inode;
732 int err = fuse_lookup_iget(dir, entry, &inode);
733 if (err)
734 return ERR_PTR(err);
735 if (inode && S_ISDIR(inode->i_mode)) {
736 /* Don't allow creating an alias to a directory */
737 struct dentry *alias = d_find_alias(inode);
738 if (alias && !(alias->d_flags & DCACHE_DISCONNECTED)) {
739 dput(alias);
740 iput(inode);
741 return ERR_PTR(-EIO);
742 }
743 }
744 return d_splice_alias(inode, entry);
745}
746
747static int fuse_setxattr(struct dentry *entry, const char *name,
748 const void *value, size_t size, int flags)
749{
750 struct inode *inode = entry->d_inode;
751 struct fuse_conn *fc = get_fuse_conn(inode);
752 struct fuse_req *req;
753 struct fuse_setxattr_in inarg;
754 int err;
755
756 if (size > FUSE_XATTR_SIZE_MAX)
757 return -E2BIG;
758
759 if (fc->no_setxattr)
760 return -EOPNOTSUPP;
761
762 req = fuse_get_request(fc);
763 if (!req)
764 return -EINTR;
765
766 memset(&inarg, 0, sizeof(inarg));
767 inarg.size = size;
768 inarg.flags = flags;
769 req->in.h.opcode = FUSE_SETXATTR;
770 req->in.h.nodeid = get_node_id(inode);
771 req->inode = inode;
772 req->in.numargs = 3;
773 req->in.args[0].size = sizeof(inarg);
774 req->in.args[0].value = &inarg;
775 req->in.args[1].size = strlen(name) + 1;
776 req->in.args[1].value = name;
777 req->in.args[2].size = size;
778 req->in.args[2].value = value;
779 request_send(fc, req);
780 err = req->out.h.error;
781 fuse_put_request(fc, req);
782 if (err == -ENOSYS) {
783 fc->no_setxattr = 1;
784 err = -EOPNOTSUPP;
785 }
786 return err;
787}
788
789static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
790 void *value, size_t size)
791{
792 struct inode *inode = entry->d_inode;
793 struct fuse_conn *fc = get_fuse_conn(inode);
794 struct fuse_req *req;
795 struct fuse_getxattr_in inarg;
796 struct fuse_getxattr_out outarg;
797 ssize_t ret;
798
799 if (fc->no_getxattr)
800 return -EOPNOTSUPP;
801
802 req = fuse_get_request(fc);
803 if (!req)
804 return -EINTR;
805
806 memset(&inarg, 0, sizeof(inarg));
807 inarg.size = size;
808 req->in.h.opcode = FUSE_GETXATTR;
809 req->in.h.nodeid = get_node_id(inode);
810 req->inode = inode;
811 req->in.numargs = 2;
812 req->in.args[0].size = sizeof(inarg);
813 req->in.args[0].value = &inarg;
814 req->in.args[1].size = strlen(name) + 1;
815 req->in.args[1].value = name;
816 /* This is really two different operations rolled into one */
817 req->out.numargs = 1;
818 if (size) {
819 req->out.argvar = 1;
820 req->out.args[0].size = size;
821 req->out.args[0].value = value;
822 } else {
823 req->out.args[0].size = sizeof(outarg);
824 req->out.args[0].value = &outarg;
825 }
826 request_send(fc, req);
827 ret = req->out.h.error;
828 if (!ret)
829 ret = size ? req->out.args[0].size : outarg.size;
830 else {
831 if (ret == -ENOSYS) {
832 fc->no_getxattr = 1;
833 ret = -EOPNOTSUPP;
834 }
835 }
836 fuse_put_request(fc, req);
837 return ret;
838}
839
840static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
841{
842 struct inode *inode = entry->d_inode;
843 struct fuse_conn *fc = get_fuse_conn(inode);
844 struct fuse_req *req;
845 struct fuse_getxattr_in inarg;
846 struct fuse_getxattr_out outarg;
847 ssize_t ret;
848
849 if (fc->no_listxattr)
850 return -EOPNOTSUPP;
851
852 req = fuse_get_request(fc);
853 if (!req)
854 return -EINTR;
855
856 memset(&inarg, 0, sizeof(inarg));
857 inarg.size = size;
858 req->in.h.opcode = FUSE_LISTXATTR;
859 req->in.h.nodeid = get_node_id(inode);
860 req->inode = inode;
861 req->in.numargs = 1;
862 req->in.args[0].size = sizeof(inarg);
863 req->in.args[0].value = &inarg;
864 /* This is really two different operations rolled into one */
865 req->out.numargs = 1;
866 if (size) {
867 req->out.argvar = 1;
868 req->out.args[0].size = size;
869 req->out.args[0].value = list;
870 } else {
871 req->out.args[0].size = sizeof(outarg);
872 req->out.args[0].value = &outarg;
873 }
874 request_send(fc, req);
875 ret = req->out.h.error;
876 if (!ret)
877 ret = size ? req->out.args[0].size : outarg.size;
878 else {
879 if (ret == -ENOSYS) {
880 fc->no_listxattr = 1;
881 ret = -EOPNOTSUPP;
882 }
883 }
884 fuse_put_request(fc, req);
885 return ret;
886}
887
888static int fuse_removexattr(struct dentry *entry, const char *name)
889{
890 struct inode *inode = entry->d_inode;
891 struct fuse_conn *fc = get_fuse_conn(inode);
892 struct fuse_req *req;
893 int err;
894
895 if (fc->no_removexattr)
896 return -EOPNOTSUPP;
897
898 req = fuse_get_request(fc);
899 if (!req)
900 return -EINTR;
901
902 req->in.h.opcode = FUSE_REMOVEXATTR;
903 req->in.h.nodeid = get_node_id(inode);
904 req->inode = inode;
905 req->in.numargs = 1;
906 req->in.args[0].size = strlen(name) + 1;
907 req->in.args[0].value = name;
908 request_send(fc, req);
909 err = req->out.h.error;
910 fuse_put_request(fc, req);
911 if (err == -ENOSYS) {
912 fc->no_removexattr = 1;
913 err = -EOPNOTSUPP;
914 }
915 return err;
916}
917
918static struct inode_operations fuse_dir_inode_operations = {
919 .lookup = fuse_lookup,
920 .mkdir = fuse_mkdir,
921 .symlink = fuse_symlink,
922 .unlink = fuse_unlink,
923 .rmdir = fuse_rmdir,
924 .rename = fuse_rename,
925 .link = fuse_link,
926 .setattr = fuse_setattr,
927 .create = fuse_create,
928 .mknod = fuse_mknod,
929 .permission = fuse_permission,
930 .getattr = fuse_getattr,
931 .setxattr = fuse_setxattr,
932 .getxattr = fuse_getxattr,
933 .listxattr = fuse_listxattr,
934 .removexattr = fuse_removexattr,
935};
936
937static struct file_operations fuse_dir_operations = {
938 .llseek = generic_file_llseek,
939 .read = generic_read_dir,
940 .readdir = fuse_readdir,
941 .open = fuse_dir_open,
942 .release = fuse_dir_release,
943 .fsync = fuse_dir_fsync,
944};
945
946static struct inode_operations fuse_common_inode_operations = {
947 .setattr = fuse_setattr,
948 .permission = fuse_permission,
949 .getattr = fuse_getattr,
950 .setxattr = fuse_setxattr,
951 .getxattr = fuse_getxattr,
952 .listxattr = fuse_listxattr,
953 .removexattr = fuse_removexattr,
954};
955
956static struct inode_operations fuse_symlink_inode_operations = {
957 .setattr = fuse_setattr,
958 .follow_link = fuse_follow_link,
959 .put_link = fuse_put_link,
960 .readlink = generic_readlink,
961 .getattr = fuse_getattr,
962 .setxattr = fuse_setxattr,
963 .getxattr = fuse_getxattr,
964 .listxattr = fuse_listxattr,
965 .removexattr = fuse_removexattr,
966};
967
968void fuse_init_common(struct inode *inode)
969{
970 inode->i_op = &fuse_common_inode_operations;
971}
972
973void fuse_init_dir(struct inode *inode)
974{
975 inode->i_op = &fuse_dir_inode_operations;
976 inode->i_fop = &fuse_dir_operations;
977}
978
979void fuse_init_symlink(struct inode *inode)
980{
981 inode->i_op = &fuse_symlink_inode_operations;
982}
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
new file mode 100644
index 000000000000..6454022b0536
--- /dev/null
+++ b/fs/fuse/file.c
@@ -0,0 +1,555 @@
1/*
2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu>
4
5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING.
7*/
8
9#include "fuse_i.h"
10
11#include <linux/pagemap.h>
12#include <linux/slab.h>
13#include <linux/kernel.h>
14
15static struct file_operations fuse_direct_io_file_operations;
16
17int fuse_open_common(struct inode *inode, struct file *file, int isdir)
18{
19 struct fuse_conn *fc = get_fuse_conn(inode);
20 struct fuse_req *req;
21 struct fuse_open_in inarg;
22 struct fuse_open_out outarg;
23 struct fuse_file *ff;
24 int err;
25
26 err = generic_file_open(inode, file);
27 if (err)
28 return err;
29
30 /* If opening the root node, no lookup has been performed on
31 it, so the attributes must be refreshed */
32 if (get_node_id(inode) == FUSE_ROOT_ID) {
33 int err = fuse_do_getattr(inode);
34 if (err)
35 return err;
36 }
37
38 req = fuse_get_request(fc);
39 if (!req)
40 return -EINTR;
41
42 err = -ENOMEM;
43 ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL);
44 if (!ff)
45 goto out_put_request;
46
47 ff->release_req = fuse_request_alloc();
48 if (!ff->release_req) {
49 kfree(ff);
50 goto out_put_request;
51 }
52
53 memset(&inarg, 0, sizeof(inarg));
54 inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
55 req->in.h.opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
56 req->in.h.nodeid = get_node_id(inode);
57 req->inode = inode;
58 req->in.numargs = 1;
59 req->in.args[0].size = sizeof(inarg);
60 req->in.args[0].value = &inarg;
61 req->out.numargs = 1;
62 req->out.args[0].size = sizeof(outarg);
63 req->out.args[0].value = &outarg;
64 request_send(fc, req);
65 err = req->out.h.error;
66 if (err) {
67 fuse_request_free(ff->release_req);
68 kfree(ff);
69 } else {
70 if (!isdir && (outarg.open_flags & FOPEN_DIRECT_IO))
71 file->f_op = &fuse_direct_io_file_operations;
72 if (!(outarg.open_flags & FOPEN_KEEP_CACHE))
73 invalidate_inode_pages(inode->i_mapping);
74 ff->fh = outarg.fh;
75 file->private_data = ff;
76 }
77
78 out_put_request:
79 fuse_put_request(fc, req);
80 return err;
81}
82
83int fuse_release_common(struct inode *inode, struct file *file, int isdir)
84{
85 struct fuse_conn *fc = get_fuse_conn(inode);
86 struct fuse_file *ff = file->private_data;
87 struct fuse_req *req = ff->release_req;
88 struct fuse_release_in *inarg = &req->misc.release_in;
89
90 inarg->fh = ff->fh;
91 inarg->flags = file->f_flags & ~O_EXCL;
92 req->in.h.opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE;
93 req->in.h.nodeid = get_node_id(inode);
94 req->inode = inode;
95 req->in.numargs = 1;
96 req->in.args[0].size = sizeof(struct fuse_release_in);
97 req->in.args[0].value = inarg;
98 request_send_background(fc, req);
99 kfree(ff);
100
101 /* Return value is ignored by VFS */
102 return 0;
103}
104
/* open() for regular files: fuse_open_common() with isdir == 0 */
static int fuse_open(struct inode *inode, struct file *file)
{
	const int isdir = 0;

	return fuse_open_common(inode, file, isdir);
}
109
/* release() for regular files: fuse_release_common() with isdir == 0 */
static int fuse_release(struct inode *inode, struct file *file)
{
	const int isdir = 0;

	return fuse_release_common(inode, file, isdir);
}
114
115static int fuse_flush(struct file *file)
116{
117 struct inode *inode = file->f_dentry->d_inode;
118 struct fuse_conn *fc = get_fuse_conn(inode);
119 struct fuse_file *ff = file->private_data;
120 struct fuse_req *req;
121 struct fuse_flush_in inarg;
122 int err;
123
124 if (fc->no_flush)
125 return 0;
126
127 req = fuse_get_request(fc);
128 if (!req)
129 return -EINTR;
130
131 memset(&inarg, 0, sizeof(inarg));
132 inarg.fh = ff->fh;
133 req->in.h.opcode = FUSE_FLUSH;
134 req->in.h.nodeid = get_node_id(inode);
135 req->inode = inode;
136 req->file = file;
137 req->in.numargs = 1;
138 req->in.args[0].size = sizeof(inarg);
139 req->in.args[0].value = &inarg;
140 request_send(fc, req);
141 err = req->out.h.error;
142 fuse_put_request(fc, req);
143 if (err == -ENOSYS) {
144 fc->no_flush = 1;
145 err = 0;
146 }
147 return err;
148}
149
150int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
151 int isdir)
152{
153 struct inode *inode = de->d_inode;
154 struct fuse_conn *fc = get_fuse_conn(inode);
155 struct fuse_file *ff = file->private_data;
156 struct fuse_req *req;
157 struct fuse_fsync_in inarg;
158 int err;
159
160 if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
161 return 0;
162
163 req = fuse_get_request(fc);
164 if (!req)
165 return -EINTR;
166
167 memset(&inarg, 0, sizeof(inarg));
168 inarg.fh = ff->fh;
169 inarg.fsync_flags = datasync ? 1 : 0;
170 req->in.h.opcode = isdir ? FUSE_FSYNCDIR : FUSE_FSYNC;
171 req->in.h.nodeid = get_node_id(inode);
172 req->inode = inode;
173 req->file = file;
174 req->in.numargs = 1;
175 req->in.args[0].size = sizeof(inarg);
176 req->in.args[0].value = &inarg;
177 request_send(fc, req);
178 err = req->out.h.error;
179 fuse_put_request(fc, req);
180 if (err == -ENOSYS) {
181 if (isdir)
182 fc->no_fsyncdir = 1;
183 else
184 fc->no_fsync = 1;
185 err = 0;
186 }
187 return err;
188}
189
/* fsync() for regular files: fuse_fsync_common() with isdir == 0 */
static int fuse_fsync(struct file *file, struct dentry *de, int datasync)
{
	const int isdir = 0;

	return fuse_fsync_common(file, de, datasync, isdir);
}
194
195size_t fuse_send_read_common(struct fuse_req *req, struct file *file,
196 struct inode *inode, loff_t pos, size_t count,
197 int isdir)
198{
199 struct fuse_conn *fc = get_fuse_conn(inode);
200 struct fuse_file *ff = file->private_data;
201 struct fuse_read_in inarg;
202
203 memset(&inarg, 0, sizeof(struct fuse_read_in));
204 inarg.fh = ff->fh;
205 inarg.offset = pos;
206 inarg.size = count;
207 req->in.h.opcode = isdir ? FUSE_READDIR : FUSE_READ;
208 req->in.h.nodeid = get_node_id(inode);
209 req->inode = inode;
210 req->file = file;
211 req->in.numargs = 1;
212 req->in.args[0].size = sizeof(struct fuse_read_in);
213 req->in.args[0].value = &inarg;
214 req->out.argpages = 1;
215 req->out.argvar = 1;
216 req->out.numargs = 1;
217 req->out.args[0].size = count;
218 request_send(fc, req);
219 return req->out.args[0].size;
220}
221
222static inline size_t fuse_send_read(struct fuse_req *req, struct file *file,
223 struct inode *inode, loff_t pos,
224 size_t count)
225{
226 return fuse_send_read_common(req, file, inode, pos, count, 0);
227}
228
229static int fuse_readpage(struct file *file, struct page *page)
230{
231 struct inode *inode = page->mapping->host;
232 struct fuse_conn *fc = get_fuse_conn(inode);
233 loff_t pos = (loff_t) page->index << PAGE_CACHE_SHIFT;
234 struct fuse_req *req = fuse_get_request(fc);
235 int err = -EINTR;
236 if (!req)
237 goto out;
238
239 req->out.page_zeroing = 1;
240 req->num_pages = 1;
241 req->pages[0] = page;
242 fuse_send_read(req, file, inode, pos, PAGE_CACHE_SIZE);
243 err = req->out.h.error;
244 fuse_put_request(fc, req);
245 if (!err)
246 SetPageUptodate(page);
247 fuse_invalidate_attr(inode); /* atime changed */
248 out:
249 unlock_page(page);
250 return err;
251}
252
253static int fuse_send_readpages(struct fuse_req *req, struct file *file,
254 struct inode *inode)
255{
256 loff_t pos = (loff_t) req->pages[0]->index << PAGE_CACHE_SHIFT;
257 size_t count = req->num_pages << PAGE_CACHE_SHIFT;
258 unsigned i;
259 req->out.page_zeroing = 1;
260 fuse_send_read(req, file, inode, pos, count);
261 for (i = 0; i < req->num_pages; i++) {
262 struct page *page = req->pages[i];
263 if (!req->out.h.error)
264 SetPageUptodate(page);
265 unlock_page(page);
266 }
267 return req->out.h.error;
268}
269
/* State handed to fuse_readpages_fill() through read_cache_pages() */
struct fuse_readpages_data {
	/* request the pages are being collected into */
	struct fuse_req *req;
	/* file being read */
	struct file *file;
	/* inode the pages belong to */
	struct inode *inode;
};
275
276static int fuse_readpages_fill(void *_data, struct page *page)
277{
278 struct fuse_readpages_data *data = _data;
279 struct fuse_req *req = data->req;
280 struct inode *inode = data->inode;
281 struct fuse_conn *fc = get_fuse_conn(inode);
282
283 if (req->num_pages &&
284 (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
285 (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
286 req->pages[req->num_pages - 1]->index + 1 != page->index)) {
287 int err = fuse_send_readpages(req, data->file, inode);
288 if (err) {
289 unlock_page(page);
290 return err;
291 }
292 fuse_reset_request(req);
293 }
294 req->pages[req->num_pages] = page;
295 req->num_pages ++;
296 return 0;
297}
298
299static int fuse_readpages(struct file *file, struct address_space *mapping,
300 struct list_head *pages, unsigned nr_pages)
301{
302 struct inode *inode = mapping->host;
303 struct fuse_conn *fc = get_fuse_conn(inode);
304 struct fuse_readpages_data data;
305 int err;
306 data.file = file;
307 data.inode = inode;
308 data.req = fuse_get_request(fc);
309 if (!data.req)
310 return -EINTR;
311
312 err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
313 if (!err && data.req->num_pages)
314 err = fuse_send_readpages(data.req, file, inode);
315 fuse_put_request(fc, data.req);
316 fuse_invalidate_attr(inode); /* atime changed */
317 return err;
318}
319
320static size_t fuse_send_write(struct fuse_req *req, struct file *file,
321 struct inode *inode, loff_t pos, size_t count)
322{
323 struct fuse_conn *fc = get_fuse_conn(inode);
324 struct fuse_file *ff = file->private_data;
325 struct fuse_write_in inarg;
326 struct fuse_write_out outarg;
327
328 memset(&inarg, 0, sizeof(struct fuse_write_in));
329 inarg.fh = ff->fh;
330 inarg.offset = pos;
331 inarg.size = count;
332 req->in.h.opcode = FUSE_WRITE;
333 req->in.h.nodeid = get_node_id(inode);
334 req->inode = inode;
335 req->file = file;
336 req->in.argpages = 1;
337 req->in.numargs = 2;
338 req->in.args[0].size = sizeof(struct fuse_write_in);
339 req->in.args[0].value = &inarg;
340 req->in.args[1].size = count;
341 req->out.numargs = 1;
342 req->out.args[0].size = sizeof(struct fuse_write_out);
343 req->out.args[0].value = &outarg;
344 request_send(fc, req);
345 return outarg.size;
346}
347
/* prepare_write has nothing to do here; always succeeds */
static int fuse_prepare_write(struct file *file, struct page *page,
			      unsigned offset, unsigned to)
{
	return 0;
}
354
355static int fuse_commit_write(struct file *file, struct page *page,
356 unsigned offset, unsigned to)
357{
358 int err;
359 size_t nres;
360 unsigned count = to - offset;
361 struct inode *inode = page->mapping->host;
362 struct fuse_conn *fc = get_fuse_conn(inode);
363 loff_t pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + offset;
364 struct fuse_req *req = fuse_get_request(fc);
365 if (!req)
366 return -EINTR;
367
368 req->num_pages = 1;
369 req->pages[0] = page;
370 req->page_offset = offset;
371 nres = fuse_send_write(req, file, inode, pos, count);
372 err = req->out.h.error;
373 fuse_put_request(fc, req);
374 if (!err && nres != count)
375 err = -EIO;
376 if (!err) {
377 pos += count;
378 if (pos > i_size_read(inode))
379 i_size_write(inode, pos);
380
381 if (offset == 0 && to == PAGE_CACHE_SIZE) {
382 clear_page_dirty(page);
383 SetPageUptodate(page);
384 }
385 }
386 fuse_invalidate_attr(inode);
387 return err;
388}
389
390static void fuse_release_user_pages(struct fuse_req *req, int write)
391{
392 unsigned i;
393
394 for (i = 0; i < req->num_pages; i++) {
395 struct page *page = req->pages[i];
396 if (write)
397 set_page_dirty_lock(page);
398 put_page(page);
399 }
400}
401
402static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf,
403 unsigned nbytes, int write)
404{
405 unsigned long user_addr = (unsigned long) buf;
406 unsigned offset = user_addr & ~PAGE_MASK;
407 int npages;
408
409 /* This doesn't work with nfsd */
410 if (!current->mm)
411 return -EPERM;
412
413 nbytes = min(nbytes, (unsigned) FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT);
414 npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
415 npages = min(npages, FUSE_MAX_PAGES_PER_REQ);
416 down_read(&current->mm->mmap_sem);
417 npages = get_user_pages(current, current->mm, user_addr, npages, write,
418 0, req->pages, NULL);
419 up_read(&current->mm->mmap_sem);
420 if (npages < 0)
421 return npages;
422
423 req->num_pages = npages;
424 req->page_offset = offset;
425 return 0;
426}
427
428static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
429 size_t count, loff_t *ppos, int write)
430{
431 struct inode *inode = file->f_dentry->d_inode;
432 struct fuse_conn *fc = get_fuse_conn(inode);
433 size_t nmax = write ? fc->max_write : fc->max_read;
434 loff_t pos = *ppos;
435 ssize_t res = 0;
436 struct fuse_req *req = fuse_get_request(fc);
437 if (!req)
438 return -EINTR;
439
440 while (count) {
441 size_t tmp;
442 size_t nres;
443 size_t nbytes = min(count, nmax);
444 int err = fuse_get_user_pages(req, buf, nbytes, !write);
445 if (err) {
446 res = err;
447 break;
448 }
449 tmp = (req->num_pages << PAGE_SHIFT) - req->page_offset;
450 nbytes = min(nbytes, tmp);
451 if (write)
452 nres = fuse_send_write(req, file, inode, pos, nbytes);
453 else
454 nres = fuse_send_read(req, file, inode, pos, nbytes);
455 fuse_release_user_pages(req, !write);
456 if (req->out.h.error) {
457 if (!res)
458 res = req->out.h.error;
459 break;
460 } else if (nres > nbytes) {
461 res = -EIO;
462 break;
463 }
464 count -= nres;
465 res += nres;
466 pos += nres;
467 buf += nres;
468 if (nres != nbytes)
469 break;
470 if (count)
471 fuse_reset_request(req);
472 }
473 fuse_put_request(fc, req);
474 if (res > 0) {
475 if (write && pos > i_size_read(inode))
476 i_size_write(inode, pos);
477 *ppos = pos;
478 }
479 fuse_invalidate_attr(inode);
480
481 return res;
482}
483
484static ssize_t fuse_direct_read(struct file *file, char __user *buf,
485 size_t count, loff_t *ppos)
486{
487 return fuse_direct_io(file, buf, count, ppos, 0);
488}
489
490static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
491 size_t count, loff_t *ppos)
492{
493 struct inode *inode = file->f_dentry->d_inode;
494 ssize_t res;
495 /* Don't allow parallel writes to the same file */
496 down(&inode->i_sem);
497 res = fuse_direct_io(file, buf, count, ppos, 1);
498 up(&inode->i_sem);
499 return res;
500}
501
502static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
503{
504 if ((vma->vm_flags & VM_SHARED)) {
505 if ((vma->vm_flags & VM_WRITE))
506 return -ENODEV;
507 else
508 vma->vm_flags &= ~VM_MAYWRITE;
509 }
510 return generic_file_mmap(file, vma);
511}
512
/*
 * set_page_dirty handler that is not expected to be called: it logs
 * a message and a stack dump, and returns 0.
 */
static int fuse_set_page_dirty(struct page *page)
{
	printk("fuse_set_page_dirty: should not happen\n");
	dump_stack();

	return 0;
}
519
520static struct file_operations fuse_file_operations = {
521 .llseek = generic_file_llseek,
522 .read = generic_file_read,
523 .write = generic_file_write,
524 .mmap = fuse_file_mmap,
525 .open = fuse_open,
526 .flush = fuse_flush,
527 .release = fuse_release,
528 .fsync = fuse_fsync,
529 .sendfile = generic_file_sendfile,
530};
531
532static struct file_operations fuse_direct_io_file_operations = {
533 .llseek = generic_file_llseek,
534 .read = fuse_direct_read,
535 .write = fuse_direct_write,
536 .open = fuse_open,
537 .flush = fuse_flush,
538 .release = fuse_release,
539 .fsync = fuse_fsync,
540 /* no mmap and sendfile */
541};
542
543static struct address_space_operations fuse_file_aops = {
544 .readpage = fuse_readpage,
545 .prepare_write = fuse_prepare_write,
546 .commit_write = fuse_commit_write,
547 .readpages = fuse_readpages,
548 .set_page_dirty = fuse_set_page_dirty,
549};
550
551void fuse_init_file_inode(struct inode *inode)
552{
553 inode->i_fop = &fuse_file_operations;
554 inode->i_data.a_ops = &fuse_file_aops;
555}
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
new file mode 100644
index 000000000000..24d761518d86
--- /dev/null
+++ b/fs/fuse/fuse_i.h
@@ -0,0 +1,451 @@
1/*
2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu>
4
5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING.
7*/
8
9#include <linux/fuse.h>
10#include <linux/fs.h>
11#include <linux/wait.h>
12#include <linux/list.h>
13#include <linux/spinlock.h>
14#include <linux/mm.h>
15#include <linux/backing-dev.h>
16#include <asm/semaphore.h>
17
18/** Max number of pages that can be used in a single read request */
19#define FUSE_MAX_PAGES_PER_REQ 32
20
21/** If more requests are outstanding, then the operation will block */
22#define FUSE_MAX_OUTSTANDING 10
23
24/** If the FUSE_DEFAULT_PERMISSIONS flag is given, the filesystem
25 module will check permissions based on the file mode. Otherwise no
26 permission checking is done in the kernel */
27#define FUSE_DEFAULT_PERMISSIONS (1 << 0)
28
29/** If the FUSE_ALLOW_OTHER flag is given, then not only the user
30 doing the mount will be allowed to access the filesystem */
31#define FUSE_ALLOW_OTHER (1 << 1)
32
33
34/** FUSE inode */
35struct fuse_inode {
36 /** Inode data */
37 struct inode inode;
38
39 /** Unique ID, which identifies the inode between userspace
40 * and kernel */
41 u64 nodeid;
42
43 /** Number of lookups on this inode */
44 u64 nlookup;
45
46 /** The request used for sending the FORGET message */
47 struct fuse_req *forget_req;
48
49 /** Time in jiffies until the file attributes are valid */
50 unsigned long i_time;
51};
52
53/** FUSE specific file data */
54struct fuse_file {
55 /** Request reserved for flush and release */
56 struct fuse_req *release_req;
57
58 /** File handle used by userspace */
59 u64 fh;
60};
61
/** A single input argument of a request */
struct fuse_in_arg {
	/** Size of the argument */
	unsigned size;
	/** The argument data (read only) */
	const void *value;
};
67
68/** The request input */
69struct fuse_in {
70 /** The request header */
71 struct fuse_in_header h;
72
73 /** True if the data for the last argument is in req->pages */
74 unsigned argpages:1;
75
76 /** Number of arguments */
77 unsigned numargs;
78
79 /** Array of arguments */
80 struct fuse_in_arg args[3];
81};
82
/** A single output argument of a request */
struct fuse_arg {
	/** Size of the argument */
	unsigned size;
	/** Buffer for the argument data */
	void *value;
};
88
89/** The request output */
90struct fuse_out {
91 /** Header returned from userspace */
92 struct fuse_out_header h;
93
94 /** Last argument is variable length (can be shorter than
95 arg->size) */
96 unsigned argvar:1;
97
98 /** Last argument is a list of pages to copy data to */
99 unsigned argpages:1;
100
101 /** Zero partially or not copied pages */
102 unsigned page_zeroing:1;
103
104 /** Number or arguments */
105 unsigned numargs;
106
107 /** Array of arguments */
108 struct fuse_arg args[3];
109};
110
111struct fuse_req;
112struct fuse_conn;
113
114/**
115 * A request to the client
116 */
117struct fuse_req {
118 /** This can be on either unused_list, pending or processing
119 lists in fuse_conn */
120 struct list_head list;
121
122 /** Entry on the background list */
123 struct list_head bg_entry;
124
125 /** refcount */
126 atomic_t count;
127
128 /** True if the request has reply */
129 unsigned isreply:1;
130
131 /** The request is preallocated */
132 unsigned preallocated:1;
133
134 /** The request was interrupted */
135 unsigned interrupted:1;
136
137 /** Request is sent in the background */
138 unsigned background:1;
139
140 /** Data is being copied to/from the request */
141 unsigned locked:1;
142
143 /** Request has been sent to userspace */
144 unsigned sent:1;
145
146 /** The request is finished */
147 unsigned finished:1;
148
149 /** The request input */
150 struct fuse_in in;
151
152 /** The request output */
153 struct fuse_out out;
154
155 /** Used to wake up the task waiting for completion of request*/
156 wait_queue_head_t waitq;
157
158 /** Data for asynchronous requests */
159 union {
160 struct fuse_forget_in forget_in;
161 struct fuse_release_in release_in;
162 struct fuse_init_in_out init_in_out;
163 } misc;
164
165 /** page vector */
166 struct page *pages[FUSE_MAX_PAGES_PER_REQ];
167
168 /** number of pages in vector */
169 unsigned num_pages;
170
171 /** offset of data on first page */
172 unsigned page_offset;
173
174 /** Inode used in the request */
175 struct inode *inode;
176
177 /** Second inode used in the request (or NULL) */
178 struct inode *inode2;
179
180 /** File used in the request (or NULL) */
181 struct file *file;
182};
183
184/**
185 * A Fuse connection.
186 *
187 * This structure is created, when the filesystem is mounted, and is
188 * destroyed, when the client device is closed and the filesystem is
189 * unmounted.
190 */
191struct fuse_conn {
192 /** Reference count */
193 int count;
194
195 /** The user id for this mount */
196 uid_t user_id;
197
198 /** The group id for this mount */
199 gid_t group_id;
200
201 /** The fuse mount flags for this mount */
202 unsigned flags;
203
204 /** Maximum read size */
205 unsigned max_read;
206
207 /** Maximum write size */
208 unsigned max_write;
209
210 /** Readers of the connection are waiting on this */
211 wait_queue_head_t waitq;
212
213 /** The list of pending requests */
214 struct list_head pending;
215
216 /** The list of requests being processed */
217 struct list_head processing;
218
219 /** Requests put in the background (RELEASE or any other
220 interrupted request) */
221 struct list_head background;
222
223 /** Controls the maximum number of outstanding requests */
224 struct semaphore outstanding_sem;
225
226 /** This counts the number of outstanding requests if
227 outstanding_sem would go negative */
228 unsigned outstanding_debt;
229
230 /** RW semaphore for exclusion with fuse_put_super() */
231 struct rw_semaphore sbput_sem;
232
233 /** The list of unused requests */
234 struct list_head unused_list;
235
236 /** The next unique request id */
237 u64 reqctr;
238
239 /** Mount is active */
240 unsigned mounted : 1;
241
242 /** Connection established */
243 unsigned connected : 1;
244
245 /** Connection failed (version mismatch) */
246 unsigned conn_error : 1;
247
248 /** Is fsync not implemented by fs? */
249 unsigned no_fsync : 1;
250
251 /** Is fsyncdir not implemented by fs? */
252 unsigned no_fsyncdir : 1;
253
254 /** Is flush not implemented by fs? */
255 unsigned no_flush : 1;
256
257 /** Is setxattr not implemented by fs? */
258 unsigned no_setxattr : 1;
259
260 /** Is getxattr not implemented by fs? */
261 unsigned no_getxattr : 1;
262
263 /** Is listxattr not implemented by fs? */
264 unsigned no_listxattr : 1;
265
266 /** Is removexattr not implemented by fs? */
267 unsigned no_removexattr : 1;
268
269 /** Backing dev info */
270 struct backing_dev_info bdi;
271};
272
273static inline struct fuse_conn **get_fuse_conn_super_p(struct super_block *sb)
274{
275 return (struct fuse_conn **) &sb->s_fs_info;
276}
277
/** The connection stored in the super block's s_fs_info field */
static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
{
	struct fuse_conn **fcp = get_fuse_conn_super_p(sb);

	return *fcp;
}
282
283static inline struct fuse_conn *get_fuse_conn(struct inode *inode)
284{
285 return get_fuse_conn_super(inode->i_sb);
286}
287
288static inline struct fuse_inode *get_fuse_inode(struct inode *inode)
289{
290 return container_of(inode, struct fuse_inode, inode);
291}
292
293static inline u64 get_node_id(struct inode *inode)
294{
295 return get_fuse_inode(inode)->nodeid;
296}
297
298/** Device operations */
299extern struct file_operations fuse_dev_operations;
300
301/**
302 * This is the single global spinlock which protects FUSE's structures
303 *
304 * The following data is protected by this lock:
305 *
306 * - the private_data field of the device file
307 * - the s_fs_info field of the super block
308 * - unused_list, pending, processing lists in fuse_conn
309 * - background list in fuse_conn
310 * - the unique request ID counter reqctr in fuse_conn
311 * - the sb (super_block) field in fuse_conn
312 * - the file (device file) field in fuse_conn
313 */
314extern spinlock_t fuse_lock;
315
316/**
317 * Get a filled in inode
318 */
319struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
320 int generation, struct fuse_attr *attr);
321
322/**
323 * Send FORGET command
324 */
325void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
326 unsigned long nodeid, u64 nlookup);
327
328/**
329 * Send READ or READDIR request
330 */
331size_t fuse_send_read_common(struct fuse_req *req, struct file *file,
332 struct inode *inode, loff_t pos, size_t count,
333 int isdir);
334
335/**
336 * Send OPEN or OPENDIR request
337 */
338int fuse_open_common(struct inode *inode, struct file *file, int isdir);
339
340/**
341 * Send RELEASE or RELEASEDIR request
342 */
343int fuse_release_common(struct inode *inode, struct file *file, int isdir);
344
345/**
346 * Send FSYNC or FSYNCDIR request
347 */
348int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
349 int isdir);
350
351/**
352 * Initialise file operations on a regular file
353 */
354void fuse_init_file_inode(struct inode *inode);
355
356/**
357 * Initialise inode operations on regular files and special files
358 */
359void fuse_init_common(struct inode *inode);
360
361/**
362 * Initialise inode and file operations on a directory
363 */
364void fuse_init_dir(struct inode *inode);
365
366/**
367 * Initialise inode operations on a symlink
368 */
369void fuse_init_symlink(struct inode *inode);
370
371/**
372 * Change attributes of an inode
373 */
374void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr);
375
376/**
377 * Check if the connection can be released, and if yes, then free the
378 * connection structure
379 */
380void fuse_release_conn(struct fuse_conn *fc);
381
382/**
383 * Initialize the client device
384 */
385int fuse_dev_init(void);
386
387/**
388 * Cleanup the client device
389 */
390void fuse_dev_cleanup(void);
391
392/**
393 * Allocate a request
394 */
395struct fuse_req *fuse_request_alloc(void);
396
397/**
398 * Free a request
399 */
400void fuse_request_free(struct fuse_req *req);
401
402/**
403 * Reinitialize a request, the preallocated flag is left unmodified
404 */
405void fuse_reset_request(struct fuse_req *req);
406
407/**
408 * Reserve a preallocated request
409 */
410struct fuse_req *fuse_get_request(struct fuse_conn *fc);
411
412/**
413 * Decrement reference count of a request. If count goes to zero put
414 * on unused list (preallocated) or free request (not preallocated).
415 */
416void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req);
417
418/**
419 * Send a request (synchronous)
420 */
421void request_send(struct fuse_conn *fc, struct fuse_req *req);
422
423/**
424 * Send a request with no reply
425 */
426void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req);
427
428/**
429 * Send a request in the background
430 */
431void request_send_background(struct fuse_conn *fc, struct fuse_req *req);
432
433/**
434 * Release inodes and file associated with background request
435 */
436void fuse_release_background(struct fuse_req *req);
437
438/**
439 * Get the attributes of a file
440 */
441int fuse_do_getattr(struct inode *inode);
442
443/**
444 * Invalidate inode attributes
445 */
446void fuse_invalidate_attr(struct inode *inode);
447
448/**
449 * Send the INIT message
450 */
451void fuse_send_init(struct fuse_conn *fc);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
new file mode 100644
index 000000000000..e69a546844d0
--- /dev/null
+++ b/fs/fuse/inode.c
@@ -0,0 +1,591 @@
1/*
2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu>
4
5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING.
7*/
8
9#include "fuse_i.h"
10
11#include <linux/pagemap.h>
12#include <linux/slab.h>
13#include <linux/file.h>
14#include <linux/mount.h>
15#include <linux/seq_file.h>
16#include <linux/init.h>
17#include <linux/module.h>
18#include <linux/parser.h>
19#include <linux/statfs.h>
20
21MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
22MODULE_DESCRIPTION("Filesystem in Userspace");
23MODULE_LICENSE("GPL");
24
25spinlock_t fuse_lock;
26static kmem_cache_t *fuse_inode_cachep;
27
28#define FUSE_SUPER_MAGIC 0x65735546
29
/*
 * Mount options collected by parse_fuse_opt().  The *_present bits record
 * which of the mandatory options (fd, rootmode, user_id, group_id) were
 * found in the option string; parse_fuse_opt() rejects the mount when any
 * of them is missing.  max_read defaults to ~0 when not given.
 */
30struct fuse_mount_data {
31 int fd;
32 unsigned rootmode;
33 unsigned user_id;
34 unsigned group_id;
35 unsigned fd_present : 1;
36 unsigned rootmode_present : 1;
37 unsigned user_id_present : 1;
38 unsigned group_id_present : 1;
39 unsigned flags;
40 unsigned max_read;
41};
42
/*
 * alloc_inode super operation: take an inode from fuse_inode_cachep and
 * initialise the FUSE-private part (nodeid and nlookup start at zero).
 * A request is allocated up front and stored in fi->forget_req;
 * fuse_clear_inode() uses it to send FUSE_FORGET.
 *
 * Returns the new inode, or NULL if either allocation fails (the inode is
 * returned to the cache when only the request allocation fails).
 *
 * NOTE(review): i_time = jiffies - 1 presumably makes the cached attributes
 * look already expired -- confirm against the users of i_time.
 */
43static struct inode *fuse_alloc_inode(struct super_block *sb)
44{
45 struct inode *inode;
46 struct fuse_inode *fi;
47
48 inode = kmem_cache_alloc(fuse_inode_cachep, SLAB_KERNEL);
49 if (!inode)
50 return NULL;
51
52 fi = get_fuse_inode(inode);
53 fi->i_time = jiffies - 1;
54 fi->nodeid = 0;
55 fi->nlookup = 0;
56 fi->forget_req = fuse_request_alloc();
57 if (!fi->forget_req) {
58 kmem_cache_free(fuse_inode_cachep, inode);
59 return NULL;
60 }
61
62 return inode;
63}
64
/*
 * destroy_inode super operation: free the preallocated forget request if
 * it is still attached (fuse_clear_inode() sets the pointer to NULL once it
 * has used the request) and return the inode to fuse_inode_cachep.
 */
65static void fuse_destroy_inode(struct inode *inode)
66{
67 struct fuse_inode *fi = get_fuse_inode(inode);
68 if (fi->forget_req)
69 fuse_request_free(fi->forget_req);
70 kmem_cache_free(fuse_inode_cachep, inode);
71}
72
/*
 * read_inode super operation.  Intentionally empty: inodes are filled in
 * by fuse_iget() from the attributes passed to it.
 */
73static void fuse_read_inode(struct inode *inode)
74{
75 /* No op */
76}
77
/*
 * Build a FUSE_FORGET message in @req for node @nodeid, carrying @nlookup
 * as its single input argument (stored in req->misc.forget_in), and hand it
 * to request_send_noreply().
 */
78void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
79 unsigned long nodeid, u64 nlookup)
80{
81 struct fuse_forget_in *inarg = &req->misc.forget_in;
82 inarg->nlookup = nlookup;
83 req->in.h.opcode = FUSE_FORGET;
84 req->in.h.nodeid = nodeid;
85 req->in.numargs = 1;
86 req->in.args[0].size = sizeof(struct fuse_forget_in);
87 req->in.args[0].value = inarg;
88 request_send_noreply(fc, req);
89}
90
/*
 * clear_inode super operation: while the superblock is still MS_ACTIVE,
 * send FUSE_FORGET for this inode using the request preallocated in
 * fuse_alloc_inode().  The pointer is cleared afterwards so that
 * fuse_destroy_inode() does not free the request a second time.
 *
 * NOTE(review): presumably the send path takes over ownership of the
 * request -- confirm in request_send_noreply().
 */
91static void fuse_clear_inode(struct inode *inode)
92{
93 if (inode->i_sb->s_flags & MS_ACTIVE) {
94 struct fuse_conn *fc = get_fuse_conn(inode);
95 struct fuse_inode *fi = get_fuse_inode(inode);
96 fuse_send_forget(fc, fi->forget_req, fi->nodeid, fi->nlookup);
97 fi->forget_req = NULL;
98 }
99}
100
/*
 * Copy the attributes in @attr into @inode.
 *
 * For regular files whose size differs from attr->size the cached pages
 * are invalidated first.  The file type bits of i_mode are kept; only the
 * low 07777 permission bits are taken from attr->mode.  i_blksize is always
 * set to PAGE_CACHE_SIZE.
 */
101void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr)
102{
103 if (S_ISREG(inode->i_mode) && i_size_read(inode) != attr->size)
104 invalidate_inode_pages(inode->i_mapping);
105
106 inode->i_ino = attr->ino;
107 inode->i_mode = (inode->i_mode & S_IFMT) + (attr->mode & 07777);
108 inode->i_nlink = attr->nlink;
109 inode->i_uid = attr->uid;
110 inode->i_gid = attr->gid;
111 i_size_write(inode, attr->size);
112 inode->i_blksize = PAGE_CACHE_SIZE;
113 inode->i_blocks = attr->blocks;
114 inode->i_atime.tv_sec = attr->atime;
115 inode->i_atime.tv_nsec = attr->atimensec;
116 inode->i_mtime.tv_sec = attr->mtime;
117 inode->i_mtime.tv_nsec = attr->mtimensec;
118 inode->i_ctime.tv_sec = attr->ctime;
119 inode->i_ctime.tv_nsec = attr->ctimensec;
120}
121
/*
 * First-time setup of a new inode: record the file type and size from
 * @attr, then call the fuse_init_*() helpers that match the type (regular
 * file, directory, symlink, or device/fifo/socket via init_special_inode()).
 * Any unrecognised type is forced to a regular file.
 */
122static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
123{
124 inode->i_mode = attr->mode & S_IFMT;
125 i_size_write(inode, attr->size);
126 if (S_ISREG(inode->i_mode)) {
127 fuse_init_common(inode);
128 fuse_init_file_inode(inode);
129 } else if (S_ISDIR(inode->i_mode))
130 fuse_init_dir(inode);
131 else if (S_ISLNK(inode->i_mode))
132 fuse_init_symlink(inode);
133 else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
134 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
135 fuse_init_common(inode);
136 init_special_inode(inode, inode->i_mode,
137 new_decode_dev(attr->rdev));
138 } else {
139 /* Don't let user create weird files */
140 inode->i_mode = S_IFREG;
141 fuse_init_common(inode);
142 fuse_init_file_inode(inode);
143 }
144}
145
/*
 * Comparison callback passed to iget5_locked() by fuse_iget().
 * @_nodeidp points to an unsigned long node id; returns 1 when @inode has
 * that node id and 0 otherwise.
 */
146static int fuse_inode_eq(struct inode *inode, void *_nodeidp)
147{
148 unsigned long nodeid = *(unsigned long *) _nodeidp;
149 if (get_node_id(inode) == nodeid)
150 return 1;
151 else
152 return 0;
153}
154
/*
 * Initialisation callback passed to iget5_locked() by fuse_iget(): store
 * the node id pointed to by @_nodeidp in the FUSE part of @inode.
 * Always returns 0.
 */
155static int fuse_inode_set(struct inode *inode, void *_nodeidp)
156{
157 unsigned long nodeid = *(unsigned long *) _nodeidp;
158 get_fuse_inode(inode)->nodeid = nodeid;
159 return 0;
160}
161
/*
 * Look up, or create, the inode for @nodeid on @sb via iget5_locked().
 *
 * A newly created inode (I_NEW) gets its flags, generation and backing_dev_info
 * set, is initialised from @attr and then unlocked.  If an existing inode is
 * found whose file type differs from attr->mode, it is marked bad and
 * dropped, and the lookup is repeated once; a second mismatch trips BUG_ON().
 *
 * On success the inode's nlookup count is incremented and its attributes
 * are refreshed from @attr.  Returns NULL if iget5_locked() fails.
 */
162struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
163 int generation, struct fuse_attr *attr)
164{
165 struct inode *inode;
166 struct fuse_inode *fi;
167 struct fuse_conn *fc = get_fuse_conn_super(sb);
168 int retried = 0;
169
170 retry:
171 inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &nodeid);
172 if (!inode)
173 return NULL;
174
175 if ((inode->i_state & I_NEW)) {
176 inode->i_flags |= S_NOATIME|S_NOCMTIME;
177 inode->i_generation = generation;
178 inode->i_data.backing_dev_info = &fc->bdi;
179 fuse_init_inode(inode, attr);
180 unlock_new_inode(inode);
181 } else if ((inode->i_mode ^ attr->mode) & S_IFMT) {
182 BUG_ON(retried);
183 /* Inode has changed type, any I/O on the old should fail */
184 make_bad_inode(inode);
185 iput(inode);
186 retried = 1;
187 goto retry;
188 }
189
190 fi = get_fuse_inode(inode);
191 fi->nlookup ++;
192 fuse_change_attributes(inode, attr);
193 return inode;
194}
195
/*
 * put_super super operation.
 *
 * With sbput_sem held for writing, every request still on fc->background
 * is passed to fuse_release_background().  Then, under fuse_lock, the
 * connection is marked unmounted, its ids and flags are cleared, all
 * sleepers on fc->waitq are woken and one reference on the connection is
 * dropped.  fuse_release_conn() frees @fc when that was the last reference,
 * so @fc must not be touched after that call.
 */
196static void fuse_put_super(struct super_block *sb)
197{
198 struct fuse_conn *fc = get_fuse_conn_super(sb);
199
200 down_write(&fc->sbput_sem);
201 while (!list_empty(&fc->background))
202 fuse_release_background(list_entry(fc->background.next,
203 struct fuse_req, bg_entry));
204
205 spin_lock(&fuse_lock);
206 fc->mounted = 0;
207 fc->user_id = 0;
208 fc->group_id = 0;
209 fc->flags = 0;
210 /* Flush all readers on this fs */
211 wake_up_all(&fc->waitq);
212 up_write(&fc->sbput_sem);
213 fuse_release_conn(fc);
214 spin_unlock(&fuse_lock);
215}
216
/*
 * Translate the statfs reply @attr into the kernel's @stbuf.  f_type is
 * always FUSE_SUPER_MAGIC; f_fsid is not written here.
 */
217static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr)
218{
219 stbuf->f_type = FUSE_SUPER_MAGIC;
220 stbuf->f_bsize = attr->bsize;
221 stbuf->f_blocks = attr->blocks;
222 stbuf->f_bfree = attr->bfree;
223 stbuf->f_bavail = attr->bavail;
224 stbuf->f_files = attr->files;
225 stbuf->f_ffree = attr->ffree;
226 stbuf->f_namelen = attr->namelen;
227 /* fsid is left zero */
228}
229
/*
 * statfs super operation: send a synchronous FUSE_STATFS request (no input
 * arguments, one output argument) and, when the reply carries no error,
 * convert it into @buf.
 *
 * Returns -EINTR if no request could be obtained from fuse_get_request(),
 * otherwise the error code from the reply header (0 on success).
 */
230static int fuse_statfs(struct super_block *sb, struct kstatfs *buf)
231{
232 struct fuse_conn *fc = get_fuse_conn_super(sb);
233 struct fuse_req *req;
234 struct fuse_statfs_out outarg;
235 int err;
236
237 req = fuse_get_request(fc);
238 if (!req)
239 return -EINTR;
240
241 req->in.numargs = 0;
242 req->in.h.opcode = FUSE_STATFS;
243 req->out.numargs = 1;
244 req->out.args[0].size = sizeof(outarg);
245 req->out.args[0].value = &outarg;
246 request_send(fc, req);
247 err = req->out.h.error;
248 if (!err)
249 convert_fuse_statfs(buf, &outarg.st);
250 fuse_put_request(fc, req);
251 return err;
252}
253
/*
 * Mount option identifiers and the matching pattern table used by
 * parse_fuse_opt() through match_token().  The table is terminated by the
 * OPT_ERR entry with a NULL pattern.
 */
254enum {
255 OPT_FD,
256 OPT_ROOTMODE,
257 OPT_USER_ID,
258 OPT_GROUP_ID,
259 OPT_DEFAULT_PERMISSIONS,
260 OPT_ALLOW_OTHER,
261 OPT_MAX_READ,
262 OPT_ERR
263};
264
265static match_table_t tokens = {
266 {OPT_FD, "fd=%u"},
267 {OPT_ROOTMODE, "rootmode=%o"},
268 {OPT_USER_ID, "user_id=%u"},
269 {OPT_GROUP_ID, "group_id=%u"},
270 {OPT_DEFAULT_PERMISSIONS, "default_permissions"},
271 {OPT_ALLOW_OTHER, "allow_other"},
272 {OPT_MAX_READ, "max_read=%u"},
273 {OPT_ERR, NULL}
274};
275
/*
 * Parse the comma separated mount option string @opt into @d.
 *
 * @d is zeroed first and max_read defaults to ~0.  @opt is split in place
 * with strsep(); empty items are skipped.  The options fd, rootmode,
 * user_id and group_id are mandatory.
 *
 * Returns 1 on success and 0 on failure (note: not a negative errno).
 * Failure means an unknown option, a value that does not parse, or a
 * missing mandatory option.
 */
276static int parse_fuse_opt(char *opt, struct fuse_mount_data *d)
277{
278 char *p;
279 memset(d, 0, sizeof(struct fuse_mount_data));
280 d->max_read = ~0;
281
282 while ((p = strsep(&opt, ",")) != NULL) {
283 int token;
284 int value;
285 substring_t args[MAX_OPT_ARGS];
286 if (!*p)
287 continue;
288
289 token = match_token(p, tokens, args);
290 switch (token) {
291 case OPT_FD:
292 if (match_int(&args[0], &value))
293 return 0;
294 d->fd = value;
295 d->fd_present = 1;
296 break;
297
298 case OPT_ROOTMODE:
299 if (match_octal(&args[0], &value))
300 return 0;
301 d->rootmode = value;
302 d->rootmode_present = 1;
303 break;
304
305 case OPT_USER_ID:
306 if (match_int(&args[0], &value))
307 return 0;
308 d->user_id = value;
309 d->user_id_present = 1;
310 break;
311
312 case OPT_GROUP_ID:
313 if (match_int(&args[0], &value))
314 return 0;
315 d->group_id = value;
316 d->group_id_present = 1;
317 break;
318
319 case OPT_DEFAULT_PERMISSIONS:
320 d->flags |= FUSE_DEFAULT_PERMISSIONS;
321 break;
322
323 case OPT_ALLOW_OTHER:
324 d->flags |= FUSE_ALLOW_OTHER;
325 break;
326
327 case OPT_MAX_READ:
328 if (match_int(&args[0], &value))
329 return 0;
330 d->max_read = value;
331 break;
332
333 default:
334 return 0;
335 }
336 }
337
338 if (!d->fd_present || !d->rootmode_present ||
339 !d->user_id_present || !d->group_id_present)
340 return 0;
341
342 return 1;
343}
344
/*
 * show_options super operation: print the connection's mount options to
 * @m.  user_id and group_id are always printed; default_permissions and
 * allow_other only when set; max_read only when it differs from the ~0
 * default.  The fd and rootmode options are not printed.  Always returns 0.
 */
345static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
346{
347 struct fuse_conn *fc = get_fuse_conn_super(mnt->mnt_sb);
348
349 seq_printf(m, ",user_id=%u", fc->user_id);
350 seq_printf(m, ",group_id=%u", fc->group_id);
351 if (fc->flags & FUSE_DEFAULT_PERMISSIONS)
352 seq_puts(m, ",default_permissions");
353 if (fc->flags & FUSE_ALLOW_OTHER)
354 seq_puts(m, ",allow_other");
355 if (fc->max_read != ~0)
356 seq_printf(m, ",max_read=%u", fc->max_read);
357 return 0;
358}
359
/*
 * Free a connection: release every request still on fc->unused_list, then
 * free @fc itself.  Requests on the other lists are not touched here.
 */
360static void free_conn(struct fuse_conn *fc)
361{
362 while (!list_empty(&fc->unused_list)) {
363 struct fuse_req *req;
364 req = list_entry(fc->unused_list.next, struct fuse_req, list);
365 list_del(&req->list);
366 fuse_request_free(req);
367 }
368 kfree(fc);
369}
370
/*
 * Drop one reference on @fc and free the connection when the count reaches
 * zero.  After this returns the caller must assume @fc may be gone.
 */
371/* Must be called with the fuse lock held */
372void fuse_release_conn(struct fuse_conn *fc)
373{
374 fc->count--;
375 if (!fc->count)
376 free_conn(fc);
377}
378
/*
 * Allocate and initialise a connection structure: zero it, set up the wait
 * queue, the request lists and the semaphores, preallocate
 * FUSE_MAX_OUTSTANDING requests onto unused_list, and fill in the
 * backing_dev_info read-ahead defaults.
 *
 * Returns NULL if the connection or any of the requests cannot be
 * allocated; requests allocated so far are freed by free_conn().
 * The reference count is left at zero; get_conn() sets it.
 */
379static struct fuse_conn *new_conn(void)
380{
381 struct fuse_conn *fc;
382
383 fc = kmalloc(sizeof(*fc), GFP_KERNEL);
384 if (fc != NULL) {
385 int i;
386 memset(fc, 0, sizeof(*fc));
387 init_waitqueue_head(&fc->waitq);
388 INIT_LIST_HEAD(&fc->pending);
389 INIT_LIST_HEAD(&fc->processing);
390 INIT_LIST_HEAD(&fc->unused_list);
391 INIT_LIST_HEAD(&fc->background);
392 sema_init(&fc->outstanding_sem, 0);
393 init_rwsem(&fc->sbput_sem);
394 for (i = 0; i < FUSE_MAX_OUTSTANDING; i++) {
395 struct fuse_req *req = fuse_request_alloc();
396 if (!req) {
397 free_conn(fc);
398 return NULL;
399 }
400 list_add(&req->list, &fc->unused_list);
401 }
402 fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
403 fc->bdi.unplug_io_fn = default_unplug_io_fn;
404 fc->reqctr = 0;
405 }
406 return fc;
407}
408
/*
 * Create a connection and bind it to both the device file @file and the
 * superblock @sb.
 *
 * Returns ERR_PTR(-EINVAL) if @file is not a FUSE device file or already
 * has a connection in private_data, ERR_PTR(-ENOMEM) if allocation fails.
 * The connection is allocated before fuse_lock is taken; the private_data
 * check and the binding are done under the lock.
 *
 * NOTE(review): count = 2 presumably accounts for one reference held by the
 * device file and one by the superblock -- confirm against the device
 * release path.
 */
409static struct fuse_conn *get_conn(struct file *file, struct super_block *sb)
410{
411 struct fuse_conn *fc;
412
413 if (file->f_op != &fuse_dev_operations)
414 return ERR_PTR(-EINVAL);
415 fc = new_conn();
416 if (fc == NULL)
417 return ERR_PTR(-ENOMEM);
418 spin_lock(&fuse_lock);
419 if (file->private_data) {
420 free_conn(fc);
421 fc = ERR_PTR(-EINVAL);
422 } else {
423 file->private_data = fc;
424 *get_fuse_conn_super_p(sb) = fc;
425 fc->mounted = 1;
426 fc->connected = 1;
427 fc->count = 2;
428 }
429 spin_unlock(&fuse_lock);
430 return fc;
431}
432
/*
 * Create the root inode of @sb from a zeroed attribute block in which only
 * the mode (@mode, from the rootmode mount option) and the inode number are
 * set.  Returns NULL if fuse_iget() fails.
 *
 * NOTE(review): the node id is passed as the literal 1 while attr.ino uses
 * FUSE_ROOT_ID -- presumably FUSE_ROOT_ID is 1; confirm.
 */
433static struct inode *get_root_inode(struct super_block *sb, unsigned mode)
434{
435 struct fuse_attr attr;
436 memset(&attr, 0, sizeof(attr));
437
438 attr.mode = mode;
439 attr.ino = FUSE_ROOT_ID;
440 return fuse_iget(sb, 1, 0, &attr);
441}
442
/* Superblock operations installed on every FUSE mount by fuse_fill_super(). */
443static struct super_operations fuse_super_operations = {
444 .alloc_inode = fuse_alloc_inode,
445 .destroy_inode = fuse_destroy_inode,
446 .read_inode = fuse_read_inode,
447 .clear_inode = fuse_clear_inode,
448 .put_super = fuse_put_super,
449 .statfs = fuse_statfs,
450 .show_options = fuse_show_options,
451};
452
/*
 * Fill in a new superblock for a FUSE mount.
 *
 * Parses the option string in @data, sets the basic superblock fields,
 * resolves the fd option to a file and binds a new connection to it with
 * get_conn(), copies the mount options into the connection, creates the
 * root inode and dentry, and finally sends the INIT message.
 *
 * ra_pages is reduced when max_read is smaller than the default read-ahead
 * window.
 *
 * Returns 0 on success, -EINVAL for bad options or a bad fd, the error from
 * get_conn(), or -ENOMEM if the root inode or dentry cannot be created.
 *
 * NOTE(review): the error path only drops one reference on the connection;
 * file->private_data still points at it afterwards -- confirm that the
 * device release path drops the remaining reference.
 */
453static int fuse_fill_super(struct super_block *sb, void *data, int silent)
454{
455 struct fuse_conn *fc;
456 struct inode *root;
457 struct fuse_mount_data d;
458 struct file *file;
459 int err;
460
461 if (!parse_fuse_opt((char *) data, &d))
462 return -EINVAL;
463
464 sb->s_blocksize = PAGE_CACHE_SIZE;
465 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
466 sb->s_magic = FUSE_SUPER_MAGIC;
467 sb->s_op = &fuse_super_operations;
468 sb->s_maxbytes = MAX_LFS_FILESIZE;
469
470 file = fget(d.fd);
471 if (!file)
472 return -EINVAL;
473
474 fc = get_conn(file, sb);
475 fput(file);
476 if (IS_ERR(fc))
477 return PTR_ERR(fc);
478
479 fc->flags = d.flags;
480 fc->user_id = d.user_id;
481 fc->group_id = d.group_id;
482 fc->max_read = d.max_read;
483 if (fc->max_read / PAGE_CACHE_SIZE < fc->bdi.ra_pages)
484 fc->bdi.ra_pages = fc->max_read / PAGE_CACHE_SIZE;
485 fc->max_write = FUSE_MAX_IN / 2;
486
487 err = -ENOMEM;
488 root = get_root_inode(sb, d.rootmode);
489 if (root == NULL)
490 goto err;
491
492 sb->s_root = d_alloc_root(root);
493 if (!sb->s_root) {
494 iput(root);
495 goto err;
496 }
497 fuse_send_init(fc);
498 return 0;
499
500 err:
501 spin_lock(&fuse_lock);
502 fuse_release_conn(fc);
503 spin_unlock(&fuse_lock);
504 return err;
505}
506
/*
 * get_sb operation of the "fuse" filesystem type: obtain a superblock via
 * get_sb_nodev(), with fuse_fill_super() doing the setup.  @dev_name is
 * not used.
 */
507static struct super_block *fuse_get_sb(struct file_system_type *fs_type,
508 int flags, const char *dev_name,
509 void *raw_data)
510{
511 return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super);
512}
513
/* Filesystem type registered under the name "fuse" by fuse_fs_init(). */
514static struct file_system_type fuse_fs_type = {
515 .owner = THIS_MODULE,
516 .name = "fuse",
517 .get_sb = fuse_get_sb,
518 .kill_sb = kill_anon_super,
519};
520
/*
 * Constructor for fuse_inode_cachep.  inode_init_once() is run only when
 * @flags has SLAB_CTOR_CONSTRUCTOR set and SLAB_CTOR_VERIFY clear.
 * @foo is the object being constructed, used here as a struct inode.
 */
521static void fuse_inode_init_once(void *foo, kmem_cache_t *cachep,
522 unsigned long flags)
523{
524 struct inode * inode = foo;
525
526 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
527 SLAB_CTOR_CONSTRUCTOR)
528 inode_init_once(inode);
529}
530
/*
 * Register the "fuse" filesystem type and create the inode slab cache.
 *
 * Returns the error from register_filesystem() if registration fails.  If
 * the cache cannot be created the filesystem is unregistered again and
 * -ENOMEM is returned.  Returns 0 on success.
 */
531static int __init fuse_fs_init(void)
532{
533 int err;
534
535 err = register_filesystem(&fuse_fs_type);
536 if (err)
537 printk("fuse: failed to register filesystem\n");
538 else {
539 fuse_inode_cachep = kmem_cache_create("fuse_inode",
540 sizeof(struct fuse_inode),
541 0, SLAB_HWCACHE_ALIGN,
542 fuse_inode_init_once, NULL);
543 if (!fuse_inode_cachep) {
544 unregister_filesystem(&fuse_fs_type);
545 err = -ENOMEM;
546 }
547 }
548
549 return err;
550}
551
/*
 * Undo fuse_fs_init(): unregister the filesystem type and destroy the
 * inode slab cache.  Called from both the init error path and module exit.
 */
552static void fuse_fs_cleanup(void)
553{
554 unregister_filesystem(&fuse_fs_type);
555 kmem_cache_destroy(fuse_inode_cachep);
556}
557
/*
 * Module entry point: print the API version banner, initialise fuse_lock,
 * then set up the filesystem side (fuse_fs_init) followed by the device
 * side (fuse_dev_init).  If the device setup fails the filesystem setup is
 * undone.  Returns 0 on success or the error of the step that failed.
 */
558static int __init fuse_init(void)
559{
560 int res;
561
562 printk("fuse init (API version %i.%i)\n",
563 FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION);
564
565 spin_lock_init(&fuse_lock);
566 res = fuse_fs_init();
567 if (res)
568 goto err;
569
570 res = fuse_dev_init();
571 if (res)
572 goto err_fs_cleanup;
573
574 return 0;
575
576 err_fs_cleanup:
577 fuse_fs_cleanup();
578 err:
579 return res;
580}
581
/*
 * Module exit point: log the exit, then tear down the filesystem side and
 * the device side set up by fuse_init().
 */
582static void __exit fuse_exit(void)
583{
584 printk(KERN_DEBUG "fuse exit\n");
585
586 fuse_fs_cleanup();
587 fuse_dev_cleanup();
588}
589
590module_init(fuse_init);
591module_exit(fuse_exit);
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index b2d18200a003..59c5062cd63f 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -284,6 +284,7 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb)
284 284
285static void hostfs_delete_inode(struct inode *inode) 285static void hostfs_delete_inode(struct inode *inode)
286{ 286{
287 truncate_inode_pages(&inode->i_data, 0);
287 if(HOSTFS_I(inode)->fd != -1) { 288 if(HOSTFS_I(inode)->fd != -1) {
288 close_file(&HOSTFS_I(inode)->fd); 289 close_file(&HOSTFS_I(inode)->fd);
289 HOSTFS_I(inode)->fd = -1; 290 HOSTFS_I(inode)->fd = -1;
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 38b1741fa539..e3d17e9ea6c1 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -284,6 +284,7 @@ void hpfs_write_if_changed(struct inode *inode)
284 284
285void hpfs_delete_inode(struct inode *inode) 285void hpfs_delete_inode(struct inode *inode)
286{ 286{
287 truncate_inode_pages(&inode->i_data, 0);
287 lock_kernel(); 288 lock_kernel();
288 hpfs_remove_fnode(inode->i_sb, inode->i_ino); 289 hpfs_remove_fnode(inode->i_sb, inode->i_ino);
289 unlock_kernel(); 290 unlock_kernel();
diff --git a/fs/inode.c b/fs/inode.c
index 71df1b1e8f75..f80a79ff156b 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1034,19 +1034,21 @@ void generic_delete_inode(struct inode *inode)
1034 inodes_stat.nr_inodes--; 1034 inodes_stat.nr_inodes--;
1035 spin_unlock(&inode_lock); 1035 spin_unlock(&inode_lock);
1036 1036
1037 if (inode->i_data.nrpages)
1038 truncate_inode_pages(&inode->i_data, 0);
1039
1040 security_inode_delete(inode); 1037 security_inode_delete(inode);
1041 1038
1042 if (op->delete_inode) { 1039 if (op->delete_inode) {
1043 void (*delete)(struct inode *) = op->delete_inode; 1040 void (*delete)(struct inode *) = op->delete_inode;
1044 if (!is_bad_inode(inode)) 1041 if (!is_bad_inode(inode))
1045 DQUOT_INIT(inode); 1042 DQUOT_INIT(inode);
1046 /* s_op->delete_inode internally recalls clear_inode() */ 1043 /* Filesystems implementing their own
1044 * s_op->delete_inode are required to call
1045 * truncate_inode_pages and clear_inode()
1046 * internally */
1047 delete(inode); 1047 delete(inode);
1048 } else 1048 } else {
1049 truncate_inode_pages(&inode->i_data, 0);
1049 clear_inode(inode); 1050 clear_inode(inode);
1051 }
1050 spin_lock(&inode_lock); 1052 spin_lock(&inode_lock);
1051 hlist_del_init(&inode->i_hash); 1053 hlist_del_init(&inode->i_hash);
1052 spin_unlock(&inode_lock); 1054 spin_unlock(&inode_lock);
diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c
index 777b90057b89..3dcc6d2162cb 100644
--- a/fs/jffs/inode-v23.c
+++ b/fs/jffs/inode-v23.c
@@ -1744,6 +1744,7 @@ jffs_delete_inode(struct inode *inode)
1744 D3(printk("jffs_delete_inode(): inode->i_ino == %lu\n", 1744 D3(printk("jffs_delete_inode(): inode->i_ino == %lu\n",
1745 inode->i_ino)); 1745 inode->i_ino));
1746 1746
1747 truncate_inode_pages(&inode->i_data, 0);
1747 lock_kernel(); 1748 lock_kernel();
1748 inode->i_size = 0; 1749 inode->i_size = 0;
1749 inode->i_blocks = 0; 1750 inode->i_blocks = 0;
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 767c7ecb429e..cff352f4ec18 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -132,6 +132,8 @@ void jfs_delete_inode(struct inode *inode)
132 (JFS_IP(inode)->fileset != cpu_to_le32(FILESYSTEM_I))) 132 (JFS_IP(inode)->fileset != cpu_to_le32(FILESYSTEM_I)))
133 return; 133 return;
134 134
135 truncate_inode_pages(&inode->i_data, 0);
136
135 if (test_cflag(COMMIT_Freewmap, inode)) 137 if (test_cflag(COMMIT_Freewmap, inode))
136 jfs_free_zero_link(inode); 138 jfs_free_zero_link(inode);
137 139
diff --git a/fs/locks.c b/fs/locks.c
index 11956b6179ff..c2c09b4798d6 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2198,21 +2198,23 @@ void steal_locks(fl_owner_t from)
2198{ 2198{
2199 struct files_struct *files = current->files; 2199 struct files_struct *files = current->files;
2200 int i, j; 2200 int i, j;
2201 struct fdtable *fdt;
2201 2202
2202 if (from == files) 2203 if (from == files)
2203 return; 2204 return;
2204 2205
2205 lock_kernel(); 2206 lock_kernel();
2206 j = 0; 2207 j = 0;
2208 fdt = files_fdtable(files);
2207 for (;;) { 2209 for (;;) {
2208 unsigned long set; 2210 unsigned long set;
2209 i = j * __NFDBITS; 2211 i = j * __NFDBITS;
2210 if (i >= files->max_fdset || i >= files->max_fds) 2212 if (i >= fdt->max_fdset || i >= fdt->max_fds)
2211 break; 2213 break;
2212 set = files->open_fds->fds_bits[j++]; 2214 set = fdt->open_fds->fds_bits[j++];
2213 while (set) { 2215 while (set) {
2214 if (set & 1) { 2216 if (set & 1) {
2215 struct file *file = files->fd[i]; 2217 struct file *file = fdt->fd[i];
2216 if (file) 2218 if (file)
2217 __steal_locks(file, from); 2219 __steal_locks(file, from);
2218 } 2220 }
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 3f18c21198d7..790cc0d0e970 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -24,6 +24,7 @@ static int minix_remount (struct super_block * sb, int * flags, char * data);
24 24
25static void minix_delete_inode(struct inode *inode) 25static void minix_delete_inode(struct inode *inode)
26{ 26{
27 truncate_inode_pages(&inode->i_data, 0);
27 inode->i_size = 0; 28 inode->i_size = 0;
28 minix_truncate(inode); 29 minix_truncate(inode);
29 minix_free_inode(inode); 30 minix_free_inode(inode);
diff --git a/fs/namei.c b/fs/namei.c
index 145e852c4bd0..21d85f1ac839 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1316,10 +1316,8 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
1316 return error; 1316 return error;
1317 DQUOT_INIT(dir); 1317 DQUOT_INIT(dir);
1318 error = dir->i_op->create(dir, dentry, mode, nd); 1318 error = dir->i_op->create(dir, dentry, mode, nd);
1319 if (!error) { 1319 if (!error)
1320 fsnotify_create(dir, dentry->d_name.name); 1320 fsnotify_create(dir, dentry->d_name.name);
1321 security_inode_post_create(dir, dentry, mode);
1322 }
1323 return error; 1321 return error;
1324} 1322}
1325 1323
@@ -1635,10 +1633,8 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
1635 1633
1636 DQUOT_INIT(dir); 1634 DQUOT_INIT(dir);
1637 error = dir->i_op->mknod(dir, dentry, mode, dev); 1635 error = dir->i_op->mknod(dir, dentry, mode, dev);
1638 if (!error) { 1636 if (!error)
1639 fsnotify_create(dir, dentry->d_name.name); 1637 fsnotify_create(dir, dentry->d_name.name);
1640 security_inode_post_mknod(dir, dentry, mode, dev);
1641 }
1642 return error; 1638 return error;
1643} 1639}
1644 1640
@@ -1708,10 +1704,8 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1708 1704
1709 DQUOT_INIT(dir); 1705 DQUOT_INIT(dir);
1710 error = dir->i_op->mkdir(dir, dentry, mode); 1706 error = dir->i_op->mkdir(dir, dentry, mode);
1711 if (!error) { 1707 if (!error)
1712 fsnotify_mkdir(dir, dentry->d_name.name); 1708 fsnotify_mkdir(dir, dentry->d_name.name);
1713 security_inode_post_mkdir(dir,dentry, mode);
1714 }
1715 return error; 1709 return error;
1716} 1710}
1717 1711
@@ -1947,10 +1941,8 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, i
1947 1941
1948 DQUOT_INIT(dir); 1942 DQUOT_INIT(dir);
1949 error = dir->i_op->symlink(dir, dentry, oldname); 1943 error = dir->i_op->symlink(dir, dentry, oldname);
1950 if (!error) { 1944 if (!error)
1951 fsnotify_create(dir, dentry->d_name.name); 1945 fsnotify_create(dir, dentry->d_name.name);
1952 security_inode_post_symlink(dir, dentry, oldname);
1953 }
1954 return error; 1946 return error;
1955} 1947}
1956 1948
@@ -2020,10 +2012,8 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
2020 DQUOT_INIT(dir); 2012 DQUOT_INIT(dir);
2021 error = dir->i_op->link(old_dentry, dir, new_dentry); 2013 error = dir->i_op->link(old_dentry, dir, new_dentry);
2022 up(&old_dentry->d_inode->i_sem); 2014 up(&old_dentry->d_inode->i_sem);
2023 if (!error) { 2015 if (!error)
2024 fsnotify_create(dir, new_dentry->d_name.name); 2016 fsnotify_create(dir, new_dentry->d_name.name);
2025 security_inode_post_link(old_dentry, dir, new_dentry);
2026 }
2027 return error; 2017 return error;
2028} 2018}
2029 2019
@@ -2142,11 +2132,8 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
2142 d_rehash(new_dentry); 2132 d_rehash(new_dentry);
2143 dput(new_dentry); 2133 dput(new_dentry);
2144 } 2134 }
2145 if (!error) { 2135 if (!error)
2146 d_move(old_dentry,new_dentry); 2136 d_move(old_dentry,new_dentry);
2147 security_inode_post_rename(old_dir, old_dentry,
2148 new_dir, new_dentry);
2149 }
2150 return error; 2137 return error;
2151} 2138}
2152 2139
@@ -2172,7 +2159,6 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
2172 /* The following d_move() should become unconditional */ 2159 /* The following d_move() should become unconditional */
2173 if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME)) 2160 if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME))
2174 d_move(old_dentry, new_dentry); 2161 d_move(old_dentry, new_dentry);
2175 security_inode_post_rename(old_dir, old_dentry, new_dir, new_dentry);
2176 } 2162 }
2177 if (target) 2163 if (target)
2178 up(&target->i_sem); 2164 up(&target->i_sem);
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 44795d2f4b30..8c8839203cd5 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -286,6 +286,8 @@ ncp_iget(struct super_block *sb, struct ncp_entry_info *info)
286static void 286static void
287ncp_delete_inode(struct inode *inode) 287ncp_delete_inode(struct inode *inode)
288{ 288{
289 truncate_inode_pages(&inode->i_data, 0);
290
289 if (S_ISDIR(inode->i_mode)) { 291 if (S_ISDIR(inode->i_mode)) {
290 DDPRINTK("ncp_delete_inode: put directory %ld\n", inode->i_ino); 292 DDPRINTK("ncp_delete_inode: put directory %ld\n", inode->i_ino);
291 } 293 }
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 541b418327c8..6922469d6fc5 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -146,6 +146,8 @@ nfs_delete_inode(struct inode * inode)
146{ 146{
147 dprintk("NFS: delete_inode(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); 147 dprintk("NFS: delete_inode(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
148 148
149 truncate_inode_pages(&inode->i_data, 0);
150
149 nfs_wb_all(inode); 151 nfs_wb_all(inode);
150 /* 152 /*
151 * The following should never happen... 153 * The following should never happen...
diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 9eecc9939dfe..e4fd6134244d 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -22,6 +22,76 @@ ToDo/Notes:
22 - Enable the code for setting the NT4 compatibility flag when we start 22 - Enable the code for setting the NT4 compatibility flag when we start
23 making NTFS 1.2 specific modifications. 23 making NTFS 1.2 specific modifications.
24 24
252.1.24 - Lots of bug fixes and support more clean journal states.
26
27 - Support journals ($LogFile) which have been modified by chkdsk. This
28 means users can boot into Windows after we marked the volume dirty.
29 The Windows boot will run chkdsk and then reboot. The user can then
30 immediately boot into Linux rather than having to do a full Windows
31 boot first before rebooting into Linux and we will recognize such a
32 journal and empty it as it is clean by definition.
33 - Support journals ($LogFile) with only one restart page as well as
34 journals with two different restart pages. We sanity check both and
35 either use the only sane one or the more recent one of the two in the
36 case that both are valid.
37 - Modify fs/ntfs/malloc.h::ntfs_malloc_nofs() to do the kmalloc() based
38 allocations with __GFP_HIGHMEM, analogous to how the vmalloc() based
39 allocations are done.
40 - Add fs/ntfs/malloc.h::ntfs_malloc_nofs_nofail() which is analogous to
41 ntfs_malloc_nofs() but it performs allocations with __GFP_NOFAIL and
42 hence cannot fail.
43 - Use ntfs_malloc_nofs_nofail() in the two critical regions in
44 fs/ntfs/runlist.c::ntfs_runlists_merge(). This means we no longer
45 need to panic() if the allocation fails as it now cannot fail.
46 - Fix two nasty runlist merging bugs that had gone unnoticed so far.
47 Thanks to Stefano Picerno for the bug report.
48 - Remove two bogus BUG_ON()s from fs/ntfs/mft.c.
49 - Fix handling of valid but empty mapping pairs array in
50 fs/ntfs/runlist.c::ntfs_mapping_pairs_decompress().
51 - Report unrepresentable inodes during ntfs_readdir() as KERN_WARNING
52 messages and include the inode number. Thanks to Yura Pakhuchiy for
53 pointing this out.
54 - Change ntfs_rl_truncate_nolock() to throw away the runlist if the new
55 length is zero.
56 - Add runlist.[hc]::ntfs_rl_punch_nolock() which punches a caller
57 specified hole into a runlist.
58 - Fix a bug in fs/ntfs/index.c::ntfs_index_lookup(). When the returned
59 index entry is in the index root, we forgot to set the @ir pointer in
60 the index context. Thanks to Yura Pakhuchiy for finding this bug.
61 - Remove bogus setting of PageError in ntfs_read_compressed_block().
62 - Add fs/ntfs/attrib.[hc]::ntfs_resident_attr_value_resize().
63 - Fix a bug in ntfs_map_runlist_nolock() where we forgot to protect
64 access to the allocated size in the ntfs inode with the size lock.
65 - Fix ntfs_attr_vcn_to_lcn_nolock() and ntfs_attr_find_vcn_nolock() to
66 return LCN_ENOENT when there is no runlist and the allocated size is
67 zero.
68 - Fix load_attribute_list() to handle the case of a NULL runlist.
69 - Fix handling of sparse attributes in ntfs_attr_make_non_resident().
70 - Add BUG() checks to ntfs_attr_make_non_resident() and ntfs_attr_set()
71 to ensure that these functions are never called for compressed or
72 encrypted attributes.
73 - Fix cluster (de)allocators to work when the runlist is NULL and more
74 importantly to take a locked runlist rather than them locking it
75 which leads to lock reversal.
76 - Truncate {a,c,m}time to the ntfs supported time granularity when
77 updating the times in the inode in ntfs_setattr().
78 - Fixup handling of sparse, compressed, and encrypted attributes in
79 fs/ntfs/inode.c::ntfs_read_locked_{,attr_,index_}inode(),
80 fs/ntfs/aops.c::ntfs_{read,write}page().
81 - Make ntfs_write_block() not instantiate sparse blocks if they contain
82 only zeroes.
83 - Optimize fs/ntfs/aops.c::ntfs_write_block() by extending the page
84 lock protection over the buffer submission for i/o which allows the
85 removal of the get_bh()/put_bh() pairs for each buffer.
86 - Fix fs/ntfs/aops.c::ntfs_{read,write}_block() to handle the case
87 where a concurrent truncate has truncated the runlist under our feet.
88 - Fix page_has_buffers()/page_buffers() handling in fs/ntfs/aops.c.
89 - In fs/ntfs/aops.c::ntfs_end_buffer_async_read(), use a bit spin lock
90 in the first buffer head instead of a driver global spin lock to
91 improve scalability.
92 - Minor fix to error handling and error message display in
93 fs/ntfs/aops.c::ntfs_prepare_nonresident_write().
94
252.1.23 - Implement extension of resident files and make writing safe as well as 952.1.23 - Implement extension of resident files and make writing safe as well as
26 many bug fixes, cleanups, and enhancements... 96 many bug fixes, cleanups, and enhancements...
27 97
diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile
index f083f27d8b69..894b2b876d35 100644
--- a/fs/ntfs/Makefile
+++ b/fs/ntfs/Makefile
@@ -6,7 +6,7 @@ ntfs-objs := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \
6 index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \ 6 index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \
7 unistr.o upcase.o 7 unistr.o upcase.o
8 8
9EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.23\" 9EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.24\"
10 10
11ifeq ($(CONFIG_NTFS_DEBUG),y) 11ifeq ($(CONFIG_NTFS_DEBUG),y)
12EXTRA_CFLAGS += -DDEBUG 12EXTRA_CFLAGS += -DDEBUG
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 78adad7a988d..545236414d59 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -55,9 +55,8 @@
55 */ 55 */
56static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) 56static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
57{ 57{
58 static DEFINE_SPINLOCK(page_uptodate_lock);
59 unsigned long flags; 58 unsigned long flags;
60 struct buffer_head *tmp; 59 struct buffer_head *first, *tmp;
61 struct page *page; 60 struct page *page;
62 ntfs_inode *ni; 61 ntfs_inode *ni;
63 int page_uptodate = 1; 62 int page_uptodate = 1;
@@ -89,11 +88,13 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
89 } 88 }
90 } else { 89 } else {
91 clear_buffer_uptodate(bh); 90 clear_buffer_uptodate(bh);
91 SetPageError(page);
92 ntfs_error(ni->vol->sb, "Buffer I/O error, logical block %llu.", 92 ntfs_error(ni->vol->sb, "Buffer I/O error, logical block %llu.",
93 (unsigned long long)bh->b_blocknr); 93 (unsigned long long)bh->b_blocknr);
94 SetPageError(page);
95 } 94 }
96 spin_lock_irqsave(&page_uptodate_lock, flags); 95 first = page_buffers(page);
96 local_irq_save(flags);
97 bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
97 clear_buffer_async_read(bh); 98 clear_buffer_async_read(bh);
98 unlock_buffer(bh); 99 unlock_buffer(bh);
99 tmp = bh; 100 tmp = bh;
@@ -108,7 +109,8 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
108 } 109 }
109 tmp = tmp->b_this_page; 110 tmp = tmp->b_this_page;
110 } while (tmp != bh); 111 } while (tmp != bh);
111 spin_unlock_irqrestore(&page_uptodate_lock, flags); 112 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
113 local_irq_restore(flags);
112 /* 114 /*
113 * If none of the buffers had errors then we can set the page uptodate, 115 * If none of the buffers had errors then we can set the page uptodate,
114 * but we first have to perform the post read mst fixups, if the 116 * but we first have to perform the post read mst fixups, if the
@@ -141,7 +143,8 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
141 unlock_page(page); 143 unlock_page(page);
142 return; 144 return;
143still_busy: 145still_busy:
144 spin_unlock_irqrestore(&page_uptodate_lock, flags); 146 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
147 local_irq_restore(flags);
145 return; 148 return;
146} 149}
147 150
@@ -185,13 +188,15 @@ static int ntfs_read_block(struct page *page)
185 blocksize_bits = VFS_I(ni)->i_blkbits; 188 blocksize_bits = VFS_I(ni)->i_blkbits;
186 blocksize = 1 << blocksize_bits; 189 blocksize = 1 << blocksize_bits;
187 190
188 if (!page_has_buffers(page)) 191 if (!page_has_buffers(page)) {
189 create_empty_buffers(page, blocksize, 0); 192 create_empty_buffers(page, blocksize, 0);
190 bh = head = page_buffers(page); 193 if (unlikely(!page_has_buffers(page))) {
191 if (unlikely(!bh)) { 194 unlock_page(page);
192 unlock_page(page); 195 return -ENOMEM;
193 return -ENOMEM; 196 }
194 } 197 }
198 bh = head = page_buffers(page);
199 BUG_ON(!bh);
195 200
196 iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits); 201 iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
197 read_lock_irqsave(&ni->size_lock, flags); 202 read_lock_irqsave(&ni->size_lock, flags);
@@ -204,6 +209,7 @@ static int ntfs_read_block(struct page *page)
204 nr = i = 0; 209 nr = i = 0;
205 do { 210 do {
206 u8 *kaddr; 211 u8 *kaddr;
212 int err;
207 213
208 if (unlikely(buffer_uptodate(bh))) 214 if (unlikely(buffer_uptodate(bh)))
209 continue; 215 continue;
@@ -211,6 +217,7 @@ static int ntfs_read_block(struct page *page)
211 arr[nr++] = bh; 217 arr[nr++] = bh;
212 continue; 218 continue;
213 } 219 }
220 err = 0;
214 bh->b_bdev = vol->sb->s_bdev; 221 bh->b_bdev = vol->sb->s_bdev;
215 /* Is the block within the allowed limits? */ 222 /* Is the block within the allowed limits? */
216 if (iblock < lblock) { 223 if (iblock < lblock) {
@@ -252,7 +259,6 @@ lock_retry_remap:
252 goto handle_hole; 259 goto handle_hole;
253 /* If first try and runlist unmapped, map and retry. */ 260 /* If first try and runlist unmapped, map and retry. */
254 if (!is_retry && lcn == LCN_RL_NOT_MAPPED) { 261 if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
255 int err;
256 is_retry = TRUE; 262 is_retry = TRUE;
257 /* 263 /*
258 * Attempt to map runlist, dropping lock for 264 * Attempt to map runlist, dropping lock for
@@ -263,20 +269,30 @@ lock_retry_remap:
263 if (likely(!err)) 269 if (likely(!err))
264 goto lock_retry_remap; 270 goto lock_retry_remap;
265 rl = NULL; 271 rl = NULL;
266 lcn = err;
267 } else if (!rl) 272 } else if (!rl)
268 up_read(&ni->runlist.lock); 273 up_read(&ni->runlist.lock);
274 /*
275 * If buffer is outside the runlist, treat it as a
276 * hole. This can happen due to concurrent truncate
277 * for example.
278 */
279 if (err == -ENOENT || lcn == LCN_ENOENT) {
280 err = 0;
281 goto handle_hole;
282 }
269 /* Hard error, zero out region. */ 283 /* Hard error, zero out region. */
284 if (!err)
285 err = -EIO;
270 bh->b_blocknr = -1; 286 bh->b_blocknr = -1;
271 SetPageError(page); 287 SetPageError(page);
272 ntfs_error(vol->sb, "Failed to read from inode 0x%lx, " 288 ntfs_error(vol->sb, "Failed to read from inode 0x%lx, "
273 "attribute type 0x%x, vcn 0x%llx, " 289 "attribute type 0x%x, vcn 0x%llx, "
274 "offset 0x%x because its location on " 290 "offset 0x%x because its location on "
275 "disk could not be determined%s " 291 "disk could not be determined%s "
276 "(error code %lli).", ni->mft_no, 292 "(error code %i).", ni->mft_no,
277 ni->type, (unsigned long long)vcn, 293 ni->type, (unsigned long long)vcn,
278 vcn_ofs, is_retry ? " even after " 294 vcn_ofs, is_retry ? " even after "
279 "retrying" : "", (long long)lcn); 295 "retrying" : "", err);
280 } 296 }
281 /* 297 /*
282 * Either iblock was outside lblock limits or 298 * Either iblock was outside lblock limits or
@@ -289,9 +305,10 @@ handle_hole:
289handle_zblock: 305handle_zblock:
290 kaddr = kmap_atomic(page, KM_USER0); 306 kaddr = kmap_atomic(page, KM_USER0);
291 memset(kaddr + i * blocksize, 0, blocksize); 307 memset(kaddr + i * blocksize, 0, blocksize);
292 flush_dcache_page(page);
293 kunmap_atomic(kaddr, KM_USER0); 308 kunmap_atomic(kaddr, KM_USER0);
294 set_buffer_uptodate(bh); 309 flush_dcache_page(page);
310 if (likely(!err))
311 set_buffer_uptodate(bh);
295 } while (i++, iblock++, (bh = bh->b_this_page) != head); 312 } while (i++, iblock++, (bh = bh->b_this_page) != head);
296 313
297 /* Release the lock if we took it. */ 314 /* Release the lock if we took it. */
@@ -367,31 +384,38 @@ retry_readpage:
367 return 0; 384 return 0;
368 } 385 }
369 ni = NTFS_I(page->mapping->host); 386 ni = NTFS_I(page->mapping->host);
370 387 /*
388 * Only $DATA attributes can be encrypted and only unnamed $DATA
389 * attributes can be compressed. Index root can have the flags set but
390 * this means to create compressed/encrypted files, not that the
391 * attribute is compressed/encrypted.
392 */
393 if (ni->type != AT_INDEX_ROOT) {
394 /* If attribute is encrypted, deny access, just like NT4. */
395 if (NInoEncrypted(ni)) {
396 BUG_ON(ni->type != AT_DATA);
397 err = -EACCES;
398 goto err_out;
399 }
400 /* Compressed data streams are handled in compress.c. */
401 if (NInoNonResident(ni) && NInoCompressed(ni)) {
402 BUG_ON(ni->type != AT_DATA);
403 BUG_ON(ni->name_len);
404 return ntfs_read_compressed_block(page);
405 }
406 }
371 /* NInoNonResident() == NInoIndexAllocPresent() */ 407 /* NInoNonResident() == NInoIndexAllocPresent() */
372 if (NInoNonResident(ni)) { 408 if (NInoNonResident(ni)) {
373 /* 409 /* Normal, non-resident data stream. */
374 * Only unnamed $DATA attributes can be compressed or
375 * encrypted.
376 */
377 if (ni->type == AT_DATA && !ni->name_len) {
378 /* If file is encrypted, deny access, just like NT4. */
379 if (NInoEncrypted(ni)) {
380 err = -EACCES;
381 goto err_out;
382 }
383 /* Compressed data streams are handled in compress.c. */
384 if (NInoCompressed(ni))
385 return ntfs_read_compressed_block(page);
386 }
387 /* Normal data stream. */
388 return ntfs_read_block(page); 410 return ntfs_read_block(page);
389 } 411 }
390 /* 412 /*
391 * Attribute is resident, implying it is not compressed or encrypted. 413 * Attribute is resident, implying it is not compressed or encrypted.
392 * This also means the attribute is smaller than an mft record and 414 * This also means the attribute is smaller than an mft record and
393 * hence smaller than a page, so can simply zero out any pages with 415 * hence smaller than a page, so can simply zero out any pages with
394 * index above 0. 416 * index above 0. Note the attribute can actually be marked compressed
417 * but if it is resident the actual data is not compressed so we are
418 * ok to ignore the compressed flag here.
395 */ 419 */
396 if (unlikely(page->index > 0)) { 420 if (unlikely(page->index > 0)) {
397 kaddr = kmap_atomic(page, KM_USER0); 421 kaddr = kmap_atomic(page, KM_USER0);
@@ -511,19 +535,21 @@ static int ntfs_write_block(struct page *page, struct writeback_control *wbc)
511 BUG_ON(!PageUptodate(page)); 535 BUG_ON(!PageUptodate(page));
512 create_empty_buffers(page, blocksize, 536 create_empty_buffers(page, blocksize,
513 (1 << BH_Uptodate) | (1 << BH_Dirty)); 537 (1 << BH_Uptodate) | (1 << BH_Dirty));
538 if (unlikely(!page_has_buffers(page))) {
539 ntfs_warning(vol->sb, "Error allocating page "
540 "buffers. Redirtying page so we try "
541 "again later.");
542 /*
543 * Put the page back on mapping->dirty_pages, but leave
544 * its buffers' dirty state as-is.
545 */
546 redirty_page_for_writepage(wbc, page);
547 unlock_page(page);
548 return 0;
549 }
514 } 550 }
515 bh = head = page_buffers(page); 551 bh = head = page_buffers(page);
516 if (unlikely(!bh)) { 552 BUG_ON(!bh);
517 ntfs_warning(vol->sb, "Error allocating page buffers. "
518 "Redirtying page so we try again later.");
519 /*
520 * Put the page back on mapping->dirty_pages, but leave its
521 * buffer's dirty state as-is.
522 */
523 redirty_page_for_writepage(wbc, page);
524 unlock_page(page);
525 return 0;
526 }
527 553
528 /* NOTE: Different naming scheme to ntfs_read_block()! */ 554 /* NOTE: Different naming scheme to ntfs_read_block()! */
529 555
@@ -670,6 +696,27 @@ lock_retry_remap:
670 } 696 }
671 /* It is a hole, need to instantiate it. */ 697 /* It is a hole, need to instantiate it. */
672 if (lcn == LCN_HOLE) { 698 if (lcn == LCN_HOLE) {
699 u8 *kaddr;
700 unsigned long *bpos, *bend;
701
702 /* Check if the buffer is zero. */
703 kaddr = kmap_atomic(page, KM_USER0);
704 bpos = (unsigned long *)(kaddr + bh_offset(bh));
705 bend = (unsigned long *)((u8*)bpos + blocksize);
706 do {
707 if (unlikely(*bpos))
708 break;
709 } while (likely(++bpos < bend));
710 kunmap_atomic(kaddr, KM_USER0);
711 if (bpos == bend) {
712 /*
713 * Buffer is zero and sparse, no need to write
714 * it.
715 */
716 bh->b_blocknr = -1;
717 clear_buffer_dirty(bh);
718 continue;
719 }
673 // TODO: Instantiate the hole. 720 // TODO: Instantiate the hole.
674 // clear_buffer_new(bh); 721 // clear_buffer_new(bh);
675 // unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); 722 // unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
@@ -690,20 +737,37 @@ lock_retry_remap:
690 if (likely(!err)) 737 if (likely(!err))
691 goto lock_retry_remap; 738 goto lock_retry_remap;
692 rl = NULL; 739 rl = NULL;
693 lcn = err;
694 } else if (!rl) 740 } else if (!rl)
695 up_read(&ni->runlist.lock); 741 up_read(&ni->runlist.lock);
742 /*
743 * If buffer is outside the runlist, truncate has cut it out
744 * of the runlist. Just clean and clear the buffer and set it
745 * uptodate so it can get discarded by the VM.
746 */
747 if (err == -ENOENT || lcn == LCN_ENOENT) {
748 u8 *kaddr;
749
750 bh->b_blocknr = -1;
751 clear_buffer_dirty(bh);
752 kaddr = kmap_atomic(page, KM_USER0);
753 memset(kaddr + bh_offset(bh), 0, blocksize);
754 kunmap_atomic(kaddr, KM_USER0);
755 flush_dcache_page(page);
756 set_buffer_uptodate(bh);
757 err = 0;
758 continue;
759 }
696 /* Failed to map the buffer, even after retrying. */ 760 /* Failed to map the buffer, even after retrying. */
761 if (!err)
762 err = -EIO;
697 bh->b_blocknr = -1; 763 bh->b_blocknr = -1;
698 ntfs_error(vol->sb, "Failed to write to inode 0x%lx, " 764 ntfs_error(vol->sb, "Failed to write to inode 0x%lx, "
699 "attribute type 0x%x, vcn 0x%llx, offset 0x%x " 765 "attribute type 0x%x, vcn 0x%llx, offset 0x%x "
700 "because its location on disk could not be " 766 "because its location on disk could not be "
701 "determined%s (error code %lli).", ni->mft_no, 767 "determined%s (error code %i).", ni->mft_no,
702 ni->type, (unsigned long long)vcn, 768 ni->type, (unsigned long long)vcn,
703 vcn_ofs, is_retry ? " even after " 769 vcn_ofs, is_retry ? " even after "
704 "retrying" : "", (long long)lcn); 770 "retrying" : "", err);
705 if (!err)
706 err = -EIO;
707 break; 771 break;
708 } while (block++, (bh = bh->b_this_page) != head); 772 } while (block++, (bh = bh->b_this_page) != head);
709 773
@@ -714,7 +778,7 @@ lock_retry_remap:
714 /* For the error case, need to reset bh to the beginning. */ 778 /* For the error case, need to reset bh to the beginning. */
715 bh = head; 779 bh = head;
716 780
717 /* Just an optimization, so ->readpage() isn't called later. */ 781 /* Just an optimization, so ->readpage() is not called later. */
718 if (unlikely(!PageUptodate(page))) { 782 if (unlikely(!PageUptodate(page))) {
719 int uptodate = 1; 783 int uptodate = 1;
720 do { 784 do {
@@ -730,7 +794,6 @@ lock_retry_remap:
730 794
731 /* Setup all mapped, dirty buffers for async write i/o. */ 795 /* Setup all mapped, dirty buffers for async write i/o. */
732 do { 796 do {
733 get_bh(bh);
734 if (buffer_mapped(bh) && buffer_dirty(bh)) { 797 if (buffer_mapped(bh) && buffer_dirty(bh)) {
735 lock_buffer(bh); 798 lock_buffer(bh);
736 if (test_clear_buffer_dirty(bh)) { 799 if (test_clear_buffer_dirty(bh)) {
@@ -768,14 +831,8 @@ lock_retry_remap:
768 831
769 BUG_ON(PageWriteback(page)); 832 BUG_ON(PageWriteback(page));
770 set_page_writeback(page); /* Keeps try_to_free_buffers() away. */ 833 set_page_writeback(page); /* Keeps try_to_free_buffers() away. */
771 unlock_page(page);
772 834
773 /* 835 /* Submit the prepared buffers for i/o. */
774 * Submit the prepared buffers for i/o. Note the page is unlocked,
775 * and the async write i/o completion handler can end_page_writeback()
776 * at any time after the *first* submit_bh(). So the buffers can then
777 * disappear...
778 */
779 need_end_writeback = TRUE; 836 need_end_writeback = TRUE;
780 do { 837 do {
781 struct buffer_head *next = bh->b_this_page; 838 struct buffer_head *next = bh->b_this_page;
@@ -783,9 +840,9 @@ lock_retry_remap:
783 submit_bh(WRITE, bh); 840 submit_bh(WRITE, bh);
784 need_end_writeback = FALSE; 841 need_end_writeback = FALSE;
785 } 842 }
786 put_bh(bh);
787 bh = next; 843 bh = next;
788 } while (bh != head); 844 } while (bh != head);
845 unlock_page(page);
789 846
790 /* If no i/o was started, need to end_page_writeback(). */ 847 /* If no i/o was started, need to end_page_writeback(). */
791 if (unlikely(need_end_writeback)) 848 if (unlikely(need_end_writeback))
@@ -860,7 +917,6 @@ static int ntfs_write_mst_block(struct page *page,
860 sync = (wbc->sync_mode == WB_SYNC_ALL); 917 sync = (wbc->sync_mode == WB_SYNC_ALL);
861 918
862 /* Make sure we have mapped buffers. */ 919 /* Make sure we have mapped buffers. */
863 BUG_ON(!page_has_buffers(page));
864 bh = head = page_buffers(page); 920 bh = head = page_buffers(page);
865 BUG_ON(!bh); 921 BUG_ON(!bh);
866 922
@@ -1280,38 +1336,42 @@ retry_writepage:
1280 ntfs_debug("Write outside i_size - truncated?"); 1336 ntfs_debug("Write outside i_size - truncated?");
1281 return 0; 1337 return 0;
1282 } 1338 }
1339 /*
1340 * Only $DATA attributes can be encrypted and only unnamed $DATA
1341 * attributes can be compressed. Index root can have the flags set but
1342 * this means to create compressed/encrypted files, not that the
1343 * attribute is compressed/encrypted.
1344 */
1345 if (ni->type != AT_INDEX_ROOT) {
1346 /* If file is encrypted, deny access, just like NT4. */
1347 if (NInoEncrypted(ni)) {
1348 unlock_page(page);
1349 BUG_ON(ni->type != AT_DATA);
1350 ntfs_debug("Denying write access to encrypted "
1351 "file.");
1352 return -EACCES;
1353 }
1354 /* Compressed data streams are handled in compress.c. */
1355 if (NInoNonResident(ni) && NInoCompressed(ni)) {
1356 BUG_ON(ni->type != AT_DATA);
1357 BUG_ON(ni->name_len);
1358 // TODO: Implement and replace this with
1359 // return ntfs_write_compressed_block(page);
1360 unlock_page(page);
1361 ntfs_error(vi->i_sb, "Writing to compressed files is "
1362 "not supported yet. Sorry.");
1363 return -EOPNOTSUPP;
1364 }
1365 // TODO: Implement and remove this check.
1366 if (NInoNonResident(ni) && NInoSparse(ni)) {
1367 unlock_page(page);
1368 ntfs_error(vi->i_sb, "Writing to sparse files is not "
1369 "supported yet. Sorry.");
1370 return -EOPNOTSUPP;
1371 }
1372 }
1283 /* NInoNonResident() == NInoIndexAllocPresent() */ 1373 /* NInoNonResident() == NInoIndexAllocPresent() */
1284 if (NInoNonResident(ni)) { 1374 if (NInoNonResident(ni)) {
1285 /*
1286 * Only unnamed $DATA attributes can be compressed, encrypted,
1287 * and/or sparse.
1288 */
1289 if (ni->type == AT_DATA && !ni->name_len) {
1290 /* If file is encrypted, deny access, just like NT4. */
1291 if (NInoEncrypted(ni)) {
1292 unlock_page(page);
1293 ntfs_debug("Denying write access to encrypted "
1294 "file.");
1295 return -EACCES;
1296 }
1297 /* Compressed data streams are handled in compress.c. */
1298 if (NInoCompressed(ni)) {
1299 // TODO: Implement and replace this check with
1300 // return ntfs_write_compressed_block(page);
1301 unlock_page(page);
1302 ntfs_error(vi->i_sb, "Writing to compressed "
1303 "files is not supported yet. "
1304 "Sorry.");
1305 return -EOPNOTSUPP;
1306 }
1307 // TODO: Implement and remove this check.
1308 if (NInoSparse(ni)) {
1309 unlock_page(page);
1310 ntfs_error(vi->i_sb, "Writing to sparse files "
1311 "is not supported yet. Sorry.");
1312 return -EOPNOTSUPP;
1313 }
1314 }
1315 /* We have to zero every time due to mmap-at-end-of-file. */ 1375 /* We have to zero every time due to mmap-at-end-of-file. */
1316 if (page->index >= (i_size >> PAGE_CACHE_SHIFT)) { 1376 if (page->index >= (i_size >> PAGE_CACHE_SHIFT)) {
1317 /* The page straddles i_size. */ 1377 /* The page straddles i_size. */
@@ -1324,14 +1384,16 @@ retry_writepage:
1324 /* Handle mst protected attributes. */ 1384 /* Handle mst protected attributes. */
1325 if (NInoMstProtected(ni)) 1385 if (NInoMstProtected(ni))
1326 return ntfs_write_mst_block(page, wbc); 1386 return ntfs_write_mst_block(page, wbc);
1327 /* Normal data stream. */ 1387 /* Normal, non-resident data stream. */
1328 return ntfs_write_block(page, wbc); 1388 return ntfs_write_block(page, wbc);
1329 } 1389 }
1330 /* 1390 /*
1331 * Attribute is resident, implying it is not compressed, encrypted, 1391 * Attribute is resident, implying it is not compressed, encrypted, or
1332 * sparse, or mst protected. This also means the attribute is smaller 1392 * mst protected. This also means the attribute is smaller than an mft
1333 * than an mft record and hence smaller than a page, so can simply 1393 * record and hence smaller than a page, so can simply return error on
1334 * return error on any pages with index above 0. 1394 * any pages with index above 0. Note the attribute can actually be
1395 * marked compressed but if it is resident the actual data is not
1396 * compressed so we are ok to ignore the compressed flag here.
1335 */ 1397 */
1336 BUG_ON(page_has_buffers(page)); 1398 BUG_ON(page_has_buffers(page));
1337 BUG_ON(!PageUptodate(page)); 1399 BUG_ON(!PageUptodate(page));
@@ -1380,30 +1442,14 @@ retry_writepage:
1380 BUG_ON(PageWriteback(page)); 1442 BUG_ON(PageWriteback(page));
1381 set_page_writeback(page); 1443 set_page_writeback(page);
1382 unlock_page(page); 1444 unlock_page(page);
1383
1384 /* 1445 /*
1385 * Here, we don't need to zero the out of bounds area everytime because 1446 * Here, we do not need to zero the out of bounds area everytime
1386 * the below memcpy() already takes care of the mmap-at-end-of-file 1447 * because the below memcpy() already takes care of the
1387 * requirements. If the file is converted to a non-resident one, then 1448 * mmap-at-end-of-file requirements. If the file is converted to a
1388 * the code path use is switched to the non-resident one where the 1449 * non-resident one, then the code path use is switched to the
1389 * zeroing happens on each ntfs_writepage() invocation. 1450 * non-resident one where the zeroing happens on each ntfs_writepage()
1390 * 1451 * invocation.
1391 * The above also applies nicely when i_size is decreased.
1392 *
1393 * When i_size is increased, the memory between the old and new i_size
1394 * _must_ be zeroed (or overwritten with new data). Otherwise we will
1395 * expose data to userspace/disk which should never have been exposed.
1396 *
1397 * FIXME: Ensure that i_size increases do the zeroing/overwriting and
1398 * if we cannot guarantee that, then enable the zeroing below. If the
1399 * zeroing below is enabled, we MUST move the unlock_page() from above
1400 * to after the kunmap_atomic(), i.e. just before the
1401 * end_page_writeback().
1402 * UPDATE: ntfs_prepare/commit_write() do the zeroing on i_size
1403 * increases for resident attributes so those are ok.
1404 * TODO: ntfs_truncate(), others?
1405 */ 1452 */
1406
1407 attr_len = le32_to_cpu(ctx->attr->data.resident.value_length); 1453 attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
1408 i_size = i_size_read(vi); 1454 i_size = i_size_read(vi);
1409 if (unlikely(attr_len > i_size)) { 1455 if (unlikely(attr_len > i_size)) {
@@ -1681,27 +1727,25 @@ lock_retry_remap:
1681 if (likely(!err)) 1727 if (likely(!err))
1682 goto lock_retry_remap; 1728 goto lock_retry_remap;
1683 rl = NULL; 1729 rl = NULL;
1684 lcn = err;
1685 } else if (!rl) 1730 } else if (!rl)
1686 up_read(&ni->runlist.lock); 1731 up_read(&ni->runlist.lock);
1687 /* 1732 /*
1688 * Failed to map the buffer, even after 1733 * Failed to map the buffer, even after
1689 * retrying. 1734 * retrying.
1690 */ 1735 */
1736 if (!err)
1737 err = -EIO;
1691 bh->b_blocknr = -1; 1738 bh->b_blocknr = -1;
1692 ntfs_error(vol->sb, "Failed to write to inode " 1739 ntfs_error(vol->sb, "Failed to write to inode "
1693 "0x%lx, attribute type 0x%x, " 1740 "0x%lx, attribute type 0x%x, "
1694 "vcn 0x%llx, offset 0x%x " 1741 "vcn 0x%llx, offset 0x%x "
1695 "because its location on disk " 1742 "because its location on disk "
1696 "could not be determined%s " 1743 "could not be determined%s "
1697 "(error code %lli).", 1744 "(error code %i).",
1698 ni->mft_no, ni->type, 1745 ni->mft_no, ni->type,
1699 (unsigned long long)vcn, 1746 (unsigned long long)vcn,
1700 vcn_ofs, is_retry ? " even " 1747 vcn_ofs, is_retry ? " even "
1701 "after retrying" : "", 1748 "after retrying" : "", err);
1702 (long long)lcn);
1703 if (!err)
1704 err = -EIO;
1705 goto err_out; 1749 goto err_out;
1706 } 1750 }
1707 /* We now have a successful remap, i.e. lcn >= 0. */ 1751 /* We now have a successful remap, i.e. lcn >= 0. */
@@ -2357,6 +2401,7 @@ void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs) {
2357 buffers_to_free = bh; 2401 buffers_to_free = bh;
2358 } 2402 }
2359 bh = head = page_buffers(page); 2403 bh = head = page_buffers(page);
2404 BUG_ON(!bh);
2360 do { 2405 do {
2361 bh_ofs = bh_offset(bh); 2406 bh_ofs = bh_offset(bh);
2362 if (bh_ofs + bh_size <= ofs) 2407 if (bh_ofs + bh_size <= ofs)
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index cd0f9e740b14..3f9a4ff42ee5 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -43,6 +43,9 @@
43 * which is not an error as such. This is -ENOENT. It means that @vcn is out 43 * which is not an error as such. This is -ENOENT. It means that @vcn is out
44 * of bounds of the runlist. 44 * of bounds of the runlist.
45 * 45 *
46 * Note the runlist can be NULL after this function returns if @vcn is zero and
47 * the attribute has zero allocated size, i.e. there simply is no runlist.
48 *
46 * Locking: - The runlist must be locked for writing. 49 * Locking: - The runlist must be locked for writing.
47 * - This function modifies the runlist. 50 * - This function modifies the runlist.
48 */ 51 */
@@ -54,6 +57,7 @@ int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn)
54 ATTR_RECORD *a; 57 ATTR_RECORD *a;
55 ntfs_attr_search_ctx *ctx; 58 ntfs_attr_search_ctx *ctx;
56 runlist_element *rl; 59 runlist_element *rl;
60 unsigned long flags;
57 int err = 0; 61 int err = 0;
58 62
59 ntfs_debug("Mapping runlist part containing vcn 0x%llx.", 63 ntfs_debug("Mapping runlist part containing vcn 0x%llx.",
@@ -85,8 +89,11 @@ int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn)
85 * ntfs_mapping_pairs_decompress() fails. 89 * ntfs_mapping_pairs_decompress() fails.
86 */ 90 */
87 end_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn) + 1; 91 end_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn) + 1;
88 if (unlikely(!a->data.non_resident.lowest_vcn && end_vcn <= 1)) 92 if (unlikely(!a->data.non_resident.lowest_vcn && end_vcn <= 1)) {
93 read_lock_irqsave(&ni->size_lock, flags);
89 end_vcn = ni->allocated_size >> ni->vol->cluster_size_bits; 94 end_vcn = ni->allocated_size >> ni->vol->cluster_size_bits;
95 read_unlock_irqrestore(&ni->size_lock, flags);
96 }
90 if (unlikely(vcn >= end_vcn)) { 97 if (unlikely(vcn >= end_vcn)) {
91 err = -ENOENT; 98 err = -ENOENT;
92 goto err_out; 99 goto err_out;
@@ -165,6 +172,7 @@ LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn,
165 const BOOL write_locked) 172 const BOOL write_locked)
166{ 173{
167 LCN lcn; 174 LCN lcn;
175 unsigned long flags;
168 BOOL is_retry = FALSE; 176 BOOL is_retry = FALSE;
169 177
170 ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, %s_locked.", 178 ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, %s_locked.",
@@ -173,6 +181,14 @@ LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn,
173 BUG_ON(!ni); 181 BUG_ON(!ni);
174 BUG_ON(!NInoNonResident(ni)); 182 BUG_ON(!NInoNonResident(ni));
175 BUG_ON(vcn < 0); 183 BUG_ON(vcn < 0);
184 if (!ni->runlist.rl) {
185 read_lock_irqsave(&ni->size_lock, flags);
186 if (!ni->allocated_size) {
187 read_unlock_irqrestore(&ni->size_lock, flags);
188 return LCN_ENOENT;
189 }
190 read_unlock_irqrestore(&ni->size_lock, flags);
191 }
176retry_remap: 192retry_remap:
177 /* Convert vcn to lcn. If that fails map the runlist and retry once. */ 193 /* Convert vcn to lcn. If that fails map the runlist and retry once. */
178 lcn = ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn); 194 lcn = ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn);
@@ -255,6 +271,7 @@ retry_remap:
255runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni, const VCN vcn, 271runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni, const VCN vcn,
256 const BOOL write_locked) 272 const BOOL write_locked)
257{ 273{
274 unsigned long flags;
258 runlist_element *rl; 275 runlist_element *rl;
259 int err = 0; 276 int err = 0;
260 BOOL is_retry = FALSE; 277 BOOL is_retry = FALSE;
@@ -265,6 +282,14 @@ runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni, const VCN vcn,
265 BUG_ON(!ni); 282 BUG_ON(!ni);
266 BUG_ON(!NInoNonResident(ni)); 283 BUG_ON(!NInoNonResident(ni));
267 BUG_ON(vcn < 0); 284 BUG_ON(vcn < 0);
285 if (!ni->runlist.rl) {
286 read_lock_irqsave(&ni->size_lock, flags);
287 if (!ni->allocated_size) {
288 read_unlock_irqrestore(&ni->size_lock, flags);
289 return ERR_PTR(-ENOENT);
290 }
291 read_unlock_irqrestore(&ni->size_lock, flags);
292 }
268retry_remap: 293retry_remap:
269 rl = ni->runlist.rl; 294 rl = ni->runlist.rl;
270 if (likely(rl && vcn >= rl[0].vcn)) { 295 if (likely(rl && vcn >= rl[0].vcn)) {
@@ -528,6 +553,11 @@ int load_attribute_list(ntfs_volume *vol, runlist *runlist, u8 *al_start,
528 block_size_bits = sb->s_blocksize_bits; 553 block_size_bits = sb->s_blocksize_bits;
529 down_read(&runlist->lock); 554 down_read(&runlist->lock);
530 rl = runlist->rl; 555 rl = runlist->rl;
556 if (!rl) {
557 ntfs_error(sb, "Cannot read attribute list since runlist is "
558 "missing.");
559 goto err_out;
560 }
531 /* Read all clusters specified by the runlist one run at a time. */ 561 /* Read all clusters specified by the runlist one run at a time. */
532 while (rl->length) { 562 while (rl->length) {
533 lcn = ntfs_rl_vcn_to_lcn(rl, rl->vcn); 563 lcn = ntfs_rl_vcn_to_lcn(rl, rl->vcn);
@@ -1247,6 +1277,46 @@ int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size)
1247} 1277}
1248 1278
1249/** 1279/**
1280 * ntfs_resident_attr_value_resize - resize the value of a resident attribute
1281 * @m: mft record containing attribute record
1282 * @a: attribute record whose value to resize
1283 * @new_size: new size in bytes to which to resize the attribute value of @a
1284 *
1285 * Resize the value of the attribute @a in the mft record @m to @new_size bytes.
1286 * If the value is made bigger, the newly allocated space is cleared.
1287 *
1288 * Return 0 on success and -errno on error. The following error codes are
1289 * defined:
1290 * -ENOSPC - Not enough space in the mft record @m to perform the resize.
1291 *
1292 * Note: On error, no modifications have been performed whatsoever.
1293 *
1294 * Warning: If you make a record smaller without having copied all the data you
1295 * are interested in the data may be overwritten.
1296 */
1297int ntfs_resident_attr_value_resize(MFT_RECORD *m, ATTR_RECORD *a,
1298 const u32 new_size)
1299{
1300 u32 old_size;
1301
1302 /* Resize the resident part of the attribute record. */
1303 if (ntfs_attr_record_resize(m, a,
1304 le16_to_cpu(a->data.resident.value_offset) + new_size))
1305 return -ENOSPC;
1306 /*
1307 * The resize succeeded! If we made the attribute value bigger, clear
1308 * the area between the old size and @new_size.
1309 */
1310 old_size = le32_to_cpu(a->data.resident.value_length);
1311 if (new_size > old_size)
1312 memset((u8*)a + le16_to_cpu(a->data.resident.value_offset) +
1313 old_size, 0, new_size - old_size);
1314 /* Finally update the length of the attribute value. */
1315 a->data.resident.value_length = cpu_to_le32(new_size);
1316 return 0;
1317}
1318
1319/**
1250 * ntfs_attr_make_non_resident - convert a resident to a non-resident attribute 1320 * ntfs_attr_make_non_resident - convert a resident to a non-resident attribute
1251 * @ni: ntfs inode describing the attribute to convert 1321 * @ni: ntfs inode describing the attribute to convert
1252 * 1322 *
@@ -1302,6 +1372,12 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
1302 return err; 1372 return err;
1303 } 1373 }
1304 /* 1374 /*
1375 * FIXME: Compressed and encrypted attributes are not supported when
1376 * writing and we should never have gotten here for them.
1377 */
1378 BUG_ON(NInoCompressed(ni));
1379 BUG_ON(NInoEncrypted(ni));
1380 /*
1305 * The size needs to be aligned to a cluster boundary for allocation 1381 * The size needs to be aligned to a cluster boundary for allocation
1306 * purposes. 1382 * purposes.
1307 */ 1383 */
@@ -1377,10 +1453,15 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
1377 BUG_ON(a->non_resident); 1453 BUG_ON(a->non_resident);
1378 /* 1454 /*
1379 * Calculate new offsets for the name and the mapping pairs array. 1455 * Calculate new offsets for the name and the mapping pairs array.
1380 * We assume the attribute is not compressed or sparse.
1381 */ 1456 */
1382 name_ofs = (offsetof(ATTR_REC, 1457 if (NInoSparse(ni) || NInoCompressed(ni))
1383 data.non_resident.compressed_size) + 7) & ~7; 1458 name_ofs = (offsetof(ATTR_REC,
1459 data.non_resident.compressed_size) +
1460 sizeof(a->data.non_resident.compressed_size) +
1461 7) & ~7;
1462 else
1463 name_ofs = (offsetof(ATTR_REC,
1464 data.non_resident.compressed_size) + 7) & ~7;
1384 mp_ofs = (name_ofs + a->name_length * sizeof(ntfschar) + 7) & ~7; 1465 mp_ofs = (name_ofs + a->name_length * sizeof(ntfschar) + 7) & ~7;
1385 /* 1466 /*
1386 * Determine the size of the resident part of the now non-resident 1467 * Determine the size of the resident part of the now non-resident
@@ -1419,24 +1500,23 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
1419 memmove((u8*)a + name_ofs, (u8*)a + le16_to_cpu(a->name_offset), 1500 memmove((u8*)a + name_ofs, (u8*)a + le16_to_cpu(a->name_offset),
1420 a->name_length * sizeof(ntfschar)); 1501 a->name_length * sizeof(ntfschar));
1421 a->name_offset = cpu_to_le16(name_ofs); 1502 a->name_offset = cpu_to_le16(name_ofs);
1422 /*
1423 * FIXME: For now just clear all of these as we do not support them
1424 * when writing.
1425 */
1426 a->flags &= cpu_to_le16(0xffff & ~le16_to_cpu(ATTR_IS_SPARSE |
1427 ATTR_IS_ENCRYPTED | ATTR_COMPRESSION_MASK));
1428 /* Setup the fields specific to non-resident attributes. */ 1503 /* Setup the fields specific to non-resident attributes. */
1429 a->data.non_resident.lowest_vcn = 0; 1504 a->data.non_resident.lowest_vcn = 0;
1430 a->data.non_resident.highest_vcn = cpu_to_sle64((new_size - 1) >> 1505 a->data.non_resident.highest_vcn = cpu_to_sle64((new_size - 1) >>
1431 vol->cluster_size_bits); 1506 vol->cluster_size_bits);
1432 a->data.non_resident.mapping_pairs_offset = cpu_to_le16(mp_ofs); 1507 a->data.non_resident.mapping_pairs_offset = cpu_to_le16(mp_ofs);
1433 a->data.non_resident.compression_unit = 0;
1434 memset(&a->data.non_resident.reserved, 0, 1508 memset(&a->data.non_resident.reserved, 0,
1435 sizeof(a->data.non_resident.reserved)); 1509 sizeof(a->data.non_resident.reserved));
1436 a->data.non_resident.allocated_size = cpu_to_sle64(new_size); 1510 a->data.non_resident.allocated_size = cpu_to_sle64(new_size);
1437 a->data.non_resident.data_size = 1511 a->data.non_resident.data_size =
1438 a->data.non_resident.initialized_size = 1512 a->data.non_resident.initialized_size =
1439 cpu_to_sle64(attr_size); 1513 cpu_to_sle64(attr_size);
1514 if (NInoSparse(ni) || NInoCompressed(ni)) {
1515 a->data.non_resident.compression_unit = 4;
1516 a->data.non_resident.compressed_size =
1517 a->data.non_resident.allocated_size;
1518 } else
1519 a->data.non_resident.compression_unit = 0;
1440 /* Generate the mapping pairs array into the attribute record. */ 1520 /* Generate the mapping pairs array into the attribute record. */
1441 err = ntfs_mapping_pairs_build(vol, (u8*)a + mp_ofs, 1521 err = ntfs_mapping_pairs_build(vol, (u8*)a + mp_ofs,
1442 arec_size - mp_ofs, rl, 0, -1, NULL); 1522 arec_size - mp_ofs, rl, 0, -1, NULL);
@@ -1446,16 +1526,19 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
1446 goto undo_err_out; 1526 goto undo_err_out;
1447 } 1527 }
1448 /* Setup the in-memory attribute structure to be non-resident. */ 1528 /* Setup the in-memory attribute structure to be non-resident. */
1449 /*
1450 * FIXME: For now just clear all of these as we do not support them
1451 * when writing.
1452 */
1453 NInoClearSparse(ni);
1454 NInoClearEncrypted(ni);
1455 NInoClearCompressed(ni);
1456 ni->runlist.rl = rl; 1529 ni->runlist.rl = rl;
1457 write_lock_irqsave(&ni->size_lock, flags); 1530 write_lock_irqsave(&ni->size_lock, flags);
1458 ni->allocated_size = new_size; 1531 ni->allocated_size = new_size;
1532 if (NInoSparse(ni) || NInoCompressed(ni)) {
1533 ni->itype.compressed.size = ni->allocated_size;
1534 ni->itype.compressed.block_size = 1U <<
1535 (a->data.non_resident.compression_unit +
1536 vol->cluster_size_bits);
1537 ni->itype.compressed.block_size_bits =
1538 ffs(ni->itype.compressed.block_size) - 1;
1539 ni->itype.compressed.block_clusters = 1U <<
1540 a->data.non_resident.compression_unit;
1541 }
1459 write_unlock_irqrestore(&ni->size_lock, flags); 1542 write_unlock_irqrestore(&ni->size_lock, flags);
1460 /* 1543 /*
1461 * This needs to be last since the address space operations ->readpage 1544 * This needs to be last since the address space operations ->readpage
@@ -1603,6 +1686,12 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
1603 BUG_ON(cnt < 0); 1686 BUG_ON(cnt < 0);
1604 if (!cnt) 1687 if (!cnt)
1605 goto done; 1688 goto done;
1689 /*
1690 * FIXME: Compressed and encrypted attributes are not supported when
1691 * writing and we should never have gotten here for them.
1692 */
1693 BUG_ON(NInoCompressed(ni));
1694 BUG_ON(NInoEncrypted(ni));
1606 mapping = VFS_I(ni)->i_mapping; 1695 mapping = VFS_I(ni)->i_mapping;
1607 /* Work out the starting index and page offset. */ 1696 /* Work out the starting index and page offset. */
1608 idx = ofs >> PAGE_CACHE_SHIFT; 1697 idx = ofs >> PAGE_CACHE_SHIFT;
diff --git a/fs/ntfs/attrib.h b/fs/ntfs/attrib.h
index 0e4ac6d3c0e7..0618ed6fd7b3 100644
--- a/fs/ntfs/attrib.h
+++ b/fs/ntfs/attrib.h
@@ -99,6 +99,8 @@ extern int ntfs_attr_can_be_resident(const ntfs_volume *vol,
99 const ATTR_TYPE type); 99 const ATTR_TYPE type);
100 100
101extern int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size); 101extern int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size);
102extern int ntfs_resident_attr_value_resize(MFT_RECORD *m, ATTR_RECORD *a,
103 const u32 new_size);
102 104
103extern int ntfs_attr_make_non_resident(ntfs_inode *ni); 105extern int ntfs_attr_make_non_resident(ntfs_inode *ni);
104 106
diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c
index 6d265cfd49aa..25d24106f893 100644
--- a/fs/ntfs/compress.c
+++ b/fs/ntfs/compress.c
@@ -539,7 +539,6 @@ int ntfs_read_compressed_block(struct page *page)
539 if (unlikely(!pages || !bhs)) { 539 if (unlikely(!pages || !bhs)) {
540 kfree(bhs); 540 kfree(bhs);
541 kfree(pages); 541 kfree(pages);
542 SetPageError(page);
543 unlock_page(page); 542 unlock_page(page);
544 ntfs_error(vol->sb, "Failed to allocate internal buffers."); 543 ntfs_error(vol->sb, "Failed to allocate internal buffers.");
545 return -ENOMEM; 544 return -ENOMEM;
@@ -871,9 +870,6 @@ lock_retry_remap:
871 for (; prev_cur_page < cur_page; prev_cur_page++) { 870 for (; prev_cur_page < cur_page; prev_cur_page++) {
872 page = pages[prev_cur_page]; 871 page = pages[prev_cur_page];
873 if (page) { 872 if (page) {
874 if (prev_cur_page == xpage &&
875 !xpage_done)
876 SetPageError(page);
877 flush_dcache_page(page); 873 flush_dcache_page(page);
878 kunmap(page); 874 kunmap(page);
879 unlock_page(page); 875 unlock_page(page);
@@ -904,8 +900,6 @@ lock_retry_remap:
904 "Terminating them with extreme " 900 "Terminating them with extreme "
905 "prejudice. Inode 0x%lx, page index " 901 "prejudice. Inode 0x%lx, page index "
906 "0x%lx.", ni->mft_no, page->index); 902 "0x%lx.", ni->mft_no, page->index);
907 if (cur_page == xpage && !xpage_done)
908 SetPageError(page);
909 flush_dcache_page(page); 903 flush_dcache_page(page);
910 kunmap(page); 904 kunmap(page);
911 unlock_page(page); 905 unlock_page(page);
@@ -953,8 +947,6 @@ err_out:
953 for (i = cur_page; i < max_page; i++) { 947 for (i = cur_page; i < max_page; i++) {
954 page = pages[i]; 948 page = pages[i];
955 if (page) { 949 if (page) {
956 if (i == xpage && !xpage_done)
957 SetPageError(page);
958 flush_dcache_page(page); 950 flush_dcache_page(page);
959 kunmap(page); 951 kunmap(page);
960 unlock_page(page); 952 unlock_page(page);
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index 46779471c542..795c3d1930f5 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -1051,7 +1051,8 @@ static inline int ntfs_filldir(ntfs_volume *vol, loff_t fpos,
1051 ie->key.file_name.file_name_length, &name, 1051 ie->key.file_name.file_name_length, &name,
1052 NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1); 1052 NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1);
1053 if (name_len <= 0) { 1053 if (name_len <= 0) {
1054 ntfs_debug("Skipping unrepresentable file."); 1054 ntfs_warning(vol->sb, "Skipping unrepresentable inode 0x%llx.",
1055 (long long)MREF_LE(ie->data.dir.indexed_file));
1055 return 0; 1056 return 0;
1056 } 1057 }
1057 if (ie->key.file_name.file_attributes & 1058 if (ie->key.file_name.file_attributes &
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index e0f530ce6b99..be9fd1dd423d 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * file.c - NTFS kernel file operations. Part of the Linux-NTFS project. 2 * file.c - NTFS kernel file operations. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2001-2004 Anton Altaparmakov 4 * Copyright (c) 2001-2005 Anton Altaparmakov
5 * 5 *
6 * This program/include file is free software; you can redistribute it and/or 6 * This program/include file is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as published 7 * modify it under the terms of the GNU General Public License as published
@@ -94,6 +94,11 @@ static int ntfs_file_fsync(struct file *filp, struct dentry *dentry,
94 if (!datasync || !NInoNonResident(NTFS_I(vi))) 94 if (!datasync || !NInoNonResident(NTFS_I(vi)))
95 ret = ntfs_write_inode(vi, 1); 95 ret = ntfs_write_inode(vi, 1);
96 write_inode_now(vi, !datasync); 96 write_inode_now(vi, !datasync);
97 /*
98 * NOTE: If we were to use mapping->private_list (see ext2 and
99 * fs/buffer.c) for dirty blocks then we could optimize the below to be
100 * sync_mapping_buffers(vi->i_mapping).
101 */
97 err = sync_blockdev(vi->i_sb->s_bdev); 102 err = sync_blockdev(vi->i_sb->s_bdev);
98 if (unlikely(err && !ret)) 103 if (unlikely(err && !ret))
99 ret = err; 104 ret = err;
diff --git a/fs/ntfs/index.c b/fs/ntfs/index.c
index 11fd5307d780..8f2d5727546f 100644
--- a/fs/ntfs/index.c
+++ b/fs/ntfs/index.c
@@ -205,6 +205,7 @@ int ntfs_index_lookup(const void *key, const int key_len,
205 &ie->key, key_len)) { 205 &ie->key, key_len)) {
206ir_done: 206ir_done:
207 ictx->is_in_root = TRUE; 207 ictx->is_in_root = TRUE;
208 ictx->ir = ir;
208 ictx->actx = actx; 209 ictx->actx = actx;
209 ictx->base_ni = base_ni; 210 ictx->base_ni = base_ni;
210 ictx->ia = NULL; 211 ictx->ia = NULL;
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 886214a77f90..dc4bbe3acf5c 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -1013,41 +1013,50 @@ skip_large_dir_stuff:
1013 } 1013 }
1014 a = ctx->attr; 1014 a = ctx->attr;
1015 /* Setup the state. */ 1015 /* Setup the state. */
1016 if (a->non_resident) { 1016 if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_SPARSE)) {
1017 NInoSetNonResident(ni); 1017 if (a->flags & ATTR_COMPRESSION_MASK) {
1018 if (a->flags & (ATTR_COMPRESSION_MASK | 1018 NInoSetCompressed(ni);
1019 ATTR_IS_SPARSE)) { 1019 if (vol->cluster_size > 4096) {
1020 if (a->flags & ATTR_COMPRESSION_MASK) { 1020 ntfs_error(vi->i_sb, "Found "
1021 NInoSetCompressed(ni);
1022 if (vol->cluster_size > 4096) {
1023 ntfs_error(vi->i_sb, "Found "
1024 "compressed data but " 1021 "compressed data but "
1025 "compression is " 1022 "compression is "
1026 "disabled due to " 1023 "disabled due to "
1027 "cluster size (%i) > " 1024 "cluster size (%i) > "
1028 "4kiB.", 1025 "4kiB.",
1029 vol->cluster_size); 1026 vol->cluster_size);
1030 goto unm_err_out; 1027 goto unm_err_out;
1031 } 1028 }
1032 if ((a->flags & ATTR_COMPRESSION_MASK) 1029 if ((a->flags & ATTR_COMPRESSION_MASK)
1033 != ATTR_IS_COMPRESSED) { 1030 != ATTR_IS_COMPRESSED) {
1034 ntfs_error(vi->i_sb, "Found " 1031 ntfs_error(vi->i_sb, "Found unknown "
1035 "unknown compression " 1032 "compression method "
1036 "method or corrupt " 1033 "or corrupt file.");
1037 "file."); 1034 goto unm_err_out;
1038 goto unm_err_out;
1039 }
1040 } 1035 }
1041 if (a->flags & ATTR_IS_SPARSE) 1036 }
1042 NInoSetSparse(ni); 1037 if (a->flags & ATTR_IS_SPARSE)
1038 NInoSetSparse(ni);
1039 }
1040 if (a->flags & ATTR_IS_ENCRYPTED) {
1041 if (NInoCompressed(ni)) {
1042 ntfs_error(vi->i_sb, "Found encrypted and "
1043 "compressed data.");
1044 goto unm_err_out;
1045 }
1046 NInoSetEncrypted(ni);
1047 }
1048 if (a->non_resident) {
1049 NInoSetNonResident(ni);
1050 if (NInoCompressed(ni) || NInoSparse(ni)) {
1043 if (a->data.non_resident.compression_unit != 1051 if (a->data.non_resident.compression_unit !=
1044 4) { 1052 4) {
1045 ntfs_error(vi->i_sb, "Found " 1053 ntfs_error(vi->i_sb, "Found "
1046 "nonstandard compression unit " 1054 "nonstandard "
1047 "(%u instead of 4). Cannot " 1055 "compression unit (%u "
1048 "handle this.", 1056 "instead of 4). "
1049 a->data.non_resident. 1057 "Cannot handle this.",
1050 compression_unit); 1058 a->data.non_resident.
1059 compression_unit);
1051 err = -EOPNOTSUPP; 1060 err = -EOPNOTSUPP;
1052 goto unm_err_out; 1061 goto unm_err_out;
1053 } 1062 }
@@ -1065,14 +1074,6 @@ skip_large_dir_stuff:
1065 a->data.non_resident. 1074 a->data.non_resident.
1066 compressed_size); 1075 compressed_size);
1067 } 1076 }
1068 if (a->flags & ATTR_IS_ENCRYPTED) {
1069 if (a->flags & ATTR_COMPRESSION_MASK) {
1070 ntfs_error(vi->i_sb, "Found encrypted "
1071 "and compressed data.");
1072 goto unm_err_out;
1073 }
1074 NInoSetEncrypted(ni);
1075 }
1076 if (a->data.non_resident.lowest_vcn) { 1077 if (a->data.non_resident.lowest_vcn) {
1077 ntfs_error(vi->i_sb, "First extent of $DATA " 1078 ntfs_error(vi->i_sb, "First extent of $DATA "
1078 "attribute has non zero " 1079 "attribute has non zero "
@@ -1212,6 +1213,75 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
1212 if (unlikely(err)) 1213 if (unlikely(err))
1213 goto unm_err_out; 1214 goto unm_err_out;
1214 a = ctx->attr; 1215 a = ctx->attr;
1216 if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_SPARSE)) {
1217 if (a->flags & ATTR_COMPRESSION_MASK) {
1218 NInoSetCompressed(ni);
1219 if ((ni->type != AT_DATA) || (ni->type == AT_DATA &&
1220 ni->name_len)) {
1221 ntfs_error(vi->i_sb, "Found compressed "
1222 "non-data or named data "
1223 "attribute. Please report "
1224 "you saw this message to "
1225 "linux-ntfs-dev@lists."
1226 "sourceforge.net");
1227 goto unm_err_out;
1228 }
1229 if (vol->cluster_size > 4096) {
1230 ntfs_error(vi->i_sb, "Found compressed "
1231 "attribute but compression is "
1232 "disabled due to cluster size "
1233 "(%i) > 4kiB.",
1234 vol->cluster_size);
1235 goto unm_err_out;
1236 }
1237 if ((a->flags & ATTR_COMPRESSION_MASK) !=
1238 ATTR_IS_COMPRESSED) {
1239 ntfs_error(vi->i_sb, "Found unknown "
1240 "compression method.");
1241 goto unm_err_out;
1242 }
1243 }
1244 /*
1245 * The compression flag set in an index root just means to
1246 * compress all files.
1247 */
1248 if (NInoMstProtected(ni) && ni->type != AT_INDEX_ROOT) {
1249 ntfs_error(vi->i_sb, "Found mst protected attribute "
1250 "but the attribute is %s. Please "
1251 "report you saw this message to "
1252 "linux-ntfs-dev@lists.sourceforge.net",
1253 NInoCompressed(ni) ? "compressed" :
1254 "sparse");
1255 goto unm_err_out;
1256 }
1257 if (a->flags & ATTR_IS_SPARSE)
1258 NInoSetSparse(ni);
1259 }
1260 if (a->flags & ATTR_IS_ENCRYPTED) {
1261 if (NInoCompressed(ni)) {
1262 ntfs_error(vi->i_sb, "Found encrypted and compressed "
1263 "data.");
1264 goto unm_err_out;
1265 }
1266 /*
1267 * The encryption flag set in an index root just means to
1268 * encrypt all files.
1269 */
1270 if (NInoMstProtected(ni) && ni->type != AT_INDEX_ROOT) {
1271 ntfs_error(vi->i_sb, "Found mst protected attribute "
1272 "but the attribute is encrypted. "
1273 "Please report you saw this message "
1274 "to linux-ntfs-dev@lists.sourceforge."
1275 "net");
1276 goto unm_err_out;
1277 }
1278 if (ni->type != AT_DATA) {
1279 ntfs_error(vi->i_sb, "Found encrypted non-data "
1280 "attribute.");
1281 goto unm_err_out;
1282 }
1283 NInoSetEncrypted(ni);
1284 }
1215 if (!a->non_resident) { 1285 if (!a->non_resident) {
1216 /* Ensure the attribute name is placed before the value. */ 1286 /* Ensure the attribute name is placed before the value. */
1217 if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >= 1287 if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
@@ -1220,11 +1290,10 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
1220 "the attribute value."); 1290 "the attribute value.");
1221 goto unm_err_out; 1291 goto unm_err_out;
1222 } 1292 }
1223 if (NInoMstProtected(ni) || a->flags) { 1293 if (NInoMstProtected(ni)) {
1224 ntfs_error(vi->i_sb, "Found mst protected attribute " 1294 ntfs_error(vi->i_sb, "Found mst protected attribute "
1225 "or attribute with non-zero flags but " 1295 "but the attribute is resident. "
1226 "the attribute is resident. Please " 1296 "Please report you saw this message to "
1227 "report you saw this message to "
1228 "linux-ntfs-dev@lists.sourceforge.net"); 1297 "linux-ntfs-dev@lists.sourceforge.net");
1229 goto unm_err_out; 1298 goto unm_err_out;
1230 } 1299 }
@@ -1250,50 +1319,8 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
1250 "the mapping pairs array."); 1319 "the mapping pairs array.");
1251 goto unm_err_out; 1320 goto unm_err_out;
1252 } 1321 }
1253 if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_SPARSE)) { 1322 if ((NInoCompressed(ni) || NInoSparse(ni)) &&
1254 if (a->flags & ATTR_COMPRESSION_MASK) { 1323 ni->type != AT_INDEX_ROOT) {
1255 NInoSetCompressed(ni);
1256 if ((ni->type != AT_DATA) || (ni->type ==
1257 AT_DATA && ni->name_len)) {
1258 ntfs_error(vi->i_sb, "Found compressed "
1259 "non-data or named "
1260 "data attribute. "
1261 "Please report you "
1262 "saw this message to "
1263 "linux-ntfs-dev@lists."
1264 "sourceforge.net");
1265 goto unm_err_out;
1266 }
1267 if (vol->cluster_size > 4096) {
1268 ntfs_error(vi->i_sb, "Found compressed "
1269 "attribute but "
1270 "compression is "
1271 "disabled due to "
1272 "cluster size (%i) > "
1273 "4kiB.",
1274 vol->cluster_size);
1275 goto unm_err_out;
1276 }
1277 if ((a->flags & ATTR_COMPRESSION_MASK) !=
1278 ATTR_IS_COMPRESSED) {
1279 ntfs_error(vi->i_sb, "Found unknown "
1280 "compression method.");
1281 goto unm_err_out;
1282 }
1283 }
1284 if (NInoMstProtected(ni)) {
1285 ntfs_error(vi->i_sb, "Found mst protected "
1286 "attribute but the attribute "
1287 "is %s. Please report you "
1288 "saw this message to "
1289 "linux-ntfs-dev@lists."
1290 "sourceforge.net",
1291 NInoCompressed(ni) ?
1292 "compressed" : "sparse");
1293 goto unm_err_out;
1294 }
1295 if (a->flags & ATTR_IS_SPARSE)
1296 NInoSetSparse(ni);
1297 if (a->data.non_resident.compression_unit != 4) { 1324 if (a->data.non_resident.compression_unit != 4) {
1298 ntfs_error(vi->i_sb, "Found nonstandard " 1325 ntfs_error(vi->i_sb, "Found nonstandard "
1299 "compression unit (%u instead " 1326 "compression unit (%u instead "
@@ -1313,23 +1340,6 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
1313 ni->itype.compressed.size = sle64_to_cpu( 1340 ni->itype.compressed.size = sle64_to_cpu(
1314 a->data.non_resident.compressed_size); 1341 a->data.non_resident.compressed_size);
1315 } 1342 }
1316 if (a->flags & ATTR_IS_ENCRYPTED) {
1317 if (a->flags & ATTR_COMPRESSION_MASK) {
1318 ntfs_error(vi->i_sb, "Found encrypted and "
1319 "compressed data.");
1320 goto unm_err_out;
1321 }
1322 if (NInoMstProtected(ni)) {
1323 ntfs_error(vi->i_sb, "Found mst protected "
1324 "attribute but the attribute "
1325 "is encrypted. Please report "
1326 "you saw this message to "
1327 "linux-ntfs-dev@lists."
1328 "sourceforge.net");
1329 goto unm_err_out;
1330 }
1331 NInoSetEncrypted(ni);
1332 }
1333 if (a->data.non_resident.lowest_vcn) { 1343 if (a->data.non_resident.lowest_vcn) {
1334 ntfs_error(vi->i_sb, "First extent of attribute has " 1344 ntfs_error(vi->i_sb, "First extent of attribute has "
1335 "non-zero lowest_vcn."); 1345 "non-zero lowest_vcn.");
@@ -1348,12 +1358,12 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
1348 vi->i_mapping->a_ops = &ntfs_mst_aops; 1358 vi->i_mapping->a_ops = &ntfs_mst_aops;
1349 else 1359 else
1350 vi->i_mapping->a_ops = &ntfs_aops; 1360 vi->i_mapping->a_ops = &ntfs_aops;
1351 if (NInoCompressed(ni) || NInoSparse(ni)) 1361 if ((NInoCompressed(ni) || NInoSparse(ni)) && ni->type != AT_INDEX_ROOT)
1352 vi->i_blocks = ni->itype.compressed.size >> 9; 1362 vi->i_blocks = ni->itype.compressed.size >> 9;
1353 else 1363 else
1354 vi->i_blocks = ni->allocated_size >> 9; 1364 vi->i_blocks = ni->allocated_size >> 9;
1355 /* 1365 /*
1356 * Make sure the base inode doesn't go away and attach it to the 1366 * Make sure the base inode does not go away and attach it to the
1357 * attribute inode. 1367 * attribute inode.
1358 */ 1368 */
1359 igrab(base_vi); 1369 igrab(base_vi);
@@ -1480,7 +1490,10 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
1480 "after the attribute value."); 1490 "after the attribute value.");
1481 goto unm_err_out; 1491 goto unm_err_out;
1482 } 1492 }
1483 /* Compressed/encrypted/sparse index root is not allowed. */ 1493 /*
1494 * Compressed/encrypted/sparse index root is not allowed, except for
1495 * directories of course but those are not dealt with here.
1496 */
1484 if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_ENCRYPTED | 1497 if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_ENCRYPTED |
1485 ATTR_IS_SPARSE)) { 1498 ATTR_IS_SPARSE)) {
1486 ntfs_error(vi->i_sb, "Found compressed/encrypted/sparse index " 1499 ntfs_error(vi->i_sb, "Found compressed/encrypted/sparse index "
@@ -2430,16 +2443,18 @@ int ntfs_setattr(struct dentry *dentry, struct iattr *attr)
2430 * We skipped the truncate but must still update 2443 * We skipped the truncate but must still update
2431 * timestamps. 2444 * timestamps.
2432 */ 2445 */
2433 ia_valid |= ATTR_MTIME|ATTR_CTIME; 2446 ia_valid |= ATTR_MTIME | ATTR_CTIME;
2434 } 2447 }
2435 } 2448 }
2436
2437 if (ia_valid & ATTR_ATIME) 2449 if (ia_valid & ATTR_ATIME)
2438 vi->i_atime = attr->ia_atime; 2450 vi->i_atime = timespec_trunc(attr->ia_atime,
2451 vi->i_sb->s_time_gran);
2439 if (ia_valid & ATTR_MTIME) 2452 if (ia_valid & ATTR_MTIME)
2440 vi->i_mtime = attr->ia_mtime; 2453 vi->i_mtime = timespec_trunc(attr->ia_mtime,
2454 vi->i_sb->s_time_gran);
2441 if (ia_valid & ATTR_CTIME) 2455 if (ia_valid & ATTR_CTIME)
2442 vi->i_ctime = attr->ia_ctime; 2456 vi->i_ctime = timespec_trunc(attr->ia_ctime,
2457 vi->i_sb->s_time_gran);
2443 mark_inode_dirty(vi); 2458 mark_inode_dirty(vi);
2444out: 2459out:
2445 return err; 2460 return err;
diff --git a/fs/ntfs/lcnalloc.c b/fs/ntfs/lcnalloc.c
index a4bc07616e5d..7b5934290685 100644
--- a/fs/ntfs/lcnalloc.c
+++ b/fs/ntfs/lcnalloc.c
@@ -54,6 +54,8 @@ int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol,
54 int ret = 0; 54 int ret = 0;
55 55
56 ntfs_debug("Entering."); 56 ntfs_debug("Entering.");
57 if (!rl)
58 return 0;
57 for (; rl->length; rl++) { 59 for (; rl->length; rl++) {
58 int err; 60 int err;
59 61
@@ -163,17 +165,9 @@ runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn,
163 BUG_ON(zone < FIRST_ZONE); 165 BUG_ON(zone < FIRST_ZONE);
164 BUG_ON(zone > LAST_ZONE); 166 BUG_ON(zone > LAST_ZONE);
165 167
166 /* Return empty runlist if @count == 0 */ 168 /* Return NULL if @count is zero. */
167 // FIXME: Do we want to just return NULL instead? (AIA) 169 if (!count)
168 if (!count) { 170 return NULL;
169 rl = ntfs_malloc_nofs(PAGE_SIZE);
170 if (!rl)
171 return ERR_PTR(-ENOMEM);
172 rl[0].vcn = start_vcn;
173 rl[0].lcn = LCN_RL_NOT_MAPPED;
174 rl[0].length = 0;
175 return rl;
176 }
177 /* Take the lcnbmp lock for writing. */ 171 /* Take the lcnbmp lock for writing. */
178 down_write(&vol->lcnbmp_lock); 172 down_write(&vol->lcnbmp_lock);
179 /* 173 /*
@@ -788,7 +782,8 @@ out:
788 * @vi: vfs inode whose runlist describes the clusters to free 782 * @vi: vfs inode whose runlist describes the clusters to free
789 * @start_vcn: vcn in the runlist of @vi at which to start freeing clusters 783 * @start_vcn: vcn in the runlist of @vi at which to start freeing clusters
790 * @count: number of clusters to free or -1 for all clusters 784 * @count: number of clusters to free or -1 for all clusters
791 * @is_rollback: if TRUE this is a rollback operation 785 * @write_locked: true if the runlist is locked for writing
786 * @is_rollback: true if this is a rollback operation
792 * 787 *
793 * Free @count clusters starting at the cluster @start_vcn in the runlist 788 * Free @count clusters starting at the cluster @start_vcn in the runlist
794 * described by the vfs inode @vi. 789 * described by the vfs inode @vi.
@@ -806,17 +801,17 @@ out:
806 * Return the number of deallocated clusters (not counting sparse ones) on 801 * Return the number of deallocated clusters (not counting sparse ones) on
807 * success and -errno on error. 802 * success and -errno on error.
808 * 803 *
809 * Locking: - The runlist described by @vi must be unlocked on entry and is 804 * Locking: - The runlist described by @vi must be locked on entry and is
810 * unlocked on return. 805 * locked on return. Note if the runlist is locked for reading the
811 * - This function takes the runlist lock of @vi for reading and 806 * lock may be dropped and reacquired. Note the runlist may be
812 * sometimes for writing and sometimes modifies the runlist. 807 * modified when needed runlist fragments need to be mapped.
813 * - The volume lcn bitmap must be unlocked on entry and is unlocked 808 * - The volume lcn bitmap must be unlocked on entry and is unlocked
814 * on return. 809 * on return.
815 * - This function takes the volume lcn bitmap lock for writing and 810 * - This function takes the volume lcn bitmap lock for writing and
816 * modifies the bitmap contents. 811 * modifies the bitmap contents.
817 */ 812 */
818s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count, 813s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
819 const BOOL is_rollback) 814 const BOOL write_locked, const BOOL is_rollback)
820{ 815{
821 s64 delta, to_free, total_freed, real_freed; 816 s64 delta, to_free, total_freed, real_freed;
822 ntfs_inode *ni; 817 ntfs_inode *ni;
@@ -848,8 +843,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
848 843
849 total_freed = real_freed = 0; 844 total_freed = real_freed = 0;
850 845
851 down_read(&ni->runlist.lock); 846 rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, write_locked);
852 rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, FALSE);
853 if (IS_ERR(rl)) { 847 if (IS_ERR(rl)) {
854 if (!is_rollback) 848 if (!is_rollback)
855 ntfs_error(vol->sb, "Failed to find first runlist " 849 ntfs_error(vol->sb, "Failed to find first runlist "
@@ -903,7 +897,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
903 897
904 /* Attempt to map runlist. */ 898 /* Attempt to map runlist. */
905 vcn = rl->vcn; 899 vcn = rl->vcn;
906 rl = ntfs_attr_find_vcn_nolock(ni, vcn, FALSE); 900 rl = ntfs_attr_find_vcn_nolock(ni, vcn, write_locked);
907 if (IS_ERR(rl)) { 901 if (IS_ERR(rl)) {
908 err = PTR_ERR(rl); 902 err = PTR_ERR(rl);
909 if (!is_rollback) 903 if (!is_rollback)
@@ -950,7 +944,6 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
950 /* Update the total done clusters. */ 944 /* Update the total done clusters. */
951 total_freed += to_free; 945 total_freed += to_free;
952 } 946 }
953 up_read(&ni->runlist.lock);
954 if (likely(!is_rollback)) 947 if (likely(!is_rollback))
955 up_write(&vol->lcnbmp_lock); 948 up_write(&vol->lcnbmp_lock);
956 949
@@ -960,7 +953,6 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
960 ntfs_debug("Done."); 953 ntfs_debug("Done.");
961 return real_freed; 954 return real_freed;
962err_out: 955err_out:
963 up_read(&ni->runlist.lock);
964 if (is_rollback) 956 if (is_rollback)
965 return err; 957 return err;
966 /* If no real clusters were freed, no need to rollback. */ 958 /* If no real clusters were freed, no need to rollback. */
@@ -973,7 +965,8 @@ err_out:
973 * If rollback fails, set the volume errors flag, emit an error 965 * If rollback fails, set the volume errors flag, emit an error
974 * message, and return the error code. 966 * message, and return the error code.
975 */ 967 */
976 delta = __ntfs_cluster_free(vi, start_vcn, total_freed, TRUE); 968 delta = __ntfs_cluster_free(vi, start_vcn, total_freed, write_locked,
969 TRUE);
977 if (delta < 0) { 970 if (delta < 0) {
978 ntfs_error(vol->sb, "Failed to rollback (error %i). Leaving " 971 ntfs_error(vol->sb, "Failed to rollback (error %i). Leaving "
979 "inconsistent metadata! Unmount and run " 972 "inconsistent metadata! Unmount and run "
diff --git a/fs/ntfs/lcnalloc.h b/fs/ntfs/lcnalloc.h
index 4cac1c024af6..e4d7fb98d685 100644
--- a/fs/ntfs/lcnalloc.h
+++ b/fs/ntfs/lcnalloc.h
@@ -43,13 +43,14 @@ extern runlist_element *ntfs_cluster_alloc(ntfs_volume *vol,
43 const NTFS_CLUSTER_ALLOCATION_ZONES zone); 43 const NTFS_CLUSTER_ALLOCATION_ZONES zone);
44 44
45extern s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, 45extern s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn,
46 s64 count, const BOOL is_rollback); 46 s64 count, const BOOL write_locked, const BOOL is_rollback);
47 47
48/** 48/**
49 * ntfs_cluster_free - free clusters on an ntfs volume 49 * ntfs_cluster_free - free clusters on an ntfs volume
50 * @vi: vfs inode whose runlist describes the clusters to free 50 * @vi: vfs inode whose runlist describes the clusters to free
51 * @start_vcn: vcn in the runlist of @vi at which to start freeing clusters 51 * @start_vcn: vcn in the runlist of @vi at which to start freeing clusters
52 * @count: number of clusters to free or -1 for all clusters 52 * @count: number of clusters to free or -1 for all clusters
53 * @write_locked: true if the runlist is locked for writing
53 * 54 *
54 * Free @count clusters starting at the cluster @start_vcn in the runlist 55 * Free @count clusters starting at the cluster @start_vcn in the runlist
55 * described by the vfs inode @vi. 56 * described by the vfs inode @vi.
@@ -64,19 +65,19 @@ extern s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn,
64 * Return the number of deallocated clusters (not counting sparse ones) on 65 * Return the number of deallocated clusters (not counting sparse ones) on
65 * success and -errno on error. 66 * success and -errno on error.
66 * 67 *
67 * Locking: - The runlist described by @vi must be unlocked on entry and is 68 * Locking: - The runlist described by @vi must be locked on entry and is
68 * unlocked on return. 69 * locked on return. Note if the runlist is locked for reading the
69 * - This function takes the runlist lock of @vi for reading and 70 * lock may be dropped and reacquired. Note the runlist may be
70 * sometimes for writing and sometimes modifies the runlist. 71 * modified when needed runlist fragments need to be mapped.
71 * - The volume lcn bitmap must be unlocked on entry and is unlocked 72 * - The volume lcn bitmap must be unlocked on entry and is unlocked
72 * on return. 73 * on return.
73 * - This function takes the volume lcn bitmap lock for writing and 74 * - This function takes the volume lcn bitmap lock for writing and
74 * modifies the bitmap contents. 75 * modifies the bitmap contents.
75 */ 76 */
76static inline s64 ntfs_cluster_free(struct inode *vi, const VCN start_vcn, 77static inline s64 ntfs_cluster_free(struct inode *vi, const VCN start_vcn,
77 s64 count) 78 s64 count, const BOOL write_locked)
78{ 79{
79 return __ntfs_cluster_free(vi, start_vcn, count, FALSE); 80 return __ntfs_cluster_free(vi, start_vcn, count, write_locked, FALSE);
80} 81}
81 82
82extern int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol, 83extern int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol,
@@ -93,8 +94,10 @@ extern int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol,
93 * 94 *
94 * Return 0 on success and -errno on error. 95 * Return 0 on success and -errno on error.
95 * 96 *
96 * Locking: This function takes the volume lcn bitmap lock for writing and 97 * Locking: - This function takes the volume lcn bitmap lock for writing and
97 * modifies the bitmap contents. 98 * modifies the bitmap contents.
99 * - The caller must have locked the runlist @rl for reading or
100 * writing.
98 */ 101 */
99static inline int ntfs_cluster_free_from_rl(ntfs_volume *vol, 102static inline int ntfs_cluster_free_from_rl(ntfs_volume *vol,
100 const runlist_element *rl) 103 const runlist_element *rl)
diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c
index 8edb8e20fb08..0173e95500d9 100644
--- a/fs/ntfs/logfile.c
+++ b/fs/ntfs/logfile.c
@@ -121,7 +121,7 @@ static BOOL ntfs_check_restart_page_header(struct inode *vi,
121 */ 121 */
122 if (!ntfs_is_chkd_record(rp->magic) && sle64_to_cpu(rp->chkdsk_lsn)) { 122 if (!ntfs_is_chkd_record(rp->magic) && sle64_to_cpu(rp->chkdsk_lsn)) {
123 ntfs_error(vi->i_sb, "$LogFile restart page is not modified " 123 ntfs_error(vi->i_sb, "$LogFile restart page is not modified "
124 "chkdsk but a chkdsk LSN is specified."); 124 "by chkdsk but a chkdsk LSN is specified.");
125 return FALSE; 125 return FALSE;
126 } 126 }
127 ntfs_debug("Done."); 127 ntfs_debug("Done.");
@@ -312,10 +312,12 @@ err_out:
312 * @vi: $LogFile inode to which the restart page belongs 312 * @vi: $LogFile inode to which the restart page belongs
313 * @rp: restart page to check 313 * @rp: restart page to check
314 * @pos: position in @vi at which the restart page resides 314 * @pos: position in @vi at which the restart page resides
315 * @wrp: copy of the multi sector transfer deprotected restart page 315 * @wrp: [OUT] copy of the multi sector transfer deprotected restart page
316 * @lsn: [OUT] set to the current logfile lsn on success
316 * 317 *
317 * Check the restart page @rp for consistency and return TRUE if it is 318 * Check the restart page @rp for consistency and return 0 if it is consistent
318 * consistent and FALSE otherwise. 319 * and -errno otherwise. The restart page may have been modified by chkdsk in
320 * which case its magic is CHKD instead of RSTR.
319 * 321 *
320 * This function only needs NTFS_BLOCK_SIZE bytes in @rp, i.e. it does not 322 * This function only needs NTFS_BLOCK_SIZE bytes in @rp, i.e. it does not
321 * require the full restart page. 323 * require the full restart page.
@@ -323,25 +325,33 @@ err_out:
323 * If @wrp is not NULL, on success, *@wrp will point to a buffer containing a 325 * If @wrp is not NULL, on success, *@wrp will point to a buffer containing a
324 * copy of the complete multi sector transfer deprotected page. On failure, 326 * copy of the complete multi sector transfer deprotected page. On failure,
325 * *@wrp is undefined. 327 * *@wrp is undefined.
328 *
329 * Similarly, if @lsn is not NULL, on success *@lsn will be set to the current
330 * logfile lsn according to this restart page. On failure, *@lsn is undefined.
331 *
332 * The following error codes are defined:
333 * -EINVAL - The restart page is inconsistent.
334 * -ENOMEM - Not enough memory to load the restart page.
335 * -EIO - Failed to read from $LogFile.
326 */ 336 */
327static BOOL ntfs_check_and_load_restart_page(struct inode *vi, 337static int ntfs_check_and_load_restart_page(struct inode *vi,
328 RESTART_PAGE_HEADER *rp, s64 pos, RESTART_PAGE_HEADER **wrp) 338 RESTART_PAGE_HEADER *rp, s64 pos, RESTART_PAGE_HEADER **wrp,
339 LSN *lsn)
329{ 340{
330 RESTART_AREA *ra; 341 RESTART_AREA *ra;
331 RESTART_PAGE_HEADER *trp; 342 RESTART_PAGE_HEADER *trp;
332 int size; 343 int size, err;
333 BOOL ret;
334 344
335 ntfs_debug("Entering."); 345 ntfs_debug("Entering.");
336 /* Check the restart page header for consistency. */ 346 /* Check the restart page header for consistency. */
337 if (!ntfs_check_restart_page_header(vi, rp, pos)) { 347 if (!ntfs_check_restart_page_header(vi, rp, pos)) {
338 /* Error output already done inside the function. */ 348 /* Error output already done inside the function. */
339 return FALSE; 349 return -EINVAL;
340 } 350 }
341 /* Check the restart area for consistency. */ 351 /* Check the restart area for consistency. */
342 if (!ntfs_check_restart_area(vi, rp)) { 352 if (!ntfs_check_restart_area(vi, rp)) {
343 /* Error output already done inside the function. */ 353 /* Error output already done inside the function. */
344 return FALSE; 354 return -EINVAL;
345 } 355 }
346 ra = (RESTART_AREA*)((u8*)rp + le16_to_cpu(rp->restart_area_offset)); 356 ra = (RESTART_AREA*)((u8*)rp + le16_to_cpu(rp->restart_area_offset));
347 /* 357 /*
@@ -352,7 +362,7 @@ static BOOL ntfs_check_and_load_restart_page(struct inode *vi,
352 if (!trp) { 362 if (!trp) {
353 ntfs_error(vi->i_sb, "Failed to allocate memory for $LogFile " 363 ntfs_error(vi->i_sb, "Failed to allocate memory for $LogFile "
354 "restart page buffer."); 364 "restart page buffer.");
355 return FALSE; 365 return -ENOMEM;
356 } 366 }
357 /* 367 /*
358 * Read the whole of the restart page into the buffer. If it fits 368 * Read the whole of the restart page into the buffer. If it fits
@@ -379,6 +389,9 @@ static BOOL ntfs_check_and_load_restart_page(struct inode *vi,
379 if (IS_ERR(page)) { 389 if (IS_ERR(page)) {
380 ntfs_error(vi->i_sb, "Error mapping $LogFile " 390 ntfs_error(vi->i_sb, "Error mapping $LogFile "
381 "page (index %lu).", idx); 391 "page (index %lu).", idx);
392 err = PTR_ERR(page);
393 if (err != -EIO && err != -ENOMEM)
394 err = -EIO;
382 goto err_out; 395 goto err_out;
383 } 396 }
384 size = min_t(int, to_read, PAGE_CACHE_SIZE); 397 size = min_t(int, to_read, PAGE_CACHE_SIZE);
@@ -392,29 +405,57 @@ static BOOL ntfs_check_and_load_restart_page(struct inode *vi,
392 /* Perform the multi sector transfer deprotection on the buffer. */ 405 /* Perform the multi sector transfer deprotection on the buffer. */
393 if (post_read_mst_fixup((NTFS_RECORD*)trp, 406 if (post_read_mst_fixup((NTFS_RECORD*)trp,
394 le32_to_cpu(rp->system_page_size))) { 407 le32_to_cpu(rp->system_page_size))) {
395 ntfs_error(vi->i_sb, "Multi sector transfer error detected in " 408 /*
396 "$LogFile restart page."); 409 * A multi sector transfer error was detected. We only need to
397 goto err_out; 410 * abort if the restart page contents exceed the multi sector
411 * transfer fixup of the first sector.
412 */
413 if (le16_to_cpu(rp->restart_area_offset) +
414 le16_to_cpu(ra->restart_area_length) >
415 NTFS_BLOCK_SIZE - sizeof(u16)) {
416 ntfs_error(vi->i_sb, "Multi sector transfer error "
417 "detected in $LogFile restart page.");
418 err = -EINVAL;
419 goto err_out;
420 }
421 }
422 /*
423 * If the restart page is modified by chkdsk or there are no active
424 * logfile clients, the logfile is consistent. Otherwise, need to
425 * check the log client records for consistency, too.
426 */
427 err = 0;
428 if (ntfs_is_rstr_record(rp->magic) &&
429 ra->client_in_use_list != LOGFILE_NO_CLIENT) {
430 if (!ntfs_check_log_client_array(vi, trp)) {
431 err = -EINVAL;
432 goto err_out;
433 }
434 }
435 if (lsn) {
436 if (ntfs_is_rstr_record(rp->magic))
437 *lsn = sle64_to_cpu(ra->current_lsn);
438 else /* if (ntfs_is_chkd_record(rp->magic)) */
439 *lsn = sle64_to_cpu(rp->chkdsk_lsn);
398 } 440 }
399 /* Check the log client records for consistency. */
400 ret = ntfs_check_log_client_array(vi, trp);
401 if (ret && wrp)
402 *wrp = trp;
403 else
404 ntfs_free(trp);
405 ntfs_debug("Done."); 441 ntfs_debug("Done.");
406 return ret; 442 if (wrp)
443 *wrp = trp;
444 else {
407err_out: 445err_out:
408 ntfs_free(trp); 446 ntfs_free(trp);
409 return FALSE; 447 }
448 return err;
410} 449}
411 450
412/** 451/**
413 * ntfs_check_logfile - check the journal for consistency 452 * ntfs_check_logfile - check the journal for consistency
414 * @log_vi: struct inode of loaded journal $LogFile to check 453 * @log_vi: struct inode of loaded journal $LogFile to check
454 * @rp: [OUT] on success this is a copy of the current restart page
415 * 455 *
416 * Check the $LogFile journal for consistency and return TRUE if it is 456 * Check the $LogFile journal for consistency and return TRUE if it is
417 * consistent and FALSE if not. 457 * consistent and FALSE if not. On success, the current restart page is
458 * returned in *@rp. Caller must call ntfs_free(*@rp) when finished with it.
418 * 459 *
419 * At present we only check the two restart pages and ignore the log record 460 * At present we only check the two restart pages and ignore the log record
420 * pages. 461 * pages.
@@ -424,19 +465,18 @@ err_out:
424 * if the $LogFile was created on a system with a different page size to ours 465 * if the $LogFile was created on a system with a different page size to ours
425 * yet and mst deprotection would fail if our page size is smaller. 466 * yet and mst deprotection would fail if our page size is smaller.
426 */ 467 */
427BOOL ntfs_check_logfile(struct inode *log_vi) 468BOOL ntfs_check_logfile(struct inode *log_vi, RESTART_PAGE_HEADER **rp)
428{ 469{
429 s64 size, pos, rstr1_pos, rstr2_pos; 470 s64 size, pos;
471 LSN rstr1_lsn, rstr2_lsn;
430 ntfs_volume *vol = NTFS_SB(log_vi->i_sb); 472 ntfs_volume *vol = NTFS_SB(log_vi->i_sb);
431 struct address_space *mapping = log_vi->i_mapping; 473 struct address_space *mapping = log_vi->i_mapping;
432 struct page *page = NULL; 474 struct page *page = NULL;
433 u8 *kaddr = NULL; 475 u8 *kaddr = NULL;
434 RESTART_PAGE_HEADER *rstr1_ph = NULL; 476 RESTART_PAGE_HEADER *rstr1_ph = NULL;
435 RESTART_PAGE_HEADER *rstr2_ph = NULL; 477 RESTART_PAGE_HEADER *rstr2_ph = NULL;
436 int log_page_size, log_page_mask, ofs; 478 int log_page_size, log_page_mask, err;
437 BOOL logfile_is_empty = TRUE; 479 BOOL logfile_is_empty = TRUE;
438 BOOL rstr1_found = FALSE;
439 BOOL rstr2_found = FALSE;
440 u8 log_page_bits; 480 u8 log_page_bits;
441 481
442 ntfs_debug("Entering."); 482 ntfs_debug("Entering.");
@@ -491,7 +531,7 @@ BOOL ntfs_check_logfile(struct inode *log_vi)
491 if (IS_ERR(page)) { 531 if (IS_ERR(page)) {
492 ntfs_error(vol->sb, "Error mapping $LogFile " 532 ntfs_error(vol->sb, "Error mapping $LogFile "
493 "page (index %lu).", idx); 533 "page (index %lu).", idx);
494 return FALSE; 534 goto err_out;
495 } 535 }
496 } 536 }
497 kaddr = (u8*)page_address(page) + (pos & ~PAGE_CACHE_MASK); 537 kaddr = (u8*)page_address(page) + (pos & ~PAGE_CACHE_MASK);
@@ -510,99 +550,95 @@ BOOL ntfs_check_logfile(struct inode *log_vi)
510 */ 550 */
511 if (ntfs_is_rcrd_recordp((le32*)kaddr)) 551 if (ntfs_is_rcrd_recordp((le32*)kaddr))
512 break; 552 break;
513 /* 553 /* If not a (modified by chkdsk) restart page, continue. */
514 * A modified by chkdsk restart page means we cannot handle 554 if (!ntfs_is_rstr_recordp((le32*)kaddr) &&
515 * this log file. 555 !ntfs_is_chkd_recordp((le32*)kaddr)) {
516 */
517 if (ntfs_is_chkd_recordp((le32*)kaddr)) {
518 ntfs_error(vol->sb, "$LogFile has been modified by "
519 "chkdsk. Mount this volume in "
520 "Windows.");
521 goto err_out;
522 }
523 /* If not a restart page, continue. */
524 if (!ntfs_is_rstr_recordp((le32*)kaddr)) {
525 /* Skip to the minimum page size for the next one. */
526 if (!pos) 556 if (!pos)
527 pos = NTFS_BLOCK_SIZE >> 1; 557 pos = NTFS_BLOCK_SIZE >> 1;
528 continue; 558 continue;
529 } 559 }
530 /* We now know we have a restart page. */
531 if (!pos) {
532 rstr1_found = TRUE;
533 rstr1_pos = pos;
534 } else {
535 if (rstr2_found) {
536 ntfs_error(vol->sb, "Found more than two "
537 "restart pages in $LogFile.");
538 goto err_out;
539 }
540 rstr2_found = TRUE;
541 rstr2_pos = pos;
542 }
543 /* 560 /*
544 * Check the restart page for consistency and get a copy of the 561 * Check the (modified by chkdsk) restart page for consistency
545 * complete multi sector transfer deprotected restart page. 562 * and get a copy of the complete multi sector transfer
563 * deprotected restart page.
546 */ 564 */
547 if (!ntfs_check_and_load_restart_page(log_vi, 565 err = ntfs_check_and_load_restart_page(log_vi,
548 (RESTART_PAGE_HEADER*)kaddr, pos, 566 (RESTART_PAGE_HEADER*)kaddr, pos,
549 !pos ? &rstr1_ph : &rstr2_ph)) { 567 !rstr1_ph ? &rstr1_ph : &rstr2_ph,
550 /* Error output already done inside the function. */ 568 !rstr1_ph ? &rstr1_lsn : &rstr2_lsn);
551 goto err_out; 569 if (!err) {
570 /*
571 * If we have now found the first (modified by chkdsk)
572 * restart page, continue looking for the second one.
573 */
574 if (!pos) {
575 pos = NTFS_BLOCK_SIZE >> 1;
576 continue;
577 }
578 /*
579 * We have now found the second (modified by chkdsk)
580 * restart page, so we can stop looking.
581 */
582 break;
552 } 583 }
553 /* 584 /*
554 * We have a valid restart page. The next one must be after 585 * Error output already done inside the function. Note, we do
555 * a whole system page size as specified by the valid restart 586 * not abort if the restart page was invalid as we might still
556 * page. 587 * find a valid one further in the file.
557 */ 588 */
589 if (err != -EINVAL) {
590 ntfs_unmap_page(page);
591 goto err_out;
592 }
593 /* Continue looking. */
558 if (!pos) 594 if (!pos)
559 pos = le32_to_cpu(rstr1_ph->system_page_size) >> 1; 595 pos = NTFS_BLOCK_SIZE >> 1;
560 } 596 }
561 if (page) { 597 if (page)
562 ntfs_unmap_page(page); 598 ntfs_unmap_page(page);
563 page = NULL;
564 }
565 if (logfile_is_empty) { 599 if (logfile_is_empty) {
566 NVolSetLogFileEmpty(vol); 600 NVolSetLogFileEmpty(vol);
567is_empty: 601is_empty:
568 ntfs_debug("Done. ($LogFile is empty.)"); 602 ntfs_debug("Done. ($LogFile is empty.)");
569 return TRUE; 603 return TRUE;
570 } 604 }
571 if (!rstr1_found || !rstr2_found) { 605 if (!rstr1_ph) {
572 ntfs_error(vol->sb, "Did not find two restart pages in " 606 BUG_ON(rstr2_ph);
573 "$LogFile."); 607 ntfs_error(vol->sb, "Did not find any restart pages in "
574 goto err_out; 608 "$LogFile and it was not empty.");
609 return FALSE;
610 }
611 /* If both restart pages were found, use the more recent one. */
612 if (rstr2_ph) {
613 /*
614 * If the second restart area is more recent, switch to it.
615 * Otherwise just throw it away.
616 */
617 if (rstr2_lsn > rstr1_lsn) {
618 ntfs_free(rstr1_ph);
619 rstr1_ph = rstr2_ph;
620 /* rstr1_lsn = rstr2_lsn; */
621 } else
622 ntfs_free(rstr2_ph);
623 rstr2_ph = NULL;
575 } 624 }
576 /*
577 * The two restart areas must be identical except for the update
578 * sequence number.
579 */
580 ofs = le16_to_cpu(rstr1_ph->usa_ofs);
581 if (memcmp(rstr1_ph, rstr2_ph, ofs) || (ofs += sizeof(u16),
582 memcmp((u8*)rstr1_ph + ofs, (u8*)rstr2_ph + ofs,
583 le32_to_cpu(rstr1_ph->system_page_size) - ofs))) {
584 ntfs_error(vol->sb, "The two restart pages in $LogFile do not "
585 "match.");
586 goto err_out;
587 }
588 ntfs_free(rstr1_ph);
589 ntfs_free(rstr2_ph);
590 /* All consistency checks passed. */ 625 /* All consistency checks passed. */
626 if (rp)
627 *rp = rstr1_ph;
628 else
629 ntfs_free(rstr1_ph);
591 ntfs_debug("Done."); 630 ntfs_debug("Done.");
592 return TRUE; 631 return TRUE;
593err_out: 632err_out:
594 if (page)
595 ntfs_unmap_page(page);
596 if (rstr1_ph) 633 if (rstr1_ph)
597 ntfs_free(rstr1_ph); 634 ntfs_free(rstr1_ph);
598 if (rstr2_ph)
599 ntfs_free(rstr2_ph);
600 return FALSE; 635 return FALSE;
601} 636}
602 637
603/** 638/**
604 * ntfs_is_logfile_clean - check in the journal if the volume is clean 639 * ntfs_is_logfile_clean - check in the journal if the volume is clean
605 * @log_vi: struct inode of loaded journal $LogFile to check 640 * @log_vi: struct inode of loaded journal $LogFile to check
641 * @rp: copy of the current restart page
606 * 642 *
607 * Analyze the $LogFile journal and return TRUE if it indicates the volume was 643 * Analyze the $LogFile journal and return TRUE if it indicates the volume was
608 * shutdown cleanly and FALSE if not. 644 * shutdown cleanly and FALSE if not.
@@ -619,11 +655,9 @@ err_out:
619 * is empty this function requires that NVolLogFileEmpty() is true otherwise an 655 * is empty this function requires that NVolLogFileEmpty() is true otherwise an
620 * empty volume will be reported as dirty. 656 * empty volume will be reported as dirty.
621 */ 657 */
622BOOL ntfs_is_logfile_clean(struct inode *log_vi) 658BOOL ntfs_is_logfile_clean(struct inode *log_vi, const RESTART_PAGE_HEADER *rp)
623{ 659{
624 ntfs_volume *vol = NTFS_SB(log_vi->i_sb); 660 ntfs_volume *vol = NTFS_SB(log_vi->i_sb);
625 struct page *page;
626 RESTART_PAGE_HEADER *rp;
627 RESTART_AREA *ra; 661 RESTART_AREA *ra;
628 662
629 ntfs_debug("Entering."); 663 ntfs_debug("Entering.");
@@ -632,24 +666,15 @@ BOOL ntfs_is_logfile_clean(struct inode *log_vi)
632 ntfs_debug("Done. ($LogFile is empty.)"); 666 ntfs_debug("Done. ($LogFile is empty.)");
633 return TRUE; 667 return TRUE;
634 } 668 }
635 /* 669 BUG_ON(!rp);
636 * Read the first restart page. It will be possibly incomplete and 670 if (!ntfs_is_rstr_record(rp->magic) &&
637 * will not be multi sector transfer deprotected but we only need the 671 !ntfs_is_chkd_record(rp->magic)) {
638 * first NTFS_BLOCK_SIZE bytes so it does not matter. 672 ntfs_error(vol->sb, "Restart page buffer is invalid. This is "
639 */ 673 "probably a bug in that the $LogFile should "
640 page = ntfs_map_page(log_vi->i_mapping, 0); 674 "have been consistency checked before calling "
641 if (IS_ERR(page)) { 675 "this function.");
642 ntfs_error(vol->sb, "Error mapping $LogFile page (index 0).");
643 return FALSE; 676 return FALSE;
644 } 677 }
645 rp = (RESTART_PAGE_HEADER*)page_address(page);
646 if (!ntfs_is_rstr_record(rp->magic)) {
647 ntfs_error(vol->sb, "No restart page found at offset zero in "
648 "$LogFile. This is probably a bug in that "
649 "the $LogFile should have been consistency "
650 "checked before calling this function.");
651 goto err_out;
652 }
653 ra = (RESTART_AREA*)((u8*)rp + le16_to_cpu(rp->restart_area_offset)); 678 ra = (RESTART_AREA*)((u8*)rp + le16_to_cpu(rp->restart_area_offset));
654 /* 679 /*
655 * If the $LogFile has active clients, i.e. it is open, and we do not 680 * If the $LogFile has active clients, i.e. it is open, and we do not
@@ -659,15 +684,11 @@ BOOL ntfs_is_logfile_clean(struct inode *log_vi)
659 if (ra->client_in_use_list != LOGFILE_NO_CLIENT && 684 if (ra->client_in_use_list != LOGFILE_NO_CLIENT &&
660 !(ra->flags & RESTART_VOLUME_IS_CLEAN)) { 685 !(ra->flags & RESTART_VOLUME_IS_CLEAN)) {
661 ntfs_debug("Done. $LogFile indicates a dirty shutdown."); 686 ntfs_debug("Done. $LogFile indicates a dirty shutdown.");
662 goto err_out; 687 return FALSE;
663 } 688 }
664 ntfs_unmap_page(page);
665 /* $LogFile indicates a clean shutdown. */ 689 /* $LogFile indicates a clean shutdown. */
666 ntfs_debug("Done. $LogFile indicates a clean shutdown."); 690 ntfs_debug("Done. $LogFile indicates a clean shutdown.");
667 return TRUE; 691 return TRUE;
668err_out:
669 ntfs_unmap_page(page);
670 return FALSE;
671} 692}
672 693
673/** 694/**
diff --git a/fs/ntfs/logfile.h b/fs/ntfs/logfile.h
index 4ee4378de061..42388f95ea6d 100644
--- a/fs/ntfs/logfile.h
+++ b/fs/ntfs/logfile.h
@@ -2,7 +2,7 @@
2 * logfile.h - Defines for NTFS kernel journal ($LogFile) handling. Part of 2 * logfile.h - Defines for NTFS kernel journal ($LogFile) handling. Part of
3 * the Linux-NTFS project. 3 * the Linux-NTFS project.
4 * 4 *
5 * Copyright (c) 2000-2004 Anton Altaparmakov 5 * Copyright (c) 2000-2005 Anton Altaparmakov
6 * 6 *
7 * This program/include file is free software; you can redistribute it and/or 7 * This program/include file is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as published 8 * modify it under the terms of the GNU General Public License as published
@@ -296,9 +296,11 @@ typedef struct {
296/* sizeof() = 160 (0xa0) bytes */ 296/* sizeof() = 160 (0xa0) bytes */
297} __attribute__ ((__packed__)) LOG_CLIENT_RECORD; 297} __attribute__ ((__packed__)) LOG_CLIENT_RECORD;
298 298
299extern BOOL ntfs_check_logfile(struct inode *log_vi); 299extern BOOL ntfs_check_logfile(struct inode *log_vi,
300 RESTART_PAGE_HEADER **rp);
300 301
301extern BOOL ntfs_is_logfile_clean(struct inode *log_vi); 302extern BOOL ntfs_is_logfile_clean(struct inode *log_vi,
303 const RESTART_PAGE_HEADER *rp);
302 304
303extern BOOL ntfs_empty_logfile(struct inode *log_vi); 305extern BOOL ntfs_empty_logfile(struct inode *log_vi);
304 306
diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h
index fac5944df6d8..9994e019a3cf 100644
--- a/fs/ntfs/malloc.h
+++ b/fs/ntfs/malloc.h
@@ -27,27 +27,63 @@
27#include <linux/highmem.h> 27#include <linux/highmem.h>
28 28
29/** 29/**
30 * ntfs_malloc_nofs - allocate memory in multiples of pages 30 * __ntfs_malloc - allocate memory in multiples of pages
31 * @size number of bytes to allocate 31 * @size: number of bytes to allocate
32 * @gfp_mask: extra flags for the allocator
33 *
34 * Internal function. You probably want ntfs_malloc_nofs()...
32 * 35 *
33 * Allocates @size bytes of memory, rounded up to multiples of PAGE_SIZE and 36 * Allocates @size bytes of memory, rounded up to multiples of PAGE_SIZE and
34 * returns a pointer to the allocated memory. 37 * returns a pointer to the allocated memory.
35 * 38 *
36 * If there was insufficient memory to complete the request, return NULL. 39 * If there was insufficient memory to complete the request, return NULL.
40 * Depending on @gfp_mask the allocation may be guaranteed to succeed.
37 */ 41 */
38static inline void *ntfs_malloc_nofs(unsigned long size) 42static inline void *__ntfs_malloc(unsigned long size,
43 unsigned int __nocast gfp_mask)
39{ 44{
40 if (likely(size <= PAGE_SIZE)) { 45 if (likely(size <= PAGE_SIZE)) {
41 BUG_ON(!size); 46 BUG_ON(!size);
42 /* kmalloc() has per-CPU caches so is faster for now. */ 47 /* kmalloc() has per-CPU caches so is faster for now. */
43 return kmalloc(PAGE_SIZE, GFP_NOFS); 48 return kmalloc(PAGE_SIZE, gfp_mask);
44 /* return (void *)__get_free_page(GFP_NOFS | __GFP_HIGHMEM); */ 49 /* return (void *)__get_free_page(gfp_mask); */
45 } 50 }
46 if (likely(size >> PAGE_SHIFT < num_physpages)) 51 if (likely(size >> PAGE_SHIFT < num_physpages))
47 return __vmalloc(size, GFP_NOFS | __GFP_HIGHMEM, PAGE_KERNEL); 52 return __vmalloc(size, gfp_mask, PAGE_KERNEL);
48 return NULL; 53 return NULL;
49} 54}
50 55
56/**
57 * ntfs_malloc_nofs - allocate memory in multiples of pages
58 * @size: number of bytes to allocate
59 *
60 * Allocates @size bytes of memory, rounded up to multiples of PAGE_SIZE and
61 * returns a pointer to the allocated memory.
62 *
63 * If there was insufficient memory to complete the request, return NULL.
64 */
65static inline void *ntfs_malloc_nofs(unsigned long size)
66{
67 return __ntfs_malloc(size, GFP_NOFS | __GFP_HIGHMEM);
68}
69
70/**
71 * ntfs_malloc_nofs_nofail - allocate memory in multiples of pages
72 * @size: number of bytes to allocate
73 *
74 * Allocates @size bytes of memory, rounded up to multiples of PAGE_SIZE and
75 * returns a pointer to the allocated memory.
76 *
77 * This function guarantees that the allocation will succeed. It will sleep
78 * for as long as it takes to complete the allocation.
79 *
80 * If there was insufficient memory to complete the request, return NULL.
81 */
82static inline void *ntfs_malloc_nofs_nofail(unsigned long size)
83{
84 return __ntfs_malloc(size, GFP_NOFS | __GFP_HIGHMEM | __GFP_NOFAIL);
85}
86
51static inline void ntfs_free(void *addr) 87static inline void ntfs_free(void *addr)
52{ 88{
53 if (likely(((unsigned long)addr < VMALLOC_START) || 89 if (likely(((unsigned long)addr < VMALLOC_START) ||
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 317f7c679fd3..2c32b84385a8 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -511,7 +511,6 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no,
511 } while (bh); 511 } while (bh);
512 tail->b_this_page = head; 512 tail->b_this_page = head;
513 attach_page_buffers(page, head); 513 attach_page_buffers(page, head);
514 BUG_ON(!page_has_buffers(page));
515 } 514 }
516 bh = head = page_buffers(page); 515 bh = head = page_buffers(page);
517 BUG_ON(!bh); 516 BUG_ON(!bh);
@@ -692,7 +691,6 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync)
692 */ 691 */
693 if (!NInoTestClearDirty(ni)) 692 if (!NInoTestClearDirty(ni))
694 goto done; 693 goto done;
695 BUG_ON(!page_has_buffers(page));
696 bh = head = page_buffers(page); 694 bh = head = page_buffers(page);
697 BUG_ON(!bh); 695 BUG_ON(!bh);
698 rl = NULL; 696 rl = NULL;
@@ -1955,7 +1953,7 @@ restore_undo_alloc:
1955 a = ctx->attr; 1953 a = ctx->attr;
1956 a->data.non_resident.highest_vcn = cpu_to_sle64(old_last_vcn - 1); 1954 a->data.non_resident.highest_vcn = cpu_to_sle64(old_last_vcn - 1);
1957undo_alloc: 1955undo_alloc:
1958 if (ntfs_cluster_free(vol->mft_ino, old_last_vcn, -1) < 0) { 1956 if (ntfs_cluster_free(vol->mft_ino, old_last_vcn, -1, TRUE) < 0) {
1959 ntfs_error(vol->sb, "Failed to free clusters from mft data " 1957 ntfs_error(vol->sb, "Failed to free clusters from mft data "
1960 "attribute.%s", es); 1958 "attribute.%s", es);
1961 NVolSetErrors(vol); 1959 NVolSetErrors(vol);
diff --git a/fs/ntfs/runlist.c b/fs/ntfs/runlist.c
index 758855b0414e..f5b2ac929081 100644
--- a/fs/ntfs/runlist.c
+++ b/fs/ntfs/runlist.c
@@ -35,7 +35,7 @@ static inline void ntfs_rl_mm(runlist_element *base, int dst, int src,
35 int size) 35 int size)
36{ 36{
37 if (likely((dst != src) && (size > 0))) 37 if (likely((dst != src) && (size > 0)))
38 memmove(base + dst, base + src, size * sizeof (*base)); 38 memmove(base + dst, base + src, size * sizeof(*base));
39} 39}
40 40
41/** 41/**
@@ -95,6 +95,51 @@ static inline runlist_element *ntfs_rl_realloc(runlist_element *rl,
95} 95}
96 96
97/** 97/**
98 * ntfs_rl_realloc_nofail - Reallocate memory for runlists
99 * @rl: original runlist
100 * @old_size: number of runlist elements in the original runlist @rl
101 * @new_size: number of runlist elements we need space for
102 *
103 * As the runlists grow, more memory will be required. To prevent the
104 * kernel having to allocate and reallocate large numbers of small bits of
105 * memory, this function returns an entire page of memory.
106 *
107 * This function guarantees that the allocation will succeed. It will sleep
108 * for as long as it takes to complete the allocation.
109 *
110 * It is up to the caller to serialize access to the runlist @rl.
111 *
112 * N.B. If the new allocation doesn't require a different number of pages in
113 * memory, the function will return the original pointer.
114 *
115 * On success, return a pointer to the newly allocated, or recycled, memory.
116 * On error, return -errno. The following error codes are defined:
117 * -ENOMEM - Not enough memory to allocate runlist array.
118 * -EINVAL - Invalid parameters were passed in.
119 */
120static inline runlist_element *ntfs_rl_realloc_nofail(runlist_element *rl,
121 int old_size, int new_size)
122{
123 runlist_element *new_rl;
124
125 old_size = PAGE_ALIGN(old_size * sizeof(*rl));
126 new_size = PAGE_ALIGN(new_size * sizeof(*rl));
127 if (old_size == new_size)
128 return rl;
129
130 new_rl = ntfs_malloc_nofs_nofail(new_size);
131 BUG_ON(!new_rl);
132
133 if (likely(rl != NULL)) {
134 if (unlikely(old_size > new_size))
135 old_size = new_size;
136 memcpy(new_rl, rl, old_size);
137 ntfs_free(rl);
138 }
139 return new_rl;
140}
141
142/**
98 * ntfs_are_rl_mergeable - test if two runlists can be joined together 143 * ntfs_are_rl_mergeable - test if two runlists can be joined together
99 * @dst: original runlist 144 * @dst: original runlist
100 * @src: new runlist to test for mergeability with @dst 145 * @src: new runlist to test for mergeability with @dst
@@ -497,6 +542,7 @@ runlist_element *ntfs_runlists_merge(runlist_element *drl,
497 /* Scan to the end of the source runlist. */ 542 /* Scan to the end of the source runlist. */
498 for (dend = 0; likely(drl[dend].length); dend++) 543 for (dend = 0; likely(drl[dend].length); dend++)
499 ; 544 ;
545 dend++;
500 drl = ntfs_rl_realloc(drl, dend, dend + 1); 546 drl = ntfs_rl_realloc(drl, dend, dend + 1);
501 if (IS_ERR(drl)) 547 if (IS_ERR(drl))
502 return drl; 548 return drl;
@@ -566,8 +612,8 @@ runlist_element *ntfs_runlists_merge(runlist_element *drl,
566 ((drl[dins].vcn + drl[dins].length) <= /* End of hole */ 612 ((drl[dins].vcn + drl[dins].length) <= /* End of hole */
567 (srl[send - 1].vcn + srl[send - 1].length))); 613 (srl[send - 1].vcn + srl[send - 1].length)));
568 614
569 /* Or we'll lose an end marker */ 615 /* Or we will lose an end marker. */
570 if (start && finish && (drl[dins].length == 0)) 616 if (finish && !drl[dins].length)
571 ss++; 617 ss++;
572 if (marker && (drl[dins].vcn + drl[dins].length > srl[send - 1].vcn)) 618 if (marker && (drl[dins].vcn + drl[dins].length > srl[send - 1].vcn))
573 finish = FALSE; 619 finish = FALSE;
@@ -621,11 +667,8 @@ runlist_element *ntfs_runlists_merge(runlist_element *drl,
621 if (drl[ds].lcn != LCN_RL_NOT_MAPPED) { 667 if (drl[ds].lcn != LCN_RL_NOT_MAPPED) {
622 /* Add an unmapped runlist element. */ 668 /* Add an unmapped runlist element. */
623 if (!slots) { 669 if (!slots) {
624 /* FIXME/TODO: We need to have the 670 drl = ntfs_rl_realloc_nofail(drl, ds,
625 * extra memory already! (AIA) */ 671 ds + 2);
626 drl = ntfs_rl_realloc(drl, ds, ds + 2);
627 if (!drl)
628 goto critical_error;
629 slots = 2; 672 slots = 2;
630 } 673 }
631 ds++; 674 ds++;
@@ -640,13 +683,8 @@ runlist_element *ntfs_runlists_merge(runlist_element *drl,
640 drl[ds].length = marker_vcn - drl[ds].vcn; 683 drl[ds].length = marker_vcn - drl[ds].vcn;
641 /* Finally add the ENOENT terminator. */ 684 /* Finally add the ENOENT terminator. */
642 ds++; 685 ds++;
643 if (!slots) { 686 if (!slots)
644 /* FIXME/TODO: We need to have the extra 687 drl = ntfs_rl_realloc_nofail(drl, ds, ds + 1);
645 * memory already! (AIA) */
646 drl = ntfs_rl_realloc(drl, ds, ds + 1);
647 if (!drl)
648 goto critical_error;
649 }
650 drl[ds].vcn = marker_vcn; 688 drl[ds].vcn = marker_vcn;
651 drl[ds].lcn = LCN_ENOENT; 689 drl[ds].lcn = LCN_ENOENT;
652 drl[ds].length = (s64)0; 690 drl[ds].length = (s64)0;
@@ -659,11 +697,6 @@ finished:
659 ntfs_debug("Merged runlist:"); 697 ntfs_debug("Merged runlist:");
660 ntfs_debug_dump_runlist(drl); 698 ntfs_debug_dump_runlist(drl);
661 return drl; 699 return drl;
662
663critical_error:
664 /* Critical error! We cannot afford to fail here. */
665 ntfs_error(NULL, "Critical error! Not enough memory.");
666 panic("NTFS: Cannot continue.");
667} 700}
668 701
669/** 702/**
@@ -727,6 +760,9 @@ runlist_element *ntfs_mapping_pairs_decompress(const ntfs_volume *vol,
727 ntfs_error(vol->sb, "Corrupt attribute."); 760 ntfs_error(vol->sb, "Corrupt attribute.");
728 return ERR_PTR(-EIO); 761 return ERR_PTR(-EIO);
729 } 762 }
763 /* If the mapping pairs array is valid but empty, nothing to do. */
764 if (!vcn && !*buf)
765 return old_rl;
730 /* Current position in runlist array. */ 766 /* Current position in runlist array. */
731 rlpos = 0; 767 rlpos = 0;
732 /* Allocate first page and set current runlist size to one page. */ 768 /* Allocate first page and set current runlist size to one page. */
@@ -1419,6 +1455,7 @@ err_out:
1419 1455
1420/** 1456/**
1421 * ntfs_rl_truncate_nolock - truncate a runlist starting at a specified vcn 1457 * ntfs_rl_truncate_nolock - truncate a runlist starting at a specified vcn
1458 * @vol: ntfs volume (needed for error output)
1422 * @runlist: runlist to truncate 1459 * @runlist: runlist to truncate
1423 * @new_length: the new length of the runlist in VCNs 1460 * @new_length: the new length of the runlist in VCNs
1424 * 1461 *
@@ -1426,12 +1463,16 @@ err_out:
1426 * holding the runlist elements to a length of @new_length VCNs. 1463 * holding the runlist elements to a length of @new_length VCNs.
1427 * 1464 *
1428 * If @new_length lies within the runlist, the runlist elements with VCNs of 1465 * If @new_length lies within the runlist, the runlist elements with VCNs of
1429 * @new_length and above are discarded. 1466 * @new_length and above are discarded. As a special case if @new_length is
1467 * zero, the runlist is discarded and set to NULL.
1430 * 1468 *
1431 * If @new_length lies beyond the runlist, a sparse runlist element is added to 1469 * If @new_length lies beyond the runlist, a sparse runlist element is added to
1432 * the end of the runlist @runlist or if the last runlist element is a sparse 1470 * the end of the runlist @runlist or if the last runlist element is a sparse
1433 * one already, this is extended. 1471 * one already, this is extended.
1434 * 1472 *
1473 * Note, no checking is done for unmapped runlist elements. It is assumed that
1474 * the caller has mapped any elements that need to be mapped already.
1475 *
1435 * Return 0 on success and -errno on error. 1476 * Return 0 on success and -errno on error.
1436 * 1477 *
1437 * Locking: The caller must hold @runlist->lock for writing. 1478 * Locking: The caller must hold @runlist->lock for writing.
@@ -1446,6 +1487,13 @@ int ntfs_rl_truncate_nolock(const ntfs_volume *vol, runlist *const runlist,
1446 BUG_ON(!runlist); 1487 BUG_ON(!runlist);
1447 BUG_ON(new_length < 0); 1488 BUG_ON(new_length < 0);
1448 rl = runlist->rl; 1489 rl = runlist->rl;
1490 if (!new_length) {
1491 ntfs_debug("Freeing runlist.");
1492 runlist->rl = NULL;
1493 if (rl)
1494 ntfs_free(rl);
1495 return 0;
1496 }
1449 if (unlikely(!rl)) { 1497 if (unlikely(!rl)) {
1450 /* 1498 /*
1451 * Create a runlist consisting of a sparse runlist element of 1499 * Create a runlist consisting of a sparse runlist element of
@@ -1553,4 +1601,288 @@ int ntfs_rl_truncate_nolock(const ntfs_volume *vol, runlist *const runlist,
1553 return 0; 1601 return 0;
1554} 1602}
1555 1603
1604/**
1605 * ntfs_rl_punch_nolock - punch a hole into a runlist
1606 * @vol: ntfs volume (needed for error output)
1607 * @runlist: runlist to punch a hole into
1608 * @start: starting VCN of the hole to be created
1609 * @length: size of the hole to be created in units of clusters
1610 *
1611 * Punch a hole into the runlist @runlist starting at VCN @start and of size
1612 * @length clusters.
1613 *
1614 * Return 0 on success and -errno on error, in which case @runlist has not been
1615 * modified.
1616 *
1617 * If @start and/or @start + @length are outside the runlist return error code
1618 * -ENOENT.
1619 *
1620 * If the runlist contains unmapped or error elements between @start and @start
1621 * + @length return error code -EINVAL.
1622 *
1623 * Locking: The caller must hold @runlist->lock for writing.
1624 */
1625int ntfs_rl_punch_nolock(const ntfs_volume *vol, runlist *const runlist,
1626 const VCN start, const s64 length)
1627{
1628 const VCN end = start + length;
1629 s64 delta;
1630 runlist_element *rl, *rl_end, *rl_real_end, *trl;
1631 int old_size;
1632 BOOL lcn_fixup = FALSE;
1633
1634 ntfs_debug("Entering for start 0x%llx, length 0x%llx.",
1635 (long long)start, (long long)length);
1636 BUG_ON(!runlist);
1637 BUG_ON(start < 0);
1638 BUG_ON(length < 0);
1639 BUG_ON(end < 0);
1640 rl = runlist->rl;
1641 if (unlikely(!rl)) {
1642 if (likely(!start && !length))
1643 return 0;
1644 return -ENOENT;
1645 }
1646 /* Find @start in the runlist. */
1647 while (likely(rl->length && start >= rl[1].vcn))
1648 rl++;
1649 rl_end = rl;
1650 /* Find @end in the runlist. */
1651 while (likely(rl_end->length && end >= rl_end[1].vcn)) {
1652 /* Verify there are no unmapped or error elements. */
1653 if (unlikely(rl_end->lcn < LCN_HOLE))
1654 return -EINVAL;
1655 rl_end++;
1656 }
1657 /* Check the last element. */
1658 if (unlikely(rl_end->length && rl_end->lcn < LCN_HOLE))
1659 return -EINVAL;
1660 /* This covers @start being out of bounds, too. */
1661 if (!rl_end->length && end > rl_end->vcn)
1662 return -ENOENT;
1663 if (!length)
1664 return 0;
1665 if (!rl->length)
1666 return -ENOENT;
1667 rl_real_end = rl_end;
1668 /* Determine the runlist size. */
1669 while (likely(rl_real_end->length))
1670 rl_real_end++;
1671 old_size = rl_real_end - runlist->rl + 1;
1672 /* If @start is in a hole simply extend the hole. */
1673 if (rl->lcn == LCN_HOLE) {
1674 /*
1675 * If both @start and @end are in the same sparse run, we are
1676 * done.
1677 */
1678 if (end <= rl[1].vcn) {
1679 ntfs_debug("Done (requested hole is already sparse).");
1680 return 0;
1681 }
1682extend_hole:
1683 /* Extend the hole. */
1684 rl->length = end - rl->vcn;
1685 /* If @end is in a hole, merge it with the current one. */
1686 if (rl_end->lcn == LCN_HOLE) {
1687 rl_end++;
1688 rl->length = rl_end->vcn - rl->vcn;
1689 }
1690 /* We have done the hole. Now deal with the remaining tail. */
1691 rl++;
1692 /* Cut out all runlist elements up to @end. */
1693 if (rl < rl_end)
1694 memmove(rl, rl_end, (rl_real_end - rl_end + 1) *
1695 sizeof(*rl));
1696 /* Adjust the beginning of the tail if necessary. */
1697 if (end > rl->vcn) {
1698 s64 delta = end - rl->vcn;
1699 rl->vcn = end;
1700 rl->length -= delta;
1701 /* Only adjust the lcn if it is real. */
1702 if (rl->lcn >= 0)
1703 rl->lcn += delta;
1704 }
1705shrink_allocation:
1706 /* Reallocate memory if the allocation changed. */
1707 if (rl < rl_end) {
1708 rl = ntfs_rl_realloc(runlist->rl, old_size,
1709 old_size - (rl_end - rl));
1710 if (IS_ERR(rl))
1711 ntfs_warning(vol->sb, "Failed to shrink "
1712 "runlist buffer. This just "
1713 "wastes a bit of memory "
1714 "temporarily so we ignore it "
1715 "and return success.");
1716 else
1717 runlist->rl = rl;
1718 }
1719 ntfs_debug("Done (extend hole).");
1720 return 0;
1721 }
1722 /*
1723 * If @start is at the beginning of a run things are easier as there is
1724 * no need to split the first run.
1725 */
1726 if (start == rl->vcn) {
1727 /*
1728 * @start is at the beginning of a run.
1729 *
1730 * If the previous run is sparse, extend its hole.
1731 *
1732 * If @end is not in the same run, switch the run to be sparse
1733 * and extend the newly created hole.
1734 *
1735 * Thus both of these cases reduce the problem to the above
1736 * case of "@start is in a hole".
1737 */
1738 if (rl > runlist->rl && (rl - 1)->lcn == LCN_HOLE) {
1739 rl--;
1740 goto extend_hole;
1741 }
1742 if (end >= rl[1].vcn) {
1743 rl->lcn = LCN_HOLE;
1744 goto extend_hole;
1745 }
1746 /*
1747 * The final case is when @end is in the same run as @start.
1748 * For this need to split the run into two. One run for the
1749 * sparse region between the beginning of the old run, i.e.
1750 * @start, and @end and one for the remaining non-sparse
1751 * region, i.e. between @end and the end of the old run.
1752 */
1753 trl = ntfs_rl_realloc(runlist->rl, old_size, old_size + 1);
1754 if (IS_ERR(trl))
1755 goto enomem_out;
1756 old_size++;
1757 if (runlist->rl != trl) {
1758 rl = trl + (rl - runlist->rl);
1759 rl_end = trl + (rl_end - runlist->rl);
1760 rl_real_end = trl + (rl_real_end - runlist->rl);
1761 runlist->rl = trl;
1762 }
1763split_end:
1764 /* Shift all the runs up by one. */
1765 memmove(rl + 1, rl, (rl_real_end - rl + 1) * sizeof(*rl));
1766 /* Finally, setup the two split runs. */
1767 rl->lcn = LCN_HOLE;
1768 rl->length = length;
1769 rl++;
1770 rl->vcn += length;
1771 /* Only adjust the lcn if it is real. */
1772 if (rl->lcn >= 0 || lcn_fixup)
1773 rl->lcn += length;
1774 rl->length -= length;
1775 ntfs_debug("Done (split one).");
1776 return 0;
1777 }
1778 /*
1779 * @start is neither in a hole nor at the beginning of a run.
1780 *
1781 * If @end is in a hole, things are easier as simply truncating the run
1782 * @start is in to end at @start - 1, deleting all runs after that up
1783 * to @end, and finally extending the beginning of the run @end is in
1784 * to be @start is all that is needed.
1785 */
1786 if (rl_end->lcn == LCN_HOLE) {
1787 /* Truncate the run containing @start. */
1788 rl->length = start - rl->vcn;
1789 rl++;
1790 /* Cut out all runlist elements up to @end. */
1791 if (rl < rl_end)
1792 memmove(rl, rl_end, (rl_real_end - rl_end + 1) *
1793 sizeof(*rl));
1794 /* Extend the beginning of the run @end is in to be @start. */
1795 rl->vcn = start;
1796 rl->length = rl[1].vcn - start;
1797 goto shrink_allocation;
1798 }
1799 /*
1800 * If @end is not in a hole there are still two cases to distinguish.
1801 * Either @end is or is not in the same run as @start.
1802 *
1803 * The second case is easier as it can be reduced to an already solved
1804 * problem by truncating the run @start is in to end at @start - 1.
1805 * Then, if @end is in the next run need to split the run into a sparse
1806 * run followed by a non-sparse run (already covered above) and if @end
1807 * is not in the next run switching it to be sparse, again reduces the
1808 * problem to the already covered case of "@start is in a hole".
1809 */
1810 if (end >= rl[1].vcn) {
1811 /*
1812 * If @end is not in the next run, reduce the problem to the
1813 * case of "@start is in a hole".
1814 */
1815 if (rl[1].length && end >= rl[2].vcn) {
1816 /* Truncate the run containing @start. */
1817 rl->length = start - rl->vcn;
1818 rl++;
1819 rl->vcn = start;
1820 rl->lcn = LCN_HOLE;
1821 goto extend_hole;
1822 }
1823 trl = ntfs_rl_realloc(runlist->rl, old_size, old_size + 1);
1824 if (IS_ERR(trl))
1825 goto enomem_out;
1826 old_size++;
1827 if (runlist->rl != trl) {
1828 rl = trl + (rl - runlist->rl);
1829 rl_end = trl + (rl_end - runlist->rl);
1830 rl_real_end = trl + (rl_real_end - runlist->rl);
1831 runlist->rl = trl;
1832 }
1833 /* Truncate the run containing @start. */
1834 rl->length = start - rl->vcn;
1835 rl++;
1836 /*
1837 * @end is in the next run, reduce the problem to the case
1838 * where "@start is at the beginning of a run and @end is in
1839 * the same run as @start".
1840 */
1841 delta = rl->vcn - start;
1842 rl->vcn = start;
1843 if (rl->lcn >= 0) {
1844 rl->lcn -= delta;
1845 /* Need this in case the lcn just became negative. */
1846 lcn_fixup = TRUE;
1847 }
1848 rl->length += delta;
1849 goto split_end;
1850 }
1851 /*
1852 * The first case from above, i.e. @end is in the same run as @start.
1853 * We need to split the run into three. One run for the non-sparse
1854 * region between the beginning of the old run and @start, one for the
1855 * sparse region between @start and @end, and one for the remaining
1856 * non-sparse region, i.e. between @end and the end of the old run.
1857 */
1858 trl = ntfs_rl_realloc(runlist->rl, old_size, old_size + 2);
1859 if (IS_ERR(trl))
1860 goto enomem_out;
1861 old_size += 2;
1862 if (runlist->rl != trl) {
1863 rl = trl + (rl - runlist->rl);
1864 rl_end = trl + (rl_end - runlist->rl);
1865 rl_real_end = trl + (rl_real_end - runlist->rl);
1866 runlist->rl = trl;
1867 }
1868 /* Shift all the runs up by two. */
1869 memmove(rl + 2, rl, (rl_real_end - rl + 1) * sizeof(*rl));
1870 /* Finally, setup the three split runs. */
1871 rl->length = start - rl->vcn;
1872 rl++;
1873 rl->vcn = start;
1874 rl->lcn = LCN_HOLE;
1875 rl->length = length;
1876 rl++;
1877 delta = end - rl->vcn;
1878 rl->vcn = end;
1879 rl->lcn += delta;
1880 rl->length -= delta;
1881 ntfs_debug("Done (split both).");
1882 return 0;
1883enomem_out:
1884 ntfs_error(vol->sb, "Not enough memory to extend runlist buffer.");
1885 return -ENOMEM;
1886}
1887
1556#endif /* NTFS_RW */ 1888#endif /* NTFS_RW */
diff --git a/fs/ntfs/runlist.h b/fs/ntfs/runlist.h
index aa0ee6540e7c..47728fbb610b 100644
--- a/fs/ntfs/runlist.h
+++ b/fs/ntfs/runlist.h
@@ -94,6 +94,9 @@ extern int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst,
94extern int ntfs_rl_truncate_nolock(const ntfs_volume *vol, 94extern int ntfs_rl_truncate_nolock(const ntfs_volume *vol,
95 runlist *const runlist, const s64 new_length); 95 runlist *const runlist, const s64 new_length);
96 96
97int ntfs_rl_punch_nolock(const ntfs_volume *vol, runlist *const runlist,
98 const VCN start, const s64 length);
99
97#endif /* NTFS_RW */ 100#endif /* NTFS_RW */
98 101
99#endif /* _LINUX_NTFS_RUNLIST_H */ 102#endif /* _LINUX_NTFS_RUNLIST_H */
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 41aa8eb6755b..b2b392961268 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -1133,7 +1133,8 @@ mft_unmap_out:
1133 * 1133 *
1134 * Return TRUE on success or FALSE on error. 1134 * Return TRUE on success or FALSE on error.
1135 */ 1135 */
1136static BOOL load_and_check_logfile(ntfs_volume *vol) 1136static BOOL load_and_check_logfile(ntfs_volume *vol,
1137 RESTART_PAGE_HEADER **rp)
1137{ 1138{
1138 struct inode *tmp_ino; 1139 struct inode *tmp_ino;
1139 1140
@@ -1145,7 +1146,7 @@ static BOOL load_and_check_logfile(ntfs_volume *vol)
1145 /* Caller will display error message. */ 1146 /* Caller will display error message. */
1146 return FALSE; 1147 return FALSE;
1147 } 1148 }
1148 if (!ntfs_check_logfile(tmp_ino)) { 1149 if (!ntfs_check_logfile(tmp_ino, rp)) {
1149 iput(tmp_ino); 1150 iput(tmp_ino);
1150 /* ntfs_check_logfile() will have displayed error output. */ 1151 /* ntfs_check_logfile() will have displayed error output. */
1151 return FALSE; 1152 return FALSE;
@@ -1689,6 +1690,7 @@ static BOOL load_system_files(ntfs_volume *vol)
1689 VOLUME_INFORMATION *vi; 1690 VOLUME_INFORMATION *vi;
1690 ntfs_attr_search_ctx *ctx; 1691 ntfs_attr_search_ctx *ctx;
1691#ifdef NTFS_RW 1692#ifdef NTFS_RW
1693 RESTART_PAGE_HEADER *rp;
1692 int err; 1694 int err;
1693#endif /* NTFS_RW */ 1695#endif /* NTFS_RW */
1694 1696
@@ -1841,8 +1843,9 @@ get_ctx_vol_failed:
1841 * Get the inode for the logfile, check it and determine if the volume 1843 * Get the inode for the logfile, check it and determine if the volume
1842 * was shutdown cleanly. 1844 * was shutdown cleanly.
1843 */ 1845 */
1844 if (!load_and_check_logfile(vol) || 1846 rp = NULL;
1845 !ntfs_is_logfile_clean(vol->logfile_ino)) { 1847 if (!load_and_check_logfile(vol, &rp) ||
1848 !ntfs_is_logfile_clean(vol->logfile_ino, rp)) {
1846 static const char *es1a = "Failed to load $LogFile"; 1849 static const char *es1a = "Failed to load $LogFile";
1847 static const char *es1b = "$LogFile is not clean"; 1850 static const char *es1b = "$LogFile is not clean";
1848 static const char *es2 = ". Mount in Windows."; 1851 static const char *es2 = ". Mount in Windows.";
@@ -1857,6 +1860,10 @@ get_ctx_vol_failed:
1857 "continue nor on_errors=" 1860 "continue nor on_errors="
1858 "remount-ro was specified%s", 1861 "remount-ro was specified%s",
1859 es1, es2); 1862 es1, es2);
1863 if (vol->logfile_ino) {
1864 BUG_ON(!rp);
1865 ntfs_free(rp);
1866 }
1860 goto iput_logfile_err_out; 1867 goto iput_logfile_err_out;
1861 } 1868 }
1862 sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; 1869 sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
@@ -1867,6 +1874,7 @@ get_ctx_vol_failed:
1867 /* This will prevent a read-write remount. */ 1874 /* This will prevent a read-write remount. */
1868 NVolSetErrors(vol); 1875 NVolSetErrors(vol);
1869 } 1876 }
1877 ntfs_free(rp);
1870#endif /* NTFS_RW */ 1878#endif /* NTFS_RW */
1871 /* Get the root directory inode so we can do path lookups. */ 1879 /* Get the root directory inode so we can do path lookups. */
1872 vol->root_ino = ntfs_iget(sb, FILE_root); 1880 vol->root_ino = ntfs_iget(sb, FILE_root);
diff --git a/fs/ntfs/unistr.c b/fs/ntfs/unistr.c
index 19c42e231b44..a389a5a16c84 100644
--- a/fs/ntfs/unistr.c
+++ b/fs/ntfs/unistr.c
@@ -372,7 +372,8 @@ retry: wc = nls->uni2char(le16_to_cpu(ins[i]), ns + o,
372 return -EINVAL; 372 return -EINVAL;
373conversion_err: 373conversion_err:
374 ntfs_error(vol->sb, "Unicode name contains characters that cannot be " 374 ntfs_error(vol->sb, "Unicode name contains characters that cannot be "
375 "converted to character set %s.", nls->charset); 375 "converted to character set %s. You might want to "
376 "try to use the mount option nls=utf8.", nls->charset);
376 if (ns != *outs) 377 if (ns != *outs)
377 kfree(ns); 378 kfree(ns);
378 if (wc != -ENAMETOOLONG) 379 if (wc != -ENAMETOOLONG)
diff --git a/fs/open.c b/fs/open.c
index 4ee2dcc31c28..2fac58c51910 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -24,6 +24,7 @@
24#include <linux/personality.h> 24#include <linux/personality.h>
25#include <linux/pagemap.h> 25#include <linux/pagemap.h>
26#include <linux/syscalls.h> 26#include <linux/syscalls.h>
27#include <linux/rcupdate.h>
27 28
28#include <asm/unistd.h> 29#include <asm/unistd.h>
29 30
@@ -842,14 +843,16 @@ int get_unused_fd(void)
842{ 843{
843 struct files_struct * files = current->files; 844 struct files_struct * files = current->files;
844 int fd, error; 845 int fd, error;
846 struct fdtable *fdt;
845 847
846 error = -EMFILE; 848 error = -EMFILE;
847 spin_lock(&files->file_lock); 849 spin_lock(&files->file_lock);
848 850
849repeat: 851repeat:
850 fd = find_next_zero_bit(files->open_fds->fds_bits, 852 fdt = files_fdtable(files);
851 files->max_fdset, 853 fd = find_next_zero_bit(fdt->open_fds->fds_bits,
852 files->next_fd); 854 fdt->max_fdset,
855 fdt->next_fd);
853 856
854 /* 857 /*
855 * N.B. For clone tasks sharing a files structure, this test 858 * N.B. For clone tasks sharing a files structure, this test
@@ -872,14 +875,14 @@ repeat:
872 goto repeat; 875 goto repeat;
873 } 876 }
874 877
875 FD_SET(fd, files->open_fds); 878 FD_SET(fd, fdt->open_fds);
876 FD_CLR(fd, files->close_on_exec); 879 FD_CLR(fd, fdt->close_on_exec);
877 files->next_fd = fd + 1; 880 fdt->next_fd = fd + 1;
878#if 1 881#if 1
879 /* Sanity check */ 882 /* Sanity check */
880 if (files->fd[fd] != NULL) { 883 if (fdt->fd[fd] != NULL) {
881 printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd); 884 printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd);
882 files->fd[fd] = NULL; 885 fdt->fd[fd] = NULL;
883 } 886 }
884#endif 887#endif
885 error = fd; 888 error = fd;
@@ -893,9 +896,10 @@ EXPORT_SYMBOL(get_unused_fd);
893 896
894static inline void __put_unused_fd(struct files_struct *files, unsigned int fd) 897static inline void __put_unused_fd(struct files_struct *files, unsigned int fd)
895{ 898{
896 __FD_CLR(fd, files->open_fds); 899 struct fdtable *fdt = files_fdtable(files);
897 if (fd < files->next_fd) 900 __FD_CLR(fd, fdt->open_fds);
898 files->next_fd = fd; 901 if (fd < fdt->next_fd)
902 fdt->next_fd = fd;
899} 903}
900 904
901void fastcall put_unused_fd(unsigned int fd) 905void fastcall put_unused_fd(unsigned int fd)
@@ -924,10 +928,11 @@ EXPORT_SYMBOL(put_unused_fd);
924void fastcall fd_install(unsigned int fd, struct file * file) 928void fastcall fd_install(unsigned int fd, struct file * file)
925{ 929{
926 struct files_struct *files = current->files; 930 struct files_struct *files = current->files;
931 struct fdtable *fdt;
927 spin_lock(&files->file_lock); 932 spin_lock(&files->file_lock);
928 if (unlikely(files->fd[fd] != NULL)) 933 fdt = files_fdtable(files);
929 BUG(); 934 BUG_ON(fdt->fd[fd] != NULL);
930 files->fd[fd] = file; 935 rcu_assign_pointer(fdt->fd[fd], file);
931 spin_unlock(&files->file_lock); 936 spin_unlock(&files->file_lock);
932} 937}
933 938
@@ -1010,15 +1015,17 @@ asmlinkage long sys_close(unsigned int fd)
1010{ 1015{
1011 struct file * filp; 1016 struct file * filp;
1012 struct files_struct *files = current->files; 1017 struct files_struct *files = current->files;
1018 struct fdtable *fdt;
1013 1019
1014 spin_lock(&files->file_lock); 1020 spin_lock(&files->file_lock);
1015 if (fd >= files->max_fds) 1021 fdt = files_fdtable(files);
1022 if (fd >= fdt->max_fds)
1016 goto out_unlock; 1023 goto out_unlock;
1017 filp = files->fd[fd]; 1024 filp = fdt->fd[fd];
1018 if (!filp) 1025 if (!filp)
1019 goto out_unlock; 1026 goto out_unlock;
1020 files->fd[fd] = NULL; 1027 rcu_assign_pointer(fdt->fd[fd], NULL);
1021 FD_CLR(fd, files->close_on_exec); 1028 FD_CLR(fd, fdt->close_on_exec);
1022 __put_unused_fd(files, fd); 1029 __put_unused_fd(files, fd);
1023 spin_unlock(&files->file_lock); 1030 spin_unlock(&files->file_lock);
1024 return filp_close(filp, files); 1031 return filp_close(filp, files);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 37668fe998ad..d88d518d30f6 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -159,6 +159,7 @@ static inline char * task_state(struct task_struct *p, char *buffer)
159{ 159{
160 struct group_info *group_info; 160 struct group_info *group_info;
161 int g; 161 int g;
162 struct fdtable *fdt = NULL;
162 163
163 read_lock(&tasklist_lock); 164 read_lock(&tasklist_lock);
164 buffer += sprintf(buffer, 165 buffer += sprintf(buffer,
@@ -179,10 +180,12 @@ static inline char * task_state(struct task_struct *p, char *buffer)
179 p->gid, p->egid, p->sgid, p->fsgid); 180 p->gid, p->egid, p->sgid, p->fsgid);
180 read_unlock(&tasklist_lock); 181 read_unlock(&tasklist_lock);
181 task_lock(p); 182 task_lock(p);
183 if (p->files)
184 fdt = files_fdtable(p->files);
182 buffer += sprintf(buffer, 185 buffer += sprintf(buffer,
183 "FDSize:\t%d\n" 186 "FDSize:\t%d\n"
184 "Groups:\t", 187 "Groups:\t",
185 p->files ? p->files->max_fds : 0); 188 fdt ? fdt->max_fds : 0);
186 189
187 group_info = p->group_info; 190 group_info = p->group_info;
188 get_group_info(group_info); 191 get_group_info(group_info);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 84751f3f52d5..23db452ab428 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -62,6 +62,7 @@
62#include <linux/namespace.h> 62#include <linux/namespace.h>
63#include <linux/mm.h> 63#include <linux/mm.h>
64#include <linux/smp_lock.h> 64#include <linux/smp_lock.h>
65#include <linux/rcupdate.h>
65#include <linux/kallsyms.h> 66#include <linux/kallsyms.h>
66#include <linux/mount.h> 67#include <linux/mount.h>
67#include <linux/security.h> 68#include <linux/security.h>
@@ -283,16 +284,16 @@ static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsm
283 284
284 files = get_files_struct(task); 285 files = get_files_struct(task);
285 if (files) { 286 if (files) {
286 spin_lock(&files->file_lock); 287 rcu_read_lock();
287 file = fcheck_files(files, fd); 288 file = fcheck_files(files, fd);
288 if (file) { 289 if (file) {
289 *mnt = mntget(file->f_vfsmnt); 290 *mnt = mntget(file->f_vfsmnt);
290 *dentry = dget(file->f_dentry); 291 *dentry = dget(file->f_dentry);
291 spin_unlock(&files->file_lock); 292 rcu_read_unlock();
292 put_files_struct(files); 293 put_files_struct(files);
293 return 0; 294 return 0;
294 } 295 }
295 spin_unlock(&files->file_lock); 296 rcu_read_unlock();
296 put_files_struct(files); 297 put_files_struct(files);
297 } 298 }
298 return -ENOENT; 299 return -ENOENT;
@@ -1039,6 +1040,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
1039 int retval; 1040 int retval;
1040 char buf[NUMBUF]; 1041 char buf[NUMBUF];
1041 struct files_struct * files; 1042 struct files_struct * files;
1043 struct fdtable *fdt;
1042 1044
1043 retval = -ENOENT; 1045 retval = -ENOENT;
1044 if (!pid_alive(p)) 1046 if (!pid_alive(p))
@@ -1061,15 +1063,16 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
1061 files = get_files_struct(p); 1063 files = get_files_struct(p);
1062 if (!files) 1064 if (!files)
1063 goto out; 1065 goto out;
1064 spin_lock(&files->file_lock); 1066 rcu_read_lock();
1067 fdt = files_fdtable(files);
1065 for (fd = filp->f_pos-2; 1068 for (fd = filp->f_pos-2;
1066 fd < files->max_fds; 1069 fd < fdt->max_fds;
1067 fd++, filp->f_pos++) { 1070 fd++, filp->f_pos++) {
1068 unsigned int i,j; 1071 unsigned int i,j;
1069 1072
1070 if (!fcheck_files(files, fd)) 1073 if (!fcheck_files(files, fd))
1071 continue; 1074 continue;
1072 spin_unlock(&files->file_lock); 1075 rcu_read_unlock();
1073 1076
1074 j = NUMBUF; 1077 j = NUMBUF;
1075 i = fd; 1078 i = fd;
@@ -1081,12 +1084,12 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
1081 1084
1082 ino = fake_ino(tid, PROC_TID_FD_DIR + fd); 1085 ino = fake_ino(tid, PROC_TID_FD_DIR + fd);
1083 if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) { 1086 if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) {
1084 spin_lock(&files->file_lock); 1087 rcu_read_lock();
1085 break; 1088 break;
1086 } 1089 }
1087 spin_lock(&files->file_lock); 1090 rcu_read_lock();
1088 } 1091 }
1089 spin_unlock(&files->file_lock); 1092 rcu_read_unlock();
1090 put_files_struct(files); 1093 put_files_struct(files);
1091 } 1094 }
1092out: 1095out:
@@ -1261,9 +1264,9 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
1261 1264
1262 files = get_files_struct(task); 1265 files = get_files_struct(task);
1263 if (files) { 1266 if (files) {
1264 spin_lock(&files->file_lock); 1267 rcu_read_lock();
1265 if (fcheck_files(files, fd)) { 1268 if (fcheck_files(files, fd)) {
1266 spin_unlock(&files->file_lock); 1269 rcu_read_unlock();
1267 put_files_struct(files); 1270 put_files_struct(files);
1268 if (task_dumpable(task)) { 1271 if (task_dumpable(task)) {
1269 inode->i_uid = task->euid; 1272 inode->i_uid = task->euid;
@@ -1275,7 +1278,7 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
1275 security_task_to_inode(task, inode); 1278 security_task_to_inode(task, inode);
1276 return 1; 1279 return 1;
1277 } 1280 }
1278 spin_unlock(&files->file_lock); 1281 rcu_read_unlock();
1279 put_files_struct(files); 1282 put_files_struct(files);
1280 } 1283 }
1281 d_drop(dentry); 1284 d_drop(dentry);
@@ -1367,7 +1370,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
1367 if (!files) 1370 if (!files)
1368 goto out_unlock; 1371 goto out_unlock;
1369 inode->i_mode = S_IFLNK; 1372 inode->i_mode = S_IFLNK;
1370 spin_lock(&files->file_lock); 1373 rcu_read_lock();
1371 file = fcheck_files(files, fd); 1374 file = fcheck_files(files, fd);
1372 if (!file) 1375 if (!file)
1373 goto out_unlock2; 1376 goto out_unlock2;
@@ -1375,7 +1378,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
1375 inode->i_mode |= S_IRUSR | S_IXUSR; 1378 inode->i_mode |= S_IRUSR | S_IXUSR;
1376 if (file->f_mode & 2) 1379 if (file->f_mode & 2)
1377 inode->i_mode |= S_IWUSR | S_IXUSR; 1380 inode->i_mode |= S_IWUSR | S_IXUSR;
1378 spin_unlock(&files->file_lock); 1381 rcu_read_unlock();
1379 put_files_struct(files); 1382 put_files_struct(files);
1380 inode->i_op = &proc_pid_link_inode_operations; 1383 inode->i_op = &proc_pid_link_inode_operations;
1381 inode->i_size = 64; 1384 inode->i_size = 64;
@@ -1385,7 +1388,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
1385 return NULL; 1388 return NULL;
1386 1389
1387out_unlock2: 1390out_unlock2:
1388 spin_unlock(&files->file_lock); 1391 rcu_read_unlock();
1389 put_files_struct(files); 1392 put_files_struct(files);
1390out_unlock: 1393out_unlock:
1391 iput(inode); 1394 iput(inode);
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 133c28685105..effa6c0c467a 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -60,6 +60,8 @@ static void proc_delete_inode(struct inode *inode)
60 struct proc_dir_entry *de; 60 struct proc_dir_entry *de;
61 struct task_struct *tsk; 61 struct task_struct *tsk;
62 62
63 truncate_inode_pages(&inode->i_data, 0);
64
63 /* Let go of any associated process */ 65 /* Let go of any associated process */
64 tsk = PROC_I(inode)->task; 66 tsk = PROC_I(inode)->task;
65 if (tsk) 67 if (tsk)
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index b79162a35478..80f32911c0cb 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -63,6 +63,7 @@ int qnx4_sync_inode(struct inode *inode)
63static void qnx4_delete_inode(struct inode *inode) 63static void qnx4_delete_inode(struct inode *inode)
64{ 64{
65 QNX4DEBUG(("qnx4: deleting inode [%lu]\n", (unsigned long) inode->i_ino)); 65 QNX4DEBUG(("qnx4: deleting inode [%lu]\n", (unsigned long) inode->i_ino));
66 truncate_inode_pages(&inode->i_data, 0);
66 inode->i_size = 0; 67 inode->i_size = 0;
67 qnx4_truncate(inode); 68 qnx4_truncate(inode);
68 lock_kernel(); 69 lock_kernel();
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index ff291c973a56..1a8a1bf2154d 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -33,6 +33,8 @@ void reiserfs_delete_inode(struct inode *inode)
33 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb); 33 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb);
34 struct reiserfs_transaction_handle th; 34 struct reiserfs_transaction_handle th;
35 35
36 truncate_inode_pages(&inode->i_data, 0);
37
36 reiserfs_write_lock(inode->i_sb); 38 reiserfs_write_lock(inode->i_sb);
37 39
38 /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */ 40 /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */
diff --git a/fs/select.c b/fs/select.c
index b80e7eb0ac0d..f10a10317d54 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -22,6 +22,7 @@
22#include <linux/personality.h> /* for STICKY_TIMEOUTS */ 22#include <linux/personality.h> /* for STICKY_TIMEOUTS */
23#include <linux/file.h> 23#include <linux/file.h>
24#include <linux/fs.h> 24#include <linux/fs.h>
25#include <linux/rcupdate.h>
25 26
26#include <asm/uaccess.h> 27#include <asm/uaccess.h>
27 28
@@ -132,11 +133,13 @@ static int max_select_fd(unsigned long n, fd_set_bits *fds)
132 unsigned long *open_fds; 133 unsigned long *open_fds;
133 unsigned long set; 134 unsigned long set;
134 int max; 135 int max;
136 struct fdtable *fdt;
135 137
136 /* handle last in-complete long-word first */ 138 /* handle last in-complete long-word first */
137 set = ~(~0UL << (n & (__NFDBITS-1))); 139 set = ~(~0UL << (n & (__NFDBITS-1)));
138 n /= __NFDBITS; 140 n /= __NFDBITS;
139 open_fds = current->files->open_fds->fds_bits+n; 141 fdt = files_fdtable(current->files);
142 open_fds = fdt->open_fds->fds_bits+n;
140 max = 0; 143 max = 0;
141 if (set) { 144 if (set) {
142 set &= BITS(fds, n); 145 set &= BITS(fds, n);
@@ -183,9 +186,9 @@ int do_select(int n, fd_set_bits *fds, long *timeout)
183 int retval, i; 186 int retval, i;
184 long __timeout = *timeout; 187 long __timeout = *timeout;
185 188
186 spin_lock(&current->files->file_lock); 189 rcu_read_lock();
187 retval = max_select_fd(n, fds); 190 retval = max_select_fd(n, fds);
188 spin_unlock(&current->files->file_lock); 191 rcu_read_unlock();
189 192
190 if (retval < 0) 193 if (retval < 0)
191 return retval; 194 return retval;
@@ -299,6 +302,7 @@ sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s
299 char *bits; 302 char *bits;
300 long timeout; 303 long timeout;
301 int ret, size, max_fdset; 304 int ret, size, max_fdset;
305 struct fdtable *fdt;
302 306
303 timeout = MAX_SCHEDULE_TIMEOUT; 307 timeout = MAX_SCHEDULE_TIMEOUT;
304 if (tvp) { 308 if (tvp) {
@@ -326,7 +330,10 @@ sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s
326 goto out_nofds; 330 goto out_nofds;
327 331
328 /* max_fdset can increase, so grab it once to avoid race */ 332 /* max_fdset can increase, so grab it once to avoid race */
329 max_fdset = current->files->max_fdset; 333 rcu_read_lock();
334 fdt = files_fdtable(current->files);
335 max_fdset = fdt->max_fdset;
336 rcu_read_unlock();
330 if (n > max_fdset) 337 if (n > max_fdset)
331 n = max_fdset; 338 n = max_fdset;
332 339
@@ -464,9 +471,15 @@ asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long ti
464 unsigned int i; 471 unsigned int i;
465 struct poll_list *head; 472 struct poll_list *head;
466 struct poll_list *walk; 473 struct poll_list *walk;
474 struct fdtable *fdt;
475 int max_fdset;
467 476
468 /* Do a sanity check on nfds ... */ 477 /* Do a sanity check on nfds ... */
469 if (nfds > current->files->max_fdset && nfds > OPEN_MAX) 478 rcu_read_lock();
479 fdt = files_fdtable(current->files);
480 max_fdset = fdt->max_fdset;
481 rcu_read_unlock();
482 if (nfds > max_fdset && nfds > OPEN_MAX)
470 return -EINVAL; 483 return -EINVAL;
471 484
472 if (timeout) { 485 if (timeout) {
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 4765aaac9fd2..10b994428fef 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -331,6 +331,7 @@ static void
331smb_delete_inode(struct inode *ino) 331smb_delete_inode(struct inode *ino)
332{ 332{
333 DEBUG1("ino=%ld\n", ino->i_ino); 333 DEBUG1("ino=%ld\n", ino->i_ino);
334 truncate_inode_pages(&ino->i_data, 0);
334 lock_kernel(); 335 lock_kernel();
335 if (smb_close(ino)) 336 if (smb_close(ino))
336 PARANOIA("could not close inode %ld\n", ino->i_ino); 337 PARANOIA("could not close inode %ld\n", ino->i_ino);
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 0530077d9dd8..fa33eceb0011 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -292,6 +292,7 @@ int sysv_sync_inode(struct inode * inode)
292 292
293static void sysv_delete_inode(struct inode *inode) 293static void sysv_delete_inode(struct inode *inode)
294{ 294{
295 truncate_inode_pages(&inode->i_data, 0);
295 inode->i_size = 0; 296 inode->i_size = 0;
296 sysv_truncate(inode); 297 sysv_truncate(inode);
297 lock_kernel(); 298 lock_kernel();
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 3d68de39fad6..b83890beaaac 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -87,6 +87,8 @@ static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int);
87 */ 87 */
88void udf_delete_inode(struct inode * inode) 88void udf_delete_inode(struct inode * inode)
89{ 89{
90 truncate_inode_pages(&inode->i_data, 0);
91
90 if (is_bad_inode(inode)) 92 if (is_bad_inode(inode))
91 goto no_delete; 93 goto no_delete;
92 94
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 718627ca8b5c..55f4aa16e3fc 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -804,6 +804,7 @@ int ufs_sync_inode (struct inode *inode)
804 804
805void ufs_delete_inode (struct inode * inode) 805void ufs_delete_inode (struct inode * inode)
806{ 806{
807 truncate_inode_pages(&inode->i_data, 0);
807 /*UFS_I(inode)->i_dtime = CURRENT_TIME;*/ 808 /*UFS_I(inode)->i_dtime = CURRENT_TIME;*/
808 lock_kernel(); 809 lock_kernel();
809 mark_inode_dirty(inode); 810 mark_inode_dirty(inode);
diff --git a/fs/xfs/Makefile-linux-2.6 b/fs/xfs/Makefile-linux-2.6
index 8e18ff157247..d8c87fa21ad1 100644
--- a/fs/xfs/Makefile-linux-2.6
+++ b/fs/xfs/Makefile-linux-2.6
@@ -1,5 +1,5 @@
1# 1#
2# Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. 2# Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved.
3# 3#
4# This program is free software; you can redistribute it and/or modify it 4# This program is free software; you can redistribute it and/or modify it
5# under the terms of version 2 of the GNU General Public License as 5# under the terms of version 2 of the GNU General Public License as
@@ -55,7 +55,18 @@ ifeq ($(CONFIG_XFS_TRACE),y)
55endif 55endif
56 56
57obj-$(CONFIG_XFS_FS) += xfs.o 57obj-$(CONFIG_XFS_FS) += xfs.o
58xfs-$(CONFIG_XFS_QUOTA) += quota/ 58
59xfs-$(CONFIG_XFS_QUOTA) += $(addprefix quota/, \
60 xfs_dquot.o \
61 xfs_dquot_item.o \
62 xfs_trans_dquot.o \
63 xfs_qm_syscalls.o \
64 xfs_qm_bhv.o \
65 xfs_qm.o)
66
67ifeq ($(CONFIG_XFS_QUOTA),y)
68xfs-$(CONFIG_PROC_FS) += quota/xfs_qm_stats.o
69endif
59 70
60xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o 71xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o
61xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o 72xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o
diff --git a/fs/xfs/support/ktrace.c b/fs/xfs/support/ktrace.c
index 3dae14c8c55a..fa8394f9437d 100644
--- a/fs/xfs/support/ktrace.c
+++ b/fs/xfs/support/ktrace.c
@@ -170,7 +170,7 @@ ktrace_enter(
170 void *val14, 170 void *val14,
171 void *val15) 171 void *val15)
172{ 172{
173 static lock_t wrap_lock = SPIN_LOCK_UNLOCKED; 173 static DEFINE_SPINLOCK(wrap_lock);
174 unsigned long flags; 174 unsigned long flags;
175 int index; 175 int index;
176 ktrace_entry_t *ktep; 176 ktrace_entry_t *ktep;