diff options
Diffstat (limited to 'fs')
219 files changed, 15415 insertions, 2666 deletions
diff --git a/fs/9p/9p.c b/fs/9p/9p.c new file mode 100644 index 000000000000..e847f504a47c --- /dev/null +++ b/fs/9p/9p.c | |||
@@ -0,0 +1,359 @@ | |||
1 | /* | ||
2 | * linux/fs/9p/9p.c | ||
3 | * | ||
4 | * This file contains the 9P2000 protocol request functions | ||
5 | * | ||
6 | * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> | ||
7 | * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License as published by | ||
11 | * the Free Software Foundation; either version 2 of the License, or | ||
12 | * (at your option) any later version. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, | ||
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | * GNU General Public License for more details. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License | ||
20 | * along with this program; if not, write to: | ||
21 | * Free Software Foundation | ||
22 | * 51 Franklin Street, Fifth Floor | ||
23 | * Boston, MA 02111-1301 USA | ||
24 | * | ||
25 | */ | ||
26 | |||
27 | #include <linux/config.h> | ||
28 | #include <linux/module.h> | ||
29 | #include <linux/errno.h> | ||
30 | #include <linux/fs.h> | ||
31 | #include <linux/idr.h> | ||
32 | |||
33 | #include "debug.h" | ||
34 | #include "v9fs.h" | ||
35 | #include "9p.h" | ||
36 | #include "mux.h" | ||
37 | |||
38 | /** | ||
39 | * v9fs_t_version - negotiate protocol parameters with sever | ||
40 | * @v9ses: 9P2000 session information | ||
41 | * @msize: requested max size packet | ||
42 | * @version: requested version.extension string | ||
43 | * @fcall: pointer to response fcall pointer | ||
44 | * | ||
45 | */ | ||
46 | |||
47 | int | ||
48 | v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize, | ||
49 | char *version, struct v9fs_fcall **fcall) | ||
50 | { | ||
51 | struct v9fs_fcall msg; | ||
52 | |||
53 | dprintk(DEBUG_9P, "msize: %d version: %s\n", msize, version); | ||
54 | msg.id = TVERSION; | ||
55 | msg.params.tversion.msize = msize; | ||
56 | msg.params.tversion.version = version; | ||
57 | |||
58 | return v9fs_mux_rpc(v9ses, &msg, fcall); | ||
59 | } | ||
60 | |||
61 | /** | ||
62 | * v9fs_t_attach - mount the server | ||
63 | * @v9ses: 9P2000 session information | ||
64 | * @uname: user name doing the attach | ||
65 | * @aname: remote name being attached to | ||
66 | * @fid: mount fid to attatch to root node | ||
67 | * @afid: authentication fid (in this case result key) | ||
68 | * @fcall: pointer to response fcall pointer | ||
69 | * | ||
70 | */ | ||
71 | |||
72 | int | ||
73 | v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname, | ||
74 | u32 fid, u32 afid, struct v9fs_fcall **fcall) | ||
75 | { | ||
76 | struct v9fs_fcall msg; | ||
77 | |||
78 | dprintk(DEBUG_9P, "uname '%s' aname '%s' fid %d afid %d\n", uname, | ||
79 | aname, fid, afid); | ||
80 | msg.id = TATTACH; | ||
81 | msg.params.tattach.fid = fid; | ||
82 | msg.params.tattach.afid = afid; | ||
83 | msg.params.tattach.uname = uname; | ||
84 | msg.params.tattach.aname = aname; | ||
85 | |||
86 | return v9fs_mux_rpc(v9ses, &msg, fcall); | ||
87 | } | ||
88 | |||
89 | /** | ||
90 | * v9fs_t_clunk - release a fid (finish a transaction) | ||
91 | * @v9ses: 9P2000 session information | ||
92 | * @fid: fid to release | ||
93 | * @fcall: pointer to response fcall pointer | ||
94 | * | ||
95 | */ | ||
96 | |||
97 | int | ||
98 | v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid, | ||
99 | struct v9fs_fcall **fcall) | ||
100 | { | ||
101 | struct v9fs_fcall msg; | ||
102 | |||
103 | dprintk(DEBUG_9P, "fid %d\n", fid); | ||
104 | msg.id = TCLUNK; | ||
105 | msg.params.tclunk.fid = fid; | ||
106 | |||
107 | return v9fs_mux_rpc(v9ses, &msg, fcall); | ||
108 | } | ||
109 | |||
110 | /** | ||
111 | * v9fs_v9fs_t_flush - flush a pending transaction | ||
112 | * @v9ses: 9P2000 session information | ||
113 | * @tag: tid to release | ||
114 | * | ||
115 | */ | ||
116 | |||
117 | int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 tag) | ||
118 | { | ||
119 | struct v9fs_fcall msg; | ||
120 | |||
121 | dprintk(DEBUG_9P, "oldtag %d\n", tag); | ||
122 | msg.id = TFLUSH; | ||
123 | msg.params.tflush.oldtag = tag; | ||
124 | return v9fs_mux_rpc(v9ses, &msg, NULL); | ||
125 | } | ||
126 | |||
127 | /** | ||
128 | * v9fs_t_stat - read a file's meta-data | ||
129 | * @v9ses: 9P2000 session information | ||
130 | * @fid: fid pointing to file or directory to get info about | ||
131 | * @fcall: pointer to response fcall | ||
132 | * | ||
133 | */ | ||
134 | |||
135 | int | ||
136 | v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **fcall) | ||
137 | { | ||
138 | struct v9fs_fcall msg; | ||
139 | |||
140 | dprintk(DEBUG_9P, "fid %d\n", fid); | ||
141 | if (fcall) | ||
142 | *fcall = NULL; | ||
143 | |||
144 | msg.id = TSTAT; | ||
145 | msg.params.tstat.fid = fid; | ||
146 | return v9fs_mux_rpc(v9ses, &msg, fcall); | ||
147 | } | ||
148 | |||
149 | /** | ||
150 | * v9fs_t_wstat - write a file's meta-data | ||
151 | * @v9ses: 9P2000 session information | ||
152 | * @fid: fid pointing to file or directory to write info about | ||
153 | * @stat: metadata | ||
154 | * @fcall: pointer to response fcall | ||
155 | * | ||
156 | */ | ||
157 | |||
158 | int | ||
159 | v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid, | ||
160 | struct v9fs_stat *stat, struct v9fs_fcall **fcall) | ||
161 | { | ||
162 | struct v9fs_fcall msg; | ||
163 | |||
164 | dprintk(DEBUG_9P, "fid %d length %d\n", fid, (int)stat->length); | ||
165 | msg.id = TWSTAT; | ||
166 | msg.params.twstat.fid = fid; | ||
167 | msg.params.twstat.stat = stat; | ||
168 | |||
169 | return v9fs_mux_rpc(v9ses, &msg, fcall); | ||
170 | } | ||
171 | |||
172 | /** | ||
173 | * v9fs_t_walk - walk a fid to a new file or directory | ||
174 | * @v9ses: 9P2000 session information | ||
175 | * @fid: fid to walk | ||
176 | * @newfid: new fid (for clone operations) | ||
177 | * @name: path to walk fid to | ||
178 | * @fcall: pointer to response fcall | ||
179 | * | ||
180 | */ | ||
181 | |||
182 | /* TODO: support multiple walk */ | ||
183 | |||
184 | int | ||
185 | v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid, | ||
186 | char *name, struct v9fs_fcall **fcall) | ||
187 | { | ||
188 | struct v9fs_fcall msg; | ||
189 | |||
190 | dprintk(DEBUG_9P, "fid %d newfid %d wname '%s'\n", fid, newfid, name); | ||
191 | msg.id = TWALK; | ||
192 | msg.params.twalk.fid = fid; | ||
193 | msg.params.twalk.newfid = newfid; | ||
194 | |||
195 | if (name) { | ||
196 | msg.params.twalk.nwname = 1; | ||
197 | msg.params.twalk.wnames = &name; | ||
198 | } else { | ||
199 | msg.params.twalk.nwname = 0; | ||
200 | } | ||
201 | |||
202 | return v9fs_mux_rpc(v9ses, &msg, fcall); | ||
203 | } | ||
204 | |||
205 | /** | ||
206 | * v9fs_t_open - open a file | ||
207 | * | ||
208 | * @v9ses - 9P2000 session information | ||
209 | * @fid - fid to open | ||
210 | * @mode - mode to open file (R, RW, etc) | ||
211 | * @fcall - pointer to response fcall | ||
212 | * | ||
213 | */ | ||
214 | |||
215 | int | ||
216 | v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode, | ||
217 | struct v9fs_fcall **fcall) | ||
218 | { | ||
219 | struct v9fs_fcall msg; | ||
220 | long errorno = -1; | ||
221 | |||
222 | dprintk(DEBUG_9P, "fid %d mode %d\n", fid, mode); | ||
223 | msg.id = TOPEN; | ||
224 | msg.params.topen.fid = fid; | ||
225 | msg.params.topen.mode = mode; | ||
226 | |||
227 | errorno = v9fs_mux_rpc(v9ses, &msg, fcall); | ||
228 | |||
229 | return errorno; | ||
230 | } | ||
231 | |||
232 | /** | ||
233 | * v9fs_t_remove - remove a file or directory | ||
234 | * @v9ses: 9P2000 session information | ||
235 | * @fid: fid to remove | ||
236 | * @fcall: pointer to response fcall | ||
237 | * | ||
238 | */ | ||
239 | |||
240 | int | ||
241 | v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid, | ||
242 | struct v9fs_fcall **fcall) | ||
243 | { | ||
244 | struct v9fs_fcall msg; | ||
245 | |||
246 | dprintk(DEBUG_9P, "fid %d\n", fid); | ||
247 | msg.id = TREMOVE; | ||
248 | msg.params.tremove.fid = fid; | ||
249 | return v9fs_mux_rpc(v9ses, &msg, fcall); | ||
250 | } | ||
251 | |||
252 | /** | ||
253 | * v9fs_t_create - create a file or directory | ||
254 | * @v9ses: 9P2000 session information | ||
255 | * @fid: fid to create | ||
256 | * @name: name of the file or directory to create | ||
257 | * @perm: permissions to create with | ||
258 | * @mode: mode to open file (R, RW, etc) | ||
259 | * @fcall: pointer to response fcall | ||
260 | * | ||
261 | */ | ||
262 | |||
263 | int | ||
264 | v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name, | ||
265 | u32 perm, u8 mode, struct v9fs_fcall **fcall) | ||
266 | { | ||
267 | struct v9fs_fcall msg; | ||
268 | |||
269 | dprintk(DEBUG_9P, "fid %d name '%s' perm %x mode %d\n", | ||
270 | fid, name, perm, mode); | ||
271 | |||
272 | msg.id = TCREATE; | ||
273 | msg.params.tcreate.fid = fid; | ||
274 | msg.params.tcreate.name = name; | ||
275 | msg.params.tcreate.perm = perm; | ||
276 | msg.params.tcreate.mode = mode; | ||
277 | |||
278 | return v9fs_mux_rpc(v9ses, &msg, fcall); | ||
279 | } | ||
280 | |||
281 | /** | ||
282 | * v9fs_t_read - read data | ||
283 | * @v9ses: 9P2000 session information | ||
284 | * @fid: fid to read from | ||
285 | * @offset: offset to start read at | ||
286 | * @count: how many bytes to read | ||
287 | * @fcall: pointer to response fcall (with data) | ||
288 | * | ||
289 | */ | ||
290 | |||
291 | int | ||
292 | v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, u64 offset, | ||
293 | u32 count, struct v9fs_fcall **fcall) | ||
294 | { | ||
295 | struct v9fs_fcall msg; | ||
296 | struct v9fs_fcall *rc = NULL; | ||
297 | long errorno = -1; | ||
298 | |||
299 | dprintk(DEBUG_9P, "fid %d offset 0x%lx count 0x%x\n", fid, | ||
300 | (long unsigned int)offset, count); | ||
301 | msg.id = TREAD; | ||
302 | msg.params.tread.fid = fid; | ||
303 | msg.params.tread.offset = offset; | ||
304 | msg.params.tread.count = count; | ||
305 | errorno = v9fs_mux_rpc(v9ses, &msg, &rc); | ||
306 | |||
307 | if (!errorno) { | ||
308 | errorno = rc->params.rread.count; | ||
309 | dump_data(rc->params.rread.data, rc->params.rread.count); | ||
310 | } | ||
311 | |||
312 | if (fcall) | ||
313 | *fcall = rc; | ||
314 | else | ||
315 | kfree(rc); | ||
316 | |||
317 | return errorno; | ||
318 | } | ||
319 | |||
320 | /** | ||
321 | * v9fs_t_write - write data | ||
322 | * @v9ses: 9P2000 session information | ||
323 | * @fid: fid to write to | ||
324 | * @offset: offset to start write at | ||
325 | * @count: how many bytes to write | ||
326 | * @fcall: pointer to response fcall | ||
327 | * | ||
328 | */ | ||
329 | |||
330 | int | ||
331 | v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, | ||
332 | u64 offset, u32 count, void *data, struct v9fs_fcall **fcall) | ||
333 | { | ||
334 | struct v9fs_fcall msg; | ||
335 | struct v9fs_fcall *rc = NULL; | ||
336 | long errorno = -1; | ||
337 | |||
338 | dprintk(DEBUG_9P, "fid %d offset 0x%llx count 0x%x\n", fid, | ||
339 | (unsigned long long)offset, count); | ||
340 | dump_data(data, count); | ||
341 | |||
342 | msg.id = TWRITE; | ||
343 | msg.params.twrite.fid = fid; | ||
344 | msg.params.twrite.offset = offset; | ||
345 | msg.params.twrite.count = count; | ||
346 | msg.params.twrite.data = data; | ||
347 | |||
348 | errorno = v9fs_mux_rpc(v9ses, &msg, &rc); | ||
349 | |||
350 | if (!errorno) | ||
351 | errorno = rc->params.rwrite.count; | ||
352 | |||
353 | if (fcall) | ||
354 | *fcall = rc; | ||
355 | else | ||
356 | kfree(rc); | ||
357 | |||
358 | return errorno; | ||
359 | } | ||
diff --git a/fs/9p/9p.h b/fs/9p/9p.h new file mode 100644 index 000000000000..f55424216be2 --- /dev/null +++ b/fs/9p/9p.h | |||
@@ -0,0 +1,341 @@ | |||
1 | /* | ||
2 | * linux/fs/9p/9p.h | ||
3 | * | ||
4 | * 9P protocol definitions. | ||
5 | * | ||
6 | * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> | ||
7 | * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License as published by | ||
11 | * the Free Software Foundation; either version 2 of the License, or | ||
12 | * (at your option) any later version. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, | ||
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | * GNU General Public License for more details. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License | ||
20 | * along with this program; if not, write to: | ||
21 | * Free Software Foundation | ||
22 | * 51 Franklin Street, Fifth Floor | ||
23 | * Boston, MA 02111-1301 USA | ||
24 | * | ||
25 | */ | ||
26 | |||
/*
 * Message Types
 *
 * Each T-message code is even and the matching R-message code is the
 * next odd value; every value is spelled out explicitly.
 */
enum {
	TVERSION = 100,	RVERSION = 101,
	TAUTH    = 102,	RAUTH    = 103,
	TATTACH  = 104,	RATTACH  = 105,
	TERROR   = 106,	RERROR   = 107,
	TFLUSH   = 108,	RFLUSH   = 109,
	TWALK    = 110,	RWALK    = 111,
	TOPEN    = 112,	ROPEN    = 113,
	TCREATE  = 114,	RCREATE  = 115,
	TREAD    = 116,	RREAD    = 117,
	TWRITE   = 118,	RWRITE   = 119,
	TCLUNK   = 120,	RCLUNK   = 121,
	TREMOVE  = 122,	RREMOVE  = 123,
	TSTAT    = 124,	RSTAT    = 125,
	TWSTAT   = 126,	RWSTAT   = 127,
};
58 | |||
/* modes (passed as the u8 mode of the open and create requests) */
enum {
	V9FS_OREAD	= 0x00,
	V9FS_OWRITE	= 0x01,
	V9FS_ORDWR	= 0x02,
	V9FS_OEXEC	= 0x03,
	V9FS_OEXCL	= 0x04,

	V9FS_OTRUNC	= 0x10,
	V9FS_OREXEC	= 0x20,
	V9FS_ORCLOSE	= 0x40,
	V9FS_OAPPEND	= 0x80,
};
71 | |||
/* permissions: bits of the 32-bit stat mode / create perm word */
enum {
	V9FS_DMDIR		= 0x80000000,
	V9FS_DMAPPEND		= 0x40000000,
	V9FS_DMEXCL		= 0x20000000,
	V9FS_DMMOUNT		= 0x10000000,
	V9FS_DMAUTH		= 0x08000000,
	V9FS_DMTMP		= 0x04000000,
	V9FS_DMSYMLINK		= 0x02000000,
	V9FS_DMLINK		= 0x01000000,

	/* 9P2000.u extensions */
	V9FS_DMDEVICE		= 0x00800000,
	V9FS_DMNAMEDPIPE	= 0x00200000,
	V9FS_DMSOCKET		= 0x00100000,
	V9FS_DMSETUID		= 0x00080000,
	V9FS_DMSETGID		= 0x00040000,
};
89 | |||
/* qid.types: values stored in the u8 type field of struct v9fs_qid */
enum {
	V9FS_QTFILE	= 0x00,
	V9FS_QTLINK	= 0x01,
	V9FS_QTSYMLINK	= 0x02,
	V9FS_QTTMP	= 0x04,
	V9FS_QTAUTH	= 0x08,
	V9FS_QTMOUNT	= 0x10,
	V9FS_QTEXCL	= 0x20,
	V9FS_QTAPPEND	= 0x40,
	V9FS_QTDIR	= 0x80,
};
102 | |||
103 | /* ample room for Twrite/Rread header (iounit) */ | ||
104 | #define V9FS_IOHDRSZ 24 | ||
105 | |||
106 | /* qids are the unique ID for a file (like an inode */ | ||
107 | struct v9fs_qid { | ||
108 | u8 type; | ||
109 | u32 version; | ||
110 | u64 path; | ||
111 | }; | ||
112 | |||
113 | /* Plan 9 file metadata (stat) structure */ | ||
114 | struct v9fs_stat { | ||
115 | u16 size; | ||
116 | u16 type; | ||
117 | u32 dev; | ||
118 | struct v9fs_qid qid; | ||
119 | u32 mode; | ||
120 | u32 atime; | ||
121 | u32 mtime; | ||
122 | u64 length; | ||
123 | char *name; | ||
124 | char *uid; | ||
125 | char *gid; | ||
126 | char *muid; | ||
127 | char *extension; /* 9p2000.u extensions */ | ||
128 | u32 n_uid; /* 9p2000.u extensions */ | ||
129 | u32 n_gid; /* 9p2000.u extensions */ | ||
130 | u32 n_muid; /* 9p2000.u extensions */ | ||
131 | char data[0]; | ||
132 | }; | ||
133 | |||
134 | /* Structures for Protocol Operations */ | ||
135 | |||
136 | struct Tversion { | ||
137 | u32 msize; | ||
138 | char *version; | ||
139 | }; | ||
140 | |||
141 | struct Rversion { | ||
142 | u32 msize; | ||
143 | char *version; | ||
144 | }; | ||
145 | |||
146 | struct Tauth { | ||
147 | u32 afid; | ||
148 | char *uname; | ||
149 | char *aname; | ||
150 | }; | ||
151 | |||
152 | struct Rauth { | ||
153 | struct v9fs_qid qid; | ||
154 | }; | ||
155 | |||
156 | struct Rerror { | ||
157 | char *error; | ||
158 | u32 errno; /* 9p2000.u extension */ | ||
159 | }; | ||
160 | |||
161 | struct Tflush { | ||
162 | u32 oldtag; | ||
163 | }; | ||
164 | |||
165 | struct Rflush { | ||
166 | }; | ||
167 | |||
168 | struct Tattach { | ||
169 | u32 fid; | ||
170 | u32 afid; | ||
171 | char *uname; | ||
172 | char *aname; | ||
173 | }; | ||
174 | |||
175 | struct Rattach { | ||
176 | struct v9fs_qid qid; | ||
177 | }; | ||
178 | |||
179 | struct Twalk { | ||
180 | u32 fid; | ||
181 | u32 newfid; | ||
182 | u32 nwname; | ||
183 | char **wnames; | ||
184 | }; | ||
185 | |||
186 | struct Rwalk { | ||
187 | u32 nwqid; | ||
188 | struct v9fs_qid *wqids; | ||
189 | }; | ||
190 | |||
191 | struct Topen { | ||
192 | u32 fid; | ||
193 | u8 mode; | ||
194 | }; | ||
195 | |||
196 | struct Ropen { | ||
197 | struct v9fs_qid qid; | ||
198 | u32 iounit; | ||
199 | }; | ||
200 | |||
201 | struct Tcreate { | ||
202 | u32 fid; | ||
203 | char *name; | ||
204 | u32 perm; | ||
205 | u8 mode; | ||
206 | }; | ||
207 | |||
208 | struct Rcreate { | ||
209 | struct v9fs_qid qid; | ||
210 | u32 iounit; | ||
211 | }; | ||
212 | |||
213 | struct Tread { | ||
214 | u32 fid; | ||
215 | u64 offset; | ||
216 | u32 count; | ||
217 | }; | ||
218 | |||
219 | struct Rread { | ||
220 | u32 count; | ||
221 | u8 *data; | ||
222 | }; | ||
223 | |||
224 | struct Twrite { | ||
225 | u32 fid; | ||
226 | u64 offset; | ||
227 | u32 count; | ||
228 | u8 *data; | ||
229 | }; | ||
230 | |||
231 | struct Rwrite { | ||
232 | u32 count; | ||
233 | }; | ||
234 | |||
235 | struct Tclunk { | ||
236 | u32 fid; | ||
237 | }; | ||
238 | |||
239 | struct Rclunk { | ||
240 | }; | ||
241 | |||
242 | struct Tremove { | ||
243 | u32 fid; | ||
244 | }; | ||
245 | |||
246 | struct Rremove { | ||
247 | }; | ||
248 | |||
249 | struct Tstat { | ||
250 | u32 fid; | ||
251 | }; | ||
252 | |||
253 | struct Rstat { | ||
254 | struct v9fs_stat *stat; | ||
255 | }; | ||
256 | |||
257 | struct Twstat { | ||
258 | u32 fid; | ||
259 | struct v9fs_stat *stat; | ||
260 | }; | ||
261 | |||
262 | struct Rwstat { | ||
263 | }; | ||
264 | |||
265 | /* | ||
266 | * fcall is the primary packet structure | ||
267 | * | ||
268 | */ | ||
269 | |||
270 | struct v9fs_fcall { | ||
271 | u32 size; | ||
272 | u8 id; | ||
273 | u16 tag; | ||
274 | |||
275 | union { | ||
276 | struct Tversion tversion; | ||
277 | struct Rversion rversion; | ||
278 | struct Tauth tauth; | ||
279 | struct Rauth rauth; | ||
280 | struct Rerror rerror; | ||
281 | struct Tflush tflush; | ||
282 | struct Rflush rflush; | ||
283 | struct Tattach tattach; | ||
284 | struct Rattach rattach; | ||
285 | struct Twalk twalk; | ||
286 | struct Rwalk rwalk; | ||
287 | struct Topen topen; | ||
288 | struct Ropen ropen; | ||
289 | struct Tcreate tcreate; | ||
290 | struct Rcreate rcreate; | ||
291 | struct Tread tread; | ||
292 | struct Rread rread; | ||
293 | struct Twrite twrite; | ||
294 | struct Rwrite rwrite; | ||
295 | struct Tclunk tclunk; | ||
296 | struct Rclunk rclunk; | ||
297 | struct Tremove tremove; | ||
298 | struct Rremove rremove; | ||
299 | struct Tstat tstat; | ||
300 | struct Rstat rstat; | ||
301 | struct Twstat twstat; | ||
302 | struct Rwstat rwstat; | ||
303 | } params; | ||
304 | }; | ||
305 | |||
/*
 * FCALL_ERROR - error string of an fcall, or "" when the pointer is NULL.
 * The argument is parenthesized so any pointer expression may be passed;
 * note that it is evaluated twice.
 */
#define FCALL_ERROR(fcall) ((fcall) ? (fcall)->params.rerror.error : "")
307 | |||
308 | int v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize, | ||
309 | char *version, struct v9fs_fcall **rcall); | ||
310 | |||
311 | int v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname, | ||
312 | u32 fid, u32 afid, struct v9fs_fcall **rcall); | ||
313 | |||
314 | int v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid, | ||
315 | struct v9fs_fcall **rcall); | ||
316 | |||
317 | int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 oldtag); | ||
318 | |||
319 | int v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, | ||
320 | struct v9fs_fcall **rcall); | ||
321 | |||
322 | int v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid, | ||
323 | struct v9fs_stat *stat, struct v9fs_fcall **rcall); | ||
324 | |||
325 | int v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid, | ||
326 | char *name, struct v9fs_fcall **rcall); | ||
327 | |||
328 | int v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode, | ||
329 | struct v9fs_fcall **rcall); | ||
330 | |||
331 | int v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid, | ||
332 | struct v9fs_fcall **rcall); | ||
333 | |||
334 | int v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name, | ||
335 | u32 perm, u8 mode, struct v9fs_fcall **rcall); | ||
336 | |||
337 | int v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, | ||
338 | u64 offset, u32 count, struct v9fs_fcall **rcall); | ||
339 | |||
340 | int v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset, | ||
341 | u32 count, void *data, struct v9fs_fcall **rcall); | ||
diff --git a/fs/9p/Makefile b/fs/9p/Makefile new file mode 100644 index 000000000000..e4e4ffe5a7dc --- /dev/null +++ b/fs/9p/Makefile | |||
@@ -0,0 +1,17 @@ | |||
# 9p2000 is a composite object built from the files listed below.
obj-$(CONFIG_9P_FS) := 9p2000.o

9p2000-objs := \
	vfs_super.o vfs_inode.o vfs_file.o vfs_dir.o vfs_dentry.o \
	error.o mux.o \
	trans_fd.o trans_sock.o \
	9p.o conv.o v9fs.o fid.o
17 | |||
diff --git a/fs/9p/conv.c b/fs/9p/conv.c new file mode 100644 index 000000000000..1554731bd653 --- /dev/null +++ b/fs/9p/conv.c | |||
@@ -0,0 +1,693 @@ | |||
1 | /* | ||
2 | * linux/fs/9p/conv.c | ||
3 | * | ||
4 | * 9P protocol conversion functions | ||
5 | * | ||
6 | * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> | ||
7 | * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License as published by | ||
11 | * the Free Software Foundation; either version 2 of the License, or | ||
12 | * (at your option) any later version. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, | ||
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | * GNU General Public License for more details. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License | ||
20 | * along with this program; if not, write to: | ||
21 | * Free Software Foundation | ||
22 | * 51 Franklin Street, Fifth Floor | ||
23 | * Boston, MA 02111-1301 USA | ||
24 | * | ||
25 | */ | ||
26 | |||
27 | #include <linux/config.h> | ||
28 | #include <linux/module.h> | ||
29 | #include <linux/errno.h> | ||
30 | #include <linux/fs.h> | ||
31 | #include <linux/idr.h> | ||
32 | |||
33 | #include "debug.h" | ||
34 | #include "v9fs.h" | ||
35 | #include "9p.h" | ||
36 | #include "conv.h" | ||
37 | |||
/*
 * Buffer to help with string parsing
 *
 * buf_init() points sp and p at the start of the data and ep just past
 * its end; the accessors advance p.  p > ep marks an overflowed buffer
 * (see buf_check_overflow()).
 */
struct cbuf {
	unsigned char *sp;	/* start of the data */
	unsigned char *p;	/* current position */
	unsigned char *ep;	/* end of the data (one past the last byte) */
};
46 | |||
47 | static inline void buf_init(struct cbuf *buf, void *data, int datalen) | ||
48 | { | ||
49 | buf->sp = buf->p = data; | ||
50 | buf->ep = data + datalen; | ||
51 | } | ||
52 | |||
53 | static inline int buf_check_overflow(struct cbuf *buf) | ||
54 | { | ||
55 | return buf->p > buf->ep; | ||
56 | } | ||
57 | |||
58 | static inline void buf_check_size(struct cbuf *buf, int len) | ||
59 | { | ||
60 | if (buf->p+len > buf->ep) { | ||
61 | if (buf->p < buf->ep) { | ||
62 | eprintk(KERN_ERR, "buffer overflow\n"); | ||
63 | buf->p = buf->ep + 1; | ||
64 | } | ||
65 | } | ||
66 | } | ||
67 | |||
68 | static inline void *buf_alloc(struct cbuf *buf, int len) | ||
69 | { | ||
70 | void *ret = NULL; | ||
71 | |||
72 | buf_check_size(buf, len); | ||
73 | ret = buf->p; | ||
74 | buf->p += len; | ||
75 | |||
76 | return ret; | ||
77 | } | ||
78 | |||
79 | static inline void buf_put_int8(struct cbuf *buf, u8 val) | ||
80 | { | ||
81 | buf_check_size(buf, 1); | ||
82 | |||
83 | buf->p[0] = val; | ||
84 | buf->p++; | ||
85 | } | ||
86 | |||
87 | static inline void buf_put_int16(struct cbuf *buf, u16 val) | ||
88 | { | ||
89 | buf_check_size(buf, 2); | ||
90 | |||
91 | *(__le16 *) buf->p = cpu_to_le16(val); | ||
92 | buf->p += 2; | ||
93 | } | ||
94 | |||
95 | static inline void buf_put_int32(struct cbuf *buf, u32 val) | ||
96 | { | ||
97 | buf_check_size(buf, 4); | ||
98 | |||
99 | *(__le32 *)buf->p = cpu_to_le32(val); | ||
100 | buf->p += 4; | ||
101 | } | ||
102 | |||
103 | static inline void buf_put_int64(struct cbuf *buf, u64 val) | ||
104 | { | ||
105 | buf_check_size(buf, 8); | ||
106 | |||
107 | *(__le64 *)buf->p = cpu_to_le64(val); | ||
108 | buf->p += 8; | ||
109 | } | ||
110 | |||
111 | static inline void buf_put_stringn(struct cbuf *buf, const char *s, u16 slen) | ||
112 | { | ||
113 | buf_check_size(buf, slen + 2); | ||
114 | |||
115 | buf_put_int16(buf, slen); | ||
116 | memcpy(buf->p, s, slen); | ||
117 | buf->p += slen; | ||
118 | } | ||
119 | |||
/*
 * Append the NUL-terminated string @s to @buf (see buf_put_stringn()).
 *
 * A NULL @s is stored as an empty string: v9fs_size_stat() counts a
 * NULL stat string as zero bytes plus its length prefix, so this keeps
 * the serialized size consistent and avoids calling strlen() on NULL.
 */
static inline void buf_put_string(struct cbuf *buf, const char *s)
{
	if (!s)
		s = "";

	buf_put_stringn(buf, s, strlen(s));
}
124 | |||
125 | static inline void buf_put_data(struct cbuf *buf, void *data, u32 datalen) | ||
126 | { | ||
127 | buf_check_size(buf, datalen); | ||
128 | |||
129 | memcpy(buf->p, data, datalen); | ||
130 | buf->p += datalen; | ||
131 | } | ||
132 | |||
133 | static inline u8 buf_get_int8(struct cbuf *buf) | ||
134 | { | ||
135 | u8 ret = 0; | ||
136 | |||
137 | buf_check_size(buf, 1); | ||
138 | ret = buf->p[0]; | ||
139 | |||
140 | buf->p++; | ||
141 | |||
142 | return ret; | ||
143 | } | ||
144 | |||
145 | static inline u16 buf_get_int16(struct cbuf *buf) | ||
146 | { | ||
147 | u16 ret = 0; | ||
148 | |||
149 | buf_check_size(buf, 2); | ||
150 | ret = le16_to_cpu(*(__le16 *)buf->p); | ||
151 | |||
152 | buf->p += 2; | ||
153 | |||
154 | return ret; | ||
155 | } | ||
156 | |||
157 | static inline u32 buf_get_int32(struct cbuf *buf) | ||
158 | { | ||
159 | u32 ret = 0; | ||
160 | |||
161 | buf_check_size(buf, 4); | ||
162 | ret = le32_to_cpu(*(__le32 *)buf->p); | ||
163 | |||
164 | buf->p += 4; | ||
165 | |||
166 | return ret; | ||
167 | } | ||
168 | |||
169 | static inline u64 buf_get_int64(struct cbuf *buf) | ||
170 | { | ||
171 | u64 ret = 0; | ||
172 | |||
173 | buf_check_size(buf, 8); | ||
174 | ret = le64_to_cpu(*(__le64 *)buf->p); | ||
175 | |||
176 | buf->p += 8; | ||
177 | |||
178 | return ret; | ||
179 | } | ||
180 | |||
181 | static inline int | ||
182 | buf_get_string(struct cbuf *buf, char *data, unsigned int datalen) | ||
183 | { | ||
184 | |||
185 | u16 len = buf_get_int16(buf); | ||
186 | buf_check_size(buf, len); | ||
187 | if (len + 1 > datalen) | ||
188 | return 0; | ||
189 | |||
190 | memcpy(data, buf->p, len); | ||
191 | data[len] = 0; | ||
192 | buf->p += len; | ||
193 | |||
194 | return len + 1; | ||
195 | } | ||
196 | |||
197 | static inline char *buf_get_stringb(struct cbuf *buf, struct cbuf *sbuf) | ||
198 | { | ||
199 | char *ret = NULL; | ||
200 | int n = buf_get_string(buf, sbuf->p, sbuf->ep - sbuf->p); | ||
201 | |||
202 | if (n > 0) { | ||
203 | ret = sbuf->p; | ||
204 | sbuf->p += n; | ||
205 | } | ||
206 | |||
207 | return ret; | ||
208 | } | ||
209 | |||
210 | static inline int buf_get_data(struct cbuf *buf, void *data, int datalen) | ||
211 | { | ||
212 | buf_check_size(buf, datalen); | ||
213 | |||
214 | memcpy(data, buf->p, datalen); | ||
215 | buf->p += datalen; | ||
216 | |||
217 | return datalen; | ||
218 | } | ||
219 | |||
220 | static inline void *buf_get_datab(struct cbuf *buf, struct cbuf *dbuf, | ||
221 | int datalen) | ||
222 | { | ||
223 | char *ret = NULL; | ||
224 | int n = 0; | ||
225 | |||
226 | buf_check_size(dbuf, datalen); | ||
227 | |||
228 | n = buf_get_data(buf, dbuf->p, datalen); | ||
229 | |||
230 | if (n > 0) { | ||
231 | ret = dbuf->p; | ||
232 | dbuf->p += n; | ||
233 | } | ||
234 | |||
235 | return ret; | ||
236 | } | ||
237 | |||
238 | /** | ||
239 | * v9fs_size_stat - calculate the size of a variable length stat struct | ||
240 | * @v9ses: session information | ||
241 | * @stat: metadata (stat) structure | ||
242 | * | ||
243 | */ | ||
244 | |||
245 | static int v9fs_size_stat(struct v9fs_session_info *v9ses, | ||
246 | struct v9fs_stat *stat) | ||
247 | { | ||
248 | int size = 0; | ||
249 | |||
250 | if (stat == NULL) { | ||
251 | eprintk(KERN_ERR, "v9fs_size_stat: got a NULL stat pointer\n"); | ||
252 | return 0; | ||
253 | } | ||
254 | |||
255 | size = /* 2 + *//* size[2] */ | ||
256 | 2 + /* type[2] */ | ||
257 | 4 + /* dev[4] */ | ||
258 | 1 + /* qid.type[1] */ | ||
259 | 4 + /* qid.vers[4] */ | ||
260 | 8 + /* qid.path[8] */ | ||
261 | 4 + /* mode[4] */ | ||
262 | 4 + /* atime[4] */ | ||
263 | 4 + /* mtime[4] */ | ||
264 | 8 + /* length[8] */ | ||
265 | 8; /* minimum sum of string lengths */ | ||
266 | |||
267 | if (stat->name) | ||
268 | size += strlen(stat->name); | ||
269 | if (stat->uid) | ||
270 | size += strlen(stat->uid); | ||
271 | if (stat->gid) | ||
272 | size += strlen(stat->gid); | ||
273 | if (stat->muid) | ||
274 | size += strlen(stat->muid); | ||
275 | |||
276 | if (v9ses->extended) { | ||
277 | size += 4 + /* n_uid[4] */ | ||
278 | 4 + /* n_gid[4] */ | ||
279 | 4 + /* n_muid[4] */ | ||
280 | 2; /* string length of extension[4] */ | ||
281 | if (stat->extension) | ||
282 | size += strlen(stat->extension); | ||
283 | } | ||
284 | |||
285 | return size; | ||
286 | } | ||
287 | |||
288 | /** | ||
289 | * serialize_stat - safely format a stat structure for transmission | ||
290 | * @v9ses: session info | ||
291 | * @stat: metadata (stat) structure | ||
292 | * @bufp: buffer to serialize structure into | ||
293 | * | ||
294 | */ | ||
295 | |||
296 | static int | ||
297 | serialize_stat(struct v9fs_session_info *v9ses, struct v9fs_stat *stat, | ||
298 | struct cbuf *bufp) | ||
299 | { | ||
300 | buf_put_int16(bufp, stat->size); | ||
301 | buf_put_int16(bufp, stat->type); | ||
302 | buf_put_int32(bufp, stat->dev); | ||
303 | buf_put_int8(bufp, stat->qid.type); | ||
304 | buf_put_int32(bufp, stat->qid.version); | ||
305 | buf_put_int64(bufp, stat->qid.path); | ||
306 | buf_put_int32(bufp, stat->mode); | ||
307 | buf_put_int32(bufp, stat->atime); | ||
308 | buf_put_int32(bufp, stat->mtime); | ||
309 | buf_put_int64(bufp, stat->length); | ||
310 | |||
311 | buf_put_string(bufp, stat->name); | ||
312 | buf_put_string(bufp, stat->uid); | ||
313 | buf_put_string(bufp, stat->gid); | ||
314 | buf_put_string(bufp, stat->muid); | ||
315 | |||
316 | if (v9ses->extended) { | ||
317 | buf_put_string(bufp, stat->extension); | ||
318 | buf_put_int32(bufp, stat->n_uid); | ||
319 | buf_put_int32(bufp, stat->n_gid); | ||
320 | buf_put_int32(bufp, stat->n_muid); | ||
321 | } | ||
322 | |||
323 | if (buf_check_overflow(bufp)) | ||
324 | return 0; | ||
325 | |||
326 | return stat->size; | ||
327 | } | ||
328 | |||
329 | /** | ||
330 | * deserialize_stat - safely decode a recieved metadata (stat) structure | ||
331 | * @v9ses: session info | ||
332 | * @bufp: buffer to deserialize | ||
333 | * @stat: metadata (stat) structure | ||
334 | * @dbufp: buffer to deserialize variable strings into | ||
335 | * | ||
336 | */ | ||
337 | |||
338 | static inline int | ||
339 | deserialize_stat(struct v9fs_session_info *v9ses, struct cbuf *bufp, | ||
340 | struct v9fs_stat *stat, struct cbuf *dbufp) | ||
341 | { | ||
342 | |||
343 | stat->size = buf_get_int16(bufp); | ||
344 | stat->type = buf_get_int16(bufp); | ||
345 | stat->dev = buf_get_int32(bufp); | ||
346 | stat->qid.type = buf_get_int8(bufp); | ||
347 | stat->qid.version = buf_get_int32(bufp); | ||
348 | stat->qid.path = buf_get_int64(bufp); | ||
349 | stat->mode = buf_get_int32(bufp); | ||
350 | stat->atime = buf_get_int32(bufp); | ||
351 | stat->mtime = buf_get_int32(bufp); | ||
352 | stat->length = buf_get_int64(bufp); | ||
353 | stat->name = buf_get_stringb(bufp, dbufp); | ||
354 | stat->uid = buf_get_stringb(bufp, dbufp); | ||
355 | stat->gid = buf_get_stringb(bufp, dbufp); | ||
356 | stat->muid = buf_get_stringb(bufp, dbufp); | ||
357 | |||
358 | if (v9ses->extended) { | ||
359 | stat->extension = buf_get_stringb(bufp, dbufp); | ||
360 | stat->n_uid = buf_get_int32(bufp); | ||
361 | stat->n_gid = buf_get_int32(bufp); | ||
362 | stat->n_muid = buf_get_int32(bufp); | ||
363 | } | ||
364 | |||
365 | if (buf_check_overflow(bufp) || buf_check_overflow(dbufp)) | ||
366 | return 0; | ||
367 | |||
368 | return stat->size + 2; | ||
369 | } | ||
370 | |||
371 | /** | ||
372 | * deserialize_statb - wrapper for decoding a received metadata structure | ||
373 | * @v9ses: session info | ||
374 | * @bufp: buffer to deserialize | ||
375 | * @dbufp: buffer to deserialize variable strings into | ||
376 | * | ||
377 | */ | ||
378 | |||
379 | static inline struct v9fs_stat *deserialize_statb(struct v9fs_session_info | ||
380 | *v9ses, struct cbuf *bufp, | ||
381 | struct cbuf *dbufp) | ||
382 | { | ||
383 | struct v9fs_stat *ret = buf_alloc(dbufp, sizeof(struct v9fs_stat)); | ||
384 | |||
385 | if (ret) { | ||
386 | int n = deserialize_stat(v9ses, bufp, ret, dbufp); | ||
387 | if (n <= 0) | ||
388 | return NULL; | ||
389 | } | ||
390 | |||
391 | return ret; | ||
392 | } | ||
393 | |||
394 | /** | ||
395 | * v9fs_deserialize_stat - decode a received metadata structure | ||
396 | * @v9ses: session info | ||
397 | * @buf: buffer to deserialize | ||
398 | * @buflen: length of received buffer | ||
399 | * @stat: metadata structure to decode into | ||
400 | * @statlen: length of destination metadata structure | ||
401 | * | ||
402 | */ | ||
403 | |||
404 | int | ||
405 | v9fs_deserialize_stat(struct v9fs_session_info *v9ses, void *buf, | ||
406 | u32 buflen, struct v9fs_stat *stat, u32 statlen) | ||
407 | { | ||
408 | struct cbuf buffer; | ||
409 | struct cbuf *bufp = &buffer; | ||
410 | struct cbuf dbuffer; | ||
411 | struct cbuf *dbufp = &dbuffer; | ||
412 | |||
413 | buf_init(bufp, buf, buflen); | ||
414 | buf_init(dbufp, (char *)stat + sizeof(struct v9fs_stat), | ||
415 | statlen - sizeof(struct v9fs_stat)); | ||
416 | |||
417 | return deserialize_stat(v9ses, bufp, stat, dbufp); | ||
418 | } | ||
419 | |||
420 | static inline int | ||
421 | v9fs_size_fcall(struct v9fs_session_info *v9ses, struct v9fs_fcall *fcall) | ||
422 | { | ||
423 | int size = 4 + 1 + 2; /* size[4] msg[1] tag[2] */ | ||
424 | int i = 0; | ||
425 | |||
426 | switch (fcall->id) { | ||
427 | default: | ||
428 | eprintk(KERN_ERR, "bad msg type %d\n", fcall->id); | ||
429 | return 0; | ||
430 | case TVERSION: /* msize[4] version[s] */ | ||
431 | size += 4 + 2 + strlen(fcall->params.tversion.version); | ||
432 | break; | ||
433 | case TAUTH: /* afid[4] uname[s] aname[s] */ | ||
434 | size += 4 + 2 + strlen(fcall->params.tauth.uname) + | ||
435 | 2 + strlen(fcall->params.tauth.aname); | ||
436 | break; | ||
437 | case TFLUSH: /* oldtag[2] */ | ||
438 | size += 2; | ||
439 | break; | ||
440 | case TATTACH: /* fid[4] afid[4] uname[s] aname[s] */ | ||
441 | size += 4 + 4 + 2 + strlen(fcall->params.tattach.uname) + | ||
442 | 2 + strlen(fcall->params.tattach.aname); | ||
443 | break; | ||
444 | case TWALK: /* fid[4] newfid[4] nwname[2] nwname*(wname[s]) */ | ||
445 | size += 4 + 4 + 2; | ||
446 | /* now compute total for the array of names */ | ||
447 | for (i = 0; i < fcall->params.twalk.nwname; i++) | ||
448 | size += 2 + strlen(fcall->params.twalk.wnames[i]); | ||
449 | break; | ||
450 | case TOPEN: /* fid[4] mode[1] */ | ||
451 | size += 4 + 1; | ||
452 | break; | ||
453 | case TCREATE: /* fid[4] name[s] perm[4] mode[1] */ | ||
454 | size += 4 + 2 + strlen(fcall->params.tcreate.name) + 4 + 1; | ||
455 | break; | ||
456 | case TREAD: /* fid[4] offset[8] count[4] */ | ||
457 | size += 4 + 8 + 4; | ||
458 | break; | ||
459 | case TWRITE: /* fid[4] offset[8] count[4] data[count] */ | ||
460 | size += 4 + 8 + 4 + fcall->params.twrite.count; | ||
461 | break; | ||
462 | case TCLUNK: /* fid[4] */ | ||
463 | size += 4; | ||
464 | break; | ||
465 | case TREMOVE: /* fid[4] */ | ||
466 | size += 4; | ||
467 | break; | ||
468 | case TSTAT: /* fid[4] */ | ||
469 | size += 4; | ||
470 | break; | ||
471 | case TWSTAT: /* fid[4] stat[n] */ | ||
472 | fcall->params.twstat.stat->size = | ||
473 | v9fs_size_stat(v9ses, fcall->params.twstat.stat); | ||
474 | size += 4 + 2 + 2 + fcall->params.twstat.stat->size; | ||
475 | } | ||
476 | return size; | ||
477 | } | ||
478 | |||
479 | /* | ||
480 | * v9fs_serialize_fcall - marshall fcall struct into a packet | ||
481 | * @v9ses: session information | ||
482 | * @fcall: structure to convert | ||
483 | * @data: buffer to serialize fcall into | ||
484 | * @datalen: length of buffer to serialize fcall into | ||
485 | * | ||
486 | */ | ||
487 | |||
488 | int | ||
489 | v9fs_serialize_fcall(struct v9fs_session_info *v9ses, struct v9fs_fcall *fcall, | ||
490 | void *data, u32 datalen) | ||
491 | { | ||
492 | int i = 0; | ||
493 | struct v9fs_stat *stat = NULL; | ||
494 | struct cbuf buffer; | ||
495 | struct cbuf *bufp = &buffer; | ||
496 | |||
497 | buf_init(bufp, data, datalen); | ||
498 | |||
499 | if (!fcall) { | ||
500 | eprintk(KERN_ERR, "no fcall\n"); | ||
501 | return -EINVAL; | ||
502 | } | ||
503 | |||
504 | fcall->size = v9fs_size_fcall(v9ses, fcall); | ||
505 | |||
506 | buf_put_int32(bufp, fcall->size); | ||
507 | buf_put_int8(bufp, fcall->id); | ||
508 | buf_put_int16(bufp, fcall->tag); | ||
509 | |||
510 | dprintk(DEBUG_CONV, "size %d id %d tag %d\n", fcall->size, fcall->id, | ||
511 | fcall->tag); | ||
512 | |||
513 | /* now encode it */ | ||
514 | switch (fcall->id) { | ||
515 | default: | ||
516 | eprintk(KERN_ERR, "bad msg type: %d\n", fcall->id); | ||
517 | return -EPROTO; | ||
518 | case TVERSION: | ||
519 | buf_put_int32(bufp, fcall->params.tversion.msize); | ||
520 | buf_put_string(bufp, fcall->params.tversion.version); | ||
521 | break; | ||
522 | case TAUTH: | ||
523 | buf_put_int32(bufp, fcall->params.tauth.afid); | ||
524 | buf_put_string(bufp, fcall->params.tauth.uname); | ||
525 | buf_put_string(bufp, fcall->params.tauth.aname); | ||
526 | break; | ||
527 | case TFLUSH: | ||
528 | buf_put_int16(bufp, fcall->params.tflush.oldtag); | ||
529 | break; | ||
530 | case TATTACH: | ||
531 | buf_put_int32(bufp, fcall->params.tattach.fid); | ||
532 | buf_put_int32(bufp, fcall->params.tattach.afid); | ||
533 | buf_put_string(bufp, fcall->params.tattach.uname); | ||
534 | buf_put_string(bufp, fcall->params.tattach.aname); | ||
535 | break; | ||
536 | case TWALK: | ||
537 | buf_put_int32(bufp, fcall->params.twalk.fid); | ||
538 | buf_put_int32(bufp, fcall->params.twalk.newfid); | ||
539 | buf_put_int16(bufp, fcall->params.twalk.nwname); | ||
540 | for (i = 0; i < fcall->params.twalk.nwname; i++) | ||
541 | buf_put_string(bufp, fcall->params.twalk.wnames[i]); | ||
542 | break; | ||
543 | case TOPEN: | ||
544 | buf_put_int32(bufp, fcall->params.topen.fid); | ||
545 | buf_put_int8(bufp, fcall->params.topen.mode); | ||
546 | break; | ||
547 | case TCREATE: | ||
548 | buf_put_int32(bufp, fcall->params.tcreate.fid); | ||
549 | buf_put_string(bufp, fcall->params.tcreate.name); | ||
550 | buf_put_int32(bufp, fcall->params.tcreate.perm); | ||
551 | buf_put_int8(bufp, fcall->params.tcreate.mode); | ||
552 | break; | ||
553 | case TREAD: | ||
554 | buf_put_int32(bufp, fcall->params.tread.fid); | ||
555 | buf_put_int64(bufp, fcall->params.tread.offset); | ||
556 | buf_put_int32(bufp, fcall->params.tread.count); | ||
557 | break; | ||
558 | case TWRITE: | ||
559 | buf_put_int32(bufp, fcall->params.twrite.fid); | ||
560 | buf_put_int64(bufp, fcall->params.twrite.offset); | ||
561 | buf_put_int32(bufp, fcall->params.twrite.count); | ||
562 | buf_put_data(bufp, fcall->params.twrite.data, | ||
563 | fcall->params.twrite.count); | ||
564 | break; | ||
565 | case TCLUNK: | ||
566 | buf_put_int32(bufp, fcall->params.tclunk.fid); | ||
567 | break; | ||
568 | case TREMOVE: | ||
569 | buf_put_int32(bufp, fcall->params.tremove.fid); | ||
570 | break; | ||
571 | case TSTAT: | ||
572 | buf_put_int32(bufp, fcall->params.tstat.fid); | ||
573 | break; | ||
574 | case TWSTAT: | ||
575 | buf_put_int32(bufp, fcall->params.twstat.fid); | ||
576 | stat = fcall->params.twstat.stat; | ||
577 | |||
578 | buf_put_int16(bufp, stat->size + 2); | ||
579 | serialize_stat(v9ses, stat, bufp); | ||
580 | break; | ||
581 | } | ||
582 | |||
583 | if (buf_check_overflow(bufp)) | ||
584 | return -EIO; | ||
585 | |||
586 | return fcall->size; | ||
587 | } | ||
588 | |||
589 | /** | ||
590 | * deserialize_fcall - unmarshal a response | ||
591 | * @v9ses: session information | ||
592 | * @msgsize: size of rcall message | ||
593 | * @buf: recieved buffer | ||
594 | * @buflen: length of received buffer | ||
595 | * @rcall: fcall structure to populate | ||
596 | * @rcalllen: length of fcall structure to populate | ||
597 | * | ||
598 | */ | ||
599 | |||
600 | int | ||
601 | v9fs_deserialize_fcall(struct v9fs_session_info *v9ses, u32 msgsize, | ||
602 | void *buf, u32 buflen, struct v9fs_fcall *rcall, | ||
603 | int rcalllen) | ||
604 | { | ||
605 | |||
606 | struct cbuf buffer; | ||
607 | struct cbuf *bufp = &buffer; | ||
608 | struct cbuf dbuffer; | ||
609 | struct cbuf *dbufp = &dbuffer; | ||
610 | int i = 0; | ||
611 | |||
612 | buf_init(bufp, buf, buflen); | ||
613 | buf_init(dbufp, (char *)rcall + sizeof(struct v9fs_fcall), | ||
614 | rcalllen - sizeof(struct v9fs_fcall)); | ||
615 | |||
616 | rcall->size = msgsize; | ||
617 | rcall->id = buf_get_int8(bufp); | ||
618 | rcall->tag = buf_get_int16(bufp); | ||
619 | |||
620 | dprintk(DEBUG_CONV, "size %d id %d tag %d\n", rcall->size, rcall->id, | ||
621 | rcall->tag); | ||
622 | switch (rcall->id) { | ||
623 | default: | ||
624 | eprintk(KERN_ERR, "unknown message type: %d\n", rcall->id); | ||
625 | return -EPROTO; | ||
626 | case RVERSION: | ||
627 | rcall->params.rversion.msize = buf_get_int32(bufp); | ||
628 | rcall->params.rversion.version = buf_get_stringb(bufp, dbufp); | ||
629 | break; | ||
630 | case RFLUSH: | ||
631 | break; | ||
632 | case RATTACH: | ||
633 | rcall->params.rattach.qid.type = buf_get_int8(bufp); | ||
634 | rcall->params.rattach.qid.version = buf_get_int32(bufp); | ||
635 | rcall->params.rattach.qid.path = buf_get_int64(bufp); | ||
636 | break; | ||
637 | case RWALK: | ||
638 | rcall->params.rwalk.nwqid = buf_get_int16(bufp); | ||
639 | rcall->params.rwalk.wqids = buf_alloc(bufp, | ||
640 | rcall->params.rwalk.nwqid * sizeof(struct v9fs_qid)); | ||
641 | if (rcall->params.rwalk.wqids) | ||
642 | for (i = 0; i < rcall->params.rwalk.nwqid; i++) { | ||
643 | rcall->params.rwalk.wqids[i].type = | ||
644 | buf_get_int8(bufp); | ||
645 | rcall->params.rwalk.wqids[i].version = | ||
646 | buf_get_int16(bufp); | ||
647 | rcall->params.rwalk.wqids[i].path = | ||
648 | buf_get_int64(bufp); | ||
649 | } | ||
650 | break; | ||
651 | case ROPEN: | ||
652 | rcall->params.ropen.qid.type = buf_get_int8(bufp); | ||
653 | rcall->params.ropen.qid.version = buf_get_int32(bufp); | ||
654 | rcall->params.ropen.qid.path = buf_get_int64(bufp); | ||
655 | rcall->params.ropen.iounit = buf_get_int32(bufp); | ||
656 | break; | ||
657 | case RCREATE: | ||
658 | rcall->params.rcreate.qid.type = buf_get_int8(bufp); | ||
659 | rcall->params.rcreate.qid.version = buf_get_int32(bufp); | ||
660 | rcall->params.rcreate.qid.path = buf_get_int64(bufp); | ||
661 | rcall->params.rcreate.iounit = buf_get_int32(bufp); | ||
662 | break; | ||
663 | case RREAD: | ||
664 | rcall->params.rread.count = buf_get_int32(bufp); | ||
665 | rcall->params.rread.data = buf_get_datab(bufp, dbufp, | ||
666 | rcall->params.rread.count); | ||
667 | break; | ||
668 | case RWRITE: | ||
669 | rcall->params.rwrite.count = buf_get_int32(bufp); | ||
670 | break; | ||
671 | case RCLUNK: | ||
672 | break; | ||
673 | case RREMOVE: | ||
674 | break; | ||
675 | case RSTAT: | ||
676 | buf_get_int16(bufp); | ||
677 | rcall->params.rstat.stat = | ||
678 | deserialize_statb(v9ses, bufp, dbufp); | ||
679 | break; | ||
680 | case RWSTAT: | ||
681 | break; | ||
682 | case RERROR: | ||
683 | rcall->params.rerror.error = buf_get_stringb(bufp, dbufp); | ||
684 | if (v9ses->extended) | ||
685 | rcall->params.rerror.errno = buf_get_int16(bufp); | ||
686 | break; | ||
687 | } | ||
688 | |||
689 | if (buf_check_overflow(bufp) || buf_check_overflow(dbufp)) | ||
690 | return -EIO; | ||
691 | |||
692 | return rcall->size; | ||
693 | } | ||
diff --git a/fs/9p/conv.h b/fs/9p/conv.h new file mode 100644 index 000000000000..ee849613c61a --- /dev/null +++ b/fs/9p/conv.h | |||
@@ -0,0 +1,36 @@ | |||
1 | /* | ||
2 | * linux/fs/9p/conv.h | ||
3 | * | ||
4 | * 9P protocol conversion definitions | ||
5 | * | ||
6 | * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> | ||
7 | * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License as published by | ||
11 | * the Free Software Foundation; either version 2 of the License, or | ||
12 | * (at your option) any later version. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, | ||
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | * GNU General Public License for more details. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License | ||
20 | * along with this program; if not, write to: | ||
21 | * Free Software Foundation | ||
22 | * 51 Franklin Street, Fifth Floor | ||
23 | * Boston, MA 02111-1301 USA | ||
24 | * | ||
25 | */ | ||
26 | |||
27 | int v9fs_deserialize_stat(struct v9fs_session_info *, void *buf, | ||
28 | u32 buflen, struct v9fs_stat *stat, u32 statlen); | ||
29 | int v9fs_serialize_fcall(struct v9fs_session_info *, struct v9fs_fcall *tcall, | ||
30 | void *buf, u32 buflen); | ||
31 | int v9fs_deserialize_fcall(struct v9fs_session_info *, u32 msglen, | ||
32 | void *buf, u32 buflen, struct v9fs_fcall *rcall, | ||
33 | int rcalllen); | ||
34 | |||
35 | /* this one is actually in error.c right now */ | ||
36 | int v9fs_errstr2errno(char *errstr); | ||
diff --git a/fs/9p/debug.h b/fs/9p/debug.h new file mode 100644 index 000000000000..4445f06919d9 --- /dev/null +++ b/fs/9p/debug.h | |||
@@ -0,0 +1,70 @@ | |||
1 | /* | ||
2 | * linux/fs/9p/debug.h - V9FS Debug Definitions | ||
3 | * | ||
4 | * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> | ||
5 | * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to: | ||
19 | * Free Software Foundation | ||
20 | * 51 Franklin Street, Fifth Floor | ||
21 | * Boston, MA 02111-1301 USA | ||
22 | * | ||
23 | */ | ||
24 | |||
/*
 * Debug categories.  Each is a single-bit mask; dprintk() prints a
 * message only when all bits of its level are set in v9fs_debug_level.
 */
#define DEBUG_ERROR		0x01
#define DEBUG_CURRENT		0x02
#define DEBUG_9P		0x04
#define DEBUG_VFS		0x08
#define DEBUG_CONV		0x10
#define DEBUG_MUX		0x20
#define DEBUG_TRANS		0x40
#define DEBUG_SLABS		0x80

/* set to nonzero to compile in the real dump_data() body */
#define DEBUG_DUMP_PKT		0

/* bit mask of currently enabled DEBUG_* categories */
extern int v9fs_debug_level;
37 | |||
/*
 * dprintk - print a debug message when every bit of @level is enabled
 * in v9fs_debug_level.  The message is prefixed with the calling
 * function's name and the current pid.
 *
 * @level is parenthesized so that a composite mask such as
 * (DEBUG_9P | DEBUG_VFS) is compared as a whole.
 */
#define dprintk(level, format, arg...) \
do {  \
	if ((v9fs_debug_level & (level)) == (level)) \
		printk(KERN_NOTICE "-- %s (%d): " \
		format , __FUNCTION__, current->pid , ## arg); \
} while (0)

/*
 * eprintk - unconditionally print a message at printk level @level.
 * @level must be a string literal (e.g. KERN_ERR); it is pasted in
 * front of the "v9fs: " prefix.
 */
#define eprintk(level, format, arg...) \
do { \
	printk(level "v9fs: %s (%d): " \
		format , __FUNCTION__, current->pid , ## arg); \
} while (0)
50 | |||
#if DEBUG_DUMP_PKT
/*
 * dump_data - hex-dump a packet to the kernel log
 * @data: bytes to print
 * @datalen: number of bytes in @data
 *
 * Prints "data " followed by the bytes as two-digit hex, separated into
 * groups of four, on a single line.
 */
static inline void dump_data(const unsigned char *data, unsigned int datalen)
{
	int i, j;
	int len = datalen;

	/*
	 * Only the first fragment carries a log level; a level string on
	 * the continuation fragments would end up in the middle of the
	 * output line.
	 */
	printk(KERN_DEBUG "data ");
	for (i = 0; i < len; i += 4) {
		for (j = 0; (j < 4) && (i + j < len); j++)
			printk("%02x", data[i + j]);
		printk(" ");
	}
	printk("\n");
}
#else				/* DEBUG_DUMP_PKT */
/* packet dumping compiled out: do nothing */
static inline void dump_data(const unsigned char *data, unsigned int datalen)
{

}
#endif				/* DEBUG_DUMP_PKT */
diff --git a/fs/9p/error.c b/fs/9p/error.c new file mode 100644 index 000000000000..fee5d19179c5 --- /dev/null +++ b/fs/9p/error.c | |||
@@ -0,0 +1,93 @@ | |||
1 | /* | ||
2 | * linux/fs/9p/error.c | ||
3 | * | ||
4 | * Error string handling | ||
5 | * | ||
6 | * Plan 9 uses error strings, Unix uses error numbers. These functions | ||
7 | * try to help manage that and provide for dynamically adding error | ||
8 | * mappings. | ||
9 | * | ||
10 | * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> | ||
11 | * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> | ||
12 | * | ||
13 | * This program is free software; you can redistribute it and/or modify | ||
14 | * it under the terms of the GNU General Public License as published by | ||
15 | * the Free Software Foundation; either version 2 of the License, or | ||
16 | * (at your option) any later version. | ||
17 | * | ||
18 | * This program is distributed in the hope that it will be useful, | ||
19 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
20 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
21 | * GNU General Public License for more details. | ||
22 | * | ||
23 | * You should have received a copy of the GNU General Public License | ||
24 | * along with this program; if not, write to: | ||
25 | * Free Software Foundation | ||
26 | * 51 Franklin Street, Fifth Floor | ||
27 | * Boston, MA 02111-1301 USA | ||
28 | * | ||
29 | */ | ||
30 | |||
31 | #include <linux/config.h> | ||
32 | #include <linux/module.h> | ||
33 | |||
34 | #include <linux/list.h> | ||
35 | #include <linux/jhash.h> | ||
36 | |||
37 | #include "debug.h" | ||
38 | #include "error.h" | ||
39 | |||
40 | /** | ||
41 | * v9fs_error_init - preload | ||
42 | * @errstr: error string | ||
43 | * | ||
44 | */ | ||
45 | |||
46 | int v9fs_error_init(void) | ||
47 | { | ||
48 | struct errormap *c; | ||
49 | int bucket; | ||
50 | |||
51 | /* initialize hash table */ | ||
52 | for (bucket = 0; bucket < ERRHASHSZ; bucket++) | ||
53 | INIT_HLIST_HEAD(&hash_errmap[bucket]); | ||
54 | |||
55 | /* load initial error map into hash table */ | ||
56 | for (c = errmap; c->name != NULL; c++) { | ||
57 | bucket = jhash(c->name, strlen(c->name), 0) % ERRHASHSZ; | ||
58 | INIT_HLIST_NODE(&c->list); | ||
59 | hlist_add_head(&c->list, &hash_errmap[bucket]); | ||
60 | } | ||
61 | |||
62 | return 1; | ||
63 | } | ||
64 | |||
65 | /** | ||
66 | * errstr2errno - convert error string to error number | ||
67 | * @errstr: error string | ||
68 | * | ||
69 | */ | ||
70 | |||
71 | int v9fs_errstr2errno(char *errstr) | ||
72 | { | ||
73 | int errno = 0; | ||
74 | struct hlist_node *p = NULL; | ||
75 | struct errormap *c = NULL; | ||
76 | int bucket = jhash(errstr, strlen(errstr), 0) % ERRHASHSZ; | ||
77 | |||
78 | hlist_for_each_entry(c, p, &hash_errmap[bucket], list) { | ||
79 | if (!strcmp(c->name, errstr)) { | ||
80 | errno = c->val; | ||
81 | break; | ||
82 | } | ||
83 | } | ||
84 | |||
85 | if (errno == 0) { | ||
86 | /* TODO: if error isn't found, add it dynamically */ | ||
87 | printk(KERN_ERR "%s: errstr :%s: not found\n", __FUNCTION__, | ||
88 | errstr); | ||
89 | errno = 1; | ||
90 | } | ||
91 | |||
92 | return -errno; | ||
93 | } | ||
diff --git a/fs/9p/error.h b/fs/9p/error.h new file mode 100644 index 000000000000..78f89acf7c9a --- /dev/null +++ b/fs/9p/error.h | |||
@@ -0,0 +1,178 @@ | |||
1 | /* | ||
2 | * linux/fs/9p/error.h | ||
3 | * | ||
4 | * Huge Nasty Error Table | ||
5 | * | ||
6 | * Plan 9 uses error strings, Unix uses error numbers. This table tries to | ||
7 | * match UNIX strings and Plan 9 strings to unix error numbers. It is used | ||
8 | * to preload the dynamic error table which can also track user-specific error | ||
9 | * strings. | ||
10 | * | ||
11 | * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> | ||
12 | * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> | ||
13 | * | ||
14 | * This program is free software; you can redistribute it and/or modify | ||
15 | * it under the terms of the GNU General Public License as published by | ||
16 | * the Free Software Foundation; either version 2 of the License, or | ||
17 | * (at your option) any later version. | ||
18 | * | ||
19 | * This program is distributed in the hope that it will be useful, | ||
20 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
21 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
22 | * GNU General Public License for more details. | ||
23 | * | ||
24 | * You should have received a copy of the GNU General Public License | ||
25 | * along with this program; if not, write to: | ||
26 | * Free Software Foundation | ||
27 | * 51 Franklin Street, Fifth Floor | ||
28 | * Boston, MA 02111-1301 USA | ||
29 | * | ||
30 | */ | ||
31 | |||
32 | #include <linux/errno.h> | ||
33 | #include <asm/errno.h> | ||
34 | |||
35 | struct errormap { | ||
36 | char *name; | ||
37 | int val; | ||
38 | |||
39 | struct hlist_node list; | ||
40 | }; | ||
41 | |||
42 | #define ERRHASHSZ 32 | ||
43 | static struct hlist_head hash_errmap[ERRHASHSZ]; | ||
44 | |||
45 | /* FixMe - reduce to a reasonable size */ | ||
46 | static struct errormap errmap[] = { | ||
47 | {"Operation not permitted", EPERM}, | ||
48 | {"wstat prohibited", EPERM}, | ||
49 | {"No such file or directory", ENOENT}, | ||
50 | {"directory entry not found", ENOENT}, | ||
51 | {"file not found", ENOENT}, | ||
52 | {"Interrupted system call", EINTR}, | ||
53 | {"Input/output error", EIO}, | ||
54 | {"No such device or address", ENXIO}, | ||
55 | {"Argument list too long", E2BIG}, | ||
56 | {"Bad file descriptor", EBADF}, | ||
57 | {"Resource temporarily unavailable", EAGAIN}, | ||
58 | {"Cannot allocate memory", ENOMEM}, | ||
59 | {"Permission denied", EACCES}, | ||
60 | {"Bad address", EFAULT}, | ||
61 | {"Block device required", ENOTBLK}, | ||
62 | {"Device or resource busy", EBUSY}, | ||
63 | {"File exists", EEXIST}, | ||
64 | {"Invalid cross-device link", EXDEV}, | ||
65 | {"No such device", ENODEV}, | ||
66 | {"Not a directory", ENOTDIR}, | ||
67 | {"Is a directory", EISDIR}, | ||
68 | {"Invalid argument", EINVAL}, | ||
69 | {"Too many open files in system", ENFILE}, | ||
70 | {"Too many open files", EMFILE}, | ||
71 | {"Text file busy", ETXTBSY}, | ||
72 | {"File too large", EFBIG}, | ||
73 | {"No space left on device", ENOSPC}, | ||
74 | {"Illegal seek", ESPIPE}, | ||
75 | {"Read-only file system", EROFS}, | ||
76 | {"Too many links", EMLINK}, | ||
77 | {"Broken pipe", EPIPE}, | ||
78 | {"Numerical argument out of domain", EDOM}, | ||
79 | {"Numerical result out of range", ERANGE}, | ||
80 | {"Resource deadlock avoided", EDEADLK}, | ||
81 | {"File name too long", ENAMETOOLONG}, | ||
82 | {"No locks available", ENOLCK}, | ||
83 | {"Function not implemented", ENOSYS}, | ||
84 | {"Directory not empty", ENOTEMPTY}, | ||
85 | {"Too many levels of symbolic links", ELOOP}, | ||
86 | {"No message of desired type", ENOMSG}, | ||
87 | {"Identifier removed", EIDRM}, | ||
88 | {"No data available", ENODATA}, | ||
89 | {"Machine is not on the network", ENONET}, | ||
90 | {"Package not installed", ENOPKG}, | ||
91 | {"Object is remote", EREMOTE}, | ||
92 | {"Link has been severed", ENOLINK}, | ||
93 | {"Communication error on send", ECOMM}, | ||
94 | {"Protocol error", EPROTO}, | ||
95 | {"Bad message", EBADMSG}, | ||
96 | {"File descriptor in bad state", EBADFD}, | ||
97 | {"Streams pipe error", ESTRPIPE}, | ||
98 | {"Too many users", EUSERS}, | ||
99 | {"Socket operation on non-socket", ENOTSOCK}, | ||
100 | {"Message too long", EMSGSIZE}, | ||
101 | {"Protocol not available", ENOPROTOOPT}, | ||
102 | {"Protocol not supported", EPROTONOSUPPORT}, | ||
103 | {"Socket type not supported", ESOCKTNOSUPPORT}, | ||
104 | {"Operation not supported", EOPNOTSUPP}, | ||
105 | {"Protocol family not supported", EPFNOSUPPORT}, | ||
106 | {"Network is down", ENETDOWN}, | ||
107 | {"Network is unreachable", ENETUNREACH}, | ||
108 | {"Network dropped connection on reset", ENETRESET}, | ||
109 | {"Software caused connection abort", ECONNABORTED}, | ||
110 | {"Connection reset by peer", ECONNRESET}, | ||
111 | {"No buffer space available", ENOBUFS}, | ||
112 | {"Transport endpoint is already connected", EISCONN}, | ||
113 | {"Transport endpoint is not connected", ENOTCONN}, | ||
114 | {"Cannot send after transport endpoint shutdown", ESHUTDOWN}, | ||
115 | {"Connection timed out", ETIMEDOUT}, | ||
116 | {"Connection refused", ECONNREFUSED}, | ||
117 | {"Host is down", EHOSTDOWN}, | ||
118 | {"No route to host", EHOSTUNREACH}, | ||
119 | {"Operation already in progress", EALREADY}, | ||
120 | {"Operation now in progress", EINPROGRESS}, | ||
121 | {"Is a named type file", EISNAM}, | ||
122 | {"Remote I/O error", EREMOTEIO}, | ||
123 | {"Disk quota exceeded", EDQUOT}, | ||
124 | /* errors from fossil, vacfs, and u9fs */ | ||
125 | {"fid unknown or out of range", EBADF}, | ||
126 | {"permission denied", EACCES}, | ||
127 | {"file does not exist", ENOENT}, | ||
128 | {"authentication failed", ECONNREFUSED}, | ||
129 | {"bad offset in directory read", ESPIPE}, | ||
130 | {"bad use of fid", EBADF}, | ||
131 | {"wstat can't convert between files and directories", EPERM}, | ||
132 | {"directory is not empty", ENOTEMPTY}, | ||
133 | {"file exists", EEXIST}, | ||
134 | {"file already exists", EEXIST}, | ||
135 | {"file or directory already exists", EEXIST}, | ||
136 | {"fid already in use", EBADF}, | ||
137 | {"file in use", ETXTBSY}, | ||
138 | {"i/o error", EIO}, | ||
139 | {"file already open for I/O", ETXTBSY}, | ||
140 | {"illegal mode", EINVAL}, | ||
141 | {"illegal name", ENAMETOOLONG}, | ||
142 | {"not a directory", ENOTDIR}, | ||
143 | {"not a member of proposed group", EPERM}, | ||
144 | {"not owner", EACCES}, | ||
145 | {"only owner can change group in wstat", EACCES}, | ||
146 | {"read only file system", EROFS}, | ||
147 | {"no access to special file", EPERM}, | ||
148 | {"i/o count too large", EIO}, | ||
149 | {"unknown group", EINVAL}, | ||
150 | {"unknown user", EINVAL}, | ||
151 | {"bogus wstat buffer", EPROTO}, | ||
152 | {"exclusive use file already open", EAGAIN}, | ||
153 | {"corrupted directory entry", EIO}, | ||
154 | {"corrupted file entry", EIO}, | ||
155 | {"corrupted block label", EIO}, | ||
156 | {"corrupted meta data", EIO}, | ||
157 | {"illegal offset", EINVAL}, | ||
158 | {"illegal path element", ENOENT}, | ||
159 | {"root of file system is corrupted", EIO}, | ||
160 | {"corrupted super block", EIO}, | ||
161 | {"protocol botch", EPROTO}, | ||
162 | {"file system is full", ENOSPC}, | ||
163 | {"file is in use", EAGAIN}, | ||
164 | {"directory entry is not allocated", ENOENT}, | ||
165 | {"file is read only", EROFS}, | ||
166 | {"file has been removed", EIDRM}, | ||
167 | {"only support truncation to zero length", EPERM}, | ||
168 | {"cannot remove root", EPERM}, | ||
169 | {"file too big", EFBIG}, | ||
170 | {"venti i/o error", EIO}, | ||
171 | /* these are not errors */ | ||
172 | {"u9fs rhostsauth: no authentication required", 0}, | ||
173 | {"u9fs authnone: no authentication required", 0}, | ||
174 | {NULL, -1} | ||
175 | }; | ||
176 | |||
/* error.c: build the error-string hash table / translate an error string */
int v9fs_error_init(void);
int v9fs_errstr2errno(char *errstr);
diff --git a/fs/9p/fid.c b/fs/9p/fid.c new file mode 100644 index 000000000000..821c9c4d76aa --- /dev/null +++ b/fs/9p/fid.c | |||
@@ -0,0 +1,241 @@ | |||
1 | /* | ||
2 | * V9FS FID Management | ||
3 | * | ||
4 | * Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to: | ||
18 | * Free Software Foundation | ||
19 | * 51 Franklin Street, Fifth Floor | ||
20 | * Boston, MA 02111-1301 USA | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | #include <linux/config.h> | ||
25 | #include <linux/module.h> | ||
26 | #include <linux/errno.h> | ||
27 | #include <linux/fs.h> | ||
28 | #include <linux/idr.h> | ||
29 | |||
30 | #include "debug.h" | ||
31 | #include "v9fs.h" | ||
32 | #include "9p.h" | ||
33 | #include "v9fs_vfs.h" | ||
34 | #include "transport.h" | ||
35 | #include "mux.h" | ||
36 | #include "conv.h" | ||
37 | #include "fid.h" | ||
38 | |||
39 | /** | ||
40 | * v9fs_fid_insert - add a fid to a dentry | ||
41 | * @fid: fid to add | ||
42 | * @dentry: dentry that it is being added to | ||
43 | * | ||
44 | */ | ||
45 | |||
46 | static int v9fs_fid_insert(struct v9fs_fid *fid, struct dentry *dentry) | ||
47 | { | ||
48 | struct list_head *fid_list = (struct list_head *)dentry->d_fsdata; | ||
49 | dprintk(DEBUG_9P, "fid %d (%p) dentry %s (%p)\n", fid->fid, fid, | ||
50 | dentry->d_iname, dentry); | ||
51 | if (dentry->d_fsdata == NULL) { | ||
52 | dentry->d_fsdata = | ||
53 | kmalloc(sizeof(struct list_head), GFP_KERNEL); | ||
54 | if (dentry->d_fsdata == NULL) { | ||
55 | dprintk(DEBUG_ERROR, "Out of memory\n"); | ||
56 | return -ENOMEM; | ||
57 | } | ||
58 | fid_list = (struct list_head *)dentry->d_fsdata; | ||
59 | INIT_LIST_HEAD(fid_list); /* Initialize list head */ | ||
60 | } | ||
61 | |||
62 | fid->uid = current->uid; | ||
63 | fid->pid = current->pid; | ||
64 | list_add(&fid->list, fid_list); | ||
65 | return 0; | ||
66 | } | ||
67 | |||
68 | /** | ||
69 | * v9fs_fid_create - allocate a FID structure | ||
70 | * @dentry - dentry to link newly created fid to | ||
71 | * | ||
72 | */ | ||
73 | |||
74 | struct v9fs_fid *v9fs_fid_create(struct dentry *dentry) | ||
75 | { | ||
76 | struct v9fs_fid *new; | ||
77 | |||
78 | new = kmalloc(sizeof(struct v9fs_fid), GFP_KERNEL); | ||
79 | if (new == NULL) { | ||
80 | dprintk(DEBUG_ERROR, "Out of Memory\n"); | ||
81 | return ERR_PTR(-ENOMEM); | ||
82 | } | ||
83 | |||
84 | new->fid = -1; | ||
85 | new->fidopen = 0; | ||
86 | new->fidcreate = 0; | ||
87 | new->fidclunked = 0; | ||
88 | new->iounit = 0; | ||
89 | |||
90 | if (v9fs_fid_insert(new, dentry) == 0) | ||
91 | return new; | ||
92 | else { | ||
93 | dprintk(DEBUG_ERROR, "Problems inserting to dentry\n"); | ||
94 | kfree(new); | ||
95 | return NULL; | ||
96 | } | ||
97 | } | ||
98 | |||
99 | /** | ||
100 | * v9fs_fid_destroy - deallocate a FID structure | ||
101 | * @fid: fid to destroy | ||
102 | * | ||
103 | */ | ||
104 | |||
105 | void v9fs_fid_destroy(struct v9fs_fid *fid) | ||
106 | { | ||
107 | list_del(&fid->list); | ||
108 | kfree(fid); | ||
109 | } | ||
110 | |||
111 | /** | ||
112 | * v9fs_fid_lookup - retrieve the right fid from a particular dentry | ||
113 | * @dentry: dentry to look for fid in | ||
114 | * @type: intent of lookup (operation or traversal) | ||
115 | * | ||
116 | * search list of fids associated with a dentry for a fid with a matching | ||
117 | * thread id or uid. If that fails, look up the dentry's parents to see if you | ||
118 | * can find a matching fid. | ||
119 | * | ||
120 | */ | ||
121 | |||
122 | struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry, int type) | ||
123 | { | ||
124 | struct list_head *fid_list = (struct list_head *)dentry->d_fsdata; | ||
125 | struct v9fs_fid *current_fid = NULL; | ||
126 | struct v9fs_fid *temp = NULL; | ||
127 | struct v9fs_fid *return_fid = NULL; | ||
128 | int found_parent = 0; | ||
129 | int found_user = 0; | ||
130 | |||
131 | dprintk(DEBUG_9P, " dentry: %s (%p) type %d\n", dentry->d_iname, dentry, | ||
132 | type); | ||
133 | |||
134 | if (fid_list && !list_empty(fid_list)) { | ||
135 | list_for_each_entry_safe(current_fid, temp, fid_list, list) { | ||
136 | if (current_fid->uid == current->uid) { | ||
137 | if (return_fid == NULL) { | ||
138 | if ((type == FID_OP) | ||
139 | || (!current_fid->fidopen)) { | ||
140 | return_fid = current_fid; | ||
141 | found_user = 1; | ||
142 | } | ||
143 | } | ||
144 | } | ||
145 | if (current_fid->pid == current->real_parent->pid) { | ||
146 | if ((return_fid == NULL) || (found_parent) | ||
147 | || (found_user)) { | ||
148 | if ((type == FID_OP) | ||
149 | || (!current_fid->fidopen)) { | ||
150 | return_fid = current_fid; | ||
151 | found_parent = 1; | ||
152 | found_user = 0; | ||
153 | } | ||
154 | } | ||
155 | } | ||
156 | if (current_fid->pid == current->pid) { | ||
157 | if ((type == FID_OP) || | ||
158 | (!current_fid->fidopen)) { | ||
159 | return_fid = current_fid; | ||
160 | found_parent = 0; | ||
161 | found_user = 0; | ||
162 | } | ||
163 | } | ||
164 | } | ||
165 | } | ||
166 | |||
167 | /* we are at the root but didn't match */ | ||
168 | if ((!return_fid) && (dentry->d_parent == dentry)) { | ||
169 | /* TODO: clone attach with new uid */ | ||
170 | return_fid = current_fid; | ||
171 | } | ||
172 | |||
173 | if (!return_fid) { | ||
174 | struct dentry *par = current->fs->pwd->d_parent; | ||
175 | int count = 1; | ||
176 | while (par != NULL) { | ||
177 | if (par == dentry) | ||
178 | break; | ||
179 | count++; | ||
180 | if (par == par->d_parent) { | ||
181 | dprintk(DEBUG_ERROR, | ||
182 | "got to root without finding dentry\n"); | ||
183 | break; | ||
184 | } | ||
185 | par = par->d_parent; | ||
186 | } | ||
187 | |||
188 | /* XXX - there may be some duplication we can get rid of */ | ||
189 | if (par == dentry) { | ||
190 | /* we need to fid_lookup the starting point */ | ||
191 | int fidnum = -1; | ||
192 | int oldfid = -1; | ||
193 | int result = -1; | ||
194 | struct v9fs_session_info *v9ses = | ||
195 | v9fs_inode2v9ses(current->fs->pwd->d_inode); | ||
196 | |||
197 | current_fid = | ||
198 | v9fs_fid_lookup(current->fs->pwd, FID_WALK); | ||
199 | if (current_fid == NULL) { | ||
200 | dprintk(DEBUG_ERROR, | ||
201 | "process cwd doesn't have a fid\n"); | ||
202 | return return_fid; | ||
203 | } | ||
204 | oldfid = current_fid->fid; | ||
205 | par = current->fs->pwd; | ||
206 | /* TODO: take advantage of multiwalk */ | ||
207 | |||
208 | fidnum = v9fs_get_idpool(&v9ses->fidpool); | ||
209 | if (fidnum < 0) { | ||
210 | dprintk(DEBUG_ERROR, | ||
211 | "could not get a new fid num\n"); | ||
212 | return return_fid; | ||
213 | } | ||
214 | |||
215 | while (par != dentry) { | ||
216 | result = | ||
217 | v9fs_t_walk(v9ses, oldfid, fidnum, "..", | ||
218 | NULL); | ||
219 | if (result < 0) { | ||
220 | dprintk(DEBUG_ERROR, | ||
221 | "problem walking to parent\n"); | ||
222 | |||
223 | break; | ||
224 | } | ||
225 | oldfid = fidnum; | ||
226 | if (par == par->d_parent) { | ||
227 | dprintk(DEBUG_ERROR, | ||
228 | "can't find dentry\n"); | ||
229 | break; | ||
230 | } | ||
231 | par = par->d_parent; | ||
232 | } | ||
233 | if (par == dentry) { | ||
234 | return_fid = v9fs_fid_create(dentry); | ||
235 | return_fid->fid = fidnum; | ||
236 | } | ||
237 | } | ||
238 | } | ||
239 | |||
240 | return return_fid; | ||
241 | } | ||
diff --git a/fs/9p/fid.h b/fs/9p/fid.h new file mode 100644 index 000000000000..7db478ccca36 --- /dev/null +++ b/fs/9p/fid.h | |||
@@ -0,0 +1,57 @@ | |||
1 | /* | ||
2 | * V9FS FID Management | ||
3 | * | ||
4 | * Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to: | ||
18 | * Free Software Foundation | ||
19 | * 51 Franklin Street, Fifth Floor | ||
20 | * Boston, MA 02111-1301 USA | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | #include <linux/list.h> | ||
25 | |||
26 | #define FID_OP 0 | ||
27 | #define FID_WALK 1 | ||
28 | |||
29 | struct v9fs_fid { | ||
30 | struct list_head list; /* list of fids associated with a dentry */ | ||
31 | struct list_head active; /* XXX - debug */ | ||
32 | |||
33 | u32 fid; | ||
34 | unsigned char fidopen; /* set when fid is opened */ | ||
35 | unsigned char fidcreate; /* set when fid was just created */ | ||
36 | unsigned char fidclunked; /* set when fid has already been clunked */ | ||
37 | |||
38 | struct v9fs_qid qid; | ||
39 | u32 iounit; | ||
40 | |||
41 | /* readdir stuff */ | ||
42 | int rdir_fpos; | ||
43 | loff_t rdir_pos; | ||
44 | struct v9fs_fcall *rdir_fcall; | ||
45 | |||
46 | /* management stuff */ | ||
47 | pid_t pid; /* thread associated with this fid */ | ||
48 | uid_t uid; /* user associated with this fid */ | ||
49 | |||
50 | /* private data */ | ||
51 | struct file *filp; /* backpointer to File struct for open files */ | ||
52 | struct v9fs_session_info *v9ses; /* session info for this FID */ | ||
53 | }; | ||
54 | |||
55 | struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry, int type); | ||
56 | void v9fs_fid_destroy(struct v9fs_fid *fid); | ||
57 | struct v9fs_fid *v9fs_fid_create(struct dentry *); | ||
diff --git a/fs/9p/mux.c b/fs/9p/mux.c new file mode 100644 index 000000000000..8835b576f744 --- /dev/null +++ b/fs/9p/mux.c | |||
@@ -0,0 +1,475 @@ | |||
1 | /* | ||
2 | * linux/fs/9p/mux.c | ||
3 | * | ||
4 | * Protocol Multiplexer | ||
5 | * | ||
6 | * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> | ||
7 | * Copyright (C) 2004 by Latchesar Ionkov <lucho@ionkov.net> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License as published by | ||
11 | * the Free Software Foundation; either version 2 of the License, or | ||
12 | * (at your option) any later version. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, | ||
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | * GNU General Public License for more details. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License | ||
20 | * along with this program; if not, write to: | ||
21 | * Free Software Foundation | ||
22 | * 51 Franklin Street, Fifth Floor | ||
23 | * Boston, MA 02111-1301 USA | ||
24 | * | ||
25 | */ | ||
26 | |||
27 | #include <linux/config.h> | ||
28 | #include <linux/module.h> | ||
29 | #include <linux/errno.h> | ||
30 | #include <linux/fs.h> | ||
31 | #include <linux/kthread.h> | ||
32 | #include <linux/idr.h> | ||
33 | |||
34 | #include "debug.h" | ||
35 | #include "v9fs.h" | ||
36 | #include "9p.h" | ||
37 | #include "transport.h" | ||
38 | #include "conv.h" | ||
39 | #include "mux.h" | ||
40 | |||
41 | /** | ||
42 | * dprintcond - print condition of session info | ||
43 | * @v9ses: session info structure | ||
44 | * @req: RPC request structure | ||
45 | * | ||
46 | */ | ||
47 | |||
48 | static inline int | ||
49 | dprintcond(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req) | ||
50 | { | ||
51 | dprintk(DEBUG_MUX, "condition: %d, %p\n", v9ses->transport->status, | ||
52 | req->rcall); | ||
53 | return 0; | ||
54 | } | ||
55 | |||
56 | /** | ||
57 | * xread - force read of a certain number of bytes | ||
58 | * @v9ses: session info structure | ||
59 | * @ptr: pointer to buffer | ||
60 | * @sz: number of bytes to read | ||
61 | * | ||
62 | * Chuck Cranor CS-533 project1 | ||
63 | */ | ||
64 | |||
65 | static int xread(struct v9fs_session_info *v9ses, void *ptr, unsigned long sz) | ||
66 | { | ||
67 | int rd = 0; | ||
68 | int ret = 0; | ||
69 | while (rd < sz) { | ||
70 | ret = v9ses->transport->read(v9ses->transport, ptr, sz - rd); | ||
71 | if (ret <= 0) { | ||
72 | dprintk(DEBUG_ERROR, "xread errno %d\n", ret); | ||
73 | return ret; | ||
74 | } | ||
75 | rd += ret; | ||
76 | ptr += ret; | ||
77 | } | ||
78 | return (rd); | ||
79 | } | ||
80 | |||
81 | /** | ||
82 | * read_message - read a full 9P2000 fcall packet | ||
83 | * @v9ses: session info structure | ||
84 | * @rcall: fcall structure to read into | ||
85 | * @rcalllen: size of fcall buffer | ||
86 | * | ||
87 | */ | ||
88 | |||
89 | static int | ||
90 | read_message(struct v9fs_session_info *v9ses, | ||
91 | struct v9fs_fcall *rcall, int rcalllen) | ||
92 | { | ||
93 | unsigned char buf[4]; | ||
94 | void *data; | ||
95 | int size = 0; | ||
96 | int res = 0; | ||
97 | |||
98 | res = xread(v9ses, buf, sizeof(buf)); | ||
99 | if (res < 0) { | ||
100 | dprintk(DEBUG_ERROR, | ||
101 | "Reading of count field failed returned: %d\n", res); | ||
102 | return res; | ||
103 | } | ||
104 | |||
105 | if (res < 4) { | ||
106 | dprintk(DEBUG_ERROR, | ||
107 | "Reading of count field failed returned: %d\n", res); | ||
108 | return -EIO; | ||
109 | } | ||
110 | |||
111 | size = buf[0] | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24); | ||
112 | dprintk(DEBUG_MUX, "got a packet count: %d\n", size); | ||
113 | |||
114 | /* adjust for the four bytes of size */ | ||
115 | size -= 4; | ||
116 | |||
117 | if (size > v9ses->maxdata) { | ||
118 | dprintk(DEBUG_ERROR, "packet too big: %d\n", size); | ||
119 | return -E2BIG; | ||
120 | } | ||
121 | |||
122 | data = kmalloc(size, GFP_KERNEL); | ||
123 | if (!data) { | ||
124 | eprintk(KERN_WARNING, "out of memory\n"); | ||
125 | return -ENOMEM; | ||
126 | } | ||
127 | |||
128 | res = xread(v9ses, data, size); | ||
129 | if (res < size) { | ||
130 | dprintk(DEBUG_ERROR, "Reading of fcall failed returned: %d\n", | ||
131 | res); | ||
132 | kfree(data); | ||
133 | return res; | ||
134 | } | ||
135 | |||
136 | /* we now have an in-memory string that is the reply. | ||
137 | * deserialize it. There is very little to go wrong at this point | ||
138 | * save for v9fs_alloc errors. | ||
139 | */ | ||
140 | res = v9fs_deserialize_fcall(v9ses, size, data, v9ses->maxdata, | ||
141 | rcall, rcalllen); | ||
142 | |||
143 | kfree(data); | ||
144 | |||
145 | if (res < 0) | ||
146 | return res; | ||
147 | |||
148 | return 0; | ||
149 | } | ||
150 | |||
151 | /** | ||
152 | * v9fs_recv - receive an RPC response for a particular tag | ||
153 | * @v9ses: session info structure | ||
154 | * @req: RPC request structure | ||
155 | * | ||
156 | */ | ||
157 | |||
158 | static int v9fs_recv(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req) | ||
159 | { | ||
160 | int ret = 0; | ||
161 | |||
162 | dprintk(DEBUG_MUX, "waiting for response: %d\n", req->tcall->tag); | ||
163 | ret = wait_event_interruptible(v9ses->read_wait, | ||
164 | ((v9ses->transport->status != Connected) || | ||
165 | (req->rcall != 0) || (req->err < 0) || | ||
166 | dprintcond(v9ses, req))); | ||
167 | |||
168 | dprintk(DEBUG_MUX, "got it: rcall %p\n", req->rcall); | ||
169 | |||
170 | spin_lock(&v9ses->muxlock); | ||
171 | list_del(&req->next); | ||
172 | spin_unlock(&v9ses->muxlock); | ||
173 | |||
174 | if (req->err < 0) | ||
175 | return req->err; | ||
176 | |||
177 | if (v9ses->transport->status == Disconnected) | ||
178 | return -ECONNRESET; | ||
179 | |||
180 | return ret; | ||
181 | } | ||
182 | |||
183 | /** | ||
184 | * v9fs_send - send a 9P request | ||
185 | * @v9ses: session info structure | ||
186 | * @req: RPC request to send | ||
187 | * | ||
188 | */ | ||
189 | |||
190 | static int v9fs_send(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req) | ||
191 | { | ||
192 | int ret = -1; | ||
193 | void *data = NULL; | ||
194 | struct v9fs_fcall *tcall = req->tcall; | ||
195 | |||
196 | data = kmalloc(v9ses->maxdata + V9FS_IOHDRSZ, GFP_KERNEL); | ||
197 | if (!data) | ||
198 | return -ENOMEM; | ||
199 | |||
200 | tcall->size = 0; /* enforce size recalculation */ | ||
201 | ret = | ||
202 | v9fs_serialize_fcall(v9ses, tcall, data, | ||
203 | v9ses->maxdata + V9FS_IOHDRSZ); | ||
204 | if (ret < 0) | ||
205 | goto free_data; | ||
206 | |||
207 | spin_lock(&v9ses->muxlock); | ||
208 | list_add(&req->next, &v9ses->mux_fcalls); | ||
209 | spin_unlock(&v9ses->muxlock); | ||
210 | |||
211 | dprintk(DEBUG_MUX, "sending message: tag %d size %d\n", tcall->tag, | ||
212 | tcall->size); | ||
213 | ret = v9ses->transport->write(v9ses->transport, data, tcall->size); | ||
214 | |||
215 | if (ret != tcall->size) { | ||
216 | spin_lock(&v9ses->muxlock); | ||
217 | list_del(&req->next); | ||
218 | kfree(req->rcall); | ||
219 | |||
220 | spin_unlock(&v9ses->muxlock); | ||
221 | if (ret >= 0) | ||
222 | ret = -EREMOTEIO; | ||
223 | } else | ||
224 | ret = 0; | ||
225 | |||
226 | free_data: | ||
227 | kfree(data); | ||
228 | return ret; | ||
229 | } | ||
230 | |||
231 | /** | ||
232 | * v9fs_mux_rpc - send a request, receive a response | ||
233 | * @v9ses: session info structure | ||
234 | * @tcall: fcall to send | ||
235 | * @rcall: buffer to place response into | ||
236 | * | ||
237 | */ | ||
238 | |||
239 | long | ||
240 | v9fs_mux_rpc(struct v9fs_session_info *v9ses, struct v9fs_fcall *tcall, | ||
241 | struct v9fs_fcall **rcall) | ||
242 | { | ||
243 | int tid = -1; | ||
244 | struct v9fs_fcall *fcall = NULL; | ||
245 | struct v9fs_rpcreq req; | ||
246 | int ret = -1; | ||
247 | |||
248 | if (!v9ses) | ||
249 | return -EINVAL; | ||
250 | |||
251 | if (!v9ses->transport || v9ses->transport->status != Connected) | ||
252 | return -EIO; | ||
253 | |||
254 | if (rcall) | ||
255 | *rcall = NULL; | ||
256 | |||
257 | if (tcall->id != TVERSION) { | ||
258 | tid = v9fs_get_idpool(&v9ses->tidpool); | ||
259 | if (tid < 0) | ||
260 | return -ENOMEM; | ||
261 | } | ||
262 | |||
263 | tcall->tag = tid; | ||
264 | |||
265 | req.tcall = tcall; | ||
266 | req.err = 0; | ||
267 | req.rcall = NULL; | ||
268 | |||
269 | ret = v9fs_send(v9ses, &req); | ||
270 | |||
271 | if (ret < 0) { | ||
272 | if (tcall->id != TVERSION) | ||
273 | v9fs_put_idpool(tid, &v9ses->tidpool); | ||
274 | dprintk(DEBUG_MUX, "error %d\n", ret); | ||
275 | return ret; | ||
276 | } | ||
277 | |||
278 | ret = v9fs_recv(v9ses, &req); | ||
279 | |||
280 | fcall = req.rcall; | ||
281 | |||
282 | dprintk(DEBUG_MUX, "received: tag=%x, ret=%d\n", tcall->tag, ret); | ||
283 | if (ret == -ERESTARTSYS) { | ||
284 | if (v9ses->transport->status != Disconnected | ||
285 | && tcall->id != TFLUSH) { | ||
286 | unsigned long flags; | ||
287 | |||
288 | dprintk(DEBUG_MUX, "flushing the tag: %d\n", | ||
289 | tcall->tag); | ||
290 | clear_thread_flag(TIF_SIGPENDING); | ||
291 | v9fs_t_flush(v9ses, tcall->tag); | ||
292 | spin_lock_irqsave(¤t->sighand->siglock, flags); | ||
293 | recalc_sigpending(); | ||
294 | spin_unlock_irqrestore(¤t->sighand->siglock, | ||
295 | flags); | ||
296 | dprintk(DEBUG_MUX, "flushing done\n"); | ||
297 | } | ||
298 | |||
299 | goto release_req; | ||
300 | } else if (ret < 0) | ||
301 | goto release_req; | ||
302 | |||
303 | if (!fcall) | ||
304 | ret = -EIO; | ||
305 | else { | ||
306 | if (fcall->id == RERROR) { | ||
307 | ret = v9fs_errstr2errno(fcall->params.rerror.error); | ||
308 | if (ret == 0) { /* string match failed */ | ||
309 | if (fcall->params.rerror.errno) | ||
310 | ret = -(fcall->params.rerror.errno); | ||
311 | else | ||
312 | ret = -ESERVERFAULT; | ||
313 | } | ||
314 | } else if (fcall->id != tcall->id + 1) { | ||
315 | dprintk(DEBUG_ERROR, | ||
316 | "fcall mismatch: expected %d, got %d\n", | ||
317 | tcall->id + 1, fcall->id); | ||
318 | ret = -EIO; | ||
319 | } | ||
320 | } | ||
321 | |||
322 | release_req: | ||
323 | if (tcall->id != TVERSION) | ||
324 | v9fs_put_idpool(tid, &v9ses->tidpool); | ||
325 | if (rcall) | ||
326 | *rcall = fcall; | ||
327 | else | ||
328 | kfree(fcall); | ||
329 | |||
330 | return ret; | ||
331 | } | ||
332 | |||
333 | /** | ||
334 | * v9fs_mux_cancel_requests - cancels all pending requests | ||
335 | * | ||
336 | * @v9ses: session info structure | ||
337 | * @err: error code to return to the requests | ||
338 | */ | ||
339 | void v9fs_mux_cancel_requests(struct v9fs_session_info *v9ses, int err) | ||
340 | { | ||
341 | struct v9fs_rpcreq *rptr; | ||
342 | struct v9fs_rpcreq *rreq; | ||
343 | |||
344 | dprintk(DEBUG_MUX, " %d\n", err); | ||
345 | spin_lock(&v9ses->muxlock); | ||
346 | list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) { | ||
347 | rreq->err = err; | ||
348 | } | ||
349 | spin_unlock(&v9ses->muxlock); | ||
350 | wake_up_all(&v9ses->read_wait); | ||
351 | } | ||
352 | |||
353 | /** | ||
354 | * v9fs_recvproc - kproc to handle demultiplexing responses | ||
355 | * @data: session info structure | ||
356 | * | ||
357 | */ | ||
358 | |||
359 | static int v9fs_recvproc(void *data) | ||
360 | { | ||
361 | struct v9fs_session_info *v9ses = (struct v9fs_session_info *)data; | ||
362 | struct v9fs_fcall *rcall = NULL; | ||
363 | struct v9fs_rpcreq *rptr; | ||
364 | struct v9fs_rpcreq *req; | ||
365 | struct v9fs_rpcreq *rreq; | ||
366 | int err = 0; | ||
367 | |||
368 | allow_signal(SIGKILL); | ||
369 | set_current_state(TASK_INTERRUPTIBLE); | ||
370 | complete(&v9ses->proccmpl); | ||
371 | while (!kthread_should_stop() && err >= 0) { | ||
372 | req = rptr = rreq = NULL; | ||
373 | |||
374 | rcall = kmalloc(v9ses->maxdata + V9FS_IOHDRSZ, GFP_KERNEL); | ||
375 | if (!rcall) { | ||
376 | eprintk(KERN_ERR, "no memory for buffers\n"); | ||
377 | break; | ||
378 | } | ||
379 | |||
380 | err = read_message(v9ses, rcall, v9ses->maxdata + V9FS_IOHDRSZ); | ||
381 | spin_lock(&v9ses->muxlock); | ||
382 | if (err < 0) { | ||
383 | list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) { | ||
384 | rreq->err = err; | ||
385 | } | ||
386 | if(err != -ERESTARTSYS) | ||
387 | eprintk(KERN_ERR, | ||
388 | "Transport error while reading message %d\n", err); | ||
389 | } else { | ||
390 | list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) { | ||
391 | if (rreq->tcall->tag == rcall->tag) { | ||
392 | req = rreq; | ||
393 | req->rcall = rcall; | ||
394 | break; | ||
395 | } | ||
396 | } | ||
397 | } | ||
398 | |||
399 | if (req && (req->tcall->id == TFLUSH)) { | ||
400 | struct v9fs_rpcreq *treq = NULL; | ||
401 | list_for_each_entry_safe(treq, rptr, &v9ses->mux_fcalls, next) { | ||
402 | if (treq->tcall->tag == | ||
403 | req->tcall->params.tflush.oldtag) { | ||
404 | list_del(&rptr->next); | ||
405 | kfree(treq->rcall); | ||
406 | break; | ||
407 | } | ||
408 | } | ||
409 | } | ||
410 | |||
411 | spin_unlock(&v9ses->muxlock); | ||
412 | |||
413 | if (!req) { | ||
414 | if (err >= 0) | ||
415 | dprintk(DEBUG_ERROR, | ||
416 | "unexpected response: id %d tag %d\n", | ||
417 | rcall->id, rcall->tag); | ||
418 | |||
419 | kfree(rcall); | ||
420 | } | ||
421 | |||
422 | wake_up_all(&v9ses->read_wait); | ||
423 | set_current_state(TASK_INTERRUPTIBLE); | ||
424 | } | ||
425 | |||
426 | v9ses->transport->close(v9ses->transport); | ||
427 | |||
428 | /* Inform all pending processes about the failure */ | ||
429 | wake_up_all(&v9ses->read_wait); | ||
430 | |||
431 | if (signal_pending(current)) | ||
432 | complete(&v9ses->proccmpl); | ||
433 | |||
434 | dprintk(DEBUG_MUX, "recvproc: end\n"); | ||
435 | v9ses->recvproc = NULL; | ||
436 | |||
437 | return err >= 0; | ||
438 | } | ||
439 | |||
440 | /** | ||
441 | * v9fs_mux_init - initialize multiplexer (spawn kproc) | ||
442 | * @v9ses: session info structure | ||
443 | * @dev_name: mount device information (to create unique kproc) | ||
444 | * | ||
445 | */ | ||
446 | |||
447 | int v9fs_mux_init(struct v9fs_session_info *v9ses, const char *dev_name) | ||
448 | { | ||
449 | char procname[60]; | ||
450 | |||
451 | strncpy(procname, dev_name, sizeof(procname)); | ||
452 | procname[sizeof(procname) - 1] = 0; | ||
453 | |||
454 | init_waitqueue_head(&v9ses->read_wait); | ||
455 | init_completion(&v9ses->fcread); | ||
456 | init_completion(&v9ses->proccmpl); | ||
457 | spin_lock_init(&v9ses->muxlock); | ||
458 | INIT_LIST_HEAD(&v9ses->mux_fcalls); | ||
459 | v9ses->recvproc = NULL; | ||
460 | v9ses->curfcall = NULL; | ||
461 | |||
462 | v9ses->recvproc = kthread_create(v9fs_recvproc, v9ses, | ||
463 | "v9fs_recvproc %s", procname); | ||
464 | |||
465 | if (IS_ERR(v9ses->recvproc)) { | ||
466 | eprintk(KERN_ERR, "cannot create receiving thread\n"); | ||
467 | v9fs_session_close(v9ses); | ||
468 | return -ECONNABORTED; | ||
469 | } | ||
470 | |||
471 | wake_up_process(v9ses->recvproc); | ||
472 | wait_for_completion(&v9ses->proccmpl); | ||
473 | |||
474 | return 0; | ||
475 | } | ||
diff --git a/fs/9p/mux.h b/fs/9p/mux.h new file mode 100644 index 000000000000..4994cb10badf --- /dev/null +++ b/fs/9p/mux.h | |||
@@ -0,0 +1,41 @@ | |||
1 | /* | ||
2 | * linux/fs/9p/mux.h | ||
3 | * | ||
4 | * Multiplexer Definitions | ||
5 | * | ||
6 | * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License as published by | ||
10 | * the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program; if not, write to: | ||
20 | * Free Software Foundation | ||
21 | * 51 Franklin Street, Fifth Floor | ||
22 | * Boston, MA 02111-1301 USA | ||
23 | * | ||
24 | */ | ||
25 | |||
26 | /* structure to manage each RPC transaction */ | ||
27 | |||
28 | struct v9fs_rpcreq { | ||
29 | struct v9fs_fcall *tcall; | ||
30 | struct v9fs_fcall *rcall; | ||
31 | int err; /* error code if response failed */ | ||
32 | |||
33 | /* XXX - could we put scatter/gather buffers here? */ | ||
34 | |||
35 | struct list_head next; | ||
36 | }; | ||
37 | |||
38 | int v9fs_mux_init(struct v9fs_session_info *v9ses, const char *dev_name); | ||
39 | long v9fs_mux_rpc(struct v9fs_session_info *v9ses, | ||
40 | struct v9fs_fcall *tcall, struct v9fs_fcall **rcall); | ||
41 | void v9fs_mux_cancel_requests(struct v9fs_session_info *v9ses, int err); | ||
diff --git a/fs/9p/trans_fd.c b/fs/9p/trans_fd.c new file mode 100644 index 000000000000..63b58ce98ff4 --- /dev/null +++ b/fs/9p/trans_fd.c | |||
@@ -0,0 +1,172 @@ | |||
1 | /* | ||
2 | * linux/fs/9p/trans_fd.c | ||
3 | * | ||
4 | * File Descriptor Transport Layer | ||
5 | * | ||
6 | * Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License as published by | ||
10 | * the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program; if not, write to: | ||
20 | * Free Software Foundation | ||
21 | * 51 Franklin Street, Fifth Floor | ||
22 | * Boston, MA 02111-1301 USA | ||
23 | * | ||
24 | */ | ||
25 | |||
26 | #include <linux/config.h> | ||
27 | #include <linux/module.h> | ||
28 | #include <linux/net.h> | ||
29 | #include <linux/ipv6.h> | ||
30 | #include <linux/errno.h> | ||
31 | #include <linux/kernel.h> | ||
32 | #include <linux/un.h> | ||
33 | #include <asm/uaccess.h> | ||
34 | #include <linux/inet.h> | ||
35 | #include <linux/idr.h> | ||
36 | #include <linux/file.h> | ||
37 | |||
38 | #include "debug.h" | ||
39 | #include "v9fs.h" | ||
40 | #include "transport.h" | ||
41 | |||
/* private state of the file descriptor transport */
struct v9fs_trans_fd {
	struct file *in_file;	/* file that messages are read from */
	struct file *out_file;	/* file that messages are written to */
};
46 | |||
47 | /** | ||
48 | * v9fs_fd_recv - receive from a socket | ||
49 | * @v9ses: session information | ||
50 | * @v: buffer to receive data into | ||
51 | * @len: size of receive buffer | ||
52 | * | ||
53 | */ | ||
54 | |||
55 | static int v9fs_fd_recv(struct v9fs_transport *trans, void *v, int len) | ||
56 | { | ||
57 | struct v9fs_trans_fd *ts = trans ? trans->priv : NULL; | ||
58 | |||
59 | if (!trans || trans->status != Connected || !ts) | ||
60 | return -EIO; | ||
61 | |||
62 | return kernel_read(ts->in_file, ts->in_file->f_pos, v, len); | ||
63 | } | ||
64 | |||
65 | /** | ||
66 | * v9fs_fd_send - send to a socket | ||
67 | * @v9ses: session information | ||
68 | * @v: buffer to send data from | ||
69 | * @len: size of send buffer | ||
70 | * | ||
71 | */ | ||
72 | |||
73 | static int v9fs_fd_send(struct v9fs_transport *trans, void *v, int len) | ||
74 | { | ||
75 | struct v9fs_trans_fd *ts = trans ? trans->priv : NULL; | ||
76 | mm_segment_t oldfs = get_fs(); | ||
77 | int ret = 0; | ||
78 | |||
79 | if (!trans || trans->status != Connected || !ts) | ||
80 | return -EIO; | ||
81 | |||
82 | set_fs(get_ds()); | ||
83 | /* The cast to a user pointer is valid due to the set_fs() */ | ||
84 | ret = vfs_write(ts->out_file, (void __user *)v, len, &ts->out_file->f_pos); | ||
85 | set_fs(oldfs); | ||
86 | |||
87 | return ret; | ||
88 | } | ||
89 | |||
90 | /** | ||
91 | * v9fs_fd_init - initialize file descriptor transport | ||
92 | * @v9ses: session information | ||
93 | * @addr: address of server to mount | ||
94 | * @data: mount options | ||
95 | * | ||
96 | */ | ||
97 | |||
98 | static int | ||
99 | v9fs_fd_init(struct v9fs_session_info *v9ses, const char *addr, char *data) | ||
100 | { | ||
101 | struct v9fs_trans_fd *ts = NULL; | ||
102 | struct v9fs_transport *trans = v9ses->transport; | ||
103 | |||
104 | if((v9ses->wfdno == ~0) || (v9ses->rfdno == ~0)) { | ||
105 | printk(KERN_ERR "v9fs: Insufficient options for proto=fd\n"); | ||
106 | return -ENOPROTOOPT; | ||
107 | } | ||
108 | |||
109 | sema_init(&trans->writelock, 1); | ||
110 | sema_init(&trans->readlock, 1); | ||
111 | |||
112 | ts = kmalloc(sizeof(struct v9fs_trans_fd), GFP_KERNEL); | ||
113 | |||
114 | if (!ts) | ||
115 | return -ENOMEM; | ||
116 | |||
117 | ts->in_file = fget( v9ses->rfdno ); | ||
118 | ts->out_file = fget( v9ses->wfdno ); | ||
119 | |||
120 | if (!ts->in_file || !ts->out_file) { | ||
121 | if (ts->in_file) | ||
122 | fput(ts->in_file); | ||
123 | |||
124 | if (ts->out_file) | ||
125 | fput(ts->out_file); | ||
126 | |||
127 | kfree(ts); | ||
128 | return -EIO; | ||
129 | } | ||
130 | |||
131 | trans->priv = ts; | ||
132 | trans->status = Connected; | ||
133 | |||
134 | return 0; | ||
135 | } | ||
136 | |||
137 | |||
138 | /** | ||
139 | * v9fs_fd_close - shutdown file descriptor | ||
140 | * @trans: private socket structure | ||
141 | * | ||
142 | */ | ||
143 | |||
144 | static void v9fs_fd_close(struct v9fs_transport *trans) | ||
145 | { | ||
146 | struct v9fs_trans_fd *ts; | ||
147 | |||
148 | if (!trans) | ||
149 | return; | ||
150 | |||
151 | trans->status = Disconnected; | ||
152 | ts = trans->priv; | ||
153 | |||
154 | if (!ts) | ||
155 | return; | ||
156 | |||
157 | if (ts->in_file) | ||
158 | fput(ts->in_file); | ||
159 | |||
160 | if (ts->out_file) | ||
161 | fput(ts->out_file); | ||
162 | |||
163 | kfree(ts); | ||
164 | } | ||
165 | |||
166 | struct v9fs_transport v9fs_trans_fd = { | ||
167 | .init = v9fs_fd_init, | ||
168 | .write = v9fs_fd_send, | ||
169 | .read = v9fs_fd_recv, | ||
170 | .close = v9fs_fd_close, | ||
171 | }; | ||
172 | |||
diff --git a/fs/9p/trans_sock.c b/fs/9p/trans_sock.c new file mode 100644 index 000000000000..01e26f0013ac --- /dev/null +++ b/fs/9p/trans_sock.c | |||
@@ -0,0 +1,290 @@ | |||
1 | /* | ||
2 | * linux/fs/9p/trans_sock.c | ||
3 | * | ||
4 | * Socket Transport Layer | ||
5 | * | ||
6 | * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> | ||
7 | * Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com> | ||
8 | * Copyright (C) 1995, 1996 by Olaf Kirch <okir@monad.swb.de> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; either version 2 of the License, or | ||
13 | * (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | * GNU General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to: | ||
22 | * Free Software Foundation | ||
23 | * 51 Franklin Street, Fifth Floor | ||
24 | * Boston, MA 02111-1301 USA | ||
25 | * | ||
26 | */ | ||
27 | |||
28 | #include <linux/config.h> | ||
29 | #include <linux/module.h> | ||
30 | #include <linux/net.h> | ||
31 | #include <linux/ipv6.h> | ||
32 | #include <linux/errno.h> | ||
33 | #include <linux/kernel.h> | ||
34 | #include <linux/un.h> | ||
35 | #include <asm/uaccess.h> | ||
36 | #include <linux/inet.h> | ||
37 | #include <linux/idr.h> | ||
38 | |||
39 | #include "debug.h" | ||
40 | #include "v9fs.h" | ||
41 | #include "transport.h" | ||
42 | |||
43 | #define V9FS_PORT 564 | ||
44 | |||
/* Private state hung off v9fs_transport->priv by the socket transports. */
45 | struct v9fs_trans_sock { | ||
46 | struct socket *s; /* set once connect succeeds; NULL before that and after close */ | ||
47 | }; | ||
48 | |||
49 | /** | ||
50 | * v9fs_sock_recv - receive from a socket | ||
51 | * @v9ses: session information | ||
52 | * @v: buffer to receive data into | ||
53 | * @len: size of receive buffer | ||
54 | * | ||
55 | */ | ||
56 | |||
57 | static int v9fs_sock_recv(struct v9fs_transport *trans, void *v, int len) | ||
58 | { | ||
59 | struct msghdr msg; | ||
60 | struct kvec iov; | ||
61 | int result; | ||
62 | mm_segment_t oldfs; | ||
63 | struct v9fs_trans_sock *ts = trans ? trans->priv : NULL; | ||
64 | |||
65 | if (trans->status == Disconnected) | ||
66 | return -EREMOTEIO; | ||
67 | |||
68 | result = -EINVAL; | ||
69 | |||
70 | oldfs = get_fs(); | ||
71 | set_fs(get_ds()); | ||
72 | |||
73 | iov.iov_base = v; | ||
74 | iov.iov_len = len; | ||
75 | msg.msg_name = NULL; | ||
76 | msg.msg_namelen = 0; | ||
77 | msg.msg_iovlen = 1; | ||
78 | msg.msg_control = NULL; | ||
79 | msg.msg_controllen = 0; | ||
80 | msg.msg_namelen = 0; | ||
81 | msg.msg_flags = MSG_NOSIGNAL; | ||
82 | |||
83 | result = kernel_recvmsg(ts->s, &msg, &iov, 1, len, 0); | ||
84 | |||
85 | dprintk(DEBUG_TRANS, "socket state %d\n", ts->s->state); | ||
86 | set_fs(oldfs); | ||
87 | |||
88 | if (result <= 0) { | ||
89 | if (result != -ERESTARTSYS) | ||
90 | trans->status = Disconnected; | ||
91 | } | ||
92 | |||
93 | return result; | ||
94 | } | ||
95 | |||
96 | /** | ||
97 | * v9fs_sock_send - send to a socket | ||
98 | * @v9ses: session information | ||
99 | * @v: buffer to send data from | ||
100 | * @len: size of send buffer | ||
101 | * | ||
102 | */ | ||
103 | |||
104 | static int v9fs_sock_send(struct v9fs_transport *trans, void *v, int len) | ||
105 | { | ||
106 | struct kvec iov; | ||
107 | struct msghdr msg; | ||
108 | int result = -1; | ||
109 | mm_segment_t oldfs; | ||
110 | struct v9fs_trans_sock *ts = trans ? trans->priv : NULL; | ||
111 | |||
112 | dprintk(DEBUG_TRANS, "Sending packet size %d (%x)\n", len, len); | ||
113 | dump_data(v, len); | ||
114 | |||
115 | down(&trans->writelock); | ||
116 | |||
117 | oldfs = get_fs(); | ||
118 | set_fs(get_ds()); | ||
119 | iov.iov_base = v; | ||
120 | iov.iov_len = len; | ||
121 | msg.msg_name = NULL; | ||
122 | msg.msg_namelen = 0; | ||
123 | msg.msg_iovlen = 1; | ||
124 | msg.msg_control = NULL; | ||
125 | msg.msg_controllen = 0; | ||
126 | msg.msg_namelen = 0; | ||
127 | msg.msg_flags = MSG_NOSIGNAL; | ||
128 | result = kernel_sendmsg(ts->s, &msg, &iov, 1, len); | ||
129 | set_fs(oldfs); | ||
130 | |||
131 | if (result < 0) { | ||
132 | if (result != -ERESTARTSYS) | ||
133 | trans->status = Disconnected; | ||
134 | } | ||
135 | |||
136 | up(&trans->writelock); | ||
137 | return result; | ||
138 | } | ||
139 | |||
140 | /** | ||
141 | * v9fs_tcp_init - initialize TCP socket | ||
142 | * @v9ses: session information | ||
143 | * @addr: address of server to mount | ||
144 | * @data: mount options | ||
145 | * | ||
146 | */ | ||
147 | |||
148 | static int | ||
149 | v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr, char *data) | ||
150 | { | ||
151 | struct socket *csocket = NULL; | ||
152 | struct sockaddr_in sin_server; | ||
153 | int rc = 0; | ||
154 | struct v9fs_trans_sock *ts = NULL; | ||
155 | struct v9fs_transport *trans = v9ses->transport; | ||
156 | |||
157 | sema_init(&trans->writelock, 1); | ||
158 | sema_init(&trans->readlock, 1); | ||
159 | |||
160 | ts = kmalloc(sizeof(struct v9fs_trans_sock), GFP_KERNEL); | ||
161 | |||
162 | if (!ts) | ||
163 | return -ENOMEM; | ||
164 | |||
165 | trans->priv = ts; | ||
166 | ts->s = NULL; | ||
167 | |||
168 | if (!addr) | ||
169 | return -EINVAL; | ||
170 | |||
171 | dprintk(DEBUG_TRANS, "Connecting to %s\n", addr); | ||
172 | |||
173 | sin_server.sin_family = AF_INET; | ||
174 | sin_server.sin_addr.s_addr = in_aton(addr); | ||
175 | sin_server.sin_port = htons(v9ses->port); | ||
176 | sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket); | ||
177 | rc = csocket->ops->connect(csocket, | ||
178 | (struct sockaddr *)&sin_server, | ||
179 | sizeof(struct sockaddr_in), 0); | ||
180 | if (rc < 0) { | ||
181 | eprintk(KERN_ERR, | ||
182 | "v9fs_trans_tcp: problem connecting socket to %s\n", | ||
183 | addr); | ||
184 | return rc; | ||
185 | } | ||
186 | csocket->sk->sk_allocation = GFP_NOIO; | ||
187 | ts->s = csocket; | ||
188 | trans->status = Connected; | ||
189 | |||
190 | return 0; | ||
191 | } | ||
192 | |||
193 | /** | ||
194 | * v9fs_unix_init - initialize UNIX domain socket | ||
195 | * @v9ses: session information | ||
196 | * @dev_name: path to named pipe | ||
197 | * @data: mount options | ||
198 | * | ||
199 | */ | ||
200 | |||
201 | static int | ||
202 | v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name, | ||
203 | char *data) | ||
204 | { | ||
205 | int rc; | ||
206 | struct socket *csocket; | ||
207 | struct sockaddr_un sun_server; | ||
208 | struct v9fs_transport *trans; | ||
209 | struct v9fs_trans_sock *ts; | ||
210 | |||
211 | rc = 0; | ||
212 | csocket = NULL; | ||
213 | trans = v9ses->transport; | ||
214 | |||
215 | if (strlen(dev_name) > UNIX_PATH_MAX) { | ||
216 | eprintk(KERN_ERR, "v9fs_trans_unix: address too long: %s\n", | ||
217 | dev_name); | ||
218 | return -ENOMEM; | ||
219 | } | ||
220 | |||
221 | ts = kmalloc(sizeof(struct v9fs_trans_sock), GFP_KERNEL); | ||
222 | if (!ts) | ||
223 | return -ENOMEM; | ||
224 | |||
225 | trans->priv = ts; | ||
226 | ts->s = NULL; | ||
227 | |||
228 | sema_init(&trans->writelock, 1); | ||
229 | sema_init(&trans->readlock, 1); | ||
230 | |||
231 | sun_server.sun_family = PF_UNIX; | ||
232 | strcpy(sun_server.sun_path, dev_name); | ||
233 | sock_create_kern(PF_UNIX, SOCK_STREAM, 0, &csocket); | ||
234 | rc = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server, | ||
235 | sizeof(struct sockaddr_un) - 1, 0); /* -1 *is* important */ | ||
236 | if (rc < 0) { | ||
237 | eprintk(KERN_ERR, | ||
238 | "v9fs_trans_unix: problem connecting socket: %s: %d\n", | ||
239 | dev_name, rc); | ||
240 | return rc; | ||
241 | } | ||
242 | csocket->sk->sk_allocation = GFP_NOIO; | ||
243 | ts->s = csocket; | ||
244 | trans->status = Connected; | ||
245 | |||
246 | return 0; | ||
247 | } | ||
248 | |||
249 | /** | ||
250 | * v9fs_sock_close - shutdown socket | ||
251 | * @trans: private socket structure | ||
252 | * | ||
253 | */ | ||
254 | |||
255 | static void v9fs_sock_close(struct v9fs_transport *trans) | ||
256 | { | ||
257 | struct v9fs_trans_sock *ts; | ||
258 | |||
259 | if (!trans) | ||
260 | return; | ||
261 | |||
262 | ts = trans->priv; | ||
263 | |||
264 | if ((ts) && (ts->s)) { | ||
265 | dprintk(DEBUG_TRANS, "closing the socket %p\n", ts->s); | ||
266 | sock_release(ts->s); | ||
267 | ts->s = NULL; | ||
268 | trans->status = Disconnected; | ||
269 | dprintk(DEBUG_TRANS, "socket closed\n"); | ||
270 | } | ||
271 | |||
272 | if (ts) | ||
273 | kfree(ts); | ||
274 | |||
275 | trans->priv = NULL; | ||
276 | } | ||
277 | |||
278 | struct v9fs_transport v9fs_trans_tcp = { | ||
279 | .init = v9fs_tcp_init, | ||
280 | .write = v9fs_sock_send, | ||
281 | .read = v9fs_sock_recv, | ||
282 | .close = v9fs_sock_close, | ||
283 | }; | ||
284 | |||
285 | struct v9fs_transport v9fs_trans_unix = { | ||
286 | .init = v9fs_unix_init, | ||
287 | .write = v9fs_sock_send, | ||
288 | .read = v9fs_sock_recv, | ||
289 | .close = v9fs_sock_close, | ||
290 | }; | ||
diff --git a/fs/9p/transport.h b/fs/9p/transport.h new file mode 100644 index 000000000000..9e9cd418efd5 --- /dev/null +++ b/fs/9p/transport.h | |||
@@ -0,0 +1,46 @@ | |||
1 | /* | ||
2 | * linux/fs/9p/transport.h | ||
3 | * | ||
4 | * Transport Definition | ||
5 | * | ||
6 | * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License as published by | ||
10 | * the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program; if not, write to: | ||
20 | * Free Software Foundation | ||
21 | * 51 Franklin Street, Fifth Floor | ||
22 | * Boston, MA 02111-1301 USA | ||
23 | * | ||
24 | */ | ||
25 | |||
/* Connection state recorded in v9fs_transport->status. */
enum v9fs_transport_status {
	Connected = 0,		/* link is usable */
	Disconnected = 1,	/* link was closed or an I/O error ended it */
	Hung = 2,
};
31 | |||
32 | struct v9fs_transport { | ||
33 | enum v9fs_transport_status status; | ||
34 | struct semaphore writelock; | ||
35 | struct semaphore readlock; | ||
36 | void *priv; | ||
37 | |||
38 | int (*init) (struct v9fs_session_info *, const char *, char *); | ||
39 | int (*write) (struct v9fs_transport *, void *, int); | ||
40 | int (*read) (struct v9fs_transport *, void *, int); | ||
41 | void (*close) (struct v9fs_transport *); | ||
42 | }; | ||
43 | |||
44 | extern struct v9fs_transport v9fs_trans_tcp; | ||
45 | extern struct v9fs_transport v9fs_trans_unix; | ||
46 | extern struct v9fs_transport v9fs_trans_fd; | ||
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c new file mode 100644 index 000000000000..13bdbbab4387 --- /dev/null +++ b/fs/9p/v9fs.c | |||
@@ -0,0 +1,452 @@ | |||
1 | /* | ||
2 | * linux/fs/9p/v9fs.c | ||
3 | * | ||
4 | * This file contains functions assisting in mapping VFS to 9P2000 | ||
5 | * | ||
6 | * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> | ||
7 | * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License as published by | ||
11 | * the Free Software Foundation; either version 2 of the License, or | ||
12 | * (at your option) any later version. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, | ||
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | * GNU General Public License for more details. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License | ||
20 | * along with this program; if not, write to: | ||
21 | * Free Software Foundation | ||
22 | * 51 Franklin Street, Fifth Floor | ||
23 | * Boston, MA 02111-1301 USA | ||
24 | * | ||
25 | */ | ||
26 | |||
27 | #include <linux/config.h> | ||
28 | #include <linux/module.h> | ||
29 | #include <linux/errno.h> | ||
30 | #include <linux/fs.h> | ||
31 | #include <linux/parser.h> | ||
32 | #include <linux/idr.h> | ||
33 | |||
34 | #include "debug.h" | ||
35 | #include "v9fs.h" | ||
36 | #include "9p.h" | ||
37 | #include "v9fs_vfs.h" | ||
38 | #include "transport.h" | ||
39 | #include "mux.h" | ||
40 | #include "conv.h" | ||
41 | |||
42 | /* TODO: sysfs or debugfs interface */ | ||
43 | int v9fs_debug_level = 0; /* global debug level; overwritten from the debug= option on every session init */ | ||
44 | |||
45 | /* | ||
46 | * Option Parsing (code inspired by NFS code) | ||
47 | * | ||
48 | */ | ||
49 | |||
/*
 * Mount option tokens.  The order matters: v9fs_parse_options() treats
 * every token that sorts before Opt_name as taking an integer argument.
 */
enum {
	/* Options that take integer arguments */
	Opt_port,
	Opt_msize,
	Opt_uid,
	Opt_gid,
	Opt_afid,
	Opt_debug,
	Opt_rfdno,
	Opt_wfdno,

	/* String options */
	Opt_name,
	Opt_remotename,

	/* Options that take no arguments */
	Opt_legacy,
	Opt_nodevmap,
	Opt_unix,
	Opt_tcp,
	Opt_fd,

	/* Error token */
	Opt_err
};
61 | |||
/*
 * Mount-option patterns passed to match_token() by v9fs_parse_options().
 * Each entry pairs an Opt_* token with the option text that selects it;
 * the transport may be written either as "proto=<name>" or as the bare
 * name.  The final entry has a NULL pattern and yields Opt_err.
 */
62 | static match_table_t tokens = { | ||
63 | {Opt_port, "port=%u"}, | ||
64 | {Opt_msize, "msize=%u"}, | ||
65 | {Opt_uid, "uid=%u"}, | ||
66 | {Opt_gid, "gid=%u"}, | ||
67 | {Opt_afid, "afid=%u"}, | ||
68 | {Opt_rfdno, "rfdno=%u"}, | ||
69 | {Opt_wfdno, "wfdno=%u"}, | ||
70 | {Opt_debug, "debug=%u"}, | ||
71 | {Opt_name, "name=%s"}, | ||
72 | {Opt_remotename, "aname=%s"}, | ||
73 | {Opt_unix, "proto=unix"}, | ||
74 | {Opt_tcp, "proto=tcp"}, | ||
75 | {Opt_fd, "proto=fd"}, | ||
76 | {Opt_tcp, "tcp"}, | ||
77 | {Opt_unix, "unix"}, | ||
78 | {Opt_fd, "fd"}, | ||
79 | {Opt_legacy, "noextend"}, | ||
80 | {Opt_nodevmap, "nodevmap"}, | ||
81 | {Opt_err, NULL} | ||
82 | }; | ||
83 | |||
84 | /* | ||
85 | * Parse option string. | ||
86 | */ | ||
87 | |||
88 | /** | ||
89 | * v9fs_parse_options - parse mount options into session structure | ||
90 | * @options: options string passed from mount | ||
91 | * @v9ses: existing v9fs session information | ||
92 | * | ||
93 | */ | ||
94 | |||
95 | static void v9fs_parse_options(char *options, struct v9fs_session_info *v9ses) | ||
96 | { | ||
97 | char *p; | ||
98 | substring_t args[MAX_OPT_ARGS]; | ||
99 | int option; | ||
100 | int ret; | ||
101 | |||
102 | /* setup defaults */ | ||
103 | v9ses->port = V9FS_PORT; | ||
104 | v9ses->maxdata = 9000; | ||
105 | v9ses->proto = PROTO_TCP; | ||
106 | v9ses->extended = 1; | ||
107 | v9ses->afid = ~0; | ||
108 | v9ses->debug = 0; | ||
109 | v9ses->rfdno = ~0; | ||
110 | v9ses->wfdno = ~0; | ||
111 | |||
112 | if (!options) | ||
113 | return; | ||
114 | |||
115 | while ((p = strsep(&options, ",")) != NULL) { | ||
116 | int token; | ||
117 | if (!*p) | ||
118 | continue; | ||
119 | token = match_token(p, tokens, args); | ||
120 | if (token < Opt_name) { | ||
121 | if ((ret = match_int(&args[0], &option)) < 0) { | ||
122 | dprintk(DEBUG_ERROR, | ||
123 | "integer field, but no integer?\n"); | ||
124 | continue; | ||
125 | } | ||
126 | |||
127 | } | ||
128 | switch (token) { | ||
129 | case Opt_port: | ||
130 | v9ses->port = option; | ||
131 | break; | ||
132 | case Opt_msize: | ||
133 | v9ses->maxdata = option; | ||
134 | break; | ||
135 | case Opt_uid: | ||
136 | v9ses->uid = option; | ||
137 | break; | ||
138 | case Opt_gid: | ||
139 | v9ses->gid = option; | ||
140 | break; | ||
141 | case Opt_afid: | ||
142 | v9ses->afid = option; | ||
143 | break; | ||
144 | case Opt_rfdno: | ||
145 | v9ses->rfdno = option; | ||
146 | break; | ||
147 | case Opt_wfdno: | ||
148 | v9ses->wfdno = option; | ||
149 | break; | ||
150 | case Opt_debug: | ||
151 | v9ses->debug = option; | ||
152 | break; | ||
153 | case Opt_tcp: | ||
154 | v9ses->proto = PROTO_TCP; | ||
155 | break; | ||
156 | case Opt_unix: | ||
157 | v9ses->proto = PROTO_UNIX; | ||
158 | break; | ||
159 | case Opt_fd: | ||
160 | v9ses->proto = PROTO_FD; | ||
161 | break; | ||
162 | case Opt_name: | ||
163 | match_strcpy(v9ses->name, &args[0]); | ||
164 | break; | ||
165 | case Opt_remotename: | ||
166 | match_strcpy(v9ses->remotename, &args[0]); | ||
167 | break; | ||
168 | case Opt_legacy: | ||
169 | v9ses->extended = 0; | ||
170 | break; | ||
171 | case Opt_nodevmap: | ||
172 | v9ses->nodev = 1; | ||
173 | break; | ||
174 | default: | ||
175 | continue; | ||
176 | } | ||
177 | } | ||
178 | } | ||
179 | |||
180 | /** | ||
181 | * v9fs_inode2v9ses - safely extract v9fs session info from super block | ||
182 | * @inode: inode to extract information from | ||
183 | * | ||
184 | * Paranoid function to extract v9ses information from superblock, | ||
185 | * if anything is missing it will report an error. | ||
186 | * | ||
187 | */ | ||
188 | |||
189 | struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode) | ||
190 | { | ||
191 | return (inode->i_sb->s_fs_info); | ||
192 | } | ||
193 | |||
194 | /** | ||
195 | * v9fs_get_idpool - allocate numeric id from pool | ||
196 | * @p - pool to allocate from | ||
197 | * | ||
198 | * XXX - This seems to be an awful generic function, should it be in idr.c with | ||
199 | * the lock included in struct idr? | ||
200 | */ | ||
201 | |||
202 | int v9fs_get_idpool(struct v9fs_idpool *p) | ||
203 | { | ||
204 | int i = 0; | ||
205 | int error; | ||
206 | |||
207 | retry: | ||
208 | if (idr_pre_get(&p->pool, GFP_KERNEL) == 0) | ||
209 | return 0; | ||
210 | |||
211 | if (down_interruptible(&p->lock) == -EINTR) { | ||
212 | eprintk(KERN_WARNING, "Interrupted while locking\n"); | ||
213 | return -1; | ||
214 | } | ||
215 | |||
216 | error = idr_get_new(&p->pool, NULL, &i); | ||
217 | up(&p->lock); | ||
218 | |||
219 | if (error == -EAGAIN) | ||
220 | goto retry; | ||
221 | else if (error) | ||
222 | return -1; | ||
223 | |||
224 | return i; | ||
225 | } | ||
226 | |||
227 | /** | ||
228 | * v9fs_put_idpool - release numeric id from pool | ||
229 | * @p - pool to allocate from | ||
230 | * | ||
231 | * XXX - This seems to be an awful generic function, should it be in idr.c with | ||
232 | * the lock included in struct idr? | ||
233 | */ | ||
234 | |||
235 | void v9fs_put_idpool(int id, struct v9fs_idpool *p) | ||
236 | { | ||
237 | if (down_interruptible(&p->lock) == -EINTR) { | ||
238 | eprintk(KERN_WARNING, "Interrupted while locking\n"); | ||
239 | return; | ||
240 | } | ||
241 | idr_remove(&p->pool, id); | ||
242 | up(&p->lock); | ||
243 | } | ||
244 | |||
245 | /** | ||
246 | * v9fs_session_init - initialize session | ||
247 | * @v9ses: session information structure | ||
248 | * @dev_name: device being mounted | ||
249 | * @data: options | ||
250 | * | ||
251 | */ | ||
252 | |||
253 | int | ||
254 | v9fs_session_init(struct v9fs_session_info *v9ses, | ||
255 | const char *dev_name, char *data) | ||
256 | { | ||
257 | struct v9fs_fcall *fcall = NULL; | ||
258 | struct v9fs_transport *trans_proto; | ||
259 | int n = 0; | ||
260 | int newfid = -1; | ||
261 | int retval = -EINVAL; | ||
262 | |||
263 | v9ses->name = __getname(); | ||
264 | if (!v9ses->name) | ||
265 | return -ENOMEM; | ||
266 | |||
267 | v9ses->remotename = __getname(); | ||
268 | if (!v9ses->remotename) { | ||
269 | putname(v9ses->name); | ||
270 | return -ENOMEM; | ||
271 | } | ||
272 | |||
273 | strcpy(v9ses->name, V9FS_DEFUSER); | ||
274 | strcpy(v9ses->remotename, V9FS_DEFANAME); | ||
275 | |||
276 | v9fs_parse_options(data, v9ses); | ||
277 | |||
278 | /* set global debug level */ | ||
279 | v9fs_debug_level = v9ses->debug; | ||
280 | |||
281 | /* id pools that are session-dependent: FIDs and TIDs */ | ||
282 | idr_init(&v9ses->fidpool.pool); | ||
283 | init_MUTEX(&v9ses->fidpool.lock); | ||
284 | idr_init(&v9ses->tidpool.pool); | ||
285 | init_MUTEX(&v9ses->tidpool.lock); | ||
286 | |||
287 | |||
288 | switch (v9ses->proto) { | ||
289 | case PROTO_TCP: | ||
290 | trans_proto = &v9fs_trans_tcp; | ||
291 | break; | ||
292 | case PROTO_UNIX: | ||
293 | trans_proto = &v9fs_trans_unix; | ||
294 | *v9ses->remotename = 0; | ||
295 | break; | ||
296 | case PROTO_FD: | ||
297 | trans_proto = &v9fs_trans_fd; | ||
298 | *v9ses->remotename = 0; | ||
299 | break; | ||
300 | default: | ||
301 | printk(KERN_ERR "v9fs: Bad mount protocol %d\n", v9ses->proto); | ||
302 | retval = -ENOPROTOOPT; | ||
303 | goto SessCleanUp; | ||
304 | }; | ||
305 | |||
306 | v9ses->transport = trans_proto; | ||
307 | |||
308 | if ((retval = v9ses->transport->init(v9ses, dev_name, data)) < 0) { | ||
309 | eprintk(KERN_ERR, "problem initializing transport\n"); | ||
310 | goto SessCleanUp; | ||
311 | } | ||
312 | |||
313 | v9ses->inprogress = 0; | ||
314 | v9ses->shutdown = 0; | ||
315 | v9ses->session_hung = 0; | ||
316 | |||
317 | if ((retval = v9fs_mux_init(v9ses, dev_name)) < 0) { | ||
318 | dprintk(DEBUG_ERROR, "problem initializing mux\n"); | ||
319 | goto SessCleanUp; | ||
320 | } | ||
321 | |||
322 | if (v9ses->afid == ~0) { | ||
323 | if (v9ses->extended) | ||
324 | retval = | ||
325 | v9fs_t_version(v9ses, v9ses->maxdata, "9P2000.u", | ||
326 | &fcall); | ||
327 | else | ||
328 | retval = v9fs_t_version(v9ses, v9ses->maxdata, "9P2000", | ||
329 | &fcall); | ||
330 | |||
331 | if (retval < 0) { | ||
332 | dprintk(DEBUG_ERROR, "v9fs_t_version failed\n"); | ||
333 | goto FreeFcall; | ||
334 | } | ||
335 | |||
336 | /* Really should check for 9P1 and report error */ | ||
337 | if (!strcmp(fcall->params.rversion.version, "9P2000.u")) { | ||
338 | dprintk(DEBUG_9P, "9P2000 UNIX extensions enabled\n"); | ||
339 | v9ses->extended = 1; | ||
340 | } else { | ||
341 | dprintk(DEBUG_9P, "9P2000 legacy mode enabled\n"); | ||
342 | v9ses->extended = 0; | ||
343 | } | ||
344 | |||
345 | n = fcall->params.rversion.msize; | ||
346 | kfree(fcall); | ||
347 | |||
348 | if (n < v9ses->maxdata) | ||
349 | v9ses->maxdata = n; | ||
350 | } | ||
351 | |||
352 | newfid = v9fs_get_idpool(&v9ses->fidpool); | ||
353 | if (newfid < 0) { | ||
354 | eprintk(KERN_WARNING, "couldn't allocate FID\n"); | ||
355 | retval = -ENOMEM; | ||
356 | goto SessCleanUp; | ||
357 | } | ||
358 | /* it is a little bit ugly, but we have to prevent newfid */ | ||
359 | /* being the same as afid, so if it is, get a new fid */ | ||
360 | if (v9ses->afid != ~0 && newfid == v9ses->afid) { | ||
361 | newfid = v9fs_get_idpool(&v9ses->fidpool); | ||
362 | if (newfid < 0) { | ||
363 | eprintk(KERN_WARNING, "couldn't allocate FID\n"); | ||
364 | retval = -ENOMEM; | ||
365 | goto SessCleanUp; | ||
366 | } | ||
367 | } | ||
368 | |||
369 | if ((retval = | ||
370 | v9fs_t_attach(v9ses, v9ses->name, v9ses->remotename, newfid, | ||
371 | v9ses->afid, NULL)) | ||
372 | < 0) { | ||
373 | dprintk(DEBUG_ERROR, "cannot attach\n"); | ||
374 | goto SessCleanUp; | ||
375 | } | ||
376 | |||
377 | if (v9ses->afid != ~0) { | ||
378 | if (v9fs_t_clunk(v9ses, v9ses->afid, NULL)) | ||
379 | dprintk(DEBUG_ERROR, "clunk failed\n"); | ||
380 | } | ||
381 | |||
382 | return newfid; | ||
383 | |||
384 | FreeFcall: | ||
385 | kfree(fcall); | ||
386 | |||
387 | SessCleanUp: | ||
388 | v9fs_session_close(v9ses); | ||
389 | return retval; | ||
390 | } | ||
391 | |||
392 | /** | ||
393 | * v9fs_session_close - shutdown a session | ||
394 | * @v9ses: session information structure | ||
395 | * | ||
396 | */ | ||
397 | |||
398 | void v9fs_session_close(struct v9fs_session_info *v9ses) | ||
399 | { | ||
400 | if (v9ses->recvproc) { | ||
401 | send_sig(SIGKILL, v9ses->recvproc, 1); | ||
402 | wait_for_completion(&v9ses->proccmpl); | ||
403 | } | ||
404 | |||
405 | if (v9ses->transport) | ||
406 | v9ses->transport->close(v9ses->transport); | ||
407 | |||
408 | putname(v9ses->name); | ||
409 | putname(v9ses->remotename); | ||
410 | } | ||
411 | |||
412 | /** | ||
413 | * v9fs_session_cancel - mark transport as disconnected | ||
414 | * and cancel all pending requests. | ||
415 | */ | ||
416 | void v9fs_session_cancel(struct v9fs_session_info *v9ses) { | ||
417 | v9ses->transport->status = Disconnected; | ||
418 | v9fs_mux_cancel_requests(v9ses, -EIO); | ||
419 | } | ||
420 | |||
421 | extern int v9fs_error_init(void); | ||
422 | |||
423 | /** | ||
424 | * v9fs_init - Initialize module | ||
425 | * | ||
426 | */ | ||
427 | |||
428 | static int __init init_v9fs(void) | ||
429 | { | ||
430 | v9fs_error_init(); | ||
431 | |||
432 | printk(KERN_INFO "Installing v9fs 9P2000 file system support\n"); | ||
433 | |||
434 | return register_filesystem(&v9fs_fs_type); | ||
435 | } | ||
436 | |||
437 | /** | ||
438 | * v9fs_init - shutdown module | ||
439 | * | ||
440 | */ | ||
441 | |||
442 | static void __exit exit_v9fs(void) | ||
443 | { | ||
444 | unregister_filesystem(&v9fs_fs_type); | ||
445 | } | ||
446 | |||
447 | module_init(init_v9fs) | ||
448 | module_exit(exit_v9fs) | ||
449 | |||
450 | MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>"); | ||
451 | MODULE_AUTHOR("Ron Minnich <rminnich@lanl.gov>"); | ||
452 | MODULE_LICENSE("GPL"); | ||
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h new file mode 100644 index 000000000000..45dcef42bdd6 --- /dev/null +++ b/fs/9p/v9fs.h | |||
@@ -0,0 +1,103 @@ | |||
1 | /* | ||
2 | * V9FS definitions. | ||
3 | * | ||
4 | * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> | ||
5 | * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to: | ||
19 | * Free Software Foundation | ||
20 | * 51 Franklin Street, Fifth Floor | ||
21 | * Boston, MA 02111-1301 USA | ||
22 | * | ||
23 | */ | ||
24 | |||
25 | /* | ||
26 | * Idpool structure provides lock and id management | ||
27 | * | ||
28 | */ | ||
29 | |||
30 | struct v9fs_idpool { | ||
31 | struct semaphore lock; /* held around every idr_get_new()/idr_remove() on pool */ | ||
32 | struct idr pool; /* allocated ids */ | ||
33 | }; | ||
34 | |||
35 | /* | ||
36 | * Session structure provides information for an opened session | ||
37 | * | ||
38 | */ | ||
39 | |||
40 | struct v9fs_session_info { | ||
41 | /* options (defaults and overrides are applied by v9fs_parse_options) */ | ||
42 | unsigned int maxdata; /* max packet size: default 9000, msize= overrides, capped by the server's msize */ | ||
43 | unsigned char extended; /* set to 1 if we are using UNIX extensions */ | ||
44 | unsigned char nodev; /* set to 1 by the nodevmap option */ | ||
45 | unsigned short port; /* port to connect to */ | ||
46 | unsigned short debug; /* debug level */ | ||
47 | unsigned short proto; /* protocol to use (one of the PROTO_* values) */ | ||
48 | unsigned int afid; /* authentication fid; ~0 when none was given */ | ||
49 | unsigned int rfdno; /* read file descriptor number; ~0 when unset */ | ||
50 | unsigned int wfdno; /* write file descriptor number; ~0 when unset */ | ||
51 | |||
52 | |||
53 | char *name; /* user name to mount as */ | ||
54 | char *remotename; /* name of remote hierarchy being mounted */ | ||
55 | unsigned int uid; /* default uid/muid for legacy support */ | ||
56 | unsigned int gid; /* default gid for legacy support */ | ||
57 | |||
58 | /* book keeping */ | ||
59 | struct v9fs_idpool fidpool; /* The FID pool for file descriptors */ | ||
60 | struct v9fs_idpool tidpool; /* The TID pool for transaction ids */ | ||
61 | |||
62 | /* transport information */ | ||
63 | struct v9fs_transport *transport; /* operations table chosen from proto */ | ||
64 | |||
65 | int inprogress; /* session in progress => true */ | ||
66 | int shutdown; /* session shutting down. no more attaches. */ | ||
67 | unsigned char session_hung; /* cleared by v9fs_session_init */ | ||
68 | |||
69 | /* mux private data */ | ||
70 | struct v9fs_fcall *curfcall; | ||
71 | wait_queue_head_t read_wait; | ||
72 | struct completion fcread; | ||
73 | struct completion proccmpl; /* waited on by v9fs_session_close after killing recvproc */ | ||
74 | struct task_struct *recvproc; /* sent SIGKILL by v9fs_session_close when non-NULL */ | ||
75 | |||
76 | spinlock_t muxlock; | ||
77 | struct list_head mux_fcalls; | ||
78 | }; | ||
79 | |||
/* possible values of ->proto; PROTO_TCP is the default when no option selects one */
enum {
	PROTO_TCP = 0,
	PROTO_UNIX = 1,
	PROTO_FD = 2,
};
88 | struct v9fs_session_info *v9fs_inode2v9ses(struct inode *); | ||
89 | void v9fs_session_close(struct v9fs_session_info *v9ses); | ||
90 | int v9fs_get_idpool(struct v9fs_idpool *p); | ||
91 | void v9fs_put_idpool(int id, struct v9fs_idpool *p); | ||
92 | void v9fs_session_cancel(struct v9fs_session_info *v9ses); | ||
93 | |||
94 | #define V9FS_MAGIC 0x01021997 | ||
95 | |||
96 | /* other default globals */ | ||
97 | #define V9FS_PORT 564 | ||
98 | #define V9FS_DEFUSER "nobody" | ||
99 | #define V9FS_DEFANAME "" | ||
100 | |||
101 | /* initial pool sizes for fids and tags */ | ||
102 | #define V9FS_START_FIDS 8192 | ||
103 | #define V9FS_START_TIDS 256 | ||
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h new file mode 100644 index 000000000000..2f2cea7ee3e7 --- /dev/null +++ b/fs/9p/v9fs_vfs.h | |||
@@ -0,0 +1,53 @@ | |||
1 | /* | ||
2 | * V9FS VFS extensions. | ||
3 | * | ||
4 | * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> | ||
5 | * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to: | ||
19 | * Free Software Foundation | ||
20 | * 51 Franklin Street, Fifth Floor | ||
21 | * Boston, MA 02111-1301 USA | ||
22 | * | ||
23 | */ | ||
24 | |||
25 | /* plan9 semantics are that created files are implicitly opened. | ||
26 | * But linux semantics are that you call create, then open. | ||
27 | * the plan9 approach is superior as it provides an atomic | ||
28 | * open. | ||
29 | * we track the create fid here. When the file is opened, if fidopen is | ||
30 | * non-zero, we use the fid and can skip some steps. | ||
31 | * there may be a better way to do this, but I don't know it. | ||
32 | * one BAD way is to clunk the fid on create, then open it again: | ||
33 | * you lose the atomicity of file open | ||
34 | */ | ||
35 | |||
36 | /* special case: | ||
37 | * unlink calls remove, which is an implicit clunk. So we have to track | ||
38 | * that kind of thing so that we don't try to clunk a dead fid. | ||
39 | */ | ||
40 | |||
41 | extern struct file_system_type v9fs_fs_type; | ||
42 | extern struct file_operations v9fs_file_operations; | ||
43 | extern struct file_operations v9fs_dir_operations; | ||
44 | extern struct dentry_operations v9fs_dentry_operations; | ||
45 | |||
46 | struct inode *v9fs_get_inode(struct super_block *sb, int mode); | ||
47 | ino_t v9fs_qid2ino(struct v9fs_qid *qid); | ||
48 | void v9fs_mistat2inode(struct v9fs_stat *, struct inode *, | ||
49 | struct super_block *); | ||
50 | int v9fs_dir_release(struct inode *inode, struct file *filp); | ||
51 | int v9fs_file_open(struct inode *inode, struct file *file); | ||
52 | void v9fs_inode2mistat(struct inode *inode, struct v9fs_stat *mistat); | ||
53 | void v9fs_dentry_release(struct dentry *); | ||
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c new file mode 100644 index 000000000000..306c96741f81 --- /dev/null +++ b/fs/9p/vfs_dentry.c | |||
@@ -0,0 +1,126 @@ | |||
1 | /* | ||
2 | * linux/fs/9p/vfs_dentry.c | ||
3 | * | ||
4 | * This file contains vfs dentry ops for the 9P2000 protocol. | ||
5 | * | ||
6 | * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> | ||
7 | * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License as published by | ||
11 | * the Free Software Foundation; either version 2 of the License, or | ||
12 | * (at your option) any later version. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, | ||
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | * GNU General Public License for more details. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License | ||
20 | * along with this program; if not, write to: | ||
21 | * Free Software Foundation | ||
22 | * 51 Franklin Street, Fifth Floor | ||
23 | * Boston, MA 02111-1301 USA | ||
24 | * | ||
25 | */ | ||
26 | |||
27 | #include <linux/module.h> | ||
28 | #include <linux/errno.h> | ||
29 | #include <linux/fs.h> | ||
30 | #include <linux/file.h> | ||
31 | #include <linux/pagemap.h> | ||
32 | #include <linux/stat.h> | ||
33 | #include <linux/string.h> | ||
34 | #include <linux/smp_lock.h> | ||
35 | #include <linux/inet.h> | ||
36 | #include <linux/namei.h> | ||
37 | #include <linux/idr.h> | ||
38 | |||
39 | #include "debug.h" | ||
40 | #include "v9fs.h" | ||
41 | #include "9p.h" | ||
42 | #include "v9fs_vfs.h" | ||
43 | #include "conv.h" | ||
44 | #include "fid.h" | ||
45 | |||
46 | /** | ||
47 | * v9fs_dentry_validate - VFS dcache hook to validate cache | ||
48 | * @dentry: dentry that is being validated | ||
49 | * @nd: path data | ||
50 | * | ||
51 | * dcache really shouldn't be used for 9P2000 as at all due to | ||
52 | * potential attached semantics to directory traversal (walk). | ||
53 | * | ||
54 | * FUTURE: look into how to use dcache to allow multi-stage | ||
55 | * walks in Plan 9 & potential for better dcache operation which | ||
56 | * would remain valid for Plan 9 semantics. Older versions | ||
57 | * had validation via stat for those interested. However, since | ||
58 | * stat has the same approximate overhead as walk there really | ||
59 | * is no difference. The only improvement would be from a | ||
60 | * time-decay cache like NFS has and that undermines the | ||
61 | * synchronous nature of 9P2000. | ||
62 | * | ||
63 | */ | ||
64 | |||
65 | static int v9fs_dentry_validate(struct dentry *dentry, struct nameidata *nd) | ||
66 | { | ||
67 | struct dentry *dc = current->fs->pwd; | ||
68 | |||
69 | dprintk(DEBUG_VFS, "dentry: %s (%p)\n", dentry->d_iname, dentry); | ||
70 | if (v9fs_fid_lookup(dentry, FID_OP)) { | ||
71 | dprintk(DEBUG_VFS, "VALID\n"); | ||
72 | return 1; | ||
73 | } | ||
74 | |||
75 | while (dc != NULL) { | ||
76 | if (dc == dentry) { | ||
77 | dprintk(DEBUG_VFS, "VALID\n"); | ||
78 | return 1; | ||
79 | } | ||
80 | if (dc == dc->d_parent) | ||
81 | break; | ||
82 | |||
83 | dc = dc->d_parent; | ||
84 | } | ||
85 | |||
86 | dprintk(DEBUG_VFS, "INVALID\n"); | ||
87 | return 0; | ||
88 | } | ||
89 | |||
90 | /** | ||
91 | * v9fs_dentry_release - called when dentry is going to be freed | ||
92 | * @dentry: dentry that is being release | ||
93 | * | ||
94 | */ | ||
95 | |||
96 | void v9fs_dentry_release(struct dentry *dentry) | ||
97 | { | ||
98 | dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); | ||
99 | |||
100 | if (dentry->d_fsdata != NULL) { | ||
101 | struct list_head *fid_list = dentry->d_fsdata; | ||
102 | struct v9fs_fid *temp = NULL; | ||
103 | struct v9fs_fid *current_fid = NULL; | ||
104 | struct v9fs_fcall *fcall = NULL; | ||
105 | |||
106 | list_for_each_entry_safe(current_fid, temp, fid_list, list) { | ||
107 | if (v9fs_t_clunk | ||
108 | (current_fid->v9ses, current_fid->fid, &fcall)) | ||
109 | dprintk(DEBUG_ERROR, "clunk failed: %s\n", | ||
110 | FCALL_ERROR(fcall)); | ||
111 | |||
112 | v9fs_put_idpool(current_fid->fid, | ||
113 | ¤t_fid->v9ses->fidpool); | ||
114 | |||
115 | kfree(fcall); | ||
116 | v9fs_fid_destroy(current_fid); | ||
117 | } | ||
118 | |||
119 | kfree(dentry->d_fsdata); /* free the list_head */ | ||
120 | } | ||
121 | } | ||
122 | |||
123 | struct dentry_operations v9fs_dentry_operations = { | ||
124 | .d_revalidate = v9fs_dentry_validate, | ||
125 | .d_release = v9fs_dentry_release, | ||
126 | }; | ||
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c new file mode 100644 index 000000000000..c478a7384186 --- /dev/null +++ b/fs/9p/vfs_dir.c | |||
@@ -0,0 +1,226 @@ | |||
1 | /* | ||
2 | * linux/fs/9p/vfs_dir.c | ||
3 | * | ||
4 | * This file contains vfs directory ops for the 9P2000 protocol. | ||
5 | * | ||
6 | * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> | ||
7 | * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License as published by | ||
11 | * the Free Software Foundation; either version 2 of the License, or | ||
12 | * (at your option) any later version. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, | ||
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | * GNU General Public License for more details. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License | ||
20 | * along with this program; if not, write to: | ||
21 | * Free Software Foundation | ||
22 | * 51 Franklin Street, Fifth Floor | ||
23 | * Boston, MA 02111-1301 USA | ||
24 | * | ||
25 | */ | ||
26 | |||
27 | #include <linux/module.h> | ||
28 | #include <linux/errno.h> | ||
29 | #include <linux/fs.h> | ||
30 | #include <linux/file.h> | ||
31 | #include <linux/stat.h> | ||
32 | #include <linux/string.h> | ||
33 | #include <linux/smp_lock.h> | ||
34 | #include <linux/inet.h> | ||
35 | #include <linux/idr.h> | ||
36 | |||
37 | #include "debug.h" | ||
38 | #include "v9fs.h" | ||
39 | #include "9p.h" | ||
40 | #include "v9fs_vfs.h" | ||
41 | #include "conv.h" | ||
42 | #include "fid.h" | ||
43 | |||
44 | /** | ||
45 | * dt_type - return file type | ||
46 | * @mistat: mistat structure | ||
47 | * | ||
48 | */ | ||
49 | |||
50 | static inline int dt_type(struct v9fs_stat *mistat) | ||
51 | { | ||
52 | unsigned long perm = mistat->mode; | ||
53 | int rettype = DT_REG; | ||
54 | |||
55 | if (perm & V9FS_DMDIR) | ||
56 | rettype = DT_DIR; | ||
57 | if (perm & V9FS_DMSYMLINK) | ||
58 | rettype = DT_LNK; | ||
59 | |||
60 | return rettype; | ||
61 | } | ||
62 | |||
63 | /** | ||
64 | * v9fs_dir_readdir - read a directory | ||
65 | * @filep: opened file structure | ||
66 | * @dirent: directory structure ??? | ||
67 | * @filldir: function to populate directory structure ??? | ||
68 | * | ||
69 | */ | ||
70 | |||
71 | static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) | ||
72 | { | ||
73 | struct v9fs_fcall *fcall = NULL; | ||
74 | struct inode *inode = filp->f_dentry->d_inode; | ||
75 | struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); | ||
76 | struct v9fs_fid *file = filp->private_data; | ||
77 | unsigned int i, n; | ||
78 | int fid = -1; | ||
79 | int ret = 0; | ||
80 | struct v9fs_stat *mi = NULL; | ||
81 | int over = 0; | ||
82 | |||
83 | dprintk(DEBUG_VFS, "name %s\n", filp->f_dentry->d_name.name); | ||
84 | |||
85 | fid = file->fid; | ||
86 | |||
87 | mi = kmalloc(v9ses->maxdata, GFP_KERNEL); | ||
88 | if (!mi) | ||
89 | return -ENOMEM; | ||
90 | |||
91 | if (file->rdir_fcall && (filp->f_pos != file->rdir_pos)) { | ||
92 | kfree(file->rdir_fcall); | ||
93 | file->rdir_fcall = NULL; | ||
94 | } | ||
95 | |||
96 | if (file->rdir_fcall) { | ||
97 | n = file->rdir_fcall->params.rread.count; | ||
98 | i = file->rdir_fpos; | ||
99 | while (i < n) { | ||
100 | int s = v9fs_deserialize_stat(v9ses, | ||
101 | file->rdir_fcall->params.rread.data + i, | ||
102 | n - i, mi, v9ses->maxdata); | ||
103 | |||
104 | if (s == 0) { | ||
105 | dprintk(DEBUG_ERROR, | ||
106 | "error while deserializing mistat\n"); | ||
107 | ret = -EIO; | ||
108 | goto FreeStructs; | ||
109 | } | ||
110 | |||
111 | over = filldir(dirent, mi->name, strlen(mi->name), | ||
112 | filp->f_pos, v9fs_qid2ino(&mi->qid), | ||
113 | dt_type(mi)); | ||
114 | |||
115 | if (over) { | ||
116 | file->rdir_fpos = i; | ||
117 | file->rdir_pos = filp->f_pos; | ||
118 | break; | ||
119 | } | ||
120 | |||
121 | i += s; | ||
122 | filp->f_pos += s; | ||
123 | } | ||
124 | |||
125 | if (!over) { | ||
126 | kfree(file->rdir_fcall); | ||
127 | file->rdir_fcall = NULL; | ||
128 | } | ||
129 | } | ||
130 | |||
131 | while (!over) { | ||
132 | ret = v9fs_t_read(v9ses, fid, filp->f_pos, | ||
133 | v9ses->maxdata-V9FS_IOHDRSZ, &fcall); | ||
134 | if (ret < 0) { | ||
135 | dprintk(DEBUG_ERROR, "error while reading: %d: %p\n", | ||
136 | ret, fcall); | ||
137 | goto FreeStructs; | ||
138 | } else if (ret == 0) | ||
139 | break; | ||
140 | |||
141 | n = ret; | ||
142 | i = 0; | ||
143 | while (i < n) { | ||
144 | int s = v9fs_deserialize_stat(v9ses, | ||
145 | fcall->params.rread.data + i, n - i, mi, | ||
146 | v9ses->maxdata); | ||
147 | |||
148 | if (s == 0) { | ||
149 | dprintk(DEBUG_ERROR, | ||
150 | "error while deserializing mistat\n"); | ||
151 | return -EIO; | ||
152 | } | ||
153 | |||
154 | over = filldir(dirent, mi->name, strlen(mi->name), | ||
155 | filp->f_pos, v9fs_qid2ino(&mi->qid), | ||
156 | dt_type(mi)); | ||
157 | |||
158 | if (over) { | ||
159 | file->rdir_fcall = fcall; | ||
160 | file->rdir_fpos = i; | ||
161 | file->rdir_pos = filp->f_pos; | ||
162 | fcall = NULL; | ||
163 | break; | ||
164 | } | ||
165 | |||
166 | i += s; | ||
167 | filp->f_pos += s; | ||
168 | } | ||
169 | |||
170 | kfree(fcall); | ||
171 | } | ||
172 | |||
173 | FreeStructs: | ||
174 | kfree(fcall); | ||
175 | kfree(mi); | ||
176 | return ret; | ||
177 | } | ||
178 | |||
179 | /** | ||
180 | * v9fs_dir_release - close a directory | ||
181 | * @inode: inode of the directory | ||
182 | * @filp: file pointer to a directory | ||
183 | * | ||
184 | */ | ||
185 | |||
186 | int v9fs_dir_release(struct inode *inode, struct file *filp) | ||
187 | { | ||
188 | struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); | ||
189 | struct v9fs_fid *fid = filp->private_data; | ||
190 | int fidnum = -1; | ||
191 | |||
192 | dprintk(DEBUG_VFS, "inode: %p filp: %p fid: %d\n", inode, filp, | ||
193 | fid->fid); | ||
194 | fidnum = fid->fid; | ||
195 | |||
196 | filemap_fdatawrite(inode->i_mapping); | ||
197 | filemap_fdatawait(inode->i_mapping); | ||
198 | |||
199 | if (fidnum >= 0) { | ||
200 | fid->fidopen--; | ||
201 | dprintk(DEBUG_VFS, "fidopen: %d v9f->fid: %d\n", fid->fidopen, | ||
202 | fid->fid); | ||
203 | |||
204 | if (fid->fidopen == 0) { | ||
205 | if (v9fs_t_clunk(v9ses, fidnum, NULL)) | ||
206 | dprintk(DEBUG_ERROR, "clunk failed\n"); | ||
207 | |||
208 | v9fs_put_idpool(fid->fid, &v9ses->fidpool); | ||
209 | } | ||
210 | |||
211 | kfree(fid->rdir_fcall); | ||
212 | |||
213 | filp->private_data = NULL; | ||
214 | v9fs_fid_destroy(fid); | ||
215 | } | ||
216 | |||
217 | d_drop(filp->f_dentry); | ||
218 | return 0; | ||
219 | } | ||
220 | |||
221 | struct file_operations v9fs_dir_operations = { | ||
222 | .read = generic_read_dir, | ||
223 | .readdir = v9fs_dir_readdir, | ||
224 | .open = v9fs_file_open, | ||
225 | .release = v9fs_dir_release, | ||
226 | }; | ||
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c new file mode 100644 index 000000000000..1f8ae7d580ab --- /dev/null +++ b/fs/9p/vfs_file.c | |||
@@ -0,0 +1,401 @@ | |||
1 | /* | ||
2 | * linux/fs/9p/vfs_file.c | ||
3 | * | ||
4 | * This file contains vfs file ops for 9P2000. | ||
5 | * | ||
6 | * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> | ||
7 | * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License as published by | ||
11 | * the Free Software Foundation; either version 2 of the License, or | ||
12 | * (at your option) any later version. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, | ||
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | * GNU General Public License for more details. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License | ||
20 | * along with this program; if not, write to: | ||
21 | * Free Software Foundation | ||
22 | * 51 Franklin Street, Fifth Floor | ||
23 | * Boston, MA 02111-1301 USA | ||
24 | * | ||
25 | */ | ||
26 | |||
27 | #include <linux/module.h> | ||
28 | #include <linux/errno.h> | ||
29 | #include <linux/fs.h> | ||
30 | #include <linux/file.h> | ||
31 | #include <linux/stat.h> | ||
32 | #include <linux/string.h> | ||
33 | #include <linux/smp_lock.h> | ||
34 | #include <linux/inet.h> | ||
35 | #include <linux/version.h> | ||
36 | #include <linux/list.h> | ||
37 | #include <asm/uaccess.h> | ||
38 | #include <linux/idr.h> | ||
39 | |||
40 | #include "debug.h" | ||
41 | #include "v9fs.h" | ||
42 | #include "9p.h" | ||
43 | #include "v9fs_vfs.h" | ||
44 | #include "fid.h" | ||
45 | |||
46 | /** | ||
47 | * v9fs_file_open - open a file (or directory) | ||
48 | * @inode: inode to be opened | ||
49 | * @file: file being opened | ||
50 | * | ||
51 | */ | ||
52 | |||
53 | int v9fs_file_open(struct inode *inode, struct file *file) | ||
54 | { | ||
55 | struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); | ||
56 | struct v9fs_fid *v9fid = v9fs_fid_lookup(file->f_dentry, FID_WALK); | ||
57 | struct v9fs_fid *v9newfid = NULL; | ||
58 | struct v9fs_fcall *fcall = NULL; | ||
59 | int open_mode = 0; | ||
60 | unsigned int iounit = 0; | ||
61 | int newfid = -1; | ||
62 | long result = -1; | ||
63 | |||
64 | dprintk(DEBUG_VFS, "inode: %p file: %p v9fid= %p\n", inode, file, | ||
65 | v9fid); | ||
66 | |||
67 | if (!v9fid) { | ||
68 | struct dentry *dentry = file->f_dentry; | ||
69 | dprintk(DEBUG_ERROR, "Couldn't resolve fid from dentry\n"); | ||
70 | |||
71 | /* XXX - some duplication from lookup, generalize later */ | ||
72 | /* basically vfs_lookup is too heavy weight */ | ||
73 | v9fid = v9fs_fid_lookup(file->f_dentry, FID_OP); | ||
74 | if (!v9fid) | ||
75 | return -EBADF; | ||
76 | |||
77 | v9fid = v9fs_fid_lookup(dentry->d_parent, FID_WALK); | ||
78 | if (!v9fid) | ||
79 | return -EBADF; | ||
80 | |||
81 | newfid = v9fs_get_idpool(&v9ses->fidpool); | ||
82 | if (newfid < 0) { | ||
83 | eprintk(KERN_WARNING, "newfid fails!\n"); | ||
84 | return -ENOSPC; | ||
85 | } | ||
86 | |||
87 | result = | ||
88 | v9fs_t_walk(v9ses, v9fid->fid, newfid, | ||
89 | (char *)file->f_dentry->d_name.name, NULL); | ||
90 | if (result < 0) { | ||
91 | v9fs_put_idpool(newfid, &v9ses->fidpool); | ||
92 | dprintk(DEBUG_ERROR, "rewalk didn't work\n"); | ||
93 | return -EBADF; | ||
94 | } | ||
95 | |||
96 | v9fid = v9fs_fid_create(dentry); | ||
97 | if (v9fid == NULL) { | ||
98 | dprintk(DEBUG_ERROR, "couldn't insert\n"); | ||
99 | return -ENOMEM; | ||
100 | } | ||
101 | v9fid->fid = newfid; | ||
102 | } | ||
103 | |||
104 | if (v9fid->fidcreate) { | ||
105 | /* create case */ | ||
106 | newfid = v9fid->fid; | ||
107 | iounit = v9fid->iounit; | ||
108 | v9fid->fidcreate = 0; | ||
109 | } else { | ||
110 | if (!S_ISDIR(inode->i_mode)) | ||
111 | newfid = v9fid->fid; | ||
112 | else { | ||
113 | newfid = v9fs_get_idpool(&v9ses->fidpool); | ||
114 | if (newfid < 0) { | ||
115 | eprintk(KERN_WARNING, "allocation failed\n"); | ||
116 | return -ENOSPC; | ||
117 | } | ||
118 | /* This would be a somewhat critical clone */ | ||
119 | result = | ||
120 | v9fs_t_walk(v9ses, v9fid->fid, newfid, NULL, | ||
121 | &fcall); | ||
122 | if (result < 0) { | ||
123 | dprintk(DEBUG_ERROR, "clone error: %s\n", | ||
124 | FCALL_ERROR(fcall)); | ||
125 | kfree(fcall); | ||
126 | return result; | ||
127 | } | ||
128 | |||
129 | v9newfid = v9fs_fid_create(file->f_dentry); | ||
130 | v9newfid->fid = newfid; | ||
131 | v9newfid->qid = v9fid->qid; | ||
132 | v9newfid->iounit = v9fid->iounit; | ||
133 | v9newfid->fidopen = 0; | ||
134 | v9newfid->fidclunked = 0; | ||
135 | v9newfid->v9ses = v9ses; | ||
136 | v9fid = v9newfid; | ||
137 | kfree(fcall); | ||
138 | } | ||
139 | |||
140 | /* TODO: do special things for O_EXCL, O_NOFOLLOW, O_SYNC */ | ||
141 | /* translate open mode appropriately */ | ||
142 | open_mode = file->f_flags & 0x3; | ||
143 | |||
144 | if (file->f_flags & O_EXCL) | ||
145 | open_mode |= V9FS_OEXCL; | ||
146 | |||
147 | if (v9ses->extended) { | ||
148 | if (file->f_flags & O_TRUNC) | ||
149 | open_mode |= V9FS_OTRUNC; | ||
150 | |||
151 | if (file->f_flags & O_APPEND) | ||
152 | open_mode |= V9FS_OAPPEND; | ||
153 | } | ||
154 | |||
155 | result = v9fs_t_open(v9ses, newfid, open_mode, &fcall); | ||
156 | if (result < 0) { | ||
157 | dprintk(DEBUG_ERROR, | ||
158 | "open failed, open_mode 0x%x: %s\n", open_mode, | ||
159 | FCALL_ERROR(fcall)); | ||
160 | kfree(fcall); | ||
161 | return result; | ||
162 | } | ||
163 | |||
164 | iounit = fcall->params.ropen.iounit; | ||
165 | kfree(fcall); | ||
166 | } | ||
167 | |||
168 | |||
169 | file->private_data = v9fid; | ||
170 | |||
171 | v9fid->rdir_pos = 0; | ||
172 | v9fid->rdir_fcall = NULL; | ||
173 | v9fid->fidopen = 1; | ||
174 | v9fid->filp = file; | ||
175 | v9fid->iounit = iounit; | ||
176 | |||
177 | return 0; | ||
178 | } | ||
179 | |||
180 | /** | ||
181 | * v9fs_file_lock - lock a file (or directory) | ||
182 | * @inode: inode to be opened | ||
183 | * @file: file being opened | ||
184 | * | ||
185 | * XXX - this looks like a local only lock, we should extend into 9P | ||
186 | * by using open exclusive | ||
187 | */ | ||
188 | |||
189 | static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl) | ||
190 | { | ||
191 | int res = 0; | ||
192 | struct inode *inode = filp->f_dentry->d_inode; | ||
193 | |||
194 | dprintk(DEBUG_VFS, "filp: %p lock: %p\n", filp, fl); | ||
195 | |||
196 | /* No mandatory locks */ | ||
197 | if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) | ||
198 | return -ENOLCK; | ||
199 | |||
200 | if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) { | ||
201 | filemap_fdatawrite(inode->i_mapping); | ||
202 | filemap_fdatawait(inode->i_mapping); | ||
203 | invalidate_inode_pages(&inode->i_data); | ||
204 | } | ||
205 | |||
206 | return res; | ||
207 | } | ||
208 | |||
209 | /** | ||
210 | * v9fs_read - read from a file (internal) | ||
211 | * @filep: file pointer to read | ||
212 | * @data: data buffer to read data into | ||
213 | * @count: size of buffer | ||
214 | * @offset: offset at which to read data | ||
215 | * | ||
216 | */ | ||
217 | |||
218 | static ssize_t | ||
219 | v9fs_read(struct file *filp, char *buffer, size_t count, loff_t * offset) | ||
220 | { | ||
221 | struct inode *inode = filp->f_dentry->d_inode; | ||
222 | struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); | ||
223 | struct v9fs_fid *v9f = filp->private_data; | ||
224 | struct v9fs_fcall *fcall = NULL; | ||
225 | int fid = v9f->fid; | ||
226 | int rsize = 0; | ||
227 | int result = 0; | ||
228 | int total = 0; | ||
229 | |||
230 | dprintk(DEBUG_VFS, "\n"); | ||
231 | |||
232 | rsize = v9ses->maxdata - V9FS_IOHDRSZ; | ||
233 | if (v9f->iounit != 0 && rsize > v9f->iounit) | ||
234 | rsize = v9f->iounit; | ||
235 | |||
236 | do { | ||
237 | if (count < rsize) | ||
238 | rsize = count; | ||
239 | |||
240 | result = v9fs_t_read(v9ses, fid, *offset, rsize, &fcall); | ||
241 | |||
242 | if (result < 0) { | ||
243 | printk(KERN_ERR "9P2000: v9fs_t_read returned %d\n", | ||
244 | result); | ||
245 | |||
246 | kfree(fcall); | ||
247 | return total; | ||
248 | } else | ||
249 | *offset += result; | ||
250 | |||
251 | /* XXX - extra copy */ | ||
252 | memcpy(buffer, fcall->params.rread.data, result); | ||
253 | count -= result; | ||
254 | buffer += result; | ||
255 | total += result; | ||
256 | |||
257 | kfree(fcall); | ||
258 | |||
259 | if (result < rsize) | ||
260 | break; | ||
261 | } while (count); | ||
262 | |||
263 | return total; | ||
264 | } | ||
265 | |||
266 | /** | ||
267 | * v9fs_file_read - read from a file | ||
268 | * @filep: file pointer to read | ||
269 | * @data: data buffer to read data into | ||
270 | * @count: size of buffer | ||
271 | * @offset: offset at which to read data | ||
272 | * | ||
273 | */ | ||
274 | |||
275 | static ssize_t | ||
276 | v9fs_file_read(struct file *filp, char __user * data, size_t count, | ||
277 | loff_t * offset) | ||
278 | { | ||
279 | int retval = -1; | ||
280 | int ret = 0; | ||
281 | char *buffer; | ||
282 | |||
283 | buffer = kmalloc(count, GFP_KERNEL); | ||
284 | if (!buffer) | ||
285 | return -ENOMEM; | ||
286 | |||
287 | retval = v9fs_read(filp, buffer, count, offset); | ||
288 | if (retval > 0) { | ||
289 | if ((ret = copy_to_user(data, buffer, retval)) != 0) { | ||
290 | dprintk(DEBUG_ERROR, "Problem copying to user %d\n", | ||
291 | ret); | ||
292 | retval = ret; | ||
293 | } | ||
294 | } | ||
295 | |||
296 | kfree(buffer); | ||
297 | |||
298 | return retval; | ||
299 | } | ||
300 | |||
301 | /** | ||
302 | * v9fs_write - write to a file | ||
303 | * @filep: file pointer to write | ||
304 | * @data: data buffer to write data from | ||
305 | * @count: size of buffer | ||
306 | * @offset: offset at which to write data | ||
307 | * | ||
308 | */ | ||
309 | |||
310 | static ssize_t | ||
311 | v9fs_write(struct file *filp, char *buffer, size_t count, loff_t * offset) | ||
312 | { | ||
313 | struct inode *inode = filp->f_dentry->d_inode; | ||
314 | struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); | ||
315 | struct v9fs_fid *v9fid = filp->private_data; | ||
316 | struct v9fs_fcall *fcall; | ||
317 | int fid = v9fid->fid; | ||
318 | int result = -EIO; | ||
319 | int rsize = 0; | ||
320 | int total = 0; | ||
321 | |||
322 | dprintk(DEBUG_VFS, "data %p count %d offset %x\n", buffer, (int)count, | ||
323 | (int)*offset); | ||
324 | rsize = v9ses->maxdata - V9FS_IOHDRSZ; | ||
325 | if (v9fid->iounit != 0 && rsize > v9fid->iounit) | ||
326 | rsize = v9fid->iounit; | ||
327 | |||
328 | dump_data(buffer, count); | ||
329 | |||
330 | do { | ||
331 | if (count < rsize) | ||
332 | rsize = count; | ||
333 | |||
334 | result = | ||
335 | v9fs_t_write(v9ses, fid, *offset, rsize, buffer, &fcall); | ||
336 | if (result < 0) { | ||
337 | eprintk(KERN_ERR, "error while writing: %s(%d)\n", | ||
338 | FCALL_ERROR(fcall), result); | ||
339 | kfree(fcall); | ||
340 | return result; | ||
341 | } else | ||
342 | *offset += result; | ||
343 | |||
344 | kfree(fcall); | ||
345 | |||
346 | if (result != rsize) { | ||
347 | eprintk(KERN_ERR, | ||
348 | "short write: v9fs_t_write returned %d\n", | ||
349 | result); | ||
350 | break; | ||
351 | } | ||
352 | |||
353 | count -= result; | ||
354 | buffer += result; | ||
355 | total += result; | ||
356 | } while (count); | ||
357 | |||
358 | return total; | ||
359 | } | ||
360 | |||
361 | /** | ||
362 | * v9fs_file_write - write to a file | ||
363 | * @filep: file pointer to write | ||
364 | * @data: data buffer to write data from | ||
365 | * @count: size of buffer | ||
366 | * @offset: offset at which to write data | ||
367 | * | ||
368 | */ | ||
369 | |||
370 | static ssize_t | ||
371 | v9fs_file_write(struct file *filp, const char __user * data, | ||
372 | size_t count, loff_t * offset) | ||
373 | { | ||
374 | int ret = -1; | ||
375 | char *buffer; | ||
376 | |||
377 | buffer = kmalloc(count, GFP_KERNEL); | ||
378 | if (buffer == NULL) | ||
379 | return -ENOMEM; | ||
380 | |||
381 | ret = copy_from_user(buffer, data, count); | ||
382 | if (ret) { | ||
383 | dprintk(DEBUG_ERROR, "Problem copying from user\n"); | ||
384 | ret = -EFAULT; | ||
385 | } else { | ||
386 | ret = v9fs_write(filp, buffer, count, offset); | ||
387 | } | ||
388 | |||
389 | kfree(buffer); | ||
390 | |||
391 | return ret; | ||
392 | } | ||
393 | |||
394 | struct file_operations v9fs_file_operations = { | ||
395 | .llseek = generic_file_llseek, | ||
396 | .read = v9fs_file_read, | ||
397 | .write = v9fs_file_write, | ||
398 | .open = v9fs_file_open, | ||
399 | .release = v9fs_dir_release, | ||
400 | .lock = v9fs_file_lock, | ||
401 | }; | ||
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c new file mode 100644 index 000000000000..0c13fc600049 --- /dev/null +++ b/fs/9p/vfs_inode.c | |||
@@ -0,0 +1,1338 @@ | |||
1 | /* | ||
2 | * linux/fs/9p/vfs_inode.c | ||
3 | * | ||
4 | * This file contains vfs inode ops for the 9P2000 protocol. | ||
5 | * | ||
6 | * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> | ||
7 | * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License as published by | ||
11 | * the Free Software Foundation; either version 2 of the License, or | ||
12 | * (at your option) any later version. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, | ||
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | * GNU General Public License for more details. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License | ||
20 | * along with this program; if not, write to: | ||
21 | * Free Software Foundation | ||
22 | * 51 Franklin Street, Fifth Floor | ||
23 | * Boston, MA 02111-1301 USA | ||
24 | * | ||
25 | */ | ||
26 | |||
27 | #include <linux/module.h> | ||
28 | #include <linux/errno.h> | ||
29 | #include <linux/fs.h> | ||
30 | #include <linux/file.h> | ||
31 | #include <linux/pagemap.h> | ||
32 | #include <linux/stat.h> | ||
33 | #include <linux/string.h> | ||
34 | #include <linux/smp_lock.h> | ||
35 | #include <linux/inet.h> | ||
36 | #include <linux/namei.h> | ||
37 | #include <linux/idr.h> | ||
38 | |||
39 | #include "debug.h" | ||
40 | #include "v9fs.h" | ||
41 | #include "9p.h" | ||
42 | #include "v9fs_vfs.h" | ||
43 | #include "conv.h" | ||
44 | #include "fid.h" | ||
45 | |||
46 | static struct inode_operations v9fs_dir_inode_operations; | ||
47 | static struct inode_operations v9fs_dir_inode_operations_ext; | ||
48 | static struct inode_operations v9fs_file_inode_operations; | ||
49 | static struct inode_operations v9fs_symlink_inode_operations; | ||
50 | |||
51 | /** | ||
52 | * unixmode2p9mode - convert unix mode bits to plan 9 | ||
53 | * @v9ses: v9fs session information | ||
54 | * @mode: mode to convert | ||
55 | * | ||
56 | */ | ||
57 | |||
58 | static int unixmode2p9mode(struct v9fs_session_info *v9ses, int mode) | ||
59 | { | ||
60 | int res; | ||
61 | res = mode & 0777; | ||
62 | if (S_ISDIR(mode)) | ||
63 | res |= V9FS_DMDIR; | ||
64 | if (v9ses->extended) { | ||
65 | if (S_ISLNK(mode)) | ||
66 | res |= V9FS_DMSYMLINK; | ||
67 | if (v9ses->nodev == 0) { | ||
68 | if (S_ISSOCK(mode)) | ||
69 | res |= V9FS_DMSOCKET; | ||
70 | if (S_ISFIFO(mode)) | ||
71 | res |= V9FS_DMNAMEDPIPE; | ||
72 | if (S_ISBLK(mode)) | ||
73 | res |= V9FS_DMDEVICE; | ||
74 | if (S_ISCHR(mode)) | ||
75 | res |= V9FS_DMDEVICE; | ||
76 | } | ||
77 | |||
78 | if ((mode & S_ISUID) == S_ISUID) | ||
79 | res |= V9FS_DMSETUID; | ||
80 | if ((mode & S_ISGID) == S_ISGID) | ||
81 | res |= V9FS_DMSETGID; | ||
82 | if ((mode & V9FS_DMLINK)) | ||
83 | res |= V9FS_DMLINK; | ||
84 | } | ||
85 | |||
86 | return res; | ||
87 | } | ||
88 | |||
89 | /** | ||
90 | * p9mode2unixmode- convert plan9 mode bits to unix mode bits | ||
91 | * @v9ses: v9fs session information | ||
92 | * @mode: mode to convert | ||
93 | * | ||
94 | */ | ||
95 | |||
96 | static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode) | ||
97 | { | ||
98 | int res; | ||
99 | |||
100 | res = mode & 0777; | ||
101 | |||
102 | if ((mode & V9FS_DMDIR) == V9FS_DMDIR) | ||
103 | res |= S_IFDIR; | ||
104 | else if ((mode & V9FS_DMSYMLINK) && (v9ses->extended)) | ||
105 | res |= S_IFLNK; | ||
106 | else if ((mode & V9FS_DMSOCKET) && (v9ses->extended) | ||
107 | && (v9ses->nodev == 0)) | ||
108 | res |= S_IFSOCK; | ||
109 | else if ((mode & V9FS_DMNAMEDPIPE) && (v9ses->extended) | ||
110 | && (v9ses->nodev == 0)) | ||
111 | res |= S_IFIFO; | ||
112 | else if ((mode & V9FS_DMDEVICE) && (v9ses->extended) | ||
113 | && (v9ses->nodev == 0)) | ||
114 | res |= S_IFBLK; | ||
115 | else | ||
116 | res |= S_IFREG; | ||
117 | |||
118 | if (v9ses->extended) { | ||
119 | if ((mode & V9FS_DMSETUID) == V9FS_DMSETUID) | ||
120 | res |= S_ISUID; | ||
121 | |||
122 | if ((mode & V9FS_DMSETGID) == V9FS_DMSETGID) | ||
123 | res |= S_ISGID; | ||
124 | } | ||
125 | |||
126 | return res; | ||
127 | } | ||
128 | |||
129 | /** | ||
130 | * v9fs_blank_mistat - helper function to setup a 9P stat structure | ||
131 | * @v9ses: 9P session info (for determining extended mode) | ||
132 | * @mistat: structure to initialize | ||
133 | * | ||
134 | */ | ||
135 | |||
136 | static void | ||
137 | v9fs_blank_mistat(struct v9fs_session_info *v9ses, struct v9fs_stat *mistat) | ||
138 | { | ||
139 | mistat->type = ~0; | ||
140 | mistat->dev = ~0; | ||
141 | mistat->qid.type = ~0; | ||
142 | mistat->qid.version = ~0; | ||
143 | *((long long *)&mistat->qid.path) = ~0; | ||
144 | mistat->mode = ~0; | ||
145 | mistat->atime = ~0; | ||
146 | mistat->mtime = ~0; | ||
147 | mistat->length = ~0; | ||
148 | mistat->name = mistat->data; | ||
149 | mistat->uid = mistat->data; | ||
150 | mistat->gid = mistat->data; | ||
151 | mistat->muid = mistat->data; | ||
152 | if (v9ses->extended) { | ||
153 | mistat->n_uid = ~0; | ||
154 | mistat->n_gid = ~0; | ||
155 | mistat->n_muid = ~0; | ||
156 | mistat->extension = mistat->data; | ||
157 | } | ||
158 | *mistat->data = 0; | ||
159 | } | ||
160 | |||
161 | /** | ||
162 | * v9fs_mistat2unix - convert mistat to unix stat | ||
163 | * @mistat: Plan 9 metadata (mistat) structure | ||
164 | * @buf: unix metadata (stat) structure to populate | ||
165 | * @sb: superblock | ||
166 | * | ||
167 | */ | ||
168 | |||
169 | static void | ||
170 | v9fs_mistat2unix(struct v9fs_stat *mistat, struct stat *buf, | ||
171 | struct super_block *sb) | ||
172 | { | ||
173 | struct v9fs_session_info *v9ses = sb ? sb->s_fs_info : NULL; | ||
174 | |||
175 | buf->st_nlink = 1; | ||
176 | |||
177 | buf->st_atime = mistat->atime; | ||
178 | buf->st_mtime = mistat->mtime; | ||
179 | buf->st_ctime = mistat->mtime; | ||
180 | |||
181 | buf->st_uid = (unsigned short)-1; | ||
182 | buf->st_gid = (unsigned short)-1; | ||
183 | |||
184 | if (v9ses && v9ses->extended) { | ||
185 | /* TODO: string to uid mapping via user-space daemon */ | ||
186 | if (mistat->n_uid != -1) | ||
187 | sscanf(mistat->uid, "%x", (unsigned int *)&buf->st_uid); | ||
188 | |||
189 | if (mistat->n_gid != -1) | ||
190 | sscanf(mistat->gid, "%x", (unsigned int *)&buf->st_gid); | ||
191 | } | ||
192 | |||
193 | if (buf->st_uid == (unsigned short)-1) | ||
194 | buf->st_uid = v9ses->uid; | ||
195 | if (buf->st_gid == (unsigned short)-1) | ||
196 | buf->st_gid = v9ses->gid; | ||
197 | |||
198 | buf->st_mode = p9mode2unixmode(v9ses, mistat->mode); | ||
199 | if ((S_ISBLK(buf->st_mode)) || (S_ISCHR(buf->st_mode))) { | ||
200 | char type = 0; | ||
201 | int major = -1; | ||
202 | int minor = -1; | ||
203 | sscanf(mistat->extension, "%c %u %u", &type, &major, &minor); | ||
204 | switch (type) { | ||
205 | case 'c': | ||
206 | buf->st_mode &= ~S_IFBLK; | ||
207 | buf->st_mode |= S_IFCHR; | ||
208 | break; | ||
209 | case 'b': | ||
210 | break; | ||
211 | default: | ||
212 | dprintk(DEBUG_ERROR, "Unknown special type %c (%s)\n", | ||
213 | type, mistat->extension); | ||
214 | }; | ||
215 | buf->st_rdev = MKDEV(major, minor); | ||
216 | } else | ||
217 | buf->st_rdev = 0; | ||
218 | |||
219 | buf->st_size = mistat->length; | ||
220 | |||
221 | buf->st_blksize = sb->s_blocksize; | ||
222 | buf->st_blocks = | ||
223 | (buf->st_size + buf->st_blksize - 1) >> sb->s_blocksize_bits; | ||
224 | } | ||
225 | |||
226 | /** | ||
227 | * v9fs_get_inode - helper function to setup an inode | ||
228 | * @sb: superblock | ||
229 | * @mode: mode to setup inode with | ||
230 | * | ||
231 | */ | ||
232 | |||
233 | struct inode *v9fs_get_inode(struct super_block *sb, int mode) | ||
234 | { | ||
235 | struct inode *inode = NULL; | ||
236 | struct v9fs_session_info *v9ses = sb->s_fs_info; | ||
237 | |||
238 | dprintk(DEBUG_VFS, "super block: %p mode: %o\n", sb, mode); | ||
239 | |||
240 | inode = new_inode(sb); | ||
241 | if (inode) { | ||
242 | inode->i_mode = mode; | ||
243 | inode->i_uid = current->fsuid; | ||
244 | inode->i_gid = current->fsgid; | ||
245 | inode->i_blksize = sb->s_blocksize; | ||
246 | inode->i_blocks = 0; | ||
247 | inode->i_rdev = 0; | ||
248 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
249 | |||
250 | switch (mode & S_IFMT) { | ||
251 | case S_IFIFO: | ||
252 | case S_IFBLK: | ||
253 | case S_IFCHR: | ||
254 | case S_IFSOCK: | ||
255 | if(!v9ses->extended) { | ||
256 | dprintk(DEBUG_ERROR, "special files without extended mode\n"); | ||
257 | return ERR_PTR(-EINVAL); | ||
258 | } | ||
259 | init_special_inode(inode, inode->i_mode, | ||
260 | inode->i_rdev); | ||
261 | break; | ||
262 | case S_IFREG: | ||
263 | inode->i_op = &v9fs_file_inode_operations; | ||
264 | inode->i_fop = &v9fs_file_operations; | ||
265 | break; | ||
266 | case S_IFLNK: | ||
267 | if(!v9ses->extended) { | ||
268 | dprintk(DEBUG_ERROR, "extended modes used w/o 9P2000.u\n"); | ||
269 | return ERR_PTR(-EINVAL); | ||
270 | } | ||
271 | inode->i_op = &v9fs_symlink_inode_operations; | ||
272 | break; | ||
273 | case S_IFDIR: | ||
274 | inode->i_nlink++; | ||
275 | if(v9ses->extended) | ||
276 | inode->i_op = &v9fs_dir_inode_operations_ext; | ||
277 | else | ||
278 | inode->i_op = &v9fs_dir_inode_operations; | ||
279 | inode->i_fop = &v9fs_dir_operations; | ||
280 | break; | ||
281 | default: | ||
282 | dprintk(DEBUG_ERROR, "BAD mode 0x%x S_IFMT 0x%x\n", | ||
283 | mode, mode & S_IFMT); | ||
284 | return ERR_PTR(-EINVAL); | ||
285 | } | ||
286 | } else { | ||
287 | eprintk(KERN_WARNING, "Problem allocating inode\n"); | ||
288 | return ERR_PTR(-ENOMEM); | ||
289 | } | ||
290 | return inode; | ||
291 | } | ||
292 | |||
293 | /** | ||
294 | * v9fs_create - helper function to create files and directories | ||
295 | * @dir: directory inode file is being created in | ||
296 | * @file_dentry: dentry file is being created in | ||
297 | * @perm: permissions file is being created with | ||
298 | * @open_mode: resulting open mode for file | ||
299 | * | ||
300 | */ | ||
301 | |||
302 | static int | ||
303 | v9fs_create(struct inode *dir, | ||
304 | struct dentry *file_dentry, | ||
305 | unsigned int perm, unsigned int open_mode) | ||
306 | { | ||
307 | struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir); | ||
308 | struct super_block *sb = dir->i_sb; | ||
309 | struct v9fs_fid *dirfid = | ||
310 | v9fs_fid_lookup(file_dentry->d_parent, FID_WALK); | ||
311 | struct v9fs_fid *fid = NULL; | ||
312 | struct inode *file_inode = NULL; | ||
313 | struct v9fs_fcall *fcall = NULL; | ||
314 | struct v9fs_qid qid; | ||
315 | struct stat newstat; | ||
316 | int dirfidnum = -1; | ||
317 | long newfid = -1; | ||
318 | int result = 0; | ||
319 | unsigned int iounit = 0; | ||
320 | |||
321 | perm = unixmode2p9mode(v9ses, perm); | ||
322 | |||
323 | dprintk(DEBUG_VFS, "dir: %p dentry: %p perm: %o mode: %o\n", dir, | ||
324 | file_dentry, perm, open_mode); | ||
325 | |||
326 | if (!dirfid) | ||
327 | return -EBADF; | ||
328 | |||
329 | dirfidnum = dirfid->fid; | ||
330 | if (dirfidnum < 0) { | ||
331 | dprintk(DEBUG_ERROR, "No fid for the directory #%lu\n", | ||
332 | dir->i_ino); | ||
333 | return -EBADF; | ||
334 | } | ||
335 | |||
336 | if (file_dentry->d_inode) { | ||
337 | dprintk(DEBUG_ERROR, | ||
338 | "Odd. There is an inode for dir %lu, name :%s:\n", | ||
339 | dir->i_ino, file_dentry->d_name.name); | ||
340 | return -EEXIST; | ||
341 | } | ||
342 | |||
343 | newfid = v9fs_get_idpool(&v9ses->fidpool); | ||
344 | if (newfid < 0) { | ||
345 | eprintk(KERN_WARNING, "no free fids available\n"); | ||
346 | return -ENOSPC; | ||
347 | } | ||
348 | |||
349 | result = v9fs_t_walk(v9ses, dirfidnum, newfid, NULL, &fcall); | ||
350 | if (result < 0) { | ||
351 | dprintk(DEBUG_ERROR, "clone error: %s\n", FCALL_ERROR(fcall)); | ||
352 | v9fs_put_idpool(newfid, &v9ses->fidpool); | ||
353 | newfid = 0; | ||
354 | goto CleanUpFid; | ||
355 | } | ||
356 | |||
357 | kfree(fcall); | ||
358 | |||
359 | result = v9fs_t_create(v9ses, newfid, (char *)file_dentry->d_name.name, | ||
360 | perm, open_mode, &fcall); | ||
361 | if (result < 0) { | ||
362 | dprintk(DEBUG_ERROR, "create fails: %s(%d)\n", | ||
363 | FCALL_ERROR(fcall), result); | ||
364 | |||
365 | goto CleanUpFid; | ||
366 | } | ||
367 | |||
368 | iounit = fcall->params.rcreate.iounit; | ||
369 | qid = fcall->params.rcreate.qid; | ||
370 | kfree(fcall); | ||
371 | |||
372 | fid = v9fs_fid_create(file_dentry); | ||
373 | if (!fid) { | ||
374 | result = -ENOMEM; | ||
375 | goto CleanUpFid; | ||
376 | } | ||
377 | |||
378 | fid->fid = newfid; | ||
379 | fid->fidopen = 0; | ||
380 | fid->fidcreate = 1; | ||
381 | fid->qid = qid; | ||
382 | fid->iounit = iounit; | ||
383 | fid->rdir_pos = 0; | ||
384 | fid->rdir_fcall = NULL; | ||
385 | fid->v9ses = v9ses; | ||
386 | |||
387 | if ((perm & V9FS_DMSYMLINK) || (perm & V9FS_DMLINK) || | ||
388 | (perm & V9FS_DMNAMEDPIPE) || (perm & V9FS_DMSOCKET) || | ||
389 | (perm & V9FS_DMDEVICE)) | ||
390 | return 0; | ||
391 | |||
392 | result = v9fs_t_stat(v9ses, newfid, &fcall); | ||
393 | if (result < 0) { | ||
394 | dprintk(DEBUG_ERROR, "stat error: %s(%d)\n", FCALL_ERROR(fcall), | ||
395 | result); | ||
396 | goto CleanUpFid; | ||
397 | } | ||
398 | |||
399 | v9fs_mistat2unix(fcall->params.rstat.stat, &newstat, sb); | ||
400 | |||
401 | file_inode = v9fs_get_inode(sb, newstat.st_mode); | ||
402 | if ((!file_inode) || IS_ERR(file_inode)) { | ||
403 | dprintk(DEBUG_ERROR, "create inode failed\n"); | ||
404 | result = -EBADF; | ||
405 | goto CleanUpFid; | ||
406 | } | ||
407 | |||
408 | v9fs_mistat2inode(fcall->params.rstat.stat, file_inode, sb); | ||
409 | kfree(fcall); | ||
410 | d_instantiate(file_dentry, file_inode); | ||
411 | |||
412 | if (perm & V9FS_DMDIR) { | ||
413 | if (v9fs_t_clunk(v9ses, newfid, &fcall)) | ||
414 | dprintk(DEBUG_ERROR, "clunk for mkdir failed: %s\n", | ||
415 | FCALL_ERROR(fcall)); | ||
416 | |||
417 | v9fs_put_idpool(newfid, &v9ses->fidpool); | ||
418 | kfree(fcall); | ||
419 | fid->fidopen = 0; | ||
420 | fid->fidcreate = 0; | ||
421 | d_drop(file_dentry); | ||
422 | } | ||
423 | |||
424 | return 0; | ||
425 | |||
426 | CleanUpFid: | ||
427 | kfree(fcall); | ||
428 | |||
429 | if (newfid) { | ||
430 | if (v9fs_t_clunk(v9ses, newfid, &fcall)) | ||
431 | dprintk(DEBUG_ERROR, "clunk failed: %s\n", | ||
432 | FCALL_ERROR(fcall)); | ||
433 | |||
434 | v9fs_put_idpool(newfid, &v9ses->fidpool); | ||
435 | kfree(fcall); | ||
436 | } | ||
437 | return result; | ||
438 | } | ||
439 | |||
440 | /** | ||
441 | * v9fs_remove - helper function to remove files and directories | ||
442 | * @dir: directory inode that is being deleted | ||
443 | * @file: dentry that is being deleted | ||
444 | * @rmdir: removing a directory | ||
445 | * | ||
446 | */ | ||
447 | |||
448 | static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir) | ||
449 | { | ||
450 | struct v9fs_fcall *fcall = NULL; | ||
451 | struct super_block *sb = NULL; | ||
452 | struct v9fs_session_info *v9ses = NULL; | ||
453 | struct v9fs_fid *v9fid = NULL; | ||
454 | struct inode *file_inode = NULL; | ||
455 | int fid = -1; | ||
456 | int result = 0; | ||
457 | |||
458 | dprintk(DEBUG_VFS, "inode: %p dentry: %p rmdir: %d\n", dir, file, | ||
459 | rmdir); | ||
460 | |||
461 | file_inode = file->d_inode; | ||
462 | sb = file_inode->i_sb; | ||
463 | v9ses = v9fs_inode2v9ses(file_inode); | ||
464 | v9fid = v9fs_fid_lookup(file, FID_OP); | ||
465 | |||
466 | if (!v9fid) { | ||
467 | dprintk(DEBUG_ERROR, | ||
468 | "no v9fs_fid\n"); | ||
469 | return -EBADF; | ||
470 | } | ||
471 | |||
472 | fid = v9fid->fid; | ||
473 | if (fid < 0) { | ||
474 | dprintk(DEBUG_ERROR, "inode #%lu, no fid!\n", | ||
475 | file_inode->i_ino); | ||
476 | return -EBADF; | ||
477 | } | ||
478 | |||
479 | result = v9fs_t_remove(v9ses, fid, &fcall); | ||
480 | if (result < 0) | ||
481 | dprintk(DEBUG_ERROR, "remove of file fails: %s(%d)\n", | ||
482 | FCALL_ERROR(fcall), result); | ||
483 | else { | ||
484 | v9fs_put_idpool(fid, &v9ses->fidpool); | ||
485 | v9fs_fid_destroy(v9fid); | ||
486 | } | ||
487 | |||
488 | kfree(fcall); | ||
489 | return result; | ||
490 | } | ||
491 | |||
/**
 * v9fs_vfs_create - VFS hook to create files
 * @inode: directory inode the file is being created in
 * @dentry: dentry of the file being created
 * @perm: create permissions
 * @nd: path information (not used here)
 *
 * Thin wrapper: forwards to v9fs_create() with O_RDWR as the open mode
 * and returns its result.
 */

static int
v9fs_vfs_create(struct inode *inode, struct dentry *dentry, int perm,
		struct nameidata *nd)
{
	return v9fs_create(inode, dentry, perm, O_RDWR);
}
507 | |||
/**
 * v9fs_vfs_mkdir - VFS mkdir hook to create a directory
 * @inode: directory inode the new directory is being created in
 * @dentry: dentry of the directory being created
 * @mode: mode for new directory
 *
 * Thin wrapper: forwards to v9fs_create() with S_IFDIR or'ed into @mode
 * and O_RDONLY as the open mode, and returns its result.
 */

static int v9fs_vfs_mkdir(struct inode *inode, struct dentry *dentry, int mode)
{
	return v9fs_create(inode, dentry, mode | S_IFDIR, O_RDONLY);
}
520 | |||
521 | /** | ||
522 | * v9fs_vfs_lookup - VFS lookup hook to "walk" to a new inode | ||
523 | * @dir: inode that is being walked from | ||
524 | * @dentry: dentry that is being walked to? | ||
525 | * @nameidata: path data | ||
526 | * | ||
527 | */ | ||
528 | |||
529 | static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, | ||
530 | struct nameidata *nameidata) | ||
531 | { | ||
532 | struct super_block *sb; | ||
533 | struct v9fs_session_info *v9ses; | ||
534 | struct v9fs_fid *dirfid; | ||
535 | struct v9fs_fid *fid; | ||
536 | struct inode *inode; | ||
537 | struct v9fs_fcall *fcall = NULL; | ||
538 | struct stat newstat; | ||
539 | int dirfidnum = -1; | ||
540 | int newfid = -1; | ||
541 | int result = 0; | ||
542 | |||
543 | dprintk(DEBUG_VFS, "dir: %p dentry: (%s) %p nameidata: %p\n", | ||
544 | dir, dentry->d_iname, dentry, nameidata); | ||
545 | |||
546 | sb = dir->i_sb; | ||
547 | v9ses = v9fs_inode2v9ses(dir); | ||
548 | dirfid = v9fs_fid_lookup(dentry->d_parent, FID_WALK); | ||
549 | |||
550 | if (!dirfid) { | ||
551 | dprintk(DEBUG_ERROR, "no dirfid\n"); | ||
552 | return ERR_PTR(-EINVAL); | ||
553 | } | ||
554 | |||
555 | dirfidnum = dirfid->fid; | ||
556 | |||
557 | if (dirfidnum < 0) { | ||
558 | dprintk(DEBUG_ERROR, "no dirfid for inode %p, #%lu\n", | ||
559 | dir, dir->i_ino); | ||
560 | return ERR_PTR(-EBADF); | ||
561 | } | ||
562 | |||
563 | newfid = v9fs_get_idpool(&v9ses->fidpool); | ||
564 | if (newfid < 0) { | ||
565 | eprintk(KERN_WARNING, "newfid fails!\n"); | ||
566 | return ERR_PTR(-ENOSPC); | ||
567 | } | ||
568 | |||
569 | result = | ||
570 | v9fs_t_walk(v9ses, dirfidnum, newfid, (char *)dentry->d_name.name, | ||
571 | NULL); | ||
572 | if (result < 0) { | ||
573 | v9fs_put_idpool(newfid, &v9ses->fidpool); | ||
574 | if (result == -ENOENT) { | ||
575 | d_add(dentry, NULL); | ||
576 | dprintk(DEBUG_ERROR, | ||
577 | "Return negative dentry %p count %d\n", | ||
578 | dentry, atomic_read(&dentry->d_count)); | ||
579 | return NULL; | ||
580 | } | ||
581 | dprintk(DEBUG_ERROR, "walk error:%d\n", result); | ||
582 | goto FreeFcall; | ||
583 | } | ||
584 | |||
585 | result = v9fs_t_stat(v9ses, newfid, &fcall); | ||
586 | if (result < 0) { | ||
587 | dprintk(DEBUG_ERROR, "stat error\n"); | ||
588 | goto FreeFcall; | ||
589 | } | ||
590 | |||
591 | v9fs_mistat2unix(fcall->params.rstat.stat, &newstat, sb); | ||
592 | inode = v9fs_get_inode(sb, newstat.st_mode); | ||
593 | |||
594 | if (IS_ERR(inode) && (PTR_ERR(inode) == -ENOSPC)) { | ||
595 | eprintk(KERN_WARNING, "inode alloc failes, returns %ld\n", | ||
596 | PTR_ERR(inode)); | ||
597 | |||
598 | result = -ENOSPC; | ||
599 | goto FreeFcall; | ||
600 | } | ||
601 | |||
602 | inode->i_ino = v9fs_qid2ino(&fcall->params.rstat.stat->qid); | ||
603 | |||
604 | fid = v9fs_fid_create(dentry); | ||
605 | if (fid == NULL) { | ||
606 | dprintk(DEBUG_ERROR, "couldn't insert\n"); | ||
607 | result = -ENOMEM; | ||
608 | goto FreeFcall; | ||
609 | } | ||
610 | |||
611 | fid->fid = newfid; | ||
612 | fid->fidopen = 0; | ||
613 | fid->v9ses = v9ses; | ||
614 | fid->qid = fcall->params.rstat.stat->qid; | ||
615 | |||
616 | dentry->d_op = &v9fs_dentry_operations; | ||
617 | v9fs_mistat2inode(fcall->params.rstat.stat, inode, inode->i_sb); | ||
618 | |||
619 | d_add(dentry, inode); | ||
620 | kfree(fcall); | ||
621 | |||
622 | return NULL; | ||
623 | |||
624 | FreeFcall: | ||
625 | kfree(fcall); | ||
626 | return ERR_PTR(result); | ||
627 | } | ||
628 | |||
/**
 * v9fs_vfs_unlink - VFS unlink hook to delete an inode
 * @i: inode handed on as the dir argument of v9fs_remove()
 * @d: dentry that is being unlinked
 *
 * Thin wrapper: calls v9fs_remove() with rmdir set to 0 and returns its
 * result.
 */

static int v9fs_vfs_unlink(struct inode *i, struct dentry *d)
{
	return v9fs_remove(i, d, 0);
}
640 | |||
/**
 * v9fs_vfs_rmdir - VFS rmdir hook to delete a directory
 * @i: inode handed on as the dir argument of v9fs_remove()
 * @d: dentry of the directory that is being removed
 *
 * Thin wrapper: calls v9fs_remove() with rmdir set to 1 and returns its
 * result.
 */

static int v9fs_vfs_rmdir(struct inode *i, struct dentry *d)
{
	return v9fs_remove(i, d, 1);
}
652 | |||
653 | /** | ||
654 | * v9fs_vfs_rename - VFS hook to rename an inode | ||
655 | * @old_dir: old dir inode | ||
656 | * @old_dentry: old dentry | ||
657 | * @new_dir: new dir inode | ||
658 | * @new_dentry: new dentry | ||
659 | * | ||
660 | */ | ||
661 | |||
662 | static int | ||
663 | v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, | ||
664 | struct inode *new_dir, struct dentry *new_dentry) | ||
665 | { | ||
666 | struct inode *old_inode = old_dentry->d_inode; | ||
667 | struct v9fs_session_info *v9ses = v9fs_inode2v9ses(old_inode); | ||
668 | struct v9fs_fid *oldfid = v9fs_fid_lookup(old_dentry, FID_WALK); | ||
669 | struct v9fs_fid *olddirfid = | ||
670 | v9fs_fid_lookup(old_dentry->d_parent, FID_WALK); | ||
671 | struct v9fs_fid *newdirfid = | ||
672 | v9fs_fid_lookup(new_dentry->d_parent, FID_WALK); | ||
673 | struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL); | ||
674 | struct v9fs_fcall *fcall = NULL; | ||
675 | int fid = -1; | ||
676 | int olddirfidnum = -1; | ||
677 | int newdirfidnum = -1; | ||
678 | int retval = 0; | ||
679 | |||
680 | dprintk(DEBUG_VFS, "\n"); | ||
681 | |||
682 | if (!mistat) | ||
683 | return -ENOMEM; | ||
684 | |||
685 | if ((!oldfid) || (!olddirfid) || (!newdirfid)) { | ||
686 | dprintk(DEBUG_ERROR, "problem with arguments\n"); | ||
687 | return -EBADF; | ||
688 | } | ||
689 | |||
690 | /* 9P can only handle file rename in the same directory */ | ||
691 | if (memcmp(&olddirfid->qid, &newdirfid->qid, sizeof(newdirfid->qid))) { | ||
692 | dprintk(DEBUG_ERROR, "old dir and new dir are different\n"); | ||
693 | retval = -EPERM; | ||
694 | goto FreeFcallnBail; | ||
695 | } | ||
696 | |||
697 | fid = oldfid->fid; | ||
698 | olddirfidnum = olddirfid->fid; | ||
699 | newdirfidnum = newdirfid->fid; | ||
700 | |||
701 | if (fid < 0) { | ||
702 | dprintk(DEBUG_ERROR, "no fid for old file #%lu\n", | ||
703 | old_inode->i_ino); | ||
704 | retval = -EBADF; | ||
705 | goto FreeFcallnBail; | ||
706 | } | ||
707 | |||
708 | v9fs_blank_mistat(v9ses, mistat); | ||
709 | |||
710 | strcpy(mistat->data + 1, v9ses->name); | ||
711 | mistat->name = mistat->data + 1 + strlen(v9ses->name); | ||
712 | |||
713 | if (new_dentry->d_name.len > | ||
714 | (v9ses->maxdata - strlen(v9ses->name) - sizeof(struct v9fs_stat))) { | ||
715 | dprintk(DEBUG_ERROR, "new name too long\n"); | ||
716 | goto FreeFcallnBail; | ||
717 | } | ||
718 | |||
719 | strcpy(mistat->name, new_dentry->d_name.name); | ||
720 | retval = v9fs_t_wstat(v9ses, fid, mistat, &fcall); | ||
721 | |||
722 | FreeFcallnBail: | ||
723 | kfree(mistat); | ||
724 | |||
725 | if (retval < 0) | ||
726 | dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n", | ||
727 | FCALL_ERROR(fcall)); | ||
728 | |||
729 | kfree(fcall); | ||
730 | return retval; | ||
731 | } | ||
732 | |||
733 | /** | ||
734 | * v9fs_vfs_getattr - retreive file metadata | ||
735 | * @mnt - mount information | ||
736 | * @dentry - file to get attributes on | ||
737 | * @stat - metadata structure to populate | ||
738 | * | ||
739 | */ | ||
740 | |||
741 | static int | ||
742 | v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, | ||
743 | struct kstat *stat) | ||
744 | { | ||
745 | struct v9fs_fcall *fcall = NULL; | ||
746 | struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode); | ||
747 | struct v9fs_fid *fid = v9fs_fid_lookup(dentry, FID_OP); | ||
748 | int err = -EPERM; | ||
749 | |||
750 | dprintk(DEBUG_VFS, "dentry: %p\n", dentry); | ||
751 | if (!fid) { | ||
752 | dprintk(DEBUG_ERROR, | ||
753 | "couldn't find fid associated with dentry\n"); | ||
754 | return -EBADF; | ||
755 | } | ||
756 | |||
757 | err = v9fs_t_stat(v9ses, fid->fid, &fcall); | ||
758 | |||
759 | if (err < 0) | ||
760 | dprintk(DEBUG_ERROR, "stat error\n"); | ||
761 | else { | ||
762 | v9fs_mistat2inode(fcall->params.rstat.stat, dentry->d_inode, | ||
763 | dentry->d_inode->i_sb); | ||
764 | generic_fillattr(dentry->d_inode, stat); | ||
765 | } | ||
766 | |||
767 | kfree(fcall); | ||
768 | return err; | ||
769 | } | ||
770 | |||
771 | /** | ||
772 | * v9fs_vfs_setattr - set file metadata | ||
773 | * @dentry: file whose metadata to set | ||
774 | * @iattr: metadata assignment structure | ||
775 | * | ||
776 | */ | ||
777 | |||
778 | static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr) | ||
779 | { | ||
780 | struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode); | ||
781 | struct v9fs_fid *fid = v9fs_fid_lookup(dentry, FID_OP); | ||
782 | struct v9fs_fcall *fcall = NULL; | ||
783 | struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL); | ||
784 | int res = -EPERM; | ||
785 | |||
786 | dprintk(DEBUG_VFS, "\n"); | ||
787 | |||
788 | if (!mistat) | ||
789 | return -ENOMEM; | ||
790 | |||
791 | if (!fid) { | ||
792 | dprintk(DEBUG_ERROR, | ||
793 | "Couldn't find fid associated with dentry\n"); | ||
794 | return -EBADF; | ||
795 | } | ||
796 | |||
797 | v9fs_blank_mistat(v9ses, mistat); | ||
798 | if (iattr->ia_valid & ATTR_MODE) | ||
799 | mistat->mode = unixmode2p9mode(v9ses, iattr->ia_mode); | ||
800 | |||
801 | if (iattr->ia_valid & ATTR_MTIME) | ||
802 | mistat->mtime = iattr->ia_mtime.tv_sec; | ||
803 | |||
804 | if (iattr->ia_valid & ATTR_ATIME) | ||
805 | mistat->atime = iattr->ia_atime.tv_sec; | ||
806 | |||
807 | if (iattr->ia_valid & ATTR_SIZE) | ||
808 | mistat->length = iattr->ia_size; | ||
809 | |||
810 | if (v9ses->extended) { | ||
811 | char *ptr = mistat->data+1; | ||
812 | |||
813 | if (iattr->ia_valid & ATTR_UID) { | ||
814 | mistat->uid = ptr; | ||
815 | ptr += 1+sprintf(ptr, "%08x", iattr->ia_uid); | ||
816 | mistat->n_uid = iattr->ia_uid; | ||
817 | } | ||
818 | |||
819 | if (iattr->ia_valid & ATTR_GID) { | ||
820 | mistat->gid = ptr; | ||
821 | ptr += 1+sprintf(ptr, "%08x", iattr->ia_gid); | ||
822 | mistat->n_gid = iattr->ia_gid; | ||
823 | } | ||
824 | } | ||
825 | |||
826 | res = v9fs_t_wstat(v9ses, fid->fid, mistat, &fcall); | ||
827 | |||
828 | if (res < 0) | ||
829 | dprintk(DEBUG_ERROR, "wstat error: %s\n", FCALL_ERROR(fcall)); | ||
830 | |||
831 | kfree(mistat); | ||
832 | kfree(fcall); | ||
833 | |||
834 | if (res >= 0) | ||
835 | res = inode_setattr(dentry->d_inode, iattr); | ||
836 | |||
837 | return res; | ||
838 | } | ||
839 | |||
840 | /** | ||
841 | * v9fs_mistat2inode - populate an inode structure with mistat info | ||
842 | * @mistat: Plan 9 metadata (mistat) structure | ||
843 | * @inode: inode to populate | ||
844 | * @sb: superblock of filesystem | ||
845 | * | ||
846 | */ | ||
847 | |||
848 | void | ||
849 | v9fs_mistat2inode(struct v9fs_stat *mistat, struct inode *inode, | ||
850 | struct super_block *sb) | ||
851 | { | ||
852 | struct v9fs_session_info *v9ses = sb->s_fs_info; | ||
853 | |||
854 | inode->i_nlink = 1; | ||
855 | |||
856 | inode->i_atime.tv_sec = mistat->atime; | ||
857 | inode->i_mtime.tv_sec = mistat->mtime; | ||
858 | inode->i_ctime.tv_sec = mistat->mtime; | ||
859 | |||
860 | inode->i_uid = -1; | ||
861 | inode->i_gid = -1; | ||
862 | |||
863 | if (v9ses->extended) { | ||
864 | /* TODO: string to uid mapping via user-space daemon */ | ||
865 | inode->i_uid = mistat->n_uid; | ||
866 | inode->i_gid = mistat->n_gid; | ||
867 | |||
868 | if (mistat->n_uid == -1) | ||
869 | sscanf(mistat->uid, "%x", &inode->i_uid); | ||
870 | |||
871 | if (mistat->n_gid == -1) | ||
872 | sscanf(mistat->gid, "%x", &inode->i_gid); | ||
873 | } | ||
874 | |||
875 | if (inode->i_uid == -1) | ||
876 | inode->i_uid = v9ses->uid; | ||
877 | if (inode->i_gid == -1) | ||
878 | inode->i_gid = v9ses->gid; | ||
879 | |||
880 | inode->i_mode = p9mode2unixmode(v9ses, mistat->mode); | ||
881 | if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode))) { | ||
882 | char type = 0; | ||
883 | int major = -1; | ||
884 | int minor = -1; | ||
885 | sscanf(mistat->extension, "%c %u %u", &type, &major, &minor); | ||
886 | switch (type) { | ||
887 | case 'c': | ||
888 | inode->i_mode &= ~S_IFBLK; | ||
889 | inode->i_mode |= S_IFCHR; | ||
890 | break; | ||
891 | case 'b': | ||
892 | break; | ||
893 | default: | ||
894 | dprintk(DEBUG_ERROR, "Unknown special type %c (%s)\n", | ||
895 | type, mistat->extension); | ||
896 | }; | ||
897 | inode->i_rdev = MKDEV(major, minor); | ||
898 | } else | ||
899 | inode->i_rdev = 0; | ||
900 | |||
901 | inode->i_size = mistat->length; | ||
902 | |||
903 | inode->i_blksize = sb->s_blocksize; | ||
904 | inode->i_blocks = | ||
905 | (inode->i_size + inode->i_blksize - 1) >> sb->s_blocksize_bits; | ||
906 | } | ||
907 | |||
908 | /** | ||
909 | * v9fs_qid2ino - convert qid into inode number | ||
910 | * @qid: qid to hash | ||
911 | * | ||
912 | * BUG: potential for inode number collisions? | ||
913 | */ | ||
914 | |||
915 | ino_t v9fs_qid2ino(struct v9fs_qid *qid) | ||
916 | { | ||
917 | u64 path = qid->path + 2; | ||
918 | ino_t i = 0; | ||
919 | |||
920 | if (sizeof(ino_t) == sizeof(path)) | ||
921 | memcpy(&i, &path, sizeof(ino_t)); | ||
922 | else | ||
923 | i = (ino_t) (path ^ (path >> 32)); | ||
924 | |||
925 | return i; | ||
926 | } | ||
927 | |||
928 | /** | ||
929 | * v9fs_vfs_symlink - helper function to create symlinks | ||
930 | * @dir: directory inode containing symlink | ||
931 | * @dentry: dentry for symlink | ||
932 | * @symname: symlink data | ||
933 | * | ||
934 | * See 9P2000.u RFC for more information | ||
935 | * | ||
936 | */ | ||
937 | |||
938 | static int | ||
939 | v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) | ||
940 | { | ||
941 | int retval = -EPERM; | ||
942 | struct v9fs_fid *newfid; | ||
943 | struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir); | ||
944 | struct v9fs_fcall *fcall = NULL; | ||
945 | struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL); | ||
946 | |||
947 | dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name, | ||
948 | symname); | ||
949 | |||
950 | if (!mistat) | ||
951 | return -ENOMEM; | ||
952 | |||
953 | if (!v9ses->extended) { | ||
954 | dprintk(DEBUG_ERROR, "not extended\n"); | ||
955 | goto FreeFcall; | ||
956 | } | ||
957 | |||
958 | /* issue a create */ | ||
959 | retval = v9fs_create(dir, dentry, S_IFLNK, 0); | ||
960 | if (retval != 0) | ||
961 | goto FreeFcall; | ||
962 | |||
963 | newfid = v9fs_fid_lookup(dentry, FID_OP); | ||
964 | |||
965 | /* issue a twstat */ | ||
966 | v9fs_blank_mistat(v9ses, mistat); | ||
967 | strcpy(mistat->data + 1, symname); | ||
968 | mistat->extension = mistat->data + 1; | ||
969 | retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall); | ||
970 | if (retval < 0) { | ||
971 | dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n", | ||
972 | FCALL_ERROR(fcall)); | ||
973 | goto FreeFcall; | ||
974 | } | ||
975 | |||
976 | kfree(fcall); | ||
977 | |||
978 | if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) { | ||
979 | dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n", | ||
980 | FCALL_ERROR(fcall)); | ||
981 | goto FreeFcall; | ||
982 | } | ||
983 | |||
984 | d_drop(dentry); /* FID - will this also clunk? */ | ||
985 | |||
986 | FreeFcall: | ||
987 | kfree(mistat); | ||
988 | kfree(fcall); | ||
989 | |||
990 | return retval; | ||
991 | } | ||
992 | |||
993 | /** | ||
994 | * v9fs_readlink - read a symlink's location (internal version) | ||
995 | * @dentry: dentry for symlink | ||
996 | * @buffer: buffer to load symlink location into | ||
997 | * @buflen: length of buffer | ||
998 | * | ||
999 | */ | ||
1000 | |||
1001 | static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen) | ||
1002 | { | ||
1003 | int retval = -EPERM; | ||
1004 | |||
1005 | struct v9fs_fcall *fcall = NULL; | ||
1006 | struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode); | ||
1007 | struct v9fs_fid *fid = v9fs_fid_lookup(dentry, FID_OP); | ||
1008 | |||
1009 | if (!fid) { | ||
1010 | dprintk(DEBUG_ERROR, "could not resolve fid from dentry\n"); | ||
1011 | retval = -EBADF; | ||
1012 | goto FreeFcall; | ||
1013 | } | ||
1014 | |||
1015 | if (!v9ses->extended) { | ||
1016 | retval = -EBADF; | ||
1017 | dprintk(DEBUG_ERROR, "not extended\n"); | ||
1018 | goto FreeFcall; | ||
1019 | } | ||
1020 | |||
1021 | dprintk(DEBUG_VFS, " %s\n", dentry->d_name.name); | ||
1022 | retval = v9fs_t_stat(v9ses, fid->fid, &fcall); | ||
1023 | |||
1024 | if (retval < 0) { | ||
1025 | dprintk(DEBUG_ERROR, "stat error\n"); | ||
1026 | goto FreeFcall; | ||
1027 | } | ||
1028 | |||
1029 | if (!fcall) | ||
1030 | return -EIO; | ||
1031 | |||
1032 | if (!(fcall->params.rstat.stat->mode & V9FS_DMSYMLINK)) { | ||
1033 | retval = -EINVAL; | ||
1034 | goto FreeFcall; | ||
1035 | } | ||
1036 | |||
1037 | /* copy extension buffer into buffer */ | ||
1038 | if (strlen(fcall->params.rstat.stat->extension) < buflen) | ||
1039 | buflen = strlen(fcall->params.rstat.stat->extension); | ||
1040 | |||
1041 | memcpy(buffer, fcall->params.rstat.stat->extension, buflen + 1); | ||
1042 | |||
1043 | retval = buflen; | ||
1044 | |||
1045 | FreeFcall: | ||
1046 | kfree(fcall); | ||
1047 | |||
1048 | return retval; | ||
1049 | } | ||
1050 | |||
1051 | /** | ||
1052 | * v9fs_vfs_readlink - read a symlink's location | ||
1053 | * @dentry: dentry for symlink | ||
1054 | * @buf: buffer to load symlink location into | ||
1055 | * @buflen: length of buffer | ||
1056 | * | ||
1057 | */ | ||
1058 | |||
1059 | static int v9fs_vfs_readlink(struct dentry *dentry, char __user * buffer, | ||
1060 | int buflen) | ||
1061 | { | ||
1062 | int retval; | ||
1063 | int ret; | ||
1064 | char *link = __getname(); | ||
1065 | |||
1066 | if (strlen(link) < buflen) | ||
1067 | buflen = strlen(link); | ||
1068 | |||
1069 | dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); | ||
1070 | |||
1071 | retval = v9fs_readlink(dentry, link, buflen); | ||
1072 | |||
1073 | if (retval > 0) { | ||
1074 | if ((ret = copy_to_user(buffer, link, retval)) != 0) { | ||
1075 | dprintk(DEBUG_ERROR, "problem copying to user: %d\n", | ||
1076 | ret); | ||
1077 | retval = ret; | ||
1078 | } | ||
1079 | } | ||
1080 | |||
1081 | putname(link); | ||
1082 | return retval; | ||
1083 | } | ||
1084 | |||
1085 | /** | ||
1086 | * v9fs_vfs_follow_link - follow a symlink path | ||
1087 | * @dentry: dentry for symlink | ||
1088 | * @nd: nameidata | ||
1089 | * | ||
1090 | */ | ||
1091 | |||
1092 | static void *v9fs_vfs_follow_link(struct dentry *dentry, struct nameidata *nd) | ||
1093 | { | ||
1094 | int len = 0; | ||
1095 | char *link = __getname(); | ||
1096 | |||
1097 | dprintk(DEBUG_VFS, "%s n", dentry->d_name.name); | ||
1098 | |||
1099 | if (!link) | ||
1100 | link = ERR_PTR(-ENOMEM); | ||
1101 | else { | ||
1102 | len = v9fs_readlink(dentry, link, strlen(link)); | ||
1103 | |||
1104 | if (len < 0) { | ||
1105 | putname(link); | ||
1106 | link = ERR_PTR(len); | ||
1107 | } else | ||
1108 | link[len] = 0; | ||
1109 | } | ||
1110 | nd_set_link(nd, link); | ||
1111 | |||
1112 | return NULL; | ||
1113 | } | ||
1114 | |||
1115 | /** | ||
1116 | * v9fs_vfs_put_link - release a symlink path | ||
1117 | * @dentry: dentry for symlink | ||
1118 | * @nd: nameidata | ||
1119 | * | ||
1120 | */ | ||
1121 | |||
1122 | static void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p) | ||
1123 | { | ||
1124 | char *s = nd_get_link(nd); | ||
1125 | |||
1126 | dprintk(DEBUG_VFS, " %s %s\n", dentry->d_name.name, s); | ||
1127 | if (!IS_ERR(s)) | ||
1128 | putname(s); | ||
1129 | } | ||
1130 | |||
1131 | /** | ||
1132 | * v9fs_vfs_link - create a hardlink | ||
1133 | * @old_dentry: dentry for file to link to | ||
1134 | * @dir: inode destination for new link | ||
1135 | * @dentry: dentry for link | ||
1136 | * | ||
1137 | */ | ||
1138 | |||
1139 | /* XXX - lots of code dup'd from symlink and creates, | ||
1140 | * figure out a better reuse strategy | ||
1141 | */ | ||
1142 | |||
1143 | static int | ||
1144 | v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir, | ||
1145 | struct dentry *dentry) | ||
1146 | { | ||
1147 | int retval = -EPERM; | ||
1148 | struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir); | ||
1149 | struct v9fs_fcall *fcall = NULL; | ||
1150 | struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL); | ||
1151 | struct v9fs_fid *oldfid = v9fs_fid_lookup(old_dentry, FID_OP); | ||
1152 | struct v9fs_fid *newfid = NULL; | ||
1153 | char *symname = __getname(); | ||
1154 | |||
1155 | dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name, | ||
1156 | old_dentry->d_name.name); | ||
1157 | |||
1158 | if (!v9ses->extended) { | ||
1159 | dprintk(DEBUG_ERROR, "not extended\n"); | ||
1160 | goto FreeMem; | ||
1161 | } | ||
1162 | |||
1163 | /* get fid of old_dentry */ | ||
1164 | sprintf(symname, "hardlink(%d)\n", oldfid->fid); | ||
1165 | |||
1166 | /* issue a create */ | ||
1167 | retval = v9fs_create(dir, dentry, V9FS_DMLINK, 0); | ||
1168 | if (retval != 0) | ||
1169 | goto FreeMem; | ||
1170 | |||
1171 | newfid = v9fs_fid_lookup(dentry, FID_OP); | ||
1172 | if (!newfid) { | ||
1173 | dprintk(DEBUG_ERROR, "couldn't resolve fid from dentry\n"); | ||
1174 | goto FreeMem; | ||
1175 | } | ||
1176 | |||
1177 | /* issue a twstat */ | ||
1178 | v9fs_blank_mistat(v9ses, mistat); | ||
1179 | strcpy(mistat->data + 1, symname); | ||
1180 | mistat->extension = mistat->data + 1; | ||
1181 | retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall); | ||
1182 | if (retval < 0) { | ||
1183 | dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n", | ||
1184 | FCALL_ERROR(fcall)); | ||
1185 | goto FreeMem; | ||
1186 | } | ||
1187 | |||
1188 | kfree(fcall); | ||
1189 | |||
1190 | if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) { | ||
1191 | dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n", | ||
1192 | FCALL_ERROR(fcall)); | ||
1193 | goto FreeMem; | ||
1194 | } | ||
1195 | |||
1196 | d_drop(dentry); /* FID - will this also clunk? */ | ||
1197 | |||
1198 | kfree(fcall); | ||
1199 | fcall = NULL; | ||
1200 | |||
1201 | FreeMem: | ||
1202 | kfree(mistat); | ||
1203 | kfree(fcall); | ||
1204 | putname(symname); | ||
1205 | return retval; | ||
1206 | } | ||
1207 | |||
1208 | /** | ||
1209 | * v9fs_vfs_mknod - create a special file | ||
1210 | * @dir: inode destination for new link | ||
1211 | * @dentry: dentry for file | ||
1212 | * @mode: mode for creation | ||
1213 | * @dev_t: device associated with special file | ||
1214 | * | ||
1215 | */ | ||
1216 | |||
1217 | static int | ||
1218 | v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) | ||
1219 | { | ||
1220 | int retval = -EPERM; | ||
1221 | struct v9fs_fid *newfid; | ||
1222 | struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir); | ||
1223 | struct v9fs_fcall *fcall = NULL; | ||
1224 | struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL); | ||
1225 | char *symname = __getname(); | ||
1226 | |||
1227 | dprintk(DEBUG_VFS, " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino, | ||
1228 | dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev)); | ||
1229 | |||
1230 | if (!mistat) | ||
1231 | return -ENOMEM; | ||
1232 | |||
1233 | if (!new_valid_dev(rdev)) { | ||
1234 | retval = -EINVAL; | ||
1235 | goto FreeMem; | ||
1236 | } | ||
1237 | |||
1238 | if (!v9ses->extended) { | ||
1239 | dprintk(DEBUG_ERROR, "not extended\n"); | ||
1240 | goto FreeMem; | ||
1241 | } | ||
1242 | |||
1243 | /* issue a create */ | ||
1244 | retval = v9fs_create(dir, dentry, mode, 0); | ||
1245 | |||
1246 | if (retval != 0) | ||
1247 | goto FreeMem; | ||
1248 | |||
1249 | newfid = v9fs_fid_lookup(dentry, FID_OP); | ||
1250 | if (!newfid) { | ||
1251 | dprintk(DEBUG_ERROR, "coudn't resove fid from dentry\n"); | ||
1252 | retval = -EINVAL; | ||
1253 | goto FreeMem; | ||
1254 | } | ||
1255 | |||
1256 | /* build extension */ | ||
1257 | if (S_ISBLK(mode)) | ||
1258 | sprintf(symname, "b %u %u", MAJOR(rdev), MINOR(rdev)); | ||
1259 | else if (S_ISCHR(mode)) | ||
1260 | sprintf(symname, "c %u %u", MAJOR(rdev), MINOR(rdev)); | ||
1261 | else if (S_ISFIFO(mode)) | ||
1262 | ; /* DO NOTHING */ | ||
1263 | else { | ||
1264 | retval = -EINVAL; | ||
1265 | goto FreeMem; | ||
1266 | } | ||
1267 | |||
1268 | if (!S_ISFIFO(mode)) { | ||
1269 | /* issue a twstat */ | ||
1270 | v9fs_blank_mistat(v9ses, mistat); | ||
1271 | strcpy(mistat->data + 1, symname); | ||
1272 | mistat->extension = mistat->data + 1; | ||
1273 | retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall); | ||
1274 | if (retval < 0) { | ||
1275 | dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n", | ||
1276 | FCALL_ERROR(fcall)); | ||
1277 | goto FreeMem; | ||
1278 | } | ||
1279 | } | ||
1280 | |||
1281 | /* need to update dcache so we show up */ | ||
1282 | kfree(fcall); | ||
1283 | |||
1284 | if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) { | ||
1285 | dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n", | ||
1286 | FCALL_ERROR(fcall)); | ||
1287 | goto FreeMem; | ||
1288 | } | ||
1289 | |||
1290 | d_drop(dentry); /* FID - will this also clunk? */ | ||
1291 | |||
1292 | FreeMem: | ||
1293 | kfree(mistat); | ||
1294 | kfree(fcall); | ||
1295 | putname(symname); | ||
1296 | |||
1297 | return retval; | ||
1298 | } | ||
1299 | |||
1300 | static struct inode_operations v9fs_dir_inode_operations_ext = { | ||
1301 | .create = v9fs_vfs_create, | ||
1302 | .lookup = v9fs_vfs_lookup, | ||
1303 | .symlink = v9fs_vfs_symlink, | ||
1304 | .link = v9fs_vfs_link, | ||
1305 | .unlink = v9fs_vfs_unlink, | ||
1306 | .mkdir = v9fs_vfs_mkdir, | ||
1307 | .rmdir = v9fs_vfs_rmdir, | ||
1308 | .mknod = v9fs_vfs_mknod, | ||
1309 | .rename = v9fs_vfs_rename, | ||
1310 | .readlink = v9fs_vfs_readlink, | ||
1311 | .getattr = v9fs_vfs_getattr, | ||
1312 | .setattr = v9fs_vfs_setattr, | ||
1313 | }; | ||
1314 | |||
1315 | static struct inode_operations v9fs_dir_inode_operations = { | ||
1316 | .create = v9fs_vfs_create, | ||
1317 | .lookup = v9fs_vfs_lookup, | ||
1318 | .unlink = v9fs_vfs_unlink, | ||
1319 | .mkdir = v9fs_vfs_mkdir, | ||
1320 | .rmdir = v9fs_vfs_rmdir, | ||
1321 | .mknod = v9fs_vfs_mknod, | ||
1322 | .rename = v9fs_vfs_rename, | ||
1323 | .getattr = v9fs_vfs_getattr, | ||
1324 | .setattr = v9fs_vfs_setattr, | ||
1325 | }; | ||
1326 | |||
1327 | static struct inode_operations v9fs_file_inode_operations = { | ||
1328 | .getattr = v9fs_vfs_getattr, | ||
1329 | .setattr = v9fs_vfs_setattr, | ||
1330 | }; | ||
1331 | |||
1332 | static struct inode_operations v9fs_symlink_inode_operations = { | ||
1333 | .readlink = v9fs_vfs_readlink, | ||
1334 | .follow_link = v9fs_vfs_follow_link, | ||
1335 | .put_link = v9fs_vfs_put_link, | ||
1336 | .getattr = v9fs_vfs_getattr, | ||
1337 | .setattr = v9fs_vfs_setattr, | ||
1338 | }; | ||
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c new file mode 100644 index 000000000000..868f350b2c5f --- /dev/null +++ b/fs/9p/vfs_super.c | |||
@@ -0,0 +1,280 @@ | |||
1 | /* | ||
2 | * linux/fs/9p/vfs_super.c | ||
3 | * | ||
4 | * This file contains superblock ops for 9P2000. It is intended that | ||
5 | * you mount this file system on directories. | ||
6 | * | ||
7 | * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> | ||
8 | * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; either version 2 of the License, or | ||
13 | * (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | * GNU General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to: | ||
22 | * Free Software Foundation | ||
23 | * 51 Franklin Street, Fifth Floor | ||
24 | * Boston, MA 02111-1301 USA | ||
25 | * | ||
26 | */ | ||
27 | |||
28 | #include <linux/kernel.h> | ||
29 | #include <linux/config.h> | ||
30 | #include <linux/module.h> | ||
31 | #include <linux/errno.h> | ||
32 | #include <linux/fs.h> | ||
33 | #include <linux/file.h> | ||
34 | #include <linux/stat.h> | ||
35 | #include <linux/string.h> | ||
36 | #include <linux/smp_lock.h> | ||
37 | #include <linux/inet.h> | ||
38 | #include <linux/pagemap.h> | ||
39 | #include <linux/seq_file.h> | ||
40 | #include <linux/mount.h> | ||
41 | #include <linux/idr.h> | ||
42 | |||
43 | #include "debug.h" | ||
44 | #include "v9fs.h" | ||
45 | #include "9p.h" | ||
46 | #include "v9fs_vfs.h" | ||
47 | #include "conv.h" | ||
48 | #include "fid.h" | ||
49 | |||
50 | static void v9fs_clear_inode(struct inode *); | ||
51 | static struct super_operations v9fs_super_ops; | ||
52 | |||
53 | /** | ||
54 | * v9fs_clear_inode - release an inode | ||
55 | * @inode: inode to release | ||
56 | * | ||
57 | */ | ||
58 | |||
59 | static void v9fs_clear_inode(struct inode *inode) | ||
60 | { | ||
61 | filemap_fdatawrite(inode->i_mapping); | ||
62 | } | ||
63 | |||
64 | /** | ||
65 | * v9fs_set_super - set the superblock | ||
66 | * @s: super block | ||
67 | * @data: file system specific data | ||
68 | * | ||
69 | */ | ||
70 | |||
71 | static int v9fs_set_super(struct super_block *s, void *data) | ||
72 | { | ||
73 | s->s_fs_info = data; | ||
74 | return set_anon_super(s, data); | ||
75 | } | ||
76 | |||
77 | /** | ||
78 | * v9fs_fill_super - populate superblock with info | ||
79 | * @sb: superblock | ||
80 | * @v9ses: session information | ||
81 | * | ||
82 | */ | ||
83 | |||
84 | static void | ||
85 | v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, | ||
86 | int flags) | ||
87 | { | ||
88 | sb->s_maxbytes = MAX_LFS_FILESIZE; | ||
89 | sb->s_blocksize_bits = fls(v9ses->maxdata - 1); | ||
90 | sb->s_blocksize = 1 << sb->s_blocksize_bits; | ||
91 | sb->s_magic = V9FS_MAGIC; | ||
92 | sb->s_op = &v9fs_super_ops; | ||
93 | |||
94 | sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC | | ||
95 | MS_NODIRATIME | MS_NOATIME; | ||
96 | } | ||
97 | |||
98 | /** | ||
99 | * v9fs_get_sb - mount a superblock | ||
100 | * @fs_type: file system type | ||
101 | * @flags: mount flags | ||
102 | * @dev_name: device name that was mounted | ||
103 | * @data: mount options | ||
104 | * | ||
105 | */ | ||
106 | |||
107 | static struct super_block *v9fs_get_sb(struct file_system_type | ||
108 | *fs_type, int flags, | ||
109 | const char *dev_name, void *data) | ||
110 | { | ||
111 | struct super_block *sb = NULL; | ||
112 | struct v9fs_fcall *fcall = NULL; | ||
113 | struct inode *inode = NULL; | ||
114 | struct dentry *root = NULL; | ||
115 | struct v9fs_session_info *v9ses = NULL; | ||
116 | struct v9fs_fid *root_fid = NULL; | ||
117 | int mode = S_IRWXUGO | S_ISVTX; | ||
118 | uid_t uid = current->fsuid; | ||
119 | gid_t gid = current->fsgid; | ||
120 | int stat_result = 0; | ||
121 | int newfid = 0; | ||
122 | int retval = 0; | ||
123 | |||
124 | dprintk(DEBUG_VFS, " \n"); | ||
125 | |||
126 | v9ses = kcalloc(1, sizeof(struct v9fs_session_info), GFP_KERNEL); | ||
127 | if (!v9ses) | ||
128 | return ERR_PTR(-ENOMEM); | ||
129 | |||
130 | if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) { | ||
131 | dprintk(DEBUG_ERROR, "problem initiating session\n"); | ||
132 | retval = newfid; | ||
133 | goto free_session; | ||
134 | } | ||
135 | |||
136 | sb = sget(fs_type, NULL, v9fs_set_super, v9ses); | ||
137 | |||
138 | v9fs_fill_super(sb, v9ses, flags); | ||
139 | |||
140 | inode = v9fs_get_inode(sb, S_IFDIR | mode); | ||
141 | if (IS_ERR(inode)) { | ||
142 | retval = PTR_ERR(inode); | ||
143 | goto put_back_sb; | ||
144 | } | ||
145 | |||
146 | inode->i_uid = uid; | ||
147 | inode->i_gid = gid; | ||
148 | |||
149 | root = d_alloc_root(inode); | ||
150 | |||
151 | if (!root) { | ||
152 | retval = -ENOMEM; | ||
153 | goto release_inode; | ||
154 | } | ||
155 | |||
156 | sb->s_root = root; | ||
157 | |||
158 | /* Setup the Root Inode */ | ||
159 | root_fid = v9fs_fid_create(root); | ||
160 | if (root_fid == NULL) { | ||
161 | retval = -ENOMEM; | ||
162 | goto release_dentry; | ||
163 | } | ||
164 | |||
165 | root_fid->fidopen = 0; | ||
166 | root_fid->v9ses = v9ses; | ||
167 | |||
168 | stat_result = v9fs_t_stat(v9ses, newfid, &fcall); | ||
169 | if (stat_result < 0) { | ||
170 | dprintk(DEBUG_ERROR, "stat error\n"); | ||
171 | v9fs_t_clunk(v9ses, newfid, NULL); | ||
172 | v9fs_put_idpool(newfid, &v9ses->fidpool); | ||
173 | } else { | ||
174 | root_fid->fid = newfid; | ||
175 | root_fid->qid = fcall->params.rstat.stat->qid; | ||
176 | root->d_inode->i_ino = | ||
177 | v9fs_qid2ino(&fcall->params.rstat.stat->qid); | ||
178 | v9fs_mistat2inode(fcall->params.rstat.stat, root->d_inode, sb); | ||
179 | } | ||
180 | |||
181 | kfree(fcall); | ||
182 | |||
183 | if (stat_result < 0) { | ||
184 | retval = stat_result; | ||
185 | goto release_dentry; | ||
186 | } | ||
187 | |||
188 | return sb; | ||
189 | |||
190 | release_dentry: | ||
191 | dput(sb->s_root); | ||
192 | |||
193 | release_inode: | ||
194 | iput(inode); | ||
195 | |||
196 | put_back_sb: | ||
197 | up_write(&sb->s_umount); | ||
198 | deactivate_super(sb); | ||
199 | v9fs_session_close(v9ses); | ||
200 | |||
201 | free_session: | ||
202 | kfree(v9ses); | ||
203 | |||
204 | return ERR_PTR(retval); | ||
205 | } | ||
206 | |||
207 | /** | ||
208 | * v9fs_kill_super - Kill Superblock | ||
209 | * @s: superblock | ||
210 | * | ||
211 | */ | ||
212 | |||
213 | static void v9fs_kill_super(struct super_block *s) | ||
214 | { | ||
215 | struct v9fs_session_info *v9ses = s->s_fs_info; | ||
216 | |||
217 | dprintk(DEBUG_VFS, " %p\n", s); | ||
218 | |||
219 | v9fs_dentry_release(s->s_root); /* clunk root */ | ||
220 | |||
221 | kill_anon_super(s); | ||
222 | |||
223 | v9fs_session_close(v9ses); | ||
224 | kfree(v9ses); | ||
225 | dprintk(DEBUG_VFS, "exiting kill_super\n"); | ||
226 | } | ||
227 | |||
228 | /** | ||
229 | * v9fs_show_options - Show mount options in /proc/mounts | ||
230 | * @m: seq_file to write to | ||
231 | * @mnt: mount descriptor | ||
232 | * | ||
233 | */ | ||
234 | |||
235 | static int v9fs_show_options(struct seq_file *m, struct vfsmount *mnt) | ||
236 | { | ||
237 | struct v9fs_session_info *v9ses = mnt->mnt_sb->s_fs_info; | ||
238 | |||
239 | if (v9ses->debug != 0) | ||
240 | seq_printf(m, ",debug=%u", v9ses->debug); | ||
241 | if (v9ses->port != V9FS_PORT) | ||
242 | seq_printf(m, ",port=%u", v9ses->port); | ||
243 | if (v9ses->maxdata != 9000) | ||
244 | seq_printf(m, ",msize=%u", v9ses->maxdata); | ||
245 | if (v9ses->afid != ~0) | ||
246 | seq_printf(m, ",afid=%u", v9ses->afid); | ||
247 | if (v9ses->proto == PROTO_UNIX) | ||
248 | seq_puts(m, ",proto=unix"); | ||
249 | if (v9ses->extended == 0) | ||
250 | seq_puts(m, ",noextend"); | ||
251 | if (v9ses->nodev == 1) | ||
252 | seq_puts(m, ",nodevmap"); | ||
253 | seq_printf(m, ",name=%s", v9ses->name); | ||
254 | seq_printf(m, ",aname=%s", v9ses->remotename); | ||
255 | seq_printf(m, ",uid=%u", v9ses->uid); | ||
256 | seq_printf(m, ",gid=%u", v9ses->gid); | ||
257 | return 0; | ||
258 | } | ||
259 | |||
260 | static void | ||
261 | v9fs_umount_begin(struct super_block *sb) | ||
262 | { | ||
263 | struct v9fs_session_info *v9ses = sb->s_fs_info; | ||
264 | |||
265 | v9fs_session_cancel(v9ses); | ||
266 | } | ||
267 | |||
268 | static struct super_operations v9fs_super_ops = { | ||
269 | .statfs = simple_statfs, | ||
270 | .clear_inode = v9fs_clear_inode, | ||
271 | .show_options = v9fs_show_options, | ||
272 | .umount_begin = v9fs_umount_begin, | ||
273 | }; | ||
274 | |||
275 | struct file_system_type v9fs_fs_type = { | ||
276 | .name = "9P", | ||
277 | .get_sb = v9fs_get_sb, | ||
278 | .kill_sb = v9fs_kill_super, | ||
279 | .owner = THIS_MODULE, | ||
280 | }; | ||
diff --git a/fs/Kconfig b/fs/Kconfig index e54be7058359..068ccea2f184 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
@@ -382,10 +382,8 @@ config QUOTA | |||
382 | usage (also called disk quotas). Currently, it works for the | 382 | usage (also called disk quotas). Currently, it works for the |
383 | ext2, ext3, and reiserfs file system. ext3 also supports journalled | 383 | ext2, ext3, and reiserfs file system. ext3 also supports journalled |
384 | quotas for which you don't need to run quotacheck(8) after an unclean | 384 | quotas for which you don't need to run quotacheck(8) after an unclean |
385 | shutdown. You need additional software in order to use quota support | 385 | shutdown. |
386 | (you can download sources from | 386 | For further details, read the Quota mini-HOWTO, available from |
387 | <http://www.sf.net/projects/linuxquota/>). For further details, read | ||
388 | the Quota mini-HOWTO, available from | ||
389 | <http://www.tldp.org/docs.html#howto>, or the documentation provided | 387 | <http://www.tldp.org/docs.html#howto>, or the documentation provided |
390 | with the quota tools. Probably the quota support is only useful for | 388 | with the quota tools. Probably the quota support is only useful for |
391 | multi user systems. If unsure, say N. | 389 | multi user systems. If unsure, say N. |
@@ -403,8 +401,7 @@ config QFMT_V2 | |||
403 | depends on QUOTA | 401 | depends on QUOTA |
404 | help | 402 | help |
405 | This quota format allows using quotas with 32-bit UIDs/GIDs. If you | 403 | This quota format allows using quotas with 32-bit UIDs/GIDs. If you |
406 | need this functionality say Y here. Note that you will need recent | 404 | need this functionality say Y here. |
407 | quota utilities (>= 3.01) for new quota format with this kernel. | ||
408 | 405 | ||
409 | config QUOTACTL | 406 | config QUOTACTL |
410 | bool | 407 | bool |
@@ -465,6 +462,19 @@ config AUTOFS4_FS | |||
465 | local network, you probably do not need an automounter, and can say | 462 | local network, you probably do not need an automounter, and can say |
466 | N here. | 463 | N here. |
467 | 464 | ||
465 | config FUSE_FS | ||
466 | tristate "Filesystem in Userspace support" | ||
467 | help | ||
468 | With FUSE it is possible to implement a fully functional filesystem | ||
469 | in a userspace program. | ||
470 | |||
471 | There's also a companion library: libfuse. This library along with | ||
472 | utilities is available from the FUSE homepage: | ||
473 | <http://fuse.sourceforge.net/> | ||
474 | |||
475 | If you want to develop a userspace FS, or if you want to use | ||
476 | a filesystem based on FUSE, answer Y or M. | ||
477 | |||
468 | menu "CD-ROM/DVD Filesystems" | 478 | menu "CD-ROM/DVD Filesystems" |
469 | 479 | ||
470 | config ISO9660_FS | 480 | config ISO9660_FS |
@@ -783,28 +793,6 @@ config SYSFS | |||
783 | 793 | ||
784 | Designers of embedded systems may wish to say N here to conserve space. | 794 | Designers of embedded systems may wish to say N here to conserve space. |
785 | 795 | ||
786 | config DEVPTS_FS_XATTR | ||
787 | bool "/dev/pts Extended Attributes" | ||
788 | depends on UNIX98_PTYS | ||
789 | help | ||
790 | Extended attributes are name:value pairs associated with inodes by | ||
791 | the kernel or by users (see the attr(5) manual page, or visit | ||
792 | <http://acl.bestbits.at/> for details). | ||
793 | |||
794 | If unsure, say N. | ||
795 | |||
796 | config DEVPTS_FS_SECURITY | ||
797 | bool "/dev/pts Security Labels" | ||
798 | depends on DEVPTS_FS_XATTR | ||
799 | help | ||
800 | Security labels support alternative access control models | ||
801 | implemented by security modules like SELinux. This option | ||
802 | enables an extended attribute handler for file security | ||
803 | labels in the /dev/pts filesystem. | ||
804 | |||
805 | If you are not using a security module that requires using | ||
806 | extended attributes for file security labels, say N. | ||
807 | |||
808 | config TMPFS | 796 | config TMPFS |
809 | bool "Virtual memory file system support (former shm fs)" | 797 | bool "Virtual memory file system support (former shm fs)" |
810 | help | 798 | help |
@@ -817,27 +805,6 @@ config TMPFS | |||
817 | 805 | ||
818 | See <file:Documentation/filesystems/tmpfs.txt> for details. | 806 | See <file:Documentation/filesystems/tmpfs.txt> for details. |
819 | 807 | ||
820 | config TMPFS_XATTR | ||
821 | bool "tmpfs Extended Attributes" | ||
822 | depends on TMPFS | ||
823 | help | ||
824 | Extended attributes are name:value pairs associated with inodes by | ||
825 | the kernel or by users (see the attr(5) manual page, or visit | ||
826 | <http://acl.bestbits.at/> for details). | ||
827 | |||
828 | If unsure, say N. | ||
829 | |||
830 | config TMPFS_SECURITY | ||
831 | bool "tmpfs Security Labels" | ||
832 | depends on TMPFS_XATTR | ||
833 | help | ||
834 | Security labels support alternative access control models | ||
835 | implemented by security modules like SELinux. This option | ||
836 | enables an extended attribute handler for file security | ||
837 | labels in the tmpfs filesystem. | ||
838 | If you are not using a security module that requires using | ||
839 | extended attributes for file security labels, say N. | ||
840 | |||
841 | config HUGETLBFS | 808 | config HUGETLBFS |
842 | bool "HugeTLB file system support" | 809 | bool "HugeTLB file system support" |
843 | depends X86 || IA64 || PPC64 || SPARC64 || SUPERH || X86_64 || BROKEN | 810 | depends X86 || IA64 || PPC64 || SPARC64 || SUPERH || X86_64 || BROKEN |
@@ -859,6 +826,18 @@ config RAMFS | |||
859 | To compile this as a module, choose M here: the module will be called | 826 | To compile this as a module, choose M here: the module will be called |
860 | ramfs. | 827 | ramfs. |
861 | 828 | ||
829 | config RELAYFS_FS | ||
830 | tristate "Relayfs file system support" | ||
831 | ---help--- | ||
832 | Relayfs is a high-speed data relay filesystem designed to provide | ||
833 | an efficient mechanism for tools and facilities to relay large | ||
834 | amounts of data from kernel space to user space. | ||
835 | |||
836 | To compile this code as a module, choose M here: the module will be | ||
837 | called relayfs. | ||
838 | |||
839 | If unsure, say N. | ||
840 | |||
862 | endmenu | 841 | endmenu |
863 | 842 | ||
864 | menu "Miscellaneous filesystems" | 843 | menu "Miscellaneous filesystems" |
@@ -1737,6 +1716,17 @@ config AFS_FS | |||
1737 | config RXRPC | 1716 | config RXRPC |
1738 | tristate | 1717 | tristate |
1739 | 1718 | ||
1719 | config 9P_FS | ||
1720 | tristate "Plan 9 Resource Sharing Support (9P2000) (Experimental)" | ||
1721 | depends on INET && EXPERIMENTAL | ||
1722 | help | ||
1723 | If you say Y here, you will get experimental support for | ||
1724 | Plan 9 resource sharing via the 9P2000 protocol. | ||
1725 | |||
1726 | See <http://v9fs.sf.net> for more information. | ||
1727 | |||
1728 | If unsure, say N. | ||
1729 | |||
1740 | endmenu | 1730 | endmenu |
1741 | 1731 | ||
1742 | menu "Partition Types" | 1732 | menu "Partition Types" |
diff --git a/fs/Makefile b/fs/Makefile index cf95eb894fd5..1972da186272 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
@@ -89,10 +89,13 @@ obj-$(CONFIG_QNX4FS_FS) += qnx4/ | |||
89 | obj-$(CONFIG_AUTOFS_FS) += autofs/ | 89 | obj-$(CONFIG_AUTOFS_FS) += autofs/ |
90 | obj-$(CONFIG_AUTOFS4_FS) += autofs4/ | 90 | obj-$(CONFIG_AUTOFS4_FS) += autofs4/ |
91 | obj-$(CONFIG_ADFS_FS) += adfs/ | 91 | obj-$(CONFIG_ADFS_FS) += adfs/ |
92 | obj-$(CONFIG_FUSE_FS) += fuse/ | ||
92 | obj-$(CONFIG_UDF_FS) += udf/ | 93 | obj-$(CONFIG_UDF_FS) += udf/ |
94 | obj-$(CONFIG_RELAYFS_FS) += relayfs/ | ||
93 | obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/ | 95 | obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/ |
94 | obj-$(CONFIG_JFS_FS) += jfs/ | 96 | obj-$(CONFIG_JFS_FS) += jfs/ |
95 | obj-$(CONFIG_XFS_FS) += xfs/ | 97 | obj-$(CONFIG_XFS_FS) += xfs/ |
98 | obj-$(CONFIG_9P_FS) += 9p/ | ||
96 | obj-$(CONFIG_AFS_FS) += afs/ | 99 | obj-$(CONFIG_AFS_FS) += afs/ |
97 | obj-$(CONFIG_BEFS_FS) += befs/ | 100 | obj-$(CONFIG_BEFS_FS) += befs/ |
98 | obj-$(CONFIG_HOSTFS) += hostfs/ | 101 | obj-$(CONFIG_HOSTFS) += hostfs/ |
diff --git a/fs/affs/inode.c b/fs/affs/inode.c index 7aa6f2004536..9ebe881c6786 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c | |||
@@ -255,6 +255,7 @@ void | |||
255 | affs_delete_inode(struct inode *inode) | 255 | affs_delete_inode(struct inode *inode) |
256 | { | 256 | { |
257 | pr_debug("AFFS: delete_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink); | 257 | pr_debug("AFFS: delete_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink); |
258 | truncate_inode_pages(&inode->i_data, 0); | ||
258 | inode->i_size = 0; | 259 | inode->i_size = 0; |
259 | if (S_ISREG(inode->i_mode)) | 260 | if (S_ISREG(inode->i_mode)) |
260 | affs_truncate(inode); | 261 | affs_truncate(inode); |
@@ -29,6 +29,7 @@ | |||
29 | #include <linux/highmem.h> | 29 | #include <linux/highmem.h> |
30 | #include <linux/workqueue.h> | 30 | #include <linux/workqueue.h> |
31 | #include <linux/security.h> | 31 | #include <linux/security.h> |
32 | #include <linux/rcuref.h> | ||
32 | 33 | ||
33 | #include <asm/kmap_types.h> | 34 | #include <asm/kmap_types.h> |
34 | #include <asm/uaccess.h> | 35 | #include <asm/uaccess.h> |
@@ -499,7 +500,7 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req) | |||
499 | /* Must be done under the lock to serialise against cancellation. | 500 | /* Must be done under the lock to serialise against cancellation. |
500 | * Call this aio_fput as it duplicates fput via the fput_work. | 501 | * Call this aio_fput as it duplicates fput via the fput_work. |
501 | */ | 502 | */ |
502 | if (unlikely(atomic_dec_and_test(&req->ki_filp->f_count))) { | 503 | if (unlikely(rcuref_dec_and_test(&req->ki_filp->f_count))) { |
503 | get_ioctx(ctx); | 504 | get_ioctx(ctx); |
504 | spin_lock(&fput_lock); | 505 | spin_lock(&fput_lock); |
505 | list_add(&req->ki_list, &fput_head); | 506 | list_add(&req->ki_list, &fput_head); |
@@ -546,6 +547,24 @@ struct kioctx *lookup_ioctx(unsigned long ctx_id) | |||
546 | return ioctx; | 547 | return ioctx; |
547 | } | 548 | } |
548 | 549 | ||
550 | static int lock_kiocb_action(void *param) | ||
551 | { | ||
552 | schedule(); | ||
553 | return 0; | ||
554 | } | ||
555 | |||
556 | static inline void lock_kiocb(struct kiocb *iocb) | ||
557 | { | ||
558 | wait_on_bit_lock(&iocb->ki_flags, KIF_LOCKED, lock_kiocb_action, | ||
559 | TASK_UNINTERRUPTIBLE); | ||
560 | } | ||
561 | |||
562 | static inline void unlock_kiocb(struct kiocb *iocb) | ||
563 | { | ||
564 | kiocbClearLocked(iocb); | ||
565 | wake_up_bit(&iocb->ki_flags, KIF_LOCKED); | ||
566 | } | ||
567 | |||
549 | /* | 568 | /* |
550 | * use_mm | 569 | * use_mm |
551 | * Makes the calling kernel thread take on the specified | 570 | * Makes the calling kernel thread take on the specified |
@@ -567,6 +586,10 @@ static void use_mm(struct mm_struct *mm) | |||
567 | atomic_inc(&mm->mm_count); | 586 | atomic_inc(&mm->mm_count); |
568 | tsk->mm = mm; | 587 | tsk->mm = mm; |
569 | tsk->active_mm = mm; | 588 | tsk->active_mm = mm; |
589 | /* | ||
590 | * Note that on UML this *requires* PF_BORROWED_MM to be set, otherwise | ||
591 | * it won't work. Update it accordingly if you change it here | ||
592 | */ | ||
570 | activate_mm(active_mm, mm); | 593 | activate_mm(active_mm, mm); |
571 | task_unlock(tsk); | 594 | task_unlock(tsk); |
572 | 595 | ||
@@ -782,7 +805,9 @@ static int __aio_run_iocbs(struct kioctx *ctx) | |||
782 | * Hold an extra reference while retrying i/o. | 805 | * Hold an extra reference while retrying i/o. |
783 | */ | 806 | */ |
784 | iocb->ki_users++; /* grab extra reference */ | 807 | iocb->ki_users++; /* grab extra reference */ |
808 | lock_kiocb(iocb); | ||
785 | aio_run_iocb(iocb); | 809 | aio_run_iocb(iocb); |
810 | unlock_kiocb(iocb); | ||
786 | if (__aio_put_req(ctx, iocb)) /* drop extra ref */ | 811 | if (__aio_put_req(ctx, iocb)) /* drop extra ref */ |
787 | put_ioctx(ctx); | 812 | put_ioctx(ctx); |
788 | } | 813 | } |
@@ -1523,10 +1548,9 @@ int fastcall io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, | |||
1523 | goto out_put_req; | 1548 | goto out_put_req; |
1524 | 1549 | ||
1525 | spin_lock_irq(&ctx->ctx_lock); | 1550 | spin_lock_irq(&ctx->ctx_lock); |
1526 | if (likely(list_empty(&ctx->run_list))) { | 1551 | aio_run_iocb(req); |
1527 | aio_run_iocb(req); | 1552 | unlock_kiocb(req); |
1528 | } else { | 1553 | if (!list_empty(&ctx->run_list)) { |
1529 | list_add_tail(&req->ki_run_list, &ctx->run_list); | ||
1530 | /* drain the run list */ | 1554 | /* drain the run list */ |
1531 | while (__aio_run_iocbs(ctx)) | 1555 | while (__aio_run_iocbs(ctx)) |
1532 | ; | 1556 | ; |
@@ -1657,6 +1681,7 @@ asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb, | |||
1657 | if (NULL != cancel) { | 1681 | if (NULL != cancel) { |
1658 | struct io_event tmp; | 1682 | struct io_event tmp; |
1659 | pr_debug("calling cancel\n"); | 1683 | pr_debug("calling cancel\n"); |
1684 | lock_kiocb(kiocb); | ||
1660 | memset(&tmp, 0, sizeof(tmp)); | 1685 | memset(&tmp, 0, sizeof(tmp)); |
1661 | tmp.obj = (u64)(unsigned long)kiocb->ki_obj.user; | 1686 | tmp.obj = (u64)(unsigned long)kiocb->ki_obj.user; |
1662 | tmp.data = kiocb->ki_user_data; | 1687 | tmp.data = kiocb->ki_user_data; |
@@ -1668,8 +1693,9 @@ asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb, | |||
1668 | if (copy_to_user(result, &tmp, sizeof(tmp))) | 1693 | if (copy_to_user(result, &tmp, sizeof(tmp))) |
1669 | ret = -EFAULT; | 1694 | ret = -EFAULT; |
1670 | } | 1695 | } |
1696 | unlock_kiocb(kiocb); | ||
1671 | } else | 1697 | } else |
1672 | printk(KERN_DEBUG "iocb has no cancel operation\n"); | 1698 | ret = -EINVAL; |
1673 | 1699 | ||
1674 | put_ioctx(ctx); | 1700 | put_ioctx(ctx); |
1675 | 1701 | ||
diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h index 6171431272dc..990c28da5aec 100644 --- a/fs/autofs/autofs_i.h +++ b/fs/autofs/autofs_i.h | |||
@@ -105,6 +105,7 @@ struct autofs_sb_info { | |||
105 | struct file *pipe; | 105 | struct file *pipe; |
106 | pid_t oz_pgrp; | 106 | pid_t oz_pgrp; |
107 | int catatonic; | 107 | int catatonic; |
108 | struct super_block *sb; | ||
108 | unsigned long exp_timeout; | 109 | unsigned long exp_timeout; |
109 | ino_t next_dir_ino; | 110 | ino_t next_dir_ino; |
110 | struct autofs_wait_queue *queues; /* Wait queue pointer */ | 111 | struct autofs_wait_queue *queues; /* Wait queue pointer */ |
@@ -134,7 +135,7 @@ void autofs_hash_insert(struct autofs_dirhash *,struct autofs_dir_ent *); | |||
134 | void autofs_hash_delete(struct autofs_dir_ent *); | 135 | void autofs_hash_delete(struct autofs_dir_ent *); |
135 | struct autofs_dir_ent *autofs_hash_enum(const struct autofs_dirhash *,off_t *,struct autofs_dir_ent *); | 136 | struct autofs_dir_ent *autofs_hash_enum(const struct autofs_dirhash *,off_t *,struct autofs_dir_ent *); |
136 | void autofs_hash_dputall(struct autofs_dirhash *); | 137 | void autofs_hash_dputall(struct autofs_dirhash *); |
137 | void autofs_hash_nuke(struct autofs_dirhash *); | 138 | void autofs_hash_nuke(struct autofs_sb_info *); |
138 | 139 | ||
139 | /* Expiration-handling functions */ | 140 | /* Expiration-handling functions */ |
140 | 141 | ||
diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c index 448143fd0796..5ccfcf26310d 100644 --- a/fs/autofs/dirhash.c +++ b/fs/autofs/dirhash.c | |||
@@ -232,13 +232,13 @@ void autofs_hash_dputall(struct autofs_dirhash *dh) | |||
232 | 232 | ||
233 | /* Delete everything. This is used on filesystem destruction, so we | 233 | /* Delete everything. This is used on filesystem destruction, so we |
234 | make no attempt to keep the pointers valid */ | 234 | make no attempt to keep the pointers valid */ |
235 | void autofs_hash_nuke(struct autofs_dirhash *dh) | 235 | void autofs_hash_nuke(struct autofs_sb_info *sbi) |
236 | { | 236 | { |
237 | int i; | 237 | int i; |
238 | struct autofs_dir_ent *ent, *nent; | 238 | struct autofs_dir_ent *ent, *nent; |
239 | 239 | ||
240 | for ( i = 0 ; i < AUTOFS_HASH_SIZE ; i++ ) { | 240 | for ( i = 0 ; i < AUTOFS_HASH_SIZE ; i++ ) { |
241 | for ( ent = dh->h[i] ; ent ; ent = nent ) { | 241 | for ( ent = sbi->dirhash.h[i] ; ent ; ent = nent ) { |
242 | nent = ent->next; | 242 | nent = ent->next; |
243 | if ( ent->dentry ) | 243 | if ( ent->dentry ) |
244 | dput(ent->dentry); | 244 | dput(ent->dentry); |
@@ -246,4 +246,5 @@ void autofs_hash_nuke(struct autofs_dirhash *dh) | |||
246 | kfree(ent); | 246 | kfree(ent); |
247 | } | 247 | } |
248 | } | 248 | } |
249 | shrink_dcache_sb(sbi->sb); | ||
249 | } | 250 | } |
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c index 4888c1fabbf7..65e5ed42190e 100644 --- a/fs/autofs/inode.c +++ b/fs/autofs/inode.c | |||
@@ -27,7 +27,7 @@ static void autofs_put_super(struct super_block *sb) | |||
27 | if ( !sbi->catatonic ) | 27 | if ( !sbi->catatonic ) |
28 | autofs_catatonic_mode(sbi); /* Free wait queues, close pipe */ | 28 | autofs_catatonic_mode(sbi); /* Free wait queues, close pipe */ |
29 | 29 | ||
30 | autofs_hash_nuke(&sbi->dirhash); | 30 | autofs_hash_nuke(sbi); |
31 | for ( n = 0 ; n < AUTOFS_MAX_SYMLINKS ; n++ ) { | 31 | for ( n = 0 ; n < AUTOFS_MAX_SYMLINKS ; n++ ) { |
32 | if ( test_bit(n, sbi->symlink_bitmap) ) | 32 | if ( test_bit(n, sbi->symlink_bitmap) ) |
33 | kfree(sbi->symlink[n].data); | 33 | kfree(sbi->symlink[n].data); |
@@ -148,6 +148,7 @@ int autofs_fill_super(struct super_block *s, void *data, int silent) | |||
148 | s->s_magic = AUTOFS_SUPER_MAGIC; | 148 | s->s_magic = AUTOFS_SUPER_MAGIC; |
149 | s->s_op = &autofs_sops; | 149 | s->s_op = &autofs_sops; |
150 | s->s_time_gran = 1; | 150 | s->s_time_gran = 1; |
151 | sbi->sb = s; | ||
151 | 152 | ||
152 | root_inode = iget(s, AUTOFS_ROOT_INO); | 153 | root_inode = iget(s, AUTOFS_ROOT_INO); |
153 | root = d_alloc_root(root_inode); | 154 | root = d_alloc_root(root_inode); |
diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h index 1020dbc88bec..1fbc53f14aba 100644 --- a/fs/bfs/bfs.h +++ b/fs/bfs/bfs.h | |||
@@ -20,7 +20,6 @@ struct bfs_sb_info { | |||
20 | unsigned long si_lasti; | 20 | unsigned long si_lasti; |
21 | unsigned long * si_imap; | 21 | unsigned long * si_imap; |
22 | struct buffer_head * si_sbh; /* buffer header w/superblock */ | 22 | struct buffer_head * si_sbh; /* buffer header w/superblock */ |
23 | struct bfs_super_block * si_bfs_sb; /* superblock in si_sbh->b_data */ | ||
24 | }; | 23 | }; |
25 | 24 | ||
26 | /* | 25 | /* |
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index 5a1e5ce057ff..e240c335eb23 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c | |||
@@ -2,6 +2,7 @@ | |||
2 | * fs/bfs/dir.c | 2 | * fs/bfs/dir.c |
3 | * BFS directory operations. | 3 | * BFS directory operations. |
4 | * Copyright (C) 1999,2000 Tigran Aivazian <tigran@veritas.com> | 4 | * Copyright (C) 1999,2000 Tigran Aivazian <tigran@veritas.com> |
5 | * Made endianness-clean by Andrew Stribblehill <ads@wompom.org> 2005 | ||
5 | */ | 6 | */ |
6 | 7 | ||
7 | #include <linux/time.h> | 8 | #include <linux/time.h> |
@@ -20,9 +21,9 @@ | |||
20 | #define dprintf(x...) | 21 | #define dprintf(x...) |
21 | #endif | 22 | #endif |
22 | 23 | ||
23 | static int bfs_add_entry(struct inode * dir, const char * name, int namelen, int ino); | 24 | static int bfs_add_entry(struct inode * dir, const unsigned char * name, int namelen, int ino); |
24 | static struct buffer_head * bfs_find_entry(struct inode * dir, | 25 | static struct buffer_head * bfs_find_entry(struct inode * dir, |
25 | const char * name, int namelen, struct bfs_dirent ** res_dir); | 26 | const unsigned char * name, int namelen, struct bfs_dirent ** res_dir); |
26 | 27 | ||
27 | static int bfs_readdir(struct file * f, void * dirent, filldir_t filldir) | 28 | static int bfs_readdir(struct file * f, void * dirent, filldir_t filldir) |
28 | { | 29 | { |
@@ -53,7 +54,7 @@ static int bfs_readdir(struct file * f, void * dirent, filldir_t filldir) | |||
53 | de = (struct bfs_dirent *)(bh->b_data + offset); | 54 | de = (struct bfs_dirent *)(bh->b_data + offset); |
54 | if (de->ino) { | 55 | if (de->ino) { |
55 | int size = strnlen(de->name, BFS_NAMELEN); | 56 | int size = strnlen(de->name, BFS_NAMELEN); |
56 | if (filldir(dirent, de->name, size, f->f_pos, de->ino, DT_UNKNOWN) < 0) { | 57 | if (filldir(dirent, de->name, size, f->f_pos, le16_to_cpu(de->ino), DT_UNKNOWN) < 0) { |
57 | brelse(bh); | 58 | brelse(bh); |
58 | unlock_kernel(); | 59 | unlock_kernel(); |
59 | return 0; | 60 | return 0; |
@@ -107,7 +108,7 @@ static int bfs_create(struct inode * dir, struct dentry * dentry, int mode, | |||
107 | inode->i_mapping->a_ops = &bfs_aops; | 108 | inode->i_mapping->a_ops = &bfs_aops; |
108 | inode->i_mode = mode; | 109 | inode->i_mode = mode; |
109 | inode->i_ino = ino; | 110 | inode->i_ino = ino; |
110 | BFS_I(inode)->i_dsk_ino = ino; | 111 | BFS_I(inode)->i_dsk_ino = cpu_to_le16(ino); |
111 | BFS_I(inode)->i_sblock = 0; | 112 | BFS_I(inode)->i_sblock = 0; |
112 | BFS_I(inode)->i_eblock = 0; | 113 | BFS_I(inode)->i_eblock = 0; |
113 | insert_inode_hash(inode); | 114 | insert_inode_hash(inode); |
@@ -139,7 +140,7 @@ static struct dentry * bfs_lookup(struct inode * dir, struct dentry * dentry, st | |||
139 | lock_kernel(); | 140 | lock_kernel(); |
140 | bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de); | 141 | bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de); |
141 | if (bh) { | 142 | if (bh) { |
142 | unsigned long ino = le32_to_cpu(de->ino); | 143 | unsigned long ino = (unsigned long)le16_to_cpu(de->ino); |
143 | brelse(bh); | 144 | brelse(bh); |
144 | inode = iget(dir->i_sb, ino); | 145 | inode = iget(dir->i_sb, ino); |
145 | if (!inode) { | 146 | if (!inode) { |
@@ -183,7 +184,7 @@ static int bfs_unlink(struct inode * dir, struct dentry * dentry) | |||
183 | inode = dentry->d_inode; | 184 | inode = dentry->d_inode; |
184 | lock_kernel(); | 185 | lock_kernel(); |
185 | bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de); | 186 | bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de); |
186 | if (!bh || de->ino != inode->i_ino) | 187 | if (!bh || le16_to_cpu(de->ino) != inode->i_ino) |
187 | goto out_brelse; | 188 | goto out_brelse; |
188 | 189 | ||
189 | if (!inode->i_nlink) { | 190 | if (!inode->i_nlink) { |
@@ -224,7 +225,7 @@ static int bfs_rename(struct inode * old_dir, struct dentry * old_dentry, | |||
224 | old_dentry->d_name.name, | 225 | old_dentry->d_name.name, |
225 | old_dentry->d_name.len, &old_de); | 226 | old_dentry->d_name.len, &old_de); |
226 | 227 | ||
227 | if (!old_bh || old_de->ino != old_inode->i_ino) | 228 | if (!old_bh || le16_to_cpu(old_de->ino) != old_inode->i_ino) |
228 | goto end_rename; | 229 | goto end_rename; |
229 | 230 | ||
230 | error = -EPERM; | 231 | error = -EPERM; |
@@ -270,7 +271,7 @@ struct inode_operations bfs_dir_inops = { | |||
270 | .rename = bfs_rename, | 271 | .rename = bfs_rename, |
271 | }; | 272 | }; |
272 | 273 | ||
273 | static int bfs_add_entry(struct inode * dir, const char * name, int namelen, int ino) | 274 | static int bfs_add_entry(struct inode * dir, const unsigned char * name, int namelen, int ino) |
274 | { | 275 | { |
275 | struct buffer_head * bh; | 276 | struct buffer_head * bh; |
276 | struct bfs_dirent * de; | 277 | struct bfs_dirent * de; |
@@ -304,7 +305,7 @@ static int bfs_add_entry(struct inode * dir, const char * name, int namelen, int | |||
304 | } | 305 | } |
305 | dir->i_mtime = CURRENT_TIME_SEC; | 306 | dir->i_mtime = CURRENT_TIME_SEC; |
306 | mark_inode_dirty(dir); | 307 | mark_inode_dirty(dir); |
307 | de->ino = ino; | 308 | de->ino = cpu_to_le16((u16)ino); |
308 | for (i=0; i<BFS_NAMELEN; i++) | 309 | for (i=0; i<BFS_NAMELEN; i++) |
309 | de->name[i] = (i < namelen) ? name[i] : 0; | 310 | de->name[i] = (i < namelen) ? name[i] : 0; |
310 | mark_buffer_dirty(bh); | 311 | mark_buffer_dirty(bh); |
@@ -317,7 +318,7 @@ static int bfs_add_entry(struct inode * dir, const char * name, int namelen, int | |||
317 | return -ENOSPC; | 318 | return -ENOSPC; |
318 | } | 319 | } |
319 | 320 | ||
320 | static inline int bfs_namecmp(int len, const char * name, const char * buffer) | 321 | static inline int bfs_namecmp(int len, const unsigned char * name, const char * buffer) |
321 | { | 322 | { |
322 | if (len < BFS_NAMELEN && buffer[len]) | 323 | if (len < BFS_NAMELEN && buffer[len]) |
323 | return 0; | 324 | return 0; |
@@ -325,7 +326,7 @@ static inline int bfs_namecmp(int len, const char * name, const char * buffer) | |||
325 | } | 326 | } |
326 | 327 | ||
327 | static struct buffer_head * bfs_find_entry(struct inode * dir, | 328 | static struct buffer_head * bfs_find_entry(struct inode * dir, |
328 | const char * name, int namelen, struct bfs_dirent ** res_dir) | 329 | const unsigned char * name, int namelen, struct bfs_dirent ** res_dir) |
329 | { | 330 | { |
330 | unsigned long block, offset; | 331 | unsigned long block, offset; |
331 | struct buffer_head * bh; | 332 | struct buffer_head * bh; |
@@ -346,7 +347,7 @@ static struct buffer_head * bfs_find_entry(struct inode * dir, | |||
346 | } | 347 | } |
347 | de = (struct bfs_dirent *)(bh->b_data + offset); | 348 | de = (struct bfs_dirent *)(bh->b_data + offset); |
348 | offset += BFS_DIRENT_SIZE; | 349 | offset += BFS_DIRENT_SIZE; |
349 | if (de->ino && bfs_namecmp(namelen, name, de->name)) { | 350 | if (le16_to_cpu(de->ino) && bfs_namecmp(namelen, name, de->name)) { |
350 | *res_dir = de; | 351 | *res_dir = de; |
351 | return bh; | 352 | return bh; |
352 | } | 353 | } |
diff --git a/fs/bfs/file.c b/fs/bfs/file.c index 747fd1ea55e0..807723b65daf 100644 --- a/fs/bfs/file.c +++ b/fs/bfs/file.c | |||
@@ -40,8 +40,8 @@ static int bfs_move_block(unsigned long from, unsigned long to, struct super_blo | |||
40 | return 0; | 40 | return 0; |
41 | } | 41 | } |
42 | 42 | ||
43 | static int bfs_move_blocks(struct super_block *sb, unsigned long start, unsigned long end, | 43 | static int bfs_move_blocks(struct super_block *sb, unsigned long start, |
44 | unsigned long where) | 44 | unsigned long end, unsigned long where) |
45 | { | 45 | { |
46 | unsigned long i; | 46 | unsigned long i; |
47 | 47 | ||
@@ -57,20 +57,21 @@ static int bfs_move_blocks(struct super_block *sb, unsigned long start, unsigned | |||
57 | static int bfs_get_block(struct inode * inode, sector_t block, | 57 | static int bfs_get_block(struct inode * inode, sector_t block, |
58 | struct buffer_head * bh_result, int create) | 58 | struct buffer_head * bh_result, int create) |
59 | { | 59 | { |
60 | long phys; | 60 | unsigned long phys; |
61 | int err; | 61 | int err; |
62 | struct super_block *sb = inode->i_sb; | 62 | struct super_block *sb = inode->i_sb; |
63 | struct bfs_sb_info *info = BFS_SB(sb); | 63 | struct bfs_sb_info *info = BFS_SB(sb); |
64 | struct bfs_inode_info *bi = BFS_I(inode); | 64 | struct bfs_inode_info *bi = BFS_I(inode); |
65 | struct buffer_head *sbh = info->si_sbh; | 65 | struct buffer_head *sbh = info->si_sbh; |
66 | 66 | ||
67 | if (block < 0 || block > info->si_blocks) | 67 | if (block > info->si_blocks) |
68 | return -EIO; | 68 | return -EIO; |
69 | 69 | ||
70 | phys = bi->i_sblock + block; | 70 | phys = bi->i_sblock + block; |
71 | if (!create) { | 71 | if (!create) { |
72 | if (phys <= bi->i_eblock) { | 72 | if (phys <= bi->i_eblock) { |
73 | dprintf("c=%d, b=%08lx, phys=%08lx (granted)\n", create, block, phys); | 73 | dprintf("c=%d, b=%08lx, phys=%09lx (granted)\n", |
74 | create, (unsigned long)block, phys); | ||
74 | map_bh(bh_result, sb, phys); | 75 | map_bh(bh_result, sb, phys); |
75 | } | 76 | } |
76 | return 0; | 77 | return 0; |
@@ -80,7 +81,7 @@ static int bfs_get_block(struct inode * inode, sector_t block, | |||
80 | of blocks allocated for this file, we can grant it */ | 81 | of blocks allocated for this file, we can grant it */ |
81 | if (inode->i_size && phys <= bi->i_eblock) { | 82 | if (inode->i_size && phys <= bi->i_eblock) { |
82 | dprintf("c=%d, b=%08lx, phys=%08lx (interim block granted)\n", | 83 | dprintf("c=%d, b=%08lx, phys=%08lx (interim block granted)\n", |
83 | create, block, phys); | 84 | create, (unsigned long)block, phys); |
84 | map_bh(bh_result, sb, phys); | 85 | map_bh(bh_result, sb, phys); |
85 | return 0; | 86 | return 0; |
86 | } | 87 | } |
@@ -88,11 +89,12 @@ static int bfs_get_block(struct inode * inode, sector_t block, | |||
88 | /* the rest has to be protected against itself */ | 89 | /* the rest has to be protected against itself */ |
89 | lock_kernel(); | 90 | lock_kernel(); |
90 | 91 | ||
91 | /* if the last data block for this file is the last allocated block, we can | 92 | /* if the last data block for this file is the last allocated |
92 | extend the file trivially, without moving it anywhere */ | 93 | block, we can extend the file trivially, without moving it |
94 | anywhere */ | ||
93 | if (bi->i_eblock == info->si_lf_eblk) { | 95 | if (bi->i_eblock == info->si_lf_eblk) { |
94 | dprintf("c=%d, b=%08lx, phys=%08lx (simple extension)\n", | 96 | dprintf("c=%d, b=%08lx, phys=%08lx (simple extension)\n", |
95 | create, block, phys); | 97 | create, (unsigned long)block, phys); |
96 | map_bh(bh_result, sb, phys); | 98 | map_bh(bh_result, sb, phys); |
97 | info->si_freeb -= phys - bi->i_eblock; | 99 | info->si_freeb -= phys - bi->i_eblock; |
98 | info->si_lf_eblk = bi->i_eblock = phys; | 100 | info->si_lf_eblk = bi->i_eblock = phys; |
@@ -114,7 +116,8 @@ static int bfs_get_block(struct inode * inode, sector_t block, | |||
114 | } else | 116 | } else |
115 | err = 0; | 117 | err = 0; |
116 | 118 | ||
117 | dprintf("c=%d, b=%08lx, phys=%08lx (moved)\n", create, block, phys); | 119 | dprintf("c=%d, b=%08lx, phys=%08lx (moved)\n", |
120 | create, (unsigned long)block, phys); | ||
118 | bi->i_sblock = phys; | 121 | bi->i_sblock = phys; |
119 | phys += block; | 122 | phys += block; |
120 | info->si_lf_eblk = bi->i_eblock = phys; | 123 | info->si_lf_eblk = bi->i_eblock = phys; |
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 64e0fb33fc0c..c7b39aa279d7 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c | |||
@@ -3,6 +3,8 @@ | |||
3 | * BFS superblock and inode operations. | 3 | * BFS superblock and inode operations. |
4 | * Copyright (C) 1999,2000 Tigran Aivazian <tigran@veritas.com> | 4 | * Copyright (C) 1999,2000 Tigran Aivazian <tigran@veritas.com> |
5 | * From fs/minix, Copyright (C) 1991, 1992 Linus Torvalds. | 5 | * From fs/minix, Copyright (C) 1991, 1992 Linus Torvalds. |
6 | * | ||
7 | * Made endianness-clean by Andrew Stribblehill <ads@wompom.org>, 2005. | ||
6 | */ | 8 | */ |
7 | 9 | ||
8 | #include <linux/module.h> | 10 | #include <linux/module.h> |
@@ -54,46 +56,50 @@ static void bfs_read_inode(struct inode * inode) | |||
54 | off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK; | 56 | off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK; |
55 | di = (struct bfs_inode *)bh->b_data + off; | 57 | di = (struct bfs_inode *)bh->b_data + off; |
56 | 58 | ||
57 | inode->i_mode = 0x0000FFFF & di->i_mode; | 59 | inode->i_mode = 0x0000FFFF & le32_to_cpu(di->i_mode); |
58 | if (di->i_vtype == BFS_VDIR) { | 60 | if (le32_to_cpu(di->i_vtype) == BFS_VDIR) { |
59 | inode->i_mode |= S_IFDIR; | 61 | inode->i_mode |= S_IFDIR; |
60 | inode->i_op = &bfs_dir_inops; | 62 | inode->i_op = &bfs_dir_inops; |
61 | inode->i_fop = &bfs_dir_operations; | 63 | inode->i_fop = &bfs_dir_operations; |
62 | } else if (di->i_vtype == BFS_VREG) { | 64 | } else if (le32_to_cpu(di->i_vtype) == BFS_VREG) { |
63 | inode->i_mode |= S_IFREG; | 65 | inode->i_mode |= S_IFREG; |
64 | inode->i_op = &bfs_file_inops; | 66 | inode->i_op = &bfs_file_inops; |
65 | inode->i_fop = &bfs_file_operations; | 67 | inode->i_fop = &bfs_file_operations; |
66 | inode->i_mapping->a_ops = &bfs_aops; | 68 | inode->i_mapping->a_ops = &bfs_aops; |
67 | } | 69 | } |
68 | 70 | ||
69 | inode->i_uid = di->i_uid; | 71 | BFS_I(inode)->i_sblock = le32_to_cpu(di->i_sblock); |
70 | inode->i_gid = di->i_gid; | 72 | BFS_I(inode)->i_eblock = le32_to_cpu(di->i_eblock); |
71 | inode->i_nlink = di->i_nlink; | 73 | inode->i_uid = le32_to_cpu(di->i_uid); |
74 | inode->i_gid = le32_to_cpu(di->i_gid); | ||
75 | inode->i_nlink = le32_to_cpu(di->i_nlink); | ||
72 | inode->i_size = BFS_FILESIZE(di); | 76 | inode->i_size = BFS_FILESIZE(di); |
73 | inode->i_blocks = BFS_FILEBLOCKS(di); | 77 | inode->i_blocks = BFS_FILEBLOCKS(di); |
78 | if (inode->i_size || inode->i_blocks) dprintf("Registered inode with %lld size, %ld blocks\n", inode->i_size, inode->i_blocks); | ||
74 | inode->i_blksize = PAGE_SIZE; | 79 | inode->i_blksize = PAGE_SIZE; |
75 | inode->i_atime.tv_sec = di->i_atime; | 80 | inode->i_atime.tv_sec = le32_to_cpu(di->i_atime); |
76 | inode->i_mtime.tv_sec = di->i_mtime; | 81 | inode->i_mtime.tv_sec = le32_to_cpu(di->i_mtime); |
77 | inode->i_ctime.tv_sec = di->i_ctime; | 82 | inode->i_ctime.tv_sec = le32_to_cpu(di->i_ctime); |
78 | inode->i_atime.tv_nsec = 0; | 83 | inode->i_atime.tv_nsec = 0; |
79 | inode->i_mtime.tv_nsec = 0; | 84 | inode->i_mtime.tv_nsec = 0; |
80 | inode->i_ctime.tv_nsec = 0; | 85 | inode->i_ctime.tv_nsec = 0; |
81 | BFS_I(inode)->i_dsk_ino = di->i_ino; /* can be 0 so we store a copy */ | 86 | BFS_I(inode)->i_dsk_ino = le16_to_cpu(di->i_ino); /* can be 0 so we store a copy */ |
82 | BFS_I(inode)->i_sblock = di->i_sblock; | ||
83 | BFS_I(inode)->i_eblock = di->i_eblock; | ||
84 | 87 | ||
85 | brelse(bh); | 88 | brelse(bh); |
86 | } | 89 | } |
87 | 90 | ||
88 | static int bfs_write_inode(struct inode * inode, int unused) | 91 | static int bfs_write_inode(struct inode * inode, int unused) |
89 | { | 92 | { |
90 | unsigned long ino = inode->i_ino; | 93 | unsigned int ino = (u16)inode->i_ino; |
94 | unsigned long i_sblock; | ||
91 | struct bfs_inode * di; | 95 | struct bfs_inode * di; |
92 | struct buffer_head * bh; | 96 | struct buffer_head * bh; |
93 | int block, off; | 97 | int block, off; |
94 | 98 | ||
99 | dprintf("ino=%08x\n", ino); | ||
100 | |||
95 | if (ino < BFS_ROOT_INO || ino > BFS_SB(inode->i_sb)->si_lasti) { | 101 | if (ino < BFS_ROOT_INO || ino > BFS_SB(inode->i_sb)->si_lasti) { |
96 | printf("Bad inode number %s:%08lx\n", inode->i_sb->s_id, ino); | 102 | printf("Bad inode number %s:%08x\n", inode->i_sb->s_id, ino); |
97 | return -EIO; | 103 | return -EIO; |
98 | } | 104 | } |
99 | 105 | ||
@@ -101,7 +107,7 @@ static int bfs_write_inode(struct inode * inode, int unused) | |||
101 | block = (ino - BFS_ROOT_INO)/BFS_INODES_PER_BLOCK + 1; | 107 | block = (ino - BFS_ROOT_INO)/BFS_INODES_PER_BLOCK + 1; |
102 | bh = sb_bread(inode->i_sb, block); | 108 | bh = sb_bread(inode->i_sb, block); |
103 | if (!bh) { | 109 | if (!bh) { |
104 | printf("Unable to read inode %s:%08lx\n", inode->i_sb->s_id, ino); | 110 | printf("Unable to read inode %s:%08x\n", inode->i_sb->s_id, ino); |
105 | unlock_kernel(); | 111 | unlock_kernel(); |
106 | return -EIO; | 112 | return -EIO; |
107 | } | 113 | } |
@@ -109,24 +115,26 @@ static int bfs_write_inode(struct inode * inode, int unused) | |||
109 | off = (ino - BFS_ROOT_INO)%BFS_INODES_PER_BLOCK; | 115 | off = (ino - BFS_ROOT_INO)%BFS_INODES_PER_BLOCK; |
110 | di = (struct bfs_inode *)bh->b_data + off; | 116 | di = (struct bfs_inode *)bh->b_data + off; |
111 | 117 | ||
112 | if (inode->i_ino == BFS_ROOT_INO) | 118 | if (ino == BFS_ROOT_INO) |
113 | di->i_vtype = BFS_VDIR; | 119 | di->i_vtype = cpu_to_le32(BFS_VDIR); |
114 | else | 120 | else |
115 | di->i_vtype = BFS_VREG; | 121 | di->i_vtype = cpu_to_le32(BFS_VREG); |
116 | 122 | ||
117 | di->i_ino = inode->i_ino; | 123 | di->i_ino = cpu_to_le16(ino); |
118 | di->i_mode = inode->i_mode; | 124 | di->i_mode = cpu_to_le32(inode->i_mode); |
119 | di->i_uid = inode->i_uid; | 125 | di->i_uid = cpu_to_le32(inode->i_uid); |
120 | di->i_gid = inode->i_gid; | 126 | di->i_gid = cpu_to_le32(inode->i_gid); |
121 | di->i_nlink = inode->i_nlink; | 127 | di->i_nlink = cpu_to_le32(inode->i_nlink); |
122 | di->i_atime = inode->i_atime.tv_sec; | 128 | di->i_atime = cpu_to_le32(inode->i_atime.tv_sec); |
123 | di->i_mtime = inode->i_mtime.tv_sec; | 129 | di->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); |
124 | di->i_ctime = inode->i_ctime.tv_sec; | 130 | di->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); |
125 | di->i_sblock = BFS_I(inode)->i_sblock; | 131 | i_sblock = BFS_I(inode)->i_sblock; |
126 | di->i_eblock = BFS_I(inode)->i_eblock; | 132 | di->i_sblock = cpu_to_le32(i_sblock); |
127 | di->i_eoffset = di->i_sblock * BFS_BSIZE + inode->i_size - 1; | 133 | di->i_eblock = cpu_to_le32(BFS_I(inode)->i_eblock); |
134 | di->i_eoffset = cpu_to_le32(i_sblock * BFS_BSIZE + inode->i_size - 1); | ||
128 | 135 | ||
129 | mark_buffer_dirty(bh); | 136 | mark_buffer_dirty(bh); |
137 | dprintf("Written ino=%d into %d:%d\n",le16_to_cpu(di->i_ino),block,off); | ||
130 | brelse(bh); | 138 | brelse(bh); |
131 | unlock_kernel(); | 139 | unlock_kernel(); |
132 | return 0; | 140 | return 0; |
@@ -140,11 +148,14 @@ static void bfs_delete_inode(struct inode * inode) | |||
140 | int block, off; | 148 | int block, off; |
141 | struct super_block * s = inode->i_sb; | 149 | struct super_block * s = inode->i_sb; |
142 | struct bfs_sb_info * info = BFS_SB(s); | 150 | struct bfs_sb_info * info = BFS_SB(s); |
151 | struct bfs_inode_info * bi = BFS_I(inode); | ||
143 | 152 | ||
144 | dprintf("ino=%08lx\n", inode->i_ino); | 153 | dprintf("ino=%08lx\n", ino); |
145 | 154 | ||
146 | if (inode->i_ino < BFS_ROOT_INO || inode->i_ino > info->si_lasti) { | 155 | truncate_inode_pages(&inode->i_data, 0); |
147 | printf("invalid ino=%08lx\n", inode->i_ino); | 156 | |
157 | if (ino < BFS_ROOT_INO || ino > info->si_lasti) { | ||
158 | printf("invalid ino=%08lx\n", ino); | ||
148 | return; | 159 | return; |
149 | } | 160 | } |
150 | 161 | ||
@@ -160,13 +171,13 @@ static void bfs_delete_inode(struct inode * inode) | |||
160 | return; | 171 | return; |
161 | } | 172 | } |
162 | off = (ino - BFS_ROOT_INO)%BFS_INODES_PER_BLOCK; | 173 | off = (ino - BFS_ROOT_INO)%BFS_INODES_PER_BLOCK; |
163 | di = (struct bfs_inode *)bh->b_data + off; | 174 | di = (struct bfs_inode *) bh->b_data + off; |
164 | if (di->i_ino) { | 175 | if (bi->i_dsk_ino) { |
165 | info->si_freeb += BFS_FILEBLOCKS(di); | 176 | info->si_freeb += 1 + bi->i_eblock - bi->i_sblock; |
166 | info->si_freei++; | 177 | info->si_freei++; |
167 | clear_bit(di->i_ino, info->si_imap); | 178 | clear_bit(ino, info->si_imap); |
168 | dump_imap("delete_inode", s); | 179 | dump_imap("delete_inode", s); |
169 | } | 180 | } |
170 | di->i_ino = 0; | 181 | di->i_ino = 0; |
171 | di->i_sblock = 0; | 182 | di->i_sblock = 0; |
172 | mark_buffer_dirty(bh); | 183 | mark_buffer_dirty(bh); |
@@ -272,14 +283,14 @@ static struct super_operations bfs_sops = { | |||
272 | 283 | ||
273 | void dump_imap(const char *prefix, struct super_block * s) | 284 | void dump_imap(const char *prefix, struct super_block * s) |
274 | { | 285 | { |
275 | #if 0 | 286 | #ifdef DEBUG |
276 | int i; | 287 | int i; |
277 | char *tmpbuf = (char *)get_zeroed_page(GFP_KERNEL); | 288 | char *tmpbuf = (char *)get_zeroed_page(GFP_KERNEL); |
278 | 289 | ||
279 | if (!tmpbuf) | 290 | if (!tmpbuf) |
280 | return; | 291 | return; |
281 | for (i=BFS_SB(s)->si_lasti; i>=0; i--) { | 292 | for (i=BFS_SB(s)->si_lasti; i>=0; i--) { |
282 | if (i>PAGE_SIZE-100) break; | 293 | if (i > PAGE_SIZE-100) break; |
283 | if (test_bit(i, BFS_SB(s)->si_imap)) | 294 | if (test_bit(i, BFS_SB(s)->si_imap)) |
284 | strcat(tmpbuf, "1"); | 295 | strcat(tmpbuf, "1"); |
285 | else | 296 | else |
@@ -295,7 +306,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) | |||
295 | struct buffer_head * bh; | 306 | struct buffer_head * bh; |
296 | struct bfs_super_block * bfs_sb; | 307 | struct bfs_super_block * bfs_sb; |
297 | struct inode * inode; | 308 | struct inode * inode; |
298 | int i, imap_len; | 309 | unsigned i, imap_len; |
299 | struct bfs_sb_info * info; | 310 | struct bfs_sb_info * info; |
300 | 311 | ||
301 | info = kmalloc(sizeof(*info), GFP_KERNEL); | 312 | info = kmalloc(sizeof(*info), GFP_KERNEL); |
@@ -310,19 +321,18 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) | |||
310 | if(!bh) | 321 | if(!bh) |
311 | goto out; | 322 | goto out; |
312 | bfs_sb = (struct bfs_super_block *)bh->b_data; | 323 | bfs_sb = (struct bfs_super_block *)bh->b_data; |
313 | if (bfs_sb->s_magic != BFS_MAGIC) { | 324 | if (le32_to_cpu(bfs_sb->s_magic) != BFS_MAGIC) { |
314 | if (!silent) | 325 | if (!silent) |
315 | printf("No BFS filesystem on %s (magic=%08x)\n", | 326 | printf("No BFS filesystem on %s (magic=%08x)\n", |
316 | s->s_id, bfs_sb->s_magic); | 327 | s->s_id, le32_to_cpu(bfs_sb->s_magic)); |
317 | goto out; | 328 | goto out; |
318 | } | 329 | } |
319 | if (BFS_UNCLEAN(bfs_sb, s) && !silent) | 330 | if (BFS_UNCLEAN(bfs_sb, s) && !silent) |
320 | printf("%s is unclean, continuing\n", s->s_id); | 331 | printf("%s is unclean, continuing\n", s->s_id); |
321 | 332 | ||
322 | s->s_magic = BFS_MAGIC; | 333 | s->s_magic = BFS_MAGIC; |
323 | info->si_bfs_sb = bfs_sb; | ||
324 | info->si_sbh = bh; | 334 | info->si_sbh = bh; |
325 | info->si_lasti = (bfs_sb->s_start - BFS_BSIZE)/sizeof(struct bfs_inode) | 335 | info->si_lasti = (le32_to_cpu(bfs_sb->s_start) - BFS_BSIZE)/sizeof(struct bfs_inode) |
326 | + BFS_ROOT_INO - 1; | 336 | + BFS_ROOT_INO - 1; |
327 | 337 | ||
328 | imap_len = info->si_lasti/8 + 1; | 338 | imap_len = info->si_lasti/8 + 1; |
@@ -346,8 +356,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) | |||
346 | goto out; | 356 | goto out; |
347 | } | 357 | } |
348 | 358 | ||
349 | info->si_blocks = (bfs_sb->s_end + 1)>>BFS_BSIZE_BITS; /* for statfs(2) */ | 359 | info->si_blocks = (le32_to_cpu(bfs_sb->s_end) + 1)>>BFS_BSIZE_BITS; /* for statfs(2) */ |
350 | info->si_freeb = (bfs_sb->s_end + 1 - bfs_sb->s_start)>>BFS_BSIZE_BITS; | 360 | info->si_freeb = (le32_to_cpu(bfs_sb->s_end) + 1 - cpu_to_le32(bfs_sb->s_start))>>BFS_BSIZE_BITS; |
351 | info->si_freei = 0; | 361 | info->si_freei = 0; |
352 | info->si_lf_eblk = 0; | 362 | info->si_lf_eblk = 0; |
353 | info->si_lf_sblk = 0; | 363 | info->si_lf_sblk = 0; |
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index c8998dc66882..7974efa107bc 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c | |||
@@ -520,7 +520,7 @@ static int load_flat_file(struct linux_binprm * bprm, | |||
520 | DBG_FLT("BINFMT_FLAT: ROM mapping of file (we hope)\n"); | 520 | DBG_FLT("BINFMT_FLAT: ROM mapping of file (we hope)\n"); |
521 | 521 | ||
522 | down_write(&current->mm->mmap_sem); | 522 | down_write(&current->mm->mmap_sem); |
523 | textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC, MAP_SHARED, 0); | 523 | textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC, MAP_PRIVATE, 0); |
524 | up_write(&current->mm->mmap_sem); | 524 | up_write(&current->mm->mmap_sem); |
525 | if (!textpos || textpos >= (unsigned long) -4096) { | 525 | if (!textpos || textpos >= (unsigned long) -4096) { |
526 | if (!textpos) | 526 | if (!textpos) |
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/module.h> | 25 | #include <linux/module.h> |
26 | #include <linux/mempool.h> | 26 | #include <linux/mempool.h> |
27 | #include <linux/workqueue.h> | 27 | #include <linux/workqueue.h> |
28 | #include <scsi/sg.h> /* for struct sg_iovec */ | ||
28 | 29 | ||
29 | #define BIO_POOL_SIZE 256 | 30 | #define BIO_POOL_SIZE 256 |
30 | 31 | ||
@@ -104,18 +105,22 @@ static inline struct bio_vec *bvec_alloc_bs(unsigned int __nocast gfp_mask, int | |||
104 | return bvl; | 105 | return bvl; |
105 | } | 106 | } |
106 | 107 | ||
107 | /* | 108 | void bio_free(struct bio *bio, struct bio_set *bio_set) |
108 | * default destructor for a bio allocated with bio_alloc_bioset() | ||
109 | */ | ||
110 | static void bio_destructor(struct bio *bio) | ||
111 | { | 109 | { |
112 | const int pool_idx = BIO_POOL_IDX(bio); | 110 | const int pool_idx = BIO_POOL_IDX(bio); |
113 | struct bio_set *bs = bio->bi_set; | ||
114 | 111 | ||
115 | BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS); | 112 | BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS); |
116 | 113 | ||
117 | mempool_free(bio->bi_io_vec, bs->bvec_pools[pool_idx]); | 114 | mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]); |
118 | mempool_free(bio, bs->bio_pool); | 115 | mempool_free(bio, bio_set->bio_pool); |
116 | } | ||
117 | |||
118 | /* | ||
119 | * default destructor for a bio allocated with bio_alloc_bioset() | ||
120 | */ | ||
121 | static void bio_fs_destructor(struct bio *bio) | ||
122 | { | ||
123 | bio_free(bio, fs_bio_set); | ||
119 | } | 124 | } |
120 | 125 | ||
121 | inline void bio_init(struct bio *bio) | 126 | inline void bio_init(struct bio *bio) |
@@ -171,8 +176,6 @@ struct bio *bio_alloc_bioset(unsigned int __nocast gfp_mask, int nr_iovecs, stru | |||
171 | bio->bi_max_vecs = bvec_slabs[idx].nr_vecs; | 176 | bio->bi_max_vecs = bvec_slabs[idx].nr_vecs; |
172 | } | 177 | } |
173 | bio->bi_io_vec = bvl; | 178 | bio->bi_io_vec = bvl; |
174 | bio->bi_destructor = bio_destructor; | ||
175 | bio->bi_set = bs; | ||
176 | } | 179 | } |
177 | out: | 180 | out: |
178 | return bio; | 181 | return bio; |
@@ -180,7 +183,12 @@ out: | |||
180 | 183 | ||
181 | struct bio *bio_alloc(unsigned int __nocast gfp_mask, int nr_iovecs) | 184 | struct bio *bio_alloc(unsigned int __nocast gfp_mask, int nr_iovecs) |
182 | { | 185 | { |
183 | return bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); | 186 | struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); |
187 | |||
188 | if (bio) | ||
189 | bio->bi_destructor = bio_fs_destructor; | ||
190 | |||
191 | return bio; | ||
184 | } | 192 | } |
185 | 193 | ||
186 | void zero_fill_bio(struct bio *bio) | 194 | void zero_fill_bio(struct bio *bio) |
@@ -273,8 +281,10 @@ struct bio *bio_clone(struct bio *bio, unsigned int __nocast gfp_mask) | |||
273 | { | 281 | { |
274 | struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set); | 282 | struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set); |
275 | 283 | ||
276 | if (b) | 284 | if (b) { |
285 | b->bi_destructor = bio_fs_destructor; | ||
277 | __bio_clone(b, bio); | 286 | __bio_clone(b, bio); |
287 | } | ||
278 | 288 | ||
279 | return b; | 289 | return b; |
280 | } | 290 | } |
@@ -546,22 +556,34 @@ out_bmd: | |||
546 | return ERR_PTR(ret); | 556 | return ERR_PTR(ret); |
547 | } | 557 | } |
548 | 558 | ||
549 | static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev, | 559 | static struct bio *__bio_map_user_iov(request_queue_t *q, |
550 | unsigned long uaddr, unsigned int len, | 560 | struct block_device *bdev, |
551 | int write_to_vm) | 561 | struct sg_iovec *iov, int iov_count, |
562 | int write_to_vm) | ||
552 | { | 563 | { |
553 | unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; | 564 | int i, j; |
554 | unsigned long start = uaddr >> PAGE_SHIFT; | 565 | int nr_pages = 0; |
555 | const int nr_pages = end - start; | ||
556 | int ret, offset, i; | ||
557 | struct page **pages; | 566 | struct page **pages; |
558 | struct bio *bio; | 567 | struct bio *bio; |
568 | int cur_page = 0; | ||
569 | int ret, offset; | ||
559 | 570 | ||
560 | /* | 571 | for (i = 0; i < iov_count; i++) { |
561 | * transfer and buffer must be aligned to at least hardsector | 572 | unsigned long uaddr = (unsigned long)iov[i].iov_base; |
562 | * size for now, in the future we can relax this restriction | 573 | unsigned long len = iov[i].iov_len; |
563 | */ | 574 | unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; |
564 | if ((uaddr & queue_dma_alignment(q)) || (len & queue_dma_alignment(q))) | 575 | unsigned long start = uaddr >> PAGE_SHIFT; |
576 | |||
577 | nr_pages += end - start; | ||
578 | /* | ||
579 | * transfer and buffer must be aligned to at least hardsector | ||
580 | * size for now, in the future we can relax this restriction | ||
581 | */ | ||
582 | if ((uaddr & queue_dma_alignment(q)) || (len & queue_dma_alignment(q))) | ||
583 | return ERR_PTR(-EINVAL); | ||
584 | } | ||
585 | |||
586 | if (!nr_pages) | ||
565 | return ERR_PTR(-EINVAL); | 587 | return ERR_PTR(-EINVAL); |
566 | 588 | ||
567 | bio = bio_alloc(GFP_KERNEL, nr_pages); | 589 | bio = bio_alloc(GFP_KERNEL, nr_pages); |
@@ -573,42 +595,54 @@ static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev, | |||
573 | if (!pages) | 595 | if (!pages) |
574 | goto out; | 596 | goto out; |
575 | 597 | ||
576 | down_read(&current->mm->mmap_sem); | 598 | memset(pages, 0, nr_pages * sizeof(struct page *)); |
577 | ret = get_user_pages(current, current->mm, uaddr, nr_pages, | 599 | |
578 | write_to_vm, 0, pages, NULL); | 600 | for (i = 0; i < iov_count; i++) { |
579 | up_read(&current->mm->mmap_sem); | 601 | unsigned long uaddr = (unsigned long)iov[i].iov_base; |
580 | 602 | unsigned long len = iov[i].iov_len; | |
581 | if (ret < nr_pages) | 603 | unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; |
582 | goto out; | 604 | unsigned long start = uaddr >> PAGE_SHIFT; |
583 | 605 | const int local_nr_pages = end - start; | |
584 | bio->bi_bdev = bdev; | 606 | const int page_limit = cur_page + local_nr_pages; |
585 | 607 | ||
586 | offset = uaddr & ~PAGE_MASK; | 608 | down_read(&current->mm->mmap_sem); |
587 | for (i = 0; i < nr_pages; i++) { | 609 | ret = get_user_pages(current, current->mm, uaddr, |
588 | unsigned int bytes = PAGE_SIZE - offset; | 610 | local_nr_pages, |
589 | 611 | write_to_vm, 0, &pages[cur_page], NULL); | |
590 | if (len <= 0) | 612 | up_read(&current->mm->mmap_sem); |
591 | break; | 613 | |
592 | 614 | if (ret < local_nr_pages) | |
593 | if (bytes > len) | 615 | goto out_unmap; |
594 | bytes = len; | 616 | |
617 | |||
618 | offset = uaddr & ~PAGE_MASK; | ||
619 | for (j = cur_page; j < page_limit; j++) { | ||
620 | unsigned int bytes = PAGE_SIZE - offset; | ||
621 | |||
622 | if (len <= 0) | ||
623 | break; | ||
624 | |||
625 | if (bytes > len) | ||
626 | bytes = len; | ||
627 | |||
628 | /* | ||
629 | * sorry... | ||
630 | */ | ||
631 | if (__bio_add_page(q, bio, pages[j], bytes, offset) < bytes) | ||
632 | break; | ||
633 | |||
634 | len -= bytes; | ||
635 | offset = 0; | ||
636 | } | ||
595 | 637 | ||
638 | cur_page = j; | ||
596 | /* | 639 | /* |
597 | * sorry... | 640 | * release the pages we didn't map into the bio, if any |
598 | */ | 641 | */ |
599 | if (__bio_add_page(q, bio, pages[i], bytes, offset) < bytes) | 642 | while (j < page_limit) |
600 | break; | 643 | page_cache_release(pages[j++]); |
601 | |||
602 | len -= bytes; | ||
603 | offset = 0; | ||
604 | } | 644 | } |
605 | 645 | ||
606 | /* | ||
607 | * release the pages we didn't map into the bio, if any | ||
608 | */ | ||
609 | while (i < nr_pages) | ||
610 | page_cache_release(pages[i++]); | ||
611 | |||
612 | kfree(pages); | 646 | kfree(pages); |
613 | 647 | ||
614 | /* | 648 | /* |
@@ -617,9 +651,17 @@ static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev, | |||
617 | if (!write_to_vm) | 651 | if (!write_to_vm) |
618 | bio->bi_rw |= (1 << BIO_RW); | 652 | bio->bi_rw |= (1 << BIO_RW); |
619 | 653 | ||
654 | bio->bi_bdev = bdev; | ||
620 | bio->bi_flags |= (1 << BIO_USER_MAPPED); | 655 | bio->bi_flags |= (1 << BIO_USER_MAPPED); |
621 | return bio; | 656 | return bio; |
622 | out: | 657 | |
658 | out_unmap: | ||
659 | for (i = 0; i < nr_pages; i++) { | ||
660 | if(!pages[i]) | ||
661 | break; | ||
662 | page_cache_release(pages[i]); | ||
663 | } | ||
664 | out: | ||
623 | kfree(pages); | 665 | kfree(pages); |
624 | bio_put(bio); | 666 | bio_put(bio); |
625 | return ERR_PTR(ret); | 667 | return ERR_PTR(ret); |
@@ -639,9 +681,33 @@ out: | |||
639 | struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev, | 681 | struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev, |
640 | unsigned long uaddr, unsigned int len, int write_to_vm) | 682 | unsigned long uaddr, unsigned int len, int write_to_vm) |
641 | { | 683 | { |
684 | struct sg_iovec iov; | ||
685 | |||
686 | iov.iov_base = (void __user *)uaddr; | ||
687 | iov.iov_len = len; | ||
688 | |||
689 | return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm); | ||
690 | } | ||
691 | |||
692 | /** | ||
693 | * bio_map_user_iov - map user sg_iovec table into bio | ||
694 | * @q: the request_queue_t for the bio | ||
695 | * @bdev: destination block device | ||
696 | * @iov: the iovec. | ||
697 | * @iov_count: number of elements in the iovec | ||
698 | * @write_to_vm: bool indicating writing to pages or not | ||
699 | * | ||
700 | * Map the user space address into a bio suitable for io to a block | ||
701 | * device. Returns an error pointer in case of error. | ||
702 | */ | ||
703 | struct bio *bio_map_user_iov(request_queue_t *q, struct block_device *bdev, | ||
704 | struct sg_iovec *iov, int iov_count, | ||
705 | int write_to_vm) | ||
706 | { | ||
642 | struct bio *bio; | 707 | struct bio *bio; |
708 | int len = 0, i; | ||
643 | 709 | ||
644 | bio = __bio_map_user(q, bdev, uaddr, len, write_to_vm); | 710 | bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm); |
645 | 711 | ||
646 | if (IS_ERR(bio)) | 712 | if (IS_ERR(bio)) |
647 | return bio; | 713 | return bio; |
@@ -654,6 +720,9 @@ struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev, | |||
654 | */ | 720 | */ |
655 | bio_get(bio); | 721 | bio_get(bio); |
656 | 722 | ||
723 | for (i = 0; i < iov_count; i++) | ||
724 | len += iov[i].iov_len; | ||
725 | |||
657 | if (bio->bi_size == len) | 726 | if (bio->bi_size == len) |
658 | return bio; | 727 | return bio; |
659 | 728 | ||
@@ -698,6 +767,82 @@ void bio_unmap_user(struct bio *bio) | |||
698 | bio_put(bio); | 767 | bio_put(bio); |
699 | } | 768 | } |
700 | 769 | ||
770 | static int bio_map_kern_endio(struct bio *bio, unsigned int bytes_done, int err) | ||
771 | { | ||
772 | if (bio->bi_size) | ||
773 | return 1; | ||
774 | |||
775 | bio_put(bio); | ||
776 | return 0; | ||
777 | } | ||
778 | |||
779 | |||
780 | static struct bio *__bio_map_kern(request_queue_t *q, void *data, | ||
781 | unsigned int len, unsigned int gfp_mask) | ||
782 | { | ||
783 | unsigned long kaddr = (unsigned long)data; | ||
784 | unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
785 | unsigned long start = kaddr >> PAGE_SHIFT; | ||
786 | const int nr_pages = end - start; | ||
787 | int offset, i; | ||
788 | struct bio *bio; | ||
789 | |||
790 | bio = bio_alloc(gfp_mask, nr_pages); | ||
791 | if (!bio) | ||
792 | return ERR_PTR(-ENOMEM); | ||
793 | |||
794 | offset = offset_in_page(kaddr); | ||
795 | for (i = 0; i < nr_pages; i++) { | ||
796 | unsigned int bytes = PAGE_SIZE - offset; | ||
797 | |||
798 | if (len <= 0) | ||
799 | break; | ||
800 | |||
801 | if (bytes > len) | ||
802 | bytes = len; | ||
803 | |||
804 | if (__bio_add_page(q, bio, virt_to_page(data), bytes, | ||
805 | offset) < bytes) | ||
806 | break; | ||
807 | |||
808 | data += bytes; | ||
809 | len -= bytes; | ||
810 | offset = 0; | ||
811 | } | ||
812 | |||
813 | bio->bi_end_io = bio_map_kern_endio; | ||
814 | return bio; | ||
815 | } | ||
816 | |||
817 | /** | ||
818 | * bio_map_kern - map kernel address into bio | ||
819 | * @q: the request_queue_t for the bio | ||
820 | * @data: pointer to buffer to map | ||
821 | * @len: length in bytes | ||
822 | * @gfp_mask: allocation flags for bio allocation | ||
823 | * | ||
824 | * Map the kernel address into a bio suitable for io to a block | ||
825 | * device. Returns an error pointer in case of error. | ||
826 | */ | ||
827 | struct bio *bio_map_kern(request_queue_t *q, void *data, unsigned int len, | ||
828 | unsigned int gfp_mask) | ||
829 | { | ||
830 | struct bio *bio; | ||
831 | |||
832 | bio = __bio_map_kern(q, data, len, gfp_mask); | ||
833 | if (IS_ERR(bio)) | ||
834 | return bio; | ||
835 | |||
836 | if (bio->bi_size == len) | ||
837 | return bio; | ||
838 | |||
839 | /* | ||
840 | * Don't support partial mappings. | ||
841 | */ | ||
842 | bio_put(bio); | ||
843 | return ERR_PTR(-EINVAL); | ||
844 | } | ||
845 | |||
701 | /* | 846 | /* |
702 | * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions | 847 | * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions |
703 | * for performing direct-IO in BIOs. | 848 | * for performing direct-IO in BIOs. |
@@ -1075,6 +1220,7 @@ subsys_initcall(init_bio); | |||
1075 | 1220 | ||
1076 | EXPORT_SYMBOL(bio_alloc); | 1221 | EXPORT_SYMBOL(bio_alloc); |
1077 | EXPORT_SYMBOL(bio_put); | 1222 | EXPORT_SYMBOL(bio_put); |
1223 | EXPORT_SYMBOL(bio_free); | ||
1078 | EXPORT_SYMBOL(bio_endio); | 1224 | EXPORT_SYMBOL(bio_endio); |
1079 | EXPORT_SYMBOL(bio_init); | 1225 | EXPORT_SYMBOL(bio_init); |
1080 | EXPORT_SYMBOL(__bio_clone); | 1226 | EXPORT_SYMBOL(__bio_clone); |
@@ -1085,6 +1231,7 @@ EXPORT_SYMBOL(bio_add_page); | |||
1085 | EXPORT_SYMBOL(bio_get_nr_vecs); | 1231 | EXPORT_SYMBOL(bio_get_nr_vecs); |
1086 | EXPORT_SYMBOL(bio_map_user); | 1232 | EXPORT_SYMBOL(bio_map_user); |
1087 | EXPORT_SYMBOL(bio_unmap_user); | 1233 | EXPORT_SYMBOL(bio_unmap_user); |
1234 | EXPORT_SYMBOL(bio_map_kern); | ||
1088 | EXPORT_SYMBOL(bio_pair_release); | 1235 | EXPORT_SYMBOL(bio_pair_release); |
1089 | EXPORT_SYMBOL(bio_split); | 1236 | EXPORT_SYMBOL(bio_split); |
1090 | EXPORT_SYMBOL(bio_split_pool); | 1237 | EXPORT_SYMBOL(bio_split_pool); |
diff --git a/fs/buffer.c b/fs/buffer.c index 6a25d7df89b1..6cbfceabd95d 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include <linux/cpu.h> | 40 | #include <linux/cpu.h> |
41 | #include <linux/bitops.h> | 41 | #include <linux/bitops.h> |
42 | #include <linux/mpage.h> | 42 | #include <linux/mpage.h> |
43 | #include <linux/bit_spinlock.h> | ||
43 | 44 | ||
44 | static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); | 45 | static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); |
45 | static void invalidate_bh_lrus(void); | 46 | static void invalidate_bh_lrus(void); |
@@ -917,8 +918,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) | |||
917 | * contents - it is a noop if I/O is still in | 918 | * contents - it is a noop if I/O is still in |
918 | * flight on potentially older contents. | 919 | * flight on potentially older contents. |
919 | */ | 920 | */ |
920 | wait_on_buffer(bh); | 921 | ll_rw_block(SWRITE, 1, &bh); |
921 | ll_rw_block(WRITE, 1, &bh); | ||
922 | brelse(bh); | 922 | brelse(bh); |
923 | spin_lock(lock); | 923 | spin_lock(lock); |
924 | } | 924 | } |
@@ -2793,21 +2793,22 @@ int submit_bh(int rw, struct buffer_head * bh) | |||
2793 | 2793 | ||
2794 | /** | 2794 | /** |
2795 | * ll_rw_block: low-level access to block devices (DEPRECATED) | 2795 | * ll_rw_block: low-level access to block devices (DEPRECATED) |
2796 | * @rw: whether to %READ or %WRITE or maybe %READA (readahead) | 2796 | * @rw: whether to %READ or %WRITE or %SWRITE or maybe %READA (readahead) |
2797 | * @nr: number of &struct buffer_heads in the array | 2797 | * @nr: number of &struct buffer_heads in the array |
2798 | * @bhs: array of pointers to &struct buffer_head | 2798 | * @bhs: array of pointers to &struct buffer_head |
2799 | * | 2799 | * |
2800 | * ll_rw_block() takes an array of pointers to &struct buffer_heads, | 2800 | * ll_rw_block() takes an array of pointers to &struct buffer_heads, and |
2801 | * and requests an I/O operation on them, either a %READ or a %WRITE. | 2801 | * requests an I/O operation on them, either a %READ or a %WRITE. The third |
2802 | * The third %READA option is described in the documentation for | 2802 | * %SWRITE is like %WRITE only we make sure that the *current* data in buffers |
2803 | * generic_make_request() which ll_rw_block() calls. | 2803 | * are sent to disk. The fourth %READA option is described in the documentation |
2804 | * for generic_make_request() which ll_rw_block() calls. | ||
2804 | * | 2805 | * |
2805 | * This function drops any buffer that it cannot get a lock on (with the | 2806 | * This function drops any buffer that it cannot get a lock on (with the |
2806 | * BH_Lock state bit), any buffer that appears to be clean when doing a | 2807 | * BH_Lock state bit) unless SWRITE is required, any buffer that appears to be |
2807 | * write request, and any buffer that appears to be up-to-date when doing | 2808 | * clean when doing a write request, and any buffer that appears to be |
2808 | * read request. Further it marks as clean buffers that are processed for | 2809 | * up-to-date when doing read request. Further it marks as clean buffers that |
2809 | * writing (the buffer cache won't assume that they are actually clean until | 2810 | * are processed for writing (the buffer cache won't assume that they are |
2810 | * the buffer gets unlocked). | 2811 | * actually clean until the buffer gets unlocked). |
2811 | * | 2812 | * |
2812 | * ll_rw_block sets b_end_io to simple completion handler that marks | 2813 | * ll_rw_block sets b_end_io to simple completion handler that marks |
2813 | * the buffer up-to-date (if approriate), unlocks the buffer and wakes | 2814 | * the buffer up-to-date (if approriate), unlocks the buffer and wakes |
@@ -2823,11 +2824,13 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) | |||
2823 | for (i = 0; i < nr; i++) { | 2824 | for (i = 0; i < nr; i++) { |
2824 | struct buffer_head *bh = bhs[i]; | 2825 | struct buffer_head *bh = bhs[i]; |
2825 | 2826 | ||
2826 | if (test_set_buffer_locked(bh)) | 2827 | if (rw == SWRITE) |
2828 | lock_buffer(bh); | ||
2829 | else if (test_set_buffer_locked(bh)) | ||
2827 | continue; | 2830 | continue; |
2828 | 2831 | ||
2829 | get_bh(bh); | 2832 | get_bh(bh); |
2830 | if (rw == WRITE) { | 2833 | if (rw == WRITE || rw == SWRITE) { |
2831 | if (test_clear_buffer_dirty(bh)) { | 2834 | if (test_clear_buffer_dirty(bh)) { |
2832 | bh->b_end_io = end_buffer_write_sync; | 2835 | bh->b_end_io = end_buffer_write_sync; |
2833 | submit_bh(WRITE, bh); | 2836 | submit_bh(WRITE, bh); |
@@ -3046,10 +3049,9 @@ struct buffer_head *alloc_buffer_head(unsigned int __nocast gfp_flags) | |||
3046 | { | 3049 | { |
3047 | struct buffer_head *ret = kmem_cache_alloc(bh_cachep, gfp_flags); | 3050 | struct buffer_head *ret = kmem_cache_alloc(bh_cachep, gfp_flags); |
3048 | if (ret) { | 3051 | if (ret) { |
3049 | preempt_disable(); | 3052 | get_cpu_var(bh_accounting).nr++; |
3050 | __get_cpu_var(bh_accounting).nr++; | ||
3051 | recalc_bh_state(); | 3053 | recalc_bh_state(); |
3052 | preempt_enable(); | 3054 | put_cpu_var(bh_accounting); |
3053 | } | 3055 | } |
3054 | return ret; | 3056 | return ret; |
3055 | } | 3057 | } |
@@ -3059,10 +3061,9 @@ void free_buffer_head(struct buffer_head *bh) | |||
3059 | { | 3061 | { |
3060 | BUG_ON(!list_empty(&bh->b_assoc_buffers)); | 3062 | BUG_ON(!list_empty(&bh->b_assoc_buffers)); |
3061 | kmem_cache_free(bh_cachep, bh); | 3063 | kmem_cache_free(bh_cachep, bh); |
3062 | preempt_disable(); | 3064 | get_cpu_var(bh_accounting).nr--; |
3063 | __get_cpu_var(bh_accounting).nr--; | ||
3064 | recalc_bh_state(); | 3065 | recalc_bh_state(); |
3065 | preempt_enable(); | 3066 | put_cpu_var(bh_accounting); |
3066 | } | 3067 | } |
3067 | EXPORT_SYMBOL(free_buffer_head); | 3068 | EXPORT_SYMBOL(free_buffer_head); |
3068 | 3069 | ||
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index e568cc47a7f9..2335f14a1583 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c | |||
@@ -836,7 +836,7 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol) | |||
836 | /* go from value to value + temp_len condensing | 836 | /* go from value to value + temp_len condensing |
837 | double commas to singles. Note that this ends up | 837 | double commas to singles. Note that this ends up |
838 | allocating a few bytes too many, which is ok */ | 838 | allocating a few bytes too many, which is ok */ |
839 | vol->password = kcalloc(1, temp_len, GFP_KERNEL); | 839 | vol->password = kzalloc(temp_len, GFP_KERNEL); |
840 | if(vol->password == NULL) { | 840 | if(vol->password == NULL) { |
841 | printk("CIFS: no memory for pass\n"); | 841 | printk("CIFS: no memory for pass\n"); |
842 | return 1; | 842 | return 1; |
@@ -851,7 +851,7 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol) | |||
851 | } | 851 | } |
852 | vol->password[j] = 0; | 852 | vol->password[j] = 0; |
853 | } else { | 853 | } else { |
854 | vol->password = kcalloc(1, temp_len+1, GFP_KERNEL); | 854 | vol->password = kzalloc(temp_len+1, GFP_KERNEL); |
855 | if(vol->password == NULL) { | 855 | if(vol->password == NULL) { |
856 | printk("CIFS: no memory for pass\n"); | 856 | printk("CIFS: no memory for pass\n"); |
857 | return 1; | 857 | return 1; |
@@ -1317,7 +1317,7 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, | |||
1317 | sessinit is sent but no second negprot */ | 1317 | sessinit is sent but no second negprot */ |
1318 | struct rfc1002_session_packet * ses_init_buf; | 1318 | struct rfc1002_session_packet * ses_init_buf; |
1319 | struct smb_hdr * smb_buf; | 1319 | struct smb_hdr * smb_buf; |
1320 | ses_init_buf = kcalloc(1, sizeof(struct rfc1002_session_packet), GFP_KERNEL); | 1320 | ses_init_buf = kzalloc(sizeof(struct rfc1002_session_packet), GFP_KERNEL); |
1321 | if(ses_init_buf) { | 1321 | if(ses_init_buf) { |
1322 | ses_init_buf->trailer.session_req.called_len = 32; | 1322 | ses_init_buf->trailer.session_req.called_len = 32; |
1323 | rfc1002mangle(ses_init_buf->trailer.session_req.called_name, | 1323 | rfc1002mangle(ses_init_buf->trailer.session_req.called_name, |
@@ -1964,7 +1964,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, | |||
1964 | /* We look for obvious messed up bcc or strings in response so we do not go off | 1964 | /* We look for obvious messed up bcc or strings in response so we do not go off |
1965 | the end since (at least) WIN2K and Windows XP have a major bug in not null | 1965 | the end since (at least) WIN2K and Windows XP have a major bug in not null |
1966 | terminating last Unicode string in response */ | 1966 | terminating last Unicode string in response */ |
1967 | ses->serverOS = kcalloc(1, 2 * (len + 1), GFP_KERNEL); | 1967 | ses->serverOS = kzalloc(2 * (len + 1), GFP_KERNEL); |
1968 | if(ses->serverOS == NULL) | 1968 | if(ses->serverOS == NULL) |
1969 | goto sesssetup_nomem; | 1969 | goto sesssetup_nomem; |
1970 | cifs_strfromUCS_le(ses->serverOS, | 1970 | cifs_strfromUCS_le(ses->serverOS, |
@@ -1976,7 +1976,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, | |||
1976 | if (remaining_words > 0) { | 1976 | if (remaining_words > 0) { |
1977 | len = UniStrnlen((wchar_t *)bcc_ptr, | 1977 | len = UniStrnlen((wchar_t *)bcc_ptr, |
1978 | remaining_words-1); | 1978 | remaining_words-1); |
1979 | ses->serverNOS = kcalloc(1, 2 * (len + 1),GFP_KERNEL); | 1979 | ses->serverNOS = kzalloc(2 * (len + 1),GFP_KERNEL); |
1980 | if(ses->serverNOS == NULL) | 1980 | if(ses->serverNOS == NULL) |
1981 | goto sesssetup_nomem; | 1981 | goto sesssetup_nomem; |
1982 | cifs_strfromUCS_le(ses->serverNOS, | 1982 | cifs_strfromUCS_le(ses->serverNOS, |
@@ -1994,7 +1994,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, | |||
1994 | len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); | 1994 | len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); |
1995 | /* last string is not always null terminated (for e.g. for Windows XP & 2000) */ | 1995 | /* last string is not always null terminated (for e.g. for Windows XP & 2000) */ |
1996 | ses->serverDomain = | 1996 | ses->serverDomain = |
1997 | kcalloc(1, 2*(len+1),GFP_KERNEL); | 1997 | kzalloc(2*(len+1),GFP_KERNEL); |
1998 | if(ses->serverDomain == NULL) | 1998 | if(ses->serverDomain == NULL) |
1999 | goto sesssetup_nomem; | 1999 | goto sesssetup_nomem; |
2000 | cifs_strfromUCS_le(ses->serverDomain, | 2000 | cifs_strfromUCS_le(ses->serverDomain, |
@@ -2005,22 +2005,22 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, | |||
2005 | } /* else no more room so create dummy domain string */ | 2005 | } /* else no more room so create dummy domain string */ |
2006 | else | 2006 | else |
2007 | ses->serverDomain = | 2007 | ses->serverDomain = |
2008 | kcalloc(1, 2, GFP_KERNEL); | 2008 | kzalloc(2, GFP_KERNEL); |
2009 | } else { /* no room so create dummy domain and NOS string */ | 2009 | } else { /* no room so create dummy domain and NOS string */ |
2010 | /* if these kcallocs fail not much we | 2010 | /* if these kcallocs fail not much we |
2011 | can do, but better to not fail the | 2011 | can do, but better to not fail the |
2012 | sesssetup itself */ | 2012 | sesssetup itself */ |
2013 | ses->serverDomain = | 2013 | ses->serverDomain = |
2014 | kcalloc(1, 2, GFP_KERNEL); | 2014 | kzalloc(2, GFP_KERNEL); |
2015 | ses->serverNOS = | 2015 | ses->serverNOS = |
2016 | kcalloc(1, 2, GFP_KERNEL); | 2016 | kzalloc(2, GFP_KERNEL); |
2017 | } | 2017 | } |
2018 | } else { /* ASCII */ | 2018 | } else { /* ASCII */ |
2019 | len = strnlen(bcc_ptr, 1024); | 2019 | len = strnlen(bcc_ptr, 1024); |
2020 | if (((long) bcc_ptr + len) - (long) | 2020 | if (((long) bcc_ptr + len) - (long) |
2021 | pByteArea(smb_buffer_response) | 2021 | pByteArea(smb_buffer_response) |
2022 | <= BCC(smb_buffer_response)) { | 2022 | <= BCC(smb_buffer_response)) { |
2023 | ses->serverOS = kcalloc(1, len + 1,GFP_KERNEL); | 2023 | ses->serverOS = kzalloc(len + 1,GFP_KERNEL); |
2024 | if(ses->serverOS == NULL) | 2024 | if(ses->serverOS == NULL) |
2025 | goto sesssetup_nomem; | 2025 | goto sesssetup_nomem; |
2026 | strncpy(ses->serverOS,bcc_ptr, len); | 2026 | strncpy(ses->serverOS,bcc_ptr, len); |
@@ -2030,7 +2030,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, | |||
2030 | bcc_ptr++; | 2030 | bcc_ptr++; |
2031 | 2031 | ||
2032 | len = strnlen(bcc_ptr, 1024); | 2032 | len = strnlen(bcc_ptr, 1024); |
2033 | ses->serverNOS = kcalloc(1, len + 1,GFP_KERNEL); | 2033 | ses->serverNOS = kzalloc(len + 1,GFP_KERNEL); |
2034 | if(ses->serverNOS == NULL) | 2034 | if(ses->serverNOS == NULL) |
2035 | goto sesssetup_nomem; | 2035 | goto sesssetup_nomem; |
2036 | strncpy(ses->serverNOS, bcc_ptr, len); | 2036 | strncpy(ses->serverNOS, bcc_ptr, len); |
@@ -2039,7 +2039,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, | |||
2039 | bcc_ptr++; | 2039 | bcc_ptr++; |
2040 | 2040 | ||
2041 | len = strnlen(bcc_ptr, 1024); | 2041 | len = strnlen(bcc_ptr, 1024); |
2042 | ses->serverDomain = kcalloc(1, len + 1,GFP_KERNEL); | 2042 | ses->serverDomain = kzalloc(len + 1,GFP_KERNEL); |
2043 | if(ses->serverDomain == NULL) | 2043 | if(ses->serverDomain == NULL) |
2044 | goto sesssetup_nomem; | 2044 | goto sesssetup_nomem; |
2045 | strncpy(ses->serverDomain, bcc_ptr, len); | 2045 | strncpy(ses->serverDomain, bcc_ptr, len); |
@@ -2240,7 +2240,7 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses, | |||
2240 | the end since (at least) WIN2K and Windows XP have a major bug in not null | 2240 | the end since (at least) WIN2K and Windows XP have a major bug in not null |
2241 | terminating last Unicode string in response */ | 2241 | terminating last Unicode string in response */ |
2242 | ses->serverOS = | 2242 | ses->serverOS = |
2243 | kcalloc(1, 2 * (len + 1), GFP_KERNEL); | 2243 | kzalloc(2 * (len + 1), GFP_KERNEL); |
2244 | cifs_strfromUCS_le(ses->serverOS, | 2244 | cifs_strfromUCS_le(ses->serverOS, |
2245 | (wchar_t *) | 2245 | (wchar_t *) |
2246 | bcc_ptr, len, | 2246 | bcc_ptr, len, |
@@ -2254,7 +2254,7 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses, | |||
2254 | remaining_words | 2254 | remaining_words |
2255 | - 1); | 2255 | - 1); |
2256 | ses->serverNOS = | 2256 | ses->serverNOS = |
2257 | kcalloc(1, 2 * (len + 1), | 2257 | kzalloc(2 * (len + 1), |
2258 | GFP_KERNEL); | 2258 | GFP_KERNEL); |
2259 | cifs_strfromUCS_le(ses->serverNOS, | 2259 | cifs_strfromUCS_le(ses->serverNOS, |
2260 | (wchar_t *)bcc_ptr, | 2260 | (wchar_t *)bcc_ptr, |
@@ -2267,7 +2267,7 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses, | |||
2267 | if (remaining_words > 0) { | 2267 | if (remaining_words > 0) { |
2268 | len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); | 2268 | len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); |
2269 | /* last string is not always null terminated (for e.g. for Windows XP & 2000) */ | 2269 | /* last string is not always null terminated (for e.g. for Windows XP & 2000) */ |
2270 | ses->serverDomain = kcalloc(1, 2*(len+1),GFP_KERNEL); | 2270 | ses->serverDomain = kzalloc(2*(len+1),GFP_KERNEL); |
2271 | cifs_strfromUCS_le(ses->serverDomain, | 2271 | cifs_strfromUCS_le(ses->serverDomain, |
2272 | (wchar_t *)bcc_ptr, | 2272 | (wchar_t *)bcc_ptr, |
2273 | len, | 2273 | len, |
@@ -2278,10 +2278,10 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses, | |||
2278 | } /* else no more room so create dummy domain string */ | 2278 | } /* else no more room so create dummy domain string */ |
2279 | else | 2279 | else |
2280 | ses->serverDomain = | 2280 | ses->serverDomain = |
2281 | kcalloc(1, 2,GFP_KERNEL); | 2281 | kzalloc(2,GFP_KERNEL); |
2282 | } else { /* no room so create dummy domain and NOS string */ | 2282 | } else { /* no room so create dummy domain and NOS string */ |
2283 | ses->serverDomain = kcalloc(1, 2, GFP_KERNEL); | 2283 | ses->serverDomain = kzalloc(2, GFP_KERNEL); |
2284 | ses->serverNOS = kcalloc(1, 2, GFP_KERNEL); | 2284 | ses->serverNOS = kzalloc(2, GFP_KERNEL); |
2285 | } | 2285 | } |
2286 | } else { /* ASCII */ | 2286 | } else { /* ASCII */ |
2287 | 2287 | ||
@@ -2289,7 +2289,7 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses, | |||
2289 | if (((long) bcc_ptr + len) - (long) | 2289 | if (((long) bcc_ptr + len) - (long) |
2290 | pByteArea(smb_buffer_response) | 2290 | pByteArea(smb_buffer_response) |
2291 | <= BCC(smb_buffer_response)) { | 2291 | <= BCC(smb_buffer_response)) { |
2292 | ses->serverOS = kcalloc(1, len + 1, GFP_KERNEL); | 2292 | ses->serverOS = kzalloc(len + 1, GFP_KERNEL); |
2293 | strncpy(ses->serverOS, bcc_ptr, len); | 2293 | strncpy(ses->serverOS, bcc_ptr, len); |
2294 | 2294 | ||
2295 | bcc_ptr += len; | 2295 | bcc_ptr += len; |
@@ -2297,14 +2297,14 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses, | |||
2297 | bcc_ptr++; | 2297 | bcc_ptr++; |
2298 | 2298 | ||
2299 | len = strnlen(bcc_ptr, 1024); | 2299 | len = strnlen(bcc_ptr, 1024); |
2300 | ses->serverNOS = kcalloc(1, len + 1,GFP_KERNEL); | 2300 | ses->serverNOS = kzalloc(len + 1,GFP_KERNEL); |
2301 | strncpy(ses->serverNOS, bcc_ptr, len); | 2301 | strncpy(ses->serverNOS, bcc_ptr, len); |
2302 | bcc_ptr += len; | 2302 | bcc_ptr += len; |
2303 | bcc_ptr[0] = 0; | 2303 | bcc_ptr[0] = 0; |
2304 | bcc_ptr++; | 2304 | bcc_ptr++; |
2305 | 2305 | ||
2306 | len = strnlen(bcc_ptr, 1024); | 2306 | len = strnlen(bcc_ptr, 1024); |
2307 | ses->serverDomain = kcalloc(1, len + 1, GFP_KERNEL); | 2307 | ses->serverDomain = kzalloc(len + 1, GFP_KERNEL); |
2308 | strncpy(ses->serverDomain, bcc_ptr, len); | 2308 | strncpy(ses->serverDomain, bcc_ptr, len); |
2309 | bcc_ptr += len; | 2309 | bcc_ptr += len; |
2310 | bcc_ptr[0] = 0; | 2310 | bcc_ptr[0] = 0; |
@@ -2554,7 +2554,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, | |||
2554 | the end since (at least) WIN2K and Windows XP have a major bug in not null | 2554 | the end since (at least) WIN2K and Windows XP have a major bug in not null |
2555 | terminating last Unicode string in response */ | 2555 | terminating last Unicode string in response */ |
2556 | ses->serverOS = | 2556 | ses->serverOS = |
2557 | kcalloc(1, 2 * (len + 1), GFP_KERNEL); | 2557 | kzalloc(2 * (len + 1), GFP_KERNEL); |
2558 | cifs_strfromUCS_le(ses->serverOS, | 2558 | cifs_strfromUCS_le(ses->serverOS, |
2559 | (wchar_t *) | 2559 | (wchar_t *) |
2560 | bcc_ptr, len, | 2560 | bcc_ptr, len, |
@@ -2569,7 +2569,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, | |||
2569 | remaining_words | 2569 | remaining_words |
2570 | - 1); | 2570 | - 1); |
2571 | ses->serverNOS = | 2571 | ses->serverNOS = |
2572 | kcalloc(1, 2 * (len + 1), | 2572 | kzalloc(2 * (len + 1), |
2573 | GFP_KERNEL); | 2573 | GFP_KERNEL); |
2574 | cifs_strfromUCS_le(ses-> | 2574 | cifs_strfromUCS_le(ses-> |
2575 | serverNOS, | 2575 | serverNOS, |
@@ -2586,7 +2586,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, | |||
2586 | len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); | 2586 | len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); |
2587 | /* last string is not always null terminated (for e.g. for Windows XP & 2000) */ | 2587 | /* last string is not always null terminated (for e.g. for Windows XP & 2000) */ |
2588 | ses->serverDomain = | 2588 | ses->serverDomain = |
2589 | kcalloc(1, 2 * | 2589 | kzalloc(2 * |
2590 | (len + | 2590 | (len + |
2591 | 1), | 2591 | 1), |
2592 | GFP_KERNEL); | 2592 | GFP_KERNEL); |
@@ -2612,13 +2612,13 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, | |||
2612 | } /* else no more room so create dummy domain string */ | 2612 | } /* else no more room so create dummy domain string */ |
2613 | else | 2613 | else |
2614 | ses->serverDomain = | 2614 | ses->serverDomain = |
2615 | kcalloc(1, 2, | 2615 | kzalloc(2, |
2616 | GFP_KERNEL); | 2616 | GFP_KERNEL); |
2617 | } else { /* no room so create dummy domain and NOS string */ | 2617 | } else { /* no room so create dummy domain and NOS string */ |
2618 | ses->serverDomain = | 2618 | ses->serverDomain = |
2619 | kcalloc(1, 2, GFP_KERNEL); | 2619 | kzalloc(2, GFP_KERNEL); |
2620 | ses->serverNOS = | 2620 | ses->serverNOS = |
2621 | kcalloc(1, 2, GFP_KERNEL); | 2621 | kzalloc(2, GFP_KERNEL); |
2622 | } | 2622 | } |
2623 | } else { /* ASCII */ | 2623 | } else { /* ASCII */ |
2624 | len = strnlen(bcc_ptr, 1024); | 2624 | len = strnlen(bcc_ptr, 1024); |
@@ -2626,7 +2626,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, | |||
2626 | pByteArea(smb_buffer_response) | 2626 | pByteArea(smb_buffer_response) |
2627 | <= BCC(smb_buffer_response)) { | 2627 | <= BCC(smb_buffer_response)) { |
2628 | ses->serverOS = | 2628 | ses->serverOS = |
2629 | kcalloc(1, len + 1, | 2629 | kzalloc(len + 1, |
2630 | GFP_KERNEL); | 2630 | GFP_KERNEL); |
2631 | strncpy(ses->serverOS, | 2631 | strncpy(ses->serverOS, |
2632 | bcc_ptr, len); | 2632 | bcc_ptr, len); |
@@ -2637,7 +2637,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, | |||
2637 | 2637 | ||
2638 | len = strnlen(bcc_ptr, 1024); | 2638 | len = strnlen(bcc_ptr, 1024); |
2639 | ses->serverNOS = | 2639 | ses->serverNOS = |
2640 | kcalloc(1, len + 1, | 2640 | kzalloc(len + 1, |
2641 | GFP_KERNEL); | 2641 | GFP_KERNEL); |
2642 | strncpy(ses->serverNOS, bcc_ptr, len); | 2642 | strncpy(ses->serverNOS, bcc_ptr, len); |
2643 | bcc_ptr += len; | 2643 | bcc_ptr += len; |
@@ -2646,7 +2646,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, | |||
2646 | 2646 | ||
2647 | len = strnlen(bcc_ptr, 1024); | 2647 | len = strnlen(bcc_ptr, 1024); |
2648 | ses->serverDomain = | 2648 | ses->serverDomain = |
2649 | kcalloc(1, len + 1, | 2649 | kzalloc(len + 1, |
2650 | GFP_KERNEL); | 2650 | GFP_KERNEL); |
2651 | strncpy(ses->serverDomain, bcc_ptr, len); | 2651 | strncpy(ses->serverDomain, bcc_ptr, len); |
2652 | bcc_ptr += len; | 2652 | bcc_ptr += len; |
@@ -2948,7 +2948,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, | |||
2948 | the end since (at least) WIN2K and Windows XP have a major bug in not null | 2948 | the end since (at least) WIN2K and Windows XP have a major bug in not null |
2949 | terminating last Unicode string in response */ | 2949 | terminating last Unicode string in response */ |
2950 | ses->serverOS = | 2950 | ses->serverOS = |
2951 | kcalloc(1, 2 * (len + 1), GFP_KERNEL); | 2951 | kzalloc(2 * (len + 1), GFP_KERNEL); |
2952 | cifs_strfromUCS_le(ses->serverOS, | 2952 | cifs_strfromUCS_le(ses->serverOS, |
2953 | (wchar_t *) | 2953 | (wchar_t *) |
2954 | bcc_ptr, len, | 2954 | bcc_ptr, len, |
@@ -2963,7 +2963,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, | |||
2963 | remaining_words | 2963 | remaining_words |
2964 | - 1); | 2964 | - 1); |
2965 | ses->serverNOS = | 2965 | ses->serverNOS = |
2966 | kcalloc(1, 2 * (len + 1), | 2966 | kzalloc(2 * (len + 1), |
2967 | GFP_KERNEL); | 2967 | GFP_KERNEL); |
2968 | cifs_strfromUCS_le(ses-> | 2968 | cifs_strfromUCS_le(ses-> |
2969 | serverNOS, | 2969 | serverNOS, |
@@ -2979,7 +2979,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, | |||
2979 | len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); | 2979 | len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); |
2980 | /* last string not always null terminated (e.g. for Windows XP & 2000) */ | 2980 | /* last string not always null terminated (e.g. for Windows XP & 2000) */ |
2981 | ses->serverDomain = | 2981 | ses->serverDomain = |
2982 | kcalloc(1, 2 * | 2982 | kzalloc(2 * |
2983 | (len + | 2983 | (len + |
2984 | 1), | 2984 | 1), |
2985 | GFP_KERNEL); | 2985 | GFP_KERNEL); |
@@ -3004,17 +3004,17 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, | |||
3004 | = 0; | 3004 | = 0; |
3005 | } /* else no more room so create dummy domain string */ | 3005 | } /* else no more room so create dummy domain string */ |
3006 | else | 3006 | else |
3007 | ses->serverDomain = kcalloc(1, 2,GFP_KERNEL); | 3007 | ses->serverDomain = kzalloc(2,GFP_KERNEL); |
3008 | } else { /* no room so create dummy domain and NOS string */ | 3008 | } else { /* no room so create dummy domain and NOS string */ |
3009 | ses->serverDomain = kcalloc(1, 2, GFP_KERNEL); | 3009 | ses->serverDomain = kzalloc(2, GFP_KERNEL); |
3010 | ses->serverNOS = kcalloc(1, 2, GFP_KERNEL); | 3010 | ses->serverNOS = kzalloc(2, GFP_KERNEL); |
3011 | } | 3011 | } |
3012 | } else { /* ASCII */ | 3012 | } else { /* ASCII */ |
3013 | len = strnlen(bcc_ptr, 1024); | 3013 | len = strnlen(bcc_ptr, 1024); |
3014 | if (((long) bcc_ptr + len) - | 3014 | if (((long) bcc_ptr + len) - |
3015 | (long) pByteArea(smb_buffer_response) | 3015 | (long) pByteArea(smb_buffer_response) |
3016 | <= BCC(smb_buffer_response)) { | 3016 | <= BCC(smb_buffer_response)) { |
3017 | ses->serverOS = kcalloc(1, len + 1,GFP_KERNEL); | 3017 | ses->serverOS = kzalloc(len + 1,GFP_KERNEL); |
3018 | strncpy(ses->serverOS,bcc_ptr, len); | 3018 | strncpy(ses->serverOS,bcc_ptr, len); |
3019 | 3019 | ||
3020 | bcc_ptr += len; | 3020 | bcc_ptr += len; |
@@ -3022,14 +3022,14 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, | |||
3022 | bcc_ptr++; | 3022 | bcc_ptr++; |
3023 | 3023 | ||
3024 | len = strnlen(bcc_ptr, 1024); | 3024 | len = strnlen(bcc_ptr, 1024); |
3025 | ses->serverNOS = kcalloc(1, len+1,GFP_KERNEL); | 3025 | ses->serverNOS = kzalloc(len+1,GFP_KERNEL); |
3026 | strncpy(ses->serverNOS, bcc_ptr, len); | 3026 | strncpy(ses->serverNOS, bcc_ptr, len); |
3027 | bcc_ptr += len; | 3027 | bcc_ptr += len; |
3028 | bcc_ptr[0] = 0; | 3028 | bcc_ptr[0] = 0; |
3029 | bcc_ptr++; | 3029 | bcc_ptr++; |
3030 | 3030 | ||
3031 | len = strnlen(bcc_ptr, 1024); | 3031 | len = strnlen(bcc_ptr, 1024); |
3032 | ses->serverDomain = kcalloc(1, len+1,GFP_KERNEL); | 3032 | ses->serverDomain = kzalloc(len+1,GFP_KERNEL); |
3033 | strncpy(ses->serverDomain, bcc_ptr, len); | 3033 | strncpy(ses->serverDomain, bcc_ptr, len); |
3034 | bcc_ptr += len; | 3034 | bcc_ptr += len; |
3035 | bcc_ptr[0] = 0; | 3035 | bcc_ptr[0] = 0; |
@@ -3141,7 +3141,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, | |||
3141 | if(tcon->nativeFileSystem) | 3141 | if(tcon->nativeFileSystem) |
3142 | kfree(tcon->nativeFileSystem); | 3142 | kfree(tcon->nativeFileSystem); |
3143 | tcon->nativeFileSystem = | 3143 | tcon->nativeFileSystem = |
3144 | kcalloc(1, length + 2, GFP_KERNEL); | 3144 | kzalloc(length + 2, GFP_KERNEL); |
3145 | cifs_strfromUCS_le(tcon->nativeFileSystem, | 3145 | cifs_strfromUCS_le(tcon->nativeFileSystem, |
3146 | (wchar_t *) bcc_ptr, | 3146 | (wchar_t *) bcc_ptr, |
3147 | length, nls_codepage); | 3147 | length, nls_codepage); |
@@ -3159,7 +3159,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, | |||
3159 | if(tcon->nativeFileSystem) | 3159 | if(tcon->nativeFileSystem) |
3160 | kfree(tcon->nativeFileSystem); | 3160 | kfree(tcon->nativeFileSystem); |
3161 | tcon->nativeFileSystem = | 3161 | tcon->nativeFileSystem = |
3162 | kcalloc(1, length + 1, GFP_KERNEL); | 3162 | kzalloc(length + 1, GFP_KERNEL); |
3163 | strncpy(tcon->nativeFileSystem, bcc_ptr, | 3163 | strncpy(tcon->nativeFileSystem, bcc_ptr, |
3164 | length); | 3164 | length); |
3165 | } | 3165 | } |
@@ -3215,10 +3215,8 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb) | |||
3215 | } | 3215 | } |
3216 | 3216 | ||
3217 | cifs_sb->tcon = NULL; | 3217 | cifs_sb->tcon = NULL; |
3218 | if (ses) { | 3218 | if (ses) |
3219 | set_current_state(TASK_INTERRUPTIBLE); | 3219 | schedule_timeout_interruptible(msecs_to_jiffies(500)); |
3220 | schedule_timeout(HZ / 2); | ||
3221 | } | ||
3222 | if (ses) | 3220 | if (ses) |
3223 | sesInfoFree(ses); | 3221 | sesInfoFree(ses); |
3224 | 3222 | ||
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 3f3538d4a1fa..d335269bd91c 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c | |||
@@ -145,24 +145,23 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, | |||
145 | return -ENOMEM; | 145 | return -ENOMEM; |
146 | } | 146 | } |
147 | 147 | ||
148 | if(nd) { | 148 | if(nd && (nd->flags & LOOKUP_OPEN)) { |
149 | if ((nd->intent.open.flags & O_ACCMODE) == O_RDONLY) | 149 | int oflags = nd->intent.open.flags; |
150 | desiredAccess = GENERIC_READ; | 150 | |
151 | else if ((nd->intent.open.flags & O_ACCMODE) == O_WRONLY) { | 151 | desiredAccess = 0; |
152 | desiredAccess = GENERIC_WRITE; | 152 | if (oflags & FMODE_READ) |
153 | write_only = TRUE; | 153 | desiredAccess |= GENERIC_READ; |
154 | } else if ((nd->intent.open.flags & O_ACCMODE) == O_RDWR) { | 154 | if (oflags & FMODE_WRITE) { |
155 | /* GENERIC_ALL is too much permission to request */ | 155 | desiredAccess |= GENERIC_WRITE; |
156 | /* can cause unnecessary access denied on create */ | 156 | if (!(oflags & FMODE_READ)) |
157 | /* desiredAccess = GENERIC_ALL; */ | 157 | write_only = TRUE; |
158 | desiredAccess = GENERIC_READ | GENERIC_WRITE; | ||
159 | } | 158 | } |
160 | 159 | ||
161 | if((nd->intent.open.flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) | 160 | if((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) |
162 | disposition = FILE_CREATE; | 161 | disposition = FILE_CREATE; |
163 | else if((nd->intent.open.flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC)) | 162 | else if((oflags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC)) |
164 | disposition = FILE_OVERWRITE_IF; | 163 | disposition = FILE_OVERWRITE_IF; |
165 | else if((nd->intent.open.flags & O_CREAT) == O_CREAT) | 164 | else if((oflags & O_CREAT) == O_CREAT) |
166 | disposition = FILE_OPEN_IF; | 165 | disposition = FILE_OPEN_IF; |
167 | else { | 166 | else { |
168 | cFYI(1,("Create flag not set in create function")); | 167 | cFYI(1,("Create flag not set in create function")); |
diff --git a/fs/compat.c b/fs/compat.c index 6b06b6bae35e..ac3fb9ed8eea 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -310,96 +310,6 @@ static int __init init_sys32_ioctl(void) | |||
310 | 310 | ||
311 | __initcall(init_sys32_ioctl); | 311 | __initcall(init_sys32_ioctl); |
312 | 312 | ||
313 | int register_ioctl32_conversion(unsigned int cmd, | ||
314 | ioctl_trans_handler_t handler) | ||
315 | { | ||
316 | struct ioctl_trans *t; | ||
317 | struct ioctl_trans *new_t; | ||
318 | unsigned long hash = ioctl32_hash(cmd); | ||
319 | |||
320 | new_t = kmalloc(sizeof(*new_t), GFP_KERNEL); | ||
321 | if (!new_t) | ||
322 | return -ENOMEM; | ||
323 | |||
324 | down_write(&ioctl32_sem); | ||
325 | for (t = ioctl32_hash_table[hash]; t; t = t->next) { | ||
326 | if (t->cmd == cmd) { | ||
327 | printk(KERN_ERR "Trying to register duplicated ioctl32 " | ||
328 | "handler %x\n", cmd); | ||
329 | up_write(&ioctl32_sem); | ||
330 | kfree(new_t); | ||
331 | return -EINVAL; | ||
332 | } | ||
333 | } | ||
334 | new_t->next = NULL; | ||
335 | new_t->cmd = cmd; | ||
336 | new_t->handler = handler; | ||
337 | ioctl32_insert_translation(new_t); | ||
338 | |||
339 | up_write(&ioctl32_sem); | ||
340 | return 0; | ||
341 | } | ||
342 | EXPORT_SYMBOL(register_ioctl32_conversion); | ||
343 | |||
344 | static inline int builtin_ioctl(struct ioctl_trans *t) | ||
345 | { | ||
346 | return t >= ioctl_start && t < (ioctl_start + ioctl_table_size); | ||
347 | } | ||
348 | |||
349 | /* Problem: | ||
350 | This function cannot unregister duplicate ioctls, because they are not | ||
351 | unique. | ||
352 | When they happen we need to extend the prototype to pass the handler too. */ | ||
353 | |||
354 | int unregister_ioctl32_conversion(unsigned int cmd) | ||
355 | { | ||
356 | unsigned long hash = ioctl32_hash(cmd); | ||
357 | struct ioctl_trans *t, *t1; | ||
358 | |||
359 | down_write(&ioctl32_sem); | ||
360 | |||
361 | t = ioctl32_hash_table[hash]; | ||
362 | if (!t) { | ||
363 | up_write(&ioctl32_sem); | ||
364 | return -EINVAL; | ||
365 | } | ||
366 | |||
367 | if (t->cmd == cmd) { | ||
368 | if (builtin_ioctl(t)) { | ||
369 | printk("%p tried to unregister builtin ioctl %x\n", | ||
370 | __builtin_return_address(0), cmd); | ||
371 | } else { | ||
372 | ioctl32_hash_table[hash] = t->next; | ||
373 | up_write(&ioctl32_sem); | ||
374 | kfree(t); | ||
375 | return 0; | ||
376 | } | ||
377 | } | ||
378 | while (t->next) { | ||
379 | t1 = t->next; | ||
380 | if (t1->cmd == cmd) { | ||
381 | if (builtin_ioctl(t1)) { | ||
382 | printk("%p tried to unregister builtin " | ||
383 | "ioctl %x\n", | ||
384 | __builtin_return_address(0), cmd); | ||
385 | goto out; | ||
386 | } else { | ||
387 | t->next = t1->next; | ||
388 | up_write(&ioctl32_sem); | ||
389 | kfree(t1); | ||
390 | return 0; | ||
391 | } | ||
392 | } | ||
393 | t = t1; | ||
394 | } | ||
395 | printk(KERN_ERR "Trying to free unknown 32bit ioctl handler %x\n", | ||
396 | cmd); | ||
397 | out: | ||
398 | up_write(&ioctl32_sem); | ||
399 | return -EINVAL; | ||
400 | } | ||
401 | EXPORT_SYMBOL(unregister_ioctl32_conversion); | ||
402 | |||
403 | static void compat_ioctl_error(struct file *filp, unsigned int fd, | 313 | static void compat_ioctl_error(struct file *filp, unsigned int fd, |
404 | unsigned int cmd, unsigned long arg) | 314 | unsigned int cmd, unsigned long arg) |
405 | { | 315 | { |
@@ -720,14 +630,14 @@ compat_sys_io_submit(aio_context_t ctx_id, int nr, u32 __user *iocb) | |||
720 | struct compat_ncp_mount_data { | 630 | struct compat_ncp_mount_data { |
721 | compat_int_t version; | 631 | compat_int_t version; |
722 | compat_uint_t ncp_fd; | 632 | compat_uint_t ncp_fd; |
723 | compat_uid_t mounted_uid; | 633 | __compat_uid_t mounted_uid; |
724 | compat_pid_t wdog_pid; | 634 | compat_pid_t wdog_pid; |
725 | unsigned char mounted_vol[NCP_VOLNAME_LEN + 1]; | 635 | unsigned char mounted_vol[NCP_VOLNAME_LEN + 1]; |
726 | compat_uint_t time_out; | 636 | compat_uint_t time_out; |
727 | compat_uint_t retry_count; | 637 | compat_uint_t retry_count; |
728 | compat_uint_t flags; | 638 | compat_uint_t flags; |
729 | compat_uid_t uid; | 639 | __compat_uid_t uid; |
730 | compat_gid_t gid; | 640 | __compat_gid_t gid; |
731 | compat_mode_t file_mode; | 641 | compat_mode_t file_mode; |
732 | compat_mode_t dir_mode; | 642 | compat_mode_t dir_mode; |
733 | }; | 643 | }; |
@@ -784,9 +694,9 @@ static void *do_ncp_super_data_conv(void *raw_data) | |||
784 | 694 | ||
785 | struct compat_smb_mount_data { | 695 | struct compat_smb_mount_data { |
786 | compat_int_t version; | 696 | compat_int_t version; |
787 | compat_uid_t mounted_uid; | 697 | __compat_uid_t mounted_uid; |
788 | compat_uid_t uid; | 698 | __compat_uid_t uid; |
789 | compat_gid_t gid; | 699 | __compat_gid_t gid; |
790 | compat_mode_t file_mode; | 700 | compat_mode_t file_mode; |
791 | compat_mode_t dir_mode; | 701 | compat_mode_t dir_mode; |
792 | }; | 702 | }; |
@@ -1365,6 +1275,16 @@ out: | |||
1365 | } | 1275 | } |
1366 | 1276 | ||
1367 | /* | 1277 | /* |
1278 | * Exactly like fs/open.c:sys_open(), except that it doesn't set the | ||
1279 | * O_LARGEFILE flag. | ||
1280 | */ | ||
1281 | asmlinkage long | ||
1282 | compat_sys_open(const char __user *filename, int flags, int mode) | ||
1283 | { | ||
1284 | return do_sys_open(filename, flags, mode); | ||
1285 | } | ||
1286 | |||
1287 | /* | ||
1368 | * compat_count() counts the number of arguments/envelopes. It is basically | 1288 | * compat_count() counts the number of arguments/envelopes. It is basically |
1369 | * a copy of count() from fs/exec.c, except that it works with 32 bit argv | 1289 | * a copy of count() from fs/exec.c, except that it works with 32 bit argv |
1370 | * and envp pointers. | 1290 | * and envp pointers. |
@@ -1699,6 +1619,7 @@ compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp | |||
1699 | char *bits; | 1619 | char *bits; |
1700 | long timeout; | 1620 | long timeout; |
1701 | int size, max_fdset, ret = -EINVAL; | 1621 | int size, max_fdset, ret = -EINVAL; |
1622 | struct fdtable *fdt; | ||
1702 | 1623 | ||
1703 | timeout = MAX_SCHEDULE_TIMEOUT; | 1624 | timeout = MAX_SCHEDULE_TIMEOUT; |
1704 | if (tvp) { | 1625 | if (tvp) { |
@@ -1724,7 +1645,10 @@ compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp | |||
1724 | goto out_nofds; | 1645 | goto out_nofds; |
1725 | 1646 | ||
1726 | /* max_fdset can increase, so grab it once to avoid race */ | 1647 | /* max_fdset can increase, so grab it once to avoid race */ |
1727 | max_fdset = current->files->max_fdset; | 1648 | rcu_read_lock(); |
1649 | fdt = files_fdtable(current->files); | ||
1650 | max_fdset = fdt->max_fdset; | ||
1651 | rcu_read_unlock(); | ||
1728 | if (n > max_fdset) | 1652 | if (n > max_fdset) |
1729 | n = max_fdset; | 1653 | n = max_fdset; |
1730 | 1654 | ||
@@ -1808,8 +1732,8 @@ struct compat_nfsctl_export { | |||
1808 | compat_dev_t ex32_dev; | 1732 | compat_dev_t ex32_dev; |
1809 | compat_ino_t ex32_ino; | 1733 | compat_ino_t ex32_ino; |
1810 | compat_int_t ex32_flags; | 1734 | compat_int_t ex32_flags; |
1811 | compat_uid_t ex32_anon_uid; | 1735 | __compat_uid_t ex32_anon_uid; |
1812 | compat_gid_t ex32_anon_gid; | 1736 | __compat_gid_t ex32_anon_gid; |
1813 | }; | 1737 | }; |
1814 | 1738 | ||
1815 | struct compat_nfsctl_fdparm { | 1739 | struct compat_nfsctl_fdparm { |
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 155e612635f1..e28a74203f3b 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c | |||
@@ -798,13 +798,16 @@ static int routing_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) | |||
798 | r = (void *) &r4; | 798 | r = (void *) &r4; |
799 | } | 799 | } |
800 | 800 | ||
801 | if (ret) | 801 | if (ret) { |
802 | return -EFAULT; | 802 | ret = -EFAULT; |
803 | goto out; | ||
804 | } | ||
803 | 805 | ||
804 | set_fs (KERNEL_DS); | 806 | set_fs (KERNEL_DS); |
805 | ret = sys_ioctl (fd, cmd, (unsigned long) r); | 807 | ret = sys_ioctl (fd, cmd, (unsigned long) r); |
806 | set_fs (old_fs); | 808 | set_fs (old_fs); |
807 | 809 | ||
810 | out: | ||
808 | if (mysock) | 811 | if (mysock) |
809 | sockfd_put(mysock); | 812 | sockfd_put(mysock); |
810 | 813 | ||
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 6c285efa2004..7fe85415ae7c 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c | |||
@@ -39,12 +39,47 @@ static DECLARE_MUTEX(read_mutex); | |||
39 | #define CRAMINO(x) ((x)->offset?(x)->offset<<2:1) | 39 | #define CRAMINO(x) ((x)->offset?(x)->offset<<2:1) |
40 | #define OFFSET(x) ((x)->i_ino) | 40 | #define OFFSET(x) ((x)->i_ino) |
41 | 41 | ||
42 | static struct inode *get_cramfs_inode(struct super_block *sb, struct cramfs_inode * cramfs_inode) | 42 | |
43 | static int cramfs_iget5_test(struct inode *inode, void *opaque) | ||
44 | { | ||
45 | struct cramfs_inode *cramfs_inode = opaque; | ||
46 | |||
47 | if (inode->i_ino != CRAMINO(cramfs_inode)) | ||
48 | return 0; /* does not match */ | ||
49 | |||
50 | if (inode->i_ino != 1) | ||
51 | return 1; | ||
52 | |||
53 | /* all empty directories, char, block, pipe, and sock, share inode #1 */ | ||
54 | |||
55 | if ((inode->i_mode != cramfs_inode->mode) || | ||
56 | (inode->i_gid != cramfs_inode->gid) || | ||
57 | (inode->i_uid != cramfs_inode->uid)) | ||
58 | return 0; /* does not match */ | ||
59 | |||
60 | if ((S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) && | ||
61 | (inode->i_rdev != old_decode_dev(cramfs_inode->size))) | ||
62 | return 0; /* does not match */ | ||
63 | |||
64 | return 1; /* matches */ | ||
65 | } | ||
66 | |||
67 | static int cramfs_iget5_set(struct inode *inode, void *opaque) | ||
68 | { | ||
69 | struct cramfs_inode *cramfs_inode = opaque; | ||
70 | inode->i_ino = CRAMINO(cramfs_inode); | ||
71 | return 0; | ||
72 | } | ||
73 | |||
74 | static struct inode *get_cramfs_inode(struct super_block *sb, | ||
75 | struct cramfs_inode * cramfs_inode) | ||
43 | { | 76 | { |
44 | struct inode * inode = new_inode(sb); | 77 | struct inode *inode = iget5_locked(sb, CRAMINO(cramfs_inode), |
78 | cramfs_iget5_test, cramfs_iget5_set, | ||
79 | cramfs_inode); | ||
45 | static struct timespec zerotime; | 80 | static struct timespec zerotime; |
46 | 81 | ||
47 | if (inode) { | 82 | if (inode && (inode->i_state & I_NEW)) { |
48 | inode->i_mode = cramfs_inode->mode; | 83 | inode->i_mode = cramfs_inode->mode; |
49 | inode->i_uid = cramfs_inode->uid; | 84 | inode->i_uid = cramfs_inode->uid; |
50 | inode->i_size = cramfs_inode->size; | 85 | inode->i_size = cramfs_inode->size; |
@@ -58,7 +93,6 @@ static struct inode *get_cramfs_inode(struct super_block *sb, struct cramfs_inod | |||
58 | but it's the best we can do without reading the directory | 93 | but it's the best we can do without reading the directory |
59 | contents. 1 yields the right result in GNU find, even | 94 | contents. 1 yields the right result in GNU find, even |
60 | without -noleaf option. */ | 95 | without -noleaf option. */ |
61 | insert_inode_hash(inode); | ||
62 | if (S_ISREG(inode->i_mode)) { | 96 | if (S_ISREG(inode->i_mode)) { |
63 | inode->i_fop = &generic_ro_fops; | 97 | inode->i_fop = &generic_ro_fops; |
64 | inode->i_data.a_ops = &cramfs_aops; | 98 | inode->i_data.a_ops = &cramfs_aops; |
@@ -74,6 +108,7 @@ static struct inode *get_cramfs_inode(struct super_block *sb, struct cramfs_inod | |||
74 | init_special_inode(inode, inode->i_mode, | 108 | init_special_inode(inode, inode->i_mode, |
75 | old_decode_dev(cramfs_inode->size)); | 109 | old_decode_dev(cramfs_inode->size)); |
76 | } | 110 | } |
111 | unlock_new_inode(inode); | ||
77 | } | 112 | } |
78 | return inode; | 113 | return inode; |
79 | } | 114 | } |
diff --git a/fs/cramfs/uncompress.c b/fs/cramfs/uncompress.c index 5034365b06a8..8def89f2c438 100644 --- a/fs/cramfs/uncompress.c +++ b/fs/cramfs/uncompress.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/errno.h> | 19 | #include <linux/errno.h> |
20 | #include <linux/vmalloc.h> | 20 | #include <linux/vmalloc.h> |
21 | #include <linux/zlib.h> | 21 | #include <linux/zlib.h> |
22 | #include <linux/cramfs_fs.h> | ||
22 | 23 | ||
23 | static z_stream stream; | 24 | static z_stream stream; |
24 | static int initialized; | 25 | static int initialized; |
diff --git a/fs/dcache.c b/fs/dcache.c index a15a2e1f5520..7376b61269fb 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -337,12 +337,10 @@ struct dentry * d_find_alias(struct inode *inode) | |||
337 | */ | 337 | */ |
338 | void d_prune_aliases(struct inode *inode) | 338 | void d_prune_aliases(struct inode *inode) |
339 | { | 339 | { |
340 | struct list_head *tmp, *head = &inode->i_dentry; | 340 | struct dentry *dentry; |
341 | restart: | 341 | restart: |
342 | spin_lock(&dcache_lock); | 342 | spin_lock(&dcache_lock); |
343 | tmp = head; | 343 | list_for_each_entry(dentry, &inode->i_dentry, d_alias) { |
344 | while ((tmp = tmp->next) != head) { | ||
345 | struct dentry *dentry = list_entry(tmp, struct dentry, d_alias); | ||
346 | spin_lock(&dentry->d_lock); | 344 | spin_lock(&dentry->d_lock); |
347 | if (!atomic_read(&dentry->d_count)) { | 345 | if (!atomic_read(&dentry->d_count)) { |
348 | __dget_locked(dentry); | 346 | __dget_locked(dentry); |
@@ -463,10 +461,7 @@ void shrink_dcache_sb(struct super_block * sb) | |||
463 | * superblock to the most recent end of the unused list. | 461 | * superblock to the most recent end of the unused list. |
464 | */ | 462 | */ |
465 | spin_lock(&dcache_lock); | 463 | spin_lock(&dcache_lock); |
466 | next = dentry_unused.next; | 464 | list_for_each_safe(tmp, next, &dentry_unused) { |
467 | while (next != &dentry_unused) { | ||
468 | tmp = next; | ||
469 | next = tmp->next; | ||
470 | dentry = list_entry(tmp, struct dentry, d_lru); | 465 | dentry = list_entry(tmp, struct dentry, d_lru); |
471 | if (dentry->d_sb != sb) | 466 | if (dentry->d_sb != sb) |
472 | continue; | 467 | continue; |
@@ -478,10 +473,7 @@ void shrink_dcache_sb(struct super_block * sb) | |||
478 | * Pass two ... free the dentries for this superblock. | 473 | * Pass two ... free the dentries for this superblock. |
479 | */ | 474 | */ |
480 | repeat: | 475 | repeat: |
481 | next = dentry_unused.next; | 476 | list_for_each_safe(tmp, next, &dentry_unused) { |
482 | while (next != &dentry_unused) { | ||
483 | tmp = next; | ||
484 | next = tmp->next; | ||
485 | dentry = list_entry(tmp, struct dentry, d_lru); | 477 | dentry = list_entry(tmp, struct dentry, d_lru); |
486 | if (dentry->d_sb != sb) | 478 | if (dentry->d_sb != sb) |
487 | continue; | 479 | continue; |
diff --git a/fs/devpts/Makefile b/fs/devpts/Makefile index 5800df2e50c8..236696efcbac 100644 --- a/fs/devpts/Makefile +++ b/fs/devpts/Makefile | |||
@@ -5,4 +5,3 @@ | |||
5 | obj-$(CONFIG_UNIX98_PTYS) += devpts.o | 5 | obj-$(CONFIG_UNIX98_PTYS) += devpts.o |
6 | 6 | ||
7 | devpts-$(CONFIG_UNIX98_PTYS) := inode.o | 7 | devpts-$(CONFIG_UNIX98_PTYS) := inode.o |
8 | devpts-$(CONFIG_DEVPTS_FS_SECURITY) += xattr_security.o | ||
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 1571c8d6c232..f2be44d4491f 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c | |||
@@ -18,28 +18,9 @@ | |||
18 | #include <linux/mount.h> | 18 | #include <linux/mount.h> |
19 | #include <linux/tty.h> | 19 | #include <linux/tty.h> |
20 | #include <linux/devpts_fs.h> | 20 | #include <linux/devpts_fs.h> |
21 | #include <linux/xattr.h> | ||
22 | 21 | ||
23 | #define DEVPTS_SUPER_MAGIC 0x1cd1 | 22 | #define DEVPTS_SUPER_MAGIC 0x1cd1 |
24 | 23 | ||
25 | extern struct xattr_handler devpts_xattr_security_handler; | ||
26 | |||
27 | static struct xattr_handler *devpts_xattr_handlers[] = { | ||
28 | #ifdef CONFIG_DEVPTS_FS_SECURITY | ||
29 | &devpts_xattr_security_handler, | ||
30 | #endif | ||
31 | NULL | ||
32 | }; | ||
33 | |||
34 | static struct inode_operations devpts_file_inode_operations = { | ||
35 | #ifdef CONFIG_DEVPTS_FS_XATTR | ||
36 | .setxattr = generic_setxattr, | ||
37 | .getxattr = generic_getxattr, | ||
38 | .listxattr = generic_listxattr, | ||
39 | .removexattr = generic_removexattr, | ||
40 | #endif | ||
41 | }; | ||
42 | |||
43 | static struct vfsmount *devpts_mnt; | 24 | static struct vfsmount *devpts_mnt; |
44 | static struct dentry *devpts_root; | 25 | static struct dentry *devpts_root; |
45 | 26 | ||
@@ -102,7 +83,6 @@ devpts_fill_super(struct super_block *s, void *data, int silent) | |||
102 | s->s_blocksize_bits = 10; | 83 | s->s_blocksize_bits = 10; |
103 | s->s_magic = DEVPTS_SUPER_MAGIC; | 84 | s->s_magic = DEVPTS_SUPER_MAGIC; |
104 | s->s_op = &devpts_sops; | 85 | s->s_op = &devpts_sops; |
105 | s->s_xattr = devpts_xattr_handlers; | ||
106 | s->s_time_gran = 1; | 86 | s->s_time_gran = 1; |
107 | 87 | ||
108 | inode = new_inode(s); | 88 | inode = new_inode(s); |
@@ -175,7 +155,6 @@ int devpts_pty_new(struct tty_struct *tty) | |||
175 | inode->i_gid = config.setgid ? config.gid : current->fsgid; | 155 | inode->i_gid = config.setgid ? config.gid : current->fsgid; |
176 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 156 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
177 | init_special_inode(inode, S_IFCHR|config.mode, device); | 157 | init_special_inode(inode, S_IFCHR|config.mode, device); |
178 | inode->i_op = &devpts_file_inode_operations; | ||
179 | inode->u.generic_ip = tty; | 158 | inode->u.generic_ip = tty; |
180 | 159 | ||
181 | dentry = get_node(number); | 160 | dentry = get_node(number); |
diff --git a/fs/devpts/xattr_security.c b/fs/devpts/xattr_security.c deleted file mode 100644 index 864cb5c79baa..000000000000 --- a/fs/devpts/xattr_security.c +++ /dev/null | |||
@@ -1,47 +0,0 @@ | |||
1 | /* | ||
2 | * Security xattr support for devpts. | ||
3 | * | ||
4 | * Author: Stephen Smalley <sds@epoch.ncsc.mil> | ||
5 | * Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify it | ||
8 | * under the terms of the GNU General Public License as published by the Free | ||
9 | * Software Foundation; either version 2 of the License, or (at your option) | ||
10 | * any later version. | ||
11 | */ | ||
12 | #include <linux/string.h> | ||
13 | #include <linux/fs.h> | ||
14 | #include <linux/security.h> | ||
15 | #include <linux/xattr.h> | ||
16 | |||
17 | static size_t | ||
18 | devpts_xattr_security_list(struct inode *inode, char *list, size_t list_len, | ||
19 | const char *name, size_t name_len) | ||
20 | { | ||
21 | return security_inode_listsecurity(inode, list, list_len); | ||
22 | } | ||
23 | |||
24 | static int | ||
25 | devpts_xattr_security_get(struct inode *inode, const char *name, | ||
26 | void *buffer, size_t size) | ||
27 | { | ||
28 | if (strcmp(name, "") == 0) | ||
29 | return -EINVAL; | ||
30 | return security_inode_getsecurity(inode, name, buffer, size); | ||
31 | } | ||
32 | |||
33 | static int | ||
34 | devpts_xattr_security_set(struct inode *inode, const char *name, | ||
35 | const void *value, size_t size, int flags) | ||
36 | { | ||
37 | if (strcmp(name, "") == 0) | ||
38 | return -EINVAL; | ||
39 | return security_inode_setsecurity(inode, name, value, size, flags); | ||
40 | } | ||
41 | |||
42 | struct xattr_handler devpts_xattr_security_handler = { | ||
43 | .prefix = XATTR_SECURITY_PREFIX, | ||
44 | .list = devpts_xattr_security_list, | ||
45 | .get = devpts_xattr_security_get, | ||
46 | .set = devpts_xattr_security_set, | ||
47 | }; | ||
@@ -798,6 +798,7 @@ no_thread_group: | |||
798 | static inline void flush_old_files(struct files_struct * files) | 798 | static inline void flush_old_files(struct files_struct * files) |
799 | { | 799 | { |
800 | long j = -1; | 800 | long j = -1; |
801 | struct fdtable *fdt; | ||
801 | 802 | ||
802 | spin_lock(&files->file_lock); | 803 | spin_lock(&files->file_lock); |
803 | for (;;) { | 804 | for (;;) { |
@@ -805,12 +806,13 @@ static inline void flush_old_files(struct files_struct * files) | |||
805 | 806 | ||
806 | j++; | 807 | j++; |
807 | i = j * __NFDBITS; | 808 | i = j * __NFDBITS; |
808 | if (i >= files->max_fds || i >= files->max_fdset) | 809 | fdt = files_fdtable(files); |
810 | if (i >= fdt->max_fds || i >= fdt->max_fdset) | ||
809 | break; | 811 | break; |
810 | set = files->close_on_exec->fds_bits[j]; | 812 | set = fdt->close_on_exec->fds_bits[j]; |
811 | if (!set) | 813 | if (!set) |
812 | continue; | 814 | continue; |
813 | files->close_on_exec->fds_bits[j] = 0; | 815 | fdt->close_on_exec->fds_bits[j] = 0; |
814 | spin_unlock(&files->file_lock); | 816 | spin_unlock(&files->file_lock); |
815 | for ( ; set ; i++,set >>= 1) { | 817 | for ( ; set ; i++,set >>= 1) { |
816 | if (set & 1) { | 818 | if (set & 1) { |
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index 161f156d98c8..c8d07030c897 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c | |||
@@ -615,6 +615,11 @@ got: | |||
615 | DQUOT_DROP(inode); | 615 | DQUOT_DROP(inode); |
616 | goto fail2; | 616 | goto fail2; |
617 | } | 617 | } |
618 | err = ext2_init_security(inode,dir); | ||
619 | if (err) { | ||
620 | DQUOT_FREE_INODE(inode); | ||
621 | goto fail2; | ||
622 | } | ||
618 | mark_inode_dirty(inode); | 623 | mark_inode_dirty(inode); |
619 | ext2_debug("allocating inode %lu\n", inode->i_ino); | 624 | ext2_debug("allocating inode %lu\n", inode->i_ino); |
620 | ext2_preread_inode(inode); | 625 | ext2_preread_inode(inode); |
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 53dceb0c6593..fdba4d1d3c60 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c | |||
@@ -71,6 +71,8 @@ void ext2_put_inode(struct inode *inode) | |||
71 | */ | 71 | */ |
72 | void ext2_delete_inode (struct inode * inode) | 72 | void ext2_delete_inode (struct inode * inode) |
73 | { | 73 | { |
74 | truncate_inode_pages(&inode->i_data, 0); | ||
75 | |||
74 | if (is_bad_inode(inode)) | 76 | if (is_bad_inode(inode)) |
75 | goto no_delete; | 77 | goto no_delete; |
76 | EXT2_I(inode)->i_dtime = get_seconds(); | 78 | EXT2_I(inode)->i_dtime = get_seconds(); |
diff --git a/fs/ext2/super.c b/fs/ext2/super.c index dcfe331dc4c4..3c0c7c6a5b44 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/config.h> | 19 | #include <linux/config.h> |
20 | #include <linux/module.h> | 20 | #include <linux/module.h> |
21 | #include <linux/string.h> | 21 | #include <linux/string.h> |
22 | #include <linux/fs.h> | ||
22 | #include <linux/slab.h> | 23 | #include <linux/slab.h> |
23 | #include <linux/init.h> | 24 | #include <linux/init.h> |
24 | #include <linux/blkdev.h> | 25 | #include <linux/blkdev.h> |
@@ -27,6 +28,8 @@ | |||
27 | #include <linux/buffer_head.h> | 28 | #include <linux/buffer_head.h> |
28 | #include <linux/smp_lock.h> | 29 | #include <linux/smp_lock.h> |
29 | #include <linux/vfs.h> | 30 | #include <linux/vfs.h> |
31 | #include <linux/seq_file.h> | ||
32 | #include <linux/mount.h> | ||
30 | #include <asm/uaccess.h> | 33 | #include <asm/uaccess.h> |
31 | #include "ext2.h" | 34 | #include "ext2.h" |
32 | #include "xattr.h" | 35 | #include "xattr.h" |
@@ -201,6 +204,26 @@ static void ext2_clear_inode(struct inode *inode) | |||
201 | #endif | 204 | #endif |
202 | } | 205 | } |
203 | 206 | ||
207 | static int ext2_show_options(struct seq_file *seq, struct vfsmount *vfs) | ||
208 | { | ||
209 | struct ext2_sb_info *sbi = EXT2_SB(vfs->mnt_sb); | ||
210 | |||
211 | if (sbi->s_mount_opt & EXT2_MOUNT_GRPID) | ||
212 | seq_puts(seq, ",grpid"); | ||
213 | else | ||
214 | seq_puts(seq, ",nogrpid"); | ||
215 | |||
216 | #if defined(CONFIG_QUOTA) | ||
217 | if (sbi->s_mount_opt & EXT2_MOUNT_USRQUOTA) | ||
218 | seq_puts(seq, ",usrquota"); | ||
219 | |||
220 | if (sbi->s_mount_opt & EXT2_MOUNT_GRPQUOTA) | ||
221 | seq_puts(seq, ",grpquota"); | ||
222 | #endif | ||
223 | |||
224 | return 0; | ||
225 | } | ||
226 | |||
204 | #ifdef CONFIG_QUOTA | 227 | #ifdef CONFIG_QUOTA |
205 | static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off); | 228 | static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off); |
206 | static ssize_t ext2_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off); | 229 | static ssize_t ext2_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off); |
@@ -218,6 +241,7 @@ static struct super_operations ext2_sops = { | |||
218 | .statfs = ext2_statfs, | 241 | .statfs = ext2_statfs, |
219 | .remount_fs = ext2_remount, | 242 | .remount_fs = ext2_remount, |
220 | .clear_inode = ext2_clear_inode, | 243 | .clear_inode = ext2_clear_inode, |
244 | .show_options = ext2_show_options, | ||
221 | #ifdef CONFIG_QUOTA | 245 | #ifdef CONFIG_QUOTA |
222 | .quota_read = ext2_quota_read, | 246 | .quota_read = ext2_quota_read, |
223 | .quota_write = ext2_quota_write, | 247 | .quota_write = ext2_quota_write, |
@@ -256,10 +280,11 @@ static unsigned long get_sb_block(void **data) | |||
256 | 280 | ||
257 | enum { | 281 | enum { |
258 | Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, | 282 | Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, |
259 | Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, | 283 | Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, |
260 | Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, Opt_nobh, | 284 | Opt_err_ro, Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, |
261 | Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_xip, | 285 | Opt_oldalloc, Opt_orlov, Opt_nobh, Opt_user_xattr, Opt_nouser_xattr, |
262 | Opt_ignore, Opt_err, | 286 | Opt_acl, Opt_noacl, Opt_xip, Opt_ignore, Opt_err, Opt_quota, |
287 | Opt_usrquota, Opt_grpquota | ||
263 | }; | 288 | }; |
264 | 289 | ||
265 | static match_table_t tokens = { | 290 | static match_table_t tokens = { |
@@ -288,10 +313,10 @@ static match_table_t tokens = { | |||
288 | {Opt_acl, "acl"}, | 313 | {Opt_acl, "acl"}, |
289 | {Opt_noacl, "noacl"}, | 314 | {Opt_noacl, "noacl"}, |
290 | {Opt_xip, "xip"}, | 315 | {Opt_xip, "xip"}, |
291 | {Opt_ignore, "grpquota"}, | 316 | {Opt_grpquota, "grpquota"}, |
292 | {Opt_ignore, "noquota"}, | 317 | {Opt_ignore, "noquota"}, |
293 | {Opt_ignore, "quota"}, | 318 | {Opt_quota, "quota"}, |
294 | {Opt_ignore, "usrquota"}, | 319 | {Opt_usrquota, "usrquota"}, |
295 | {Opt_err, NULL} | 320 | {Opt_err, NULL} |
296 | }; | 321 | }; |
297 | 322 | ||
@@ -406,6 +431,26 @@ static int parse_options (char * options, | |||
406 | printk("EXT2 xip option not supported\n"); | 431 | printk("EXT2 xip option not supported\n"); |
407 | #endif | 432 | #endif |
408 | break; | 433 | break; |
434 | |||
435 | #if defined(CONFIG_QUOTA) | ||
436 | case Opt_quota: | ||
437 | case Opt_usrquota: | ||
438 | set_opt(sbi->s_mount_opt, USRQUOTA); | ||
439 | break; | ||
440 | |||
441 | case Opt_grpquota: | ||
442 | set_opt(sbi->s_mount_opt, GRPQUOTA); | ||
443 | break; | ||
444 | #else | ||
445 | case Opt_quota: | ||
446 | case Opt_usrquota: | ||
447 | case Opt_grpquota: | ||
448 | printk(KERN_ERR | ||
449 | "EXT2-fs: quota operations not supported.\n"); | ||
450 | |||
451 | break; | ||
452 | #endif | ||
453 | |||
409 | case Opt_ignore: | 454 | case Opt_ignore: |
410 | break; | 455 | break; |
411 | default: | 456 | default: |
diff --git a/fs/ext2/xattr.h b/fs/ext2/xattr.h index 5f3bfde3b810..67cfeb66e897 100644 --- a/fs/ext2/xattr.h +++ b/fs/ext2/xattr.h | |||
@@ -116,3 +116,11 @@ exit_ext2_xattr(void) | |||
116 | 116 | ||
117 | # endif /* CONFIG_EXT2_FS_XATTR */ | 117 | # endif /* CONFIG_EXT2_FS_XATTR */ |
118 | 118 | ||
119 | #ifdef CONFIG_EXT2_FS_SECURITY | ||
120 | extern int ext2_init_security(struct inode *inode, struct inode *dir); | ||
121 | #else | ||
122 | static inline int ext2_init_security(struct inode *inode, struct inode *dir) | ||
123 | { | ||
124 | return 0; | ||
125 | } | ||
126 | #endif | ||
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c index 6a6c59fbe599..a26612798471 100644 --- a/fs/ext2/xattr_security.c +++ b/fs/ext2/xattr_security.c | |||
@@ -8,6 +8,7 @@ | |||
8 | #include <linux/fs.h> | 8 | #include <linux/fs.h> |
9 | #include <linux/smp_lock.h> | 9 | #include <linux/smp_lock.h> |
10 | #include <linux/ext2_fs.h> | 10 | #include <linux/ext2_fs.h> |
11 | #include <linux/security.h> | ||
11 | #include "xattr.h" | 12 | #include "xattr.h" |
12 | 13 | ||
13 | static size_t | 14 | static size_t |
@@ -45,6 +46,27 @@ ext2_xattr_security_set(struct inode *inode, const char *name, | |||
45 | value, size, flags); | 46 | value, size, flags); |
46 | } | 47 | } |
47 | 48 | ||
49 | int | ||
50 | ext2_init_security(struct inode *inode, struct inode *dir) | ||
51 | { | ||
52 | int err; | ||
53 | size_t len; | ||
54 | void *value; | ||
55 | char *name; | ||
56 | |||
57 | err = security_inode_init_security(inode, dir, &name, &value, &len); | ||
58 | if (err) { | ||
59 | if (err == -EOPNOTSUPP) | ||
60 | return 0; | ||
61 | return err; | ||
62 | } | ||
63 | err = ext2_xattr_set(inode, EXT2_XATTR_INDEX_SECURITY, | ||
64 | name, value, len, 0); | ||
65 | kfree(name); | ||
66 | kfree(value); | ||
67 | return err; | ||
68 | } | ||
69 | |||
48 | struct xattr_handler ext2_xattr_security_handler = { | 70 | struct xattr_handler ext2_xattr_security_handler = { |
49 | .prefix = XATTR_SECURITY_PREFIX, | 71 | .prefix = XATTR_SECURITY_PREFIX, |
50 | .list = ext2_xattr_security_list, | 72 | .list = ext2_xattr_security_list, |
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index 6981bd014ede..96552769d039 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c | |||
@@ -607,6 +607,11 @@ got: | |||
607 | DQUOT_DROP(inode); | 607 | DQUOT_DROP(inode); |
608 | goto fail2; | 608 | goto fail2; |
609 | } | 609 | } |
610 | err = ext3_init_security(handle,inode, dir); | ||
611 | if (err) { | ||
612 | DQUOT_FREE_INODE(inode); | ||
613 | goto fail2; | ||
614 | } | ||
610 | err = ext3_mark_inode_dirty(handle, inode); | 615 | err = ext3_mark_inode_dirty(handle, inode); |
611 | if (err) { | 616 | if (err) { |
612 | ext3_std_error(sb, err); | 617 | ext3_std_error(sb, err); |
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 9989fdcf4d5a..b5177c90d6f1 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
@@ -187,6 +187,8 @@ void ext3_delete_inode (struct inode * inode) | |||
187 | { | 187 | { |
188 | handle_t *handle; | 188 | handle_t *handle; |
189 | 189 | ||
190 | truncate_inode_pages(&inode->i_data, 0); | ||
191 | |||
190 | if (is_bad_inode(inode)) | 192 | if (is_bad_inode(inode)) |
191 | goto no_delete; | 193 | goto no_delete; |
192 | 194 | ||
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 3c3c6e399fb3..a93c3609025d 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
@@ -35,6 +35,7 @@ | |||
35 | #include <linux/mount.h> | 35 | #include <linux/mount.h> |
36 | #include <linux/namei.h> | 36 | #include <linux/namei.h> |
37 | #include <linux/quotaops.h> | 37 | #include <linux/quotaops.h> |
38 | #include <linux/seq_file.h> | ||
38 | #include <asm/uaccess.h> | 39 | #include <asm/uaccess.h> |
39 | #include "xattr.h" | 40 | #include "xattr.h" |
40 | #include "acl.h" | 41 | #include "acl.h" |
@@ -509,8 +510,41 @@ static void ext3_clear_inode(struct inode *inode) | |||
509 | kfree(rsv); | 510 | kfree(rsv); |
510 | } | 511 | } |
511 | 512 | ||
512 | #ifdef CONFIG_QUOTA | 513 | static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs) |
514 | { | ||
515 | struct ext3_sb_info *sbi = EXT3_SB(vfs->mnt_sb); | ||
516 | |||
517 | if (sbi->s_mount_opt & EXT3_MOUNT_JOURNAL_DATA) | ||
518 | seq_puts(seq, ",data=journal"); | ||
519 | |||
520 | if (sbi->s_mount_opt & EXT3_MOUNT_ORDERED_DATA) | ||
521 | seq_puts(seq, ",data=ordered"); | ||
522 | |||
523 | if (sbi->s_mount_opt & EXT3_MOUNT_WRITEBACK_DATA) | ||
524 | seq_puts(seq, ",data=writeback"); | ||
525 | |||
526 | #if defined(CONFIG_QUOTA) | ||
527 | if (sbi->s_jquota_fmt) | ||
528 | seq_printf(seq, ",jqfmt=%s", | ||
529 | (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold": "vfsv0"); | ||
530 | |||
531 | if (sbi->s_qf_names[USRQUOTA]) | ||
532 | seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); | ||
533 | |||
534 | if (sbi->s_qf_names[GRPQUOTA]) | ||
535 | seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); | ||
513 | 536 | ||
537 | if (sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA) | ||
538 | seq_puts(seq, ",usrquota"); | ||
539 | |||
540 | if (sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA) | ||
541 | seq_puts(seq, ",grpquota"); | ||
542 | #endif | ||
543 | |||
544 | return 0; | ||
545 | } | ||
546 | |||
547 | #ifdef CONFIG_QUOTA | ||
514 | #define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group") | 548 | #define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group") |
515 | #define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) | 549 | #define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) |
516 | 550 | ||
@@ -569,6 +603,7 @@ static struct super_operations ext3_sops = { | |||
569 | .statfs = ext3_statfs, | 603 | .statfs = ext3_statfs, |
570 | .remount_fs = ext3_remount, | 604 | .remount_fs = ext3_remount, |
571 | .clear_inode = ext3_clear_inode, | 605 | .clear_inode = ext3_clear_inode, |
606 | .show_options = ext3_show_options, | ||
572 | #ifdef CONFIG_QUOTA | 607 | #ifdef CONFIG_QUOTA |
573 | .quota_read = ext3_quota_read, | 608 | .quota_read = ext3_quota_read, |
574 | .quota_write = ext3_quota_write, | 609 | .quota_write = ext3_quota_write, |
@@ -590,7 +625,8 @@ enum { | |||
590 | Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, | 625 | Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, |
591 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, | 626 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, |
592 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, | 627 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, |
593 | Opt_ignore, Opt_barrier, Opt_err, Opt_resize, | 628 | Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, |
629 | Opt_grpquota | ||
594 | }; | 630 | }; |
595 | 631 | ||
596 | static match_table_t tokens = { | 632 | static match_table_t tokens = { |
@@ -634,10 +670,10 @@ static match_table_t tokens = { | |||
634 | {Opt_grpjquota, "grpjquota=%s"}, | 670 | {Opt_grpjquota, "grpjquota=%s"}, |
635 | {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, | 671 | {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, |
636 | {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, | 672 | {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, |
637 | {Opt_quota, "grpquota"}, | 673 | {Opt_grpquota, "grpquota"}, |
638 | {Opt_noquota, "noquota"}, | 674 | {Opt_noquota, "noquota"}, |
639 | {Opt_quota, "quota"}, | 675 | {Opt_quota, "quota"}, |
640 | {Opt_quota, "usrquota"}, | 676 | {Opt_usrquota, "usrquota"}, |
641 | {Opt_barrier, "barrier=%u"}, | 677 | {Opt_barrier, "barrier=%u"}, |
642 | {Opt_err, NULL}, | 678 | {Opt_err, NULL}, |
643 | {Opt_resize, "resize"}, | 679 | {Opt_resize, "resize"}, |
@@ -903,7 +939,13 @@ clear_qf_name: | |||
903 | sbi->s_jquota_fmt = QFMT_VFS_V0; | 939 | sbi->s_jquota_fmt = QFMT_VFS_V0; |
904 | break; | 940 | break; |
905 | case Opt_quota: | 941 | case Opt_quota: |
942 | case Opt_usrquota: | ||
906 | set_opt(sbi->s_mount_opt, QUOTA); | 943 | set_opt(sbi->s_mount_opt, QUOTA); |
944 | set_opt(sbi->s_mount_opt, USRQUOTA); | ||
945 | break; | ||
946 | case Opt_grpquota: | ||
947 | set_opt(sbi->s_mount_opt, QUOTA); | ||
948 | set_opt(sbi->s_mount_opt, GRPQUOTA); | ||
907 | break; | 949 | break; |
908 | case Opt_noquota: | 950 | case Opt_noquota: |
909 | if (sb_any_quota_enabled(sb)) { | 951 | if (sb_any_quota_enabled(sb)) { |
@@ -912,8 +954,13 @@ clear_qf_name: | |||
912 | return 0; | 954 | return 0; |
913 | } | 955 | } |
914 | clear_opt(sbi->s_mount_opt, QUOTA); | 956 | clear_opt(sbi->s_mount_opt, QUOTA); |
957 | clear_opt(sbi->s_mount_opt, USRQUOTA); | ||
958 | clear_opt(sbi->s_mount_opt, GRPQUOTA); | ||
915 | break; | 959 | break; |
916 | #else | 960 | #else |
961 | case Opt_quota: | ||
962 | case Opt_usrquota: | ||
963 | case Opt_grpquota: | ||
917 | case Opt_usrjquota: | 964 | case Opt_usrjquota: |
918 | case Opt_grpjquota: | 965 | case Opt_grpjquota: |
919 | case Opt_offusrjquota: | 966 | case Opt_offusrjquota: |
@@ -924,7 +971,6 @@ clear_qf_name: | |||
924 | "EXT3-fs: journalled quota options not " | 971 | "EXT3-fs: journalled quota options not " |
925 | "supported.\n"); | 972 | "supported.\n"); |
926 | break; | 973 | break; |
927 | case Opt_quota: | ||
928 | case Opt_noquota: | 974 | case Opt_noquota: |
929 | break; | 975 | break; |
930 | #endif | 976 | #endif |
@@ -962,14 +1008,38 @@ clear_qf_name: | |||
962 | } | 1008 | } |
963 | } | 1009 | } |
964 | #ifdef CONFIG_QUOTA | 1010 | #ifdef CONFIG_QUOTA |
965 | if (!sbi->s_jquota_fmt && (sbi->s_qf_names[USRQUOTA] || | 1011 | if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { |
966 | sbi->s_qf_names[GRPQUOTA])) { | 1012 | if ((sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA) && |
967 | printk(KERN_ERR | 1013 | sbi->s_qf_names[USRQUOTA]) |
968 | "EXT3-fs: journalled quota format not specified.\n"); | 1014 | clear_opt(sbi->s_mount_opt, USRQUOTA); |
969 | return 0; | 1015 | |
1016 | if ((sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA) && | ||
1017 | sbi->s_qf_names[GRPQUOTA]) | ||
1018 | clear_opt(sbi->s_mount_opt, GRPQUOTA); | ||
1019 | |||
1020 | if ((sbi->s_qf_names[USRQUOTA] && | ||
1021 | (sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA)) || | ||
1022 | (sbi->s_qf_names[GRPQUOTA] && | ||
1023 | (sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA))) { | ||
1024 | printk(KERN_ERR "EXT3-fs: old and new quota " | ||
1025 | "format mixing.\n"); | ||
1026 | return 0; | ||
1027 | } | ||
1028 | |||
1029 | if (!sbi->s_jquota_fmt) { | ||
1030 | printk(KERN_ERR "EXT3-fs: journalled quota format " | ||
1031 | "not specified.\n"); | ||
1032 | return 0; | ||
1033 | } | ||
1034 | } else { | ||
1035 | if (sbi->s_jquota_fmt) { | ||
1036 | printk(KERN_ERR "EXT3-fs: journalled quota format " | ||
1037 | "specified with no journalling " | ||
1038 | "enabled.\n"); | ||
1039 | return 0; | ||
1040 | } | ||
970 | } | 1041 | } |
971 | #endif | 1042 | #endif |
972 | |||
973 | return 1; | 1043 | return 1; |
974 | } | 1044 | } |
975 | 1045 | ||
diff --git a/fs/ext3/xattr.h b/fs/ext3/xattr.h index eb31a69e82dc..2ceae38f3d49 100644 --- a/fs/ext3/xattr.h +++ b/fs/ext3/xattr.h | |||
@@ -133,3 +133,14 @@ exit_ext3_xattr(void) | |||
133 | #define ext3_xattr_handlers NULL | 133 | #define ext3_xattr_handlers NULL |
134 | 134 | ||
135 | # endif /* CONFIG_EXT3_FS_XATTR */ | 135 | # endif /* CONFIG_EXT3_FS_XATTR */ |
136 | |||
137 | #ifdef CONFIG_EXT3_FS_SECURITY | ||
138 | extern int ext3_init_security(handle_t *handle, struct inode *inode, | ||
139 | struct inode *dir); | ||
140 | #else | ||
141 | static inline int ext3_init_security(handle_t *handle, struct inode *inode, | ||
142 | struct inode *dir) | ||
143 | { | ||
144 | return 0; | ||
145 | } | ||
146 | #endif | ||
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c index ddc1c41750e1..b9c40c15647b 100644 --- a/fs/ext3/xattr_security.c +++ b/fs/ext3/xattr_security.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/smp_lock.h> | 9 | #include <linux/smp_lock.h> |
10 | #include <linux/ext3_jbd.h> | 10 | #include <linux/ext3_jbd.h> |
11 | #include <linux/ext3_fs.h> | 11 | #include <linux/ext3_fs.h> |
12 | #include <linux/security.h> | ||
12 | #include "xattr.h" | 13 | #include "xattr.h" |
13 | 14 | ||
14 | static size_t | 15 | static size_t |
@@ -47,6 +48,27 @@ ext3_xattr_security_set(struct inode *inode, const char *name, | |||
47 | value, size, flags); | 48 | value, size, flags); |
48 | } | 49 | } |
49 | 50 | ||
51 | int | ||
52 | ext3_init_security(handle_t *handle, struct inode *inode, struct inode *dir) | ||
53 | { | ||
54 | int err; | ||
55 | size_t len; | ||
56 | void *value; | ||
57 | char *name; | ||
58 | |||
59 | err = security_inode_init_security(inode, dir, &name, &value, &len); | ||
60 | if (err) { | ||
61 | if (err == -EOPNOTSUPP) | ||
62 | return 0; | ||
63 | return err; | ||
64 | } | ||
65 | err = ext3_xattr_set_handle(handle, inode, EXT3_XATTR_INDEX_SECURITY, | ||
66 | name, value, len, 0); | ||
67 | kfree(name); | ||
68 | kfree(value); | ||
69 | return err; | ||
70 | } | ||
71 | |||
50 | struct xattr_handler ext3_xattr_security_handler = { | 72 | struct xattr_handler ext3_xattr_security_handler = { |
51 | .prefix = XATTR_SECURITY_PREFIX, | 73 | .prefix = XATTR_SECURITY_PREFIX, |
52 | .list = ext3_xattr_security_list, | 74 | .list = ext3_xattr_security_list, |
diff --git a/fs/fat/dir.c b/fs/fat/dir.c index e5ae1b720dde..895049b2ac9c 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c | |||
@@ -30,6 +30,29 @@ static inline loff_t fat_make_i_pos(struct super_block *sb, | |||
30 | | (de - (struct msdos_dir_entry *)bh->b_data); | 30 | | (de - (struct msdos_dir_entry *)bh->b_data); |
31 | } | 31 | } |
32 | 32 | ||
33 | static inline void fat_dir_readahead(struct inode *dir, sector_t iblock, | ||
34 | sector_t phys) | ||
35 | { | ||
36 | struct super_block *sb = dir->i_sb; | ||
37 | struct msdos_sb_info *sbi = MSDOS_SB(sb); | ||
38 | struct buffer_head *bh; | ||
39 | int sec; | ||
40 | |||
41 | /* This is not the first sector of a cluster, or sec_per_clus == 1 */ | |
42 | if ((iblock & (sbi->sec_per_clus - 1)) || sbi->sec_per_clus == 1) | ||
43 | return; | ||
44 | /* root dir of FAT12/FAT16 */ | ||
45 | if ((sbi->fat_bits != 32) && (dir->i_ino == MSDOS_ROOT_INO)) | ||
46 | return; | ||
47 | |||
48 | bh = sb_getblk(sb, phys); | ||
49 | if (bh && !buffer_uptodate(bh)) { | ||
50 | for (sec = 0; sec < sbi->sec_per_clus; sec++) | ||
51 | sb_breadahead(sb, phys + sec); | ||
52 | } | ||
53 | brelse(bh); | ||
54 | } | ||
55 | |||
33 | /* Returns the inode number of the directory entry at offset pos. If bh is | 56 | /* Returns the inode number of the directory entry at offset pos. If bh is |
34 | non-NULL, it is brelse'd before. Pos is incremented. The buffer header is | 57 | non-NULL, it is brelse'd before. Pos is incremented. The buffer header is |
35 | returned in bh. | 58 | returned in bh. |
@@ -58,6 +81,8 @@ next: | |||
58 | if (err || !phys) | 81 | if (err || !phys) |
59 | return -1; /* beyond EOF or error */ | 82 | return -1; /* beyond EOF or error */ |
60 | 83 | ||
84 | fat_dir_readahead(dir, iblock, phys); | ||
85 | |||
61 | *bh = sb_bread(sb, phys); | 86 | *bh = sb_bread(sb, phys); |
62 | if (*bh == NULL) { | 87 | if (*bh == NULL) { |
63 | printk(KERN_ERR "FAT: Directory bread(block %llu) failed\n", | 88 | printk(KERN_ERR "FAT: Directory bread(block %llu) failed\n", |
@@ -635,8 +660,7 @@ RecEnd: | |||
635 | EODir: | 660 | EODir: |
636 | filp->f_pos = cpos; | 661 | filp->f_pos = cpos; |
637 | FillFailed: | 662 | FillFailed: |
638 | if (bh) | 663 | brelse(bh); |
639 | brelse(bh); | ||
640 | if (unicode) | 664 | if (unicode) |
641 | free_page((unsigned long)unicode); | 665 | free_page((unsigned long)unicode); |
642 | out: | 666 | out: |
diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 96ae85b67eba..a7cbe68e2259 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c | |||
@@ -335,6 +335,8 @@ EXPORT_SYMBOL(fat_build_inode); | |||
335 | 335 | ||
336 | static void fat_delete_inode(struct inode *inode) | 336 | static void fat_delete_inode(struct inode *inode) |
337 | { | 337 | { |
338 | truncate_inode_pages(&inode->i_data, 0); | ||
339 | |||
338 | if (!is_bad_inode(inode)) { | 340 | if (!is_bad_inode(inode)) { |
339 | inode->i_size = 0; | 341 | inode->i_size = 0; |
340 | fat_truncate(inode); | 342 | fat_truncate(inode); |
diff --git a/fs/fcntl.c b/fs/fcntl.c index 6fbc9d8fcc36..863b46e0d78a 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/security.h> | 16 | #include <linux/security.h> |
17 | #include <linux/ptrace.h> | 17 | #include <linux/ptrace.h> |
18 | #include <linux/signal.h> | 18 | #include <linux/signal.h> |
19 | #include <linux/rcupdate.h> | ||
19 | 20 | ||
20 | #include <asm/poll.h> | 21 | #include <asm/poll.h> |
21 | #include <asm/siginfo.h> | 22 | #include <asm/siginfo.h> |
@@ -24,21 +25,25 @@ | |||
24 | void fastcall set_close_on_exec(unsigned int fd, int flag) | 25 | void fastcall set_close_on_exec(unsigned int fd, int flag) |
25 | { | 26 | { |
26 | struct files_struct *files = current->files; | 27 | struct files_struct *files = current->files; |
28 | struct fdtable *fdt; | ||
27 | spin_lock(&files->file_lock); | 29 | spin_lock(&files->file_lock); |
30 | fdt = files_fdtable(files); | ||
28 | if (flag) | 31 | if (flag) |
29 | FD_SET(fd, files->close_on_exec); | 32 | FD_SET(fd, fdt->close_on_exec); |
30 | else | 33 | else |
31 | FD_CLR(fd, files->close_on_exec); | 34 | FD_CLR(fd, fdt->close_on_exec); |
32 | spin_unlock(&files->file_lock); | 35 | spin_unlock(&files->file_lock); |
33 | } | 36 | } |
34 | 37 | ||
35 | static inline int get_close_on_exec(unsigned int fd) | 38 | static inline int get_close_on_exec(unsigned int fd) |
36 | { | 39 | { |
37 | struct files_struct *files = current->files; | 40 | struct files_struct *files = current->files; |
41 | struct fdtable *fdt; | ||
38 | int res; | 42 | int res; |
39 | spin_lock(&files->file_lock); | 43 | rcu_read_lock(); |
40 | res = FD_ISSET(fd, files->close_on_exec); | 44 | fdt = files_fdtable(files); |
41 | spin_unlock(&files->file_lock); | 45 | res = FD_ISSET(fd, fdt->close_on_exec); |
46 | rcu_read_unlock(); | ||
42 | return res; | 47 | return res; |
43 | } | 48 | } |
44 | 49 | ||
@@ -54,24 +59,26 @@ static int locate_fd(struct files_struct *files, | |||
54 | unsigned int newfd; | 59 | unsigned int newfd; |
55 | unsigned int start; | 60 | unsigned int start; |
56 | int error; | 61 | int error; |
62 | struct fdtable *fdt; | ||
57 | 63 | ||
58 | error = -EINVAL; | 64 | error = -EINVAL; |
59 | if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) | 65 | if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) |
60 | goto out; | 66 | goto out; |
61 | 67 | ||
62 | repeat: | 68 | repeat: |
69 | fdt = files_fdtable(files); | ||
63 | /* | 70 | /* |
64 | * Someone might have closed fd's in the range | 71 | * Someone might have closed fd's in the range |
65 | * orig_start..files->next_fd | 72 | * orig_start..fdt->next_fd |
66 | */ | 73 | */ |
67 | start = orig_start; | 74 | start = orig_start; |
68 | if (start < files->next_fd) | 75 | if (start < fdt->next_fd) |
69 | start = files->next_fd; | 76 | start = fdt->next_fd; |
70 | 77 | ||
71 | newfd = start; | 78 | newfd = start; |
72 | if (start < files->max_fdset) { | 79 | if (start < fdt->max_fdset) { |
73 | newfd = find_next_zero_bit(files->open_fds->fds_bits, | 80 | newfd = find_next_zero_bit(fdt->open_fds->fds_bits, |
74 | files->max_fdset, start); | 81 | fdt->max_fdset, start); |
75 | } | 82 | } |
76 | 83 | ||
77 | error = -EMFILE; | 84 | error = -EMFILE; |
@@ -89,9 +96,15 @@ repeat: | |||
89 | if (error) | 96 | if (error) |
90 | goto repeat; | 97 | goto repeat; |
91 | 98 | ||
92 | if (start <= files->next_fd) | 99 | /* |
93 | files->next_fd = newfd + 1; | 100 | * We reacquired files_lock, so we are safe as long as |
94 | 101 | * we reacquire the fdtable pointer and use it while holding | |
102 | * the lock, no one can free it during that time. | ||
103 | */ | ||
104 | fdt = files_fdtable(files); | ||
105 | if (start <= fdt->next_fd) | ||
106 | fdt->next_fd = newfd + 1; | ||
107 | |||
95 | error = newfd; | 108 | error = newfd; |
96 | 109 | ||
97 | out: | 110 | out: |
@@ -101,13 +114,16 @@ out: | |||
101 | static int dupfd(struct file *file, unsigned int start) | 114 | static int dupfd(struct file *file, unsigned int start) |
102 | { | 115 | { |
103 | struct files_struct * files = current->files; | 116 | struct files_struct * files = current->files; |
117 | struct fdtable *fdt; | ||
104 | int fd; | 118 | int fd; |
105 | 119 | ||
106 | spin_lock(&files->file_lock); | 120 | spin_lock(&files->file_lock); |
107 | fd = locate_fd(files, file, start); | 121 | fd = locate_fd(files, file, start); |
108 | if (fd >= 0) { | 122 | if (fd >= 0) { |
109 | FD_SET(fd, files->open_fds); | 123 | /* locate_fd() may have expanded fdtable, load the ptr */ |
110 | FD_CLR(fd, files->close_on_exec); | 124 | fdt = files_fdtable(files); |
125 | FD_SET(fd, fdt->open_fds); | ||
126 | FD_CLR(fd, fdt->close_on_exec); | ||
111 | spin_unlock(&files->file_lock); | 127 | spin_unlock(&files->file_lock); |
112 | fd_install(fd, file); | 128 | fd_install(fd, file); |
113 | } else { | 129 | } else { |
@@ -123,6 +139,7 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd) | |||
123 | int err = -EBADF; | 139 | int err = -EBADF; |
124 | struct file * file, *tofree; | 140 | struct file * file, *tofree; |
125 | struct files_struct * files = current->files; | 141 | struct files_struct * files = current->files; |
142 | struct fdtable *fdt; | ||
126 | 143 | ||
127 | spin_lock(&files->file_lock); | 144 | spin_lock(&files->file_lock); |
128 | if (!(file = fcheck(oldfd))) | 145 | if (!(file = fcheck(oldfd))) |
@@ -148,13 +165,14 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd) | |||
148 | 165 | ||
149 | /* Yes. It's a race. In user space. Nothing sane to do */ | 166 | /* Yes. It's a race. In user space. Nothing sane to do */ |
150 | err = -EBUSY; | 167 | err = -EBUSY; |
151 | tofree = files->fd[newfd]; | 168 | fdt = files_fdtable(files); |
152 | if (!tofree && FD_ISSET(newfd, files->open_fds)) | 169 | tofree = fdt->fd[newfd]; |
170 | if (!tofree && FD_ISSET(newfd, fdt->open_fds)) | ||
153 | goto out_fput; | 171 | goto out_fput; |
154 | 172 | ||
155 | files->fd[newfd] = file; | 173 | rcu_assign_pointer(fdt->fd[newfd], file); |
156 | FD_SET(newfd, files->open_fds); | 174 | FD_SET(newfd, fdt->open_fds); |
157 | FD_CLR(newfd, files->close_on_exec); | 175 | FD_CLR(newfd, fdt->close_on_exec); |
158 | spin_unlock(&files->file_lock); | 176 | spin_unlock(&files->file_lock); |
159 | 177 | ||
160 | if (tofree) | 178 | if (tofree) |
@@ -13,6 +13,25 @@ | |||
13 | #include <linux/vmalloc.h> | 13 | #include <linux/vmalloc.h> |
14 | #include <linux/file.h> | 14 | #include <linux/file.h> |
15 | #include <linux/bitops.h> | 15 | #include <linux/bitops.h> |
16 | #include <linux/interrupt.h> | ||
17 | #include <linux/spinlock.h> | ||
18 | #include <linux/rcupdate.h> | ||
19 | #include <linux/workqueue.h> | ||
20 | |||
21 | struct fdtable_defer { | ||
22 | spinlock_t lock; | ||
23 | struct work_struct wq; | ||
24 | struct timer_list timer; | ||
25 | struct fdtable *next; | ||
26 | }; | ||
27 | |||
28 | /* | ||
29 | * We use this list to defer free fdtables that have vmalloced | ||
30 | * sets/arrays. By keeping a per-cpu list, we avoid having to embed | ||
31 | * the work_struct in fdtable itself which avoids a 64 byte (i386) increase in | ||
32 | * this per-task structure. | ||
33 | */ | ||
34 | static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list); | ||
16 | 35 | ||
17 | 36 | ||
18 | /* | 37 | /* |
@@ -48,82 +67,143 @@ void free_fd_array(struct file **array, int num) | |||
48 | vfree(array); | 67 | vfree(array); |
49 | } | 68 | } |
50 | 69 | ||
51 | /* | 70 | static void __free_fdtable(struct fdtable *fdt) |
52 | * Expand the fd array in the files_struct. Called with the files | 71 | { |
53 | * spinlock held for write. | 72 | int fdset_size, fdarray_size; |
54 | */ | ||
55 | 73 | ||
56 | static int expand_fd_array(struct files_struct *files, int nr) | 74 | fdset_size = fdt->max_fdset / 8; |
57 | __releases(files->file_lock) | 75 | fdarray_size = fdt->max_fds * sizeof(struct file *); |
58 | __acquires(files->file_lock) | 76 | free_fdset(fdt->open_fds, fdset_size); |
77 | free_fdset(fdt->close_on_exec, fdset_size); | ||
78 | free_fd_array(fdt->fd, fdarray_size); | ||
79 | kfree(fdt); | ||
80 | } | ||
81 | |||
82 | static void fdtable_timer(unsigned long data) | ||
59 | { | 83 | { |
60 | struct file **new_fds; | 84 | struct fdtable_defer *fddef = (struct fdtable_defer *)data; |
61 | int error, nfds; | ||
62 | 85 | ||
63 | 86 | spin_lock(&fddef->lock); | |
64 | error = -EMFILE; | 87 | /* |
65 | if (files->max_fds >= NR_OPEN || nr >= NR_OPEN) | 88 | * If someone already emptied the queue return. |
89 | */ | ||
90 | if (!fddef->next) | ||
66 | goto out; | 91 | goto out; |
92 | if (!schedule_work(&fddef->wq)) | ||
93 | mod_timer(&fddef->timer, 5); | ||
94 | out: | ||
95 | spin_unlock(&fddef->lock); | ||
96 | } | ||
67 | 97 | ||
68 | nfds = files->max_fds; | 98 | static void free_fdtable_work(struct fdtable_defer *f) |
69 | spin_unlock(&files->file_lock); | 99 | { |
100 | struct fdtable *fdt; | ||
70 | 101 | ||
71 | /* | 102 | spin_lock_bh(&f->lock); |
72 | * Expand to the max in easy steps, and keep expanding it until | 103 | fdt = f->next; |
73 | * we have enough for the requested fd array size. | 104 | f->next = NULL; |
74 | */ | 105 | spin_unlock_bh(&f->lock); |
106 | while(fdt) { | ||
107 | struct fdtable *next = fdt->next; | ||
108 | __free_fdtable(fdt); | ||
109 | fdt = next; | ||
110 | } | ||
111 | } | ||
75 | 112 | ||
76 | do { | 113 | static void free_fdtable_rcu(struct rcu_head *rcu) |
77 | #if NR_OPEN_DEFAULT < 256 | 114 | { |
78 | if (nfds < 256) | 115 | struct fdtable *fdt = container_of(rcu, struct fdtable, rcu); |
79 | nfds = 256; | 116 | int fdset_size, fdarray_size; |
80 | else | 117 | struct fdtable_defer *fddef; |
81 | #endif | ||
82 | if (nfds < (PAGE_SIZE / sizeof(struct file *))) | ||
83 | nfds = PAGE_SIZE / sizeof(struct file *); | ||
84 | else { | ||
85 | nfds = nfds * 2; | ||
86 | if (nfds > NR_OPEN) | ||
87 | nfds = NR_OPEN; | ||
88 | } | ||
89 | } while (nfds <= nr); | ||
90 | 118 | ||
91 | error = -ENOMEM; | 119 | BUG_ON(!fdt); |
92 | new_fds = alloc_fd_array(nfds); | 120 | fdset_size = fdt->max_fdset / 8; |
93 | spin_lock(&files->file_lock); | 121 | fdarray_size = fdt->max_fds * sizeof(struct file *); |
94 | if (!new_fds) | ||
95 | goto out; | ||
96 | 122 | ||
97 | /* Copy the existing array and install the new pointer */ | 123 | if (fdt->free_files) { |
98 | 124 | /* | |
99 | if (nfds > files->max_fds) { | 125 | * This fdtable was embedded in the files structure |
100 | struct file **old_fds; | 126 | * and the files structure itself was getting destroyed. |
101 | int i; | 127 | * It is now safe to free the files structure. |
102 | 128 | */ | |
103 | old_fds = xchg(&files->fd, new_fds); | 129 | kmem_cache_free(files_cachep, fdt->free_files); |
104 | i = xchg(&files->max_fds, nfds); | 130 | return; |
105 | 131 | } | |
106 | /* Don't copy/clear the array if we are creating a new | 132 | if (fdt->max_fdset <= __FD_SETSIZE && fdt->max_fds <= NR_OPEN_DEFAULT) { |
107 | fd array for fork() */ | 133 | /* |
108 | if (i) { | 134 | * The fdtable was embedded |
109 | memcpy(new_fds, old_fds, i * sizeof(struct file *)); | 135 | */ |
110 | /* clear the remainder of the array */ | 136 | return; |
111 | memset(&new_fds[i], 0, | 137 | } |
112 | (nfds-i) * sizeof(struct file *)); | 138 | if (fdset_size <= PAGE_SIZE && fdarray_size <= PAGE_SIZE) { |
113 | 139 | kfree(fdt->open_fds); | |
114 | spin_unlock(&files->file_lock); | 140 | kfree(fdt->close_on_exec); |
115 | free_fd_array(old_fds, i); | 141 | kfree(fdt->fd); |
116 | spin_lock(&files->file_lock); | 142 | kfree(fdt); |
117 | } | ||
118 | } else { | 143 | } else { |
119 | /* Somebody expanded the array while we slept ... */ | 144 | fddef = &get_cpu_var(fdtable_defer_list); |
120 | spin_unlock(&files->file_lock); | 145 | spin_lock(&fddef->lock); |
121 | free_fd_array(new_fds, nfds); | 146 | fdt->next = fddef->next; |
122 | spin_lock(&files->file_lock); | 147 | fddef->next = fdt; |
148 | /* | ||
149 | * vmallocs are handled from the workqueue context. | ||
150 | * If the per-cpu workqueue is running, then we | ||
151 | * defer work scheduling through a timer. | ||
152 | */ | ||
153 | if (!schedule_work(&fddef->wq)) | ||
154 | mod_timer(&fddef->timer, 5); | ||
155 | spin_unlock(&fddef->lock); | ||
156 | put_cpu_var(fdtable_defer_list); | ||
123 | } | 157 | } |
124 | error = 0; | 158 | } |
125 | out: | 159 | |
126 | return error; | 160 | void free_fdtable(struct fdtable *fdt) |
161 | { | ||
162 | if (fdt->free_files || fdt->max_fdset > __FD_SETSIZE || | ||
163 | fdt->max_fds > NR_OPEN_DEFAULT) | ||
164 | call_rcu(&fdt->rcu, free_fdtable_rcu); | ||
165 | } | ||
166 | |||
167 | /* | ||
168 | * Expand the fdset in the files_struct. Called with the files spinlock | ||
169 | * held for write. | ||
170 | */ | ||
171 | static void copy_fdtable(struct fdtable *nfdt, struct fdtable *fdt) | ||
172 | { | ||
173 | int i; | ||
174 | int count; | ||
175 | |||
176 | BUG_ON(nfdt->max_fdset < fdt->max_fdset); | ||
177 | BUG_ON(nfdt->max_fds < fdt->max_fds); | ||
178 | /* Copy the existing tables and install the new pointers */ | ||
179 | |||
180 | i = fdt->max_fdset / (sizeof(unsigned long) * 8); | ||
181 | count = (nfdt->max_fdset - fdt->max_fdset) / 8; | ||
182 | |||
183 | /* | ||
184 | * Don't copy the entire array if the current fdset is | ||
185 | * not yet initialised. | ||
186 | */ | ||
187 | if (i) { | ||
188 | memcpy (nfdt->open_fds, fdt->open_fds, | ||
189 | fdt->max_fdset/8); | ||
190 | memcpy (nfdt->close_on_exec, fdt->close_on_exec, | ||
191 | fdt->max_fdset/8); | ||
192 | memset (&nfdt->open_fds->fds_bits[i], 0, count); | ||
193 | memset (&nfdt->close_on_exec->fds_bits[i], 0, count); | ||
194 | } | ||
195 | |||
196 | /* Don't copy/clear the array if we are creating a new | ||
197 | fd array for fork() */ | ||
198 | if (fdt->max_fds) { | ||
199 | memcpy(nfdt->fd, fdt->fd, | ||
200 | fdt->max_fds * sizeof(struct file *)); | ||
201 | /* clear the remainder of the array */ | ||
202 | memset(&nfdt->fd[fdt->max_fds], 0, | ||
203 | (nfdt->max_fds - fdt->max_fds) * | ||
204 | sizeof(struct file *)); | ||
205 | } | ||
206 | nfdt->next_fd = fdt->next_fd; | ||
127 | } | 207 | } |
128 | 208 | ||
129 | /* | 209 | /* |
@@ -154,26 +234,21 @@ void free_fdset(fd_set *array, int num) | |||
154 | vfree(array); | 234 | vfree(array); |
155 | } | 235 | } |
156 | 236 | ||
157 | /* | 237 | static struct fdtable *alloc_fdtable(int nr) |
158 | * Expand the fdset in the files_struct. Called with the files spinlock | ||
159 | * held for write. | ||
160 | */ | ||
161 | static int expand_fdset(struct files_struct *files, int nr) | ||
162 | __releases(file->file_lock) | ||
163 | __acquires(file->file_lock) | ||
164 | { | 238 | { |
165 | fd_set *new_openset = NULL, *new_execset = NULL; | 239 | struct fdtable *fdt = NULL; |
166 | int error, nfds = 0; | 240 | int nfds = 0; |
167 | 241 | fd_set *new_openset = NULL, *new_execset = NULL; | |
168 | error = -EMFILE; | 242 | struct file **new_fds; |
169 | if (files->max_fdset >= NR_OPEN || nr >= NR_OPEN) | ||
170 | goto out; | ||
171 | 243 | ||
172 | nfds = files->max_fdset; | 244 | fdt = kmalloc(sizeof(*fdt), GFP_KERNEL); |
173 | spin_unlock(&files->file_lock); | 245 | if (!fdt) |
246 | goto out; | ||
247 | memset(fdt, 0, sizeof(*fdt)); | ||
174 | 248 | ||
175 | /* Expand to the max in easy steps */ | 249 | nfds = __FD_SETSIZE; |
176 | do { | 250 | /* Expand to the max in easy steps */ |
251 | do { | ||
177 | if (nfds < (PAGE_SIZE * 8)) | 252 | if (nfds < (PAGE_SIZE * 8)) |
178 | nfds = PAGE_SIZE * 8; | 253 | nfds = PAGE_SIZE * 8; |
179 | else { | 254 | else { |
@@ -183,49 +258,88 @@ static int expand_fdset(struct files_struct *files, int nr) | |||
183 | } | 258 | } |
184 | } while (nfds <= nr); | 259 | } while (nfds <= nr); |
185 | 260 | ||
186 | error = -ENOMEM; | 261 | new_openset = alloc_fdset(nfds); |
187 | new_openset = alloc_fdset(nfds); | 262 | new_execset = alloc_fdset(nfds); |
188 | new_execset = alloc_fdset(nfds); | 263 | if (!new_openset || !new_execset) |
189 | spin_lock(&files->file_lock); | 264 | goto out; |
190 | if (!new_openset || !new_execset) | 265 | fdt->open_fds = new_openset; |
266 | fdt->close_on_exec = new_execset; | ||
267 | fdt->max_fdset = nfds; | ||
268 | |||
269 | nfds = NR_OPEN_DEFAULT; | ||
270 | /* | ||
271 | * Expand to the max in easy steps, and keep expanding it until | ||
272 | * we have enough for the requested fd array size. | ||
273 | */ | ||
274 | do { | ||
275 | #if NR_OPEN_DEFAULT < 256 | ||
276 | if (nfds < 256) | ||
277 | nfds = 256; | ||
278 | else | ||
279 | #endif | ||
280 | if (nfds < (PAGE_SIZE / sizeof(struct file *))) | ||
281 | nfds = PAGE_SIZE / sizeof(struct file *); | ||
282 | else { | ||
283 | nfds = nfds * 2; | ||
284 | if (nfds > NR_OPEN) | ||
285 | nfds = NR_OPEN; | ||
286 | } | ||
287 | } while (nfds <= nr); | ||
288 | new_fds = alloc_fd_array(nfds); | ||
289 | if (!new_fds) | ||
290 | goto out; | ||
291 | fdt->fd = new_fds; | ||
292 | fdt->max_fds = nfds; | ||
293 | fdt->free_files = NULL; | ||
294 | return fdt; | ||
295 | out: | ||
296 | if (new_openset) | ||
297 | free_fdset(new_openset, nfds); | ||
298 | if (new_execset) | ||
299 | free_fdset(new_execset, nfds); | ||
300 | kfree(fdt); | ||
301 | return NULL; | ||
302 | } | ||
303 | |||
304 | /* | ||
305 | * Expands the file descriptor table - it will allocate a new fdtable and | ||
306 | * both fd array and fdset. It is expected to be called with the | ||
307 | * files_lock held. | ||
308 | */ | ||
309 | static int expand_fdtable(struct files_struct *files, int nr) | ||
310 | __releases(files->file_lock) | ||
311 | __acquires(files->file_lock) | ||
312 | { | ||
313 | int error = 0; | ||
314 | struct fdtable *fdt; | ||
315 | struct fdtable *nfdt = NULL; | ||
316 | |||
317 | spin_unlock(&files->file_lock); | ||
318 | nfdt = alloc_fdtable(nr); | ||
319 | if (!nfdt) { | ||
320 | error = -ENOMEM; | ||
321 | spin_lock(&files->file_lock); | ||
191 | goto out; | 322 | goto out; |
323 | } | ||
192 | 324 | ||
193 | error = 0; | 325 | spin_lock(&files->file_lock); |
194 | 326 | fdt = files_fdtable(files); | |
195 | /* Copy the existing tables and install the new pointers */ | 327 | /* |
196 | if (nfds > files->max_fdset) { | 328 | * Check again since another task may have expanded the |
197 | int i = files->max_fdset / (sizeof(unsigned long) * 8); | 329 | * fd table while we dropped the lock |
198 | int count = (nfds - files->max_fdset) / 8; | 330 | */ |
199 | 331 | if (nr >= fdt->max_fds || nr >= fdt->max_fdset) { | |
200 | /* | 332 | copy_fdtable(nfdt, fdt); |
201 | * Don't copy the entire array if the current fdset is | 333 | } else { |
202 | * not yet initialised. | 334 | /* Somebody expanded while we dropped file_lock */ |
203 | */ | ||
204 | if (i) { | ||
205 | memcpy (new_openset, files->open_fds, files->max_fdset/8); | ||
206 | memcpy (new_execset, files->close_on_exec, files->max_fdset/8); | ||
207 | memset (&new_openset->fds_bits[i], 0, count); | ||
208 | memset (&new_execset->fds_bits[i], 0, count); | ||
209 | } | ||
210 | |||
211 | nfds = xchg(&files->max_fdset, nfds); | ||
212 | new_openset = xchg(&files->open_fds, new_openset); | ||
213 | new_execset = xchg(&files->close_on_exec, new_execset); | ||
214 | spin_unlock(&files->file_lock); | 335 | spin_unlock(&files->file_lock); |
215 | free_fdset (new_openset, nfds); | 336 | __free_fdtable(nfdt); |
216 | free_fdset (new_execset, nfds); | ||
217 | spin_lock(&files->file_lock); | 337 | spin_lock(&files->file_lock); |
218 | return 0; | 338 | goto out; |
219 | } | 339 | } |
220 | /* Somebody expanded the array while we slept ... */ | 340 | rcu_assign_pointer(files->fdt, nfdt); |
221 | 341 | free_fdtable(fdt); | |
222 | out: | 342 | out: |
223 | spin_unlock(&files->file_lock); | ||
224 | if (new_openset) | ||
225 | free_fdset(new_openset, nfds); | ||
226 | if (new_execset) | ||
227 | free_fdset(new_execset, nfds); | ||
228 | spin_lock(&files->file_lock); | ||
229 | return error; | 343 | return error; |
230 | } | 344 | } |
231 | 345 | ||
@@ -237,18 +351,39 @@ out: | |||
237 | int expand_files(struct files_struct *files, int nr) | 351 | int expand_files(struct files_struct *files, int nr) |
238 | { | 352 | { |
239 | int err, expand = 0; | 353 | int err, expand = 0; |
354 | struct fdtable *fdt; | ||
240 | 355 | ||
241 | if (nr >= files->max_fdset) { | 356 | fdt = files_fdtable(files); |
242 | expand = 1; | 357 | if (nr >= fdt->max_fdset || nr >= fdt->max_fds) { |
243 | if ((err = expand_fdset(files, nr))) | 358 | if (fdt->max_fdset >= NR_OPEN || |
359 | fdt->max_fds >= NR_OPEN || nr >= NR_OPEN) { | ||
360 | err = -EMFILE; | ||
244 | goto out; | 361 | goto out; |
245 | } | 362 | } |
246 | if (nr >= files->max_fds) { | ||
247 | expand = 1; | 363 | expand = 1; |
248 | if ((err = expand_fd_array(files, nr))) | 364 | if ((err = expand_fdtable(files, nr))) |
249 | goto out; | 365 | goto out; |
250 | } | 366 | } |
251 | err = expand; | 367 | err = expand; |
252 | out: | 368 | out: |
253 | return err; | 369 | return err; |
254 | } | 370 | } |
371 | |||
372 | static void __devinit fdtable_defer_list_init(int cpu) | ||
373 | { | ||
374 | struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu); | ||
375 | spin_lock_init(&fddef->lock); | ||
376 | INIT_WORK(&fddef->wq, (void (*)(void *))free_fdtable_work, fddef); | ||
377 | init_timer(&fddef->timer); | ||
378 | fddef->timer.data = (unsigned long)fddef; | ||
379 | fddef->timer.function = fdtable_timer; | ||
380 | fddef->next = NULL; | ||
381 | } | ||
382 | |||
383 | void __init files_defer_init(void) | ||
384 | { | ||
385 | int i; | ||
386 | /* Really early - can't use for_each_cpu */ | ||
387 | for (i = 0; i < NR_CPUS; i++) | ||
388 | fdtable_defer_list_init(i); | ||
389 | } | ||
diff --git a/fs/file_table.c b/fs/file_table.c index 1d3de78e6bc9..86ec8ae985b4 100644 --- a/fs/file_table.c +++ b/fs/file_table.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/fs.h> | 14 | #include <linux/fs.h> |
15 | #include <linux/security.h> | 15 | #include <linux/security.h> |
16 | #include <linux/eventpoll.h> | 16 | #include <linux/eventpoll.h> |
17 | #include <linux/rcupdate.h> | ||
17 | #include <linux/mount.h> | 18 | #include <linux/mount.h> |
18 | #include <linux/cdev.h> | 19 | #include <linux/cdev.h> |
19 | #include <linux/fsnotify.h> | 20 | #include <linux/fsnotify.h> |
@@ -53,11 +54,17 @@ void filp_dtor(void * objp, struct kmem_cache_s *cachep, unsigned long dflags) | |||
53 | spin_unlock_irqrestore(&filp_count_lock, flags); | 54 | spin_unlock_irqrestore(&filp_count_lock, flags); |
54 | } | 55 | } |
55 | 56 | ||
56 | static inline void file_free(struct file *f) | 57 | static inline void file_free_rcu(struct rcu_head *head) |
57 | { | 58 | { |
59 | struct file *f = container_of(head, struct file, f_rcuhead); | ||
58 | kmem_cache_free(filp_cachep, f); | 60 | kmem_cache_free(filp_cachep, f); |
59 | } | 61 | } |
60 | 62 | ||
63 | static inline void file_free(struct file *f) | ||
64 | { | ||
65 | call_rcu(&f->f_rcuhead, file_free_rcu); | ||
66 | } | ||
67 | |||
61 | /* Find an unused file structure and return a pointer to it. | 68 | /* Find an unused file structure and return a pointer to it. |
62 | * Returns NULL, if there are no more free file structures or | 69 | * Returns NULL, if there are no more free file structures or |
63 | * we run out of memory. | 70 | * we run out of memory. |
@@ -89,7 +96,6 @@ struct file *get_empty_filp(void) | |||
89 | rwlock_init(&f->f_owner.lock); | 96 | rwlock_init(&f->f_owner.lock); |
90 | /* f->f_version: 0 */ | 97 | /* f->f_version: 0 */ |
91 | INIT_LIST_HEAD(&f->f_list); | 98 | INIT_LIST_HEAD(&f->f_list); |
92 | f->f_maxcount = INT_MAX; | ||
93 | return f; | 99 | return f; |
94 | 100 | ||
95 | over: | 101 | over: |
@@ -111,7 +117,7 @@ EXPORT_SYMBOL(get_empty_filp); | |||
111 | 117 | ||
112 | void fastcall fput(struct file *file) | 118 | void fastcall fput(struct file *file) |
113 | { | 119 | { |
114 | if (atomic_dec_and_test(&file->f_count)) | 120 | if (rcuref_dec_and_test(&file->f_count)) |
115 | __fput(file); | 121 | __fput(file); |
116 | } | 122 | } |
117 | 123 | ||
@@ -157,11 +163,17 @@ struct file fastcall *fget(unsigned int fd) | |||
157 | struct file *file; | 163 | struct file *file; |
158 | struct files_struct *files = current->files; | 164 | struct files_struct *files = current->files; |
159 | 165 | ||
160 | spin_lock(&files->file_lock); | 166 | rcu_read_lock(); |
161 | file = fcheck_files(files, fd); | 167 | file = fcheck_files(files, fd); |
162 | if (file) | 168 | if (file) { |
163 | get_file(file); | 169 | if (!rcuref_inc_lf(&file->f_count)) { |
164 | spin_unlock(&files->file_lock); | 170 | /* File object ref couldn't be taken */ |
171 | rcu_read_unlock(); | ||
172 | return NULL; | ||
173 | } | ||
174 | } | ||
175 | rcu_read_unlock(); | ||
176 | |||
165 | return file; | 177 | return file; |
166 | } | 178 | } |
167 | 179 | ||
@@ -183,21 +195,25 @@ struct file fastcall *fget_light(unsigned int fd, int *fput_needed) | |||
183 | if (likely((atomic_read(&files->count) == 1))) { | 195 | if (likely((atomic_read(&files->count) == 1))) { |
184 | file = fcheck_files(files, fd); | 196 | file = fcheck_files(files, fd); |
185 | } else { | 197 | } else { |
186 | spin_lock(&files->file_lock); | 198 | rcu_read_lock(); |
187 | file = fcheck_files(files, fd); | 199 | file = fcheck_files(files, fd); |
188 | if (file) { | 200 | if (file) { |
189 | get_file(file); | 201 | if (rcuref_inc_lf(&file->f_count)) |
190 | *fput_needed = 1; | 202 | *fput_needed = 1; |
203 | else | ||
204 | /* Didn't get the reference, someone's freed */ | ||
205 | file = NULL; | ||
191 | } | 206 | } |
192 | spin_unlock(&files->file_lock); | 207 | rcu_read_unlock(); |
193 | } | 208 | } |
209 | |||
194 | return file; | 210 | return file; |
195 | } | 211 | } |
196 | 212 | ||
197 | 213 | ||
198 | void put_filp(struct file *file) | 214 | void put_filp(struct file *file) |
199 | { | 215 | { |
200 | if (atomic_dec_and_test(&file->f_count)) { | 216 | if (rcuref_dec_and_test(&file->f_count)) { |
201 | security_file_free(file); | 217 | security_file_free(file); |
202 | file_kill(file); | 218 | file_kill(file); |
203 | file_free(file); | 219 | file_free(file); |
@@ -258,4 +274,5 @@ void __init files_init(unsigned long mempages) | |||
258 | files_stat.max_files = n; | 274 | files_stat.max_files = n; |
259 | if (files_stat.max_files < NR_FILE) | 275 | if (files_stat.max_files < NR_FILE) |
260 | files_stat.max_files = NR_FILE; | 276 | files_stat.max_files = NR_FILE; |
277 | files_defer_init(); | ||
261 | } | 278 | } |
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index 27f66d3e8a04..6aa6fbe4f8ee 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c | |||
@@ -155,7 +155,7 @@ static int vxfs_fill_super(struct super_block *sbp, void *dp, int silent) | |||
155 | 155 | ||
156 | sbp->s_flags |= MS_RDONLY; | 156 | sbp->s_flags |= MS_RDONLY; |
157 | 157 | ||
158 | infp = kcalloc(1, sizeof(*infp), GFP_KERNEL); | 158 | infp = kzalloc(sizeof(*infp), GFP_KERNEL); |
159 | if (!infp) { | 159 | if (!infp) { |
160 | printk(KERN_WARNING "vxfs: unable to allocate incore superblock\n"); | 160 | printk(KERN_WARNING "vxfs: unable to allocate incore superblock\n"); |
161 | return -ENOMEM; | 161 | return -ENOMEM; |
diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile new file mode 100644 index 000000000000..c3e1f760cac9 --- /dev/null +++ b/fs/fuse/Makefile | |||
@@ -0,0 +1,7 @@ | |||
1 | # | ||
2 | # Makefile for the FUSE filesystem. | ||
3 | # | ||
4 | |||
5 | obj-$(CONFIG_FUSE_FS) += fuse.o | ||
6 | |||
7 | fuse-objs := dev.o dir.o file.o inode.o | ||
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c new file mode 100644 index 000000000000..d4c869c6d01b --- /dev/null +++ b/fs/fuse/dev.c | |||
@@ -0,0 +1,877 @@ | |||
1 | /* | ||
2 | FUSE: Filesystem in Userspace | ||
3 | Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu> | ||
4 | |||
5 | This program can be distributed under the terms of the GNU GPL. | ||
6 | See the file COPYING. | ||
7 | */ | ||
8 | |||
9 | #include "fuse_i.h" | ||
10 | |||
11 | #include <linux/init.h> | ||
12 | #include <linux/module.h> | ||
13 | #include <linux/poll.h> | ||
14 | #include <linux/uio.h> | ||
15 | #include <linux/miscdevice.h> | ||
16 | #include <linux/pagemap.h> | ||
17 | #include <linux/file.h> | ||
18 | #include <linux/slab.h> | ||
19 | |||
20 | MODULE_ALIAS_MISCDEV(FUSE_MINOR); | ||
21 | |||
22 | static kmem_cache_t *fuse_req_cachep; | ||
23 | |||
24 | static inline struct fuse_conn *fuse_get_conn(struct file *file) | ||
25 | { | ||
26 | struct fuse_conn *fc; | ||
27 | spin_lock(&fuse_lock); | ||
28 | fc = file->private_data; | ||
29 | if (fc && !fc->mounted) | ||
30 | fc = NULL; | ||
31 | spin_unlock(&fuse_lock); | ||
32 | return fc; | ||
33 | } | ||
34 | |||
35 | static inline void fuse_request_init(struct fuse_req *req) | ||
36 | { | ||
37 | memset(req, 0, sizeof(*req)); | ||
38 | INIT_LIST_HEAD(&req->list); | ||
39 | init_waitqueue_head(&req->waitq); | ||
40 | atomic_set(&req->count, 1); | ||
41 | } | ||
42 | |||
43 | struct fuse_req *fuse_request_alloc(void) | ||
44 | { | ||
45 | struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, SLAB_KERNEL); | ||
46 | if (req) | ||
47 | fuse_request_init(req); | ||
48 | return req; | ||
49 | } | ||
50 | |||
51 | void fuse_request_free(struct fuse_req *req) | ||
52 | { | ||
53 | kmem_cache_free(fuse_req_cachep, req); | ||
54 | } | ||
55 | |||
56 | static inline void block_sigs(sigset_t *oldset) | ||
57 | { | ||
58 | sigset_t mask; | ||
59 | |||
60 | siginitsetinv(&mask, sigmask(SIGKILL)); | ||
61 | sigprocmask(SIG_BLOCK, &mask, oldset); | ||
62 | } | ||
63 | |||
64 | static inline void restore_sigs(sigset_t *oldset) | ||
65 | { | ||
66 | sigprocmask(SIG_SETMASK, oldset, NULL); | ||
67 | } | ||
68 | |||
69 | void fuse_reset_request(struct fuse_req *req) | ||
70 | { | ||
71 | int preallocated = req->preallocated; | ||
72 | BUG_ON(atomic_read(&req->count) != 1); | ||
73 | fuse_request_init(req); | ||
74 | req->preallocated = preallocated; | ||
75 | } | ||
76 | |||
77 | static void __fuse_get_request(struct fuse_req *req) | ||
78 | { | ||
79 | atomic_inc(&req->count); | ||
80 | } | ||
81 | |||
82 | /* Must be called with > 1 refcount */ | ||
83 | static void __fuse_put_request(struct fuse_req *req) | ||
84 | { | ||
85 | BUG_ON(atomic_read(&req->count) < 2); | ||
86 | atomic_dec(&req->count); | ||
87 | } | ||
88 | |||
89 | static struct fuse_req *do_get_request(struct fuse_conn *fc) | ||
90 | { | ||
91 | struct fuse_req *req; | ||
92 | |||
93 | spin_lock(&fuse_lock); | ||
94 | BUG_ON(list_empty(&fc->unused_list)); | ||
95 | req = list_entry(fc->unused_list.next, struct fuse_req, list); | ||
96 | list_del_init(&req->list); | ||
97 | spin_unlock(&fuse_lock); | ||
98 | fuse_request_init(req); | ||
99 | req->preallocated = 1; | ||
100 | req->in.h.uid = current->fsuid; | ||
101 | req->in.h.gid = current->fsgid; | ||
102 | req->in.h.pid = current->pid; | ||
103 | return req; | ||
104 | } | ||
105 | |||
106 | /* This can return NULL, but only in case it's interrupted by a SIGKILL */ | ||
107 | struct fuse_req *fuse_get_request(struct fuse_conn *fc) | ||
108 | { | ||
109 | int intr; | ||
110 | sigset_t oldset; | ||
111 | |||
112 | block_sigs(&oldset); | ||
113 | intr = down_interruptible(&fc->outstanding_sem); | ||
114 | restore_sigs(&oldset); | ||
115 | return intr ? NULL : do_get_request(fc); | ||
116 | } | ||
117 | |||
118 | static void fuse_putback_request(struct fuse_conn *fc, struct fuse_req *req) | ||
119 | { | ||
120 | spin_lock(&fuse_lock); | ||
121 | if (req->preallocated) | ||
122 | list_add(&req->list, &fc->unused_list); | ||
123 | else | ||
124 | fuse_request_free(req); | ||
125 | |||
126 | /* If we are in debt decrease that first */ | ||
127 | if (fc->outstanding_debt) | ||
128 | fc->outstanding_debt--; | ||
129 | else | ||
130 | up(&fc->outstanding_sem); | ||
131 | spin_unlock(&fuse_lock); | ||
132 | } | ||
133 | |||
134 | void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) | ||
135 | { | ||
136 | if (atomic_dec_and_test(&req->count)) | ||
137 | fuse_putback_request(fc, req); | ||
138 | } | ||
139 | |||
140 | void fuse_release_background(struct fuse_req *req) | ||
141 | { | ||
142 | iput(req->inode); | ||
143 | iput(req->inode2); | ||
144 | if (req->file) | ||
145 | fput(req->file); | ||
146 | spin_lock(&fuse_lock); | ||
147 | list_del(&req->bg_entry); | ||
148 | spin_unlock(&fuse_lock); | ||
149 | } | ||
150 | |||
151 | /* | ||
152 | * This function is called when a request is finished. Either a reply | ||
153 | * has arrived or it was interrupted (and not yet sent) or some error | ||
154 | * occurred during communication with userspace, or the device file was | ||
155 | * closed. It decreases the reference count for the request. In case | ||
156 | * of a background request the references to the stored objects are | ||
157 | * released. The requester thread is woken up (if still waiting), and | ||
158 | * finally the request is either freed or put on the unused_list | ||
159 | * | ||
160 | * Called with fuse_lock, unlocks it | ||
161 | */ | ||
162 | static void request_end(struct fuse_conn *fc, struct fuse_req *req) | ||
163 | { | ||
164 | int putback; | ||
165 | req->finished = 1; | ||
166 | putback = atomic_dec_and_test(&req->count); | ||
167 | spin_unlock(&fuse_lock); | ||
168 | if (req->background) { | ||
169 | down_read(&fc->sbput_sem); | ||
170 | if (fc->mounted) | ||
171 | fuse_release_background(req); | ||
172 | up_read(&fc->sbput_sem); | ||
173 | } | ||
174 | wake_up(&req->waitq); | ||
175 | if (req->in.h.opcode == FUSE_INIT) { | ||
176 | int i; | ||
177 | |||
178 | if (req->misc.init_in_out.major != FUSE_KERNEL_VERSION) | ||
179 | fc->conn_error = 1; | ||
180 | |||
181 | /* After INIT reply is received other requests can go | ||
182 | out. So do (FUSE_MAX_OUTSTANDING - 1) number of | ||
183 | up()s on outstanding_sem. The last up() is done in | ||
184 | fuse_putback_request() */ | ||
185 | for (i = 1; i < FUSE_MAX_OUTSTANDING; i++) | ||
186 | up(&fc->outstanding_sem); | ||
187 | } | ||
188 | if (putback) | ||
189 | fuse_putback_request(fc, req); | ||
190 | } | ||
191 | |||
192 | /* | ||
193 | * Unfortunately request interruption doesn't just solve the deadlock | ||
194 | * problem, it causes problems too. These stem from the fact that an | ||
195 | * interrupted request continues to be processed in userspace, | ||
196 | * while all the locks and object references (inode and file) held | ||
197 | * during the operation are released. | ||
198 | * | ||
199 | * To release the locks is exactly why there's a need to interrupt the | ||
200 | * request, so there's not a lot that can be done about this, except | ||
201 | * introduce additional locking in userspace. | ||
202 | * | ||
203 | * More important is to keep inode and file references until userspace | ||
204 | * has replied, otherwise FORGET and RELEASE could be sent while the | ||
205 | * inode/file is still used by the filesystem. | ||
206 | * | ||
207 | * For this reason the concept of "background" request is introduced. | ||
208 | * An interrupted request is backgrounded if it has been already sent | ||
209 | * to userspace. Backgrounding involves getting an extra reference to | ||
210 | * inode(s) or file used in the request, and adding the request to | ||
211 | * fc->background list. When a reply is received for a background | ||
212 | * request, the object references are released, and the request is | ||
213 | * removed from the list. If the filesystem is unmounted while there | ||
214 | * are still background requests, the list is walked and references | ||
215 | * are released as if a reply was received. | ||
216 | * | ||
217 | * There's one more use for a background request. The RELEASE message is | ||
218 | * always sent as background, since it doesn't return an error or | ||
219 | * data. | ||
220 | */ | ||
221 | static void background_request(struct fuse_conn *fc, struct fuse_req *req) | ||
222 | { | ||
223 | req->background = 1; | ||
224 | list_add(&req->bg_entry, &fc->background); | ||
225 | if (req->inode) | ||
226 | req->inode = igrab(req->inode); | ||
227 | if (req->inode2) | ||
228 | req->inode2 = igrab(req->inode2); | ||
229 | if (req->file) | ||
230 | get_file(req->file); | ||
231 | } | ||
232 | |||
233 | /* Called with fuse_lock held. Releases, and then reacquires it. */ | ||
234 | static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) | ||
235 | { | ||
236 | sigset_t oldset; | ||
237 | |||
238 | spin_unlock(&fuse_lock); | ||
239 | block_sigs(&oldset); | ||
240 | wait_event_interruptible(req->waitq, req->finished); | ||
241 | restore_sigs(&oldset); | ||
242 | spin_lock(&fuse_lock); | ||
243 | if (req->finished) | ||
244 | return; | ||
245 | |||
246 | req->out.h.error = -EINTR; | ||
247 | req->interrupted = 1; | ||
248 | if (req->locked) { | ||
249 | /* This is uninterruptible sleep, because data is | ||
250 | being copied to/from the buffers of req. During | ||
251 | locked state, there mustn't be any filesystem | ||
252 | operation (e.g. page fault), since that could lead | ||
253 | to deadlock */ | ||
254 | spin_unlock(&fuse_lock); | ||
255 | wait_event(req->waitq, !req->locked); | ||
256 | spin_lock(&fuse_lock); | ||
257 | } | ||
258 | if (!req->sent && !list_empty(&req->list)) { | ||
259 | list_del(&req->list); | ||
260 | __fuse_put_request(req); | ||
261 | } else if (!req->finished && req->sent) | ||
262 | background_request(fc, req); | ||
263 | } | ||
264 | |||
265 | static unsigned len_args(unsigned numargs, struct fuse_arg *args) | ||
266 | { | ||
267 | unsigned nbytes = 0; | ||
268 | unsigned i; | ||
269 | |||
270 | for (i = 0; i < numargs; i++) | ||
271 | nbytes += args[i].size; | ||
272 | |||
273 | return nbytes; | ||
274 | } | ||
275 | |||
276 | static void queue_request(struct fuse_conn *fc, struct fuse_req *req) | ||
277 | { | ||
278 | fc->reqctr++; | ||
279 | /* zero is special */ | ||
280 | if (fc->reqctr == 0) | ||
281 | fc->reqctr = 1; | ||
282 | req->in.h.unique = fc->reqctr; | ||
283 | req->in.h.len = sizeof(struct fuse_in_header) + | ||
284 | len_args(req->in.numargs, (struct fuse_arg *) req->in.args); | ||
285 | if (!req->preallocated) { | ||
286 | /* If request is not preallocated (either FORGET or | ||
287 | RELEASE), then still decrease outstanding_sem, so | ||
288 | user can't open infinite number of files while not | ||
289 | processing the RELEASE requests. However for | ||
290 | efficiency do it without blocking, so if down() | ||
291 | would block, just increase the debt instead */ | ||
292 | if (down_trylock(&fc->outstanding_sem)) | ||
293 | fc->outstanding_debt++; | ||
294 | } | ||
295 | list_add_tail(&req->list, &fc->pending); | ||
296 | wake_up(&fc->waitq); | ||
297 | } | ||
298 | |||
299 | /* | ||
300 | * This can only be interrupted by a SIGKILL | ||
301 | */ | ||
302 | void request_send(struct fuse_conn *fc, struct fuse_req *req) | ||
303 | { | ||
304 | req->isreply = 1; | ||
305 | spin_lock(&fuse_lock); | ||
306 | if (!fc->connected) | ||
307 | req->out.h.error = -ENOTCONN; | ||
308 | else if (fc->conn_error) | ||
309 | req->out.h.error = -ECONNREFUSED; | ||
310 | else { | ||
311 | queue_request(fc, req); | ||
312 | /* acquire extra reference, since request is still needed | ||
313 | after request_end() */ | ||
314 | __fuse_get_request(req); | ||
315 | |||
316 | request_wait_answer(fc, req); | ||
317 | } | ||
318 | spin_unlock(&fuse_lock); | ||
319 | } | ||
320 | |||
321 | static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req) | ||
322 | { | ||
323 | spin_lock(&fuse_lock); | ||
324 | if (fc->connected) { | ||
325 | queue_request(fc, req); | ||
326 | spin_unlock(&fuse_lock); | ||
327 | } else { | ||
328 | req->out.h.error = -ENOTCONN; | ||
329 | request_end(fc, req); | ||
330 | } | ||
331 | } | ||
332 | |||
333 | void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req) | ||
334 | { | ||
335 | req->isreply = 0; | ||
336 | request_send_nowait(fc, req); | ||
337 | } | ||
338 | |||
339 | void request_send_background(struct fuse_conn *fc, struct fuse_req *req) | ||
340 | { | ||
341 | req->isreply = 1; | ||
342 | spin_lock(&fuse_lock); | ||
343 | background_request(fc, req); | ||
344 | spin_unlock(&fuse_lock); | ||
345 | request_send_nowait(fc, req); | ||
346 | } | ||
347 | |||
348 | void fuse_send_init(struct fuse_conn *fc) | ||
349 | { | ||
350 | /* This is called from fuse_read_super() so there's guaranteed | ||
351 | to be a request available */ | ||
352 | struct fuse_req *req = do_get_request(fc); | ||
353 | struct fuse_init_in_out *arg = &req->misc.init_in_out; | ||
354 | arg->major = FUSE_KERNEL_VERSION; | ||
355 | arg->minor = FUSE_KERNEL_MINOR_VERSION; | ||
356 | req->in.h.opcode = FUSE_INIT; | ||
357 | req->in.numargs = 1; | ||
358 | req->in.args[0].size = sizeof(*arg); | ||
359 | req->in.args[0].value = arg; | ||
360 | req->out.numargs = 1; | ||
361 | req->out.args[0].size = sizeof(*arg); | ||
362 | req->out.args[0].value = arg; | ||
363 | request_send_background(fc, req); | ||
364 | } | ||
365 | |||
366 | /* | ||
367 | * Lock the request. Up to the next unlock_request() there mustn't be | ||
368 | * anything that could cause a page-fault. If the request was already | ||
369 | * interrupted bail out. | ||
370 | */ | ||
371 | static inline int lock_request(struct fuse_req *req) | ||
372 | { | ||
373 | int err = 0; | ||
374 | if (req) { | ||
375 | spin_lock(&fuse_lock); | ||
376 | if (req->interrupted) | ||
377 | err = -ENOENT; | ||
378 | else | ||
379 | req->locked = 1; | ||
380 | spin_unlock(&fuse_lock); | ||
381 | } | ||
382 | return err; | ||
383 | } | ||
384 | |||
385 | /* | ||
386 | * Unlock request. If it was interrupted during being locked, the | ||
387 | * requester thread is currently waiting for it to be unlocked, so | ||
388 | * wake it up. | ||
389 | */ | ||
390 | static inline void unlock_request(struct fuse_req *req) | ||
391 | { | ||
392 | if (req) { | ||
393 | spin_lock(&fuse_lock); | ||
394 | req->locked = 0; | ||
395 | if (req->interrupted) | ||
396 | wake_up(&req->waitq); | ||
397 | spin_unlock(&fuse_lock); | ||
398 | } | ||
399 | } | ||
400 | |||
401 | struct fuse_copy_state { | ||
402 | int write; | ||
403 | struct fuse_req *req; | ||
404 | const struct iovec *iov; | ||
405 | unsigned long nr_segs; | ||
406 | unsigned long seglen; | ||
407 | unsigned long addr; | ||
408 | struct page *pg; | ||
409 | void *mapaddr; | ||
410 | void *buf; | ||
411 | unsigned len; | ||
412 | }; | ||
413 | |||
414 | static void fuse_copy_init(struct fuse_copy_state *cs, int write, | ||
415 | struct fuse_req *req, const struct iovec *iov, | ||
416 | unsigned long nr_segs) | ||
417 | { | ||
418 | memset(cs, 0, sizeof(*cs)); | ||
419 | cs->write = write; | ||
420 | cs->req = req; | ||
421 | cs->iov = iov; | ||
422 | cs->nr_segs = nr_segs; | ||
423 | } | ||
424 | |||
425 | /* Unmap and put previous page of userspace buffer */ | ||
426 | static inline void fuse_copy_finish(struct fuse_copy_state *cs) | ||
427 | { | ||
428 | if (cs->mapaddr) { | ||
429 | kunmap_atomic(cs->mapaddr, KM_USER0); | ||
430 | if (cs->write) { | ||
431 | flush_dcache_page(cs->pg); | ||
432 | set_page_dirty_lock(cs->pg); | ||
433 | } | ||
434 | put_page(cs->pg); | ||
435 | cs->mapaddr = NULL; | ||
436 | } | ||
437 | } | ||
438 | |||
439 | /* | ||
440 | * Get another pagefull of userspace buffer, and map it to kernel | ||
441 | * address space, and lock request | ||
442 | */ | ||
443 | static int fuse_copy_fill(struct fuse_copy_state *cs) | ||
444 | { | ||
445 | unsigned long offset; | ||
446 | int err; | ||
447 | |||
448 | unlock_request(cs->req); | ||
449 | fuse_copy_finish(cs); | ||
450 | if (!cs->seglen) { | ||
451 | BUG_ON(!cs->nr_segs); | ||
452 | cs->seglen = cs->iov[0].iov_len; | ||
453 | cs->addr = (unsigned long) cs->iov[0].iov_base; | ||
454 | cs->iov ++; | ||
455 | cs->nr_segs --; | ||
456 | } | ||
457 | down_read(&current->mm->mmap_sem); | ||
458 | err = get_user_pages(current, current->mm, cs->addr, 1, cs->write, 0, | ||
459 | &cs->pg, NULL); | ||
460 | up_read(&current->mm->mmap_sem); | ||
461 | if (err < 0) | ||
462 | return err; | ||
463 | BUG_ON(err != 1); | ||
464 | offset = cs->addr % PAGE_SIZE; | ||
465 | cs->mapaddr = kmap_atomic(cs->pg, KM_USER0); | ||
466 | cs->buf = cs->mapaddr + offset; | ||
467 | cs->len = min(PAGE_SIZE - offset, cs->seglen); | ||
468 | cs->seglen -= cs->len; | ||
469 | cs->addr += cs->len; | ||
470 | |||
471 | return lock_request(cs->req); | ||
472 | } | ||
473 | |||
474 | /* Do as much copy to/from userspace buffer as we can */ | ||
475 | static inline int fuse_copy_do(struct fuse_copy_state *cs, void **val, | ||
476 | unsigned *size) | ||
477 | { | ||
478 | unsigned ncpy = min(*size, cs->len); | ||
479 | if (val) { | ||
480 | if (cs->write) | ||
481 | memcpy(cs->buf, *val, ncpy); | ||
482 | else | ||
483 | memcpy(*val, cs->buf, ncpy); | ||
484 | *val += ncpy; | ||
485 | } | ||
486 | *size -= ncpy; | ||
487 | cs->len -= ncpy; | ||
488 | cs->buf += ncpy; | ||
489 | return ncpy; | ||
490 | } | ||
491 | |||
492 | /* | ||
493 | * Copy a page in the request to/from the userspace buffer. Must be | ||
494 | * done atomically | ||
495 | */ | ||
496 | static inline int fuse_copy_page(struct fuse_copy_state *cs, struct page *page, | ||
497 | unsigned offset, unsigned count, int zeroing) | ||
498 | { | ||
499 | if (page && zeroing && count < PAGE_SIZE) { | ||
500 | void *mapaddr = kmap_atomic(page, KM_USER1); | ||
501 | memset(mapaddr, 0, PAGE_SIZE); | ||
502 | kunmap_atomic(mapaddr, KM_USER1); | ||
503 | } | ||
504 | while (count) { | ||
505 | int err; | ||
506 | if (!cs->len && (err = fuse_copy_fill(cs))) | ||
507 | return err; | ||
508 | if (page) { | ||
509 | void *mapaddr = kmap_atomic(page, KM_USER1); | ||
510 | void *buf = mapaddr + offset; | ||
511 | offset += fuse_copy_do(cs, &buf, &count); | ||
512 | kunmap_atomic(mapaddr, KM_USER1); | ||
513 | } else | ||
514 | offset += fuse_copy_do(cs, NULL, &count); | ||
515 | } | ||
516 | if (page && !cs->write) | ||
517 | flush_dcache_page(page); | ||
518 | return 0; | ||
519 | } | ||
520 | |||
521 | /* Copy pages in the request to/from userspace buffer */ | ||
522 | static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes, | ||
523 | int zeroing) | ||
524 | { | ||
525 | unsigned i; | ||
526 | struct fuse_req *req = cs->req; | ||
527 | unsigned offset = req->page_offset; | ||
528 | unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset); | ||
529 | |||
530 | for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) { | ||
531 | struct page *page = req->pages[i]; | ||
532 | int err = fuse_copy_page(cs, page, offset, count, zeroing); | ||
533 | if (err) | ||
534 | return err; | ||
535 | |||
536 | nbytes -= count; | ||
537 | count = min(nbytes, (unsigned) PAGE_SIZE); | ||
538 | offset = 0; | ||
539 | } | ||
540 | return 0; | ||
541 | } | ||
542 | |||
543 | /* Copy a single argument in the request to/from userspace buffer */ | ||
544 | static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size) | ||
545 | { | ||
546 | while (size) { | ||
547 | int err; | ||
548 | if (!cs->len && (err = fuse_copy_fill(cs))) | ||
549 | return err; | ||
550 | fuse_copy_do(cs, &val, &size); | ||
551 | } | ||
552 | return 0; | ||
553 | } | ||
554 | |||
555 | /* Copy request arguments to/from userspace buffer */ | ||
556 | static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs, | ||
557 | unsigned argpages, struct fuse_arg *args, | ||
558 | int zeroing) | ||
559 | { | ||
560 | int err = 0; | ||
561 | unsigned i; | ||
562 | |||
563 | for (i = 0; !err && i < numargs; i++) { | ||
564 | struct fuse_arg *arg = &args[i]; | ||
565 | if (i == numargs - 1 && argpages) | ||
566 | err = fuse_copy_pages(cs, arg->size, zeroing); | ||
567 | else | ||
568 | err = fuse_copy_one(cs, arg->value, arg->size); | ||
569 | } | ||
570 | return err; | ||
571 | } | ||
572 | |||
573 | /* Wait until a request is available on the pending list */ | ||
574 | static void request_wait(struct fuse_conn *fc) | ||
575 | { | ||
576 | DECLARE_WAITQUEUE(wait, current); | ||
577 | |||
578 | add_wait_queue_exclusive(&fc->waitq, &wait); | ||
579 | while (fc->mounted && list_empty(&fc->pending)) { | ||
580 | set_current_state(TASK_INTERRUPTIBLE); | ||
581 | if (signal_pending(current)) | ||
582 | break; | ||
583 | |||
584 | spin_unlock(&fuse_lock); | ||
585 | schedule(); | ||
586 | spin_lock(&fuse_lock); | ||
587 | } | ||
588 | set_current_state(TASK_RUNNING); | ||
589 | remove_wait_queue(&fc->waitq, &wait); | ||
590 | } | ||
591 | |||
592 | /* | ||
593 | * Read a single request into the userspace filesystem's buffer. This | ||
594 | * function waits until a request is available, then removes it from | ||
595 | * the pending list and copies request data to userspace buffer. If | ||
596 | * no reply is needed (FORGET) or request has been interrupted or | ||
597 | * there was an error during the copying then it's finished by calling | ||
598 | * request_end(). Otherwise add it to the processing list, and set | ||
599 | * the 'sent' flag. | ||
600 | */ | ||
601 | static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov, | ||
602 | unsigned long nr_segs, loff_t *off) | ||
603 | { | ||
604 | int err; | ||
605 | struct fuse_conn *fc; | ||
606 | struct fuse_req *req; | ||
607 | struct fuse_in *in; | ||
608 | struct fuse_copy_state cs; | ||
609 | unsigned reqsize; | ||
610 | |||
611 | spin_lock(&fuse_lock); | ||
612 | fc = file->private_data; | ||
613 | err = -EPERM; | ||
614 | if (!fc) | ||
615 | goto err_unlock; | ||
616 | request_wait(fc); | ||
617 | err = -ENODEV; | ||
618 | if (!fc->mounted) | ||
619 | goto err_unlock; | ||
620 | err = -ERESTARTSYS; | ||
621 | if (list_empty(&fc->pending)) | ||
622 | goto err_unlock; | ||
623 | |||
624 | req = list_entry(fc->pending.next, struct fuse_req, list); | ||
625 | list_del_init(&req->list); | ||
626 | spin_unlock(&fuse_lock); | ||
627 | |||
628 | in = &req->in; | ||
629 | reqsize = req->in.h.len; | ||
630 | fuse_copy_init(&cs, 1, req, iov, nr_segs); | ||
631 | err = -EINVAL; | ||
632 | if (iov_length(iov, nr_segs) >= reqsize) { | ||
633 | err = fuse_copy_one(&cs, &in->h, sizeof(in->h)); | ||
634 | if (!err) | ||
635 | err = fuse_copy_args(&cs, in->numargs, in->argpages, | ||
636 | (struct fuse_arg *) in->args, 0); | ||
637 | } | ||
638 | fuse_copy_finish(&cs); | ||
639 | |||
640 | spin_lock(&fuse_lock); | ||
641 | req->locked = 0; | ||
642 | if (!err && req->interrupted) | ||
643 | err = -ENOENT; | ||
644 | if (err) { | ||
645 | if (!req->interrupted) | ||
646 | req->out.h.error = -EIO; | ||
647 | request_end(fc, req); | ||
648 | return err; | ||
649 | } | ||
650 | if (!req->isreply) | ||
651 | request_end(fc, req); | ||
652 | else { | ||
653 | req->sent = 1; | ||
654 | list_add_tail(&req->list, &fc->processing); | ||
655 | spin_unlock(&fuse_lock); | ||
656 | } | ||
657 | return reqsize; | ||
658 | |||
659 | err_unlock: | ||
660 | spin_unlock(&fuse_lock); | ||
661 | return err; | ||
662 | } | ||
663 | |||
664 | static ssize_t fuse_dev_read(struct file *file, char __user *buf, | ||
665 | size_t nbytes, loff_t *off) | ||
666 | { | ||
667 | struct iovec iov; | ||
668 | iov.iov_len = nbytes; | ||
669 | iov.iov_base = buf; | ||
670 | return fuse_dev_readv(file, &iov, 1, off); | ||
671 | } | ||
672 | |||
673 | /* Look up request on processing list by unique ID */ | ||
674 | static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique) | ||
675 | { | ||
676 | struct list_head *entry; | ||
677 | |||
678 | list_for_each(entry, &fc->processing) { | ||
679 | struct fuse_req *req; | ||
680 | req = list_entry(entry, struct fuse_req, list); | ||
681 | if (req->in.h.unique == unique) | ||
682 | return req; | ||
683 | } | ||
684 | return NULL; | ||
685 | } | ||
686 | |||
687 | static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out, | ||
688 | unsigned nbytes) | ||
689 | { | ||
690 | unsigned reqsize = sizeof(struct fuse_out_header); | ||
691 | |||
692 | if (out->h.error) | ||
693 | return nbytes != reqsize ? -EINVAL : 0; | ||
694 | |||
695 | reqsize += len_args(out->numargs, out->args); | ||
696 | |||
697 | if (reqsize < nbytes || (reqsize > nbytes && !out->argvar)) | ||
698 | return -EINVAL; | ||
699 | else if (reqsize > nbytes) { | ||
700 | struct fuse_arg *lastarg = &out->args[out->numargs-1]; | ||
701 | unsigned diffsize = reqsize - nbytes; | ||
702 | if (diffsize > lastarg->size) | ||
703 | return -EINVAL; | ||
704 | lastarg->size -= diffsize; | ||
705 | } | ||
706 | return fuse_copy_args(cs, out->numargs, out->argpages, out->args, | ||
707 | out->page_zeroing); | ||
708 | } | ||
709 | |||
710 | /* | ||
711 | * Write a single reply to a request. First the header is copied from | ||
712 | * the write buffer. The request is then searched on the processing | ||
713 | * list by the unique ID found in the header. If found, then remove | ||
714 | * it from the list and copy the rest of the buffer to the request. | ||
715 | * The request is finished by calling request_end() | ||
716 | */ | ||
717 | static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov, | ||
718 | unsigned long nr_segs, loff_t *off) | ||
719 | { | ||
720 | int err; | ||
721 | unsigned nbytes = iov_length(iov, nr_segs); | ||
722 | struct fuse_req *req; | ||
723 | struct fuse_out_header oh; | ||
724 | struct fuse_copy_state cs; | ||
725 | struct fuse_conn *fc = fuse_get_conn(file); | ||
726 | if (!fc) | ||
727 | return -ENODEV; | ||
728 | |||
729 | fuse_copy_init(&cs, 0, NULL, iov, nr_segs); | ||
730 | if (nbytes < sizeof(struct fuse_out_header)) | ||
731 | return -EINVAL; | ||
732 | |||
733 | err = fuse_copy_one(&cs, &oh, sizeof(oh)); | ||
734 | if (err) | ||
735 | goto err_finish; | ||
736 | err = -EINVAL; | ||
737 | if (!oh.unique || oh.error <= -1000 || oh.error > 0 || | ||
738 | oh.len != nbytes) | ||
739 | goto err_finish; | ||
740 | |||
741 | spin_lock(&fuse_lock); | ||
742 | req = request_find(fc, oh.unique); | ||
743 | err = -EINVAL; | ||
744 | if (!req) | ||
745 | goto err_unlock; | ||
746 | |||
747 | list_del_init(&req->list); | ||
748 | if (req->interrupted) { | ||
749 | request_end(fc, req); | ||
750 | fuse_copy_finish(&cs); | ||
751 | return -ENOENT; | ||
752 | } | ||
753 | req->out.h = oh; | ||
754 | req->locked = 1; | ||
755 | cs.req = req; | ||
756 | spin_unlock(&fuse_lock); | ||
757 | |||
758 | err = copy_out_args(&cs, &req->out, nbytes); | ||
759 | fuse_copy_finish(&cs); | ||
760 | |||
761 | spin_lock(&fuse_lock); | ||
762 | req->locked = 0; | ||
763 | if (!err) { | ||
764 | if (req->interrupted) | ||
765 | err = -ENOENT; | ||
766 | } else if (!req->interrupted) | ||
767 | req->out.h.error = -EIO; | ||
768 | request_end(fc, req); | ||
769 | |||
770 | return err ? err : nbytes; | ||
771 | |||
772 | err_unlock: | ||
773 | spin_unlock(&fuse_lock); | ||
774 | err_finish: | ||
775 | fuse_copy_finish(&cs); | ||
776 | return err; | ||
777 | } | ||
778 | |||
779 | static ssize_t fuse_dev_write(struct file *file, const char __user *buf, | ||
780 | size_t nbytes, loff_t *off) | ||
781 | { | ||
782 | struct iovec iov; | ||
783 | iov.iov_len = nbytes; | ||
784 | iov.iov_base = (char __user *) buf; | ||
785 | return fuse_dev_writev(file, &iov, 1, off); | ||
786 | } | ||
787 | |||
788 | static unsigned fuse_dev_poll(struct file *file, poll_table *wait) | ||
789 | { | ||
790 | struct fuse_conn *fc = fuse_get_conn(file); | ||
791 | unsigned mask = POLLOUT | POLLWRNORM; | ||
792 | |||
793 | if (!fc) | ||
794 | return -ENODEV; | ||
795 | |||
796 | poll_wait(file, &fc->waitq, wait); | ||
797 | |||
798 | spin_lock(&fuse_lock); | ||
799 | if (!list_empty(&fc->pending)) | ||
800 | mask |= POLLIN | POLLRDNORM; | ||
801 | spin_unlock(&fuse_lock); | ||
802 | |||
803 | return mask; | ||
804 | } | ||
805 | |||
806 | /* Abort all requests on the given list (pending or processing) */ | ||
807 | static void end_requests(struct fuse_conn *fc, struct list_head *head) | ||
808 | { | ||
809 | while (!list_empty(head)) { | ||
810 | struct fuse_req *req; | ||
811 | req = list_entry(head->next, struct fuse_req, list); | ||
812 | list_del_init(&req->list); | ||
813 | req->out.h.error = -ECONNABORTED; | ||
814 | request_end(fc, req); | ||
815 | spin_lock(&fuse_lock); | ||
816 | } | ||
817 | } | ||
818 | |||
819 | static int fuse_dev_release(struct inode *inode, struct file *file) | ||
820 | { | ||
821 | struct fuse_conn *fc; | ||
822 | |||
823 | spin_lock(&fuse_lock); | ||
824 | fc = file->private_data; | ||
825 | if (fc) { | ||
826 | fc->connected = 0; | ||
827 | end_requests(fc, &fc->pending); | ||
828 | end_requests(fc, &fc->processing); | ||
829 | fuse_release_conn(fc); | ||
830 | } | ||
831 | spin_unlock(&fuse_lock); | ||
832 | return 0; | ||
833 | } | ||
834 | |||
835 | struct file_operations fuse_dev_operations = { | ||
836 | .owner = THIS_MODULE, | ||
837 | .llseek = no_llseek, | ||
838 | .read = fuse_dev_read, | ||
839 | .readv = fuse_dev_readv, | ||
840 | .write = fuse_dev_write, | ||
841 | .writev = fuse_dev_writev, | ||
842 | .poll = fuse_dev_poll, | ||
843 | .release = fuse_dev_release, | ||
844 | }; | ||
845 | |||
846 | static struct miscdevice fuse_miscdevice = { | ||
847 | .minor = FUSE_MINOR, | ||
848 | .name = "fuse", | ||
849 | .fops = &fuse_dev_operations, | ||
850 | }; | ||
851 | |||
852 | int __init fuse_dev_init(void) | ||
853 | { | ||
854 | int err = -ENOMEM; | ||
855 | fuse_req_cachep = kmem_cache_create("fuse_request", | ||
856 | sizeof(struct fuse_req), | ||
857 | 0, 0, NULL, NULL); | ||
858 | if (!fuse_req_cachep) | ||
859 | goto out; | ||
860 | |||
861 | err = misc_register(&fuse_miscdevice); | ||
862 | if (err) | ||
863 | goto out_cache_clean; | ||
864 | |||
865 | return 0; | ||
866 | |||
867 | out_cache_clean: | ||
868 | kmem_cache_destroy(fuse_req_cachep); | ||
869 | out: | ||
870 | return err; | ||
871 | } | ||
872 | |||
873 | void fuse_dev_cleanup(void) | ||
874 | { | ||
875 | misc_deregister(&fuse_miscdevice); | ||
876 | kmem_cache_destroy(fuse_req_cachep); | ||
877 | } | ||
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c new file mode 100644 index 000000000000..e79e49b3eec7 --- /dev/null +++ b/fs/fuse/dir.c | |||
@@ -0,0 +1,982 @@ | |||
1 | /* | ||
2 | FUSE: Filesystem in Userspace | ||
3 | Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu> | ||
4 | |||
5 | This program can be distributed under the terms of the GNU GPL. | ||
6 | See the file COPYING. | ||
7 | */ | ||
8 | |||
9 | #include "fuse_i.h" | ||
10 | |||
11 | #include <linux/pagemap.h> | ||
12 | #include <linux/file.h> | ||
13 | #include <linux/gfp.h> | ||
14 | #include <linux/sched.h> | ||
15 | #include <linux/namei.h> | ||
16 | |||
17 | static inline unsigned long time_to_jiffies(unsigned long sec, | ||
18 | unsigned long nsec) | ||
19 | { | ||
20 | struct timespec ts = {sec, nsec}; | ||
21 | return jiffies + timespec_to_jiffies(&ts); | ||
22 | } | ||
23 | |||
24 | static void fuse_lookup_init(struct fuse_req *req, struct inode *dir, | ||
25 | struct dentry *entry, | ||
26 | struct fuse_entry_out *outarg) | ||
27 | { | ||
28 | req->in.h.opcode = FUSE_LOOKUP; | ||
29 | req->in.h.nodeid = get_node_id(dir); | ||
30 | req->inode = dir; | ||
31 | req->in.numargs = 1; | ||
32 | req->in.args[0].size = entry->d_name.len + 1; | ||
33 | req->in.args[0].value = entry->d_name.name; | ||
34 | req->out.numargs = 1; | ||
35 | req->out.args[0].size = sizeof(struct fuse_entry_out); | ||
36 | req->out.args[0].value = outarg; | ||
37 | } | ||
38 | |||
39 | static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) | ||
40 | { | ||
41 | if (!entry->d_inode || is_bad_inode(entry->d_inode)) | ||
42 | return 0; | ||
43 | else if (time_after(jiffies, entry->d_time)) { | ||
44 | int err; | ||
45 | struct fuse_entry_out outarg; | ||
46 | struct inode *inode = entry->d_inode; | ||
47 | struct fuse_inode *fi = get_fuse_inode(inode); | ||
48 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
49 | struct fuse_req *req = fuse_get_request(fc); | ||
50 | if (!req) | ||
51 | return 0; | ||
52 | |||
53 | fuse_lookup_init(req, entry->d_parent->d_inode, entry, &outarg); | ||
54 | request_send(fc, req); | ||
55 | err = req->out.h.error; | ||
56 | if (!err) { | ||
57 | if (outarg.nodeid != get_node_id(inode)) { | ||
58 | fuse_send_forget(fc, req, outarg.nodeid, 1); | ||
59 | return 0; | ||
60 | } | ||
61 | fi->nlookup ++; | ||
62 | } | ||
63 | fuse_put_request(fc, req); | ||
64 | if (err || (outarg.attr.mode ^ inode->i_mode) & S_IFMT) | ||
65 | return 0; | ||
66 | |||
67 | fuse_change_attributes(inode, &outarg.attr); | ||
68 | entry->d_time = time_to_jiffies(outarg.entry_valid, | ||
69 | outarg.entry_valid_nsec); | ||
70 | fi->i_time = time_to_jiffies(outarg.attr_valid, | ||
71 | outarg.attr_valid_nsec); | ||
72 | } | ||
73 | return 1; | ||
74 | } | ||
75 | |||
76 | static struct dentry_operations fuse_dentry_operations = { | ||
77 | .d_revalidate = fuse_dentry_revalidate, | ||
78 | }; | ||
79 | |||
80 | static int fuse_lookup_iget(struct inode *dir, struct dentry *entry, | ||
81 | struct inode **inodep) | ||
82 | { | ||
83 | int err; | ||
84 | struct fuse_entry_out outarg; | ||
85 | struct inode *inode = NULL; | ||
86 | struct fuse_conn *fc = get_fuse_conn(dir); | ||
87 | struct fuse_req *req; | ||
88 | |||
89 | if (entry->d_name.len > FUSE_NAME_MAX) | ||
90 | return -ENAMETOOLONG; | ||
91 | |||
92 | req = fuse_get_request(fc); | ||
93 | if (!req) | ||
94 | return -EINTR; | ||
95 | |||
96 | fuse_lookup_init(req, dir, entry, &outarg); | ||
97 | request_send(fc, req); | ||
98 | err = req->out.h.error; | ||
99 | if (!err) { | ||
100 | inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, | ||
101 | &outarg.attr); | ||
102 | if (!inode) { | ||
103 | fuse_send_forget(fc, req, outarg.nodeid, 1); | ||
104 | return -ENOMEM; | ||
105 | } | ||
106 | } | ||
107 | fuse_put_request(fc, req); | ||
108 | if (err && err != -ENOENT) | ||
109 | return err; | ||
110 | |||
111 | if (inode) { | ||
112 | struct fuse_inode *fi = get_fuse_inode(inode); | ||
113 | entry->d_time = time_to_jiffies(outarg.entry_valid, | ||
114 | outarg.entry_valid_nsec); | ||
115 | fi->i_time = time_to_jiffies(outarg.attr_valid, | ||
116 | outarg.attr_valid_nsec); | ||
117 | } | ||
118 | |||
119 | entry->d_op = &fuse_dentry_operations; | ||
120 | *inodep = inode; | ||
121 | return 0; | ||
122 | } | ||
123 | |||
124 | void fuse_invalidate_attr(struct inode *inode) | ||
125 | { | ||
126 | get_fuse_inode(inode)->i_time = jiffies - 1; | ||
127 | } | ||
128 | |||
129 | static void fuse_invalidate_entry(struct dentry *entry) | ||
130 | { | ||
131 | d_invalidate(entry); | ||
132 | entry->d_time = jiffies - 1; | ||
133 | } | ||
134 | |||
135 | static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req, | ||
136 | struct inode *dir, struct dentry *entry, | ||
137 | int mode) | ||
138 | { | ||
139 | struct fuse_entry_out outarg; | ||
140 | struct inode *inode; | ||
141 | struct fuse_inode *fi; | ||
142 | int err; | ||
143 | |||
144 | req->in.h.nodeid = get_node_id(dir); | ||
145 | req->inode = dir; | ||
146 | req->out.numargs = 1; | ||
147 | req->out.args[0].size = sizeof(outarg); | ||
148 | req->out.args[0].value = &outarg; | ||
149 | request_send(fc, req); | ||
150 | err = req->out.h.error; | ||
151 | if (err) { | ||
152 | fuse_put_request(fc, req); | ||
153 | return err; | ||
154 | } | ||
155 | inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, | ||
156 | &outarg.attr); | ||
157 | if (!inode) { | ||
158 | fuse_send_forget(fc, req, outarg.nodeid, 1); | ||
159 | return -ENOMEM; | ||
160 | } | ||
161 | fuse_put_request(fc, req); | ||
162 | |||
163 | /* Don't allow userspace to do really stupid things... */ | ||
164 | if ((inode->i_mode ^ mode) & S_IFMT) { | ||
165 | iput(inode); | ||
166 | return -EIO; | ||
167 | } | ||
168 | |||
169 | entry->d_time = time_to_jiffies(outarg.entry_valid, | ||
170 | outarg.entry_valid_nsec); | ||
171 | |||
172 | fi = get_fuse_inode(inode); | ||
173 | fi->i_time = time_to_jiffies(outarg.attr_valid, | ||
174 | outarg.attr_valid_nsec); | ||
175 | |||
176 | d_instantiate(entry, inode); | ||
177 | fuse_invalidate_attr(dir); | ||
178 | return 0; | ||
179 | } | ||
180 | |||
181 | static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode, | ||
182 | dev_t rdev) | ||
183 | { | ||
184 | struct fuse_mknod_in inarg; | ||
185 | struct fuse_conn *fc = get_fuse_conn(dir); | ||
186 | struct fuse_req *req = fuse_get_request(fc); | ||
187 | if (!req) | ||
188 | return -EINTR; | ||
189 | |||
190 | memset(&inarg, 0, sizeof(inarg)); | ||
191 | inarg.mode = mode; | ||
192 | inarg.rdev = new_encode_dev(rdev); | ||
193 | req->in.h.opcode = FUSE_MKNOD; | ||
194 | req->in.numargs = 2; | ||
195 | req->in.args[0].size = sizeof(inarg); | ||
196 | req->in.args[0].value = &inarg; | ||
197 | req->in.args[1].size = entry->d_name.len + 1; | ||
198 | req->in.args[1].value = entry->d_name.name; | ||
199 | return create_new_entry(fc, req, dir, entry, mode); | ||
200 | } | ||
201 | |||
202 | static int fuse_create(struct inode *dir, struct dentry *entry, int mode, | ||
203 | struct nameidata *nd) | ||
204 | { | ||
205 | return fuse_mknod(dir, entry, mode, 0); | ||
206 | } | ||
207 | |||
208 | static int fuse_mkdir(struct inode *dir, struct dentry *entry, int mode) | ||
209 | { | ||
210 | struct fuse_mkdir_in inarg; | ||
211 | struct fuse_conn *fc = get_fuse_conn(dir); | ||
212 | struct fuse_req *req = fuse_get_request(fc); | ||
213 | if (!req) | ||
214 | return -EINTR; | ||
215 | |||
216 | memset(&inarg, 0, sizeof(inarg)); | ||
217 | inarg.mode = mode; | ||
218 | req->in.h.opcode = FUSE_MKDIR; | ||
219 | req->in.numargs = 2; | ||
220 | req->in.args[0].size = sizeof(inarg); | ||
221 | req->in.args[0].value = &inarg; | ||
222 | req->in.args[1].size = entry->d_name.len + 1; | ||
223 | req->in.args[1].value = entry->d_name.name; | ||
224 | return create_new_entry(fc, req, dir, entry, S_IFDIR); | ||
225 | } | ||
226 | |||
227 | static int fuse_symlink(struct inode *dir, struct dentry *entry, | ||
228 | const char *link) | ||
229 | { | ||
230 | struct fuse_conn *fc = get_fuse_conn(dir); | ||
231 | unsigned len = strlen(link) + 1; | ||
232 | struct fuse_req *req; | ||
233 | |||
234 | if (len > FUSE_SYMLINK_MAX) | ||
235 | return -ENAMETOOLONG; | ||
236 | |||
237 | req = fuse_get_request(fc); | ||
238 | if (!req) | ||
239 | return -EINTR; | ||
240 | |||
241 | req->in.h.opcode = FUSE_SYMLINK; | ||
242 | req->in.numargs = 2; | ||
243 | req->in.args[0].size = entry->d_name.len + 1; | ||
244 | req->in.args[0].value = entry->d_name.name; | ||
245 | req->in.args[1].size = len; | ||
246 | req->in.args[1].value = link; | ||
247 | return create_new_entry(fc, req, dir, entry, S_IFLNK); | ||
248 | } | ||
249 | |||
250 | static int fuse_unlink(struct inode *dir, struct dentry *entry) | ||
251 | { | ||
252 | int err; | ||
253 | struct fuse_conn *fc = get_fuse_conn(dir); | ||
254 | struct fuse_req *req = fuse_get_request(fc); | ||
255 | if (!req) | ||
256 | return -EINTR; | ||
257 | |||
258 | req->in.h.opcode = FUSE_UNLINK; | ||
259 | req->in.h.nodeid = get_node_id(dir); | ||
260 | req->inode = dir; | ||
261 | req->in.numargs = 1; | ||
262 | req->in.args[0].size = entry->d_name.len + 1; | ||
263 | req->in.args[0].value = entry->d_name.name; | ||
264 | request_send(fc, req); | ||
265 | err = req->out.h.error; | ||
266 | fuse_put_request(fc, req); | ||
267 | if (!err) { | ||
268 | struct inode *inode = entry->d_inode; | ||
269 | |||
270 | /* Set nlink to zero so the inode can be cleared, if | ||
271 | the inode does have more links this will be | ||
272 | discovered at the next lookup/getattr */ | ||
273 | inode->i_nlink = 0; | ||
274 | fuse_invalidate_attr(inode); | ||
275 | fuse_invalidate_attr(dir); | ||
276 | } else if (err == -EINTR) | ||
277 | fuse_invalidate_entry(entry); | ||
278 | return err; | ||
279 | } | ||
280 | |||
281 | static int fuse_rmdir(struct inode *dir, struct dentry *entry) | ||
282 | { | ||
283 | int err; | ||
284 | struct fuse_conn *fc = get_fuse_conn(dir); | ||
285 | struct fuse_req *req = fuse_get_request(fc); | ||
286 | if (!req) | ||
287 | return -EINTR; | ||
288 | |||
289 | req->in.h.opcode = FUSE_RMDIR; | ||
290 | req->in.h.nodeid = get_node_id(dir); | ||
291 | req->inode = dir; | ||
292 | req->in.numargs = 1; | ||
293 | req->in.args[0].size = entry->d_name.len + 1; | ||
294 | req->in.args[0].value = entry->d_name.name; | ||
295 | request_send(fc, req); | ||
296 | err = req->out.h.error; | ||
297 | fuse_put_request(fc, req); | ||
298 | if (!err) { | ||
299 | entry->d_inode->i_nlink = 0; | ||
300 | fuse_invalidate_attr(dir); | ||
301 | } else if (err == -EINTR) | ||
302 | fuse_invalidate_entry(entry); | ||
303 | return err; | ||
304 | } | ||
305 | |||
306 | static int fuse_rename(struct inode *olddir, struct dentry *oldent, | ||
307 | struct inode *newdir, struct dentry *newent) | ||
308 | { | ||
309 | int err; | ||
310 | struct fuse_rename_in inarg; | ||
311 | struct fuse_conn *fc = get_fuse_conn(olddir); | ||
312 | struct fuse_req *req = fuse_get_request(fc); | ||
313 | if (!req) | ||
314 | return -EINTR; | ||
315 | |||
316 | memset(&inarg, 0, sizeof(inarg)); | ||
317 | inarg.newdir = get_node_id(newdir); | ||
318 | req->in.h.opcode = FUSE_RENAME; | ||
319 | req->in.h.nodeid = get_node_id(olddir); | ||
320 | req->inode = olddir; | ||
321 | req->inode2 = newdir; | ||
322 | req->in.numargs = 3; | ||
323 | req->in.args[0].size = sizeof(inarg); | ||
324 | req->in.args[0].value = &inarg; | ||
325 | req->in.args[1].size = oldent->d_name.len + 1; | ||
326 | req->in.args[1].value = oldent->d_name.name; | ||
327 | req->in.args[2].size = newent->d_name.len + 1; | ||
328 | req->in.args[2].value = newent->d_name.name; | ||
329 | request_send(fc, req); | ||
330 | err = req->out.h.error; | ||
331 | fuse_put_request(fc, req); | ||
332 | if (!err) { | ||
333 | fuse_invalidate_attr(olddir); | ||
334 | if (olddir != newdir) | ||
335 | fuse_invalidate_attr(newdir); | ||
336 | } else if (err == -EINTR) { | ||
337 | /* If request was interrupted, DEITY only knows if the | ||
338 | rename actually took place. If the invalidation | ||
339 | fails (e.g. some process has CWD under the renamed | ||
340 | directory), then there can be inconsistency between | ||
341 | the dcache and the real filesystem. Tough luck. */ | ||
342 | fuse_invalidate_entry(oldent); | ||
343 | if (newent->d_inode) | ||
344 | fuse_invalidate_entry(newent); | ||
345 | } | ||
346 | |||
347 | return err; | ||
348 | } | ||
349 | |||
350 | static int fuse_link(struct dentry *entry, struct inode *newdir, | ||
351 | struct dentry *newent) | ||
352 | { | ||
353 | int err; | ||
354 | struct fuse_link_in inarg; | ||
355 | struct inode *inode = entry->d_inode; | ||
356 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
357 | struct fuse_req *req = fuse_get_request(fc); | ||
358 | if (!req) | ||
359 | return -EINTR; | ||
360 | |||
361 | memset(&inarg, 0, sizeof(inarg)); | ||
362 | inarg.oldnodeid = get_node_id(inode); | ||
363 | req->in.h.opcode = FUSE_LINK; | ||
364 | req->inode2 = inode; | ||
365 | req->in.numargs = 2; | ||
366 | req->in.args[0].size = sizeof(inarg); | ||
367 | req->in.args[0].value = &inarg; | ||
368 | req->in.args[1].size = newent->d_name.len + 1; | ||
369 | req->in.args[1].value = newent->d_name.name; | ||
370 | err = create_new_entry(fc, req, newdir, newent, inode->i_mode); | ||
371 | /* Contrary to "normal" filesystems it can happen that link | ||
372 | makes two "logical" inodes point to the same "physical" | ||
373 | inode. We invalidate the attributes of the old one, so it | ||
374 | will reflect changes in the backing inode (link count, | ||
375 | etc.) | ||
376 | */ | ||
377 | if (!err || err == -EINTR) | ||
378 | fuse_invalidate_attr(inode); | ||
379 | return err; | ||
380 | } | ||
381 | |||
382 | int fuse_do_getattr(struct inode *inode) | ||
383 | { | ||
384 | int err; | ||
385 | struct fuse_attr_out arg; | ||
386 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
387 | struct fuse_req *req = fuse_get_request(fc); | ||
388 | if (!req) | ||
389 | return -EINTR; | ||
390 | |||
391 | req->in.h.opcode = FUSE_GETATTR; | ||
392 | req->in.h.nodeid = get_node_id(inode); | ||
393 | req->inode = inode; | ||
394 | req->out.numargs = 1; | ||
395 | req->out.args[0].size = sizeof(arg); | ||
396 | req->out.args[0].value = &arg; | ||
397 | request_send(fc, req); | ||
398 | err = req->out.h.error; | ||
399 | fuse_put_request(fc, req); | ||
400 | if (!err) { | ||
401 | if ((inode->i_mode ^ arg.attr.mode) & S_IFMT) { | ||
402 | make_bad_inode(inode); | ||
403 | err = -EIO; | ||
404 | } else { | ||
405 | struct fuse_inode *fi = get_fuse_inode(inode); | ||
406 | fuse_change_attributes(inode, &arg.attr); | ||
407 | fi->i_time = time_to_jiffies(arg.attr_valid, | ||
408 | arg.attr_valid_nsec); | ||
409 | } | ||
410 | } | ||
411 | return err; | ||
412 | } | ||
413 | |||
414 | /* | ||
415 | * Calling into a user-controlled filesystem gives the filesystem | ||
416 | * daemon ptrace-like capabilities over the requester process. This | ||
417 | * means, that the filesystem daemon is able to record the exact | ||
418 | * filesystem operations performed, and can also control the behavior | ||
419 | * of the requester process in otherwise impossible ways. For example | ||
420 | * it can delay the operation for arbitrary length of time allowing | ||
421 | * DoS against the requester. | ||
422 | * | ||
423 | * For this reason only those processes can call into the filesystem, | ||
424 | * for which the owner of the mount has ptrace privilege. This | ||
425 | * excludes processes started by other users, suid or sgid processes. | ||
426 | */ | ||
427 | static int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task) | ||
428 | { | ||
429 | if (fc->flags & FUSE_ALLOW_OTHER) | ||
430 | return 1; | ||
431 | |||
432 | if (task->euid == fc->user_id && | ||
433 | task->suid == fc->user_id && | ||
434 | task->uid == fc->user_id && | ||
435 | task->egid == fc->group_id && | ||
436 | task->sgid == fc->group_id && | ||
437 | task->gid == fc->group_id) | ||
438 | return 1; | ||
439 | |||
440 | return 0; | ||
441 | } | ||
442 | |||
443 | static int fuse_revalidate(struct dentry *entry) | ||
444 | { | ||
445 | struct inode *inode = entry->d_inode; | ||
446 | struct fuse_inode *fi = get_fuse_inode(inode); | ||
447 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
448 | |||
449 | if (!fuse_allow_task(fc, current)) | ||
450 | return -EACCES; | ||
451 | if (get_node_id(inode) != FUSE_ROOT_ID && | ||
452 | time_before_eq(jiffies, fi->i_time)) | ||
453 | return 0; | ||
454 | |||
455 | return fuse_do_getattr(inode); | ||
456 | } | ||
457 | |||
458 | static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd) | ||
459 | { | ||
460 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
461 | |||
462 | if (!fuse_allow_task(fc, current)) | ||
463 | return -EACCES; | ||
464 | else if (fc->flags & FUSE_DEFAULT_PERMISSIONS) { | ||
465 | int err = generic_permission(inode, mask, NULL); | ||
466 | |||
467 | /* If permission is denied, try to refresh file | ||
468 | attributes. This is also needed, because the root | ||
469 | node will at first have no permissions */ | ||
470 | if (err == -EACCES) { | ||
471 | err = fuse_do_getattr(inode); | ||
472 | if (!err) | ||
473 | err = generic_permission(inode, mask, NULL); | ||
474 | } | ||
475 | |||
476 | /* FIXME: Need some mechanism to revoke permissions: | ||
477 | currently if the filesystem suddenly changes the | ||
478 | file mode, we will not be informed about it, and | ||
479 | continue to allow access to the file/directory. | ||
480 | |||
481 | This is actually not so grave, since the user can | ||
482 | simply keep access to the file/directory anyway by | ||
483 | keeping it open... */ | ||
484 | |||
485 | return err; | ||
486 | } else { | ||
487 | int mode = inode->i_mode; | ||
488 | if ((mask & MAY_WRITE) && IS_RDONLY(inode) && | ||
489 | (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) | ||
490 | return -EROFS; | ||
491 | if ((mask & MAY_EXEC) && !S_ISDIR(mode) && !(mode & S_IXUGO)) | ||
492 | return -EACCES; | ||
493 | return 0; | ||
494 | } | ||
495 | } | ||
496 | |||
497 | static int parse_dirfile(char *buf, size_t nbytes, struct file *file, | ||
498 | void *dstbuf, filldir_t filldir) | ||
499 | { | ||
500 | while (nbytes >= FUSE_NAME_OFFSET) { | ||
501 | struct fuse_dirent *dirent = (struct fuse_dirent *) buf; | ||
502 | size_t reclen = FUSE_DIRENT_SIZE(dirent); | ||
503 | int over; | ||
504 | if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX) | ||
505 | return -EIO; | ||
506 | if (reclen > nbytes) | ||
507 | break; | ||
508 | |||
509 | over = filldir(dstbuf, dirent->name, dirent->namelen, | ||
510 | file->f_pos, dirent->ino, dirent->type); | ||
511 | if (over) | ||
512 | break; | ||
513 | |||
514 | buf += reclen; | ||
515 | nbytes -= reclen; | ||
516 | file->f_pos = dirent->off; | ||
517 | } | ||
518 | |||
519 | return 0; | ||
520 | } | ||
521 | |||
522 | static inline size_t fuse_send_readdir(struct fuse_req *req, struct file *file, | ||
523 | struct inode *inode, loff_t pos, | ||
524 | size_t count) | ||
525 | { | ||
526 | return fuse_send_read_common(req, file, inode, pos, count, 1); | ||
527 | } | ||
528 | |||
529 | static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) | ||
530 | { | ||
531 | int err; | ||
532 | size_t nbytes; | ||
533 | struct page *page; | ||
534 | struct inode *inode = file->f_dentry->d_inode; | ||
535 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
536 | struct fuse_req *req = fuse_get_request(fc); | ||
537 | if (!req) | ||
538 | return -EINTR; | ||
539 | |||
540 | page = alloc_page(GFP_KERNEL); | ||
541 | if (!page) { | ||
542 | fuse_put_request(fc, req); | ||
543 | return -ENOMEM; | ||
544 | } | ||
545 | req->num_pages = 1; | ||
546 | req->pages[0] = page; | ||
547 | nbytes = fuse_send_readdir(req, file, inode, file->f_pos, PAGE_SIZE); | ||
548 | err = req->out.h.error; | ||
549 | fuse_put_request(fc, req); | ||
550 | if (!err) | ||
551 | err = parse_dirfile(page_address(page), nbytes, file, dstbuf, | ||
552 | filldir); | ||
553 | |||
554 | __free_page(page); | ||
555 | fuse_invalidate_attr(inode); /* atime changed */ | ||
556 | return err; | ||
557 | } | ||
558 | |||
559 | static char *read_link(struct dentry *dentry) | ||
560 | { | ||
561 | struct inode *inode = dentry->d_inode; | ||
562 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
563 | struct fuse_req *req = fuse_get_request(fc); | ||
564 | char *link; | ||
565 | |||
566 | if (!req) | ||
567 | return ERR_PTR(-EINTR); | ||
568 | |||
569 | link = (char *) __get_free_page(GFP_KERNEL); | ||
570 | if (!link) { | ||
571 | link = ERR_PTR(-ENOMEM); | ||
572 | goto out; | ||
573 | } | ||
574 | req->in.h.opcode = FUSE_READLINK; | ||
575 | req->in.h.nodeid = get_node_id(inode); | ||
576 | req->inode = inode; | ||
577 | req->out.argvar = 1; | ||
578 | req->out.numargs = 1; | ||
579 | req->out.args[0].size = PAGE_SIZE - 1; | ||
580 | req->out.args[0].value = link; | ||
581 | request_send(fc, req); | ||
582 | if (req->out.h.error) { | ||
583 | free_page((unsigned long) link); | ||
584 | link = ERR_PTR(req->out.h.error); | ||
585 | } else | ||
586 | link[req->out.args[0].size] = '\0'; | ||
587 | out: | ||
588 | fuse_put_request(fc, req); | ||
589 | fuse_invalidate_attr(inode); /* atime changed */ | ||
590 | return link; | ||
591 | } | ||
592 | |||
/*
 * Release a link buffer returned by read_link().  Error pointers are
 * ignored, so the result of read_link() can be passed unconditionally.
 */
static void free_link(char *link)
{
	if (IS_ERR(link))
		return;

	free_page((unsigned long) link);
}
598 | |||
599 | static void *fuse_follow_link(struct dentry *dentry, struct nameidata *nd) | ||
600 | { | ||
601 | nd_set_link(nd, read_link(dentry)); | ||
602 | return NULL; | ||
603 | } | ||
604 | |||
/*
 * ->put_link: free the link stored in the nameidata by fuse_follow_link().
 */
static void fuse_put_link(struct dentry *dentry, struct nameidata *nd, void *c)
{
	char *target = nd_get_link(nd);

	free_link(target);
}
609 | |||
/* ->open for directories: fuse_open_common() with isdir set. */
static int fuse_dir_open(struct inode *inode, struct file *file)
{
	const int isdir = 1;

	return fuse_open_common(inode, file, isdir);
}
614 | |||
/* ->release for directories: fuse_release_common() with isdir set. */
static int fuse_dir_release(struct inode *inode, struct file *file)
{
	const int isdir = 1;

	return fuse_release_common(inode, file, isdir);
}
619 | |||
/*
 * ->fsync for directories.  Succeeds trivially when there is no file
 * to take the file handle from.
 */
static int fuse_dir_fsync(struct file *file, struct dentry *de, int datasync)
{
	/* nfsd can call this with no file */
	if (!file)
		return 0;

	return fuse_fsync_common(file, de, datasync, 1);
}
625 | |||
626 | static unsigned iattr_to_fattr(struct iattr *iattr, struct fuse_attr *fattr) | ||
627 | { | ||
628 | unsigned ivalid = iattr->ia_valid; | ||
629 | unsigned fvalid = 0; | ||
630 | |||
631 | memset(fattr, 0, sizeof(*fattr)); | ||
632 | |||
633 | if (ivalid & ATTR_MODE) | ||
634 | fvalid |= FATTR_MODE, fattr->mode = iattr->ia_mode; | ||
635 | if (ivalid & ATTR_UID) | ||
636 | fvalid |= FATTR_UID, fattr->uid = iattr->ia_uid; | ||
637 | if (ivalid & ATTR_GID) | ||
638 | fvalid |= FATTR_GID, fattr->gid = iattr->ia_gid; | ||
639 | if (ivalid & ATTR_SIZE) | ||
640 | fvalid |= FATTR_SIZE, fattr->size = iattr->ia_size; | ||
641 | /* You can only _set_ these together (they may change by themselves) */ | ||
642 | if ((ivalid & (ATTR_ATIME | ATTR_MTIME)) == (ATTR_ATIME | ATTR_MTIME)) { | ||
643 | fvalid |= FATTR_ATIME | FATTR_MTIME; | ||
644 | fattr->atime = iattr->ia_atime.tv_sec; | ||
645 | fattr->mtime = iattr->ia_mtime.tv_sec; | ||
646 | } | ||
647 | |||
648 | return fvalid; | ||
649 | } | ||
650 | |||
651 | static int fuse_setattr(struct dentry *entry, struct iattr *attr) | ||
652 | { | ||
653 | struct inode *inode = entry->d_inode; | ||
654 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
655 | struct fuse_inode *fi = get_fuse_inode(inode); | ||
656 | struct fuse_req *req; | ||
657 | struct fuse_setattr_in inarg; | ||
658 | struct fuse_attr_out outarg; | ||
659 | int err; | ||
660 | int is_truncate = 0; | ||
661 | |||
662 | if (fc->flags & FUSE_DEFAULT_PERMISSIONS) { | ||
663 | err = inode_change_ok(inode, attr); | ||
664 | if (err) | ||
665 | return err; | ||
666 | } | ||
667 | |||
668 | if (attr->ia_valid & ATTR_SIZE) { | ||
669 | unsigned long limit; | ||
670 | is_truncate = 1; | ||
671 | limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; | ||
672 | if (limit != RLIM_INFINITY && attr->ia_size > (loff_t) limit) { | ||
673 | send_sig(SIGXFSZ, current, 0); | ||
674 | return -EFBIG; | ||
675 | } | ||
676 | } | ||
677 | |||
678 | req = fuse_get_request(fc); | ||
679 | if (!req) | ||
680 | return -EINTR; | ||
681 | |||
682 | memset(&inarg, 0, sizeof(inarg)); | ||
683 | inarg.valid = iattr_to_fattr(attr, &inarg.attr); | ||
684 | req->in.h.opcode = FUSE_SETATTR; | ||
685 | req->in.h.nodeid = get_node_id(inode); | ||
686 | req->inode = inode; | ||
687 | req->in.numargs = 1; | ||
688 | req->in.args[0].size = sizeof(inarg); | ||
689 | req->in.args[0].value = &inarg; | ||
690 | req->out.numargs = 1; | ||
691 | req->out.args[0].size = sizeof(outarg); | ||
692 | req->out.args[0].value = &outarg; | ||
693 | request_send(fc, req); | ||
694 | err = req->out.h.error; | ||
695 | fuse_put_request(fc, req); | ||
696 | if (!err) { | ||
697 | if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) { | ||
698 | make_bad_inode(inode); | ||
699 | err = -EIO; | ||
700 | } else { | ||
701 | if (is_truncate) { | ||
702 | loff_t origsize = i_size_read(inode); | ||
703 | i_size_write(inode, outarg.attr.size); | ||
704 | if (origsize > outarg.attr.size) | ||
705 | vmtruncate(inode, outarg.attr.size); | ||
706 | } | ||
707 | fuse_change_attributes(inode, &outarg.attr); | ||
708 | fi->i_time = time_to_jiffies(outarg.attr_valid, | ||
709 | outarg.attr_valid_nsec); | ||
710 | } | ||
711 | } else if (err == -EINTR) | ||
712 | fuse_invalidate_attr(inode); | ||
713 | |||
714 | return err; | ||
715 | } | ||
716 | |||
717 | static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry, | ||
718 | struct kstat *stat) | ||
719 | { | ||
720 | struct inode *inode = entry->d_inode; | ||
721 | int err = fuse_revalidate(entry); | ||
722 | if (!err) | ||
723 | generic_fillattr(inode, stat); | ||
724 | |||
725 | return err; | ||
726 | } | ||
727 | |||
728 | static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, | ||
729 | struct nameidata *nd) | ||
730 | { | ||
731 | struct inode *inode; | ||
732 | int err = fuse_lookup_iget(dir, entry, &inode); | ||
733 | if (err) | ||
734 | return ERR_PTR(err); | ||
735 | if (inode && S_ISDIR(inode->i_mode)) { | ||
736 | /* Don't allow creating an alias to a directory */ | ||
737 | struct dentry *alias = d_find_alias(inode); | ||
738 | if (alias && !(alias->d_flags & DCACHE_DISCONNECTED)) { | ||
739 | dput(alias); | ||
740 | iput(inode); | ||
741 | return ERR_PTR(-EIO); | ||
742 | } | ||
743 | } | ||
744 | return d_splice_alias(inode, entry); | ||
745 | } | ||
746 | |||
747 | static int fuse_setxattr(struct dentry *entry, const char *name, | ||
748 | const void *value, size_t size, int flags) | ||
749 | { | ||
750 | struct inode *inode = entry->d_inode; | ||
751 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
752 | struct fuse_req *req; | ||
753 | struct fuse_setxattr_in inarg; | ||
754 | int err; | ||
755 | |||
756 | if (size > FUSE_XATTR_SIZE_MAX) | ||
757 | return -E2BIG; | ||
758 | |||
759 | if (fc->no_setxattr) | ||
760 | return -EOPNOTSUPP; | ||
761 | |||
762 | req = fuse_get_request(fc); | ||
763 | if (!req) | ||
764 | return -EINTR; | ||
765 | |||
766 | memset(&inarg, 0, sizeof(inarg)); | ||
767 | inarg.size = size; | ||
768 | inarg.flags = flags; | ||
769 | req->in.h.opcode = FUSE_SETXATTR; | ||
770 | req->in.h.nodeid = get_node_id(inode); | ||
771 | req->inode = inode; | ||
772 | req->in.numargs = 3; | ||
773 | req->in.args[0].size = sizeof(inarg); | ||
774 | req->in.args[0].value = &inarg; | ||
775 | req->in.args[1].size = strlen(name) + 1; | ||
776 | req->in.args[1].value = name; | ||
777 | req->in.args[2].size = size; | ||
778 | req->in.args[2].value = value; | ||
779 | request_send(fc, req); | ||
780 | err = req->out.h.error; | ||
781 | fuse_put_request(fc, req); | ||
782 | if (err == -ENOSYS) { | ||
783 | fc->no_setxattr = 1; | ||
784 | err = -EOPNOTSUPP; | ||
785 | } | ||
786 | return err; | ||
787 | } | ||
788 | |||
789 | static ssize_t fuse_getxattr(struct dentry *entry, const char *name, | ||
790 | void *value, size_t size) | ||
791 | { | ||
792 | struct inode *inode = entry->d_inode; | ||
793 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
794 | struct fuse_req *req; | ||
795 | struct fuse_getxattr_in inarg; | ||
796 | struct fuse_getxattr_out outarg; | ||
797 | ssize_t ret; | ||
798 | |||
799 | if (fc->no_getxattr) | ||
800 | return -EOPNOTSUPP; | ||
801 | |||
802 | req = fuse_get_request(fc); | ||
803 | if (!req) | ||
804 | return -EINTR; | ||
805 | |||
806 | memset(&inarg, 0, sizeof(inarg)); | ||
807 | inarg.size = size; | ||
808 | req->in.h.opcode = FUSE_GETXATTR; | ||
809 | req->in.h.nodeid = get_node_id(inode); | ||
810 | req->inode = inode; | ||
811 | req->in.numargs = 2; | ||
812 | req->in.args[0].size = sizeof(inarg); | ||
813 | req->in.args[0].value = &inarg; | ||
814 | req->in.args[1].size = strlen(name) + 1; | ||
815 | req->in.args[1].value = name; | ||
816 | /* This is really two different operations rolled into one */ | ||
817 | req->out.numargs = 1; | ||
818 | if (size) { | ||
819 | req->out.argvar = 1; | ||
820 | req->out.args[0].size = size; | ||
821 | req->out.args[0].value = value; | ||
822 | } else { | ||
823 | req->out.args[0].size = sizeof(outarg); | ||
824 | req->out.args[0].value = &outarg; | ||
825 | } | ||
826 | request_send(fc, req); | ||
827 | ret = req->out.h.error; | ||
828 | if (!ret) | ||
829 | ret = size ? req->out.args[0].size : outarg.size; | ||
830 | else { | ||
831 | if (ret == -ENOSYS) { | ||
832 | fc->no_getxattr = 1; | ||
833 | ret = -EOPNOTSUPP; | ||
834 | } | ||
835 | } | ||
836 | fuse_put_request(fc, req); | ||
837 | return ret; | ||
838 | } | ||
839 | |||
840 | static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size) | ||
841 | { | ||
842 | struct inode *inode = entry->d_inode; | ||
843 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
844 | struct fuse_req *req; | ||
845 | struct fuse_getxattr_in inarg; | ||
846 | struct fuse_getxattr_out outarg; | ||
847 | ssize_t ret; | ||
848 | |||
849 | if (fc->no_listxattr) | ||
850 | return -EOPNOTSUPP; | ||
851 | |||
852 | req = fuse_get_request(fc); | ||
853 | if (!req) | ||
854 | return -EINTR; | ||
855 | |||
856 | memset(&inarg, 0, sizeof(inarg)); | ||
857 | inarg.size = size; | ||
858 | req->in.h.opcode = FUSE_LISTXATTR; | ||
859 | req->in.h.nodeid = get_node_id(inode); | ||
860 | req->inode = inode; | ||
861 | req->in.numargs = 1; | ||
862 | req->in.args[0].size = sizeof(inarg); | ||
863 | req->in.args[0].value = &inarg; | ||
864 | /* This is really two different operations rolled into one */ | ||
865 | req->out.numargs = 1; | ||
866 | if (size) { | ||
867 | req->out.argvar = 1; | ||
868 | req->out.args[0].size = size; | ||
869 | req->out.args[0].value = list; | ||
870 | } else { | ||
871 | req->out.args[0].size = sizeof(outarg); | ||
872 | req->out.args[0].value = &outarg; | ||
873 | } | ||
874 | request_send(fc, req); | ||
875 | ret = req->out.h.error; | ||
876 | if (!ret) | ||
877 | ret = size ? req->out.args[0].size : outarg.size; | ||
878 | else { | ||
879 | if (ret == -ENOSYS) { | ||
880 | fc->no_listxattr = 1; | ||
881 | ret = -EOPNOTSUPP; | ||
882 | } | ||
883 | } | ||
884 | fuse_put_request(fc, req); | ||
885 | return ret; | ||
886 | } | ||
887 | |||
888 | static int fuse_removexattr(struct dentry *entry, const char *name) | ||
889 | { | ||
890 | struct inode *inode = entry->d_inode; | ||
891 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
892 | struct fuse_req *req; | ||
893 | int err; | ||
894 | |||
895 | if (fc->no_removexattr) | ||
896 | return -EOPNOTSUPP; | ||
897 | |||
898 | req = fuse_get_request(fc); | ||
899 | if (!req) | ||
900 | return -EINTR; | ||
901 | |||
902 | req->in.h.opcode = FUSE_REMOVEXATTR; | ||
903 | req->in.h.nodeid = get_node_id(inode); | ||
904 | req->inode = inode; | ||
905 | req->in.numargs = 1; | ||
906 | req->in.args[0].size = strlen(name) + 1; | ||
907 | req->in.args[0].value = name; | ||
908 | request_send(fc, req); | ||
909 | err = req->out.h.error; | ||
910 | fuse_put_request(fc, req); | ||
911 | if (err == -ENOSYS) { | ||
912 | fc->no_removexattr = 1; | ||
913 | err = -EOPNOTSUPP; | ||
914 | } | ||
915 | return err; | ||
916 | } | ||
917 | |||
918 | static struct inode_operations fuse_dir_inode_operations = { | ||
919 | .lookup = fuse_lookup, | ||
920 | .mkdir = fuse_mkdir, | ||
921 | .symlink = fuse_symlink, | ||
922 | .unlink = fuse_unlink, | ||
923 | .rmdir = fuse_rmdir, | ||
924 | .rename = fuse_rename, | ||
925 | .link = fuse_link, | ||
926 | .setattr = fuse_setattr, | ||
927 | .create = fuse_create, | ||
928 | .mknod = fuse_mknod, | ||
929 | .permission = fuse_permission, | ||
930 | .getattr = fuse_getattr, | ||
931 | .setxattr = fuse_setxattr, | ||
932 | .getxattr = fuse_getxattr, | ||
933 | .listxattr = fuse_listxattr, | ||
934 | .removexattr = fuse_removexattr, | ||
935 | }; | ||
936 | |||
937 | static struct file_operations fuse_dir_operations = { | ||
938 | .llseek = generic_file_llseek, | ||
939 | .read = generic_read_dir, | ||
940 | .readdir = fuse_readdir, | ||
941 | .open = fuse_dir_open, | ||
942 | .release = fuse_dir_release, | ||
943 | .fsync = fuse_dir_fsync, | ||
944 | }; | ||
945 | |||
946 | static struct inode_operations fuse_common_inode_operations = { | ||
947 | .setattr = fuse_setattr, | ||
948 | .permission = fuse_permission, | ||
949 | .getattr = fuse_getattr, | ||
950 | .setxattr = fuse_setxattr, | ||
951 | .getxattr = fuse_getxattr, | ||
952 | .listxattr = fuse_listxattr, | ||
953 | .removexattr = fuse_removexattr, | ||
954 | }; | ||
955 | |||
956 | static struct inode_operations fuse_symlink_inode_operations = { | ||
957 | .setattr = fuse_setattr, | ||
958 | .follow_link = fuse_follow_link, | ||
959 | .put_link = fuse_put_link, | ||
960 | .readlink = generic_readlink, | ||
961 | .getattr = fuse_getattr, | ||
962 | .setxattr = fuse_setxattr, | ||
963 | .getxattr = fuse_getxattr, | ||
964 | .listxattr = fuse_listxattr, | ||
965 | .removexattr = fuse_removexattr, | ||
966 | }; | ||
967 | |||
968 | void fuse_init_common(struct inode *inode) | ||
969 | { | ||
970 | inode->i_op = &fuse_common_inode_operations; | ||
971 | } | ||
972 | |||
973 | void fuse_init_dir(struct inode *inode) | ||
974 | { | ||
975 | inode->i_op = &fuse_dir_inode_operations; | ||
976 | inode->i_fop = &fuse_dir_operations; | ||
977 | } | ||
978 | |||
979 | void fuse_init_symlink(struct inode *inode) | ||
980 | { | ||
981 | inode->i_op = &fuse_symlink_inode_operations; | ||
982 | } | ||
diff --git a/fs/fuse/file.c b/fs/fuse/file.c new file mode 100644 index 000000000000..6454022b0536 --- /dev/null +++ b/fs/fuse/file.c | |||
@@ -0,0 +1,555 @@ | |||
1 | /* | ||
2 | FUSE: Filesystem in Userspace | ||
3 | Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu> | ||
4 | |||
5 | This program can be distributed under the terms of the GNU GPL. | ||
6 | See the file COPYING. | ||
7 | */ | ||
8 | |||
9 | #include "fuse_i.h" | ||
10 | |||
11 | #include <linux/pagemap.h> | ||
12 | #include <linux/slab.h> | ||
13 | #include <linux/kernel.h> | ||
14 | |||
15 | static struct file_operations fuse_direct_io_file_operations; | ||
16 | |||
17 | int fuse_open_common(struct inode *inode, struct file *file, int isdir) | ||
18 | { | ||
19 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
20 | struct fuse_req *req; | ||
21 | struct fuse_open_in inarg; | ||
22 | struct fuse_open_out outarg; | ||
23 | struct fuse_file *ff; | ||
24 | int err; | ||
25 | |||
26 | err = generic_file_open(inode, file); | ||
27 | if (err) | ||
28 | return err; | ||
29 | |||
30 | /* If opening the root node, no lookup has been performed on | ||
31 | it, so the attributes must be refreshed */ | ||
32 | if (get_node_id(inode) == FUSE_ROOT_ID) { | ||
33 | int err = fuse_do_getattr(inode); | ||
34 | if (err) | ||
35 | return err; | ||
36 | } | ||
37 | |||
38 | req = fuse_get_request(fc); | ||
39 | if (!req) | ||
40 | return -EINTR; | ||
41 | |||
42 | err = -ENOMEM; | ||
43 | ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL); | ||
44 | if (!ff) | ||
45 | goto out_put_request; | ||
46 | |||
47 | ff->release_req = fuse_request_alloc(); | ||
48 | if (!ff->release_req) { | ||
49 | kfree(ff); | ||
50 | goto out_put_request; | ||
51 | } | ||
52 | |||
53 | memset(&inarg, 0, sizeof(inarg)); | ||
54 | inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); | ||
55 | req->in.h.opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN; | ||
56 | req->in.h.nodeid = get_node_id(inode); | ||
57 | req->inode = inode; | ||
58 | req->in.numargs = 1; | ||
59 | req->in.args[0].size = sizeof(inarg); | ||
60 | req->in.args[0].value = &inarg; | ||
61 | req->out.numargs = 1; | ||
62 | req->out.args[0].size = sizeof(outarg); | ||
63 | req->out.args[0].value = &outarg; | ||
64 | request_send(fc, req); | ||
65 | err = req->out.h.error; | ||
66 | if (err) { | ||
67 | fuse_request_free(ff->release_req); | ||
68 | kfree(ff); | ||
69 | } else { | ||
70 | if (!isdir && (outarg.open_flags & FOPEN_DIRECT_IO)) | ||
71 | file->f_op = &fuse_direct_io_file_operations; | ||
72 | if (!(outarg.open_flags & FOPEN_KEEP_CACHE)) | ||
73 | invalidate_inode_pages(inode->i_mapping); | ||
74 | ff->fh = outarg.fh; | ||
75 | file->private_data = ff; | ||
76 | } | ||
77 | |||
78 | out_put_request: | ||
79 | fuse_put_request(fc, req); | ||
80 | return err; | ||
81 | } | ||
82 | |||
83 | int fuse_release_common(struct inode *inode, struct file *file, int isdir) | ||
84 | { | ||
85 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
86 | struct fuse_file *ff = file->private_data; | ||
87 | struct fuse_req *req = ff->release_req; | ||
88 | struct fuse_release_in *inarg = &req->misc.release_in; | ||
89 | |||
90 | inarg->fh = ff->fh; | ||
91 | inarg->flags = file->f_flags & ~O_EXCL; | ||
92 | req->in.h.opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE; | ||
93 | req->in.h.nodeid = get_node_id(inode); | ||
94 | req->inode = inode; | ||
95 | req->in.numargs = 1; | ||
96 | req->in.args[0].size = sizeof(struct fuse_release_in); | ||
97 | req->in.args[0].value = inarg; | ||
98 | request_send_background(fc, req); | ||
99 | kfree(ff); | ||
100 | |||
101 | /* Return value is ignored by VFS */ | ||
102 | return 0; | ||
103 | } | ||
104 | |||
/* ->open for regular files: fuse_open_common() with isdir clear. */
static int fuse_open(struct inode *inode, struct file *file)
{
	const int isdir = 0;

	return fuse_open_common(inode, file, isdir);
}
109 | |||
/* ->release for regular files: fuse_release_common() with isdir clear. */
static int fuse_release(struct inode *inode, struct file *file)
{
	const int isdir = 0;

	return fuse_release_common(inode, file, isdir);
}
114 | |||
115 | static int fuse_flush(struct file *file) | ||
116 | { | ||
117 | struct inode *inode = file->f_dentry->d_inode; | ||
118 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
119 | struct fuse_file *ff = file->private_data; | ||
120 | struct fuse_req *req; | ||
121 | struct fuse_flush_in inarg; | ||
122 | int err; | ||
123 | |||
124 | if (fc->no_flush) | ||
125 | return 0; | ||
126 | |||
127 | req = fuse_get_request(fc); | ||
128 | if (!req) | ||
129 | return -EINTR; | ||
130 | |||
131 | memset(&inarg, 0, sizeof(inarg)); | ||
132 | inarg.fh = ff->fh; | ||
133 | req->in.h.opcode = FUSE_FLUSH; | ||
134 | req->in.h.nodeid = get_node_id(inode); | ||
135 | req->inode = inode; | ||
136 | req->file = file; | ||
137 | req->in.numargs = 1; | ||
138 | req->in.args[0].size = sizeof(inarg); | ||
139 | req->in.args[0].value = &inarg; | ||
140 | request_send(fc, req); | ||
141 | err = req->out.h.error; | ||
142 | fuse_put_request(fc, req); | ||
143 | if (err == -ENOSYS) { | ||
144 | fc->no_flush = 1; | ||
145 | err = 0; | ||
146 | } | ||
147 | return err; | ||
148 | } | ||
149 | |||
150 | int fuse_fsync_common(struct file *file, struct dentry *de, int datasync, | ||
151 | int isdir) | ||
152 | { | ||
153 | struct inode *inode = de->d_inode; | ||
154 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
155 | struct fuse_file *ff = file->private_data; | ||
156 | struct fuse_req *req; | ||
157 | struct fuse_fsync_in inarg; | ||
158 | int err; | ||
159 | |||
160 | if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir)) | ||
161 | return 0; | ||
162 | |||
163 | req = fuse_get_request(fc); | ||
164 | if (!req) | ||
165 | return -EINTR; | ||
166 | |||
167 | memset(&inarg, 0, sizeof(inarg)); | ||
168 | inarg.fh = ff->fh; | ||
169 | inarg.fsync_flags = datasync ? 1 : 0; | ||
170 | req->in.h.opcode = isdir ? FUSE_FSYNCDIR : FUSE_FSYNC; | ||
171 | req->in.h.nodeid = get_node_id(inode); | ||
172 | req->inode = inode; | ||
173 | req->file = file; | ||
174 | req->in.numargs = 1; | ||
175 | req->in.args[0].size = sizeof(inarg); | ||
176 | req->in.args[0].value = &inarg; | ||
177 | request_send(fc, req); | ||
178 | err = req->out.h.error; | ||
179 | fuse_put_request(fc, req); | ||
180 | if (err == -ENOSYS) { | ||
181 | if (isdir) | ||
182 | fc->no_fsyncdir = 1; | ||
183 | else | ||
184 | fc->no_fsync = 1; | ||
185 | err = 0; | ||
186 | } | ||
187 | return err; | ||
188 | } | ||
189 | |||
/* ->fsync for regular files: fuse_fsync_common() with isdir clear. */
static int fuse_fsync(struct file *file, struct dentry *de, int datasync)
{
	const int isdir = 0;

	return fuse_fsync_common(file, de, datasync, isdir);
}
194 | |||
195 | size_t fuse_send_read_common(struct fuse_req *req, struct file *file, | ||
196 | struct inode *inode, loff_t pos, size_t count, | ||
197 | int isdir) | ||
198 | { | ||
199 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
200 | struct fuse_file *ff = file->private_data; | ||
201 | struct fuse_read_in inarg; | ||
202 | |||
203 | memset(&inarg, 0, sizeof(struct fuse_read_in)); | ||
204 | inarg.fh = ff->fh; | ||
205 | inarg.offset = pos; | ||
206 | inarg.size = count; | ||
207 | req->in.h.opcode = isdir ? FUSE_READDIR : FUSE_READ; | ||
208 | req->in.h.nodeid = get_node_id(inode); | ||
209 | req->inode = inode; | ||
210 | req->file = file; | ||
211 | req->in.numargs = 1; | ||
212 | req->in.args[0].size = sizeof(struct fuse_read_in); | ||
213 | req->in.args[0].value = &inarg; | ||
214 | req->out.argpages = 1; | ||
215 | req->out.argvar = 1; | ||
216 | req->out.numargs = 1; | ||
217 | req->out.args[0].size = count; | ||
218 | request_send(fc, req); | ||
219 | return req->out.args[0].size; | ||
220 | } | ||
221 | |||
222 | static inline size_t fuse_send_read(struct fuse_req *req, struct file *file, | ||
223 | struct inode *inode, loff_t pos, | ||
224 | size_t count) | ||
225 | { | ||
226 | return fuse_send_read_common(req, file, inode, pos, count, 0); | ||
227 | } | ||
228 | |||
229 | static int fuse_readpage(struct file *file, struct page *page) | ||
230 | { | ||
231 | struct inode *inode = page->mapping->host; | ||
232 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
233 | loff_t pos = (loff_t) page->index << PAGE_CACHE_SHIFT; | ||
234 | struct fuse_req *req = fuse_get_request(fc); | ||
235 | int err = -EINTR; | ||
236 | if (!req) | ||
237 | goto out; | ||
238 | |||
239 | req->out.page_zeroing = 1; | ||
240 | req->num_pages = 1; | ||
241 | req->pages[0] = page; | ||
242 | fuse_send_read(req, file, inode, pos, PAGE_CACHE_SIZE); | ||
243 | err = req->out.h.error; | ||
244 | fuse_put_request(fc, req); | ||
245 | if (!err) | ||
246 | SetPageUptodate(page); | ||
247 | fuse_invalidate_attr(inode); /* atime changed */ | ||
248 | out: | ||
249 | unlock_page(page); | ||
250 | return err; | ||
251 | } | ||
252 | |||
253 | static int fuse_send_readpages(struct fuse_req *req, struct file *file, | ||
254 | struct inode *inode) | ||
255 | { | ||
256 | loff_t pos = (loff_t) req->pages[0]->index << PAGE_CACHE_SHIFT; | ||
257 | size_t count = req->num_pages << PAGE_CACHE_SHIFT; | ||
258 | unsigned i; | ||
259 | req->out.page_zeroing = 1; | ||
260 | fuse_send_read(req, file, inode, pos, count); | ||
261 | for (i = 0; i < req->num_pages; i++) { | ||
262 | struct page *page = req->pages[i]; | ||
263 | if (!req->out.h.error) | ||
264 | SetPageUptodate(page); | ||
265 | unlock_page(page); | ||
266 | } | ||
267 | return req->out.h.error; | ||
268 | } | ||
269 | |||
/* State passed from fuse_readpages() to fuse_readpages_fill(). */
struct fuse_readpages_data {
	/* request in which pages are being collected */
	struct fuse_req *req;
	/* file being read */
	struct file *file;
	/* inode the pages belong to */
	struct inode *inode;
};
275 | |||
276 | static int fuse_readpages_fill(void *_data, struct page *page) | ||
277 | { | ||
278 | struct fuse_readpages_data *data = _data; | ||
279 | struct fuse_req *req = data->req; | ||
280 | struct inode *inode = data->inode; | ||
281 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
282 | |||
283 | if (req->num_pages && | ||
284 | (req->num_pages == FUSE_MAX_PAGES_PER_REQ || | ||
285 | (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read || | ||
286 | req->pages[req->num_pages - 1]->index + 1 != page->index)) { | ||
287 | int err = fuse_send_readpages(req, data->file, inode); | ||
288 | if (err) { | ||
289 | unlock_page(page); | ||
290 | return err; | ||
291 | } | ||
292 | fuse_reset_request(req); | ||
293 | } | ||
294 | req->pages[req->num_pages] = page; | ||
295 | req->num_pages ++; | ||
296 | return 0; | ||
297 | } | ||
298 | |||
299 | static int fuse_readpages(struct file *file, struct address_space *mapping, | ||
300 | struct list_head *pages, unsigned nr_pages) | ||
301 | { | ||
302 | struct inode *inode = mapping->host; | ||
303 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
304 | struct fuse_readpages_data data; | ||
305 | int err; | ||
306 | data.file = file; | ||
307 | data.inode = inode; | ||
308 | data.req = fuse_get_request(fc); | ||
309 | if (!data.req) | ||
310 | return -EINTR; | ||
311 | |||
312 | err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data); | ||
313 | if (!err && data.req->num_pages) | ||
314 | err = fuse_send_readpages(data.req, file, inode); | ||
315 | fuse_put_request(fc, data.req); | ||
316 | fuse_invalidate_attr(inode); /* atime changed */ | ||
317 | return err; | ||
318 | } | ||
319 | |||
320 | static size_t fuse_send_write(struct fuse_req *req, struct file *file, | ||
321 | struct inode *inode, loff_t pos, size_t count) | ||
322 | { | ||
323 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
324 | struct fuse_file *ff = file->private_data; | ||
325 | struct fuse_write_in inarg; | ||
326 | struct fuse_write_out outarg; | ||
327 | |||
328 | memset(&inarg, 0, sizeof(struct fuse_write_in)); | ||
329 | inarg.fh = ff->fh; | ||
330 | inarg.offset = pos; | ||
331 | inarg.size = count; | ||
332 | req->in.h.opcode = FUSE_WRITE; | ||
333 | req->in.h.nodeid = get_node_id(inode); | ||
334 | req->inode = inode; | ||
335 | req->file = file; | ||
336 | req->in.argpages = 1; | ||
337 | req->in.numargs = 2; | ||
338 | req->in.args[0].size = sizeof(struct fuse_write_in); | ||
339 | req->in.args[0].value = &inarg; | ||
340 | req->in.args[1].size = count; | ||
341 | req->out.numargs = 1; | ||
342 | req->out.args[0].size = sizeof(struct fuse_write_out); | ||
343 | req->out.args[0].value = &outarg; | ||
344 | request_send(fc, req); | ||
345 | return outarg.size; | ||
346 | } | ||
347 | |||
/* ->prepare_write: nothing to do here; always succeeds. */
static int fuse_prepare_write(struct file *file, struct page *page,
			      unsigned offset, unsigned to)
{
	/* No op */
	return 0;
}
354 | |||
355 | static int fuse_commit_write(struct file *file, struct page *page, | ||
356 | unsigned offset, unsigned to) | ||
357 | { | ||
358 | int err; | ||
359 | size_t nres; | ||
360 | unsigned count = to - offset; | ||
361 | struct inode *inode = page->mapping->host; | ||
362 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
363 | loff_t pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + offset; | ||
364 | struct fuse_req *req = fuse_get_request(fc); | ||
365 | if (!req) | ||
366 | return -EINTR; | ||
367 | |||
368 | req->num_pages = 1; | ||
369 | req->pages[0] = page; | ||
370 | req->page_offset = offset; | ||
371 | nres = fuse_send_write(req, file, inode, pos, count); | ||
372 | err = req->out.h.error; | ||
373 | fuse_put_request(fc, req); | ||
374 | if (!err && nres != count) | ||
375 | err = -EIO; | ||
376 | if (!err) { | ||
377 | pos += count; | ||
378 | if (pos > i_size_read(inode)) | ||
379 | i_size_write(inode, pos); | ||
380 | |||
381 | if (offset == 0 && to == PAGE_CACHE_SIZE) { | ||
382 | clear_page_dirty(page); | ||
383 | SetPageUptodate(page); | ||
384 | } | ||
385 | } | ||
386 | fuse_invalidate_attr(inode); | ||
387 | return err; | ||
388 | } | ||
389 | |||
390 | static void fuse_release_user_pages(struct fuse_req *req, int write) | ||
391 | { | ||
392 | unsigned i; | ||
393 | |||
394 | for (i = 0; i < req->num_pages; i++) { | ||
395 | struct page *page = req->pages[i]; | ||
396 | if (write) | ||
397 | set_page_dirty_lock(page); | ||
398 | put_page(page); | ||
399 | } | ||
400 | } | ||
401 | |||
402 | static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, | ||
403 | unsigned nbytes, int write) | ||
404 | { | ||
405 | unsigned long user_addr = (unsigned long) buf; | ||
406 | unsigned offset = user_addr & ~PAGE_MASK; | ||
407 | int npages; | ||
408 | |||
409 | /* This doesn't work with nfsd */ | ||
410 | if (!current->mm) | ||
411 | return -EPERM; | ||
412 | |||
413 | nbytes = min(nbytes, (unsigned) FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT); | ||
414 | npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
415 | npages = min(npages, FUSE_MAX_PAGES_PER_REQ); | ||
416 | down_read(¤t->mm->mmap_sem); | ||
417 | npages = get_user_pages(current, current->mm, user_addr, npages, write, | ||
418 | 0, req->pages, NULL); | ||
419 | up_read(¤t->mm->mmap_sem); | ||
420 | if (npages < 0) | ||
421 | return npages; | ||
422 | |||
423 | req->num_pages = npages; | ||
424 | req->page_offset = offset; | ||
425 | return 0; | ||
426 | } | ||
427 | |||
428 | static ssize_t fuse_direct_io(struct file *file, const char __user *buf, | ||
429 | size_t count, loff_t *ppos, int write) | ||
430 | { | ||
431 | struct inode *inode = file->f_dentry->d_inode; | ||
432 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
433 | size_t nmax = write ? fc->max_write : fc->max_read; | ||
434 | loff_t pos = *ppos; | ||
435 | ssize_t res = 0; | ||
436 | struct fuse_req *req = fuse_get_request(fc); | ||
437 | if (!req) | ||
438 | return -EINTR; | ||
439 | |||
440 | while (count) { | ||
441 | size_t tmp; | ||
442 | size_t nres; | ||
443 | size_t nbytes = min(count, nmax); | ||
444 | int err = fuse_get_user_pages(req, buf, nbytes, !write); | ||
445 | if (err) { | ||
446 | res = err; | ||
447 | break; | ||
448 | } | ||
449 | tmp = (req->num_pages << PAGE_SHIFT) - req->page_offset; | ||
450 | nbytes = min(nbytes, tmp); | ||
451 | if (write) | ||
452 | nres = fuse_send_write(req, file, inode, pos, nbytes); | ||
453 | else | ||
454 | nres = fuse_send_read(req, file, inode, pos, nbytes); | ||
455 | fuse_release_user_pages(req, !write); | ||
456 | if (req->out.h.error) { | ||
457 | if (!res) | ||
458 | res = req->out.h.error; | ||
459 | break; | ||
460 | } else if (nres > nbytes) { | ||
461 | res = -EIO; | ||
462 | break; | ||
463 | } | ||
464 | count -= nres; | ||
465 | res += nres; | ||
466 | pos += nres; | ||
467 | buf += nres; | ||
468 | if (nres != nbytes) | ||
469 | break; | ||
470 | if (count) | ||
471 | fuse_reset_request(req); | ||
472 | } | ||
473 | fuse_put_request(fc, req); | ||
474 | if (res > 0) { | ||
475 | if (write && pos > i_size_read(inode)) | ||
476 | i_size_write(inode, pos); | ||
477 | *ppos = pos; | ||
478 | } | ||
479 | fuse_invalidate_attr(inode); | ||
480 | |||
481 | return res; | ||
482 | } | ||
483 | |||
484 | static ssize_t fuse_direct_read(struct file *file, char __user *buf, | ||
485 | size_t count, loff_t *ppos) | ||
486 | { | ||
487 | return fuse_direct_io(file, buf, count, ppos, 0); | ||
488 | } | ||
489 | |||
490 | static ssize_t fuse_direct_write(struct file *file, const char __user *buf, | ||
491 | size_t count, loff_t *ppos) | ||
492 | { | ||
493 | struct inode *inode = file->f_dentry->d_inode; | ||
494 | ssize_t res; | ||
495 | /* Don't allow parallel writes to the same file */ | ||
496 | down(&inode->i_sem); | ||
497 | res = fuse_direct_io(file, buf, count, ppos, 1); | ||
498 | up(&inode->i_sem); | ||
499 | return res; | ||
500 | } | ||
501 | |||
502 | static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) | ||
503 | { | ||
504 | if ((vma->vm_flags & VM_SHARED)) { | ||
505 | if ((vma->vm_flags & VM_WRITE)) | ||
506 | return -ENODEV; | ||
507 | else | ||
508 | vma->vm_flags &= ~VM_MAYWRITE; | ||
509 | } | ||
510 | return generic_file_mmap(file, vma); | ||
511 | } | ||
512 | |||
/*
 * ->set_page_dirty: not expected to be called; logs a message, dumps the
 * stack and returns 0.
 */
static int fuse_set_page_dirty(struct page *page)
{
	printk("fuse_set_page_dirty: should not happen\n");
	dump_stack();

	return 0;
}
519 | |||
520 | static struct file_operations fuse_file_operations = { | ||
521 | .llseek = generic_file_llseek, | ||
522 | .read = generic_file_read, | ||
523 | .write = generic_file_write, | ||
524 | .mmap = fuse_file_mmap, | ||
525 | .open = fuse_open, | ||
526 | .flush = fuse_flush, | ||
527 | .release = fuse_release, | ||
528 | .fsync = fuse_fsync, | ||
529 | .sendfile = generic_file_sendfile, | ||
530 | }; | ||
531 | |||
532 | static struct file_operations fuse_direct_io_file_operations = { | ||
533 | .llseek = generic_file_llseek, | ||
534 | .read = fuse_direct_read, | ||
535 | .write = fuse_direct_write, | ||
536 | .open = fuse_open, | ||
537 | .flush = fuse_flush, | ||
538 | .release = fuse_release, | ||
539 | .fsync = fuse_fsync, | ||
540 | /* no mmap and sendfile */ | ||
541 | }; | ||
542 | |||
543 | static struct address_space_operations fuse_file_aops = { | ||
544 | .readpage = fuse_readpage, | ||
545 | .prepare_write = fuse_prepare_write, | ||
546 | .commit_write = fuse_commit_write, | ||
547 | .readpages = fuse_readpages, | ||
548 | .set_page_dirty = fuse_set_page_dirty, | ||
549 | }; | ||
550 | |||
551 | void fuse_init_file_inode(struct inode *inode) | ||
552 | { | ||
553 | inode->i_fop = &fuse_file_operations; | ||
554 | inode->i_data.a_ops = &fuse_file_aops; | ||
555 | } | ||
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h new file mode 100644 index 000000000000..24d761518d86 --- /dev/null +++ b/fs/fuse/fuse_i.h | |||
@@ -0,0 +1,451 @@ | |||
1 | /* | ||
2 | FUSE: Filesystem in Userspace | ||
3 | Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu> | ||
4 | |||
5 | This program can be distributed under the terms of the GNU GPL. | ||
6 | See the file COPYING. | ||
7 | */ | ||
8 | |||
9 | #include <linux/fuse.h> | ||
10 | #include <linux/fs.h> | ||
11 | #include <linux/wait.h> | ||
12 | #include <linux/list.h> | ||
13 | #include <linux/spinlock.h> | ||
14 | #include <linux/mm.h> | ||
15 | #include <linux/backing-dev.h> | ||
16 | #include <asm/semaphore.h> | ||
17 | |||
18 | /** Max number of pages that can be used in a single read request */ | ||
19 | #define FUSE_MAX_PAGES_PER_REQ 32 | ||
20 | |||
21 | /** If more requests are outstanding, then the operation will block */ | ||
22 | #define FUSE_MAX_OUTSTANDING 10 | ||
23 | |||
24 | /** If the FUSE_DEFAULT_PERMISSIONS flag is given, the filesystem | ||
25 | module will check permissions based on the file mode. Otherwise no | ||
26 | permission checking is done in the kernel */ | ||
27 | #define FUSE_DEFAULT_PERMISSIONS (1 << 0) | ||
28 | |||
29 | /** If the FUSE_ALLOW_OTHER flag is given, then not only the user | ||
30 | doing the mount will be allowed to access the filesystem */ | ||
31 | #define FUSE_ALLOW_OTHER (1 << 1) | ||
32 | |||
33 | |||
34 | /** FUSE inode */ | ||
35 | struct fuse_inode { | ||
36 | /** Inode data */ | ||
37 | struct inode inode; | ||
38 | |||
39 | /** Unique ID, which identifies the inode between userspace | ||
40 | * and kernel */ | ||
41 | u64 nodeid; | ||
42 | |||
43 | /** Number of lookups on this inode */ | ||
44 | u64 nlookup; | ||
45 | |||
46 | /** The request used for sending the FORGET message */ | ||
47 | struct fuse_req *forget_req; | ||
48 | |||
49 | /** Time in jiffies until the file attributes are valid */ | ||
50 | unsigned long i_time; | ||
51 | }; | ||
52 | |||
53 | /** FUSE specific file data */ | ||
54 | struct fuse_file { | ||
55 | /** Request reserved for flush and release */ | ||
56 | struct fuse_req *release_req; | ||
57 | |||
58 | /** File handle used by userspace */ | ||
59 | u64 fh; | ||
60 | }; | ||
61 | |||
/** One input argument of a request: a read-only buffer and its size */
struct fuse_in_arg {
	unsigned int size;
	const void *value;
};
67 | |||
68 | /** The request input */ | ||
69 | struct fuse_in { | ||
70 | /** The request header */ | ||
71 | struct fuse_in_header h; | ||
72 | |||
73 | /** True if the data for the last argument is in req->pages */ | ||
74 | unsigned argpages:1; | ||
75 | |||
76 | /** Number of arguments */ | ||
77 | unsigned numargs; | ||
78 | |||
79 | /** Array of arguments */ | ||
80 | struct fuse_in_arg args[3]; | ||
81 | }; | ||
82 | |||
/** One output argument of a request: a writable buffer and its size */
struct fuse_arg {
	unsigned int size;
	void *value;
};
88 | |||
89 | /** The request output */ | ||
90 | struct fuse_out { | ||
91 | /** Header returned from userspace */ | ||
92 | struct fuse_out_header h; | ||
93 | |||
94 | /** Last argument is variable length (can be shorter than | ||
95 | arg->size) */ | ||
96 | unsigned argvar:1; | ||
97 | |||
98 | /** Last argument is a list of pages to copy data to */ | ||
99 | unsigned argpages:1; | ||
100 | |||
101 | /** Zero partially or not copied pages */ | ||
102 | unsigned page_zeroing:1; | ||
103 | |||
104 | /** Number or arguments */ | ||
105 | unsigned numargs; | ||
106 | |||
107 | /** Array of arguments */ | ||
108 | struct fuse_arg args[3]; | ||
109 | }; | ||
110 | |||
111 | struct fuse_req; | ||
112 | struct fuse_conn; | ||
113 | |||
114 | /** | ||
115 | * A request to the client | ||
116 | */ | ||
117 | struct fuse_req { | ||
118 | /** This can be on either unused_list, pending or processing | ||
119 | lists in fuse_conn */ | ||
120 | struct list_head list; | ||
121 | |||
122 | /** Entry on the background list */ | ||
123 | struct list_head bg_entry; | ||
124 | |||
125 | /** refcount */ | ||
126 | atomic_t count; | ||
127 | |||
128 | /** True if the request has reply */ | ||
129 | unsigned isreply:1; | ||
130 | |||
131 | /** The request is preallocated */ | ||
132 | unsigned preallocated:1; | ||
133 | |||
134 | /** The request was interrupted */ | ||
135 | unsigned interrupted:1; | ||
136 | |||
137 | /** Request is sent in the background */ | ||
138 | unsigned background:1; | ||
139 | |||
140 | /** Data is being copied to/from the request */ | ||
141 | unsigned locked:1; | ||
142 | |||
143 | /** Request has been sent to userspace */ | ||
144 | unsigned sent:1; | ||
145 | |||
146 | /** The request is finished */ | ||
147 | unsigned finished:1; | ||
148 | |||
149 | /** The request input */ | ||
150 | struct fuse_in in; | ||
151 | |||
152 | /** The request output */ | ||
153 | struct fuse_out out; | ||
154 | |||
155 | /** Used to wake up the task waiting for completion of request*/ | ||
156 | wait_queue_head_t waitq; | ||
157 | |||
158 | /** Data for asynchronous requests */ | ||
159 | union { | ||
160 | struct fuse_forget_in forget_in; | ||
161 | struct fuse_release_in release_in; | ||
162 | struct fuse_init_in_out init_in_out; | ||
163 | } misc; | ||
164 | |||
165 | /** page vector */ | ||
166 | struct page *pages[FUSE_MAX_PAGES_PER_REQ]; | ||
167 | |||
168 | /** number of pages in vector */ | ||
169 | unsigned num_pages; | ||
170 | |||
171 | /** offset of data on first page */ | ||
172 | unsigned page_offset; | ||
173 | |||
174 | /** Inode used in the request */ | ||
175 | struct inode *inode; | ||
176 | |||
177 | /** Second inode used in the request (or NULL) */ | ||
178 | struct inode *inode2; | ||
179 | |||
180 | /** File used in the request (or NULL) */ | ||
181 | struct file *file; | ||
182 | }; | ||
183 | |||
184 | /** | ||
185 | * A Fuse connection. | ||
186 | * | ||
187 | * This structure is created, when the filesystem is mounted, and is | ||
188 | * destroyed, when the client device is closed and the filesystem is | ||
189 | * unmounted. | ||
190 | */ | ||
191 | struct fuse_conn { | ||
192 | /** Reference count */ | ||
193 | int count; | ||
194 | |||
195 | /** The user id for this mount */ | ||
196 | uid_t user_id; | ||
197 | |||
198 | /** The group id for this mount */ | ||
199 | gid_t group_id; | ||
200 | |||
201 | /** The fuse mount flags for this mount */ | ||
202 | unsigned flags; | ||
203 | |||
204 | /** Maximum read size */ | ||
205 | unsigned max_read; | ||
206 | |||
207 | /** Maximum write size */ | ||
208 | unsigned max_write; | ||
209 | |||
210 | /** Readers of the connection are waiting on this */ | ||
211 | wait_queue_head_t waitq; | ||
212 | |||
213 | /** The list of pending requests */ | ||
214 | struct list_head pending; | ||
215 | |||
216 | /** The list of requests being processed */ | ||
217 | struct list_head processing; | ||
218 | |||
219 | /** Requests put in the background (RELEASE or any other | ||
220 | interrupted request) */ | ||
221 | struct list_head background; | ||
222 | |||
223 | /** Controls the maximum number of outstanding requests */ | ||
224 | struct semaphore outstanding_sem; | ||
225 | |||
226 | /** This counts the number of outstanding requests if | ||
227 | outstanding_sem would go negative */ | ||
228 | unsigned outstanding_debt; | ||
229 | |||
230 | /** RW semaphore for exclusion with fuse_put_super() */ | ||
231 | struct rw_semaphore sbput_sem; | ||
232 | |||
233 | /** The list of unused requests */ | ||
234 | struct list_head unused_list; | ||
235 | |||
236 | /** The next unique request id */ | ||
237 | u64 reqctr; | ||
238 | |||
239 | /** Mount is active */ | ||
240 | unsigned mounted : 1; | ||
241 | |||
242 | /** Connection established */ | ||
243 | unsigned connected : 1; | ||
244 | |||
245 | /** Connection failed (version mismatch) */ | ||
246 | unsigned conn_error : 1; | ||
247 | |||
248 | /** Is fsync not implemented by fs? */ | ||
249 | unsigned no_fsync : 1; | ||
250 | |||
251 | /** Is fsyncdir not implemented by fs? */ | ||
252 | unsigned no_fsyncdir : 1; | ||
253 | |||
254 | /** Is flush not implemented by fs? */ | ||
255 | unsigned no_flush : 1; | ||
256 | |||
257 | /** Is setxattr not implemented by fs? */ | ||
258 | unsigned no_setxattr : 1; | ||
259 | |||
260 | /** Is getxattr not implemented by fs? */ | ||
261 | unsigned no_getxattr : 1; | ||
262 | |||
263 | /** Is listxattr not implemented by fs? */ | ||
264 | unsigned no_listxattr : 1; | ||
265 | |||
266 | /** Is removexattr not implemented by fs? */ | ||
267 | unsigned no_removexattr : 1; | ||
268 | |||
269 | /** Backing dev info */ | ||
270 | struct backing_dev_info bdi; | ||
271 | }; | ||
272 | |||
273 | static inline struct fuse_conn **get_fuse_conn_super_p(struct super_block *sb) | ||
274 | { | ||
275 | return (struct fuse_conn **) &sb->s_fs_info; | ||
276 | } | ||
277 | |||
/** The connection stored in the super block's s_fs_info */
static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
{
	struct fuse_conn **fcp = get_fuse_conn_super_p(sb);

	return *fcp;
}
282 | |||
283 | static inline struct fuse_conn *get_fuse_conn(struct inode *inode) | ||
284 | { | ||
285 | return get_fuse_conn_super(inode->i_sb); | ||
286 | } | ||
287 | |||
288 | static inline struct fuse_inode *get_fuse_inode(struct inode *inode) | ||
289 | { | ||
290 | return container_of(inode, struct fuse_inode, inode); | ||
291 | } | ||
292 | |||
293 | static inline u64 get_node_id(struct inode *inode) | ||
294 | { | ||
295 | return get_fuse_inode(inode)->nodeid; | ||
296 | } | ||
297 | |||
298 | /** Device operations */ | ||
299 | extern struct file_operations fuse_dev_operations; | ||
300 | |||
301 | /** | ||
302 | * This is the single global spinlock which protects FUSE's structures | ||
303 | * | ||
304 | * The following data is protected by this lock: | ||
305 | * | ||
306 | * - the private_data field of the device file | ||
307 | * - the s_fs_info field of the super block | ||
308 | * - unused_list, pending, processing lists in fuse_conn | ||
309 | * - background list in fuse_conn | ||
310 | * - the unique request ID counter reqctr in fuse_conn | ||
311 | * - the reference count (count) in fuse_conn | ||
312 | * - the mounted and connected flags in fuse_conn | ||
313 | */ | ||
314 | extern spinlock_t fuse_lock; | ||
315 | |||
316 | /** | ||
317 | * Get a filled in inode | ||
318 | */ | ||
319 | struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid, | ||
320 | int generation, struct fuse_attr *attr); | ||
321 | |||
322 | /** | ||
323 | * Send FORGET command | ||
324 | */ | ||
325 | void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req, | ||
326 | unsigned long nodeid, u64 nlookup); | ||
327 | |||
328 | /** | ||
329 | * Send READ or READDIR request | ||
330 | */ | ||
331 | size_t fuse_send_read_common(struct fuse_req *req, struct file *file, | ||
332 | struct inode *inode, loff_t pos, size_t count, | ||
333 | int isdir); | ||
334 | |||
335 | /** | ||
336 | * Send OPEN or OPENDIR request | ||
337 | */ | ||
338 | int fuse_open_common(struct inode *inode, struct file *file, int isdir); | ||
339 | |||
340 | /** | ||
341 | * Send RELEASE or RELEASEDIR request | ||
342 | */ | ||
343 | int fuse_release_common(struct inode *inode, struct file *file, int isdir); | ||
344 | |||
345 | /** | ||
346 | * Send FSYNC or FSYNCDIR request | ||
347 | */ | ||
348 | int fuse_fsync_common(struct file *file, struct dentry *de, int datasync, | ||
349 | int isdir); | ||
350 | |||
351 | /** | ||
352 | * Initialise file operations on a regular file | ||
353 | */ | ||
354 | void fuse_init_file_inode(struct inode *inode); | ||
355 | |||
356 | /** | ||
357 | * Initialise inode operations on regular files and special files | ||
358 | */ | ||
359 | void fuse_init_common(struct inode *inode); | ||
360 | |||
361 | /** | ||
362 | * Initialise inode and file operations on a directory | ||
363 | */ | ||
364 | void fuse_init_dir(struct inode *inode); | ||
365 | |||
366 | /** | ||
367 | * Initialise inode operations on a symlink | ||
368 | */ | ||
369 | void fuse_init_symlink(struct inode *inode); | ||
370 | |||
371 | /** | ||
372 | * Change attributes of an inode | ||
373 | */ | ||
374 | void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr); | ||
375 | |||
376 | /** | ||
377 | * Check if the connection can be released, and if yes, then free the | ||
378 | * connection structure | ||
379 | */ | ||
380 | void fuse_release_conn(struct fuse_conn *fc); | ||
381 | |||
382 | /** | ||
383 | * Initialize the client device | ||
384 | */ | ||
385 | int fuse_dev_init(void); | ||
386 | |||
387 | /** | ||
388 | * Cleanup the client device | ||
389 | */ | ||
390 | void fuse_dev_cleanup(void); | ||
391 | |||
392 | /** | ||
393 | * Allocate a request | ||
394 | */ | ||
395 | struct fuse_req *fuse_request_alloc(void); | ||
396 | |||
397 | /** | ||
398 | * Free a request | ||
399 | */ | ||
400 | void fuse_request_free(struct fuse_req *req); | ||
401 | |||
402 | /** | ||
403 | * Reinitialize a request, the preallocated flag is left unmodified | ||
404 | */ | ||
405 | void fuse_reset_request(struct fuse_req *req); | ||
406 | |||
407 | /** | ||
408 | * Reserve a preallocated request | ||
409 | */ | ||
410 | struct fuse_req *fuse_get_request(struct fuse_conn *fc); | ||
411 | |||
412 | /** | ||
413 | * Decrement reference count of a request. If count goes to zero put | ||
414 | * on unused list (preallocated) or free request (not preallocated). | ||
415 | */ | ||
416 | void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req); | ||
417 | |||
418 | /** | ||
419 | * Send a request (synchronous) | ||
420 | */ | ||
421 | void request_send(struct fuse_conn *fc, struct fuse_req *req); | ||
422 | |||
423 | /** | ||
424 | * Send a request with no reply | ||
425 | */ | ||
426 | void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req); | ||
427 | |||
428 | /** | ||
429 | * Send a request in the background | ||
430 | */ | ||
431 | void request_send_background(struct fuse_conn *fc, struct fuse_req *req); | ||
432 | |||
433 | /** | ||
434 | * Release inodes and file associated with background request | ||
435 | */ | ||
436 | void fuse_release_background(struct fuse_req *req); | ||
437 | |||
438 | /** | ||
439 | * Get the attributes of a file | ||
440 | */ | ||
441 | int fuse_do_getattr(struct inode *inode); | ||
442 | |||
443 | /** | ||
444 | * Invalidate inode attributes | ||
445 | */ | ||
446 | void fuse_invalidate_attr(struct inode *inode); | ||
447 | |||
448 | /** | ||
449 | * Send the INIT message | ||
450 | */ | ||
451 | void fuse_send_init(struct fuse_conn *fc); | ||
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c new file mode 100644 index 000000000000..e69a546844d0 --- /dev/null +++ b/fs/fuse/inode.c | |||
@@ -0,0 +1,591 @@ | |||
1 | /* | ||
2 | FUSE: Filesystem in Userspace | ||
3 | Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu> | ||
4 | |||
5 | This program can be distributed under the terms of the GNU GPL. | ||
6 | See the file COPYING. | ||
7 | */ | ||
8 | |||
9 | #include "fuse_i.h" | ||
10 | |||
11 | #include <linux/pagemap.h> | ||
12 | #include <linux/slab.h> | ||
13 | #include <linux/file.h> | ||
14 | #include <linux/mount.h> | ||
15 | #include <linux/seq_file.h> | ||
16 | #include <linux/init.h> | ||
17 | #include <linux/module.h> | ||
18 | #include <linux/parser.h> | ||
19 | #include <linux/statfs.h> | ||
20 | |||
21 | MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); | ||
22 | MODULE_DESCRIPTION("Filesystem in Userspace"); | ||
23 | MODULE_LICENSE("GPL"); | ||
24 | |||
25 | spinlock_t fuse_lock; | ||
26 | static kmem_cache_t *fuse_inode_cachep; | ||
27 | |||
28 | #define FUSE_SUPER_MAGIC 0x65735546 | ||
29 | |||
/* Mount options as collected by parse_fuse_opt() */
struct fuse_mount_data {
	/* Values of the fd=, rootmode=, user_id= and group_id= options */
	int fd;
	unsigned int rootmode;
	unsigned int user_id;
	unsigned int group_id;

	/* Set when the corresponding option above has been seen */
	unsigned int fd_present : 1;
	unsigned int rootmode_present : 1;
	unsigned int user_id_present : 1;
	unsigned int group_id_present : 1;

	/* FUSE_DEFAULT_PERMISSIONS and/or FUSE_ALLOW_OTHER */
	unsigned int flags;

	/* Value of max_read=, ~0 when the option was not given */
	unsigned int max_read;
};
42 | |||
43 | static struct inode *fuse_alloc_inode(struct super_block *sb) | ||
44 | { | ||
45 | struct inode *inode; | ||
46 | struct fuse_inode *fi; | ||
47 | |||
48 | inode = kmem_cache_alloc(fuse_inode_cachep, SLAB_KERNEL); | ||
49 | if (!inode) | ||
50 | return NULL; | ||
51 | |||
52 | fi = get_fuse_inode(inode); | ||
53 | fi->i_time = jiffies - 1; | ||
54 | fi->nodeid = 0; | ||
55 | fi->nlookup = 0; | ||
56 | fi->forget_req = fuse_request_alloc(); | ||
57 | if (!fi->forget_req) { | ||
58 | kmem_cache_free(fuse_inode_cachep, inode); | ||
59 | return NULL; | ||
60 | } | ||
61 | |||
62 | return inode; | ||
63 | } | ||
64 | |||
65 | static void fuse_destroy_inode(struct inode *inode) | ||
66 | { | ||
67 | struct fuse_inode *fi = get_fuse_inode(inode); | ||
68 | if (fi->forget_req) | ||
69 | fuse_request_free(fi->forget_req); | ||
70 | kmem_cache_free(fuse_inode_cachep, inode); | ||
71 | } | ||
72 | |||
/* read_inode super operation: intentionally does nothing */
static void fuse_read_inode(struct inode *inode)
{
	/* No op */
}
77 | |||
78 | void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req, | ||
79 | unsigned long nodeid, u64 nlookup) | ||
80 | { | ||
81 | struct fuse_forget_in *inarg = &req->misc.forget_in; | ||
82 | inarg->nlookup = nlookup; | ||
83 | req->in.h.opcode = FUSE_FORGET; | ||
84 | req->in.h.nodeid = nodeid; | ||
85 | req->in.numargs = 1; | ||
86 | req->in.args[0].size = sizeof(struct fuse_forget_in); | ||
87 | req->in.args[0].value = inarg; | ||
88 | request_send_noreply(fc, req); | ||
89 | } | ||
90 | |||
91 | static void fuse_clear_inode(struct inode *inode) | ||
92 | { | ||
93 | if (inode->i_sb->s_flags & MS_ACTIVE) { | ||
94 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
95 | struct fuse_inode *fi = get_fuse_inode(inode); | ||
96 | fuse_send_forget(fc, fi->forget_req, fi->nodeid, fi->nlookup); | ||
97 | fi->forget_req = NULL; | ||
98 | } | ||
99 | } | ||
100 | |||
101 | void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr) | ||
102 | { | ||
103 | if (S_ISREG(inode->i_mode) && i_size_read(inode) != attr->size) | ||
104 | invalidate_inode_pages(inode->i_mapping); | ||
105 | |||
106 | inode->i_ino = attr->ino; | ||
107 | inode->i_mode = (inode->i_mode & S_IFMT) + (attr->mode & 07777); | ||
108 | inode->i_nlink = attr->nlink; | ||
109 | inode->i_uid = attr->uid; | ||
110 | inode->i_gid = attr->gid; | ||
111 | i_size_write(inode, attr->size); | ||
112 | inode->i_blksize = PAGE_CACHE_SIZE; | ||
113 | inode->i_blocks = attr->blocks; | ||
114 | inode->i_atime.tv_sec = attr->atime; | ||
115 | inode->i_atime.tv_nsec = attr->atimensec; | ||
116 | inode->i_mtime.tv_sec = attr->mtime; | ||
117 | inode->i_mtime.tv_nsec = attr->mtimensec; | ||
118 | inode->i_ctime.tv_sec = attr->ctime; | ||
119 | inode->i_ctime.tv_nsec = attr->ctimensec; | ||
120 | } | ||
121 | |||
122 | static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr) | ||
123 | { | ||
124 | inode->i_mode = attr->mode & S_IFMT; | ||
125 | i_size_write(inode, attr->size); | ||
126 | if (S_ISREG(inode->i_mode)) { | ||
127 | fuse_init_common(inode); | ||
128 | fuse_init_file_inode(inode); | ||
129 | } else if (S_ISDIR(inode->i_mode)) | ||
130 | fuse_init_dir(inode); | ||
131 | else if (S_ISLNK(inode->i_mode)) | ||
132 | fuse_init_symlink(inode); | ||
133 | else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || | ||
134 | S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { | ||
135 | fuse_init_common(inode); | ||
136 | init_special_inode(inode, inode->i_mode, | ||
137 | new_decode_dev(attr->rdev)); | ||
138 | } else { | ||
139 | /* Don't let user create weird files */ | ||
140 | inode->i_mode = S_IFREG; | ||
141 | fuse_init_common(inode); | ||
142 | fuse_init_file_inode(inode); | ||
143 | } | ||
144 | } | ||
145 | |||
/*
 * iget5_locked() test callback: returns 1 if @inode has the node id
 * pointed to by @_nodeidp, 0 otherwise.
 */
static int fuse_inode_eq(struct inode *inode, void *_nodeidp)
{
	unsigned long wanted = *(unsigned long *) _nodeidp;

	return get_node_id(inode) == wanted;
}
154 | |||
155 | static int fuse_inode_set(struct inode *inode, void *_nodeidp) | ||
156 | { | ||
157 | unsigned long nodeid = *(unsigned long *) _nodeidp; | ||
158 | get_fuse_inode(inode)->nodeid = nodeid; | ||
159 | return 0; | ||
160 | } | ||
161 | |||
162 | struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid, | ||
163 | int generation, struct fuse_attr *attr) | ||
164 | { | ||
165 | struct inode *inode; | ||
166 | struct fuse_inode *fi; | ||
167 | struct fuse_conn *fc = get_fuse_conn_super(sb); | ||
168 | int retried = 0; | ||
169 | |||
170 | retry: | ||
171 | inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &nodeid); | ||
172 | if (!inode) | ||
173 | return NULL; | ||
174 | |||
175 | if ((inode->i_state & I_NEW)) { | ||
176 | inode->i_flags |= S_NOATIME|S_NOCMTIME; | ||
177 | inode->i_generation = generation; | ||
178 | inode->i_data.backing_dev_info = &fc->bdi; | ||
179 | fuse_init_inode(inode, attr); | ||
180 | unlock_new_inode(inode); | ||
181 | } else if ((inode->i_mode ^ attr->mode) & S_IFMT) { | ||
182 | BUG_ON(retried); | ||
183 | /* Inode has changed type, any I/O on the old should fail */ | ||
184 | make_bad_inode(inode); | ||
185 | iput(inode); | ||
186 | retried = 1; | ||
187 | goto retry; | ||
188 | } | ||
189 | |||
190 | fi = get_fuse_inode(inode); | ||
191 | fi->nlookup ++; | ||
192 | fuse_change_attributes(inode, attr); | ||
193 | return inode; | ||
194 | } | ||
195 | |||
196 | static void fuse_put_super(struct super_block *sb) | ||
197 | { | ||
198 | struct fuse_conn *fc = get_fuse_conn_super(sb); | ||
199 | |||
200 | down_write(&fc->sbput_sem); | ||
201 | while (!list_empty(&fc->background)) | ||
202 | fuse_release_background(list_entry(fc->background.next, | ||
203 | struct fuse_req, bg_entry)); | ||
204 | |||
205 | spin_lock(&fuse_lock); | ||
206 | fc->mounted = 0; | ||
207 | fc->user_id = 0; | ||
208 | fc->group_id = 0; | ||
209 | fc->flags = 0; | ||
210 | /* Flush all readers on this fs */ | ||
211 | wake_up_all(&fc->waitq); | ||
212 | up_write(&fc->sbput_sem); | ||
213 | fuse_release_conn(fc); | ||
214 | spin_unlock(&fuse_lock); | ||
215 | } | ||
216 | |||
217 | static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr) | ||
218 | { | ||
219 | stbuf->f_type = FUSE_SUPER_MAGIC; | ||
220 | stbuf->f_bsize = attr->bsize; | ||
221 | stbuf->f_blocks = attr->blocks; | ||
222 | stbuf->f_bfree = attr->bfree; | ||
223 | stbuf->f_bavail = attr->bavail; | ||
224 | stbuf->f_files = attr->files; | ||
225 | stbuf->f_ffree = attr->ffree; | ||
226 | stbuf->f_namelen = attr->namelen; | ||
227 | /* fsid is left zero */ | ||
228 | } | ||
229 | |||
230 | static int fuse_statfs(struct super_block *sb, struct kstatfs *buf) | ||
231 | { | ||
232 | struct fuse_conn *fc = get_fuse_conn_super(sb); | ||
233 | struct fuse_req *req; | ||
234 | struct fuse_statfs_out outarg; | ||
235 | int err; | ||
236 | |||
237 | req = fuse_get_request(fc); | ||
238 | if (!req) | ||
239 | return -EINTR; | ||
240 | |||
241 | req->in.numargs = 0; | ||
242 | req->in.h.opcode = FUSE_STATFS; | ||
243 | req->out.numargs = 1; | ||
244 | req->out.args[0].size = sizeof(outarg); | ||
245 | req->out.args[0].value = &outarg; | ||
246 | request_send(fc, req); | ||
247 | err = req->out.h.error; | ||
248 | if (!err) | ||
249 | convert_fuse_statfs(buf, &outarg.st); | ||
250 | fuse_put_request(fc, req); | ||
251 | return err; | ||
252 | } | ||
253 | |||
/* Mount option tokens, see the tokens table and parse_fuse_opt() */
enum {
	/* options that must be present */
	OPT_FD,
	OPT_ROOTMODE,
	OPT_USER_ID,
	OPT_GROUP_ID,
	/* optional */
	OPT_DEFAULT_PERMISSIONS,
	OPT_ALLOW_OTHER,
	OPT_MAX_READ,
	/* no match */
	OPT_ERR
};
264 | |||
265 | static match_table_t tokens = { | ||
266 | {OPT_FD, "fd=%u"}, | ||
267 | {OPT_ROOTMODE, "rootmode=%o"}, | ||
268 | {OPT_USER_ID, "user_id=%u"}, | ||
269 | {OPT_GROUP_ID, "group_id=%u"}, | ||
270 | {OPT_DEFAULT_PERMISSIONS, "default_permissions"}, | ||
271 | {OPT_ALLOW_OTHER, "allow_other"}, | ||
272 | {OPT_MAX_READ, "max_read=%u"}, | ||
273 | {OPT_ERR, NULL} | ||
274 | }; | ||
275 | |||
276 | static int parse_fuse_opt(char *opt, struct fuse_mount_data *d) | ||
277 | { | ||
278 | char *p; | ||
279 | memset(d, 0, sizeof(struct fuse_mount_data)); | ||
280 | d->max_read = ~0; | ||
281 | |||
282 | while ((p = strsep(&opt, ",")) != NULL) { | ||
283 | int token; | ||
284 | int value; | ||
285 | substring_t args[MAX_OPT_ARGS]; | ||
286 | if (!*p) | ||
287 | continue; | ||
288 | |||
289 | token = match_token(p, tokens, args); | ||
290 | switch (token) { | ||
291 | case OPT_FD: | ||
292 | if (match_int(&args[0], &value)) | ||
293 | return 0; | ||
294 | d->fd = value; | ||
295 | d->fd_present = 1; | ||
296 | break; | ||
297 | |||
298 | case OPT_ROOTMODE: | ||
299 | if (match_octal(&args[0], &value)) | ||
300 | return 0; | ||
301 | d->rootmode = value; | ||
302 | d->rootmode_present = 1; | ||
303 | break; | ||
304 | |||
305 | case OPT_USER_ID: | ||
306 | if (match_int(&args[0], &value)) | ||
307 | return 0; | ||
308 | d->user_id = value; | ||
309 | d->user_id_present = 1; | ||
310 | break; | ||
311 | |||
312 | case OPT_GROUP_ID: | ||
313 | if (match_int(&args[0], &value)) | ||
314 | return 0; | ||
315 | d->group_id = value; | ||
316 | d->group_id_present = 1; | ||
317 | break; | ||
318 | |||
319 | case OPT_DEFAULT_PERMISSIONS: | ||
320 | d->flags |= FUSE_DEFAULT_PERMISSIONS; | ||
321 | break; | ||
322 | |||
323 | case OPT_ALLOW_OTHER: | ||
324 | d->flags |= FUSE_ALLOW_OTHER; | ||
325 | break; | ||
326 | |||
327 | case OPT_MAX_READ: | ||
328 | if (match_int(&args[0], &value)) | ||
329 | return 0; | ||
330 | d->max_read = value; | ||
331 | break; | ||
332 | |||
333 | default: | ||
334 | return 0; | ||
335 | } | ||
336 | } | ||
337 | |||
338 | if (!d->fd_present || !d->rootmode_present || | ||
339 | !d->user_id_present || !d->group_id_present) | ||
340 | return 0; | ||
341 | |||
342 | return 1; | ||
343 | } | ||
344 | |||
345 | static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt) | ||
346 | { | ||
347 | struct fuse_conn *fc = get_fuse_conn_super(mnt->mnt_sb); | ||
348 | |||
349 | seq_printf(m, ",user_id=%u", fc->user_id); | ||
350 | seq_printf(m, ",group_id=%u", fc->group_id); | ||
351 | if (fc->flags & FUSE_DEFAULT_PERMISSIONS) | ||
352 | seq_puts(m, ",default_permissions"); | ||
353 | if (fc->flags & FUSE_ALLOW_OTHER) | ||
354 | seq_puts(m, ",allow_other"); | ||
355 | if (fc->max_read != ~0) | ||
356 | seq_printf(m, ",max_read=%u", fc->max_read); | ||
357 | return 0; | ||
358 | } | ||
359 | |||
360 | static void free_conn(struct fuse_conn *fc) | ||
361 | { | ||
362 | while (!list_empty(&fc->unused_list)) { | ||
363 | struct fuse_req *req; | ||
364 | req = list_entry(fc->unused_list.next, struct fuse_req, list); | ||
365 | list_del(&req->list); | ||
366 | fuse_request_free(req); | ||
367 | } | ||
368 | kfree(fc); | ||
369 | } | ||
370 | |||
371 | /* Must be called with the fuse lock held */ | ||
372 | void fuse_release_conn(struct fuse_conn *fc) | ||
373 | { | ||
374 | fc->count--; | ||
375 | if (!fc->count) | ||
376 | free_conn(fc); | ||
377 | } | ||
378 | |||
379 | static struct fuse_conn *new_conn(void) | ||
380 | { | ||
381 | struct fuse_conn *fc; | ||
382 | |||
383 | fc = kmalloc(sizeof(*fc), GFP_KERNEL); | ||
384 | if (fc != NULL) { | ||
385 | int i; | ||
386 | memset(fc, 0, sizeof(*fc)); | ||
387 | init_waitqueue_head(&fc->waitq); | ||
388 | INIT_LIST_HEAD(&fc->pending); | ||
389 | INIT_LIST_HEAD(&fc->processing); | ||
390 | INIT_LIST_HEAD(&fc->unused_list); | ||
391 | INIT_LIST_HEAD(&fc->background); | ||
392 | sema_init(&fc->outstanding_sem, 0); | ||
393 | init_rwsem(&fc->sbput_sem); | ||
394 | for (i = 0; i < FUSE_MAX_OUTSTANDING; i++) { | ||
395 | struct fuse_req *req = fuse_request_alloc(); | ||
396 | if (!req) { | ||
397 | free_conn(fc); | ||
398 | return NULL; | ||
399 | } | ||
400 | list_add(&req->list, &fc->unused_list); | ||
401 | } | ||
402 | fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; | ||
403 | fc->bdi.unplug_io_fn = default_unplug_io_fn; | ||
404 | fc->reqctr = 0; | ||
405 | } | ||
406 | return fc; | ||
407 | } | ||
408 | |||
409 | static struct fuse_conn *get_conn(struct file *file, struct super_block *sb) | ||
410 | { | ||
411 | struct fuse_conn *fc; | ||
412 | |||
413 | if (file->f_op != &fuse_dev_operations) | ||
414 | return ERR_PTR(-EINVAL); | ||
415 | fc = new_conn(); | ||
416 | if (fc == NULL) | ||
417 | return ERR_PTR(-ENOMEM); | ||
418 | spin_lock(&fuse_lock); | ||
419 | if (file->private_data) { | ||
420 | free_conn(fc); | ||
421 | fc = ERR_PTR(-EINVAL); | ||
422 | } else { | ||
423 | file->private_data = fc; | ||
424 | *get_fuse_conn_super_p(sb) = fc; | ||
425 | fc->mounted = 1; | ||
426 | fc->connected = 1; | ||
427 | fc->count = 2; | ||
428 | } | ||
429 | spin_unlock(&fuse_lock); | ||
430 | return fc; | ||
431 | } | ||
432 | |||
433 | static struct inode *get_root_inode(struct super_block *sb, unsigned mode) | ||
434 | { | ||
435 | struct fuse_attr attr; | ||
436 | memset(&attr, 0, sizeof(attr)); | ||
437 | |||
438 | attr.mode = mode; | ||
439 | attr.ino = FUSE_ROOT_ID; | ||
440 | return fuse_iget(sb, 1, 0, &attr); | ||
441 | } | ||
442 | |||
443 | static struct super_operations fuse_super_operations = { | ||
444 | .alloc_inode = fuse_alloc_inode, | ||
445 | .destroy_inode = fuse_destroy_inode, | ||
446 | .read_inode = fuse_read_inode, | ||
447 | .clear_inode = fuse_clear_inode, | ||
448 | .put_super = fuse_put_super, | ||
449 | .statfs = fuse_statfs, | ||
450 | .show_options = fuse_show_options, | ||
451 | }; | ||
452 | |||
453 | static int fuse_fill_super(struct super_block *sb, void *data, int silent) | ||
454 | { | ||
455 | struct fuse_conn *fc; | ||
456 | struct inode *root; | ||
457 | struct fuse_mount_data d; | ||
458 | struct file *file; | ||
459 | int err; | ||
460 | |||
461 | if (!parse_fuse_opt((char *) data, &d)) | ||
462 | return -EINVAL; | ||
463 | |||
464 | sb->s_blocksize = PAGE_CACHE_SIZE; | ||
465 | sb->s_blocksize_bits = PAGE_CACHE_SHIFT; | ||
466 | sb->s_magic = FUSE_SUPER_MAGIC; | ||
467 | sb->s_op = &fuse_super_operations; | ||
468 | sb->s_maxbytes = MAX_LFS_FILESIZE; | ||
469 | |||
470 | file = fget(d.fd); | ||
471 | if (!file) | ||
472 | return -EINVAL; | ||
473 | |||
474 | fc = get_conn(file, sb); | ||
475 | fput(file); | ||
476 | if (IS_ERR(fc)) | ||
477 | return PTR_ERR(fc); | ||
478 | |||
479 | fc->flags = d.flags; | ||
480 | fc->user_id = d.user_id; | ||
481 | fc->group_id = d.group_id; | ||
482 | fc->max_read = d.max_read; | ||
483 | if (fc->max_read / PAGE_CACHE_SIZE < fc->bdi.ra_pages) | ||
484 | fc->bdi.ra_pages = fc->max_read / PAGE_CACHE_SIZE; | ||
485 | fc->max_write = FUSE_MAX_IN / 2; | ||
486 | |||
487 | err = -ENOMEM; | ||
488 | root = get_root_inode(sb, d.rootmode); | ||
489 | if (root == NULL) | ||
490 | goto err; | ||
491 | |||
492 | sb->s_root = d_alloc_root(root); | ||
493 | if (!sb->s_root) { | ||
494 | iput(root); | ||
495 | goto err; | ||
496 | } | ||
497 | fuse_send_init(fc); | ||
498 | return 0; | ||
499 | |||
500 | err: | ||
501 | spin_lock(&fuse_lock); | ||
502 | fuse_release_conn(fc); | ||
503 | spin_unlock(&fuse_lock); | ||
504 | return err; | ||
505 | } | ||
506 | |||
507 | static struct super_block *fuse_get_sb(struct file_system_type *fs_type, | ||
508 | int flags, const char *dev_name, | ||
509 | void *raw_data) | ||
510 | { | ||
511 | return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super); | ||
512 | } | ||
513 | |||
514 | static struct file_system_type fuse_fs_type = { | ||
515 | .owner = THIS_MODULE, | ||
516 | .name = "fuse", | ||
517 | .get_sb = fuse_get_sb, | ||
518 | .kill_sb = kill_anon_super, | ||
519 | }; | ||
520 | |||
521 | static void fuse_inode_init_once(void *foo, kmem_cache_t *cachep, | ||
522 | unsigned long flags) | ||
523 | { | ||
524 | struct inode * inode = foo; | ||
525 | |||
526 | if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == | ||
527 | SLAB_CTOR_CONSTRUCTOR) | ||
528 | inode_init_once(inode); | ||
529 | } | ||
530 | |||
531 | static int __init fuse_fs_init(void) | ||
532 | { | ||
533 | int err; | ||
534 | |||
535 | err = register_filesystem(&fuse_fs_type); | ||
536 | if (err) | ||
537 | printk("fuse: failed to register filesystem\n"); | ||
538 | else { | ||
539 | fuse_inode_cachep = kmem_cache_create("fuse_inode", | ||
540 | sizeof(struct fuse_inode), | ||
541 | 0, SLAB_HWCACHE_ALIGN, | ||
542 | fuse_inode_init_once, NULL); | ||
543 | if (!fuse_inode_cachep) { | ||
544 | unregister_filesystem(&fuse_fs_type); | ||
545 | err = -ENOMEM; | ||
546 | } | ||
547 | } | ||
548 | |||
549 | return err; | ||
550 | } | ||
551 | |||
/* Unregister the "fuse" filesystem type and destroy the inode cache */
static void fuse_fs_cleanup(void)
{
	unregister_filesystem(&fuse_fs_type);
	kmem_cache_destroy(fuse_inode_cachep);
}
557 | |||
558 | static int __init fuse_init(void) | ||
559 | { | ||
560 | int res; | ||
561 | |||
562 | printk("fuse init (API version %i.%i)\n", | ||
563 | FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION); | ||
564 | |||
565 | spin_lock_init(&fuse_lock); | ||
566 | res = fuse_fs_init(); | ||
567 | if (res) | ||
568 | goto err; | ||
569 | |||
570 | res = fuse_dev_init(); | ||
571 | if (res) | ||
572 | goto err_fs_cleanup; | ||
573 | |||
574 | return 0; | ||
575 | |||
576 | err_fs_cleanup: | ||
577 | fuse_fs_cleanup(); | ||
578 | err: | ||
579 | return res; | ||
580 | } | ||
581 | |||
/* Module exit: tear down the filesystem part, then the client device */
static void __exit fuse_exit(void)
{
	printk(KERN_DEBUG "fuse exit\n");

	fuse_fs_cleanup();
	fuse_dev_cleanup();
}
589 | |||
590 | module_init(fuse_init); | ||
591 | module_exit(fuse_exit); | ||
diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c index a096c5a56664..3d5cdc6847c0 100644 --- a/fs/hfs/bnode.c +++ b/fs/hfs/bnode.c | |||
@@ -13,8 +13,6 @@ | |||
13 | 13 | ||
14 | #include "btree.h" | 14 | #include "btree.h" |
15 | 15 | ||
16 | #define REF_PAGES 0 | ||
17 | |||
18 | void hfs_bnode_read(struct hfs_bnode *node, void *buf, | 16 | void hfs_bnode_read(struct hfs_bnode *node, void *buf, |
19 | int off, int len) | 17 | int off, int len) |
20 | { | 18 | { |
@@ -289,9 +287,7 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid) | |||
289 | page_cache_release(page); | 287 | page_cache_release(page); |
290 | goto fail; | 288 | goto fail; |
291 | } | 289 | } |
292 | #if !REF_PAGES | ||
293 | page_cache_release(page); | 290 | page_cache_release(page); |
294 | #endif | ||
295 | node->page[i] = page; | 291 | node->page[i] = page; |
296 | } | 292 | } |
297 | 293 | ||
@@ -449,13 +445,6 @@ void hfs_bnode_get(struct hfs_bnode *node) | |||
449 | { | 445 | { |
450 | if (node) { | 446 | if (node) { |
451 | atomic_inc(&node->refcnt); | 447 | atomic_inc(&node->refcnt); |
452 | #if REF_PAGES | ||
453 | { | ||
454 | int i; | ||
455 | for (i = 0; i < node->tree->pages_per_bnode; i++) | ||
456 | get_page(node->page[i]); | ||
457 | } | ||
458 | #endif | ||
459 | dprint(DBG_BNODE_REFS, "get_node(%d:%d): %d\n", | 448 | dprint(DBG_BNODE_REFS, "get_node(%d:%d): %d\n", |
460 | node->tree->cnid, node->this, atomic_read(&node->refcnt)); | 449 | node->tree->cnid, node->this, atomic_read(&node->refcnt)); |
461 | } | 450 | } |
@@ -472,20 +461,12 @@ void hfs_bnode_put(struct hfs_bnode *node) | |||
472 | node->tree->cnid, node->this, atomic_read(&node->refcnt)); | 461 | node->tree->cnid, node->this, atomic_read(&node->refcnt)); |
473 | if (!atomic_read(&node->refcnt)) | 462 | if (!atomic_read(&node->refcnt)) |
474 | BUG(); | 463 | BUG(); |
475 | if (!atomic_dec_and_lock(&node->refcnt, &tree->hash_lock)) { | 464 | if (!atomic_dec_and_lock(&node->refcnt, &tree->hash_lock)) |
476 | #if REF_PAGES | ||
477 | for (i = 0; i < tree->pages_per_bnode; i++) | ||
478 | put_page(node->page[i]); | ||
479 | #endif | ||
480 | return; | 465 | return; |
481 | } | ||
482 | for (i = 0; i < tree->pages_per_bnode; i++) { | 466 | for (i = 0; i < tree->pages_per_bnode; i++) { |
483 | if (!node->page[i]) | 467 | if (!node->page[i]) |
484 | continue; | 468 | continue; |
485 | mark_page_accessed(node->page[i]); | 469 | mark_page_accessed(node->page[i]); |
486 | #if REF_PAGES | ||
487 | put_page(node->page[i]); | ||
488 | #endif | ||
489 | } | 470 | } |
490 | 471 | ||
491 | if (test_bit(HFS_BNODE_DELETED, &node->flags)) { | 472 | if (test_bit(HFS_BNODE_DELETED, &node->flags)) { |
diff --git a/fs/hfs/catalog.c b/fs/hfs/catalog.c index 65dedefcabfc..2fcd679f0238 100644 --- a/fs/hfs/catalog.c +++ b/fs/hfs/catalog.c | |||
@@ -20,12 +20,12 @@ | |||
20 | * | 20 | * |
21 | * Given the ID of the parent and the name build a search key. | 21 | * Given the ID of the parent and the name build a search key. |
22 | */ | 22 | */ |
23 | void hfs_cat_build_key(btree_key *key, u32 parent, struct qstr *name) | 23 | void hfs_cat_build_key(struct super_block *sb, btree_key *key, u32 parent, struct qstr *name) |
24 | { | 24 | { |
25 | key->cat.reserved = 0; | 25 | key->cat.reserved = 0; |
26 | key->cat.ParID = cpu_to_be32(parent); | 26 | key->cat.ParID = cpu_to_be32(parent); |
27 | if (name) { | 27 | if (name) { |
28 | hfs_triv2mac(&key->cat.CName, name); | 28 | hfs_asc2mac(sb, &key->cat.CName, name); |
29 | key->key_len = 6 + key->cat.CName.len; | 29 | key->key_len = 6 + key->cat.CName.len; |
30 | } else { | 30 | } else { |
31 | memset(&key->cat.CName, 0, sizeof(struct hfs_name)); | 31 | memset(&key->cat.CName, 0, sizeof(struct hfs_name)); |
@@ -62,13 +62,14 @@ static int hfs_cat_build_record(hfs_cat_rec *rec, u32 cnid, struct inode *inode) | |||
62 | } | 62 | } |
63 | } | 63 | } |
64 | 64 | ||
65 | static int hfs_cat_build_thread(hfs_cat_rec *rec, int type, | 65 | static int hfs_cat_build_thread(struct super_block *sb, |
66 | hfs_cat_rec *rec, int type, | ||
66 | u32 parentid, struct qstr *name) | 67 | u32 parentid, struct qstr *name) |
67 | { | 68 | { |
68 | rec->type = type; | 69 | rec->type = type; |
69 | memset(rec->thread.reserved, 0, sizeof(rec->thread.reserved)); | 70 | memset(rec->thread.reserved, 0, sizeof(rec->thread.reserved)); |
70 | rec->thread.ParID = cpu_to_be32(parentid); | 71 | rec->thread.ParID = cpu_to_be32(parentid); |
71 | hfs_triv2mac(&rec->thread.CName, name); | 72 | hfs_asc2mac(sb, &rec->thread.CName, name); |
72 | return sizeof(struct hfs_cat_thread); | 73 | return sizeof(struct hfs_cat_thread); |
73 | } | 74 | } |
74 | 75 | ||
@@ -93,8 +94,8 @@ int hfs_cat_create(u32 cnid, struct inode *dir, struct qstr *str, struct inode * | |||
93 | sb = dir->i_sb; | 94 | sb = dir->i_sb; |
94 | hfs_find_init(HFS_SB(sb)->cat_tree, &fd); | 95 | hfs_find_init(HFS_SB(sb)->cat_tree, &fd); |
95 | 96 | ||
96 | hfs_cat_build_key(fd.search_key, cnid, NULL); | 97 | hfs_cat_build_key(sb, fd.search_key, cnid, NULL); |
97 | entry_size = hfs_cat_build_thread(&entry, S_ISDIR(inode->i_mode) ? | 98 | entry_size = hfs_cat_build_thread(sb, &entry, S_ISDIR(inode->i_mode) ? |
98 | HFS_CDR_THD : HFS_CDR_FTH, | 99 | HFS_CDR_THD : HFS_CDR_FTH, |
99 | dir->i_ino, str); | 100 | dir->i_ino, str); |
100 | err = hfs_brec_find(&fd); | 101 | err = hfs_brec_find(&fd); |
@@ -107,7 +108,7 @@ int hfs_cat_create(u32 cnid, struct inode *dir, struct qstr *str, struct inode * | |||
107 | if (err) | 108 | if (err) |
108 | goto err2; | 109 | goto err2; |
109 | 110 | ||
110 | hfs_cat_build_key(fd.search_key, dir->i_ino, str); | 111 | hfs_cat_build_key(sb, fd.search_key, dir->i_ino, str); |
111 | entry_size = hfs_cat_build_record(&entry, cnid, inode); | 112 | entry_size = hfs_cat_build_record(&entry, cnid, inode); |
112 | err = hfs_brec_find(&fd); | 113 | err = hfs_brec_find(&fd); |
113 | if (err != -ENOENT) { | 114 | if (err != -ENOENT) { |
@@ -127,7 +128,7 @@ int hfs_cat_create(u32 cnid, struct inode *dir, struct qstr *str, struct inode * | |||
127 | return 0; | 128 | return 0; |
128 | 129 | ||
129 | err1: | 130 | err1: |
130 | hfs_cat_build_key(fd.search_key, cnid, NULL); | 131 | hfs_cat_build_key(sb, fd.search_key, cnid, NULL); |
131 | if (!hfs_brec_find(&fd)) | 132 | if (!hfs_brec_find(&fd)) |
132 | hfs_brec_remove(&fd); | 133 | hfs_brec_remove(&fd); |
133 | err2: | 134 | err2: |
@@ -176,7 +177,7 @@ int hfs_cat_find_brec(struct super_block *sb, u32 cnid, | |||
176 | hfs_cat_rec rec; | 177 | hfs_cat_rec rec; |
177 | int res, len, type; | 178 | int res, len, type; |
178 | 179 | ||
179 | hfs_cat_build_key(fd->search_key, cnid, NULL); | 180 | hfs_cat_build_key(sb, fd->search_key, cnid, NULL); |
180 | res = hfs_brec_read(fd, &rec, sizeof(rec)); | 181 | res = hfs_brec_read(fd, &rec, sizeof(rec)); |
181 | if (res) | 182 | if (res) |
182 | return res; | 183 | return res; |
@@ -211,7 +212,7 @@ int hfs_cat_delete(u32 cnid, struct inode *dir, struct qstr *str) | |||
211 | sb = dir->i_sb; | 212 | sb = dir->i_sb; |
212 | hfs_find_init(HFS_SB(sb)->cat_tree, &fd); | 213 | hfs_find_init(HFS_SB(sb)->cat_tree, &fd); |
213 | 214 | ||
214 | hfs_cat_build_key(fd.search_key, dir->i_ino, str); | 215 | hfs_cat_build_key(sb, fd.search_key, dir->i_ino, str); |
215 | res = hfs_brec_find(&fd); | 216 | res = hfs_brec_find(&fd); |
216 | if (res) | 217 | if (res) |
217 | goto out; | 218 | goto out; |
@@ -239,7 +240,7 @@ int hfs_cat_delete(u32 cnid, struct inode *dir, struct qstr *str) | |||
239 | if (res) | 240 | if (res) |
240 | goto out; | 241 | goto out; |
241 | 242 | ||
242 | hfs_cat_build_key(fd.search_key, cnid, NULL); | 243 | hfs_cat_build_key(sb, fd.search_key, cnid, NULL); |
243 | res = hfs_brec_find(&fd); | 244 | res = hfs_brec_find(&fd); |
244 | if (!res) { | 245 | if (!res) { |
245 | res = hfs_brec_remove(&fd); | 246 | res = hfs_brec_remove(&fd); |
@@ -280,7 +281,7 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, struct qstr *src_name, | |||
280 | dst_fd = src_fd; | 281 | dst_fd = src_fd; |
281 | 282 | ||
282 | /* find the old dir entry and read the data */ | 283 | /* find the old dir entry and read the data */ |
283 | hfs_cat_build_key(src_fd.search_key, src_dir->i_ino, src_name); | 284 | hfs_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name); |
284 | err = hfs_brec_find(&src_fd); | 285 | err = hfs_brec_find(&src_fd); |
285 | if (err) | 286 | if (err) |
286 | goto out; | 287 | goto out; |
@@ -289,7 +290,7 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, struct qstr *src_name, | |||
289 | src_fd.entrylength); | 290 | src_fd.entrylength); |
290 | 291 | ||
291 | /* create new dir entry with the data from the old entry */ | 292 | /* create new dir entry with the data from the old entry */ |
292 | hfs_cat_build_key(dst_fd.search_key, dst_dir->i_ino, dst_name); | 293 | hfs_cat_build_key(sb, dst_fd.search_key, dst_dir->i_ino, dst_name); |
293 | err = hfs_brec_find(&dst_fd); | 294 | err = hfs_brec_find(&dst_fd); |
294 | if (err != -ENOENT) { | 295 | if (err != -ENOENT) { |
295 | if (!err) | 296 | if (!err) |
@@ -305,7 +306,7 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, struct qstr *src_name, | |||
305 | mark_inode_dirty(dst_dir); | 306 | mark_inode_dirty(dst_dir); |
306 | 307 | ||
307 | /* finally remove the old entry */ | 308 | /* finally remove the old entry */ |
308 | hfs_cat_build_key(src_fd.search_key, src_dir->i_ino, src_name); | 309 | hfs_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name); |
309 | err = hfs_brec_find(&src_fd); | 310 | err = hfs_brec_find(&src_fd); |
310 | if (err) | 311 | if (err) |
311 | goto out; | 312 | goto out; |
@@ -321,7 +322,7 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, struct qstr *src_name, | |||
321 | goto out; | 322 | goto out; |
322 | 323 | ||
323 | /* remove old thread entry */ | 324 | /* remove old thread entry */ |
324 | hfs_cat_build_key(src_fd.search_key, cnid, NULL); | 325 | hfs_cat_build_key(sb, src_fd.search_key, cnid, NULL); |
325 | err = hfs_brec_find(&src_fd); | 326 | err = hfs_brec_find(&src_fd); |
326 | if (err) | 327 | if (err) |
327 | goto out; | 328 | goto out; |
@@ -330,8 +331,8 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, struct qstr *src_name, | |||
330 | goto out; | 331 | goto out; |
331 | 332 | ||
332 | /* create new thread entry */ | 333 | /* create new thread entry */ |
333 | hfs_cat_build_key(dst_fd.search_key, cnid, NULL); | 334 | hfs_cat_build_key(sb, dst_fd.search_key, cnid, NULL); |
334 | entry_size = hfs_cat_build_thread(&entry, type == HFS_CDR_FIL ? HFS_CDR_FTH : HFS_CDR_THD, | 335 | entry_size = hfs_cat_build_thread(sb, &entry, type == HFS_CDR_FIL ? HFS_CDR_FTH : HFS_CDR_THD, |
335 | dst_dir->i_ino, dst_name); | 336 | dst_dir->i_ino, dst_name); |
336 | err = hfs_brec_find(&dst_fd); | 337 | err = hfs_brec_find(&dst_fd); |
337 | if (err != -ENOENT) { | 338 | if (err != -ENOENT) { |
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c index c55998262aed..e1f24befba58 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c | |||
@@ -28,7 +28,7 @@ static struct dentry *hfs_lookup(struct inode *dir, struct dentry *dentry, | |||
28 | dentry->d_op = &hfs_dentry_operations; | 28 | dentry->d_op = &hfs_dentry_operations; |
29 | 29 | ||
30 | hfs_find_init(HFS_SB(dir->i_sb)->cat_tree, &fd); | 30 | hfs_find_init(HFS_SB(dir->i_sb)->cat_tree, &fd); |
31 | hfs_cat_build_key(fd.search_key, dir->i_ino, &dentry->d_name); | 31 | hfs_cat_build_key(dir->i_sb, fd.search_key, dir->i_ino, &dentry->d_name); |
32 | res = hfs_brec_read(&fd, &rec, sizeof(rec)); | 32 | res = hfs_brec_read(&fd, &rec, sizeof(rec)); |
33 | if (res) { | 33 | if (res) { |
34 | hfs_find_exit(&fd); | 34 | hfs_find_exit(&fd); |
@@ -56,7 +56,7 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
56 | struct inode *inode = filp->f_dentry->d_inode; | 56 | struct inode *inode = filp->f_dentry->d_inode; |
57 | struct super_block *sb = inode->i_sb; | 57 | struct super_block *sb = inode->i_sb; |
58 | int len, err; | 58 | int len, err; |
59 | char strbuf[HFS_NAMELEN + 1]; | 59 | char strbuf[HFS_MAX_NAMELEN]; |
60 | union hfs_cat_rec entry; | 60 | union hfs_cat_rec entry; |
61 | struct hfs_find_data fd; | 61 | struct hfs_find_data fd; |
62 | struct hfs_readdir_data *rd; | 62 | struct hfs_readdir_data *rd; |
@@ -66,7 +66,7 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
66 | return 0; | 66 | return 0; |
67 | 67 | ||
68 | hfs_find_init(HFS_SB(sb)->cat_tree, &fd); | 68 | hfs_find_init(HFS_SB(sb)->cat_tree, &fd); |
69 | hfs_cat_build_key(fd.search_key, inode->i_ino, NULL); | 69 | hfs_cat_build_key(sb, fd.search_key, inode->i_ino, NULL); |
70 | err = hfs_brec_find(&fd); | 70 | err = hfs_brec_find(&fd); |
71 | if (err) | 71 | if (err) |
72 | goto out; | 72 | goto out; |
@@ -111,7 +111,7 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
111 | } | 111 | } |
112 | hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength); | 112 | hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength); |
113 | type = entry.type; | 113 | type = entry.type; |
114 | len = hfs_mac2triv(strbuf, &fd.key->cat.CName); | 114 | len = hfs_mac2asc(sb, strbuf, &fd.key->cat.CName); |
115 | if (type == HFS_CDR_DIR) { | 115 | if (type == HFS_CDR_DIR) { |
116 | if (fd.entrylength < sizeof(struct hfs_cat_dir)) { | 116 | if (fd.entrylength < sizeof(struct hfs_cat_dir)) { |
117 | printk("HFS: small dir entry\n"); | 117 | printk("HFS: small dir entry\n"); |
@@ -307,7 +307,8 @@ static int hfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
307 | old_dir, &old_dentry->d_name, | 307 | old_dir, &old_dentry->d_name, |
308 | new_dir, &new_dentry->d_name); | 308 | new_dir, &new_dentry->d_name); |
309 | if (!res) | 309 | if (!res) |
310 | hfs_cat_build_key((btree_key *)&HFS_I(old_dentry->d_inode)->cat_key, | 310 | hfs_cat_build_key(old_dir->i_sb, |
311 | (btree_key *)&HFS_I(old_dentry->d_inode)->cat_key, | ||
311 | new_dir->i_ino, &new_dentry->d_name); | 312 | new_dir->i_ino, &new_dentry->d_name); |
312 | return res; | 313 | return res; |
313 | } | 314 | } |
diff --git a/fs/hfs/hfs.h b/fs/hfs/hfs.h index df6b33adee3b..88099ab1a180 100644 --- a/fs/hfs/hfs.h +++ b/fs/hfs/hfs.h | |||
@@ -25,6 +25,7 @@ | |||
25 | #define HFS_SECTOR_SIZE 512 /* size of an HFS sector */ | 25 | #define HFS_SECTOR_SIZE 512 /* size of an HFS sector */ |
26 | #define HFS_SECTOR_SIZE_BITS 9 /* log_2(HFS_SECTOR_SIZE) */ | 26 | #define HFS_SECTOR_SIZE_BITS 9 /* log_2(HFS_SECTOR_SIZE) */ |
27 | #define HFS_NAMELEN 31 /* maximum length of an HFS filename */ | 27 | #define HFS_NAMELEN 31 /* maximum length of an HFS filename */ |
28 | #define HFS_MAX_NAMELEN 128 | ||
28 | #define HFS_MAX_VALENCE 32767U | 29 | #define HFS_MAX_VALENCE 32767U |
29 | 30 | ||
30 | /* Meanings of the drAtrb field of the MDB, | 31 | /* Meanings of the drAtrb field of the MDB, |
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index 0dc8ef8e14de..aae019aadf88 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h | |||
@@ -141,6 +141,8 @@ struct hfs_sb_info { | |||
141 | 141 | ||
142 | int session, part; | 142 | int session, part; |
143 | 143 | ||
144 | struct nls_table *nls_io, *nls_disk; | ||
145 | |||
144 | struct semaphore bitmap_lock; | 146 | struct semaphore bitmap_lock; |
145 | 147 | ||
146 | unsigned long flags; | 148 | unsigned long flags; |
@@ -168,7 +170,7 @@ extern int hfs_cat_create(u32, struct inode *, struct qstr *, struct inode *); | |||
168 | extern int hfs_cat_delete(u32, struct inode *, struct qstr *); | 170 | extern int hfs_cat_delete(u32, struct inode *, struct qstr *); |
169 | extern int hfs_cat_move(u32, struct inode *, struct qstr *, | 171 | extern int hfs_cat_move(u32, struct inode *, struct qstr *, |
170 | struct inode *, struct qstr *); | 172 | struct inode *, struct qstr *); |
171 | extern void hfs_cat_build_key(btree_key *, u32, struct qstr *); | 173 | extern void hfs_cat_build_key(struct super_block *, btree_key *, u32, struct qstr *); |
172 | 174 | ||
173 | /* dir.c */ | 175 | /* dir.c */ |
174 | extern struct file_operations hfs_dir_operations; | 176 | extern struct file_operations hfs_dir_operations; |
@@ -222,8 +224,8 @@ extern int hfs_strcmp(const unsigned char *, unsigned int, | |||
222 | extern int hfs_compare_dentry(struct dentry *, struct qstr *, struct qstr *); | 224 | extern int hfs_compare_dentry(struct dentry *, struct qstr *, struct qstr *); |
223 | 225 | ||
224 | /* trans.c */ | 226 | /* trans.c */ |
225 | extern void hfs_triv2mac(struct hfs_name *, struct qstr *); | 227 | extern void hfs_asc2mac(struct super_block *, struct hfs_name *, struct qstr *); |
226 | extern int hfs_mac2triv(char *, const struct hfs_name *); | 228 | extern int hfs_mac2asc(struct super_block *, char *, const struct hfs_name *); |
227 | 229 | ||
228 | extern struct timezone sys_tz; | 230 | extern struct timezone sys_tz; |
229 | 231 | ||
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 751912326094..f1570b9f9de3 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c | |||
@@ -160,7 +160,7 @@ struct inode *hfs_new_inode(struct inode *dir, struct qstr *name, int mode) | |||
160 | 160 | ||
161 | init_MUTEX(&HFS_I(inode)->extents_lock); | 161 | init_MUTEX(&HFS_I(inode)->extents_lock); |
162 | INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list); | 162 | INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list); |
163 | hfs_cat_build_key((btree_key *)&HFS_I(inode)->cat_key, dir->i_ino, name); | 163 | hfs_cat_build_key(sb, (btree_key *)&HFS_I(inode)->cat_key, dir->i_ino, name); |
164 | inode->i_ino = HFS_SB(sb)->next_id++; | 164 | inode->i_ino = HFS_SB(sb)->next_id++; |
165 | inode->i_mode = mode; | 165 | inode->i_mode = mode; |
166 | inode->i_uid = current->fsuid; | 166 | inode->i_uid = current->fsuid; |
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c index 217e32f37e0b..0a473f79c89f 100644 --- a/fs/hfs/mdb.c +++ b/fs/hfs/mdb.c | |||
@@ -10,6 +10,7 @@ | |||
10 | 10 | ||
11 | #include <linux/cdrom.h> | 11 | #include <linux/cdrom.h> |
12 | #include <linux/genhd.h> | 12 | #include <linux/genhd.h> |
13 | #include <linux/nls.h> | ||
13 | 14 | ||
14 | #include "hfs_fs.h" | 15 | #include "hfs_fs.h" |
15 | #include "btree.h" | 16 | #include "btree.h" |
@@ -343,6 +344,11 @@ void hfs_mdb_put(struct super_block *sb) | |||
343 | brelse(HFS_SB(sb)->mdb_bh); | 344 | brelse(HFS_SB(sb)->mdb_bh); |
344 | brelse(HFS_SB(sb)->alt_mdb_bh); | 345 | brelse(HFS_SB(sb)->alt_mdb_bh); |
345 | 346 | ||
347 | if (HFS_SB(sb)->nls_io) | ||
348 | unload_nls(HFS_SB(sb)->nls_io); | ||
349 | if (HFS_SB(sb)->nls_disk) | ||
350 | unload_nls(HFS_SB(sb)->nls_disk); | ||
351 | |||
346 | kfree(HFS_SB(sb)); | 352 | kfree(HFS_SB(sb)); |
347 | sb->s_fs_info = NULL; | 353 | sb->s_fs_info = NULL; |
348 | } | 354 | } |
diff --git a/fs/hfs/super.c b/fs/hfs/super.c index ab783f6afa3b..c5074aeafcae 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c | |||
@@ -15,8 +15,11 @@ | |||
15 | #include <linux/config.h> | 15 | #include <linux/config.h> |
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <linux/blkdev.h> | 17 | #include <linux/blkdev.h> |
18 | #include <linux/mount.h> | ||
18 | #include <linux/init.h> | 19 | #include <linux/init.h> |
20 | #include <linux/nls.h> | ||
19 | #include <linux/parser.h> | 21 | #include <linux/parser.h> |
22 | #include <linux/seq_file.h> | ||
20 | #include <linux/vfs.h> | 23 | #include <linux/vfs.h> |
21 | 24 | ||
22 | #include "hfs_fs.h" | 25 | #include "hfs_fs.h" |
@@ -111,6 +114,32 @@ static int hfs_remount(struct super_block *sb, int *flags, char *data) | |||
111 | return 0; | 114 | return 0; |
112 | } | 115 | } |
113 | 116 | ||
117 | static int hfs_show_options(struct seq_file *seq, struct vfsmount *mnt) | ||
118 | { | ||
119 | struct hfs_sb_info *sbi = HFS_SB(mnt->mnt_sb); | ||
120 | |||
121 | if (sbi->s_creator != cpu_to_be32(0x3f3f3f3f)) | ||
122 | seq_printf(seq, ",creator=%.4s", (char *)&sbi->s_creator); | ||
123 | if (sbi->s_type != cpu_to_be32(0x3f3f3f3f)) | ||
124 | seq_printf(seq, ",type=%.4s", (char *)&sbi->s_type); | ||
125 | seq_printf(seq, ",uid=%u,gid=%u", sbi->s_uid, sbi->s_gid); | ||
126 | if (sbi->s_file_umask != 0133) | ||
127 | seq_printf(seq, ",file_umask=%o", sbi->s_file_umask); | ||
128 | if (sbi->s_dir_umask != 0022) | ||
129 | seq_printf(seq, ",dir_umask=%o", sbi->s_dir_umask); | ||
130 | if (sbi->part >= 0) | ||
131 | seq_printf(seq, ",part=%u", sbi->part); | ||
132 | if (sbi->session >= 0) | ||
133 | seq_printf(seq, ",session=%u", sbi->session); | ||
134 | if (sbi->nls_disk) | ||
135 | seq_printf(seq, ",codepage=%s", sbi->nls_disk->charset); | ||
136 | if (sbi->nls_io) | ||
137 | seq_printf(seq, ",iocharset=%s", sbi->nls_io->charset); | ||
138 | if (sbi->s_quiet) | ||
139 | seq_printf(seq, ",quiet"); | ||
140 | return 0; | ||
141 | } | ||
142 | |||
114 | static struct inode *hfs_alloc_inode(struct super_block *sb) | 143 | static struct inode *hfs_alloc_inode(struct super_block *sb) |
115 | { | 144 | { |
116 | struct hfs_inode_info *i; | 145 | struct hfs_inode_info *i; |
@@ -133,11 +162,13 @@ static struct super_operations hfs_super_operations = { | |||
133 | .write_super = hfs_write_super, | 162 | .write_super = hfs_write_super, |
134 | .statfs = hfs_statfs, | 163 | .statfs = hfs_statfs, |
135 | .remount_fs = hfs_remount, | 164 | .remount_fs = hfs_remount, |
165 | .show_options = hfs_show_options, | ||
136 | }; | 166 | }; |
137 | 167 | ||
138 | enum { | 168 | enum { |
139 | opt_uid, opt_gid, opt_umask, opt_file_umask, opt_dir_umask, | 169 | opt_uid, opt_gid, opt_umask, opt_file_umask, opt_dir_umask, |
140 | opt_part, opt_session, opt_type, opt_creator, opt_quiet, | 170 | opt_part, opt_session, opt_type, opt_creator, opt_quiet, |
171 | opt_codepage, opt_iocharset, | ||
141 | opt_err | 172 | opt_err |
142 | }; | 173 | }; |
143 | 174 | ||
@@ -152,6 +183,8 @@ static match_table_t tokens = { | |||
152 | { opt_type, "type=%s" }, | 183 | { opt_type, "type=%s" }, |
153 | { opt_creator, "creator=%s" }, | 184 | { opt_creator, "creator=%s" }, |
154 | { opt_quiet, "quiet" }, | 185 | { opt_quiet, "quiet" }, |
186 | { opt_codepage, "codepage=%s" }, | ||
187 | { opt_iocharset, "iocharset=%s" }, | ||
155 | { opt_err, NULL } | 188 | { opt_err, NULL } |
156 | }; | 189 | }; |
157 | 190 | ||
@@ -257,11 +290,46 @@ static int parse_options(char *options, struct hfs_sb_info *hsb) | |||
257 | case opt_quiet: | 290 | case opt_quiet: |
258 | hsb->s_quiet = 1; | 291 | hsb->s_quiet = 1; |
259 | break; | 292 | break; |
293 | case opt_codepage: | ||
294 | if (hsb->nls_disk) { | ||
295 | printk("HFS+-fs: unable to change codepage\n"); | ||
296 | return 0; | ||
297 | } | ||
298 | p = match_strdup(&args[0]); | ||
299 | hsb->nls_disk = load_nls(p); | ||
300 | if (!hsb->nls_disk) { | ||
301 | printk("HFS+-fs: unable to load codepage \"%s\"\n", p); | ||
302 | kfree(p); | ||
303 | return 0; | ||
304 | } | ||
305 | kfree(p); | ||
306 | break; | ||
307 | case opt_iocharset: | ||
308 | if (hsb->nls_io) { | ||
309 | printk("HFS: unable to change iocharset\n"); | ||
310 | return 0; | ||
311 | } | ||
312 | p = match_strdup(&args[0]); | ||
313 | hsb->nls_io = load_nls(p); | ||
314 | if (!hsb->nls_io) { | ||
315 | printk("HFS: unable to load iocharset \"%s\"\n", p); | ||
316 | kfree(p); | ||
317 | return 0; | ||
318 | } | ||
319 | kfree(p); | ||
320 | break; | ||
260 | default: | 321 | default: |
261 | return 0; | 322 | return 0; |
262 | } | 323 | } |
263 | } | 324 | } |
264 | 325 | ||
326 | if (hsb->nls_disk && !hsb->nls_io) { | ||
327 | hsb->nls_io = load_nls_default(); | ||
328 | if (!hsb->nls_io) { | ||
329 | printk("HFS: unable to load default iocharset\n"); | ||
330 | return 0; | ||
331 | } | ||
332 | } | ||
265 | hsb->s_dir_umask &= 0777; | 333 | hsb->s_dir_umask &= 0777; |
266 | hsb->s_file_umask &= 0577; | 334 | hsb->s_file_umask &= 0577; |
267 | 335 | ||
diff --git a/fs/hfs/trans.c b/fs/hfs/trans.c index fb9720abbadd..e673a88b8ae7 100644 --- a/fs/hfs/trans.c +++ b/fs/hfs/trans.c | |||
@@ -9,12 +9,15 @@ | |||
9 | * with ':' vs. '/' as the path-element separator. | 9 | * with ':' vs. '/' as the path-element separator. |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #include <linux/types.h> | ||
13 | #include <linux/nls.h> | ||
14 | |||
12 | #include "hfs_fs.h" | 15 | #include "hfs_fs.h" |
13 | 16 | ||
14 | /*================ Global functions ================*/ | 17 | /*================ Global functions ================*/ |
15 | 18 | ||
16 | /* | 19 | /* |
17 | * hfs_mac2triv() | 20 | * hfs_mac2asc() |
18 | * | 21 | * |
19 | * Given a 'Pascal String' (a string preceded by a length byte) in | 22 | * Given a 'Pascal String' (a string preceded by a length byte) in |
20 | * the Macintosh character set produce the corresponding filename using | 23 | * the Macintosh character set produce the corresponding filename using |
@@ -27,23 +30,58 @@ | |||
27 | * by ':' which never appears in HFS filenames. All other characters | 30 | * by ':' which never appears in HFS filenames. All other characters |
28 | * are passed unchanged from input to output. | 31 | * are passed unchanged from input to output. |
29 | */ | 32 | */ |
30 | int hfs_mac2triv(char *out, const struct hfs_name *in) | 33 | int hfs_mac2asc(struct super_block *sb, char *out, const struct hfs_name *in) |
31 | { | 34 | { |
32 | const char *p; | 35 | struct nls_table *nls_disk = HFS_SB(sb)->nls_disk; |
33 | char c; | 36 | struct nls_table *nls_io = HFS_SB(sb)->nls_io; |
34 | int i, len; | 37 | const char *src; |
38 | char *dst; | ||
39 | int srclen, dstlen, size; | ||
40 | |||
41 | src = in->name; | ||
42 | srclen = in->len; | ||
43 | dst = out; | ||
44 | dstlen = HFS_MAX_NAMELEN; | ||
45 | if (nls_io) { | ||
46 | wchar_t ch; | ||
35 | 47 | ||
36 | len = in->len; | 48 | while (srclen > 0) { |
37 | p = in->name; | 49 | if (nls_disk) { |
38 | for (i = 0; i < len; i++) { | 50 | size = nls_disk->char2uni(src, srclen, &ch); |
39 | c = *p++; | 51 | if (size <= 0) { |
40 | *out++ = c == '/' ? ':' : c; | 52 | ch = '?'; |
53 | size = 1; | ||
54 | } | ||
55 | src += size; | ||
56 | srclen -= size; | ||
57 | } else { | ||
58 | ch = *src++; | ||
59 | srclen--; | ||
60 | } | ||
61 | if (ch == '/') | ||
62 | ch = ':'; | ||
63 | size = nls_io->uni2char(ch, dst, dstlen); | ||
64 | if (size < 0) { | ||
65 | if (size == -ENAMETOOLONG) | ||
66 | goto out; | ||
67 | *dst = '?'; | ||
68 | size = 1; | ||
69 | } | ||
70 | dst += size; | ||
71 | dstlen -= size; | ||
72 | } | ||
73 | } else { | ||
74 | char ch; | ||
75 | |||
76 | while (--srclen >= 0) | ||
77 | *dst++ = (ch = *src++) == '/' ? ':' : ch; | ||
41 | } | 78 | } |
42 | return i; | 79 | out: |
80 | return dst - out; | ||
43 | } | 81 | } |
44 | 82 | ||
45 | /* | 83 | /* |
46 | * hfs_triv2mac() | 84 | * hfs_asc2mac() |
47 | * | 85 | * |
48 | * Given an ASCII string (not null-terminated) and its length, | 86 | * Given an ASCII string (not null-terminated) and its length, |
49 | * generate the corresponding filename in the Macintosh character set | 87 | * generate the corresponding filename in the Macintosh character set |
@@ -54,19 +92,57 @@ int hfs_mac2triv(char *out, const struct hfs_name *in) | |||
54 | * This routine is a inverse to hfs_mac2triv(). | 92 | * This routine is the inverse of hfs_mac2asc(). |
55 | * A ':' is replaced by a '/'. | 93 | * A ':' is replaced by a '/'. |
56 | */ | 94 | */ |
57 | void hfs_triv2mac(struct hfs_name *out, struct qstr *in) | 95 | void hfs_asc2mac(struct super_block *sb, struct hfs_name *out, struct qstr *in) |
58 | { | 96 | { |
97 | struct nls_table *nls_disk = HFS_SB(sb)->nls_disk; | ||
98 | struct nls_table *nls_io = HFS_SB(sb)->nls_io; | ||
59 | const char *src; | 99 | const char *src; |
60 | char *dst, c; | 100 | char *dst; |
61 | int i, len; | 101 | int srclen, dstlen, size; |
62 | 102 | ||
63 | out->len = len = min((unsigned int)HFS_NAMELEN, in->len); | ||
64 | src = in->name; | 103 | src = in->name; |
104 | srclen = in->len; | ||
65 | dst = out->name; | 105 | dst = out->name; |
66 | for (i = 0; i < len; i++) { | 106 | dstlen = HFS_NAMELEN; |
67 | c = *src++; | 107 | if (nls_io) { |
68 | *dst++ = c == ':' ? '/' : c; | 108 | wchar_t ch; |
109 | |||
110 | while (srclen > 0) { | ||
111 | size = nls_io->char2uni(src, srclen, &ch); | ||
112 | if (size < 0) { | ||
113 | ch = '?'; | ||
114 | size = 1; | ||
115 | } | ||
116 | src += size; | ||
117 | srclen -= size; | ||
118 | if (ch == ':') | ||
119 | ch = '/'; | ||
120 | if (nls_disk) { | ||
121 | size = nls_disk->uni2char(ch, dst, dstlen); | ||
122 | if (size < 0) { | ||
123 | if (size == -ENAMETOOLONG) | ||
124 | goto out; | ||
125 | *dst = '?'; | ||
126 | size = 1; | ||
127 | } | ||
128 | dst += size; | ||
129 | dstlen -= size; | ||
130 | } else { | ||
131 | *dst++ = ch > 0xff ? '?' : ch; | ||
132 | dstlen--; | ||
133 | } | ||
134 | } | ||
135 | } else { | ||
136 | char ch; | ||
137 | |||
138 | if (dstlen > srclen) | ||
139 | dstlen = srclen; | ||
140 | while (--dstlen >= 0) | ||
141 | *dst++ = (ch = *src++) == ':' ? '/' : ch; | ||
69 | } | 142 | } |
70 | for (; i < HFS_NAMELEN; i++) | 143 | out: |
144 | out->len = dst - (char *)out->name; | ||
145 | dstlen = HFS_NAMELEN - out->len; | ||
146 | while (--dstlen >= 0) | ||
71 | *dst++ = 0; | 147 | *dst++ = 0; |
72 | } | 148 | } |
diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c index 8868d3b766fd..b85abc6e6f83 100644 --- a/fs/hfsplus/bnode.c +++ b/fs/hfsplus/bnode.c | |||
@@ -18,8 +18,6 @@ | |||
18 | #include "hfsplus_fs.h" | 18 | #include "hfsplus_fs.h" |
19 | #include "hfsplus_raw.h" | 19 | #include "hfsplus_raw.h" |
20 | 20 | ||
21 | #define REF_PAGES 0 | ||
22 | |||
23 | /* Copy a specified range of bytes from the raw data of a node */ | 21 | /* Copy a specified range of bytes from the raw data of a node */ |
24 | void hfs_bnode_read(struct hfs_bnode *node, void *buf, int off, int len) | 22 | void hfs_bnode_read(struct hfs_bnode *node, void *buf, int off, int len) |
25 | { | 23 | { |
@@ -450,9 +448,7 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid) | |||
450 | page_cache_release(page); | 448 | page_cache_release(page); |
451 | goto fail; | 449 | goto fail; |
452 | } | 450 | } |
453 | #if !REF_PAGES | ||
454 | page_cache_release(page); | 451 | page_cache_release(page); |
455 | #endif | ||
456 | node->page[i] = page; | 452 | node->page[i] = page; |
457 | } | 453 | } |
458 | 454 | ||
@@ -612,13 +608,6 @@ void hfs_bnode_get(struct hfs_bnode *node) | |||
612 | { | 608 | { |
613 | if (node) { | 609 | if (node) { |
614 | atomic_inc(&node->refcnt); | 610 | atomic_inc(&node->refcnt); |
615 | #if REF_PAGES | ||
616 | { | ||
617 | int i; | ||
618 | for (i = 0; i < node->tree->pages_per_bnode; i++) | ||
619 | get_page(node->page[i]); | ||
620 | } | ||
621 | #endif | ||
622 | dprint(DBG_BNODE_REFS, "get_node(%d:%d): %d\n", | 611 | dprint(DBG_BNODE_REFS, "get_node(%d:%d): %d\n", |
623 | node->tree->cnid, node->this, atomic_read(&node->refcnt)); | 612 | node->tree->cnid, node->this, atomic_read(&node->refcnt)); |
624 | } | 613 | } |
@@ -635,20 +624,12 @@ void hfs_bnode_put(struct hfs_bnode *node) | |||
635 | node->tree->cnid, node->this, atomic_read(&node->refcnt)); | 624 | node->tree->cnid, node->this, atomic_read(&node->refcnt)); |
636 | if (!atomic_read(&node->refcnt)) | 625 | if (!atomic_read(&node->refcnt)) |
637 | BUG(); | 626 | BUG(); |
638 | if (!atomic_dec_and_lock(&node->refcnt, &tree->hash_lock)) { | 627 | if (!atomic_dec_and_lock(&node->refcnt, &tree->hash_lock)) |
639 | #if REF_PAGES | ||
640 | for (i = 0; i < tree->pages_per_bnode; i++) | ||
641 | put_page(node->page[i]); | ||
642 | #endif | ||
643 | return; | 628 | return; |
644 | } | ||
645 | for (i = 0; i < tree->pages_per_bnode; i++) { | 629 | for (i = 0; i < tree->pages_per_bnode; i++) { |
646 | if (!node->page[i]) | 630 | if (!node->page[i]) |
647 | continue; | 631 | continue; |
648 | mark_page_accessed(node->page[i]); | 632 | mark_page_accessed(node->page[i]); |
649 | #if REF_PAGES | ||
650 | put_page(node->page[i]); | ||
651 | #endif | ||
652 | } | 633 | } |
653 | 634 | ||
654 | if (test_bit(HFS_BNODE_DELETED, &node->flags)) { | 635 | if (test_bit(HFS_BNODE_DELETED, &node->flags)) { |
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 533094a570df..2bc0cdd30e56 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h | |||
@@ -343,8 +343,9 @@ ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, | |||
343 | ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size); | 343 | ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size); |
344 | 344 | ||
345 | /* options.c */ | 345 | /* options.c */ |
346 | int parse_options(char *, struct hfsplus_sb_info *); | 346 | int hfsplus_parse_options(char *, struct hfsplus_sb_info *); |
347 | void fill_defaults(struct hfsplus_sb_info *); | 347 | void hfsplus_fill_defaults(struct hfsplus_sb_info *); |
348 | int hfsplus_show_options(struct seq_file *, struct vfsmount *); | ||
348 | 349 | ||
349 | /* tables.c */ | 350 | /* tables.c */ |
350 | extern u16 hfsplus_case_fold_table[]; | 351 | extern u16 hfsplus_case_fold_table[]; |
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c index 1cca0102c98d..cca0818aa4ca 100644 --- a/fs/hfsplus/options.c +++ b/fs/hfsplus/options.c | |||
@@ -13,6 +13,8 @@ | |||
13 | #include <linux/sched.h> | 13 | #include <linux/sched.h> |
14 | #include <linux/parser.h> | 14 | #include <linux/parser.h> |
15 | #include <linux/nls.h> | 15 | #include <linux/nls.h> |
16 | #include <linux/mount.h> | ||
17 | #include <linux/seq_file.h> | ||
16 | #include "hfsplus_fs.h" | 18 | #include "hfsplus_fs.h" |
17 | 19 | ||
18 | enum { | 20 | enum { |
@@ -38,7 +40,7 @@ static match_table_t tokens = { | |||
38 | }; | 40 | }; |
39 | 41 | ||
40 | /* Initialize an options object to reasonable defaults */ | 42 | /* Initialize an options object to reasonable defaults */ |
41 | void fill_defaults(struct hfsplus_sb_info *opts) | 43 | void hfsplus_fill_defaults(struct hfsplus_sb_info *opts) |
42 | { | 44 | { |
43 | if (!opts) | 45 | if (!opts) |
44 | return; | 46 | return; |
@@ -63,7 +65,7 @@ static inline int match_fourchar(substring_t *arg, u32 *result) | |||
63 | 65 | ||
64 | /* Parse options from mount. Returns 0 on failure */ | 66 | /* Parse options from mount. Returns 0 on failure */ |
65 | /* input is the options passed to mount() as a string */ | 67 | /* input is the options passed to mount() as a string */ |
66 | int parse_options(char *input, struct hfsplus_sb_info *sbi) | 68 | int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi) |
67 | { | 69 | { |
68 | char *p; | 70 | char *p; |
69 | substring_t args[MAX_OPT_ARGS]; | 71 | substring_t args[MAX_OPT_ARGS]; |
@@ -160,3 +162,23 @@ done: | |||
160 | 162 | ||
161 | return 1; | 163 | return 1; |
162 | } | 164 | } |
165 | |||
166 | int hfsplus_show_options(struct seq_file *seq, struct vfsmount *mnt) | ||
167 | { | ||
168 | struct hfsplus_sb_info *sbi = &HFSPLUS_SB(mnt->mnt_sb); | ||
169 | |||
170 | if (sbi->creator != HFSPLUS_DEF_CR_TYPE) | ||
171 | seq_printf(seq, ",creator=%.4s", (char *)&sbi->creator); | ||
172 | if (sbi->type != HFSPLUS_DEF_CR_TYPE) | ||
173 | seq_printf(seq, ",type=%.4s", (char *)&sbi->type); | ||
174 | seq_printf(seq, ",umask=%o,uid=%u,gid=%u", sbi->umask, sbi->uid, sbi->gid); | ||
175 | if (sbi->part >= 0) | ||
176 | seq_printf(seq, ",part=%u", sbi->part); | ||
177 | if (sbi->session >= 0) | ||
178 | seq_printf(seq, ",session=%u", sbi->session); | ||
179 | if (sbi->nls) | ||
180 | seq_printf(seq, ",nls=%s", sbi->nls->charset); | ||
181 | if (sbi->flags & HFSPLUS_SB_NODECOMPOSE) | ||
182 | seq_printf(seq, ",nodecompose"); | ||
183 | return 0; | ||
184 | } | ||
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index d55ad67b8e42..fd0f0f050e1d 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c | |||
@@ -217,8 +217,7 @@ static void hfsplus_put_super(struct super_block *sb) | |||
217 | vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_UNMNT); | 217 | vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_UNMNT); |
218 | vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_INCNSTNT); | 218 | vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_INCNSTNT); |
219 | mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh); | 219 | mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh); |
220 | ll_rw_block(WRITE, 1, &HFSPLUS_SB(sb).s_vhbh); | 220 | sync_dirty_buffer(HFSPLUS_SB(sb).s_vhbh); |
221 | wait_on_buffer(HFSPLUS_SB(sb).s_vhbh); | ||
222 | } | 221 | } |
223 | 222 | ||
224 | hfs_btree_close(HFSPLUS_SB(sb).cat_tree); | 223 | hfs_btree_close(HFSPLUS_SB(sb).cat_tree); |
@@ -277,6 +276,7 @@ static struct super_operations hfsplus_sops = { | |||
277 | .write_super = hfsplus_write_super, | 276 | .write_super = hfsplus_write_super, |
278 | .statfs = hfsplus_statfs, | 277 | .statfs = hfsplus_statfs, |
279 | .remount_fs = hfsplus_remount, | 278 | .remount_fs = hfsplus_remount, |
279 | .show_options = hfsplus_show_options, | ||
280 | }; | 280 | }; |
281 | 281 | ||
282 | static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | 282 | static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) |
@@ -297,8 +297,8 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
297 | memset(sbi, 0, sizeof(HFSPLUS_SB(sb))); | 297 | memset(sbi, 0, sizeof(HFSPLUS_SB(sb))); |
298 | sb->s_fs_info = sbi; | 298 | sb->s_fs_info = sbi; |
299 | INIT_HLIST_HEAD(&sbi->rsrc_inodes); | 299 | INIT_HLIST_HEAD(&sbi->rsrc_inodes); |
300 | fill_defaults(sbi); | 300 | hfsplus_fill_defaults(sbi); |
301 | if (!parse_options(data, sbi)) { | 301 | if (!hfsplus_parse_options(data, sbi)) { |
302 | if (!silent) | 302 | if (!silent) |
303 | printk("HFS+-fs: unable to parse mount options\n"); | 303 | printk("HFS+-fs: unable to parse mount options\n"); |
304 | err = -EINVAL; | 304 | err = -EINVAL; |
@@ -415,8 +415,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
415 | vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT); | 415 | vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT); |
416 | vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT); | 416 | vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT); |
417 | mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh); | 417 | mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh); |
418 | ll_rw_block(WRITE, 1, &HFSPLUS_SB(sb).s_vhbh); | 418 | sync_dirty_buffer(HFSPLUS_SB(sb).s_vhbh); |
419 | wait_on_buffer(HFSPLUS_SB(sb).s_vhbh); | ||
420 | 419 | ||
421 | if (!HFSPLUS_SB(sb).hidden_dir) { | 420 | if (!HFSPLUS_SB(sb).hidden_dir) { |
422 | printk("HFS+: create hidden dir...\n"); | 421 | printk("HFS+: create hidden dir...\n"); |
diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h index 67bca0d4a33b..cca3fb693f99 100644 --- a/fs/hostfs/hostfs.h +++ b/fs/hostfs/hostfs.h | |||
@@ -49,7 +49,6 @@ struct hostfs_iattr { | |||
49 | struct timespec ia_atime; | 49 | struct timespec ia_atime; |
50 | struct timespec ia_mtime; | 50 | struct timespec ia_mtime; |
51 | struct timespec ia_ctime; | 51 | struct timespec ia_ctime; |
52 | unsigned int ia_attr_flags; | ||
53 | }; | 52 | }; |
54 | 53 | ||
55 | extern int stat_file(const char *path, unsigned long long *inode_out, | 54 | extern int stat_file(const char *path, unsigned long long *inode_out, |
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index b2d18200a003..59c5062cd63f 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c | |||
@@ -284,6 +284,7 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb) | |||
284 | 284 | ||
285 | static void hostfs_delete_inode(struct inode *inode) | 285 | static void hostfs_delete_inode(struct inode *inode) |
286 | { | 286 | { |
287 | truncate_inode_pages(&inode->i_data, 0); | ||
287 | if(HOSTFS_I(inode)->fd != -1) { | 288 | if(HOSTFS_I(inode)->fd != -1) { |
288 | close_file(&HOSTFS_I(inode)->fd); | 289 | close_file(&HOSTFS_I(inode)->fd); |
289 | HOSTFS_I(inode)->fd = -1; | 290 | HOSTFS_I(inode)->fd = -1; |
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index 38b1741fa539..e3d17e9ea6c1 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c | |||
@@ -284,6 +284,7 @@ void hpfs_write_if_changed(struct inode *inode) | |||
284 | 284 | ||
285 | void hpfs_delete_inode(struct inode *inode) | 285 | void hpfs_delete_inode(struct inode *inode) |
286 | { | 286 | { |
287 | truncate_inode_pages(&inode->i_data, 0); | ||
287 | lock_kernel(); | 288 | lock_kernel(); |
288 | hpfs_remove_fnode(inode->i_sb, inode->i_ino); | 289 | hpfs_remove_fnode(inode->i_sb, inode->i_ino); |
289 | unlock_kernel(); | 290 | unlock_kernel(); |
diff --git a/fs/inode.c b/fs/inode.c index e57f1724db3e..f80a79ff156b 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -1034,19 +1034,21 @@ void generic_delete_inode(struct inode *inode) | |||
1034 | inodes_stat.nr_inodes--; | 1034 | inodes_stat.nr_inodes--; |
1035 | spin_unlock(&inode_lock); | 1035 | spin_unlock(&inode_lock); |
1036 | 1036 | ||
1037 | if (inode->i_data.nrpages) | ||
1038 | truncate_inode_pages(&inode->i_data, 0); | ||
1039 | |||
1040 | security_inode_delete(inode); | 1037 | security_inode_delete(inode); |
1041 | 1038 | ||
1042 | if (op->delete_inode) { | 1039 | if (op->delete_inode) { |
1043 | void (*delete)(struct inode *) = op->delete_inode; | 1040 | void (*delete)(struct inode *) = op->delete_inode; |
1044 | if (!is_bad_inode(inode)) | 1041 | if (!is_bad_inode(inode)) |
1045 | DQUOT_INIT(inode); | 1042 | DQUOT_INIT(inode); |
1046 | /* s_op->delete_inode internally recalls clear_inode() */ | 1043 | /* Filesystems implementing their own |
1044 | * s_op->delete_inode are required to call | ||
1045 | * truncate_inode_pages and clear_inode() | ||
1046 | * internally */ | ||
1047 | delete(inode); | 1047 | delete(inode); |
1048 | } else | 1048 | } else { |
1049 | truncate_inode_pages(&inode->i_data, 0); | ||
1049 | clear_inode(inode); | 1050 | clear_inode(inode); |
1051 | } | ||
1050 | spin_lock(&inode_lock); | 1052 | spin_lock(&inode_lock); |
1051 | hlist_del_init(&inode->i_hash); | 1053 | hlist_del_init(&inode->i_hash); |
1052 | spin_unlock(&inode_lock); | 1054 | spin_unlock(&inode_lock); |
@@ -1195,9 +1197,6 @@ void update_atime(struct inode *inode) | |||
1195 | if (!timespec_equal(&inode->i_atime, &now)) { | 1197 | if (!timespec_equal(&inode->i_atime, &now)) { |
1196 | inode->i_atime = now; | 1198 | inode->i_atime = now; |
1197 | mark_inode_dirty_sync(inode); | 1199 | mark_inode_dirty_sync(inode); |
1198 | } else { | ||
1199 | if (!timespec_equal(&inode->i_atime, &now)) | ||
1200 | inode->i_atime = now; | ||
1201 | } | 1200 | } |
1202 | } | 1201 | } |
1203 | 1202 | ||
diff --git a/fs/inotify.c b/fs/inotify.c index 2e4e2a57708c..a37e9fb1da58 100644 --- a/fs/inotify.c +++ b/fs/inotify.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include <asm/ioctls.h> | 37 | #include <asm/ioctls.h> |
38 | 38 | ||
39 | static atomic_t inotify_cookie; | 39 | static atomic_t inotify_cookie; |
40 | static atomic_t inotify_watches; | ||
40 | 41 | ||
41 | static kmem_cache_t *watch_cachep; | 42 | static kmem_cache_t *watch_cachep; |
42 | static kmem_cache_t *event_cachep; | 43 | static kmem_cache_t *event_cachep; |
@@ -422,6 +423,7 @@ static struct inotify_watch *create_watch(struct inotify_device *dev, | |||
422 | get_inotify_watch(watch); | 423 | get_inotify_watch(watch); |
423 | 424 | ||
424 | atomic_inc(&dev->user->inotify_watches); | 425 | atomic_inc(&dev->user->inotify_watches); |
426 | atomic_inc(&inotify_watches); | ||
425 | 427 | ||
426 | return watch; | 428 | return watch; |
427 | } | 429 | } |
@@ -454,6 +456,7 @@ static void remove_watch_no_event(struct inotify_watch *watch, | |||
454 | list_del(&watch->d_list); | 456 | list_del(&watch->d_list); |
455 | 457 | ||
456 | atomic_dec(&dev->user->inotify_watches); | 458 | atomic_dec(&dev->user->inotify_watches); |
459 | atomic_dec(&inotify_watches); | ||
457 | idr_remove(&dev->idr, watch->wd); | 460 | idr_remove(&dev->idr, watch->wd); |
458 | put_inotify_watch(watch); | 461 | put_inotify_watch(watch); |
459 | } | 462 | } |
@@ -532,6 +535,9 @@ void inotify_dentry_parent_queue_event(struct dentry *dentry, u32 mask, | |||
532 | struct dentry *parent; | 535 | struct dentry *parent; |
533 | struct inode *inode; | 536 | struct inode *inode; |
534 | 537 | ||
538 | if (!atomic_read (&inotify_watches)) | ||
539 | return; | ||
540 | |||
535 | spin_lock(&dentry->d_lock); | 541 | spin_lock(&dentry->d_lock); |
536 | parent = dentry->d_parent; | 542 | parent = dentry->d_parent; |
537 | inode = parent->d_inode; | 543 | inode = parent->d_inode; |
@@ -925,6 +931,7 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask) | |||
925 | struct nameidata nd; | 931 | struct nameidata nd; |
926 | struct file *filp; | 932 | struct file *filp; |
927 | int ret, fput_needed; | 933 | int ret, fput_needed; |
934 | int mask_add = 0; | ||
928 | 935 | ||
929 | filp = fget_light(fd, &fput_needed); | 936 | filp = fget_light(fd, &fput_needed); |
930 | if (unlikely(!filp)) | 937 | if (unlikely(!filp)) |
@@ -947,6 +954,9 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask) | |||
947 | down(&inode->inotify_sem); | 954 | down(&inode->inotify_sem); |
948 | down(&dev->sem); | 955 | down(&dev->sem); |
949 | 956 | ||
957 | if (mask & IN_MASK_ADD) | ||
958 | mask_add = 1; | ||
959 | |||
950 | /* don't let user-space set invalid bits: we don't want flags set */ | 960 | /* don't let user-space set invalid bits: we don't want flags set */ |
951 | mask &= IN_ALL_EVENTS; | 961 | mask &= IN_ALL_EVENTS; |
952 | if (unlikely(!mask)) { | 962 | if (unlikely(!mask)) { |
@@ -960,7 +970,10 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask) | |||
960 | */ | 970 | */ |
961 | old = inode_find_dev(inode, dev); | 971 | old = inode_find_dev(inode, dev); |
962 | if (unlikely(old)) { | 972 | if (unlikely(old)) { |
963 | old->mask = mask; | 973 | if (mask_add) |
974 | old->mask |= mask; | ||
975 | else | ||
976 | old->mask = mask; | ||
964 | ret = old->wd; | 977 | ret = old->wd; |
965 | goto out; | 978 | goto out; |
966 | } | 979 | } |
@@ -1043,6 +1056,7 @@ static int __init inotify_setup(void) | |||
1043 | inotify_max_user_watches = 8192; | 1056 | inotify_max_user_watches = 8192; |
1044 | 1057 | ||
1045 | atomic_set(&inotify_cookie, 0); | 1058 | atomic_set(&inotify_cookie, 0); |
1059 | atomic_set(&inotify_watches, 0); | ||
1046 | 1060 | ||
1047 | watch_cachep = kmem_cache_create("inotify_watch_cache", | 1061 | watch_cachep = kmem_cache_create("inotify_watch_cache", |
1048 | sizeof(struct inotify_watch), | 1062 | sizeof(struct inotify_watch), |
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index 5a97e346bd95..014a51fd00d7 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c | |||
@@ -204,7 +204,7 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) | |||
204 | int i; | 204 | int i; |
205 | 205 | ||
206 | spin_unlock(&journal->j_list_lock); | 206 | spin_unlock(&journal->j_list_lock); |
207 | ll_rw_block(WRITE, *batch_count, bhs); | 207 | ll_rw_block(SWRITE, *batch_count, bhs); |
208 | spin_lock(&journal->j_list_lock); | 208 | spin_lock(&journal->j_list_lock); |
209 | for (i = 0; i < *batch_count; i++) { | 209 | for (i = 0; i < *batch_count; i++) { |
210 | struct buffer_head *bh = bhs[i]; | 210 | struct buffer_head *bh = bhs[i]; |
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index dac720c837ab..2a3e310f79ef 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c | |||
@@ -358,7 +358,7 @@ write_out_data: | |||
358 | jbd_debug(2, "submit %d writes\n", | 358 | jbd_debug(2, "submit %d writes\n", |
359 | bufs); | 359 | bufs); |
360 | spin_unlock(&journal->j_list_lock); | 360 | spin_unlock(&journal->j_list_lock); |
361 | ll_rw_block(WRITE, bufs, wbuf); | 361 | ll_rw_block(SWRITE, bufs, wbuf); |
362 | journal_brelse_array(wbuf, bufs); | 362 | journal_brelse_array(wbuf, bufs); |
363 | bufs = 0; | 363 | bufs = 0; |
364 | goto write_out_data; | 364 | goto write_out_data; |
@@ -381,7 +381,7 @@ write_out_data: | |||
381 | 381 | ||
382 | if (bufs) { | 382 | if (bufs) { |
383 | spin_unlock(&journal->j_list_lock); | 383 | spin_unlock(&journal->j_list_lock); |
384 | ll_rw_block(WRITE, bufs, wbuf); | 384 | ll_rw_block(SWRITE, bufs, wbuf); |
385 | journal_brelse_array(wbuf, bufs); | 385 | journal_brelse_array(wbuf, bufs); |
386 | spin_lock(&journal->j_list_lock); | 386 | spin_lock(&journal->j_list_lock); |
387 | } | 387 | } |
@@ -720,11 +720,17 @@ wait_for_iobuf: | |||
720 | J_ASSERT(commit_transaction->t_log_list == NULL); | 720 | J_ASSERT(commit_transaction->t_log_list == NULL); |
721 | 721 | ||
722 | restart_loop: | 722 | restart_loop: |
723 | /* | ||
724 | * As there are other places (journal_unmap_buffer()) adding buffers | ||
725 | * to this list we have to be careful and hold the j_list_lock. | ||
726 | */ | ||
727 | spin_lock(&journal->j_list_lock); | ||
723 | while (commit_transaction->t_forget) { | 728 | while (commit_transaction->t_forget) { |
724 | transaction_t *cp_transaction; | 729 | transaction_t *cp_transaction; |
725 | struct buffer_head *bh; | 730 | struct buffer_head *bh; |
726 | 731 | ||
727 | jh = commit_transaction->t_forget; | 732 | jh = commit_transaction->t_forget; |
733 | spin_unlock(&journal->j_list_lock); | ||
728 | bh = jh2bh(jh); | 734 | bh = jh2bh(jh); |
729 | jbd_lock_bh_state(bh); | 735 | jbd_lock_bh_state(bh); |
730 | J_ASSERT_JH(jh, jh->b_transaction == commit_transaction || | 736 | J_ASSERT_JH(jh, jh->b_transaction == commit_transaction || |
@@ -792,9 +798,25 @@ restart_loop: | |||
792 | journal_remove_journal_head(bh); /* needs a brelse */ | 798 | journal_remove_journal_head(bh); /* needs a brelse */ |
793 | release_buffer_page(bh); | 799 | release_buffer_page(bh); |
794 | } | 800 | } |
801 | cond_resched_lock(&journal->j_list_lock); | ||
802 | } | ||
803 | spin_unlock(&journal->j_list_lock); | ||
804 | /* | ||
805 | * This is a bit sleazy. We borrow j_list_lock to protect | ||
806 | * journal->j_committing_transaction in __journal_remove_checkpoint. | ||
807 | * Really, __journal_remove_checkpoint should be using j_state_lock but | ||
808 | * it's a bit hassle to hold that across __journal_remove_checkpoint | ||
809 | */ | ||
810 | spin_lock(&journal->j_state_lock); | ||
811 | spin_lock(&journal->j_list_lock); | ||
812 | /* | ||
813 | * Now recheck if some buffers did not get attached to the transaction | ||
814 | * while the lock was dropped... | ||
815 | */ | ||
816 | if (commit_transaction->t_forget) { | ||
795 | spin_unlock(&journal->j_list_lock); | 817 | spin_unlock(&journal->j_list_lock); |
796 | if (cond_resched()) | 818 | spin_unlock(&journal->j_state_lock); |
797 | goto restart_loop; | 819 | goto restart_loop; |
798 | } | 820 | } |
799 | 821 | ||
800 | /* Done with this transaction! */ | 822 | /* Done with this transaction! */ |
@@ -803,14 +825,6 @@ restart_loop: | |||
803 | 825 | ||
804 | J_ASSERT(commit_transaction->t_state == T_COMMIT); | 826 | J_ASSERT(commit_transaction->t_state == T_COMMIT); |
805 | 827 | ||
806 | /* | ||
807 | * This is a bit sleazy. We borrow j_list_lock to protect | ||
808 | * journal->j_committing_transaction in __journal_remove_checkpoint. | ||
809 | * Really, __jornal_remove_checkpoint should be using j_state_lock but | ||
810 | * it's a bit hassle to hold that across __journal_remove_checkpoint | ||
811 | */ | ||
812 | spin_lock(&journal->j_state_lock); | ||
813 | spin_lock(&journal->j_list_lock); | ||
814 | commit_transaction->t_state = T_FINISHED; | 828 | commit_transaction->t_state = T_FINISHED; |
815 | J_ASSERT(commit_transaction == journal->j_committing_transaction); | 829 | J_ASSERT(commit_transaction == journal->j_committing_transaction); |
816 | journal->j_commit_sequence = commit_transaction->t_tid; | 830 | journal->j_commit_sequence = commit_transaction->t_tid; |
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 5e7b43949517..7ae2c4fe506b 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c | |||
@@ -65,7 +65,6 @@ EXPORT_SYMBOL(journal_set_features); | |||
65 | EXPORT_SYMBOL(journal_create); | 65 | EXPORT_SYMBOL(journal_create); |
66 | EXPORT_SYMBOL(journal_load); | 66 | EXPORT_SYMBOL(journal_load); |
67 | EXPORT_SYMBOL(journal_destroy); | 67 | EXPORT_SYMBOL(journal_destroy); |
68 | EXPORT_SYMBOL(journal_recover); | ||
69 | EXPORT_SYMBOL(journal_update_superblock); | 68 | EXPORT_SYMBOL(journal_update_superblock); |
70 | EXPORT_SYMBOL(journal_abort); | 69 | EXPORT_SYMBOL(journal_abort); |
71 | EXPORT_SYMBOL(journal_errno); | 70 | EXPORT_SYMBOL(journal_errno); |
@@ -81,6 +80,7 @@ EXPORT_SYMBOL(journal_try_to_free_buffers); | |||
81 | EXPORT_SYMBOL(journal_force_commit); | 80 | EXPORT_SYMBOL(journal_force_commit); |
82 | 81 | ||
83 | static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); | 82 | static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); |
83 | static void __journal_abort_soft (journal_t *journal, int errno); | ||
84 | 84 | ||
85 | /* | 85 | /* |
86 | * Helper function used to manage commit timeouts | 86 | * Helper function used to manage commit timeouts |
@@ -93,16 +93,6 @@ static void commit_timeout(unsigned long __data) | |||
93 | wake_up_process(p); | 93 | wake_up_process(p); |
94 | } | 94 | } |
95 | 95 | ||
96 | /* Static check for data structure consistency. There's no code | ||
97 | * invoked --- we'll just get a linker failure if things aren't right. | ||
98 | */ | ||
99 | void __journal_internal_check(void) | ||
100 | { | ||
101 | extern void journal_bad_superblock_size(void); | ||
102 | if (sizeof(struct journal_superblock_s) != 1024) | ||
103 | journal_bad_superblock_size(); | ||
104 | } | ||
105 | |||
106 | /* | 96 | /* |
107 | * kjournald: The main thread function used to manage a logging device | 97 | * kjournald: The main thread function used to manage a logging device |
108 | * journal. | 98 | * journal. |
@@ -119,16 +109,12 @@ void __journal_internal_check(void) | |||
119 | * known as checkpointing, and this thread is responsible for that job. | 109 | * known as checkpointing, and this thread is responsible for that job. |
120 | */ | 110 | */ |
121 | 111 | ||
122 | journal_t *current_journal; // AKPM: debug | 112 | static int kjournald(void *arg) |
123 | |||
124 | int kjournald(void *arg) | ||
125 | { | 113 | { |
126 | journal_t *journal = (journal_t *) arg; | 114 | journal_t *journal = (journal_t *) arg; |
127 | transaction_t *transaction; | 115 | transaction_t *transaction; |
128 | struct timer_list timer; | 116 | struct timer_list timer; |
129 | 117 | ||
130 | current_journal = journal; | ||
131 | |||
132 | daemonize("kjournald"); | 118 | daemonize("kjournald"); |
133 | 119 | ||
134 | /* Set up an interval timer which can be used to trigger a | 120 | /* Set up an interval timer which can be used to trigger a |
@@ -193,6 +179,8 @@ loop: | |||
193 | if (transaction && time_after_eq(jiffies, | 179 | if (transaction && time_after_eq(jiffies, |
194 | transaction->t_expires)) | 180 | transaction->t_expires)) |
195 | should_sleep = 0; | 181 | should_sleep = 0; |
182 | if (journal->j_flags & JFS_UNMOUNT) | ||
183 | should_sleep = 0; | ||
196 | if (should_sleep) { | 184 | if (should_sleep) { |
197 | spin_unlock(&journal->j_state_lock); | 185 | spin_unlock(&journal->j_state_lock); |
198 | schedule(); | 186 | schedule(); |
@@ -969,7 +957,7 @@ void journal_update_superblock(journal_t *journal, int wait) | |||
969 | if (wait) | 957 | if (wait) |
970 | sync_dirty_buffer(bh); | 958 | sync_dirty_buffer(bh); |
971 | else | 959 | else |
972 | ll_rw_block(WRITE, 1, &bh); | 960 | ll_rw_block(SWRITE, 1, &bh); |
973 | 961 | ||
974 | out: | 962 | out: |
975 | /* If we have just flushed the log (by marking s_start==0), then | 963 | /* If we have just flushed the log (by marking s_start==0), then |
@@ -1439,7 +1427,7 @@ int journal_wipe(journal_t *journal, int write) | |||
1439 | * device this journal is present. | 1427 | * device this journal is present. |
1440 | */ | 1428 | */ |
1441 | 1429 | ||
1442 | const char *journal_dev_name(journal_t *journal, char *buffer) | 1430 | static const char *journal_dev_name(journal_t *journal, char *buffer) |
1443 | { | 1431 | { |
1444 | struct block_device *bdev; | 1432 | struct block_device *bdev; |
1445 | 1433 | ||
@@ -1485,7 +1473,7 @@ void __journal_abort_hard(journal_t *journal) | |||
1485 | 1473 | ||
1486 | /* Soft abort: record the abort error status in the journal superblock, | 1474 | /* Soft abort: record the abort error status in the journal superblock, |
1487 | * but don't do any other IO. */ | 1475 | * but don't do any other IO. */ |
1488 | void __journal_abort_soft (journal_t *journal, int errno) | 1476 | static void __journal_abort_soft (journal_t *journal, int errno) |
1489 | { | 1477 | { |
1490 | if (journal->j_flags & JFS_ABORT) | 1478 | if (journal->j_flags & JFS_ABORT) |
1491 | return; | 1479 | return; |
@@ -1880,7 +1868,7 @@ EXPORT_SYMBOL(journal_enable_debug); | |||
1880 | 1868 | ||
1881 | static struct proc_dir_entry *proc_jbd_debug; | 1869 | static struct proc_dir_entry *proc_jbd_debug; |
1882 | 1870 | ||
1883 | int read_jbd_debug(char *page, char **start, off_t off, | 1871 | static int read_jbd_debug(char *page, char **start, off_t off, |
1884 | int count, int *eof, void *data) | 1872 | int count, int *eof, void *data) |
1885 | { | 1873 | { |
1886 | int ret; | 1874 | int ret; |
@@ -1890,7 +1878,7 @@ int read_jbd_debug(char *page, char **start, off_t off, | |||
1890 | return ret; | 1878 | return ret; |
1891 | } | 1879 | } |
1892 | 1880 | ||
1893 | int write_jbd_debug(struct file *file, const char __user *buffer, | 1881 | static int write_jbd_debug(struct file *file, const char __user *buffer, |
1894 | unsigned long count, void *data) | 1882 | unsigned long count, void *data) |
1895 | { | 1883 | { |
1896 | char buf[32]; | 1884 | char buf[32]; |
@@ -1979,6 +1967,14 @@ static int __init journal_init(void) | |||
1979 | { | 1967 | { |
1980 | int ret; | 1968 | int ret; |
1981 | 1969 | ||
1970 | /* Static check for data structure consistency. There's no code | ||
1971 | * invoked --- we'll just get a linker failure if things aren't right. | ||
1972 | */ | ||
1973 | extern void journal_bad_superblock_size(void); | ||
1974 | if (sizeof(struct journal_superblock_s) != 1024) | ||
1975 | journal_bad_superblock_size(); | ||
1976 | |||
1977 | |||
1982 | ret = journal_init_caches(); | 1978 | ret = journal_init_caches(); |
1983 | if (ret != 0) | 1979 | if (ret != 0) |
1984 | journal_destroy_caches(); | 1980 | journal_destroy_caches(); |
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index d327a598f861..a56144183462 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c | |||
@@ -116,7 +116,8 @@ static inline int hash(journal_t *journal, unsigned long block) | |||
116 | (block << (hash_shift - 12))) & (table->hash_size - 1); | 116 | (block << (hash_shift - 12))) & (table->hash_size - 1); |
117 | } | 117 | } |
118 | 118 | ||
119 | int insert_revoke_hash(journal_t *journal, unsigned long blocknr, tid_t seq) | 119 | static int insert_revoke_hash(journal_t *journal, unsigned long blocknr, |
120 | tid_t seq) | ||
120 | { | 121 | { |
121 | struct list_head *hash_list; | 122 | struct list_head *hash_list; |
122 | struct jbd_revoke_record_s *record; | 123 | struct jbd_revoke_record_s *record; |
@@ -613,7 +614,7 @@ static void flush_descriptor(journal_t *journal, | |||
613 | set_buffer_jwrite(bh); | 614 | set_buffer_jwrite(bh); |
614 | BUFFER_TRACE(bh, "write"); | 615 | BUFFER_TRACE(bh, "write"); |
615 | set_buffer_dirty(bh); | 616 | set_buffer_dirty(bh); |
616 | ll_rw_block(WRITE, 1, &bh); | 617 | ll_rw_block(SWRITE, 1, &bh); |
617 | } | 618 | } |
618 | #endif | 619 | #endif |
619 | 620 | ||
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 77b7662b840b..49bbc2be3d72 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c | |||
@@ -490,23 +490,21 @@ void journal_unlock_updates (journal_t *journal) | |||
490 | */ | 490 | */ |
491 | static void jbd_unexpected_dirty_buffer(struct journal_head *jh) | 491 | static void jbd_unexpected_dirty_buffer(struct journal_head *jh) |
492 | { | 492 | { |
493 | struct buffer_head *bh = jh2bh(jh); | ||
494 | int jlist; | 493 | int jlist; |
495 | 494 | ||
496 | if (buffer_dirty(bh)) { | 495 | /* If this buffer is one which might reasonably be dirty |
497 | /* If this buffer is one which might reasonably be dirty | 496 | * --- ie. data, or not part of this journal --- then |
498 | * --- ie. data, or not part of this journal --- then | 497 | * we're OK to leave it alone, but otherwise we need to |
499 | * we're OK to leave it alone, but otherwise we need to | 498 | * move the dirty bit to the journal's own internal |
500 | * move the dirty bit to the journal's own internal | 499 | * JBDDirty bit. */ |
501 | * JBDDirty bit. */ | 500 | jlist = jh->b_jlist; |
502 | jlist = jh->b_jlist; | 501 | |
503 | 502 | if (jlist == BJ_Metadata || jlist == BJ_Reserved || | |
504 | if (jlist == BJ_Metadata || jlist == BJ_Reserved || | 503 | jlist == BJ_Shadow || jlist == BJ_Forget) { |
505 | jlist == BJ_Shadow || jlist == BJ_Forget) { | 504 | struct buffer_head *bh = jh2bh(jh); |
506 | if (test_clear_buffer_dirty(jh2bh(jh))) { | 505 | |
507 | set_bit(BH_JBDDirty, &jh2bh(jh)->b_state); | 506 | if (test_clear_buffer_dirty(bh)) |
508 | } | 507 | set_buffer_jbddirty(bh); |
509 | } | ||
510 | } | 508 | } |
511 | } | 509 | } |
512 | 510 | ||
@@ -574,9 +572,14 @@ repeat: | |||
574 | if (jh->b_next_transaction) | 572 | if (jh->b_next_transaction) |
575 | J_ASSERT_JH(jh, jh->b_next_transaction == | 573 | J_ASSERT_JH(jh, jh->b_next_transaction == |
576 | transaction); | 574 | transaction); |
577 | JBUFFER_TRACE(jh, "Unexpected dirty buffer"); | 575 | } |
578 | jbd_unexpected_dirty_buffer(jh); | 576 | /* |
579 | } | 577 | * In any case we need to clean the dirty flag and we must |
578 | * do it under the buffer lock to be sure we don't race | ||
579 | * with running write-out. | ||
580 | */ | ||
581 | JBUFFER_TRACE(jh, "Unexpected dirty buffer"); | ||
582 | jbd_unexpected_dirty_buffer(jh); | ||
580 | } | 583 | } |
581 | 584 | ||
582 | unlock_buffer(bh); | 585 | unlock_buffer(bh); |
@@ -1337,8 +1340,7 @@ int journal_stop(handle_t *handle) | |||
1337 | if (handle->h_sync) { | 1340 | if (handle->h_sync) { |
1338 | do { | 1341 | do { |
1339 | old_handle_count = transaction->t_handle_count; | 1342 | old_handle_count = transaction->t_handle_count; |
1340 | set_current_state(TASK_UNINTERRUPTIBLE); | 1343 | schedule_timeout_uninterruptible(1); |
1341 | schedule_timeout(1); | ||
1342 | } while (old_handle_count != transaction->t_handle_count); | 1344 | } while (old_handle_count != transaction->t_handle_count); |
1343 | } | 1345 | } |
1344 | 1346 | ||
diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c index bfbeb4c86e03..3dcc6d2162cb 100644 --- a/fs/jffs/inode-v23.c +++ b/fs/jffs/inode-v23.c | |||
@@ -1629,9 +1629,6 @@ static int jffs_fsync(struct file *f, struct dentry *d, int datasync) | |||
1629 | } | 1629 | } |
1630 | 1630 | ||
1631 | 1631 | ||
1632 | extern int generic_file_open(struct inode *, struct file *) __attribute__((weak)); | ||
1633 | extern loff_t generic_file_llseek(struct file *, loff_t, int) __attribute__((weak)); | ||
1634 | |||
1635 | static struct file_operations jffs_file_operations = | 1632 | static struct file_operations jffs_file_operations = |
1636 | { | 1633 | { |
1637 | .open = generic_file_open, | 1634 | .open = generic_file_open, |
@@ -1747,6 +1744,7 @@ jffs_delete_inode(struct inode *inode) | |||
1747 | D3(printk("jffs_delete_inode(): inode->i_ino == %lu\n", | 1744 | D3(printk("jffs_delete_inode(): inode->i_ino == %lu\n", |
1748 | inode->i_ino)); | 1745 | inode->i_ino)); |
1749 | 1746 | ||
1747 | truncate_inode_pages(&inode->i_data, 0); | ||
1750 | lock_kernel(); | 1748 | lock_kernel(); |
1751 | inode->i_size = 0; | 1749 | inode->i_size = 0; |
1752 | inode->i_blocks = 0; | 1750 | inode->i_blocks = 0; |
diff --git a/fs/jffs/intrep.c b/fs/jffs/intrep.c index 456d7e6e29c2..27f199e94cfc 100644 --- a/fs/jffs/intrep.c +++ b/fs/jffs/intrep.c | |||
@@ -1701,12 +1701,10 @@ jffs_find_file(struct jffs_control *c, __u32 ino) | |||
1701 | { | 1701 | { |
1702 | struct jffs_file *f; | 1702 | struct jffs_file *f; |
1703 | int i = ino % c->hash_len; | 1703 | int i = ino % c->hash_len; |
1704 | struct list_head *tmp; | ||
1705 | 1704 | ||
1706 | D3(printk("jffs_find_file(): ino: %u\n", ino)); | 1705 | D3(printk("jffs_find_file(): ino: %u\n", ino)); |
1707 | 1706 | ||
1708 | for (tmp = c->hash[i].next; tmp != &c->hash[i]; tmp = tmp->next) { | 1707 | list_for_each_entry(f, &c->hash[i], hash) { |
1709 | f = list_entry(tmp, struct jffs_file, hash); | ||
1710 | if (ino != f->ino) | 1708 | if (ino != f->ino) |
1711 | continue; | 1709 | continue; |
1712 | D3(printk("jffs_find_file(): Found file with ino " | 1710 | D3(printk("jffs_find_file(): Found file with ino " |
@@ -2102,13 +2100,12 @@ jffs_foreach_file(struct jffs_control *c, int (*func)(struct jffs_file *)) | |||
2102 | int result = 0; | 2100 | int result = 0; |
2103 | 2101 | ||
2104 | for (pos = 0; pos < c->hash_len; pos++) { | 2102 | for (pos = 0; pos < c->hash_len; pos++) { |
2105 | struct list_head *p, *next; | 2103 | struct jffs_file *f, *next; |
2106 | for (p = c->hash[pos].next; p != &c->hash[pos]; p = next) { | 2104 | |
2107 | /* We need a reference to the next file in the | 2105 | /* We must do _safe, because 'func' might remove the |
2108 | list because `func' might remove the current | 2106 | current file 'f' from the list. */ |
2109 | file `f'. */ | 2107 | list_for_each_entry_safe(f, next, &c->hash[pos], hash) { |
2110 | next = p->next; | 2108 | r = func(f); |
2111 | r = func(list_entry(p, struct jffs_file, hash)); | ||
2112 | if (r < 0) | 2109 | if (r < 0) |
2113 | return r; | 2110 | return r; |
2114 | result += r; | 2111 | result += r; |
@@ -2613,9 +2610,8 @@ jffs_print_hash_table(struct jffs_control *c) | |||
2613 | 2610 | ||
2614 | printk("JFFS: Dumping the file system's hash table...\n"); | 2611 | printk("JFFS: Dumping the file system's hash table...\n"); |
2615 | for (i = 0; i < c->hash_len; i++) { | 2612 | for (i = 0; i < c->hash_len; i++) { |
2616 | struct list_head *p; | 2613 | struct jffs_file *f; |
2617 | for (p = c->hash[i].next; p != &c->hash[i]; p = p->next) { | 2614 | list_for_each_entry(f, &c->hash[i], hash) { |
2618 | struct jffs_file *f=list_entry(p,struct jffs_file,hash); | ||
2619 | printk("*** c->hash[%u]: \"%s\" " | 2615 | printk("*** c->hash[%u]: \"%s\" " |
2620 | "(ino: %u, pino: %u)\n", | 2616 | "(ino: %u, pino: %u)\n", |
2621 | i, (f->name ? f->name : ""), | 2617 | i, (f->name ? f->name : ""), |
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index bd9ed9b0247b..8279bf0133ff 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c | |||
@@ -21,9 +21,6 @@ | |||
21 | #include <linux/jffs2.h> | 21 | #include <linux/jffs2.h> |
22 | #include "nodelist.h" | 22 | #include "nodelist.h" |
23 | 23 | ||
24 | extern int generic_file_open(struct inode *, struct file *) __attribute__((weak)); | ||
25 | extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) __attribute__((weak)); | ||
26 | |||
27 | static int jffs2_commit_write (struct file *filp, struct page *pg, | 24 | static int jffs2_commit_write (struct file *filp, struct page *pg, |
28 | unsigned start, unsigned end); | 25 | unsigned start, unsigned end); |
29 | static int jffs2_prepare_write (struct file *filp, struct page *pg, | 26 | static int jffs2_prepare_write (struct file *filp, struct page *pg, |
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 37da3e33e750..0ec62d5310db 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c | |||
@@ -131,6 +131,8 @@ void jfs_delete_inode(struct inode *inode) | |||
131 | if (!is_bad_inode(inode) && | 131 | if (!is_bad_inode(inode) && |
132 | (JFS_IP(inode)->fileset == cpu_to_le32(FILESYSTEM_I))) { | 132 | (JFS_IP(inode)->fileset == cpu_to_le32(FILESYSTEM_I))) { |
133 | 133 | ||
134 | truncate_inode_pages(&inode->i_data, 0); | ||
135 | |||
134 | if (test_cflag(COMMIT_Freewmap, inode)) | 136 | if (test_cflag(COMMIT_Freewmap, inode)) |
135 | jfs_free_zero_link(inode); | 137 | jfs_free_zero_link(inode); |
136 | 138 | ||
diff --git a/fs/jfs/jfs_filsys.h b/fs/jfs/jfs_filsys.h index 86ccac80f0ab..72a5588faeca 100644 --- a/fs/jfs/jfs_filsys.h +++ b/fs/jfs/jfs_filsys.h | |||
@@ -37,6 +37,9 @@ | |||
37 | #define JFS_ERR_CONTINUE 0x00000004 /* continue */ | 37 | #define JFS_ERR_CONTINUE 0x00000004 /* continue */ |
38 | #define JFS_ERR_PANIC 0x00000008 /* panic */ | 38 | #define JFS_ERR_PANIC 0x00000008 /* panic */ |
39 | 39 | ||
40 | #define JFS_USRQUOTA 0x00000010 | ||
41 | #define JFS_GRPQUOTA 0x00000020 | ||
42 | |||
40 | /* platform option (conditional compilation) */ | 43 | /* platform option (conditional compilation) */ |
41 | #define JFS_AIX 0x80000000 /* AIX support */ | 44 | #define JFS_AIX 0x80000000 /* AIX support */ |
42 | /* POSIX name/directory support */ | 45 | /* POSIX name/directory support */ |
diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 9ff89720f93b..71bc34b96b2b 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c | |||
@@ -23,9 +23,11 @@ | |||
23 | #include <linux/parser.h> | 23 | #include <linux/parser.h> |
24 | #include <linux/completion.h> | 24 | #include <linux/completion.h> |
25 | #include <linux/vfs.h> | 25 | #include <linux/vfs.h> |
26 | #include <linux/mount.h> | ||
26 | #include <linux/moduleparam.h> | 27 | #include <linux/moduleparam.h> |
27 | #include <linux/posix_acl.h> | 28 | #include <linux/posix_acl.h> |
28 | #include <asm/uaccess.h> | 29 | #include <asm/uaccess.h> |
30 | #include <linux/seq_file.h> | ||
29 | 31 | ||
30 | #include "jfs_incore.h" | 32 | #include "jfs_incore.h" |
31 | #include "jfs_filsys.h" | 33 | #include "jfs_filsys.h" |
@@ -192,7 +194,8 @@ static void jfs_put_super(struct super_block *sb) | |||
192 | 194 | ||
193 | enum { | 195 | enum { |
194 | Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize, | 196 | Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize, |
195 | Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, | 197 | Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, Opt_quota, |
198 | Opt_usrquota, Opt_grpquota | ||
196 | }; | 199 | }; |
197 | 200 | ||
198 | static match_table_t tokens = { | 201 | static match_table_t tokens = { |
@@ -204,8 +207,8 @@ static match_table_t tokens = { | |||
204 | {Opt_errors, "errors=%s"}, | 207 | {Opt_errors, "errors=%s"}, |
205 | {Opt_ignore, "noquota"}, | 208 | {Opt_ignore, "noquota"}, |
206 | {Opt_ignore, "quota"}, | 209 | {Opt_ignore, "quota"}, |
207 | {Opt_ignore, "usrquota"}, | 210 | {Opt_usrquota, "usrquota"}, |
208 | {Opt_ignore, "grpquota"}, | 211 | {Opt_grpquota, "grpquota"}, |
209 | {Opt_err, NULL} | 212 | {Opt_err, NULL} |
210 | }; | 213 | }; |
211 | 214 | ||
@@ -293,6 +296,24 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, | |||
293 | } | 296 | } |
294 | break; | 297 | break; |
295 | } | 298 | } |
299 | |||
300 | #if defined(CONFIG_QUOTA) | ||
301 | case Opt_quota: | ||
302 | case Opt_usrquota: | ||
303 | *flag |= JFS_USRQUOTA; | ||
304 | break; | ||
305 | case Opt_grpquota: | ||
306 | *flag |= JFS_GRPQUOTA; | ||
307 | break; | ||
308 | #else | ||
309 | case Opt_usrquota: | ||
310 | case Opt_grpquota: | ||
311 | case Opt_quota: | ||
312 | printk(KERN_ERR | ||
313 | "JFS: quota operations not supported\n"); | ||
314 | break; | ||
315 | #endif | ||
316 | |||
296 | default: | 317 | default: |
297 | printk("jfs: Unrecognized mount option \"%s\" " | 318 | printk("jfs: Unrecognized mount option \"%s\" " |
298 | " or missing value\n", p); | 319 | " or missing value\n", p); |
@@ -539,6 +560,26 @@ static int jfs_sync_fs(struct super_block *sb, int wait) | |||
539 | return 0; | 560 | return 0; |
540 | } | 561 | } |
541 | 562 | ||
563 | static int jfs_show_options(struct seq_file *seq, struct vfsmount *vfs) | ||
564 | { | ||
565 | struct jfs_sb_info *sbi = JFS_SBI(vfs->mnt_sb); | ||
566 | |||
567 | if (sbi->flag & JFS_NOINTEGRITY) | ||
568 | seq_puts(seq, ",nointegrity"); | ||
569 | else | ||
570 | seq_puts(seq, ",integrity"); | ||
571 | |||
572 | #if defined(CONFIG_QUOTA) | ||
573 | if (sbi->flag & JFS_USRQUOTA) | ||
574 | seq_puts(seq, ",usrquota"); | ||
575 | |||
576 | if (sbi->flag & JFS_GRPQUOTA) | ||
577 | seq_puts(seq, ",grpquota"); | ||
578 | #endif | ||
579 | |||
580 | return 0; | ||
581 | } | ||
582 | |||
542 | static struct super_operations jfs_super_operations = { | 583 | static struct super_operations jfs_super_operations = { |
543 | .alloc_inode = jfs_alloc_inode, | 584 | .alloc_inode = jfs_alloc_inode, |
544 | .destroy_inode = jfs_destroy_inode, | 585 | .destroy_inode = jfs_destroy_inode, |
@@ -552,6 +593,7 @@ static struct super_operations jfs_super_operations = { | |||
552 | .unlockfs = jfs_unlockfs, | 593 | .unlockfs = jfs_unlockfs, |
553 | .statfs = jfs_statfs, | 594 | .statfs = jfs_statfs, |
554 | .remount_fs = jfs_remount, | 595 | .remount_fs = jfs_remount, |
596 | .show_options = jfs_show_options | ||
555 | }; | 597 | }; |
556 | 598 | ||
557 | static struct export_operations jfs_export_operations = { | 599 | static struct export_operations jfs_export_operations = { |
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 14b3ce87fa29..87332f30141b 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c | |||
@@ -299,8 +299,7 @@ nlmclnt_alloc_call(void) | |||
299 | return call; | 299 | return call; |
300 | } | 300 | } |
301 | printk("nlmclnt_alloc_call: failed, waiting for memory\n"); | 301 | printk("nlmclnt_alloc_call: failed, waiting for memory\n"); |
302 | current->state = TASK_INTERRUPTIBLE; | 302 | schedule_timeout_interruptible(5*HZ); |
303 | schedule_timeout(5*HZ); | ||
304 | } | 303 | } |
305 | return NULL; | 304 | return NULL; |
306 | } | 305 | } |
diff --git a/fs/locks.c b/fs/locks.c index 11956b6179ff..c2c09b4798d6 100644 --- a/fs/locks.c +++ b/fs/locks.c | |||
@@ -2198,21 +2198,23 @@ void steal_locks(fl_owner_t from) | |||
2198 | { | 2198 | { |
2199 | struct files_struct *files = current->files; | 2199 | struct files_struct *files = current->files; |
2200 | int i, j; | 2200 | int i, j; |
2201 | struct fdtable *fdt; | ||
2201 | 2202 | ||
2202 | if (from == files) | 2203 | if (from == files) |
2203 | return; | 2204 | return; |
2204 | 2205 | ||
2205 | lock_kernel(); | 2206 | lock_kernel(); |
2206 | j = 0; | 2207 | j = 0; |
2208 | fdt = files_fdtable(files); | ||
2207 | for (;;) { | 2209 | for (;;) { |
2208 | unsigned long set; | 2210 | unsigned long set; |
2209 | i = j * __NFDBITS; | 2211 | i = j * __NFDBITS; |
2210 | if (i >= files->max_fdset || i >= files->max_fds) | 2212 | if (i >= fdt->max_fdset || i >= fdt->max_fds) |
2211 | break; | 2213 | break; |
2212 | set = files->open_fds->fds_bits[j++]; | 2214 | set = fdt->open_fds->fds_bits[j++]; |
2213 | while (set) { | 2215 | while (set) { |
2214 | if (set & 1) { | 2216 | if (set & 1) { |
2215 | struct file *file = files->fd[i]; | 2217 | struct file *file = fdt->fd[i]; |
2216 | if (file) | 2218 | if (file) |
2217 | __steal_locks(file, from); | 2219 | __steal_locks(file, from); |
2218 | } | 2220 | } |
diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 3f18c21198d7..790cc0d0e970 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c | |||
@@ -24,6 +24,7 @@ static int minix_remount (struct super_block * sb, int * flags, char * data); | |||
24 | 24 | ||
25 | static void minix_delete_inode(struct inode *inode) | 25 | static void minix_delete_inode(struct inode *inode) |
26 | { | 26 | { |
27 | truncate_inode_pages(&inode->i_data, 0); | ||
27 | inode->i_size = 0; | 28 | inode->i_size = 0; |
28 | minix_truncate(inode); | 29 | minix_truncate(inode); |
29 | minix_free_inode(inode); | 30 | minix_free_inode(inode); |
diff --git a/fs/namei.c b/fs/namei.c index 6ec1f0fefc5b..21d85f1ac839 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -525,6 +525,22 @@ static inline int __do_follow_link(struct path *path, struct nameidata *nd) | |||
525 | return error; | 525 | return error; |
526 | } | 526 | } |
527 | 527 | ||
528 | static inline void dput_path(struct path *path, struct nameidata *nd) | ||
529 | { | ||
530 | dput(path->dentry); | ||
531 | if (path->mnt != nd->mnt) | ||
532 | mntput(path->mnt); | ||
533 | } | ||
534 | |||
535 | static inline void path_to_nameidata(struct path *path, struct nameidata *nd) | ||
536 | { | ||
537 | dput(nd->dentry); | ||
538 | if (nd->mnt != path->mnt) | ||
539 | mntput(nd->mnt); | ||
540 | nd->mnt = path->mnt; | ||
541 | nd->dentry = path->dentry; | ||
542 | } | ||
543 | |||
528 | /* | 544 | /* |
529 | * This limits recursive symlink follows to 8, while | 545 | * This limits recursive symlink follows to 8, while |
530 | * limiting consecutive symlinks to 40. | 546 | * limiting consecutive symlinks to 40. |
@@ -552,9 +568,7 @@ static inline int do_follow_link(struct path *path, struct nameidata *nd) | |||
552 | nd->depth--; | 568 | nd->depth--; |
553 | return err; | 569 | return err; |
554 | loop: | 570 | loop: |
555 | dput(path->dentry); | 571 | dput_path(path, nd); |
556 | if (path->mnt != nd->mnt) | ||
557 | mntput(path->mnt); | ||
558 | path_release(nd); | 572 | path_release(nd); |
559 | return err; | 573 | return err; |
560 | } | 574 | } |
@@ -813,13 +827,8 @@ static fastcall int __link_path_walk(const char * name, struct nameidata *nd) | |||
813 | err = -ENOTDIR; | 827 | err = -ENOTDIR; |
814 | if (!inode->i_op) | 828 | if (!inode->i_op) |
815 | break; | 829 | break; |
816 | } else { | 830 | } else |
817 | dput(nd->dentry); | 831 | path_to_nameidata(&next, nd); |
818 | if (nd->mnt != next.mnt) | ||
819 | mntput(nd->mnt); | ||
820 | nd->mnt = next.mnt; | ||
821 | nd->dentry = next.dentry; | ||
822 | } | ||
823 | err = -ENOTDIR; | 832 | err = -ENOTDIR; |
824 | if (!inode->i_op->lookup) | 833 | if (!inode->i_op->lookup) |
825 | break; | 834 | break; |
@@ -859,13 +868,8 @@ last_component: | |||
859 | if (err) | 868 | if (err) |
860 | goto return_err; | 869 | goto return_err; |
861 | inode = nd->dentry->d_inode; | 870 | inode = nd->dentry->d_inode; |
862 | } else { | 871 | } else |
863 | dput(nd->dentry); | 872 | path_to_nameidata(&next, nd); |
864 | if (nd->mnt != next.mnt) | ||
865 | mntput(nd->mnt); | ||
866 | nd->mnt = next.mnt; | ||
867 | nd->dentry = next.dentry; | ||
868 | } | ||
869 | err = -ENOENT; | 873 | err = -ENOENT; |
870 | if (!inode) | 874 | if (!inode) |
871 | break; | 875 | break; |
@@ -901,9 +905,7 @@ return_reval: | |||
901 | return_base: | 905 | return_base: |
902 | return 0; | 906 | return 0; |
903 | out_dput: | 907 | out_dput: |
904 | dput(next.dentry); | 908 | dput_path(&next, nd); |
905 | if (nd->mnt != next.mnt) | ||
906 | mntput(next.mnt); | ||
907 | break; | 909 | break; |
908 | } | 910 | } |
909 | path_release(nd); | 911 | path_release(nd); |
@@ -1314,10 +1316,8 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode, | |||
1314 | return error; | 1316 | return error; |
1315 | DQUOT_INIT(dir); | 1317 | DQUOT_INIT(dir); |
1316 | error = dir->i_op->create(dir, dentry, mode, nd); | 1318 | error = dir->i_op->create(dir, dentry, mode, nd); |
1317 | if (!error) { | 1319 | if (!error) |
1318 | fsnotify_create(dir, dentry->d_name.name); | 1320 | fsnotify_create(dir, dentry->d_name.name); |
1319 | security_inode_post_create(dir, dentry, mode); | ||
1320 | } | ||
1321 | return error; | 1321 | return error; |
1322 | } | 1322 | } |
1323 | 1323 | ||
@@ -1507,11 +1507,7 @@ do_last: | |||
1507 | if (path.dentry->d_inode->i_op && path.dentry->d_inode->i_op->follow_link) | 1507 | if (path.dentry->d_inode->i_op && path.dentry->d_inode->i_op->follow_link) |
1508 | goto do_link; | 1508 | goto do_link; |
1509 | 1509 | ||
1510 | dput(nd->dentry); | 1510 | path_to_nameidata(&path, nd); |
1511 | nd->dentry = path.dentry; | ||
1512 | if (nd->mnt != path.mnt) | ||
1513 | mntput(nd->mnt); | ||
1514 | nd->mnt = path.mnt; | ||
1515 | error = -EISDIR; | 1511 | error = -EISDIR; |
1516 | if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode)) | 1512 | if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode)) |
1517 | goto exit; | 1513 | goto exit; |
@@ -1522,9 +1518,7 @@ ok: | |||
1522 | return 0; | 1518 | return 0; |
1523 | 1519 | ||
1524 | exit_dput: | 1520 | exit_dput: |
1525 | dput(path.dentry); | 1521 | dput_path(&path, nd); |
1526 | if (nd->mnt != path.mnt) | ||
1527 | mntput(path.mnt); | ||
1528 | exit: | 1522 | exit: |
1529 | path_release(nd); | 1523 | path_release(nd); |
1530 | return error; | 1524 | return error; |
@@ -1639,10 +1633,8 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) | |||
1639 | 1633 | ||
1640 | DQUOT_INIT(dir); | 1634 | DQUOT_INIT(dir); |
1641 | error = dir->i_op->mknod(dir, dentry, mode, dev); | 1635 | error = dir->i_op->mknod(dir, dentry, mode, dev); |
1642 | if (!error) { | 1636 | if (!error) |
1643 | fsnotify_create(dir, dentry->d_name.name); | 1637 | fsnotify_create(dir, dentry->d_name.name); |
1644 | security_inode_post_mknod(dir, dentry, mode, dev); | ||
1645 | } | ||
1646 | return error; | 1638 | return error; |
1647 | } | 1639 | } |
1648 | 1640 | ||
@@ -1712,10 +1704,8 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
1712 | 1704 | ||
1713 | DQUOT_INIT(dir); | 1705 | DQUOT_INIT(dir); |
1714 | error = dir->i_op->mkdir(dir, dentry, mode); | 1706 | error = dir->i_op->mkdir(dir, dentry, mode); |
1715 | if (!error) { | 1707 | if (!error) |
1716 | fsnotify_mkdir(dir, dentry->d_name.name); | 1708 | fsnotify_mkdir(dir, dentry->d_name.name); |
1717 | security_inode_post_mkdir(dir,dentry, mode); | ||
1718 | } | ||
1719 | return error; | 1709 | return error; |
1720 | } | 1710 | } |
1721 | 1711 | ||
@@ -1951,10 +1941,8 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, i | |||
1951 | 1941 | ||
1952 | DQUOT_INIT(dir); | 1942 | DQUOT_INIT(dir); |
1953 | error = dir->i_op->symlink(dir, dentry, oldname); | 1943 | error = dir->i_op->symlink(dir, dentry, oldname); |
1954 | if (!error) { | 1944 | if (!error) |
1955 | fsnotify_create(dir, dentry->d_name.name); | 1945 | fsnotify_create(dir, dentry->d_name.name); |
1956 | security_inode_post_symlink(dir, dentry, oldname); | ||
1957 | } | ||
1958 | return error; | 1946 | return error; |
1959 | } | 1947 | } |
1960 | 1948 | ||
@@ -2024,10 +2012,8 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de | |||
2024 | DQUOT_INIT(dir); | 2012 | DQUOT_INIT(dir); |
2025 | error = dir->i_op->link(old_dentry, dir, new_dentry); | 2013 | error = dir->i_op->link(old_dentry, dir, new_dentry); |
2026 | up(&old_dentry->d_inode->i_sem); | 2014 | up(&old_dentry->d_inode->i_sem); |
2027 | if (!error) { | 2015 | if (!error) |
2028 | fsnotify_create(dir, new_dentry->d_name.name); | 2016 | fsnotify_create(dir, new_dentry->d_name.name); |
2029 | security_inode_post_link(old_dentry, dir, new_dentry); | ||
2030 | } | ||
2031 | return error; | 2017 | return error; |
2032 | } | 2018 | } |
2033 | 2019 | ||
@@ -2146,11 +2132,8 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, | |||
2146 | d_rehash(new_dentry); | 2132 | d_rehash(new_dentry); |
2147 | dput(new_dentry); | 2133 | dput(new_dentry); |
2148 | } | 2134 | } |
2149 | if (!error) { | 2135 | if (!error) |
2150 | d_move(old_dentry,new_dentry); | 2136 | d_move(old_dentry,new_dentry); |
2151 | security_inode_post_rename(old_dir, old_dentry, | ||
2152 | new_dir, new_dentry); | ||
2153 | } | ||
2154 | return error; | 2137 | return error; |
2155 | } | 2138 | } |
2156 | 2139 | ||
@@ -2176,7 +2159,6 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, | |||
2176 | /* The following d_move() should become unconditional */ | 2159 | /* The following d_move() should become unconditional */ |
2177 | if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME)) | 2160 | if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME)) |
2178 | d_move(old_dentry, new_dentry); | 2161 | d_move(old_dentry, new_dentry); |
2179 | security_inode_post_rename(old_dir, old_dentry, new_dir, new_dentry); | ||
2180 | } | 2162 | } |
2181 | if (target) | 2163 | if (target) |
2182 | up(&target->i_sem); | 2164 | up(&target->i_sem); |
diff --git a/fs/namespace.c b/fs/namespace.c index 79bd8a46e1e7..2fa9fdf7d6f5 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -40,7 +40,7 @@ static inline int sysfs_init(void) | |||
40 | __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); | 40 | __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); |
41 | 41 | ||
42 | static struct list_head *mount_hashtable; | 42 | static struct list_head *mount_hashtable; |
43 | static int hash_mask, hash_bits; | 43 | static int hash_mask __read_mostly, hash_bits __read_mostly; |
44 | static kmem_cache_t *mnt_cache; | 44 | static kmem_cache_t *mnt_cache; |
45 | 45 | ||
46 | static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) | 46 | static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) |
@@ -537,7 +537,6 @@ lives_below_in_same_fs(struct dentry *d, struct dentry *dentry) | |||
537 | static struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry) | 537 | static struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry) |
538 | { | 538 | { |
539 | struct vfsmount *res, *p, *q, *r, *s; | 539 | struct vfsmount *res, *p, *q, *r, *s; |
540 | struct list_head *h; | ||
541 | struct nameidata nd; | 540 | struct nameidata nd; |
542 | 541 | ||
543 | res = q = clone_mnt(mnt, dentry); | 542 | res = q = clone_mnt(mnt, dentry); |
@@ -546,8 +545,7 @@ static struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry) | |||
546 | q->mnt_mountpoint = mnt->mnt_mountpoint; | 545 | q->mnt_mountpoint = mnt->mnt_mountpoint; |
547 | 546 | ||
548 | p = mnt; | 547 | p = mnt; |
549 | for (h = mnt->mnt_mounts.next; h != &mnt->mnt_mounts; h = h->next) { | 548 | list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) { |
550 | r = list_entry(h, struct vfsmount, mnt_child); | ||
551 | if (!lives_below_in_same_fs(r->mnt_mountpoint, dentry)) | 549 | if (!lives_below_in_same_fs(r->mnt_mountpoint, dentry)) |
552 | continue; | 550 | continue; |
553 | 551 | ||
@@ -1334,8 +1332,12 @@ asmlinkage long sys_pivot_root(const char __user *new_root, const char __user *p | |||
1334 | error = -EINVAL; | 1332 | error = -EINVAL; |
1335 | if (user_nd.mnt->mnt_root != user_nd.dentry) | 1333 | if (user_nd.mnt->mnt_root != user_nd.dentry) |
1336 | goto out2; /* not a mountpoint */ | 1334 | goto out2; /* not a mountpoint */ |
1335 | if (user_nd.mnt->mnt_parent == user_nd.mnt) | ||
1336 | goto out2; /* not attached */ | ||
1337 | if (new_nd.mnt->mnt_root != new_nd.dentry) | 1337 | if (new_nd.mnt->mnt_root != new_nd.dentry) |
1338 | goto out2; /* not a mountpoint */ | 1338 | goto out2; /* not a mountpoint */ |
1339 | if (new_nd.mnt->mnt_parent == new_nd.mnt) | ||
1340 | goto out2; /* not attached */ | ||
1339 | tmp = old_nd.mnt; /* make sure we can reach put_old from new_root */ | 1341 | tmp = old_nd.mnt; /* make sure we can reach put_old from new_root */ |
1340 | spin_lock(&vfsmount_lock); | 1342 | spin_lock(&vfsmount_lock); |
1341 | if (tmp != new_nd.mnt) { | 1343 | if (tmp != new_nd.mnt) { |
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 44795d2f4b30..8c8839203cd5 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c | |||
@@ -286,6 +286,8 @@ ncp_iget(struct super_block *sb, struct ncp_entry_info *info) | |||
286 | static void | 286 | static void |
287 | ncp_delete_inode(struct inode *inode) | 287 | ncp_delete_inode(struct inode *inode) |
288 | { | 288 | { |
289 | truncate_inode_pages(&inode->i_data, 0); | ||
290 | |||
289 | if (S_ISDIR(inode->i_mode)) { | 291 | if (S_ISDIR(inode->i_mode)) { |
290 | DDPRINTK("ncp_delete_inode: put directory %ld\n", inode->i_ino); | 292 | DDPRINTK("ncp_delete_inode: put directory %ld\n", inode->i_ino); |
291 | } | 293 | } |
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 541b418327c8..6922469d6fc5 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
@@ -146,6 +146,8 @@ nfs_delete_inode(struct inode * inode) | |||
146 | { | 146 | { |
147 | dprintk("NFS: delete_inode(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); | 147 | dprintk("NFS: delete_inode(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); |
148 | 148 | ||
149 | truncate_inode_pages(&inode->i_data, 0); | ||
150 | |||
149 | nfs_wb_all(inode); | 151 | nfs_wb_all(inode); |
150 | /* | 152 | /* |
151 | * The following should never happen... | 153 | * The following should never happen... |
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 2681485cf2d0..edc95514046d 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c | |||
@@ -34,8 +34,7 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) | |||
34 | res = rpc_call_sync(clnt, msg, flags); | 34 | res = rpc_call_sync(clnt, msg, flags); |
35 | if (res != -EJUKEBOX) | 35 | if (res != -EJUKEBOX) |
36 | break; | 36 | break; |
37 | set_current_state(TASK_INTERRUPTIBLE); | 37 | schedule_timeout_interruptible(NFS_JUKEBOX_RETRY_TIME); |
38 | schedule_timeout(NFS_JUKEBOX_RETRY_TIME); | ||
39 | res = -ERESTARTSYS; | 38 | res = -ERESTARTSYS; |
40 | } while (!signalled()); | 39 | } while (!signalled()); |
41 | rpc_clnt_sigunmask(clnt, &oldset); | 40 | rpc_clnt_sigunmask(clnt, &oldset); |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0c5a308e4963..9701ca8c9428 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -2418,14 +2418,11 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) | |||
2418 | *timeout = NFS4_POLL_RETRY_MAX; | 2418 | *timeout = NFS4_POLL_RETRY_MAX; |
2419 | rpc_clnt_sigmask(clnt, &oldset); | 2419 | rpc_clnt_sigmask(clnt, &oldset); |
2420 | if (clnt->cl_intr) { | 2420 | if (clnt->cl_intr) { |
2421 | set_current_state(TASK_INTERRUPTIBLE); | 2421 | schedule_timeout_interruptible(*timeout); |
2422 | schedule_timeout(*timeout); | ||
2423 | if (signalled()) | 2422 | if (signalled()) |
2424 | res = -ERESTARTSYS; | 2423 | res = -ERESTARTSYS; |
2425 | } else { | 2424 | } else |
2426 | set_current_state(TASK_UNINTERRUPTIBLE); | 2425 | schedule_timeout_uninterruptible(*timeout); |
2427 | schedule_timeout(*timeout); | ||
2428 | } | ||
2429 | rpc_clnt_sigunmask(clnt, &oldset); | 2426 | rpc_clnt_sigunmask(clnt, &oldset); |
2430 | *timeout <<= 1; | 2427 | *timeout <<= 1; |
2431 | return res; | 2428 | return res; |
@@ -2578,8 +2575,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4 | |||
2578 | static unsigned long | 2575 | static unsigned long |
2579 | nfs4_set_lock_task_retry(unsigned long timeout) | 2576 | nfs4_set_lock_task_retry(unsigned long timeout) |
2580 | { | 2577 | { |
2581 | current->state = TASK_INTERRUPTIBLE; | 2578 | schedule_timeout_interruptible(timeout); |
2582 | schedule_timeout(timeout); | ||
2583 | timeout <<= 1; | 2579 | timeout <<= 1; |
2584 | if (timeout > NFS4_LOCK_MAXTIMEOUT) | 2580 | if (timeout > NFS4_LOCK_MAXTIMEOUT) |
2585 | return NFS4_LOCK_MAXTIMEOUT; | 2581 | return NFS4_LOCK_MAXTIMEOUT; |
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 9a11aa39e2e4..057aff745506 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/namei.h> | 26 | #include <linux/namei.h> |
27 | #include <linux/mount.h> | 27 | #include <linux/mount.h> |
28 | #include <linux/hash.h> | 28 | #include <linux/hash.h> |
29 | #include <linux/module.h> | ||
29 | 30 | ||
30 | #include <linux/sunrpc/svc.h> | 31 | #include <linux/sunrpc/svc.h> |
31 | #include <linux/nfsd/nfsd.h> | 32 | #include <linux/nfsd/nfsd.h> |
@@ -221,6 +222,7 @@ static int expkey_show(struct seq_file *m, | |||
221 | } | 222 | } |
222 | 223 | ||
223 | struct cache_detail svc_expkey_cache = { | 224 | struct cache_detail svc_expkey_cache = { |
225 | .owner = THIS_MODULE, | ||
224 | .hash_size = EXPKEY_HASHMAX, | 226 | .hash_size = EXPKEY_HASHMAX, |
225 | .hash_table = expkey_table, | 227 | .hash_table = expkey_table, |
226 | .name = "nfsd.fh", | 228 | .name = "nfsd.fh", |
@@ -456,6 +458,7 @@ static int svc_export_show(struct seq_file *m, | |||
456 | return 0; | 458 | return 0; |
457 | } | 459 | } |
458 | struct cache_detail svc_export_cache = { | 460 | struct cache_detail svc_export_cache = { |
461 | .owner = THIS_MODULE, | ||
459 | .hash_size = EXPORT_HASHMAX, | 462 | .hash_size = EXPORT_HASHMAX, |
460 | .hash_table = export_table, | 463 | .hash_table = export_table, |
461 | .name = "nfsd.export", | 464 | .name = "nfsd.export", |
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 5605a26efc57..13369650cdf9 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c | |||
@@ -187,6 +187,7 @@ static int idtoname_parse(struct cache_detail *, char *, int); | |||
187 | static struct ent *idtoname_lookup(struct ent *, int); | 187 | static struct ent *idtoname_lookup(struct ent *, int); |
188 | 188 | ||
189 | static struct cache_detail idtoname_cache = { | 189 | static struct cache_detail idtoname_cache = { |
190 | .owner = THIS_MODULE, | ||
190 | .hash_size = ENT_HASHMAX, | 191 | .hash_size = ENT_HASHMAX, |
191 | .hash_table = idtoname_table, | 192 | .hash_table = idtoname_table, |
192 | .name = "nfs4.idtoname", | 193 | .name = "nfs4.idtoname", |
@@ -320,6 +321,7 @@ static struct ent *nametoid_lookup(struct ent *, int); | |||
320 | static int nametoid_parse(struct cache_detail *, char *, int); | 321 | static int nametoid_parse(struct cache_detail *, char *, int); |
321 | 322 | ||
322 | static struct cache_detail nametoid_cache = { | 323 | static struct cache_detail nametoid_cache = { |
324 | .owner = THIS_MODULE, | ||
323 | .hash_size = ENT_HASHMAX, | 325 | .hash_size = ENT_HASHMAX, |
324 | .hash_table = nametoid_table, | 326 | .hash_table = nametoid_table, |
325 | .name = "nfs4.nametoid", | 327 | .name = "nfs4.nametoid", |
@@ -404,8 +406,10 @@ nfsd_idmap_init(void) | |||
404 | void | 406 | void |
405 | nfsd_idmap_shutdown(void) | 407 | nfsd_idmap_shutdown(void) |
406 | { | 408 | { |
407 | cache_unregister(&idtoname_cache); | 409 | if (cache_unregister(&idtoname_cache)) |
408 | cache_unregister(&nametoid_cache); | 410 | printk(KERN_ERR "nfsd: failed to unregister idtoname cache\n"); |
411 | if (cache_unregister(&nametoid_cache)) | ||
412 | printk(KERN_ERR "nfsd: failed to unregister nametoid cache\n"); | ||
409 | } | 413 | } |
410 | 414 | ||
411 | /* | 415 | /* |
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 57ed50fe7f85..954cf893d50c 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c | |||
@@ -93,7 +93,7 @@ nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname) | |||
93 | 93 | ||
94 | dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n", | 94 | dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n", |
95 | clname->len, clname->data); | 95 | clname->len, clname->data); |
96 | tfm = crypto_alloc_tfm("md5", 0); | 96 | tfm = crypto_alloc_tfm("md5", CRYPTO_TFM_REQ_MAY_SLEEP); |
97 | if (tfm == NULL) | 97 | if (tfm == NULL) |
98 | goto out; | 98 | goto out; |
99 | cksum.len = crypto_tfm_alg_digestsize(tfm); | 99 | cksum.len = crypto_tfm_alg_digestsize(tfm); |
@@ -114,8 +114,7 @@ nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname) | |||
114 | kfree(cksum.data); | 114 | kfree(cksum.data); |
115 | status = nfs_ok; | 115 | status = nfs_ok; |
116 | out: | 116 | out: |
117 | if (tfm) | 117 | crypto_free_tfm(tfm); |
118 | crypto_free_tfm(tfm); | ||
119 | return status; | 118 | return status; |
120 | } | 119 | } |
121 | 120 | ||
diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog index 9eecc9939dfe..e4fd6134244d 100644 --- a/fs/ntfs/ChangeLog +++ b/fs/ntfs/ChangeLog | |||
@@ -22,6 +22,76 @@ ToDo/Notes: | |||
22 | - Enable the code for setting the NT4 compatibility flag when we start | 22 | - Enable the code for setting the NT4 compatibility flag when we start |
23 | making NTFS 1.2 specific modifications. | 23 | making NTFS 1.2 specific modifications. |
24 | 24 | ||
25 | 2.1.24 - Lots of bug fixes and support more clean journal states. | ||
26 | |||
27 | - Support journals ($LogFile) which have been modified by chkdsk. This | ||
28 | means users can boot into Windows after we marked the volume dirty. | ||
29 | The Windows boot will run chkdsk and then reboot. The user can then | ||
30 | immediately boot into Linux rather than having to do a full Windows | ||
31 | boot first before rebooting into Linux and we will recognize such a | ||
32 | journal and empty it as it is clean by definition. | ||
33 | - Support journals ($LogFile) with only one restart page as well as | ||
34 | journals with two different restart pages. We sanity check both and | ||
35 | either use the only sane one or the more recent one of the two in the | ||
36 | case that both are valid. | ||
37 | - Modify fs/ntfs/malloc.h::ntfs_malloc_nofs() to do the kmalloc() based | ||
38 | allocations with __GFP_HIGHMEM, analogous to how the vmalloc() based | ||
39 | allocations are done. | ||
40 | - Add fs/ntfs/malloc.h::ntfs_malloc_nofs_nofail() which is analogous to | ||
41 | ntfs_malloc_nofs() but it performs allocations with __GFP_NOFAIL and | ||
42 | hence cannot fail. | ||
43 | - Use ntfs_malloc_nofs_nofail() in the two critical regions in | ||
44 | fs/ntfs/runlist.c::ntfs_runlists_merge(). This means we no longer | ||
45 | need to panic() if the allocation fails as it now cannot fail. | ||
46 | - Fix two nasty runlist merging bugs that had gone unnoticed so far. | ||
47 | Thanks to Stefano Picerno for the bug report. | ||
48 | - Remove two bogus BUG_ON()s from fs/ntfs/mft.c. | ||
49 | - Fix handling of valid but empty mapping pairs array in | ||
50 | fs/ntfs/runlist.c::ntfs_mapping_pairs_decompress(). | ||
51 | - Report unrepresentable inodes during ntfs_readdir() as KERN_WARNING | ||
52 | messages and include the inode number. Thanks to Yura Pakhuchiy for | ||
53 | pointing this out. | ||
54 | - Change ntfs_rl_truncate_nolock() to throw away the runlist if the new | ||
55 | length is zero. | ||
56 | - Add runlist.[hc]::ntfs_rl_punch_nolock() which punches a caller | ||
57 | specified hole into a runlist. | ||
58 | - Fix a bug in fs/ntfs/index.c::ntfs_index_lookup(). When the returned | ||
59 | index entry is in the index root, we forgot to set the @ir pointer in | ||
60 | the index context. Thanks to Yura Pakhuchiy for finding this bug. | ||
61 | - Remove bogus setting of PageError in ntfs_read_compressed_block(). | ||
62 | - Add fs/ntfs/attrib.[hc]::ntfs_resident_attr_value_resize(). | ||
63 | - Fix a bug in ntfs_map_runlist_nolock() where we forgot to protect | ||
64 | access to the allocated size in the ntfs inode with the size lock. | ||
65 | - Fix ntfs_attr_vcn_to_lcn_nolock() and ntfs_attr_find_vcn_nolock() to | ||
66 | return LCN_ENOENT when there is no runlist and the allocated size is | ||
67 | zero. | ||
68 | - Fix load_attribute_list() to handle the case of a NULL runlist. | ||
69 | - Fix handling of sparse attributes in ntfs_attr_make_non_resident(). | ||
70 | - Add BUG() checks to ntfs_attr_make_non_resident() and ntfs_attr_set() | ||
71 | to ensure that these functions are never called for compressed or | ||
72 | encrypted attributes. | ||
73 | - Fix cluster (de)allocators to work when the runlist is NULL and more | ||
74 | importantly to take a locked runlist rather than them locking it | ||
75 | which leads to lock reversal. | ||
76 | - Truncate {a,c,m}time to the ntfs supported time granularity when | ||
77 | updating the times in the inode in ntfs_setattr(). | ||
78 | - Fixup handling of sparse, compressed, and encrypted attributes in | ||
79 | fs/ntfs/inode.c::ntfs_read_locked_{,attr_,index_}inode(), | ||
80 | fs/ntfs/aops.c::ntfs_{read,write}page(). | ||
81 | - Make ntfs_write_block() not instantiate sparse blocks if they contain | ||
82 | only zeroes. | ||
83 | - Optimize fs/ntfs/aops.c::ntfs_write_block() by extending the page | ||
84 | lock protection over the buffer submission for i/o which allows the | ||
85 | removal of the get_bh()/put_bh() pairs for each buffer. | ||
86 | - Fix fs/ntfs/aops.c::ntfs_{read,write}_block() to handle the case | ||
87 | where a concurrent truncate has truncated the runlist under our feet. | ||
88 | - Fix page_has_buffers()/page_buffers() handling in fs/ntfs/aops.c. | ||
89 | - In fs/ntfs/aops.c::ntfs_end_buffer_async_read(), use a bit spin lock | ||
90 | in the first buffer head instead of a driver global spin lock to | ||
91 | improve scalability. | ||
92 | - Minor fix to error handling and error message display in | ||
93 | fs/ntfs/aops.c::ntfs_prepare_nonresident_write(). | ||
94 | |||
25 | 2.1.23 - Implement extension of resident files and make writing safe as well as | 95 | 2.1.23 - Implement extension of resident files and make writing safe as well as |
26 | many bug fixes, cleanups, and enhancements... | 96 | many bug fixes, cleanups, and enhancements... |
27 | 97 | ||
diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile index f083f27d8b69..894b2b876d35 100644 --- a/fs/ntfs/Makefile +++ b/fs/ntfs/Makefile | |||
@@ -6,7 +6,7 @@ ntfs-objs := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \ | |||
6 | index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \ | 6 | index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \ |
7 | unistr.o upcase.o | 7 | unistr.o upcase.o |
8 | 8 | ||
9 | EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.23\" | 9 | EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.24\" |
10 | 10 | ||
11 | ifeq ($(CONFIG_NTFS_DEBUG),y) | 11 | ifeq ($(CONFIG_NTFS_DEBUG),y) |
12 | EXTRA_CFLAGS += -DDEBUG | 12 | EXTRA_CFLAGS += -DDEBUG |
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index 78adad7a988d..b6cc8cf24626 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <linux/swap.h> | 27 | #include <linux/swap.h> |
28 | #include <linux/buffer_head.h> | 28 | #include <linux/buffer_head.h> |
29 | #include <linux/writeback.h> | 29 | #include <linux/writeback.h> |
30 | #include <linux/bit_spinlock.h> | ||
30 | 31 | ||
31 | #include "aops.h" | 32 | #include "aops.h" |
32 | #include "attrib.h" | 33 | #include "attrib.h" |
@@ -55,9 +56,8 @@ | |||
55 | */ | 56 | */ |
56 | static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) | 57 | static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) |
57 | { | 58 | { |
58 | static DEFINE_SPINLOCK(page_uptodate_lock); | ||
59 | unsigned long flags; | 59 | unsigned long flags; |
60 | struct buffer_head *tmp; | 60 | struct buffer_head *first, *tmp; |
61 | struct page *page; | 61 | struct page *page; |
62 | ntfs_inode *ni; | 62 | ntfs_inode *ni; |
63 | int page_uptodate = 1; | 63 | int page_uptodate = 1; |
@@ -89,11 +89,13 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) | |||
89 | } | 89 | } |
90 | } else { | 90 | } else { |
91 | clear_buffer_uptodate(bh); | 91 | clear_buffer_uptodate(bh); |
92 | SetPageError(page); | ||
92 | ntfs_error(ni->vol->sb, "Buffer I/O error, logical block %llu.", | 93 | ntfs_error(ni->vol->sb, "Buffer I/O error, logical block %llu.", |
93 | (unsigned long long)bh->b_blocknr); | 94 | (unsigned long long)bh->b_blocknr); |
94 | SetPageError(page); | ||
95 | } | 95 | } |
96 | spin_lock_irqsave(&page_uptodate_lock, flags); | 96 | first = page_buffers(page); |
97 | local_irq_save(flags); | ||
98 | bit_spin_lock(BH_Uptodate_Lock, &first->b_state); | ||
97 | clear_buffer_async_read(bh); | 99 | clear_buffer_async_read(bh); |
98 | unlock_buffer(bh); | 100 | unlock_buffer(bh); |
99 | tmp = bh; | 101 | tmp = bh; |
@@ -108,7 +110,8 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) | |||
108 | } | 110 | } |
109 | tmp = tmp->b_this_page; | 111 | tmp = tmp->b_this_page; |
110 | } while (tmp != bh); | 112 | } while (tmp != bh); |
111 | spin_unlock_irqrestore(&page_uptodate_lock, flags); | 113 | bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); |
114 | local_irq_restore(flags); | ||
112 | /* | 115 | /* |
113 | * If none of the buffers had errors then we can set the page uptodate, | 116 | * If none of the buffers had errors then we can set the page uptodate, |
114 | * but we first have to perform the post read mst fixups, if the | 117 | * but we first have to perform the post read mst fixups, if the |
@@ -141,7 +144,8 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) | |||
141 | unlock_page(page); | 144 | unlock_page(page); |
142 | return; | 145 | return; |
143 | still_busy: | 146 | still_busy: |
144 | spin_unlock_irqrestore(&page_uptodate_lock, flags); | 147 | bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); |
148 | local_irq_restore(flags); | ||
145 | return; | 149 | return; |
146 | } | 150 | } |
147 | 151 | ||
@@ -185,13 +189,15 @@ static int ntfs_read_block(struct page *page) | |||
185 | blocksize_bits = VFS_I(ni)->i_blkbits; | 189 | blocksize_bits = VFS_I(ni)->i_blkbits; |
186 | blocksize = 1 << blocksize_bits; | 190 | blocksize = 1 << blocksize_bits; |
187 | 191 | ||
188 | if (!page_has_buffers(page)) | 192 | if (!page_has_buffers(page)) { |
189 | create_empty_buffers(page, blocksize, 0); | 193 | create_empty_buffers(page, blocksize, 0); |
190 | bh = head = page_buffers(page); | 194 | if (unlikely(!page_has_buffers(page))) { |
191 | if (unlikely(!bh)) { | 195 | unlock_page(page); |
192 | unlock_page(page); | 196 | return -ENOMEM; |
193 | return -ENOMEM; | 197 | } |
194 | } | 198 | } |
199 | bh = head = page_buffers(page); | ||
200 | BUG_ON(!bh); | ||
195 | 201 | ||
196 | iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits); | 202 | iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits); |
197 | read_lock_irqsave(&ni->size_lock, flags); | 203 | read_lock_irqsave(&ni->size_lock, flags); |
@@ -204,6 +210,7 @@ static int ntfs_read_block(struct page *page) | |||
204 | nr = i = 0; | 210 | nr = i = 0; |
205 | do { | 211 | do { |
206 | u8 *kaddr; | 212 | u8 *kaddr; |
213 | int err; | ||
207 | 214 | ||
208 | if (unlikely(buffer_uptodate(bh))) | 215 | if (unlikely(buffer_uptodate(bh))) |
209 | continue; | 216 | continue; |
@@ -211,6 +218,7 @@ static int ntfs_read_block(struct page *page) | |||
211 | arr[nr++] = bh; | 218 | arr[nr++] = bh; |
212 | continue; | 219 | continue; |
213 | } | 220 | } |
221 | err = 0; | ||
214 | bh->b_bdev = vol->sb->s_bdev; | 222 | bh->b_bdev = vol->sb->s_bdev; |
215 | /* Is the block within the allowed limits? */ | 223 | /* Is the block within the allowed limits? */ |
216 | if (iblock < lblock) { | 224 | if (iblock < lblock) { |
@@ -252,7 +260,6 @@ lock_retry_remap: | |||
252 | goto handle_hole; | 260 | goto handle_hole; |
253 | /* If first try and runlist unmapped, map and retry. */ | 261 | /* If first try and runlist unmapped, map and retry. */ |
254 | if (!is_retry && lcn == LCN_RL_NOT_MAPPED) { | 262 | if (!is_retry && lcn == LCN_RL_NOT_MAPPED) { |
255 | int err; | ||
256 | is_retry = TRUE; | 263 | is_retry = TRUE; |
257 | /* | 264 | /* |
258 | * Attempt to map runlist, dropping lock for | 265 | * Attempt to map runlist, dropping lock for |
@@ -263,20 +270,30 @@ lock_retry_remap: | |||
263 | if (likely(!err)) | 270 | if (likely(!err)) |
264 | goto lock_retry_remap; | 271 | goto lock_retry_remap; |
265 | rl = NULL; | 272 | rl = NULL; |
266 | lcn = err; | ||
267 | } else if (!rl) | 273 | } else if (!rl) |
268 | up_read(&ni->runlist.lock); | 274 | up_read(&ni->runlist.lock); |
275 | /* | ||
276 | * If buffer is outside the runlist, treat it as a | ||
277 | * hole. This can happen due to concurrent truncate | ||
278 | * for example. | ||
279 | */ | ||
280 | if (err == -ENOENT || lcn == LCN_ENOENT) { | ||
281 | err = 0; | ||
282 | goto handle_hole; | ||
283 | } | ||
269 | /* Hard error, zero out region. */ | 284 | /* Hard error, zero out region. */ |
285 | if (!err) | ||
286 | err = -EIO; | ||
270 | bh->b_blocknr = -1; | 287 | bh->b_blocknr = -1; |
271 | SetPageError(page); | 288 | SetPageError(page); |
272 | ntfs_error(vol->sb, "Failed to read from inode 0x%lx, " | 289 | ntfs_error(vol->sb, "Failed to read from inode 0x%lx, " |
273 | "attribute type 0x%x, vcn 0x%llx, " | 290 | "attribute type 0x%x, vcn 0x%llx, " |
274 | "offset 0x%x because its location on " | 291 | "offset 0x%x because its location on " |
275 | "disk could not be determined%s " | 292 | "disk could not be determined%s " |
276 | "(error code %lli).", ni->mft_no, | 293 | "(error code %i).", ni->mft_no, |
277 | ni->type, (unsigned long long)vcn, | 294 | ni->type, (unsigned long long)vcn, |
278 | vcn_ofs, is_retry ? " even after " | 295 | vcn_ofs, is_retry ? " even after " |
279 | "retrying" : "", (long long)lcn); | 296 | "retrying" : "", err); |
280 | } | 297 | } |
281 | /* | 298 | /* |
282 | * Either iblock was outside lblock limits or | 299 | * Either iblock was outside lblock limits or |
@@ -289,9 +306,10 @@ handle_hole: | |||
289 | handle_zblock: | 306 | handle_zblock: |
290 | kaddr = kmap_atomic(page, KM_USER0); | 307 | kaddr = kmap_atomic(page, KM_USER0); |
291 | memset(kaddr + i * blocksize, 0, blocksize); | 308 | memset(kaddr + i * blocksize, 0, blocksize); |
292 | flush_dcache_page(page); | ||
293 | kunmap_atomic(kaddr, KM_USER0); | 309 | kunmap_atomic(kaddr, KM_USER0); |
294 | set_buffer_uptodate(bh); | 310 | flush_dcache_page(page); |
311 | if (likely(!err)) | ||
312 | set_buffer_uptodate(bh); | ||
295 | } while (i++, iblock++, (bh = bh->b_this_page) != head); | 313 | } while (i++, iblock++, (bh = bh->b_this_page) != head); |
296 | 314 | ||
297 | /* Release the lock if we took it. */ | 315 | /* Release the lock if we took it. */ |
@@ -367,31 +385,38 @@ retry_readpage: | |||
367 | return 0; | 385 | return 0; |
368 | } | 386 | } |
369 | ni = NTFS_I(page->mapping->host); | 387 | ni = NTFS_I(page->mapping->host); |
370 | 388 | /* | |
389 | * Only $DATA attributes can be encrypted and only unnamed $DATA | ||
390 | * attributes can be compressed. Index root can have the flags set but | ||
391 | * this means to create compressed/encrypted files, not that the | ||
392 | * attribute is compressed/encrypted. | ||
393 | */ | ||
394 | if (ni->type != AT_INDEX_ROOT) { | ||
395 | /* If attribute is encrypted, deny access, just like NT4. */ | ||
396 | if (NInoEncrypted(ni)) { | ||
397 | BUG_ON(ni->type != AT_DATA); | ||
398 | err = -EACCES; | ||
399 | goto err_out; | ||
400 | } | ||
401 | /* Compressed data streams are handled in compress.c. */ | ||
402 | if (NInoNonResident(ni) && NInoCompressed(ni)) { | ||
403 | BUG_ON(ni->type != AT_DATA); | ||
404 | BUG_ON(ni->name_len); | ||
405 | return ntfs_read_compressed_block(page); | ||
406 | } | ||
407 | } | ||
371 | /* NInoNonResident() == NInoIndexAllocPresent() */ | 408 | /* NInoNonResident() == NInoIndexAllocPresent() */ |
372 | if (NInoNonResident(ni)) { | 409 | if (NInoNonResident(ni)) { |
373 | /* | 410 | /* Normal, non-resident data stream. */ |
374 | * Only unnamed $DATA attributes can be compressed or | ||
375 | * encrypted. | ||
376 | */ | ||
377 | if (ni->type == AT_DATA && !ni->name_len) { | ||
378 | /* If file is encrypted, deny access, just like NT4. */ | ||
379 | if (NInoEncrypted(ni)) { | ||
380 | err = -EACCES; | ||
381 | goto err_out; | ||
382 | } | ||
383 | /* Compressed data streams are handled in compress.c. */ | ||
384 | if (NInoCompressed(ni)) | ||
385 | return ntfs_read_compressed_block(page); | ||
386 | } | ||
387 | /* Normal data stream. */ | ||
388 | return ntfs_read_block(page); | 411 | return ntfs_read_block(page); |
389 | } | 412 | } |
390 | /* | 413 | /* |
391 | * Attribute is resident, implying it is not compressed or encrypted. | 414 | * Attribute is resident, implying it is not compressed or encrypted. |
392 | * This also means the attribute is smaller than an mft record and | 415 | * This also means the attribute is smaller than an mft record and |
393 | * hence smaller than a page, so can simply zero out any pages with | 416 | * hence smaller than a page, so can simply zero out any pages with |
394 | * index above 0. | 417 | * index above 0. Note the attribute can actually be marked compressed |
418 | * but if it is resident the actual data is not compressed so we are | ||
419 | * ok to ignore the compressed flag here. | ||
395 | */ | 420 | */ |
396 | if (unlikely(page->index > 0)) { | 421 | if (unlikely(page->index > 0)) { |
397 | kaddr = kmap_atomic(page, KM_USER0); | 422 | kaddr = kmap_atomic(page, KM_USER0); |
@@ -511,19 +536,21 @@ static int ntfs_write_block(struct page *page, struct writeback_control *wbc) | |||
511 | BUG_ON(!PageUptodate(page)); | 536 | BUG_ON(!PageUptodate(page)); |
512 | create_empty_buffers(page, blocksize, | 537 | create_empty_buffers(page, blocksize, |
513 | (1 << BH_Uptodate) | (1 << BH_Dirty)); | 538 | (1 << BH_Uptodate) | (1 << BH_Dirty)); |
539 | if (unlikely(!page_has_buffers(page))) { | ||
540 | ntfs_warning(vol->sb, "Error allocating page " | ||
541 | "buffers. Redirtying page so we try " | ||
542 | "again later."); | ||
543 | /* | ||
544 | * Put the page back on mapping->dirty_pages, but leave | ||
545 | * its buffers' dirty state as-is. | ||
546 | */ | ||
547 | redirty_page_for_writepage(wbc, page); | ||
548 | unlock_page(page); | ||
549 | return 0; | ||
550 | } | ||
514 | } | 551 | } |
515 | bh = head = page_buffers(page); | 552 | bh = head = page_buffers(page); |
516 | if (unlikely(!bh)) { | 553 | BUG_ON(!bh); |
517 | ntfs_warning(vol->sb, "Error allocating page buffers. " | ||
518 | "Redirtying page so we try again later."); | ||
519 | /* | ||
520 | * Put the page back on mapping->dirty_pages, but leave its | ||
521 | * buffer's dirty state as-is. | ||
522 | */ | ||
523 | redirty_page_for_writepage(wbc, page); | ||
524 | unlock_page(page); | ||
525 | return 0; | ||
526 | } | ||
527 | 554 | ||
528 | /* NOTE: Different naming scheme to ntfs_read_block()! */ | 555 | /* NOTE: Different naming scheme to ntfs_read_block()! */ |
529 | 556 | ||
@@ -670,6 +697,27 @@ lock_retry_remap: | |||
670 | } | 697 | } |
671 | /* It is a hole, need to instantiate it. */ | 698 | /* It is a hole, need to instantiate it. */ |
672 | if (lcn == LCN_HOLE) { | 699 | if (lcn == LCN_HOLE) { |
700 | u8 *kaddr; | ||
701 | unsigned long *bpos, *bend; | ||
702 | |||
703 | /* Check if the buffer is zero. */ | ||
704 | kaddr = kmap_atomic(page, KM_USER0); | ||
705 | bpos = (unsigned long *)(kaddr + bh_offset(bh)); | ||
706 | bend = (unsigned long *)((u8*)bpos + blocksize); | ||
707 | do { | ||
708 | if (unlikely(*bpos)) | ||
709 | break; | ||
710 | } while (likely(++bpos < bend)); | ||
711 | kunmap_atomic(kaddr, KM_USER0); | ||
712 | if (bpos == bend) { | ||
713 | /* | ||
714 | * Buffer is zero and sparse, no need to write | ||
715 | * it. | ||
716 | */ | ||
717 | bh->b_blocknr = -1; | ||
718 | clear_buffer_dirty(bh); | ||
719 | continue; | ||
720 | } | ||
673 | // TODO: Instantiate the hole. | 721 | // TODO: Instantiate the hole. |
674 | // clear_buffer_new(bh); | 722 | // clear_buffer_new(bh); |
675 | // unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); | 723 | // unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); |
@@ -690,20 +738,37 @@ lock_retry_remap: | |||
690 | if (likely(!err)) | 738 | if (likely(!err)) |
691 | goto lock_retry_remap; | 739 | goto lock_retry_remap; |
692 | rl = NULL; | 740 | rl = NULL; |
693 | lcn = err; | ||
694 | } else if (!rl) | 741 | } else if (!rl) |
695 | up_read(&ni->runlist.lock); | 742 | up_read(&ni->runlist.lock); |
743 | /* | ||
744 | * If buffer is outside the runlist, truncate has cut it out | ||
745 | * of the runlist. Just clean and clear the buffer and set it | ||
746 | * uptodate so it can get discarded by the VM. | ||
747 | */ | ||
748 | if (err == -ENOENT || lcn == LCN_ENOENT) { | ||
749 | u8 *kaddr; | ||
750 | |||
751 | bh->b_blocknr = -1; | ||
752 | clear_buffer_dirty(bh); | ||
753 | kaddr = kmap_atomic(page, KM_USER0); | ||
754 | memset(kaddr + bh_offset(bh), 0, blocksize); | ||
755 | kunmap_atomic(kaddr, KM_USER0); | ||
756 | flush_dcache_page(page); | ||
757 | set_buffer_uptodate(bh); | ||
758 | err = 0; | ||
759 | continue; | ||
760 | } | ||
696 | /* Failed to map the buffer, even after retrying. */ | 761 | /* Failed to map the buffer, even after retrying. */ |
762 | if (!err) | ||
763 | err = -EIO; | ||
697 | bh->b_blocknr = -1; | 764 | bh->b_blocknr = -1; |
698 | ntfs_error(vol->sb, "Failed to write to inode 0x%lx, " | 765 | ntfs_error(vol->sb, "Failed to write to inode 0x%lx, " |
699 | "attribute type 0x%x, vcn 0x%llx, offset 0x%x " | 766 | "attribute type 0x%x, vcn 0x%llx, offset 0x%x " |
700 | "because its location on disk could not be " | 767 | "because its location on disk could not be " |
701 | "determined%s (error code %lli).", ni->mft_no, | 768 | "determined%s (error code %i).", ni->mft_no, |
702 | ni->type, (unsigned long long)vcn, | 769 | ni->type, (unsigned long long)vcn, |
703 | vcn_ofs, is_retry ? " even after " | 770 | vcn_ofs, is_retry ? " even after " |
704 | "retrying" : "", (long long)lcn); | 771 | "retrying" : "", err); |
705 | if (!err) | ||
706 | err = -EIO; | ||
707 | break; | 772 | break; |
708 | } while (block++, (bh = bh->b_this_page) != head); | 773 | } while (block++, (bh = bh->b_this_page) != head); |
709 | 774 | ||
@@ -714,7 +779,7 @@ lock_retry_remap: | |||
714 | /* For the error case, need to reset bh to the beginning. */ | 779 | /* For the error case, need to reset bh to the beginning. */ |
715 | bh = head; | 780 | bh = head; |
716 | 781 | ||
717 | /* Just an optimization, so ->readpage() isn't called later. */ | 782 | /* Just an optimization, so ->readpage() is not called later. */ |
718 | if (unlikely(!PageUptodate(page))) { | 783 | if (unlikely(!PageUptodate(page))) { |
719 | int uptodate = 1; | 784 | int uptodate = 1; |
720 | do { | 785 | do { |
@@ -730,7 +795,6 @@ lock_retry_remap: | |||
730 | 795 | ||
731 | /* Setup all mapped, dirty buffers for async write i/o. */ | 796 | /* Setup all mapped, dirty buffers for async write i/o. */ |
732 | do { | 797 | do { |
733 | get_bh(bh); | ||
734 | if (buffer_mapped(bh) && buffer_dirty(bh)) { | 798 | if (buffer_mapped(bh) && buffer_dirty(bh)) { |
735 | lock_buffer(bh); | 799 | lock_buffer(bh); |
736 | if (test_clear_buffer_dirty(bh)) { | 800 | if (test_clear_buffer_dirty(bh)) { |
@@ -768,14 +832,8 @@ lock_retry_remap: | |||
768 | 832 | ||
769 | BUG_ON(PageWriteback(page)); | 833 | BUG_ON(PageWriteback(page)); |
770 | set_page_writeback(page); /* Keeps try_to_free_buffers() away. */ | 834 | set_page_writeback(page); /* Keeps try_to_free_buffers() away. */ |
771 | unlock_page(page); | ||
772 | 835 | ||
773 | /* | 836 | /* Submit the prepared buffers for i/o. */ |
774 | * Submit the prepared buffers for i/o. Note the page is unlocked, | ||
775 | * and the async write i/o completion handler can end_page_writeback() | ||
776 | * at any time after the *first* submit_bh(). So the buffers can then | ||
777 | * disappear... | ||
778 | */ | ||
779 | need_end_writeback = TRUE; | 837 | need_end_writeback = TRUE; |
780 | do { | 838 | do { |
781 | struct buffer_head *next = bh->b_this_page; | 839 | struct buffer_head *next = bh->b_this_page; |
@@ -783,9 +841,9 @@ lock_retry_remap: | |||
783 | submit_bh(WRITE, bh); | 841 | submit_bh(WRITE, bh); |
784 | need_end_writeback = FALSE; | 842 | need_end_writeback = FALSE; |
785 | } | 843 | } |
786 | put_bh(bh); | ||
787 | bh = next; | 844 | bh = next; |
788 | } while (bh != head); | 845 | } while (bh != head); |
846 | unlock_page(page); | ||
789 | 847 | ||
790 | /* If no i/o was started, need to end_page_writeback(). */ | 848 | /* If no i/o was started, need to end_page_writeback(). */ |
791 | if (unlikely(need_end_writeback)) | 849 | if (unlikely(need_end_writeback)) |
@@ -860,7 +918,6 @@ static int ntfs_write_mst_block(struct page *page, | |||
860 | sync = (wbc->sync_mode == WB_SYNC_ALL); | 918 | sync = (wbc->sync_mode == WB_SYNC_ALL); |
861 | 919 | ||
862 | /* Make sure we have mapped buffers. */ | 920 | /* Make sure we have mapped buffers. */ |
863 | BUG_ON(!page_has_buffers(page)); | ||
864 | bh = head = page_buffers(page); | 921 | bh = head = page_buffers(page); |
865 | BUG_ON(!bh); | 922 | BUG_ON(!bh); |
866 | 923 | ||
@@ -1280,38 +1337,42 @@ retry_writepage: | |||
1280 | ntfs_debug("Write outside i_size - truncated?"); | 1337 | ntfs_debug("Write outside i_size - truncated?"); |
1281 | return 0; | 1338 | return 0; |
1282 | } | 1339 | } |
1340 | /* | ||
1341 | * Only $DATA attributes can be encrypted and only unnamed $DATA | ||
1342 | * attributes can be compressed. Index root can have the flags set but | ||
1343 | * this means to create compressed/encrypted files, not that the | ||
1344 | * attribute is compressed/encrypted. | ||
1345 | */ | ||
1346 | if (ni->type != AT_INDEX_ROOT) { | ||
1347 | /* If file is encrypted, deny access, just like NT4. */ | ||
1348 | if (NInoEncrypted(ni)) { | ||
1349 | unlock_page(page); | ||
1350 | BUG_ON(ni->type != AT_DATA); | ||
1351 | ntfs_debug("Denying write access to encrypted " | ||
1352 | "file."); | ||
1353 | return -EACCES; | ||
1354 | } | ||
1355 | /* Compressed data streams are handled in compress.c. */ | ||
1356 | if (NInoNonResident(ni) && NInoCompressed(ni)) { | ||
1357 | BUG_ON(ni->type != AT_DATA); | ||
1358 | BUG_ON(ni->name_len); | ||
1359 | // TODO: Implement and replace this with | ||
1360 | // return ntfs_write_compressed_block(page); | ||
1361 | unlock_page(page); | ||
1362 | ntfs_error(vi->i_sb, "Writing to compressed files is " | ||
1363 | "not supported yet. Sorry."); | ||
1364 | return -EOPNOTSUPP; | ||
1365 | } | ||
1366 | // TODO: Implement and remove this check. | ||
1367 | if (NInoNonResident(ni) && NInoSparse(ni)) { | ||
1368 | unlock_page(page); | ||
1369 | ntfs_error(vi->i_sb, "Writing to sparse files is not " | ||
1370 | "supported yet. Sorry."); | ||
1371 | return -EOPNOTSUPP; | ||
1372 | } | ||
1373 | } | ||
1283 | /* NInoNonResident() == NInoIndexAllocPresent() */ | 1374 | /* NInoNonResident() == NInoIndexAllocPresent() */ |
1284 | if (NInoNonResident(ni)) { | 1375 | if (NInoNonResident(ni)) { |
1285 | /* | ||
1286 | * Only unnamed $DATA attributes can be compressed, encrypted, | ||
1287 | * and/or sparse. | ||
1288 | */ | ||
1289 | if (ni->type == AT_DATA && !ni->name_len) { | ||
1290 | /* If file is encrypted, deny access, just like NT4. */ | ||
1291 | if (NInoEncrypted(ni)) { | ||
1292 | unlock_page(page); | ||
1293 | ntfs_debug("Denying write access to encrypted " | ||
1294 | "file."); | ||
1295 | return -EACCES; | ||
1296 | } | ||
1297 | /* Compressed data streams are handled in compress.c. */ | ||
1298 | if (NInoCompressed(ni)) { | ||
1299 | // TODO: Implement and replace this check with | ||
1300 | // return ntfs_write_compressed_block(page); | ||
1301 | unlock_page(page); | ||
1302 | ntfs_error(vi->i_sb, "Writing to compressed " | ||
1303 | "files is not supported yet. " | ||
1304 | "Sorry."); | ||
1305 | return -EOPNOTSUPP; | ||
1306 | } | ||
1307 | // TODO: Implement and remove this check. | ||
1308 | if (NInoSparse(ni)) { | ||
1309 | unlock_page(page); | ||
1310 | ntfs_error(vi->i_sb, "Writing to sparse files " | ||
1311 | "is not supported yet. Sorry."); | ||
1312 | return -EOPNOTSUPP; | ||
1313 | } | ||
1314 | } | ||
1315 | /* We have to zero every time due to mmap-at-end-of-file. */ | 1376 | /* We have to zero every time due to mmap-at-end-of-file. */ |
1316 | if (page->index >= (i_size >> PAGE_CACHE_SHIFT)) { | 1377 | if (page->index >= (i_size >> PAGE_CACHE_SHIFT)) { |
1317 | /* The page straddles i_size. */ | 1378 | /* The page straddles i_size. */ |
@@ -1324,14 +1385,16 @@ retry_writepage: | |||
1324 | /* Handle mst protected attributes. */ | 1385 | /* Handle mst protected attributes. */ |
1325 | if (NInoMstProtected(ni)) | 1386 | if (NInoMstProtected(ni)) |
1326 | return ntfs_write_mst_block(page, wbc); | 1387 | return ntfs_write_mst_block(page, wbc); |
1327 | /* Normal data stream. */ | 1388 | /* Normal, non-resident data stream. */ |
1328 | return ntfs_write_block(page, wbc); | 1389 | return ntfs_write_block(page, wbc); |
1329 | } | 1390 | } |
1330 | /* | 1391 | /* |
1331 | * Attribute is resident, implying it is not compressed, encrypted, | 1392 | * Attribute is resident, implying it is not compressed, encrypted, or |
1332 | * sparse, or mst protected. This also means the attribute is smaller | 1393 | * mst protected. This also means the attribute is smaller than an mft |
1333 | * than an mft record and hence smaller than a page, so can simply | 1394 | * record and hence smaller than a page, so can simply return error on |
1334 | * return error on any pages with index above 0. | 1395 | * any pages with index above 0. Note the attribute can actually be |
1396 | * marked compressed but if it is resident the actual data is not | ||
1397 | * compressed so we are ok to ignore the compressed flag here. | ||
1335 | */ | 1398 | */ |
1336 | BUG_ON(page_has_buffers(page)); | 1399 | BUG_ON(page_has_buffers(page)); |
1337 | BUG_ON(!PageUptodate(page)); | 1400 | BUG_ON(!PageUptodate(page)); |
@@ -1380,30 +1443,14 @@ retry_writepage: | |||
1380 | BUG_ON(PageWriteback(page)); | 1443 | BUG_ON(PageWriteback(page)); |
1381 | set_page_writeback(page); | 1444 | set_page_writeback(page); |
1382 | unlock_page(page); | 1445 | unlock_page(page); |
1383 | |||
1384 | /* | 1446 | /* |
1385 | * Here, we don't need to zero the out of bounds area everytime because | 1447 | * Here, we do not need to zero the out of bounds area everytime |
1386 | * the below memcpy() already takes care of the mmap-at-end-of-file | 1448 | * because the below memcpy() already takes care of the |
1387 | * requirements. If the file is converted to a non-resident one, then | 1449 | * mmap-at-end-of-file requirements. If the file is converted to a |
1388 | * the code path use is switched to the non-resident one where the | 1450 | * non-resident one, then the code path use is switched to the |
1389 | * zeroing happens on each ntfs_writepage() invocation. | 1451 | * non-resident one where the zeroing happens on each ntfs_writepage() |
1390 | * | 1452 | * invocation. |
1391 | * The above also applies nicely when i_size is decreased. | ||
1392 | * | ||
1393 | * When i_size is increased, the memory between the old and new i_size | ||
1394 | * _must_ be zeroed (or overwritten with new data). Otherwise we will | ||
1395 | * expose data to userspace/disk which should never have been exposed. | ||
1396 | * | ||
1397 | * FIXME: Ensure that i_size increases do the zeroing/overwriting and | ||
1398 | * if we cannot guarantee that, then enable the zeroing below. If the | ||
1399 | * zeroing below is enabled, we MUST move the unlock_page() from above | ||
1400 | * to after the kunmap_atomic(), i.e. just before the | ||
1401 | * end_page_writeback(). | ||
1402 | * UPDATE: ntfs_prepare/commit_write() do the zeroing on i_size | ||
1403 | * increases for resident attributes so those are ok. | ||
1404 | * TODO: ntfs_truncate(), others? | ||
1405 | */ | 1453 | */ |
1406 | |||
1407 | attr_len = le32_to_cpu(ctx->attr->data.resident.value_length); | 1454 | attr_len = le32_to_cpu(ctx->attr->data.resident.value_length); |
1408 | i_size = i_size_read(vi); | 1455 | i_size = i_size_read(vi); |
1409 | if (unlikely(attr_len > i_size)) { | 1456 | if (unlikely(attr_len > i_size)) { |
@@ -1681,27 +1728,25 @@ lock_retry_remap: | |||
1681 | if (likely(!err)) | 1728 | if (likely(!err)) |
1682 | goto lock_retry_remap; | 1729 | goto lock_retry_remap; |
1683 | rl = NULL; | 1730 | rl = NULL; |
1684 | lcn = err; | ||
1685 | } else if (!rl) | 1731 | } else if (!rl) |
1686 | up_read(&ni->runlist.lock); | 1732 | up_read(&ni->runlist.lock); |
1687 | /* | 1733 | /* |
1688 | * Failed to map the buffer, even after | 1734 | * Failed to map the buffer, even after |
1689 | * retrying. | 1735 | * retrying. |
1690 | */ | 1736 | */ |
1737 | if (!err) | ||
1738 | err = -EIO; | ||
1691 | bh->b_blocknr = -1; | 1739 | bh->b_blocknr = -1; |
1692 | ntfs_error(vol->sb, "Failed to write to inode " | 1740 | ntfs_error(vol->sb, "Failed to write to inode " |
1693 | "0x%lx, attribute type 0x%x, " | 1741 | "0x%lx, attribute type 0x%x, " |
1694 | "vcn 0x%llx, offset 0x%x " | 1742 | "vcn 0x%llx, offset 0x%x " |
1695 | "because its location on disk " | 1743 | "because its location on disk " |
1696 | "could not be determined%s " | 1744 | "could not be determined%s " |
1697 | "(error code %lli).", | 1745 | "(error code %i).", |
1698 | ni->mft_no, ni->type, | 1746 | ni->mft_no, ni->type, |
1699 | (unsigned long long)vcn, | 1747 | (unsigned long long)vcn, |
1700 | vcn_ofs, is_retry ? " even " | 1748 | vcn_ofs, is_retry ? " even " |
1701 | "after retrying" : "", | 1749 | "after retrying" : "", err); |
1702 | (long long)lcn); | ||
1703 | if (!err) | ||
1704 | err = -EIO; | ||
1705 | goto err_out; | 1750 | goto err_out; |
1706 | } | 1751 | } |
1707 | /* We now have a successful remap, i.e. lcn >= 0. */ | 1752 | /* We now have a successful remap, i.e. lcn >= 0. */ |
@@ -2357,6 +2402,7 @@ void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs) { | |||
2357 | buffers_to_free = bh; | 2402 | buffers_to_free = bh; |
2358 | } | 2403 | } |
2359 | bh = head = page_buffers(page); | 2404 | bh = head = page_buffers(page); |
2405 | BUG_ON(!bh); | ||
2360 | do { | 2406 | do { |
2361 | bh_ofs = bh_offset(bh); | 2407 | bh_ofs = bh_offset(bh); |
2362 | if (bh_ofs + bh_size <= ofs) | 2408 | if (bh_ofs + bh_size <= ofs) |
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c index cd0f9e740b14..3f9a4ff42ee5 100644 --- a/fs/ntfs/attrib.c +++ b/fs/ntfs/attrib.c | |||
@@ -43,6 +43,9 @@ | |||
43 | * which is not an error as such. This is -ENOENT. It means that @vcn is out | 43 | * which is not an error as such. This is -ENOENT. It means that @vcn is out |
44 | * of bounds of the runlist. | 44 | * of bounds of the runlist. |
45 | * | 45 | * |
46 | * Note the runlist can be NULL after this function returns if @vcn is zero and | ||
47 | * the attribute has zero allocated size, i.e. there simply is no runlist. | ||
48 | * | ||
46 | * Locking: - The runlist must be locked for writing. | 49 | * Locking: - The runlist must be locked for writing. |
47 | * - This function modifies the runlist. | 50 | * - This function modifies the runlist. |
48 | */ | 51 | */ |
@@ -54,6 +57,7 @@ int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn) | |||
54 | ATTR_RECORD *a; | 57 | ATTR_RECORD *a; |
55 | ntfs_attr_search_ctx *ctx; | 58 | ntfs_attr_search_ctx *ctx; |
56 | runlist_element *rl; | 59 | runlist_element *rl; |
60 | unsigned long flags; | ||
57 | int err = 0; | 61 | int err = 0; |
58 | 62 | ||
59 | ntfs_debug("Mapping runlist part containing vcn 0x%llx.", | 63 | ntfs_debug("Mapping runlist part containing vcn 0x%llx.", |
@@ -85,8 +89,11 @@ int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn) | |||
85 | * ntfs_mapping_pairs_decompress() fails. | 89 | * ntfs_mapping_pairs_decompress() fails. |
86 | */ | 90 | */ |
87 | end_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn) + 1; | 91 | end_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn) + 1; |
88 | if (unlikely(!a->data.non_resident.lowest_vcn && end_vcn <= 1)) | 92 | if (unlikely(!a->data.non_resident.lowest_vcn && end_vcn <= 1)) { |
93 | read_lock_irqsave(&ni->size_lock, flags); | ||
89 | end_vcn = ni->allocated_size >> ni->vol->cluster_size_bits; | 94 | end_vcn = ni->allocated_size >> ni->vol->cluster_size_bits; |
95 | read_unlock_irqrestore(&ni->size_lock, flags); | ||
96 | } | ||
90 | if (unlikely(vcn >= end_vcn)) { | 97 | if (unlikely(vcn >= end_vcn)) { |
91 | err = -ENOENT; | 98 | err = -ENOENT; |
92 | goto err_out; | 99 | goto err_out; |
@@ -165,6 +172,7 @@ LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn, | |||
165 | const BOOL write_locked) | 172 | const BOOL write_locked) |
166 | { | 173 | { |
167 | LCN lcn; | 174 | LCN lcn; |
175 | unsigned long flags; | ||
168 | BOOL is_retry = FALSE; | 176 | BOOL is_retry = FALSE; |
169 | 177 | ||
170 | ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, %s_locked.", | 178 | ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, %s_locked.", |
@@ -173,6 +181,14 @@ LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn, | |||
173 | BUG_ON(!ni); | 181 | BUG_ON(!ni); |
174 | BUG_ON(!NInoNonResident(ni)); | 182 | BUG_ON(!NInoNonResident(ni)); |
175 | BUG_ON(vcn < 0); | 183 | BUG_ON(vcn < 0); |
184 | if (!ni->runlist.rl) { | ||
185 | read_lock_irqsave(&ni->size_lock, flags); | ||
186 | if (!ni->allocated_size) { | ||
187 | read_unlock_irqrestore(&ni->size_lock, flags); | ||
188 | return LCN_ENOENT; | ||
189 | } | ||
190 | read_unlock_irqrestore(&ni->size_lock, flags); | ||
191 | } | ||
176 | retry_remap: | 192 | retry_remap: |
177 | /* Convert vcn to lcn. If that fails map the runlist and retry once. */ | 193 | /* Convert vcn to lcn. If that fails map the runlist and retry once. */ |
178 | lcn = ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn); | 194 | lcn = ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn); |
@@ -255,6 +271,7 @@ retry_remap: | |||
255 | runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni, const VCN vcn, | 271 | runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni, const VCN vcn, |
256 | const BOOL write_locked) | 272 | const BOOL write_locked) |
257 | { | 273 | { |
274 | unsigned long flags; | ||
258 | runlist_element *rl; | 275 | runlist_element *rl; |
259 | int err = 0; | 276 | int err = 0; |
260 | BOOL is_retry = FALSE; | 277 | BOOL is_retry = FALSE; |
@@ -265,6 +282,14 @@ runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni, const VCN vcn, | |||
265 | BUG_ON(!ni); | 282 | BUG_ON(!ni); |
266 | BUG_ON(!NInoNonResident(ni)); | 283 | BUG_ON(!NInoNonResident(ni)); |
267 | BUG_ON(vcn < 0); | 284 | BUG_ON(vcn < 0); |
285 | if (!ni->runlist.rl) { | ||
286 | read_lock_irqsave(&ni->size_lock, flags); | ||
287 | if (!ni->allocated_size) { | ||
288 | read_unlock_irqrestore(&ni->size_lock, flags); | ||
289 | return ERR_PTR(-ENOENT); | ||
290 | } | ||
291 | read_unlock_irqrestore(&ni->size_lock, flags); | ||
292 | } | ||
268 | retry_remap: | 293 | retry_remap: |
269 | rl = ni->runlist.rl; | 294 | rl = ni->runlist.rl; |
270 | if (likely(rl && vcn >= rl[0].vcn)) { | 295 | if (likely(rl && vcn >= rl[0].vcn)) { |
@@ -528,6 +553,11 @@ int load_attribute_list(ntfs_volume *vol, runlist *runlist, u8 *al_start, | |||
528 | block_size_bits = sb->s_blocksize_bits; | 553 | block_size_bits = sb->s_blocksize_bits; |
529 | down_read(&runlist->lock); | 554 | down_read(&runlist->lock); |
530 | rl = runlist->rl; | 555 | rl = runlist->rl; |
556 | if (!rl) { | ||
557 | ntfs_error(sb, "Cannot read attribute list since runlist is " | ||
558 | "missing."); | ||
559 | goto err_out; | ||
560 | } | ||
531 | /* Read all clusters specified by the runlist one run at a time. */ | 561 | /* Read all clusters specified by the runlist one run at a time. */ |
532 | while (rl->length) { | 562 | while (rl->length) { |
533 | lcn = ntfs_rl_vcn_to_lcn(rl, rl->vcn); | 563 | lcn = ntfs_rl_vcn_to_lcn(rl, rl->vcn); |
@@ -1247,6 +1277,46 @@ int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size) | |||
1247 | } | 1277 | } |
1248 | 1278 | ||
1249 | /** | 1279 | /** |
1280 | * ntfs_resident_attr_value_resize - resize the value of a resident attribute | ||
1281 | * @m: mft record containing attribute record | ||
1282 | * @a: attribute record whose value to resize | ||
1283 | * @new_size: new size in bytes to which to resize the attribute value of @a | ||
1284 | * | ||
1285 | * Resize the value of the attribute @a in the mft record @m to @new_size bytes. | ||
1286 | * If the value is made bigger, the newly allocated space is cleared. | ||
1287 | * | ||
1288 | * Return 0 on success and -errno on error. The following error codes are | ||
1289 | * defined: | ||
1290 | * -ENOSPC - Not enough space in the mft record @m to perform the resize. | ||
1291 | * | ||
1292 | * Note: On error, no modifications have been performed whatsoever. | ||
1293 | * | ||
1294 | * Warning: If you make a record smaller without having copied all the data you | ||
1295 | * are interested in the data may be overwritten. | ||
1296 | */ | ||
1297 | int ntfs_resident_attr_value_resize(MFT_RECORD *m, ATTR_RECORD *a, | ||
1298 | const u32 new_size) | ||
1299 | { | ||
1300 | u32 old_size; | ||
1301 | |||
1302 | /* Resize the resident part of the attribute record. */ | ||
1303 | if (ntfs_attr_record_resize(m, a, | ||
1304 | le16_to_cpu(a->data.resident.value_offset) + new_size)) | ||
1305 | return -ENOSPC; | ||
1306 | /* | ||
1307 | * The resize succeeded! If we made the attribute value bigger, clear | ||
1308 | * the area between the old size and @new_size. | ||
1309 | */ | ||
1310 | old_size = le32_to_cpu(a->data.resident.value_length); | ||
1311 | if (new_size > old_size) | ||
1312 | memset((u8*)a + le16_to_cpu(a->data.resident.value_offset) + | ||
1313 | old_size, 0, new_size - old_size); | ||
1314 | /* Finally update the length of the attribute value. */ | ||
1315 | a->data.resident.value_length = cpu_to_le32(new_size); | ||
1316 | return 0; | ||
1317 | } | ||
1318 | |||
1319 | /** | ||
1250 | * ntfs_attr_make_non_resident - convert a resident to a non-resident attribute | 1320 | * ntfs_attr_make_non_resident - convert a resident to a non-resident attribute |
1251 | * @ni: ntfs inode describing the attribute to convert | 1321 | * @ni: ntfs inode describing the attribute to convert |
1252 | * | 1322 | * |
@@ -1302,6 +1372,12 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni) | |||
1302 | return err; | 1372 | return err; |
1303 | } | 1373 | } |
1304 | /* | 1374 | /* |
1375 | * FIXME: Compressed and encrypted attributes are not supported when | ||
1376 | * writing and we should never have gotten here for them. | ||
1377 | */ | ||
1378 | BUG_ON(NInoCompressed(ni)); | ||
1379 | BUG_ON(NInoEncrypted(ni)); | ||
1380 | /* | ||
1305 | * The size needs to be aligned to a cluster boundary for allocation | 1381 | * The size needs to be aligned to a cluster boundary for allocation |
1306 | * purposes. | 1382 | * purposes. |
1307 | */ | 1383 | */ |
@@ -1377,10 +1453,15 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni) | |||
1377 | BUG_ON(a->non_resident); | 1453 | BUG_ON(a->non_resident); |
1378 | /* | 1454 | /* |
1379 | * Calculate new offsets for the name and the mapping pairs array. | 1455 | * Calculate new offsets for the name and the mapping pairs array. |
1380 | * We assume the attribute is not compressed or sparse. | ||
1381 | */ | 1456 | */ |
1382 | name_ofs = (offsetof(ATTR_REC, | 1457 | if (NInoSparse(ni) || NInoCompressed(ni)) |
1383 | data.non_resident.compressed_size) + 7) & ~7; | 1458 | name_ofs = (offsetof(ATTR_REC, |
1459 | data.non_resident.compressed_size) + | ||
1460 | sizeof(a->data.non_resident.compressed_size) + | ||
1461 | 7) & ~7; | ||
1462 | else | ||
1463 | name_ofs = (offsetof(ATTR_REC, | ||
1464 | data.non_resident.compressed_size) + 7) & ~7; | ||
1384 | mp_ofs = (name_ofs + a->name_length * sizeof(ntfschar) + 7) & ~7; | 1465 | mp_ofs = (name_ofs + a->name_length * sizeof(ntfschar) + 7) & ~7; |
1385 | /* | 1466 | /* |
1386 | * Determine the size of the resident part of the now non-resident | 1467 | * Determine the size of the resident part of the now non-resident |
@@ -1419,24 +1500,23 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni) | |||
1419 | memmove((u8*)a + name_ofs, (u8*)a + le16_to_cpu(a->name_offset), | 1500 | memmove((u8*)a + name_ofs, (u8*)a + le16_to_cpu(a->name_offset), |
1420 | a->name_length * sizeof(ntfschar)); | 1501 | a->name_length * sizeof(ntfschar)); |
1421 | a->name_offset = cpu_to_le16(name_ofs); | 1502 | a->name_offset = cpu_to_le16(name_ofs); |
1422 | /* | ||
1423 | * FIXME: For now just clear all of these as we do not support them | ||
1424 | * when writing. | ||
1425 | */ | ||
1426 | a->flags &= cpu_to_le16(0xffff & ~le16_to_cpu(ATTR_IS_SPARSE | | ||
1427 | ATTR_IS_ENCRYPTED | ATTR_COMPRESSION_MASK)); | ||
1428 | /* Setup the fields specific to non-resident attributes. */ | 1503 | /* Setup the fields specific to non-resident attributes. */ |
1429 | a->data.non_resident.lowest_vcn = 0; | 1504 | a->data.non_resident.lowest_vcn = 0; |
1430 | a->data.non_resident.highest_vcn = cpu_to_sle64((new_size - 1) >> | 1505 | a->data.non_resident.highest_vcn = cpu_to_sle64((new_size - 1) >> |
1431 | vol->cluster_size_bits); | 1506 | vol->cluster_size_bits); |
1432 | a->data.non_resident.mapping_pairs_offset = cpu_to_le16(mp_ofs); | 1507 | a->data.non_resident.mapping_pairs_offset = cpu_to_le16(mp_ofs); |
1433 | a->data.non_resident.compression_unit = 0; | ||
1434 | memset(&a->data.non_resident.reserved, 0, | 1508 | memset(&a->data.non_resident.reserved, 0, |
1435 | sizeof(a->data.non_resident.reserved)); | 1509 | sizeof(a->data.non_resident.reserved)); |
1436 | a->data.non_resident.allocated_size = cpu_to_sle64(new_size); | 1510 | a->data.non_resident.allocated_size = cpu_to_sle64(new_size); |
1437 | a->data.non_resident.data_size = | 1511 | a->data.non_resident.data_size = |
1438 | a->data.non_resident.initialized_size = | 1512 | a->data.non_resident.initialized_size = |
1439 | cpu_to_sle64(attr_size); | 1513 | cpu_to_sle64(attr_size); |
1514 | if (NInoSparse(ni) || NInoCompressed(ni)) { | ||
1515 | a->data.non_resident.compression_unit = 4; | ||
1516 | a->data.non_resident.compressed_size = | ||
1517 | a->data.non_resident.allocated_size; | ||
1518 | } else | ||
1519 | a->data.non_resident.compression_unit = 0; | ||
1440 | /* Generate the mapping pairs array into the attribute record. */ | 1520 | /* Generate the mapping pairs array into the attribute record. */ |
1441 | err = ntfs_mapping_pairs_build(vol, (u8*)a + mp_ofs, | 1521 | err = ntfs_mapping_pairs_build(vol, (u8*)a + mp_ofs, |
1442 | arec_size - mp_ofs, rl, 0, -1, NULL); | 1522 | arec_size - mp_ofs, rl, 0, -1, NULL); |
@@ -1446,16 +1526,19 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni) | |||
1446 | goto undo_err_out; | 1526 | goto undo_err_out; |
1447 | } | 1527 | } |
1448 | /* Setup the in-memory attribute structure to be non-resident. */ | 1528 | /* Setup the in-memory attribute structure to be non-resident. */ |
1449 | /* | ||
1450 | * FIXME: For now just clear all of these as we do not support them | ||
1451 | * when writing. | ||
1452 | */ | ||
1453 | NInoClearSparse(ni); | ||
1454 | NInoClearEncrypted(ni); | ||
1455 | NInoClearCompressed(ni); | ||
1456 | ni->runlist.rl = rl; | 1529 | ni->runlist.rl = rl; |
1457 | write_lock_irqsave(&ni->size_lock, flags); | 1530 | write_lock_irqsave(&ni->size_lock, flags); |
1458 | ni->allocated_size = new_size; | 1531 | ni->allocated_size = new_size; |
1532 | if (NInoSparse(ni) || NInoCompressed(ni)) { | ||
1533 | ni->itype.compressed.size = ni->allocated_size; | ||
1534 | ni->itype.compressed.block_size = 1U << | ||
1535 | (a->data.non_resident.compression_unit + | ||
1536 | vol->cluster_size_bits); | ||
1537 | ni->itype.compressed.block_size_bits = | ||
1538 | ffs(ni->itype.compressed.block_size) - 1; | ||
1539 | ni->itype.compressed.block_clusters = 1U << | ||
1540 | a->data.non_resident.compression_unit; | ||
1541 | } | ||
1459 | write_unlock_irqrestore(&ni->size_lock, flags); | 1542 | write_unlock_irqrestore(&ni->size_lock, flags); |
1460 | /* | 1543 | /* |
1461 | * This needs to be last since the address space operations ->readpage | 1544 | * This needs to be last since the address space operations ->readpage |
@@ -1603,6 +1686,12 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val) | |||
1603 | BUG_ON(cnt < 0); | 1686 | BUG_ON(cnt < 0); |
1604 | if (!cnt) | 1687 | if (!cnt) |
1605 | goto done; | 1688 | goto done; |
1689 | /* | ||
1690 | * FIXME: Compressed and encrypted attributes are not supported when | ||
1691 | * writing and we should never have gotten here for them. | ||
1692 | */ | ||
1693 | BUG_ON(NInoCompressed(ni)); | ||
1694 | BUG_ON(NInoEncrypted(ni)); | ||
1606 | mapping = VFS_I(ni)->i_mapping; | 1695 | mapping = VFS_I(ni)->i_mapping; |
1607 | /* Work out the starting index and page offset. */ | 1696 | /* Work out the starting index and page offset. */ |
1608 | idx = ofs >> PAGE_CACHE_SHIFT; | 1697 | idx = ofs >> PAGE_CACHE_SHIFT; |
diff --git a/fs/ntfs/attrib.h b/fs/ntfs/attrib.h index 0e4ac6d3c0e7..0618ed6fd7b3 100644 --- a/fs/ntfs/attrib.h +++ b/fs/ntfs/attrib.h | |||
@@ -99,6 +99,8 @@ extern int ntfs_attr_can_be_resident(const ntfs_volume *vol, | |||
99 | const ATTR_TYPE type); | 99 | const ATTR_TYPE type); |
100 | 100 | ||
101 | extern int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size); | 101 | extern int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size); |
102 | extern int ntfs_resident_attr_value_resize(MFT_RECORD *m, ATTR_RECORD *a, | ||
103 | const u32 new_size); | ||
102 | 104 | ||
103 | extern int ntfs_attr_make_non_resident(ntfs_inode *ni); | 105 | extern int ntfs_attr_make_non_resident(ntfs_inode *ni); |
104 | 106 | ||
diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c index 6d265cfd49aa..25d24106f893 100644 --- a/fs/ntfs/compress.c +++ b/fs/ntfs/compress.c | |||
@@ -539,7 +539,6 @@ int ntfs_read_compressed_block(struct page *page) | |||
539 | if (unlikely(!pages || !bhs)) { | 539 | if (unlikely(!pages || !bhs)) { |
540 | kfree(bhs); | 540 | kfree(bhs); |
541 | kfree(pages); | 541 | kfree(pages); |
542 | SetPageError(page); | ||
543 | unlock_page(page); | 542 | unlock_page(page); |
544 | ntfs_error(vol->sb, "Failed to allocate internal buffers."); | 543 | ntfs_error(vol->sb, "Failed to allocate internal buffers."); |
545 | return -ENOMEM; | 544 | return -ENOMEM; |
@@ -871,9 +870,6 @@ lock_retry_remap: | |||
871 | for (; prev_cur_page < cur_page; prev_cur_page++) { | 870 | for (; prev_cur_page < cur_page; prev_cur_page++) { |
872 | page = pages[prev_cur_page]; | 871 | page = pages[prev_cur_page]; |
873 | if (page) { | 872 | if (page) { |
874 | if (prev_cur_page == xpage && | ||
875 | !xpage_done) | ||
876 | SetPageError(page); | ||
877 | flush_dcache_page(page); | 873 | flush_dcache_page(page); |
878 | kunmap(page); | 874 | kunmap(page); |
879 | unlock_page(page); | 875 | unlock_page(page); |
@@ -904,8 +900,6 @@ lock_retry_remap: | |||
904 | "Terminating them with extreme " | 900 | "Terminating them with extreme " |
905 | "prejudice. Inode 0x%lx, page index " | 901 | "prejudice. Inode 0x%lx, page index " |
906 | "0x%lx.", ni->mft_no, page->index); | 902 | "0x%lx.", ni->mft_no, page->index); |
907 | if (cur_page == xpage && !xpage_done) | ||
908 | SetPageError(page); | ||
909 | flush_dcache_page(page); | 903 | flush_dcache_page(page); |
910 | kunmap(page); | 904 | kunmap(page); |
911 | unlock_page(page); | 905 | unlock_page(page); |
@@ -953,8 +947,6 @@ err_out: | |||
953 | for (i = cur_page; i < max_page; i++) { | 947 | for (i = cur_page; i < max_page; i++) { |
954 | page = pages[i]; | 948 | page = pages[i]; |
955 | if (page) { | 949 | if (page) { |
956 | if (i == xpage && !xpage_done) | ||
957 | SetPageError(page); | ||
958 | flush_dcache_page(page); | 950 | flush_dcache_page(page); |
959 | kunmap(page); | 951 | kunmap(page); |
960 | unlock_page(page); | 952 | unlock_page(page); |
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c index 46779471c542..795c3d1930f5 100644 --- a/fs/ntfs/dir.c +++ b/fs/ntfs/dir.c | |||
@@ -1051,7 +1051,8 @@ static inline int ntfs_filldir(ntfs_volume *vol, loff_t fpos, | |||
1051 | ie->key.file_name.file_name_length, &name, | 1051 | ie->key.file_name.file_name_length, &name, |
1052 | NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1); | 1052 | NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1); |
1053 | if (name_len <= 0) { | 1053 | if (name_len <= 0) { |
1054 | ntfs_debug("Skipping unrepresentable file."); | 1054 | ntfs_warning(vol->sb, "Skipping unrepresentable inode 0x%llx.", |
1055 | (long long)MREF_LE(ie->data.dir.indexed_file)); | ||
1055 | return 0; | 1056 | return 0; |
1056 | } | 1057 | } |
1057 | if (ie->key.file_name.file_attributes & | 1058 | if (ie->key.file_name.file_attributes & |
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index e0f530ce6b99..be9fd1dd423d 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * file.c - NTFS kernel file operations. Part of the Linux-NTFS project. | 2 | * file.c - NTFS kernel file operations. Part of the Linux-NTFS project. |
3 | * | 3 | * |
4 | * Copyright (c) 2001-2004 Anton Altaparmakov | 4 | * Copyright (c) 2001-2005 Anton Altaparmakov |
5 | * | 5 | * |
6 | * This program/include file is free software; you can redistribute it and/or | 6 | * This program/include file is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU General Public License as published | 7 | * modify it under the terms of the GNU General Public License as published |
@@ -94,6 +94,11 @@ static int ntfs_file_fsync(struct file *filp, struct dentry *dentry, | |||
94 | if (!datasync || !NInoNonResident(NTFS_I(vi))) | 94 | if (!datasync || !NInoNonResident(NTFS_I(vi))) |
95 | ret = ntfs_write_inode(vi, 1); | 95 | ret = ntfs_write_inode(vi, 1); |
96 | write_inode_now(vi, !datasync); | 96 | write_inode_now(vi, !datasync); |
97 | /* | ||
98 | * NOTE: If we were to use mapping->private_list (see ext2 and | ||
99 | * fs/buffer.c) for dirty blocks then we could optimize the below to be | ||
100 | * sync_mapping_buffers(vi->i_mapping). | ||
101 | */ | ||
97 | err = sync_blockdev(vi->i_sb->s_bdev); | 102 | err = sync_blockdev(vi->i_sb->s_bdev); |
98 | if (unlikely(err && !ret)) | 103 | if (unlikely(err && !ret)) |
99 | ret = err; | 104 | ret = err; |
diff --git a/fs/ntfs/index.c b/fs/ntfs/index.c index 11fd5307d780..8f2d5727546f 100644 --- a/fs/ntfs/index.c +++ b/fs/ntfs/index.c | |||
@@ -205,6 +205,7 @@ int ntfs_index_lookup(const void *key, const int key_len, | |||
205 | &ie->key, key_len)) { | 205 | &ie->key, key_len)) { |
206 | ir_done: | 206 | ir_done: |
207 | ictx->is_in_root = TRUE; | 207 | ictx->is_in_root = TRUE; |
208 | ictx->ir = ir; | ||
208 | ictx->actx = actx; | 209 | ictx->actx = actx; |
209 | ictx->base_ni = base_ni; | 210 | ictx->base_ni = base_ni; |
210 | ictx->ia = NULL; | 211 | ictx->ia = NULL; |
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 886214a77f90..dc4bbe3acf5c 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c | |||
@@ -1013,41 +1013,50 @@ skip_large_dir_stuff: | |||
1013 | } | 1013 | } |
1014 | a = ctx->attr; | 1014 | a = ctx->attr; |
1015 | /* Setup the state. */ | 1015 | /* Setup the state. */ |
1016 | if (a->non_resident) { | 1016 | if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_SPARSE)) { |
1017 | NInoSetNonResident(ni); | 1017 | if (a->flags & ATTR_COMPRESSION_MASK) { |
1018 | if (a->flags & (ATTR_COMPRESSION_MASK | | 1018 | NInoSetCompressed(ni); |
1019 | ATTR_IS_SPARSE)) { | 1019 | if (vol->cluster_size > 4096) { |
1020 | if (a->flags & ATTR_COMPRESSION_MASK) { | 1020 | ntfs_error(vi->i_sb, "Found " |
1021 | NInoSetCompressed(ni); | ||
1022 | if (vol->cluster_size > 4096) { | ||
1023 | ntfs_error(vi->i_sb, "Found " | ||
1024 | "compressed data but " | 1021 | "compressed data but " |
1025 | "compression is " | 1022 | "compression is " |
1026 | "disabled due to " | 1023 | "disabled due to " |
1027 | "cluster size (%i) > " | 1024 | "cluster size (%i) > " |
1028 | "4kiB.", | 1025 | "4kiB.", |
1029 | vol->cluster_size); | 1026 | vol->cluster_size); |
1030 | goto unm_err_out; | 1027 | goto unm_err_out; |
1031 | } | 1028 | } |
1032 | if ((a->flags & ATTR_COMPRESSION_MASK) | 1029 | if ((a->flags & ATTR_COMPRESSION_MASK) |
1033 | != ATTR_IS_COMPRESSED) { | 1030 | != ATTR_IS_COMPRESSED) { |
1034 | ntfs_error(vi->i_sb, "Found " | 1031 | ntfs_error(vi->i_sb, "Found unknown " |
1035 | "unknown compression " | 1032 | "compression method " |
1036 | "method or corrupt " | 1033 | "or corrupt file."); |
1037 | "file."); | 1034 | goto unm_err_out; |
1038 | goto unm_err_out; | ||
1039 | } | ||
1040 | } | 1035 | } |
1041 | if (a->flags & ATTR_IS_SPARSE) | 1036 | } |
1042 | NInoSetSparse(ni); | 1037 | if (a->flags & ATTR_IS_SPARSE) |
1038 | NInoSetSparse(ni); | ||
1039 | } | ||
1040 | if (a->flags & ATTR_IS_ENCRYPTED) { | ||
1041 | if (NInoCompressed(ni)) { | ||
1042 | ntfs_error(vi->i_sb, "Found encrypted and " | ||
1043 | "compressed data."); | ||
1044 | goto unm_err_out; | ||
1045 | } | ||
1046 | NInoSetEncrypted(ni); | ||
1047 | } | ||
1048 | if (a->non_resident) { | ||
1049 | NInoSetNonResident(ni); | ||
1050 | if (NInoCompressed(ni) || NInoSparse(ni)) { | ||
1043 | if (a->data.non_resident.compression_unit != | 1051 | if (a->data.non_resident.compression_unit != |
1044 | 4) { | 1052 | 4) { |
1045 | ntfs_error(vi->i_sb, "Found " | 1053 | ntfs_error(vi->i_sb, "Found " |
1046 | "nonstandard compression unit " | 1054 | "nonstandard " |
1047 | "(%u instead of 4). Cannot " | 1055 | "compression unit (%u " |
1048 | "handle this.", | 1056 | "instead of 4). " |
1049 | a->data.non_resident. | 1057 | "Cannot handle this.", |
1050 | compression_unit); | 1058 | a->data.non_resident. |
1059 | compression_unit); | ||
1051 | err = -EOPNOTSUPP; | 1060 | err = -EOPNOTSUPP; |
1052 | goto unm_err_out; | 1061 | goto unm_err_out; |
1053 | } | 1062 | } |
@@ -1065,14 +1074,6 @@ skip_large_dir_stuff: | |||
1065 | a->data.non_resident. | 1074 | a->data.non_resident. |
1066 | compressed_size); | 1075 | compressed_size); |
1067 | } | 1076 | } |
1068 | if (a->flags & ATTR_IS_ENCRYPTED) { | ||
1069 | if (a->flags & ATTR_COMPRESSION_MASK) { | ||
1070 | ntfs_error(vi->i_sb, "Found encrypted " | ||
1071 | "and compressed data."); | ||
1072 | goto unm_err_out; | ||
1073 | } | ||
1074 | NInoSetEncrypted(ni); | ||
1075 | } | ||
1076 | if (a->data.non_resident.lowest_vcn) { | 1077 | if (a->data.non_resident.lowest_vcn) { |
1077 | ntfs_error(vi->i_sb, "First extent of $DATA " | 1078 | ntfs_error(vi->i_sb, "First extent of $DATA " |
1078 | "attribute has non zero " | 1079 | "attribute has non zero " |
@@ -1212,6 +1213,75 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) | |||
1212 | if (unlikely(err)) | 1213 | if (unlikely(err)) |
1213 | goto unm_err_out; | 1214 | goto unm_err_out; |
1214 | a = ctx->attr; | 1215 | a = ctx->attr; |
1216 | if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_SPARSE)) { | ||
1217 | if (a->flags & ATTR_COMPRESSION_MASK) { | ||
1218 | NInoSetCompressed(ni); | ||
1219 | if ((ni->type != AT_DATA) || (ni->type == AT_DATA && | ||
1220 | ni->name_len)) { | ||
1221 | ntfs_error(vi->i_sb, "Found compressed " | ||
1222 | "non-data or named data " | ||
1223 | "attribute. Please report " | ||
1224 | "you saw this message to " | ||
1225 | "linux-ntfs-dev@lists." | ||
1226 | "sourceforge.net"); | ||
1227 | goto unm_err_out; | ||
1228 | } | ||
1229 | if (vol->cluster_size > 4096) { | ||
1230 | ntfs_error(vi->i_sb, "Found compressed " | ||
1231 | "attribute but compression is " | ||
1232 | "disabled due to cluster size " | ||
1233 | "(%i) > 4kiB.", | ||
1234 | vol->cluster_size); | ||
1235 | goto unm_err_out; | ||
1236 | } | ||
1237 | if ((a->flags & ATTR_COMPRESSION_MASK) != | ||
1238 | ATTR_IS_COMPRESSED) { | ||
1239 | ntfs_error(vi->i_sb, "Found unknown " | ||
1240 | "compression method."); | ||
1241 | goto unm_err_out; | ||
1242 | } | ||
1243 | } | ||
1244 | /* | ||
1245 | * The encryption flag set in an index root just means to | ||
1246 | * compress all files. | ||
1247 | */ | ||
1248 | if (NInoMstProtected(ni) && ni->type != AT_INDEX_ROOT) { | ||
1249 | ntfs_error(vi->i_sb, "Found mst protected attribute " | ||
1250 | "but the attribute is %s. Please " | ||
1251 | "report you saw this message to " | ||
1252 | "linux-ntfs-dev@lists.sourceforge.net", | ||
1253 | NInoCompressed(ni) ? "compressed" : | ||
1254 | "sparse"); | ||
1255 | goto unm_err_out; | ||
1256 | } | ||
1257 | if (a->flags & ATTR_IS_SPARSE) | ||
1258 | NInoSetSparse(ni); | ||
1259 | } | ||
1260 | if (a->flags & ATTR_IS_ENCRYPTED) { | ||
1261 | if (NInoCompressed(ni)) { | ||
1262 | ntfs_error(vi->i_sb, "Found encrypted and compressed " | ||
1263 | "data."); | ||
1264 | goto unm_err_out; | ||
1265 | } | ||
1266 | /* | ||
1267 | * The encryption flag set in an index root just means to | ||
1268 | * encrypt all files. | ||
1269 | */ | ||
1270 | if (NInoMstProtected(ni) && ni->type != AT_INDEX_ROOT) { | ||
1271 | ntfs_error(vi->i_sb, "Found mst protected attribute " | ||
1272 | "but the attribute is encrypted. " | ||
1273 | "Please report you saw this message " | ||
1274 | "to linux-ntfs-dev@lists.sourceforge." | ||
1275 | "net"); | ||
1276 | goto unm_err_out; | ||
1277 | } | ||
1278 | if (ni->type != AT_DATA) { | ||
1279 | ntfs_error(vi->i_sb, "Found encrypted non-data " | ||
1280 | "attribute."); | ||
1281 | goto unm_err_out; | ||
1282 | } | ||
1283 | NInoSetEncrypted(ni); | ||
1284 | } | ||
1215 | if (!a->non_resident) { | 1285 | if (!a->non_resident) { |
1216 | /* Ensure the attribute name is placed before the value. */ | 1286 | /* Ensure the attribute name is placed before the value. */ |
1217 | if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >= | 1287 | if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >= |
@@ -1220,11 +1290,10 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) | |||
1220 | "the attribute value."); | 1290 | "the attribute value."); |
1221 | goto unm_err_out; | 1291 | goto unm_err_out; |
1222 | } | 1292 | } |
1223 | if (NInoMstProtected(ni) || a->flags) { | 1293 | if (NInoMstProtected(ni)) { |
1224 | ntfs_error(vi->i_sb, "Found mst protected attribute " | 1294 | ntfs_error(vi->i_sb, "Found mst protected attribute " |
1225 | "or attribute with non-zero flags but " | 1295 | "but the attribute is resident. " |
1226 | "the attribute is resident. Please " | 1296 | "Please report you saw this message to " |
1227 | "report you saw this message to " | ||
1228 | "linux-ntfs-dev@lists.sourceforge.net"); | 1297 | "linux-ntfs-dev@lists.sourceforge.net"); |
1229 | goto unm_err_out; | 1298 | goto unm_err_out; |
1230 | } | 1299 | } |
@@ -1250,50 +1319,8 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) | |||
1250 | "the mapping pairs array."); | 1319 | "the mapping pairs array."); |
1251 | goto unm_err_out; | 1320 | goto unm_err_out; |
1252 | } | 1321 | } |
1253 | if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_SPARSE)) { | 1322 | if ((NInoCompressed(ni) || NInoSparse(ni)) && |
1254 | if (a->flags & ATTR_COMPRESSION_MASK) { | 1323 | ni->type != AT_INDEX_ROOT) { |
1255 | NInoSetCompressed(ni); | ||
1256 | if ((ni->type != AT_DATA) || (ni->type == | ||
1257 | AT_DATA && ni->name_len)) { | ||
1258 | ntfs_error(vi->i_sb, "Found compressed " | ||
1259 | "non-data or named " | ||
1260 | "data attribute. " | ||
1261 | "Please report you " | ||
1262 | "saw this message to " | ||
1263 | "linux-ntfs-dev@lists." | ||
1264 | "sourceforge.net"); | ||
1265 | goto unm_err_out; | ||
1266 | } | ||
1267 | if (vol->cluster_size > 4096) { | ||
1268 | ntfs_error(vi->i_sb, "Found compressed " | ||
1269 | "attribute but " | ||
1270 | "compression is " | ||
1271 | "disabled due to " | ||
1272 | "cluster size (%i) > " | ||
1273 | "4kiB.", | ||
1274 | vol->cluster_size); | ||
1275 | goto unm_err_out; | ||
1276 | } | ||
1277 | if ((a->flags & ATTR_COMPRESSION_MASK) != | ||
1278 | ATTR_IS_COMPRESSED) { | ||
1279 | ntfs_error(vi->i_sb, "Found unknown " | ||
1280 | "compression method."); | ||
1281 | goto unm_err_out; | ||
1282 | } | ||
1283 | } | ||
1284 | if (NInoMstProtected(ni)) { | ||
1285 | ntfs_error(vi->i_sb, "Found mst protected " | ||
1286 | "attribute but the attribute " | ||
1287 | "is %s. Please report you " | ||
1288 | "saw this message to " | ||
1289 | "linux-ntfs-dev@lists." | ||
1290 | "sourceforge.net", | ||
1291 | NInoCompressed(ni) ? | ||
1292 | "compressed" : "sparse"); | ||
1293 | goto unm_err_out; | ||
1294 | } | ||
1295 | if (a->flags & ATTR_IS_SPARSE) | ||
1296 | NInoSetSparse(ni); | ||
1297 | if (a->data.non_resident.compression_unit != 4) { | 1324 | if (a->data.non_resident.compression_unit != 4) { |
1298 | ntfs_error(vi->i_sb, "Found nonstandard " | 1325 | ntfs_error(vi->i_sb, "Found nonstandard " |
1299 | "compression unit (%u instead " | 1326 | "compression unit (%u instead " |
@@ -1313,23 +1340,6 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) | |||
1313 | ni->itype.compressed.size = sle64_to_cpu( | 1340 | ni->itype.compressed.size = sle64_to_cpu( |
1314 | a->data.non_resident.compressed_size); | 1341 | a->data.non_resident.compressed_size); |
1315 | } | 1342 | } |
1316 | if (a->flags & ATTR_IS_ENCRYPTED) { | ||
1317 | if (a->flags & ATTR_COMPRESSION_MASK) { | ||
1318 | ntfs_error(vi->i_sb, "Found encrypted and " | ||
1319 | "compressed data."); | ||
1320 | goto unm_err_out; | ||
1321 | } | ||
1322 | if (NInoMstProtected(ni)) { | ||
1323 | ntfs_error(vi->i_sb, "Found mst protected " | ||
1324 | "attribute but the attribute " | ||
1325 | "is encrypted. Please report " | ||
1326 | "you saw this message to " | ||
1327 | "linux-ntfs-dev@lists." | ||
1328 | "sourceforge.net"); | ||
1329 | goto unm_err_out; | ||
1330 | } | ||
1331 | NInoSetEncrypted(ni); | ||
1332 | } | ||
1333 | if (a->data.non_resident.lowest_vcn) { | 1343 | if (a->data.non_resident.lowest_vcn) { |
1334 | ntfs_error(vi->i_sb, "First extent of attribute has " | 1344 | ntfs_error(vi->i_sb, "First extent of attribute has " |
1335 | "non-zero lowest_vcn."); | 1345 | "non-zero lowest_vcn."); |
@@ -1348,12 +1358,12 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) | |||
1348 | vi->i_mapping->a_ops = &ntfs_mst_aops; | 1358 | vi->i_mapping->a_ops = &ntfs_mst_aops; |
1349 | else | 1359 | else |
1350 | vi->i_mapping->a_ops = &ntfs_aops; | 1360 | vi->i_mapping->a_ops = &ntfs_aops; |
1351 | if (NInoCompressed(ni) || NInoSparse(ni)) | 1361 | if ((NInoCompressed(ni) || NInoSparse(ni)) && ni->type != AT_INDEX_ROOT) |
1352 | vi->i_blocks = ni->itype.compressed.size >> 9; | 1362 | vi->i_blocks = ni->itype.compressed.size >> 9; |
1353 | else | 1363 | else |
1354 | vi->i_blocks = ni->allocated_size >> 9; | 1364 | vi->i_blocks = ni->allocated_size >> 9; |
1355 | /* | 1365 | /* |
1356 | * Make sure the base inode doesn't go away and attach it to the | 1366 | * Make sure the base inode does not go away and attach it to the |
1357 | * attribute inode. | 1367 | * attribute inode. |
1358 | */ | 1368 | */ |
1359 | igrab(base_vi); | 1369 | igrab(base_vi); |
@@ -1480,7 +1490,10 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi) | |||
1480 | "after the attribute value."); | 1490 | "after the attribute value."); |
1481 | goto unm_err_out; | 1491 | goto unm_err_out; |
1482 | } | 1492 | } |
1483 | /* Compressed/encrypted/sparse index root is not allowed. */ | 1493 | /* |
1494 | * Compressed/encrypted/sparse index root is not allowed, except for | ||
1495 | * directories of course but those are not dealt with here. | ||
1496 | */ | ||
1484 | if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_ENCRYPTED | | 1497 | if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_ENCRYPTED | |
1485 | ATTR_IS_SPARSE)) { | 1498 | ATTR_IS_SPARSE)) { |
1486 | ntfs_error(vi->i_sb, "Found compressed/encrypted/sparse index " | 1499 | ntfs_error(vi->i_sb, "Found compressed/encrypted/sparse index " |
@@ -2430,16 +2443,18 @@ int ntfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
2430 | * We skipped the truncate but must still update | 2443 | * We skipped the truncate but must still update |
2431 | * timestamps. | 2444 | * timestamps. |
2432 | */ | 2445 | */ |
2433 | ia_valid |= ATTR_MTIME|ATTR_CTIME; | 2446 | ia_valid |= ATTR_MTIME | ATTR_CTIME; |
2434 | } | 2447 | } |
2435 | } | 2448 | } |
2436 | |||
2437 | if (ia_valid & ATTR_ATIME) | 2449 | if (ia_valid & ATTR_ATIME) |
2438 | vi->i_atime = attr->ia_atime; | 2450 | vi->i_atime = timespec_trunc(attr->ia_atime, |
2451 | vi->i_sb->s_time_gran); | ||
2439 | if (ia_valid & ATTR_MTIME) | 2452 | if (ia_valid & ATTR_MTIME) |
2440 | vi->i_mtime = attr->ia_mtime; | 2453 | vi->i_mtime = timespec_trunc(attr->ia_mtime, |
2454 | vi->i_sb->s_time_gran); | ||
2441 | if (ia_valid & ATTR_CTIME) | 2455 | if (ia_valid & ATTR_CTIME) |
2442 | vi->i_ctime = attr->ia_ctime; | 2456 | vi->i_ctime = timespec_trunc(attr->ia_ctime, |
2457 | vi->i_sb->s_time_gran); | ||
2443 | mark_inode_dirty(vi); | 2458 | mark_inode_dirty(vi); |
2444 | out: | 2459 | out: |
2445 | return err; | 2460 | return err; |
diff --git a/fs/ntfs/lcnalloc.c b/fs/ntfs/lcnalloc.c index a4bc07616e5d..7b5934290685 100644 --- a/fs/ntfs/lcnalloc.c +++ b/fs/ntfs/lcnalloc.c | |||
@@ -54,6 +54,8 @@ int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol, | |||
54 | int ret = 0; | 54 | int ret = 0; |
55 | 55 | ||
56 | ntfs_debug("Entering."); | 56 | ntfs_debug("Entering."); |
57 | if (!rl) | ||
58 | return 0; | ||
57 | for (; rl->length; rl++) { | 59 | for (; rl->length; rl++) { |
58 | int err; | 60 | int err; |
59 | 61 | ||
@@ -163,17 +165,9 @@ runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn, | |||
163 | BUG_ON(zone < FIRST_ZONE); | 165 | BUG_ON(zone < FIRST_ZONE); |
164 | BUG_ON(zone > LAST_ZONE); | 166 | BUG_ON(zone > LAST_ZONE); |
165 | 167 | ||
166 | /* Return empty runlist if @count == 0 */ | 168 | /* Return NULL if @count is zero. */ |
167 | // FIXME: Do we want to just return NULL instead? (AIA) | 169 | if (!count) |
168 | if (!count) { | 170 | return NULL; |
169 | rl = ntfs_malloc_nofs(PAGE_SIZE); | ||
170 | if (!rl) | ||
171 | return ERR_PTR(-ENOMEM); | ||
172 | rl[0].vcn = start_vcn; | ||
173 | rl[0].lcn = LCN_RL_NOT_MAPPED; | ||
174 | rl[0].length = 0; | ||
175 | return rl; | ||
176 | } | ||
177 | /* Take the lcnbmp lock for writing. */ | 171 | /* Take the lcnbmp lock for writing. */ |
178 | down_write(&vol->lcnbmp_lock); | 172 | down_write(&vol->lcnbmp_lock); |
179 | /* | 173 | /* |
@@ -788,7 +782,8 @@ out: | |||
788 | * @vi: vfs inode whose runlist describes the clusters to free | 782 | * @vi: vfs inode whose runlist describes the clusters to free |
789 | * @start_vcn: vcn in the runlist of @vi at which to start freeing clusters | 783 | * @start_vcn: vcn in the runlist of @vi at which to start freeing clusters |
790 | * @count: number of clusters to free or -1 for all clusters | 784 | * @count: number of clusters to free or -1 for all clusters |
791 | * @is_rollback: if TRUE this is a rollback operation | 785 | * @write_locked: true if the runlist is locked for writing |
786 | * @is_rollback: true if this is a rollback operation | ||
792 | * | 787 | * |
793 | * Free @count clusters starting at the cluster @start_vcn in the runlist | 788 | * Free @count clusters starting at the cluster @start_vcn in the runlist |
794 | * described by the vfs inode @vi. | 789 | * described by the vfs inode @vi. |
@@ -806,17 +801,17 @@ out: | |||
806 | * Return the number of deallocated clusters (not counting sparse ones) on | 801 | * Return the number of deallocated clusters (not counting sparse ones) on |
807 | * success and -errno on error. | 802 | * success and -errno on error. |
808 | * | 803 | * |
809 | * Locking: - The runlist described by @vi must be unlocked on entry and is | 804 | * Locking: - The runlist described by @vi must be locked on entry and is |
810 | * unlocked on return. | 805 | * locked on return. Note if the runlist is locked for reading the |
811 | * - This function takes the runlist lock of @vi for reading and | 806 | * lock may be dropped and reacquired. Note the runlist may be |
812 | * sometimes for writing and sometimes modifies the runlist. | 807 | * modified when needed runlist fragments need to be mapped. |
813 | * - The volume lcn bitmap must be unlocked on entry and is unlocked | 808 | * - The volume lcn bitmap must be unlocked on entry and is unlocked |
814 | * on return. | 809 | * on return. |
815 | * - This function takes the volume lcn bitmap lock for writing and | 810 | * - This function takes the volume lcn bitmap lock for writing and |
816 | * modifies the bitmap contents. | 811 | * modifies the bitmap contents. |
817 | */ | 812 | */ |
818 | s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count, | 813 | s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count, |
819 | const BOOL is_rollback) | 814 | const BOOL write_locked, const BOOL is_rollback) |
820 | { | 815 | { |
821 | s64 delta, to_free, total_freed, real_freed; | 816 | s64 delta, to_free, total_freed, real_freed; |
822 | ntfs_inode *ni; | 817 | ntfs_inode *ni; |
@@ -848,8 +843,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count, | |||
848 | 843 | ||
849 | total_freed = real_freed = 0; | 844 | total_freed = real_freed = 0; |
850 | 845 | ||
851 | down_read(&ni->runlist.lock); | 846 | rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, write_locked); |
852 | rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, FALSE); | ||
853 | if (IS_ERR(rl)) { | 847 | if (IS_ERR(rl)) { |
854 | if (!is_rollback) | 848 | if (!is_rollback) |
855 | ntfs_error(vol->sb, "Failed to find first runlist " | 849 | ntfs_error(vol->sb, "Failed to find first runlist " |
@@ -903,7 +897,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count, | |||
903 | 897 | ||
904 | /* Attempt to map runlist. */ | 898 | /* Attempt to map runlist. */ |
905 | vcn = rl->vcn; | 899 | vcn = rl->vcn; |
906 | rl = ntfs_attr_find_vcn_nolock(ni, vcn, FALSE); | 900 | rl = ntfs_attr_find_vcn_nolock(ni, vcn, write_locked); |
907 | if (IS_ERR(rl)) { | 901 | if (IS_ERR(rl)) { |
908 | err = PTR_ERR(rl); | 902 | err = PTR_ERR(rl); |
909 | if (!is_rollback) | 903 | if (!is_rollback) |
@@ -950,7 +944,6 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count, | |||
950 | /* Update the total done clusters. */ | 944 | /* Update the total done clusters. */ |
951 | total_freed += to_free; | 945 | total_freed += to_free; |
952 | } | 946 | } |
953 | up_read(&ni->runlist.lock); | ||
954 | if (likely(!is_rollback)) | 947 | if (likely(!is_rollback)) |
955 | up_write(&vol->lcnbmp_lock); | 948 | up_write(&vol->lcnbmp_lock); |
956 | 949 | ||
@@ -960,7 +953,6 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count, | |||
960 | ntfs_debug("Done."); | 953 | ntfs_debug("Done."); |
961 | return real_freed; | 954 | return real_freed; |
962 | err_out: | 955 | err_out: |
963 | up_read(&ni->runlist.lock); | ||
964 | if (is_rollback) | 956 | if (is_rollback) |
965 | return err; | 957 | return err; |
966 | /* If no real clusters were freed, no need to rollback. */ | 958 | /* If no real clusters were freed, no need to rollback. */ |
@@ -973,7 +965,8 @@ err_out: | |||
973 | * If rollback fails, set the volume errors flag, emit an error | 965 | * If rollback fails, set the volume errors flag, emit an error |
974 | * message, and return the error code. | 966 | * message, and return the error code. |
975 | */ | 967 | */ |
976 | delta = __ntfs_cluster_free(vi, start_vcn, total_freed, TRUE); | 968 | delta = __ntfs_cluster_free(vi, start_vcn, total_freed, write_locked, |
969 | TRUE); | ||
977 | if (delta < 0) { | 970 | if (delta < 0) { |
978 | ntfs_error(vol->sb, "Failed to rollback (error %i). Leaving " | 971 | ntfs_error(vol->sb, "Failed to rollback (error %i). Leaving " |
979 | "inconsistent metadata! Unmount and run " | 972 | "inconsistent metadata! Unmount and run " |
diff --git a/fs/ntfs/lcnalloc.h b/fs/ntfs/lcnalloc.h index 4cac1c024af6..e4d7fb98d685 100644 --- a/fs/ntfs/lcnalloc.h +++ b/fs/ntfs/lcnalloc.h | |||
@@ -43,13 +43,14 @@ extern runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, | |||
43 | const NTFS_CLUSTER_ALLOCATION_ZONES zone); | 43 | const NTFS_CLUSTER_ALLOCATION_ZONES zone); |
44 | 44 | ||
45 | extern s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, | 45 | extern s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, |
46 | s64 count, const BOOL is_rollback); | 46 | s64 count, const BOOL write_locked, const BOOL is_rollback); |
47 | 47 | ||
48 | /** | 48 | /** |
49 | * ntfs_cluster_free - free clusters on an ntfs volume | 49 | * ntfs_cluster_free - free clusters on an ntfs volume |
50 | * @vi: vfs inode whose runlist describes the clusters to free | 50 | * @vi: vfs inode whose runlist describes the clusters to free |
51 | * @start_vcn: vcn in the runlist of @vi at which to start freeing clusters | 51 | * @start_vcn: vcn in the runlist of @vi at which to start freeing clusters |
52 | * @count: number of clusters to free or -1 for all clusters | 52 | * @count: number of clusters to free or -1 for all clusters |
53 | * @write_locked: true if the runlist is locked for writing | ||
53 | * | 54 | * |
54 | * Free @count clusters starting at the cluster @start_vcn in the runlist | 55 | * Free @count clusters starting at the cluster @start_vcn in the runlist |
55 | * described by the vfs inode @vi. | 56 | * described by the vfs inode @vi. |
@@ -64,19 +65,19 @@ extern s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, | |||
64 | * Return the number of deallocated clusters (not counting sparse ones) on | 65 | * Return the number of deallocated clusters (not counting sparse ones) on |
65 | * success and -errno on error. | 66 | * success and -errno on error. |
66 | * | 67 | * |
67 | * Locking: - The runlist described by @vi must be unlocked on entry and is | 68 | * Locking: - The runlist described by @vi must be locked on entry and is |
68 | * unlocked on return. | 69 | * locked on return. Note if the runlist is locked for reading the |
69 | * - This function takes the runlist lock of @vi for reading and | 70 | * lock may be dropped and reacquired. Note the runlist may be |
70 | * sometimes for writing and sometimes modifies the runlist. | 71 | * modified when needed runlist fragments need to be mapped. |
71 | * - The volume lcn bitmap must be unlocked on entry and is unlocked | 72 | * - The volume lcn bitmap must be unlocked on entry and is unlocked |
72 | * on return. | 73 | * on return. |
73 | * - This function takes the volume lcn bitmap lock for writing and | 74 | * - This function takes the volume lcn bitmap lock for writing and |
74 | * modifies the bitmap contents. | 75 | * modifies the bitmap contents. |
75 | */ | 76 | */ |
76 | static inline s64 ntfs_cluster_free(struct inode *vi, const VCN start_vcn, | 77 | static inline s64 ntfs_cluster_free(struct inode *vi, const VCN start_vcn, |
77 | s64 count) | 78 | s64 count, const BOOL write_locked) |
78 | { | 79 | { |
79 | return __ntfs_cluster_free(vi, start_vcn, count, FALSE); | 80 | return __ntfs_cluster_free(vi, start_vcn, count, write_locked, FALSE); |
80 | } | 81 | } |
81 | 82 | ||
82 | extern int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol, | 83 | extern int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol, |
@@ -93,8 +94,10 @@ extern int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol, | |||
93 | * | 94 | * |
94 | * Return 0 on success and -errno on error. | 95 | * Return 0 on success and -errno on error. |
95 | * | 96 | * |
96 | * Locking: This function takes the volume lcn bitmap lock for writing and | 97 | * Locking: - This function takes the volume lcn bitmap lock for writing and |
97 | * modifies the bitmap contents. | 98 | * modifies the bitmap contents. |
99 | * - The caller must have locked the runlist @rl for reading or | ||
100 | * writing. | ||
98 | */ | 101 | */ |
99 | static inline int ntfs_cluster_free_from_rl(ntfs_volume *vol, | 102 | static inline int ntfs_cluster_free_from_rl(ntfs_volume *vol, |
100 | const runlist_element *rl) | 103 | const runlist_element *rl) |
diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c index 8edb8e20fb08..0173e95500d9 100644 --- a/fs/ntfs/logfile.c +++ b/fs/ntfs/logfile.c | |||
@@ -121,7 +121,7 @@ static BOOL ntfs_check_restart_page_header(struct inode *vi, | |||
121 | */ | 121 | */ |
122 | if (!ntfs_is_chkd_record(rp->magic) && sle64_to_cpu(rp->chkdsk_lsn)) { | 122 | if (!ntfs_is_chkd_record(rp->magic) && sle64_to_cpu(rp->chkdsk_lsn)) { |
123 | ntfs_error(vi->i_sb, "$LogFile restart page is not modified " | 123 | ntfs_error(vi->i_sb, "$LogFile restart page is not modified " |
124 | "chkdsk but a chkdsk LSN is specified."); | 124 | "by chkdsk but a chkdsk LSN is specified."); |
125 | return FALSE; | 125 | return FALSE; |
126 | } | 126 | } |
127 | ntfs_debug("Done."); | 127 | ntfs_debug("Done."); |
@@ -312,10 +312,12 @@ err_out: | |||
312 | * @vi: $LogFile inode to which the restart page belongs | 312 | * @vi: $LogFile inode to which the restart page belongs |
313 | * @rp: restart page to check | 313 | * @rp: restart page to check |
314 | * @pos: position in @vi at which the restart page resides | 314 | * @pos: position in @vi at which the restart page resides |
315 | * @wrp: copy of the multi sector transfer deprotected restart page | 315 | * @wrp: [OUT] copy of the multi sector transfer deprotected restart page |
316 | * @lsn: [OUT] set to the current logfile lsn on success | ||
316 | * | 317 | * |
317 | * Check the restart page @rp for consistency and return TRUE if it is | 318 | * Check the restart page @rp for consistency and return 0 if it is consistent |
318 | * consistent and FALSE otherwise. | 319 | * and -errno otherwise. The restart page may have been modified by chkdsk in |
320 | * which case its magic is CHKD instead of RSTR. | ||
319 | * | 321 | * |
320 | * This function only needs NTFS_BLOCK_SIZE bytes in @rp, i.e. it does not | 322 | * This function only needs NTFS_BLOCK_SIZE bytes in @rp, i.e. it does not |
321 | * require the full restart page. | 323 | * require the full restart page. |
@@ -323,25 +325,33 @@ err_out: | |||
323 | * If @wrp is not NULL, on success, *@wrp will point to a buffer containing a | 325 | * If @wrp is not NULL, on success, *@wrp will point to a buffer containing a |
324 | * copy of the complete multi sector transfer deprotected page. On failure, | 326 | * copy of the complete multi sector transfer deprotected page. On failure, |
325 | * *@wrp is undefined. | 327 | * *@wrp is undefined. |
328 | * | ||
329 | * Similarly, if @lsn is not NULL, on success *@lsn will be set to the current | ||
330 | * logfile lsn according to this restart page. On failure, *@lsn is undefined. | ||
331 | * | ||
332 | * The following error codes are defined: | ||
333 | * -EINVAL - The restart page is inconsistent. | ||
334 | * -ENOMEM - Not enough memory to load the restart page. | ||
335 | * -EIO - Failed to read from $LogFile. | ||
326 | */ | 336 | */ |
327 | static BOOL ntfs_check_and_load_restart_page(struct inode *vi, | 337 | static int ntfs_check_and_load_restart_page(struct inode *vi, |
328 | RESTART_PAGE_HEADER *rp, s64 pos, RESTART_PAGE_HEADER **wrp) | 338 | RESTART_PAGE_HEADER *rp, s64 pos, RESTART_PAGE_HEADER **wrp, |
339 | LSN *lsn) | ||
329 | { | 340 | { |
330 | RESTART_AREA *ra; | 341 | RESTART_AREA *ra; |
331 | RESTART_PAGE_HEADER *trp; | 342 | RESTART_PAGE_HEADER *trp; |
332 | int size; | 343 | int size, err; |
333 | BOOL ret; | ||
334 | 344 | ||
335 | ntfs_debug("Entering."); | 345 | ntfs_debug("Entering."); |
336 | /* Check the restart page header for consistency. */ | 346 | /* Check the restart page header for consistency. */ |
337 | if (!ntfs_check_restart_page_header(vi, rp, pos)) { | 347 | if (!ntfs_check_restart_page_header(vi, rp, pos)) { |
338 | /* Error output already done inside the function. */ | 348 | /* Error output already done inside the function. */ |
339 | return FALSE; | 349 | return -EINVAL; |
340 | } | 350 | } |
341 | /* Check the restart area for consistency. */ | 351 | /* Check the restart area for consistency. */ |
342 | if (!ntfs_check_restart_area(vi, rp)) { | 352 | if (!ntfs_check_restart_area(vi, rp)) { |
343 | /* Error output already done inside the function. */ | 353 | /* Error output already done inside the function. */ |
344 | return FALSE; | 354 | return -EINVAL; |
345 | } | 355 | } |
346 | ra = (RESTART_AREA*)((u8*)rp + le16_to_cpu(rp->restart_area_offset)); | 356 | ra = (RESTART_AREA*)((u8*)rp + le16_to_cpu(rp->restart_area_offset)); |
347 | /* | 357 | /* |
@@ -352,7 +362,7 @@ static BOOL ntfs_check_and_load_restart_page(struct inode *vi, | |||
352 | if (!trp) { | 362 | if (!trp) { |
353 | ntfs_error(vi->i_sb, "Failed to allocate memory for $LogFile " | 363 | ntfs_error(vi->i_sb, "Failed to allocate memory for $LogFile " |
354 | "restart page buffer."); | 364 | "restart page buffer."); |
355 | return FALSE; | 365 | return -ENOMEM; |
356 | } | 366 | } |
357 | /* | 367 | /* |
358 | * Read the whole of the restart page into the buffer. If it fits | 368 | * Read the whole of the restart page into the buffer. If it fits |
@@ -379,6 +389,9 @@ static BOOL ntfs_check_and_load_restart_page(struct inode *vi, | |||
379 | if (IS_ERR(page)) { | 389 | if (IS_ERR(page)) { |
380 | ntfs_error(vi->i_sb, "Error mapping $LogFile " | 390 | ntfs_error(vi->i_sb, "Error mapping $LogFile " |
381 | "page (index %lu).", idx); | 391 | "page (index %lu).", idx); |
392 | err = PTR_ERR(page); | ||
393 | if (err != -EIO && err != -ENOMEM) | ||
394 | err = -EIO; | ||
382 | goto err_out; | 395 | goto err_out; |
383 | } | 396 | } |
384 | size = min_t(int, to_read, PAGE_CACHE_SIZE); | 397 | size = min_t(int, to_read, PAGE_CACHE_SIZE); |
@@ -392,29 +405,57 @@ static BOOL ntfs_check_and_load_restart_page(struct inode *vi, | |||
392 | /* Perform the multi sector transfer deprotection on the buffer. */ | 405 | /* Perform the multi sector transfer deprotection on the buffer. */ |
393 | if (post_read_mst_fixup((NTFS_RECORD*)trp, | 406 | if (post_read_mst_fixup((NTFS_RECORD*)trp, |
394 | le32_to_cpu(rp->system_page_size))) { | 407 | le32_to_cpu(rp->system_page_size))) { |
395 | ntfs_error(vi->i_sb, "Multi sector transfer error detected in " | 408 | /* |
396 | "$LogFile restart page."); | 409 | * A multi sector transfer error was detected. We only need to |
397 | goto err_out; | 410 | * abort if the restart page contents exceed the multi sector |
411 | * transfer fixup of the first sector. | ||
412 | */ | ||
413 | if (le16_to_cpu(rp->restart_area_offset) + | ||
414 | le16_to_cpu(ra->restart_area_length) > | ||
415 | NTFS_BLOCK_SIZE - sizeof(u16)) { | ||
416 | ntfs_error(vi->i_sb, "Multi sector transfer error " | ||
417 | "detected in $LogFile restart page."); | ||
418 | err = -EINVAL; | ||
419 | goto err_out; | ||
420 | } | ||
421 | } | ||
422 | /* | ||
423 | * If the restart page is modified by chkdsk or there are no active | ||
424 | * logfile clients, the logfile is consistent. Otherwise, need to | ||
425 | * check the log client records for consistency, too. | ||
426 | */ | ||
427 | err = 0; | ||
428 | if (ntfs_is_rstr_record(rp->magic) && | ||
429 | ra->client_in_use_list != LOGFILE_NO_CLIENT) { | ||
430 | if (!ntfs_check_log_client_array(vi, trp)) { | ||
431 | err = -EINVAL; | ||
432 | goto err_out; | ||
433 | } | ||
434 | } | ||
435 | if (lsn) { | ||
436 | if (ntfs_is_rstr_record(rp->magic)) | ||
437 | *lsn = sle64_to_cpu(ra->current_lsn); | ||
438 | else /* if (ntfs_is_chkd_record(rp->magic)) */ | ||
439 | *lsn = sle64_to_cpu(rp->chkdsk_lsn); | ||
398 | } | 440 | } |
399 | /* Check the log client records for consistency. */ | ||
400 | ret = ntfs_check_log_client_array(vi, trp); | ||
401 | if (ret && wrp) | ||
402 | *wrp = trp; | ||
403 | else | ||
404 | ntfs_free(trp); | ||
405 | ntfs_debug("Done."); | 441 | ntfs_debug("Done."); |
406 | return ret; | 442 | if (wrp) |
443 | *wrp = trp; | ||
444 | else { | ||
407 | err_out: | 445 | err_out: |
408 | ntfs_free(trp); | 446 | ntfs_free(trp); |
409 | return FALSE; | 447 | } |
448 | return err; | ||
410 | } | 449 | } |
411 | 450 | ||
412 | /** | 451 | /** |
413 | * ntfs_check_logfile - check the journal for consistency | 452 | * ntfs_check_logfile - check the journal for consistency |
414 | * @log_vi: struct inode of loaded journal $LogFile to check | 453 | * @log_vi: struct inode of loaded journal $LogFile to check |
454 | * @rp: [OUT] on success this is a copy of the current restart page | ||
415 | * | 455 | * |
416 | * Check the $LogFile journal for consistency and return TRUE if it is | 456 | * Check the $LogFile journal for consistency and return TRUE if it is |
417 | * consistent and FALSE if not. | 457 | * consistent and FALSE if not. On success, the current restart page is |
458 | * returned in *@rp. Caller must call ntfs_free(*@rp) when finished with it. | ||
418 | * | 459 | * |
419 | * At present we only check the two restart pages and ignore the log record | 460 | * At present we only check the two restart pages and ignore the log record |
420 | * pages. | 461 | * pages. |
@@ -424,19 +465,18 @@ err_out: | |||
424 | * if the $LogFile was created on a system with a different page size to ours | 465 | * if the $LogFile was created on a system with a different page size to ours |
425 | * yet and mst deprotection would fail if our page size is smaller. | 466 | * yet and mst deprotection would fail if our page size is smaller. |
426 | */ | 467 | */ |
427 | BOOL ntfs_check_logfile(struct inode *log_vi) | 468 | BOOL ntfs_check_logfile(struct inode *log_vi, RESTART_PAGE_HEADER **rp) |
428 | { | 469 | { |
429 | s64 size, pos, rstr1_pos, rstr2_pos; | 470 | s64 size, pos; |
471 | LSN rstr1_lsn, rstr2_lsn; | ||
430 | ntfs_volume *vol = NTFS_SB(log_vi->i_sb); | 472 | ntfs_volume *vol = NTFS_SB(log_vi->i_sb); |
431 | struct address_space *mapping = log_vi->i_mapping; | 473 | struct address_space *mapping = log_vi->i_mapping; |
432 | struct page *page = NULL; | 474 | struct page *page = NULL; |
433 | u8 *kaddr = NULL; | 475 | u8 *kaddr = NULL; |
434 | RESTART_PAGE_HEADER *rstr1_ph = NULL; | 476 | RESTART_PAGE_HEADER *rstr1_ph = NULL; |
435 | RESTART_PAGE_HEADER *rstr2_ph = NULL; | 477 | RESTART_PAGE_HEADER *rstr2_ph = NULL; |
436 | int log_page_size, log_page_mask, ofs; | 478 | int log_page_size, log_page_mask, err; |
437 | BOOL logfile_is_empty = TRUE; | 479 | BOOL logfile_is_empty = TRUE; |
438 | BOOL rstr1_found = FALSE; | ||
439 | BOOL rstr2_found = FALSE; | ||
440 | u8 log_page_bits; | 480 | u8 log_page_bits; |
441 | 481 | ||
442 | ntfs_debug("Entering."); | 482 | ntfs_debug("Entering."); |
@@ -491,7 +531,7 @@ BOOL ntfs_check_logfile(struct inode *log_vi) | |||
491 | if (IS_ERR(page)) { | 531 | if (IS_ERR(page)) { |
492 | ntfs_error(vol->sb, "Error mapping $LogFile " | 532 | ntfs_error(vol->sb, "Error mapping $LogFile " |
493 | "page (index %lu).", idx); | 533 | "page (index %lu).", idx); |
494 | return FALSE; | 534 | goto err_out; |
495 | } | 535 | } |
496 | } | 536 | } |
497 | kaddr = (u8*)page_address(page) + (pos & ~PAGE_CACHE_MASK); | 537 | kaddr = (u8*)page_address(page) + (pos & ~PAGE_CACHE_MASK); |
@@ -510,99 +550,95 @@ BOOL ntfs_check_logfile(struct inode *log_vi) | |||
510 | */ | 550 | */ |
511 | if (ntfs_is_rcrd_recordp((le32*)kaddr)) | 551 | if (ntfs_is_rcrd_recordp((le32*)kaddr)) |
512 | break; | 552 | break; |
513 | /* | 553 | /* If not a (modified by chkdsk) restart page, continue. */ |
514 | * A modified by chkdsk restart page means we cannot handle | 554 | if (!ntfs_is_rstr_recordp((le32*)kaddr) && |
515 | * this log file. | 555 | !ntfs_is_chkd_recordp((le32*)kaddr)) { |
516 | */ | ||
517 | if (ntfs_is_chkd_recordp((le32*)kaddr)) { | ||
518 | ntfs_error(vol->sb, "$LogFile has been modified by " | ||
519 | "chkdsk. Mount this volume in " | ||
520 | "Windows."); | ||
521 | goto err_out; | ||
522 | } | ||
523 | /* If not a restart page, continue. */ | ||
524 | if (!ntfs_is_rstr_recordp((le32*)kaddr)) { | ||
525 | /* Skip to the minimum page size for the next one. */ | ||
526 | if (!pos) | 556 | if (!pos) |
527 | pos = NTFS_BLOCK_SIZE >> 1; | 557 | pos = NTFS_BLOCK_SIZE >> 1; |
528 | continue; | 558 | continue; |
529 | } | 559 | } |
530 | /* We now know we have a restart page. */ | ||
531 | if (!pos) { | ||
532 | rstr1_found = TRUE; | ||
533 | rstr1_pos = pos; | ||
534 | } else { | ||
535 | if (rstr2_found) { | ||
536 | ntfs_error(vol->sb, "Found more than two " | ||
537 | "restart pages in $LogFile."); | ||
538 | goto err_out; | ||
539 | } | ||
540 | rstr2_found = TRUE; | ||
541 | rstr2_pos = pos; | ||
542 | } | ||
543 | /* | 560 | /* |
544 | * Check the restart page for consistency and get a copy of the | 561 | * Check the (modified by chkdsk) restart page for consistency |
545 | * complete multi sector transfer deprotected restart page. | 562 | * and get a copy of the complete multi sector transfer |
563 | * deprotected restart page. | ||
546 | */ | 564 | */ |
547 | if (!ntfs_check_and_load_restart_page(log_vi, | 565 | err = ntfs_check_and_load_restart_page(log_vi, |
548 | (RESTART_PAGE_HEADER*)kaddr, pos, | 566 | (RESTART_PAGE_HEADER*)kaddr, pos, |
549 | !pos ? &rstr1_ph : &rstr2_ph)) { | 567 | !rstr1_ph ? &rstr1_ph : &rstr2_ph, |
550 | /* Error output already done inside the function. */ | 568 | !rstr1_ph ? &rstr1_lsn : &rstr2_lsn); |
551 | goto err_out; | 569 | if (!err) { |
570 | /* | ||
571 | * If we have now found the first (modified by chkdsk) | ||
572 | * restart page, continue looking for the second one. | ||
573 | */ | ||
574 | if (!pos) { | ||
575 | pos = NTFS_BLOCK_SIZE >> 1; | ||
576 | continue; | ||
577 | } | ||
578 | /* | ||
579 | * We have now found the second (modified by chkdsk) | ||
580 | * restart page, so we can stop looking. | ||
581 | */ | ||
582 | break; | ||
552 | } | 583 | } |
553 | /* | 584 | /* |
554 | * We have a valid restart page. The next one must be after | 585 | * Error output already done inside the function. Note, we do |
555 | * a whole system page size as specified by the valid restart | 586 | * not abort if the restart page was invalid as we might still |
556 | * page. | 587 | * find a valid one further in the file. |
557 | */ | 588 | */ |
589 | if (err != -EINVAL) { | ||
590 | ntfs_unmap_page(page); | ||
591 | goto err_out; | ||
592 | } | ||
593 | /* Continue looking. */ | ||
558 | if (!pos) | 594 | if (!pos) |
559 | pos = le32_to_cpu(rstr1_ph->system_page_size) >> 1; | 595 | pos = NTFS_BLOCK_SIZE >> 1; |
560 | } | 596 | } |
561 | if (page) { | 597 | if (page) |
562 | ntfs_unmap_page(page); | 598 | ntfs_unmap_page(page); |
563 | page = NULL; | ||
564 | } | ||
565 | if (logfile_is_empty) { | 599 | if (logfile_is_empty) { |
566 | NVolSetLogFileEmpty(vol); | 600 | NVolSetLogFileEmpty(vol); |
567 | is_empty: | 601 | is_empty: |
568 | ntfs_debug("Done. ($LogFile is empty.)"); | 602 | ntfs_debug("Done. ($LogFile is empty.)"); |
569 | return TRUE; | 603 | return TRUE; |
570 | } | 604 | } |
571 | if (!rstr1_found || !rstr2_found) { | 605 | if (!rstr1_ph) { |
572 | ntfs_error(vol->sb, "Did not find two restart pages in " | 606 | BUG_ON(rstr2_ph); |
573 | "$LogFile."); | 607 | ntfs_error(vol->sb, "Did not find any restart pages in " |
574 | goto err_out; | 608 | "$LogFile and it was not empty."); |
609 | return FALSE; | ||
610 | } | ||
611 | /* If both restart pages were found, use the more recent one. */ | ||
612 | if (rstr2_ph) { | ||
613 | /* | ||
614 | * If the second restart area is more recent, switch to it. | ||
615 | * Otherwise just throw it away. | ||
616 | */ | ||
617 | if (rstr2_lsn > rstr1_lsn) { | ||
618 | ntfs_free(rstr1_ph); | ||
619 | rstr1_ph = rstr2_ph; | ||
620 | /* rstr1_lsn = rstr2_lsn; */ | ||
621 | } else | ||
622 | ntfs_free(rstr2_ph); | ||
623 | rstr2_ph = NULL; | ||
575 | } | 624 | } |
576 | /* | ||
577 | * The two restart areas must be identical except for the update | ||
578 | * sequence number. | ||
579 | */ | ||
580 | ofs = le16_to_cpu(rstr1_ph->usa_ofs); | ||
581 | if (memcmp(rstr1_ph, rstr2_ph, ofs) || (ofs += sizeof(u16), | ||
582 | memcmp((u8*)rstr1_ph + ofs, (u8*)rstr2_ph + ofs, | ||
583 | le32_to_cpu(rstr1_ph->system_page_size) - ofs))) { | ||
584 | ntfs_error(vol->sb, "The two restart pages in $LogFile do not " | ||
585 | "match."); | ||
586 | goto err_out; | ||
587 | } | ||
588 | ntfs_free(rstr1_ph); | ||
589 | ntfs_free(rstr2_ph); | ||
590 | /* All consistency checks passed. */ | 625 | /* All consistency checks passed. */ |
626 | if (rp) | ||
627 | *rp = rstr1_ph; | ||
628 | else | ||
629 | ntfs_free(rstr1_ph); | ||
591 | ntfs_debug("Done."); | 630 | ntfs_debug("Done."); |
592 | return TRUE; | 631 | return TRUE; |
593 | err_out: | 632 | err_out: |
594 | if (page) | ||
595 | ntfs_unmap_page(page); | ||
596 | if (rstr1_ph) | 633 | if (rstr1_ph) |
597 | ntfs_free(rstr1_ph); | 634 | ntfs_free(rstr1_ph); |
598 | if (rstr2_ph) | ||
599 | ntfs_free(rstr2_ph); | ||
600 | return FALSE; | 635 | return FALSE; |
601 | } | 636 | } |
602 | 637 | ||
603 | /** | 638 | /** |
604 | * ntfs_is_logfile_clean - check in the journal if the volume is clean | 639 | * ntfs_is_logfile_clean - check in the journal if the volume is clean |
605 | * @log_vi: struct inode of loaded journal $LogFile to check | 640 | * @log_vi: struct inode of loaded journal $LogFile to check |
641 | * @rp: copy of the current restart page | ||
606 | * | 642 | * |
607 | * Analyze the $LogFile journal and return TRUE if it indicates the volume was | 643 | * Analyze the $LogFile journal and return TRUE if it indicates the volume was |
608 | * shutdown cleanly and FALSE if not. | 644 | * shutdown cleanly and FALSE if not. |
@@ -619,11 +655,9 @@ err_out: | |||
619 | * is empty this function requires that NVolLogFileEmpty() is true otherwise an | 655 | * is empty this function requires that NVolLogFileEmpty() is true otherwise an |
620 | * empty volume will be reported as dirty. | 656 | * empty volume will be reported as dirty. |
621 | */ | 657 | */ |
622 | BOOL ntfs_is_logfile_clean(struct inode *log_vi) | 658 | BOOL ntfs_is_logfile_clean(struct inode *log_vi, const RESTART_PAGE_HEADER *rp) |
623 | { | 659 | { |
624 | ntfs_volume *vol = NTFS_SB(log_vi->i_sb); | 660 | ntfs_volume *vol = NTFS_SB(log_vi->i_sb); |
625 | struct page *page; | ||
626 | RESTART_PAGE_HEADER *rp; | ||
627 | RESTART_AREA *ra; | 661 | RESTART_AREA *ra; |
628 | 662 | ||
629 | ntfs_debug("Entering."); | 663 | ntfs_debug("Entering."); |
@@ -632,24 +666,15 @@ BOOL ntfs_is_logfile_clean(struct inode *log_vi) | |||
632 | ntfs_debug("Done. ($LogFile is empty.)"); | 666 | ntfs_debug("Done. ($LogFile is empty.)"); |
633 | return TRUE; | 667 | return TRUE; |
634 | } | 668 | } |
635 | /* | 669 | BUG_ON(!rp); |
636 | * Read the first restart page. It will be possibly incomplete and | 670 | if (!ntfs_is_rstr_record(rp->magic) && |
637 | * will not be multi sector transfer deprotected but we only need the | 671 | !ntfs_is_chkd_record(rp->magic)) { |
638 | * first NTFS_BLOCK_SIZE bytes so it does not matter. | 672 | ntfs_error(vol->sb, "Restart page buffer is invalid. This is " |
639 | */ | 673 | "probably a bug in that the $LogFile should " |
640 | page = ntfs_map_page(log_vi->i_mapping, 0); | 674 | "have been consistency checked before calling " |
641 | if (IS_ERR(page)) { | 675 | "this function."); |
642 | ntfs_error(vol->sb, "Error mapping $LogFile page (index 0)."); | ||
643 | return FALSE; | 676 | return FALSE; |
644 | } | 677 | } |
645 | rp = (RESTART_PAGE_HEADER*)page_address(page); | ||
646 | if (!ntfs_is_rstr_record(rp->magic)) { | ||
647 | ntfs_error(vol->sb, "No restart page found at offset zero in " | ||
648 | "$LogFile. This is probably a bug in that " | ||
649 | "the $LogFile should have been consistency " | ||
650 | "checked before calling this function."); | ||
651 | goto err_out; | ||
652 | } | ||
653 | ra = (RESTART_AREA*)((u8*)rp + le16_to_cpu(rp->restart_area_offset)); | 678 | ra = (RESTART_AREA*)((u8*)rp + le16_to_cpu(rp->restart_area_offset)); |
654 | /* | 679 | /* |
655 | * If the $LogFile has active clients, i.e. it is open, and we do not | 680 | * If the $LogFile has active clients, i.e. it is open, and we do not |
@@ -659,15 +684,11 @@ BOOL ntfs_is_logfile_clean(struct inode *log_vi) | |||
659 | if (ra->client_in_use_list != LOGFILE_NO_CLIENT && | 684 | if (ra->client_in_use_list != LOGFILE_NO_CLIENT && |
660 | !(ra->flags & RESTART_VOLUME_IS_CLEAN)) { | 685 | !(ra->flags & RESTART_VOLUME_IS_CLEAN)) { |
661 | ntfs_debug("Done. $LogFile indicates a dirty shutdown."); | 686 | ntfs_debug("Done. $LogFile indicates a dirty shutdown."); |
662 | goto err_out; | 687 | return FALSE; |
663 | } | 688 | } |
664 | ntfs_unmap_page(page); | ||
665 | /* $LogFile indicates a clean shutdown. */ | 689 | /* $LogFile indicates a clean shutdown. */ |
666 | ntfs_debug("Done. $LogFile indicates a clean shutdown."); | 690 | ntfs_debug("Done. $LogFile indicates a clean shutdown."); |
667 | return TRUE; | 691 | return TRUE; |
668 | err_out: | ||
669 | ntfs_unmap_page(page); | ||
670 | return FALSE; | ||
671 | } | 692 | } |
672 | 693 | ||
673 | /** | 694 | /** |
diff --git a/fs/ntfs/logfile.h b/fs/ntfs/logfile.h index 4ee4378de061..42388f95ea6d 100644 --- a/fs/ntfs/logfile.h +++ b/fs/ntfs/logfile.h | |||
@@ -2,7 +2,7 @@ | |||
2 | * logfile.h - Defines for NTFS kernel journal ($LogFile) handling. Part of | 2 | * logfile.h - Defines for NTFS kernel journal ($LogFile) handling. Part of |
3 | * the Linux-NTFS project. | 3 | * the Linux-NTFS project. |
4 | * | 4 | * |
5 | * Copyright (c) 2000-2004 Anton Altaparmakov | 5 | * Copyright (c) 2000-2005 Anton Altaparmakov |
6 | * | 6 | * |
7 | * This program/include file is free software; you can redistribute it and/or | 7 | * This program/include file is free software; you can redistribute it and/or |
8 | * modify it under the terms of the GNU General Public License as published | 8 | * modify it under the terms of the GNU General Public License as published |
@@ -296,9 +296,11 @@ typedef struct { | |||
296 | /* sizeof() = 160 (0xa0) bytes */ | 296 | /* sizeof() = 160 (0xa0) bytes */ |
297 | } __attribute__ ((__packed__)) LOG_CLIENT_RECORD; | 297 | } __attribute__ ((__packed__)) LOG_CLIENT_RECORD; |
298 | 298 | ||
299 | extern BOOL ntfs_check_logfile(struct inode *log_vi); | 299 | extern BOOL ntfs_check_logfile(struct inode *log_vi, |
300 | RESTART_PAGE_HEADER **rp); | ||
300 | 301 | ||
301 | extern BOOL ntfs_is_logfile_clean(struct inode *log_vi); | 302 | extern BOOL ntfs_is_logfile_clean(struct inode *log_vi, |
303 | const RESTART_PAGE_HEADER *rp); | ||
302 | 304 | ||
303 | extern BOOL ntfs_empty_logfile(struct inode *log_vi); | 305 | extern BOOL ntfs_empty_logfile(struct inode *log_vi); |
304 | 306 | ||
diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h index fac5944df6d8..9994e019a3cf 100644 --- a/fs/ntfs/malloc.h +++ b/fs/ntfs/malloc.h | |||
@@ -27,27 +27,63 @@ | |||
27 | #include <linux/highmem.h> | 27 | #include <linux/highmem.h> |
28 | 28 | ||
29 | /** | 29 | /** |
30 | * ntfs_malloc_nofs - allocate memory in multiples of pages | 30 | * __ntfs_malloc - allocate memory in multiples of pages |
31 | * @size number of bytes to allocate | 31 | * @size: number of bytes to allocate |
32 | * @gfp_mask: extra flags for the allocator | ||
33 | * | ||
34 | * Internal function. You probably want ntfs_malloc_nofs()... | ||
32 | * | 35 | * |
33 | * Allocates @size bytes of memory, rounded up to multiples of PAGE_SIZE and | 36 | * Allocates @size bytes of memory, rounded up to multiples of PAGE_SIZE and |
34 | * returns a pointer to the allocated memory. | 37 | * returns a pointer to the allocated memory. |
35 | * | 38 | * |
36 | * If there was insufficient memory to complete the request, return NULL. | 39 | * If there was insufficient memory to complete the request, return NULL. |
40 | * Depending on @gfp_mask the allocation may be guaranteed to succeed. | ||
37 | */ | 41 | */ |
38 | static inline void *ntfs_malloc_nofs(unsigned long size) | 42 | static inline void *__ntfs_malloc(unsigned long size, |
43 | unsigned int __nocast gfp_mask) | ||
39 | { | 44 | { |
40 | if (likely(size <= PAGE_SIZE)) { | 45 | if (likely(size <= PAGE_SIZE)) { |
41 | BUG_ON(!size); | 46 | BUG_ON(!size); |
42 | /* kmalloc() has per-CPU caches so is faster for now. */ | 47 | /* kmalloc() has per-CPU caches so is faster for now. */ |
43 | return kmalloc(PAGE_SIZE, GFP_NOFS); | 48 | return kmalloc(PAGE_SIZE, gfp_mask); |
44 | /* return (void *)__get_free_page(GFP_NOFS | __GFP_HIGHMEM); */ | 49 | /* return (void *)__get_free_page(gfp_mask); */ |
45 | } | 50 | } |
46 | if (likely(size >> PAGE_SHIFT < num_physpages)) | 51 | if (likely(size >> PAGE_SHIFT < num_physpages)) |
47 | return __vmalloc(size, GFP_NOFS | __GFP_HIGHMEM, PAGE_KERNEL); | 52 | return __vmalloc(size, gfp_mask, PAGE_KERNEL); |
48 | return NULL; | 53 | return NULL; |
49 | } | 54 | } |
50 | 55 | ||
56 | /** | ||
57 | * ntfs_malloc_nofs - allocate memory in multiples of pages | ||
58 | * @size: number of bytes to allocate | ||
59 | * | ||
60 | * Allocates @size bytes of memory, rounded up to multiples of PAGE_SIZE and | ||
61 | * returns a pointer to the allocated memory. | ||
62 | * | ||
63 | * If there was insufficient memory to complete the request, return NULL. | ||
64 | */ | ||
65 | static inline void *ntfs_malloc_nofs(unsigned long size) | ||
66 | { | ||
67 | return __ntfs_malloc(size, GFP_NOFS | __GFP_HIGHMEM); | ||
68 | } | ||
69 | |||
70 | /** | ||
71 | * ntfs_malloc_nofs_nofail - allocate memory in multiples of pages | ||
72 | * @size: number of bytes to allocate | ||
73 | * | ||
74 | * Allocates @size bytes of memory, rounded up to multiples of PAGE_SIZE and | ||
75 | * returns a pointer to the allocated memory. | ||
76 | * | ||
77 | * This function guarantees that the allocation will succeed. It will sleep | ||
78 | * for as long as it takes to complete the allocation. | ||
79 | * | ||
80 | * NULL is returned only if @size is at least the size of physical memory. | |
81 | */ | ||
82 | static inline void *ntfs_malloc_nofs_nofail(unsigned long size) | ||
83 | { | ||
84 | return __ntfs_malloc(size, GFP_NOFS | __GFP_HIGHMEM | __GFP_NOFAIL); | ||
85 | } | ||
86 | |||
51 | static inline void ntfs_free(void *addr) | 87 | static inline void ntfs_free(void *addr) |
52 | { | 88 | { |
53 | if (likely(((unsigned long)addr < VMALLOC_START) || | 89 | if (likely(((unsigned long)addr < VMALLOC_START) || |
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index 317f7c679fd3..2c32b84385a8 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c | |||
@@ -511,7 +511,6 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no, | |||
511 | } while (bh); | 511 | } while (bh); |
512 | tail->b_this_page = head; | 512 | tail->b_this_page = head; |
513 | attach_page_buffers(page, head); | 513 | attach_page_buffers(page, head); |
514 | BUG_ON(!page_has_buffers(page)); | ||
515 | } | 514 | } |
516 | bh = head = page_buffers(page); | 515 | bh = head = page_buffers(page); |
517 | BUG_ON(!bh); | 516 | BUG_ON(!bh); |
@@ -692,7 +691,6 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync) | |||
692 | */ | 691 | */ |
693 | if (!NInoTestClearDirty(ni)) | 692 | if (!NInoTestClearDirty(ni)) |
694 | goto done; | 693 | goto done; |
695 | BUG_ON(!page_has_buffers(page)); | ||
696 | bh = head = page_buffers(page); | 694 | bh = head = page_buffers(page); |
697 | BUG_ON(!bh); | 695 | BUG_ON(!bh); |
698 | rl = NULL; | 696 | rl = NULL; |
@@ -1955,7 +1953,7 @@ restore_undo_alloc: | |||
1955 | a = ctx->attr; | 1953 | a = ctx->attr; |
1956 | a->data.non_resident.highest_vcn = cpu_to_sle64(old_last_vcn - 1); | 1954 | a->data.non_resident.highest_vcn = cpu_to_sle64(old_last_vcn - 1); |
1957 | undo_alloc: | 1955 | undo_alloc: |
1958 | if (ntfs_cluster_free(vol->mft_ino, old_last_vcn, -1) < 0) { | 1956 | if (ntfs_cluster_free(vol->mft_ino, old_last_vcn, -1, TRUE) < 0) { |
1959 | ntfs_error(vol->sb, "Failed to free clusters from mft data " | 1957 | ntfs_error(vol->sb, "Failed to free clusters from mft data " |
1960 | "attribute.%s", es); | 1958 | "attribute.%s", es); |
1961 | NVolSetErrors(vol); | 1959 | NVolSetErrors(vol); |
diff --git a/fs/ntfs/runlist.c b/fs/ntfs/runlist.c index 758855b0414e..f5b2ac929081 100644 --- a/fs/ntfs/runlist.c +++ b/fs/ntfs/runlist.c | |||
@@ -35,7 +35,7 @@ static inline void ntfs_rl_mm(runlist_element *base, int dst, int src, | |||
35 | int size) | 35 | int size) |
36 | { | 36 | { |
37 | if (likely((dst != src) && (size > 0))) | 37 | if (likely((dst != src) && (size > 0))) |
38 | memmove(base + dst, base + src, size * sizeof (*base)); | 38 | memmove(base + dst, base + src, size * sizeof(*base)); |
39 | } | 39 | } |
40 | 40 | ||
41 | /** | 41 | /** |
@@ -95,6 +95,51 @@ static inline runlist_element *ntfs_rl_realloc(runlist_element *rl, | |||
95 | } | 95 | } |
96 | 96 | ||
97 | /** | 97 | /** |
98 | * ntfs_rl_realloc_nofail - Reallocate memory for runlists | ||
99 | * @rl: original runlist | ||
100 | * @old_size: number of runlist elements in the original runlist @rl | ||
101 | * @new_size: number of runlist elements we need space for | ||
102 | * | ||
103 | * As the runlists grow, more memory will be required. To prevent the | ||
104 | * kernel having to allocate and reallocate large numbers of small bits of | ||
105 | * memory, this function returns an entire page of memory. | ||
106 | * | ||
107 | * This function guarantees that the allocation will succeed. It will sleep | ||
108 | * for as long as it takes to complete the allocation. | ||
109 | * | ||
110 | * It is up to the caller to serialize access to the runlist @rl. | ||
111 | * | ||
112 | * N.B. If the new allocation doesn't require a different number of pages in | ||
113 | * memory, the function will return the original pointer. | ||
114 | * | ||
115 | * Return a pointer to the newly allocated, or recycled, memory. Unlike | |
116 | * ntfs_rl_realloc(), this function never returns an error pointer: the | |
117 | * allocation is made with ntfs_malloc_nofs_nofail() and a NULL result is | |
118 | * treated as a bug (BUG_ON()). | |
119 | */ | ||
120 | static inline runlist_element *ntfs_rl_realloc_nofail(runlist_element *rl, | ||
121 | int old_size, int new_size) | ||
122 | { | ||
123 | runlist_element *new_rl; | ||
124 | |||
125 | old_size = PAGE_ALIGN(old_size * sizeof(*rl)); | ||
126 | new_size = PAGE_ALIGN(new_size * sizeof(*rl)); | ||
127 | if (old_size == new_size) | ||
128 | return rl; | ||
129 | |||
130 | new_rl = ntfs_malloc_nofs_nofail(new_size); | ||
131 | BUG_ON(!new_rl); | ||
132 | |||
133 | if (likely(rl != NULL)) { | ||
134 | if (unlikely(old_size > new_size)) | ||
135 | old_size = new_size; | ||
136 | memcpy(new_rl, rl, old_size); | ||
137 | ntfs_free(rl); | ||
138 | } | ||
139 | return new_rl; | ||
140 | } | ||
141 | |||
142 | /** | ||
98 | * ntfs_are_rl_mergeable - test if two runlists can be joined together | 143 | * ntfs_are_rl_mergeable - test if two runlists can be joined together |
99 | * @dst: original runlist | 144 | * @dst: original runlist |
100 | * @src: new runlist to test for mergeability with @dst | 145 | * @src: new runlist to test for mergeability with @dst |
@@ -497,6 +542,7 @@ runlist_element *ntfs_runlists_merge(runlist_element *drl, | |||
497 | /* Scan to the end of the source runlist. */ | 542 | /* Scan to the end of the source runlist. */ |
498 | for (dend = 0; likely(drl[dend].length); dend++) | 543 | for (dend = 0; likely(drl[dend].length); dend++) |
499 | ; | 544 | ; |
545 | dend++; | ||
500 | drl = ntfs_rl_realloc(drl, dend, dend + 1); | 546 | drl = ntfs_rl_realloc(drl, dend, dend + 1); |
501 | if (IS_ERR(drl)) | 547 | if (IS_ERR(drl)) |
502 | return drl; | 548 | return drl; |
@@ -566,8 +612,8 @@ runlist_element *ntfs_runlists_merge(runlist_element *drl, | |||
566 | ((drl[dins].vcn + drl[dins].length) <= /* End of hole */ | 612 | ((drl[dins].vcn + drl[dins].length) <= /* End of hole */ |
567 | (srl[send - 1].vcn + srl[send - 1].length))); | 613 | (srl[send - 1].vcn + srl[send - 1].length))); |
568 | 614 | ||
569 | /* Or we'll lose an end marker */ | 615 | /* Or we will lose an end marker. */ |
570 | if (start && finish && (drl[dins].length == 0)) | 616 | if (finish && !drl[dins].length) |
571 | ss++; | 617 | ss++; |
572 | if (marker && (drl[dins].vcn + drl[dins].length > srl[send - 1].vcn)) | 618 | if (marker && (drl[dins].vcn + drl[dins].length > srl[send - 1].vcn)) |
573 | finish = FALSE; | 619 | finish = FALSE; |
@@ -621,11 +667,8 @@ runlist_element *ntfs_runlists_merge(runlist_element *drl, | |||
621 | if (drl[ds].lcn != LCN_RL_NOT_MAPPED) { | 667 | if (drl[ds].lcn != LCN_RL_NOT_MAPPED) { |
622 | /* Add an unmapped runlist element. */ | 668 | /* Add an unmapped runlist element. */ |
623 | if (!slots) { | 669 | if (!slots) { |
624 | /* FIXME/TODO: We need to have the | 670 | drl = ntfs_rl_realloc_nofail(drl, ds, |
625 | * extra memory already! (AIA) */ | 671 | ds + 2); |
626 | drl = ntfs_rl_realloc(drl, ds, ds + 2); | ||
627 | if (!drl) | ||
628 | goto critical_error; | ||
629 | slots = 2; | 672 | slots = 2; |
630 | } | 673 | } |
631 | ds++; | 674 | ds++; |
@@ -640,13 +683,8 @@ runlist_element *ntfs_runlists_merge(runlist_element *drl, | |||
640 | drl[ds].length = marker_vcn - drl[ds].vcn; | 683 | drl[ds].length = marker_vcn - drl[ds].vcn; |
641 | /* Finally add the ENOENT terminator. */ | 684 | /* Finally add the ENOENT terminator. */ |
642 | ds++; | 685 | ds++; |
643 | if (!slots) { | 686 | if (!slots) |
644 | /* FIXME/TODO: We need to have the extra | 687 | drl = ntfs_rl_realloc_nofail(drl, ds, ds + 1); |
645 | * memory already! (AIA) */ | ||
646 | drl = ntfs_rl_realloc(drl, ds, ds + 1); | ||
647 | if (!drl) | ||
648 | goto critical_error; | ||
649 | } | ||
650 | drl[ds].vcn = marker_vcn; | 688 | drl[ds].vcn = marker_vcn; |
651 | drl[ds].lcn = LCN_ENOENT; | 689 | drl[ds].lcn = LCN_ENOENT; |
652 | drl[ds].length = (s64)0; | 690 | drl[ds].length = (s64)0; |
@@ -659,11 +697,6 @@ finished: | |||
659 | ntfs_debug("Merged runlist:"); | 697 | ntfs_debug("Merged runlist:"); |
660 | ntfs_debug_dump_runlist(drl); | 698 | ntfs_debug_dump_runlist(drl); |
661 | return drl; | 699 | return drl; |
662 | |||
663 | critical_error: | ||
664 | /* Critical error! We cannot afford to fail here. */ | ||
665 | ntfs_error(NULL, "Critical error! Not enough memory."); | ||
666 | panic("NTFS: Cannot continue."); | ||
667 | } | 700 | } |
668 | 701 | ||
669 | /** | 702 | /** |
@@ -727,6 +760,9 @@ runlist_element *ntfs_mapping_pairs_decompress(const ntfs_volume *vol, | |||
727 | ntfs_error(vol->sb, "Corrupt attribute."); | 760 | ntfs_error(vol->sb, "Corrupt attribute."); |
728 | return ERR_PTR(-EIO); | 761 | return ERR_PTR(-EIO); |
729 | } | 762 | } |
763 | /* If the mapping pairs array is valid but empty, nothing to do. */ | ||
764 | if (!vcn && !*buf) | ||
765 | return old_rl; | ||
730 | /* Current position in runlist array. */ | 766 | /* Current position in runlist array. */ |
731 | rlpos = 0; | 767 | rlpos = 0; |
732 | /* Allocate first page and set current runlist size to one page. */ | 768 | /* Allocate first page and set current runlist size to one page. */ |
@@ -1419,6 +1455,7 @@ err_out: | |||
1419 | 1455 | ||
1420 | /** | 1456 | /** |
1421 | * ntfs_rl_truncate_nolock - truncate a runlist starting at a specified vcn | 1457 | * ntfs_rl_truncate_nolock - truncate a runlist starting at a specified vcn |
1458 | * @vol: ntfs volume (needed for error output) | ||
1422 | * @runlist: runlist to truncate | 1459 | * @runlist: runlist to truncate |
1423 | * @new_length: the new length of the runlist in VCNs | 1460 | * @new_length: the new length of the runlist in VCNs |
1424 | * | 1461 | * |
@@ -1426,12 +1463,16 @@ err_out: | |||
1426 | * holding the runlist elements to a length of @new_length VCNs. | 1463 | * holding the runlist elements to a length of @new_length VCNs. |
1427 | * | 1464 | * |
1428 | * If @new_length lies within the runlist, the runlist elements with VCNs of | 1465 | * If @new_length lies within the runlist, the runlist elements with VCNs of |
1429 | * @new_length and above are discarded. | 1466 | * @new_length and above are discarded. As a special case if @new_length is |
1467 | * zero, the runlist is discarded and set to NULL. | ||
1430 | * | 1468 | * |
1431 | * If @new_length lies beyond the runlist, a sparse runlist element is added to | 1469 | * If @new_length lies beyond the runlist, a sparse runlist element is added to |
1432 | * the end of the runlist @runlist or if the last runlist element is a sparse | 1470 | * the end of the runlist @runlist or if the last runlist element is a sparse |
1433 | * one already, this is extended. | 1471 | * one already, this is extended. |
1434 | * | 1472 | * |
1473 | * Note, no checking is done for unmapped runlist elements. It is assumed that | ||
1474 | * the caller has mapped any elements that need to be mapped already. | ||
1475 | * | ||
1435 | * Return 0 on success and -errno on error. | 1476 | * Return 0 on success and -errno on error. |
1436 | * | 1477 | * |
1437 | * Locking: The caller must hold @runlist->lock for writing. | 1478 | * Locking: The caller must hold @runlist->lock for writing. |
@@ -1446,6 +1487,13 @@ int ntfs_rl_truncate_nolock(const ntfs_volume *vol, runlist *const runlist, | |||
1446 | BUG_ON(!runlist); | 1487 | BUG_ON(!runlist); |
1447 | BUG_ON(new_length < 0); | 1488 | BUG_ON(new_length < 0); |
1448 | rl = runlist->rl; | 1489 | rl = runlist->rl; |
1490 | if (!new_length) { | ||
1491 | ntfs_debug("Freeing runlist."); | ||
1492 | runlist->rl = NULL; | ||
1493 | if (rl) | ||
1494 | ntfs_free(rl); | ||
1495 | return 0; | ||
1496 | } | ||
1449 | if (unlikely(!rl)) { | 1497 | if (unlikely(!rl)) { |
1450 | /* | 1498 | /* |
1451 | * Create a runlist consisting of a sparse runlist element of | 1499 | * Create a runlist consisting of a sparse runlist element of |
@@ -1553,4 +1601,288 @@ int ntfs_rl_truncate_nolock(const ntfs_volume *vol, runlist *const runlist, | |||
1553 | return 0; | 1601 | return 0; |
1554 | } | 1602 | } |
1555 | 1603 | ||
1604 | /** | ||
1605 | * ntfs_rl_punch_nolock - punch a hole into a runlist | ||
1606 | * @vol: ntfs volume (needed for error output) | ||
1607 | * @runlist: runlist to punch a hole into | ||
1608 | * @start: starting VCN of the hole to be created | ||
1609 | * @length: size of the hole to be created in units of clusters | ||
1610 | * | ||
1611 | * Punch a hole into the runlist @runlist starting at VCN @start and of size | ||
1612 | * @length clusters. | ||
1613 | * | ||
1614 | * Return 0 on success and -errno on error, in which case @runlist has not been | ||
1615 | * modified. | ||
1616 | * | ||
1617 | * If @start and/or @start + @length are outside the runlist return error code | ||
1618 | * -ENOENT. | ||
1619 | * | ||
1620 | * If the runlist contains unmapped or error elements between @start and @start | ||
1621 | * + @length return error code -EINVAL. | ||
1622 | * | ||
1623 | * Locking: The caller must hold @runlist->lock for writing. | ||
1624 | */ | ||
1625 | int ntfs_rl_punch_nolock(const ntfs_volume *vol, runlist *const runlist, | ||
1626 | const VCN start, const s64 length) | ||
1627 | { | ||
1628 | const VCN end = start + length; | ||
1629 | s64 delta; | ||
1630 | runlist_element *rl, *rl_end, *rl_real_end, *trl; | ||
1631 | int old_size; | ||
1632 | BOOL lcn_fixup = FALSE; | ||
1633 | |||
1634 | ntfs_debug("Entering for start 0x%llx, length 0x%llx.", | ||
1635 | (long long)start, (long long)length); | ||
1636 | BUG_ON(!runlist); | ||
1637 | BUG_ON(start < 0); | ||
1638 | BUG_ON(length < 0); | ||
1639 | BUG_ON(end < 0); | ||
1640 | rl = runlist->rl; | ||
1641 | if (unlikely(!rl)) { | ||
1642 | if (likely(!start && !length)) | ||
1643 | return 0; | ||
1644 | return -ENOENT; | ||
1645 | } | ||
1646 | /* Find @start in the runlist. */ | ||
1647 | while (likely(rl->length && start >= rl[1].vcn)) | ||
1648 | rl++; | ||
1649 | rl_end = rl; | ||
1650 | /* Find @end in the runlist. */ | ||
1651 | while (likely(rl_end->length && end >= rl_end[1].vcn)) { | ||
1652 | /* Verify there are no unmapped or error elements. */ | ||
1653 | if (unlikely(rl_end->lcn < LCN_HOLE)) | ||
1654 | return -EINVAL; | ||
1655 | rl_end++; | ||
1656 | } | ||
1657 | /* Check the last element. */ | ||
1658 | if (unlikely(rl_end->length && rl_end->lcn < LCN_HOLE)) | ||
1659 | return -EINVAL; | ||
1660 | /* This covers @start being out of bounds, too. */ | ||
1661 | if (!rl_end->length && end > rl_end->vcn) | ||
1662 | return -ENOENT; | ||
1663 | if (!length) | ||
1664 | return 0; | ||
1665 | if (!rl->length) | ||
1666 | return -ENOENT; | ||
1667 | rl_real_end = rl_end; | ||
1668 | /* Determine the runlist size. */ | ||
1669 | while (likely(rl_real_end->length)) | ||
1670 | rl_real_end++; | ||
1671 | old_size = rl_real_end - runlist->rl + 1; | ||
1672 | /* If @start is in a hole simply extend the hole. */ | ||
1673 | if (rl->lcn == LCN_HOLE) { | ||
1674 | /* | ||
1675 | * If both @start and @end are in the same sparse run, we are | ||
1676 | * done. | ||
1677 | */ | ||
1678 | if (end <= rl[1].vcn) { | ||
1679 | ntfs_debug("Done (requested hole is already sparse)."); | ||
1680 | return 0; | ||
1681 | } | ||
1682 | extend_hole: | ||
1683 | /* Extend the hole. */ | ||
1684 | rl->length = end - rl->vcn; | ||
1685 | /* If @end is in a hole, merge it with the current one. */ | ||
1686 | if (rl_end->lcn == LCN_HOLE) { | ||
1687 | rl_end++; | ||
1688 | rl->length = rl_end->vcn - rl->vcn; | ||
1689 | } | ||
1690 | /* We have done the hole. Now deal with the remaining tail. */ | ||
1691 | rl++; | ||
1692 | /* Cut out all runlist elements up to @end. */ | ||
1693 | if (rl < rl_end) | ||
1694 | memmove(rl, rl_end, (rl_real_end - rl_end + 1) * | ||
1695 | sizeof(*rl)); | ||
1696 | /* Adjust the beginning of the tail if necessary. */ | ||
1697 | if (end > rl->vcn) { | ||
1698 | s64 delta = end - rl->vcn; | ||
1699 | rl->vcn = end; | ||
1700 | rl->length -= delta; | ||
1701 | /* Only adjust the lcn if it is real. */ | ||
1702 | if (rl->lcn >= 0) | ||
1703 | rl->lcn += delta; | ||
1704 | } | ||
1705 | shrink_allocation: | ||
1706 | /* Reallocate memory if the allocation changed. */ | ||
1707 | if (rl < rl_end) { | ||
1708 | rl = ntfs_rl_realloc(runlist->rl, old_size, | ||
1709 | old_size - (rl_end - rl)); | ||
1710 | if (IS_ERR(rl)) | ||
1711 | ntfs_warning(vol->sb, "Failed to shrink " | ||
1712 | "runlist buffer. This just " | ||
1713 | "wastes a bit of memory " | ||
1714 | "temporarily so we ignore it " | ||
1715 | "and return success."); | ||
1716 | else | ||
1717 | runlist->rl = rl; | ||
1718 | } | ||
1719 | ntfs_debug("Done (extend hole)."); | ||
1720 | return 0; | ||
1721 | } | ||
1722 | /* | ||
1723 | * If @start is at the beginning of a run things are easier as there is | ||
1724 | * no need to split the first run. | ||
1725 | */ | ||
1726 | if (start == rl->vcn) { | ||
1727 | /* | ||
1728 | * @start is at the beginning of a run. | ||
1729 | * | ||
1730 | * If the previous run is sparse, extend its hole. | ||
1731 | * | ||
1732 | * If @end is not in the same run, switch the run to be sparse | ||
1733 | * and extend the newly created hole. | ||
1734 | * | ||
1735 | * Thus both of these cases reduce the problem to the above | ||
1736 | * case of "@start is in a hole". | ||
1737 | */ | ||
1738 | if (rl > runlist->rl && (rl - 1)->lcn == LCN_HOLE) { | ||
1739 | rl--; | ||
1740 | goto extend_hole; | ||
1741 | } | ||
1742 | if (end >= rl[1].vcn) { | ||
1743 | rl->lcn = LCN_HOLE; | ||
1744 | goto extend_hole; | ||
1745 | } | ||
1746 | /* | ||
1747 | * The final case is when @end is in the same run as @start. | ||
1748 | * For this we need to split the run into two: one run for the | |
1749 | * sparse region between the beginning of the old run, i.e. | ||
1750 | * @start, and @end and one for the remaining non-sparse | ||
1751 | * region, i.e. between @end and the end of the old run. | ||
1752 | */ | ||
1753 | trl = ntfs_rl_realloc(runlist->rl, old_size, old_size + 1); | ||
1754 | if (IS_ERR(trl)) | ||
1755 | goto enomem_out; | ||
1756 | old_size++; | ||
1757 | if (runlist->rl != trl) { | ||
1758 | rl = trl + (rl - runlist->rl); | ||
1759 | rl_end = trl + (rl_end - runlist->rl); | ||
1760 | rl_real_end = trl + (rl_real_end - runlist->rl); | ||
1761 | runlist->rl = trl; | ||
1762 | } | ||
1763 | split_end: | ||
1764 | /* Shift all the runs up by one. */ | ||
1765 | memmove(rl + 1, rl, (rl_real_end - rl + 1) * sizeof(*rl)); | ||
1766 | /* Finally, setup the two split runs. */ | ||
1767 | rl->lcn = LCN_HOLE; | ||
1768 | rl->length = length; | ||
1769 | rl++; | ||
1770 | rl->vcn += length; | ||
1771 | /* Only adjust the lcn if it is real. */ | ||
1772 | if (rl->lcn >= 0 || lcn_fixup) | ||
1773 | rl->lcn += length; | ||
1774 | rl->length -= length; | ||
1775 | ntfs_debug("Done (split one)."); | ||
1776 | return 0; | ||
1777 | } | ||
1778 | /* | ||
1779 | * @start is neither in a hole nor at the beginning of a run. | ||
1780 | * | ||
1781 | * If @end is in a hole, things are easier as simply truncating the run | ||
1782 | * @start is in to end at @start - 1, deleting all runs after that up | ||
1783 | * to @end, and finally extending the beginning of the run @end is in | ||
1784 | * to be @start is all that is needed. | ||
1785 | */ | ||
1786 | if (rl_end->lcn == LCN_HOLE) { | ||
1787 | /* Truncate the run containing @start. */ | ||
1788 | rl->length = start - rl->vcn; | ||
1789 | rl++; | ||
1790 | /* Cut out all runlist elements up to @end. */ | ||
1791 | if (rl < rl_end) | ||
1792 | memmove(rl, rl_end, (rl_real_end - rl_end + 1) * | ||
1793 | sizeof(*rl)); | ||
1794 | /* Extend the beginning of the run @end is in to be @start. */ | ||
1795 | rl->vcn = start; | ||
1796 | rl->length = rl[1].vcn - start; | ||
1797 | goto shrink_allocation; | ||
1798 | } | ||
1799 | /* | ||
1800 | * If @end is not in a hole there are still two cases to distinguish. | ||
1801 | * Either @end is or is not in the same run as @start. | ||
1802 | * | ||
1803 | * The second case is easier as it can be reduced to an already solved | ||
1804 | * problem by truncating the run @start is in to end at @start - 1. | ||
1805 | * Then, if @end is in the next run, that run needs to be split into a | |
1806 | * sparse run followed by a non-sparse run (already covered above); | |
1807 | * otherwise switching the next run to be sparse reduces the problem to | |
1808 | * the already covered case of "@start is in a hole". | |
1809 | */ | ||
1810 | if (end >= rl[1].vcn) { | ||
1811 | /* | ||
1812 | * If @end is not in the next run, reduce the problem to the | ||
1813 | * case of "@start is in a hole". | ||
1814 | */ | ||
1815 | if (rl[1].length && end >= rl[2].vcn) { | ||
1816 | /* Truncate the run containing @start. */ | ||
1817 | rl->length = start - rl->vcn; | ||
1818 | rl++; | ||
1819 | rl->vcn = start; | ||
1820 | rl->lcn = LCN_HOLE; | ||
1821 | goto extend_hole; | ||
1822 | } | ||
1823 | trl = ntfs_rl_realloc(runlist->rl, old_size, old_size + 1); | ||
1824 | if (IS_ERR(trl)) | ||
1825 | goto enomem_out; | ||
1826 | old_size++; | ||
1827 | if (runlist->rl != trl) { | ||
1828 | rl = trl + (rl - runlist->rl); | ||
1829 | rl_end = trl + (rl_end - runlist->rl); | ||
1830 | rl_real_end = trl + (rl_real_end - runlist->rl); | ||
1831 | runlist->rl = trl; | ||
1832 | } | ||
1833 | /* Truncate the run containing @start. */ | ||
1834 | rl->length = start - rl->vcn; | ||
1835 | rl++; | ||
1836 | /* | ||
1837 | * @end is in the next run, reduce the problem to the case | ||
1838 | * where "@start is at the beginning of a run and @end is in | ||
1839 | * the same run as @start". | ||
1840 | */ | ||
1841 | delta = rl->vcn - start; | ||
1842 | rl->vcn = start; | ||
1843 | if (rl->lcn >= 0) { | ||
1844 | rl->lcn -= delta; | ||
1845 | /* Need this in case the lcn just became negative. */ | ||
1846 | lcn_fixup = TRUE; | ||
1847 | } | ||
1848 | rl->length += delta; | ||
1849 | goto split_end; | ||
1850 | } | ||
1851 | /* | ||
1852 | * The first case from above, i.e. @end is in the same run as @start. | ||
1853 | * We need to split the run into three. One run for the non-sparse | ||
1854 | * region between the beginning of the old run and @start, one for the | ||
1855 | * sparse region between @start and @end, and one for the remaining | ||
1856 | * non-sparse region, i.e. between @end and the end of the old run. | ||
1857 | */ | ||
1858 | trl = ntfs_rl_realloc(runlist->rl, old_size, old_size + 2); | ||
1859 | if (IS_ERR(trl)) | ||
1860 | goto enomem_out; | ||
1861 | old_size += 2; | ||
1862 | if (runlist->rl != trl) { | ||
1863 | rl = trl + (rl - runlist->rl); | ||
1864 | rl_end = trl + (rl_end - runlist->rl); | ||
1865 | rl_real_end = trl + (rl_real_end - runlist->rl); | ||
1866 | runlist->rl = trl; | ||
1867 | } | ||
1868 | /* Shift all the runs up by two. */ | ||
1869 | memmove(rl + 2, rl, (rl_real_end - rl + 1) * sizeof(*rl)); | ||
1870 | /* Finally, setup the three split runs. */ | ||
1871 | rl->length = start - rl->vcn; | ||
1872 | rl++; | ||
1873 | rl->vcn = start; | ||
1874 | rl->lcn = LCN_HOLE; | ||
1875 | rl->length = length; | ||
1876 | rl++; | ||
1877 | delta = end - rl->vcn; | ||
1878 | rl->vcn = end; | ||
1879 | rl->lcn += delta; | ||
1880 | rl->length -= delta; | ||
1881 | ntfs_debug("Done (split both)."); | ||
1882 | return 0; | ||
1883 | enomem_out: | ||
1884 | ntfs_error(vol->sb, "Not enough memory to extend runlist buffer."); | ||
1885 | return -ENOMEM; | ||
1886 | } | ||
1887 | |||
1556 | #endif /* NTFS_RW */ | 1888 | #endif /* NTFS_RW */ |
diff --git a/fs/ntfs/runlist.h b/fs/ntfs/runlist.h index aa0ee6540e7c..47728fbb610b 100644 --- a/fs/ntfs/runlist.h +++ b/fs/ntfs/runlist.h | |||
@@ -94,6 +94,9 @@ extern int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst, | |||
94 | extern int ntfs_rl_truncate_nolock(const ntfs_volume *vol, | 94 | extern int ntfs_rl_truncate_nolock(const ntfs_volume *vol, |
95 | runlist *const runlist, const s64 new_length); | 95 | runlist *const runlist, const s64 new_length); |
96 | 96 | ||
97 | int ntfs_rl_punch_nolock(const ntfs_volume *vol, runlist *const runlist, | ||
98 | const VCN start, const s64 length); | ||
99 | |||
97 | #endif /* NTFS_RW */ | 100 | #endif /* NTFS_RW */ |
98 | 101 | ||
99 | #endif /* _LINUX_NTFS_RUNLIST_H */ | 102 | #endif /* _LINUX_NTFS_RUNLIST_H */ |
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 41aa8eb6755b..b2b392961268 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c | |||
@@ -1133,7 +1133,8 @@ mft_unmap_out: | |||
1133 | * | 1133 | * |
1134 | * Return TRUE on success or FALSE on error. | 1134 | * Return TRUE on success or FALSE on error. |
1135 | */ | 1135 | */ |
1136 | static BOOL load_and_check_logfile(ntfs_volume *vol) | 1136 | static BOOL load_and_check_logfile(ntfs_volume *vol, |
1137 | RESTART_PAGE_HEADER **rp) | ||
1137 | { | 1138 | { |
1138 | struct inode *tmp_ino; | 1139 | struct inode *tmp_ino; |
1139 | 1140 | ||
@@ -1145,7 +1146,7 @@ static BOOL load_and_check_logfile(ntfs_volume *vol) | |||
1145 | /* Caller will display error message. */ | 1146 | /* Caller will display error message. */ |
1146 | return FALSE; | 1147 | return FALSE; |
1147 | } | 1148 | } |
1148 | if (!ntfs_check_logfile(tmp_ino)) { | 1149 | if (!ntfs_check_logfile(tmp_ino, rp)) { |
1149 | iput(tmp_ino); | 1150 | iput(tmp_ino); |
1150 | /* ntfs_check_logfile() will have displayed error output. */ | 1151 | /* ntfs_check_logfile() will have displayed error output. */ |
1151 | return FALSE; | 1152 | return FALSE; |
@@ -1689,6 +1690,7 @@ static BOOL load_system_files(ntfs_volume *vol) | |||
1689 | VOLUME_INFORMATION *vi; | 1690 | VOLUME_INFORMATION *vi; |
1690 | ntfs_attr_search_ctx *ctx; | 1691 | ntfs_attr_search_ctx *ctx; |
1691 | #ifdef NTFS_RW | 1692 | #ifdef NTFS_RW |
1693 | RESTART_PAGE_HEADER *rp; | ||
1692 | int err; | 1694 | int err; |
1693 | #endif /* NTFS_RW */ | 1695 | #endif /* NTFS_RW */ |
1694 | 1696 | ||
@@ -1841,8 +1843,9 @@ get_ctx_vol_failed: | |||
1841 | * Get the inode for the logfile, check it and determine if the volume | 1843 | * Get the inode for the logfile, check it and determine if the volume |
1842 | * was shutdown cleanly. | 1844 | * was shutdown cleanly. |
1843 | */ | 1845 | */ |
1844 | if (!load_and_check_logfile(vol) || | 1846 | rp = NULL; |
1845 | !ntfs_is_logfile_clean(vol->logfile_ino)) { | 1847 | if (!load_and_check_logfile(vol, &rp) || |
1848 | !ntfs_is_logfile_clean(vol->logfile_ino, rp)) { | ||
1846 | static const char *es1a = "Failed to load $LogFile"; | 1849 | static const char *es1a = "Failed to load $LogFile"; |
1847 | static const char *es1b = "$LogFile is not clean"; | 1850 | static const char *es1b = "$LogFile is not clean"; |
1848 | static const char *es2 = ". Mount in Windows."; | 1851 | static const char *es2 = ". Mount in Windows."; |
@@ -1857,6 +1860,10 @@ get_ctx_vol_failed: | |||
1857 | "continue nor on_errors=" | 1860 | "continue nor on_errors=" |
1858 | "remount-ro was specified%s", | 1861 | "remount-ro was specified%s", |
1859 | es1, es2); | 1862 | es1, es2); |
1863 | if (vol->logfile_ino) { | ||
1864 | BUG_ON(!rp); | ||
1865 | ntfs_free(rp); | ||
1866 | } | ||
1860 | goto iput_logfile_err_out; | 1867 | goto iput_logfile_err_out; |
1861 | } | 1868 | } |
1862 | sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; | 1869 | sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; |
@@ -1867,6 +1874,7 @@ get_ctx_vol_failed: | |||
1867 | /* This will prevent a read-write remount. */ | 1874 | /* This will prevent a read-write remount. */ |
1868 | NVolSetErrors(vol); | 1875 | NVolSetErrors(vol); |
1869 | } | 1876 | } |
1877 | ntfs_free(rp); | ||
1870 | #endif /* NTFS_RW */ | 1878 | #endif /* NTFS_RW */ |
1871 | /* Get the root directory inode so we can do path lookups. */ | 1879 | /* Get the root directory inode so we can do path lookups. */ |
1872 | vol->root_ino = ntfs_iget(sb, FILE_root); | 1880 | vol->root_ino = ntfs_iget(sb, FILE_root); |
diff --git a/fs/ntfs/unistr.c b/fs/ntfs/unistr.c index 19c42e231b44..a389a5a16c84 100644 --- a/fs/ntfs/unistr.c +++ b/fs/ntfs/unistr.c | |||
@@ -372,7 +372,8 @@ retry: wc = nls->uni2char(le16_to_cpu(ins[i]), ns + o, | |||
372 | return -EINVAL; | 372 | return -EINVAL; |
373 | conversion_err: | 373 | conversion_err: |
374 | ntfs_error(vol->sb, "Unicode name contains characters that cannot be " | 374 | ntfs_error(vol->sb, "Unicode name contains characters that cannot be " |
375 | "converted to character set %s.", nls->charset); | 375 | "converted to character set %s. You might want to " |
376 | "try to use the mount option nls=utf8.", nls->charset); | ||
376 | if (ns != *outs) | 377 | if (ns != *outs) |
377 | kfree(ns); | 378 | kfree(ns); |
378 | if (wc != -ENAMETOOLONG) | 379 | if (wc != -ENAMETOOLONG) |
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/personality.h> | 24 | #include <linux/personality.h> |
25 | #include <linux/pagemap.h> | 25 | #include <linux/pagemap.h> |
26 | #include <linux/syscalls.h> | 26 | #include <linux/syscalls.h> |
27 | #include <linux/rcupdate.h> | ||
27 | 28 | ||
28 | #include <asm/unistd.h> | 29 | #include <asm/unistd.h> |
29 | 30 | ||
@@ -842,14 +843,16 @@ int get_unused_fd(void) | |||
842 | { | 843 | { |
843 | struct files_struct * files = current->files; | 844 | struct files_struct * files = current->files; |
844 | int fd, error; | 845 | int fd, error; |
846 | struct fdtable *fdt; | ||
845 | 847 | ||
846 | error = -EMFILE; | 848 | error = -EMFILE; |
847 | spin_lock(&files->file_lock); | 849 | spin_lock(&files->file_lock); |
848 | 850 | ||
849 | repeat: | 851 | repeat: |
850 | fd = find_next_zero_bit(files->open_fds->fds_bits, | 852 | fdt = files_fdtable(files); |
851 | files->max_fdset, | 853 | fd = find_next_zero_bit(fdt->open_fds->fds_bits, |
852 | files->next_fd); | 854 | fdt->max_fdset, |
855 | fdt->next_fd); | ||
853 | 856 | ||
854 | /* | 857 | /* |
855 | * N.B. For clone tasks sharing a files structure, this test | 858 | * N.B. For clone tasks sharing a files structure, this test |
@@ -872,14 +875,14 @@ repeat: | |||
872 | goto repeat; | 875 | goto repeat; |
873 | } | 876 | } |
874 | 877 | ||
875 | FD_SET(fd, files->open_fds); | 878 | FD_SET(fd, fdt->open_fds); |
876 | FD_CLR(fd, files->close_on_exec); | 879 | FD_CLR(fd, fdt->close_on_exec); |
877 | files->next_fd = fd + 1; | 880 | fdt->next_fd = fd + 1; |
878 | #if 1 | 881 | #if 1 |
879 | /* Sanity check */ | 882 | /* Sanity check */ |
880 | if (files->fd[fd] != NULL) { | 883 | if (fdt->fd[fd] != NULL) { |
881 | printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd); | 884 | printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd); |
882 | files->fd[fd] = NULL; | 885 | fdt->fd[fd] = NULL; |
883 | } | 886 | } |
884 | #endif | 887 | #endif |
885 | error = fd; | 888 | error = fd; |
@@ -893,9 +896,10 @@ EXPORT_SYMBOL(get_unused_fd); | |||
893 | 896 | ||
894 | static inline void __put_unused_fd(struct files_struct *files, unsigned int fd) | 897 | static inline void __put_unused_fd(struct files_struct *files, unsigned int fd) |
895 | { | 898 | { |
896 | __FD_CLR(fd, files->open_fds); | 899 | struct fdtable *fdt = files_fdtable(files); |
897 | if (fd < files->next_fd) | 900 | __FD_CLR(fd, fdt->open_fds); |
898 | files->next_fd = fd; | 901 | if (fd < fdt->next_fd) |
902 | fdt->next_fd = fd; | ||
899 | } | 903 | } |
900 | 904 | ||
901 | void fastcall put_unused_fd(unsigned int fd) | 905 | void fastcall put_unused_fd(unsigned int fd) |
@@ -924,25 +928,21 @@ EXPORT_SYMBOL(put_unused_fd); | |||
924 | void fastcall fd_install(unsigned int fd, struct file * file) | 928 | void fastcall fd_install(unsigned int fd, struct file * file) |
925 | { | 929 | { |
926 | struct files_struct *files = current->files; | 930 | struct files_struct *files = current->files; |
931 | struct fdtable *fdt; | ||
927 | spin_lock(&files->file_lock); | 932 | spin_lock(&files->file_lock); |
928 | if (unlikely(files->fd[fd] != NULL)) | 933 | fdt = files_fdtable(files); |
929 | BUG(); | 934 | BUG_ON(fdt->fd[fd] != NULL); |
930 | files->fd[fd] = file; | 935 | rcu_assign_pointer(fdt->fd[fd], file); |
931 | spin_unlock(&files->file_lock); | 936 | spin_unlock(&files->file_lock); |
932 | } | 937 | } |
933 | 938 | ||
934 | EXPORT_SYMBOL(fd_install); | 939 | EXPORT_SYMBOL(fd_install); |
935 | 940 | ||
936 | asmlinkage long sys_open(const char __user * filename, int flags, int mode) | 941 | long do_sys_open(const char __user *filename, int flags, int mode) |
937 | { | 942 | { |
938 | char * tmp; | 943 | char *tmp = getname(filename); |
939 | int fd; | 944 | int fd = PTR_ERR(tmp); |
940 | 945 | ||
941 | if (force_o_largefile()) | ||
942 | flags |= O_LARGEFILE; | ||
943 | |||
944 | tmp = getname(filename); | ||
945 | fd = PTR_ERR(tmp); | ||
946 | if (!IS_ERR(tmp)) { | 946 | if (!IS_ERR(tmp)) { |
947 | fd = get_unused_fd(); | 947 | fd = get_unused_fd(); |
948 | if (fd >= 0) { | 948 | if (fd >= 0) { |
@@ -959,6 +959,14 @@ asmlinkage long sys_open(const char __user * filename, int flags, int mode) | |||
959 | } | 959 | } |
960 | return fd; | 960 | return fd; |
961 | } | 961 | } |
962 | |||
963 | asmlinkage long sys_open(const char __user *filename, int flags, int mode) | ||
964 | { | ||
965 | if (force_o_largefile()) | ||
966 | flags |= O_LARGEFILE; | ||
967 | |||
968 | return do_sys_open(filename, flags, mode); | ||
969 | } | ||
962 | EXPORT_SYMBOL_GPL(sys_open); | 970 | EXPORT_SYMBOL_GPL(sys_open); |
963 | 971 | ||
964 | #ifndef __alpha__ | 972 | #ifndef __alpha__ |
@@ -1007,15 +1015,17 @@ asmlinkage long sys_close(unsigned int fd) | |||
1007 | { | 1015 | { |
1008 | struct file * filp; | 1016 | struct file * filp; |
1009 | struct files_struct *files = current->files; | 1017 | struct files_struct *files = current->files; |
1018 | struct fdtable *fdt; | ||
1010 | 1019 | ||
1011 | spin_lock(&files->file_lock); | 1020 | spin_lock(&files->file_lock); |
1012 | if (fd >= files->max_fds) | 1021 | fdt = files_fdtable(files); |
1022 | if (fd >= fdt->max_fds) | ||
1013 | goto out_unlock; | 1023 | goto out_unlock; |
1014 | filp = files->fd[fd]; | 1024 | filp = fdt->fd[fd]; |
1015 | if (!filp) | 1025 | if (!filp) |
1016 | goto out_unlock; | 1026 | goto out_unlock; |
1017 | files->fd[fd] = NULL; | 1027 | rcu_assign_pointer(fdt->fd[fd], NULL); |
1018 | FD_CLR(fd, files->close_on_exec); | 1028 | FD_CLR(fd, fdt->close_on_exec); |
1019 | __put_unused_fd(files, fd); | 1029 | __put_unused_fd(files, fd); |
1020 | spin_unlock(&files->file_lock); | 1030 | spin_unlock(&files->file_lock); |
1021 | return filp_close(filp, files); | 1031 | return filp_close(filp, files); |
@@ -39,7 +39,11 @@ void pipe_wait(struct inode * inode) | |||
39 | { | 39 | { |
40 | DEFINE_WAIT(wait); | 40 | DEFINE_WAIT(wait); |
41 | 41 | ||
42 | prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE); | 42 | /* |
43 | * Pipes are system-local resources, so sleeping on them | ||
44 | * is considered a noninteractive wait: | ||
45 | */ | ||
46 | prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE|TASK_NONINTERACTIVE); | ||
43 | up(PIPE_SEM(*inode)); | 47 | up(PIPE_SEM(*inode)); |
44 | schedule(); | 48 | schedule(); |
45 | finish_wait(PIPE_WAIT(*inode), &wait); | 49 | finish_wait(PIPE_WAIT(*inode), &wait); |
@@ -415,6 +419,10 @@ pipe_poll(struct file *filp, poll_table *wait) | |||
415 | 419 | ||
416 | if (filp->f_mode & FMODE_WRITE) { | 420 | if (filp->f_mode & FMODE_WRITE) { |
417 | mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0; | 421 | mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0; |
422 | /* | ||
423 | * Most Unices do not set POLLERR for FIFOs but on Linux they | ||
424 | * behave exactly like pipes for poll(). | ||
425 | */ | ||
418 | if (!PIPE_READERS(*inode)) | 426 | if (!PIPE_READERS(*inode)) |
419 | mask |= POLLERR; | 427 | mask |= POLLERR; |
420 | } | 428 | } |
@@ -422,9 +430,6 @@ pipe_poll(struct file *filp, poll_table *wait) | |||
422 | return mask; | 430 | return mask; |
423 | } | 431 | } |
424 | 432 | ||
425 | /* FIXME: most Unices do not set POLLERR for fifos */ | ||
426 | #define fifo_poll pipe_poll | ||
427 | |||
428 | static int | 433 | static int |
429 | pipe_release(struct inode *inode, int decr, int decw) | 434 | pipe_release(struct inode *inode, int decr, int decw) |
430 | { | 435 | { |
@@ -568,7 +573,7 @@ struct file_operations read_fifo_fops = { | |||
568 | .read = pipe_read, | 573 | .read = pipe_read, |
569 | .readv = pipe_readv, | 574 | .readv = pipe_readv, |
570 | .write = bad_pipe_w, | 575 | .write = bad_pipe_w, |
571 | .poll = fifo_poll, | 576 | .poll = pipe_poll, |
572 | .ioctl = pipe_ioctl, | 577 | .ioctl = pipe_ioctl, |
573 | .open = pipe_read_open, | 578 | .open = pipe_read_open, |
574 | .release = pipe_read_release, | 579 | .release = pipe_read_release, |
@@ -580,7 +585,7 @@ struct file_operations write_fifo_fops = { | |||
580 | .read = bad_pipe_r, | 585 | .read = bad_pipe_r, |
581 | .write = pipe_write, | 586 | .write = pipe_write, |
582 | .writev = pipe_writev, | 587 | .writev = pipe_writev, |
583 | .poll = fifo_poll, | 588 | .poll = pipe_poll, |
584 | .ioctl = pipe_ioctl, | 589 | .ioctl = pipe_ioctl, |
585 | .open = pipe_write_open, | 590 | .open = pipe_write_open, |
586 | .release = pipe_write_release, | 591 | .release = pipe_write_release, |
@@ -593,7 +598,7 @@ struct file_operations rdwr_fifo_fops = { | |||
593 | .readv = pipe_readv, | 598 | .readv = pipe_readv, |
594 | .write = pipe_write, | 599 | .write = pipe_write, |
595 | .writev = pipe_writev, | 600 | .writev = pipe_writev, |
596 | .poll = fifo_poll, | 601 | .poll = pipe_poll, |
597 | .ioctl = pipe_ioctl, | 602 | .ioctl = pipe_ioctl, |
598 | .open = pipe_rdwr_open, | 603 | .open = pipe_rdwr_open, |
599 | .release = pipe_rdwr_release, | 604 | .release = pipe_rdwr_release, |
diff --git a/fs/proc/array.c b/fs/proc/array.c index 37668fe998ad..d88d518d30f6 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c | |||
@@ -159,6 +159,7 @@ static inline char * task_state(struct task_struct *p, char *buffer) | |||
159 | { | 159 | { |
160 | struct group_info *group_info; | 160 | struct group_info *group_info; |
161 | int g; | 161 | int g; |
162 | struct fdtable *fdt = NULL; | ||
162 | 163 | ||
163 | read_lock(&tasklist_lock); | 164 | read_lock(&tasklist_lock); |
164 | buffer += sprintf(buffer, | 165 | buffer += sprintf(buffer, |
@@ -179,10 +180,12 @@ static inline char * task_state(struct task_struct *p, char *buffer) | |||
179 | p->gid, p->egid, p->sgid, p->fsgid); | 180 | p->gid, p->egid, p->sgid, p->fsgid); |
180 | read_unlock(&tasklist_lock); | 181 | read_unlock(&tasklist_lock); |
181 | task_lock(p); | 182 | task_lock(p); |
183 | if (p->files) | ||
184 | fdt = files_fdtable(p->files); | ||
182 | buffer += sprintf(buffer, | 185 | buffer += sprintf(buffer, |
183 | "FDSize:\t%d\n" | 186 | "FDSize:\t%d\n" |
184 | "Groups:\t", | 187 | "Groups:\t", |
185 | p->files ? p->files->max_fds : 0); | 188 | fdt ? fdt->max_fds : 0); |
186 | 189 | ||
187 | group_info = p->group_info; | 190 | group_info = p->group_info; |
188 | get_group_info(group_info); | 191 | get_group_info(group_info); |
diff --git a/fs/proc/base.c b/fs/proc/base.c index 491f2d9f89ac..23db452ab428 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -11,6 +11,40 @@ | |||
11 | * go into icache. We cache the reference to task_struct upon lookup too. | 11 | * go into icache. We cache the reference to task_struct upon lookup too. |
12 | * Eventually it should become a filesystem in its own. We don't use the | 12 | * Eventually it should become a filesystem in its own. We don't use the |
13 | * rest of procfs anymore. | 13 | * rest of procfs anymore. |
14 | * | ||
15 | * | ||
16 | * Changelog: | ||
17 | * 17-Jan-2005 | ||
18 | * Allan Bezerra | ||
19 | * Bruna Moreira <bruna.moreira@indt.org.br> | ||
20 | * Edjard Mota <edjard.mota@indt.org.br> | ||
21 | * Ilias Biris <ilias.biris@indt.org.br> | ||
22 | * Mauricio Lin <mauricio.lin@indt.org.br> | ||
23 | * | ||
24 | * Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT | ||
25 | * | ||
26 | * A new process specific entry (smaps) included in /proc. It shows the | ||
27 | * size of rss for each memory area. The maps entry lacks information | ||
28 | * about physical memory size (rss) for each mapped file, i.e., | ||
29 | * rss information for executables and library files. | ||
30 | * This additional information is useful for any tools that need to know | ||
31 | * about physical memory consumption for a process specific library. | ||
32 | * | ||
33 | * Changelog: | ||
34 | * 21-Feb-2005 | ||
35 | * Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT | ||
36 | * Pud inclusion in the page table walking. | ||
37 | * | ||
38 | * ChangeLog: | ||
39 | * 10-Mar-2005 | ||
40 | * 10LE Instituto Nokia de Tecnologia - INdT: | ||
41 | * A better way to walks through the page table as suggested by Hugh Dickins. | ||
42 | * | ||
43 | * Simo Piiroinen <simo.piiroinen@nokia.com>: | ||
44 | * Smaps information related to shared, private, clean and dirty pages. | ||
45 | * | ||
46 | * Paul Mundt <paul.mundt@nokia.com>: | ||
47 | * Overall revision about smaps. | ||
14 | */ | 48 | */ |
15 | 49 | ||
16 | #include <asm/uaccess.h> | 50 | #include <asm/uaccess.h> |
@@ -28,6 +62,7 @@ | |||
28 | #include <linux/namespace.h> | 62 | #include <linux/namespace.h> |
29 | #include <linux/mm.h> | 63 | #include <linux/mm.h> |
30 | #include <linux/smp_lock.h> | 64 | #include <linux/smp_lock.h> |
65 | #include <linux/rcupdate.h> | ||
31 | #include <linux/kallsyms.h> | 66 | #include <linux/kallsyms.h> |
32 | #include <linux/mount.h> | 67 | #include <linux/mount.h> |
33 | #include <linux/security.h> | 68 | #include <linux/security.h> |
@@ -65,8 +100,10 @@ enum pid_directory_inos { | |||
65 | PROC_TGID_STAT, | 100 | PROC_TGID_STAT, |
66 | PROC_TGID_STATM, | 101 | PROC_TGID_STATM, |
67 | PROC_TGID_MAPS, | 102 | PROC_TGID_MAPS, |
103 | PROC_TGID_NUMA_MAPS, | ||
68 | PROC_TGID_MOUNTS, | 104 | PROC_TGID_MOUNTS, |
69 | PROC_TGID_WCHAN, | 105 | PROC_TGID_WCHAN, |
106 | PROC_TGID_SMAPS, | ||
70 | #ifdef CONFIG_SCHEDSTATS | 107 | #ifdef CONFIG_SCHEDSTATS |
71 | PROC_TGID_SCHEDSTAT, | 108 | PROC_TGID_SCHEDSTAT, |
72 | #endif | 109 | #endif |
@@ -83,7 +120,6 @@ enum pid_directory_inos { | |||
83 | #ifdef CONFIG_AUDITSYSCALL | 120 | #ifdef CONFIG_AUDITSYSCALL |
84 | PROC_TGID_LOGINUID, | 121 | PROC_TGID_LOGINUID, |
85 | #endif | 122 | #endif |
86 | PROC_TGID_FD_DIR, | ||
87 | PROC_TGID_OOM_SCORE, | 123 | PROC_TGID_OOM_SCORE, |
88 | PROC_TGID_OOM_ADJUST, | 124 | PROC_TGID_OOM_ADJUST, |
89 | PROC_TID_INO, | 125 | PROC_TID_INO, |
@@ -102,8 +138,10 @@ enum pid_directory_inos { | |||
102 | PROC_TID_STAT, | 138 | PROC_TID_STAT, |
103 | PROC_TID_STATM, | 139 | PROC_TID_STATM, |
104 | PROC_TID_MAPS, | 140 | PROC_TID_MAPS, |
141 | PROC_TID_NUMA_MAPS, | ||
105 | PROC_TID_MOUNTS, | 142 | PROC_TID_MOUNTS, |
106 | PROC_TID_WCHAN, | 143 | PROC_TID_WCHAN, |
144 | PROC_TID_SMAPS, | ||
107 | #ifdef CONFIG_SCHEDSTATS | 145 | #ifdef CONFIG_SCHEDSTATS |
108 | PROC_TID_SCHEDSTAT, | 146 | PROC_TID_SCHEDSTAT, |
109 | #endif | 147 | #endif |
@@ -120,9 +158,11 @@ enum pid_directory_inos { | |||
120 | #ifdef CONFIG_AUDITSYSCALL | 158 | #ifdef CONFIG_AUDITSYSCALL |
121 | PROC_TID_LOGINUID, | 159 | PROC_TID_LOGINUID, |
122 | #endif | 160 | #endif |
123 | PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */ | ||
124 | PROC_TID_OOM_SCORE, | 161 | PROC_TID_OOM_SCORE, |
125 | PROC_TID_OOM_ADJUST, | 162 | PROC_TID_OOM_ADJUST, |
163 | |||
164 | /* Add new entries before this */ | ||
165 | PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */ | ||
126 | }; | 166 | }; |
127 | 167 | ||
128 | struct pid_entry { | 168 | struct pid_entry { |
@@ -144,6 +184,9 @@ static struct pid_entry tgid_base_stuff[] = { | |||
144 | E(PROC_TGID_STAT, "stat", S_IFREG|S_IRUGO), | 184 | E(PROC_TGID_STAT, "stat", S_IFREG|S_IRUGO), |
145 | E(PROC_TGID_STATM, "statm", S_IFREG|S_IRUGO), | 185 | E(PROC_TGID_STATM, "statm", S_IFREG|S_IRUGO), |
146 | E(PROC_TGID_MAPS, "maps", S_IFREG|S_IRUGO), | 186 | E(PROC_TGID_MAPS, "maps", S_IFREG|S_IRUGO), |
187 | #ifdef CONFIG_NUMA | ||
188 | E(PROC_TGID_NUMA_MAPS, "numa_maps", S_IFREG|S_IRUGO), | ||
189 | #endif | ||
147 | E(PROC_TGID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), | 190 | E(PROC_TGID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), |
148 | #ifdef CONFIG_SECCOMP | 191 | #ifdef CONFIG_SECCOMP |
149 | E(PROC_TGID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR), | 192 | E(PROC_TGID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR), |
@@ -152,6 +195,7 @@ static struct pid_entry tgid_base_stuff[] = { | |||
152 | E(PROC_TGID_ROOT, "root", S_IFLNK|S_IRWXUGO), | 195 | E(PROC_TGID_ROOT, "root", S_IFLNK|S_IRWXUGO), |
153 | E(PROC_TGID_EXE, "exe", S_IFLNK|S_IRWXUGO), | 196 | E(PROC_TGID_EXE, "exe", S_IFLNK|S_IRWXUGO), |
154 | E(PROC_TGID_MOUNTS, "mounts", S_IFREG|S_IRUGO), | 197 | E(PROC_TGID_MOUNTS, "mounts", S_IFREG|S_IRUGO), |
198 | E(PROC_TGID_SMAPS, "smaps", S_IFREG|S_IRUGO), | ||
155 | #ifdef CONFIG_SECURITY | 199 | #ifdef CONFIG_SECURITY |
156 | E(PROC_TGID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), | 200 | E(PROC_TGID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), |
157 | #endif | 201 | #endif |
@@ -180,6 +224,9 @@ static struct pid_entry tid_base_stuff[] = { | |||
180 | E(PROC_TID_STAT, "stat", S_IFREG|S_IRUGO), | 224 | E(PROC_TID_STAT, "stat", S_IFREG|S_IRUGO), |
181 | E(PROC_TID_STATM, "statm", S_IFREG|S_IRUGO), | 225 | E(PROC_TID_STATM, "statm", S_IFREG|S_IRUGO), |
182 | E(PROC_TID_MAPS, "maps", S_IFREG|S_IRUGO), | 226 | E(PROC_TID_MAPS, "maps", S_IFREG|S_IRUGO), |
227 | #ifdef CONFIG_NUMA | ||
228 | E(PROC_TID_NUMA_MAPS, "numa_maps", S_IFREG|S_IRUGO), | ||
229 | #endif | ||
183 | E(PROC_TID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), | 230 | E(PROC_TID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), |
184 | #ifdef CONFIG_SECCOMP | 231 | #ifdef CONFIG_SECCOMP |
185 | E(PROC_TID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR), | 232 | E(PROC_TID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR), |
@@ -188,6 +235,7 @@ static struct pid_entry tid_base_stuff[] = { | |||
188 | E(PROC_TID_ROOT, "root", S_IFLNK|S_IRWXUGO), | 235 | E(PROC_TID_ROOT, "root", S_IFLNK|S_IRWXUGO), |
189 | E(PROC_TID_EXE, "exe", S_IFLNK|S_IRWXUGO), | 236 | E(PROC_TID_EXE, "exe", S_IFLNK|S_IRWXUGO), |
190 | E(PROC_TID_MOUNTS, "mounts", S_IFREG|S_IRUGO), | 237 | E(PROC_TID_MOUNTS, "mounts", S_IFREG|S_IRUGO), |
238 | E(PROC_TID_SMAPS, "smaps", S_IFREG|S_IRUGO), | ||
191 | #ifdef CONFIG_SECURITY | 239 | #ifdef CONFIG_SECURITY |
192 | E(PROC_TID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), | 240 | E(PROC_TID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), |
193 | #endif | 241 | #endif |
@@ -236,30 +284,36 @@ static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsm | |||
236 | 284 | ||
237 | files = get_files_struct(task); | 285 | files = get_files_struct(task); |
238 | if (files) { | 286 | if (files) { |
239 | spin_lock(&files->file_lock); | 287 | rcu_read_lock(); |
240 | file = fcheck_files(files, fd); | 288 | file = fcheck_files(files, fd); |
241 | if (file) { | 289 | if (file) { |
242 | *mnt = mntget(file->f_vfsmnt); | 290 | *mnt = mntget(file->f_vfsmnt); |
243 | *dentry = dget(file->f_dentry); | 291 | *dentry = dget(file->f_dentry); |
244 | spin_unlock(&files->file_lock); | 292 | rcu_read_unlock(); |
245 | put_files_struct(files); | 293 | put_files_struct(files); |
246 | return 0; | 294 | return 0; |
247 | } | 295 | } |
248 | spin_unlock(&files->file_lock); | 296 | rcu_read_unlock(); |
249 | put_files_struct(files); | 297 | put_files_struct(files); |
250 | } | 298 | } |
251 | return -ENOENT; | 299 | return -ENOENT; |
252 | } | 300 | } |
253 | 301 | ||
254 | static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) | 302 | static struct fs_struct *get_fs_struct(struct task_struct *task) |
255 | { | 303 | { |
256 | struct fs_struct *fs; | 304 | struct fs_struct *fs; |
257 | int result = -ENOENT; | 305 | task_lock(task); |
258 | task_lock(proc_task(inode)); | 306 | fs = task->fs; |
259 | fs = proc_task(inode)->fs; | ||
260 | if(fs) | 307 | if(fs) |
261 | atomic_inc(&fs->count); | 308 | atomic_inc(&fs->count); |
262 | task_unlock(proc_task(inode)); | 309 | task_unlock(task); |
310 | return fs; | ||
311 | } | ||
312 | |||
313 | static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) | ||
314 | { | ||
315 | struct fs_struct *fs = get_fs_struct(proc_task(inode)); | ||
316 | int result = -ENOENT; | ||
263 | if (fs) { | 317 | if (fs) { |
264 | read_lock(&fs->lock); | 318 | read_lock(&fs->lock); |
265 | *mnt = mntget(fs->pwdmnt); | 319 | *mnt = mntget(fs->pwdmnt); |
@@ -273,13 +327,8 @@ static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfs | |||
273 | 327 | ||
274 | static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) | 328 | static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) |
275 | { | 329 | { |
276 | struct fs_struct *fs; | 330 | struct fs_struct *fs = get_fs_struct(proc_task(inode)); |
277 | int result = -ENOENT; | 331 | int result = -ENOENT; |
278 | task_lock(proc_task(inode)); | ||
279 | fs = proc_task(inode)->fs; | ||
280 | if(fs) | ||
281 | atomic_inc(&fs->count); | ||
282 | task_unlock(proc_task(inode)); | ||
283 | if (fs) { | 332 | if (fs) { |
284 | read_lock(&fs->lock); | 333 | read_lock(&fs->lock); |
285 | *mnt = mntget(fs->rootmnt); | 334 | *mnt = mntget(fs->rootmnt); |
@@ -298,33 +347,6 @@ static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vf | |||
298 | (task->state == TASK_STOPPED || task->state == TASK_TRACED) && \ | 347 | (task->state == TASK_STOPPED || task->state == TASK_TRACED) && \ |
299 | security_ptrace(current,task) == 0)) | 348 | security_ptrace(current,task) == 0)) |
300 | 349 | ||
301 | static int may_ptrace_attach(struct task_struct *task) | ||
302 | { | ||
303 | int retval = 0; | ||
304 | |||
305 | task_lock(task); | ||
306 | |||
307 | if (!task->mm) | ||
308 | goto out; | ||
309 | if (((current->uid != task->euid) || | ||
310 | (current->uid != task->suid) || | ||
311 | (current->uid != task->uid) || | ||
312 | (current->gid != task->egid) || | ||
313 | (current->gid != task->sgid) || | ||
314 | (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE)) | ||
315 | goto out; | ||
316 | rmb(); | ||
317 | if (task->mm->dumpable != 1 && !capable(CAP_SYS_PTRACE)) | ||
318 | goto out; | ||
319 | if (security_ptrace(current, task)) | ||
320 | goto out; | ||
321 | |||
322 | retval = 1; | ||
323 | out: | ||
324 | task_unlock(task); | ||
325 | return retval; | ||
326 | } | ||
327 | |||
328 | static int proc_pid_environ(struct task_struct *task, char * buffer) | 350 | static int proc_pid_environ(struct task_struct *task, char * buffer) |
329 | { | 351 | { |
330 | int res = 0; | 352 | int res = 0; |
@@ -334,7 +356,7 @@ static int proc_pid_environ(struct task_struct *task, char * buffer) | |||
334 | if (len > PAGE_SIZE) | 356 | if (len > PAGE_SIZE) |
335 | len = PAGE_SIZE; | 357 | len = PAGE_SIZE; |
336 | res = access_process_vm(task, mm->env_start, buffer, len, 0); | 358 | res = access_process_vm(task, mm->env_start, buffer, len, 0); |
337 | if (!may_ptrace_attach(task)) | 359 | if (!ptrace_may_attach(task)) |
338 | res = -ESRCH; | 360 | res = -ESRCH; |
339 | mmput(mm); | 361 | mmput(mm); |
340 | } | 362 | } |
@@ -515,6 +537,46 @@ static struct file_operations proc_maps_operations = { | |||
515 | .release = seq_release, | 537 | .release = seq_release, |
516 | }; | 538 | }; |
517 | 539 | ||
540 | #ifdef CONFIG_NUMA | ||
541 | extern struct seq_operations proc_pid_numa_maps_op; | ||
542 | static int numa_maps_open(struct inode *inode, struct file *file) | ||
543 | { | ||
544 | struct task_struct *task = proc_task(inode); | ||
545 | int ret = seq_open(file, &proc_pid_numa_maps_op); | ||
546 | if (!ret) { | ||
547 | struct seq_file *m = file->private_data; | ||
548 | m->private = task; | ||
549 | } | ||
550 | return ret; | ||
551 | } | ||
552 | |||
553 | static struct file_operations proc_numa_maps_operations = { | ||
554 | .open = numa_maps_open, | ||
555 | .read = seq_read, | ||
556 | .llseek = seq_lseek, | ||
557 | .release = seq_release, | ||
558 | }; | ||
559 | #endif | ||
560 | |||
561 | extern struct seq_operations proc_pid_smaps_op; | ||
562 | static int smaps_open(struct inode *inode, struct file *file) | ||
563 | { | ||
564 | struct task_struct *task = proc_task(inode); | ||
565 | int ret = seq_open(file, &proc_pid_smaps_op); | ||
566 | if (!ret) { | ||
567 | struct seq_file *m = file->private_data; | ||
568 | m->private = task; | ||
569 | } | ||
570 | return ret; | ||
571 | } | ||
572 | |||
573 | static struct file_operations proc_smaps_operations = { | ||
574 | .open = smaps_open, | ||
575 | .read = seq_read, | ||
576 | .llseek = seq_lseek, | ||
577 | .release = seq_release, | ||
578 | }; | ||
579 | |||
518 | extern struct seq_operations mounts_op; | 580 | extern struct seq_operations mounts_op; |
519 | static int mounts_open(struct inode *inode, struct file *file) | 581 | static int mounts_open(struct inode *inode, struct file *file) |
520 | { | 582 | { |
@@ -597,7 +659,7 @@ static ssize_t mem_read(struct file * file, char __user * buf, | |||
597 | int ret = -ESRCH; | 659 | int ret = -ESRCH; |
598 | struct mm_struct *mm; | 660 | struct mm_struct *mm; |
599 | 661 | ||
600 | if (!MAY_PTRACE(task) || !may_ptrace_attach(task)) | 662 | if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) |
601 | goto out; | 663 | goto out; |
602 | 664 | ||
603 | ret = -ENOMEM; | 665 | ret = -ENOMEM; |
@@ -623,7 +685,7 @@ static ssize_t mem_read(struct file * file, char __user * buf, | |||
623 | 685 | ||
624 | this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; | 686 | this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; |
625 | retval = access_process_vm(task, src, page, this_len, 0); | 687 | retval = access_process_vm(task, src, page, this_len, 0); |
626 | if (!retval || !MAY_PTRACE(task) || !may_ptrace_attach(task)) { | 688 | if (!retval || !MAY_PTRACE(task) || !ptrace_may_attach(task)) { |
627 | if (!ret) | 689 | if (!ret) |
628 | ret = -EIO; | 690 | ret = -EIO; |
629 | break; | 691 | break; |
@@ -661,7 +723,7 @@ static ssize_t mem_write(struct file * file, const char * buf, | |||
661 | struct task_struct *task = proc_task(file->f_dentry->d_inode); | 723 | struct task_struct *task = proc_task(file->f_dentry->d_inode); |
662 | unsigned long dst = *ppos; | 724 | unsigned long dst = *ppos; |
663 | 725 | ||
664 | if (!MAY_PTRACE(task) || !may_ptrace_attach(task)) | 726 | if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) |
665 | return -ESRCH; | 727 | return -ESRCH; |
666 | 728 | ||
667 | page = (char *)__get_free_page(GFP_USER); | 729 | page = (char *)__get_free_page(GFP_USER); |
@@ -978,6 +1040,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) | |||
978 | int retval; | 1040 | int retval; |
979 | char buf[NUMBUF]; | 1041 | char buf[NUMBUF]; |
980 | struct files_struct * files; | 1042 | struct files_struct * files; |
1043 | struct fdtable *fdt; | ||
981 | 1044 | ||
982 | retval = -ENOENT; | 1045 | retval = -ENOENT; |
983 | if (!pid_alive(p)) | 1046 | if (!pid_alive(p)) |
@@ -1000,15 +1063,16 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) | |||
1000 | files = get_files_struct(p); | 1063 | files = get_files_struct(p); |
1001 | if (!files) | 1064 | if (!files) |
1002 | goto out; | 1065 | goto out; |
1003 | spin_lock(&files->file_lock); | 1066 | rcu_read_lock(); |
1067 | fdt = files_fdtable(files); | ||
1004 | for (fd = filp->f_pos-2; | 1068 | for (fd = filp->f_pos-2; |
1005 | fd < files->max_fds; | 1069 | fd < fdt->max_fds; |
1006 | fd++, filp->f_pos++) { | 1070 | fd++, filp->f_pos++) { |
1007 | unsigned int i,j; | 1071 | unsigned int i,j; |
1008 | 1072 | ||
1009 | if (!fcheck_files(files, fd)) | 1073 | if (!fcheck_files(files, fd)) |
1010 | continue; | 1074 | continue; |
1011 | spin_unlock(&files->file_lock); | 1075 | rcu_read_unlock(); |
1012 | 1076 | ||
1013 | j = NUMBUF; | 1077 | j = NUMBUF; |
1014 | i = fd; | 1078 | i = fd; |
@@ -1020,12 +1084,12 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) | |||
1020 | 1084 | ||
1021 | ino = fake_ino(tid, PROC_TID_FD_DIR + fd); | 1085 | ino = fake_ino(tid, PROC_TID_FD_DIR + fd); |
1022 | if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) { | 1086 | if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) { |
1023 | spin_lock(&files->file_lock); | 1087 | rcu_read_lock(); |
1024 | break; | 1088 | break; |
1025 | } | 1089 | } |
1026 | spin_lock(&files->file_lock); | 1090 | rcu_read_lock(); |
1027 | } | 1091 | } |
1028 | spin_unlock(&files->file_lock); | 1092 | rcu_read_unlock(); |
1029 | put_files_struct(files); | 1093 | put_files_struct(files); |
1030 | } | 1094 | } |
1031 | out: | 1095 | out: |
@@ -1200,9 +1264,9 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) | |||
1200 | 1264 | ||
1201 | files = get_files_struct(task); | 1265 | files = get_files_struct(task); |
1202 | if (files) { | 1266 | if (files) { |
1203 | spin_lock(&files->file_lock); | 1267 | rcu_read_lock(); |
1204 | if (fcheck_files(files, fd)) { | 1268 | if (fcheck_files(files, fd)) { |
1205 | spin_unlock(&files->file_lock); | 1269 | rcu_read_unlock(); |
1206 | put_files_struct(files); | 1270 | put_files_struct(files); |
1207 | if (task_dumpable(task)) { | 1271 | if (task_dumpable(task)) { |
1208 | inode->i_uid = task->euid; | 1272 | inode->i_uid = task->euid; |
@@ -1214,7 +1278,7 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) | |||
1214 | security_task_to_inode(task, inode); | 1278 | security_task_to_inode(task, inode); |
1215 | return 1; | 1279 | return 1; |
1216 | } | 1280 | } |
1217 | spin_unlock(&files->file_lock); | 1281 | rcu_read_unlock(); |
1218 | put_files_struct(files); | 1282 | put_files_struct(files); |
1219 | } | 1283 | } |
1220 | d_drop(dentry); | 1284 | d_drop(dentry); |
@@ -1306,7 +1370,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, | |||
1306 | if (!files) | 1370 | if (!files) |
1307 | goto out_unlock; | 1371 | goto out_unlock; |
1308 | inode->i_mode = S_IFLNK; | 1372 | inode->i_mode = S_IFLNK; |
1309 | spin_lock(&files->file_lock); | 1373 | rcu_read_lock(); |
1310 | file = fcheck_files(files, fd); | 1374 | file = fcheck_files(files, fd); |
1311 | if (!file) | 1375 | if (!file) |
1312 | goto out_unlock2; | 1376 | goto out_unlock2; |
@@ -1314,7 +1378,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, | |||
1314 | inode->i_mode |= S_IRUSR | S_IXUSR; | 1378 | inode->i_mode |= S_IRUSR | S_IXUSR; |
1315 | if (file->f_mode & 2) | 1379 | if (file->f_mode & 2) |
1316 | inode->i_mode |= S_IWUSR | S_IXUSR; | 1380 | inode->i_mode |= S_IWUSR | S_IXUSR; |
1317 | spin_unlock(&files->file_lock); | 1381 | rcu_read_unlock(); |
1318 | put_files_struct(files); | 1382 | put_files_struct(files); |
1319 | inode->i_op = &proc_pid_link_inode_operations; | 1383 | inode->i_op = &proc_pid_link_inode_operations; |
1320 | inode->i_size = 64; | 1384 | inode->i_size = 64; |
@@ -1324,7 +1388,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, | |||
1324 | return NULL; | 1388 | return NULL; |
1325 | 1389 | ||
1326 | out_unlock2: | 1390 | out_unlock2: |
1327 | spin_unlock(&files->file_lock); | 1391 | rcu_read_unlock(); |
1328 | put_files_struct(files); | 1392 | put_files_struct(files); |
1329 | out_unlock: | 1393 | out_unlock: |
1330 | iput(inode); | 1394 | iput(inode); |
@@ -1524,6 +1588,12 @@ static struct dentry *proc_pident_lookup(struct inode *dir, | |||
1524 | case PROC_TGID_MAPS: | 1588 | case PROC_TGID_MAPS: |
1525 | inode->i_fop = &proc_maps_operations; | 1589 | inode->i_fop = &proc_maps_operations; |
1526 | break; | 1590 | break; |
1591 | #ifdef CONFIG_NUMA | ||
1592 | case PROC_TID_NUMA_MAPS: | ||
1593 | case PROC_TGID_NUMA_MAPS: | ||
1594 | inode->i_fop = &proc_numa_maps_operations; | ||
1595 | break; | ||
1596 | #endif | ||
1527 | case PROC_TID_MEM: | 1597 | case PROC_TID_MEM: |
1528 | case PROC_TGID_MEM: | 1598 | case PROC_TGID_MEM: |
1529 | inode->i_op = &proc_mem_inode_operations; | 1599 | inode->i_op = &proc_mem_inode_operations; |
@@ -1539,6 +1609,10 @@ static struct dentry *proc_pident_lookup(struct inode *dir, | |||
1539 | case PROC_TGID_MOUNTS: | 1609 | case PROC_TGID_MOUNTS: |
1540 | inode->i_fop = &proc_mounts_operations; | 1610 | inode->i_fop = &proc_mounts_operations; |
1541 | break; | 1611 | break; |
1612 | case PROC_TID_SMAPS: | ||
1613 | case PROC_TGID_SMAPS: | ||
1614 | inode->i_fop = &proc_smaps_operations; | ||
1615 | break; | ||
1542 | #ifdef CONFIG_SECURITY | 1616 | #ifdef CONFIG_SECURITY |
1543 | case PROC_TID_ATTR: | 1617 | case PROC_TID_ATTR: |
1544 | inode->i_nlink = 2; | 1618 | inode->i_nlink = 2; |
diff --git a/fs/proc/generic.c b/fs/proc/generic.c index abe8920313fb..8a8c34461d48 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c | |||
@@ -249,6 +249,18 @@ out: | |||
249 | return error; | 249 | return error; |
250 | } | 250 | } |
251 | 251 | ||
252 | static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry, | ||
253 | struct kstat *stat) | ||
254 | { | ||
255 | struct inode *inode = dentry->d_inode; | ||
256 | struct proc_dir_entry *de = PROC_I(inode)->pde; | ||
257 | if (de && de->nlink) | ||
258 | inode->i_nlink = de->nlink; | ||
259 | |||
260 | generic_fillattr(inode, stat); | ||
261 | return 0; | ||
262 | } | ||
263 | |||
252 | static struct inode_operations proc_file_inode_operations = { | 264 | static struct inode_operations proc_file_inode_operations = { |
253 | .setattr = proc_notify_change, | 265 | .setattr = proc_notify_change, |
254 | }; | 266 | }; |
@@ -475,6 +487,7 @@ static struct file_operations proc_dir_operations = { | |||
475 | */ | 487 | */ |
476 | static struct inode_operations proc_dir_inode_operations = { | 488 | static struct inode_operations proc_dir_inode_operations = { |
477 | .lookup = proc_lookup, | 489 | .lookup = proc_lookup, |
490 | .getattr = proc_getattr, | ||
478 | .setattr = proc_notify_change, | 491 | .setattr = proc_notify_change, |
479 | }; | 492 | }; |
480 | 493 | ||
diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 133c28685105..effa6c0c467a 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c | |||
@@ -60,6 +60,8 @@ static void proc_delete_inode(struct inode *inode) | |||
60 | struct proc_dir_entry *de; | 60 | struct proc_dir_entry *de; |
61 | struct task_struct *tsk; | 61 | struct task_struct *tsk; |
62 | 62 | ||
63 | truncate_inode_pages(&inode->i_data, 0); | ||
64 | |||
63 | /* Let go of any associated process */ | 65 | /* Let go of any associated process */ |
64 | tsk = PROC_I(inode)->task; | 66 | tsk = PROC_I(inode)->task; |
65 | if (tsk) | 67 | if (tsk) |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 28b4a0253a92..c7ef3e48e35b 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -2,8 +2,13 @@ | |||
2 | #include <linux/hugetlb.h> | 2 | #include <linux/hugetlb.h> |
3 | #include <linux/mount.h> | 3 | #include <linux/mount.h> |
4 | #include <linux/seq_file.h> | 4 | #include <linux/seq_file.h> |
5 | #include <linux/highmem.h> | ||
6 | #include <linux/pagemap.h> | ||
7 | #include <linux/mempolicy.h> | ||
8 | |||
5 | #include <asm/elf.h> | 9 | #include <asm/elf.h> |
6 | #include <asm/uaccess.h> | 10 | #include <asm/uaccess.h> |
11 | #include <asm/tlbflush.h> | ||
7 | #include "internal.h" | 12 | #include "internal.h" |
8 | 13 | ||
9 | char *task_mem(struct mm_struct *mm, char *buffer) | 14 | char *task_mem(struct mm_struct *mm, char *buffer) |
@@ -87,49 +92,58 @@ static void pad_len_spaces(struct seq_file *m, int len) | |||
87 | seq_printf(m, "%*c", len, ' '); | 92 | seq_printf(m, "%*c", len, ' '); |
88 | } | 93 | } |
89 | 94 | ||
90 | static int show_map(struct seq_file *m, void *v) | 95 | struct mem_size_stats |
96 | { | ||
97 | unsigned long resident; | ||
98 | unsigned long shared_clean; | ||
99 | unsigned long shared_dirty; | ||
100 | unsigned long private_clean; | ||
101 | unsigned long private_dirty; | ||
102 | }; | ||
103 | |||
104 | static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss) | ||
91 | { | 105 | { |
92 | struct task_struct *task = m->private; | 106 | struct task_struct *task = m->private; |
93 | struct vm_area_struct *map = v; | 107 | struct vm_area_struct *vma = v; |
94 | struct mm_struct *mm = map->vm_mm; | 108 | struct mm_struct *mm = vma->vm_mm; |
95 | struct file *file = map->vm_file; | 109 | struct file *file = vma->vm_file; |
96 | int flags = map->vm_flags; | 110 | int flags = vma->vm_flags; |
97 | unsigned long ino = 0; | 111 | unsigned long ino = 0; |
98 | dev_t dev = 0; | 112 | dev_t dev = 0; |
99 | int len; | 113 | int len; |
100 | 114 | ||
101 | if (file) { | 115 | if (file) { |
102 | struct inode *inode = map->vm_file->f_dentry->d_inode; | 116 | struct inode *inode = vma->vm_file->f_dentry->d_inode; |
103 | dev = inode->i_sb->s_dev; | 117 | dev = inode->i_sb->s_dev; |
104 | ino = inode->i_ino; | 118 | ino = inode->i_ino; |
105 | } | 119 | } |
106 | 120 | ||
107 | seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n", | 121 | seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n", |
108 | map->vm_start, | 122 | vma->vm_start, |
109 | map->vm_end, | 123 | vma->vm_end, |
110 | flags & VM_READ ? 'r' : '-', | 124 | flags & VM_READ ? 'r' : '-', |
111 | flags & VM_WRITE ? 'w' : '-', | 125 | flags & VM_WRITE ? 'w' : '-', |
112 | flags & VM_EXEC ? 'x' : '-', | 126 | flags & VM_EXEC ? 'x' : '-', |
113 | flags & VM_MAYSHARE ? 's' : 'p', | 127 | flags & VM_MAYSHARE ? 's' : 'p', |
114 | map->vm_pgoff << PAGE_SHIFT, | 128 | vma->vm_pgoff << PAGE_SHIFT, |
115 | MAJOR(dev), MINOR(dev), ino, &len); | 129 | MAJOR(dev), MINOR(dev), ino, &len); |
116 | 130 | ||
117 | /* | 131 | /* |
118 | * Print the dentry name for named mappings, and a | 132 | * Print the dentry name for named mappings, and a |
119 | * special [heap] marker for the heap: | 133 | * special [heap] marker for the heap: |
120 | */ | 134 | */ |
121 | if (map->vm_file) { | 135 | if (file) { |
122 | pad_len_spaces(m, len); | 136 | pad_len_spaces(m, len); |
123 | seq_path(m, file->f_vfsmnt, file->f_dentry, ""); | 137 | seq_path(m, file->f_vfsmnt, file->f_dentry, "\n"); |
124 | } else { | 138 | } else { |
125 | if (mm) { | 139 | if (mm) { |
126 | if (map->vm_start <= mm->start_brk && | 140 | if (vma->vm_start <= mm->start_brk && |
127 | map->vm_end >= mm->brk) { | 141 | vma->vm_end >= mm->brk) { |
128 | pad_len_spaces(m, len); | 142 | pad_len_spaces(m, len); |
129 | seq_puts(m, "[heap]"); | 143 | seq_puts(m, "[heap]"); |
130 | } else { | 144 | } else { |
131 | if (map->vm_start <= mm->start_stack && | 145 | if (vma->vm_start <= mm->start_stack && |
132 | map->vm_end >= mm->start_stack) { | 146 | vma->vm_end >= mm->start_stack) { |
133 | 147 | ||
134 | pad_len_spaces(m, len); | 148 | pad_len_spaces(m, len); |
135 | seq_puts(m, "[stack]"); | 149 | seq_puts(m, "[stack]"); |
@@ -141,24 +155,146 @@ static int show_map(struct seq_file *m, void *v) | |||
141 | } | 155 | } |
142 | } | 156 | } |
143 | seq_putc(m, '\n'); | 157 | seq_putc(m, '\n'); |
144 | if (m->count < m->size) /* map is copied successfully */ | 158 | |
145 | m->version = (map != get_gate_vma(task))? map->vm_start: 0; | 159 | if (mss) |
160 | seq_printf(m, | ||
161 | "Size: %8lu kB\n" | ||
162 | "Rss: %8lu kB\n" | ||
163 | "Shared_Clean: %8lu kB\n" | ||
164 | "Shared_Dirty: %8lu kB\n" | ||
165 | "Private_Clean: %8lu kB\n" | ||
166 | "Private_Dirty: %8lu kB\n", | ||
167 | (vma->vm_end - vma->vm_start) >> 10, | ||
168 | mss->resident >> 10, | ||
169 | mss->shared_clean >> 10, | ||
170 | mss->shared_dirty >> 10, | ||
171 | mss->private_clean >> 10, | ||
172 | mss->private_dirty >> 10); | ||
173 | |||
174 | if (m->count < m->size) /* vma is copied successfully */ | ||
175 | m->version = (vma != get_gate_vma(task))? vma->vm_start: 0; | ||
146 | return 0; | 176 | return 0; |
147 | } | 177 | } |
148 | 178 | ||
179 | static int show_map(struct seq_file *m, void *v) | ||
180 | { | ||
181 | return show_map_internal(m, v, 0); | ||
182 | } | ||
183 | |||
184 | static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd, | ||
185 | unsigned long addr, unsigned long end, | ||
186 | struct mem_size_stats *mss) | ||
187 | { | ||
188 | pte_t *pte, ptent; | ||
189 | unsigned long pfn; | ||
190 | struct page *page; | ||
191 | |||
192 | pte = pte_offset_map(pmd, addr); | ||
193 | do { | ||
194 | ptent = *pte; | ||
195 | if (pte_none(ptent) || !pte_present(ptent)) | ||
196 | continue; | ||
197 | |||
198 | mss->resident += PAGE_SIZE; | ||
199 | pfn = pte_pfn(ptent); | ||
200 | if (!pfn_valid(pfn)) | ||
201 | continue; | ||
202 | |||
203 | page = pfn_to_page(pfn); | ||
204 | if (page_count(page) >= 2) { | ||
205 | if (pte_dirty(ptent)) | ||
206 | mss->shared_dirty += PAGE_SIZE; | ||
207 | else | ||
208 | mss->shared_clean += PAGE_SIZE; | ||
209 | } else { | ||
210 | if (pte_dirty(ptent)) | ||
211 | mss->private_dirty += PAGE_SIZE; | ||
212 | else | ||
213 | mss->private_clean += PAGE_SIZE; | ||
214 | } | ||
215 | } while (pte++, addr += PAGE_SIZE, addr != end); | ||
216 | pte_unmap(pte - 1); | ||
217 | cond_resched_lock(&vma->vm_mm->page_table_lock); | ||
218 | } | ||
219 | |||
220 | static inline void smaps_pmd_range(struct vm_area_struct *vma, pud_t *pud, | ||
221 | unsigned long addr, unsigned long end, | ||
222 | struct mem_size_stats *mss) | ||
223 | { | ||
224 | pmd_t *pmd; | ||
225 | unsigned long next; | ||
226 | |||
227 | pmd = pmd_offset(pud, addr); | ||
228 | do { | ||
229 | next = pmd_addr_end(addr, end); | ||
230 | if (pmd_none_or_clear_bad(pmd)) | ||
231 | continue; | ||
232 | smaps_pte_range(vma, pmd, addr, next, mss); | ||
233 | } while (pmd++, addr = next, addr != end); | ||
234 | } | ||
235 | |||
236 | static inline void smaps_pud_range(struct vm_area_struct *vma, pgd_t *pgd, | ||
237 | unsigned long addr, unsigned long end, | ||
238 | struct mem_size_stats *mss) | ||
239 | { | ||
240 | pud_t *pud; | ||
241 | unsigned long next; | ||
242 | |||
243 | pud = pud_offset(pgd, addr); | ||
244 | do { | ||
245 | next = pud_addr_end(addr, end); | ||
246 | if (pud_none_or_clear_bad(pud)) | ||
247 | continue; | ||
248 | smaps_pmd_range(vma, pud, addr, next, mss); | ||
249 | } while (pud++, addr = next, addr != end); | ||
250 | } | ||
251 | |||
252 | static inline void smaps_pgd_range(struct vm_area_struct *vma, | ||
253 | unsigned long addr, unsigned long end, | ||
254 | struct mem_size_stats *mss) | ||
255 | { | ||
256 | pgd_t *pgd; | ||
257 | unsigned long next; | ||
258 | |||
259 | pgd = pgd_offset(vma->vm_mm, addr); | ||
260 | do { | ||
261 | next = pgd_addr_end(addr, end); | ||
262 | if (pgd_none_or_clear_bad(pgd)) | ||
263 | continue; | ||
264 | smaps_pud_range(vma, pgd, addr, next, mss); | ||
265 | } while (pgd++, addr = next, addr != end); | ||
266 | } | ||
267 | |||
268 | static int show_smap(struct seq_file *m, void *v) | ||
269 | { | ||
270 | struct vm_area_struct *vma = v; | ||
271 | struct mm_struct *mm = vma->vm_mm; | ||
272 | struct mem_size_stats mss; | ||
273 | |||
274 | memset(&mss, 0, sizeof mss); | ||
275 | |||
276 | if (mm) { | ||
277 | spin_lock(&mm->page_table_lock); | ||
278 | smaps_pgd_range(vma, vma->vm_start, vma->vm_end, &mss); | ||
279 | spin_unlock(&mm->page_table_lock); | ||
280 | } | ||
281 | |||
282 | return show_map_internal(m, v, &mss); | ||
283 | } | ||
284 | |||
149 | static void *m_start(struct seq_file *m, loff_t *pos) | 285 | static void *m_start(struct seq_file *m, loff_t *pos) |
150 | { | 286 | { |
151 | struct task_struct *task = m->private; | 287 | struct task_struct *task = m->private; |
152 | unsigned long last_addr = m->version; | 288 | unsigned long last_addr = m->version; |
153 | struct mm_struct *mm; | 289 | struct mm_struct *mm; |
154 | struct vm_area_struct *map, *tail_map; | 290 | struct vm_area_struct *vma, *tail_vma; |
155 | loff_t l = *pos; | 291 | loff_t l = *pos; |
156 | 292 | ||
157 | /* | 293 | /* |
158 | * We remember last_addr rather than next_addr to hit with | 294 | * We remember last_addr rather than next_addr to hit with |
159 | * mmap_cache most of the time. We have zero last_addr at | 295 | * mmap_cache most of the time. We have zero last_addr at |
160 | * the begining and also after lseek. We will have -1 last_addr | 296 | * the beginning and also after lseek. We will have -1 last_addr |
161 | * after the end of the maps. | 297 | * after the end of the vmas. |
162 | */ | 298 | */ |
163 | 299 | ||
164 | if (last_addr == -1UL) | 300 | if (last_addr == -1UL) |
@@ -168,47 +304,47 @@ static void *m_start(struct seq_file *m, loff_t *pos) | |||
168 | if (!mm) | 304 | if (!mm) |
169 | return NULL; | 305 | return NULL; |
170 | 306 | ||
171 | tail_map = get_gate_vma(task); | 307 | tail_vma = get_gate_vma(task); |
172 | down_read(&mm->mmap_sem); | 308 | down_read(&mm->mmap_sem); |
173 | 309 | ||
174 | /* Start with last addr hint */ | 310 | /* Start with last addr hint */ |
175 | if (last_addr && (map = find_vma(mm, last_addr))) { | 311 | if (last_addr && (vma = find_vma(mm, last_addr))) { |
176 | map = map->vm_next; | 312 | vma = vma->vm_next; |
177 | goto out; | 313 | goto out; |
178 | } | 314 | } |
179 | 315 | ||
180 | /* | 316 | /* |
181 | * Check the map index is within the range and do | 317 | * Check the vma index is within the range and do |
182 | * sequential scan until m_index. | 318 | * sequential scan until m_index. |
183 | */ | 319 | */ |
184 | map = NULL; | 320 | vma = NULL; |
185 | if ((unsigned long)l < mm->map_count) { | 321 | if ((unsigned long)l < mm->map_count) { |
186 | map = mm->mmap; | 322 | vma = mm->mmap; |
187 | while (l-- && map) | 323 | while (l-- && vma) |
188 | map = map->vm_next; | 324 | vma = vma->vm_next; |
189 | goto out; | 325 | goto out; |
190 | } | 326 | } |
191 | 327 | ||
192 | if (l != mm->map_count) | 328 | if (l != mm->map_count) |
193 | tail_map = NULL; /* After gate map */ | 329 | tail_vma = NULL; /* After gate vma */ |
194 | 330 | ||
195 | out: | 331 | out: |
196 | if (map) | 332 | if (vma) |
197 | return map; | 333 | return vma; |
198 | 334 | ||
199 | /* End of maps has reached */ | 335 | /* End of vmas has been reached */ |
200 | m->version = (tail_map != NULL)? 0: -1UL; | 336 | m->version = (tail_vma != NULL)? 0: -1UL; |
201 | up_read(&mm->mmap_sem); | 337 | up_read(&mm->mmap_sem); |
202 | mmput(mm); | 338 | mmput(mm); |
203 | return tail_map; | 339 | return tail_vma; |
204 | } | 340 | } |
205 | 341 | ||
206 | static void m_stop(struct seq_file *m, void *v) | 342 | static void m_stop(struct seq_file *m, void *v) |
207 | { | 343 | { |
208 | struct task_struct *task = m->private; | 344 | struct task_struct *task = m->private; |
209 | struct vm_area_struct *map = v; | 345 | struct vm_area_struct *vma = v; |
210 | if (map && map != get_gate_vma(task)) { | 346 | if (vma && vma != get_gate_vma(task)) { |
211 | struct mm_struct *mm = map->vm_mm; | 347 | struct mm_struct *mm = vma->vm_mm; |
212 | up_read(&mm->mmap_sem); | 348 | up_read(&mm->mmap_sem); |
213 | mmput(mm); | 349 | mmput(mm); |
214 | } | 350 | } |
@@ -217,14 +353,14 @@ static void m_stop(struct seq_file *m, void *v) | |||
217 | static void *m_next(struct seq_file *m, void *v, loff_t *pos) | 353 | static void *m_next(struct seq_file *m, void *v, loff_t *pos) |
218 | { | 354 | { |
219 | struct task_struct *task = m->private; | 355 | struct task_struct *task = m->private; |
220 | struct vm_area_struct *map = v; | 356 | struct vm_area_struct *vma = v; |
221 | struct vm_area_struct *tail_map = get_gate_vma(task); | 357 | struct vm_area_struct *tail_vma = get_gate_vma(task); |
222 | 358 | ||
223 | (*pos)++; | 359 | (*pos)++; |
224 | if (map && (map != tail_map) && map->vm_next) | 360 | if (vma && (vma != tail_vma) && vma->vm_next) |
225 | return map->vm_next; | 361 | return vma->vm_next; |
226 | m_stop(m, v); | 362 | m_stop(m, v); |
227 | return (map != tail_map)? tail_map: NULL; | 363 | return (vma != tail_vma)? tail_vma: NULL; |
228 | } | 364 | } |
229 | 365 | ||
230 | struct seq_operations proc_pid_maps_op = { | 366 | struct seq_operations proc_pid_maps_op = { |
@@ -233,3 +369,140 @@ struct seq_operations proc_pid_maps_op = { | |||
233 | .stop = m_stop, | 369 | .stop = m_stop, |
234 | .show = show_map | 370 | .show = show_map |
235 | }; | 371 | }; |
372 | |||
373 | struct seq_operations proc_pid_smaps_op = { | ||
374 | .start = m_start, | ||
375 | .next = m_next, | ||
376 | .stop = m_stop, | ||
377 | .show = show_smap | ||
378 | }; | ||
379 | |||
380 | #ifdef CONFIG_NUMA | ||
381 | |||
382 | struct numa_maps { | ||
383 | unsigned long pages; | ||
384 | unsigned long anon; | ||
385 | unsigned long mapped; | ||
386 | unsigned long mapcount_max; | ||
387 | unsigned long node[MAX_NUMNODES]; | ||
388 | }; | ||
389 | |||
390 | /* | ||
391 | * Calculate numa node maps for a vma | ||
392 | */ | ||
393 | static struct numa_maps *get_numa_maps(const struct vm_area_struct *vma) | ||
394 | { | ||
395 | struct page *page; | ||
396 | unsigned long vaddr; | ||
397 | struct mm_struct *mm = vma->vm_mm; | ||
398 | int i; | ||
399 | struct numa_maps *md = kmalloc(sizeof(struct numa_maps), GFP_KERNEL); | ||
400 | |||
401 | if (!md) | ||
402 | return NULL; | ||
403 | md->pages = 0; | ||
404 | md->anon = 0; | ||
405 | md->mapped = 0; | ||
406 | md->mapcount_max = 0; | ||
407 | for_each_node(i) | ||
408 | md->node[i] =0; | ||
409 | |||
410 | spin_lock(&mm->page_table_lock); | ||
411 | for (vaddr = vma->vm_start; vaddr < vma->vm_end; vaddr += PAGE_SIZE) { | ||
412 | page = follow_page(mm, vaddr, 0); | ||
413 | if (page) { | ||
414 | int count = page_mapcount(page); | ||
415 | |||
416 | if (count) | ||
417 | md->mapped++; | ||
418 | if (count > md->mapcount_max) | ||
419 | md->mapcount_max = count; | ||
420 | md->pages++; | ||
421 | if (PageAnon(page)) | ||
422 | md->anon++; | ||
423 | md->node[page_to_nid(page)]++; | ||
424 | } | ||
425 | } | ||
426 | spin_unlock(&mm->page_table_lock); | ||
427 | return md; | ||
428 | } | ||
429 | |||
430 | static int show_numa_map(struct seq_file *m, void *v) | ||
431 | { | ||
432 | struct task_struct *task = m->private; | ||
433 | struct vm_area_struct *vma = v; | ||
434 | struct mempolicy *pol; | ||
435 | struct numa_maps *md; | ||
436 | struct zone **z; | ||
437 | int n; | ||
438 | int first; | ||
439 | |||
440 | if (!vma->vm_mm) | ||
441 | return 0; | ||
442 | |||
443 | md = get_numa_maps(vma); | ||
444 | if (!md) | ||
445 | return 0; | ||
446 | |||
447 | seq_printf(m, "%08lx", vma->vm_start); | ||
448 | pol = get_vma_policy(task, vma, vma->vm_start); | ||
449 | /* Print policy */ | ||
450 | switch (pol->policy) { | ||
451 | case MPOL_PREFERRED: | ||
452 | seq_printf(m, " prefer=%d", pol->v.preferred_node); | ||
453 | break; | ||
454 | case MPOL_BIND: | ||
455 | seq_printf(m, " bind={"); | ||
456 | first = 1; | ||
457 | for (z = pol->v.zonelist->zones; *z; z++) { | ||
458 | |||
459 | if (!first) | ||
460 | seq_putc(m, ','); | ||
461 | else | ||
462 | first = 0; | ||
463 | seq_printf(m, "%d/%s", (*z)->zone_pgdat->node_id, | ||
464 | (*z)->name); | ||
465 | } | ||
466 | seq_putc(m, '}'); | ||
467 | break; | ||
468 | case MPOL_INTERLEAVE: | ||
469 | seq_printf(m, " interleave={"); | ||
470 | first = 1; | ||
471 | for_each_node(n) { | ||
472 | if (test_bit(n, pol->v.nodes)) { | ||
473 | if (!first) | ||
474 | seq_putc(m,','); | ||
475 | else | ||
476 | first = 0; | ||
477 | seq_printf(m, "%d",n); | ||
478 | } | ||
479 | } | ||
480 | seq_putc(m, '}'); | ||
481 | break; | ||
482 | default: | ||
483 | seq_printf(m," default"); | ||
484 | break; | ||
485 | } | ||
486 | seq_printf(m, " MaxRef=%lu Pages=%lu Mapped=%lu", | ||
487 | md->mapcount_max, md->pages, md->mapped); | ||
488 | if (md->anon) | ||
489 | seq_printf(m," Anon=%lu",md->anon); | ||
490 | |||
491 | for_each_online_node(n) { | ||
492 | if (md->node[n]) | ||
493 | seq_printf(m, " N%d=%lu", n, md->node[n]); | ||
494 | } | ||
495 | seq_putc(m, '\n'); | ||
496 | kfree(md); | ||
497 | if (m->count < m->size) /* vma is copied successfully */ | ||
498 | m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0; | ||
499 | return 0; | ||
500 | } | ||
501 | |||
502 | struct seq_operations proc_pid_numa_maps_op = { | ||
503 | .start = m_start, | ||
504 | .next = m_next, | ||
505 | .stop = m_stop, | ||
506 | .show = show_numa_map | ||
507 | }; | ||
508 | #endif | ||
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index b79162a35478..80f32911c0cb 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c | |||
@@ -63,6 +63,7 @@ int qnx4_sync_inode(struct inode *inode) | |||
63 | static void qnx4_delete_inode(struct inode *inode) | 63 | static void qnx4_delete_inode(struct inode *inode) |
64 | { | 64 | { |
65 | QNX4DEBUG(("qnx4: deleting inode [%lu]\n", (unsigned long) inode->i_ino)); | 65 | QNX4DEBUG(("qnx4: deleting inode [%lu]\n", (unsigned long) inode->i_ino)); |
66 | truncate_inode_pages(&inode->i_data, 0); | ||
66 | inode->i_size = 0; | 67 | inode->i_size = 0; |
67 | qnx4_truncate(inode); | 68 | qnx4_truncate(inode); |
68 | lock_kernel(); | 69 | lock_kernel(); |
diff --git a/fs/read_write.c b/fs/read_write.c index 563abd09b5c8..b60324aaa2b6 100644 --- a/fs/read_write.c +++ b/fs/read_write.c | |||
@@ -188,7 +188,7 @@ int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count | |||
188 | struct inode *inode; | 188 | struct inode *inode; |
189 | loff_t pos; | 189 | loff_t pos; |
190 | 190 | ||
191 | if (unlikely(count > file->f_maxcount)) | 191 | if (unlikely(count > INT_MAX)) |
192 | goto Einval; | 192 | goto Einval; |
193 | pos = *ppos; | 193 | pos = *ppos; |
194 | if (unlikely((pos < 0) || (loff_t) (pos + count) < 0)) | 194 | if (unlikely((pos < 0) || (loff_t) (pos + count) < 0)) |
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index ff291c973a56..1a8a1bf2154d 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c | |||
@@ -33,6 +33,8 @@ void reiserfs_delete_inode(struct inode *inode) | |||
33 | 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb); | 33 | 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb); |
34 | struct reiserfs_transaction_handle th; | 34 | struct reiserfs_transaction_handle th; |
35 | 35 | ||
36 | truncate_inode_pages(&inode->i_data, 0); | ||
37 | |||
36 | reiserfs_write_lock(inode->i_sb); | 38 | reiserfs_write_lock(inode->i_sb); |
37 | 39 | ||
38 | /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */ | 40 | /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */ |
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index ca7989b04be3..4b15761434bc 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c | |||
@@ -1034,7 +1034,7 @@ static int flush_commit_list(struct super_block *s, | |||
1034 | SB_ONDISK_JOURNAL_SIZE(s); | 1034 | SB_ONDISK_JOURNAL_SIZE(s); |
1035 | tbh = journal_find_get_block(s, bn); | 1035 | tbh = journal_find_get_block(s, bn); |
1036 | if (buffer_dirty(tbh)) /* redundant, ll_rw_block() checks */ | 1036 | if (buffer_dirty(tbh)) /* redundant, ll_rw_block() checks */ |
1037 | ll_rw_block(WRITE, 1, &tbh); | 1037 | ll_rw_block(SWRITE, 1, &tbh); |
1038 | put_bh(tbh); | 1038 | put_bh(tbh); |
1039 | } | 1039 | } |
1040 | atomic_dec(&journal->j_async_throttle); | 1040 | atomic_dec(&journal->j_async_throttle); |
@@ -2172,7 +2172,7 @@ static int journal_read_transaction(struct super_block *p_s_sb, | |||
2172 | /* flush out the real blocks */ | 2172 | /* flush out the real blocks */ |
2173 | for (i = 0; i < get_desc_trans_len(desc); i++) { | 2173 | for (i = 0; i < get_desc_trans_len(desc); i++) { |
2174 | set_buffer_dirty(real_blocks[i]); | 2174 | set_buffer_dirty(real_blocks[i]); |
2175 | ll_rw_block(WRITE, 1, real_blocks + i); | 2175 | ll_rw_block(SWRITE, 1, real_blocks + i); |
2176 | } | 2176 | } |
2177 | for (i = 0; i < get_desc_trans_len(desc); i++) { | 2177 | for (i = 0; i < get_desc_trans_len(desc); i++) { |
2178 | wait_on_buffer(real_blocks[i]); | 2178 | wait_on_buffer(real_blocks[i]); |
@@ -2868,8 +2868,7 @@ static void let_transaction_grow(struct super_block *sb, unsigned long trans_id) | |||
2868 | struct reiserfs_journal *journal = SB_JOURNAL(sb); | 2868 | struct reiserfs_journal *journal = SB_JOURNAL(sb); |
2869 | unsigned long bcount = journal->j_bcount; | 2869 | unsigned long bcount = journal->j_bcount; |
2870 | while (1) { | 2870 | while (1) { |
2871 | set_current_state(TASK_UNINTERRUPTIBLE); | 2871 | schedule_timeout_uninterruptible(1); |
2872 | schedule_timeout(1); | ||
2873 | journal->j_current_jl->j_state |= LIST_COMMIT_PENDING; | 2872 | journal->j_current_jl->j_state |= LIST_COMMIT_PENDING; |
2874 | while ((atomic_read(&journal->j_wcount) > 0 || | 2873 | while ((atomic_read(&journal->j_wcount) > 0 || |
2875 | atomic_read(&journal->j_jlock)) && | 2874 | atomic_read(&journal->j_jlock)) && |
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 6951c35755be..44b02fc02ebe 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c | |||
@@ -1934,8 +1934,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1934 | if (SB_AP_BITMAP(s)) | 1934 | if (SB_AP_BITMAP(s)) |
1935 | brelse(SB_AP_BITMAP(s)[j].bh); | 1935 | brelse(SB_AP_BITMAP(s)[j].bh); |
1936 | } | 1936 | } |
1937 | if (SB_AP_BITMAP(s)) | 1937 | vfree(SB_AP_BITMAP(s)); |
1938 | vfree(SB_AP_BITMAP(s)); | ||
1939 | } | 1938 | } |
1940 | if (SB_BUFFER_WITH_SB(s)) | 1939 | if (SB_BUFFER_WITH_SB(s)) |
1941 | brelse(SB_BUFFER_WITH_SB(s)); | 1940 | brelse(SB_BUFFER_WITH_SB(s)); |
diff --git a/fs/relayfs/Makefile b/fs/relayfs/Makefile new file mode 100644 index 000000000000..e76e182cdb38 --- /dev/null +++ b/fs/relayfs/Makefile | |||
@@ -0,0 +1,4 @@ | |||
1 | obj-$(CONFIG_RELAYFS_FS) += relayfs.o | ||
2 | |||
3 | relayfs-y := relay.o inode.o buffers.o | ||
4 | |||
diff --git a/fs/relayfs/buffers.c b/fs/relayfs/buffers.c new file mode 100644 index 000000000000..2aa8e2719999 --- /dev/null +++ b/fs/relayfs/buffers.c | |||
@@ -0,0 +1,189 @@ | |||
1 | /* | ||
2 | * RelayFS buffer management code. | ||
3 | * | ||
4 | * Copyright (C) 2002-2005 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp | ||
5 | * Copyright (C) 1999-2005 - Karim Yaghmour (karim@opersys.com) | ||
6 | * | ||
7 | * This file is released under the GPL. | ||
8 | */ | ||
9 | |||
10 | #include <linux/module.h> | ||
11 | #include <linux/vmalloc.h> | ||
12 | #include <linux/mm.h> | ||
13 | #include <linux/relayfs_fs.h> | ||
14 | #include "relay.h" | ||
15 | #include "buffers.h" | ||
16 | |||
17 | /* | ||
18 | * close() vm_op implementation for relayfs file mapping. | ||
19 | */ | ||
20 | static void relay_file_mmap_close(struct vm_area_struct *vma) | ||
21 | { | ||
22 | struct rchan_buf *buf = vma->vm_private_data; | ||
23 | buf->chan->cb->buf_unmapped(buf, vma->vm_file); | ||
24 | } | ||
25 | |||
26 | /* | ||
27 | * nopage() vm_op implementation for relayfs file mapping. | ||
28 | */ | ||
29 | static struct page *relay_buf_nopage(struct vm_area_struct *vma, | ||
30 | unsigned long address, | ||
31 | int *type) | ||
32 | { | ||
33 | struct page *page; | ||
34 | struct rchan_buf *buf = vma->vm_private_data; | ||
35 | unsigned long offset = address - vma->vm_start; | ||
36 | |||
37 | if (address > vma->vm_end) | ||
38 | return NOPAGE_SIGBUS; /* Disallow mremap */ | ||
39 | if (!buf) | ||
40 | return NOPAGE_OOM; | ||
41 | |||
42 | page = vmalloc_to_page(buf->start + offset); | ||
43 | if (!page) | ||
44 | return NOPAGE_OOM; | ||
45 | get_page(page); | ||
46 | |||
47 | if (type) | ||
48 | *type = VM_FAULT_MINOR; | ||
49 | |||
50 | return page; | ||
51 | } | ||
52 | |||
53 | /* | ||
54 | * vm_ops for relay file mappings. | ||
55 | */ | ||
56 | static struct vm_operations_struct relay_file_mmap_ops = { | ||
57 | .nopage = relay_buf_nopage, | ||
58 | .close = relay_file_mmap_close, | ||
59 | }; | ||
60 | |||
61 | /** | ||
62 | * relay_mmap_buf: - mmap channel buffer to process address space | ||
63 | * @buf: relay channel buffer | ||
64 | * @vma: vm_area_struct describing memory to be mapped | ||
65 | * | ||
66 | * Returns 0 if ok, negative on error | ||
67 | * | ||
68 | * Caller should already have grabbed mmap_sem. | ||
69 | */ | ||
70 | int relay_mmap_buf(struct rchan_buf *buf, struct vm_area_struct *vma) | ||
71 | { | ||
72 | unsigned long length = vma->vm_end - vma->vm_start; | ||
73 | struct file *filp = vma->vm_file; | ||
74 | |||
75 | if (!buf) | ||
76 | return -EBADF; | ||
77 | |||
78 | if (length != (unsigned long)buf->chan->alloc_size) | ||
79 | return -EINVAL; | ||
80 | |||
81 | vma->vm_ops = &relay_file_mmap_ops; | ||
82 | vma->vm_private_data = buf; | ||
83 | buf->chan->cb->buf_mapped(buf, filp); | ||
84 | |||
85 | return 0; | ||
86 | } | ||
87 | |||
88 | /** | ||
89 | * relay_alloc_buf - allocate a channel buffer | ||
90 | * @buf: the buffer struct | ||
91 | * @size: total size of the buffer | ||
92 | * | ||
93 | * Returns a pointer to the resulting buffer, NULL if unsuccessful | ||
94 | */ | ||
95 | static void *relay_alloc_buf(struct rchan_buf *buf, unsigned long size) | ||
96 | { | ||
97 | void *mem; | ||
98 | unsigned int i, j, n_pages; | ||
99 | |||
100 | size = PAGE_ALIGN(size); | ||
101 | n_pages = size >> PAGE_SHIFT; | ||
102 | |||
103 | buf->page_array = kcalloc(n_pages, sizeof(struct page *), GFP_KERNEL); | ||
104 | if (!buf->page_array) | ||
105 | return NULL; | ||
106 | |||
107 | for (i = 0; i < n_pages; i++) { | ||
108 | buf->page_array[i] = alloc_page(GFP_KERNEL); | ||
109 | if (unlikely(!buf->page_array[i])) | ||
110 | goto depopulate; | ||
111 | } | ||
112 | mem = vmap(buf->page_array, n_pages, GFP_KERNEL, PAGE_KERNEL); | ||
113 | if (!mem) | ||
114 | goto depopulate; | ||
115 | |||
116 | memset(mem, 0, size); | ||
117 | buf->page_count = n_pages; | ||
118 | return mem; | ||
119 | |||
120 | depopulate: | ||
121 | for (j = 0; j < i; j++) | ||
122 | __free_page(buf->page_array[j]); | ||
123 | kfree(buf->page_array); | ||
124 | return NULL; | ||
125 | } | ||
126 | |||
127 | /** | ||
128 | * relay_create_buf - allocate and initialize a channel buffer | ||
129 | * @alloc_size: size of the buffer to allocate | ||
130 | * @n_subbufs: number of sub-buffers in the channel | ||
131 | * | ||
132 | * Returns channel buffer if successful, NULL otherwise | ||
133 | */ | ||
134 | struct rchan_buf *relay_create_buf(struct rchan *chan) | ||
135 | { | ||
136 | struct rchan_buf *buf = kcalloc(1, sizeof(struct rchan_buf), GFP_KERNEL); | ||
137 | if (!buf) | ||
138 | return NULL; | ||
139 | |||
140 | buf->padding = kmalloc(chan->n_subbufs * sizeof(size_t *), GFP_KERNEL); | ||
141 | if (!buf->padding) | ||
142 | goto free_buf; | ||
143 | |||
144 | buf->start = relay_alloc_buf(buf, chan->alloc_size); | ||
145 | if (!buf->start) | ||
146 | goto free_buf; | ||
147 | |||
148 | buf->chan = chan; | ||
149 | kref_get(&buf->chan->kref); | ||
150 | return buf; | ||
151 | |||
152 | free_buf: | ||
153 | kfree(buf->padding); | ||
154 | kfree(buf); | ||
155 | return NULL; | ||
156 | } | ||
157 | |||
158 | /** | ||
159 | * relay_destroy_buf - destroy an rchan_buf struct and associated buffer | ||
160 | * @buf: the buffer struct | ||
161 | */ | ||
162 | void relay_destroy_buf(struct rchan_buf *buf) | ||
163 | { | ||
164 | struct rchan *chan = buf->chan; | ||
165 | unsigned int i; | ||
166 | |||
167 | if (likely(buf->start)) { | ||
168 | vunmap(buf->start); | ||
169 | for (i = 0; i < buf->page_count; i++) | ||
170 | __free_page(buf->page_array[i]); | ||
171 | kfree(buf->page_array); | ||
172 | } | ||
173 | kfree(buf->padding); | ||
174 | kfree(buf); | ||
175 | kref_put(&chan->kref, relay_destroy_channel); | ||
176 | } | ||
177 | |||
178 | /** | ||
179 | * relay_remove_buf - remove a channel buffer | ||
180 | * | ||
181 | * Removes the file from the relayfs fileystem, which also frees the | ||
182 | * rchan_buf_struct and the channel buffer. Should only be called from | ||
183 | * kref_put(). | ||
184 | */ | ||
185 | void relay_remove_buf(struct kref *kref) | ||
186 | { | ||
187 | struct rchan_buf *buf = container_of(kref, struct rchan_buf, kref); | ||
188 | relayfs_remove(buf->dentry); | ||
189 | } | ||
diff --git a/fs/relayfs/buffers.h b/fs/relayfs/buffers.h new file mode 100644 index 000000000000..37a12493f641 --- /dev/null +++ b/fs/relayfs/buffers.h | |||
@@ -0,0 +1,12 @@ | |||
1 | #ifndef _BUFFERS_H | ||
2 | #define _BUFFERS_H | ||
3 | |||
/*
 * Round x up to a multiple of PAGE_SIZE (inspired by rtai/shmem).
 * The expansion is fully parenthesized so the macro can be used safely
 * inside larger expressions.
 */
#define FIX_SIZE(x) ((((x) - 1) & PAGE_MASK) + PAGE_SIZE)
6 | |||
7 | extern int relay_mmap_buf(struct rchan_buf *buf, struct vm_area_struct *vma); | ||
8 | extern struct rchan_buf *relay_create_buf(struct rchan *chan); | ||
9 | extern void relay_destroy_buf(struct rchan_buf *buf); | ||
10 | extern void relay_remove_buf(struct kref *kref); | ||
11 | |||
12 | #endif/* _BUFFERS_H */ | ||
diff --git a/fs/relayfs/inode.c b/fs/relayfs/inode.c new file mode 100644 index 000000000000..0f7f88d067ad --- /dev/null +++ b/fs/relayfs/inode.c | |||
@@ -0,0 +1,609 @@ | |||
1 | /* | ||
2 | * VFS-related code for RelayFS, a high-speed data relay filesystem. | ||
3 | * | ||
4 | * Copyright (C) 2003-2005 - Tom Zanussi <zanussi@us.ibm.com>, IBM Corp | ||
5 | * Copyright (C) 2003-2005 - Karim Yaghmour <karim@opersys.com> | ||
6 | * | ||
7 | * Based on ramfs, Copyright (C) 2002 - Linus Torvalds | ||
8 | * | ||
9 | * This file is released under the GPL. | ||
10 | */ | ||
11 | |||
12 | #include <linux/module.h> | ||
13 | #include <linux/fs.h> | ||
14 | #include <linux/mount.h> | ||
15 | #include <linux/pagemap.h> | ||
16 | #include <linux/init.h> | ||
17 | #include <linux/string.h> | ||
18 | #include <linux/backing-dev.h> | ||
19 | #include <linux/namei.h> | ||
20 | #include <linux/poll.h> | ||
21 | #include <linux/relayfs_fs.h> | ||
22 | #include "relay.h" | ||
23 | #include "buffers.h" | ||
24 | |||
25 | #define RELAYFS_MAGIC 0xF0B4A981 | ||
26 | |||
27 | static struct vfsmount * relayfs_mount; | ||
28 | static int relayfs_mount_count; | ||
29 | static kmem_cache_t * relayfs_inode_cachep; | ||
30 | |||
31 | static struct backing_dev_info relayfs_backing_dev_info = { | ||
32 | .ra_pages = 0, /* No readahead */ | ||
33 | .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, | ||
34 | }; | ||
35 | |||
36 | static struct inode *relayfs_get_inode(struct super_block *sb, int mode, | ||
37 | struct rchan *chan) | ||
38 | { | ||
39 | struct rchan_buf *buf = NULL; | ||
40 | struct inode *inode; | ||
41 | |||
42 | if (S_ISREG(mode)) { | ||
43 | BUG_ON(!chan); | ||
44 | buf = relay_create_buf(chan); | ||
45 | if (!buf) | ||
46 | return NULL; | ||
47 | } | ||
48 | |||
49 | inode = new_inode(sb); | ||
50 | if (!inode) { | ||
51 | relay_destroy_buf(buf); | ||
52 | return NULL; | ||
53 | } | ||
54 | |||
55 | inode->i_mode = mode; | ||
56 | inode->i_uid = 0; | ||
57 | inode->i_gid = 0; | ||
58 | inode->i_blksize = PAGE_CACHE_SIZE; | ||
59 | inode->i_blocks = 0; | ||
60 | inode->i_mapping->backing_dev_info = &relayfs_backing_dev_info; | ||
61 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
62 | switch (mode & S_IFMT) { | ||
63 | case S_IFREG: | ||
64 | inode->i_fop = &relayfs_file_operations; | ||
65 | RELAYFS_I(inode)->buf = buf; | ||
66 | break; | ||
67 | case S_IFDIR: | ||
68 | inode->i_op = &simple_dir_inode_operations; | ||
69 | inode->i_fop = &simple_dir_operations; | ||
70 | |||
71 | /* directory inodes start off with i_nlink == 2 (for "." entry) */ | ||
72 | inode->i_nlink++; | ||
73 | break; | ||
74 | default: | ||
75 | break; | ||
76 | } | ||
77 | |||
78 | return inode; | ||
79 | } | ||
80 | |||
81 | /** | ||
82 | * relayfs_create_entry - create a relayfs directory or file | ||
83 | * @name: the name of the file to create | ||
84 | * @parent: parent directory | ||
85 | * @mode: mode | ||
86 | * @chan: relay channel associated with the file | ||
87 | * | ||
88 | * Returns the new dentry, NULL on failure | ||
89 | * | ||
90 | * Creates a file or directory with the specifed permissions. | ||
91 | */ | ||
92 | static struct dentry *relayfs_create_entry(const char *name, | ||
93 | struct dentry *parent, | ||
94 | int mode, | ||
95 | struct rchan *chan) | ||
96 | { | ||
97 | struct dentry *d; | ||
98 | struct inode *inode; | ||
99 | int error = 0; | ||
100 | |||
101 | BUG_ON(!name || !(S_ISREG(mode) || S_ISDIR(mode))); | ||
102 | |||
103 | error = simple_pin_fs("relayfs", &relayfs_mount, &relayfs_mount_count); | ||
104 | if (error) { | ||
105 | printk(KERN_ERR "Couldn't mount relayfs: errcode %d\n", error); | ||
106 | return NULL; | ||
107 | } | ||
108 | |||
109 | if (!parent && relayfs_mount && relayfs_mount->mnt_sb) | ||
110 | parent = relayfs_mount->mnt_sb->s_root; | ||
111 | |||
112 | if (!parent) { | ||
113 | simple_release_fs(&relayfs_mount, &relayfs_mount_count); | ||
114 | return NULL; | ||
115 | } | ||
116 | |||
117 | parent = dget(parent); | ||
118 | down(&parent->d_inode->i_sem); | ||
119 | d = lookup_one_len(name, parent, strlen(name)); | ||
120 | if (IS_ERR(d)) { | ||
121 | d = NULL; | ||
122 | goto release_mount; | ||
123 | } | ||
124 | |||
125 | if (d->d_inode) { | ||
126 | d = NULL; | ||
127 | goto release_mount; | ||
128 | } | ||
129 | |||
130 | inode = relayfs_get_inode(parent->d_inode->i_sb, mode, chan); | ||
131 | if (!inode) { | ||
132 | d = NULL; | ||
133 | goto release_mount; | ||
134 | } | ||
135 | |||
136 | d_instantiate(d, inode); | ||
137 | dget(d); /* Extra count - pin the dentry in core */ | ||
138 | |||
139 | if (S_ISDIR(mode)) | ||
140 | parent->d_inode->i_nlink++; | ||
141 | |||
142 | goto exit; | ||
143 | |||
144 | release_mount: | ||
145 | simple_release_fs(&relayfs_mount, &relayfs_mount_count); | ||
146 | |||
147 | exit: | ||
148 | up(&parent->d_inode->i_sem); | ||
149 | dput(parent); | ||
150 | return d; | ||
151 | } | ||
152 | |||
153 | /** | ||
154 | * relayfs_create_file - create a file in the relay filesystem | ||
155 | * @name: the name of the file to create | ||
156 | * @parent: parent directory | ||
157 | * @mode: mode, if not specied the default perms are used | ||
158 | * @chan: channel associated with the file | ||
159 | * | ||
160 | * Returns file dentry if successful, NULL otherwise. | ||
161 | * | ||
162 | * The file will be created user r on behalf of current user. | ||
163 | */ | ||
164 | struct dentry *relayfs_create_file(const char *name, struct dentry *parent, | ||
165 | int mode, struct rchan *chan) | ||
166 | { | ||
167 | if (!mode) | ||
168 | mode = S_IRUSR; | ||
169 | mode = (mode & S_IALLUGO) | S_IFREG; | ||
170 | |||
171 | return relayfs_create_entry(name, parent, mode, chan); | ||
172 | } | ||
173 | |||
174 | /** | ||
175 | * relayfs_create_dir - create a directory in the relay filesystem | ||
176 | * @name: the name of the directory to create | ||
177 | * @parent: parent directory, NULL if parent should be fs root | ||
178 | * | ||
179 | * Returns directory dentry if successful, NULL otherwise. | ||
180 | * | ||
181 | * The directory will be created world rwx on behalf of current user. | ||
182 | */ | ||
183 | struct dentry *relayfs_create_dir(const char *name, struct dentry *parent) | ||
184 | { | ||
185 | int mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; | ||
186 | return relayfs_create_entry(name, parent, mode, NULL); | ||
187 | } | ||
188 | |||
189 | /** | ||
190 | * relayfs_remove - remove a file or directory in the relay filesystem | ||
191 | * @dentry: file or directory dentry | ||
192 | * | ||
193 | * Returns 0 if successful, negative otherwise. | ||
194 | */ | ||
195 | int relayfs_remove(struct dentry *dentry) | ||
196 | { | ||
197 | struct dentry *parent; | ||
198 | int error = 0; | ||
199 | |||
200 | if (!dentry) | ||
201 | return -EINVAL; | ||
202 | parent = dentry->d_parent; | ||
203 | if (!parent) | ||
204 | return -EINVAL; | ||
205 | |||
206 | parent = dget(parent); | ||
207 | down(&parent->d_inode->i_sem); | ||
208 | if (dentry->d_inode) { | ||
209 | if (S_ISDIR(dentry->d_inode->i_mode)) | ||
210 | error = simple_rmdir(parent->d_inode, dentry); | ||
211 | else | ||
212 | error = simple_unlink(parent->d_inode, dentry); | ||
213 | if (!error) | ||
214 | d_delete(dentry); | ||
215 | } | ||
216 | if (!error) | ||
217 | dput(dentry); | ||
218 | up(&parent->d_inode->i_sem); | ||
219 | dput(parent); | ||
220 | |||
221 | if (!error) | ||
222 | simple_release_fs(&relayfs_mount, &relayfs_mount_count); | ||
223 | |||
224 | return error; | ||
225 | } | ||
226 | |||
/**
 *	relayfs_remove_dir - remove a directory in the relay filesystem
 *	@dentry: directory dentry
 *
 *	Thin wrapper around relayfs_remove().
 *
 *	Returns 0 if successful, negative otherwise.
 */
int relayfs_remove_dir(struct dentry *dentry)
{
	int ret = relayfs_remove(dentry);

	return ret;
}
237 | |||
238 | /** | ||
239 | * relayfs_open - open file op for relayfs files | ||
240 | * @inode: the inode | ||
241 | * @filp: the file | ||
242 | * | ||
243 | * Increments the channel buffer refcount. | ||
244 | */ | ||
245 | static int relayfs_open(struct inode *inode, struct file *filp) | ||
246 | { | ||
247 | struct rchan_buf *buf = RELAYFS_I(inode)->buf; | ||
248 | kref_get(&buf->kref); | ||
249 | |||
250 | return 0; | ||
251 | } | ||
252 | |||
253 | /** | ||
254 | * relayfs_mmap - mmap file op for relayfs files | ||
255 | * @filp: the file | ||
256 | * @vma: the vma describing what to map | ||
257 | * | ||
258 | * Calls upon relay_mmap_buf to map the file into user space. | ||
259 | */ | ||
260 | static int relayfs_mmap(struct file *filp, struct vm_area_struct *vma) | ||
261 | { | ||
262 | struct inode *inode = filp->f_dentry->d_inode; | ||
263 | return relay_mmap_buf(RELAYFS_I(inode)->buf, vma); | ||
264 | } | ||
265 | |||
266 | /** | ||
267 | * relayfs_poll - poll file op for relayfs files | ||
268 | * @filp: the file | ||
269 | * @wait: poll table | ||
270 | * | ||
271 | * Poll implemention. | ||
272 | */ | ||
273 | static unsigned int relayfs_poll(struct file *filp, poll_table *wait) | ||
274 | { | ||
275 | unsigned int mask = 0; | ||
276 | struct inode *inode = filp->f_dentry->d_inode; | ||
277 | struct rchan_buf *buf = RELAYFS_I(inode)->buf; | ||
278 | |||
279 | if (buf->finalized) | ||
280 | return POLLERR; | ||
281 | |||
282 | if (filp->f_mode & FMODE_READ) { | ||
283 | poll_wait(filp, &buf->read_wait, wait); | ||
284 | if (!relay_buf_empty(buf)) | ||
285 | mask |= POLLIN | POLLRDNORM; | ||
286 | } | ||
287 | |||
288 | return mask; | ||
289 | } | ||
290 | |||
291 | /** | ||
292 | * relayfs_release - release file op for relayfs files | ||
293 | * @inode: the inode | ||
294 | * @filp: the file | ||
295 | * | ||
296 | * Decrements the channel refcount, as the filesystem is | ||
297 | * no longer using it. | ||
298 | */ | ||
299 | static int relayfs_release(struct inode *inode, struct file *filp) | ||
300 | { | ||
301 | struct rchan_buf *buf = RELAYFS_I(inode)->buf; | ||
302 | kref_put(&buf->kref, relay_remove_buf); | ||
303 | |||
304 | return 0; | ||
305 | } | ||
306 | |||
307 | /** | ||
308 | * relayfs_read_consume - update the consumed count for the buffer | ||
309 | */ | ||
310 | static void relayfs_read_consume(struct rchan_buf *buf, | ||
311 | size_t read_pos, | ||
312 | size_t bytes_consumed) | ||
313 | { | ||
314 | size_t subbuf_size = buf->chan->subbuf_size; | ||
315 | size_t n_subbufs = buf->chan->n_subbufs; | ||
316 | size_t read_subbuf; | ||
317 | |||
318 | if (buf->bytes_consumed + bytes_consumed > subbuf_size) { | ||
319 | relay_subbufs_consumed(buf->chan, buf->cpu, 1); | ||
320 | buf->bytes_consumed = 0; | ||
321 | } | ||
322 | |||
323 | buf->bytes_consumed += bytes_consumed; | ||
324 | read_subbuf = read_pos / buf->chan->subbuf_size; | ||
325 | if (buf->bytes_consumed + buf->padding[read_subbuf] == subbuf_size) { | ||
326 | if ((read_subbuf == buf->subbufs_produced % n_subbufs) && | ||
327 | (buf->offset == subbuf_size)) | ||
328 | return; | ||
329 | relay_subbufs_consumed(buf->chan, buf->cpu, 1); | ||
330 | buf->bytes_consumed = 0; | ||
331 | } | ||
332 | } | ||
333 | |||
334 | /** | ||
335 | * relayfs_read_avail - boolean, are there unconsumed bytes available? | ||
336 | */ | ||
337 | static int relayfs_read_avail(struct rchan_buf *buf, size_t read_pos) | ||
338 | { | ||
339 | size_t bytes_produced, bytes_consumed, write_offset; | ||
340 | size_t subbuf_size = buf->chan->subbuf_size; | ||
341 | size_t n_subbufs = buf->chan->n_subbufs; | ||
342 | size_t produced = buf->subbufs_produced % n_subbufs; | ||
343 | size_t consumed = buf->subbufs_consumed % n_subbufs; | ||
344 | |||
345 | write_offset = buf->offset > subbuf_size ? subbuf_size : buf->offset; | ||
346 | |||
347 | if (consumed > produced) { | ||
348 | if ((produced > n_subbufs) && | ||
349 | (produced + n_subbufs - consumed <= n_subbufs)) | ||
350 | produced += n_subbufs; | ||
351 | } else if (consumed == produced) { | ||
352 | if (buf->offset > subbuf_size) { | ||
353 | produced += n_subbufs; | ||
354 | if (buf->subbufs_produced == buf->subbufs_consumed) | ||
355 | consumed += n_subbufs; | ||
356 | } | ||
357 | } | ||
358 | |||
359 | if (buf->offset > subbuf_size) | ||
360 | bytes_produced = (produced - 1) * subbuf_size + write_offset; | ||
361 | else | ||
362 | bytes_produced = produced * subbuf_size + write_offset; | ||
363 | bytes_consumed = consumed * subbuf_size + buf->bytes_consumed; | ||
364 | |||
365 | if (bytes_produced == bytes_consumed) | ||
366 | return 0; | ||
367 | |||
368 | relayfs_read_consume(buf, read_pos, 0); | ||
369 | |||
370 | return 1; | ||
371 | } | ||
372 | |||
373 | /** | ||
374 | * relayfs_read_subbuf_avail - return bytes available in sub-buffer | ||
375 | */ | ||
376 | static size_t relayfs_read_subbuf_avail(size_t read_pos, | ||
377 | struct rchan_buf *buf) | ||
378 | { | ||
379 | size_t padding, avail = 0; | ||
380 | size_t read_subbuf, read_offset, write_subbuf, write_offset; | ||
381 | size_t subbuf_size = buf->chan->subbuf_size; | ||
382 | |||
383 | write_subbuf = (buf->data - buf->start) / subbuf_size; | ||
384 | write_offset = buf->offset > subbuf_size ? subbuf_size : buf->offset; | ||
385 | read_subbuf = read_pos / subbuf_size; | ||
386 | read_offset = read_pos % subbuf_size; | ||
387 | padding = buf->padding[read_subbuf]; | ||
388 | |||
389 | if (read_subbuf == write_subbuf) { | ||
390 | if (read_offset + padding < write_offset) | ||
391 | avail = write_offset - (read_offset + padding); | ||
392 | } else | ||
393 | avail = (subbuf_size - padding) - read_offset; | ||
394 | |||
395 | return avail; | ||
396 | } | ||
397 | |||
398 | /** | ||
399 | * relayfs_read_start_pos - find the first available byte to read | ||
400 | * | ||
401 | * If the read_pos is in the middle of padding, return the | ||
402 | * position of the first actually available byte, otherwise | ||
403 | * return the original value. | ||
404 | */ | ||
405 | static size_t relayfs_read_start_pos(size_t read_pos, | ||
406 | struct rchan_buf *buf) | ||
407 | { | ||
408 | size_t read_subbuf, padding, padding_start, padding_end; | ||
409 | size_t subbuf_size = buf->chan->subbuf_size; | ||
410 | size_t n_subbufs = buf->chan->n_subbufs; | ||
411 | |||
412 | read_subbuf = read_pos / subbuf_size; | ||
413 | padding = buf->padding[read_subbuf]; | ||
414 | padding_start = (read_subbuf + 1) * subbuf_size - padding; | ||
415 | padding_end = (read_subbuf + 1) * subbuf_size; | ||
416 | if (read_pos >= padding_start && read_pos < padding_end) { | ||
417 | read_subbuf = (read_subbuf + 1) % n_subbufs; | ||
418 | read_pos = read_subbuf * subbuf_size; | ||
419 | } | ||
420 | |||
421 | return read_pos; | ||
422 | } | ||
423 | |||
424 | /** | ||
425 | * relayfs_read_end_pos - return the new read position | ||
426 | */ | ||
427 | static size_t relayfs_read_end_pos(struct rchan_buf *buf, | ||
428 | size_t read_pos, | ||
429 | size_t count) | ||
430 | { | ||
431 | size_t read_subbuf, padding, end_pos; | ||
432 | size_t subbuf_size = buf->chan->subbuf_size; | ||
433 | size_t n_subbufs = buf->chan->n_subbufs; | ||
434 | |||
435 | read_subbuf = read_pos / subbuf_size; | ||
436 | padding = buf->padding[read_subbuf]; | ||
437 | if (read_pos % subbuf_size + count + padding == subbuf_size) | ||
438 | end_pos = (read_subbuf + 1) * subbuf_size; | ||
439 | else | ||
440 | end_pos = read_pos + count; | ||
441 | if (end_pos >= subbuf_size * n_subbufs) | ||
442 | end_pos = 0; | ||
443 | |||
444 | return end_pos; | ||
445 | } | ||
446 | |||
447 | /** | ||
448 | * relayfs_read - read file op for relayfs files | ||
449 | * @filp: the file | ||
450 | * @buffer: the userspace buffer | ||
451 | * @count: number of bytes to read | ||
452 | * @ppos: position to read from | ||
453 | * | ||
454 | * Reads count bytes or the number of bytes available in the | ||
455 | * current sub-buffer being read, whichever is smaller. | ||
456 | */ | ||
457 | static ssize_t relayfs_read(struct file *filp, | ||
458 | char __user *buffer, | ||
459 | size_t count, | ||
460 | loff_t *ppos) | ||
461 | { | ||
462 | struct inode *inode = filp->f_dentry->d_inode; | ||
463 | struct rchan_buf *buf = RELAYFS_I(inode)->buf; | ||
464 | size_t read_start, avail; | ||
465 | ssize_t ret = 0; | ||
466 | void *from; | ||
467 | |||
468 | down(&inode->i_sem); | ||
469 | if(!relayfs_read_avail(buf, *ppos)) | ||
470 | goto out; | ||
471 | |||
472 | read_start = relayfs_read_start_pos(*ppos, buf); | ||
473 | avail = relayfs_read_subbuf_avail(read_start, buf); | ||
474 | if (!avail) | ||
475 | goto out; | ||
476 | |||
477 | from = buf->start + read_start; | ||
478 | ret = count = min(count, avail); | ||
479 | if (copy_to_user(buffer, from, count)) { | ||
480 | ret = -EFAULT; | ||
481 | goto out; | ||
482 | } | ||
483 | relayfs_read_consume(buf, read_start, count); | ||
484 | *ppos = relayfs_read_end_pos(buf, read_start, count); | ||
485 | out: | ||
486 | up(&inode->i_sem); | ||
487 | return ret; | ||
488 | } | ||
489 | |||
490 | /** | ||
491 | * relayfs alloc_inode() implementation | ||
492 | */ | ||
493 | static struct inode *relayfs_alloc_inode(struct super_block *sb) | ||
494 | { | ||
495 | struct relayfs_inode_info *p = kmem_cache_alloc(relayfs_inode_cachep, SLAB_KERNEL); | ||
496 | if (!p) | ||
497 | return NULL; | ||
498 | p->buf = NULL; | ||
499 | |||
500 | return &p->vfs_inode; | ||
501 | } | ||
502 | |||
503 | /** | ||
504 | * relayfs destroy_inode() implementation | ||
505 | */ | ||
506 | static void relayfs_destroy_inode(struct inode *inode) | ||
507 | { | ||
508 | if (RELAYFS_I(inode)->buf) | ||
509 | relay_destroy_buf(RELAYFS_I(inode)->buf); | ||
510 | |||
511 | kmem_cache_free(relayfs_inode_cachep, RELAYFS_I(inode)); | ||
512 | } | ||
513 | |||
514 | static void init_once(void *p, kmem_cache_t *cachep, unsigned long flags) | ||
515 | { | ||
516 | struct relayfs_inode_info *i = p; | ||
517 | if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) | ||
518 | inode_init_once(&i->vfs_inode); | ||
519 | } | ||
520 | |||
521 | struct file_operations relayfs_file_operations = { | ||
522 | .open = relayfs_open, | ||
523 | .poll = relayfs_poll, | ||
524 | .mmap = relayfs_mmap, | ||
525 | .read = relayfs_read, | ||
526 | .llseek = no_llseek, | ||
527 | .release = relayfs_release, | ||
528 | }; | ||
529 | |||
530 | static struct super_operations relayfs_ops = { | ||
531 | .statfs = simple_statfs, | ||
532 | .drop_inode = generic_delete_inode, | ||
533 | .alloc_inode = relayfs_alloc_inode, | ||
534 | .destroy_inode = relayfs_destroy_inode, | ||
535 | }; | ||
536 | |||
537 | static int relayfs_fill_super(struct super_block * sb, void * data, int silent) | ||
538 | { | ||
539 | struct inode *inode; | ||
540 | struct dentry *root; | ||
541 | int mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; | ||
542 | |||
543 | sb->s_blocksize = PAGE_CACHE_SIZE; | ||
544 | sb->s_blocksize_bits = PAGE_CACHE_SHIFT; | ||
545 | sb->s_magic = RELAYFS_MAGIC; | ||
546 | sb->s_op = &relayfs_ops; | ||
547 | inode = relayfs_get_inode(sb, mode, NULL); | ||
548 | |||
549 | if (!inode) | ||
550 | return -ENOMEM; | ||
551 | |||
552 | root = d_alloc_root(inode); | ||
553 | if (!root) { | ||
554 | iput(inode); | ||
555 | return -ENOMEM; | ||
556 | } | ||
557 | sb->s_root = root; | ||
558 | |||
559 | return 0; | ||
560 | } | ||
561 | |||
562 | static struct super_block * relayfs_get_sb(struct file_system_type *fs_type, | ||
563 | int flags, const char *dev_name, | ||
564 | void *data) | ||
565 | { | ||
566 | return get_sb_single(fs_type, flags, data, relayfs_fill_super); | ||
567 | } | ||
568 | |||
569 | static struct file_system_type relayfs_fs_type = { | ||
570 | .owner = THIS_MODULE, | ||
571 | .name = "relayfs", | ||
572 | .get_sb = relayfs_get_sb, | ||
573 | .kill_sb = kill_litter_super, | ||
574 | }; | ||
575 | |||
576 | static int __init init_relayfs_fs(void) | ||
577 | { | ||
578 | int err; | ||
579 | |||
580 | relayfs_inode_cachep = kmem_cache_create("relayfs_inode_cache", | ||
581 | sizeof(struct relayfs_inode_info), 0, | ||
582 | 0, init_once, NULL); | ||
583 | if (!relayfs_inode_cachep) | ||
584 | return -ENOMEM; | ||
585 | |||
586 | err = register_filesystem(&relayfs_fs_type); | ||
587 | if (err) | ||
588 | kmem_cache_destroy(relayfs_inode_cachep); | ||
589 | |||
590 | return err; | ||
591 | } | ||
592 | |||
593 | static void __exit exit_relayfs_fs(void) | ||
594 | { | ||
595 | unregister_filesystem(&relayfs_fs_type); | ||
596 | kmem_cache_destroy(relayfs_inode_cachep); | ||
597 | } | ||
598 | |||
599 | module_init(init_relayfs_fs) | ||
600 | module_exit(exit_relayfs_fs) | ||
601 | |||
602 | EXPORT_SYMBOL_GPL(relayfs_file_operations); | ||
603 | EXPORT_SYMBOL_GPL(relayfs_create_dir); | ||
604 | EXPORT_SYMBOL_GPL(relayfs_remove_dir); | ||
605 | |||
606 | MODULE_AUTHOR("Tom Zanussi <zanussi@us.ibm.com> and Karim Yaghmour <karim@opersys.com>"); | ||
607 | MODULE_DESCRIPTION("Relay Filesystem"); | ||
608 | MODULE_LICENSE("GPL"); | ||
609 | |||
diff --git a/fs/relayfs/relay.c b/fs/relayfs/relay.c new file mode 100644 index 000000000000..16446a15c96d --- /dev/null +++ b/fs/relayfs/relay.c | |||
@@ -0,0 +1,431 @@ | |||
1 | /* | ||
2 | * Public API and common code for RelayFS. | ||
3 | * | ||
4 | * See Documentation/filesystems/relayfs.txt for an overview of relayfs. | ||
5 | * | ||
6 | * Copyright (C) 2002-2005 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp | ||
7 | * Copyright (C) 1999-2005 - Karim Yaghmour (karim@opersys.com) | ||
8 | * | ||
9 | * This file is released under the GPL. | ||
10 | */ | ||
11 | |||
12 | #include <linux/errno.h> | ||
13 | #include <linux/stddef.h> | ||
14 | #include <linux/slab.h> | ||
15 | #include <linux/module.h> | ||
16 | #include <linux/string.h> | ||
17 | #include <linux/relayfs_fs.h> | ||
18 | #include "relay.h" | ||
19 | #include "buffers.h" | ||
20 | |||
21 | /** | ||
22 | * relay_buf_empty - test whether a channel buffer holds no unread data | ||
23 | * @buf: channel buffer | ||
24 | * | ||
25 | * Returns 1 when every produced sub-buffer has been consumed, 0 otherwise. | ||
26 | */ | ||
27 | int relay_buf_empty(struct rchan_buf *buf) | ||
28 | { | ||
29 | return buf->subbufs_produced == buf->subbufs_consumed; | ||
30 | } | ||
31 | |||
32 | /** | ||
33 | * relay_buf_full - test whether a channel buffer has no free sub-buffer | ||
34 | * @buf: channel buffer | ||
35 | * | ||
36 | * Returns 1 when the unread sub-buffers fill the whole buffer, 0 otherwise. | ||
37 | */ | ||
38 | int relay_buf_full(struct rchan_buf *buf) | ||
39 | { | ||
40 | size_t unread = buf->subbufs_produced - buf->subbufs_consumed; | ||
41 | return unread >= buf->chan->n_subbufs; | ||
42 | } | ||
43 | |||
44 | /* | ||
45 | * High-level relayfs kernel API and associated functions. | ||
46 | */ | ||
47 | |||
48 | /* | ||
49 | * rchan_callback implementations defining default channel behavior. Used | ||
50 | * in place of corresponding NULL values in client callback struct. | ||
51 | */ | ||
52 | |||
53 | /* | ||
54 | * subbuf_start() default callback. Returns 0 (refuse the switch) while the buffer is full, 1 otherwise. | ||
55 | */ | ||
56 | static int subbuf_start_default_callback (struct rchan_buf *buf, | ||
57 | void *subbuf, | ||
58 | void *prev_subbuf, | ||
59 | size_t prev_padding) | ||
60 | { | ||
61 | int full = relay_buf_full(buf); | ||
62 | |||
63 | /* subbuf, prev_subbuf and prev_padding are not examined here */ | ||
64 | return full ? 0 : 1; | ||
65 | } | ||
66 | |||
67 | /* | ||
68 | * buf_mapped() default callback. Intentionally a no-op; installed by setup_callbacks() when the client leaves buf_mapped NULL. | ||
69 | */ | ||
70 | static void buf_mapped_default_callback(struct rchan_buf *buf, | ||
71 | struct file *filp) | ||
72 | { | ||
73 | } | ||
74 | |||
75 | /* | ||
76 | * buf_unmapped() default callback. Intentionally a no-op; installed by setup_callbacks() when the client leaves buf_unmapped NULL. | ||
77 | */ | ||
78 | static void buf_unmapped_default_callback(struct rchan_buf *buf, | ||
79 | struct file *filp) | ||
80 | { | ||
81 | } | ||
82 | |||
83 | /* relay channel default callbacks */ | ||
84 | static struct rchan_callbacks default_channel_callbacks = { | ||
85 | .subbuf_start = subbuf_start_default_callback, | ||
86 | .buf_mapped = buf_mapped_default_callback, | ||
87 | .buf_unmapped = buf_unmapped_default_callback, | ||
88 | }; | ||
89 | |||
90 | /** | ||
91 | * wakeup_readers - wake up readers waiting on a channel | ||
92 | * @private: the channel buffer, passed as the work item's data pointer | ||
93 | * | ||
94 | * Work function that performs the deferred reader wakeup. Waking is | ||
95 | * deferred because calling it directly from the write path causes | ||
96 | * problems when the writer is, for example, the scheduler. | ||
97 | */ | ||
98 | static void wakeup_readers(void *private) | ||
99 | { | ||
100 | struct rchan_buf *rbuf = private; | ||
101 | wake_up_interruptible(&rbuf->read_wait); | ||
102 | } | ||
103 | |||
104 | /** | ||
105 | * __relay_reset - reset a channel buffer | ||
106 | * @buf: the channel buffer | ||
107 | * @init: 1 if this is a first-time initialization | ||
108 | * | ||
109 | * See relay_reset for description of effect. | ||
110 | */ | ||
111 | static inline void __relay_reset(struct rchan_buf *buf, unsigned int init) | ||
112 | { | ||
113 | size_t i; | ||
114 | |||
115 | if (init) { | ||
116 | init_waitqueue_head(&buf->read_wait); | ||
117 | kref_init(&buf->kref); | ||
118 | INIT_WORK(&buf->wake_readers, NULL, NULL); /* handler and data are filled in by PREPARE_WORK in relay_switch_subbuf() */ | ||
119 | } else { | ||
120 | cancel_delayed_work(&buf->wake_readers); /* re-reset: stop any pending deferred wakeup before clearing state */ | ||
121 | flush_scheduled_work(); | ||
122 | } | ||
123 | |||
124 | buf->subbufs_produced = 0; | ||
125 | buf->subbufs_consumed = 0; | ||
126 | buf->bytes_consumed = 0; | ||
127 | buf->finalized = 0; | ||
128 | buf->data = buf->start; /* writing restarts at the first sub-buffer */ | ||
129 | buf->offset = 0; | ||
130 | |||
131 | for (i = 0; i < buf->chan->n_subbufs; i++) | ||
132 | buf->padding[i] = 0; | ||
133 | |||
134 | buf->chan->cb->subbuf_start(buf, buf->data, NULL, 0); /* announce the first sub-buffer: no previous sub-buffer, no padding; return value is ignored */ | ||
135 | } | ||
136 | |||
137 | /** | ||
138 | * relay_reset - reset the channel | ||
139 | * @chan: the channel, may be NULL (then nothing happens) | ||
140 | * | ||
141 | * Discards the data in every channel buffer and puts the channel | ||
142 | * back into its initial state. The buffers themselves are kept, | ||
143 | * so existing mappings remain in effect. | ||
144 | * | ||
145 | * NOTE: The caller must make sure nothing is actually using the | ||
146 | * channel while this runs. | ||
147 | */ | ||
148 | void relay_reset(struct rchan *chan) | ||
149 | { | ||
150 | unsigned int cpu; | ||
151 | |||
152 | if (!chan) | ||
153 | return; | ||
154 | |||
155 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | ||
156 | struct rchan_buf *buf = chan->buf[cpu]; | ||
157 | if (buf) | ||
158 | __relay_reset(buf, 0); | ||
159 | } | ||
160 | } | ||
161 | |||
162 | /** | ||
163 | * relay_open_buf - create a new channel buffer in relayfs | ||
164 | * | ||
165 | * Internal - used by relay_open(). Returns the new buffer, or NULL if the relayfs file could not be created. | ||
166 | */ | ||
167 | static struct rchan_buf *relay_open_buf(struct rchan *chan, | ||
168 | const char *filename, | ||
169 | struct dentry *parent) | ||
170 | { | ||
171 | struct rchan_buf *buf; | ||
172 | struct dentry *dentry; | ||
173 | |||
174 | /* Create file in fs */ | ||
175 | dentry = relayfs_create_file(filename, parent, S_IRUSR, chan); | ||
176 | if (!dentry) | ||
177 | return NULL; | ||
178 | |||
179 | buf = RELAYFS_I(dentry->d_inode)->buf; /* the buffer is reached through the new file's inode */ | ||
180 | buf->dentry = dentry; | ||
181 | __relay_reset(buf, 1); /* first-time init: wait queue, kref and work item are set up too */ | ||
182 | |||
183 | return buf; | ||
184 | } | ||
185 | |||
186 | /** | ||
187 | * relay_close_buf - close a channel buffer | ||
188 | * @buf: channel buffer | ||
189 | * | ||
190 | * Marks the buffer finalized and restores the default callbacks. | ||
191 | * The channel buffer and channel buffer data structure are then freed | ||
192 | * automatically when the last reference is given up. | ||
193 | */ | ||
194 | static inline void relay_close_buf(struct rchan_buf *buf) | ||
195 | { | ||
196 | buf->finalized = 1; | ||
197 | buf->chan->cb = &default_channel_callbacks; /* note: cb belongs to the channel, so this switches every buffer of the channel to the defaults */ | ||
198 | cancel_delayed_work(&buf->wake_readers); /* make sure no deferred wakeup is pending before the reference is dropped */ | ||
199 | flush_scheduled_work(); | ||
200 | kref_put(&buf->kref, relay_remove_buf); /* relay_remove_buf is the release function for the buffer's kref */ | ||
201 | } | ||
202 | |||
203 | static inline void setup_callbacks(struct rchan *chan, | ||
204 | struct rchan_callbacks *cb) | ||
205 | { | ||
206 | if (!cb) { /* no client callbacks at all: use the built-in defaults */ | ||
207 | chan->cb = &default_channel_callbacks; | ||
208 | return; | ||
209 | } | ||
210 | /* Fill any hook the client left NULL with its default; this writes into the client's own struct. */ | ||
211 | if (!cb->subbuf_start) | ||
212 | cb->subbuf_start = subbuf_start_default_callback; | ||
213 | if (!cb->buf_mapped) | ||
214 | cb->buf_mapped = buf_mapped_default_callback; | ||
215 | if (!cb->buf_unmapped) | ||
216 | cb->buf_unmapped = buf_unmapped_default_callback; | ||
217 | chan->cb = cb; | ||
218 | } | ||
219 | |||
220 | /** | ||
221 | * relay_open - create a new relayfs channel | ||
222 | * @base_filename: base name of files to create | ||
223 | * @parent: dentry of parent directory, NULL for root directory | ||
224 | * @subbuf_size: size of sub-buffers | ||
225 | * @n_subbufs: number of sub-buffers | ||
226 | * @cb: client callback functions, NULL to use the defaults | ||
227 | * | ||
228 | * Returns channel pointer if successful, NULL otherwise. | ||
229 | * | ||
230 | * Creates a channel buffer for each online cpu using the sizes and | ||
231 | * attributes specified. The created channel buffer files are named | ||
232 | * base_filename<cpu> with permissions S_IRUSR. On failure, any | ||
233 | * buffers that were already created are closed again. | ||
234 | */ | ||
235 | struct rchan *relay_open(const char *base_filename, | ||
236 | struct dentry *parent, | ||
237 | size_t subbuf_size, | ||
238 | size_t n_subbufs, | ||
239 | struct rchan_callbacks *cb) | ||
240 | { | ||
241 | unsigned int i; | ||
242 | struct rchan *chan; | ||
243 | char *tmpname; | ||
244 | |||
245 | if (!base_filename) | ||
246 | return NULL; | ||
247 | |||
248 | if (!(subbuf_size && n_subbufs)) | ||
249 | return NULL; | ||
250 | |||
251 | chan = kcalloc(1, sizeof(struct rchan), GFP_KERNEL); | ||
252 | if (!chan) | ||
253 | return NULL; | ||
254 | |||
255 | chan->version = RELAYFS_CHANNEL_VERSION; | ||
256 | chan->n_subbufs = n_subbufs; | ||
257 | chan->subbuf_size = subbuf_size; | ||
258 | chan->alloc_size = FIX_SIZE(subbuf_size * n_subbufs); | ||
259 | setup_callbacks(chan, cb); | ||
260 | kref_init(&chan->kref); | ||
261 | |||
262 | tmpname = kmalloc(NAME_MAX + 1, GFP_KERNEL); | ||
263 | if (!tmpname) | ||
264 | goto free_chan; | ||
265 | |||
266 | for_each_online_cpu(i) { | ||
267 | snprintf(tmpname, NAME_MAX + 1, "%s%d", base_filename, i); /* bounded: never write past the NAME_MAX + 1 byte buffer */ | ||
268 | chan->buf[i] = relay_open_buf(chan, tmpname, parent); | ||
269 | if (!chan->buf[i]) /* check before touching the new buffer */ | ||
270 | goto free_bufs; | ||
271 | chan->buf[i]->cpu = i; | ||
272 | } | ||
273 | |||
274 | kfree(tmpname); | ||
275 | return chan; | ||
276 | |||
277 | free_bufs: | ||
278 | for (i = 0; i < NR_CPUS; i++) { | ||
279 | if (!chan->buf[i]) | ||
280 | continue; /* skip empty slots as relay_close() does; buffers may exist past a gap */ | ||
281 | relay_close_buf(chan->buf[i]); | ||
282 | } | ||
283 | kfree(tmpname); | ||
284 | |||
285 | free_chan: | ||
286 | kref_put(&chan->kref, relay_destroy_channel); | ||
287 | return NULL; | ||
288 | } | ||
289 | |||
290 | /** | ||
291 | * relay_switch_subbuf - switch to a new sub-buffer | ||
292 | * @buf: channel buffer | ||
293 | * @length: size of current event | ||
294 | * | ||
295 | * Returns either the length passed in, or 0 if the buffer is full or the event is larger than a sub-buffer. | ||
296 | * | ||
297 | * Performs sub-buffer-switch tasks such as invoking callbacks, | ||
298 | * updating padding counts, waking up readers, etc. | ||
299 | */ | ||
300 | size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length) | ||
301 | { | ||
302 | void *old, *new; | ||
303 | size_t old_subbuf, new_subbuf; | ||
304 | |||
305 | if (unlikely(length > buf->chan->subbuf_size)) | ||
306 | goto toobig; | ||
307 | |||
308 | if (buf->offset != buf->chan->subbuf_size + 1) { /* subbuf_size + 1 is the "switch was refused" marker set below: that sub-buffer was already accounted for */ | ||
309 | buf->prev_padding = buf->chan->subbuf_size - buf->offset; /* unused bytes at the end of the sub-buffer being closed */ | ||
310 | old_subbuf = buf->subbufs_produced % buf->chan->n_subbufs; | ||
311 | buf->padding[old_subbuf] = buf->prev_padding; | ||
312 | buf->subbufs_produced++; | ||
313 | if (waitqueue_active(&buf->read_wait)) { | ||
314 | PREPARE_WORK(&buf->wake_readers, wakeup_readers, buf); | ||
315 | schedule_delayed_work(&buf->wake_readers, 1); /* wake readers from a work item rather than directly from the write path */ | ||
316 | } | ||
317 | } | ||
318 | |||
319 | old = buf->data; | ||
320 | new_subbuf = buf->subbufs_produced % buf->chan->n_subbufs; | ||
321 | new = buf->start + new_subbuf * buf->chan->subbuf_size; | ||
322 | buf->offset = 0; | ||
323 | if (!buf->chan->cb->subbuf_start(buf, new, old, buf->prev_padding)) { /* the client (or the default callback) may refuse the switch */ | ||
324 | buf->offset = buf->chan->subbuf_size + 1; /* remember the refusal for the next call */ | ||
325 | return 0; | ||
326 | } | ||
327 | buf->data = new; | ||
328 | buf->padding[new_subbuf] = 0; | ||
329 | |||
330 | if (unlikely(length + buf->offset > buf->chan->subbuf_size)) | ||
331 | goto toobig; | ||
332 | |||
333 | return length; | ||
334 | |||
335 | toobig: | ||
336 | printk(KERN_WARNING "relayfs: event too large (%Zd)\n", length); | ||
337 | WARN_ON(1); | ||
338 | return 0; | ||
339 | } | ||
340 | |||
341 | /** | ||
342 | * relay_subbufs_consumed - update the buffer's sub-buffers-consumed count | ||
343 | * @chan: the channel | ||
344 | * @cpu: the cpu associated with the channel buffer to update | ||
345 | * @subbufs_consumed: number of sub-buffers to add to current buf's count | ||
346 | * | ||
347 | * Adds @subbufs_consumed to the consumed count of the given cpu's | ||
348 | * buffer; pass the number newly consumed, not a running total. The | ||
349 | * count is capped at the number of sub-buffers produced so far. | ||
350 | * | ||
351 | * NOTE: kernel clients don't need to call this function if the channel | ||
352 | * mode is 'overwrite'. | ||
353 | */ | ||
354 | void relay_subbufs_consumed(struct rchan *chan, | ||
355 | unsigned int cpu, | ||
356 | size_t subbufs_consumed) | ||
357 | { | ||
358 | struct rchan_buf *buf; | ||
359 | |||
360 | if (!chan || cpu >= NR_CPUS) | ||
361 | return; | ||
362 | |||
363 | buf = chan->buf[cpu]; | ||
364 | if (!buf) | ||
365 | return; | ||
366 | |||
367 | buf->subbufs_consumed += subbufs_consumed; | ||
368 | if (buf->subbufs_consumed > buf->subbufs_produced) | ||
369 | buf->subbufs_consumed = buf->subbufs_produced; | ||
370 | } | ||
371 | |||
372 | /** | ||
373 | * relay_destroy_channel - free the channel struct | ||
374 | * @kref: the kref embedded in the struct rchan to free | ||
375 | * Should only be called from kref_put(). | ||
376 | */ | ||
377 | void relay_destroy_channel(struct kref *kref) | ||
378 | { | ||
379 | struct rchan *chan = container_of(kref, struct rchan, kref); | ||
380 | kfree(chan); | ||
381 | } | ||
382 | |||
383 | /** | ||
384 | * relay_close - close the channel | ||
385 | * @chan: the channel, may be NULL (then nothing happens) | ||
386 | * | ||
387 | * Closes every existing channel buffer and drops the channel reference. | ||
388 | */ | ||
389 | void relay_close(struct rchan *chan) | ||
390 | { | ||
391 | unsigned int cpu; | ||
392 | |||
393 | if (!chan) | ||
394 | return; | ||
395 | |||
396 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | ||
397 | struct rchan_buf *buf = chan->buf[cpu]; | ||
398 | if (buf) | ||
399 | relay_close_buf(buf); | ||
400 | } | ||
401 | |||
402 | kref_put(&chan->kref, relay_destroy_channel); | ||
403 | } | ||
404 | |||
405 | /** | ||
406 | * relay_flush - flush the channel | ||
407 | * @chan: the channel, may be NULL (then nothing happens) | ||
408 | * | ||
409 | * Flushes every existing channel buffer, i.e. forces a sub-buffer switch. | ||
410 | */ | ||
411 | void relay_flush(struct rchan *chan) | ||
412 | { | ||
413 | unsigned int cpu; | ||
414 | |||
415 | if (!chan) | ||
416 | return; | ||
417 | |||
418 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | ||
419 | struct rchan_buf *buf = chan->buf[cpu]; | ||
420 | if (buf) | ||
421 | relay_switch_subbuf(buf, 0); | ||
422 | } | ||
423 | } | ||
424 | |||
425 | EXPORT_SYMBOL_GPL(relay_open); | ||
426 | EXPORT_SYMBOL_GPL(relay_close); | ||
427 | EXPORT_SYMBOL_GPL(relay_flush); | ||
428 | EXPORT_SYMBOL_GPL(relay_reset); | ||
429 | EXPORT_SYMBOL_GPL(relay_subbufs_consumed); | ||
430 | EXPORT_SYMBOL_GPL(relay_switch_subbuf); | ||
431 | EXPORT_SYMBOL_GPL(relay_buf_full); | ||
diff --git a/fs/relayfs/relay.h b/fs/relayfs/relay.h new file mode 100644 index 000000000000..703503fa22b6 --- /dev/null +++ b/fs/relayfs/relay.h | |||
@@ -0,0 +1,12 @@ | |||
1 | #ifndef _RELAY_H | ||
2 | #define _RELAY_H | ||
3 | |||
4 | struct dentry *relayfs_create_file(const char *name, | ||
5 | struct dentry *parent, | ||
6 | int mode, | ||
7 | struct rchan *chan); | ||
8 | extern int relayfs_remove(struct dentry *dentry); | ||
9 | extern int relay_buf_empty(struct rchan_buf *buf); | ||
10 | extern void relay_destroy_channel(struct kref *kref); | ||
11 | |||
12 | #endif /* _RELAY_H */ | ||
diff --git a/fs/select.c b/fs/select.c index b80e7eb0ac0d..f10a10317d54 100644 --- a/fs/select.c +++ b/fs/select.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/personality.h> /* for STICKY_TIMEOUTS */ | 22 | #include <linux/personality.h> /* for STICKY_TIMEOUTS */ |
23 | #include <linux/file.h> | 23 | #include <linux/file.h> |
24 | #include <linux/fs.h> | 24 | #include <linux/fs.h> |
25 | #include <linux/rcupdate.h> | ||
25 | 26 | ||
26 | #include <asm/uaccess.h> | 27 | #include <asm/uaccess.h> |
27 | 28 | ||
@@ -132,11 +133,13 @@ static int max_select_fd(unsigned long n, fd_set_bits *fds) | |||
132 | unsigned long *open_fds; | 133 | unsigned long *open_fds; |
133 | unsigned long set; | 134 | unsigned long set; |
134 | int max; | 135 | int max; |
136 | struct fdtable *fdt; | ||
135 | 137 | ||
136 | /* handle last in-complete long-word first */ | 138 | /* handle last in-complete long-word first */ |
137 | set = ~(~0UL << (n & (__NFDBITS-1))); | 139 | set = ~(~0UL << (n & (__NFDBITS-1))); |
138 | n /= __NFDBITS; | 140 | n /= __NFDBITS; |
139 | open_fds = current->files->open_fds->fds_bits+n; | 141 | fdt = files_fdtable(current->files); |
142 | open_fds = fdt->open_fds->fds_bits+n; | ||
140 | max = 0; | 143 | max = 0; |
141 | if (set) { | 144 | if (set) { |
142 | set &= BITS(fds, n); | 145 | set &= BITS(fds, n); |
@@ -183,9 +186,9 @@ int do_select(int n, fd_set_bits *fds, long *timeout) | |||
183 | int retval, i; | 186 | int retval, i; |
184 | long __timeout = *timeout; | 187 | long __timeout = *timeout; |
185 | 188 | ||
186 | spin_lock(&current->files->file_lock); | 189 | rcu_read_lock(); |
187 | retval = max_select_fd(n, fds); | 190 | retval = max_select_fd(n, fds); |
188 | spin_unlock(&current->files->file_lock); | 191 | rcu_read_unlock(); |
189 | 192 | ||
190 | if (retval < 0) | 193 | if (retval < 0) |
191 | return retval; | 194 | return retval; |
@@ -299,6 +302,7 @@ sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s | |||
299 | char *bits; | 302 | char *bits; |
300 | long timeout; | 303 | long timeout; |
301 | int ret, size, max_fdset; | 304 | int ret, size, max_fdset; |
305 | struct fdtable *fdt; | ||
302 | 306 | ||
303 | timeout = MAX_SCHEDULE_TIMEOUT; | 307 | timeout = MAX_SCHEDULE_TIMEOUT; |
304 | if (tvp) { | 308 | if (tvp) { |
@@ -326,7 +330,10 @@ sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s | |||
326 | goto out_nofds; | 330 | goto out_nofds; |
327 | 331 | ||
328 | /* max_fdset can increase, so grab it once to avoid race */ | 332 | /* max_fdset can increase, so grab it once to avoid race */ |
329 | max_fdset = current->files->max_fdset; | 333 | rcu_read_lock(); |
334 | fdt = files_fdtable(current->files); | ||
335 | max_fdset = fdt->max_fdset; | ||
336 | rcu_read_unlock(); | ||
330 | if (n > max_fdset) | 337 | if (n > max_fdset) |
331 | n = max_fdset; | 338 | n = max_fdset; |
332 | 339 | ||
@@ -464,9 +471,15 @@ asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long ti | |||
464 | unsigned int i; | 471 | unsigned int i; |
465 | struct poll_list *head; | 472 | struct poll_list *head; |
466 | struct poll_list *walk; | 473 | struct poll_list *walk; |
474 | struct fdtable *fdt; | ||
475 | int max_fdset; | ||
467 | 476 | ||
468 | /* Do a sanity check on nfds ... */ | 477 | /* Do a sanity check on nfds ... */ |
469 | if (nfds > current->files->max_fdset && nfds > OPEN_MAX) | 478 | rcu_read_lock(); |
479 | fdt = files_fdtable(current->files); | ||
480 | max_fdset = fdt->max_fdset; | ||
481 | rcu_read_unlock(); | ||
482 | if (nfds > max_fdset && nfds > OPEN_MAX) | ||
470 | return -EINVAL; | 483 | return -EINVAL; |
471 | 484 | ||
472 | if (timeout) { | 485 | if (timeout) { |
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index 4765aaac9fd2..10b994428fef 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c | |||
@@ -331,6 +331,7 @@ static void | |||
331 | smb_delete_inode(struct inode *ino) | 331 | smb_delete_inode(struct inode *ino) |
332 | { | 332 | { |
333 | DEBUG1("ino=%ld\n", ino->i_ino); | 333 | DEBUG1("ino=%ld\n", ino->i_ino); |
334 | truncate_inode_pages(&ino->i_data, 0); | ||
334 | lock_kernel(); | 335 | lock_kernel(); |
335 | if (smb_close(ino)) | 336 | if (smb_close(ino)) |
336 | PARANOIA("could not close inode %ld\n", ino->i_ino); | 337 | PARANOIA("could not close inode %ld\n", ino->i_ino); |
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c index 220babe91efd..38ab558835c4 100644 --- a/fs/smbfs/proc.c +++ b/fs/smbfs/proc.c | |||
@@ -2397,8 +2397,7 @@ smb_proc_readdir_long(struct file *filp, void *dirent, filldir_t filldir, | |||
2397 | if (req->rq_rcls == ERRSRV && req->rq_err == ERRerror) { | 2397 | if (req->rq_rcls == ERRSRV && req->rq_err == ERRerror) { |
2398 | /* a damn Win95 bug - sometimes it clags if you | 2398 | /* a damn Win95 bug - sometimes it clags if you |
2399 | ask it too fast */ | 2399 | ask it too fast */ |
2400 | current->state = TASK_INTERRUPTIBLE; | 2400 | schedule_timeout_interruptible(msecs_to_jiffies(200)); |
2401 | schedule_timeout(HZ/5); | ||
2402 | continue; | 2401 | continue; |
2403 | } | 2402 | } |
2404 | 2403 | ||
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 0530077d9dd8..fa33eceb0011 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c | |||
@@ -292,6 +292,7 @@ int sysv_sync_inode(struct inode * inode) | |||
292 | 292 | ||
293 | static void sysv_delete_inode(struct inode *inode) | 293 | static void sysv_delete_inode(struct inode *inode) |
294 | { | 294 | { |
295 | truncate_inode_pages(&inode->i_data, 0); | ||
295 | inode->i_size = 0; | 296 | inode->i_size = 0; |
296 | sysv_truncate(inode); | 297 | sysv_truncate(inode); |
297 | lock_kernel(); | 298 | lock_kernel(); |
diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 3d68de39fad6..b83890beaaac 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c | |||
@@ -87,6 +87,8 @@ static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int); | |||
87 | */ | 87 | */ |
88 | void udf_delete_inode(struct inode * inode) | 88 | void udf_delete_inode(struct inode * inode) |
89 | { | 89 | { |
90 | truncate_inode_pages(&inode->i_data, 0); | ||
91 | |||
90 | if (is_bad_inode(inode)) | 92 | if (is_bad_inode(inode)) |
91 | goto no_delete; | 93 | goto no_delete; |
92 | 94 | ||
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index 997640c99c7d..faf1512173eb 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c | |||
@@ -114,8 +114,7 @@ void ufs_free_fragments (struct inode * inode, unsigned fragment, unsigned count | |||
114 | ubh_mark_buffer_dirty (USPI_UBH); | 114 | ubh_mark_buffer_dirty (USPI_UBH); |
115 | ubh_mark_buffer_dirty (UCPI_UBH); | 115 | ubh_mark_buffer_dirty (UCPI_UBH); |
116 | if (sb->s_flags & MS_SYNCHRONOUS) { | 116 | if (sb->s_flags & MS_SYNCHRONOUS) { |
117 | ubh_wait_on_buffer (UCPI_UBH); | 117 | ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **)&ucpi); |
118 | ubh_ll_rw_block (WRITE, 1, (struct ufs_buffer_head **)&ucpi); | ||
119 | ubh_wait_on_buffer (UCPI_UBH); | 118 | ubh_wait_on_buffer (UCPI_UBH); |
120 | } | 119 | } |
121 | sb->s_dirt = 1; | 120 | sb->s_dirt = 1; |
@@ -200,8 +199,7 @@ do_more: | |||
200 | ubh_mark_buffer_dirty (USPI_UBH); | 199 | ubh_mark_buffer_dirty (USPI_UBH); |
201 | ubh_mark_buffer_dirty (UCPI_UBH); | 200 | ubh_mark_buffer_dirty (UCPI_UBH); |
202 | if (sb->s_flags & MS_SYNCHRONOUS) { | 201 | if (sb->s_flags & MS_SYNCHRONOUS) { |
203 | ubh_wait_on_buffer (UCPI_UBH); | 202 | ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **)&ucpi); |
204 | ubh_ll_rw_block (WRITE, 1, (struct ufs_buffer_head **)&ucpi); | ||
205 | ubh_wait_on_buffer (UCPI_UBH); | 203 | ubh_wait_on_buffer (UCPI_UBH); |
206 | } | 204 | } |
207 | 205 | ||
@@ -459,8 +457,7 @@ ufs_add_fragments (struct inode * inode, unsigned fragment, | |||
459 | ubh_mark_buffer_dirty (USPI_UBH); | 457 | ubh_mark_buffer_dirty (USPI_UBH); |
460 | ubh_mark_buffer_dirty (UCPI_UBH); | 458 | ubh_mark_buffer_dirty (UCPI_UBH); |
461 | if (sb->s_flags & MS_SYNCHRONOUS) { | 459 | if (sb->s_flags & MS_SYNCHRONOUS) { |
462 | ubh_wait_on_buffer (UCPI_UBH); | 460 | ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **)&ucpi); |
463 | ubh_ll_rw_block (WRITE, 1, (struct ufs_buffer_head **)&ucpi); | ||
464 | ubh_wait_on_buffer (UCPI_UBH); | 461 | ubh_wait_on_buffer (UCPI_UBH); |
465 | } | 462 | } |
466 | sb->s_dirt = 1; | 463 | sb->s_dirt = 1; |
@@ -585,8 +582,7 @@ succed: | |||
585 | ubh_mark_buffer_dirty (USPI_UBH); | 582 | ubh_mark_buffer_dirty (USPI_UBH); |
586 | ubh_mark_buffer_dirty (UCPI_UBH); | 583 | ubh_mark_buffer_dirty (UCPI_UBH); |
587 | if (sb->s_flags & MS_SYNCHRONOUS) { | 584 | if (sb->s_flags & MS_SYNCHRONOUS) { |
588 | ubh_wait_on_buffer (UCPI_UBH); | 585 | ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **)&ucpi); |
589 | ubh_ll_rw_block (WRITE, 1, (struct ufs_buffer_head **)&ucpi); | ||
590 | ubh_wait_on_buffer (UCPI_UBH); | 586 | ubh_wait_on_buffer (UCPI_UBH); |
591 | } | 587 | } |
592 | sb->s_dirt = 1; | 588 | sb->s_dirt = 1; |
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 61a6b1542fc5..0938945b9cbc 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c | |||
@@ -124,8 +124,7 @@ void ufs_free_inode (struct inode * inode) | |||
124 | ubh_mark_buffer_dirty (USPI_UBH); | 124 | ubh_mark_buffer_dirty (USPI_UBH); |
125 | ubh_mark_buffer_dirty (UCPI_UBH); | 125 | ubh_mark_buffer_dirty (UCPI_UBH); |
126 | if (sb->s_flags & MS_SYNCHRONOUS) { | 126 | if (sb->s_flags & MS_SYNCHRONOUS) { |
127 | ubh_wait_on_buffer (UCPI_UBH); | 127 | ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **) &ucpi); |
128 | ubh_ll_rw_block (WRITE, 1, (struct ufs_buffer_head **) &ucpi); | ||
129 | ubh_wait_on_buffer (UCPI_UBH); | 128 | ubh_wait_on_buffer (UCPI_UBH); |
130 | } | 129 | } |
131 | 130 | ||
@@ -249,8 +248,7 @@ cg_found: | |||
249 | ubh_mark_buffer_dirty (USPI_UBH); | 248 | ubh_mark_buffer_dirty (USPI_UBH); |
250 | ubh_mark_buffer_dirty (UCPI_UBH); | 249 | ubh_mark_buffer_dirty (UCPI_UBH); |
251 | if (sb->s_flags & MS_SYNCHRONOUS) { | 250 | if (sb->s_flags & MS_SYNCHRONOUS) { |
252 | ubh_wait_on_buffer (UCPI_UBH); | 251 | ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **) &ucpi); |
253 | ubh_ll_rw_block (WRITE, 1, (struct ufs_buffer_head **) &ucpi); | ||
254 | ubh_wait_on_buffer (UCPI_UBH); | 252 | ubh_wait_on_buffer (UCPI_UBH); |
255 | } | 253 | } |
256 | sb->s_dirt = 1; | 254 | sb->s_dirt = 1; |
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 718627ca8b5c..55f4aa16e3fc 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c | |||
@@ -804,6 +804,7 @@ int ufs_sync_inode (struct inode *inode) | |||
804 | 804 | ||
805 | void ufs_delete_inode (struct inode * inode) | 805 | void ufs_delete_inode (struct inode * inode) |
806 | { | 806 | { |
807 | truncate_inode_pages(&inode->i_data, 0); | ||
807 | /*UFS_I(inode)->i_dtime = CURRENT_TIME;*/ | 808 | /*UFS_I(inode)->i_dtime = CURRENT_TIME;*/ |
808 | lock_kernel(); | 809 | lock_kernel(); |
809 | mark_inode_dirty(inode); | 810 | mark_inode_dirty(inode); |
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index e312bf8bad9f..61d2e35012a4 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c | |||
@@ -285,8 +285,7 @@ next:; | |||
285 | } | 285 | } |
286 | } | 286 | } |
287 | if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) { | 287 | if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) { |
288 | ubh_wait_on_buffer (ind_ubh); | 288 | ubh_ll_rw_block (SWRITE, 1, &ind_ubh); |
289 | ubh_ll_rw_block (WRITE, 1, &ind_ubh); | ||
290 | ubh_wait_on_buffer (ind_ubh); | 289 | ubh_wait_on_buffer (ind_ubh); |
291 | } | 290 | } |
292 | ubh_brelse (ind_ubh); | 291 | ubh_brelse (ind_ubh); |
@@ -353,8 +352,7 @@ static int ufs_trunc_dindirect (struct inode *inode, unsigned offset, __fs32 *p) | |||
353 | } | 352 | } |
354 | } | 353 | } |
355 | if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) { | 354 | if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) { |
356 | ubh_wait_on_buffer (dind_bh); | 355 | ubh_ll_rw_block (SWRITE, 1, &dind_bh); |
357 | ubh_ll_rw_block (WRITE, 1, &dind_bh); | ||
358 | ubh_wait_on_buffer (dind_bh); | 356 | ubh_wait_on_buffer (dind_bh); |
359 | } | 357 | } |
360 | ubh_brelse (dind_bh); | 358 | ubh_brelse (dind_bh); |
@@ -418,8 +416,7 @@ static int ufs_trunc_tindirect (struct inode * inode) | |||
418 | } | 416 | } |
419 | } | 417 | } |
420 | if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) { | 418 | if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) { |
421 | ubh_wait_on_buffer (tind_bh); | 419 | ubh_ll_rw_block (SWRITE, 1, &tind_bh); |
422 | ubh_ll_rw_block (WRITE, 1, &tind_bh); | ||
423 | ubh_wait_on_buffer (tind_bh); | 420 | ubh_wait_on_buffer (tind_bh); |
424 | } | 421 | } |
425 | ubh_brelse (tind_bh); | 422 | ubh_brelse (tind_bh); |
diff --git a/fs/umsdos/notes b/fs/umsdos/notes deleted file mode 100644 index 3c47d1f4fc47..000000000000 --- a/fs/umsdos/notes +++ /dev/null | |||
@@ -1,17 +0,0 @@ | |||
1 | This file contain idea and things I don't want to forget | ||
2 | |||
3 | Possible bug in fs/read_write.c | ||
4 | Function sys_readdir() | ||
5 | |||
6 | There is a call the verify_area that does not take in account | ||
7 | the count parameter. I guess it should read | ||
8 | |||
9 | error = verify_area(VERIFY_WRITE, dirent, count*sizeof (*dirent)); | ||
10 | |||
11 | instead of | ||
12 | |||
13 | error = verify_area(VERIFY_WRITE, dirent, sizeof (*dirent)); | ||
14 | |||
15 | Of course, now , count is always 1 | ||
16 | |||
17 | |||
diff --git a/fs/xattr.c b/fs/xattr.c index 6acd5c63da91..3f9c64bea151 100644 --- a/fs/xattr.c +++ b/fs/xattr.c | |||
@@ -51,20 +51,29 @@ setxattr(struct dentry *d, char __user *name, void __user *value, | |||
51 | } | 51 | } |
52 | } | 52 | } |
53 | 53 | ||
54 | down(&d->d_inode->i_sem); | ||
55 | error = security_inode_setxattr(d, kname, kvalue, size, flags); | ||
56 | if (error) | ||
57 | goto out; | ||
54 | error = -EOPNOTSUPP; | 58 | error = -EOPNOTSUPP; |
55 | if (d->d_inode->i_op && d->d_inode->i_op->setxattr) { | 59 | if (d->d_inode->i_op && d->d_inode->i_op->setxattr) { |
56 | down(&d->d_inode->i_sem); | 60 | error = d->d_inode->i_op->setxattr(d, kname, kvalue, |
57 | error = security_inode_setxattr(d, kname, kvalue, size, flags); | 61 | size, flags); |
58 | if (error) | ||
59 | goto out; | ||
60 | error = d->d_inode->i_op->setxattr(d, kname, kvalue, size, flags); | ||
61 | if (!error) { | 62 | if (!error) { |
62 | fsnotify_xattr(d); | 63 | fsnotify_xattr(d); |
63 | security_inode_post_setxattr(d, kname, kvalue, size, flags); | 64 | security_inode_post_setxattr(d, kname, kvalue, |
65 | size, flags); | ||
64 | } | 66 | } |
65 | out: | 67 | } else if (!strncmp(kname, XATTR_SECURITY_PREFIX, |
66 | up(&d->d_inode->i_sem); | 68 | sizeof XATTR_SECURITY_PREFIX - 1)) { |
69 | const char *suffix = kname + sizeof XATTR_SECURITY_PREFIX - 1; | ||
70 | error = security_inode_setsecurity(d->d_inode, suffix, kvalue, | ||
71 | size, flags); | ||
72 | if (!error) | ||
73 | fsnotify_xattr(d); | ||
67 | } | 74 | } |
75 | out: | ||
76 | up(&d->d_inode->i_sem); | ||
68 | if (kvalue) | 77 | if (kvalue) |
69 | kfree(kvalue); | 78 | kfree(kvalue); |
70 | return error; | 79 | return error; |
@@ -139,20 +148,25 @@ getxattr(struct dentry *d, char __user *name, void __user *value, size_t size) | |||
139 | return -ENOMEM; | 148 | return -ENOMEM; |
140 | } | 149 | } |
141 | 150 | ||
151 | error = security_inode_getxattr(d, kname); | ||
152 | if (error) | ||
153 | goto out; | ||
142 | error = -EOPNOTSUPP; | 154 | error = -EOPNOTSUPP; |
143 | if (d->d_inode->i_op && d->d_inode->i_op->getxattr) { | 155 | if (d->d_inode->i_op && d->d_inode->i_op->getxattr) |
144 | error = security_inode_getxattr(d, kname); | ||
145 | if (error) | ||
146 | goto out; | ||
147 | error = d->d_inode->i_op->getxattr(d, kname, kvalue, size); | 156 | error = d->d_inode->i_op->getxattr(d, kname, kvalue, size); |
148 | if (error > 0) { | 157 | else if (!strncmp(kname, XATTR_SECURITY_PREFIX, |
149 | if (size && copy_to_user(value, kvalue, error)) | 158 | sizeof XATTR_SECURITY_PREFIX - 1)) { |
150 | error = -EFAULT; | 159 | const char *suffix = kname + sizeof XATTR_SECURITY_PREFIX - 1; |
151 | } else if (error == -ERANGE && size >= XATTR_SIZE_MAX) { | 160 | error = security_inode_getsecurity(d->d_inode, suffix, kvalue, |
152 | /* The file system tried to returned a value bigger | 161 | size); |
153 | than XATTR_SIZE_MAX bytes. Not possible. */ | 162 | } |
154 | error = -E2BIG; | 163 | if (error > 0) { |
155 | } | 164 | if (size && copy_to_user(value, kvalue, error)) |
165 | error = -EFAULT; | ||
166 | } else if (error == -ERANGE && size >= XATTR_SIZE_MAX) { | ||
167 | /* The file system tried to returned a value bigger | ||
168 | than XATTR_SIZE_MAX bytes. Not possible. */ | ||
169 | error = -E2BIG; | ||
156 | } | 170 | } |
157 | out: | 171 | out: |
158 | if (kvalue) | 172 | if (kvalue) |
@@ -221,20 +235,24 @@ listxattr(struct dentry *d, char __user *list, size_t size) | |||
221 | return -ENOMEM; | 235 | return -ENOMEM; |
222 | } | 236 | } |
223 | 237 | ||
238 | error = security_inode_listxattr(d); | ||
239 | if (error) | ||
240 | goto out; | ||
224 | error = -EOPNOTSUPP; | 241 | error = -EOPNOTSUPP; |
225 | if (d->d_inode->i_op && d->d_inode->i_op->listxattr) { | 242 | if (d->d_inode->i_op && d->d_inode->i_op->listxattr) { |
226 | error = security_inode_listxattr(d); | ||
227 | if (error) | ||
228 | goto out; | ||
229 | error = d->d_inode->i_op->listxattr(d, klist, size); | 243 | error = d->d_inode->i_op->listxattr(d, klist, size); |
230 | if (error > 0) { | 244 | } else { |
231 | if (size && copy_to_user(list, klist, error)) | 245 | error = security_inode_listsecurity(d->d_inode, klist, size); |
232 | error = -EFAULT; | 246 | if (size && error >= size) |
233 | } else if (error == -ERANGE && size >= XATTR_LIST_MAX) { | 247 | error = -ERANGE; |
234 | /* The file system tried to returned a list bigger | 248 | } |
235 | than XATTR_LIST_MAX bytes. Not possible. */ | 249 | if (error > 0) { |
236 | error = -E2BIG; | 250 | if (size && copy_to_user(list, klist, error)) |
237 | } | 251 | error = -EFAULT; |
252 | } else if (error == -ERANGE && size >= XATTR_LIST_MAX) { | ||
253 | /* The file system tried to returned a list bigger | ||
254 | than XATTR_LIST_MAX bytes. Not possible. */ | ||
255 | error = -E2BIG; | ||
238 | } | 256 | } |
239 | out: | 257 | out: |
240 | if (klist) | 258 | if (klist) |
@@ -307,6 +325,8 @@ removexattr(struct dentry *d, char __user *name) | |||
307 | down(&d->d_inode->i_sem); | 325 | down(&d->d_inode->i_sem); |
308 | error = d->d_inode->i_op->removexattr(d, kname); | 326 | error = d->d_inode->i_op->removexattr(d, kname); |
309 | up(&d->d_inode->i_sem); | 327 | up(&d->d_inode->i_sem); |
328 | if (!error) | ||
329 | fsnotify_xattr(d); | ||
310 | } | 330 | } |
311 | out: | 331 | out: |
312 | return error; | 332 | return error; |
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig index c92306f0fdc5..8e8f32dabe53 100644 --- a/fs/xfs/Kconfig +++ b/fs/xfs/Kconfig | |||
@@ -1,5 +1,3 @@ | |||
1 | menu "XFS support" | ||
2 | |||
3 | config XFS_FS | 1 | config XFS_FS |
4 | tristate "XFS filesystem support" | 2 | tristate "XFS filesystem support" |
5 | select EXPORTFS if NFSD!=n | 3 | select EXPORTFS if NFSD!=n |
@@ -22,27 +20,11 @@ config XFS_FS | |||
22 | 20 | ||
23 | config XFS_EXPORT | 21 | config XFS_EXPORT |
24 | bool | 22 | bool |
25 | default y if XFS_FS && EXPORTFS | 23 | depends on XFS_FS && EXPORTFS |
26 | 24 | default y | |
27 | config XFS_RT | ||
28 | bool "Realtime support (EXPERIMENTAL)" | ||
29 | depends on XFS_FS && EXPERIMENTAL | ||
30 | help | ||
31 | If you say Y here you will be able to mount and use XFS filesystems | ||
32 | which contain a realtime subvolume. The realtime subvolume is a | ||
33 | separate area of disk space where only file data is stored. The | ||
34 | realtime subvolume is designed to provide very deterministic | ||
35 | data rates suitable for media streaming applications. | ||
36 | |||
37 | See the xfs man page in section 5 for a bit more information. | ||
38 | |||
39 | This feature is unsupported at this time, is not yet fully | ||
40 | functional, and may cause serious problems. | ||
41 | |||
42 | If unsure, say N. | ||
43 | 25 | ||
44 | config XFS_QUOTA | 26 | config XFS_QUOTA |
45 | bool "Quota support" | 27 | tristate "XFS Quota support" |
46 | depends on XFS_FS | 28 | depends on XFS_FS |
47 | help | 29 | help |
48 | If you say Y here, you will be able to set limits for disk usage on | 30 | If you say Y here, you will be able to set limits for disk usage on |
@@ -59,7 +41,7 @@ config XFS_QUOTA | |||
59 | they are completely independent subsystems. | 41 | they are completely independent subsystems. |
60 | 42 | ||
61 | config XFS_SECURITY | 43 | config XFS_SECURITY |
62 | bool "Security Label support" | 44 | bool "XFS Security Label support" |
63 | depends on XFS_FS | 45 | depends on XFS_FS |
64 | help | 46 | help |
65 | Security labels support alternative access control models | 47 | Security labels support alternative access control models |
@@ -71,7 +53,7 @@ config XFS_SECURITY | |||
71 | extended attributes for inode security labels, say N. | 53 | extended attributes for inode security labels, say N. |
72 | 54 | ||
73 | config XFS_POSIX_ACL | 55 | config XFS_POSIX_ACL |
74 | bool "POSIX ACL support" | 56 | bool "XFS POSIX ACL support" |
75 | depends on XFS_FS | 57 | depends on XFS_FS |
76 | help | 58 | help |
77 | POSIX Access Control Lists (ACLs) support permissions for users and | 59 | POSIX Access Control Lists (ACLs) support permissions for users and |
@@ -82,4 +64,19 @@ config XFS_POSIX_ACL | |||
82 | 64 | ||
83 | If you don't know what Access Control Lists are, say N. | 65 | If you don't know what Access Control Lists are, say N. |
84 | 66 | ||
85 | endmenu | 67 | config XFS_RT |
68 | bool "XFS Realtime support (EXPERIMENTAL)" | ||
69 | depends on XFS_FS && EXPERIMENTAL | ||
70 | help | ||
71 | If you say Y here you will be able to mount and use XFS filesystems | ||
72 | which contain a realtime subvolume. The realtime subvolume is a | ||
73 | separate area of disk space where only file data is stored. The | ||
74 | realtime subvolume is designed to provide very deterministic | ||
75 | data rates suitable for media streaming applications. | ||
76 | |||
77 | See the xfs man page in section 5 for a bit more information. | ||
78 | |||
79 | This feature is unsupported at this time, is not yet fully | ||
80 | functional, and may cause serious problems. | ||
81 | |||
82 | If unsure, say N. | ||
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index d3ff78354638..49e3e7e5e3dc 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile | |||
@@ -1,150 +1 @@ | |||
1 | # | include $(TOPDIR)/fs/xfs/Makefile-linux-$(VERSION).$(PATCHLEVEL) | |
2 | # Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | # | ||
4 | # This program is free software; you can redistribute it and/or modify it | ||
5 | # under the terms of version 2 of the GNU General Public License as | ||
6 | # published by the Free Software Foundation. | ||
7 | # | ||
8 | # This program is distributed in the hope that it would be useful, but | ||
9 | # WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | # | ||
12 | # Further, this software is distributed without any warranty that it is | ||
13 | # free of the rightful claim of any third person regarding infringement | ||
14 | # or the like. Any license provided herein, whether implied or | ||
15 | # otherwise, applies only to this software file. Patent licenses, if | ||
16 | # any, provided herein do not apply to combinations of this program with | ||
17 | # other software, or any other product whatsoever. | ||
18 | # | ||
19 | # You should have received a copy of the GNU General Public License along | ||
20 | # with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | # Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | # | ||
23 | # Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | # Mountain View, CA 94043, or: | ||
25 | # | ||
26 | # http://www.sgi.com | ||
27 | # | ||
28 | # For further information regarding this notice, see: | ||
29 | # | ||
30 | # http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | # | ||
32 | |||
33 | EXTRA_CFLAGS += -Ifs/xfs -Ifs/xfs/linux-2.6 -funsigned-char | ||
34 | |||
35 | ifeq ($(CONFIG_XFS_DEBUG),y) | ||
36 | EXTRA_CFLAGS += -g -DSTATIC="" -DDEBUG | ||
37 | EXTRA_CFLAGS += -DPAGEBUF_LOCK_TRACKING | ||
38 | endif | ||
39 | ifeq ($(CONFIG_XFS_TRACE),y) | ||
40 | EXTRA_CFLAGS += -DXFS_ALLOC_TRACE | ||
41 | EXTRA_CFLAGS += -DXFS_ATTR_TRACE | ||
42 | EXTRA_CFLAGS += -DXFS_BLI_TRACE | ||
43 | EXTRA_CFLAGS += -DXFS_BMAP_TRACE | ||
44 | EXTRA_CFLAGS += -DXFS_BMBT_TRACE | ||
45 | EXTRA_CFLAGS += -DXFS_DIR_TRACE | ||
46 | EXTRA_CFLAGS += -DXFS_DIR2_TRACE | ||
47 | EXTRA_CFLAGS += -DXFS_DQUOT_TRACE | ||
48 | EXTRA_CFLAGS += -DXFS_ILOCK_TRACE | ||
49 | EXTRA_CFLAGS += -DXFS_LOG_TRACE | ||
50 | EXTRA_CFLAGS += -DXFS_RW_TRACE | ||
51 | EXTRA_CFLAGS += -DPAGEBUF_TRACE | ||
52 | EXTRA_CFLAGS += -DXFS_VNODE_TRACE | ||
53 | endif | ||
54 | |||
55 | obj-$(CONFIG_XFS_FS) += xfs.o | ||
56 | |||
57 | xfs-$(CONFIG_XFS_QUOTA) += $(addprefix quota/, \ | ||
58 | xfs_dquot.o \ | ||
59 | xfs_dquot_item.o \ | ||
60 | xfs_trans_dquot.o \ | ||
61 | xfs_qm_syscalls.o \ | ||
62 | xfs_qm_bhv.o \ | ||
63 | xfs_qm.o) | ||
64 | ifeq ($(CONFIG_XFS_QUOTA),y) | ||
65 | xfs-$(CONFIG_PROC_FS) += quota/xfs_qm_stats.o | ||
66 | endif | ||
67 | |||
68 | xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o | ||
69 | xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o | ||
70 | xfs-$(CONFIG_PROC_FS) += linux-2.6/xfs_stats.o | ||
71 | xfs-$(CONFIG_SYSCTL) += linux-2.6/xfs_sysctl.o | ||
72 | xfs-$(CONFIG_COMPAT) += linux-2.6/xfs_ioctl32.o | ||
73 | xfs-$(CONFIG_XFS_EXPORT) += linux-2.6/xfs_export.o | ||
74 | |||
75 | |||
76 | xfs-y += xfs_alloc.o \ | ||
77 | xfs_alloc_btree.o \ | ||
78 | xfs_attr.o \ | ||
79 | xfs_attr_leaf.o \ | ||
80 | xfs_behavior.o \ | ||
81 | xfs_bit.o \ | ||
82 | xfs_bmap.o \ | ||
83 | xfs_bmap_btree.o \ | ||
84 | xfs_btree.o \ | ||
85 | xfs_buf_item.o \ | ||
86 | xfs_da_btree.o \ | ||
87 | xfs_dir.o \ | ||
88 | xfs_dir2.o \ | ||
89 | xfs_dir2_block.o \ | ||
90 | xfs_dir2_data.o \ | ||
91 | xfs_dir2_leaf.o \ | ||
92 | xfs_dir2_node.o \ | ||
93 | xfs_dir2_sf.o \ | ||
94 | xfs_dir_leaf.o \ | ||
95 | xfs_error.o \ | ||
96 | xfs_extfree_item.o \ | ||
97 | xfs_fsops.o \ | ||
98 | xfs_ialloc.o \ | ||
99 | xfs_ialloc_btree.o \ | ||
100 | xfs_iget.o \ | ||
101 | xfs_inode.o \ | ||
102 | xfs_inode_item.o \ | ||
103 | xfs_iocore.o \ | ||
104 | xfs_iomap.o \ | ||
105 | xfs_itable.o \ | ||
106 | xfs_dfrag.o \ | ||
107 | xfs_log.o \ | ||
108 | xfs_log_recover.o \ | ||
109 | xfs_macros.o \ | ||
110 | xfs_mount.o \ | ||
111 | xfs_rename.o \ | ||
112 | xfs_trans.o \ | ||
113 | xfs_trans_ail.o \ | ||
114 | xfs_trans_buf.o \ | ||
115 | xfs_trans_extfree.o \ | ||
116 | xfs_trans_inode.o \ | ||
117 | xfs_trans_item.o \ | ||
118 | xfs_utils.o \ | ||
119 | xfs_vfsops.o \ | ||
120 | xfs_vnodeops.o \ | ||
121 | xfs_rw.o \ | ||
122 | xfs_dmops.o \ | ||
123 | xfs_qmops.o | ||
124 | |||
125 | xfs-$(CONFIG_XFS_TRACE) += xfs_dir2_trace.o | ||
126 | |||
127 | # Objects in linux-2.6/ | ||
128 | xfs-y += $(addprefix linux-2.6/, \ | ||
129 | kmem.o \ | ||
130 | xfs_aops.o \ | ||
131 | xfs_buf.o \ | ||
132 | xfs_file.o \ | ||
133 | xfs_fs_subr.o \ | ||
134 | xfs_globals.o \ | ||
135 | xfs_ioctl.o \ | ||
136 | xfs_iops.o \ | ||
137 | xfs_lrw.o \ | ||
138 | xfs_super.o \ | ||
139 | xfs_vfs.o \ | ||
140 | xfs_vnode.o) | ||
141 | |||
142 | # Objects in support/ | ||
143 | xfs-y += $(addprefix support/, \ | ||
144 | debug.o \ | ||
145 | move.o \ | ||
146 | qsort.o \ | ||
147 | uuid.o) | ||
148 | |||
149 | xfs-$(CONFIG_XFS_TRACE) += support/ktrace.o | ||
150 | |||
diff --git a/fs/xfs/Makefile-linux-2.6 b/fs/xfs/Makefile-linux-2.6 new file mode 100644 index 000000000000..d8c87fa21ad1 --- /dev/null +++ b/fs/xfs/Makefile-linux-2.6 | |||
@@ -0,0 +1,152 @@ | |||
1 | # | ||
2 | # Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | # | ||
4 | # This program is free software; you can redistribute it and/or modify it | ||
5 | # under the terms of version 2 of the GNU General Public License as | ||
6 | # published by the Free Software Foundation. | ||
7 | # | ||
8 | # This program is distributed in the hope that it would be useful, but | ||
9 | # WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | # | ||
12 | # Further, this software is distributed without any warranty that it is | ||
13 | # free of the rightful claim of any third person regarding infringement | ||
14 | # or the like. Any license provided herein, whether implied or | ||
15 | # otherwise, applies only to this software file. Patent licenses, if | ||
16 | # any, provided herein do not apply to combinations of this program with | ||
17 | # other software, or any other product whatsoever. | ||
18 | # | ||
19 | # You should have received a copy of the GNU General Public License along | ||
20 | # with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | # Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | # | ||
23 | # Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | # Mountain View, CA 94043, or: | ||
25 | # | ||
26 | # http://www.sgi.com | ||
27 | # | ||
28 | # For further information regarding this notice, see: | ||
29 | # | ||
30 | # http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | # | ||
32 | |||
33 | EXTRA_CFLAGS += -Ifs/xfs -Ifs/xfs/linux-2.6 -funsigned-char | ||
34 | |||
35 | XFS_LINUX := linux-2.6 | ||
36 | |||
37 | ifeq ($(CONFIG_XFS_DEBUG),y) | ||
38 | EXTRA_CFLAGS += -g -DSTATIC="" -DDEBUG | ||
39 | EXTRA_CFLAGS += -DPAGEBUF_LOCK_TRACKING | ||
40 | endif | ||
41 | ifeq ($(CONFIG_XFS_TRACE),y) | ||
42 | EXTRA_CFLAGS += -DXFS_ALLOC_TRACE | ||
43 | EXTRA_CFLAGS += -DXFS_ATTR_TRACE | ||
44 | EXTRA_CFLAGS += -DXFS_BLI_TRACE | ||
45 | EXTRA_CFLAGS += -DXFS_BMAP_TRACE | ||
46 | EXTRA_CFLAGS += -DXFS_BMBT_TRACE | ||
47 | EXTRA_CFLAGS += -DXFS_DIR_TRACE | ||
48 | EXTRA_CFLAGS += -DXFS_DIR2_TRACE | ||
49 | EXTRA_CFLAGS += -DXFS_DQUOT_TRACE | ||
50 | EXTRA_CFLAGS += -DXFS_ILOCK_TRACE | ||
51 | EXTRA_CFLAGS += -DXFS_LOG_TRACE | ||
52 | EXTRA_CFLAGS += -DXFS_RW_TRACE | ||
53 | EXTRA_CFLAGS += -DPAGEBUF_TRACE | ||
54 | EXTRA_CFLAGS += -DXFS_VNODE_TRACE | ||
55 | endif | ||
56 | |||
57 | obj-$(CONFIG_XFS_FS) += xfs.o | ||
58 | |||
59 | xfs-$(CONFIG_XFS_QUOTA) += $(addprefix quota/, \ | ||
60 | xfs_dquot.o \ | ||
61 | xfs_dquot_item.o \ | ||
62 | xfs_trans_dquot.o \ | ||
63 | xfs_qm_syscalls.o \ | ||
64 | xfs_qm_bhv.o \ | ||
65 | xfs_qm.o) | ||
66 | |||
67 | ifeq ($(CONFIG_XFS_QUOTA),y) | ||
68 | xfs-$(CONFIG_PROC_FS) += quota/xfs_qm_stats.o | ||
69 | endif | ||
70 | |||
71 | xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o | ||
72 | xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o | ||
73 | xfs-$(CONFIG_PROC_FS) += $(XFS_LINUX)/xfs_stats.o | ||
74 | xfs-$(CONFIG_SYSCTL) += $(XFS_LINUX)/xfs_sysctl.o | ||
75 | xfs-$(CONFIG_COMPAT) += $(XFS_LINUX)/xfs_ioctl32.o | ||
76 | xfs-$(CONFIG_XFS_EXPORT) += $(XFS_LINUX)/xfs_export.o | ||
77 | |||
78 | |||
79 | xfs-y += xfs_alloc.o \ | ||
80 | xfs_alloc_btree.o \ | ||
81 | xfs_attr.o \ | ||
82 | xfs_attr_leaf.o \ | ||
83 | xfs_behavior.o \ | ||
84 | xfs_bit.o \ | ||
85 | xfs_bmap.o \ | ||
86 | xfs_bmap_btree.o \ | ||
87 | xfs_btree.o \ | ||
88 | xfs_buf_item.o \ | ||
89 | xfs_da_btree.o \ | ||
90 | xfs_dir.o \ | ||
91 | xfs_dir2.o \ | ||
92 | xfs_dir2_block.o \ | ||
93 | xfs_dir2_data.o \ | ||
94 | xfs_dir2_leaf.o \ | ||
95 | xfs_dir2_node.o \ | ||
96 | xfs_dir2_sf.o \ | ||
97 | xfs_dir_leaf.o \ | ||
98 | xfs_error.o \ | ||
99 | xfs_extfree_item.o \ | ||
100 | xfs_fsops.o \ | ||
101 | xfs_ialloc.o \ | ||
102 | xfs_ialloc_btree.o \ | ||
103 | xfs_iget.o \ | ||
104 | xfs_inode.o \ | ||
105 | xfs_inode_item.o \ | ||
106 | xfs_iocore.o \ | ||
107 | xfs_iomap.o \ | ||
108 | xfs_itable.o \ | ||
109 | xfs_dfrag.o \ | ||
110 | xfs_log.o \ | ||
111 | xfs_log_recover.o \ | ||
112 | xfs_macros.o \ | ||
113 | xfs_mount.o \ | ||
114 | xfs_rename.o \ | ||
115 | xfs_trans.o \ | ||
116 | xfs_trans_ail.o \ | ||
117 | xfs_trans_buf.o \ | ||
118 | xfs_trans_extfree.o \ | ||
119 | xfs_trans_inode.o \ | ||
120 | xfs_trans_item.o \ | ||
121 | xfs_utils.o \ | ||
122 | xfs_vfsops.o \ | ||
123 | xfs_vnodeops.o \ | ||
124 | xfs_rw.o \ | ||
125 | xfs_dmops.o \ | ||
126 | xfs_qmops.o | ||
127 | |||
128 | xfs-$(CONFIG_XFS_TRACE) += xfs_dir2_trace.o | ||
129 | |||
130 | # Objects in linux/ | ||
131 | xfs-y += $(addprefix $(XFS_LINUX)/, \ | ||
132 | kmem.o \ | ||
133 | xfs_aops.o \ | ||
134 | xfs_buf.o \ | ||
135 | xfs_file.o \ | ||
136 | xfs_fs_subr.o \ | ||
137 | xfs_globals.o \ | ||
138 | xfs_ioctl.o \ | ||
139 | xfs_iops.o \ | ||
140 | xfs_lrw.o \ | ||
141 | xfs_super.o \ | ||
142 | xfs_vfs.o \ | ||
143 | xfs_vnode.o) | ||
144 | |||
145 | # Objects in support/ | ||
146 | xfs-y += $(addprefix support/, \ | ||
147 | debug.o \ | ||
148 | move.o \ | ||
149 | uuid.o) | ||
150 | |||
151 | xfs-$(CONFIG_XFS_TRACE) += support/ktrace.o | ||
152 | |||
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c index 364ea8c386b1..4b184559f231 100644 --- a/fs/xfs/linux-2.6/kmem.c +++ b/fs/xfs/linux-2.6/kmem.c | |||
@@ -45,11 +45,11 @@ | |||
45 | 45 | ||
46 | 46 | ||
47 | void * | 47 | void * |
48 | kmem_alloc(size_t size, int flags) | 48 | kmem_alloc(size_t size, unsigned int __nocast flags) |
49 | { | 49 | { |
50 | int retries = 0; | 50 | int retries = 0; |
51 | int lflags = kmem_flags_convert(flags); | 51 | unsigned int lflags = kmem_flags_convert(flags); |
52 | void *ptr; | 52 | void *ptr; |
53 | 53 | ||
54 | do { | 54 | do { |
55 | if (size < MAX_SLAB_SIZE || retries > MAX_VMALLOCS) | 55 | if (size < MAX_SLAB_SIZE || retries > MAX_VMALLOCS) |
@@ -67,7 +67,7 @@ kmem_alloc(size_t size, int flags) | |||
67 | } | 67 | } |
68 | 68 | ||
69 | void * | 69 | void * |
70 | kmem_zalloc(size_t size, int flags) | 70 | kmem_zalloc(size_t size, unsigned int __nocast flags) |
71 | { | 71 | { |
72 | void *ptr; | 72 | void *ptr; |
73 | 73 | ||
@@ -89,7 +89,8 @@ kmem_free(void *ptr, size_t size) | |||
89 | } | 89 | } |
90 | 90 | ||
91 | void * | 91 | void * |
92 | kmem_realloc(void *ptr, size_t newsize, size_t oldsize, int flags) | 92 | kmem_realloc(void *ptr, size_t newsize, size_t oldsize, |
93 | unsigned int __nocast flags) | ||
93 | { | 94 | { |
94 | void *new; | 95 | void *new; |
95 | 96 | ||
@@ -104,11 +105,11 @@ kmem_realloc(void *ptr, size_t newsize, size_t oldsize, int flags) | |||
104 | } | 105 | } |
105 | 106 | ||
106 | void * | 107 | void * |
107 | kmem_zone_alloc(kmem_zone_t *zone, int flags) | 108 | kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags) |
108 | { | 109 | { |
109 | int retries = 0; | 110 | int retries = 0; |
110 | int lflags = kmem_flags_convert(flags); | 111 | unsigned int lflags = kmem_flags_convert(flags); |
111 | void *ptr; | 112 | void *ptr; |
112 | 113 | ||
113 | do { | 114 | do { |
114 | ptr = kmem_cache_alloc(zone, lflags); | 115 | ptr = kmem_cache_alloc(zone, lflags); |
@@ -123,7 +124,7 @@ kmem_zone_alloc(kmem_zone_t *zone, int flags) | |||
123 | } | 124 | } |
124 | 125 | ||
125 | void * | 126 | void * |
126 | kmem_zone_zalloc(kmem_zone_t *zone, int flags) | 127 | kmem_zone_zalloc(kmem_zone_t *zone, unsigned int __nocast flags) |
127 | { | 128 | { |
128 | void *ptr; | 129 | void *ptr; |
129 | 130 | ||
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h index 1397b669b059..109fcf27e256 100644 --- a/fs/xfs/linux-2.6/kmem.h +++ b/fs/xfs/linux-2.6/kmem.h | |||
@@ -39,10 +39,10 @@ | |||
39 | /* | 39 | /* |
40 | * memory management routines | 40 | * memory management routines |
41 | */ | 41 | */ |
42 | #define KM_SLEEP 0x0001 | 42 | #define KM_SLEEP 0x0001u |
43 | #define KM_NOSLEEP 0x0002 | 43 | #define KM_NOSLEEP 0x0002u |
44 | #define KM_NOFS 0x0004 | 44 | #define KM_NOFS 0x0004u |
45 | #define KM_MAYFAIL 0x0008 | 45 | #define KM_MAYFAIL 0x0008u |
46 | 46 | ||
47 | #define kmem_zone kmem_cache_s | 47 | #define kmem_zone kmem_cache_s |
48 | #define kmem_zone_t kmem_cache_t | 48 | #define kmem_zone_t kmem_cache_t |
@@ -81,9 +81,9 @@ typedef unsigned long xfs_pflags_t; | |||
81 | *(NSTATEP) = *(OSTATEP); \ | 81 | *(NSTATEP) = *(OSTATEP); \ |
82 | } while (0) | 82 | } while (0) |
83 | 83 | ||
84 | static __inline unsigned int kmem_flags_convert(int flags) | 84 | static __inline unsigned int kmem_flags_convert(unsigned int __nocast flags) |
85 | { | 85 | { |
86 | int lflags = __GFP_NOWARN; /* we'll report problems, if need be */ | 86 | unsigned int lflags = __GFP_NOWARN; /* we'll report problems, if need be */ |
87 | 87 | ||
88 | #ifdef DEBUG | 88 | #ifdef DEBUG |
89 | if (unlikely(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL))) { | 89 | if (unlikely(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL))) { |
@@ -125,12 +125,13 @@ kmem_zone_destroy(kmem_zone_t *zone) | |||
125 | BUG(); | 125 | BUG(); |
126 | } | 126 | } |
127 | 127 | ||
128 | extern void *kmem_zone_zalloc(kmem_zone_t *, int); | 128 | extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast); |
129 | extern void *kmem_zone_alloc(kmem_zone_t *, int); | 129 | extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast); |
130 | 130 | ||
131 | extern void *kmem_alloc(size_t, int); | 131 | extern void *kmem_alloc(size_t, unsigned int __nocast); |
132 | extern void *kmem_realloc(void *, size_t, size_t, int); | 132 | extern void *kmem_realloc(void *, size_t, size_t, |
133 | extern void *kmem_zalloc(size_t, int); | 133 | unsigned int __nocast); |
134 | extern void *kmem_zalloc(size_t, unsigned int __nocast); | ||
134 | extern void kmem_free(void *, size_t); | 135 | extern void kmem_free(void *, size_t); |
135 | 136 | ||
136 | typedef struct shrinker *kmem_shaker_t; | 137 | typedef struct shrinker *kmem_shaker_t; |
diff --git a/fs/xfs/linux-2.6/spin.h b/fs/xfs/linux-2.6/spin.h index bcf60a0b8df0..0039504069a5 100644 --- a/fs/xfs/linux-2.6/spin.h +++ b/fs/xfs/linux-2.6/spin.h | |||
@@ -45,6 +45,9 @@ | |||
45 | typedef spinlock_t lock_t; | 45 | typedef spinlock_t lock_t; |
46 | 46 | ||
47 | #define SPLDECL(s) unsigned long s | 47 | #define SPLDECL(s) unsigned long s |
48 | #ifndef DEFINE_SPINLOCK | ||
49 | #define DEFINE_SPINLOCK(s) spinlock_t s = SPIN_LOCK_UNLOCKED | ||
50 | #endif | ||
48 | 51 | ||
49 | #define spinlock_init(lock, name) spin_lock_init(lock) | 52 | #define spinlock_init(lock, name) spin_lock_init(lock) |
50 | #define spinlock_destroy(lock) | 53 | #define spinlock_destroy(lock) |
diff --git a/fs/xfs/linux-2.6/time.h b/fs/xfs/linux-2.6/time.h index 6c6fd0faa8e1..b0d2873ab274 100644 --- a/fs/xfs/linux-2.6/time.h +++ b/fs/xfs/linux-2.6/time.h | |||
@@ -39,8 +39,7 @@ typedef struct timespec timespec_t; | |||
39 | 39 | ||
40 | static inline void delay(long ticks) | 40 | static inline void delay(long ticks) |
41 | { | 41 | { |
42 | set_current_state(TASK_UNINTERRUPTIBLE); | 42 | schedule_timeout_uninterruptible(ticks); |
43 | schedule_timeout(ticks); | ||
44 | } | 43 | } |
45 | 44 | ||
46 | static inline void nanotime(struct timespec *tvp) | 45 | static inline void nanotime(struct timespec *tvp) |
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index a3a4b5aaf5d9..c6c077978fe3 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c | |||
@@ -104,66 +104,114 @@ xfs_page_trace( | |||
104 | #define xfs_page_trace(tag, inode, page, mask) | 104 | #define xfs_page_trace(tag, inode, page, mask) |
105 | #endif | 105 | #endif |
106 | 106 | ||
107 | void | 107 | /* |
108 | linvfs_unwritten_done( | 108 | * Schedule IO completion handling on a xfsdatad if this was |
109 | struct buffer_head *bh, | 109 | * the final hold on this ioend. |
110 | int uptodate) | 110 | */ |
111 | STATIC void | ||
112 | xfs_finish_ioend( | ||
113 | xfs_ioend_t *ioend) | ||
111 | { | 114 | { |
112 | xfs_buf_t *pb = (xfs_buf_t *)bh->b_private; | 115 | if (atomic_dec_and_test(&ioend->io_remaining)) |
116 | queue_work(xfsdatad_workqueue, &ioend->io_work); | ||
117 | } | ||
113 | 118 | ||
114 | ASSERT(buffer_unwritten(bh)); | 119 | STATIC void |
115 | bh->b_end_io = NULL; | 120 | xfs_destroy_ioend( |
116 | clear_buffer_unwritten(bh); | 121 | xfs_ioend_t *ioend) |
117 | if (!uptodate) | 122 | { |
118 | pagebuf_ioerror(pb, EIO); | 123 | vn_iowake(ioend->io_vnode); |
119 | if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) { | 124 | mempool_free(ioend, xfs_ioend_pool); |
120 | pagebuf_iodone(pb, 1, 1); | ||
121 | } | ||
122 | end_buffer_async_write(bh, uptodate); | ||
123 | } | 125 | } |
124 | 126 | ||
125 | /* | 127 | /* |
126 | * Issue transactions to convert a buffer range from unwritten | 128 | * Issue transactions to convert a buffer range from unwritten |
127 | * to written extents (buffered IO). | 129 | * to written extents. |
128 | */ | 130 | */ |
129 | STATIC void | 131 | STATIC void |
130 | linvfs_unwritten_convert( | 132 | xfs_end_bio_unwritten( |
131 | xfs_buf_t *bp) | 133 | void *data) |
132 | { | 134 | { |
133 | vnode_t *vp = XFS_BUF_FSPRIVATE(bp, vnode_t *); | 135 | xfs_ioend_t *ioend = data; |
134 | int error; | 136 | vnode_t *vp = ioend->io_vnode; |
137 | xfs_off_t offset = ioend->io_offset; | ||
138 | size_t size = ioend->io_size; | ||
139 | struct buffer_head *bh, *next; | ||
140 | int error; | ||
141 | |||
142 | if (ioend->io_uptodate) | ||
143 | VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error); | ||
144 | |||
145 | /* ioend->io_buffer_head is only non-NULL for buffered I/O */ | ||
146 | for (bh = ioend->io_buffer_head; bh; bh = next) { | ||
147 | next = bh->b_private; | ||
148 | |||
149 | bh->b_end_io = NULL; | ||
150 | clear_buffer_unwritten(bh); | ||
151 | end_buffer_async_write(bh, ioend->io_uptodate); | ||
152 | } | ||
135 | 153 | ||
136 | BUG_ON(atomic_read(&bp->pb_hold) < 1); | 154 | xfs_destroy_ioend(ioend); |
137 | VOP_BMAP(vp, XFS_BUF_OFFSET(bp), XFS_BUF_SIZE(bp), | ||
138 | BMAPI_UNWRITTEN, NULL, NULL, error); | ||
139 | XFS_BUF_SET_FSPRIVATE(bp, NULL); | ||
140 | XFS_BUF_CLR_IODONE_FUNC(bp); | ||
141 | XFS_BUF_UNDATAIO(bp); | ||
142 | iput(LINVFS_GET_IP(vp)); | ||
143 | pagebuf_iodone(bp, 0, 0); | ||
144 | } | 155 | } |
145 | 156 | ||
146 | /* | 157 | /* |
147 | * Issue transactions to convert a buffer range from unwritten | 158 | * Allocate and initialise an IO completion structure. |
148 | * to written extents (direct IO). | 159 | * We need to track unwritten extent write completion here initially. |
160 | * We'll need to extend this for updating the ondisk inode size later | ||
161 | * (vs. incore size). | ||
149 | */ | 162 | */ |
150 | STATIC void | 163 | STATIC xfs_ioend_t * |
151 | linvfs_unwritten_convert_direct( | 164 | xfs_alloc_ioend( |
152 | struct kiocb *iocb, | 165 | struct inode *inode) |
153 | loff_t offset, | ||
154 | ssize_t size, | ||
155 | void *private) | ||
156 | { | 166 | { |
157 | struct inode *inode = iocb->ki_filp->f_dentry->d_inode; | 167 | xfs_ioend_t *ioend; |
158 | ASSERT(!private || inode == (struct inode *)private); | ||
159 | 168 | ||
160 | /* private indicates an unwritten extent lay beneath this IO */ | 169 | ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS); |
161 | if (private && size > 0) { | ||
162 | vnode_t *vp = LINVFS_GET_VP(inode); | ||
163 | int error; | ||
164 | 170 | ||
165 | VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error); | 171 | /* |
166 | } | 172 | * Set the count to 1 initially, which will prevent an I/O |
173 | * completion callback from happening before we have started | ||
174 | * all the I/O from calling the completion routine too early. | ||
175 | */ | ||
176 | atomic_set(&ioend->io_remaining, 1); | ||
177 | ioend->io_uptodate = 1; /* cleared if any I/O fails */ | ||
178 | ioend->io_vnode = LINVFS_GET_VP(inode); | ||
179 | ioend->io_buffer_head = NULL; | ||
180 | atomic_inc(&ioend->io_vnode->v_iocount); | ||
181 | ioend->io_offset = 0; | ||
182 | ioend->io_size = 0; | ||
183 | |||
184 | INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend); | ||
185 | |||
186 | return ioend; | ||
187 | } | ||
188 | |||
189 | void | ||
190 | linvfs_unwritten_done( | ||
191 | struct buffer_head *bh, | ||
192 | int uptodate) | ||
193 | { | ||
194 | xfs_ioend_t *ioend = bh->b_private; | ||
195 | static spinlock_t unwritten_done_lock = SPIN_LOCK_UNLOCKED; | ||
196 | unsigned long flags; | ||
197 | |||
198 | ASSERT(buffer_unwritten(bh)); | ||
199 | bh->b_end_io = NULL; | ||
200 | |||
201 | if (!uptodate) | ||
202 | ioend->io_uptodate = 0; | ||
203 | |||
204 | /* | ||
205 | * Deep magic here. We reuse b_private in the buffer_heads to build | ||
206 | * a chain for completing the I/O from user context after we've issued | ||
207 | * a transaction to convert the unwritten extent. | ||
208 | */ | ||
209 | spin_lock_irqsave(&unwritten_done_lock, flags); | ||
210 | bh->b_private = ioend->io_buffer_head; | ||
211 | ioend->io_buffer_head = bh; | ||
212 | spin_unlock_irqrestore(&unwritten_done_lock, flags); | ||
213 | |||
214 | xfs_finish_ioend(ioend); | ||
167 | } | 215 | } |
168 | 216 | ||
169 | STATIC int | 217 | STATIC int |
@@ -255,7 +303,7 @@ xfs_probe_unwritten_page( | |||
255 | struct address_space *mapping, | 303 | struct address_space *mapping, |
256 | pgoff_t index, | 304 | pgoff_t index, |
257 | xfs_iomap_t *iomapp, | 305 | xfs_iomap_t *iomapp, |
258 | xfs_buf_t *pb, | 306 | xfs_ioend_t *ioend, |
259 | unsigned long max_offset, | 307 | unsigned long max_offset, |
260 | unsigned long *fsbs, | 308 | unsigned long *fsbs, |
261 | unsigned int bbits) | 309 | unsigned int bbits) |
@@ -283,7 +331,7 @@ xfs_probe_unwritten_page( | |||
283 | break; | 331 | break; |
284 | xfs_map_at_offset(page, bh, p_offset, bbits, iomapp); | 332 | xfs_map_at_offset(page, bh, p_offset, bbits, iomapp); |
285 | set_buffer_unwritten_io(bh); | 333 | set_buffer_unwritten_io(bh); |
286 | bh->b_private = pb; | 334 | bh->b_private = ioend; |
287 | p_offset += bh->b_size; | 335 | p_offset += bh->b_size; |
288 | (*fsbs)++; | 336 | (*fsbs)++; |
289 | } while ((bh = bh->b_this_page) != head); | 337 | } while ((bh = bh->b_this_page) != head); |
@@ -434,34 +482,15 @@ xfs_map_unwritten( | |||
434 | { | 482 | { |
435 | struct buffer_head *bh = curr; | 483 | struct buffer_head *bh = curr; |
436 | xfs_iomap_t *tmp; | 484 | xfs_iomap_t *tmp; |
437 | xfs_buf_t *pb; | 485 | xfs_ioend_t *ioend; |
438 | loff_t offset, size; | 486 | loff_t offset; |
439 | unsigned long nblocks = 0; | 487 | unsigned long nblocks = 0; |
440 | 488 | ||
441 | offset = start_page->index; | 489 | offset = start_page->index; |
442 | offset <<= PAGE_CACHE_SHIFT; | 490 | offset <<= PAGE_CACHE_SHIFT; |
443 | offset += p_offset; | 491 | offset += p_offset; |
444 | 492 | ||
445 | /* get an "empty" pagebuf to manage IO completion | 493 | ioend = xfs_alloc_ioend(inode); |
446 | * Proper values will be set before returning */ | ||
447 | pb = pagebuf_lookup(iomapp->iomap_target, 0, 0, 0); | ||
448 | if (!pb) | ||
449 | return -EAGAIN; | ||
450 | |||
451 | /* Take a reference to the inode to prevent it from | ||
452 | * being reclaimed while we have outstanding unwritten | ||
453 | * extent IO on it. | ||
454 | */ | ||
455 | if ((igrab(inode)) != inode) { | ||
456 | pagebuf_free(pb); | ||
457 | return -EAGAIN; | ||
458 | } | ||
459 | |||
460 | /* Set the count to 1 initially, this will stop an I/O | ||
461 | * completion callout which happens before we have started | ||
462 | * all the I/O from calling pagebuf_iodone too early. | ||
463 | */ | ||
464 | atomic_set(&pb->pb_io_remaining, 1); | ||
465 | 494 | ||
466 | /* First map forwards in the page consecutive buffers | 495 | /* First map forwards in the page consecutive buffers |
467 | * covering this unwritten extent | 496 | * covering this unwritten extent |
@@ -474,12 +503,12 @@ xfs_map_unwritten( | |||
474 | break; | 503 | break; |
475 | xfs_map_at_offset(start_page, bh, p_offset, block_bits, iomapp); | 504 | xfs_map_at_offset(start_page, bh, p_offset, block_bits, iomapp); |
476 | set_buffer_unwritten_io(bh); | 505 | set_buffer_unwritten_io(bh); |
477 | bh->b_private = pb; | 506 | bh->b_private = ioend; |
478 | p_offset += bh->b_size; | 507 | p_offset += bh->b_size; |
479 | nblocks++; | 508 | nblocks++; |
480 | } while ((bh = bh->b_this_page) != head); | 509 | } while ((bh = bh->b_this_page) != head); |
481 | 510 | ||
482 | atomic_add(nblocks, &pb->pb_io_remaining); | 511 | atomic_add(nblocks, &ioend->io_remaining); |
483 | 512 | ||
484 | /* If we reached the end of the page, map forwards in any | 513 | /* If we reached the end of the page, map forwards in any |
485 | * following pages which are also covered by this extent. | 514 | * following pages which are also covered by this extent. |
@@ -496,13 +525,13 @@ xfs_map_unwritten( | |||
496 | tloff = min(tlast, tloff); | 525 | tloff = min(tlast, tloff); |
497 | for (tindex = start_page->index + 1; tindex < tloff; tindex++) { | 526 | for (tindex = start_page->index + 1; tindex < tloff; tindex++) { |
498 | page = xfs_probe_unwritten_page(mapping, | 527 | page = xfs_probe_unwritten_page(mapping, |
499 | tindex, iomapp, pb, | 528 | tindex, iomapp, ioend, |
500 | PAGE_CACHE_SIZE, &bs, bbits); | 529 | PAGE_CACHE_SIZE, &bs, bbits); |
501 | if (!page) | 530 | if (!page) |
502 | break; | 531 | break; |
503 | nblocks += bs; | 532 | nblocks += bs; |
504 | atomic_add(bs, &pb->pb_io_remaining); | 533 | atomic_add(bs, &ioend->io_remaining); |
505 | xfs_convert_page(inode, page, iomapp, wbc, pb, | 534 | xfs_convert_page(inode, page, iomapp, wbc, ioend, |
506 | startio, all_bh); | 535 | startio, all_bh); |
507 | /* stop if converting the next page might add | 536 | /* stop if converting the next page might add |
508 | * enough blocks that the corresponding byte | 537 | * enough blocks that the corresponding byte |
@@ -514,12 +543,12 @@ xfs_map_unwritten( | |||
514 | if (tindex == tlast && | 543 | if (tindex == tlast && |
515 | (pg_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)))) { | 544 | (pg_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)))) { |
516 | page = xfs_probe_unwritten_page(mapping, | 545 | page = xfs_probe_unwritten_page(mapping, |
517 | tindex, iomapp, pb, | 546 | tindex, iomapp, ioend, |
518 | pg_offset, &bs, bbits); | 547 | pg_offset, &bs, bbits); |
519 | if (page) { | 548 | if (page) { |
520 | nblocks += bs; | 549 | nblocks += bs; |
521 | atomic_add(bs, &pb->pb_io_remaining); | 550 | atomic_add(bs, &ioend->io_remaining); |
522 | xfs_convert_page(inode, page, iomapp, wbc, pb, | 551 | xfs_convert_page(inode, page, iomapp, wbc, ioend, |
523 | startio, all_bh); | 552 | startio, all_bh); |
524 | if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits)) | 553 | if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits)) |
525 | goto enough; | 554 | goto enough; |
@@ -528,21 +557,9 @@ xfs_map_unwritten( | |||
528 | } | 557 | } |
529 | 558 | ||
530 | enough: | 559 | enough: |
531 | size = nblocks; /* NB: using 64bit number here */ | 560 | ioend->io_size = (xfs_off_t)nblocks << block_bits; |
532 | size <<= block_bits; /* convert fsb's to byte range */ | 561 | ioend->io_offset = offset; |
533 | 562 | xfs_finish_ioend(ioend); | |
534 | XFS_BUF_DATAIO(pb); | ||
535 | XFS_BUF_ASYNC(pb); | ||
536 | XFS_BUF_SET_SIZE(pb, size); | ||
537 | XFS_BUF_SET_COUNT(pb, size); | ||
538 | XFS_BUF_SET_OFFSET(pb, offset); | ||
539 | XFS_BUF_SET_FSPRIVATE(pb, LINVFS_GET_VP(inode)); | ||
540 | XFS_BUF_SET_IODONE_FUNC(pb, linvfs_unwritten_convert); | ||
541 | |||
542 | if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) { | ||
543 | pagebuf_iodone(pb, 1, 1); | ||
544 | } | ||
545 | |||
546 | return 0; | 563 | return 0; |
547 | } | 564 | } |
548 | 565 | ||
@@ -787,7 +804,7 @@ xfs_page_state_convert( | |||
787 | continue; | 804 | continue; |
788 | if (!iomp) { | 805 | if (!iomp) { |
789 | err = xfs_map_blocks(inode, offset, len, &iomap, | 806 | err = xfs_map_blocks(inode, offset, len, &iomap, |
790 | BMAPI_READ|BMAPI_IGNSTATE); | 807 | BMAPI_WRITE|BMAPI_IGNSTATE); |
791 | if (err) { | 808 | if (err) { |
792 | goto error; | 809 | goto error; |
793 | } | 810 | } |
@@ -1028,6 +1045,44 @@ linvfs_get_blocks_direct( | |||
1028 | create, 1, BMAPI_WRITE|BMAPI_DIRECT); | 1045 | create, 1, BMAPI_WRITE|BMAPI_DIRECT); |
1029 | } | 1046 | } |
1030 | 1047 | ||
1048 | STATIC void | ||
1049 | linvfs_end_io_direct( | ||
1050 | struct kiocb *iocb, | ||
1051 | loff_t offset, | ||
1052 | ssize_t size, | ||
1053 | void *private) | ||
1054 | { | ||
1055 | xfs_ioend_t *ioend = iocb->private; | ||
1056 | |||
1057 | /* | ||
1058 | * Non-NULL private data means we need to issue a transaction to | ||
1059 | * convert a range from unwritten to written extents. This needs | ||
1060 | * to happen from process context but aio+dio I/O completion | ||
1061 | * happens from irq context so we need to defer it to a workqueue. | ||
1062 | * This is not necessary for synchronous direct I/O, but we do | ||
1063 | * it anyway to keep the code uniform and simpler. | ||
1064 | * | ||
1065 | * The core direct I/O code might be changed to always call the | ||
1066 | * completion handler in the future, in which case all this can | ||
1067 | * go away. | ||
1068 | */ | ||
1069 | if (private && size > 0) { | ||
1070 | ioend->io_offset = offset; | ||
1071 | ioend->io_size = size; | ||
1072 | xfs_finish_ioend(ioend); | ||
1073 | } else { | ||
1074 | ASSERT(size >= 0); | ||
1075 | xfs_destroy_ioend(ioend); | ||
1076 | } | ||
1077 | |||
1078 | /* | ||
1079 | * blockdev_direct_IO can return an error even after the I/O | ||
1080 | * completion handler was called. Thus we need to protect | ||
1081 | * against double-freeing. | ||
1082 | */ | ||
1083 | iocb->private = NULL; | ||
1084 | } | ||
1085 | |||
1031 | STATIC ssize_t | 1086 | STATIC ssize_t |
1032 | linvfs_direct_IO( | 1087 | linvfs_direct_IO( |
1033 | int rw, | 1088 | int rw, |
@@ -1042,16 +1097,23 @@ linvfs_direct_IO( | |||
1042 | xfs_iomap_t iomap; | 1097 | xfs_iomap_t iomap; |
1043 | int maps = 1; | 1098 | int maps = 1; |
1044 | int error; | 1099 | int error; |
1100 | ssize_t ret; | ||
1045 | 1101 | ||
1046 | VOP_BMAP(vp, offset, 0, BMAPI_DEVICE, &iomap, &maps, error); | 1102 | VOP_BMAP(vp, offset, 0, BMAPI_DEVICE, &iomap, &maps, error); |
1047 | if (error) | 1103 | if (error) |
1048 | return -error; | 1104 | return -error; |
1049 | 1105 | ||
1050 | return blockdev_direct_IO_own_locking(rw, iocb, inode, | 1106 | iocb->private = xfs_alloc_ioend(inode); |
1107 | |||
1108 | ret = blockdev_direct_IO_own_locking(rw, iocb, inode, | ||
1051 | iomap.iomap_target->pbr_bdev, | 1109 | iomap.iomap_target->pbr_bdev, |
1052 | iov, offset, nr_segs, | 1110 | iov, offset, nr_segs, |
1053 | linvfs_get_blocks_direct, | 1111 | linvfs_get_blocks_direct, |
1054 | linvfs_unwritten_convert_direct); | 1112 | linvfs_end_io_direct); |
1113 | |||
1114 | if (unlikely(ret <= 0 && iocb->private)) | ||
1115 | xfs_destroy_ioend(iocb->private); | ||
1116 | return ret; | ||
1055 | } | 1117 | } |
1056 | 1118 | ||
1057 | 1119 | ||
@@ -1202,6 +1264,16 @@ out_unlock: | |||
1202 | return error; | 1264 | return error; |
1203 | } | 1265 | } |
1204 | 1266 | ||
1267 | STATIC int | ||
1268 | linvfs_invalidate_page( | ||
1269 | struct page *page, | ||
1270 | unsigned long offset) | ||
1271 | { | ||
1272 | xfs_page_trace(XFS_INVALIDPAGE_ENTER, | ||
1273 | page->mapping->host, page, offset); | ||
1274 | return block_invalidatepage(page, offset); | ||
1275 | } | ||
1276 | |||
1205 | /* | 1277 | /* |
1206 | * Called to move a page into cleanable state - and from there | 1278 | * Called to move a page into cleanable state - and from there |
1207 | * to be released. Possibly the page is already clean. We always | 1279 | * to be released. Possibly the page is already clean. We always |
@@ -1279,6 +1351,7 @@ struct address_space_operations linvfs_aops = { | |||
1279 | .writepage = linvfs_writepage, | 1351 | .writepage = linvfs_writepage, |
1280 | .sync_page = block_sync_page, | 1352 | .sync_page = block_sync_page, |
1281 | .releasepage = linvfs_release_page, | 1353 | .releasepage = linvfs_release_page, |
1354 | .invalidatepage = linvfs_invalidate_page, | ||
1282 | .prepare_write = linvfs_prepare_write, | 1355 | .prepare_write = linvfs_prepare_write, |
1283 | .commit_write = generic_commit_write, | 1356 | .commit_write = generic_commit_write, |
1284 | .bmap = linvfs_bmap, | 1357 | .bmap = linvfs_bmap, |
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h new file mode 100644 index 000000000000..2fa62974a04d --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_aops.h | |||
@@ -0,0 +1,50 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2005 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | * | ||
12 | * Further, this software is distributed without any warranty that it is | ||
13 | * free of the rightful claim of any third person regarding infringement | ||
14 | * or the like. Any license provided herein, whether implied or | ||
15 | * otherwise, applies only to this software file. Patent licenses, if | ||
16 | * any, provided herein do not apply to combinations of this program with | ||
17 | * other software, or any other product whatsoever. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | * | ||
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | * Mountain View, CA 94043, or: | ||
25 | * | ||
26 | * http://www.sgi.com | ||
27 | * | ||
28 | * For further information regarding this notice, see: | ||
29 | * | ||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | */ | ||
32 | #ifndef __XFS_AOPS_H__ | ||
33 | #define __XFS_AOPS_H__ | ||
34 | |||
35 | extern struct workqueue_struct *xfsdatad_workqueue; | ||
36 | extern mempool_t *xfs_ioend_pool; | ||
37 | |||
38 | typedef void (*xfs_ioend_func_t)(void *); | ||
39 | |||
40 | typedef struct xfs_ioend { | ||
41 | unsigned int io_uptodate; /* I/O status register */ | ||
42 | atomic_t io_remaining; /* hold count */ | ||
43 | struct vnode *io_vnode; /* file being written to */ | ||
44 | struct buffer_head *io_buffer_head;/* buffer linked list head */ | ||
45 | size_t io_size; /* size of the extent */ | ||
46 | xfs_off_t io_offset; /* offset in the file */ | ||
47 | struct work_struct io_work; /* xfsdatad work queue */ | ||
48 | } xfs_ioend_t; | ||
49 | |||
50 | #endif /* __XFS_AOPS_H__ */ | ||
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index df0cba239dd5..e82cf72ac599 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. | 2 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify it | 4 | * This program is free software; you can redistribute it and/or modify it |
5 | * under the terms of version 2 of the GNU General Public License as | 5 | * under the terms of version 2 of the GNU General Public License as |
@@ -54,6 +54,7 @@ | |||
54 | #include <linux/percpu.h> | 54 | #include <linux/percpu.h> |
55 | #include <linux/blkdev.h> | 55 | #include <linux/blkdev.h> |
56 | #include <linux/hash.h> | 56 | #include <linux/hash.h> |
57 | #include <linux/kthread.h> | ||
57 | 58 | ||
58 | #include "xfs_linux.h" | 59 | #include "xfs_linux.h" |
59 | 60 | ||
@@ -67,7 +68,7 @@ STATIC int xfsbufd_wakeup(int, unsigned int); | |||
67 | STATIC void pagebuf_delwri_queue(xfs_buf_t *, int); | 68 | STATIC void pagebuf_delwri_queue(xfs_buf_t *, int); |
68 | 69 | ||
69 | STATIC struct workqueue_struct *xfslogd_workqueue; | 70 | STATIC struct workqueue_struct *xfslogd_workqueue; |
70 | STATIC struct workqueue_struct *xfsdatad_workqueue; | 71 | struct workqueue_struct *xfsdatad_workqueue; |
71 | 72 | ||
72 | /* | 73 | /* |
73 | * Pagebuf debugging | 74 | * Pagebuf debugging |
@@ -590,8 +591,10 @@ found: | |||
590 | PB_SET_OWNER(pb); | 591 | PB_SET_OWNER(pb); |
591 | } | 592 | } |
592 | 593 | ||
593 | if (pb->pb_flags & PBF_STALE) | 594 | if (pb->pb_flags & PBF_STALE) { |
595 | ASSERT((pb->pb_flags & _PBF_DELWRI_Q) == 0); | ||
594 | pb->pb_flags &= PBF_MAPPED; | 596 | pb->pb_flags &= PBF_MAPPED; |
597 | } | ||
595 | PB_TRACE(pb, "got_lock", 0); | 598 | PB_TRACE(pb, "got_lock", 0); |
596 | XFS_STATS_INC(pb_get_locked); | 599 | XFS_STATS_INC(pb_get_locked); |
597 | return (pb); | 600 | return (pb); |
@@ -700,25 +703,6 @@ xfs_buf_read_flags( | |||
700 | } | 703 | } |
701 | 704 | ||
702 | /* | 705 | /* |
703 | * Create a skeletal pagebuf (no pages associated with it). | ||
704 | */ | ||
705 | xfs_buf_t * | ||
706 | pagebuf_lookup( | ||
707 | xfs_buftarg_t *target, | ||
708 | loff_t ioff, | ||
709 | size_t isize, | ||
710 | page_buf_flags_t flags) | ||
711 | { | ||
712 | xfs_buf_t *pb; | ||
713 | |||
714 | pb = pagebuf_allocate(flags); | ||
715 | if (pb) { | ||
716 | _pagebuf_initialize(pb, target, ioff, isize, flags); | ||
717 | } | ||
718 | return pb; | ||
719 | } | ||
720 | |||
721 | /* | ||
722 | * If we are not low on memory then do the readahead in a deadlock | 706 | * If we are not low on memory then do the readahead in a deadlock |
723 | * safe manner. | 707 | * safe manner. |
724 | */ | 708 | */ |
@@ -913,22 +897,23 @@ pagebuf_rele( | |||
913 | do_free = 0; | 897 | do_free = 0; |
914 | } | 898 | } |
915 | 899 | ||
916 | if (pb->pb_flags & PBF_DELWRI) { | 900 | if (pb->pb_flags & PBF_FS_MANAGED) { |
917 | pb->pb_flags |= PBF_ASYNC; | ||
918 | atomic_inc(&pb->pb_hold); | ||
919 | pagebuf_delwri_queue(pb, 0); | ||
920 | do_free = 0; | ||
921 | } else if (pb->pb_flags & PBF_FS_MANAGED) { | ||
922 | do_free = 0; | 901 | do_free = 0; |
923 | } | 902 | } |
924 | 903 | ||
925 | if (do_free) { | 904 | if (do_free) { |
905 | ASSERT((pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)) == 0); | ||
926 | list_del_init(&pb->pb_hash_list); | 906 | list_del_init(&pb->pb_hash_list); |
927 | spin_unlock(&hash->bh_lock); | 907 | spin_unlock(&hash->bh_lock); |
928 | pagebuf_free(pb); | 908 | pagebuf_free(pb); |
929 | } else { | 909 | } else { |
930 | spin_unlock(&hash->bh_lock); | 910 | spin_unlock(&hash->bh_lock); |
931 | } | 911 | } |
912 | } else { | ||
913 | /* | ||
914 | * Catch reference count leaks | ||
915 | */ | ||
916 | ASSERT(atomic_read(&pb->pb_hold) >= 0); | ||
932 | } | 917 | } |
933 | } | 918 | } |
934 | 919 | ||
@@ -1006,13 +991,24 @@ pagebuf_lock( | |||
1006 | * pagebuf_unlock | 991 | * pagebuf_unlock |
1007 | * | 992 | * |
1008 | * pagebuf_unlock releases the lock on the buffer object created by | 993 | * pagebuf_unlock releases the lock on the buffer object created by |
1009 | * pagebuf_lock or pagebuf_cond_lock (not any | 994 | * pagebuf_lock or pagebuf_cond_lock (not any pinning of underlying pages |
1010 | * pinning of underlying pages created by pagebuf_pin). | 995 | * created by pagebuf_pin). |
996 | * | ||
997 | * If the buffer is marked delwri but is not queued, do so before we | ||
998 | * unlock the buffer as we need to set flags correctly. We also need to | ||
999 | * take a reference for the delwri queue because the unlocker is going to | ||
1000 | * drop theirs and they don't know we just queued it. | ||
1011 | */ | 1001 | */ |
1012 | void | 1002 | void |
1013 | pagebuf_unlock( /* unlock buffer */ | 1003 | pagebuf_unlock( /* unlock buffer */ |
1014 | xfs_buf_t *pb) /* buffer to unlock */ | 1004 | xfs_buf_t *pb) /* buffer to unlock */ |
1015 | { | 1005 | { |
1006 | if ((pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)) == PBF_DELWRI) { | ||
1007 | atomic_inc(&pb->pb_hold); | ||
1008 | pb->pb_flags |= PBF_ASYNC; | ||
1009 | pagebuf_delwri_queue(pb, 0); | ||
1010 | } | ||
1011 | |||
1016 | PB_CLEAR_OWNER(pb); | 1012 | PB_CLEAR_OWNER(pb); |
1017 | up(&pb->pb_sema); | 1013 | up(&pb->pb_sema); |
1018 | PB_TRACE(pb, "unlock", 0); | 1014 | PB_TRACE(pb, "unlock", 0); |
@@ -1249,8 +1245,8 @@ bio_end_io_pagebuf( | |||
1249 | int error) | 1245 | int error) |
1250 | { | 1246 | { |
1251 | xfs_buf_t *pb = (xfs_buf_t *)bio->bi_private; | 1247 | xfs_buf_t *pb = (xfs_buf_t *)bio->bi_private; |
1252 | unsigned int i, blocksize = pb->pb_target->pbr_bsize; | 1248 | unsigned int blocksize = pb->pb_target->pbr_bsize; |
1253 | struct bio_vec *bvec = bio->bi_io_vec; | 1249 | struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; |
1254 | 1250 | ||
1255 | if (bio->bi_size) | 1251 | if (bio->bi_size) |
1256 | return 1; | 1252 | return 1; |
@@ -1258,10 +1254,12 @@ bio_end_io_pagebuf( | |||
1258 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) | 1254 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) |
1259 | pb->pb_error = EIO; | 1255 | pb->pb_error = EIO; |
1260 | 1256 | ||
1261 | for (i = 0; i < bio->bi_vcnt; i++, bvec++) { | 1257 | do { |
1262 | struct page *page = bvec->bv_page; | 1258 | struct page *page = bvec->bv_page; |
1263 | 1259 | ||
1264 | if (pb->pb_error) { | 1260 | if (unlikely(pb->pb_error)) { |
1261 | if (pb->pb_flags & PBF_READ) | ||
1262 | ClearPageUptodate(page); | ||
1265 | SetPageError(page); | 1263 | SetPageError(page); |
1266 | } else if (blocksize == PAGE_CACHE_SIZE) { | 1264 | } else if (blocksize == PAGE_CACHE_SIZE) { |
1267 | SetPageUptodate(page); | 1265 | SetPageUptodate(page); |
@@ -1270,10 +1268,13 @@ bio_end_io_pagebuf( | |||
1270 | set_page_region(page, bvec->bv_offset, bvec->bv_len); | 1268 | set_page_region(page, bvec->bv_offset, bvec->bv_len); |
1271 | } | 1269 | } |
1272 | 1270 | ||
1271 | if (--bvec >= bio->bi_io_vec) | ||
1272 | prefetchw(&bvec->bv_page->flags); | ||
1273 | |||
1273 | if (_pagebuf_iolocked(pb)) { | 1274 | if (_pagebuf_iolocked(pb)) { |
1274 | unlock_page(page); | 1275 | unlock_page(page); |
1275 | } | 1276 | } |
1276 | } | 1277 | } while (bvec >= bio->bi_io_vec); |
1277 | 1278 | ||
1278 | _pagebuf_iodone(pb, 1); | 1279 | _pagebuf_iodone(pb, 1); |
1279 | bio_put(bio); | 1280 | bio_put(bio); |
@@ -1511,6 +1512,11 @@ again: | |||
1511 | ASSERT(btp == bp->pb_target); | 1512 | ASSERT(btp == bp->pb_target); |
1512 | if (!(bp->pb_flags & PBF_FS_MANAGED)) { | 1513 | if (!(bp->pb_flags & PBF_FS_MANAGED)) { |
1513 | spin_unlock(&hash->bh_lock); | 1514 | spin_unlock(&hash->bh_lock); |
1515 | /* | ||
1516 | * Catch superblock reference count leaks | ||
1517 | * immediately | ||
1518 | */ | ||
1519 | BUG_ON(bp->pb_bn == 0); | ||
1514 | delay(100); | 1520 | delay(100); |
1515 | goto again; | 1521 | goto again; |
1516 | } | 1522 | } |
@@ -1686,17 +1692,20 @@ pagebuf_delwri_queue( | |||
1686 | int unlock) | 1692 | int unlock) |
1687 | { | 1693 | { |
1688 | PB_TRACE(pb, "delwri_q", (long)unlock); | 1694 | PB_TRACE(pb, "delwri_q", (long)unlock); |
1689 | ASSERT(pb->pb_flags & PBF_DELWRI); | 1695 | ASSERT((pb->pb_flags & (PBF_DELWRI|PBF_ASYNC)) == |
1696 | (PBF_DELWRI|PBF_ASYNC)); | ||
1690 | 1697 | ||
1691 | spin_lock(&pbd_delwrite_lock); | 1698 | spin_lock(&pbd_delwrite_lock); |
1692 | /* If already in the queue, dequeue and place at tail */ | 1699 | /* If already in the queue, dequeue and place at tail */ |
1693 | if (!list_empty(&pb->pb_list)) { | 1700 | if (!list_empty(&pb->pb_list)) { |
1701 | ASSERT(pb->pb_flags & _PBF_DELWRI_Q); | ||
1694 | if (unlock) { | 1702 | if (unlock) { |
1695 | atomic_dec(&pb->pb_hold); | 1703 | atomic_dec(&pb->pb_hold); |
1696 | } | 1704 | } |
1697 | list_del(&pb->pb_list); | 1705 | list_del(&pb->pb_list); |
1698 | } | 1706 | } |
1699 | 1707 | ||
1708 | pb->pb_flags |= _PBF_DELWRI_Q; | ||
1700 | list_add_tail(&pb->pb_list, &pbd_delwrite_queue); | 1709 | list_add_tail(&pb->pb_list, &pbd_delwrite_queue); |
1701 | pb->pb_queuetime = jiffies; | 1710 | pb->pb_queuetime = jiffies; |
1702 | spin_unlock(&pbd_delwrite_lock); | 1711 | spin_unlock(&pbd_delwrite_lock); |
@@ -1713,10 +1722,11 @@ pagebuf_delwri_dequeue( | |||
1713 | 1722 | ||
1714 | spin_lock(&pbd_delwrite_lock); | 1723 | spin_lock(&pbd_delwrite_lock); |
1715 | if ((pb->pb_flags & PBF_DELWRI) && !list_empty(&pb->pb_list)) { | 1724 | if ((pb->pb_flags & PBF_DELWRI) && !list_empty(&pb->pb_list)) { |
1725 | ASSERT(pb->pb_flags & _PBF_DELWRI_Q); | ||
1716 | list_del_init(&pb->pb_list); | 1726 | list_del_init(&pb->pb_list); |
1717 | dequeued = 1; | 1727 | dequeued = 1; |
1718 | } | 1728 | } |
1719 | pb->pb_flags &= ~PBF_DELWRI; | 1729 | pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q); |
1720 | spin_unlock(&pbd_delwrite_lock); | 1730 | spin_unlock(&pbd_delwrite_lock); |
1721 | 1731 | ||
1722 | if (dequeued) | 1732 | if (dequeued) |
@@ -1733,9 +1743,7 @@ pagebuf_runall_queues( | |||
1733 | } | 1743 | } |
1734 | 1744 | ||
1735 | /* Defines for pagebuf daemon */ | 1745 | /* Defines for pagebuf daemon */ |
1736 | STATIC DECLARE_COMPLETION(xfsbufd_done); | ||
1737 | STATIC struct task_struct *xfsbufd_task; | 1746 | STATIC struct task_struct *xfsbufd_task; |
1738 | STATIC int xfsbufd_active; | ||
1739 | STATIC int xfsbufd_force_flush; | 1747 | STATIC int xfsbufd_force_flush; |
1740 | STATIC int xfsbufd_force_sleep; | 1748 | STATIC int xfsbufd_force_sleep; |
1741 | 1749 | ||
@@ -1761,14 +1769,8 @@ xfsbufd( | |||
1761 | xfs_buftarg_t *target; | 1769 | xfs_buftarg_t *target; |
1762 | xfs_buf_t *pb, *n; | 1770 | xfs_buf_t *pb, *n; |
1763 | 1771 | ||
1764 | /* Set up the thread */ | ||
1765 | daemonize("xfsbufd"); | ||
1766 | current->flags |= PF_MEMALLOC; | 1772 | current->flags |= PF_MEMALLOC; |
1767 | 1773 | ||
1768 | xfsbufd_task = current; | ||
1769 | xfsbufd_active = 1; | ||
1770 | barrier(); | ||
1771 | |||
1772 | INIT_LIST_HEAD(&tmp); | 1774 | INIT_LIST_HEAD(&tmp); |
1773 | do { | 1775 | do { |
1774 | if (unlikely(freezing(current))) { | 1776 | if (unlikely(freezing(current))) { |
@@ -1778,10 +1780,10 @@ xfsbufd( | |||
1778 | xfsbufd_force_sleep = 0; | 1780 | xfsbufd_force_sleep = 0; |
1779 | } | 1781 | } |
1780 | 1782 | ||
1781 | set_current_state(TASK_INTERRUPTIBLE); | 1783 | schedule_timeout_interruptible |
1782 | schedule_timeout((xfs_buf_timer_centisecs * HZ) / 100); | 1784 | (xfs_buf_timer_centisecs * msecs_to_jiffies(10)); |
1783 | 1785 | ||
1784 | age = (xfs_buf_age_centisecs * HZ) / 100; | 1786 | age = xfs_buf_age_centisecs * msecs_to_jiffies(10); |
1785 | spin_lock(&pbd_delwrite_lock); | 1787 | spin_lock(&pbd_delwrite_lock); |
1786 | list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) { | 1788 | list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) { |
1787 | PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb)); | 1789 | PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb)); |
@@ -1795,7 +1797,7 @@ xfsbufd( | |||
1795 | break; | 1797 | break; |
1796 | } | 1798 | } |
1797 | 1799 | ||
1798 | pb->pb_flags &= ~PBF_DELWRI; | 1800 | pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q); |
1799 | pb->pb_flags |= PBF_WRITE; | 1801 | pb->pb_flags |= PBF_WRITE; |
1800 | list_move(&pb->pb_list, &tmp); | 1802 | list_move(&pb->pb_list, &tmp); |
1801 | } | 1803 | } |
@@ -1816,9 +1818,9 @@ xfsbufd( | |||
1816 | purge_addresses(); | 1818 | purge_addresses(); |
1817 | 1819 | ||
1818 | xfsbufd_force_flush = 0; | 1820 | xfsbufd_force_flush = 0; |
1819 | } while (xfsbufd_active); | 1821 | } while (!kthread_should_stop()); |
1820 | 1822 | ||
1821 | complete_and_exit(&xfsbufd_done, 0); | 1823 | return 0; |
1822 | } | 1824 | } |
1823 | 1825 | ||
1824 | /* | 1826 | /* |
@@ -1845,15 +1847,13 @@ xfs_flush_buftarg( | |||
1845 | if (pb->pb_target != target) | 1847 | if (pb->pb_target != target) |
1846 | continue; | 1848 | continue; |
1847 | 1849 | ||
1848 | ASSERT(pb->pb_flags & PBF_DELWRI); | 1850 | ASSERT(pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)); |
1849 | PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb)); | 1851 | PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb)); |
1850 | if (pagebuf_ispin(pb)) { | 1852 | if (pagebuf_ispin(pb)) { |
1851 | pincount++; | 1853 | pincount++; |
1852 | continue; | 1854 | continue; |
1853 | } | 1855 | } |
1854 | 1856 | ||
1855 | pb->pb_flags &= ~PBF_DELWRI; | ||
1856 | pb->pb_flags |= PBF_WRITE; | ||
1857 | list_move(&pb->pb_list, &tmp); | 1857 | list_move(&pb->pb_list, &tmp); |
1858 | } | 1858 | } |
1859 | spin_unlock(&pbd_delwrite_lock); | 1859 | spin_unlock(&pbd_delwrite_lock); |
@@ -1862,12 +1862,14 @@ xfs_flush_buftarg( | |||
1862 | * Dropped the delayed write list lock, now walk the temporary list | 1862 | * Dropped the delayed write list lock, now walk the temporary list |
1863 | */ | 1863 | */ |
1864 | list_for_each_entry_safe(pb, n, &tmp, pb_list) { | 1864 | list_for_each_entry_safe(pb, n, &tmp, pb_list) { |
1865 | pagebuf_lock(pb); | ||
1866 | pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q); | ||
1867 | pb->pb_flags |= PBF_WRITE; | ||
1865 | if (wait) | 1868 | if (wait) |
1866 | pb->pb_flags &= ~PBF_ASYNC; | 1869 | pb->pb_flags &= ~PBF_ASYNC; |
1867 | else | 1870 | else |
1868 | list_del_init(&pb->pb_list); | 1871 | list_del_init(&pb->pb_list); |
1869 | 1872 | ||
1870 | pagebuf_lock(pb); | ||
1871 | pagebuf_iostrategy(pb); | 1873 | pagebuf_iostrategy(pb); |
1872 | } | 1874 | } |
1873 | 1875 | ||
@@ -1901,9 +1903,11 @@ xfs_buf_daemons_start(void) | |||
1901 | if (!xfsdatad_workqueue) | 1903 | if (!xfsdatad_workqueue) |
1902 | goto out_destroy_xfslogd_workqueue; | 1904 | goto out_destroy_xfslogd_workqueue; |
1903 | 1905 | ||
1904 | error = kernel_thread(xfsbufd, NULL, CLONE_FS|CLONE_FILES); | 1906 | xfsbufd_task = kthread_run(xfsbufd, NULL, "xfsbufd"); |
1905 | if (error < 0) | 1907 | if (IS_ERR(xfsbufd_task)) { |
1908 | error = PTR_ERR(xfsbufd_task); | ||
1906 | goto out_destroy_xfsdatad_workqueue; | 1909 | goto out_destroy_xfsdatad_workqueue; |
1910 | } | ||
1907 | return 0; | 1911 | return 0; |
1908 | 1912 | ||
1909 | out_destroy_xfsdatad_workqueue: | 1913 | out_destroy_xfsdatad_workqueue: |
@@ -1920,10 +1924,7 @@ xfs_buf_daemons_start(void) | |||
1920 | STATIC void | 1924 | STATIC void |
1921 | xfs_buf_daemons_stop(void) | 1925 | xfs_buf_daemons_stop(void) |
1922 | { | 1926 | { |
1923 | xfsbufd_active = 0; | 1927 | kthread_stop(xfsbufd_task); |
1924 | barrier(); | ||
1925 | wait_for_completion(&xfsbufd_done); | ||
1926 | |||
1927 | destroy_workqueue(xfslogd_workqueue); | 1928 | destroy_workqueue(xfslogd_workqueue); |
1928 | destroy_workqueue(xfsdatad_workqueue); | 1929 | destroy_workqueue(xfsdatad_workqueue); |
1929 | } | 1930 | } |
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 3f8f69a66aea..67c19f799232 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h | |||
@@ -89,6 +89,7 @@ typedef enum page_buf_flags_e { /* pb_flags values */ | |||
89 | _PBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */ | 89 | _PBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */ |
90 | _PBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc() */ | 90 | _PBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc() */ |
91 | _PBF_RUN_QUEUES = (1 << 19),/* run block device task queue */ | 91 | _PBF_RUN_QUEUES = (1 << 19),/* run block device task queue */ |
92 | _PBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */ | ||
92 | } page_buf_flags_t; | 93 | } page_buf_flags_t; |
93 | 94 | ||
94 | #define PBF_UPDATE (PBF_READ | PBF_WRITE) | 95 | #define PBF_UPDATE (PBF_READ | PBF_WRITE) |
@@ -206,13 +207,6 @@ extern xfs_buf_t *xfs_buf_read_flags( /* allocate and read a buffer */ | |||
206 | #define xfs_buf_read(target, blkno, len, flags) \ | 207 | #define xfs_buf_read(target, blkno, len, flags) \ |
207 | xfs_buf_read_flags((target), (blkno), (len), PBF_LOCK | PBF_MAPPED) | 208 | xfs_buf_read_flags((target), (blkno), (len), PBF_LOCK | PBF_MAPPED) |
208 | 209 | ||
209 | extern xfs_buf_t *pagebuf_lookup( | ||
210 | xfs_buftarg_t *, | ||
211 | loff_t, /* starting offset of range */ | ||
212 | size_t, /* length of range */ | ||
213 | page_buf_flags_t); /* PBF_READ, PBF_WRITE, */ | ||
214 | /* PBF_FORCEIO, */ | ||
215 | |||
216 | extern xfs_buf_t *pagebuf_get_empty( /* allocate pagebuf struct with */ | 210 | extern xfs_buf_t *pagebuf_get_empty( /* allocate pagebuf struct with */ |
217 | /* no memory or disk address */ | 211 | /* no memory or disk address */ |
218 | size_t len, | 212 | size_t len, |
@@ -344,8 +338,6 @@ extern void pagebuf_trace( | |||
344 | 338 | ||
345 | 339 | ||
346 | 340 | ||
347 | |||
348 | |||
349 | /* These are just for xfs_syncsub... it sets an internal variable | 341 | /* These are just for xfs_syncsub... it sets an internal variable |
350 | * then passes it to VOP_FLUSH_PAGES or adds the flags to a newly gotten buf_t | 342 | * then passes it to VOP_FLUSH_PAGES or adds the flags to a newly gotten buf_t |
351 | */ | 343 | */ |
@@ -452,7 +444,7 @@ extern void pagebuf_trace( | |||
452 | 444 | ||
453 | #define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->pb_addr) | 445 | #define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->pb_addr) |
454 | 446 | ||
455 | extern inline xfs_caddr_t xfs_buf_offset(xfs_buf_t *bp, size_t offset) | 447 | static inline xfs_caddr_t xfs_buf_offset(xfs_buf_t *bp, size_t offset) |
456 | { | 448 | { |
457 | if (bp->pb_flags & PBF_MAPPED) | 449 | if (bp->pb_flags & PBF_MAPPED) |
458 | return XFS_BUF_PTR(bp) + offset; | 450 | return XFS_BUF_PTR(bp) + offset; |
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index f1ce4323f56e..3881622bcf08 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c | |||
@@ -311,6 +311,31 @@ linvfs_fsync( | |||
311 | 311 | ||
312 | #define nextdp(dp) ((struct xfs_dirent *)((char *)(dp) + (dp)->d_reclen)) | 312 | #define nextdp(dp) ((struct xfs_dirent *)((char *)(dp) + (dp)->d_reclen)) |
313 | 313 | ||
314 | #ifdef CONFIG_XFS_DMAPI | ||
315 | |||
316 | STATIC struct page * | ||
317 | linvfs_filemap_nopage( | ||
318 | struct vm_area_struct *area, | ||
319 | unsigned long address, | ||
320 | int *type) | ||
321 | { | ||
322 | struct inode *inode = area->vm_file->f_dentry->d_inode; | ||
323 | vnode_t *vp = LINVFS_GET_VP(inode); | ||
324 | xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp); | ||
325 | int error; | ||
326 | |||
327 | ASSERT_ALWAYS(vp->v_vfsp->vfs_flag & VFS_DMI); | ||
328 | |||
329 | error = XFS_SEND_MMAP(mp, area, 0); | ||
330 | if (error) | ||
331 | return NULL; | ||
332 | |||
333 | return filemap_nopage(area, address, type); | ||
334 | } | ||
335 | |||
336 | #endif /* CONFIG_XFS_DMAPI */ | ||
337 | |||
338 | |||
314 | STATIC int | 339 | STATIC int |
315 | linvfs_readdir( | 340 | linvfs_readdir( |
316 | struct file *filp, | 341 | struct file *filp, |
@@ -390,14 +415,6 @@ done: | |||
390 | return -error; | 415 | return -error; |
391 | } | 416 | } |
392 | 417 | ||
393 | #ifdef CONFIG_XFS_DMAPI | ||
394 | STATIC void | ||
395 | linvfs_mmap_close( | ||
396 | struct vm_area_struct *vma) | ||
397 | { | ||
398 | xfs_dm_mm_put(vma); | ||
399 | } | ||
400 | #endif /* CONFIG_XFS_DMAPI */ | ||
401 | 418 | ||
402 | STATIC int | 419 | STATIC int |
403 | linvfs_file_mmap( | 420 | linvfs_file_mmap( |
@@ -411,16 +428,11 @@ linvfs_file_mmap( | |||
411 | 428 | ||
412 | vma->vm_ops = &linvfs_file_vm_ops; | 429 | vma->vm_ops = &linvfs_file_vm_ops; |
413 | 430 | ||
414 | if (vp->v_vfsp->vfs_flag & VFS_DMI) { | ||
415 | xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp); | ||
416 | |||
417 | error = -XFS_SEND_MMAP(mp, vma, 0); | ||
418 | if (error) | ||
419 | return error; | ||
420 | #ifdef CONFIG_XFS_DMAPI | 431 | #ifdef CONFIG_XFS_DMAPI |
432 | if (vp->v_vfsp->vfs_flag & VFS_DMI) { | ||
421 | vma->vm_ops = &linvfs_dmapi_file_vm_ops; | 433 | vma->vm_ops = &linvfs_dmapi_file_vm_ops; |
422 | #endif | ||
423 | } | 434 | } |
435 | #endif /* CONFIG_XFS_DMAPI */ | ||
424 | 436 | ||
425 | VOP_SETATTR(vp, &va, XFS_AT_UPDATIME, NULL, error); | 437 | VOP_SETATTR(vp, &va, XFS_AT_UPDATIME, NULL, error); |
426 | if (!error) | 438 | if (!error) |
@@ -474,6 +486,7 @@ linvfs_ioctl_invis( | |||
474 | return error; | 486 | return error; |
475 | } | 487 | } |
476 | 488 | ||
489 | #ifdef CONFIG_XFS_DMAPI | ||
477 | #ifdef HAVE_VMOP_MPROTECT | 490 | #ifdef HAVE_VMOP_MPROTECT |
478 | STATIC int | 491 | STATIC int |
479 | linvfs_mprotect( | 492 | linvfs_mprotect( |
@@ -494,6 +507,7 @@ linvfs_mprotect( | |||
494 | return error; | 507 | return error; |
495 | } | 508 | } |
496 | #endif /* HAVE_VMOP_MPROTECT */ | 509 | #endif /* HAVE_VMOP_MPROTECT */ |
510 | #endif /* CONFIG_XFS_DMAPI */ | ||
497 | 511 | ||
498 | #ifdef HAVE_FOP_OPEN_EXEC | 512 | #ifdef HAVE_FOP_OPEN_EXEC |
499 | /* If the user is attempting to execute a file that is offline then | 513 | /* If the user is attempting to execute a file that is offline then |
@@ -528,49 +542,10 @@ open_exec_out: | |||
528 | } | 542 | } |
529 | #endif /* HAVE_FOP_OPEN_EXEC */ | 543 | #endif /* HAVE_FOP_OPEN_EXEC */ |
530 | 544 | ||
531 | /* | ||
532 | * Temporary workaround to the AIO direct IO write problem. | ||
533 | * This code can go and we can revert to do_sync_write once | ||
534 | * the writepage(s) rework is merged. | ||
535 | */ | ||
536 | STATIC ssize_t | ||
537 | linvfs_write( | ||
538 | struct file *filp, | ||
539 | const char __user *buf, | ||
540 | size_t len, | ||
541 | loff_t *ppos) | ||
542 | { | ||
543 | struct kiocb kiocb; | ||
544 | ssize_t ret; | ||
545 | |||
546 | init_sync_kiocb(&kiocb, filp); | ||
547 | kiocb.ki_pos = *ppos; | ||
548 | ret = __linvfs_write(&kiocb, buf, 0, len, kiocb.ki_pos); | ||
549 | *ppos = kiocb.ki_pos; | ||
550 | return ret; | ||
551 | } | ||
552 | STATIC ssize_t | ||
553 | linvfs_write_invis( | ||
554 | struct file *filp, | ||
555 | const char __user *buf, | ||
556 | size_t len, | ||
557 | loff_t *ppos) | ||
558 | { | ||
559 | struct kiocb kiocb; | ||
560 | ssize_t ret; | ||
561 | |||
562 | init_sync_kiocb(&kiocb, filp); | ||
563 | kiocb.ki_pos = *ppos; | ||
564 | ret = __linvfs_write(&kiocb, buf, IO_INVIS, len, kiocb.ki_pos); | ||
565 | *ppos = kiocb.ki_pos; | ||
566 | return ret; | ||
567 | } | ||
568 | |||
569 | |||
570 | struct file_operations linvfs_file_operations = { | 545 | struct file_operations linvfs_file_operations = { |
571 | .llseek = generic_file_llseek, | 546 | .llseek = generic_file_llseek, |
572 | .read = do_sync_read, | 547 | .read = do_sync_read, |
573 | .write = linvfs_write, | 548 | .write = do_sync_write, |
574 | .readv = linvfs_readv, | 549 | .readv = linvfs_readv, |
575 | .writev = linvfs_writev, | 550 | .writev = linvfs_writev, |
576 | .aio_read = linvfs_aio_read, | 551 | .aio_read = linvfs_aio_read, |
@@ -592,7 +567,7 @@ struct file_operations linvfs_file_operations = { | |||
592 | struct file_operations linvfs_invis_file_operations = { | 567 | struct file_operations linvfs_invis_file_operations = { |
593 | .llseek = generic_file_llseek, | 568 | .llseek = generic_file_llseek, |
594 | .read = do_sync_read, | 569 | .read = do_sync_read, |
595 | .write = linvfs_write_invis, | 570 | .write = do_sync_write, |
596 | .readv = linvfs_readv_invis, | 571 | .readv = linvfs_readv_invis, |
597 | .writev = linvfs_writev_invis, | 572 | .writev = linvfs_writev_invis, |
598 | .aio_read = linvfs_aio_read_invis, | 573 | .aio_read = linvfs_aio_read_invis, |
@@ -626,8 +601,7 @@ static struct vm_operations_struct linvfs_file_vm_ops = { | |||
626 | 601 | ||
627 | #ifdef CONFIG_XFS_DMAPI | 602 | #ifdef CONFIG_XFS_DMAPI |
628 | static struct vm_operations_struct linvfs_dmapi_file_vm_ops = { | 603 | static struct vm_operations_struct linvfs_dmapi_file_vm_ops = { |
629 | .close = linvfs_mmap_close, | 604 | .nopage = linvfs_filemap_nopage, |
630 | .nopage = filemap_nopage, | ||
631 | .populate = filemap_populate, | 605 | .populate = filemap_populate, |
632 | #ifdef HAVE_VMOP_MPROTECT | 606 | #ifdef HAVE_VMOP_MPROTECT |
633 | .mprotect = linvfs_mprotect, | 607 | .mprotect = linvfs_mprotect, |
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 05a447e51cc0..6a3326bcd8d0 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c | |||
@@ -141,13 +141,19 @@ xfs_find_handle( | |||
141 | return -XFS_ERROR(EINVAL); | 141 | return -XFS_ERROR(EINVAL); |
142 | } | 142 | } |
143 | 143 | ||
144 | /* we need the vnode */ | 144 | switch (inode->i_mode & S_IFMT) { |
145 | vp = LINVFS_GET_VP(inode); | 145 | case S_IFREG: |
146 | if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) { | 146 | case S_IFDIR: |
147 | case S_IFLNK: | ||
148 | break; | ||
149 | default: | ||
147 | iput(inode); | 150 | iput(inode); |
148 | return -XFS_ERROR(EBADF); | 151 | return -XFS_ERROR(EBADF); |
149 | } | 152 | } |
150 | 153 | ||
154 | /* we need the vnode */ | ||
155 | vp = LINVFS_GET_VP(inode); | ||
156 | |||
151 | /* now we can grab the fsid */ | 157 | /* now we can grab the fsid */ |
152 | memcpy(&handle.ha_fsid, vp->v_vfsp->vfs_altfsid, sizeof(xfs_fsid_t)); | 158 | memcpy(&handle.ha_fsid, vp->v_vfsp->vfs_altfsid, sizeof(xfs_fsid_t)); |
153 | hsize = sizeof(xfs_fsid_t); | 159 | hsize = sizeof(xfs_fsid_t); |
@@ -386,7 +392,7 @@ xfs_readlink_by_handle( | |||
386 | return -error; | 392 | return -error; |
387 | 393 | ||
388 | /* Restrict this handle operation to symlinks only. */ | 394 | /* Restrict this handle operation to symlinks only. */ |
389 | if (vp->v_type != VLNK) { | 395 | if (!S_ISLNK(inode->i_mode)) { |
390 | VN_RELE(vp); | 396 | VN_RELE(vp); |
391 | return -XFS_ERROR(EINVAL); | 397 | return -XFS_ERROR(EINVAL); |
392 | } | 398 | } |
@@ -982,10 +988,10 @@ xfs_ioc_space( | |||
982 | if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND)) | 988 | if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND)) |
983 | return -XFS_ERROR(EPERM); | 989 | return -XFS_ERROR(EPERM); |
984 | 990 | ||
985 | if (!(filp->f_flags & FMODE_WRITE)) | 991 | if (!(filp->f_mode & FMODE_WRITE)) |
986 | return -XFS_ERROR(EBADF); | 992 | return -XFS_ERROR(EBADF); |
987 | 993 | ||
988 | if (vp->v_type != VREG) | 994 | if (!VN_ISREG(vp)) |
989 | return -XFS_ERROR(EINVAL); | 995 | return -XFS_ERROR(EINVAL); |
990 | 996 | ||
991 | if (copy_from_user(&bf, arg, sizeof(bf))) | 997 | if (copy_from_user(&bf, arg, sizeof(bf))) |
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index 0f8f1384eb36..4636b7f86f1f 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c | |||
@@ -47,8 +47,52 @@ | |||
47 | #include "xfs_vnode.h" | 47 | #include "xfs_vnode.h" |
48 | #include "xfs_dfrag.h" | 48 | #include "xfs_dfrag.h" |
49 | 49 | ||
50 | #define _NATIVE_IOC(cmd, type) \ | ||
51 | _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type)) | ||
52 | |||
50 | #if defined(CONFIG_IA64) || defined(CONFIG_X86_64) | 53 | #if defined(CONFIG_IA64) || defined(CONFIG_X86_64) |
51 | #define BROKEN_X86_ALIGNMENT | 54 | #define BROKEN_X86_ALIGNMENT |
55 | /* on ia32 l_start is on a 32-bit boundary */ | ||
56 | typedef struct xfs_flock64_32 { | ||
57 | __s16 l_type; | ||
58 | __s16 l_whence; | ||
59 | __s64 l_start __attribute__((packed)); | ||
60 | /* len == 0 means until end of file */ | ||
61 | __s64 l_len __attribute__((packed)); | ||
62 | __s32 l_sysid; | ||
63 | __u32 l_pid; | ||
64 | __s32 l_pad[4]; /* reserve area */ | ||
65 | } xfs_flock64_32_t; | ||
66 | |||
67 | #define XFS_IOC_ALLOCSP_32 _IOW ('X', 10, struct xfs_flock64_32) | ||
68 | #define XFS_IOC_FREESP_32 _IOW ('X', 11, struct xfs_flock64_32) | ||
69 | #define XFS_IOC_ALLOCSP64_32 _IOW ('X', 36, struct xfs_flock64_32) | ||
70 | #define XFS_IOC_FREESP64_32 _IOW ('X', 37, struct xfs_flock64_32) | ||
71 | #define XFS_IOC_RESVSP_32 _IOW ('X', 40, struct xfs_flock64_32) | ||
72 | #define XFS_IOC_UNRESVSP_32 _IOW ('X', 41, struct xfs_flock64_32) | ||
73 | #define XFS_IOC_RESVSP64_32 _IOW ('X', 42, struct xfs_flock64_32) | ||
74 | #define XFS_IOC_UNRESVSP64_32 _IOW ('X', 43, struct xfs_flock64_32) | ||
75 | |||
76 | /* just account for different alignment */ | ||
77 | STATIC unsigned long | ||
78 | xfs_ioctl32_flock( | ||
79 | unsigned long arg) | ||
80 | { | ||
81 | xfs_flock64_32_t __user *p32 = (void __user *)arg; | ||
82 | xfs_flock64_t __user *p = compat_alloc_user_space(sizeof(*p)); | ||
83 | |||
84 | if (copy_in_user(&p->l_type, &p32->l_type, sizeof(s16)) || | ||
85 | copy_in_user(&p->l_whence, &p32->l_whence, sizeof(s16)) || | ||
86 | copy_in_user(&p->l_start, &p32->l_start, sizeof(s64)) || | ||
87 | copy_in_user(&p->l_len, &p32->l_len, sizeof(s64)) || | ||
88 | copy_in_user(&p->l_sysid, &p32->l_sysid, sizeof(s32)) || | ||
89 | copy_in_user(&p->l_pid, &p32->l_pid, sizeof(u32)) || | ||
90 | copy_in_user(&p->l_pad, &p32->l_pad, 4*sizeof(u32))) | ||
91 | return -EFAULT; | ||
92 | |||
93 | return (unsigned long)p; | ||
94 | } | ||
95 | |||
52 | #else | 96 | #else |
53 | 97 | ||
54 | typedef struct xfs_fsop_bulkreq32 { | 98 | typedef struct xfs_fsop_bulkreq32 { |
@@ -103,7 +147,6 @@ __linvfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg) | |||
103 | /* not handled | 147 | /* not handled |
104 | case XFS_IOC_FD_TO_HANDLE: | 148 | case XFS_IOC_FD_TO_HANDLE: |
105 | case XFS_IOC_PATH_TO_HANDLE: | 149 | case XFS_IOC_PATH_TO_HANDLE: |
106 | case XFS_IOC_PATH_TO_HANDLE: | ||
107 | case XFS_IOC_PATH_TO_FSHANDLE: | 150 | case XFS_IOC_PATH_TO_FSHANDLE: |
108 | case XFS_IOC_OPEN_BY_HANDLE: | 151 | case XFS_IOC_OPEN_BY_HANDLE: |
109 | case XFS_IOC_FSSETDM_BY_HANDLE: | 152 | case XFS_IOC_FSSETDM_BY_HANDLE: |
@@ -124,8 +167,21 @@ __linvfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg) | |||
124 | case XFS_IOC_ERROR_CLEARALL: | 167 | case XFS_IOC_ERROR_CLEARALL: |
125 | break; | 168 | break; |
126 | 169 | ||
127 | #ifndef BROKEN_X86_ALIGNMENT | 170 | #ifdef BROKEN_X86_ALIGNMENT |
128 | /* xfs_flock_t and xfs_bstat_t have wrong u32 vs u64 alignment */ | 171 | /* xfs_flock_t has wrong u32 vs u64 alignment */ |
172 | case XFS_IOC_ALLOCSP_32: | ||
173 | case XFS_IOC_FREESP_32: | ||
174 | case XFS_IOC_ALLOCSP64_32: | ||
175 | case XFS_IOC_FREESP64_32: | ||
176 | case XFS_IOC_RESVSP_32: | ||
177 | case XFS_IOC_UNRESVSP_32: | ||
178 | case XFS_IOC_RESVSP64_32: | ||
179 | case XFS_IOC_UNRESVSP64_32: | ||
180 | arg = xfs_ioctl32_flock(arg); | ||
181 | cmd = _NATIVE_IOC(cmd, struct xfs_flock64); | ||
182 | break; | ||
183 | |||
184 | #else /* These are handled fine if no alignment issues */ | ||
129 | case XFS_IOC_ALLOCSP: | 185 | case XFS_IOC_ALLOCSP: |
130 | case XFS_IOC_FREESP: | 186 | case XFS_IOC_FREESP: |
131 | case XFS_IOC_RESVSP: | 187 | case XFS_IOC_RESVSP: |
@@ -134,6 +190,9 @@ __linvfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg) | |||
134 | case XFS_IOC_FREESP64: | 190 | case XFS_IOC_FREESP64: |
135 | case XFS_IOC_RESVSP64: | 191 | case XFS_IOC_RESVSP64: |
136 | case XFS_IOC_UNRESVSP64: | 192 | case XFS_IOC_UNRESVSP64: |
193 | break; | ||
194 | |||
195 | /* xfs_bstat_t still has wrong u32 vs u64 alignment */ | ||
137 | case XFS_IOC_SWAPEXT: | 196 | case XFS_IOC_SWAPEXT: |
138 | break; | 197 | break; |
139 | 198 | ||
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index f252605514eb..77708a8c9f87 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c | |||
@@ -140,7 +140,6 @@ linvfs_mknod( | |||
140 | 140 | ||
141 | memset(&va, 0, sizeof(va)); | 141 | memset(&va, 0, sizeof(va)); |
142 | va.va_mask = XFS_AT_TYPE|XFS_AT_MODE; | 142 | va.va_mask = XFS_AT_TYPE|XFS_AT_MODE; |
143 | va.va_type = IFTOVT(mode); | ||
144 | va.va_mode = mode; | 143 | va.va_mode = mode; |
145 | 144 | ||
146 | switch (mode & S_IFMT) { | 145 | switch (mode & S_IFMT) { |
@@ -308,14 +307,13 @@ linvfs_symlink( | |||
308 | cvp = NULL; | 307 | cvp = NULL; |
309 | 308 | ||
310 | memset(&va, 0, sizeof(va)); | 309 | memset(&va, 0, sizeof(va)); |
311 | va.va_type = VLNK; | 310 | va.va_mode = S_IFLNK | |
312 | va.va_mode = irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO; | 311 | (irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO); |
313 | va.va_mask = XFS_AT_TYPE|XFS_AT_MODE; | 312 | va.va_mask = XFS_AT_TYPE|XFS_AT_MODE; |
314 | 313 | ||
315 | error = 0; | 314 | error = 0; |
316 | VOP_SYMLINK(dvp, dentry, &va, (char *)symname, &cvp, NULL, error); | 315 | VOP_SYMLINK(dvp, dentry, &va, (char *)symname, &cvp, NULL, error); |
317 | if (!error && cvp) { | 316 | if (!error && cvp) { |
318 | ASSERT(cvp->v_type == VLNK); | ||
319 | ip = LINVFS_GET_IP(cvp); | 317 | ip = LINVFS_GET_IP(cvp); |
320 | d_instantiate(dentry, ip); | 318 | d_instantiate(dentry, ip); |
321 | validate_fields(dir); | 319 | validate_fields(dir); |
@@ -425,9 +423,14 @@ linvfs_follow_link( | |||
425 | return NULL; | 423 | return NULL; |
426 | } | 424 | } |
427 | 425 | ||
428 | static void linvfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p) | 426 | STATIC void |
427 | linvfs_put_link( | ||
428 | struct dentry *dentry, | ||
429 | struct nameidata *nd, | ||
430 | void *p) | ||
429 | { | 431 | { |
430 | char *s = nd_get_link(nd); | 432 | char *s = nd_get_link(nd); |
433 | |||
431 | if (!IS_ERR(s)) | 434 | if (!IS_ERR(s)) |
432 | kfree(s); | 435 | kfree(s); |
433 | } | 436 | } |
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 42dc5e4662ed..68c5d885ed9c 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h | |||
@@ -64,7 +64,6 @@ | |||
64 | #include <sema.h> | 64 | #include <sema.h> |
65 | #include <time.h> | 65 | #include <time.h> |
66 | 66 | ||
67 | #include <support/qsort.h> | ||
68 | #include <support/ktrace.h> | 67 | #include <support/ktrace.h> |
69 | #include <support/debug.h> | 68 | #include <support/debug.h> |
70 | #include <support/move.h> | 69 | #include <support/move.h> |
@@ -104,6 +103,7 @@ | |||
104 | #include <xfs_stats.h> | 103 | #include <xfs_stats.h> |
105 | #include <xfs_sysctl.h> | 104 | #include <xfs_sysctl.h> |
106 | #include <xfs_iops.h> | 105 | #include <xfs_iops.h> |
106 | #include <xfs_aops.h> | ||
107 | #include <xfs_super.h> | 107 | #include <xfs_super.h> |
108 | #include <xfs_globals.h> | 108 | #include <xfs_globals.h> |
109 | #include <xfs_fs_subr.h> | 109 | #include <xfs_fs_subr.h> |
@@ -254,11 +254,18 @@ static inline void set_buffer_unwritten_io(struct buffer_head *bh) | |||
254 | #define MAX(a,b) (max(a,b)) | 254 | #define MAX(a,b) (max(a,b)) |
255 | #define howmany(x, y) (((x)+((y)-1))/(y)) | 255 | #define howmany(x, y) (((x)+((y)-1))/(y)) |
256 | #define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) | 256 | #define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) |
257 | #define qsort(a,n,s,fn) sort(a,n,s,fn,NULL) | ||
257 | 258 | ||
259 | /* | ||
260 | * Various platform dependent calls that don't fit anywhere else | ||
261 | */ | ||
258 | #define xfs_stack_trace() dump_stack() | 262 | #define xfs_stack_trace() dump_stack() |
259 | |||
260 | #define xfs_itruncate_data(ip, off) \ | 263 | #define xfs_itruncate_data(ip, off) \ |
261 | (-vmtruncate(LINVFS_GET_IP(XFS_ITOV(ip)), (off))) | 264 | (-vmtruncate(LINVFS_GET_IP(XFS_ITOV(ip)), (off))) |
265 | #define xfs_statvfs_fsid(statp, mp) \ | ||
266 | ({ u64 id = huge_encode_dev((mp)->m_dev); \ | ||
267 | __kernel_fsid_t *fsid = &(statp)->f_fsid; \ | ||
268 | (fsid->val[0] = (u32)id, fsid->val[1] = (u32)(id >> 32)); }) | ||
262 | 269 | ||
263 | 270 | ||
264 | /* Move the kernel do_div definition off to one side */ | 271 | /* Move the kernel do_div definition off to one side */ |
@@ -371,6 +378,4 @@ static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y) | |||
371 | return(x * y); | 378 | return(x * y); |
372 | } | 379 | } |
373 | 380 | ||
374 | #define qsort(a, n, s, cmp) sort(a, n, s, cmp, NULL) | ||
375 | |||
376 | #endif /* __XFS_LINUX__ */ | 381 | #endif /* __XFS_LINUX__ */ |
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index acab58c48043..3b5fabe8dae9 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c | |||
@@ -660,9 +660,6 @@ xfs_write( | |||
660 | (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? | 660 | (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? |
661 | mp->m_rtdev_targp : mp->m_ddev_targp; | 661 | mp->m_rtdev_targp : mp->m_ddev_targp; |
662 | 662 | ||
663 | if (ioflags & IO_ISAIO) | ||
664 | return XFS_ERROR(-ENOSYS); | ||
665 | |||
666 | if ((pos & target->pbr_smask) || (count & target->pbr_smask)) | 663 | if ((pos & target->pbr_smask) || (count & target->pbr_smask)) |
667 | return XFS_ERROR(-EINVAL); | 664 | return XFS_ERROR(-EINVAL); |
668 | 665 | ||
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h index f197a720e394..6294dcdb797c 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.h +++ b/fs/xfs/linux-2.6/xfs_lrw.h | |||
@@ -70,9 +70,10 @@ struct xfs_iomap; | |||
70 | #define XFS_SENDFILE_ENTER 21 | 70 | #define XFS_SENDFILE_ENTER 21 |
71 | #define XFS_WRITEPAGE_ENTER 22 | 71 | #define XFS_WRITEPAGE_ENTER 22 |
72 | #define XFS_RELEASEPAGE_ENTER 23 | 72 | #define XFS_RELEASEPAGE_ENTER 23 |
73 | #define XFS_IOMAP_ALLOC_ENTER 24 | 73 | #define XFS_INVALIDPAGE_ENTER 24 |
74 | #define XFS_IOMAP_ALLOC_MAP 25 | 74 | #define XFS_IOMAP_ALLOC_ENTER 25 |
75 | #define XFS_IOMAP_UNWRITTEN 26 | 75 | #define XFS_IOMAP_ALLOC_MAP 26 |
76 | #define XFS_IOMAP_UNWRITTEN 27 | ||
76 | extern void xfs_rw_enter_trace(int, struct xfs_iocore *, | 77 | extern void xfs_rw_enter_trace(int, struct xfs_iocore *, |
77 | void *, size_t, loff_t, int); | 78 | void *, size_t, loff_t, int); |
78 | extern void xfs_inval_cached_trace(struct xfs_iocore *, | 79 | extern void xfs_inval_cached_trace(struct xfs_iocore *, |
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index f6dd7de25927..2302454d8d47 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
@@ -70,11 +70,15 @@ | |||
70 | #include <linux/namei.h> | 70 | #include <linux/namei.h> |
71 | #include <linux/init.h> | 71 | #include <linux/init.h> |
72 | #include <linux/mount.h> | 72 | #include <linux/mount.h> |
73 | #include <linux/mempool.h> | ||
73 | #include <linux/writeback.h> | 74 | #include <linux/writeback.h> |
75 | #include <linux/kthread.h> | ||
74 | 76 | ||
75 | STATIC struct quotactl_ops linvfs_qops; | 77 | STATIC struct quotactl_ops linvfs_qops; |
76 | STATIC struct super_operations linvfs_sops; | 78 | STATIC struct super_operations linvfs_sops; |
77 | STATIC kmem_zone_t *linvfs_inode_zone; | 79 | STATIC kmem_zone_t *xfs_vnode_zone; |
80 | STATIC kmem_zone_t *xfs_ioend_zone; | ||
81 | mempool_t *xfs_ioend_pool; | ||
78 | 82 | ||
79 | STATIC struct xfs_mount_args * | 83 | STATIC struct xfs_mount_args * |
80 | xfs_args_allocate( | 84 | xfs_args_allocate( |
@@ -138,24 +142,25 @@ STATIC __inline__ void | |||
138 | xfs_set_inodeops( | 142 | xfs_set_inodeops( |
139 | struct inode *inode) | 143 | struct inode *inode) |
140 | { | 144 | { |
141 | vnode_t *vp = LINVFS_GET_VP(inode); | 145 | switch (inode->i_mode & S_IFMT) { |
142 | 146 | case S_IFREG: | |
143 | if (vp->v_type == VNON) { | ||
144 | vn_mark_bad(vp); | ||
145 | } else if (S_ISREG(inode->i_mode)) { | ||
146 | inode->i_op = &linvfs_file_inode_operations; | 147 | inode->i_op = &linvfs_file_inode_operations; |
147 | inode->i_fop = &linvfs_file_operations; | 148 | inode->i_fop = &linvfs_file_operations; |
148 | inode->i_mapping->a_ops = &linvfs_aops; | 149 | inode->i_mapping->a_ops = &linvfs_aops; |
149 | } else if (S_ISDIR(inode->i_mode)) { | 150 | break; |
151 | case S_IFDIR: | ||
150 | inode->i_op = &linvfs_dir_inode_operations; | 152 | inode->i_op = &linvfs_dir_inode_operations; |
151 | inode->i_fop = &linvfs_dir_operations; | 153 | inode->i_fop = &linvfs_dir_operations; |
152 | } else if (S_ISLNK(inode->i_mode)) { | 154 | break; |
155 | case S_IFLNK: | ||
153 | inode->i_op = &linvfs_symlink_inode_operations; | 156 | inode->i_op = &linvfs_symlink_inode_operations; |
154 | if (inode->i_blocks) | 157 | if (inode->i_blocks) |
155 | inode->i_mapping->a_ops = &linvfs_aops; | 158 | inode->i_mapping->a_ops = &linvfs_aops; |
156 | } else { | 159 | break; |
160 | default: | ||
157 | inode->i_op = &linvfs_file_inode_operations; | 161 | inode->i_op = &linvfs_file_inode_operations; |
158 | init_special_inode(inode, inode->i_mode, inode->i_rdev); | 162 | init_special_inode(inode, inode->i_mode, inode->i_rdev); |
163 | break; | ||
159 | } | 164 | } |
160 | } | 165 | } |
161 | 166 | ||
@@ -167,16 +172,23 @@ xfs_revalidate_inode( | |||
167 | { | 172 | { |
168 | struct inode *inode = LINVFS_GET_IP(vp); | 173 | struct inode *inode = LINVFS_GET_IP(vp); |
169 | 174 | ||
170 | inode->i_mode = (ip->i_d.di_mode & MODEMASK) | VTTOIF(vp->v_type); | 175 | inode->i_mode = ip->i_d.di_mode; |
171 | inode->i_nlink = ip->i_d.di_nlink; | 176 | inode->i_nlink = ip->i_d.di_nlink; |
172 | inode->i_uid = ip->i_d.di_uid; | 177 | inode->i_uid = ip->i_d.di_uid; |
173 | inode->i_gid = ip->i_d.di_gid; | 178 | inode->i_gid = ip->i_d.di_gid; |
174 | if (((1 << vp->v_type) & ((1<<VBLK) | (1<<VCHR))) == 0) { | 179 | |
180 | switch (inode->i_mode & S_IFMT) { | ||
181 | case S_IFBLK: | ||
182 | case S_IFCHR: | ||
183 | inode->i_rdev = | ||
184 | MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff, | ||
185 | sysv_minor(ip->i_df.if_u2.if_rdev)); | ||
186 | break; | ||
187 | default: | ||
175 | inode->i_rdev = 0; | 188 | inode->i_rdev = 0; |
176 | } else { | 189 | break; |
177 | xfs_dev_t dev = ip->i_df.if_u2.if_rdev; | ||
178 | inode->i_rdev = MKDEV(sysv_major(dev) & 0x1ff, sysv_minor(dev)); | ||
179 | } | 190 | } |
191 | |||
180 | inode->i_blksize = PAGE_CACHE_SIZE; | 192 | inode->i_blksize = PAGE_CACHE_SIZE; |
181 | inode->i_generation = ip->i_d.di_gen; | 193 | inode->i_generation = ip->i_d.di_gen; |
182 | i_size_write(inode, ip->i_d.di_size); | 194 | i_size_write(inode, ip->i_d.di_size); |
@@ -231,7 +243,6 @@ xfs_initialize_vnode( | |||
231 | * finish our work. | 243 | * finish our work. |
232 | */ | 244 | */ |
233 | if (ip->i_d.di_mode != 0 && unlock && (inode->i_state & I_NEW)) { | 245 | if (ip->i_d.di_mode != 0 && unlock && (inode->i_state & I_NEW)) { |
234 | vp->v_type = IFTOVT(ip->i_d.di_mode); | ||
235 | xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip); | 246 | xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip); |
236 | xfs_set_inodeops(inode); | 247 | xfs_set_inodeops(inode); |
237 | 248 | ||
@@ -274,8 +285,7 @@ linvfs_alloc_inode( | |||
274 | { | 285 | { |
275 | vnode_t *vp; | 286 | vnode_t *vp; |
276 | 287 | ||
277 | vp = (vnode_t *)kmem_cache_alloc(linvfs_inode_zone, | 288 | vp = kmem_cache_alloc(xfs_vnode_zone, kmem_flags_convert(KM_SLEEP)); |
278 | kmem_flags_convert(KM_SLEEP)); | ||
279 | if (!vp) | 289 | if (!vp) |
280 | return NULL; | 290 | return NULL; |
281 | return LINVFS_GET_IP(vp); | 291 | return LINVFS_GET_IP(vp); |
@@ -285,11 +295,11 @@ STATIC void | |||
285 | linvfs_destroy_inode( | 295 | linvfs_destroy_inode( |
286 | struct inode *inode) | 296 | struct inode *inode) |
287 | { | 297 | { |
288 | kmem_cache_free(linvfs_inode_zone, LINVFS_GET_VP(inode)); | 298 | kmem_zone_free(xfs_vnode_zone, LINVFS_GET_VP(inode)); |
289 | } | 299 | } |
290 | 300 | ||
291 | STATIC void | 301 | STATIC void |
292 | init_once( | 302 | linvfs_inode_init_once( |
293 | void *data, | 303 | void *data, |
294 | kmem_cache_t *cachep, | 304 | kmem_cache_t *cachep, |
295 | unsigned long flags) | 305 | unsigned long flags) |
@@ -302,21 +312,41 @@ init_once( | |||
302 | } | 312 | } |
303 | 313 | ||
304 | STATIC int | 314 | STATIC int |
305 | init_inodecache( void ) | 315 | linvfs_init_zones(void) |
306 | { | 316 | { |
307 | linvfs_inode_zone = kmem_cache_create("linvfs_icache", | 317 | xfs_vnode_zone = kmem_cache_create("xfs_vnode", |
308 | sizeof(vnode_t), 0, SLAB_RECLAIM_ACCOUNT, | 318 | sizeof(vnode_t), 0, SLAB_RECLAIM_ACCOUNT, |
309 | init_once, NULL); | 319 | linvfs_inode_init_once, NULL); |
310 | if (linvfs_inode_zone == NULL) | 320 | if (!xfs_vnode_zone) |
311 | return -ENOMEM; | 321 | goto out; |
322 | |||
323 | xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend"); | ||
324 | if (!xfs_ioend_zone) | ||
325 | goto out_destroy_vnode_zone; | ||
326 | |||
327 | xfs_ioend_pool = mempool_create(4 * MAX_BUF_PER_PAGE, | ||
328 | mempool_alloc_slab, mempool_free_slab, | ||
329 | xfs_ioend_zone); | ||
330 | if (!xfs_ioend_pool) | ||
331 | goto out_free_ioend_zone; | ||
332 | |||
312 | return 0; | 333 | return 0; |
334 | |||
335 | |||
336 | out_free_ioend_zone: | ||
337 | kmem_zone_destroy(xfs_ioend_zone); | ||
338 | out_destroy_vnode_zone: | ||
339 | kmem_zone_destroy(xfs_vnode_zone); | ||
340 | out: | ||
341 | return -ENOMEM; | ||
313 | } | 342 | } |
314 | 343 | ||
315 | STATIC void | 344 | STATIC void |
316 | destroy_inodecache( void ) | 345 | linvfs_destroy_zones(void) |
317 | { | 346 | { |
318 | if (kmem_cache_destroy(linvfs_inode_zone)) | 347 | mempool_destroy(xfs_ioend_pool); |
319 | printk(KERN_WARNING "%s: cache still in use!\n", __FUNCTION__); | 348 | kmem_zone_destroy(xfs_vnode_zone); |
349 | kmem_zone_destroy(xfs_ioend_zone); | ||
320 | } | 350 | } |
321 | 351 | ||
322 | /* | 352 | /* |
@@ -354,17 +384,38 @@ linvfs_clear_inode( | |||
354 | struct inode *inode) | 384 | struct inode *inode) |
355 | { | 385 | { |
356 | vnode_t *vp = LINVFS_GET_VP(inode); | 386 | vnode_t *vp = LINVFS_GET_VP(inode); |
387 | int error, cache; | ||
357 | 388 | ||
358 | if (vp) { | 389 | vn_trace_entry(vp, "clear_inode", (inst_t *)__return_address); |
359 | vn_rele(vp); | 390 | |
360 | vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); | 391 | XFS_STATS_INC(vn_rele); |
361 | /* | 392 | XFS_STATS_INC(vn_remove); |
362 | * Do all our cleanup, and remove this vnode. | 393 | XFS_STATS_INC(vn_reclaim); |
363 | */ | 394 | XFS_STATS_DEC(vn_active); |
364 | vn_remove(vp); | 395 | |
396 | /* | ||
397 | * This can happen because xfs_iget_core calls xfs_idestroy if we | ||
398 | * find an inode with di_mode == 0 but without IGET_CREATE set. | ||
399 | */ | ||
400 | if (vp->v_fbhv) | ||
401 | VOP_INACTIVE(vp, NULL, cache); | ||
402 | |||
403 | VN_LOCK(vp); | ||
404 | vp->v_flag &= ~VMODIFIED; | ||
405 | VN_UNLOCK(vp, 0); | ||
406 | |||
407 | if (vp->v_fbhv) { | ||
408 | VOP_RECLAIM(vp, error); | ||
409 | if (error) | ||
410 | panic("vn_purge: cannot reclaim"); | ||
365 | } | 411 | } |
366 | } | ||
367 | 412 | ||
413 | ASSERT(vp->v_fbhv == NULL); | ||
414 | |||
415 | #ifdef XFS_VNODE_TRACE | ||
416 | ktrace_free(vp->v_trace); | ||
417 | #endif | ||
418 | } | ||
368 | 419 | ||
369 | /* | 420 | /* |
370 | * Enqueue a work item to be picked up by the vfs xfssyncd thread. | 421 | * Enqueue a work item to be picked up by the vfs xfssyncd thread. |
@@ -416,7 +467,7 @@ xfs_flush_inode( | |||
416 | 467 | ||
417 | igrab(inode); | 468 | igrab(inode); |
418 | xfs_syncd_queue_work(vfs, inode, xfs_flush_inode_work); | 469 | xfs_syncd_queue_work(vfs, inode, xfs_flush_inode_work); |
419 | delay(HZ/2); | 470 | delay(msecs_to_jiffies(500)); |
420 | } | 471 | } |
421 | 472 | ||
422 | /* | 473 | /* |
@@ -441,7 +492,7 @@ xfs_flush_device( | |||
441 | 492 | ||
442 | igrab(inode); | 493 | igrab(inode); |
443 | xfs_syncd_queue_work(vfs, inode, xfs_flush_device_work); | 494 | xfs_syncd_queue_work(vfs, inode, xfs_flush_device_work); |
444 | delay(HZ/2); | 495 | delay(msecs_to_jiffies(500)); |
445 | xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC); | 496 | xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC); |
446 | } | 497 | } |
447 | 498 | ||
@@ -466,25 +517,15 @@ xfssyncd( | |||
466 | { | 517 | { |
467 | long timeleft; | 518 | long timeleft; |
468 | vfs_t *vfsp = (vfs_t *) arg; | 519 | vfs_t *vfsp = (vfs_t *) arg; |
469 | struct list_head tmp; | ||
470 | struct vfs_sync_work *work, *n; | 520 | struct vfs_sync_work *work, *n; |
521 | LIST_HEAD (tmp); | ||
471 | 522 | ||
472 | daemonize("xfssyncd"); | 523 | timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10); |
473 | |||
474 | vfsp->vfs_sync_work.w_vfs = vfsp; | ||
475 | vfsp->vfs_sync_work.w_syncer = vfs_sync_worker; | ||
476 | vfsp->vfs_sync_task = current; | ||
477 | wmb(); | ||
478 | wake_up(&vfsp->vfs_wait_sync_task); | ||
479 | |||
480 | INIT_LIST_HEAD(&tmp); | ||
481 | timeleft = (xfs_syncd_centisecs * HZ) / 100; | ||
482 | for (;;) { | 524 | for (;;) { |
483 | set_current_state(TASK_INTERRUPTIBLE); | 525 | timeleft = schedule_timeout_interruptible(timeleft); |
484 | timeleft = schedule_timeout(timeleft); | ||
485 | /* swsusp */ | 526 | /* swsusp */ |
486 | try_to_freeze(); | 527 | try_to_freeze(); |
487 | if (vfsp->vfs_flag & VFS_UMOUNT) | 528 | if (kthread_should_stop()) |
488 | break; | 529 | break; |
489 | 530 | ||
490 | spin_lock(&vfsp->vfs_sync_lock); | 531 | spin_lock(&vfsp->vfs_sync_lock); |
@@ -495,7 +536,8 @@ xfssyncd( | |||
495 | */ | 536 | */ |
496 | if (!timeleft || list_empty(&vfsp->vfs_sync_list)) { | 537 | if (!timeleft || list_empty(&vfsp->vfs_sync_list)) { |
497 | if (!timeleft) | 538 | if (!timeleft) |
498 | timeleft = (xfs_syncd_centisecs * HZ) / 100; | 539 | timeleft = xfs_syncd_centisecs * |
540 | msecs_to_jiffies(10); | ||
499 | INIT_LIST_HEAD(&vfsp->vfs_sync_work.w_list); | 541 | INIT_LIST_HEAD(&vfsp->vfs_sync_work.w_list); |
500 | list_add_tail(&vfsp->vfs_sync_work.w_list, | 542 | list_add_tail(&vfsp->vfs_sync_work.w_list, |
501 | &vfsp->vfs_sync_list); | 543 | &vfsp->vfs_sync_list); |
@@ -513,10 +555,6 @@ xfssyncd( | |||
513 | } | 555 | } |
514 | } | 556 | } |
515 | 557 | ||
516 | vfsp->vfs_sync_task = NULL; | ||
517 | wmb(); | ||
518 | wake_up(&vfsp->vfs_wait_sync_task); | ||
519 | |||
520 | return 0; | 558 | return 0; |
521 | } | 559 | } |
522 | 560 | ||
@@ -524,13 +562,11 @@ STATIC int | |||
524 | linvfs_start_syncd( | 562 | linvfs_start_syncd( |
525 | vfs_t *vfsp) | 563 | vfs_t *vfsp) |
526 | { | 564 | { |
527 | int pid; | 565 | vfsp->vfs_sync_work.w_syncer = vfs_sync_worker; |
528 | 566 | vfsp->vfs_sync_work.w_vfs = vfsp; | |
529 | pid = kernel_thread(xfssyncd, (void *) vfsp, | 567 | vfsp->vfs_sync_task = kthread_run(xfssyncd, vfsp, "xfssyncd"); |
530 | CLONE_VM | CLONE_FS | CLONE_FILES); | 568 | if (IS_ERR(vfsp->vfs_sync_task)) |
531 | if (pid < 0) | 569 | return -PTR_ERR(vfsp->vfs_sync_task); |
532 | return -pid; | ||
533 | wait_event(vfsp->vfs_wait_sync_task, vfsp->vfs_sync_task); | ||
534 | return 0; | 570 | return 0; |
535 | } | 571 | } |
536 | 572 | ||
@@ -538,11 +574,7 @@ STATIC void | |||
538 | linvfs_stop_syncd( | 574 | linvfs_stop_syncd( |
539 | vfs_t *vfsp) | 575 | vfs_t *vfsp) |
540 | { | 576 | { |
541 | vfsp->vfs_flag |= VFS_UMOUNT; | 577 | kthread_stop(vfsp->vfs_sync_task); |
542 | wmb(); | ||
543 | |||
544 | wake_up_process(vfsp->vfs_sync_task); | ||
545 | wait_event(vfsp->vfs_wait_sync_task, !vfsp->vfs_sync_task); | ||
546 | } | 578 | } |
547 | 579 | ||
548 | STATIC void | 580 | STATIC void |
@@ -866,9 +898,9 @@ init_xfs_fs( void ) | |||
866 | 898 | ||
867 | ktrace_init(64); | 899 | ktrace_init(64); |
868 | 900 | ||
869 | error = init_inodecache(); | 901 | error = linvfs_init_zones(); |
870 | if (error < 0) | 902 | if (error < 0) |
871 | goto undo_inodecache; | 903 | goto undo_zones; |
872 | 904 | ||
873 | error = pagebuf_init(); | 905 | error = pagebuf_init(); |
874 | if (error < 0) | 906 | if (error < 0) |
@@ -889,9 +921,9 @@ undo_register: | |||
889 | pagebuf_terminate(); | 921 | pagebuf_terminate(); |
890 | 922 | ||
891 | undo_pagebuf: | 923 | undo_pagebuf: |
892 | destroy_inodecache(); | 924 | linvfs_destroy_zones(); |
893 | 925 | ||
894 | undo_inodecache: | 926 | undo_zones: |
895 | return error; | 927 | return error; |
896 | } | 928 | } |
897 | 929 | ||
@@ -903,7 +935,7 @@ exit_xfs_fs( void ) | |||
903 | unregister_filesystem(&xfs_fs_type); | 935 | unregister_filesystem(&xfs_fs_type); |
904 | xfs_cleanup(); | 936 | xfs_cleanup(); |
905 | pagebuf_terminate(); | 937 | pagebuf_terminate(); |
906 | destroy_inodecache(); | 938 | linvfs_destroy_zones(); |
907 | ktrace_uninit(); | 939 | ktrace_uninit(); |
908 | } | 940 | } |
909 | 941 | ||
diff --git a/fs/xfs/linux-2.6/xfs_vfs.c b/fs/xfs/linux-2.6/xfs_vfs.c index 669c61644959..34cc902ec119 100644 --- a/fs/xfs/linux-2.6/xfs_vfs.c +++ b/fs/xfs/linux-2.6/xfs_vfs.c | |||
@@ -251,7 +251,6 @@ vfs_allocate( void ) | |||
251 | bhv_head_init(VFS_BHVHEAD(vfsp), "vfs"); | 251 | bhv_head_init(VFS_BHVHEAD(vfsp), "vfs"); |
252 | INIT_LIST_HEAD(&vfsp->vfs_sync_list); | 252 | INIT_LIST_HEAD(&vfsp->vfs_sync_list); |
253 | spin_lock_init(&vfsp->vfs_sync_lock); | 253 | spin_lock_init(&vfsp->vfs_sync_lock); |
254 | init_waitqueue_head(&vfsp->vfs_wait_sync_task); | ||
255 | init_waitqueue_head(&vfsp->vfs_wait_single_sync_task); | 254 | init_waitqueue_head(&vfsp->vfs_wait_single_sync_task); |
256 | return vfsp; | 255 | return vfsp; |
257 | } | 256 | } |
diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h index 7ee1f714e9ba..f0ab574fb47a 100644 --- a/fs/xfs/linux-2.6/xfs_vfs.h +++ b/fs/xfs/linux-2.6/xfs_vfs.h | |||
@@ -65,7 +65,6 @@ typedef struct vfs { | |||
65 | spinlock_t vfs_sync_lock; /* work item list lock */ | 65 | spinlock_t vfs_sync_lock; /* work item list lock */ |
66 | int vfs_sync_seq; /* sync thread generation no. */ | 66 | int vfs_sync_seq; /* sync thread generation no. */ |
67 | wait_queue_head_t vfs_wait_single_sync_task; | 67 | wait_queue_head_t vfs_wait_single_sync_task; |
68 | wait_queue_head_t vfs_wait_sync_task; | ||
69 | } vfs_t; | 68 | } vfs_t; |
70 | 69 | ||
71 | #define vfs_fbhv vfs_bh.bh_first /* 1st on vfs behavior chain */ | 70 | #define vfs_fbhv vfs_bh.bh_first /* 1st on vfs behavior chain */ |
@@ -96,7 +95,6 @@ typedef enum { | |||
96 | #define VFS_RDONLY 0x0001 /* read-only vfs */ | 95 | #define VFS_RDONLY 0x0001 /* read-only vfs */ |
97 | #define VFS_GRPID 0x0002 /* group-ID assigned from directory */ | 96 | #define VFS_GRPID 0x0002 /* group-ID assigned from directory */ |
98 | #define VFS_DMI 0x0004 /* filesystem has the DMI enabled */ | 97 | #define VFS_DMI 0x0004 /* filesystem has the DMI enabled */ |
99 | #define VFS_UMOUNT 0x0008 /* unmount in progress */ | ||
100 | #define VFS_END 0x0008 /* max flag */ | 98 | #define VFS_END 0x0008 /* max flag */ |
101 | 99 | ||
102 | #define SYNC_ATTR 0x0001 /* sync attributes */ | 100 | #define SYNC_ATTR 0x0001 /* sync attributes */ |
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c index 250cad54e892..268f45bf6a9a 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ b/fs/xfs/linux-2.6/xfs_vnode.c | |||
@@ -42,93 +42,33 @@ DEFINE_SPINLOCK(vnumber_lock); | |||
42 | */ | 42 | */ |
43 | #define NVSYNC 37 | 43 | #define NVSYNC 37 |
44 | #define vptosync(v) (&vsync[((unsigned long)v) % NVSYNC]) | 44 | #define vptosync(v) (&vsync[((unsigned long)v) % NVSYNC]) |
45 | sv_t vsync[NVSYNC]; | 45 | STATIC wait_queue_head_t vsync[NVSYNC]; |
46 | |||
47 | /* | ||
48 | * Translate stat(2) file types to vnode types and vice versa. | ||
49 | * Aware of numeric order of S_IFMT and vnode type values. | ||
50 | */ | ||
51 | enum vtype iftovt_tab[] = { | ||
52 | VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, | ||
53 | VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON | ||
54 | }; | ||
55 | |||
56 | u_short vttoif_tab[] = { | ||
57 | 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFIFO, 0, S_IFSOCK | ||
58 | }; | ||
59 | 46 | ||
60 | 47 | ||
61 | void | 48 | void |
62 | vn_init(void) | 49 | vn_init(void) |
63 | { | 50 | { |
64 | register sv_t *svp; | 51 | int i; |
65 | register int i; | ||
66 | 52 | ||
67 | for (svp = vsync, i = 0; i < NVSYNC; i++, svp++) | 53 | for (i = 0; i < NVSYNC; i++) |
68 | init_sv(svp, SV_DEFAULT, "vsy", i); | 54 | init_waitqueue_head(&vsync[i]); |
69 | } | 55 | } |
70 | 56 | ||
71 | /* | 57 | void |
72 | * Clean a vnode of filesystem-specific data and prepare it for reuse. | 58 | vn_iowait( |
73 | */ | ||
74 | STATIC int | ||
75 | vn_reclaim( | ||
76 | struct vnode *vp) | 59 | struct vnode *vp) |
77 | { | 60 | { |
78 | int error; | 61 | wait_queue_head_t *wq = vptosync(vp); |
79 | 62 | ||
80 | XFS_STATS_INC(vn_reclaim); | 63 | wait_event(*wq, (atomic_read(&vp->v_iocount) == 0)); |
81 | vn_trace_entry(vp, "vn_reclaim", (inst_t *)__return_address); | ||
82 | |||
83 | /* | ||
84 | * Only make the VOP_RECLAIM call if there are behaviors | ||
85 | * to call. | ||
86 | */ | ||
87 | if (vp->v_fbhv) { | ||
88 | VOP_RECLAIM(vp, error); | ||
89 | if (error) | ||
90 | return -error; | ||
91 | } | ||
92 | ASSERT(vp->v_fbhv == NULL); | ||
93 | |||
94 | VN_LOCK(vp); | ||
95 | vp->v_flag &= (VRECLM|VWAIT); | ||
96 | VN_UNLOCK(vp, 0); | ||
97 | |||
98 | vp->v_type = VNON; | ||
99 | vp->v_fbhv = NULL; | ||
100 | |||
101 | #ifdef XFS_VNODE_TRACE | ||
102 | ktrace_free(vp->v_trace); | ||
103 | vp->v_trace = NULL; | ||
104 | #endif | ||
105 | |||
106 | return 0; | ||
107 | } | ||
108 | |||
109 | STATIC void | ||
110 | vn_wakeup( | ||
111 | struct vnode *vp) | ||
112 | { | ||
113 | VN_LOCK(vp); | ||
114 | if (vp->v_flag & VWAIT) | ||
115 | sv_broadcast(vptosync(vp)); | ||
116 | vp->v_flag &= ~(VRECLM|VWAIT|VMODIFIED); | ||
117 | VN_UNLOCK(vp, 0); | ||
118 | } | 64 | } |
119 | 65 | ||
120 | int | 66 | void |
121 | vn_wait( | 67 | vn_iowake( |
122 | struct vnode *vp) | 68 | struct vnode *vp) |
123 | { | 69 | { |
124 | VN_LOCK(vp); | 70 | if (atomic_dec_and_test(&vp->v_iocount)) |
125 | if (vp->v_flag & (VINACT | VRECLM)) { | 71 | wake_up(vptosync(vp)); |
126 | vp->v_flag |= VWAIT; | ||
127 | sv_wait(vptosync(vp), PINOD, &vp->v_lock, 0); | ||
128 | return 1; | ||
129 | } | ||
130 | VN_UNLOCK(vp, 0); | ||
131 | return 0; | ||
132 | } | 72 | } |
133 | 73 | ||
134 | struct vnode * | 74 | struct vnode * |
@@ -154,6 +94,8 @@ vn_initialize( | |||
154 | /* Initialize the first behavior and the behavior chain head. */ | 94 | /* Initialize the first behavior and the behavior chain head. */ |
155 | vn_bhv_head_init(VN_BHV_HEAD(vp), "vnode"); | 95 | vn_bhv_head_init(VN_BHV_HEAD(vp), "vnode"); |
156 | 96 | ||
97 | atomic_set(&vp->v_iocount, 0); | ||
98 | |||
157 | #ifdef XFS_VNODE_TRACE | 99 | #ifdef XFS_VNODE_TRACE |
158 | vp->v_trace = ktrace_alloc(VNODE_TRACE_SIZE, KM_SLEEP); | 100 | vp->v_trace = ktrace_alloc(VNODE_TRACE_SIZE, KM_SLEEP); |
159 | #endif /* XFS_VNODE_TRACE */ | 101 | #endif /* XFS_VNODE_TRACE */ |
@@ -163,30 +105,6 @@ vn_initialize( | |||
163 | } | 105 | } |
164 | 106 | ||
165 | /* | 107 | /* |
166 | * Get a reference on a vnode. | ||
167 | */ | ||
168 | vnode_t * | ||
169 | vn_get( | ||
170 | struct vnode *vp, | ||
171 | vmap_t *vmap) | ||
172 | { | ||
173 | struct inode *inode; | ||
174 | |||
175 | XFS_STATS_INC(vn_get); | ||
176 | inode = LINVFS_GET_IP(vp); | ||
177 | if (inode->i_state & I_FREEING) | ||
178 | return NULL; | ||
179 | |||
180 | inode = ilookup(vmap->v_vfsp->vfs_super, vmap->v_ino); | ||
181 | if (!inode) /* Inode not present */ | ||
182 | return NULL; | ||
183 | |||
184 | vn_trace_exit(vp, "vn_get", (inst_t *)__return_address); | ||
185 | |||
186 | return vp; | ||
187 | } | ||
188 | |||
189 | /* | ||
190 | * Revalidate the Linux inode from the vattr. | 108 | * Revalidate the Linux inode from the vattr. |
191 | * Note: i_size _not_ updated; we must hold the inode | 109 | * Note: i_size _not_ updated; we must hold the inode |
192 | * semaphore when doing that - callers responsibility. | 110 | * semaphore when doing that - callers responsibility. |
@@ -198,7 +116,7 @@ vn_revalidate_core( | |||
198 | { | 116 | { |
199 | struct inode *inode = LINVFS_GET_IP(vp); | 117 | struct inode *inode = LINVFS_GET_IP(vp); |
200 | 118 | ||
201 | inode->i_mode = VTTOIF(vap->va_type) | vap->va_mode; | 119 | inode->i_mode = vap->va_mode; |
202 | inode->i_nlink = vap->va_nlink; | 120 | inode->i_nlink = vap->va_nlink; |
203 | inode->i_uid = vap->va_uid; | 121 | inode->i_uid = vap->va_uid; |
204 | inode->i_gid = vap->va_gid; | 122 | inode->i_gid = vap->va_gid; |
@@ -247,71 +165,6 @@ vn_revalidate( | |||
247 | } | 165 | } |
248 | 166 | ||
249 | /* | 167 | /* |
250 | * purge a vnode from the cache | ||
251 | * At this point the vnode is guaranteed to have no references (vn_count == 0) | ||
252 | * The caller has to make sure that there are no ways someone could | ||
253 | * get a handle (via vn_get) on the vnode (usually done via a mount/vfs lock). | ||
254 | */ | ||
255 | void | ||
256 | vn_purge( | ||
257 | struct vnode *vp, | ||
258 | vmap_t *vmap) | ||
259 | { | ||
260 | vn_trace_entry(vp, "vn_purge", (inst_t *)__return_address); | ||
261 | |||
262 | again: | ||
263 | /* | ||
264 | * Check whether vp has already been reclaimed since our caller | ||
265 | * sampled its version while holding a filesystem cache lock that | ||
266 | * its VOP_RECLAIM function acquires. | ||
267 | */ | ||
268 | VN_LOCK(vp); | ||
269 | if (vp->v_number != vmap->v_number) { | ||
270 | VN_UNLOCK(vp, 0); | ||
271 | return; | ||
272 | } | ||
273 | |||
274 | /* | ||
275 | * If vp is being reclaimed or inactivated, wait until it is inert, | ||
276 | * then proceed. Can't assume that vnode is actually reclaimed | ||
277 | * just because the reclaimed flag is asserted -- a vn_alloc | ||
278 | * reclaim can fail. | ||
279 | */ | ||
280 | if (vp->v_flag & (VINACT | VRECLM)) { | ||
281 | ASSERT(vn_count(vp) == 0); | ||
282 | vp->v_flag |= VWAIT; | ||
283 | sv_wait(vptosync(vp), PINOD, &vp->v_lock, 0); | ||
284 | goto again; | ||
285 | } | ||
286 | |||
287 | /* | ||
288 | * Another process could have raced in and gotten this vnode... | ||
289 | */ | ||
290 | if (vn_count(vp) > 0) { | ||
291 | VN_UNLOCK(vp, 0); | ||
292 | return; | ||
293 | } | ||
294 | |||
295 | XFS_STATS_DEC(vn_active); | ||
296 | vp->v_flag |= VRECLM; | ||
297 | VN_UNLOCK(vp, 0); | ||
298 | |||
299 | /* | ||
300 | * Call VOP_RECLAIM and clean vp. The FSYNC_INVAL flag tells | ||
301 | * vp's filesystem to flush and invalidate all cached resources. | ||
302 | * When vn_reclaim returns, vp should have no private data, | ||
303 | * either in a system cache or attached to v_data. | ||
304 | */ | ||
305 | if (vn_reclaim(vp) != 0) | ||
306 | panic("vn_purge: cannot reclaim"); | ||
307 | |||
308 | /* | ||
309 | * Wakeup anyone waiting for vp to be reclaimed. | ||
310 | */ | ||
311 | vn_wakeup(vp); | ||
312 | } | ||
313 | |||
314 | /* | ||
315 | * Add a reference to a referenced vnode. | 168 | * Add a reference to a referenced vnode. |
316 | */ | 169 | */ |
317 | struct vnode * | 170 | struct vnode * |
@@ -330,80 +183,6 @@ vn_hold( | |||
330 | return vp; | 183 | return vp; |
331 | } | 184 | } |
332 | 185 | ||
333 | /* | ||
334 | * Call VOP_INACTIVE on last reference. | ||
335 | */ | ||
336 | void | ||
337 | vn_rele( | ||
338 | struct vnode *vp) | ||
339 | { | ||
340 | int vcnt; | ||
341 | int cache; | ||
342 | |||
343 | XFS_STATS_INC(vn_rele); | ||
344 | |||
345 | VN_LOCK(vp); | ||
346 | |||
347 | vn_trace_entry(vp, "vn_rele", (inst_t *)__return_address); | ||
348 | vcnt = vn_count(vp); | ||
349 | |||
350 | /* | ||
351 | * Since we always get called from put_inode we know | ||
352 | * that i_count won't be decremented after we | ||
353 | * return. | ||
354 | */ | ||
355 | if (!vcnt) { | ||
356 | /* | ||
357 | * As soon as we turn this on, noone can find us in vn_get | ||
358 | * until we turn off VINACT or VRECLM | ||
359 | */ | ||
360 | vp->v_flag |= VINACT; | ||
361 | VN_UNLOCK(vp, 0); | ||
362 | |||
363 | /* | ||
364 | * Do not make the VOP_INACTIVE call if there | ||
365 | * are no behaviors attached to the vnode to call. | ||
366 | */ | ||
367 | if (vp->v_fbhv) | ||
368 | VOP_INACTIVE(vp, NULL, cache); | ||
369 | |||
370 | VN_LOCK(vp); | ||
371 | if (vp->v_flag & VWAIT) | ||
372 | sv_broadcast(vptosync(vp)); | ||
373 | |||
374 | vp->v_flag &= ~(VINACT|VWAIT|VRECLM|VMODIFIED); | ||
375 | } | ||
376 | |||
377 | VN_UNLOCK(vp, 0); | ||
378 | |||
379 | vn_trace_exit(vp, "vn_rele", (inst_t *)__return_address); | ||
380 | } | ||
381 | |||
382 | /* | ||
383 | * Finish the removal of a vnode. | ||
384 | */ | ||
385 | void | ||
386 | vn_remove( | ||
387 | struct vnode *vp) | ||
388 | { | ||
389 | vmap_t vmap; | ||
390 | |||
391 | /* Make sure we don't do this to the same vnode twice */ | ||
392 | if (!(vp->v_fbhv)) | ||
393 | return; | ||
394 | |||
395 | XFS_STATS_INC(vn_remove); | ||
396 | vn_trace_exit(vp, "vn_remove", (inst_t *)__return_address); | ||
397 | |||
398 | /* | ||
399 | * After the following purge the vnode | ||
400 | * will no longer exist. | ||
401 | */ | ||
402 | VMAP(vp, vmap); | ||
403 | vn_purge(vp, &vmap); | ||
404 | } | ||
405 | |||
406 | |||
407 | #ifdef XFS_VNODE_TRACE | 186 | #ifdef XFS_VNODE_TRACE |
408 | 187 | ||
409 | #define KTRACE_ENTER(vp, vk, s, line, ra) \ | 188 | #define KTRACE_ENTER(vp, vk, s, line, ra) \ |
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index a6e57c647be4..35f306cebb87 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. | 2 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify it | 4 | * This program is free software; you can redistribute it and/or modify it |
5 | * under the terms of version 2 of the GNU General Public License as | 5 | * under the terms of version 2 of the GNU General Public License as |
@@ -65,10 +65,6 @@ struct vattr; | |||
65 | struct xfs_iomap; | 65 | struct xfs_iomap; |
66 | struct attrlist_cursor_kern; | 66 | struct attrlist_cursor_kern; |
67 | 67 | ||
68 | /* | ||
69 | * Vnode types. VNON means no type. | ||
70 | */ | ||
71 | enum vtype { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VFIFO, VBAD, VSOCK }; | ||
72 | 68 | ||
73 | typedef xfs_ino_t vnumber_t; | 69 | typedef xfs_ino_t vnumber_t; |
74 | typedef struct dentry vname_t; | 70 | typedef struct dentry vname_t; |
@@ -77,15 +73,14 @@ typedef bhv_head_t vn_bhv_head_t; | |||
77 | /* | 73 | /* |
78 | * MP locking protocols: | 74 | * MP locking protocols: |
79 | * v_flag, v_vfsp VN_LOCK/VN_UNLOCK | 75 | * v_flag, v_vfsp VN_LOCK/VN_UNLOCK |
80 | * v_type read-only or fs-dependent | ||
81 | */ | 76 | */ |
82 | typedef struct vnode { | 77 | typedef struct vnode { |
83 | __u32 v_flag; /* vnode flags (see below) */ | 78 | __u32 v_flag; /* vnode flags (see below) */ |
84 | enum vtype v_type; /* vnode type */ | ||
85 | struct vfs *v_vfsp; /* ptr to containing VFS */ | 79 | struct vfs *v_vfsp; /* ptr to containing VFS */ |
86 | vnumber_t v_number; /* in-core vnode number */ | 80 | vnumber_t v_number; /* in-core vnode number */ |
87 | vn_bhv_head_t v_bh; /* behavior head */ | 81 | vn_bhv_head_t v_bh; /* behavior head */ |
88 | spinlock_t v_lock; /* VN_LOCK/VN_UNLOCK */ | 82 | spinlock_t v_lock; /* VN_LOCK/VN_UNLOCK */ |
83 | atomic_t v_iocount; /* outstanding I/O count */ | ||
89 | #ifdef XFS_VNODE_TRACE | 84 | #ifdef XFS_VNODE_TRACE |
90 | struct ktrace *v_trace; /* trace header structure */ | 85 | struct ktrace *v_trace; /* trace header structure */ |
91 | #endif | 86 | #endif |
@@ -93,6 +88,12 @@ typedef struct vnode { | |||
93 | /* inode MUST be last */ | 88 | /* inode MUST be last */ |
94 | } vnode_t; | 89 | } vnode_t; |
95 | 90 | ||
91 | #define VN_ISLNK(vp) S_ISLNK((vp)->v_inode.i_mode) | ||
92 | #define VN_ISREG(vp) S_ISREG((vp)->v_inode.i_mode) | ||
93 | #define VN_ISDIR(vp) S_ISDIR((vp)->v_inode.i_mode) | ||
94 | #define VN_ISCHR(vp) S_ISCHR((vp)->v_inode.i_mode) | ||
95 | #define VN_ISBLK(vp) S_ISBLK((vp)->v_inode.i_mode) | ||
96 | |||
96 | #define v_fbhv v_bh.bh_first /* first behavior */ | 97 | #define v_fbhv v_bh.bh_first /* first behavior */ |
97 | #define v_fops v_bh.bh_first->bd_ops /* first behavior ops */ | 98 | #define v_fops v_bh.bh_first->bd_ops /* first behavior ops */ |
98 | 99 | ||
@@ -133,22 +134,8 @@ typedef enum { | |||
133 | #define LINVFS_GET_IP(vp) (&(vp)->v_inode) | 134 | #define LINVFS_GET_IP(vp) (&(vp)->v_inode) |
134 | 135 | ||
135 | /* | 136 | /* |
136 | * Convert between vnode types and inode formats (since POSIX.1 | ||
137 | * defines mode word of stat structure in terms of inode formats). | ||
138 | */ | ||
139 | extern enum vtype iftovt_tab[]; | ||
140 | extern u_short vttoif_tab[]; | ||
141 | #define IFTOVT(mode) (iftovt_tab[((mode) & S_IFMT) >> 12]) | ||
142 | #define VTTOIF(indx) (vttoif_tab[(int)(indx)]) | ||
143 | #define MAKEIMODE(indx, mode) (int)(VTTOIF(indx) | (mode)) | ||
144 | |||
145 | |||
146 | /* | ||
147 | * Vnode flags. | 137 | * Vnode flags. |
148 | */ | 138 | */ |
149 | #define VINACT 0x1 /* vnode is being inactivated */ | ||
150 | #define VRECLM 0x2 /* vnode is being reclaimed */ | ||
151 | #define VWAIT 0x4 /* waiting for VINACT/VRECLM to end */ | ||
152 | #define VMODIFIED 0x8 /* XFS inode state possibly differs */ | 139 | #define VMODIFIED 0x8 /* XFS inode state possibly differs */ |
153 | /* to the Linux inode state. */ | 140 | /* to the Linux inode state. */ |
154 | 141 | ||
@@ -408,7 +395,6 @@ typedef struct vnodeops { | |||
408 | */ | 395 | */ |
409 | typedef struct vattr { | 396 | typedef struct vattr { |
410 | int va_mask; /* bit-mask of attributes present */ | 397 | int va_mask; /* bit-mask of attributes present */ |
411 | enum vtype va_type; /* vnode type (for create) */ | ||
412 | mode_t va_mode; /* file access mode and type */ | 398 | mode_t va_mode; /* file access mode and type */ |
413 | xfs_nlink_t va_nlink; /* number of references to file */ | 399 | xfs_nlink_t va_nlink; /* number of references to file */ |
414 | uid_t va_uid; /* owner user id */ | 400 | uid_t va_uid; /* owner user id */ |
@@ -498,27 +484,12 @@ typedef struct vattr { | |||
498 | * Check whether mandatory file locking is enabled. | 484 | * Check whether mandatory file locking is enabled. |
499 | */ | 485 | */ |
500 | #define MANDLOCK(vp, mode) \ | 486 | #define MANDLOCK(vp, mode) \ |
501 | ((vp)->v_type == VREG && ((mode) & (VSGID|(VEXEC>>3))) == VSGID) | 487 | (VN_ISREG(vp) && ((mode) & (VSGID|(VEXEC>>3))) == VSGID) |
502 | 488 | ||
503 | extern void vn_init(void); | 489 | extern void vn_init(void); |
504 | extern int vn_wait(struct vnode *); | ||
505 | extern vnode_t *vn_initialize(struct inode *); | 490 | extern vnode_t *vn_initialize(struct inode *); |
506 | 491 | ||
507 | /* | 492 | /* |
508 | * Acquiring and invalidating vnodes: | ||
509 | * | ||
510 | * if (vn_get(vp, version, 0)) | ||
511 | * ...; | ||
512 | * vn_purge(vp, version); | ||
513 | * | ||
514 | * vn_get and vn_purge must be called with vmap_t arguments, sampled | ||
515 | * while a lock that the vnode's VOP_RECLAIM function acquires is | ||
516 | * held, to ensure that the vnode sampled with the lock held isn't | ||
517 | * recycled (VOP_RECLAIMed) or deallocated between the release of the lock | ||
518 | * and the subsequent vn_get or vn_purge. | ||
519 | */ | ||
520 | |||
521 | /* | ||
522 | * vnode_map structures _must_ match vn_epoch and vnode structure sizes. | 493 | * vnode_map structures _must_ match vn_epoch and vnode structure sizes. |
523 | */ | 494 | */ |
524 | typedef struct vnode_map { | 495 | typedef struct vnode_map { |
@@ -531,11 +502,11 @@ typedef struct vnode_map { | |||
531 | (vmap).v_number = (vp)->v_number, \ | 502 | (vmap).v_number = (vp)->v_number, \ |
532 | (vmap).v_ino = (vp)->v_inode.i_ino; } | 503 | (vmap).v_ino = (vp)->v_inode.i_ino; } |
533 | 504 | ||
534 | extern void vn_purge(struct vnode *, vmap_t *); | ||
535 | extern vnode_t *vn_get(struct vnode *, vmap_t *); | ||
536 | extern int vn_revalidate(struct vnode *); | 505 | extern int vn_revalidate(struct vnode *); |
537 | extern void vn_revalidate_core(struct vnode *, vattr_t *); | 506 | extern void vn_revalidate_core(struct vnode *, vattr_t *); |
538 | extern void vn_remove(struct vnode *); | 507 | |
508 | extern void vn_iowait(struct vnode *vp); | ||
509 | extern void vn_iowake(struct vnode *vp); | ||
539 | 510 | ||
540 | static inline int vn_count(struct vnode *vp) | 511 | static inline int vn_count(struct vnode *vp) |
541 | { | 512 | { |
@@ -546,7 +517,6 @@ static inline int vn_count(struct vnode *vp) | |||
546 | * Vnode reference counting functions (and macros for compatibility). | 517 | * Vnode reference counting functions (and macros for compatibility). |
547 | */ | 518 | */ |
548 | extern vnode_t *vn_hold(struct vnode *); | 519 | extern vnode_t *vn_hold(struct vnode *); |
549 | extern void vn_rele(struct vnode *); | ||
550 | 520 | ||
551 | #if defined(XFS_VNODE_TRACE) | 521 | #if defined(XFS_VNODE_TRACE) |
552 | #define VN_HOLD(vp) \ | 522 | #define VN_HOLD(vp) \ |
@@ -560,6 +530,12 @@ extern void vn_rele(struct vnode *); | |||
560 | #define VN_RELE(vp) (iput(LINVFS_GET_IP(vp))) | 530 | #define VN_RELE(vp) (iput(LINVFS_GET_IP(vp))) |
561 | #endif | 531 | #endif |
562 | 532 | ||
533 | static inline struct vnode *vn_grab(struct vnode *vp) | ||
534 | { | ||
535 | struct inode *inode = igrab(LINVFS_GET_IP(vp)); | ||
536 | return inode ? LINVFS_GET_VP(inode) : NULL; | ||
537 | } | ||
538 | |||
563 | /* | 539 | /* |
564 | * Vname handling macros. | 540 | * Vname handling macros. |
565 | */ | 541 | */ |
diff --git a/fs/xfs/quota/Makefile b/fs/xfs/quota/Makefile new file mode 100644 index 000000000000..7a4f725b2824 --- /dev/null +++ b/fs/xfs/quota/Makefile | |||
@@ -0,0 +1 @@ | |||
include $(TOPDIR)/fs/xfs/quota/Makefile-linux-$(VERSION).$(PATCHLEVEL) | |||
diff --git a/fs/xfs/quota/Makefile-linux-2.6 b/fs/xfs/quota/Makefile-linux-2.6 new file mode 100644 index 000000000000..93e60e839355 --- /dev/null +++ b/fs/xfs/quota/Makefile-linux-2.6 | |||
@@ -0,0 +1,53 @@ | |||
1 | # | ||
2 | # Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. | ||
3 | # | ||
4 | # This program is free software; you can redistribute it and/or modify it | ||
5 | # under the terms of version 2 of the GNU General Public License as | ||
6 | # published by the Free Software Foundation. | ||
7 | # | ||
8 | # This program is distributed in the hope that it would be useful, but | ||
9 | # WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
11 | # | ||
12 | # Further, this software is distributed without any warranty that it is | ||
13 | # free of the rightful claim of any third person regarding infringement | ||
14 | # or the like. Any license provided herein, whether implied or | ||
15 | # otherwise, applies only to this software file. Patent licenses, if | ||
16 | # any, provided herein do not apply to combinations of this program with | ||
17 | # other software, or any other product whatsoever. | ||
18 | # | ||
19 | # You should have received a copy of the GNU General Public License along | ||
20 | # with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | # Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | # | ||
23 | # Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | # Mountain View, CA 94043, or: | ||
25 | # | ||
26 | # http://www.sgi.com | ||
27 | # | ||
28 | # For further information regarding this notice, see: | ||
29 | # | ||
30 | # http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | # | ||
32 | |||
33 | EXTRA_CFLAGS += -I $(TOPDIR)/fs/xfs -I $(TOPDIR)/fs/xfs/linux-2.6 | ||
34 | |||
35 | ifeq ($(CONFIG_XFS_DEBUG),y) | ||
36 | EXTRA_CFLAGS += -g -DDEBUG | ||
37 | #EXTRA_CFLAGS += -DQUOTADEBUG | ||
38 | endif | ||
39 | ifeq ($(CONFIG_XFS_TRACE),y) | ||
40 | EXTRA_CFLAGS += -DXFS_DQUOT_TRACE | ||
41 | EXTRA_CFLAGS += -DXFS_VNODE_TRACE | ||
42 | endif | ||
43 | |||
44 | xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \ | ||
45 | xfs_dquot_item.o \ | ||
46 | xfs_trans_dquot.o \ | ||
47 | xfs_qm_syscalls.o \ | ||
48 | xfs_qm_bhv.o \ | ||
49 | xfs_qm.o | ||
50 | |||
51 | ifeq ($(CONFIG_XFS_QUOTA),y) | ||
52 | xfs-$(CONFIG_PROC_FS) += xfs_qm_stats.o | ||
53 | endif | ||
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index 46ce1e3ce1d6..e2e8d35fa4d0 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c | |||
@@ -421,7 +421,7 @@ xfs_qm_init_dquot_blk( | |||
421 | */ | 421 | */ |
422 | STATIC int | 422 | STATIC int |
423 | xfs_qm_dqalloc( | 423 | xfs_qm_dqalloc( |
424 | xfs_trans_t *tp, | 424 | xfs_trans_t **tpp, |
425 | xfs_mount_t *mp, | 425 | xfs_mount_t *mp, |
426 | xfs_dquot_t *dqp, | 426 | xfs_dquot_t *dqp, |
427 | xfs_inode_t *quotip, | 427 | xfs_inode_t *quotip, |
@@ -433,6 +433,7 @@ xfs_qm_dqalloc( | |||
433 | xfs_bmbt_irec_t map; | 433 | xfs_bmbt_irec_t map; |
434 | int nmaps, error, committed; | 434 | int nmaps, error, committed; |
435 | xfs_buf_t *bp; | 435 | xfs_buf_t *bp; |
436 | xfs_trans_t *tp = *tpp; | ||
436 | 437 | ||
437 | ASSERT(tp != NULL); | 438 | ASSERT(tp != NULL); |
438 | xfs_dqtrace_entry(dqp, "DQALLOC"); | 439 | xfs_dqtrace_entry(dqp, "DQALLOC"); |
@@ -492,10 +493,32 @@ xfs_qm_dqalloc( | |||
492 | xfs_qm_init_dquot_blk(tp, mp, INT_GET(dqp->q_core.d_id, ARCH_CONVERT), | 493 | xfs_qm_init_dquot_blk(tp, mp, INT_GET(dqp->q_core.d_id, ARCH_CONVERT), |
493 | dqp->dq_flags & XFS_DQ_ALLTYPES, bp); | 494 | dqp->dq_flags & XFS_DQ_ALLTYPES, bp); |
494 | 495 | ||
495 | if ((error = xfs_bmap_finish(&tp, &flist, firstblock, &committed))) { | 496 | /* |
497 | * xfs_bmap_finish() may commit the current transaction and | ||
498 | * start a second transaction if the freelist is not empty. | ||
499 | * | ||
500 | * Since we still want to modify this buffer, we need to | ||
501 | * ensure that the buffer is not released on commit of | ||
502 | * the first transaction and ensure the buffer is added to the | ||
503 | * second transaction. | ||
504 | * | ||
505 | * If there is only one transaction then don't stop the buffer | ||
506 | * from being released when it commits later on. | ||
507 | */ | ||
508 | |||
509 | xfs_trans_bhold(tp, bp); | ||
510 | |||
511 | if ((error = xfs_bmap_finish(tpp, &flist, firstblock, &committed))) { | ||
496 | goto error1; | 512 | goto error1; |
497 | } | 513 | } |
498 | 514 | ||
515 | if (committed) { | ||
516 | tp = *tpp; | ||
517 | xfs_trans_bjoin(tp, bp); | ||
518 | } else { | ||
519 | xfs_trans_bhold_release(tp, bp); | ||
520 | } | ||
521 | |||
499 | *O_bpp = bp; | 522 | *O_bpp = bp; |
500 | return 0; | 523 | return 0; |
501 | 524 | ||
@@ -514,7 +537,7 @@ xfs_qm_dqalloc( | |||
514 | */ | 537 | */ |
515 | STATIC int | 538 | STATIC int |
516 | xfs_qm_dqtobp( | 539 | xfs_qm_dqtobp( |
517 | xfs_trans_t *tp, | 540 | xfs_trans_t **tpp, |
518 | xfs_dquot_t *dqp, | 541 | xfs_dquot_t *dqp, |
519 | xfs_disk_dquot_t **O_ddpp, | 542 | xfs_disk_dquot_t **O_ddpp, |
520 | xfs_buf_t **O_bpp, | 543 | xfs_buf_t **O_bpp, |
@@ -528,6 +551,7 @@ xfs_qm_dqtobp( | |||
528 | xfs_disk_dquot_t *ddq; | 551 | xfs_disk_dquot_t *ddq; |
529 | xfs_dqid_t id; | 552 | xfs_dqid_t id; |
530 | boolean_t newdquot; | 553 | boolean_t newdquot; |
554 | xfs_trans_t *tp = (tpp ? *tpp : NULL); | ||
531 | 555 | ||
532 | mp = dqp->q_mount; | 556 | mp = dqp->q_mount; |
533 | id = INT_GET(dqp->q_core.d_id, ARCH_CONVERT); | 557 | id = INT_GET(dqp->q_core.d_id, ARCH_CONVERT); |
@@ -579,9 +603,10 @@ xfs_qm_dqtobp( | |||
579 | return (ENOENT); | 603 | return (ENOENT); |
580 | 604 | ||
581 | ASSERT(tp); | 605 | ASSERT(tp); |
582 | if ((error = xfs_qm_dqalloc(tp, mp, dqp, quotip, | 606 | if ((error = xfs_qm_dqalloc(tpp, mp, dqp, quotip, |
583 | dqp->q_fileoffset, &bp))) | 607 | dqp->q_fileoffset, &bp))) |
584 | return (error); | 608 | return (error); |
609 | tp = *tpp; | ||
585 | newdquot = B_TRUE; | 610 | newdquot = B_TRUE; |
586 | } else { | 611 | } else { |
587 | /* | 612 | /* |
@@ -645,7 +670,7 @@ xfs_qm_dqtobp( | |||
645 | /* ARGSUSED */ | 670 | /* ARGSUSED */ |
646 | STATIC int | 671 | STATIC int |
647 | xfs_qm_dqread( | 672 | xfs_qm_dqread( |
648 | xfs_trans_t *tp, | 673 | xfs_trans_t **tpp, |
649 | xfs_dqid_t id, | 674 | xfs_dqid_t id, |
650 | xfs_dquot_t *dqp, /* dquot to get filled in */ | 675 | xfs_dquot_t *dqp, /* dquot to get filled in */ |
651 | uint flags) | 676 | uint flags) |
@@ -653,15 +678,19 @@ xfs_qm_dqread( | |||
653 | xfs_disk_dquot_t *ddqp; | 678 | xfs_disk_dquot_t *ddqp; |
654 | xfs_buf_t *bp; | 679 | xfs_buf_t *bp; |
655 | int error; | 680 | int error; |
681 | xfs_trans_t *tp; | ||
682 | |||
683 | ASSERT(tpp); | ||
656 | 684 | ||
657 | /* | 685 | /* |
658 | * get a pointer to the on-disk dquot and the buffer containing it | 686 | * get a pointer to the on-disk dquot and the buffer containing it |
659 | * dqp already knows its own type (GROUP/USER). | 687 | * dqp already knows its own type (GROUP/USER). |
660 | */ | 688 | */ |
661 | xfs_dqtrace_entry(dqp, "DQREAD"); | 689 | xfs_dqtrace_entry(dqp, "DQREAD"); |
662 | if ((error = xfs_qm_dqtobp(tp, dqp, &ddqp, &bp, flags))) { | 690 | if ((error = xfs_qm_dqtobp(tpp, dqp, &ddqp, &bp, flags))) { |
663 | return (error); | 691 | return (error); |
664 | } | 692 | } |
693 | tp = *tpp; | ||
665 | 694 | ||
666 | /* copy everything from disk dquot to the incore dquot */ | 695 | /* copy everything from disk dquot to the incore dquot */ |
667 | memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t)); | 696 | memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t)); |
@@ -740,7 +769,7 @@ xfs_qm_idtodq( | |||
740 | * Read it from disk; xfs_dqread() takes care of | 769 | * Read it from disk; xfs_dqread() takes care of |
741 | * all the necessary initialization of dquot's fields (locks, etc) | 770 | * all the necessary initialization of dquot's fields (locks, etc) |
742 | */ | 771 | */ |
743 | if ((error = xfs_qm_dqread(tp, id, dqp, flags))) { | 772 | if ((error = xfs_qm_dqread(&tp, id, dqp, flags))) { |
744 | /* | 773 | /* |
745 | * This can happen if quotas got turned off (ESRCH), | 774 | * This can happen if quotas got turned off (ESRCH), |
746 | * or if the dquot didn't exist on disk and we ask to | 775 | * or if the dquot didn't exist on disk and we ask to |
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h index 39175103c8e0..8ebc87176c78 100644 --- a/fs/xfs/quota/xfs_dquot.h +++ b/fs/xfs/quota/xfs_dquot.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. | 2 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify it | 4 | * This program is free software; you can redistribute it and/or modify it |
5 | * under the terms of version 2 of the GNU General Public License as | 5 | * under the terms of version 2 of the GNU General Public License as |
@@ -113,20 +113,6 @@ typedef struct xfs_dquot { | |||
113 | 113 | ||
114 | #define XFS_DQHOLD(dqp) ((dqp)->q_nrefs++) | 114 | #define XFS_DQHOLD(dqp) ((dqp)->q_nrefs++) |
115 | 115 | ||
116 | /* | ||
117 | * Quota Accounting/Enforcement flags | ||
118 | */ | ||
119 | #define XFS_ALL_QUOTA_ACCT \ | ||
120 | (XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT | XFS_PQUOTA_ACCT) | ||
121 | #define XFS_ALL_QUOTA_ENFD (XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD) | ||
122 | #define XFS_ALL_QUOTA_CHKD (XFS_UQUOTA_CHKD | XFS_OQUOTA_CHKD) | ||
123 | |||
124 | #define XFS_IS_QUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT) | ||
125 | #define XFS_IS_QUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ENFD) | ||
126 | #define XFS_IS_UQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_UQUOTA_ACCT) | ||
127 | #define XFS_IS_PQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_PQUOTA_ACCT) | ||
128 | #define XFS_IS_GQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_GQUOTA_ACCT) | ||
129 | |||
130 | #ifdef DEBUG | 116 | #ifdef DEBUG |
131 | static inline int | 117 | static inline int |
132 | XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp) | 118 | XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp) |
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index f5271b7b1e84..e74eaa7dd1bc 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c | |||
@@ -509,6 +509,7 @@ xfs_qm_qoff_logitem_format(xfs_qoff_logitem_t *qf, | |||
509 | 509 | ||
510 | log_vector->i_addr = (xfs_caddr_t)&(qf->qql_format); | 510 | log_vector->i_addr = (xfs_caddr_t)&(qf->qql_format); |
511 | log_vector->i_len = sizeof(xfs_qoff_logitem_t); | 511 | log_vector->i_len = sizeof(xfs_qoff_logitem_t); |
512 | XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_QUOTAOFF); | ||
512 | qf->qql_format.qf_size = 1; | 513 | qf->qql_format.qf_size = 1; |
513 | } | 514 | } |
514 | 515 | ||
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index f665ca8f9e96..efde16e0a913 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. | 2 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify it | 4 | * This program is free software; you can redistribute it and/or modify it |
5 | * under the terms of version 2 of the GNU General Public License as | 5 | * under the terms of version 2 of the GNU General Public License as |
@@ -365,16 +365,6 @@ xfs_qm_mount_quotas( | |||
365 | int error = 0; | 365 | int error = 0; |
366 | uint sbf; | 366 | uint sbf; |
367 | 367 | ||
368 | /* | ||
369 | * If a file system had quotas running earlier, but decided to | ||
370 | * mount without -o uquota/pquota/gquota options, revoke the | ||
371 | * quotachecked license, and bail out. | ||
372 | */ | ||
373 | if (! XFS_IS_QUOTA_ON(mp) && | ||
374 | (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT)) { | ||
375 | mp->m_qflags = 0; | ||
376 | goto write_changes; | ||
377 | } | ||
378 | 368 | ||
379 | /* | 369 | /* |
380 | * If quotas on realtime volumes is not supported, we disable | 370 | * If quotas on realtime volumes is not supported, we disable |
@@ -388,11 +378,8 @@ xfs_qm_mount_quotas( | |||
388 | goto write_changes; | 378 | goto write_changes; |
389 | } | 379 | } |
390 | 380 | ||
391 | #if defined(DEBUG) && defined(XFS_LOUD_RECOVERY) | ||
392 | cmn_err(CE_NOTE, "Attempting to turn on disk quotas."); | ||
393 | #endif | ||
394 | |||
395 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); | 381 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); |
382 | |||
396 | /* | 383 | /* |
397 | * Allocate the quotainfo structure inside the mount struct, and | 384 | * Allocate the quotainfo structure inside the mount struct, and |
398 | * create quotainode(s), and change/rev superblock if necessary. | 385 | * create quotainode(s), and change/rev superblock if necessary. |
@@ -410,19 +397,14 @@ xfs_qm_mount_quotas( | |||
410 | */ | 397 | */ |
411 | if (XFS_QM_NEED_QUOTACHECK(mp) && | 398 | if (XFS_QM_NEED_QUOTACHECK(mp) && |
412 | !(mfsi_flags & XFS_MFSI_NO_QUOTACHECK)) { | 399 | !(mfsi_flags & XFS_MFSI_NO_QUOTACHECK)) { |
413 | #ifdef DEBUG | ||
414 | cmn_err(CE_NOTE, "Doing a quotacheck. Please wait."); | ||
415 | #endif | ||
416 | if ((error = xfs_qm_quotacheck(mp))) { | 400 | if ((error = xfs_qm_quotacheck(mp))) { |
417 | /* Quotacheck has failed and quotas have | 401 | /* Quotacheck has failed and quotas have |
418 | * been disabled. | 402 | * been disabled. |
419 | */ | 403 | */ |
420 | return XFS_ERROR(error); | 404 | return XFS_ERROR(error); |
421 | } | 405 | } |
422 | #ifdef DEBUG | ||
423 | cmn_err(CE_NOTE, "Done quotacheck."); | ||
424 | #endif | ||
425 | } | 406 | } |
407 | |||
426 | write_changes: | 408 | write_changes: |
427 | /* | 409 | /* |
428 | * We actually don't have to acquire the SB_LOCK at all. | 410 | * We actually don't have to acquire the SB_LOCK at all. |
@@ -2010,7 +1992,7 @@ xfs_qm_quotacheck( | |||
2010 | ASSERT(mp->m_quotainfo != NULL); | 1992 | ASSERT(mp->m_quotainfo != NULL); |
2011 | ASSERT(xfs_Gqm != NULL); | 1993 | ASSERT(xfs_Gqm != NULL); |
2012 | xfs_qm_destroy_quotainfo(mp); | 1994 | xfs_qm_destroy_quotainfo(mp); |
2013 | xfs_mount_reset_sbqflags(mp); | 1995 | (void)xfs_mount_reset_sbqflags(mp); |
2014 | } else { | 1996 | } else { |
2015 | cmn_err(CE_NOTE, "XFS quotacheck %s: Done.", mp->m_fsname); | 1997 | cmn_err(CE_NOTE, "XFS quotacheck %s: Done.", mp->m_fsname); |
2016 | } | 1998 | } |
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h index b03eecf3b6cb..0b00b3c67015 100644 --- a/fs/xfs/quota/xfs_qm.h +++ b/fs/xfs/quota/xfs_qm.h | |||
@@ -184,8 +184,6 @@ typedef struct xfs_dquot_acct { | |||
184 | #define XFS_QM_HOLD(xqm) ((xqm)->qm_nrefs++) | 184 | #define XFS_QM_HOLD(xqm) ((xqm)->qm_nrefs++) |
185 | #define XFS_QM_RELE(xqm) ((xqm)->qm_nrefs--) | 185 | #define XFS_QM_RELE(xqm) ((xqm)->qm_nrefs--) |
186 | 186 | ||
187 | extern void xfs_mount_reset_sbqflags(xfs_mount_t *); | ||
188 | |||
189 | extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); | 187 | extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); |
190 | extern int xfs_qm_mount_quotas(xfs_mount_t *, int); | 188 | extern int xfs_qm_mount_quotas(xfs_mount_t *, int); |
191 | extern void xfs_qm_mount_quotainit(xfs_mount_t *, uint); | 189 | extern void xfs_qm_mount_quotainit(xfs_mount_t *, uint); |
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c index dc3c37a1e158..8890a18a99d8 100644 --- a/fs/xfs/quota/xfs_qm_bhv.c +++ b/fs/xfs/quota/xfs_qm_bhv.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. | 2 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify it | 4 | * This program is free software; you can redistribute it and/or modify it |
5 | * under the terms of version 2 of the GNU General Public License as | 5 | * under the terms of version 2 of the GNU General Public License as |
@@ -229,48 +229,6 @@ xfs_qm_syncall( | |||
229 | return error; | 229 | return error; |
230 | } | 230 | } |
231 | 231 | ||
232 | /* | ||
233 | * Clear the quotaflags in memory and in the superblock. | ||
234 | */ | ||
235 | void | ||
236 | xfs_mount_reset_sbqflags( | ||
237 | xfs_mount_t *mp) | ||
238 | { | ||
239 | xfs_trans_t *tp; | ||
240 | unsigned long s; | ||
241 | |||
242 | mp->m_qflags = 0; | ||
243 | /* | ||
244 | * It is OK to look at sb_qflags here in mount path, | ||
245 | * without SB_LOCK. | ||
246 | */ | ||
247 | if (mp->m_sb.sb_qflags == 0) | ||
248 | return; | ||
249 | s = XFS_SB_LOCK(mp); | ||
250 | mp->m_sb.sb_qflags = 0; | ||
251 | XFS_SB_UNLOCK(mp, s); | ||
252 | |||
253 | /* | ||
254 | * if the fs is readonly, let the incore superblock run | ||
255 | * with quotas off but don't flush the update out to disk | ||
256 | */ | ||
257 | if (XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY) | ||
258 | return; | ||
259 | #ifdef QUOTADEBUG | ||
260 | xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes"); | ||
261 | #endif | ||
262 | tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); | ||
263 | if (xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, | ||
264 | XFS_DEFAULT_LOG_COUNT)) { | ||
265 | xfs_trans_cancel(tp, 0); | ||
266 | xfs_fs_cmn_err(CE_ALERT, mp, | ||
267 | "xfs_mount_reset_sbqflags: Superblock update failed!"); | ||
268 | return; | ||
269 | } | ||
270 | xfs_mod_sb(tp, XFS_SB_QFLAGS); | ||
271 | xfs_trans_commit(tp, 0, NULL); | ||
272 | } | ||
273 | |||
274 | STATIC int | 232 | STATIC int |
275 | xfs_qm_newmount( | 233 | xfs_qm_newmount( |
276 | xfs_mount_t *mp, | 234 | xfs_mount_t *mp, |
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 68e98962dbef..15e02e8a9d4f 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c | |||
@@ -1053,7 +1053,6 @@ xfs_qm_dqrele_all_inodes( | |||
1053 | struct xfs_mount *mp, | 1053 | struct xfs_mount *mp, |
1054 | uint flags) | 1054 | uint flags) |
1055 | { | 1055 | { |
1056 | vmap_t vmap; | ||
1057 | xfs_inode_t *ip, *topino; | 1056 | xfs_inode_t *ip, *topino; |
1058 | uint ireclaims; | 1057 | uint ireclaims; |
1059 | vnode_t *vp; | 1058 | vnode_t *vp; |
@@ -1061,8 +1060,8 @@ xfs_qm_dqrele_all_inodes( | |||
1061 | 1060 | ||
1062 | ASSERT(mp->m_quotainfo); | 1061 | ASSERT(mp->m_quotainfo); |
1063 | 1062 | ||
1064 | again: | ||
1065 | XFS_MOUNT_ILOCK(mp); | 1063 | XFS_MOUNT_ILOCK(mp); |
1064 | again: | ||
1066 | ip = mp->m_inodes; | 1065 | ip = mp->m_inodes; |
1067 | if (ip == NULL) { | 1066 | if (ip == NULL) { |
1068 | XFS_MOUNT_IUNLOCK(mp); | 1067 | XFS_MOUNT_IUNLOCK(mp); |
@@ -1090,18 +1089,14 @@ again: | |||
1090 | } | 1089 | } |
1091 | vnode_refd = B_FALSE; | 1090 | vnode_refd = B_FALSE; |
1092 | if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) { | 1091 | if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) { |
1093 | /* | ||
1094 | * Sample vp mapping while holding the mplock, lest | ||
1095 | * we come across a non-existent vnode. | ||
1096 | */ | ||
1097 | VMAP(vp, vmap); | ||
1098 | ireclaims = mp->m_ireclaims; | 1092 | ireclaims = mp->m_ireclaims; |
1099 | topino = mp->m_inodes; | 1093 | topino = mp->m_inodes; |
1100 | XFS_MOUNT_IUNLOCK(mp); | 1094 | vp = vn_grab(vp); |
1095 | if (!vp) | ||
1096 | goto again; | ||
1101 | 1097 | ||
1098 | XFS_MOUNT_IUNLOCK(mp); | ||
1102 | /* XXX restart limit ? */ | 1099 | /* XXX restart limit ? */ |
1103 | if ( ! (vp = vn_get(vp, &vmap))) | ||
1104 | goto again; | ||
1105 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 1100 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
1106 | vnode_refd = B_TRUE; | 1101 | vnode_refd = B_TRUE; |
1107 | } else { | 1102 | } else { |
@@ -1137,7 +1132,6 @@ again: | |||
1137 | */ | 1132 | */ |
1138 | if (topino != mp->m_inodes || mp->m_ireclaims != ireclaims) { | 1133 | if (topino != mp->m_inodes || mp->m_ireclaims != ireclaims) { |
1139 | /* XXX use a sentinel */ | 1134 | /* XXX use a sentinel */ |
1140 | XFS_MOUNT_IUNLOCK(mp); | ||
1141 | goto again; | 1135 | goto again; |
1142 | } | 1136 | } |
1143 | ip = ip->i_mnext; | 1137 | ip = ip->i_mnext; |
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c index 4ed7b6928cd7..4e1a5ec22fa3 100644 --- a/fs/xfs/support/debug.c +++ b/fs/xfs/support/debug.c | |||
@@ -31,6 +31,7 @@ | |||
31 | */ | 31 | */ |
32 | 32 | ||
33 | #include "debug.h" | 33 | #include "debug.h" |
34 | #include "spin.h" | ||
34 | 35 | ||
35 | #include <asm/page.h> | 36 | #include <asm/page.h> |
36 | #include <linux/sched.h> | 37 | #include <linux/sched.h> |
diff --git a/fs/xfs/support/ktrace.c b/fs/xfs/support/ktrace.c index 3dae14c8c55a..fa8394f9437d 100644 --- a/fs/xfs/support/ktrace.c +++ b/fs/xfs/support/ktrace.c | |||
@@ -170,7 +170,7 @@ ktrace_enter( | |||
170 | void *val14, | 170 | void *val14, |
171 | void *val15) | 171 | void *val15) |
172 | { | 172 | { |
173 | static lock_t wrap_lock = SPIN_LOCK_UNLOCKED; | 173 | static DEFINE_SPINLOCK(wrap_lock); |
174 | unsigned long flags; | 174 | unsigned long flags; |
175 | int index; | 175 | int index; |
176 | ktrace_entry_t *ktep; | 176 | ktrace_entry_t *ktep; |
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 8d01dce8c532..92fd1d67f878 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c | |||
@@ -85,7 +85,7 @@ xfs_acl_vhasacl_default( | |||
85 | { | 85 | { |
86 | int error; | 86 | int error; |
87 | 87 | ||
88 | if (vp->v_type != VDIR) | 88 | if (!VN_ISDIR(vp)) |
89 | return 0; | 89 | return 0; |
90 | xfs_acl_get_attr(vp, NULL, _ACL_TYPE_DEFAULT, ATTR_KERNOVAL, &error); | 90 | xfs_acl_get_attr(vp, NULL, _ACL_TYPE_DEFAULT, ATTR_KERNOVAL, &error); |
91 | return (error == 0); | 91 | return (error == 0); |
@@ -389,7 +389,7 @@ xfs_acl_allow_set( | |||
389 | 389 | ||
390 | if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND)) | 390 | if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND)) |
391 | return EPERM; | 391 | return EPERM; |
392 | if (kind == _ACL_TYPE_DEFAULT && vp->v_type != VDIR) | 392 | if (kind == _ACL_TYPE_DEFAULT && !VN_ISDIR(vp)) |
393 | return ENOTDIR; | 393 | return ENOTDIR; |
394 | if (vp->v_vfsp->vfs_flag & VFS_RDONLY) | 394 | if (vp->v_vfsp->vfs_flag & VFS_RDONLY) |
395 | return EROFS; | 395 | return EROFS; |
@@ -750,7 +750,7 @@ xfs_acl_inherit( | |||
750 | * If the new file is a directory, its default ACL is a copy of | 750 | * If the new file is a directory, its default ACL is a copy of |
751 | * the containing directory's default ACL. | 751 | * the containing directory's default ACL. |
752 | */ | 752 | */ |
753 | if (vp->v_type == VDIR) | 753 | if (VN_ISDIR(vp)) |
754 | xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error); | 754 | xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error); |
755 | if (!error && !basicperms) | 755 | if (!error && !basicperms) |
756 | xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error); | 756 | xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error); |
diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h index ae35189b3d70..5ab0dd885b1b 100644 --- a/fs/xfs/xfs_arch.h +++ b/fs/xfs/xfs_arch.h | |||
@@ -40,22 +40,28 @@ | |||
40 | 40 | ||
41 | #include <asm/byteorder.h> | 41 | #include <asm/byteorder.h> |
42 | 42 | ||
43 | #ifdef __LITTLE_ENDIAN | ||
44 | # define __BYTE_ORDER __LITTLE_ENDIAN | ||
45 | #endif | ||
46 | #ifdef __BIG_ENDIAN | 43 | #ifdef __BIG_ENDIAN |
47 | # define __BYTE_ORDER __BIG_ENDIAN | 44 | #define XFS_NATIVE_HOST 1 |
45 | #else | ||
46 | #undef XFS_NATIVE_HOST | ||
47 | #endif | ||
48 | |||
49 | #else /* __KERNEL__ */ | ||
50 | |||
51 | #if __BYTE_ORDER == __BIG_ENDIAN | ||
52 | #define XFS_NATIVE_HOST 1 | ||
53 | #else | ||
54 | #undef XFS_NATIVE_HOST | ||
48 | #endif | 55 | #endif |
49 | 56 | ||
50 | #endif /* __KERNEL__ */ | 57 | #endif /* __KERNEL__ */ |
51 | 58 | ||
52 | /* do we need conversion? */ | 59 | /* do we need conversion? */ |
53 | |||
54 | #define ARCH_NOCONVERT 1 | 60 | #define ARCH_NOCONVERT 1 |
55 | #if __BYTE_ORDER == __LITTLE_ENDIAN | 61 | #ifdef XFS_NATIVE_HOST |
56 | # define ARCH_CONVERT 0 | ||
57 | #else | ||
58 | # define ARCH_CONVERT ARCH_NOCONVERT | 62 | # define ARCH_CONVERT ARCH_NOCONVERT |
63 | #else | ||
64 | # define ARCH_CONVERT 0 | ||
59 | #endif | 65 | #endif |
60 | 66 | ||
61 | /* generic swapping macros */ | 67 | /* generic swapping macros */ |
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 6f5d283888aa..3e76def1283d 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c | |||
@@ -4754,10 +4754,20 @@ xfs_bmapi( | |||
4754 | error = xfs_mod_incore_sb(mp, | 4754 | error = xfs_mod_incore_sb(mp, |
4755 | XFS_SBS_FDBLOCKS, | 4755 | XFS_SBS_FDBLOCKS, |
4756 | -(alen), rsvd); | 4756 | -(alen), rsvd); |
4757 | if (!error) | 4757 | if (!error) { |
4758 | error = xfs_mod_incore_sb(mp, | 4758 | error = xfs_mod_incore_sb(mp, |
4759 | XFS_SBS_FDBLOCKS, | 4759 | XFS_SBS_FDBLOCKS, |
4760 | -(indlen), rsvd); | 4760 | -(indlen), rsvd); |
4761 | if (error && rt) { | ||
4762 | xfs_mod_incore_sb(ip->i_mount, | ||
4763 | XFS_SBS_FREXTENTS, | ||
4764 | extsz, rsvd); | ||
4765 | } else if (error) { | ||
4766 | xfs_mod_incore_sb(ip->i_mount, | ||
4767 | XFS_SBS_FDBLOCKS, | ||
4768 | alen, rsvd); | ||
4769 | } | ||
4770 | } | ||
4761 | 4771 | ||
4762 | if (error) { | 4772 | if (error) { |
4763 | if (XFS_IS_QUOTA_ON(ip->i_mount)) | 4773 | if (XFS_IS_QUOTA_ON(ip->i_mount)) |
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 09c413576ba8..09a77b17565b 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c | |||
@@ -2017,7 +2017,7 @@ xfs_bmbt_get_state( | |||
2017 | ext_flag); | 2017 | ext_flag); |
2018 | } | 2018 | } |
2019 | 2019 | ||
2020 | #if __BYTE_ORDER != __BIG_ENDIAN | 2020 | #ifndef XFS_NATIVE_HOST |
2021 | /* Endian flipping versions of the bmbt extraction functions */ | 2021 | /* Endian flipping versions of the bmbt extraction functions */ |
2022 | void | 2022 | void |
2023 | xfs_bmbt_disk_get_all( | 2023 | xfs_bmbt_disk_get_all( |
@@ -2087,7 +2087,7 @@ xfs_bmbt_disk_get_state( | |||
2087 | return xfs_extent_state(xfs_bmbt_disk_get_blockcount(r), | 2087 | return xfs_extent_state(xfs_bmbt_disk_get_blockcount(r), |
2088 | ext_flag); | 2088 | ext_flag); |
2089 | } | 2089 | } |
2090 | #endif | 2090 | #endif /* XFS_NATIVE_HOST */ |
2091 | 2091 | ||
2092 | 2092 | ||
2093 | /* | 2093 | /* |
@@ -2531,7 +2531,7 @@ xfs_bmbt_set_allf( | |||
2531 | #endif /* XFS_BIG_BLKNOS */ | 2531 | #endif /* XFS_BIG_BLKNOS */ |
2532 | } | 2532 | } |
2533 | 2533 | ||
2534 | #if __BYTE_ORDER != __BIG_ENDIAN | 2534 | #ifndef XFS_NATIVE_HOST |
2535 | /* | 2535 | /* |
2536 | * Set all the fields in a bmap extent record from the uncompressed form. | 2536 | * Set all the fields in a bmap extent record from the uncompressed form. |
2537 | */ | 2537 | */ |
@@ -2617,7 +2617,7 @@ xfs_bmbt_disk_set_allf( | |||
2617 | } | 2617 | } |
2618 | #endif /* XFS_BIG_BLKNOS */ | 2618 | #endif /* XFS_BIG_BLKNOS */ |
2619 | } | 2619 | } |
2620 | #endif | 2620 | #endif /* XFS_NATIVE_HOST */ |
2621 | 2621 | ||
2622 | /* | 2622 | /* |
2623 | * Set the blockcount field in a bmap extent record. | 2623 | * Set the blockcount field in a bmap extent record. |
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 0a40cf126c28..2cf4fe45cbcb 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h | |||
@@ -62,7 +62,7 @@ typedef struct xfs_bmdr_block | |||
62 | * l1:0-20 are blockcount. | 62 | * l1:0-20 are blockcount. |
63 | */ | 63 | */ |
64 | 64 | ||
65 | #if __BYTE_ORDER == __LITTLE_ENDIAN | 65 | #ifndef XFS_NATIVE_HOST |
66 | 66 | ||
67 | #define BMBT_TOTAL_BITLEN 128 /* 128 bits, 16 bytes */ | 67 | #define BMBT_TOTAL_BITLEN 128 /* 128 bits, 16 bytes */ |
68 | #define BMBT_EXNTFLAG_BITOFF 0 | 68 | #define BMBT_EXNTFLAG_BITOFF 0 |
@@ -87,7 +87,7 @@ typedef struct xfs_bmdr_block | |||
87 | #define BMBT_BLOCKCOUNT_BITOFF 64 /* Start of second 64 bit container */ | 87 | #define BMBT_BLOCKCOUNT_BITOFF 64 /* Start of second 64 bit container */ |
88 | #define BMBT_BLOCKCOUNT_BITLEN 21 | 88 | #define BMBT_BLOCKCOUNT_BITLEN 21 |
89 | 89 | ||
90 | #endif | 90 | #endif /* XFS_NATIVE_HOST */ |
91 | 91 | ||
92 | 92 | ||
93 | #define BMBT_USE_64 1 | 93 | #define BMBT_USE_64 1 |
@@ -505,7 +505,7 @@ xfs_exntst_t | |||
505 | xfs_bmbt_get_state( | 505 | xfs_bmbt_get_state( |
506 | xfs_bmbt_rec_t *r); | 506 | xfs_bmbt_rec_t *r); |
507 | 507 | ||
508 | #if __BYTE_ORDER != __BIG_ENDIAN | 508 | #ifndef XFS_NATIVE_HOST |
509 | void | 509 | void |
510 | xfs_bmbt_disk_get_all( | 510 | xfs_bmbt_disk_get_all( |
511 | xfs_bmbt_rec_t *r, | 511 | xfs_bmbt_rec_t *r, |
@@ -538,7 +538,7 @@ xfs_bmbt_disk_get_startoff( | |||
538 | xfs_bmbt_get_blockcount(r) | 538 | xfs_bmbt_get_blockcount(r) |
539 | #define xfs_bmbt_disk_get_startoff(r) \ | 539 | #define xfs_bmbt_disk_get_startoff(r) \ |
540 | xfs_bmbt_get_startoff(r) | 540 | xfs_bmbt_get_startoff(r) |
541 | #endif | 541 | #endif /* XFS_NATIVE_HOST */ |
542 | 542 | ||
543 | int | 543 | int |
544 | xfs_bmbt_increment( | 544 | xfs_bmbt_increment( |
@@ -623,7 +623,7 @@ xfs_bmbt_set_state( | |||
623 | xfs_bmbt_rec_t *r, | 623 | xfs_bmbt_rec_t *r, |
624 | xfs_exntst_t v); | 624 | xfs_exntst_t v); |
625 | 625 | ||
626 | #if __BYTE_ORDER != __BIG_ENDIAN | 626 | #ifndef XFS_NATIVE_HOST |
627 | void | 627 | void |
628 | xfs_bmbt_disk_set_all( | 628 | xfs_bmbt_disk_set_all( |
629 | xfs_bmbt_rec_t *r, | 629 | xfs_bmbt_rec_t *r, |
@@ -641,7 +641,7 @@ xfs_bmbt_disk_set_allf( | |||
641 | xfs_bmbt_set_all(r, s) | 641 | xfs_bmbt_set_all(r, s) |
642 | #define xfs_bmbt_disk_set_allf(r, o, b, c, v) \ | 642 | #define xfs_bmbt_disk_set_allf(r, o, b, c, v) \ |
643 | xfs_bmbt_set_allf(r, o, b, c, v) | 643 | xfs_bmbt_set_allf(r, o, b, c, v) |
644 | #endif | 644 | #endif /* XFS_NATIVE_HOST */ |
645 | 645 | ||
646 | void | 646 | void |
647 | xfs_bmbt_to_bmdr( | 647 | xfs_bmbt_to_bmdr( |
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 30b8285ad476..a264657acfd9 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c | |||
@@ -274,6 +274,7 @@ xfs_buf_item_format( | |||
274 | ((bip->bli_format.blf_map_size - 1) * sizeof(uint))); | 274 | ((bip->bli_format.blf_map_size - 1) * sizeof(uint))); |
275 | vecp->i_addr = (xfs_caddr_t)&bip->bli_format; | 275 | vecp->i_addr = (xfs_caddr_t)&bip->bli_format; |
276 | vecp->i_len = base_size; | 276 | vecp->i_len = base_size; |
277 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BFORMAT); | ||
277 | vecp++; | 278 | vecp++; |
278 | nvecs = 1; | 279 | nvecs = 1; |
279 | 280 | ||
@@ -320,12 +321,14 @@ xfs_buf_item_format( | |||
320 | buffer_offset = first_bit * XFS_BLI_CHUNK; | 321 | buffer_offset = first_bit * XFS_BLI_CHUNK; |
321 | vecp->i_addr = xfs_buf_offset(bp, buffer_offset); | 322 | vecp->i_addr = xfs_buf_offset(bp, buffer_offset); |
322 | vecp->i_len = nbits * XFS_BLI_CHUNK; | 323 | vecp->i_len = nbits * XFS_BLI_CHUNK; |
324 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK); | ||
323 | nvecs++; | 325 | nvecs++; |
324 | break; | 326 | break; |
325 | } else if (next_bit != last_bit + 1) { | 327 | } else if (next_bit != last_bit + 1) { |
326 | buffer_offset = first_bit * XFS_BLI_CHUNK; | 328 | buffer_offset = first_bit * XFS_BLI_CHUNK; |
327 | vecp->i_addr = xfs_buf_offset(bp, buffer_offset); | 329 | vecp->i_addr = xfs_buf_offset(bp, buffer_offset); |
328 | vecp->i_len = nbits * XFS_BLI_CHUNK; | 330 | vecp->i_len = nbits * XFS_BLI_CHUNK; |
331 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK); | ||
329 | nvecs++; | 332 | nvecs++; |
330 | vecp++; | 333 | vecp++; |
331 | first_bit = next_bit; | 334 | first_bit = next_bit; |
@@ -337,6 +340,7 @@ xfs_buf_item_format( | |||
337 | buffer_offset = first_bit * XFS_BLI_CHUNK; | 340 | buffer_offset = first_bit * XFS_BLI_CHUNK; |
338 | vecp->i_addr = xfs_buf_offset(bp, buffer_offset); | 341 | vecp->i_addr = xfs_buf_offset(bp, buffer_offset); |
339 | vecp->i_len = nbits * XFS_BLI_CHUNK; | 342 | vecp->i_len = nbits * XFS_BLI_CHUNK; |
343 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK); | ||
340 | /* You would think we need to bump the nvecs here too, but we do not | 344 | /* You would think we need to bump the nvecs here too, but we do not |
341 | * this number is used by recovery, and it gets confused by the boundary | 345 | * this number is used by recovery, and it gets confused by the boundary |
342 | * split here | 346 | * split here |
diff --git a/fs/xfs/xfs_dir_leaf.h b/fs/xfs/xfs_dir_leaf.h index dd423ce1bc8d..480bffc1f29f 100644 --- a/fs/xfs/xfs_dir_leaf.h +++ b/fs/xfs/xfs_dir_leaf.h | |||
@@ -127,13 +127,13 @@ typedef union { | |||
127 | * Watch the order here (endian-ness dependent). | 127 | * Watch the order here (endian-ness dependent). |
128 | */ | 128 | */ |
129 | struct { | 129 | struct { |
130 | #if __BYTE_ORDER == __LITTLE_ENDIAN | 130 | #ifndef XFS_NATIVE_HOST |
131 | xfs_dahash_t h; /* hash value */ | 131 | xfs_dahash_t h; /* hash value */ |
132 | __uint32_t be; /* block and entry */ | 132 | __uint32_t be; /* block and entry */ |
133 | #else /* __BYTE_ORDER == __BIG_ENDIAN */ | 133 | #else |
134 | __uint32_t be; /* block and entry */ | 134 | __uint32_t be; /* block and entry */ |
135 | xfs_dahash_t h; /* hash value */ | 135 | xfs_dahash_t h; /* hash value */ |
136 | #endif /* __BYTE_ORDER == __BIG_ENDIAN */ | 136 | #endif /* XFS_NATIVE_HOST */ |
137 | } s; | 137 | } s; |
138 | } xfs_dircook_t; | 138 | } xfs_dircook_t; |
139 | 139 | ||
diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h index 55c17adaaa37..19e872856f6b 100644 --- a/fs/xfs/xfs_dmapi.h +++ b/fs/xfs/xfs_dmapi.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. | 2 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify it | 4 | * This program is free software; you can redistribute it and/or modify it |
5 | * under the terms of version 2 of the GNU General Public License as | 5 | * under the terms of version 2 of the GNU General Public License as |
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index db7cbd1bc857..cc7d1494a45d 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c | |||
@@ -107,6 +107,7 @@ xfs_efi_item_format(xfs_efi_log_item_t *efip, | |||
107 | 107 | ||
108 | log_vector->i_addr = (xfs_caddr_t)&(efip->efi_format); | 108 | log_vector->i_addr = (xfs_caddr_t)&(efip->efi_format); |
109 | log_vector->i_len = size; | 109 | log_vector->i_len = size; |
110 | XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_EFI_FORMAT); | ||
110 | ASSERT(size >= sizeof(xfs_efi_log_format_t)); | 111 | ASSERT(size >= sizeof(xfs_efi_log_format_t)); |
111 | } | 112 | } |
112 | 113 | ||
@@ -426,6 +427,7 @@ xfs_efd_item_format(xfs_efd_log_item_t *efdp, | |||
426 | 427 | ||
427 | log_vector->i_addr = (xfs_caddr_t)&(efdp->efd_format); | 428 | log_vector->i_addr = (xfs_caddr_t)&(efdp->efd_format); |
428 | log_vector->i_len = size; | 429 | log_vector->i_len = size; |
430 | XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_EFD_FORMAT); | ||
429 | ASSERT(size >= sizeof(xfs_efd_log_format_t)); | 431 | ASSERT(size >= sizeof(xfs_efd_log_format_t)); |
430 | } | 432 | } |
431 | 433 | ||
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index d3da00045f26..0d9ae8fb4138 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c | |||
@@ -30,6 +30,8 @@ | |||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | 30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ |
31 | */ | 31 | */ |
32 | 32 | ||
33 | #include <linux/delay.h> | ||
34 | |||
33 | #include "xfs.h" | 35 | #include "xfs.h" |
34 | 36 | ||
35 | #include "xfs_macros.h" | 37 | #include "xfs_macros.h" |
@@ -505,17 +507,15 @@ xfs_iget( | |||
505 | vnode_t *vp = NULL; | 507 | vnode_t *vp = NULL; |
506 | int error; | 508 | int error; |
507 | 509 | ||
508 | retry: | ||
509 | XFS_STATS_INC(xs_ig_attempts); | 510 | XFS_STATS_INC(xs_ig_attempts); |
510 | 511 | ||
512 | retry: | ||
511 | if ((inode = iget_locked(XFS_MTOVFS(mp)->vfs_super, ino))) { | 513 | if ((inode = iget_locked(XFS_MTOVFS(mp)->vfs_super, ino))) { |
512 | bhv_desc_t *bdp; | 514 | bhv_desc_t *bdp; |
513 | xfs_inode_t *ip; | 515 | xfs_inode_t *ip; |
514 | int newnode; | ||
515 | 516 | ||
516 | vp = LINVFS_GET_VP(inode); | 517 | vp = LINVFS_GET_VP(inode); |
517 | if (inode->i_state & I_NEW) { | 518 | if (inode->i_state & I_NEW) { |
518 | inode_allocate: | ||
519 | vn_initialize(inode); | 519 | vn_initialize(inode); |
520 | error = xfs_iget_core(vp, mp, tp, ino, flags, | 520 | error = xfs_iget_core(vp, mp, tp, ino, flags, |
521 | lock_flags, ipp, bno); | 521 | lock_flags, ipp, bno); |
@@ -526,32 +526,25 @@ inode_allocate: | |||
526 | iput(inode); | 526 | iput(inode); |
527 | } | 527 | } |
528 | } else { | 528 | } else { |
529 | /* These are true if the inode is in inactive or | 529 | /* |
530 | * reclaim. The linux inode is about to go away, | 530 | * If the inode is not fully constructed due to |
531 | * wait for that path to finish, and try again. | 531 | * filehandle mismatches wait for the inode to go |
532 | * away and try again. | ||
533 | * | ||
534 | * iget_locked will call __wait_on_freeing_inode | ||
535 | * to wait for the inode to go away. | ||
532 | */ | 536 | */ |
533 | if (vp->v_flag & (VINACT | VRECLM)) { | 537 | if (is_bad_inode(inode) || |
534 | vn_wait(vp); | 538 | ((bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), |
539 | &xfs_vnodeops)) == NULL)) { | ||
535 | iput(inode); | 540 | iput(inode); |
541 | delay(1); | ||
536 | goto retry; | 542 | goto retry; |
537 | } | 543 | } |
538 | 544 | ||
539 | if (is_bad_inode(inode)) { | ||
540 | iput(inode); | ||
541 | return EIO; | ||
542 | } | ||
543 | |||
544 | bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops); | ||
545 | if (bdp == NULL) { | ||
546 | XFS_STATS_INC(xs_ig_dup); | ||
547 | goto inode_allocate; | ||
548 | } | ||
549 | ip = XFS_BHVTOI(bdp); | 545 | ip = XFS_BHVTOI(bdp); |
550 | if (lock_flags != 0) | 546 | if (lock_flags != 0) |
551 | xfs_ilock(ip, lock_flags); | 547 | xfs_ilock(ip, lock_flags); |
552 | newnode = (ip->i_d.di_mode == 0); | ||
553 | if (newnode) | ||
554 | xfs_iocore_inode_reinit(ip); | ||
555 | XFS_STATS_INC(xs_ig_found); | 548 | XFS_STATS_INC(xs_ig_found); |
556 | *ipp = ip; | 549 | *ipp = ip; |
557 | error = 0; | 550 | error = 0; |
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 34bdf5909687..db43308aae93 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -1128,7 +1128,6 @@ xfs_ialloc( | |||
1128 | ASSERT(ip != NULL); | 1128 | ASSERT(ip != NULL); |
1129 | 1129 | ||
1130 | vp = XFS_ITOV(ip); | 1130 | vp = XFS_ITOV(ip); |
1131 | vp->v_type = IFTOVT(mode); | ||
1132 | ip->i_d.di_mode = (__uint16_t)mode; | 1131 | ip->i_d.di_mode = (__uint16_t)mode; |
1133 | ip->i_d.di_onlink = 0; | 1132 | ip->i_d.di_onlink = 0; |
1134 | ip->i_d.di_nlink = nlink; | 1133 | ip->i_d.di_nlink = nlink; |
@@ -1250,7 +1249,7 @@ xfs_ialloc( | |||
1250 | */ | 1249 | */ |
1251 | xfs_trans_log_inode(tp, ip, flags); | 1250 | xfs_trans_log_inode(tp, ip, flags); |
1252 | 1251 | ||
1253 | /* now that we have a v_type we can set Linux inode ops (& unlock) */ | 1252 | /* now that we have an i_mode we can set Linux inode ops (& unlock) */ |
1254 | VFS_INIT_VNODE(XFS_MTOVFS(tp->t_mountp), vp, XFS_ITOBHV(ip), 1); | 1253 | VFS_INIT_VNODE(XFS_MTOVFS(tp->t_mountp), vp, XFS_ITOBHV(ip), 1); |
1255 | 1254 | ||
1256 | *ipp = ip; | 1255 | *ipp = ip; |
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 0eed30f5cb19..50e2cadf9091 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c | |||
@@ -248,6 +248,7 @@ xfs_inode_item_format( | |||
248 | 248 | ||
249 | vecp->i_addr = (xfs_caddr_t)&iip->ili_format; | 249 | vecp->i_addr = (xfs_caddr_t)&iip->ili_format; |
250 | vecp->i_len = sizeof(xfs_inode_log_format_t); | 250 | vecp->i_len = sizeof(xfs_inode_log_format_t); |
251 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IFORMAT); | ||
251 | vecp++; | 252 | vecp++; |
252 | nvecs = 1; | 253 | nvecs = 1; |
253 | 254 | ||
@@ -292,6 +293,7 @@ xfs_inode_item_format( | |||
292 | 293 | ||
293 | vecp->i_addr = (xfs_caddr_t)&ip->i_d; | 294 | vecp->i_addr = (xfs_caddr_t)&ip->i_d; |
294 | vecp->i_len = sizeof(xfs_dinode_core_t); | 295 | vecp->i_len = sizeof(xfs_dinode_core_t); |
296 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE); | ||
295 | vecp++; | 297 | vecp++; |
296 | nvecs++; | 298 | nvecs++; |
297 | iip->ili_format.ilf_fields |= XFS_ILOG_CORE; | 299 | iip->ili_format.ilf_fields |= XFS_ILOG_CORE; |
@@ -339,7 +341,7 @@ xfs_inode_item_format( | |||
339 | nrecs = ip->i_df.if_bytes / | 341 | nrecs = ip->i_df.if_bytes / |
340 | (uint)sizeof(xfs_bmbt_rec_t); | 342 | (uint)sizeof(xfs_bmbt_rec_t); |
341 | ASSERT(nrecs > 0); | 343 | ASSERT(nrecs > 0); |
342 | #if __BYTE_ORDER == __BIG_ENDIAN | 344 | #ifdef XFS_NATIVE_HOST |
343 | if (nrecs == ip->i_d.di_nextents) { | 345 | if (nrecs == ip->i_d.di_nextents) { |
344 | /* | 346 | /* |
345 | * There are no delayed allocation | 347 | * There are no delayed allocation |
@@ -349,6 +351,7 @@ xfs_inode_item_format( | |||
349 | vecp->i_addr = | 351 | vecp->i_addr = |
350 | (char *)(ip->i_df.if_u1.if_extents); | 352 | (char *)(ip->i_df.if_u1.if_extents); |
351 | vecp->i_len = ip->i_df.if_bytes; | 353 | vecp->i_len = ip->i_df.if_bytes; |
354 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT); | ||
352 | } else | 355 | } else |
353 | #endif | 356 | #endif |
354 | { | 357 | { |
@@ -367,6 +370,7 @@ xfs_inode_item_format( | |||
367 | vecp->i_addr = (xfs_caddr_t)ext_buffer; | 370 | vecp->i_addr = (xfs_caddr_t)ext_buffer; |
368 | vecp->i_len = xfs_iextents_copy(ip, ext_buffer, | 371 | vecp->i_len = xfs_iextents_copy(ip, ext_buffer, |
369 | XFS_DATA_FORK); | 372 | XFS_DATA_FORK); |
373 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT); | ||
370 | } | 374 | } |
371 | ASSERT(vecp->i_len <= ip->i_df.if_bytes); | 375 | ASSERT(vecp->i_len <= ip->i_df.if_bytes); |
372 | iip->ili_format.ilf_dsize = vecp->i_len; | 376 | iip->ili_format.ilf_dsize = vecp->i_len; |
@@ -384,6 +388,7 @@ xfs_inode_item_format( | |||
384 | ASSERT(ip->i_df.if_broot != NULL); | 388 | ASSERT(ip->i_df.if_broot != NULL); |
385 | vecp->i_addr = (xfs_caddr_t)ip->i_df.if_broot; | 389 | vecp->i_addr = (xfs_caddr_t)ip->i_df.if_broot; |
386 | vecp->i_len = ip->i_df.if_broot_bytes; | 390 | vecp->i_len = ip->i_df.if_broot_bytes; |
391 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IBROOT); | ||
387 | vecp++; | 392 | vecp++; |
388 | nvecs++; | 393 | nvecs++; |
389 | iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes; | 394 | iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes; |
@@ -409,6 +414,7 @@ xfs_inode_item_format( | |||
409 | ASSERT((ip->i_df.if_real_bytes == 0) || | 414 | ASSERT((ip->i_df.if_real_bytes == 0) || |
410 | (ip->i_df.if_real_bytes == data_bytes)); | 415 | (ip->i_df.if_real_bytes == data_bytes)); |
411 | vecp->i_len = (int)data_bytes; | 416 | vecp->i_len = (int)data_bytes; |
417 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ILOCAL); | ||
412 | vecp++; | 418 | vecp++; |
413 | nvecs++; | 419 | nvecs++; |
414 | iip->ili_format.ilf_dsize = (unsigned)data_bytes; | 420 | iip->ili_format.ilf_dsize = (unsigned)data_bytes; |
@@ -467,7 +473,7 @@ xfs_inode_item_format( | |||
467 | #endif | 473 | #endif |
468 | ASSERT(nrecs > 0); | 474 | ASSERT(nrecs > 0); |
469 | ASSERT(nrecs == ip->i_d.di_anextents); | 475 | ASSERT(nrecs == ip->i_d.di_anextents); |
470 | #if __BYTE_ORDER == __BIG_ENDIAN | 476 | #ifdef XFS_NATIVE_HOST |
471 | /* | 477 | /* |
472 | * There are no delayed allocation extents | 478 | * There are no delayed allocation extents |
473 | * for attributes, so just point at the array. | 479 | * for attributes, so just point at the array. |
@@ -486,6 +492,7 @@ xfs_inode_item_format( | |||
486 | vecp->i_len = xfs_iextents_copy(ip, ext_buffer, | 492 | vecp->i_len = xfs_iextents_copy(ip, ext_buffer, |
487 | XFS_ATTR_FORK); | 493 | XFS_ATTR_FORK); |
488 | #endif | 494 | #endif |
495 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_EXT); | ||
489 | iip->ili_format.ilf_asize = vecp->i_len; | 496 | iip->ili_format.ilf_asize = vecp->i_len; |
490 | vecp++; | 497 | vecp++; |
491 | nvecs++; | 498 | nvecs++; |
@@ -500,6 +507,7 @@ xfs_inode_item_format( | |||
500 | ASSERT(ip->i_afp->if_broot != NULL); | 507 | ASSERT(ip->i_afp->if_broot != NULL); |
501 | vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_broot; | 508 | vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_broot; |
502 | vecp->i_len = ip->i_afp->if_broot_bytes; | 509 | vecp->i_len = ip->i_afp->if_broot_bytes; |
510 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_BROOT); | ||
503 | vecp++; | 511 | vecp++; |
504 | nvecs++; | 512 | nvecs++; |
505 | iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes; | 513 | iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes; |
@@ -523,6 +531,7 @@ xfs_inode_item_format( | |||
523 | ASSERT((ip->i_afp->if_real_bytes == 0) || | 531 | ASSERT((ip->i_afp->if_real_bytes == 0) || |
524 | (ip->i_afp->if_real_bytes == data_bytes)); | 532 | (ip->i_afp->if_real_bytes == data_bytes)); |
525 | vecp->i_len = (int)data_bytes; | 533 | vecp->i_len = (int)data_bytes; |
534 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_LOCAL); | ||
526 | vecp++; | 535 | vecp++; |
527 | nvecs++; | 536 | nvecs++; |
528 | iip->ili_format.ilf_asize = (unsigned)data_bytes; | 537 | iip->ili_format.ilf_asize = (unsigned)data_bytes; |
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 2edd6769e5d3..d0f5be63cddb 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c | |||
@@ -226,13 +226,12 @@ xfs_iomap( | |||
226 | xfs_iomap_enter_trace(XFS_IOMAP_READ_ENTER, io, offset, count); | 226 | xfs_iomap_enter_trace(XFS_IOMAP_READ_ENTER, io, offset, count); |
227 | lockmode = XFS_LCK_MAP_SHARED(mp, io); | 227 | lockmode = XFS_LCK_MAP_SHARED(mp, io); |
228 | bmapi_flags = XFS_BMAPI_ENTIRE; | 228 | bmapi_flags = XFS_BMAPI_ENTIRE; |
229 | if (flags & BMAPI_IGNSTATE) | ||
230 | bmapi_flags |= XFS_BMAPI_IGSTATE; | ||
231 | break; | 229 | break; |
232 | case BMAPI_WRITE: | 230 | case BMAPI_WRITE: |
233 | xfs_iomap_enter_trace(XFS_IOMAP_WRITE_ENTER, io, offset, count); | 231 | xfs_iomap_enter_trace(XFS_IOMAP_WRITE_ENTER, io, offset, count); |
234 | lockmode = XFS_ILOCK_EXCL|XFS_EXTSIZE_WR; | 232 | lockmode = XFS_ILOCK_EXCL|XFS_EXTSIZE_WR; |
235 | bmapi_flags = 0; | 233 | if (flags & BMAPI_IGNSTATE) |
234 | bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE; | ||
236 | XFS_ILOCK(mp, io, lockmode); | 235 | XFS_ILOCK(mp, io, lockmode); |
237 | break; | 236 | break; |
238 | case BMAPI_ALLOCATE: | 237 | case BMAPI_ALLOCATE: |
@@ -391,9 +390,9 @@ xfs_iomap_write_direct( | |||
391 | xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS], *imapp; | 390 | xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS], *imapp; |
392 | xfs_bmap_free_t free_list; | 391 | xfs_bmap_free_t free_list; |
393 | int aeof; | 392 | int aeof; |
394 | xfs_filblks_t datablocks, qblocks, resblks; | 393 | xfs_filblks_t qblocks, resblks; |
395 | int committed; | 394 | int committed; |
396 | int numrtextents; | 395 | int resrtextents; |
397 | 396 | ||
398 | /* | 397 | /* |
399 | * Make sure that the dquots are there. This doesn't hold | 398 | * Make sure that the dquots are there. This doesn't hold |
@@ -434,14 +433,14 @@ xfs_iomap_write_direct( | |||
434 | 433 | ||
435 | if (!(extsz = ip->i_d.di_extsize)) | 434 | if (!(extsz = ip->i_d.di_extsize)) |
436 | extsz = mp->m_sb.sb_rextsize; | 435 | extsz = mp->m_sb.sb_rextsize; |
437 | numrtextents = qblocks = (count_fsb + extsz - 1); | 436 | resrtextents = qblocks = (count_fsb + extsz - 1); |
438 | do_div(numrtextents, mp->m_sb.sb_rextsize); | 437 | do_div(resrtextents, mp->m_sb.sb_rextsize); |
438 | resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); | ||
439 | quota_flag = XFS_QMOPT_RES_RTBLKS; | 439 | quota_flag = XFS_QMOPT_RES_RTBLKS; |
440 | datablocks = 0; | ||
441 | } else { | 440 | } else { |
442 | datablocks = qblocks = count_fsb; | 441 | resrtextents = 0; |
442 | resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, count_fsb); | ||
443 | quota_flag = XFS_QMOPT_RES_REGBLKS; | 443 | quota_flag = XFS_QMOPT_RES_REGBLKS; |
444 | numrtextents = 0; | ||
445 | } | 444 | } |
446 | 445 | ||
447 | /* | 446 | /* |
@@ -449,9 +448,8 @@ xfs_iomap_write_direct( | |||
449 | */ | 448 | */ |
450 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 449 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
451 | tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); | 450 | tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); |
452 | resblks = XFS_DIOSTRAT_SPACE_RES(mp, datablocks); | ||
453 | error = xfs_trans_reserve(tp, resblks, | 451 | error = xfs_trans_reserve(tp, resblks, |
454 | XFS_WRITE_LOG_RES(mp), numrtextents, | 452 | XFS_WRITE_LOG_RES(mp), resrtextents, |
455 | XFS_TRANS_PERM_LOG_RES, | 453 | XFS_TRANS_PERM_LOG_RES, |
456 | XFS_WRITE_LOG_COUNT); | 454 | XFS_WRITE_LOG_COUNT); |
457 | 455 | ||
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 1cd2ac163877..54a6f1142403 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -159,11 +159,15 @@ xfs_buftarg_t *xlog_target; | |||
159 | void | 159 | void |
160 | xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string) | 160 | xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string) |
161 | { | 161 | { |
162 | if (! log->l_grant_trace) { | 162 | unsigned long cnts; |
163 | log->l_grant_trace = ktrace_alloc(1024, KM_NOSLEEP); | 163 | |
164 | if (! log->l_grant_trace) | 164 | if (!log->l_grant_trace) { |
165 | log->l_grant_trace = ktrace_alloc(2048, KM_NOSLEEP); | ||
166 | if (!log->l_grant_trace) | ||
165 | return; | 167 | return; |
166 | } | 168 | } |
169 | /* ticket counts are 1 byte each */ | ||
170 | cnts = ((unsigned long)tic->t_ocnt) | ((unsigned long)tic->t_cnt) << 8; | ||
167 | 171 | ||
168 | ktrace_enter(log->l_grant_trace, | 172 | ktrace_enter(log->l_grant_trace, |
169 | (void *)tic, | 173 | (void *)tic, |
@@ -178,10 +182,10 @@ xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string) | |||
178 | (void *)((unsigned long)CYCLE_LSN(log->l_tail_lsn)), | 182 | (void *)((unsigned long)CYCLE_LSN(log->l_tail_lsn)), |
179 | (void *)((unsigned long)BLOCK_LSN(log->l_tail_lsn)), | 183 | (void *)((unsigned long)BLOCK_LSN(log->l_tail_lsn)), |
180 | (void *)string, | 184 | (void *)string, |
181 | (void *)((unsigned long)13), | 185 | (void *)((unsigned long)tic->t_trans_type), |
182 | (void *)((unsigned long)14), | 186 | (void *)cnts, |
183 | (void *)((unsigned long)15), | 187 | (void *)((unsigned long)tic->t_curr_res), |
184 | (void *)((unsigned long)16)); | 188 | (void *)((unsigned long)tic->t_unit_res)); |
185 | } | 189 | } |
186 | 190 | ||
187 | void | 191 | void |
@@ -274,9 +278,11 @@ xfs_log_done(xfs_mount_t *mp, | |||
274 | * Release ticket if not permanent reservation or a specifc | 278 | * Release ticket if not permanent reservation or a specifc |
275 | * request has been made to release a permanent reservation. | 279 | * request has been made to release a permanent reservation. |
276 | */ | 280 | */ |
281 | xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)"); | ||
277 | xlog_ungrant_log_space(log, ticket); | 282 | xlog_ungrant_log_space(log, ticket); |
278 | xlog_state_put_ticket(log, ticket); | 283 | xlog_state_put_ticket(log, ticket); |
279 | } else { | 284 | } else { |
285 | xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)"); | ||
280 | xlog_regrant_reserve_log_space(log, ticket); | 286 | xlog_regrant_reserve_log_space(log, ticket); |
281 | } | 287 | } |
282 | 288 | ||
@@ -399,7 +405,8 @@ xfs_log_reserve(xfs_mount_t *mp, | |||
399 | int cnt, | 405 | int cnt, |
400 | xfs_log_ticket_t *ticket, | 406 | xfs_log_ticket_t *ticket, |
401 | __uint8_t client, | 407 | __uint8_t client, |
402 | uint flags) | 408 | uint flags, |
409 | uint t_type) | ||
403 | { | 410 | { |
404 | xlog_t *log = mp->m_log; | 411 | xlog_t *log = mp->m_log; |
405 | xlog_ticket_t *internal_ticket; | 412 | xlog_ticket_t *internal_ticket; |
@@ -421,13 +428,19 @@ xfs_log_reserve(xfs_mount_t *mp, | |||
421 | if (*ticket != NULL) { | 428 | if (*ticket != NULL) { |
422 | ASSERT(flags & XFS_LOG_PERM_RESERV); | 429 | ASSERT(flags & XFS_LOG_PERM_RESERV); |
423 | internal_ticket = (xlog_ticket_t *)*ticket; | 430 | internal_ticket = (xlog_ticket_t *)*ticket; |
431 | xlog_trace_loggrant(log, internal_ticket, "xfs_log_reserve: existing ticket (permanent trans)"); | ||
424 | xlog_grant_push_ail(mp, internal_ticket->t_unit_res); | 432 | xlog_grant_push_ail(mp, internal_ticket->t_unit_res); |
425 | retval = xlog_regrant_write_log_space(log, internal_ticket); | 433 | retval = xlog_regrant_write_log_space(log, internal_ticket); |
426 | } else { | 434 | } else { |
427 | /* may sleep if need to allocate more tickets */ | 435 | /* may sleep if need to allocate more tickets */ |
428 | internal_ticket = xlog_ticket_get(log, unit_bytes, cnt, | 436 | internal_ticket = xlog_ticket_get(log, unit_bytes, cnt, |
429 | client, flags); | 437 | client, flags); |
438 | internal_ticket->t_trans_type = t_type; | ||
430 | *ticket = internal_ticket; | 439 | *ticket = internal_ticket; |
440 | xlog_trace_loggrant(log, internal_ticket, | ||
441 | (internal_ticket->t_flags & XLOG_TIC_PERM_RESERV) ? | ||
442 | "xfs_log_reserve: create new ticket (permanent trans)" : | ||
443 | "xfs_log_reserve: create new ticket"); | ||
431 | xlog_grant_push_ail(mp, | 444 | xlog_grant_push_ail(mp, |
432 | (internal_ticket->t_unit_res * | 445 | (internal_ticket->t_unit_res * |
433 | internal_ticket->t_cnt)); | 446 | internal_ticket->t_cnt)); |
@@ -601,8 +614,9 @@ xfs_log_unmount_write(xfs_mount_t *mp) | |||
601 | if (! (XLOG_FORCED_SHUTDOWN(log))) { | 614 | if (! (XLOG_FORCED_SHUTDOWN(log))) { |
602 | reg[0].i_addr = (void*)&magic; | 615 | reg[0].i_addr = (void*)&magic; |
603 | reg[0].i_len = sizeof(magic); | 616 | reg[0].i_len = sizeof(magic); |
617 | XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_UNMOUNT); | ||
604 | 618 | ||
605 | error = xfs_log_reserve(mp, 600, 1, &tic, XFS_LOG, 0); | 619 | error = xfs_log_reserve(mp, 600, 1, &tic, XFS_LOG, 0, 0); |
606 | if (!error) { | 620 | if (!error) { |
607 | /* remove inited flag */ | 621 | /* remove inited flag */ |
608 | ((xlog_ticket_t *)tic)->t_flags = 0; | 622 | ((xlog_ticket_t *)tic)->t_flags = 0; |
@@ -1272,6 +1286,7 @@ xlog_commit_record(xfs_mount_t *mp, | |||
1272 | 1286 | ||
1273 | reg[0].i_addr = NULL; | 1287 | reg[0].i_addr = NULL; |
1274 | reg[0].i_len = 0; | 1288 | reg[0].i_len = 0; |
1289 | XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_COMMIT); | ||
1275 | 1290 | ||
1276 | ASSERT_ALWAYS(iclog); | 1291 | ASSERT_ALWAYS(iclog); |
1277 | if ((error = xlog_write(mp, reg, 1, ticket, commitlsnp, | 1292 | if ((error = xlog_write(mp, reg, 1, ticket, commitlsnp, |
@@ -1605,6 +1620,117 @@ xlog_state_finish_copy(xlog_t *log, | |||
1605 | 1620 | ||
1606 | 1621 | ||
1607 | /* | 1622 | /* |
1623 | * print out info relating to regions written which consume | ||
1624 | * the reservation | ||
1625 | */ | ||
1626 | #if defined(XFS_LOG_RES_DEBUG) | ||
1627 | STATIC void | ||
1628 | xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket) | ||
1629 | { | ||
1630 | uint i; | ||
1631 | uint ophdr_spc = ticket->t_res_num_ophdrs * (uint)sizeof(xlog_op_header_t); | ||
1632 | |||
1633 | /* match with XLOG_REG_TYPE_* in xfs_log.h */ | ||
1634 | static char *res_type_str[XLOG_REG_TYPE_MAX] = { | ||
1635 | "bformat", | ||
1636 | "bchunk", | ||
1637 | "efi_format", | ||
1638 | "efd_format", | ||
1639 | "iformat", | ||
1640 | "icore", | ||
1641 | "iext", | ||
1642 | "ibroot", | ||
1643 | "ilocal", | ||
1644 | "iattr_ext", | ||
1645 | "iattr_broot", | ||
1646 | "iattr_local", | ||
1647 | "qformat", | ||
1648 | "dquot", | ||
1649 | "quotaoff", | ||
1650 | "LR header", | ||
1651 | "unmount", | ||
1652 | "commit", | ||
1653 | "trans header" | ||
1654 | }; | ||
1655 | static char *trans_type_str[XFS_TRANS_TYPE_MAX] = { | ||
1656 | "SETATTR_NOT_SIZE", | ||
1657 | "SETATTR_SIZE", | ||
1658 | "INACTIVE", | ||
1659 | "CREATE", | ||
1660 | "CREATE_TRUNC", | ||
1661 | "TRUNCATE_FILE", | ||
1662 | "REMOVE", | ||
1663 | "LINK", | ||
1664 | "RENAME", | ||
1665 | "MKDIR", | ||
1666 | "RMDIR", | ||
1667 | "SYMLINK", | ||
1668 | "SET_DMATTRS", | ||
1669 | "GROWFS", | ||
1670 | "STRAT_WRITE", | ||
1671 | "DIOSTRAT", | ||
1672 | "WRITE_SYNC", | ||
1673 | "WRITEID", | ||
1674 | "ADDAFORK", | ||
1675 | "ATTRINVAL", | ||
1676 | "ATRUNCATE", | ||
1677 | "ATTR_SET", | ||
1678 | "ATTR_RM", | ||
1679 | "ATTR_FLAG", | ||
1680 | "CLEAR_AGI_BUCKET", | ||
1681 | "QM_SBCHANGE", | ||
1682 | "DUMMY1", | ||
1683 | "DUMMY2", | ||
1684 | "QM_QUOTAOFF", | ||
1685 | "QM_DQALLOC", | ||
1686 | "QM_SETQLIM", | ||
1687 | "QM_DQCLUSTER", | ||
1688 | "QM_QINOCREATE", | ||
1689 | "QM_QUOTAOFF_END", | ||
1690 | "SB_UNIT", | ||
1691 | "FSYNC_TS", | ||
1692 | "GROWFSRT_ALLOC", | ||
1693 | "GROWFSRT_ZERO", | ||
1694 | "GROWFSRT_FREE", | ||
1695 | "SWAPEXT" | ||
1696 | }; | ||
1697 | |||
1698 | xfs_fs_cmn_err(CE_WARN, mp, | ||
1699 | "xfs_log_write: reservation summary:\n" | ||
1700 | " trans type = %s (%u)\n" | ||
1701 | " unit res = %d bytes\n" | ||
1702 | " current res = %d bytes\n" | ||
1703 | " total reg = %u bytes (o/flow = %u bytes)\n" | ||
1704 | " ophdrs = %u (ophdr space = %u bytes)\n" | ||
1705 | " ophdr + reg = %u bytes\n" | ||
1706 | " num regions = %u\n", | ||
1707 | ((ticket->t_trans_type <= 0 || | ||
1708 | ticket->t_trans_type > XFS_TRANS_TYPE_MAX) ? | ||
1709 | "bad-trans-type" : trans_type_str[ticket->t_trans_type-1]), | ||
1710 | ticket->t_trans_type, | ||
1711 | ticket->t_unit_res, | ||
1712 | ticket->t_curr_res, | ||
1713 | ticket->t_res_arr_sum, ticket->t_res_o_flow, | ||
1714 | ticket->t_res_num_ophdrs, ophdr_spc, | ||
1715 | ticket->t_res_arr_sum + | ||
1716 | ticket->t_res_o_flow + ophdr_spc, | ||
1717 | ticket->t_res_num); | ||
1718 | |||
1719 | for (i = 0; i < ticket->t_res_num; i++) { | ||
1720 | uint r_type = ticket->t_res_arr[i].r_type; | ||
1721 | cmn_err(CE_WARN, | ||
1722 | "region[%u]: %s - %u bytes\n", | ||
1723 | i, | ||
1724 | ((r_type <= 0 || r_type > XLOG_REG_TYPE_MAX) ? | ||
1725 | "bad-rtype" : res_type_str[r_type-1]), | ||
1726 | ticket->t_res_arr[i].r_len); | ||
1727 | } | ||
1728 | } | ||
1729 | #else | ||
1730 | #define xlog_print_tic_res(mp, ticket) | ||
1731 | #endif | ||
1732 | |||
1733 | /* | ||
1608 | * Write some region out to in-core log | 1734 | * Write some region out to in-core log |
1609 | * | 1735 | * |
1610 | * This will be called when writing externally provided regions or when | 1736 | * This will be called when writing externally provided regions or when |
@@ -1677,16 +1803,21 @@ xlog_write(xfs_mount_t * mp, | |||
1677 | * xlog_op_header_t and may need to be double word aligned. | 1803 | * xlog_op_header_t and may need to be double word aligned. |
1678 | */ | 1804 | */ |
1679 | len = 0; | 1805 | len = 0; |
1680 | if (ticket->t_flags & XLOG_TIC_INITED) /* acct for start rec of xact */ | 1806 | if (ticket->t_flags & XLOG_TIC_INITED) { /* acct for start rec of xact */ |
1681 | len += sizeof(xlog_op_header_t); | 1807 | len += sizeof(xlog_op_header_t); |
1808 | XLOG_TIC_ADD_OPHDR(ticket); | ||
1809 | } | ||
1682 | 1810 | ||
1683 | for (index = 0; index < nentries; index++) { | 1811 | for (index = 0; index < nentries; index++) { |
1684 | len += sizeof(xlog_op_header_t); /* each region gets >= 1 */ | 1812 | len += sizeof(xlog_op_header_t); /* each region gets >= 1 */ |
1813 | XLOG_TIC_ADD_OPHDR(ticket); | ||
1685 | len += reg[index].i_len; | 1814 | len += reg[index].i_len; |
1815 | XLOG_TIC_ADD_REGION(ticket, reg[index].i_len, reg[index].i_type); | ||
1686 | } | 1816 | } |
1687 | contwr = *start_lsn = 0; | 1817 | contwr = *start_lsn = 0; |
1688 | 1818 | ||
1689 | if (ticket->t_curr_res < len) { | 1819 | if (ticket->t_curr_res < len) { |
1820 | xlog_print_tic_res(mp, ticket); | ||
1690 | #ifdef DEBUG | 1821 | #ifdef DEBUG |
1691 | xlog_panic( | 1822 | xlog_panic( |
1692 | "xfs_log_write: reservation ran out. Need to up reservation"); | 1823 | "xfs_log_write: reservation ran out. Need to up reservation"); |
@@ -1790,6 +1921,7 @@ xlog_write(xfs_mount_t * mp, | |||
1790 | len += sizeof(xlog_op_header_t); /* from splitting of region */ | 1921 | len += sizeof(xlog_op_header_t); /* from splitting of region */ |
1791 | /* account for new log op header */ | 1922 | /* account for new log op header */ |
1792 | ticket->t_curr_res -= sizeof(xlog_op_header_t); | 1923 | ticket->t_curr_res -= sizeof(xlog_op_header_t); |
1924 | XLOG_TIC_ADD_OPHDR(ticket); | ||
1793 | } | 1925 | } |
1794 | xlog_verify_dest_ptr(log, ptr); | 1926 | xlog_verify_dest_ptr(log, ptr); |
1795 | 1927 | ||
@@ -2282,6 +2414,9 @@ restart: | |||
2282 | */ | 2414 | */ |
2283 | if (log_offset == 0) { | 2415 | if (log_offset == 0) { |
2284 | ticket->t_curr_res -= log->l_iclog_hsize; | 2416 | ticket->t_curr_res -= log->l_iclog_hsize; |
2417 | XLOG_TIC_ADD_REGION(ticket, | ||
2418 | log->l_iclog_hsize, | ||
2419 | XLOG_REG_TYPE_LRHEADER); | ||
2285 | INT_SET(head->h_cycle, ARCH_CONVERT, log->l_curr_cycle); | 2420 | INT_SET(head->h_cycle, ARCH_CONVERT, log->l_curr_cycle); |
2286 | ASSIGN_LSN(head->h_lsn, log); | 2421 | ASSIGN_LSN(head->h_lsn, log); |
2287 | ASSERT(log->l_curr_block >= 0); | 2422 | ASSERT(log->l_curr_block >= 0); |
@@ -2468,6 +2603,7 @@ xlog_regrant_write_log_space(xlog_t *log, | |||
2468 | #endif | 2603 | #endif |
2469 | 2604 | ||
2470 | tic->t_curr_res = tic->t_unit_res; | 2605 | tic->t_curr_res = tic->t_unit_res; |
2606 | XLOG_TIC_RESET_RES(tic); | ||
2471 | 2607 | ||
2472 | if (tic->t_cnt > 0) | 2608 | if (tic->t_cnt > 0) |
2473 | return (0); | 2609 | return (0); |
@@ -2608,6 +2744,7 @@ xlog_regrant_reserve_log_space(xlog_t *log, | |||
2608 | XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'w'); | 2744 | XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'w'); |
2609 | XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'r'); | 2745 | XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'r'); |
2610 | ticket->t_curr_res = ticket->t_unit_res; | 2746 | ticket->t_curr_res = ticket->t_unit_res; |
2747 | XLOG_TIC_RESET_RES(ticket); | ||
2611 | xlog_trace_loggrant(log, ticket, | 2748 | xlog_trace_loggrant(log, ticket, |
2612 | "xlog_regrant_reserve_log_space: sub current res"); | 2749 | "xlog_regrant_reserve_log_space: sub current res"); |
2613 | xlog_verify_grant_head(log, 1); | 2750 | xlog_verify_grant_head(log, 1); |
@@ -2624,6 +2761,7 @@ xlog_regrant_reserve_log_space(xlog_t *log, | |||
2624 | xlog_verify_grant_head(log, 0); | 2761 | xlog_verify_grant_head(log, 0); |
2625 | GRANT_UNLOCK(log, s); | 2762 | GRANT_UNLOCK(log, s); |
2626 | ticket->t_curr_res = ticket->t_unit_res; | 2763 | ticket->t_curr_res = ticket->t_unit_res; |
2764 | XLOG_TIC_RESET_RES(ticket); | ||
2627 | } /* xlog_regrant_reserve_log_space */ | 2765 | } /* xlog_regrant_reserve_log_space */ |
2628 | 2766 | ||
2629 | 2767 | ||
@@ -3179,29 +3317,57 @@ xlog_ticket_get(xlog_t *log, | |||
3179 | * and their unit amount is the total amount of space required. | 3317 | * and their unit amount is the total amount of space required. |
3180 | * | 3318 | * |
3181 | * The following lines of code account for non-transaction data | 3319 | * The following lines of code account for non-transaction data |
3182 | * which occupy space in the on-disk log. | 3320 | * which occupy space in the on-disk log. |
3321 | * | ||
3322 | * Normal form of a transaction is: | ||
3323 | * <oph><trans-hdr><start-oph><reg1-oph><reg1><reg2-oph>...<commit-oph> | ||
3324 | * and then there are LR hdrs, split-recs and roundoff at end of syncs. | ||
3325 | * | ||
3326 | * We need to account for all the leadup data and trailer data | ||
3327 | * around the transaction data. | ||
3328 | * And then we need to account for the worst case in terms of using | ||
3329 | * more space. | ||
3330 | * The worst case will happen if: | ||
3331 | * - the placement of the transaction happens to be such that the | ||
3332 | * roundoff is at its maximum | ||
3333 | * - the transaction data is synced before the commit record is synced | ||
3334 | * i.e. <transaction-data><roundoff> | <commit-rec><roundoff> | ||
3335 | * Therefore the commit record is in its own Log Record. | ||
3336 | * This can happen as the commit record is called with its | ||
3337 | * own region to xlog_write(). | ||
3338 | * This then means that in the worst case, roundoff can happen for | ||
3339 | * the commit-rec as well. | ||
3340 | * The commit-rec is smaller than padding in this scenario and so it is | ||
3341 | * not added separately. | ||
3183 | */ | 3342 | */ |
3184 | 3343 | ||
3344 | /* for trans header */ | ||
3345 | unit_bytes += sizeof(xlog_op_header_t); | ||
3346 | unit_bytes += sizeof(xfs_trans_header_t); | ||
3347 | |||
3185 | /* for start-rec */ | 3348 | /* for start-rec */ |
3186 | unit_bytes += sizeof(xlog_op_header_t); | 3349 | unit_bytes += sizeof(xlog_op_header_t); |
3350 | |||
3351 | /* for LR headers */ | ||
3352 | num_headers = ((unit_bytes + log->l_iclog_size-1) >> log->l_iclog_size_log); | ||
3353 | unit_bytes += log->l_iclog_hsize * num_headers; | ||
3354 | |||
3355 | /* for commit-rec LR header - note: padding will subsume the ophdr */ | ||
3356 | unit_bytes += log->l_iclog_hsize; | ||
3357 | |||
3358 | /* for split-recs - ophdrs added when data split over LRs */ | ||
3359 | unit_bytes += sizeof(xlog_op_header_t) * num_headers; | ||
3187 | 3360 | ||
3188 | /* for padding */ | 3361 | /* for roundoff padding for transaction data and one for commit record */ |
3189 | if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) && | 3362 | if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) && |
3190 | log->l_mp->m_sb.sb_logsunit > 1) { | 3363 | log->l_mp->m_sb.sb_logsunit > 1) { |
3191 | /* log su roundoff */ | 3364 | /* log su roundoff */ |
3192 | unit_bytes += log->l_mp->m_sb.sb_logsunit; | 3365 | unit_bytes += 2*log->l_mp->m_sb.sb_logsunit; |
3193 | } else { | 3366 | } else { |
3194 | /* BB roundoff */ | 3367 | /* BB roundoff */ |
3195 | unit_bytes += BBSIZE; | 3368 | unit_bytes += 2*BBSIZE; |
3196 | } | 3369 | } |
3197 | 3370 | ||
3198 | /* for commit-rec */ | ||
3199 | unit_bytes += sizeof(xlog_op_header_t); | ||
3200 | |||
3201 | /* for LR headers */ | ||
3202 | num_headers = ((unit_bytes + log->l_iclog_size-1) >> log->l_iclog_size_log); | ||
3203 | unit_bytes += log->l_iclog_hsize * num_headers; | ||
3204 | |||
3205 | tic->t_unit_res = unit_bytes; | 3371 | tic->t_unit_res = unit_bytes; |
3206 | tic->t_curr_res = unit_bytes; | 3372 | tic->t_curr_res = unit_bytes; |
3207 | tic->t_cnt = cnt; | 3373 | tic->t_cnt = cnt; |
@@ -3209,10 +3375,13 @@ xlog_ticket_get(xlog_t *log, | |||
3209 | tic->t_tid = (xlog_tid_t)((__psint_t)tic & 0xffffffff); | 3375 | tic->t_tid = (xlog_tid_t)((__psint_t)tic & 0xffffffff); |
3210 | tic->t_clientid = client; | 3376 | tic->t_clientid = client; |
3211 | tic->t_flags = XLOG_TIC_INITED; | 3377 | tic->t_flags = XLOG_TIC_INITED; |
3378 | tic->t_trans_type = 0; | ||
3212 | if (xflags & XFS_LOG_PERM_RESERV) | 3379 | if (xflags & XFS_LOG_PERM_RESERV) |
3213 | tic->t_flags |= XLOG_TIC_PERM_RESERV; | 3380 | tic->t_flags |= XLOG_TIC_PERM_RESERV; |
3214 | sv_init(&(tic->t_sema), SV_DEFAULT, "logtick"); | 3381 | sv_init(&(tic->t_sema), SV_DEFAULT, "logtick"); |
3215 | 3382 | ||
3383 | XLOG_TIC_RESET_RES(tic); | ||
3384 | |||
3216 | return tic; | 3385 | return tic; |
3217 | } /* xlog_ticket_get */ | 3386 | } /* xlog_ticket_get */ |
3218 | 3387 | ||
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 0db122ddda3f..18961119fc65 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h | |||
@@ -114,9 +114,44 @@ xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2) | |||
114 | #define XFS_VOLUME 0x2 | 114 | #define XFS_VOLUME 0x2 |
115 | #define XFS_LOG 0xaa | 115 | #define XFS_LOG 0xaa |
116 | 116 | ||
117 | |||
118 | /* Region types for iovec's i_type */ | ||
119 | #if defined(XFS_LOG_RES_DEBUG) | ||
120 | #define XLOG_REG_TYPE_BFORMAT 1 | ||
121 | #define XLOG_REG_TYPE_BCHUNK 2 | ||
122 | #define XLOG_REG_TYPE_EFI_FORMAT 3 | ||
123 | #define XLOG_REG_TYPE_EFD_FORMAT 4 | ||
124 | #define XLOG_REG_TYPE_IFORMAT 5 | ||
125 | #define XLOG_REG_TYPE_ICORE 6 | ||
126 | #define XLOG_REG_TYPE_IEXT 7 | ||
127 | #define XLOG_REG_TYPE_IBROOT 8 | ||
128 | #define XLOG_REG_TYPE_ILOCAL 9 | ||
129 | #define XLOG_REG_TYPE_IATTR_EXT 10 | ||
130 | #define XLOG_REG_TYPE_IATTR_BROOT 11 | ||
131 | #define XLOG_REG_TYPE_IATTR_LOCAL 12 | ||
132 | #define XLOG_REG_TYPE_QFORMAT 13 | ||
133 | #define XLOG_REG_TYPE_DQUOT 14 | ||
134 | #define XLOG_REG_TYPE_QUOTAOFF 15 | ||
135 | #define XLOG_REG_TYPE_LRHEADER 16 | ||
136 | #define XLOG_REG_TYPE_UNMOUNT 17 | ||
137 | #define XLOG_REG_TYPE_COMMIT 18 | ||
138 | #define XLOG_REG_TYPE_TRANSHDR 19 | ||
139 | #define XLOG_REG_TYPE_MAX 19 | ||
140 | #endif | ||
141 | |||
142 | #if defined(XFS_LOG_RES_DEBUG) | ||
143 | #define XLOG_VEC_SET_TYPE(vecp, t) ((vecp)->i_type = (t)) | ||
144 | #else | ||
145 | #define XLOG_VEC_SET_TYPE(vecp, t) | ||
146 | #endif | ||
147 | |||
148 | |||
117 | typedef struct xfs_log_iovec { | 149 | typedef struct xfs_log_iovec { |
118 | xfs_caddr_t i_addr; /* beginning address of region */ | 150 | xfs_caddr_t i_addr; /* beginning address of region */ |
119 | int i_len; /* length in bytes of region */ | 151 | int i_len; /* length in bytes of region */ |
152 | #if defined(XFS_LOG_RES_DEBUG) | ||
153 | uint i_type; /* type of region */ | ||
154 | #endif | ||
120 | } xfs_log_iovec_t; | 155 | } xfs_log_iovec_t; |
121 | 156 | ||
122 | typedef void* xfs_log_ticket_t; | 157 | typedef void* xfs_log_ticket_t; |
@@ -159,7 +194,8 @@ int xfs_log_reserve(struct xfs_mount *mp, | |||
159 | int count, | 194 | int count, |
160 | xfs_log_ticket_t *ticket, | 195 | xfs_log_ticket_t *ticket, |
161 | __uint8_t clientid, | 196 | __uint8_t clientid, |
162 | uint flags); | 197 | uint flags, |
198 | uint t_type); | ||
163 | int xfs_log_write(struct xfs_mount *mp, | 199 | int xfs_log_write(struct xfs_mount *mp, |
164 | xfs_log_iovec_t region[], | 200 | xfs_log_iovec_t region[], |
165 | int nentries, | 201 | int nentries, |
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 1a1d452f15f9..a884cea82fca 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h | |||
@@ -112,7 +112,7 @@ struct xfs_mount; | |||
112 | * this has endian issues, of course. | 112 | * this has endian issues, of course. |
113 | */ | 113 | */ |
114 | 114 | ||
115 | #if __BYTE_ORDER == __LITTLE_ENDIAN | 115 | #ifndef XFS_NATIVE_HOST |
116 | #define GET_CLIENT_ID(i,arch) \ | 116 | #define GET_CLIENT_ID(i,arch) \ |
117 | ((i) & 0xff) | 117 | ((i) & 0xff) |
118 | #else | 118 | #else |
@@ -335,18 +335,66 @@ typedef __uint32_t xlog_tid_t; | |||
335 | 335 | ||
336 | #define XLOG_COVER_OPS 5 | 336 | #define XLOG_COVER_OPS 5 |
337 | 337 | ||
338 | |||
339 | /* Ticket reservation region accounting */ | ||
340 | #if defined(XFS_LOG_RES_DEBUG) | ||
341 | #define XLOG_TIC_LEN_MAX 15 | ||
342 | #define XLOG_TIC_RESET_RES(t) ((t)->t_res_num = \ | ||
343 | (t)->t_res_arr_sum = (t)->t_res_num_ophdrs = 0) | ||
344 | #define XLOG_TIC_ADD_OPHDR(t) ((t)->t_res_num_ophdrs++) | ||
345 | #define XLOG_TIC_ADD_REGION(t, len, type) \ | ||
346 | do { \ | ||
347 | if ((t)->t_res_num == XLOG_TIC_LEN_MAX) { \ | ||
348 | /* add to overflow and start again */ \ | ||
349 | (t)->t_res_o_flow += (t)->t_res_arr_sum; \ | ||
350 | (t)->t_res_num = 0; \ | ||
351 | (t)->t_res_arr_sum = 0; \ | ||
352 | } \ | ||
353 | (t)->t_res_arr[(t)->t_res_num].r_len = (len); \ | ||
354 | (t)->t_res_arr[(t)->t_res_num].r_type = (type); \ | ||
355 | (t)->t_res_arr_sum += (len); \ | ||
356 | (t)->t_res_num++; \ | ||
357 | } while (0) | ||
358 | |||
359 | /* | ||
360 | * Reservation region | ||
361 | * As would be stored in xfs_log_iovec but without the i_addr which | ||
362 | * we don't care about. | ||
363 | */ | ||
364 | typedef struct xlog_res { | ||
365 | uint r_len; | ||
366 | uint r_type; | ||
367 | } xlog_res_t; | ||
368 | #else | ||
369 | #define XLOG_TIC_RESET_RES(t) | ||
370 | #define XLOG_TIC_ADD_OPHDR(t) | ||
371 | #define XLOG_TIC_ADD_REGION(t, len, type) | ||
372 | #endif | ||
373 | |||
374 | |||
338 | typedef struct xlog_ticket { | 375 | typedef struct xlog_ticket { |
339 | sv_t t_sema; /* sleep on this semaphore :20 */ | 376 | sv_t t_sema; /* sleep on this semaphore : 20 */ |
340 | struct xlog_ticket *t_next; /* : 4 */ | 377 | struct xlog_ticket *t_next; /* :4|8 */ |
341 | struct xlog_ticket *t_prev; /* : 4 */ | 378 | struct xlog_ticket *t_prev; /* :4|8 */ |
342 | xlog_tid_t t_tid; /* transaction identifier : 4 */ | 379 | xlog_tid_t t_tid; /* transaction identifier : 4 */ |
343 | int t_curr_res; /* current reservation in bytes : 4 */ | 380 | int t_curr_res; /* current reservation in bytes : 4 */ |
344 | int t_unit_res; /* unit reservation in bytes : 4 */ | 381 | int t_unit_res; /* unit reservation in bytes : 4 */ |
345 | __uint8_t t_ocnt; /* original count : 1 */ | 382 | char t_ocnt; /* original count : 1 */ |
346 | __uint8_t t_cnt; /* current count : 1 */ | 383 | char t_cnt; /* current count : 1 */ |
347 | __uint8_t t_clientid; /* who does this belong to; : 1 */ | 384 | char t_clientid; /* who does this belong to; : 1 */ |
348 | __uint8_t t_flags; /* properties of reservation : 1 */ | 385 | char t_flags; /* properties of reservation : 1 */ |
386 | uint t_trans_type; /* transaction type : 4 */ | ||
387 | |||
388 | #if defined (XFS_LOG_RES_DEBUG) | ||
389 | /* reservation array fields */ | ||
390 | uint t_res_num; /* num in array : 4 */ | ||
391 | xlog_res_t t_res_arr[XLOG_TIC_LEN_MAX]; /* array of res : X */ | ||
392 | uint t_res_num_ophdrs; /* num op hdrs : 4 */ | ||
393 | uint t_res_arr_sum; /* array sum : 4 */ | ||
394 | uint t_res_o_flow; /* sum overflow : 4 */ | ||
395 | #endif | ||
349 | } xlog_ticket_t; | 396 | } xlog_ticket_t; |
397 | |||
350 | #endif | 398 | #endif |
351 | 399 | ||
352 | 400 | ||
@@ -366,14 +414,10 @@ typedef struct xlog_op_header { | |||
366 | #define XLOG_FMT_IRIX_BE 3 | 414 | #define XLOG_FMT_IRIX_BE 3 |
367 | 415 | ||
368 | /* our fmt */ | 416 | /* our fmt */ |
369 | #if __BYTE_ORDER == __LITTLE_ENDIAN | 417 | #ifdef XFS_NATIVE_HOST |
370 | #define XLOG_FMT XLOG_FMT_LINUX_LE | ||
371 | #else | ||
372 | #if __BYTE_ORDER == __BIG_ENDIAN | ||
373 | #define XLOG_FMT XLOG_FMT_LINUX_BE | 418 | #define XLOG_FMT XLOG_FMT_LINUX_BE |
374 | #else | 419 | #else |
375 | #error unknown byte order | 420 | #define XLOG_FMT XLOG_FMT_LINUX_LE |
376 | #endif | ||
377 | #endif | 421 | #endif |
378 | 422 | ||
379 | typedef struct xlog_rec_header { | 423 | typedef struct xlog_rec_header { |
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 0aac28ddb81c..14faabaabf29 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
@@ -1387,7 +1387,7 @@ xlog_recover_add_to_cont_trans( | |||
1387 | old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; | 1387 | old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; |
1388 | old_len = item->ri_buf[item->ri_cnt-1].i_len; | 1388 | old_len = item->ri_buf[item->ri_cnt-1].i_len; |
1389 | 1389 | ||
1390 | ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0); | 1390 | ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0u); |
1391 | memcpy(&ptr[old_len], dp, len); /* d, s, l */ | 1391 | memcpy(&ptr[old_len], dp, len); /* d, s, l */ |
1392 | item->ri_buf[item->ri_cnt-1].i_len += len; | 1392 | item->ri_buf[item->ri_cnt-1].i_len += len; |
1393 | item->ri_buf[item->ri_cnt-1].i_addr = ptr; | 1393 | item->ri_buf[item->ri_cnt-1].i_addr = ptr; |
diff --git a/fs/xfs/xfs_qmops.c b/fs/xfs/xfs_qmops.c index 4f40c92863d5..a6cd6324e946 100644 --- a/fs/xfs/xfs_qmops.c +++ b/fs/xfs/xfs_qmops.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. | 2 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify it | 4 | * This program is free software; you can redistribute it and/or modify it |
5 | * under the terms of version 2 of the GNU General Public License as | 5 | * under the terms of version 2 of the GNU General Public License as |
@@ -42,7 +42,8 @@ | |||
42 | #include "xfs_dir2.h" | 42 | #include "xfs_dir2.h" |
43 | #include "xfs_dmapi.h" | 43 | #include "xfs_dmapi.h" |
44 | #include "xfs_mount.h" | 44 | #include "xfs_mount.h" |
45 | 45 | #include "xfs_quota.h" | |
46 | #include "xfs_error.h" | ||
46 | 47 | ||
47 | STATIC struct xfs_dquot * | 48 | STATIC struct xfs_dquot * |
48 | xfs_dqvopchown_default( | 49 | xfs_dqvopchown_default( |
@@ -54,8 +55,79 @@ xfs_dqvopchown_default( | |||
54 | return NULL; | 55 | return NULL; |
55 | } | 56 | } |
56 | 57 | ||
58 | /* | ||
59 | * Clear the quotaflags in memory and in the superblock. | ||
60 | */ | ||
61 | int | ||
62 | xfs_mount_reset_sbqflags(xfs_mount_t *mp) | ||
63 | { | ||
64 | int error; | ||
65 | xfs_trans_t *tp; | ||
66 | unsigned long s; | ||
67 | |||
68 | mp->m_qflags = 0; | ||
69 | /* | ||
70 | * It is OK to look at sb_qflags here in mount path, | ||
71 | * without SB_LOCK. | ||
72 | */ | ||
73 | if (mp->m_sb.sb_qflags == 0) | ||
74 | return 0; | ||
75 | s = XFS_SB_LOCK(mp); | ||
76 | mp->m_sb.sb_qflags = 0; | ||
77 | XFS_SB_UNLOCK(mp, s); | ||
78 | |||
79 | /* | ||
80 | * if the fs is readonly, let the incore superblock run | ||
81 | * with quotas off but don't flush the update out to disk | ||
82 | */ | ||
83 | if (XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY) | ||
84 | return 0; | ||
85 | #ifdef QUOTADEBUG | ||
86 | xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes"); | ||
87 | #endif | ||
88 | tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); | ||
89 | if ((error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, | ||
90 | XFS_DEFAULT_LOG_COUNT))) { | ||
91 | xfs_trans_cancel(tp, 0); | ||
92 | xfs_fs_cmn_err(CE_ALERT, mp, | ||
93 | "xfs_mount_reset_sbqflags: Superblock update failed!"); | ||
94 | return error; | ||
95 | } | ||
96 | xfs_mod_sb(tp, XFS_SB_QFLAGS); | ||
97 | error = xfs_trans_commit(tp, 0, NULL); | ||
98 | return error; | ||
99 | } | ||
100 | |||
101 | STATIC int | ||
102 | xfs_noquota_init( | ||
103 | xfs_mount_t *mp, | ||
104 | uint *needquotamount, | ||
105 | uint *quotaflags) | ||
106 | { | ||
107 | int error = 0; | ||
108 | |||
109 | *quotaflags = 0; | ||
110 | *needquotamount = B_FALSE; | ||
111 | |||
112 | ASSERT(!XFS_IS_QUOTA_ON(mp)); | ||
113 | |||
114 | /* | ||
115 | * If a file system had quotas running earlier, but decided to | ||
116 | * mount without -o uquota/pquota/gquota options, revoke the | ||
117 | * quotachecked license. | ||
118 | */ | ||
119 | if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) { | ||
120 | cmn_err(CE_NOTE, | ||
121 | "XFS resetting qflags for filesystem %s", | ||
122 | mp->m_fsname); | ||
123 | |||
124 | error = xfs_mount_reset_sbqflags(mp); | ||
125 | } | ||
126 | return error; | ||
127 | } | ||
128 | |||
57 | xfs_qmops_t xfs_qmcore_stub = { | 129 | xfs_qmops_t xfs_qmcore_stub = { |
58 | .xfs_qminit = (xfs_qminit_t) fs_noerr, | 130 | .xfs_qminit = (xfs_qminit_t) xfs_noquota_init, |
59 | .xfs_qmdone = (xfs_qmdone_t) fs_noerr, | 131 | .xfs_qmdone = (xfs_qmdone_t) fs_noerr, |
60 | .xfs_qmmount = (xfs_qmmount_t) fs_noerr, | 132 | .xfs_qmmount = (xfs_qmmount_t) fs_noerr, |
61 | .xfs_qmunmount = (xfs_qmunmount_t) fs_noerr, | 133 | .xfs_qmunmount = (xfs_qmunmount_t) fs_noerr, |
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index 7134576ae7fa..32cb79752d5d 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. | 2 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify it | 4 | * This program is free software; you can redistribute it and/or modify it |
5 | * under the terms of version 2 of the GNU General Public License as | 5 | * under the terms of version 2 of the GNU General Public License as |
@@ -160,6 +160,20 @@ typedef struct xfs_qoff_logformat { | |||
160 | #define XFS_GQUOTA_ACCT 0x0040 /* group quota accounting ON */ | 160 | #define XFS_GQUOTA_ACCT 0x0040 /* group quota accounting ON */ |
161 | 161 | ||
162 | /* | 162 | /* |
163 | * Quota Accounting/Enforcement flags | ||
164 | */ | ||
165 | #define XFS_ALL_QUOTA_ACCT \ | ||
166 | (XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT | XFS_PQUOTA_ACCT) | ||
167 | #define XFS_ALL_QUOTA_ENFD (XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD) | ||
168 | #define XFS_ALL_QUOTA_CHKD (XFS_UQUOTA_CHKD | XFS_OQUOTA_CHKD) | ||
169 | |||
170 | #define XFS_IS_QUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT) | ||
171 | #define XFS_IS_QUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ENFD) | ||
172 | #define XFS_IS_UQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_UQUOTA_ACCT) | ||
173 | #define XFS_IS_PQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_PQUOTA_ACCT) | ||
174 | #define XFS_IS_GQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_GQUOTA_ACCT) | ||
175 | |||
176 | /* | ||
163 | * Incore only flags for quotaoff - these bits get cleared when quota(s) | 177 | * Incore only flags for quotaoff - these bits get cleared when quota(s) |
164 | * are in the process of getting turned off. These flags are in m_qflags but | 178 | * are in the process of getting turned off. These flags are in m_qflags but |
165 | * never in sb_qflags. | 179 | * never in sb_qflags. |
@@ -362,6 +376,7 @@ typedef struct xfs_dqtrxops { | |||
362 | f | XFS_QMOPT_RES_REGBLKS) | 376 | f | XFS_QMOPT_RES_REGBLKS) |
363 | 377 | ||
364 | extern int xfs_qm_dqcheck(xfs_disk_dquot_t *, xfs_dqid_t, uint, uint, char *); | 378 | extern int xfs_qm_dqcheck(xfs_disk_dquot_t *, xfs_dqid_t, uint, uint, char *); |
379 | extern int xfs_mount_reset_sbqflags(struct xfs_mount *); | ||
365 | 380 | ||
366 | extern struct bhv_vfsops xfs_qmops; | 381 | extern struct bhv_vfsops xfs_qmops; |
367 | 382 | ||
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 06dfca531f79..92efe272b83d 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c | |||
@@ -276,7 +276,7 @@ xfs_trans_reserve( | |||
276 | 276 | ||
277 | error = xfs_log_reserve(tp->t_mountp, logspace, logcount, | 277 | error = xfs_log_reserve(tp->t_mountp, logspace, logcount, |
278 | &tp->t_ticket, | 278 | &tp->t_ticket, |
279 | XFS_TRANSACTION, log_flags); | 279 | XFS_TRANSACTION, log_flags, tp->t_type); |
280 | if (error) { | 280 | if (error) { |
281 | goto undo_blocks; | 281 | goto undo_blocks; |
282 | } | 282 | } |
@@ -1032,6 +1032,7 @@ xfs_trans_fill_vecs( | |||
1032 | tp->t_header.th_num_items = nitems; | 1032 | tp->t_header.th_num_items = nitems; |
1033 | log_vector->i_addr = (xfs_caddr_t)&tp->t_header; | 1033 | log_vector->i_addr = (xfs_caddr_t)&tp->t_header; |
1034 | log_vector->i_len = sizeof(xfs_trans_header_t); | 1034 | log_vector->i_len = sizeof(xfs_trans_header_t); |
1035 | XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_TRANSHDR); | ||
1035 | } | 1036 | } |
1036 | 1037 | ||
1037 | 1038 | ||
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index ec541d66fa2a..a263aec8b3a6 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h | |||
@@ -112,6 +112,7 @@ typedef struct xfs_trans_header { | |||
112 | #define XFS_TRANS_GROWFSRT_ZERO 38 | 112 | #define XFS_TRANS_GROWFSRT_ZERO 38 |
113 | #define XFS_TRANS_GROWFSRT_FREE 39 | 113 | #define XFS_TRANS_GROWFSRT_FREE 39 |
114 | #define XFS_TRANS_SWAPEXT 40 | 114 | #define XFS_TRANS_SWAPEXT 40 |
115 | #define XFS_TRANS_TYPE_MAX 40 | ||
115 | /* new transaction types need to be reflected in xfs_logprint(8) */ | 116 | /* new transaction types need to be reflected in xfs_logprint(8) */ |
116 | 117 | ||
117 | 118 | ||
@@ -998,6 +999,7 @@ struct xfs_buf *xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *, int); | |||
998 | void xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *); | 999 | void xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *); |
999 | void xfs_trans_bjoin(xfs_trans_t *, struct xfs_buf *); | 1000 | void xfs_trans_bjoin(xfs_trans_t *, struct xfs_buf *); |
1000 | void xfs_trans_bhold(xfs_trans_t *, struct xfs_buf *); | 1001 | void xfs_trans_bhold(xfs_trans_t *, struct xfs_buf *); |
1002 | void xfs_trans_bhold_release(xfs_trans_t *, struct xfs_buf *); | ||
1001 | void xfs_trans_binval(xfs_trans_t *, struct xfs_buf *); | 1003 | void xfs_trans_binval(xfs_trans_t *, struct xfs_buf *); |
1002 | void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *); | 1004 | void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *); |
1003 | void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *); | 1005 | void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *); |
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 7bc5eab4c2c1..2a71b4f91bfa 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c | |||
@@ -379,8 +379,8 @@ xfs_trans_delete_ail( | |||
379 | else { | 379 | else { |
380 | xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp, | 380 | xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp, |
381 | "xfs_trans_delete_ail: attempting to delete a log item that is not in the AIL"); | 381 | "xfs_trans_delete_ail: attempting to delete a log item that is not in the AIL"); |
382 | xfs_force_shutdown(mp, XFS_CORRUPT_INCORE); | ||
383 | AIL_UNLOCK(mp, s); | 382 | AIL_UNLOCK(mp, s); |
383 | xfs_force_shutdown(mp, XFS_CORRUPT_INCORE); | ||
384 | } | 384 | } |
385 | } | 385 | } |
386 | } | 386 | } |
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 144da7a85466..e733293dd7f4 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c | |||
@@ -714,6 +714,29 @@ xfs_trans_bhold(xfs_trans_t *tp, | |||
714 | } | 714 | } |
715 | 715 | ||
716 | /* | 716 | /* |
717 | * Cancel the previous buffer hold request made on this buffer | ||
718 | * for this transaction. | ||
719 | */ | ||
720 | void | ||
721 | xfs_trans_bhold_release(xfs_trans_t *tp, | ||
722 | xfs_buf_t *bp) | ||
723 | { | ||
724 | xfs_buf_log_item_t *bip; | ||
725 | |||
726 | ASSERT(XFS_BUF_ISBUSY(bp)); | ||
727 | ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); | ||
728 | ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); | ||
729 | |||
730 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); | ||
731 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); | ||
732 | ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL)); | ||
733 | ASSERT(atomic_read(&bip->bli_refcount) > 0); | ||
734 | ASSERT(bip->bli_flags & XFS_BLI_HOLD); | ||
735 | bip->bli_flags &= ~XFS_BLI_HOLD; | ||
736 | xfs_buf_item_trace("BHOLD RELEASE", bip); | ||
737 | } | ||
738 | |||
739 | /* | ||
717 | * This is called to mark bytes first through last inclusive of the given | 740 | * This is called to mark bytes first through last inclusive of the given |
718 | * buffer as needing to be logged when the transaction is committed. | 741 | * buffer as needing to be logged when the transaction is committed. |
719 | * The buffer must already be associated with the given transaction. | 742 | * The buffer must already be associated with the given transaction. |
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index 42bcc0215203..f1a904e23ade 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c | |||
@@ -795,7 +795,6 @@ xfs_statvfs( | |||
795 | xfs_mount_t *mp; | 795 | xfs_mount_t *mp; |
796 | xfs_sb_t *sbp; | 796 | xfs_sb_t *sbp; |
797 | unsigned long s; | 797 | unsigned long s; |
798 | u64 id; | ||
799 | 798 | ||
800 | mp = XFS_BHVTOM(bdp); | 799 | mp = XFS_BHVTOM(bdp); |
801 | sbp = &(mp->m_sb); | 800 | sbp = &(mp->m_sb); |
@@ -823,9 +822,7 @@ xfs_statvfs( | |||
823 | statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); | 822 | statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); |
824 | XFS_SB_UNLOCK(mp, s); | 823 | XFS_SB_UNLOCK(mp, s); |
825 | 824 | ||
826 | id = huge_encode_dev(mp->m_dev); | 825 | xfs_statvfs_fsid(statp, mp); |
827 | statp->f_fsid.val[0] = (u32)id; | ||
828 | statp->f_fsid.val[1] = (u32)(id >> 32); | ||
829 | statp->f_namelen = MAXNAMELEN - 1; | 826 | statp->f_namelen = MAXNAMELEN - 1; |
830 | 827 | ||
831 | return 0; | 828 | return 0; |
@@ -906,7 +903,6 @@ xfs_sync_inodes( | |||
906 | xfs_inode_t *ip_next; | 903 | xfs_inode_t *ip_next; |
907 | xfs_buf_t *bp; | 904 | xfs_buf_t *bp; |
908 | vnode_t *vp = NULL; | 905 | vnode_t *vp = NULL; |
909 | vmap_t vmap; | ||
910 | int error; | 906 | int error; |
911 | int last_error; | 907 | int last_error; |
912 | uint64_t fflag; | 908 | uint64_t fflag; |
@@ -1101,48 +1097,21 @@ xfs_sync_inodes( | |||
1101 | * lock in xfs_ireclaim() after the inode is pulled from | 1097 | * lock in xfs_ireclaim() after the inode is pulled from |
1102 | * the mount list will sleep until we release it here. | 1098 | * the mount list will sleep until we release it here. |
1103 | * This keeps the vnode from being freed while we reference | 1099 | * This keeps the vnode from being freed while we reference |
1104 | * it. It is also cheaper and simpler than actually doing | 1100 | * it. |
1105 | * a vn_get() for every inode we touch here. | ||
1106 | */ | 1101 | */ |
1107 | if (xfs_ilock_nowait(ip, lock_flags) == 0) { | 1102 | if (xfs_ilock_nowait(ip, lock_flags) == 0) { |
1108 | |||
1109 | if ((flags & SYNC_BDFLUSH) || (vp == NULL)) { | 1103 | if ((flags & SYNC_BDFLUSH) || (vp == NULL)) { |
1110 | ip = ip->i_mnext; | 1104 | ip = ip->i_mnext; |
1111 | continue; | 1105 | continue; |
1112 | } | 1106 | } |
1113 | 1107 | ||
1114 | /* | 1108 | vp = vn_grab(vp); |
1115 | * We need to unlock the inode list lock in order | ||
1116 | * to lock the inode. Insert a marker record into | ||
1117 | * the inode list to remember our position, dropping | ||
1118 | * the lock is now done inside the IPOINTER_INSERT | ||
1119 | * macro. | ||
1120 | * | ||
1121 | * We also use the inode list lock to protect us | ||
1122 | * in taking a snapshot of the vnode version number | ||
1123 | * for use in calling vn_get(). | ||
1124 | */ | ||
1125 | VMAP(vp, vmap); | ||
1126 | IPOINTER_INSERT(ip, mp); | ||
1127 | |||
1128 | vp = vn_get(vp, &vmap); | ||
1129 | if (vp == NULL) { | 1109 | if (vp == NULL) { |
1130 | /* | 1110 | ip = ip->i_mnext; |
1131 | * The vnode was reclaimed once we let go | ||
1132 | * of the inode list lock. Skip to the | ||
1133 | * next list entry. Remove the marker. | ||
1134 | */ | ||
1135 | |||
1136 | XFS_MOUNT_ILOCK(mp); | ||
1137 | |||
1138 | mount_locked = B_TRUE; | ||
1139 | vnode_refed = B_FALSE; | ||
1140 | |||
1141 | IPOINTER_REMOVE(ip, mp); | ||
1142 | |||
1143 | continue; | 1111 | continue; |
1144 | } | 1112 | } |
1145 | 1113 | ||
1114 | IPOINTER_INSERT(ip, mp); | ||
1146 | xfs_ilock(ip, lock_flags); | 1115 | xfs_ilock(ip, lock_flags); |
1147 | 1116 | ||
1148 | ASSERT(vp == XFS_ITOV(ip)); | 1117 | ASSERT(vp == XFS_ITOV(ip)); |
@@ -1533,7 +1502,10 @@ xfs_syncsub( | |||
1533 | * eventually kicked out of the cache. | 1502 | * eventually kicked out of the cache. |
1534 | */ | 1503 | */ |
1535 | if (flags & SYNC_REFCACHE) { | 1504 | if (flags & SYNC_REFCACHE) { |
1536 | xfs_refcache_purge_some(mp); | 1505 | if (flags & SYNC_WAIT) |
1506 | xfs_refcache_purge_mp(mp); | ||
1507 | else | ||
1508 | xfs_refcache_purge_some(mp); | ||
1537 | } | 1509 | } |
1538 | 1510 | ||
1539 | /* | 1511 | /* |
@@ -1649,6 +1621,10 @@ xfs_vget( | |||
1649 | #define MNTOPT_SWIDTH "swidth" /* data volume stripe width */ | 1621 | #define MNTOPT_SWIDTH "swidth" /* data volume stripe width */ |
1650 | #define MNTOPT_NOUUID "nouuid" /* ignore filesystem UUID */ | 1622 | #define MNTOPT_NOUUID "nouuid" /* ignore filesystem UUID */ |
1651 | #define MNTOPT_MTPT "mtpt" /* filesystem mount point */ | 1623 | #define MNTOPT_MTPT "mtpt" /* filesystem mount point */ |
1624 | #define MNTOPT_GRPID "grpid" /* group-ID from parent directory */ | ||
1625 | #define MNTOPT_NOGRPID "nogrpid" /* group-ID from current process */ | ||
1626 | #define MNTOPT_BSDGROUPS "bsdgroups" /* group-ID from parent directory */ | ||
1627 | #define MNTOPT_SYSVGROUPS "sysvgroups" /* group-ID from current process */ | ||
1652 | #define MNTOPT_ALLOCSIZE "allocsize" /* preferred allocation size */ | 1628 | #define MNTOPT_ALLOCSIZE "allocsize" /* preferred allocation size */ |
1653 | #define MNTOPT_IHASHSIZE "ihashsize" /* size of inode hash table */ | 1629 | #define MNTOPT_IHASHSIZE "ihashsize" /* size of inode hash table */ |
1654 | #define MNTOPT_NORECOVERY "norecovery" /* don't run XFS recovery */ | 1630 | #define MNTOPT_NORECOVERY "norecovery" /* don't run XFS recovery */ |
@@ -1769,6 +1745,12 @@ xfs_parseargs( | |||
1769 | } | 1745 | } |
1770 | args->flags |= XFSMNT_IHASHSIZE; | 1746 | args->flags |= XFSMNT_IHASHSIZE; |
1771 | args->ihashsize = simple_strtoul(value, &eov, 10); | 1747 | args->ihashsize = simple_strtoul(value, &eov, 10); |
1748 | } else if (!strcmp(this_char, MNTOPT_GRPID) || | ||
1749 | !strcmp(this_char, MNTOPT_BSDGROUPS)) { | ||
1750 | vfsp->vfs_flag |= VFS_GRPID; | ||
1751 | } else if (!strcmp(this_char, MNTOPT_NOGRPID) || | ||
1752 | !strcmp(this_char, MNTOPT_SYSVGROUPS)) { | ||
1753 | vfsp->vfs_flag &= ~VFS_GRPID; | ||
1772 | } else if (!strcmp(this_char, MNTOPT_WSYNC)) { | 1754 | } else if (!strcmp(this_char, MNTOPT_WSYNC)) { |
1773 | args->flags |= XFSMNT_WSYNC; | 1755 | args->flags |= XFSMNT_WSYNC; |
1774 | } else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) { | 1756 | } else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) { |
@@ -1890,6 +1872,7 @@ xfs_showargs( | |||
1890 | }; | 1872 | }; |
1891 | struct proc_xfs_info *xfs_infop; | 1873 | struct proc_xfs_info *xfs_infop; |
1892 | struct xfs_mount *mp = XFS_BHVTOM(bhv); | 1874 | struct xfs_mount *mp = XFS_BHVTOM(bhv); |
1875 | struct vfs *vfsp = XFS_MTOVFS(mp); | ||
1893 | 1876 | ||
1894 | for (xfs_infop = xfs_info; xfs_infop->flag; xfs_infop++) { | 1877 | for (xfs_infop = xfs_info; xfs_infop->flag; xfs_infop++) { |
1895 | if (mp->m_flags & xfs_infop->flag) | 1878 | if (mp->m_flags & xfs_infop->flag) |
@@ -1926,7 +1909,10 @@ xfs_showargs( | |||
1926 | 1909 | ||
1927 | if (!(mp->m_flags & XFS_MOUNT_32BITINOOPT)) | 1910 | if (!(mp->m_flags & XFS_MOUNT_32BITINOOPT)) |
1928 | seq_printf(m, "," MNTOPT_64BITINODE); | 1911 | seq_printf(m, "," MNTOPT_64BITINODE); |
1929 | 1912 | ||
1913 | if (vfsp->vfs_flag & VFS_GRPID) | ||
1914 | seq_printf(m, "," MNTOPT_GRPID); | ||
1915 | |||
1930 | return 0; | 1916 | return 0; |
1931 | } | 1917 | } |
1932 | 1918 | ||
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 1377c868f3f4..58bfe629b933 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c | |||
@@ -104,7 +104,7 @@ xfs_open( | |||
104 | * If it's a directory with any blocks, read-ahead block 0 | 104 | * If it's a directory with any blocks, read-ahead block 0 |
105 | * as we're almost certain to have the next operation be a read there. | 105 | * as we're almost certain to have the next operation be a read there. |
106 | */ | 106 | */ |
107 | if (vp->v_type == VDIR && ip->i_d.di_nextents > 0) { | 107 | if (VN_ISDIR(vp) && ip->i_d.di_nextents > 0) { |
108 | mode = xfs_ilock_map_shared(ip); | 108 | mode = xfs_ilock_map_shared(ip); |
109 | if (ip->i_d.di_nextents > 0) | 109 | if (ip->i_d.di_nextents > 0) |
110 | (void)xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK); | 110 | (void)xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK); |
@@ -163,18 +163,21 @@ xfs_getattr( | |||
163 | /* | 163 | /* |
164 | * Copy from in-core inode. | 164 | * Copy from in-core inode. |
165 | */ | 165 | */ |
166 | vap->va_type = vp->v_type; | 166 | vap->va_mode = ip->i_d.di_mode; |
167 | vap->va_mode = ip->i_d.di_mode & MODEMASK; | ||
168 | vap->va_uid = ip->i_d.di_uid; | 167 | vap->va_uid = ip->i_d.di_uid; |
169 | vap->va_gid = ip->i_d.di_gid; | 168 | vap->va_gid = ip->i_d.di_gid; |
170 | vap->va_projid = ip->i_d.di_projid; | 169 | vap->va_projid = ip->i_d.di_projid; |
171 | 170 | ||
172 | /* | 171 | /* |
173 | * Check vnode type block/char vs. everything else. | 172 | * Check vnode type block/char vs. everything else. |
174 | * Do it with bitmask because that's faster than looking | ||
175 | * for multiple values individually. | ||
176 | */ | 173 | */ |
177 | if (((1 << vp->v_type) & ((1<<VBLK) | (1<<VCHR))) == 0) { | 174 | switch (ip->i_d.di_mode & S_IFMT) { |
175 | case S_IFBLK: | ||
176 | case S_IFCHR: | ||
177 | vap->va_rdev = ip->i_df.if_u2.if_rdev; | ||
178 | vap->va_blocksize = BLKDEV_IOSIZE; | ||
179 | break; | ||
180 | default: | ||
178 | vap->va_rdev = 0; | 181 | vap->va_rdev = 0; |
179 | 182 | ||
180 | if (!(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) { | 183 | if (!(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) { |
@@ -224,9 +227,7 @@ xfs_getattr( | |||
224 | (ip->i_d.di_extsize << mp->m_sb.sb_blocklog) : | 227 | (ip->i_d.di_extsize << mp->m_sb.sb_blocklog) : |
225 | (mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog); | 228 | (mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog); |
226 | } | 229 | } |
227 | } else { | 230 | break; |
228 | vap->va_rdev = ip->i_df.if_u2.if_rdev; | ||
229 | vap->va_blocksize = BLKDEV_IOSIZE; | ||
230 | } | 231 | } |
231 | 232 | ||
232 | vap->va_atime.tv_sec = ip->i_d.di_atime.t_sec; | 233 | vap->va_atime.tv_sec = ip->i_d.di_atime.t_sec; |
@@ -468,7 +469,7 @@ xfs_setattr( | |||
468 | m |= S_ISGID; | 469 | m |= S_ISGID; |
469 | #if 0 | 470 | #if 0 |
470 | /* Linux allows this, Irix doesn't. */ | 471 | /* Linux allows this, Irix doesn't. */ |
471 | if ((vap->va_mode & S_ISVTX) && vp->v_type != VDIR) | 472 | if ((vap->va_mode & S_ISVTX) && !VN_ISDIR(vp)) |
472 | m |= S_ISVTX; | 473 | m |= S_ISVTX; |
473 | #endif | 474 | #endif |
474 | if (m && !capable(CAP_FSETID)) | 475 | if (m && !capable(CAP_FSETID)) |
@@ -546,10 +547,10 @@ xfs_setattr( | |||
546 | goto error_return; | 547 | goto error_return; |
547 | } | 548 | } |
548 | 549 | ||
549 | if (vp->v_type == VDIR) { | 550 | if (VN_ISDIR(vp)) { |
550 | code = XFS_ERROR(EISDIR); | 551 | code = XFS_ERROR(EISDIR); |
551 | goto error_return; | 552 | goto error_return; |
552 | } else if (vp->v_type != VREG) { | 553 | } else if (!VN_ISREG(vp)) { |
553 | code = XFS_ERROR(EINVAL); | 554 | code = XFS_ERROR(EINVAL); |
554 | goto error_return; | 555 | goto error_return; |
555 | } | 556 | } |
@@ -1567,7 +1568,7 @@ xfs_release( | |||
1567 | vp = BHV_TO_VNODE(bdp); | 1568 | vp = BHV_TO_VNODE(bdp); |
1568 | ip = XFS_BHVTOI(bdp); | 1569 | ip = XFS_BHVTOI(bdp); |
1569 | 1570 | ||
1570 | if ((vp->v_type != VREG) || (ip->i_d.di_mode == 0)) { | 1571 | if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0)) { |
1571 | return 0; | 1572 | return 0; |
1572 | } | 1573 | } |
1573 | 1574 | ||
@@ -1895,7 +1896,7 @@ xfs_create( | |||
1895 | dp = XFS_BHVTOI(dir_bdp); | 1896 | dp = XFS_BHVTOI(dir_bdp); |
1896 | mp = dp->i_mount; | 1897 | mp = dp->i_mount; |
1897 | 1898 | ||
1898 | dm_di_mode = vap->va_mode|VTTOIF(vap->va_type); | 1899 | dm_di_mode = vap->va_mode; |
1899 | namelen = VNAMELEN(dentry); | 1900 | namelen = VNAMELEN(dentry); |
1900 | 1901 | ||
1901 | if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) { | 1902 | if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) { |
@@ -1973,8 +1974,7 @@ xfs_create( | |||
1973 | (error = XFS_DIR_CANENTER(mp, tp, dp, name, namelen))) | 1974 | (error = XFS_DIR_CANENTER(mp, tp, dp, name, namelen))) |
1974 | goto error_return; | 1975 | goto error_return; |
1975 | rdev = (vap->va_mask & XFS_AT_RDEV) ? vap->va_rdev : 0; | 1976 | rdev = (vap->va_mask & XFS_AT_RDEV) ? vap->va_rdev : 0; |
1976 | error = xfs_dir_ialloc(&tp, dp, | 1977 | error = xfs_dir_ialloc(&tp, dp, vap->va_mode, 1, |
1977 | MAKEIMODE(vap->va_type,vap->va_mode), 1, | ||
1978 | rdev, credp, prid, resblks > 0, | 1978 | rdev, credp, prid, resblks > 0, |
1979 | &ip, &committed); | 1979 | &ip, &committed); |
1980 | if (error) { | 1980 | if (error) { |
@@ -2620,7 +2620,7 @@ xfs_link( | |||
2620 | vn_trace_entry(src_vp, __FUNCTION__, (inst_t *)__return_address); | 2620 | vn_trace_entry(src_vp, __FUNCTION__, (inst_t *)__return_address); |
2621 | 2621 | ||
2622 | target_namelen = VNAMELEN(dentry); | 2622 | target_namelen = VNAMELEN(dentry); |
2623 | if (src_vp->v_type == VDIR) | 2623 | if (VN_ISDIR(src_vp)) |
2624 | return XFS_ERROR(EPERM); | 2624 | return XFS_ERROR(EPERM); |
2625 | 2625 | ||
2626 | src_bdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(src_vp), &xfs_vnodeops); | 2626 | src_bdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(src_vp), &xfs_vnodeops); |
@@ -2805,7 +2805,7 @@ xfs_mkdir( | |||
2805 | 2805 | ||
2806 | tp = NULL; | 2806 | tp = NULL; |
2807 | dp_joined_to_trans = B_FALSE; | 2807 | dp_joined_to_trans = B_FALSE; |
2808 | dm_di_mode = vap->va_mode|VTTOIF(vap->va_type); | 2808 | dm_di_mode = vap->va_mode; |
2809 | 2809 | ||
2810 | if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) { | 2810 | if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) { |
2811 | error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE, | 2811 | error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE, |
@@ -2879,8 +2879,7 @@ xfs_mkdir( | |||
2879 | /* | 2879 | /* |
2880 | * create the directory inode. | 2880 | * create the directory inode. |
2881 | */ | 2881 | */ |
2882 | error = xfs_dir_ialloc(&tp, dp, | 2882 | error = xfs_dir_ialloc(&tp, dp, vap->va_mode, 2, |
2883 | MAKEIMODE(vap->va_type,vap->va_mode), 2, | ||
2884 | 0, credp, prid, resblks > 0, | 2883 | 0, credp, prid, resblks > 0, |
2885 | &cdp, NULL); | 2884 | &cdp, NULL); |
2886 | if (error) { | 2885 | if (error) { |
@@ -3650,7 +3649,7 @@ xfs_rwlock( | |||
3650 | vnode_t *vp; | 3649 | vnode_t *vp; |
3651 | 3650 | ||
3652 | vp = BHV_TO_VNODE(bdp); | 3651 | vp = BHV_TO_VNODE(bdp); |
3653 | if (vp->v_type == VDIR) | 3652 | if (VN_ISDIR(vp)) |
3654 | return 1; | 3653 | return 1; |
3655 | ip = XFS_BHVTOI(bdp); | 3654 | ip = XFS_BHVTOI(bdp); |
3656 | if (locktype == VRWLOCK_WRITE) { | 3655 | if (locktype == VRWLOCK_WRITE) { |
@@ -3681,7 +3680,7 @@ xfs_rwunlock( | |||
3681 | vnode_t *vp; | 3680 | vnode_t *vp; |
3682 | 3681 | ||
3683 | vp = BHV_TO_VNODE(bdp); | 3682 | vp = BHV_TO_VNODE(bdp); |
3684 | if (vp->v_type == VDIR) | 3683 | if (VN_ISDIR(vp)) |
3685 | return; | 3684 | return; |
3686 | ip = XFS_BHVTOI(bdp); | 3685 | ip = XFS_BHVTOI(bdp); |
3687 | if (locktype == VRWLOCK_WRITE) { | 3686 | if (locktype == VRWLOCK_WRITE) { |
@@ -3847,51 +3846,10 @@ xfs_reclaim( | |||
3847 | return 0; | 3846 | return 0; |
3848 | } | 3847 | } |
3849 | 3848 | ||
3850 | if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) { | 3849 | vn_iowait(vp); |
3851 | if (ip->i_d.di_size > 0) { | ||
3852 | /* | ||
3853 | * Flush and invalidate any data left around that is | ||
3854 | * a part of this file. | ||
3855 | * | ||
3856 | * Get the inode's i/o lock so that buffers are pushed | ||
3857 | * out while holding the proper lock. We can't hold | ||
3858 | * the inode lock here since flushing out buffers may | ||
3859 | * cause us to try to get the lock in xfs_strategy(). | ||
3860 | * | ||
3861 | * We don't have to call remapf() here, because there | ||
3862 | * cannot be any mapped file references to this vnode | ||
3863 | * since it is being reclaimed. | ||
3864 | */ | ||
3865 | xfs_ilock(ip, XFS_IOLOCK_EXCL); | ||
3866 | |||
3867 | /* | ||
3868 | * If we hit an IO error, we need to make sure that the | ||
3869 | * buffer and page caches of file data for | ||
3870 | * the file are tossed away. We don't want to use | ||
3871 | * VOP_FLUSHINVAL_PAGES here because we don't want dirty | ||
3872 | * pages to stay attached to the vnode, but be | ||
3873 | * marked P_BAD. pdflush/vnode_pagebad | ||
3874 | * hates that. | ||
3875 | */ | ||
3876 | if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { | ||
3877 | VOP_FLUSHINVAL_PAGES(vp, 0, -1, FI_NONE); | ||
3878 | } else { | ||
3879 | VOP_TOSS_PAGES(vp, 0, -1, FI_NONE); | ||
3880 | } | ||
3881 | 3850 | ||
3882 | ASSERT(VN_CACHED(vp) == 0); | 3851 | ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); |
3883 | ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || | 3852 | ASSERT(VN_CACHED(vp) == 0); |
3884 | ip->i_delayed_blks == 0); | ||
3885 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | ||
3886 | } else if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { | ||
3887 | /* | ||
3888 | * di_size field may not be quite accurate if we're | ||
3889 | * shutting down. | ||
3890 | */ | ||
3891 | VOP_TOSS_PAGES(vp, 0, -1, FI_NONE); | ||
3892 | ASSERT(VN_CACHED(vp) == 0); | ||
3893 | } | ||
3894 | } | ||
3895 | 3853 | ||
3896 | /* If we have nothing to flush with this inode then complete the | 3854 | /* If we have nothing to flush with this inode then complete the |
3897 | * teardown now, otherwise break the link between the xfs inode | 3855 | * teardown now, otherwise break the link between the xfs inode |
@@ -4567,7 +4525,7 @@ xfs_change_file_space( | |||
4567 | /* | 4525 | /* |
4568 | * must be a regular file and have write permission | 4526 | * must be a regular file and have write permission |
4569 | */ | 4527 | */ |
4570 | if (vp->v_type != VREG) | 4528 | if (!VN_ISREG(vp)) |
4571 | return XFS_ERROR(EINVAL); | 4529 | return XFS_ERROR(EINVAL); |
4572 | 4530 | ||
4573 | xfs_ilock(ip, XFS_ILOCK_SHARED); | 4531 | xfs_ilock(ip, XFS_ILOCK_SHARED); |