-rw-r--r--  Documentation/filesystems/fuse.txt | 341
-rw-r--r--  fs/fuse/Makefile                   |   2
-rw-r--r--  fs/fuse/dev.c                      | 884
-rw-r--r--  fs/fuse/fuse_i.h                   | 223
-rw-r--r--  fs/fuse/inode.c                    |  58
-rw-r--r--  include/linux/fuse.h               |  36
6 files changed, 1537 insertions(+), 7 deletions(-)
diff --git a/Documentation/filesystems/fuse.txt b/Documentation/filesystems/fuse.txt
new file mode 100644
index 000000000000..83f96cf56960
--- /dev/null
+++ b/Documentation/filesystems/fuse.txt
@@ -0,0 +1,341 @@
+Definitions
+~~~~~~~~~~~
+
+Userspace filesystem:
+
+  A filesystem in which data and metadata are provided by an ordinary
+  userspace process.  The filesystem can be accessed normally through
+  the kernel interface.
+
+Filesystem daemon:
+
+  The process(es) providing the data and metadata of the filesystem.
+
+Non-privileged mount (or user mount):
+
+  A userspace filesystem mounted by a non-privileged (non-root) user.
+  The filesystem daemon is running with the privileges of the mounting
+  user.  NOTE: this is not the same as mounts allowed with the "user"
+  option in /etc/fstab, which is not discussed here.
+
+Mount owner:
+
+  The user who does the mounting.
+
+User:
+
+  The user who is performing filesystem operations.
+
+What is FUSE?
+~~~~~~~~~~~~~
+
+FUSE is a userspace filesystem framework.  It consists of a kernel
+module (fuse.ko), a userspace library (libfuse.*) and a mount utility
+(fusermount).
+
+One of the most important features of FUSE is allowing secure,
+non-privileged mounts.  This opens up new possibilities for the use of
+filesystems.  A good example is sshfs: a secure network filesystem
+using the sftp protocol.
+
+The userspace library and utilities are available from the FUSE
+homepage:
+
+  http://fuse.sourceforge.net/
+
+Mount options
+~~~~~~~~~~~~~
+
+'fd=N'
+
+  The file descriptor to use for communication between the userspace
+  filesystem and the kernel.  The file descriptor must have been
+  obtained by opening the FUSE device ('/dev/fuse').
+
+'rootmode=M'
+
+  The file mode of the filesystem's root in octal representation.
+
+'user_id=N'
+
+  The numeric user id of the mount owner.
+
+'group_id=N'
+
+  The numeric group id of the mount owner.
+
+'default_permissions'
+
+  By default FUSE doesn't check file access permissions; the
+  filesystem is free to implement its access policy or leave it to
+  the underlying file access mechanism (e.g. in case of network
+  filesystems).  This option enables permission checking, restricting
+  access based on file mode.  This option is usually useful together
+  with the 'allow_other' mount option.
+
+'allow_other'
+
+  This option overrides the security measure restricting file access
+  to the user mounting the filesystem.  This option is by default only
+  allowed to root, but this restriction can be removed with a
+  (userspace) configuration option.
+
+'kernel_cache'
+
+  This option disables flushing the cache of the file contents on
+  every open().  This should only be enabled on filesystems where the
+  file data is never changed externally (not through the mounted FUSE
+  filesystem).  Thus it is not suitable for network filesystems and
+  other "intermediate" filesystems.
+
+  NOTE: if this option is not specified (and neither is 'direct_io'),
+  data is still cached after the open(), so a read() system call will
+  not always initiate a read operation.
+
+'direct_io'
+
+  This option disables the use of page cache (file content cache) in
+  the kernel for this filesystem.  This has several effects:
+
+  - Each read() or write() system call will initiate one or more
+    read or write operations, data will not be cached in the
+    kernel.
+
+  - The return value of the read() and write() system calls will
+    correspond to the return values of the read and write
+    operations.  This is useful for example if the file size is not
+    known in advance (before reading it).
+
+'max_read=N'
+
+  With this option the maximum size of read operations can be set.
+  The default is infinite.  Note that the size of read requests is
+  limited anyway to 32 pages (which is 128kbyte on i386).
+
+How do non-privileged mounts work?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Since the mount() system call is a privileged operation, a helper
+program (fusermount) is needed, which is installed setuid root.
+
+The implication of providing non-privileged mounts is that the mount
+owner must not be able to use this capability to compromise the
+system.  Obvious requirements arising from this are:
+
+ A) mount owner should not be able to get elevated privileges with the
+    help of the mounted filesystem
+
+ B) mount owner should not get illegitimate access to information from
+    other users' and the super user's processes
+
+ C) mount owner should not be able to induce undesired behavior in
+    other users' or the super user's processes
+
+How are requirements fulfilled?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ A) The mount owner could gain elevated privileges by either:
+
+     1) creating a filesystem containing a device file, then opening
+        this device
+
+     2) creating a filesystem containing a suid or sgid application,
+        then executing this application
+
+    The solution is not to allow opening device files and to ignore
+    setuid and setgid bits when executing programs.  To ensure this,
+    fusermount always adds "nosuid" and "nodev" to the mount options
+    for non-privileged mounts.
+
+ B) If another user is accessing files or directories in the
+    filesystem, the filesystem daemon serving requests can record the
+    exact sequence and timing of operations performed.  This
+    information is otherwise inaccessible to the mount owner, so this
+    counts as an information leak.
+
+    The solution to this problem will be presented in point 2) of C).
+
+ C) There are several ways in which the mount owner can induce
+    undesired behavior in other users' processes, such as:
+
+     1) mounting a filesystem over a file or directory which the mount
+        owner could otherwise not be able to modify (or could only
+        make limited modifications).
+
+        This is solved in fusermount by checking the access
+        permissions on the mountpoint and only allowing the mount if
+        the mount owner can do unlimited modification (has write
+        access to the mountpoint, and the mountpoint is not a "sticky"
+        directory).
+
+     2) Even if 1) is solved, the mount owner can change the behavior
+        of other users' processes.
+
+         i) It can slow down or indefinitely delay the execution of a
+            filesystem operation, creating a DoS against the user or
+            the whole system.  For example, a suid application locking
+            a system file and then accessing a file on the mount
+            owner's filesystem could be stopped, thus causing the
+            system file to stay locked forever.
+
+        ii) It can present files or directories of unlimited length, or
+            directory structures of unlimited depth, possibly causing a
+            system process to eat up diskspace, memory or other
+            resources, again causing DoS.
+
+        The solution to this, as well as to B), is to deny access to
+        the filesystem to any process which the mount owner could not
+        otherwise monitor or manipulate.  Since the mount owner, if
+        able to ptrace a process, could do all of the above without
+        using a FUSE mount, the same criteria as used in ptrace can
+        be used to check whether a process is allowed to access the
+        filesystem.
+
+     Note that the ptrace check is not strictly necessary to prevent
+     C/2/i, it is enough to check whether the mount owner has enough
+     privilege to send a signal to the process accessing the
+     filesystem, since SIGSTOP can be used to get a similar effect.
+
+I think these limitations are unacceptable?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+If a sysadmin trusts the users enough, or can ensure through other
+measures that system processes will never enter non-privileged
+mounts, the last limitation can be relaxed with the
+"user_allow_other" config option.  If this config option is set, the
+mounting user can add the "allow_other" mount option, which disables
+the check for other users' processes.
+
+Kernel - userspace interface
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The following diagram shows how a filesystem operation (in this
+example unlink) is performed in FUSE.
+
+NOTE: everything in this description is greatly simplified
+
+ |  "rm /mnt/fuse/file"              |  FUSE filesystem daemon
+ |                                   |
+ |                                   |  >sys_read()
+ |                                   |    >fuse_dev_read()
+ |                                   |      >request_wait()
+ |                                   |        [sleep on fc->waitq]
+ |                                   |
+ |  >sys_unlink()                    |
+ |    >fuse_unlink()                 |
+ |      [get request from            |
+ |       fc->unused_list]            |
+ |      >request_send()              |
+ |        [queue req on fc->pending] |
+ |        [wake up fc->waitq]        |        [woken up]
+ |        >request_wait_answer()     |
+ |          [sleep on req->waitq]    |
+ |                                   |      <request_wait()
+ |                                   |      [remove req from fc->pending]
+ |                                   |      [copy req to read buffer]
+ |                                   |      [add req to fc->processing]
+ |                                   |    <fuse_dev_read()
+ |                                   |  <sys_read()
+ |                                   |
+ |                                   |  [perform unlink]
+ |                                   |
+ |                                   |  >sys_write()
+ |                                   |    >fuse_dev_write()
+ |                                   |      [look up req in fc->processing]
+ |                                   |      [remove from fc->processing]
+ |                                   |      [copy write buffer to req]
+ |          [woken up]               |      [wake up req->waitq]
+ |                                   |    <fuse_dev_write()
+ |                                   |  <sys_write()
+ |        <request_wait_answer()     |
+ |      <request_send()              |
+ |      [add request to              |
+ |       fc->unused_list]            |
+ |    <fuse_unlink()                 |
+ |  <sys_unlink()                    |
+
+There are a couple of ways in which to deadlock a FUSE filesystem.
+Since we are talking about unprivileged userspace programs,
+something must be done about these.
+
+Scenario 1 - Simple deadlock
+----------------------------
+
+ |  "rm /mnt/fuse/file"              |  FUSE filesystem daemon
+ |                                   |
+ |  >sys_unlink("/mnt/fuse/file")    |
+ |    [acquire inode semaphore       |
+ |     for "file"]                   |
+ |    >fuse_unlink()                 |
+ |      [sleep on req->waitq]        |
+ |                                   |  <sys_read()
+ |                                   |  >sys_unlink("/mnt/fuse/file")
+ |                                   |    [acquire inode semaphore
+ |                                   |     for "file"]
+ |                                   |    *DEADLOCK*
+
+The solution for this is to allow requests to be interrupted while
+they are in userspace:
+
+ |  [interrupted by signal]          |
+ |    <fuse_unlink()                 |
+ |    [release semaphore]            |  [semaphore acquired]
+ |  <sys_unlink()                    |
+ |                                   |  >fuse_unlink()
+ |                                   |    [queue req on fc->pending]
+ |                                   |    [wake up fc->waitq]
+ |                                   |    [sleep on req->waitq]
+
+If the filesystem daemon is single threaded, things stop here, since
+there is no other thread to dequeue and execute the request.  In
+this case the solution is to kill the FUSE daemon as well.  If there
+are multiple serving threads, you just have to keep killing them
+until none remain.
+
+Moral: a filesystem which deadlocks can soon find itself dead.
+
+Scenario 2 - Tricky deadlock
+----------------------------
+
+This one needs a carefully crafted filesystem.  It's a variation on
+the above: here the call back into the filesystem is not explicit,
+but is caused by a page fault.
+
+ |  Kamikaze filesystem thread 1     |  Kamikaze filesystem thread 2
+ |                                   |
+ |  [fd = open("/mnt/fuse/file")]    |  [request served normally]
+ |  [mmap fd to 'addr']              |
+ |  [close fd]                       |  [FLUSH triggers 'magic' flag]
+ |  [read a byte from addr]          |
+ |    >do_page_fault()               |
+ |      [find or create page]        |
+ |      [lock page]                  |
+ |      >fuse_readpage()             |
+ |        [queue READ request]       |
+ |        [sleep on req->waitq]      |
+ |                                   |  [read request to buffer]
+ |                                   |  [create reply header before addr]
+ |                                   |  >sys_write(addr - headerlength)
+ |                                   |    >fuse_dev_write()
+ |                                   |      [look up req in fc->processing]
+ |                                   |      [remove from fc->processing]
+ |                                   |      [copy write buffer to req]
+ |                                   |        >do_page_fault()
+ |                                   |          [find or create page]
+ |                                   |          [lock page]
+ |                                   |          *DEADLOCK*
+
+The solution is again to let the request be interrupted (not
+elaborated further).
+
+An additional problem is that while the write buffer is being copied
+to the request, the request must not be interrupted.  This is
+because the destination address of the copy may not be valid after
+the request is interrupted.
+
+This is solved by doing the copy atomically, and by allowing
+interruption while the page(s) belonging to the write buffer are
+faulted with get_user_pages().  The 'req->locked' flag indicates
+when the copy is taking place, and interruption is delayed until
+this flag is unset.
+
diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
index 9c3e4cc7b1a6..21021c356481 100644
--- a/fs/fuse/Makefile
+++ b/fs/fuse/Makefile
@@ -4,4 +4,4 @@
 
 obj-$(CONFIG_FUSE_FS) += fuse.o
 
-fuse-objs := inode.o
+fuse-objs := dev.o inode.o
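
The mount options documented in fuse.txt above are assembled into an
ordinary mount() call by fusermount.  The following user-space sketch
shows the general shape of that sequence; it is illustrative only, not
the fusermount source: error handling is minimal, and the rootmode
value 40000 (an octal directory mode) is an assumed example.

/* Sketch: how a FUSE mount is assembled from the documented options */
#include <fcntl.h>
#include <stdio.h>
#include <sys/mount.h>
#include <unistd.h>

int fuse_mount_sketch(const char *mountpoint)
{
	char opts[128];
	/* the communication channel passed to the kernel as 'fd=N' */
	int fd = open("/dev/fuse", O_RDWR);
	if (fd == -1)
		return -1;
	/* fd, rootmode, user_id and group_id are the documented options */
	snprintf(opts, sizeof(opts), "fd=%d,rootmode=40000,user_id=%u,group_id=%u",
		 fd, (unsigned) getuid(), (unsigned) getgid());
	/* fusermount adds "nosuid" and "nodev" for non-privileged mounts */
	if (mount("fuse", mountpoint, "fuse", MS_NOSUID | MS_NODEV, opts) == -1) {
		close(fd);
		return -1;
	}
	return fd;	/* the daemon now serves requests on this descriptor */
}
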
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
new file mode 100644
index 000000000000..9aaf10a6588f
--- /dev/null
+++ b/fs/fuse/dev.c
@@ -0,0 +1,884 @@
+/*
+  FUSE: Filesystem in Userspace
+  Copyright (C) 2001-2005  Miklos Szeredi <miklos@szeredi.hu>
+
+  This program can be distributed under the terms of the GNU GPL.
+  See the file COPYING.
+*/
+
+#include "fuse_i.h"
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/poll.h>
+#include <linux/uio.h>
+#include <linux/miscdevice.h>
+#include <linux/pagemap.h>
+#include <linux/file.h>
+#include <linux/slab.h>
+
+MODULE_ALIAS_MISCDEV(FUSE_MINOR);
+
+static kmem_cache_t *fuse_req_cachep;
+
+static inline struct fuse_conn *fuse_get_conn(struct file *file)
+{
+	struct fuse_conn *fc;
+	spin_lock(&fuse_lock);
+	fc = file->private_data;
+	if (fc && !fc->sb)
+		fc = NULL;
+	spin_unlock(&fuse_lock);
+	return fc;
+}
+
+static inline void fuse_request_init(struct fuse_req *req)
+{
+	memset(req, 0, sizeof(*req));
+	INIT_LIST_HEAD(&req->list);
+	init_waitqueue_head(&req->waitq);
+	atomic_set(&req->count, 1);
+}
+
+struct fuse_req *fuse_request_alloc(void)
+{
+	struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, SLAB_KERNEL);
+	if (req)
+		fuse_request_init(req);
+	return req;
+}
+
+void fuse_request_free(struct fuse_req *req)
+{
+	kmem_cache_free(fuse_req_cachep, req);
+}
+
+static inline void block_sigs(sigset_t *oldset)
+{
+	sigset_t mask;
+
+	siginitsetinv(&mask, sigmask(SIGKILL));
+	sigprocmask(SIG_BLOCK, &mask, oldset);
+}
+
+static inline void restore_sigs(sigset_t *oldset)
+{
+	sigprocmask(SIG_SETMASK, oldset, NULL);
+}
+
+void fuse_reset_request(struct fuse_req *req)
+{
+	int preallocated = req->preallocated;
+	BUG_ON(atomic_read(&req->count) != 1);
+	fuse_request_init(req);
+	req->preallocated = preallocated;
+}
+
+static void __fuse_get_request(struct fuse_req *req)
+{
+	atomic_inc(&req->count);
+}
+
+/* Must be called with > 1 refcount */
+static void __fuse_put_request(struct fuse_req *req)
+{
+	BUG_ON(atomic_read(&req->count) < 2);
+	atomic_dec(&req->count);
+}
+
+static struct fuse_req *do_get_request(struct fuse_conn *fc)
+{
+	struct fuse_req *req;
+
+	spin_lock(&fuse_lock);
+	BUG_ON(list_empty(&fc->unused_list));
+	req = list_entry(fc->unused_list.next, struct fuse_req, list);
+	list_del_init(&req->list);
+	spin_unlock(&fuse_lock);
+	fuse_request_init(req);
+	req->preallocated = 1;
+	req->in.h.uid = current->fsuid;
+	req->in.h.gid = current->fsgid;
+	req->in.h.pid = current->pid;
+	return req;
+}
+
+struct fuse_req *fuse_get_request(struct fuse_conn *fc)
+{
+	if (down_interruptible(&fc->outstanding_sem))
+		return NULL;
+	return do_get_request(fc);
+}
+
+/*
+ * Non-interruptible version of the above function is for operations
+ * which can't legally return -ERESTART{SYS,NOINTR}.  This can still
+ * return NULL, but only in case the signal is SIGKILL.
+ */
+struct fuse_req *fuse_get_request_nonint(struct fuse_conn *fc)
+{
+	int intr;
+	sigset_t oldset;
+
+	block_sigs(&oldset);
+	intr = down_interruptible(&fc->outstanding_sem);
+	restore_sigs(&oldset);
+	return intr ? NULL : do_get_request(fc);
+}
+
+static void fuse_putback_request(struct fuse_conn *fc, struct fuse_req *req)
+{
+	spin_lock(&fuse_lock);
+	if (req->preallocated)
+		list_add(&req->list, &fc->unused_list);
+	else
+		fuse_request_free(req);
+
+	/* If we are in debt decrease that first */
+	if (fc->outstanding_debt)
+		fc->outstanding_debt--;
+	else
+		up(&fc->outstanding_sem);
+	spin_unlock(&fuse_lock);
+}
+
+void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
+{
+	if (atomic_dec_and_test(&req->count))
+		fuse_putback_request(fc, req);
+}
+
+/*
+ * This function is called when a request is finished.  Either a reply
+ * has arrived or it was interrupted (and not yet sent) or some error
+ * occurred during communication with userspace, or the device file
+ * was closed.  It decreases the reference count for the request.  In
+ * case of a background request the references to the stored objects
+ * are released.  The requester thread is woken up (if still waiting),
+ * and finally the request is either freed or put on the unused_list.
+ *
+ * Called with fuse_lock, unlocks it
+ */
+static void request_end(struct fuse_conn *fc, struct fuse_req *req)
+{
+	int putback;
+	req->finished = 1;
+	putback = atomic_dec_and_test(&req->count);
+	spin_unlock(&fuse_lock);
+	if (req->background) {
+		if (req->inode)
+			iput(req->inode);
+		if (req->inode2)
+			iput(req->inode2);
+		if (req->file)
+			fput(req->file);
+	}
+	wake_up(&req->waitq);
+	if (req->in.h.opcode == FUSE_INIT) {
+		int i;
+
+		if (req->misc.init_in_out.major != FUSE_KERNEL_VERSION)
+			fc->conn_error = 1;
+
+		/* After INIT reply is received other requests can go
+		   out.  So do (FUSE_MAX_OUTSTANDING - 1) number of
+		   up()s on outstanding_sem.  The last up() is done in
+		   fuse_putback_request() */
+		for (i = 1; i < FUSE_MAX_OUTSTANDING; i++)
+			up(&fc->outstanding_sem);
+	}
+	if (putback)
+		fuse_putback_request(fc, req);
+}
+
+static void background_request(struct fuse_req *req)
+{
+	/* Need to get hold of the inode(s) and/or file used in the
+	   request, so FORGET and RELEASE are not sent too early */
+	req->background = 1;
+	if (req->inode)
+		req->inode = igrab(req->inode);
+	if (req->inode2)
+		req->inode2 = igrab(req->inode2);
+	if (req->file)
+		get_file(req->file);
+}
+
+static int request_wait_answer_nonint(struct fuse_req *req)
+{
+	int err;
+	sigset_t oldset;
+	block_sigs(&oldset);
+	err = wait_event_interruptible(req->waitq, req->finished);
+	restore_sigs(&oldset);
+	return err;
+}
+
+/* Called with fuse_lock held.  Releases, and then reacquires it. */
+static void request_wait_answer(struct fuse_req *req, int interruptible)
+{
+	int intr;
+
+	spin_unlock(&fuse_lock);
+	if (interruptible)
+		intr = wait_event_interruptible(req->waitq, req->finished);
+	else
+		intr = request_wait_answer_nonint(req);
+	spin_lock(&fuse_lock);
+	if (intr && interruptible && req->sent) {
+		/* If request is already in userspace, only allow KILL
+		   signal to interrupt */
+		spin_unlock(&fuse_lock);
+		intr = request_wait_answer_nonint(req);
+		spin_lock(&fuse_lock);
+	}
+	if (!intr)
+		return;
+
+	if (!interruptible || req->sent)
+		req->out.h.error = -EINTR;
+	else
+		req->out.h.error = -ERESTARTNOINTR;
+
+	req->interrupted = 1;
+	if (req->locked) {
+		/* This is uninterruptible sleep, because data is
+		   being copied to/from the buffers of req.  During
+		   locked state, there mustn't be any filesystem
+		   operation (e.g. page fault), since that could lead
+		   to deadlock */
+		spin_unlock(&fuse_lock);
+		wait_event(req->waitq, !req->locked);
+		spin_lock(&fuse_lock);
+	}
+	if (!req->sent && !list_empty(&req->list)) {
+		list_del(&req->list);
+		__fuse_put_request(req);
+	} else if (!req->finished && req->sent)
+		background_request(req);
+}
+
+static unsigned len_args(unsigned numargs, struct fuse_arg *args)
+{
+	unsigned nbytes = 0;
+	unsigned i;
+
+	for (i = 0; i < numargs; i++)
+		nbytes += args[i].size;
+
+	return nbytes;
+}
+
+static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
+{
+	fc->reqctr++;
+	/* zero is special */
+	if (fc->reqctr == 0)
+		fc->reqctr = 1;
+	req->in.h.unique = fc->reqctr;
+	req->in.h.len = sizeof(struct fuse_in_header) +
+		len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
+	if (!req->preallocated) {
+		/* If request is not preallocated (either FORGET or
+		   RELEASE), then still decrease outstanding_sem, so
+		   user can't open infinite number of files while not
+		   processing the RELEASE requests.  However for
+		   efficiency do it without blocking, so if down()
+		   would block, just increase the debt instead */
+		if (down_trylock(&fc->outstanding_sem))
+			fc->outstanding_debt++;
+	}
+	list_add_tail(&req->list, &fc->pending);
+	wake_up(&fc->waitq);
+}
+
+static void request_send_wait(struct fuse_conn *fc, struct fuse_req *req,
+			      int interruptible)
+{
+	req->isreply = 1;
+	spin_lock(&fuse_lock);
+	if (!fc->file)
+		req->out.h.error = -ENOTCONN;
+	else if (fc->conn_error)
+		req->out.h.error = -ECONNREFUSED;
+	else {
+		queue_request(fc, req);
+		/* acquire extra reference, since request is still needed
+		   after request_end() */
+		__fuse_get_request(req);
+
+		request_wait_answer(req, interruptible);
+	}
+	spin_unlock(&fuse_lock);
+}
+
+void request_send(struct fuse_conn *fc, struct fuse_req *req)
+{
+	request_send_wait(fc, req, 1);
+}
+
+/*
+ * Non-interruptible version of the above function is for operations
+ * which can't legally return -ERESTART{SYS,NOINTR}.  This can still
+ * be interrupted but only with SIGKILL.
+ */
+void request_send_nonint(struct fuse_conn *fc, struct fuse_req *req)
+{
+	request_send_wait(fc, req, 0);
+}
+
+static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
+{
+	spin_lock(&fuse_lock);
+	if (fc->file) {
+		queue_request(fc, req);
+		spin_unlock(&fuse_lock);
+	} else {
+		req->out.h.error = -ENOTCONN;
+		request_end(fc, req);
+	}
+}
+
+void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
+{
+	req->isreply = 0;
+	request_send_nowait(fc, req);
+}
+
+void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
+{
+	req->isreply = 1;
+	background_request(req);
+	request_send_nowait(fc, req);
+}
+
+void fuse_send_init(struct fuse_conn *fc)
+{
+	/* This is called from fuse_fill_super() so there's guaranteed
+	   to be a request available */
+	struct fuse_req *req = do_get_request(fc);
+	struct fuse_init_in_out *arg = &req->misc.init_in_out;
+	arg->major = FUSE_KERNEL_VERSION;
+	arg->minor = FUSE_KERNEL_MINOR_VERSION;
+	req->in.h.opcode = FUSE_INIT;
+	req->in.numargs = 1;
+	req->in.args[0].size = sizeof(*arg);
+	req->in.args[0].value = arg;
+	req->out.numargs = 1;
+	req->out.args[0].size = sizeof(*arg);
+	req->out.args[0].value = arg;
+	request_send_background(fc, req);
+}
+
+/*
+ * Lock the request.  Up to the next unlock_request() there mustn't be
+ * anything that could cause a page-fault.  If the request was already
+ * interrupted bail out.
+ */
+static inline int lock_request(struct fuse_req *req)
+{
+	int err = 0;
+	if (req) {
+		spin_lock(&fuse_lock);
+		if (req->interrupted)
+			err = -ENOENT;
+		else
+			req->locked = 1;
+		spin_unlock(&fuse_lock);
+	}
+	return err;
+}
+
+/*
+ * Unlock request.  If it was interrupted during being locked, the
+ * requester thread is currently waiting for it to be unlocked, so
+ * wake it up.
+ */
+static inline void unlock_request(struct fuse_req *req)
+{
+	if (req) {
+		spin_lock(&fuse_lock);
+		req->locked = 0;
+		if (req->interrupted)
+			wake_up(&req->waitq);
+		spin_unlock(&fuse_lock);
+	}
+}
+
+struct fuse_copy_state {
+	int write;
+	struct fuse_req *req;
+	const struct iovec *iov;
+	unsigned long nr_segs;
+	unsigned long seglen;
+	unsigned long addr;
+	struct page *pg;
+	void *mapaddr;
+	void *buf;
+	unsigned len;
+};
+
+static void fuse_copy_init(struct fuse_copy_state *cs, int write,
+			   struct fuse_req *req, const struct iovec *iov,
+			   unsigned long nr_segs)
+{
+	memset(cs, 0, sizeof(*cs));
+	cs->write = write;
+	cs->req = req;
+	cs->iov = iov;
+	cs->nr_segs = nr_segs;
+}
+
+/* Unmap and put previous page of userspace buffer */
+static inline void fuse_copy_finish(struct fuse_copy_state *cs)
+{
+	if (cs->mapaddr) {
+		kunmap_atomic(cs->mapaddr, KM_USER0);
+		if (cs->write) {
+			flush_dcache_page(cs->pg);
+			set_page_dirty_lock(cs->pg);
+		}
+		put_page(cs->pg);
+		cs->mapaddr = NULL;
+	}
+}
+
+/*
+ * Get another pageful of userspace buffer, map it to kernel address
+ * space, and lock the request
+ */
+static int fuse_copy_fill(struct fuse_copy_state *cs)
+{
+	unsigned long offset;
+	int err;
+
+	unlock_request(cs->req);
+	fuse_copy_finish(cs);
+	if (!cs->seglen) {
+		BUG_ON(!cs->nr_segs);
+		cs->seglen = cs->iov[0].iov_len;
+		cs->addr = (unsigned long) cs->iov[0].iov_base;
+		cs->iov++;
+		cs->nr_segs--;
+	}
+	down_read(&current->mm->mmap_sem);
+	err = get_user_pages(current, current->mm, cs->addr, 1, cs->write, 0,
+			     &cs->pg, NULL);
+	up_read(&current->mm->mmap_sem);
+	if (err < 0)
+		return err;
+	BUG_ON(err != 1);
+	offset = cs->addr % PAGE_SIZE;
+	cs->mapaddr = kmap_atomic(cs->pg, KM_USER0);
+	cs->buf = cs->mapaddr + offset;
+	cs->len = min(PAGE_SIZE - offset, cs->seglen);
+	cs->seglen -= cs->len;
+	cs->addr += cs->len;
+
+	return lock_request(cs->req);
+}
+
+/* Do as much copy to/from userspace buffer as we can */
+static inline int fuse_copy_do(struct fuse_copy_state *cs, void **val,
+			       unsigned *size)
+{
+	unsigned ncpy = min(*size, cs->len);
+	if (val) {
+		if (cs->write)
+			memcpy(cs->buf, *val, ncpy);
+		else
+			memcpy(*val, cs->buf, ncpy);
+		*val += ncpy;
+	}
+	*size -= ncpy;
+	cs->len -= ncpy;
+	cs->buf += ncpy;
+	return ncpy;
+}
+
+/*
+ * Copy a page in the request to/from the userspace buffer.  Must be
+ * done atomically
+ */
+static inline int fuse_copy_page(struct fuse_copy_state *cs, struct page *page,
+				 unsigned offset, unsigned count, int zeroing)
+{
+	if (page && zeroing && count < PAGE_SIZE) {
+		void *mapaddr = kmap_atomic(page, KM_USER1);
+		memset(mapaddr, 0, PAGE_SIZE);
+		kunmap_atomic(mapaddr, KM_USER1);
+	}
+	while (count) {
+		int err;
+		if (!cs->len && (err = fuse_copy_fill(cs)))
+			return err;
+		if (page) {
+			void *mapaddr = kmap_atomic(page, KM_USER1);
+			void *buf = mapaddr + offset;
+			offset += fuse_copy_do(cs, &buf, &count);
+			kunmap_atomic(mapaddr, KM_USER1);
+		} else
+			offset += fuse_copy_do(cs, NULL, &count);
+	}
+	if (page && !cs->write)
+		flush_dcache_page(page);
+	return 0;
+}
+
+/* Copy pages in the request to/from userspace buffer */
+static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
+			   int zeroing)
+{
+	unsigned i;
+	struct fuse_req *req = cs->req;
+	unsigned offset = req->page_offset;
+	unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);
+
+	for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
+		struct page *page = req->pages[i];
+		int err = fuse_copy_page(cs, page, offset, count, zeroing);
+		if (err)
+			return err;
+
+		nbytes -= count;
+		count = min(nbytes, (unsigned) PAGE_SIZE);
+		offset = 0;
+	}
+	return 0;
+}
+
+/* Copy a single argument in the request to/from userspace buffer */
+static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
+{
+	while (size) {
+		int err;
+		if (!cs->len && (err = fuse_copy_fill(cs)))
+			return err;
+		fuse_copy_do(cs, &val, &size);
+	}
+	return 0;
+}
+
+/* Copy request arguments to/from userspace buffer */
+static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
+			  unsigned argpages, struct fuse_arg *args,
+			  int zeroing)
+{
+	int err = 0;
+	unsigned i;
+
+	for (i = 0; !err && i < numargs; i++) {
+		struct fuse_arg *arg = &args[i];
+		if (i == numargs - 1 && argpages)
+			err = fuse_copy_pages(cs, arg->size, zeroing);
+		else
+			err = fuse_copy_one(cs, arg->value, arg->size);
+	}
+	return err;
+}
+
+/* Wait until a request is available on the pending list */
+static void request_wait(struct fuse_conn *fc)
+{
+	DECLARE_WAITQUEUE(wait, current);
+
+	add_wait_queue_exclusive(&fc->waitq, &wait);
+	while (fc->sb && list_empty(&fc->pending)) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (signal_pending(current))
+			break;
+
+		spin_unlock(&fuse_lock);
+		schedule();
+		spin_lock(&fuse_lock);
+	}
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(&fc->waitq, &wait);
+}
+
+/*
+ * Read a single request into the userspace filesystem's buffer.  This
+ * function waits until a request is available, then removes it from
+ * the pending list and copies request data to userspace buffer.  If
+ * no reply is needed (FORGET) or request has been interrupted or
+ * there was an error during the copying then it's finished by calling
+ * request_end().  Otherwise add it to the processing list, and set
+ * the 'sent' flag.
+ */
+static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
+			      unsigned long nr_segs, loff_t *off)
+{
+	int err;
+	struct fuse_conn *fc;
+	struct fuse_req *req;
+	struct fuse_in *in;
+	struct fuse_copy_state cs;
+	unsigned reqsize;
+
+	spin_lock(&fuse_lock);
+	fc = file->private_data;
+	err = -EPERM;
+	if (!fc)
+		goto err_unlock;
+	request_wait(fc);
+	err = -ENODEV;
+	if (!fc->sb)
+		goto err_unlock;
+	err = -ERESTARTSYS;
+	if (list_empty(&fc->pending))
+		goto err_unlock;
+
+	req = list_entry(fc->pending.next, struct fuse_req, list);
+	list_del_init(&req->list);
+	spin_unlock(&fuse_lock);
+
+	in = &req->in;
+	reqsize = req->in.h.len;
+	fuse_copy_init(&cs, 1, req, iov, nr_segs);
+	err = -EINVAL;
+	if (iov_length(iov, nr_segs) >= reqsize) {
+		err = fuse_copy_one(&cs, &in->h, sizeof(in->h));
+		if (!err)
+			err = fuse_copy_args(&cs, in->numargs, in->argpages,
+					     (struct fuse_arg *) in->args, 0);
+	}
+	fuse_copy_finish(&cs);
+
+	spin_lock(&fuse_lock);
+	req->locked = 0;
+	if (!err && req->interrupted)
+		err = -ENOENT;
+	if (err) {
+		if (!req->interrupted)
+			req->out.h.error = -EIO;
+		request_end(fc, req);
+		return err;
+	}
+	if (!req->isreply)
+		request_end(fc, req);
+	else {
+		req->sent = 1;
+		list_add_tail(&req->list, &fc->processing);
+		spin_unlock(&fuse_lock);
+	}
+	return reqsize;
+
+ err_unlock:
+	spin_unlock(&fuse_lock);
+	return err;
+}
+
+static ssize_t fuse_dev_read(struct file *file, char __user *buf,
+			     size_t nbytes, loff_t *off)
+{
+	struct iovec iov;
+	iov.iov_len = nbytes;
+	iov.iov_base = buf;
+	return fuse_dev_readv(file, &iov, 1, off);
+}
+
+/* Look up request on processing list by unique ID */
+static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
+{
+	struct list_head *entry;
+
+	list_for_each(entry, &fc->processing) {
+		struct fuse_req *req;
+		req = list_entry(entry, struct fuse_req, list);
+		if (req->in.h.unique == unique)
+			return req;
+	}
+	return NULL;
+}
+
+static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
+			 unsigned nbytes)
+{
+	unsigned reqsize = sizeof(struct fuse_out_header);
+
+	if (out->h.error)
+		return nbytes != reqsize ? -EINVAL : 0;
+
+	reqsize += len_args(out->numargs, out->args);
+
+	if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
+		return -EINVAL;
+	else if (reqsize > nbytes) {
+		struct fuse_arg *lastarg = &out->args[out->numargs-1];
+		unsigned diffsize = reqsize - nbytes;
+		if (diffsize > lastarg->size)
+			return -EINVAL;
+		lastarg->size -= diffsize;
+	}
+	return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
+			      out->page_zeroing);
+}
+
+/*
+ * Write a single reply to a request.  First the header is copied from
+ * the write buffer.  The request is then searched on the processing
+ * list by the unique ID found in the header.  If found, then remove
+ * it from the list and copy the rest of the buffer to the request.
+ * The request is finished by calling request_end()
+ */
+static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
+			       unsigned long nr_segs, loff_t *off)
+{
+	int err;
+	unsigned nbytes = iov_length(iov, nr_segs);
+	struct fuse_req *req;
+	struct fuse_out_header oh;
+	struct fuse_copy_state cs;
+	struct fuse_conn *fc = fuse_get_conn(file);
+	if (!fc)
+		return -ENODEV;
+
+	fuse_copy_init(&cs, 0, NULL, iov, nr_segs);
+	if (nbytes < sizeof(struct fuse_out_header))
+		return -EINVAL;
+
+	err = fuse_copy_one(&cs, &oh, sizeof(oh));
+	if (err)
+		goto err_finish;
+	err = -EINVAL;
+	if (!oh.unique || oh.error <= -1000 || oh.error > 0 ||
+	    oh.len != nbytes)
+		goto err_finish;
+
+	spin_lock(&fuse_lock);
+	req = request_find(fc, oh.unique);
+	err = -EINVAL;
+	if (!req)
+		goto err_unlock;
+
+	list_del_init(&req->list);
+	if (req->interrupted) {
+		request_end(fc, req);
+		fuse_copy_finish(&cs);
+		return -ENOENT;
+	}
+	req->out.h = oh;
+	req->locked = 1;
+	cs.req = req;
+	spin_unlock(&fuse_lock);
+
+	err = copy_out_args(&cs, &req->out, nbytes);
+	fuse_copy_finish(&cs);
+
+	spin_lock(&fuse_lock);
+	req->locked = 0;
+	if (!err) {
+		if (req->interrupted)
+			err = -ENOENT;
+	} else if (!req->interrupted)
+		req->out.h.error = -EIO;
+	request_end(fc, req);
+
+	return err ? err : nbytes;
+
+ err_unlock:
+	spin_unlock(&fuse_lock);
+ err_finish:
+	fuse_copy_finish(&cs);
+	return err;
+}
+
+static ssize_t fuse_dev_write(struct file *file, const char __user *buf,
+			      size_t nbytes, loff_t *off)
+{
+	struct iovec iov;
+	iov.iov_len = nbytes;
+	iov.iov_base = (char __user *) buf;
+	return fuse_dev_writev(file, &iov, 1, off);
+}
+
+static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
+{
+	struct fuse_conn *fc = fuse_get_conn(file);
+	unsigned mask = POLLOUT | POLLWRNORM;
+
+	if (!fc)
+		return -ENODEV;
+
+	poll_wait(file, &fc->waitq, wait);
+
+	spin_lock(&fuse_lock);
+	if (!list_empty(&fc->pending))
+		mask |= POLLIN | POLLRDNORM;
+	spin_unlock(&fuse_lock);
+
+	return mask;
+}
+
+/* Abort all requests on the given list (pending or processing) */
+static void end_requests(struct fuse_conn *fc, struct list_head *head)
+{
+	while (!list_empty(head)) {
+		struct fuse_req *req;
+		req = list_entry(head->next, struct fuse_req, list);
+		list_del_init(&req->list);
+		req->out.h.error = -ECONNABORTED;
+		request_end(fc, req);
+		spin_lock(&fuse_lock);
+	}
+}
+
+static int fuse_dev_release(struct inode *inode, struct file *file)
+{
+	struct fuse_conn *fc;
+
+	spin_lock(&fuse_lock);
+	fc = file->private_data;
+	if (fc) {
+		fc->file = NULL;
+		end_requests(fc, &fc->pending);
+		end_requests(fc, &fc->processing);
+		fuse_release_conn(fc);
+	}
+	spin_unlock(&fuse_lock);
+	return 0;
+}
+
+struct file_operations fuse_dev_operations = {
+	.owner		= THIS_MODULE,
+	.llseek		= no_llseek,
+	.read		= fuse_dev_read,
+	.readv		= fuse_dev_readv,
+	.write		= fuse_dev_write,
+	.writev		= fuse_dev_writev,
+	.poll		= fuse_dev_poll,
+	.release	= fuse_dev_release,
+};
+
+static struct miscdevice fuse_miscdevice = {
+	.minor = FUSE_MINOR,
+	.name  = "fuse",
+	.fops = &fuse_dev_operations,
+};
+
+int __init fuse_dev_init(void)
+{
+	int err = -ENOMEM;
+	fuse_req_cachep = kmem_cache_create("fuse_request",
+					    sizeof(struct fuse_req),
+					    0, 0, NULL, NULL);
+	if (!fuse_req_cachep)
+		goto out;
+
+	err = misc_register(&fuse_miscdevice);
+	if (err)
+		goto out_cache_clean;
+
+	return 0;
+
+ out_cache_clean:
+	kmem_cache_destroy(fuse_req_cachep);
+ out:
+	return err;
+}
+
+void fuse_dev_cleanup(void)
+{
+	misc_deregister(&fuse_miscdevice);
+	kmem_cache_destroy(fuse_req_cachep);
+}
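
fuse_dev_readv() and fuse_dev_writev() above define the contract seen
by the daemon: each read() returns exactly one request (the buffer
must be at least in.h.len bytes, or the read fails with EINVAL), and
each write() must carry one complete reply whose header echoes the
request's unique id, whose error is zero or a small negative errno,
and whose len equals the number of bytes written.  The sketch below is
a minimal single-threaded daemon loop under those rules; it rejects
every operation, so it is illustrative only.  FUSE_MAX_IN and the
header structs come from include/linux/fuse.h at the end of this
patch.

/* Sketch: minimal daemon loop against the contract of dev.c */
#include <errno.h>
#include <unistd.h>
#include <linux/fuse.h>

static void serve(int fd)
{
	char buf[FUSE_MAX_IN];
	struct fuse_in_header *in = (struct fuse_in_header *) buf;
	struct fuse_out_header out;

	for (;;) {
		/* one read() == one complete request */
		ssize_t n = read(fd, buf, sizeof(buf));
		if (n < (ssize_t) sizeof(*in))
			break;			/* unmounted, or a read error */
		out.unique = in->unique;	/* used by request_find() */
		out.error = -ENOSYS;		/* <= 0 and > -1000, per fuse_dev_writev() */
		out.len = sizeof(out);		/* must equal this write()'s size */
		write(fd, &out, sizeof(out));
	}
}
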
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index eed6e89ce01f..50ad6a0c39bf 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -15,6 +15,12 @@
 #include <linux/backing-dev.h>
 #include <asm/semaphore.h>
 
+/** Max number of pages that can be used in a single read request */
+#define FUSE_MAX_PAGES_PER_REQ 32
+
+/** If more requests are outstanding, then the operation will block */
+#define FUSE_MAX_OUTSTANDING 10
+
 /** FUSE inode */
 struct fuse_inode {
 	/** Inode data */
@@ -28,6 +34,123 @@ struct fuse_inode {
 	unsigned long i_time;
 };
 
+/** One input argument of a request */
+struct fuse_in_arg {
+	unsigned size;
+	const void *value;
+};
+
+/** The request input */
+struct fuse_in {
+	/** The request header */
+	struct fuse_in_header h;
+
+	/** True if the data for the last argument is in req->pages */
+	unsigned argpages:1;
+
+	/** Number of arguments */
+	unsigned numargs;
+
+	/** Array of arguments */
+	struct fuse_in_arg args[3];
+};
+
+/** One output argument of a request */
+struct fuse_arg {
+	unsigned size;
+	void *value;
+};
+
+/** The request output */
+struct fuse_out {
+	/** Header returned from userspace */
+	struct fuse_out_header h;
+
+	/** Last argument is variable length (can be shorter than
+	    arg->size) */
+	unsigned argvar:1;
+
+	/** Last argument is a list of pages to copy data to */
+	unsigned argpages:1;
+
+	/** Zero partially or not copied pages */
+	unsigned page_zeroing:1;
+
+	/** Number of arguments */
+	unsigned numargs;
+
+	/** Array of arguments */
+	struct fuse_arg args[3];
+};
+
+struct fuse_req;
+struct fuse_conn;
+
+/**
+ * A request to the client
+ */
+struct fuse_req {
+	/** This can be on either unused_list, pending or processing
+	    lists in fuse_conn */
+	struct list_head list;
+
+	/** refcount */
+	atomic_t count;
+
+	/** True if the request has reply */
+	unsigned isreply:1;
+
+	/** The request is preallocated */
+	unsigned preallocated:1;
+
+	/** The request was interrupted */
+	unsigned interrupted:1;
+
+	/** Request is sent in the background */
+	unsigned background:1;
+
+	/** Data is being copied to/from the request */
+	unsigned locked:1;
+
+	/** Request has been sent to userspace */
+	unsigned sent:1;
+
+	/** The request is finished */
+	unsigned finished:1;
+
+	/** The request input */
+	struct fuse_in in;
+
+	/** The request output */
+	struct fuse_out out;
+
+	/** Used to wake up the task waiting for completion of request */
+	wait_queue_head_t waitq;
+
+	/** Data for asynchronous requests */
+	union {
+		struct fuse_init_in_out init_in_out;
+	} misc;
+
+	/** page vector */
+	struct page *pages[FUSE_MAX_PAGES_PER_REQ];
+
+	/** number of pages in vector */
+	unsigned num_pages;
+
+	/** offset of data on first page */
+	unsigned page_offset;
+
+	/** Inode used in the request */
+	struct inode *inode;
+
+	/** Second inode used in the request (or NULL) */
+	struct inode *inode2;
+
+	/** File used in the request (or NULL) */
+	struct file *file;
+};
+
 /**
  * A Fuse connection.
  *
@@ -39,9 +162,37 @@ struct fuse_conn {
 	/** The superblock of the mounted filesystem */
 	struct super_block *sb;
 
+	/** The opened client device */
+	struct file *file;
+
 	/** The user id for this mount */
 	uid_t user_id;
 
+	/** Readers of the connection are waiting on this */
+	wait_queue_head_t waitq;
+
+	/** The list of pending requests */
+	struct list_head pending;
+
+	/** The list of requests being processed */
+	struct list_head processing;
+
+	/** Controls the maximum number of outstanding requests */
+	struct semaphore outstanding_sem;
+
+	/** This counts the number of outstanding requests if
+	    outstanding_sem would go negative */
+	unsigned outstanding_debt;
+
+	/** The list of unused requests */
+	struct list_head unused_list;
+
+	/** The next unique request id */
+	u64 reqctr;
+
+	/** Connection failed (version mismatch) */
+	unsigned conn_error : 1;
+
 	/** Backing dev info */
 	struct backing_dev_info bdi;
 };
@@ -71,13 +222,20 @@ static inline u64 get_node_id(struct inode *inode)
 	return get_fuse_inode(inode)->nodeid;
 }
 
+/** Device operations */
+extern struct file_operations fuse_dev_operations;
+
 /**
  * This is the single global spinlock which protects FUSE's structures
 *
 * The following data is protected by this lock:
 *
+ *  - the private_data field of the device file
 *  - the s_fs_info field of the super block
+ *  - unused_list, pending, processing lists in fuse_conn
+ *  - the unique request ID counter reqctr in fuse_conn
 *  - the sb (super_block) field in fuse_conn
+ *  - the file (device file) field in fuse_conn
 */
 extern spinlock_t fuse_lock;
 
@@ -87,3 +245,68 @@ extern spinlock_t fuse_lock;
 */
 void fuse_release_conn(struct fuse_conn *fc);
 
+/**
+ * Initialize the client device
+ */
+int fuse_dev_init(void);
+
+/**
+ * Cleanup the client device
+ */
+void fuse_dev_cleanup(void);
+
+/**
+ * Allocate a request
+ */
+struct fuse_req *fuse_request_alloc(void);
+
+/**
+ * Free a request
+ */
+void fuse_request_free(struct fuse_req *req);
+
+/**
+ * Reinitialize a request, the preallocated flag is left unmodified
+ */
+void fuse_reset_request(struct fuse_req *req);
+
+/**
+ * Reserve a preallocated request
+ */
+struct fuse_req *fuse_get_request(struct fuse_conn *fc);
+
+/**
+ * Reserve a preallocated request, only interruptible by SIGKILL
+ */
+struct fuse_req *fuse_get_request_nonint(struct fuse_conn *fc);
+
+/**
+ * Decrement reference count of a request.  If count goes to zero put
+ * on the unused list (preallocated) or free the request (not preallocated).
+ */
+void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req);
+
+/**
+ * Send a request (synchronous, interruptible)
+ */
+void request_send(struct fuse_conn *fc, struct fuse_req *req);
+
+/**
+ * Send a request (synchronous, non-interruptible except by SIGKILL)
+ */
+void request_send_nonint(struct fuse_conn *fc, struct fuse_req *req);
+
+/**
+ * Send a request with no reply
+ */
+void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req);
+
+/**
+ * Send a request in the background
+ */
+void request_send_background(struct fuse_conn *fc, struct fuse_req *req);
+
+/**
+ * Send the INIT message
+ */
+void fuse_send_init(struct fuse_conn *fc);
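
The declarations above fix the in-kernel calling convention: reserve a
request, fill in the header and arguments, send it, then drop the
reference.  fuse_send_init() in dev.c is the only real user in this
patch; the fragment below restates the same pattern for a synchronous
call.  It is a sketch only: the opcode value and the caller are
placeholders, since no filesystem operations are defined yet in this
patch.

/* Sketch of the request calling convention; opcode 42 is a placeholder */
static int fuse_do_operation(struct fuse_conn *fc, u64 nodeid,
			     void *outarg, unsigned outsize)
{
	int err;
	struct fuse_req *req = fuse_get_request(fc);
	if (!req)
		return -ERESTARTSYS;	/* interrupted while reserving */

	req->in.h.opcode = 42;		/* placeholder opcode */
	req->in.h.nodeid = nodeid;
	req->in.numargs = 0;		/* no input payload */
	req->out.numargs = 1;		/* one fixed-size output argument */
	req->out.args[0].size = outsize;
	req->out.args[0].value = outarg;
	request_send(fc, req);		/* blocks until the daemon replies */
	err = req->out.h.error;
	fuse_put_request(fc, req);	/* back to unused_list, or freed */
	return err;
}
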
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index ea6339c2b6a1..33fad334ba70 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -151,6 +151,8 @@ static void fuse_put_super(struct super_block *sb)
 	mount_count --;
 	fc->sb = NULL;
 	fc->user_id = 0;
+	/* Flush all readers on this fs */
+	wake_up_all(&fc->waitq);
 	fuse_release_conn(fc);
 	*get_fuse_conn_super_p(sb) = NULL;
 	spin_unlock(&fuse_lock);
@@ -229,22 +231,51 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
 	return 0;
 }
 
-void fuse_release_conn(struct fuse_conn *fc)
+static void free_conn(struct fuse_conn *fc)
 {
+	while (!list_empty(&fc->unused_list)) {
+		struct fuse_req *req;
+		req = list_entry(fc->unused_list.next, struct fuse_req, list);
+		list_del(&req->list);
+		fuse_request_free(req);
+	}
 	kfree(fc);
 }
 
+/* Must be called with the fuse lock held */
+void fuse_release_conn(struct fuse_conn *fc)
+{
+	if (!fc->sb && !fc->file)
+		free_conn(fc);
+}
+
 static struct fuse_conn *new_conn(void)
 {
 	struct fuse_conn *fc;
 
 	fc = kmalloc(sizeof(*fc), GFP_KERNEL);
 	if (fc != NULL) {
+		int i;
 		memset(fc, 0, sizeof(*fc));
 		fc->sb = NULL;
+		fc->file = NULL;
 		fc->user_id = 0;
+		init_waitqueue_head(&fc->waitq);
+		INIT_LIST_HEAD(&fc->pending);
+		INIT_LIST_HEAD(&fc->processing);
+		INIT_LIST_HEAD(&fc->unused_list);
+		sema_init(&fc->outstanding_sem, 0);
+		for (i = 0; i < FUSE_MAX_OUTSTANDING; i++) {
+			struct fuse_req *req = fuse_request_alloc();
+			if (!req) {
+				free_conn(fc);
+				return NULL;
+			}
+			list_add(&req->list, &fc->unused_list);
+		}
 		fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
 		fc->bdi.unplug_io_fn = default_unplug_io_fn;
+		fc->reqctr = 0;
 	}
 	return fc;
 }
@@ -253,11 +284,20 @@ static struct fuse_conn *get_conn(struct file *file, struct super_block *sb)
 {
 	struct fuse_conn *fc;
 
+	if (file->f_op != &fuse_dev_operations)
+		return ERR_PTR(-EINVAL);
 	fc = new_conn();
 	if (fc == NULL)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 	spin_lock(&fuse_lock);
-	fc->sb = sb;
+	if (file->private_data) {
+		free_conn(fc);
+		fc = ERR_PTR(-EINVAL);
+	} else {
+		file->private_data = fc;
+		fc->sb = sb;
+		fc->file = file;
+	}
 	spin_unlock(&fuse_lock);
 	return fc;
 }
@@ -315,8 +355,8 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 
 	fc = get_conn(file, sb);
 	fput(file);
-	if (fc == NULL)
-		return -EINVAL;
+	if (IS_ERR(fc))
+		return PTR_ERR(fc);
 
 	fc->user_id = d.user_id;
 
@@ -336,6 +376,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 		iput(root);
 		goto err;
 	}
+	fuse_send_init(fc);
 	return 0;
 
  err:
@@ -411,8 +452,14 @@ static int __init fuse_init(void)
 	if (res)
 		goto err;
 
+	res = fuse_dev_init();
+	if (res)
+		goto err_fs_cleanup;
+
 	return 0;
 
+ err_fs_cleanup:
+	fuse_fs_cleanup();
  err:
 	return res;
 }
@@ -422,6 +469,7 @@ static void __exit fuse_exit(void)
 	printk(KERN_DEBUG "fuse exit\n");
 
 	fuse_fs_cleanup();
+	fuse_dev_cleanup();
 }
 
 module_init(fuse_init);
diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index 2b1f4ae01e9d..a1aebd7104c4 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -11,7 +11,7 @@
 #include <asm/types.h>
 
 /** Version number of this interface */
-#define FUSE_KERNEL_VERSION 5
+#define FUSE_KERNEL_VERSION 6
 
 /** Minor version number of this interface */
 #define FUSE_KERNEL_MINOR_VERSION 1
@@ -19,6 +19,12 @@
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
 
+/** The major number of the fuse character device */
+#define FUSE_MAJOR 10
+
+/** The minor number of the fuse character device */
+#define FUSE_MINOR 229
+
 struct fuse_attr {
 	__u64	ino;
 	__u64	size;
@@ -36,3 +42,31 @@ struct fuse_attr {
 	__u32	rdev;
 };
 
+enum fuse_opcode {
+	FUSE_INIT	= 26
+};
+
+/* Conservative buffer size for the client */
+#define FUSE_MAX_IN 8192
+
+struct fuse_init_in_out {
+	__u32	major;
+	__u32	minor;
+};
+
+struct fuse_in_header {
+	__u32	len;
+	__u32	opcode;
+	__u64	unique;
+	__u64	nodeid;
+	__u32	uid;
+	__u32	gid;
+	__u32	pid;
+};
+
+struct fuse_out_header {
+	__u32	len;
+	__s32	error;
+	__u64	unique;
+};
+
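
Tying the pieces together: the first request a freshly mounted daemon
reads is FUSE_INIT, queued by fuse_send_init() at mount time, and
request_end() marks the connection failed unless the reply's major
number equals FUSE_KERNEL_VERSION.  A well-formed reply is the out
header immediately followed by the echoed fuse_init_in_out.  The
following is a sketch, with includes as in the daemon loop shown
after dev.c above:

/* Sketch: answering the FUSE_INIT handshake */
struct init_reply {
	struct fuse_out_header h;
	struct fuse_init_in_out arg;
};

static void reply_init(int fd, const struct fuse_in_header *in)
{
	struct init_reply r;

	r.h.unique = in->unique;		/* echo the request id */
	r.h.error = 0;
	r.h.len = sizeof(r);			/* header + argument */
	r.arg.major = FUSE_KERNEL_VERSION;	/* anything else sets conn_error */
	r.arg.minor = FUSE_KERNEL_MINOR_VERSION;
	write(fd, &r, sizeof(r));
}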