aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_super.c13
-rw-r--r--fs/Kconfig3
-rw-r--r--fs/configfs/dir.c2
-rw-r--r--fs/eventpoll.c4
-rw-r--r--fs/exec.c19
-rw-r--r--fs/ext3/resize.c1
-rw-r--r--fs/fuse/dev.c256
-rw-r--r--fs/fuse/dir.c118
-rw-r--r--fs/fuse/file.c56
-rw-r--r--fs/fuse/fuse_i.h61
-rw-r--r--fs/fuse/inode.c135
-rw-r--r--fs/inotify.c2
-rw-r--r--fs/namespace.c7
-rw-r--r--fs/nfsd/auth.c46
-rw-r--r--fs/nfsd/export.c3
-rw-r--r--fs/nfsd/nfs3proc.c2
-rw-r--r--fs/nfsd/nfs4acl.c8
-rw-r--r--fs/nfsd/nfs4callback.c6
-rw-r--r--fs/nfsd/nfs4proc.c4
-rw-r--r--fs/nfsd/nfs4state.c150
-rw-r--r--fs/nfsd/nfs4xdr.c62
-rw-r--r--fs/nfsd/nfsproc.c2
-rw-r--r--fs/nfsd/vfs.c6
-rw-r--r--fs/ocfs2/cluster/heartbeat.c40
-rw-r--r--fs/ocfs2/dlm/userdlm.c74
-rw-r--r--fs/ocfs2/file.c19
-rw-r--r--fs/proc/vmcore.c4
-rw-r--r--fs/select.c30
-rw-r--r--fs/sync.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c18
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c3
-rw-r--r--fs/xfs/xfs_ialloc.c15
-rw-r--r--fs/xfs/xfs_iget.c29
-rw-r--r--fs/xfs/xfs_inode.c27
-rw-r--r--fs/xfs/xfs_inode.h1
-rw-r--r--fs/xfs/xfs_mount.c2
37 files changed, 625 insertions, 609 deletions
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index b0a0ae509c00..61c599b4a1e3 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -127,12 +127,13 @@ static struct super_block *v9fs_get_sb(struct file_system_type
127 127
128 if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) { 128 if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) {
129 dprintk(DEBUG_ERROR, "problem initiating session\n"); 129 dprintk(DEBUG_ERROR, "problem initiating session\n");
130 kfree(v9ses); 130 sb = ERR_PTR(newfid);
131 return ERR_PTR(newfid); 131 goto out_free_session;
132 } 132 }
133 133
134 sb = sget(fs_type, NULL, v9fs_set_super, v9ses); 134 sb = sget(fs_type, NULL, v9fs_set_super, v9ses);
135 135 if (IS_ERR(sb))
136 goto out_close_session;
136 v9fs_fill_super(sb, v9ses, flags); 137 v9fs_fill_super(sb, v9ses, flags);
137 138
138 inode = v9fs_get_inode(sb, S_IFDIR | mode); 139 inode = v9fs_get_inode(sb, S_IFDIR | mode);
@@ -185,6 +186,12 @@ static struct super_block *v9fs_get_sb(struct file_system_type
185 186
186 return sb; 187 return sb;
187 188
189out_close_session:
190 v9fs_session_close(v9ses);
191out_free_session:
192 kfree(v9ses);
193 return sb;
194
188put_back_sb: 195put_back_sb:
189 /* deactivate_super calls v9fs_kill_super which will frees the rest */ 196 /* deactivate_super calls v9fs_kill_super which will frees the rest */
190 up_write(&sb->s_umount); 197 up_write(&sb->s_umount);
diff --git a/fs/Kconfig b/fs/Kconfig
index e207be68d4ca..2524629dc835 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -799,6 +799,7 @@ config PROC_KCORE
799config PROC_VMCORE 799config PROC_VMCORE
800 bool "/proc/vmcore support (EXPERIMENTAL)" 800 bool "/proc/vmcore support (EXPERIMENTAL)"
801 depends on PROC_FS && EXPERIMENTAL && CRASH_DUMP 801 depends on PROC_FS && EXPERIMENTAL && CRASH_DUMP
802 default y
802 help 803 help
803 Exports the dump image of crashed kernel in ELF format. 804 Exports the dump image of crashed kernel in ELF format.
804 805
@@ -861,7 +862,7 @@ config RAMFS
861 862
862config CONFIGFS_FS 863config CONFIGFS_FS
863 tristate "Userspace-driven configuration filesystem (EXPERIMENTAL)" 864 tristate "Userspace-driven configuration filesystem (EXPERIMENTAL)"
864 depends on EXPERIMENTAL 865 depends on SYSFS && EXPERIMENTAL
865 help 866 help
866 configfs is a ram-based filesystem that provides the converse 867 configfs is a ram-based filesystem that provides the converse
867 of sysfs's functionality. Where sysfs is a filesystem-based 868 of sysfs's functionality. Where sysfs is a filesystem-based
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 8ed9b06a9828..5638c8f9362f 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -504,7 +504,7 @@ static int populate_groups(struct config_group *group)
504 int ret = 0; 504 int ret = 0;
505 int i; 505 int i;
506 506
507 if (group && group->default_groups) { 507 if (group->default_groups) {
508 /* FYI, we're faking mkdir here 508 /* FYI, we're faking mkdir here
509 * I'm not sure we need this semaphore, as we're called 509 * I'm not sure we need this semaphore, as we're called
510 * from our parent's mkdir. That holds our parent's 510 * from our parent's mkdir. That holds our parent's
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 242fe1a66ce5..1b4491cdd115 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -599,7 +599,7 @@ sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event)
599 switch (op) { 599 switch (op) {
600 case EPOLL_CTL_ADD: 600 case EPOLL_CTL_ADD:
601 if (!epi) { 601 if (!epi) {
602 epds.events |= POLLERR | POLLHUP | POLLRDHUP; 602 epds.events |= POLLERR | POLLHUP;
603 603
604 error = ep_insert(ep, &epds, tfile, fd); 604 error = ep_insert(ep, &epds, tfile, fd);
605 } else 605 } else
@@ -613,7 +613,7 @@ sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event)
613 break; 613 break;
614 case EPOLL_CTL_MOD: 614 case EPOLL_CTL_MOD:
615 if (epi) { 615 if (epi) {
616 epds.events |= POLLERR | POLLHUP | POLLRDHUP; 616 epds.events |= POLLERR | POLLHUP;
617 error = ep_modify(ep, epi, &epds); 617 error = ep_modify(ep, epi, &epds);
618 } else 618 } else
619 error = -ENOENT; 619 error = -ENOENT;
diff --git a/fs/exec.c b/fs/exec.c
index 0291a68a3626..3234a0c32d54 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -678,6 +678,18 @@ static int de_thread(struct task_struct *tsk)
678 while (leader->exit_state != EXIT_ZOMBIE) 678 while (leader->exit_state != EXIT_ZOMBIE)
679 yield(); 679 yield();
680 680
681 /*
682 * The only record we have of the real-time age of a
683 * process, regardless of execs it's done, is start_time.
684 * All the past CPU time is accumulated in signal_struct
685 * from sister threads now dead. But in this non-leader
686 * exec, nothing survives from the original leader thread,
687 * whose birth marks the true age of this process now.
688 * When we take on its identity by switching to its PID, we
689 * also take its birthdate (always earlier than our own).
690 */
691 current->start_time = leader->start_time;
692
681 spin_lock(&leader->proc_lock); 693 spin_lock(&leader->proc_lock);
682 spin_lock(&current->proc_lock); 694 spin_lock(&current->proc_lock);
683 proc_dentry1 = proc_pid_unhash(current); 695 proc_dentry1 = proc_pid_unhash(current);
@@ -723,7 +735,12 @@ static int de_thread(struct task_struct *tsk)
723 current->parent = current->real_parent = leader->real_parent; 735 current->parent = current->real_parent = leader->real_parent;
724 leader->parent = leader->real_parent = child_reaper; 736 leader->parent = leader->real_parent = child_reaper;
725 current->group_leader = current; 737 current->group_leader = current;
726 leader->group_leader = leader; 738 leader->group_leader = current;
739
740 /* Reduce leader to a thread */
741 detach_pid(leader, PIDTYPE_PGID);
742 detach_pid(leader, PIDTYPE_SID);
743 list_del_init(&leader->tasks);
727 744
728 add_parent(current); 745 add_parent(current);
729 add_parent(leader); 746 add_parent(leader);
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 1041dab6de2f..14f5f6ea3e72 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -974,6 +974,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
974 if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) { 974 if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) {
975 ext3_warning(sb, __FUNCTION__, 975 ext3_warning(sb, __FUNCTION__,
976 "multiple resizers run on filesystem!"); 976 "multiple resizers run on filesystem!");
977 unlock_super(sb);
977 err = -EBUSY; 978 err = -EBUSY;
978 goto exit_put; 979 goto exit_put;
979 } 980 }
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 23d1f52eb1b8..6c740f860665 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1,6 +1,6 @@
1/* 1/*
2 FUSE: Filesystem in Userspace 2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu> 3 Copyright (C) 2001-2006 Miklos Szeredi <miklos@szeredi.hu>
4 4
5 This program can be distributed under the terms of the GNU GPL. 5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING. 6 See the file COPYING.
@@ -23,13 +23,11 @@ static kmem_cache_t *fuse_req_cachep;
23 23
24static struct fuse_conn *fuse_get_conn(struct file *file) 24static struct fuse_conn *fuse_get_conn(struct file *file)
25{ 25{
26 struct fuse_conn *fc; 26 /*
27 spin_lock(&fuse_lock); 27 * Lockless access is OK, because file->private data is set
28 fc = file->private_data; 28 * once during mount and is valid until the file is released.
29 if (fc && !fc->connected) 29 */
30 fc = NULL; 30 return file->private_data;
31 spin_unlock(&fuse_lock);
32 return fc;
33} 31}
34 32
35static void fuse_request_init(struct fuse_req *req) 33static void fuse_request_init(struct fuse_req *req)
@@ -74,10 +72,8 @@ static void restore_sigs(sigset_t *oldset)
74 */ 72 */
75void fuse_reset_request(struct fuse_req *req) 73void fuse_reset_request(struct fuse_req *req)
76{ 74{
77 int preallocated = req->preallocated;
78 BUG_ON(atomic_read(&req->count) != 1); 75 BUG_ON(atomic_read(&req->count) != 1);
79 fuse_request_init(req); 76 fuse_request_init(req);
80 req->preallocated = preallocated;
81} 77}
82 78
83static void __fuse_get_request(struct fuse_req *req) 79static void __fuse_get_request(struct fuse_req *req)
@@ -92,80 +88,52 @@ static void __fuse_put_request(struct fuse_req *req)
92 atomic_dec(&req->count); 88 atomic_dec(&req->count);
93} 89}
94 90
95static struct fuse_req *do_get_request(struct fuse_conn *fc) 91struct fuse_req *fuse_get_req(struct fuse_conn *fc)
96{ 92{
97 struct fuse_req *req; 93 struct fuse_req *req;
98
99 spin_lock(&fuse_lock);
100 BUG_ON(list_empty(&fc->unused_list));
101 req = list_entry(fc->unused_list.next, struct fuse_req, list);
102 list_del_init(&req->list);
103 spin_unlock(&fuse_lock);
104 fuse_request_init(req);
105 req->preallocated = 1;
106 req->in.h.uid = current->fsuid;
107 req->in.h.gid = current->fsgid;
108 req->in.h.pid = current->pid;
109 return req;
110}
111
112/* This can return NULL, but only in case it's interrupted by a SIGKILL */
113struct fuse_req *fuse_get_request(struct fuse_conn *fc)
114{
115 int intr;
116 sigset_t oldset; 94 sigset_t oldset;
95 int err;
117 96
118 atomic_inc(&fc->num_waiting);
119 block_sigs(&oldset); 97 block_sigs(&oldset);
120 intr = down_interruptible(&fc->outstanding_sem); 98 err = wait_event_interruptible(fc->blocked_waitq, !fc->blocked);
121 restore_sigs(&oldset); 99 restore_sigs(&oldset);
122 if (intr) { 100 if (err)
123 atomic_dec(&fc->num_waiting); 101 return ERR_PTR(-EINTR);
124 return NULL;
125 }
126 return do_get_request(fc);
127}
128 102
129/* Must be called with fuse_lock held */ 103 req = fuse_request_alloc();
130static void fuse_putback_request(struct fuse_conn *fc, struct fuse_req *req) 104 if (!req)
131{ 105 return ERR_PTR(-ENOMEM);
132 if (req->preallocated) {
133 atomic_dec(&fc->num_waiting);
134 list_add(&req->list, &fc->unused_list);
135 } else
136 fuse_request_free(req);
137 106
138 /* If we are in debt decrease that first */ 107 atomic_inc(&fc->num_waiting);
139 if (fc->outstanding_debt) 108 fuse_request_init(req);
140 fc->outstanding_debt--; 109 req->in.h.uid = current->fsuid;
141 else 110 req->in.h.gid = current->fsgid;
142 up(&fc->outstanding_sem); 111 req->in.h.pid = current->pid;
112 return req;
143} 113}
144 114
145void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) 115void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
146{ 116{
147 if (atomic_dec_and_test(&req->count)) { 117 if (atomic_dec_and_test(&req->count)) {
148 spin_lock(&fuse_lock); 118 atomic_dec(&fc->num_waiting);
149 fuse_putback_request(fc, req); 119 fuse_request_free(req);
150 spin_unlock(&fuse_lock);
151 } 120 }
152} 121}
153 122
154static void fuse_put_request_locked(struct fuse_conn *fc, struct fuse_req *req) 123void fuse_release_background(struct fuse_conn *fc, struct fuse_req *req)
155{
156 if (atomic_dec_and_test(&req->count))
157 fuse_putback_request(fc, req);
158}
159
160void fuse_release_background(struct fuse_req *req)
161{ 124{
162 iput(req->inode); 125 iput(req->inode);
163 iput(req->inode2); 126 iput(req->inode2);
164 if (req->file) 127 if (req->file)
165 fput(req->file); 128 fput(req->file);
166 spin_lock(&fuse_lock); 129 spin_lock(&fc->lock);
167 list_del(&req->bg_entry); 130 list_del(&req->bg_entry);
168 spin_unlock(&fuse_lock); 131 if (fc->num_background == FUSE_MAX_BACKGROUND) {
132 fc->blocked = 0;
133 wake_up_all(&fc->blocked_waitq);
134 }
135 fc->num_background--;
136 spin_unlock(&fc->lock);
169} 137}
170 138
171/* 139/*
@@ -184,23 +152,23 @@ void fuse_release_background(struct fuse_req *req)
184 * interrupted and put in the background, it will return with an error 152 * interrupted and put in the background, it will return with an error
185 * and hence never be reset and reused. 153 * and hence never be reset and reused.
186 * 154 *
187 * Called with fuse_lock, unlocks it 155 * Called with fc->lock, unlocks it
188 */ 156 */
189static void request_end(struct fuse_conn *fc, struct fuse_req *req) 157static void request_end(struct fuse_conn *fc, struct fuse_req *req)
190{ 158{
191 list_del(&req->list); 159 list_del(&req->list);
192 req->state = FUSE_REQ_FINISHED; 160 req->state = FUSE_REQ_FINISHED;
193 if (!req->background) { 161 if (!req->background) {
162 spin_unlock(&fc->lock);
194 wake_up(&req->waitq); 163 wake_up(&req->waitq);
195 fuse_put_request_locked(fc, req); 164 fuse_put_request(fc, req);
196 spin_unlock(&fuse_lock);
197 } else { 165 } else {
198 void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; 166 void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
199 req->end = NULL; 167 req->end = NULL;
200 spin_unlock(&fuse_lock); 168 spin_unlock(&fc->lock);
201 down_read(&fc->sbput_sem); 169 down_read(&fc->sbput_sem);
202 if (fc->mounted) 170 if (fc->mounted)
203 fuse_release_background(req); 171 fuse_release_background(fc, req);
204 up_read(&fc->sbput_sem); 172 up_read(&fc->sbput_sem);
205 if (end) 173 if (end)
206 end(fc, req); 174 end(fc, req);
@@ -242,6 +210,9 @@ static void background_request(struct fuse_conn *fc, struct fuse_req *req)
242{ 210{
243 req->background = 1; 211 req->background = 1;
244 list_add(&req->bg_entry, &fc->background); 212 list_add(&req->bg_entry, &fc->background);
213 fc->num_background++;
214 if (fc->num_background == FUSE_MAX_BACKGROUND)
215 fc->blocked = 1;
245 if (req->inode) 216 if (req->inode)
246 req->inode = igrab(req->inode); 217 req->inode = igrab(req->inode);
247 if (req->inode2) 218 if (req->inode2)
@@ -250,16 +221,16 @@ static void background_request(struct fuse_conn *fc, struct fuse_req *req)
250 get_file(req->file); 221 get_file(req->file);
251} 222}
252 223
253/* Called with fuse_lock held. Releases, and then reacquires it. */ 224/* Called with fc->lock held. Releases, and then reacquires it. */
254static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) 225static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
255{ 226{
256 sigset_t oldset; 227 sigset_t oldset;
257 228
258 spin_unlock(&fuse_lock); 229 spin_unlock(&fc->lock);
259 block_sigs(&oldset); 230 block_sigs(&oldset);
260 wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED); 231 wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED);
261 restore_sigs(&oldset); 232 restore_sigs(&oldset);
262 spin_lock(&fuse_lock); 233 spin_lock(&fc->lock);
263 if (req->state == FUSE_REQ_FINISHED && !req->interrupted) 234 if (req->state == FUSE_REQ_FINISHED && !req->interrupted)
264 return; 235 return;
265 236
@@ -273,9 +244,9 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
273 locked state, there mustn't be any filesystem 244 locked state, there mustn't be any filesystem
274 operation (e.g. page fault), since that could lead 245 operation (e.g. page fault), since that could lead
275 to deadlock */ 246 to deadlock */
276 spin_unlock(&fuse_lock); 247 spin_unlock(&fc->lock);
277 wait_event(req->waitq, !req->locked); 248 wait_event(req->waitq, !req->locked);
278 spin_lock(&fuse_lock); 249 spin_lock(&fc->lock);
279 } 250 }
280 if (req->state == FUSE_REQ_PENDING) { 251 if (req->state == FUSE_REQ_PENDING) {
281 list_del(&req->list); 252 list_del(&req->list);
@@ -304,19 +275,10 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
304 req->in.h.unique = fc->reqctr; 275 req->in.h.unique = fc->reqctr;
305 req->in.h.len = sizeof(struct fuse_in_header) + 276 req->in.h.len = sizeof(struct fuse_in_header) +
306 len_args(req->in.numargs, (struct fuse_arg *) req->in.args); 277 len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
307 if (!req->preallocated) {
308 /* If request is not preallocated (either FORGET or
309 RELEASE), then still decrease outstanding_sem, so
310 user can't open infinite number of files while not
311 processing the RELEASE requests. However for
312 efficiency do it without blocking, so if down()
313 would block, just increase the debt instead */
314 if (down_trylock(&fc->outstanding_sem))
315 fc->outstanding_debt++;
316 }
317 list_add_tail(&req->list, &fc->pending); 278 list_add_tail(&req->list, &fc->pending);
318 req->state = FUSE_REQ_PENDING; 279 req->state = FUSE_REQ_PENDING;
319 wake_up(&fc->waitq); 280 wake_up(&fc->waitq);
281 kill_fasync(&fc->fasync, SIGIO, POLL_IN);
320} 282}
321 283
322/* 284/*
@@ -325,7 +287,7 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
325void request_send(struct fuse_conn *fc, struct fuse_req *req) 287void request_send(struct fuse_conn *fc, struct fuse_req *req)
326{ 288{
327 req->isreply = 1; 289 req->isreply = 1;
328 spin_lock(&fuse_lock); 290 spin_lock(&fc->lock);
329 if (!fc->connected) 291 if (!fc->connected)
330 req->out.h.error = -ENOTCONN; 292 req->out.h.error = -ENOTCONN;
331 else if (fc->conn_error) 293 else if (fc->conn_error)
@@ -338,15 +300,16 @@ void request_send(struct fuse_conn *fc, struct fuse_req *req)
338 300
339 request_wait_answer(fc, req); 301 request_wait_answer(fc, req);
340 } 302 }
341 spin_unlock(&fuse_lock); 303 spin_unlock(&fc->lock);
342} 304}
343 305
344static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req) 306static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
345{ 307{
346 spin_lock(&fuse_lock); 308 spin_lock(&fc->lock);
309 background_request(fc, req);
347 if (fc->connected) { 310 if (fc->connected) {
348 queue_request(fc, req); 311 queue_request(fc, req);
349 spin_unlock(&fuse_lock); 312 spin_unlock(&fc->lock);
350 } else { 313 } else {
351 req->out.h.error = -ENOTCONN; 314 req->out.h.error = -ENOTCONN;
352 request_end(fc, req); 315 request_end(fc, req);
@@ -362,9 +325,6 @@ void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
362void request_send_background(struct fuse_conn *fc, struct fuse_req *req) 325void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
363{ 326{
364 req->isreply = 1; 327 req->isreply = 1;
365 spin_lock(&fuse_lock);
366 background_request(fc, req);
367 spin_unlock(&fuse_lock);
368 request_send_nowait(fc, req); 328 request_send_nowait(fc, req);
369} 329}
370 330
@@ -373,16 +333,16 @@ void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
373 * anything that could cause a page-fault. If the request was already 333 * anything that could cause a page-fault. If the request was already
374 * interrupted bail out. 334 * interrupted bail out.
375 */ 335 */
376static int lock_request(struct fuse_req *req) 336static int lock_request(struct fuse_conn *fc, struct fuse_req *req)
377{ 337{
378 int err = 0; 338 int err = 0;
379 if (req) { 339 if (req) {
380 spin_lock(&fuse_lock); 340 spin_lock(&fc->lock);
381 if (req->interrupted) 341 if (req->interrupted)
382 err = -ENOENT; 342 err = -ENOENT;
383 else 343 else
384 req->locked = 1; 344 req->locked = 1;
385 spin_unlock(&fuse_lock); 345 spin_unlock(&fc->lock);
386 } 346 }
387 return err; 347 return err;
388} 348}
@@ -392,18 +352,19 @@ static int lock_request(struct fuse_req *req)
392 * requester thread is currently waiting for it to be unlocked, so 352 * requester thread is currently waiting for it to be unlocked, so
393 * wake it up. 353 * wake it up.
394 */ 354 */
395static void unlock_request(struct fuse_req *req) 355static void unlock_request(struct fuse_conn *fc, struct fuse_req *req)
396{ 356{
397 if (req) { 357 if (req) {
398 spin_lock(&fuse_lock); 358 spin_lock(&fc->lock);
399 req->locked = 0; 359 req->locked = 0;
400 if (req->interrupted) 360 if (req->interrupted)
401 wake_up(&req->waitq); 361 wake_up(&req->waitq);
402 spin_unlock(&fuse_lock); 362 spin_unlock(&fc->lock);
403 } 363 }
404} 364}
405 365
406struct fuse_copy_state { 366struct fuse_copy_state {
367 struct fuse_conn *fc;
407 int write; 368 int write;
408 struct fuse_req *req; 369 struct fuse_req *req;
409 const struct iovec *iov; 370 const struct iovec *iov;
@@ -416,11 +377,12 @@ struct fuse_copy_state {
416 unsigned len; 377 unsigned len;
417}; 378};
418 379
419static void fuse_copy_init(struct fuse_copy_state *cs, int write, 380static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc,
420 struct fuse_req *req, const struct iovec *iov, 381 int write, struct fuse_req *req,
421 unsigned long nr_segs) 382 const struct iovec *iov, unsigned long nr_segs)
422{ 383{
423 memset(cs, 0, sizeof(*cs)); 384 memset(cs, 0, sizeof(*cs));
385 cs->fc = fc;
424 cs->write = write; 386 cs->write = write;
425 cs->req = req; 387 cs->req = req;
426 cs->iov = iov; 388 cs->iov = iov;
@@ -450,7 +412,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
450 unsigned long offset; 412 unsigned long offset;
451 int err; 413 int err;
452 414
453 unlock_request(cs->req); 415 unlock_request(cs->fc, cs->req);
454 fuse_copy_finish(cs); 416 fuse_copy_finish(cs);
455 if (!cs->seglen) { 417 if (!cs->seglen) {
456 BUG_ON(!cs->nr_segs); 418 BUG_ON(!cs->nr_segs);
@@ -473,7 +435,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
473 cs->seglen -= cs->len; 435 cs->seglen -= cs->len;
474 cs->addr += cs->len; 436 cs->addr += cs->len;
475 437
476 return lock_request(cs->req); 438 return lock_request(cs->fc, cs->req);
477} 439}
478 440
479/* Do as much copy to/from userspace buffer as we can */ 441/* Do as much copy to/from userspace buffer as we can */
@@ -585,9 +547,9 @@ static void request_wait(struct fuse_conn *fc)
585 if (signal_pending(current)) 547 if (signal_pending(current))
586 break; 548 break;
587 549
588 spin_unlock(&fuse_lock); 550 spin_unlock(&fc->lock);
589 schedule(); 551 schedule();
590 spin_lock(&fuse_lock); 552 spin_lock(&fc->lock);
591 } 553 }
592 set_current_state(TASK_RUNNING); 554 set_current_state(TASK_RUNNING);
593 remove_wait_queue(&fc->waitq, &wait); 555 remove_wait_queue(&fc->waitq, &wait);
@@ -606,18 +568,21 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
606 unsigned long nr_segs, loff_t *off) 568 unsigned long nr_segs, loff_t *off)
607{ 569{
608 int err; 570 int err;
609 struct fuse_conn *fc;
610 struct fuse_req *req; 571 struct fuse_req *req;
611 struct fuse_in *in; 572 struct fuse_in *in;
612 struct fuse_copy_state cs; 573 struct fuse_copy_state cs;
613 unsigned reqsize; 574 unsigned reqsize;
575 struct fuse_conn *fc = fuse_get_conn(file);
576 if (!fc)
577 return -EPERM;
614 578
615 restart: 579 restart:
616 spin_lock(&fuse_lock); 580 spin_lock(&fc->lock);
617 fc = file->private_data; 581 err = -EAGAIN;
618 err = -EPERM; 582 if ((file->f_flags & O_NONBLOCK) && fc->connected &&
619 if (!fc) 583 list_empty(&fc->pending))
620 goto err_unlock; 584 goto err_unlock;
585
621 request_wait(fc); 586 request_wait(fc);
622 err = -ENODEV; 587 err = -ENODEV;
623 if (!fc->connected) 588 if (!fc->connected)
@@ -641,14 +606,14 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
641 request_end(fc, req); 606 request_end(fc, req);
642 goto restart; 607 goto restart;
643 } 608 }
644 spin_unlock(&fuse_lock); 609 spin_unlock(&fc->lock);
645 fuse_copy_init(&cs, 1, req, iov, nr_segs); 610 fuse_copy_init(&cs, fc, 1, req, iov, nr_segs);
646 err = fuse_copy_one(&cs, &in->h, sizeof(in->h)); 611 err = fuse_copy_one(&cs, &in->h, sizeof(in->h));
647 if (!err) 612 if (!err)
648 err = fuse_copy_args(&cs, in->numargs, in->argpages, 613 err = fuse_copy_args(&cs, in->numargs, in->argpages,
649 (struct fuse_arg *) in->args, 0); 614 (struct fuse_arg *) in->args, 0);
650 fuse_copy_finish(&cs); 615 fuse_copy_finish(&cs);
651 spin_lock(&fuse_lock); 616 spin_lock(&fc->lock);
652 req->locked = 0; 617 req->locked = 0;
653 if (!err && req->interrupted) 618 if (!err && req->interrupted)
654 err = -ENOENT; 619 err = -ENOENT;
@@ -663,12 +628,12 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
663 else { 628 else {
664 req->state = FUSE_REQ_SENT; 629 req->state = FUSE_REQ_SENT;
665 list_move_tail(&req->list, &fc->processing); 630 list_move_tail(&req->list, &fc->processing);
666 spin_unlock(&fuse_lock); 631 spin_unlock(&fc->lock);
667 } 632 }
668 return reqsize; 633 return reqsize;
669 634
670 err_unlock: 635 err_unlock:
671 spin_unlock(&fuse_lock); 636 spin_unlock(&fc->lock);
672 return err; 637 return err;
673} 638}
674 639
@@ -735,9 +700,9 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
735 struct fuse_copy_state cs; 700 struct fuse_copy_state cs;
736 struct fuse_conn *fc = fuse_get_conn(file); 701 struct fuse_conn *fc = fuse_get_conn(file);
737 if (!fc) 702 if (!fc)
738 return -ENODEV; 703 return -EPERM;
739 704
740 fuse_copy_init(&cs, 0, NULL, iov, nr_segs); 705 fuse_copy_init(&cs, fc, 0, NULL, iov, nr_segs);
741 if (nbytes < sizeof(struct fuse_out_header)) 706 if (nbytes < sizeof(struct fuse_out_header))
742 return -EINVAL; 707 return -EINVAL;
743 708
@@ -749,7 +714,7 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
749 oh.len != nbytes) 714 oh.len != nbytes)
750 goto err_finish; 715 goto err_finish;
751 716
752 spin_lock(&fuse_lock); 717 spin_lock(&fc->lock);
753 err = -ENOENT; 718 err = -ENOENT;
754 if (!fc->connected) 719 if (!fc->connected)
755 goto err_unlock; 720 goto err_unlock;
@@ -760,9 +725,9 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
760 goto err_unlock; 725 goto err_unlock;
761 726
762 if (req->interrupted) { 727 if (req->interrupted) {
763 spin_unlock(&fuse_lock); 728 spin_unlock(&fc->lock);
764 fuse_copy_finish(&cs); 729 fuse_copy_finish(&cs);
765 spin_lock(&fuse_lock); 730 spin_lock(&fc->lock);
766 request_end(fc, req); 731 request_end(fc, req);
767 return -ENOENT; 732 return -ENOENT;
768 } 733 }
@@ -770,12 +735,12 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
770 req->out.h = oh; 735 req->out.h = oh;
771 req->locked = 1; 736 req->locked = 1;
772 cs.req = req; 737 cs.req = req;
773 spin_unlock(&fuse_lock); 738 spin_unlock(&fc->lock);
774 739
775 err = copy_out_args(&cs, &req->out, nbytes); 740 err = copy_out_args(&cs, &req->out, nbytes);
776 fuse_copy_finish(&cs); 741 fuse_copy_finish(&cs);
777 742
778 spin_lock(&fuse_lock); 743 spin_lock(&fc->lock);
779 req->locked = 0; 744 req->locked = 0;
780 if (!err) { 745 if (!err) {
781 if (req->interrupted) 746 if (req->interrupted)
@@ -787,7 +752,7 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
787 return err ? err : nbytes; 752 return err ? err : nbytes;
788 753
789 err_unlock: 754 err_unlock:
790 spin_unlock(&fuse_lock); 755 spin_unlock(&fc->lock);
791 err_finish: 756 err_finish:
792 fuse_copy_finish(&cs); 757 fuse_copy_finish(&cs);
793 return err; 758 return err;
@@ -804,18 +769,19 @@ static ssize_t fuse_dev_write(struct file *file, const char __user *buf,
804 769
805static unsigned fuse_dev_poll(struct file *file, poll_table *wait) 770static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
806{ 771{
807 struct fuse_conn *fc = fuse_get_conn(file);
808 unsigned mask = POLLOUT | POLLWRNORM; 772 unsigned mask = POLLOUT | POLLWRNORM;
809 773 struct fuse_conn *fc = fuse_get_conn(file);
810 if (!fc) 774 if (!fc)
811 return -ENODEV; 775 return POLLERR;
812 776
813 poll_wait(file, &fc->waitq, wait); 777 poll_wait(file, &fc->waitq, wait);
814 778
815 spin_lock(&fuse_lock); 779 spin_lock(&fc->lock);
816 if (!list_empty(&fc->pending)) 780 if (!fc->connected)
817 mask |= POLLIN | POLLRDNORM; 781 mask = POLLERR;
818 spin_unlock(&fuse_lock); 782 else if (!list_empty(&fc->pending))
783 mask |= POLLIN | POLLRDNORM;
784 spin_unlock(&fc->lock);
819 785
820 return mask; 786 return mask;
821} 787}
@@ -823,7 +789,7 @@ static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
823/* 789/*
824 * Abort all requests on the given list (pending or processing) 790 * Abort all requests on the given list (pending or processing)
825 * 791 *
826 * This function releases and reacquires fuse_lock 792 * This function releases and reacquires fc->lock
827 */ 793 */
828static void end_requests(struct fuse_conn *fc, struct list_head *head) 794static void end_requests(struct fuse_conn *fc, struct list_head *head)
829{ 795{
@@ -832,7 +798,7 @@ static void end_requests(struct fuse_conn *fc, struct list_head *head)
832 req = list_entry(head->next, struct fuse_req, list); 798 req = list_entry(head->next, struct fuse_req, list);
833 req->out.h.error = -ECONNABORTED; 799 req->out.h.error = -ECONNABORTED;
834 request_end(fc, req); 800 request_end(fc, req);
835 spin_lock(&fuse_lock); 801 spin_lock(&fc->lock);
836 } 802 }
837} 803}
838 804
@@ -863,10 +829,10 @@ static void end_io_requests(struct fuse_conn *fc)
863 req->end = NULL; 829 req->end = NULL;
864 /* The end function will consume this reference */ 830 /* The end function will consume this reference */
865 __fuse_get_request(req); 831 __fuse_get_request(req);
866 spin_unlock(&fuse_lock); 832 spin_unlock(&fc->lock);
867 wait_event(req->waitq, !req->locked); 833 wait_event(req->waitq, !req->locked);
868 end(fc, req); 834 end(fc, req);
869 spin_lock(&fuse_lock); 835 spin_lock(&fc->lock);
870 } 836 }
871 } 837 }
872} 838}
@@ -893,35 +859,44 @@ static void end_io_requests(struct fuse_conn *fc)
893 */ 859 */
894void fuse_abort_conn(struct fuse_conn *fc) 860void fuse_abort_conn(struct fuse_conn *fc)
895{ 861{
896 spin_lock(&fuse_lock); 862 spin_lock(&fc->lock);
897 if (fc->connected) { 863 if (fc->connected) {
898 fc->connected = 0; 864 fc->connected = 0;
899 end_io_requests(fc); 865 end_io_requests(fc);
900 end_requests(fc, &fc->pending); 866 end_requests(fc, &fc->pending);
901 end_requests(fc, &fc->processing); 867 end_requests(fc, &fc->processing);
902 wake_up_all(&fc->waitq); 868 wake_up_all(&fc->waitq);
869 kill_fasync(&fc->fasync, SIGIO, POLL_IN);
903 } 870 }
904 spin_unlock(&fuse_lock); 871 spin_unlock(&fc->lock);
905} 872}
906 873
907static int fuse_dev_release(struct inode *inode, struct file *file) 874static int fuse_dev_release(struct inode *inode, struct file *file)
908{ 875{
909 struct fuse_conn *fc; 876 struct fuse_conn *fc = fuse_get_conn(file);
910
911 spin_lock(&fuse_lock);
912 fc = file->private_data;
913 if (fc) { 877 if (fc) {
878 spin_lock(&fc->lock);
914 fc->connected = 0; 879 fc->connected = 0;
915 end_requests(fc, &fc->pending); 880 end_requests(fc, &fc->pending);
916 end_requests(fc, &fc->processing); 881 end_requests(fc, &fc->processing);
917 } 882 spin_unlock(&fc->lock);
918 spin_unlock(&fuse_lock); 883 fasync_helper(-1, file, 0, &fc->fasync);
919 if (fc)
920 kobject_put(&fc->kobj); 884 kobject_put(&fc->kobj);
885 }
921 886
922 return 0; 887 return 0;
923} 888}
924 889
890static int fuse_dev_fasync(int fd, struct file *file, int on)
891{
892 struct fuse_conn *fc = fuse_get_conn(file);
893 if (!fc)
894 return -EPERM;
895
896 /* No locking - fasync_helper does its own locking */
897 return fasync_helper(fd, file, on, &fc->fasync);
898}
899
925const struct file_operations fuse_dev_operations = { 900const struct file_operations fuse_dev_operations = {
926 .owner = THIS_MODULE, 901 .owner = THIS_MODULE,
927 .llseek = no_llseek, 902 .llseek = no_llseek,
@@ -931,6 +906,7 @@ const struct file_operations fuse_dev_operations = {
931 .writev = fuse_dev_writev, 906 .writev = fuse_dev_writev,
932 .poll = fuse_dev_poll, 907 .poll = fuse_dev_poll,
933 .release = fuse_dev_release, 908 .release = fuse_dev_release,
909 .fasync = fuse_dev_fasync,
934}; 910};
935 911
936static struct miscdevice fuse_miscdevice = { 912static struct miscdevice fuse_miscdevice = {
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 256355b80256..8d7546e832e8 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -117,8 +117,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
117 return 0; 117 return 0;
118 118
119 fc = get_fuse_conn(inode); 119 fc = get_fuse_conn(inode);
120 req = fuse_get_request(fc); 120 req = fuse_get_req(fc);
121 if (!req) 121 if (IS_ERR(req))
122 return 0; 122 return 0;
123 123
124 fuse_lookup_init(req, entry->d_parent->d_inode, entry, &outarg); 124 fuse_lookup_init(req, entry->d_parent->d_inode, entry, &outarg);
@@ -188,9 +188,9 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
188 if (entry->d_name.len > FUSE_NAME_MAX) 188 if (entry->d_name.len > FUSE_NAME_MAX)
189 return ERR_PTR(-ENAMETOOLONG); 189 return ERR_PTR(-ENAMETOOLONG);
190 190
191 req = fuse_get_request(fc); 191 req = fuse_get_req(fc);
192 if (!req) 192 if (IS_ERR(req))
193 return ERR_PTR(-EINTR); 193 return ERR_PTR(PTR_ERR(req));
194 194
195 fuse_lookup_init(req, dir, entry, &outarg); 195 fuse_lookup_init(req, dir, entry, &outarg);
196 request_send(fc, req); 196 request_send(fc, req);
@@ -244,15 +244,14 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
244 struct file *file; 244 struct file *file;
245 int flags = nd->intent.open.flags - 1; 245 int flags = nd->intent.open.flags - 1;
246 246
247 err = -ENOSYS;
248 if (fc->no_create) 247 if (fc->no_create)
249 goto out; 248 return -ENOSYS;
250 249
251 err = -EINTR; 250 req = fuse_get_req(fc);
252 req = fuse_get_request(fc); 251 if (IS_ERR(req))
253 if (!req) 252 return PTR_ERR(req);
254 goto out;
255 253
254 err = -ENOMEM;
256 ff = fuse_file_alloc(); 255 ff = fuse_file_alloc();
257 if (!ff) 256 if (!ff)
258 goto out_put_request; 257 goto out_put_request;
@@ -314,7 +313,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
314 fuse_file_free(ff); 313 fuse_file_free(ff);
315 out_put_request: 314 out_put_request:
316 fuse_put_request(fc, req); 315 fuse_put_request(fc, req);
317 out:
318 return err; 316 return err;
319} 317}
320 318
@@ -375,9 +373,9 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode,
375{ 373{
376 struct fuse_mknod_in inarg; 374 struct fuse_mknod_in inarg;
377 struct fuse_conn *fc = get_fuse_conn(dir); 375 struct fuse_conn *fc = get_fuse_conn(dir);
378 struct fuse_req *req = fuse_get_request(fc); 376 struct fuse_req *req = fuse_get_req(fc);
379 if (!req) 377 if (IS_ERR(req))
380 return -EINTR; 378 return PTR_ERR(req);
381 379
382 memset(&inarg, 0, sizeof(inarg)); 380 memset(&inarg, 0, sizeof(inarg));
383 inarg.mode = mode; 381 inarg.mode = mode;
@@ -407,9 +405,9 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, int mode)
407{ 405{
408 struct fuse_mkdir_in inarg; 406 struct fuse_mkdir_in inarg;
409 struct fuse_conn *fc = get_fuse_conn(dir); 407 struct fuse_conn *fc = get_fuse_conn(dir);
410 struct fuse_req *req = fuse_get_request(fc); 408 struct fuse_req *req = fuse_get_req(fc);
411 if (!req) 409 if (IS_ERR(req))
412 return -EINTR; 410 return PTR_ERR(req);
413 411
414 memset(&inarg, 0, sizeof(inarg)); 412 memset(&inarg, 0, sizeof(inarg));
415 inarg.mode = mode; 413 inarg.mode = mode;
@@ -427,9 +425,9 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry,
427{ 425{
428 struct fuse_conn *fc = get_fuse_conn(dir); 426 struct fuse_conn *fc = get_fuse_conn(dir);
429 unsigned len = strlen(link) + 1; 427 unsigned len = strlen(link) + 1;
430 struct fuse_req *req = fuse_get_request(fc); 428 struct fuse_req *req = fuse_get_req(fc);
431 if (!req) 429 if (IS_ERR(req))
432 return -EINTR; 430 return PTR_ERR(req);
433 431
434 req->in.h.opcode = FUSE_SYMLINK; 432 req->in.h.opcode = FUSE_SYMLINK;
435 req->in.numargs = 2; 433 req->in.numargs = 2;
@@ -444,9 +442,9 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
444{ 442{
445 int err; 443 int err;
446 struct fuse_conn *fc = get_fuse_conn(dir); 444 struct fuse_conn *fc = get_fuse_conn(dir);
447 struct fuse_req *req = fuse_get_request(fc); 445 struct fuse_req *req = fuse_get_req(fc);
448 if (!req) 446 if (IS_ERR(req))
449 return -EINTR; 447 return PTR_ERR(req);
450 448
451 req->in.h.opcode = FUSE_UNLINK; 449 req->in.h.opcode = FUSE_UNLINK;
452 req->in.h.nodeid = get_node_id(dir); 450 req->in.h.nodeid = get_node_id(dir);
@@ -476,9 +474,9 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
476{ 474{
477 int err; 475 int err;
478 struct fuse_conn *fc = get_fuse_conn(dir); 476 struct fuse_conn *fc = get_fuse_conn(dir);
479 struct fuse_req *req = fuse_get_request(fc); 477 struct fuse_req *req = fuse_get_req(fc);
480 if (!req) 478 if (IS_ERR(req))
481 return -EINTR; 479 return PTR_ERR(req);
482 480
483 req->in.h.opcode = FUSE_RMDIR; 481 req->in.h.opcode = FUSE_RMDIR;
484 req->in.h.nodeid = get_node_id(dir); 482 req->in.h.nodeid = get_node_id(dir);
@@ -504,9 +502,9 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent,
504 int err; 502 int err;
505 struct fuse_rename_in inarg; 503 struct fuse_rename_in inarg;
506 struct fuse_conn *fc = get_fuse_conn(olddir); 504 struct fuse_conn *fc = get_fuse_conn(olddir);
507 struct fuse_req *req = fuse_get_request(fc); 505 struct fuse_req *req = fuse_get_req(fc);
508 if (!req) 506 if (IS_ERR(req))
509 return -EINTR; 507 return PTR_ERR(req);
510 508
511 memset(&inarg, 0, sizeof(inarg)); 509 memset(&inarg, 0, sizeof(inarg));
512 inarg.newdir = get_node_id(newdir); 510 inarg.newdir = get_node_id(newdir);
@@ -553,9 +551,9 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
553 struct fuse_link_in inarg; 551 struct fuse_link_in inarg;
554 struct inode *inode = entry->d_inode; 552 struct inode *inode = entry->d_inode;
555 struct fuse_conn *fc = get_fuse_conn(inode); 553 struct fuse_conn *fc = get_fuse_conn(inode);
556 struct fuse_req *req = fuse_get_request(fc); 554 struct fuse_req *req = fuse_get_req(fc);
557 if (!req) 555 if (IS_ERR(req))
558 return -EINTR; 556 return PTR_ERR(req);
559 557
560 memset(&inarg, 0, sizeof(inarg)); 558 memset(&inarg, 0, sizeof(inarg));
561 inarg.oldnodeid = get_node_id(inode); 559 inarg.oldnodeid = get_node_id(inode);
@@ -583,9 +581,9 @@ int fuse_do_getattr(struct inode *inode)
583 int err; 581 int err;
584 struct fuse_attr_out arg; 582 struct fuse_attr_out arg;
585 struct fuse_conn *fc = get_fuse_conn(inode); 583 struct fuse_conn *fc = get_fuse_conn(inode);
586 struct fuse_req *req = fuse_get_request(fc); 584 struct fuse_req *req = fuse_get_req(fc);
587 if (!req) 585 if (IS_ERR(req))
588 return -EINTR; 586 return PTR_ERR(req);
589 587
590 req->in.h.opcode = FUSE_GETATTR; 588 req->in.h.opcode = FUSE_GETATTR;
591 req->in.h.nodeid = get_node_id(inode); 589 req->in.h.nodeid = get_node_id(inode);
@@ -673,9 +671,9 @@ static int fuse_access(struct inode *inode, int mask)
673 if (fc->no_access) 671 if (fc->no_access)
674 return 0; 672 return 0;
675 673
676 req = fuse_get_request(fc); 674 req = fuse_get_req(fc);
677 if (!req) 675 if (IS_ERR(req))
678 return -EINTR; 676 return PTR_ERR(req);
679 677
680 memset(&inarg, 0, sizeof(inarg)); 678 memset(&inarg, 0, sizeof(inarg));
681 inarg.mask = mask; 679 inarg.mask = mask;
@@ -780,9 +778,9 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
780 if (is_bad_inode(inode)) 778 if (is_bad_inode(inode))
781 return -EIO; 779 return -EIO;
782 780
783 req = fuse_get_request(fc); 781 req = fuse_get_req(fc);
784 if (!req) 782 if (IS_ERR(req))
785 return -EINTR; 783 return PTR_ERR(req);
786 784
787 page = alloc_page(GFP_KERNEL); 785 page = alloc_page(GFP_KERNEL);
788 if (!page) { 786 if (!page) {
@@ -809,11 +807,11 @@ static char *read_link(struct dentry *dentry)
809{ 807{
810 struct inode *inode = dentry->d_inode; 808 struct inode *inode = dentry->d_inode;
811 struct fuse_conn *fc = get_fuse_conn(inode); 809 struct fuse_conn *fc = get_fuse_conn(inode);
812 struct fuse_req *req = fuse_get_request(fc); 810 struct fuse_req *req = fuse_get_req(fc);
813 char *link; 811 char *link;
814 812
815 if (!req) 813 if (IS_ERR(req))
816 return ERR_PTR(-EINTR); 814 return ERR_PTR(PTR_ERR(req));
817 815
818 link = (char *) __get_free_page(GFP_KERNEL); 816 link = (char *) __get_free_page(GFP_KERNEL);
819 if (!link) { 817 if (!link) {
@@ -933,9 +931,9 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr)
933 } 931 }
934 } 932 }
935 933
936 req = fuse_get_request(fc); 934 req = fuse_get_req(fc);
937 if (!req) 935 if (IS_ERR(req))
938 return -EINTR; 936 return PTR_ERR(req);
939 937
940 memset(&inarg, 0, sizeof(inarg)); 938 memset(&inarg, 0, sizeof(inarg));
941 iattr_to_fattr(attr, &inarg); 939 iattr_to_fattr(attr, &inarg);
@@ -995,9 +993,9 @@ static int fuse_setxattr(struct dentry *entry, const char *name,
995 if (fc->no_setxattr) 993 if (fc->no_setxattr)
996 return -EOPNOTSUPP; 994 return -EOPNOTSUPP;
997 995
998 req = fuse_get_request(fc); 996 req = fuse_get_req(fc);
999 if (!req) 997 if (IS_ERR(req))
1000 return -EINTR; 998 return PTR_ERR(req);
1001 999
1002 memset(&inarg, 0, sizeof(inarg)); 1000 memset(&inarg, 0, sizeof(inarg));
1003 inarg.size = size; 1001 inarg.size = size;
@@ -1035,9 +1033,9 @@ static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
1035 if (fc->no_getxattr) 1033 if (fc->no_getxattr)
1036 return -EOPNOTSUPP; 1034 return -EOPNOTSUPP;
1037 1035
1038 req = fuse_get_request(fc); 1036 req = fuse_get_req(fc);
1039 if (!req) 1037 if (IS_ERR(req))
1040 return -EINTR; 1038 return PTR_ERR(req);
1041 1039
1042 memset(&inarg, 0, sizeof(inarg)); 1040 memset(&inarg, 0, sizeof(inarg));
1043 inarg.size = size; 1041 inarg.size = size;
@@ -1085,9 +1083,9 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
1085 if (fc->no_listxattr) 1083 if (fc->no_listxattr)
1086 return -EOPNOTSUPP; 1084 return -EOPNOTSUPP;
1087 1085
1088 req = fuse_get_request(fc); 1086 req = fuse_get_req(fc);
1089 if (!req) 1087 if (IS_ERR(req))
1090 return -EINTR; 1088 return PTR_ERR(req);
1091 1089
1092 memset(&inarg, 0, sizeof(inarg)); 1090 memset(&inarg, 0, sizeof(inarg));
1093 inarg.size = size; 1091 inarg.size = size;
@@ -1131,9 +1129,9 @@ static int fuse_removexattr(struct dentry *entry, const char *name)
1131 if (fc->no_removexattr) 1129 if (fc->no_removexattr)
1132 return -EOPNOTSUPP; 1130 return -EOPNOTSUPP;
1133 1131
1134 req = fuse_get_request(fc); 1132 req = fuse_get_req(fc);
1135 if (!req) 1133 if (IS_ERR(req))
1136 return -EINTR; 1134 return PTR_ERR(req);
1137 1135
1138 req->in.h.opcode = FUSE_REMOVEXATTR; 1136 req->in.h.opcode = FUSE_REMOVEXATTR;
1139 req->in.h.nodeid = get_node_id(inode); 1137 req->in.h.nodeid = get_node_id(inode);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 975f2697e866..e4f041a11bb5 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -22,9 +22,9 @@ static int fuse_send_open(struct inode *inode, struct file *file, int isdir,
22 struct fuse_req *req; 22 struct fuse_req *req;
23 int err; 23 int err;
24 24
25 req = fuse_get_request(fc); 25 req = fuse_get_req(fc);
26 if (!req) 26 if (IS_ERR(req))
27 return -EINTR; 27 return PTR_ERR(req);
28 28
29 memset(&inarg, 0, sizeof(inarg)); 29 memset(&inarg, 0, sizeof(inarg));
30 inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); 30 inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
@@ -184,9 +184,9 @@ static int fuse_flush(struct file *file)
184 if (fc->no_flush) 184 if (fc->no_flush)
185 return 0; 185 return 0;
186 186
187 req = fuse_get_request(fc); 187 req = fuse_get_req(fc);
188 if (!req) 188 if (IS_ERR(req))
189 return -EINTR; 189 return PTR_ERR(req);
190 190
191 memset(&inarg, 0, sizeof(inarg)); 191 memset(&inarg, 0, sizeof(inarg));
192 inarg.fh = ff->fh; 192 inarg.fh = ff->fh;
@@ -223,9 +223,9 @@ int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
223 if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir)) 223 if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
224 return 0; 224 return 0;
225 225
226 req = fuse_get_request(fc); 226 req = fuse_get_req(fc);
227 if (!req) 227 if (IS_ERR(req))
228 return -EINTR; 228 return PTR_ERR(req);
229 229
230 memset(&inarg, 0, sizeof(inarg)); 230 memset(&inarg, 0, sizeof(inarg));
231 inarg.fh = ff->fh; 231 inarg.fh = ff->fh;
@@ -297,9 +297,9 @@ static int fuse_readpage(struct file *file, struct page *page)
297 if (is_bad_inode(inode)) 297 if (is_bad_inode(inode))
298 goto out; 298 goto out;
299 299
300 err = -EINTR; 300 req = fuse_get_req(fc);
301 req = fuse_get_request(fc); 301 err = PTR_ERR(req);
302 if (!req) 302 if (IS_ERR(req))
303 goto out; 303 goto out;
304 304
305 req->out.page_zeroing = 1; 305 req->out.page_zeroing = 1;
@@ -368,10 +368,10 @@ static int fuse_readpages_fill(void *_data, struct page *page)
368 (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read || 368 (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
369 req->pages[req->num_pages - 1]->index + 1 != page->index)) { 369 req->pages[req->num_pages - 1]->index + 1 != page->index)) {
370 fuse_send_readpages(req, data->file, inode); 370 fuse_send_readpages(req, data->file, inode);
371 data->req = req = fuse_get_request(fc); 371 data->req = req = fuse_get_req(fc);
372 if (!req) { 372 if (IS_ERR(req)) {
373 unlock_page(page); 373 unlock_page(page);
374 return -EINTR; 374 return PTR_ERR(req);
375 } 375 }
376 } 376 }
377 req->pages[req->num_pages] = page; 377 req->pages[req->num_pages] = page;
@@ -392,13 +392,17 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
392 392
393 data.file = file; 393 data.file = file;
394 data.inode = inode; 394 data.inode = inode;
395 data.req = fuse_get_request(fc); 395 data.req = fuse_get_req(fc);
396 if (!data.req) 396 if (IS_ERR(data.req))
397 return -EINTR; 397 return PTR_ERR(data.req);
398 398
399 err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data); 399 err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
400 if (!err) 400 if (!err) {
401 fuse_send_readpages(data.req, file, inode); 401 if (data.req->num_pages)
402 fuse_send_readpages(data.req, file, inode);
403 else
404 fuse_put_request(fc, data.req);
405 }
402 return err; 406 return err;
403} 407}
404 408
@@ -451,9 +455,9 @@ static int fuse_commit_write(struct file *file, struct page *page,
451 if (is_bad_inode(inode)) 455 if (is_bad_inode(inode))
452 return -EIO; 456 return -EIO;
453 457
454 req = fuse_get_request(fc); 458 req = fuse_get_req(fc);
455 if (!req) 459 if (IS_ERR(req))
456 return -EINTR; 460 return PTR_ERR(req);
457 461
458 req->num_pages = 1; 462 req->num_pages = 1;
459 req->pages[0] = page; 463 req->pages[0] = page;
@@ -528,9 +532,9 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
528 if (is_bad_inode(inode)) 532 if (is_bad_inode(inode))
529 return -EIO; 533 return -EIO;
530 534
531 req = fuse_get_request(fc); 535 req = fuse_get_req(fc);
532 if (!req) 536 if (IS_ERR(req))
533 return -EINTR; 537 return PTR_ERR(req);
534 538
535 while (count) { 539 while (count) {
536 size_t nres; 540 size_t nres;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index a16a04fcf41e..19c7185a7546 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -1,6 +1,6 @@
1/* 1/*
2 FUSE: Filesystem in Userspace 2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu> 3 Copyright (C) 2001-2006 Miklos Szeredi <miklos@szeredi.hu>
4 4
5 This program can be distributed under the terms of the GNU GPL. 5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING. 6 See the file COPYING.
@@ -18,8 +18,8 @@
18/** Max number of pages that can be used in a single read request */ 18/** Max number of pages that can be used in a single read request */
19#define FUSE_MAX_PAGES_PER_REQ 32 19#define FUSE_MAX_PAGES_PER_REQ 32
20 20
21/** If more requests are outstanding, then the operation will block */ 21/** Maximum number of outstanding background requests */
22#define FUSE_MAX_OUTSTANDING 10 22#define FUSE_MAX_BACKGROUND 10
23 23
24/** It could be as large as PATH_MAX, but would that have any uses? */ 24/** It could be as large as PATH_MAX, but would that have any uses? */
25#define FUSE_NAME_MAX 1024 25#define FUSE_NAME_MAX 1024
@@ -131,8 +131,8 @@ struct fuse_conn;
131 * A request to the client 131 * A request to the client
132 */ 132 */
133struct fuse_req { 133struct fuse_req {
134 /** This can be on either unused_list, pending processing or 134 /** This can be on either pending processing or io lists in
135 io lists in fuse_conn */ 135 fuse_conn */
136 struct list_head list; 136 struct list_head list;
137 137
138 /** Entry on the background list */ 138 /** Entry on the background list */
@@ -144,15 +144,12 @@ struct fuse_req {
144 /* 144 /*
145 * The following bitfields are either set once before the 145 * The following bitfields are either set once before the
146 * request is queued or setting/clearing them is protected by 146 * request is queued or setting/clearing them is protected by
147 * fuse_lock 147 * fuse_conn->lock
148 */ 148 */
149 149
150 /** True if the request has reply */ 150 /** True if the request has reply */
151 unsigned isreply:1; 151 unsigned isreply:1;
152 152
153 /** The request is preallocated */
154 unsigned preallocated:1;
155
156 /** The request was interrupted */ 153 /** The request was interrupted */
157 unsigned interrupted:1; 154 unsigned interrupted:1;
158 155
@@ -213,6 +210,9 @@ struct fuse_req {
213 * unmounted. 210 * unmounted.
214 */ 211 */
215struct fuse_conn { 212struct fuse_conn {
213 /** Lock protecting accessess to members of this structure */
214 spinlock_t lock;
215
216 /** The user id for this mount */ 216 /** The user id for this mount */
217 uid_t user_id; 217 uid_t user_id;
218 218
@@ -244,19 +244,20 @@ struct fuse_conn {
244 interrupted request) */ 244 interrupted request) */
245 struct list_head background; 245 struct list_head background;
246 246
247 /** Controls the maximum number of outstanding requests */ 247 /** Number of requests currently in the background */
248 struct semaphore outstanding_sem; 248 unsigned num_background;
249
250 /** Flag indicating if connection is blocked. This will be
251 the case before the INIT reply is received, and if there
252 are too many outstading backgrounds requests */
253 int blocked;
249 254
250 /** This counts the number of outstanding requests if 255 /** waitq for blocked connection */
251 outstanding_sem would go negative */ 256 wait_queue_head_t blocked_waitq;
252 unsigned outstanding_debt;
253 257
254 /** RW semaphore for exclusion with fuse_put_super() */ 258 /** RW semaphore for exclusion with fuse_put_super() */
255 struct rw_semaphore sbput_sem; 259 struct rw_semaphore sbput_sem;
256 260
257 /** The list of unused requests */
258 struct list_head unused_list;
259
260 /** The next unique request id */ 261 /** The next unique request id */
261 u64 reqctr; 262 u64 reqctr;
262 263
@@ -318,6 +319,9 @@ struct fuse_conn {
318 319
319 /** kobject */ 320 /** kobject */
320 struct kobject kobj; 321 struct kobject kobj;
322
323 /** O_ASYNC requests */
324 struct fasync_struct *fasync;
321}; 325};
322 326
323static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb) 327static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
@@ -349,21 +353,6 @@ static inline u64 get_node_id(struct inode *inode)
349extern const struct file_operations fuse_dev_operations; 353extern const struct file_operations fuse_dev_operations;
350 354
351/** 355/**
352 * This is the single global spinlock which protects FUSE's structures
353 *
354 * The following data is protected by this lock:
355 *
356 * - the private_data field of the device file
357 * - the s_fs_info field of the super block
358 * - unused_list, pending, processing lists in fuse_conn
359 * - background list in fuse_conn
360 * - the unique request ID counter reqctr in fuse_conn
361 * - the sb (super_block) field in fuse_conn
362 * - the file (device file) field in fuse_conn
363 */
364extern spinlock_t fuse_lock;
365
366/**
367 * Get a filled in inode 356 * Get a filled in inode
368 */ 357 */
369struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid, 358struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
@@ -461,11 +450,11 @@ void fuse_reset_request(struct fuse_req *req);
461/** 450/**
462 * Reserve a preallocated request 451 * Reserve a preallocated request
463 */ 452 */
464struct fuse_req *fuse_get_request(struct fuse_conn *fc); 453struct fuse_req *fuse_get_req(struct fuse_conn *fc);
465 454
466/** 455/**
467 * Decrement reference count of a request. If count goes to zero put 456 * Decrement reference count of a request. If count goes to zero free
468 * on unused list (preallocated) or free request (not preallocated). 457 * the request.
469 */ 458 */
470void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req); 459void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req);
471 460
@@ -487,7 +476,7 @@ void request_send_background(struct fuse_conn *fc, struct fuse_req *req);
487/** 476/**
488 * Release inodes and file associated with background request 477 * Release inodes and file associated with background request
489 */ 478 */
490void fuse_release_background(struct fuse_req *req); 479void fuse_release_background(struct fuse_conn *fc, struct fuse_req *req);
491 480
492/* Abort all requests */ 481/* Abort all requests */
493void fuse_abort_conn(struct fuse_conn *fc); 482void fuse_abort_conn(struct fuse_conn *fc);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 879e6fba9480..fd34037b0588 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1,6 +1,6 @@
1/* 1/*
2 FUSE: Filesystem in Userspace 2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu> 3 Copyright (C) 2001-2006 Miklos Szeredi <miklos@szeredi.hu>
4 4
5 This program can be distributed under the terms of the GNU GPL. 5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING. 6 See the file COPYING.
@@ -22,7 +22,6 @@ MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
22MODULE_DESCRIPTION("Filesystem in Userspace"); 22MODULE_DESCRIPTION("Filesystem in Userspace");
23MODULE_LICENSE("GPL"); 23MODULE_LICENSE("GPL");
24 24
25spinlock_t fuse_lock;
26static kmem_cache_t *fuse_inode_cachep; 25static kmem_cache_t *fuse_inode_cachep;
27static struct subsystem connections_subsys; 26static struct subsystem connections_subsys;
28 27
@@ -207,15 +206,17 @@ static void fuse_put_super(struct super_block *sb)
207 206
208 down_write(&fc->sbput_sem); 207 down_write(&fc->sbput_sem);
209 while (!list_empty(&fc->background)) 208 while (!list_empty(&fc->background))
210 fuse_release_background(list_entry(fc->background.next, 209 fuse_release_background(fc,
210 list_entry(fc->background.next,
211 struct fuse_req, bg_entry)); 211 struct fuse_req, bg_entry));
212 212
213 spin_lock(&fuse_lock); 213 spin_lock(&fc->lock);
214 fc->mounted = 0; 214 fc->mounted = 0;
215 fc->connected = 0; 215 fc->connected = 0;
216 spin_unlock(&fuse_lock); 216 spin_unlock(&fc->lock);
217 up_write(&fc->sbput_sem); 217 up_write(&fc->sbput_sem);
218 /* Flush all readers on this fs */ 218 /* Flush all readers on this fs */
219 kill_fasync(&fc->fasync, SIGIO, POLL_IN);
219 wake_up_all(&fc->waitq); 220 wake_up_all(&fc->waitq);
220 kobject_del(&fc->kobj); 221 kobject_del(&fc->kobj);
221 kobject_put(&fc->kobj); 222 kobject_put(&fc->kobj);
@@ -242,9 +243,9 @@ static int fuse_statfs(struct super_block *sb, struct kstatfs *buf)
242 struct fuse_statfs_out outarg; 243 struct fuse_statfs_out outarg;
243 int err; 244 int err;
244 245
245 req = fuse_get_request(fc); 246 req = fuse_get_req(fc);
246 if (!req) 247 if (IS_ERR(req))
247 return -EINTR; 248 return PTR_ERR(req);
248 249
249 memset(&outarg, 0, sizeof(outarg)); 250 memset(&outarg, 0, sizeof(outarg));
250 req->in.numargs = 0; 251 req->in.numargs = 0;
@@ -369,15 +370,7 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
369 370
370static void fuse_conn_release(struct kobject *kobj) 371static void fuse_conn_release(struct kobject *kobj)
371{ 372{
372 struct fuse_conn *fc = get_fuse_conn_kobj(kobj); 373 kfree(get_fuse_conn_kobj(kobj));
373
374 while (!list_empty(&fc->unused_list)) {
375 struct fuse_req *req;
376 req = list_entry(fc->unused_list.next, struct fuse_req, list);
377 list_del(&req->list);
378 fuse_request_free(req);
379 }
380 kfree(fc);
381} 374}
382 375
383static struct fuse_conn *new_conn(void) 376static struct fuse_conn *new_conn(void)
@@ -386,64 +379,25 @@ static struct fuse_conn *new_conn(void)
386 379
387 fc = kzalloc(sizeof(*fc), GFP_KERNEL); 380 fc = kzalloc(sizeof(*fc), GFP_KERNEL);
388 if (fc) { 381 if (fc) {
389 int i; 382 spin_lock_init(&fc->lock);
390 init_waitqueue_head(&fc->waitq); 383 init_waitqueue_head(&fc->waitq);
384 init_waitqueue_head(&fc->blocked_waitq);
391 INIT_LIST_HEAD(&fc->pending); 385 INIT_LIST_HEAD(&fc->pending);
392 INIT_LIST_HEAD(&fc->processing); 386 INIT_LIST_HEAD(&fc->processing);
393 INIT_LIST_HEAD(&fc->io); 387 INIT_LIST_HEAD(&fc->io);
394 INIT_LIST_HEAD(&fc->unused_list);
395 INIT_LIST_HEAD(&fc->background); 388 INIT_LIST_HEAD(&fc->background);
396 sema_init(&fc->outstanding_sem, 1); /* One for INIT */
397 init_rwsem(&fc->sbput_sem); 389 init_rwsem(&fc->sbput_sem);
398 kobj_set_kset_s(fc, connections_subsys); 390 kobj_set_kset_s(fc, connections_subsys);
399 kobject_init(&fc->kobj); 391 kobject_init(&fc->kobj);
400 atomic_set(&fc->num_waiting, 0); 392 atomic_set(&fc->num_waiting, 0);
401 for (i = 0; i < FUSE_MAX_OUTSTANDING; i++) {
402 struct fuse_req *req = fuse_request_alloc();
403 if (!req) {
404 kobject_put(&fc->kobj);
405 return NULL;
406 }
407 list_add(&req->list, &fc->unused_list);
408 }
409 fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; 393 fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
410 fc->bdi.unplug_io_fn = default_unplug_io_fn; 394 fc->bdi.unplug_io_fn = default_unplug_io_fn;
411 fc->reqctr = 0; 395 fc->reqctr = 0;
396 fc->blocked = 1;
412 } 397 }
413 return fc; 398 return fc;
414} 399}
415 400
416static struct fuse_conn *get_conn(struct file *file, struct super_block *sb)
417{
418 struct fuse_conn *fc;
419 int err;
420
421 err = -EINVAL;
422 if (file->f_op != &fuse_dev_operations)
423 goto out_err;
424
425 err = -ENOMEM;
426 fc = new_conn();
427 if (!fc)
428 goto out_err;
429
430 spin_lock(&fuse_lock);
431 err = -EINVAL;
432 if (file->private_data)
433 goto out_unlock;
434
435 kobject_get(&fc->kobj);
436 file->private_data = fc;
437 spin_unlock(&fuse_lock);
438 return fc;
439
440 out_unlock:
441 spin_unlock(&fuse_lock);
442 kobject_put(&fc->kobj);
443 out_err:
444 return ERR_PTR(err);
445}
446
447static struct inode *get_root_inode(struct super_block *sb, unsigned mode) 401static struct inode *get_root_inode(struct super_block *sb, unsigned mode)
448{ 402{
449 struct fuse_attr attr; 403 struct fuse_attr attr;
@@ -467,7 +421,6 @@ static struct super_operations fuse_super_operations = {
467 421
468static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) 422static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
469{ 423{
470 int i;
471 struct fuse_init_out *arg = &req->misc.init_out; 424 struct fuse_init_out *arg = &req->misc.init_out;
472 425
473 if (req->out.h.error || arg->major != FUSE_KERNEL_VERSION) 426 if (req->out.h.error || arg->major != FUSE_KERNEL_VERSION)
@@ -486,22 +439,13 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
486 fc->minor = arg->minor; 439 fc->minor = arg->minor;
487 fc->max_write = arg->minor < 5 ? 4096 : arg->max_write; 440 fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
488 } 441 }
489
490 /* After INIT reply is received other requests can go
491 out. So do (FUSE_MAX_OUTSTANDING - 1) number of
492 up()s on outstanding_sem. The last up() is done in
493 fuse_putback_request() */
494 for (i = 1; i < FUSE_MAX_OUTSTANDING; i++)
495 up(&fc->outstanding_sem);
496
497 fuse_put_request(fc, req); 442 fuse_put_request(fc, req);
443 fc->blocked = 0;
444 wake_up_all(&fc->blocked_waitq);
498} 445}
499 446
500static void fuse_send_init(struct fuse_conn *fc) 447static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
501{ 448{
502 /* This is called from fuse_read_super() so there's guaranteed
503 to be exactly one request available */
504 struct fuse_req *req = fuse_get_request(fc);
505 struct fuse_init_in *arg = &req->misc.init_in; 449 struct fuse_init_in *arg = &req->misc.init_in;
506 450
507 arg->major = FUSE_KERNEL_VERSION; 451 arg->major = FUSE_KERNEL_VERSION;
@@ -525,12 +469,9 @@ static void fuse_send_init(struct fuse_conn *fc)
525 469
526static unsigned long long conn_id(void) 470static unsigned long long conn_id(void)
527{ 471{
472 /* BKL is held for ->get_sb() */
528 static unsigned long long ctr = 1; 473 static unsigned long long ctr = 1;
529 unsigned long long val; 474 return ctr++;
530 spin_lock(&fuse_lock);
531 val = ctr++;
532 spin_unlock(&fuse_lock);
533 return val;
534} 475}
535 476
536static int fuse_fill_super(struct super_block *sb, void *data, int silent) 477static int fuse_fill_super(struct super_block *sb, void *data, int silent)
@@ -540,6 +481,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
540 struct fuse_mount_data d; 481 struct fuse_mount_data d;
541 struct file *file; 482 struct file *file;
542 struct dentry *root_dentry; 483 struct dentry *root_dentry;
484 struct fuse_req *init_req;
543 int err; 485 int err;
544 486
545 if (!parse_fuse_opt((char *) data, &d)) 487 if (!parse_fuse_opt((char *) data, &d))
@@ -555,10 +497,17 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
555 if (!file) 497 if (!file)
556 return -EINVAL; 498 return -EINVAL;
557 499
558 fc = get_conn(file, sb); 500 if (file->f_op != &fuse_dev_operations)
559 fput(file); 501 return -EINVAL;
560 if (IS_ERR(fc)) 502
561 return PTR_ERR(fc); 503 /* Setting file->private_data can't race with other mount()
504 instances, since BKL is held for ->get_sb() */
505 if (file->private_data)
506 return -EINVAL;
507
508 fc = new_conn();
509 if (!fc)
510 return -ENOMEM;
562 511
563 fc->flags = d.flags; 512 fc->flags = d.flags;
564 fc->user_id = d.user_id; 513 fc->user_id = d.user_id;
@@ -579,27 +528,40 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
579 goto err; 528 goto err;
580 } 529 }
581 530
531 init_req = fuse_request_alloc();
532 if (!init_req)
533 goto err_put_root;
534
582 err = kobject_set_name(&fc->kobj, "%llu", conn_id()); 535 err = kobject_set_name(&fc->kobj, "%llu", conn_id());
583 if (err) 536 if (err)
584 goto err_put_root; 537 goto err_free_req;
585 538
586 err = kobject_add(&fc->kobj); 539 err = kobject_add(&fc->kobj);
587 if (err) 540 if (err)
588 goto err_put_root; 541 goto err_free_req;
589 542
590 sb->s_root = root_dentry; 543 sb->s_root = root_dentry;
591 spin_lock(&fuse_lock);
592 fc->mounted = 1; 544 fc->mounted = 1;
593 fc->connected = 1; 545 fc->connected = 1;
594 spin_unlock(&fuse_lock); 546 kobject_get(&fc->kobj);
547 file->private_data = fc;
548 /*
549 * atomic_dec_and_test() in fput() provides the necessary
550 * memory barrier for file->private_data to be visible on all
551 * CPUs after this
552 */
553 fput(file);
595 554
596 fuse_send_init(fc); 555 fuse_send_init(fc, init_req);
597 556
598 return 0; 557 return 0;
599 558
559 err_free_req:
560 fuse_request_free(init_req);
600 err_put_root: 561 err_put_root:
601 dput(root_dentry); 562 dput(root_dentry);
602 err: 563 err:
564 fput(file);
603 kobject_put(&fc->kobj); 565 kobject_put(&fc->kobj);
604 return err; 566 return err;
605} 567}
@@ -753,7 +715,6 @@ static int __init fuse_init(void)
753 printk("fuse init (API version %i.%i)\n", 715 printk("fuse init (API version %i.%i)\n",
754 FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION); 716 FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION);
755 717
756 spin_lock_init(&fuse_lock);
757 res = fuse_fs_init(); 718 res = fuse_fs_init();
758 if (res) 719 if (res)
759 goto err; 720 goto err;
diff --git a/fs/inotify.c b/fs/inotify.c
index 367c487c014b..1f50302849c5 100644
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -538,7 +538,7 @@ void inotify_d_instantiate(struct dentry *entry, struct inode *inode)
538 WARN_ON(entry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED); 538 WARN_ON(entry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED);
539 spin_lock(&entry->d_lock); 539 spin_lock(&entry->d_lock);
540 parent = entry->d_parent; 540 parent = entry->d_parent;
541 if (inotify_inode_watched(parent->d_inode)) 541 if (parent->d_inode && inotify_inode_watched(parent->d_inode))
542 entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED; 542 entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
543 spin_unlock(&entry->d_lock); 543 spin_unlock(&entry->d_lock);
544} 544}
diff --git a/fs/namespace.c b/fs/namespace.c
index bf478addb852..2c5f1f80bdc2 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -899,11 +899,13 @@ static int do_change_type(struct nameidata *nd, int flag)
899/* 899/*
900 * do loopback mount. 900 * do loopback mount.
901 */ 901 */
902static int do_loopback(struct nameidata *nd, char *old_name, int recurse) 902static int do_loopback(struct nameidata *nd, char *old_name, unsigned long flags, int mnt_flags)
903{ 903{
904 struct nameidata old_nd; 904 struct nameidata old_nd;
905 struct vfsmount *mnt = NULL; 905 struct vfsmount *mnt = NULL;
906 int recurse = flags & MS_REC;
906 int err = mount_is_safe(nd); 907 int err = mount_is_safe(nd);
908
907 if (err) 909 if (err)
908 return err; 910 return err;
909 if (!old_name || !*old_name) 911 if (!old_name || !*old_name)
@@ -937,6 +939,7 @@ static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
937 spin_unlock(&vfsmount_lock); 939 spin_unlock(&vfsmount_lock);
938 release_mounts(&umount_list); 940 release_mounts(&umount_list);
939 } 941 }
942 mnt->mnt_flags = mnt_flags;
940 943
941out: 944out:
942 up_write(&namespace_sem); 945 up_write(&namespace_sem);
@@ -1350,7 +1353,7 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
1350 retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags, 1353 retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
1351 data_page); 1354 data_page);
1352 else if (flags & MS_BIND) 1355 else if (flags & MS_BIND)
1353 retval = do_loopback(&nd, dev_name, flags & MS_REC); 1356 retval = do_loopback(&nd, dev_name, flags, mnt_flags);
1354 else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) 1357 else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
1355 retval = do_change_type(&nd, flags); 1358 retval = do_change_type(&nd, flags);
1356 else if (flags & MS_MOVE) 1359 else if (flags & MS_MOVE)
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index cfe9ce881613..6e92b0fe5323 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -14,46 +14,46 @@
14 14
15int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) 15int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
16{ 16{
17 struct svc_cred *cred = &rqstp->rq_cred; 17 struct svc_cred cred = rqstp->rq_cred;
18 int i; 18 int i;
19 int ret; 19 int ret;
20 20
21 if (exp->ex_flags & NFSEXP_ALLSQUASH) { 21 if (exp->ex_flags & NFSEXP_ALLSQUASH) {
22 cred->cr_uid = exp->ex_anon_uid; 22 cred.cr_uid = exp->ex_anon_uid;
23 cred->cr_gid = exp->ex_anon_gid; 23 cred.cr_gid = exp->ex_anon_gid;
24 put_group_info(cred->cr_group_info); 24 cred.cr_group_info = groups_alloc(0);
25 cred->cr_group_info = groups_alloc(0);
26 } else if (exp->ex_flags & NFSEXP_ROOTSQUASH) { 25 } else if (exp->ex_flags & NFSEXP_ROOTSQUASH) {
27 struct group_info *gi; 26 struct group_info *gi;
28 if (!cred->cr_uid) 27 if (!cred.cr_uid)
29 cred->cr_uid = exp->ex_anon_uid; 28 cred.cr_uid = exp->ex_anon_uid;
30 if (!cred->cr_gid) 29 if (!cred.cr_gid)
31 cred->cr_gid = exp->ex_anon_gid; 30 cred.cr_gid = exp->ex_anon_gid;
32 gi = groups_alloc(cred->cr_group_info->ngroups); 31 gi = groups_alloc(cred.cr_group_info->ngroups);
33 if (gi) 32 if (gi)
34 for (i = 0; i < cred->cr_group_info->ngroups; i++) { 33 for (i = 0; i < cred.cr_group_info->ngroups; i++) {
35 if (!GROUP_AT(cred->cr_group_info, i)) 34 if (!GROUP_AT(cred.cr_group_info, i))
36 GROUP_AT(gi, i) = exp->ex_anon_gid; 35 GROUP_AT(gi, i) = exp->ex_anon_gid;
37 else 36 else
38 GROUP_AT(gi, i) = GROUP_AT(cred->cr_group_info, i); 37 GROUP_AT(gi, i) = GROUP_AT(cred.cr_group_info, i);
39 } 38 }
40 put_group_info(cred->cr_group_info); 39 cred.cr_group_info = gi;
41 cred->cr_group_info = gi; 40 } else
42 } 41 get_group_info(cred.cr_group_info);
43 42
44 if (cred->cr_uid != (uid_t) -1) 43 if (cred.cr_uid != (uid_t) -1)
45 current->fsuid = cred->cr_uid; 44 current->fsuid = cred.cr_uid;
46 else 45 else
47 current->fsuid = exp->ex_anon_uid; 46 current->fsuid = exp->ex_anon_uid;
48 if (cred->cr_gid != (gid_t) -1) 47 if (cred.cr_gid != (gid_t) -1)
49 current->fsgid = cred->cr_gid; 48 current->fsgid = cred.cr_gid;
50 else 49 else
51 current->fsgid = exp->ex_anon_gid; 50 current->fsgid = exp->ex_anon_gid;
52 51
53 if (!cred->cr_group_info) 52 if (!cred.cr_group_info)
54 return -ENOMEM; 53 return -ENOMEM;
55 ret = set_current_groups(cred->cr_group_info); 54 ret = set_current_groups(cred.cr_group_info);
56 if ((cred->cr_uid)) { 55 put_group_info(cred.cr_group_info);
56 if ((cred.cr_uid)) {
57 cap_t(current->cap_effective) &= ~CAP_NFSD_MASK; 57 cap_t(current->cap_effective) &= ~CAP_NFSD_MASK;
58 } else { 58 } else {
59 cap_t(current->cap_effective) |= (CAP_NFSD_MASK & 59 cap_t(current->cap_effective) |= (CAP_NFSD_MASK &
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index c340be0a3f59..4e0578121d9a 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -422,7 +422,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
422 if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0) 422 if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
423 goto out; 423 goto out;
424 err = path_lookup(buf, 0, &nd); 424 err = path_lookup(buf, 0, &nd);
425 if (err) goto out; 425 if (err) goto out_no_path;
426 426
427 exp.h.flags = 0; 427 exp.h.flags = 0;
428 exp.ex_client = dom; 428 exp.ex_client = dom;
@@ -475,6 +475,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
475 out: 475 out:
476 if (nd.dentry) 476 if (nd.dentry)
477 path_release(&nd); 477 path_release(&nd);
478 out_no_path:
478 if (dom) 479 if (dom)
479 auth_domain_put(dom); 480 auth_domain_put(dom);
480 kfree(buf); 481 kfree(buf);
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 6d2dfed1de08..f61142afea44 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -682,7 +682,7 @@ static struct svc_procedure nfsd_procedures3[22] = {
682 PROC(lookup, dirop, dirop, fhandle2, RC_NOCACHE, ST+FH+pAT+pAT), 682 PROC(lookup, dirop, dirop, fhandle2, RC_NOCACHE, ST+FH+pAT+pAT),
683 PROC(access, access, access, fhandle, RC_NOCACHE, ST+pAT+1), 683 PROC(access, access, access, fhandle, RC_NOCACHE, ST+pAT+1),
684 PROC(readlink, readlink, readlink, fhandle, RC_NOCACHE, ST+pAT+1+NFS3_MAXPATHLEN/4), 684 PROC(readlink, readlink, readlink, fhandle, RC_NOCACHE, ST+pAT+1+NFS3_MAXPATHLEN/4),
685 PROC(read, read, read, fhandle, RC_NOCACHE, ST+pAT+4+NFSSVC_MAXBLKSIZE), 685 PROC(read, read, read, fhandle, RC_NOCACHE, ST+pAT+4+NFSSVC_MAXBLKSIZE/4),
686 PROC(write, write, write, fhandle, RC_REPLBUFF, ST+WC+4), 686 PROC(write, write, write, fhandle, RC_REPLBUFF, ST+WC+4),
687 PROC(create, create, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC), 687 PROC(create, create, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC),
688 PROC(mkdir, mkdir, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC), 688 PROC(mkdir, mkdir, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC),
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 7391f4aabedb..edb107e61b91 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -710,9 +710,9 @@ calculate_posix_ace_count(struct nfs4_acl *n4acl)
710 /* Also, the remaining entries are for named users and 710 /* Also, the remaining entries are for named users and
711 * groups, and come in threes (mask, allow, deny): */ 711 * groups, and come in threes (mask, allow, deny): */
712 if (n4acl->naces < 7) 712 if (n4acl->naces < 7)
713 return -1; 713 return -EINVAL;
714 if ((n4acl->naces - 7) % 3) 714 if ((n4acl->naces - 7) % 3)
715 return -1; 715 return -EINVAL;
716 return 4 + (n4acl->naces - 7)/3; 716 return 4 + (n4acl->naces - 7)/3;
717 } 717 }
718} 718}
@@ -790,7 +790,7 @@ nfs4_acl_split(struct nfs4_acl *acl, struct nfs4_acl *dacl)
790 continue; 790 continue;
791 791
792 error = nfs4_acl_add_ace(dacl, ace->type, ace->flag, 792 error = nfs4_acl_add_ace(dacl, ace->type, ace->flag,
793 ace->access_mask, ace->whotype, ace->who) == -1; 793 ace->access_mask, ace->whotype, ace->who);
794 if (error < 0) 794 if (error < 0)
795 goto out; 795 goto out;
796 796
@@ -866,7 +866,7 @@ nfs4_acl_add_ace(struct nfs4_acl *acl, u32 type, u32 flag, u32 access_mask,
866 struct nfs4_ace *ace; 866 struct nfs4_ace *ace;
867 867
868 if ((ace = kmalloc(sizeof(*ace), GFP_KERNEL)) == NULL) 868 if ((ace = kmalloc(sizeof(*ace), GFP_KERNEL)) == NULL)
869 return -1; 869 return -ENOMEM;
870 870
871 ace->type = type; 871 ace->type = type;
872 ace->flag = flag; 872 ace->flag = flag;
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index c872bd07fc10..dbaf3f93f328 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -441,8 +441,9 @@ nfsd4_probe_callback(struct nfs4_client *clp)
441 goto out_clnt; 441 goto out_clnt;
442 } 442 }
443 443
444 /* the task holds a reference to the nfs4_client struct */
445 cb->cb_client = clnt; 444 cb->cb_client = clnt;
445
446 /* the task holds a reference to the nfs4_client struct */
446 atomic_inc(&clp->cl_count); 447 atomic_inc(&clp->cl_count);
447 448
448 msg.rpc_cred = nfsd4_lookupcred(clp,0); 449 msg.rpc_cred = nfsd4_lookupcred(clp,0);
@@ -460,13 +461,12 @@ nfsd4_probe_callback(struct nfs4_client *clp)
460out_rpciod: 461out_rpciod:
461 atomic_dec(&clp->cl_count); 462 atomic_dec(&clp->cl_count);
462 rpciod_down(); 463 rpciod_down();
464 cb->cb_client = NULL;
463out_clnt: 465out_clnt:
464 rpc_shutdown_client(clnt); 466 rpc_shutdown_client(clnt);
465 goto out_err;
466out_err: 467out_err:
467 dprintk("NFSD: warning: no callback path to client %.*s\n", 468 dprintk("NFSD: warning: no callback path to client %.*s\n",
468 (int)clp->cl_name.len, clp->cl_name.data); 469 (int)clp->cl_name.len, clp->cl_name.data);
469 cb->cb_client = NULL;
470} 470}
471 471
472static void 472static void
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 6d63f1d9e5f5..b0e095ea0c03 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -288,8 +288,6 @@ nfsd4_putrootfh(struct svc_rqst *rqstp, struct svc_fh *current_fh)
288 fh_put(current_fh); 288 fh_put(current_fh);
289 status = exp_pseudoroot(rqstp->rq_client, current_fh, 289 status = exp_pseudoroot(rqstp->rq_client, current_fh,
290 &rqstp->rq_chandle); 290 &rqstp->rq_chandle);
291 if (!status)
292 status = nfserrno(nfsd_setuser(rqstp, current_fh->fh_export));
293 return status; 291 return status;
294} 292}
295 293
@@ -975,7 +973,7 @@ struct nfsd4_voidargs { int dummy; };
975 */ 973 */
976static struct svc_procedure nfsd_procedures4[2] = { 974static struct svc_procedure nfsd_procedures4[2] = {
977 PROC(null, void, void, void, RC_NOCACHE, 1), 975 PROC(null, void, void, void, RC_NOCACHE, 1),
978 PROC(compound, compound, compound, compound, RC_NOCACHE, NFSD_BUFSIZE) 976 PROC(compound, compound, compound, compound, RC_NOCACHE, NFSD_BUFSIZE/4)
979}; 977};
980 978
981struct svc_version nfsd_version4 = { 979struct svc_version nfsd_version4 = {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 47ec112b266c..96c7578cbe1e 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -147,6 +147,42 @@ get_nfs4_file(struct nfs4_file *fi)
147 kref_get(&fi->fi_ref); 147 kref_get(&fi->fi_ref);
148} 148}
149 149
150static int num_delegations;
151
152/*
153 * Open owner state (share locks)
154 */
155
156/* hash tables for nfs4_stateowner */
157#define OWNER_HASH_BITS 8
158#define OWNER_HASH_SIZE (1 << OWNER_HASH_BITS)
159#define OWNER_HASH_MASK (OWNER_HASH_SIZE - 1)
160
161#define ownerid_hashval(id) \
162 ((id) & OWNER_HASH_MASK)
163#define ownerstr_hashval(clientid, ownername) \
164 (((clientid) + opaque_hashval((ownername.data), (ownername.len))) & OWNER_HASH_MASK)
165
166static struct list_head ownerid_hashtbl[OWNER_HASH_SIZE];
167static struct list_head ownerstr_hashtbl[OWNER_HASH_SIZE];
168
169/* hash table for nfs4_file */
170#define FILE_HASH_BITS 8
171#define FILE_HASH_SIZE (1 << FILE_HASH_BITS)
172#define FILE_HASH_MASK (FILE_HASH_SIZE - 1)
173/* hash table for (open)nfs4_stateid */
174#define STATEID_HASH_BITS 10
175#define STATEID_HASH_SIZE (1 << STATEID_HASH_BITS)
176#define STATEID_HASH_MASK (STATEID_HASH_SIZE - 1)
177
178#define file_hashval(x) \
179 hash_ptr(x, FILE_HASH_BITS)
180#define stateid_hashval(owner_id, file_id) \
181 (((owner_id) + (file_id)) & STATEID_HASH_MASK)
182
183static struct list_head file_hashtbl[FILE_HASH_SIZE];
184static struct list_head stateid_hashtbl[STATEID_HASH_SIZE];
185
150static struct nfs4_delegation * 186static struct nfs4_delegation *
151alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_fh *current_fh, u32 type) 187alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_fh *current_fh, u32 type)
152{ 188{
@@ -155,9 +191,12 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
155 struct nfs4_callback *cb = &stp->st_stateowner->so_client->cl_callback; 191 struct nfs4_callback *cb = &stp->st_stateowner->so_client->cl_callback;
156 192
157 dprintk("NFSD alloc_init_deleg\n"); 193 dprintk("NFSD alloc_init_deleg\n");
194 if (num_delegations > STATEID_HASH_SIZE * 4)
195 return NULL;
158 dp = kmem_cache_alloc(deleg_slab, GFP_KERNEL); 196 dp = kmem_cache_alloc(deleg_slab, GFP_KERNEL);
159 if (dp == NULL) 197 if (dp == NULL)
160 return dp; 198 return dp;
199 num_delegations++;
161 INIT_LIST_HEAD(&dp->dl_perfile); 200 INIT_LIST_HEAD(&dp->dl_perfile);
162 INIT_LIST_HEAD(&dp->dl_perclnt); 201 INIT_LIST_HEAD(&dp->dl_perclnt);
163 INIT_LIST_HEAD(&dp->dl_recall_lru); 202 INIT_LIST_HEAD(&dp->dl_recall_lru);
@@ -192,6 +231,7 @@ nfs4_put_delegation(struct nfs4_delegation *dp)
192 dprintk("NFSD: freeing dp %p\n",dp); 231 dprintk("NFSD: freeing dp %p\n",dp);
193 put_nfs4_file(dp->dl_file); 232 put_nfs4_file(dp->dl_file);
194 kmem_cache_free(deleg_slab, dp); 233 kmem_cache_free(deleg_slab, dp);
234 num_delegations--;
195 } 235 }
196} 236}
197 237
@@ -330,22 +370,29 @@ put_nfs4_client(struct nfs4_client *clp)
330} 370}
331 371
332static void 372static void
373shutdown_callback_client(struct nfs4_client *clp)
374{
375 struct rpc_clnt *clnt = clp->cl_callback.cb_client;
376
377 /* shutdown rpc client, ending any outstanding recall rpcs */
378 if (clnt) {
379 clp->cl_callback.cb_client = NULL;
380 rpc_shutdown_client(clnt);
381 rpciod_down();
382 }
383}
384
385static void
333expire_client(struct nfs4_client *clp) 386expire_client(struct nfs4_client *clp)
334{ 387{
335 struct nfs4_stateowner *sop; 388 struct nfs4_stateowner *sop;
336 struct nfs4_delegation *dp; 389 struct nfs4_delegation *dp;
337 struct nfs4_callback *cb = &clp->cl_callback;
338 struct rpc_clnt *clnt = clp->cl_callback.cb_client;
339 struct list_head reaplist; 390 struct list_head reaplist;
340 391
341 dprintk("NFSD: expire_client cl_count %d\n", 392 dprintk("NFSD: expire_client cl_count %d\n",
342 atomic_read(&clp->cl_count)); 393 atomic_read(&clp->cl_count));
343 394
344 /* shutdown rpc client, ending any outstanding recall rpcs */ 395 shutdown_callback_client(clp);
345 if (atomic_read(&cb->cb_set) == 1 && clnt) {
346 rpc_shutdown_client(clnt);
347 clnt = clp->cl_callback.cb_client = NULL;
348 }
349 396
350 INIT_LIST_HEAD(&reaplist); 397 INIT_LIST_HEAD(&reaplist);
351 spin_lock(&recall_lock); 398 spin_lock(&recall_lock);
@@ -936,40 +983,6 @@ out:
936 return status; 983 return status;
937} 984}
938 985
939/*
940 * Open owner state (share locks)
941 */
942
943/* hash tables for nfs4_stateowner */
944#define OWNER_HASH_BITS 8
945#define OWNER_HASH_SIZE (1 << OWNER_HASH_BITS)
946#define OWNER_HASH_MASK (OWNER_HASH_SIZE - 1)
947
948#define ownerid_hashval(id) \
949 ((id) & OWNER_HASH_MASK)
950#define ownerstr_hashval(clientid, ownername) \
951 (((clientid) + opaque_hashval((ownername.data), (ownername.len))) & OWNER_HASH_MASK)
952
953static struct list_head ownerid_hashtbl[OWNER_HASH_SIZE];
954static struct list_head ownerstr_hashtbl[OWNER_HASH_SIZE];
955
956/* hash table for nfs4_file */
957#define FILE_HASH_BITS 8
958#define FILE_HASH_SIZE (1 << FILE_HASH_BITS)
959#define FILE_HASH_MASK (FILE_HASH_SIZE - 1)
960/* hash table for (open)nfs4_stateid */
961#define STATEID_HASH_BITS 10
962#define STATEID_HASH_SIZE (1 << STATEID_HASH_BITS)
963#define STATEID_HASH_MASK (STATEID_HASH_SIZE - 1)
964
965#define file_hashval(x) \
966 hash_ptr(x, FILE_HASH_BITS)
967#define stateid_hashval(owner_id, file_id) \
968 (((owner_id) + (file_id)) & STATEID_HASH_MASK)
969
970static struct list_head file_hashtbl[FILE_HASH_SIZE];
971static struct list_head stateid_hashtbl[STATEID_HASH_SIZE];
972
973/* OPEN Share state helper functions */ 986/* OPEN Share state helper functions */
974static inline struct nfs4_file * 987static inline struct nfs4_file *
975alloc_init_file(struct inode *ino) 988alloc_init_file(struct inode *ino)
@@ -1186,8 +1199,7 @@ move_to_close_lru(struct nfs4_stateowner *sop)
1186{ 1199{
1187 dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop); 1200 dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop);
1188 1201
1189 unhash_stateowner(sop); 1202 list_move_tail(&sop->so_close_lru, &close_lru);
1190 list_add_tail(&sop->so_close_lru, &close_lru);
1191 sop->so_time = get_seconds(); 1203 sop->so_time = get_seconds();
1192} 1204}
1193 1205
@@ -1916,8 +1928,7 @@ nfs4_laundromat(void)
1916 } 1928 }
1917 dprintk("NFSD: purging unused open stateowner (so_id %d)\n", 1929 dprintk("NFSD: purging unused open stateowner (so_id %d)\n",
1918 sop->so_id); 1930 sop->so_id);
1919 list_del(&sop->so_close_lru); 1931 release_stateowner(sop);
1920 nfs4_put_stateowner(sop);
1921 } 1932 }
1922 if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT) 1933 if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT)
1923 clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT; 1934 clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT;
@@ -2495,36 +2506,27 @@ nfs4_transform_lock_offset(struct file_lock *lock)
2495 lock->fl_end = OFFSET_MAX; 2506 lock->fl_end = OFFSET_MAX;
2496} 2507}
2497 2508
2498static int 2509/* Hack!: For now, we're defining this just so we can use a pointer to it
2499nfs4_verify_lock_stateowner(struct nfs4_stateowner *sop, unsigned int hashval) 2510 * as a unique cookie to identify our (NFSv4's) posix locks. */
2500{ 2511static struct lock_manager_operations nfsd_posix_mng_ops = {
2501 struct nfs4_stateowner *local = NULL; 2512};
2502 int status = 0;
2503
2504 if (hashval >= LOCK_HASH_SIZE)
2505 goto out;
2506 list_for_each_entry(local, &lock_ownerid_hashtbl[hashval], so_idhash) {
2507 if (local == sop) {
2508 status = 1;
2509 goto out;
2510 }
2511 }
2512out:
2513 return status;
2514}
2515
2516 2513
2517static inline void 2514static inline void
2518nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny) 2515nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny)
2519{ 2516{
2520 struct nfs4_stateowner *sop = (struct nfs4_stateowner *) fl->fl_owner; 2517 struct nfs4_stateowner *sop;
2521 unsigned int hval = lockownerid_hashval(sop->so_id); 2518 unsigned int hval;
2522 2519
2523 deny->ld_sop = NULL; 2520 if (fl->fl_lmops == &nfsd_posix_mng_ops) {
2524 if (nfs4_verify_lock_stateowner(sop, hval)) { 2521 sop = (struct nfs4_stateowner *) fl->fl_owner;
2522 hval = lockownerid_hashval(sop->so_id);
2525 kref_get(&sop->so_ref); 2523 kref_get(&sop->so_ref);
2526 deny->ld_sop = sop; 2524 deny->ld_sop = sop;
2527 deny->ld_clientid = sop->so_client->cl_clientid; 2525 deny->ld_clientid = sop->so_client->cl_clientid;
2526 } else {
2527 deny->ld_sop = NULL;
2528 deny->ld_clientid.cl_boot = 0;
2529 deny->ld_clientid.cl_id = 0;
2528 } 2530 }
2529 deny->ld_start = fl->fl_start; 2531 deny->ld_start = fl->fl_start;
2530 deny->ld_length = ~(u64)0; 2532 deny->ld_length = ~(u64)0;
@@ -2736,6 +2738,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
2736 file_lock.fl_pid = current->tgid; 2738 file_lock.fl_pid = current->tgid;
2737 file_lock.fl_file = filp; 2739 file_lock.fl_file = filp;
2738 file_lock.fl_flags = FL_POSIX; 2740 file_lock.fl_flags = FL_POSIX;
2741 file_lock.fl_lmops = &nfsd_posix_mng_ops;
2739 2742
2740 file_lock.fl_start = lock->lk_offset; 2743 file_lock.fl_start = lock->lk_offset;
2741 if ((lock->lk_length == ~(u64)0) || 2744 if ((lock->lk_length == ~(u64)0) ||
@@ -2841,6 +2844,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
2841 file_lock.fl_owner = (fl_owner_t)lockt->lt_stateowner; 2844 file_lock.fl_owner = (fl_owner_t)lockt->lt_stateowner;
2842 file_lock.fl_pid = current->tgid; 2845 file_lock.fl_pid = current->tgid;
2843 file_lock.fl_flags = FL_POSIX; 2846 file_lock.fl_flags = FL_POSIX;
2847 file_lock.fl_lmops = &nfsd_posix_mng_ops;
2844 2848
2845 file_lock.fl_start = lockt->lt_offset; 2849 file_lock.fl_start = lockt->lt_offset;
2846 if ((lockt->lt_length == ~(u64)0) || LOFF_OVERFLOW(lockt->lt_offset, lockt->lt_length)) 2850 if ((lockt->lt_length == ~(u64)0) || LOFF_OVERFLOW(lockt->lt_offset, lockt->lt_length))
@@ -2900,6 +2904,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
2900 file_lock.fl_pid = current->tgid; 2904 file_lock.fl_pid = current->tgid;
2901 file_lock.fl_file = filp; 2905 file_lock.fl_file = filp;
2902 file_lock.fl_flags = FL_POSIX; 2906 file_lock.fl_flags = FL_POSIX;
2907 file_lock.fl_lmops = &nfsd_posix_mng_ops;
2903 file_lock.fl_start = locku->lu_offset; 2908 file_lock.fl_start = locku->lu_offset;
2904 2909
2905 if ((locku->lu_length == ~(u64)0) || LOFF_OVERFLOW(locku->lu_offset, locku->lu_length)) 2910 if ((locku->lu_length == ~(u64)0) || LOFF_OVERFLOW(locku->lu_offset, locku->lu_length))
@@ -3211,15 +3216,8 @@ __nfs4_state_shutdown(void)
3211 int i; 3216 int i;
3212 struct nfs4_client *clp = NULL; 3217 struct nfs4_client *clp = NULL;
3213 struct nfs4_delegation *dp = NULL; 3218 struct nfs4_delegation *dp = NULL;
3214 struct nfs4_stateowner *sop = NULL;
3215 struct list_head *pos, *next, reaplist; 3219 struct list_head *pos, *next, reaplist;
3216 3220
3217 list_for_each_safe(pos, next, &close_lru) {
3218 sop = list_entry(pos, struct nfs4_stateowner, so_close_lru);
3219 list_del(&sop->so_close_lru);
3220 nfs4_put_stateowner(sop);
3221 }
3222
3223 for (i = 0; i < CLIENT_HASH_SIZE; i++) { 3221 for (i = 0; i < CLIENT_HASH_SIZE; i++) {
3224 while (!list_empty(&conf_id_hashtbl[i])) { 3222 while (!list_empty(&conf_id_hashtbl[i])) {
3225 clp = list_entry(conf_id_hashtbl[i].next, struct nfs4_client, cl_idhash); 3223 clp = list_entry(conf_id_hashtbl[i].next, struct nfs4_client, cl_idhash);
@@ -3244,8 +3242,6 @@ __nfs4_state_shutdown(void)
3244 } 3242 }
3245 3243
3246 cancel_delayed_work(&laundromat_work); 3244 cancel_delayed_work(&laundromat_work);
3247 flush_workqueue(laundry_wq);
3248 destroy_workqueue(laundry_wq);
3249 nfsd4_shutdown_recdir(); 3245 nfsd4_shutdown_recdir();
3250 nfs4_init = 0; 3246 nfs4_init = 0;
3251} 3247}
@@ -3253,6 +3249,8 @@ __nfs4_state_shutdown(void)
3253void 3249void
3254nfs4_state_shutdown(void) 3250nfs4_state_shutdown(void)
3255{ 3251{
3252 cancel_rearming_delayed_workqueue(laundry_wq, &laundromat_work);
3253 destroy_workqueue(laundry_wq);
3256 nfs4_lock_state(); 3254 nfs4_lock_state();
3257 nfs4_release_reclaim(); 3255 nfs4_release_reclaim();
3258 __nfs4_state_shutdown(); 3256 __nfs4_state_shutdown();
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 03857fd81126..de3998f15f10 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -299,11 +299,10 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *ia
299 buf, dummy32, &ace.who); 299 buf, dummy32, &ace.who);
300 if (status) 300 if (status)
301 goto out_nfserr; 301 goto out_nfserr;
302 if (nfs4_acl_add_ace(*acl, ace.type, ace.flag, 302 status = nfs4_acl_add_ace(*acl, ace.type, ace.flag,
303 ace.access_mask, ace.whotype, ace.who) != 0) { 303 ace.access_mask, ace.whotype, ace.who);
304 status = -ENOMEM; 304 if (status)
305 goto out_nfserr; 305 goto out_nfserr;
306 }
307 } 306 }
308 } else 307 } else
309 *acl = NULL; 308 *acl = NULL;
@@ -2085,27 +2084,20 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_read
2085 WRITE32(eof); 2084 WRITE32(eof);
2086 WRITE32(maxcount); 2085 WRITE32(maxcount);
2087 ADJUST_ARGS(); 2086 ADJUST_ARGS();
2088 resp->xbuf->head[0].iov_len = ((char*)resp->p) - (char*)resp->xbuf->head[0].iov_base; 2087 resp->xbuf->head[0].iov_len = (char*)p
2089 2088 - (char*)resp->xbuf->head[0].iov_base;
2090 resp->xbuf->page_len = maxcount; 2089 resp->xbuf->page_len = maxcount;
2091 2090
2092 /* read zero bytes -> don't set up tail */ 2091 /* Use rest of head for padding and remaining ops: */
2093 if(!maxcount) 2092 resp->rqstp->rq_restailpage = 0;
2094 return 0; 2093 resp->xbuf->tail[0].iov_base = p;
2095
2096 /* set up page for remaining responses */
2097 svc_take_page(resp->rqstp);
2098 resp->xbuf->tail[0].iov_base =
2099 page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
2100 resp->rqstp->rq_restailpage = resp->rqstp->rq_resused-1;
2101 resp->xbuf->tail[0].iov_len = 0; 2094 resp->xbuf->tail[0].iov_len = 0;
2102 resp->p = resp->xbuf->tail[0].iov_base;
2103 resp->end = resp->p + PAGE_SIZE/4;
2104
2105 if (maxcount&3) { 2095 if (maxcount&3) {
2106 *(resp->p)++ = 0; 2096 RESERVE_SPACE(4);
2097 WRITE32(0);
2107 resp->xbuf->tail[0].iov_base += maxcount&3; 2098 resp->xbuf->tail[0].iov_base += maxcount&3;
2108 resp->xbuf->tail[0].iov_len = 4 - (maxcount&3); 2099 resp->xbuf->tail[0].iov_len = 4 - (maxcount&3);
2100 ADJUST_ARGS();
2109 } 2101 }
2110 return 0; 2102 return 0;
2111} 2103}
@@ -2142,21 +2134,20 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_r
2142 2134
2143 WRITE32(maxcount); 2135 WRITE32(maxcount);
2144 ADJUST_ARGS(); 2136 ADJUST_ARGS();
2145 resp->xbuf->head[0].iov_len = ((char*)resp->p) - (char*)resp->xbuf->head[0].iov_base; 2137 resp->xbuf->head[0].iov_len = (char*)p
2138 - (char*)resp->xbuf->head[0].iov_base;
2139 resp->xbuf->page_len = maxcount;
2146 2140
2147 svc_take_page(resp->rqstp); 2141 /* Use rest of head for padding and remaining ops: */
2148 resp->xbuf->tail[0].iov_base = 2142 resp->rqstp->rq_restailpage = 0;
2149 page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]); 2143 resp->xbuf->tail[0].iov_base = p;
2150 resp->rqstp->rq_restailpage = resp->rqstp->rq_resused-1;
2151 resp->xbuf->tail[0].iov_len = 0; 2144 resp->xbuf->tail[0].iov_len = 0;
2152 resp->p = resp->xbuf->tail[0].iov_base;
2153 resp->end = resp->p + PAGE_SIZE/4;
2154
2155 resp->xbuf->page_len = maxcount;
2156 if (maxcount&3) { 2145 if (maxcount&3) {
2157 *(resp->p)++ = 0; 2146 RESERVE_SPACE(4);
2147 WRITE32(0);
2158 resp->xbuf->tail[0].iov_base += maxcount&3; 2148 resp->xbuf->tail[0].iov_base += maxcount&3;
2159 resp->xbuf->tail[0].iov_len = 4 - (maxcount&3); 2149 resp->xbuf->tail[0].iov_len = 4 - (maxcount&3);
2150 ADJUST_ARGS();
2160 } 2151 }
2161 return 0; 2152 return 0;
2162} 2153}
@@ -2166,7 +2157,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_re
2166{ 2157{
2167 int maxcount; 2158 int maxcount;
2168 loff_t offset; 2159 loff_t offset;
2169 u32 *page, *savep; 2160 u32 *page, *savep, *tailbase;
2170 ENCODE_HEAD; 2161 ENCODE_HEAD;
2171 2162
2172 if (nfserr) 2163 if (nfserr)
@@ -2182,6 +2173,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_re
2182 WRITE32(0); 2173 WRITE32(0);
2183 ADJUST_ARGS(); 2174 ADJUST_ARGS();
2184 resp->xbuf->head[0].iov_len = ((char*)resp->p) - (char*)resp->xbuf->head[0].iov_base; 2175 resp->xbuf->head[0].iov_len = ((char*)resp->p) - (char*)resp->xbuf->head[0].iov_base;
2176 tailbase = p;
2185 2177
2186 maxcount = PAGE_SIZE; 2178 maxcount = PAGE_SIZE;
2187 if (maxcount > readdir->rd_maxcount) 2179 if (maxcount > readdir->rd_maxcount)
@@ -2226,14 +2218,12 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_re
2226 *p++ = htonl(readdir->common.err == nfserr_eof); 2218 *p++ = htonl(readdir->common.err == nfserr_eof);
2227 resp->xbuf->page_len = ((char*)p) - (char*)page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]); 2219 resp->xbuf->page_len = ((char*)p) - (char*)page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
2228 2220
2229 /* allocate a page for the tail */ 2221 /* Use rest of head for padding and remaining ops: */
2230 svc_take_page(resp->rqstp); 2222 resp->rqstp->rq_restailpage = 0;
2231 resp->xbuf->tail[0].iov_base = 2223 resp->xbuf->tail[0].iov_base = tailbase;
2232 page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
2233 resp->rqstp->rq_restailpage = resp->rqstp->rq_resused-1;
2234 resp->xbuf->tail[0].iov_len = 0; 2224 resp->xbuf->tail[0].iov_len = 0;
2235 resp->p = resp->xbuf->tail[0].iov_base; 2225 resp->p = resp->xbuf->tail[0].iov_base;
2236 resp->end = resp->p + PAGE_SIZE/4; 2226 resp->end = resp->p + (PAGE_SIZE - resp->xbuf->head[0].iov_len)/4;
2237 2227
2238 return 0; 2228 return 0;
2239err_no_verf: 2229err_no_verf:
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 3e6b75cd90fd..06cd0db0f32b 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -553,7 +553,7 @@ static struct svc_procedure nfsd_procedures2[18] = {
553 PROC(none, void, void, none, RC_NOCACHE, ST), 553 PROC(none, void, void, none, RC_NOCACHE, ST),
554 PROC(lookup, diropargs, diropres, fhandle, RC_NOCACHE, ST+FH+AT), 554 PROC(lookup, diropargs, diropres, fhandle, RC_NOCACHE, ST+FH+AT),
555 PROC(readlink, readlinkargs, readlinkres, none, RC_NOCACHE, ST+1+NFS_MAXPATHLEN/4), 555 PROC(readlink, readlinkargs, readlinkres, none, RC_NOCACHE, ST+1+NFS_MAXPATHLEN/4),
556 PROC(read, readargs, readres, fhandle, RC_NOCACHE, ST+AT+1+NFSSVC_MAXBLKSIZE), 556 PROC(read, readargs, readres, fhandle, RC_NOCACHE, ST+AT+1+NFSSVC_MAXBLKSIZE/4),
557 PROC(none, void, void, none, RC_NOCACHE, ST), 557 PROC(none, void, void, none, RC_NOCACHE, ST),
558 PROC(write, writeargs, attrstat, fhandle, RC_REPLBUFF, ST+AT), 558 PROC(write, writeargs, attrstat, fhandle, RC_REPLBUFF, ST+AT),
559 PROC(create, createargs, diropres, fhandle, RC_REPLBUFF, ST+FH+AT), 559 PROC(create, createargs, diropres, fhandle, RC_REPLBUFF, ST+FH+AT),
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 31018333dc38..6aa92d0e6876 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -371,7 +371,6 @@ out_nfserr:
371static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf) 371static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf)
372{ 372{
373 ssize_t buflen; 373 ssize_t buflen;
374 int error;
375 374
376 buflen = vfs_getxattr(dentry, key, NULL, 0); 375 buflen = vfs_getxattr(dentry, key, NULL, 0);
377 if (buflen <= 0) 376 if (buflen <= 0)
@@ -381,10 +380,7 @@ static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf)
381 if (!*buf) 380 if (!*buf)
382 return -ENOMEM; 381 return -ENOMEM;
383 382
384 error = vfs_getxattr(dentry, key, *buf, buflen); 383 return vfs_getxattr(dentry, key, *buf, buflen);
385 if (error < 0)
386 return error;
387 return buflen;
388} 384}
389#endif 385#endif
390 386
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index bff0f0d06867..21f38accd039 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -153,6 +153,7 @@ struct o2hb_region {
153struct o2hb_bio_wait_ctxt { 153struct o2hb_bio_wait_ctxt {
154 atomic_t wc_num_reqs; 154 atomic_t wc_num_reqs;
155 struct completion wc_io_complete; 155 struct completion wc_io_complete;
156 int wc_error;
156}; 157};
157 158
158static void o2hb_write_timeout(void *arg) 159static void o2hb_write_timeout(void *arg)
@@ -186,6 +187,7 @@ static inline void o2hb_bio_wait_init(struct o2hb_bio_wait_ctxt *wc,
186{ 187{
187 atomic_set(&wc->wc_num_reqs, num_ios); 188 atomic_set(&wc->wc_num_reqs, num_ios);
188 init_completion(&wc->wc_io_complete); 189 init_completion(&wc->wc_io_complete);
190 wc->wc_error = 0;
189} 191}
190 192
191/* Used in error paths too */ 193/* Used in error paths too */
@@ -218,8 +220,10 @@ static int o2hb_bio_end_io(struct bio *bio,
218{ 220{
219 struct o2hb_bio_wait_ctxt *wc = bio->bi_private; 221 struct o2hb_bio_wait_ctxt *wc = bio->bi_private;
220 222
221 if (error) 223 if (error) {
222 mlog(ML_ERROR, "IO Error %d\n", error); 224 mlog(ML_ERROR, "IO Error %d\n", error);
225 wc->wc_error = error;
226 }
223 227
224 if (bio->bi_size) 228 if (bio->bi_size)
225 return 1; 229 return 1;
@@ -390,6 +394,8 @@ static int o2hb_read_slots(struct o2hb_region *reg,
390 394
391bail_and_wait: 395bail_and_wait:
392 o2hb_wait_on_io(reg, &wc); 396 o2hb_wait_on_io(reg, &wc);
397 if (wc.wc_error && !status)
398 status = wc.wc_error;
393 399
394 if (bios) { 400 if (bios) {
395 for(i = 0; i < num_bios; i++) 401 for(i = 0; i < num_bios; i++)
@@ -790,20 +796,24 @@ static int o2hb_highest_node(unsigned long *nodes,
790 return highest; 796 return highest;
791} 797}
792 798
793static void o2hb_do_disk_heartbeat(struct o2hb_region *reg) 799static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
794{ 800{
795 int i, ret, highest_node, change = 0; 801 int i, ret, highest_node, change = 0;
796 unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)]; 802 unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)];
797 struct bio *write_bio; 803 struct bio *write_bio;
798 struct o2hb_bio_wait_ctxt write_wc; 804 struct o2hb_bio_wait_ctxt write_wc;
799 805
800 if (o2nm_configured_node_map(configured_nodes, sizeof(configured_nodes))) 806 ret = o2nm_configured_node_map(configured_nodes,
801 return; 807 sizeof(configured_nodes));
808 if (ret) {
809 mlog_errno(ret);
810 return ret;
811 }
802 812
803 highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES); 813 highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES);
804 if (highest_node >= O2NM_MAX_NODES) { 814 if (highest_node >= O2NM_MAX_NODES) {
805 mlog(ML_NOTICE, "ocfs2_heartbeat: no configured nodes found!\n"); 815 mlog(ML_NOTICE, "ocfs2_heartbeat: no configured nodes found!\n");
806 return; 816 return -EINVAL;
807 } 817 }
808 818
809 /* No sense in reading the slots of nodes that don't exist 819 /* No sense in reading the slots of nodes that don't exist
@@ -813,7 +823,7 @@ static void o2hb_do_disk_heartbeat(struct o2hb_region *reg)
813 ret = o2hb_read_slots(reg, highest_node + 1); 823 ret = o2hb_read_slots(reg, highest_node + 1);
814 if (ret < 0) { 824 if (ret < 0) {
815 mlog_errno(ret); 825 mlog_errno(ret);
816 return; 826 return ret;
817 } 827 }
818 828
819 /* With an up to date view of the slots, we can check that no 829 /* With an up to date view of the slots, we can check that no
@@ -831,7 +841,7 @@ static void o2hb_do_disk_heartbeat(struct o2hb_region *reg)
831 ret = o2hb_issue_node_write(reg, &write_bio, &write_wc); 841 ret = o2hb_issue_node_write(reg, &write_bio, &write_wc);
832 if (ret < 0) { 842 if (ret < 0) {
833 mlog_errno(ret); 843 mlog_errno(ret);
834 return; 844 return ret;
835 } 845 }
836 846
837 i = -1; 847 i = -1;
@@ -847,6 +857,15 @@ static void o2hb_do_disk_heartbeat(struct o2hb_region *reg)
847 */ 857 */
848 o2hb_wait_on_io(reg, &write_wc); 858 o2hb_wait_on_io(reg, &write_wc);
849 bio_put(write_bio); 859 bio_put(write_bio);
860 if (write_wc.wc_error) {
861 /* Do not re-arm the write timeout on I/O error - we
862 * can't be sure that the new block ever made it to
863 * disk */
864 mlog(ML_ERROR, "Write error %d on device \"%s\"\n",
865 write_wc.wc_error, reg->hr_dev_name);
866 return write_wc.wc_error;
867 }
868
850 o2hb_arm_write_timeout(reg); 869 o2hb_arm_write_timeout(reg);
851 870
852 /* let the person who launched us know when things are steady */ 871 /* let the person who launched us know when things are steady */
@@ -854,6 +873,8 @@ static void o2hb_do_disk_heartbeat(struct o2hb_region *reg)
854 if (atomic_dec_and_test(&reg->hr_steady_iterations)) 873 if (atomic_dec_and_test(&reg->hr_steady_iterations))
855 wake_up(&o2hb_steady_queue); 874 wake_up(&o2hb_steady_queue);
856 } 875 }
876
877 return 0;
857} 878}
858 879
859/* Subtract b from a, storing the result in a. a *must* have a larger 880/* Subtract b from a, storing the result in a. a *must* have a larger
@@ -913,7 +934,10 @@ static int o2hb_thread(void *data)
913 * likely to time itself out. */ 934 * likely to time itself out. */
914 do_gettimeofday(&before_hb); 935 do_gettimeofday(&before_hb);
915 936
916 o2hb_do_disk_heartbeat(reg); 937 i = 0;
938 do {
939 ret = o2hb_do_disk_heartbeat(reg);
940 } while (ret && ++i < 2);
917 941
918 do_gettimeofday(&after_hb); 942 do_gettimeofday(&after_hb);
919 elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb); 943 elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb);
diff --git a/fs/ocfs2/dlm/userdlm.c b/fs/ocfs2/dlm/userdlm.c
index c3764f4744ee..74ca4e5f9765 100644
--- a/fs/ocfs2/dlm/userdlm.c
+++ b/fs/ocfs2/dlm/userdlm.c
@@ -139,6 +139,10 @@ static void user_ast(void *opaque)
139 return; 139 return;
140 } 140 }
141 141
142 mlog_bug_on_msg(lockres->l_requested == LKM_IVMODE,
143 "Lockres %s, requested ivmode. flags 0x%x\n",
144 lockres->l_name, lockres->l_flags);
145
142 /* we're downconverting. */ 146 /* we're downconverting. */
143 if (lockres->l_requested < lockres->l_level) { 147 if (lockres->l_requested < lockres->l_level) {
144 if (lockres->l_requested <= 148 if (lockres->l_requested <=
@@ -229,23 +233,42 @@ static void user_unlock_ast(void *opaque, enum dlm_status status)
229 233
230 mlog(0, "UNLOCK AST called on lock %s\n", lockres->l_name); 234 mlog(0, "UNLOCK AST called on lock %s\n", lockres->l_name);
231 235
232 if (status != DLM_NORMAL) 236 if (status != DLM_NORMAL && status != DLM_CANCELGRANT)
233 mlog(ML_ERROR, "Dlm returns status %d\n", status); 237 mlog(ML_ERROR, "Dlm returns status %d\n", status);
234 238
235 spin_lock(&lockres->l_lock); 239 spin_lock(&lockres->l_lock);
236 if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) 240 /* The teardown flag gets set early during the unlock process,
241 * so test the cancel flag to make sure that this ast isn't
242 * for a concurrent cancel. */
243 if (lockres->l_flags & USER_LOCK_IN_TEARDOWN
244 && !(lockres->l_flags & USER_LOCK_IN_CANCEL)) {
237 lockres->l_level = LKM_IVMODE; 245 lockres->l_level = LKM_IVMODE;
238 else { 246 } else if (status == DLM_CANCELGRANT) {
247 mlog(0, "Lock %s, cancel fails, flags 0x%x\n",
248 lockres->l_name, lockres->l_flags);
249 /* We tried to cancel a convert request, but it was
250 * already granted. Don't clear the busy flag - the
251 * ast should've done this already. */
252 BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL));
253 lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
254 goto out_noclear;
255 } else {
256 BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL));
257 /* Cancel succeeded, we want to re-queue */
258 mlog(0, "Lock %s, cancel succeeds, flags 0x%x\n",
259 lockres->l_name, lockres->l_flags);
239 lockres->l_requested = LKM_IVMODE; /* cancel an 260 lockres->l_requested = LKM_IVMODE; /* cancel an
240 * upconvert 261 * upconvert
241 * request. */ 262 * request. */
242 lockres->l_flags &= ~USER_LOCK_IN_CANCEL; 263 lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
243 /* we want the unblock thread to look at it again 264 /* we want the unblock thread to look at it again
244 * now. */ 265 * now. */
245 __user_dlm_queue_lockres(lockres); 266 if (lockres->l_flags & USER_LOCK_BLOCKED)
267 __user_dlm_queue_lockres(lockres);
246 } 268 }
247 269
248 lockres->l_flags &= ~USER_LOCK_BUSY; 270 lockres->l_flags &= ~USER_LOCK_BUSY;
271out_noclear:
249 spin_unlock(&lockres->l_lock); 272 spin_unlock(&lockres->l_lock);
250 273
251 wake_up(&lockres->l_event); 274 wake_up(&lockres->l_event);
@@ -268,13 +291,26 @@ static void user_dlm_unblock_lock(void *opaque)
268 291
269 spin_lock(&lockres->l_lock); 292 spin_lock(&lockres->l_lock);
270 293
271 BUG_ON(!(lockres->l_flags & USER_LOCK_BLOCKED)); 294 mlog_bug_on_msg(!(lockres->l_flags & USER_LOCK_QUEUED),
272 BUG_ON(!(lockres->l_flags & USER_LOCK_QUEUED)); 295 "Lockres %s, flags 0x%x\n",
296 lockres->l_name, lockres->l_flags);
273 297
274 /* notice that we don't clear USER_LOCK_BLOCKED here. That's 298 /* notice that we don't clear USER_LOCK_BLOCKED here. If it's
275 * for user_ast to do. */ 299 * set, we want user_ast clear it. */
276 lockres->l_flags &= ~USER_LOCK_QUEUED; 300 lockres->l_flags &= ~USER_LOCK_QUEUED;
277 301
302 /* It's valid to get here and no longer be blocked - if we get
303 * several basts in a row, we might be queued by the first
304 * one, the unblock thread might run and clear the queued
305 * flag, and finally we might get another bast which re-queues
306 * us before our ast for the downconvert is called. */
307 if (!(lockres->l_flags & USER_LOCK_BLOCKED)) {
308 mlog(0, "Lockres %s, flags 0x%x: queued but not blocking\n",
309 lockres->l_name, lockres->l_flags);
310 spin_unlock(&lockres->l_lock);
311 goto drop_ref;
312 }
313
278 if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) { 314 if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
279 mlog(0, "lock is in teardown so we do nothing\n"); 315 mlog(0, "lock is in teardown so we do nothing\n");
280 spin_unlock(&lockres->l_lock); 316 spin_unlock(&lockres->l_lock);
@@ -282,7 +318,9 @@ static void user_dlm_unblock_lock(void *opaque)
282 } 318 }
283 319
284 if (lockres->l_flags & USER_LOCK_BUSY) { 320 if (lockres->l_flags & USER_LOCK_BUSY) {
285 mlog(0, "BUSY flag detected...\n"); 321 mlog(0, "Cancel lock %s, flags 0x%x\n",
322 lockres->l_name, lockres->l_flags);
323
286 if (lockres->l_flags & USER_LOCK_IN_CANCEL) { 324 if (lockres->l_flags & USER_LOCK_IN_CANCEL) {
287 spin_unlock(&lockres->l_lock); 325 spin_unlock(&lockres->l_lock);
288 goto drop_ref; 326 goto drop_ref;
@@ -296,14 +334,7 @@ static void user_dlm_unblock_lock(void *opaque)
296 LKM_CANCEL, 334 LKM_CANCEL,
297 user_unlock_ast, 335 user_unlock_ast,
298 lockres); 336 lockres);
299 if (status == DLM_CANCELGRANT) { 337 if (status != DLM_NORMAL)
300 /* If we got this, then the ast was fired
301 * before we could cancel. We cleanup our
302 * state, and restart the function. */
303 spin_lock(&lockres->l_lock);
304 lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
305 spin_unlock(&lockres->l_lock);
306 } else if (status != DLM_NORMAL)
307 user_log_dlm_error("dlmunlock", status, lockres); 338 user_log_dlm_error("dlmunlock", status, lockres);
308 goto drop_ref; 339 goto drop_ref;
309 } 340 }
@@ -581,6 +612,14 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
581 mlog(0, "asked to destroy %s\n", lockres->l_name); 612 mlog(0, "asked to destroy %s\n", lockres->l_name);
582 613
583 spin_lock(&lockres->l_lock); 614 spin_lock(&lockres->l_lock);
615 if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
616 mlog(0, "Lock is already torn down\n");
617 spin_unlock(&lockres->l_lock);
618 return 0;
619 }
620
621 lockres->l_flags |= USER_LOCK_IN_TEARDOWN;
622
584 while (lockres->l_flags & USER_LOCK_BUSY) { 623 while (lockres->l_flags & USER_LOCK_BUSY) {
585 spin_unlock(&lockres->l_lock); 624 spin_unlock(&lockres->l_lock);
586 625
@@ -606,7 +645,6 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
606 645
607 lockres->l_flags &= ~USER_LOCK_ATTACHED; 646 lockres->l_flags &= ~USER_LOCK_ATTACHED;
608 lockres->l_flags |= USER_LOCK_BUSY; 647 lockres->l_flags |= USER_LOCK_BUSY;
609 lockres->l_flags |= USER_LOCK_IN_TEARDOWN;
610 spin_unlock(&lockres->l_lock); 648 spin_unlock(&lockres->l_lock);
611 649
612 mlog(0, "unlocking lockres %s\n", lockres->l_name); 650 mlog(0, "unlocking lockres %s\n", lockres->l_name);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 34e903a6a46b..581eb451a41a 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -260,6 +260,17 @@ static int ocfs2_truncate_file(struct inode *inode,
260 if (new_i_size == le64_to_cpu(fe->i_size)) 260 if (new_i_size == le64_to_cpu(fe->i_size))
261 goto bail; 261 goto bail;
262 262
263 /* This forces other nodes to sync and drop their pages. Do
264 * this even if we have a truncate without allocation change -
265 * ocfs2 cluster sizes can be much greater than page size, so
266 * we have to truncate them anyway. */
267 status = ocfs2_data_lock(inode, 1);
268 if (status < 0) {
269 mlog_errno(status);
270 goto bail;
271 }
272 ocfs2_data_unlock(inode, 1);
273
263 if (le32_to_cpu(fe->i_clusters) == 274 if (le32_to_cpu(fe->i_clusters) ==
264 ocfs2_clusters_for_bytes(osb->sb, new_i_size)) { 275 ocfs2_clusters_for_bytes(osb->sb, new_i_size)) {
265 mlog(0, "fe->i_clusters = %u, so we do a simple truncate\n", 276 mlog(0, "fe->i_clusters = %u, so we do a simple truncate\n",
@@ -272,14 +283,6 @@ static int ocfs2_truncate_file(struct inode *inode,
272 goto bail; 283 goto bail;
273 } 284 }
274 285
275 /* This forces other nodes to sync and drop their pages */
276 status = ocfs2_data_lock(inode, 1);
277 if (status < 0) {
278 mlog_errno(status);
279 goto bail;
280 }
281 ocfs2_data_unlock(inode, 1);
282
283 /* alright, we're going to need to do a full blown alloc size 286 /* alright, we're going to need to do a full blown alloc size
284 * change. Orphan the inode so that recovery can complete the 287 * change. Orphan the inode so that recovery can complete the
285 * truncate if necessary. This does the task of marking 288 * truncate if necessary. This does the task of marking
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 7efa73d44c9a..20d4b2237fce 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -103,8 +103,8 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
103 size_t buflen, loff_t *fpos) 103 size_t buflen, loff_t *fpos)
104{ 104{
105 ssize_t acc = 0, tmp; 105 ssize_t acc = 0, tmp;
106 size_t tsz, nr_bytes; 106 size_t tsz;
107 u64 start; 107 u64 start, nr_bytes;
108 struct vmcore *curr_m = NULL; 108 struct vmcore *curr_m = NULL;
109 109
110 if (buflen == 0 || *fpos >= vmcore_size) 110 if (buflen == 0 || *fpos >= vmcore_size)
diff --git a/fs/select.c b/fs/select.c
index 071660fa7b01..a8109baa5e46 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -310,8 +310,9 @@ static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
310 fd_set __user *exp, s64 *timeout) 310 fd_set __user *exp, s64 *timeout)
311{ 311{
312 fd_set_bits fds; 312 fd_set_bits fds;
313 char *bits; 313 void *bits;
314 int ret, size, max_fdset; 314 int ret, max_fdset;
315 unsigned int size;
315 struct fdtable *fdt; 316 struct fdtable *fdt;
316 /* Allocate small arguments on the stack to save memory and be faster */ 317 /* Allocate small arguments on the stack to save memory and be faster */
317 long stack_fds[SELECT_STACK_ALLOC/sizeof(long)]; 318 long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];
@@ -333,20 +334,21 @@ static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
333 * since we used fdset we need to allocate memory in units of 334 * since we used fdset we need to allocate memory in units of
334 * long-words. 335 * long-words.
335 */ 336 */
336 ret = -ENOMEM;
337 size = FDS_BYTES(n); 337 size = FDS_BYTES(n);
338 if (6*size < SELECT_STACK_ALLOC) 338 bits = stack_fds;
339 bits = stack_fds; 339 if (size > sizeof(stack_fds) / 6) {
340 else 340 /* Not enough space in on-stack array; must use kmalloc */
341 ret = -ENOMEM;
341 bits = kmalloc(6 * size, GFP_KERNEL); 342 bits = kmalloc(6 * size, GFP_KERNEL);
342 if (!bits) 343 if (!bits)
343 goto out_nofds; 344 goto out_nofds;
344 fds.in = (unsigned long *) bits; 345 }
345 fds.out = (unsigned long *) (bits + size); 346 fds.in = bits;
346 fds.ex = (unsigned long *) (bits + 2*size); 347 fds.out = bits + size;
347 fds.res_in = (unsigned long *) (bits + 3*size); 348 fds.ex = bits + 2*size;
348 fds.res_out = (unsigned long *) (bits + 4*size); 349 fds.res_in = bits + 3*size;
349 fds.res_ex = (unsigned long *) (bits + 5*size); 350 fds.res_out = bits + 4*size;
351 fds.res_ex = bits + 5*size;
350 352
351 if ((ret = get_fd_set(n, inp, fds.in)) || 353 if ((ret = get_fd_set(n, inp, fds.in)) ||
352 (ret = get_fd_set(n, outp, fds.out)) || 354 (ret = get_fd_set(n, outp, fds.out)) ||
diff --git a/fs/sync.c b/fs/sync.c
index 8616006d2094..aab5ffe77e9f 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -61,7 +61,7 @@
61 * will be available after a crash. 61 * will be available after a crash.
62 */ 62 */
63asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes, 63asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes,
64 int flags) 64 unsigned int flags)
65{ 65{
66 int ret; 66 int ret;
67 struct file *file; 67 struct file *file;
@@ -126,7 +126,7 @@ out:
126 * `endbyte' is inclusive 126 * `endbyte' is inclusive
127 */ 127 */
128int do_sync_file_range(struct file *file, loff_t offset, loff_t endbyte, 128int do_sync_file_range(struct file *file, loff_t offset, loff_t endbyte,
129 int flags) 129 unsigned int flags)
130{ 130{
131 int ret; 131 int ret;
132 struct address_space *mapping; 132 struct address_space *mapping;
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 6cbbd165c60d..4d191ef39b67 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -870,12 +870,14 @@ xfs_page_state_convert(
870 pgoff_t end_index, last_index, tlast; 870 pgoff_t end_index, last_index, tlast;
871 ssize_t size, len; 871 ssize_t size, len;
872 int flags, err, iomap_valid = 0, uptodate = 1; 872 int flags, err, iomap_valid = 0, uptodate = 1;
873 int page_dirty, count = 0, trylock_flag = 0; 873 int page_dirty, count = 0;
874 int trylock = 0;
874 int all_bh = unmapped; 875 int all_bh = unmapped;
875 876
876 /* wait for other IO threads? */ 877 if (startio) {
877 if (startio && (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking)) 878 if (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking)
878 trylock_flag |= BMAPI_TRYLOCK; 879 trylock |= BMAPI_TRYLOCK;
880 }
879 881
880 /* Is this page beyond the end of the file? */ 882 /* Is this page beyond the end of the file? */
881 offset = i_size_read(inode); 883 offset = i_size_read(inode);
@@ -956,15 +958,13 @@ xfs_page_state_convert(
956 958
957 if (buffer_unwritten(bh)) { 959 if (buffer_unwritten(bh)) {
958 type = IOMAP_UNWRITTEN; 960 type = IOMAP_UNWRITTEN;
959 flags = BMAPI_WRITE|BMAPI_IGNSTATE; 961 flags = BMAPI_WRITE | BMAPI_IGNSTATE;
960 } else if (buffer_delay(bh)) { 962 } else if (buffer_delay(bh)) {
961 type = IOMAP_DELAY; 963 type = IOMAP_DELAY;
962 flags = BMAPI_ALLOCATE; 964 flags = BMAPI_ALLOCATE | trylock;
963 if (!startio)
964 flags |= trylock_flag;
965 } else { 965 } else {
966 type = IOMAP_NEW; 966 type = IOMAP_NEW;
967 flags = BMAPI_WRITE|BMAPI_MMAP; 967 flags = BMAPI_WRITE | BMAPI_MMAP;
968 } 968 }
969 969
970 if (!iomap_valid) { 970 if (!iomap_valid) {
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 9fb0312665ca..26fed0756f01 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -182,7 +182,7 @@ free_address(
182{ 182{
183 a_list_t *aentry; 183 a_list_t *aentry;
184 184
185 aentry = kmalloc(sizeof(a_list_t), GFP_ATOMIC & ~__GFP_HIGH); 185 aentry = kmalloc(sizeof(a_list_t), GFP_NOWAIT);
186 if (likely(aentry)) { 186 if (likely(aentry)) {
187 spin_lock(&as_lock); 187 spin_lock(&as_lock);
188 aentry->next = as_free_head; 188 aentry->next = as_free_head;
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 149237304fb6..2e2e275c786f 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -673,8 +673,7 @@ xfs_vn_setattr(
673 if (ia_valid & ATTR_ATIME) { 673 if (ia_valid & ATTR_ATIME) {
674 vattr.va_mask |= XFS_AT_ATIME; 674 vattr.va_mask |= XFS_AT_ATIME;
675 vattr.va_atime = attr->ia_atime; 675 vattr.va_atime = attr->ia_atime;
676 if (ia_valid & ATTR_ATIME_SET) 676 inode->i_atime = attr->ia_atime;
677 inode->i_atime = attr->ia_atime;
678 } 677 }
679 if (ia_valid & ATTR_MTIME) { 678 if (ia_valid & ATTR_MTIME) {
680 vattr.va_mask |= XFS_AT_MTIME; 679 vattr.va_mask |= XFS_AT_MTIME;
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 4eeb856183b1..deddbd03c166 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -158,9 +158,10 @@ xfs_ialloc_ag_alloc(
158 */ 158 */
159 agi = XFS_BUF_TO_AGI(agbp); 159 agi = XFS_BUF_TO_AGI(agbp);
160 newino = be32_to_cpu(agi->agi_newino); 160 newino = be32_to_cpu(agi->agi_newino);
161 if(likely(newino != NULLAGINO)) { 161 args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
162 args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) + 162 XFS_IALLOC_BLOCKS(args.mp);
163 XFS_IALLOC_BLOCKS(args.mp); 163 if (likely(newino != NULLAGINO &&
164 (args.agbno < be32_to_cpu(agi->agi_length)))) {
164 args.fsbno = XFS_AGB_TO_FSB(args.mp, 165 args.fsbno = XFS_AGB_TO_FSB(args.mp,
165 be32_to_cpu(agi->agi_seqno), args.agbno); 166 be32_to_cpu(agi->agi_seqno), args.agbno);
166 args.type = XFS_ALLOCTYPE_THIS_BNO; 167 args.type = XFS_ALLOCTYPE_THIS_BNO;
@@ -182,8 +183,8 @@ xfs_ialloc_ag_alloc(
182 * Set the alignment for the allocation. 183 * Set the alignment for the allocation.
183 * If stripe alignment is turned on then align at stripe unit 184 * If stripe alignment is turned on then align at stripe unit
184 * boundary. 185 * boundary.
185 * If the cluster size is smaller than a filesystem block 186 * If the cluster size is smaller than a filesystem block
186 * then we're doing I/O for inodes in filesystem block size 187 * then we're doing I/O for inodes in filesystem block size
187 * pieces, so don't need alignment anyway. 188 * pieces, so don't need alignment anyway.
188 */ 189 */
189 isaligned = 0; 190 isaligned = 0;
@@ -192,7 +193,7 @@ xfs_ialloc_ag_alloc(
192 args.alignment = args.mp->m_dalign; 193 args.alignment = args.mp->m_dalign;
193 isaligned = 1; 194 isaligned = 1;
194 } else if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) && 195 } else if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) &&
195 args.mp->m_sb.sb_inoalignmt >= 196 args.mp->m_sb.sb_inoalignmt >=
196 XFS_B_TO_FSBT(args.mp, 197 XFS_B_TO_FSBT(args.mp,
197 XFS_INODE_CLUSTER_SIZE(args.mp))) 198 XFS_INODE_CLUSTER_SIZE(args.mp)))
198 args.alignment = args.mp->m_sb.sb_inoalignmt; 199 args.alignment = args.mp->m_sb.sb_inoalignmt;
@@ -220,7 +221,7 @@ xfs_ialloc_ag_alloc(
220 if ((error = xfs_alloc_vextent(&args))) 221 if ((error = xfs_alloc_vextent(&args)))
221 return error; 222 return error;
222 } 223 }
223 224
224 /* 225 /*
225 * If stripe alignment is turned on, then try again with cluster 226 * If stripe alignment is turned on, then try again with cluster
226 * alignment. 227 * alignment.
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index bb33113eef9f..b53854325266 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -421,7 +421,10 @@ finish_inode:
421 ip->i_chash = chlnew; 421 ip->i_chash = chlnew;
422 chlnew->chl_ip = ip; 422 chlnew->chl_ip = ip;
423 chlnew->chl_blkno = ip->i_blkno; 423 chlnew->chl_blkno = ip->i_blkno;
424 if (ch->ch_list)
425 ch->ch_list->chl_prev = chlnew;
424 chlnew->chl_next = ch->ch_list; 426 chlnew->chl_next = ch->ch_list;
427 chlnew->chl_prev = NULL;
425 ch->ch_list = chlnew; 428 ch->ch_list = chlnew;
426 chlnew = NULL; 429 chlnew = NULL;
427 } 430 }
@@ -723,23 +726,15 @@ xfs_iextract(
723 ASSERT(ip->i_cnext == ip && ip->i_cprev == ip); 726 ASSERT(ip->i_cnext == ip && ip->i_cprev == ip);
724 ASSERT(ip->i_chash != NULL); 727 ASSERT(ip->i_chash != NULL);
725 chm=NULL; 728 chm=NULL;
726 for (chl = ch->ch_list; chl != NULL; chl = chl->chl_next) { 729 chl = ip->i_chash;
727 if (chl->chl_blkno == ip->i_blkno) { 730 if (chl->chl_prev)
728 if (chm == NULL) { 731 chl->chl_prev->chl_next = chl->chl_next;
729 /* first item on the list */ 732 else
730 ch->ch_list = chl->chl_next; 733 ch->ch_list = chl->chl_next;
731 } else { 734 if (chl->chl_next)
732 chm->chl_next = chl->chl_next; 735 chl->chl_next->chl_prev = chl->chl_prev;
733 } 736 kmem_zone_free(xfs_chashlist_zone, chl);
734 kmem_zone_free(xfs_chashlist_zone, chl); 737 } else {
735 break;
736 } else {
737 ASSERT(chl->chl_ip != ip);
738 chm = chl;
739 }
740 }
741 ASSERT_ALWAYS(chl != NULL);
742 } else {
743 /* delete one inode from a non-empty list */ 738 /* delete one inode from a non-empty list */
744 iq = ip->i_cnext; 739 iq = ip->i_cnext;
745 iq->i_cprev = ip->i_cprev; 740 iq->i_cprev = ip->i_cprev;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 48146bdc6bdd..94b60dd03801 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2732,16 +2732,29 @@ xfs_iunpin(
2732 ASSERT(atomic_read(&ip->i_pincount) > 0); 2732 ASSERT(atomic_read(&ip->i_pincount) > 0);
2733 2733
2734 if (atomic_dec_and_test(&ip->i_pincount)) { 2734 if (atomic_dec_and_test(&ip->i_pincount)) {
2735 vnode_t *vp = XFS_ITOV_NULL(ip); 2735 /*
2736 * If the inode is currently being reclaimed, the
2737 * linux inode _and_ the xfs vnode may have been
2738 * freed so we cannot reference either of them safely.
2739 * Hence we should not try to do anything to them
2740 * if the xfs inode is currently in the reclaim
2741 * path.
2742 *
2743 * However, we still need to issue the unpin wakeup
2744 * call as the inode reclaim may be blocked waiting for
2745 * the inode to become unpinned.
2746 */
2747 if (!(ip->i_flags & (XFS_IRECLAIM|XFS_IRECLAIMABLE))) {
2748 vnode_t *vp = XFS_ITOV_NULL(ip);
2736 2749
2737 /* make sync come back and flush this inode */ 2750 /* make sync come back and flush this inode */
2738 if (vp) { 2751 if (vp) {
2739 struct inode *inode = vn_to_inode(vp); 2752 struct inode *inode = vn_to_inode(vp);
2740 2753
2741 if (!(inode->i_state & I_NEW)) 2754 if (!(inode->i_state & I_NEW))
2742 mark_inode_dirty_sync(inode); 2755 mark_inode_dirty_sync(inode);
2756 }
2743 } 2757 }
2744
2745 wake_up(&ip->i_ipin_wait); 2758 wake_up(&ip->i_ipin_wait);
2746 } 2759 }
2747} 2760}
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 39ef9c36ea55..3b544db1790b 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -189,6 +189,7 @@ typedef struct xfs_ihash {
189 */ 189 */
190typedef struct xfs_chashlist { 190typedef struct xfs_chashlist {
191 struct xfs_chashlist *chl_next; 191 struct xfs_chashlist *chl_next;
192 struct xfs_chashlist *chl_prev;
192 struct xfs_inode *chl_ip; 193 struct xfs_inode *chl_ip;
193 xfs_daddr_t chl_blkno; /* starting block number of 194 xfs_daddr_t chl_blkno; /* starting block number of
194 * the cluster */ 195 * the cluster */
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 049fabb7f7e0..c0b1c2906880 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -270,7 +270,7 @@ xfs_mount_validate_sb(
270 (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || 270 (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) ||
271 (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || 271 (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) ||
272 (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || 272 (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) ||
273 (sbp->sb_imax_pct > 100 || sbp->sb_imax_pct < 1))) { 273 (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */))) {
274 xfs_fs_mount_cmn_err(flags, "SB sanity check 1 failed"); 274 xfs_fs_mount_cmn_err(flags, "SB sanity check 1 failed");
275 return XFS_ERROR(EFSCORRUPTED); 275 return XFS_ERROR(EFSCORRUPTED);
276 } 276 }