aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_super.c13
-rw-r--r--fs/Kconfig3
-rw-r--r--fs/configfs/dir.c2
-rw-r--r--fs/eventpoll.c4
-rw-r--r--fs/exec.c19
-rw-r--r--fs/ext3/resize.c1
-rw-r--r--fs/fifo.c65
-rw-r--r--fs/fuse/dev.c256
-rw-r--r--fs/fuse/dir.c118
-rw-r--r--fs/fuse/file.c56
-rw-r--r--fs/fuse/fuse_i.h61
-rw-r--r--fs/fuse/inode.c135
-rw-r--r--fs/inotify.c2
-rw-r--r--fs/namespace.c7
-rw-r--r--fs/nfsd/auth.c46
-rw-r--r--fs/nfsd/export.c3
-rw-r--r--fs/nfsd/nfs3proc.c2
-rw-r--r--fs/nfsd/nfs4acl.c8
-rw-r--r--fs/nfsd/nfs4callback.c6
-rw-r--r--fs/nfsd/nfs4proc.c4
-rw-r--r--fs/nfsd/nfs4state.c150
-rw-r--r--fs/nfsd/nfs4xdr.c62
-rw-r--r--fs/nfsd/nfsproc.c2
-rw-r--r--fs/nfsd/vfs.c6
-rw-r--r--fs/ocfs2/cluster/heartbeat.c40
-rw-r--r--fs/ocfs2/dlm/userdlm.c74
-rw-r--r--fs/ocfs2/file.c19
-rw-r--r--fs/pipe.c310
-rw-r--r--fs/proc/vmcore.c4
-rw-r--r--fs/read_write.c2
-rw-r--r--fs/select.c30
-rw-r--r--fs/splice.c492
-rw-r--r--fs/sync.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c18
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c8
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.h4
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.h4
-rw-r--r--fs/xfs/xfs_ialloc.c15
-rw-r--r--fs/xfs/xfs_iget.c29
-rw-r--r--fs/xfs/xfs_inode.c27
-rw-r--r--fs/xfs/xfs_inode.h1
-rw-r--r--fs/xfs/xfs_mount.c2
45 files changed, 1180 insertions, 943 deletions
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index b0a0ae509c00..61c599b4a1e3 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -127,12 +127,13 @@ static struct super_block *v9fs_get_sb(struct file_system_type
127 127
128 if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) { 128 if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) {
129 dprintk(DEBUG_ERROR, "problem initiating session\n"); 129 dprintk(DEBUG_ERROR, "problem initiating session\n");
130 kfree(v9ses); 130 sb = ERR_PTR(newfid);
131 return ERR_PTR(newfid); 131 goto out_free_session;
132 } 132 }
133 133
134 sb = sget(fs_type, NULL, v9fs_set_super, v9ses); 134 sb = sget(fs_type, NULL, v9fs_set_super, v9ses);
135 135 if (IS_ERR(sb))
136 goto out_close_session;
136 v9fs_fill_super(sb, v9ses, flags); 137 v9fs_fill_super(sb, v9ses, flags);
137 138
138 inode = v9fs_get_inode(sb, S_IFDIR | mode); 139 inode = v9fs_get_inode(sb, S_IFDIR | mode);
@@ -185,6 +186,12 @@ static struct super_block *v9fs_get_sb(struct file_system_type
185 186
186 return sb; 187 return sb;
187 188
189out_close_session:
190 v9fs_session_close(v9ses);
191out_free_session:
192 kfree(v9ses);
193 return sb;
194
188put_back_sb: 195put_back_sb:
189 /* deactivate_super calls v9fs_kill_super which will frees the rest */ 196 /* deactivate_super calls v9fs_kill_super which will frees the rest */
190 up_write(&sb->s_umount); 197 up_write(&sb->s_umount);
diff --git a/fs/Kconfig b/fs/Kconfig
index e207be68d4ca..2524629dc835 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -799,6 +799,7 @@ config PROC_KCORE
799config PROC_VMCORE 799config PROC_VMCORE
800 bool "/proc/vmcore support (EXPERIMENTAL)" 800 bool "/proc/vmcore support (EXPERIMENTAL)"
801 depends on PROC_FS && EXPERIMENTAL && CRASH_DUMP 801 depends on PROC_FS && EXPERIMENTAL && CRASH_DUMP
802 default y
802 help 803 help
803 Exports the dump image of crashed kernel in ELF format. 804 Exports the dump image of crashed kernel in ELF format.
804 805
@@ -861,7 +862,7 @@ config RAMFS
861 862
862config CONFIGFS_FS 863config CONFIGFS_FS
863 tristate "Userspace-driven configuration filesystem (EXPERIMENTAL)" 864 tristate "Userspace-driven configuration filesystem (EXPERIMENTAL)"
864 depends on EXPERIMENTAL 865 depends on SYSFS && EXPERIMENTAL
865 help 866 help
866 configfs is a ram-based filesystem that provides the converse 867 configfs is a ram-based filesystem that provides the converse
867 of sysfs's functionality. Where sysfs is a filesystem-based 868 of sysfs's functionality. Where sysfs is a filesystem-based
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 8ed9b06a9828..5638c8f9362f 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -504,7 +504,7 @@ static int populate_groups(struct config_group *group)
504 int ret = 0; 504 int ret = 0;
505 int i; 505 int i;
506 506
507 if (group && group->default_groups) { 507 if (group->default_groups) {
508 /* FYI, we're faking mkdir here 508 /* FYI, we're faking mkdir here
509 * I'm not sure we need this semaphore, as we're called 509 * I'm not sure we need this semaphore, as we're called
510 * from our parent's mkdir. That holds our parent's 510 * from our parent's mkdir. That holds our parent's
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 242fe1a66ce5..1b4491cdd115 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -599,7 +599,7 @@ sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event)
599 switch (op) { 599 switch (op) {
600 case EPOLL_CTL_ADD: 600 case EPOLL_CTL_ADD:
601 if (!epi) { 601 if (!epi) {
602 epds.events |= POLLERR | POLLHUP | POLLRDHUP; 602 epds.events |= POLLERR | POLLHUP;
603 603
604 error = ep_insert(ep, &epds, tfile, fd); 604 error = ep_insert(ep, &epds, tfile, fd);
605 } else 605 } else
@@ -613,7 +613,7 @@ sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event)
613 break; 613 break;
614 case EPOLL_CTL_MOD: 614 case EPOLL_CTL_MOD:
615 if (epi) { 615 if (epi) {
616 epds.events |= POLLERR | POLLHUP | POLLRDHUP; 616 epds.events |= POLLERR | POLLHUP;
617 error = ep_modify(ep, epi, &epds); 617 error = ep_modify(ep, epi, &epds);
618 } else 618 } else
619 error = -ENOENT; 619 error = -ENOENT;
diff --git a/fs/exec.c b/fs/exec.c
index 0291a68a3626..3234a0c32d54 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -678,6 +678,18 @@ static int de_thread(struct task_struct *tsk)
678 while (leader->exit_state != EXIT_ZOMBIE) 678 while (leader->exit_state != EXIT_ZOMBIE)
679 yield(); 679 yield();
680 680
681 /*
682 * The only record we have of the real-time age of a
683 * process, regardless of execs it's done, is start_time.
684 * All the past CPU time is accumulated in signal_struct
685 * from sister threads now dead. But in this non-leader
686 * exec, nothing survives from the original leader thread,
687 * whose birth marks the true age of this process now.
688 * When we take on its identity by switching to its PID, we
689 * also take its birthdate (always earlier than our own).
690 */
691 current->start_time = leader->start_time;
692
681 spin_lock(&leader->proc_lock); 693 spin_lock(&leader->proc_lock);
682 spin_lock(&current->proc_lock); 694 spin_lock(&current->proc_lock);
683 proc_dentry1 = proc_pid_unhash(current); 695 proc_dentry1 = proc_pid_unhash(current);
@@ -723,7 +735,12 @@ static int de_thread(struct task_struct *tsk)
723 current->parent = current->real_parent = leader->real_parent; 735 current->parent = current->real_parent = leader->real_parent;
724 leader->parent = leader->real_parent = child_reaper; 736 leader->parent = leader->real_parent = child_reaper;
725 current->group_leader = current; 737 current->group_leader = current;
726 leader->group_leader = leader; 738 leader->group_leader = current;
739
740 /* Reduce leader to a thread */
741 detach_pid(leader, PIDTYPE_PGID);
742 detach_pid(leader, PIDTYPE_SID);
743 list_del_init(&leader->tasks);
727 744
728 add_parent(current); 745 add_parent(current);
729 add_parent(leader); 746 add_parent(leader);
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 1041dab6de2f..14f5f6ea3e72 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -974,6 +974,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
974 if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) { 974 if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) {
975 ext3_warning(sb, __FUNCTION__, 975 ext3_warning(sb, __FUNCTION__,
976 "multiple resizers run on filesystem!"); 976 "multiple resizers run on filesystem!");
977 unlock_super(sb);
977 err = -EBUSY; 978 err = -EBUSY;
978 goto exit_put; 979 goto exit_put;
979 } 980 }
diff --git a/fs/fifo.c b/fs/fifo.c
index 889f722ee36d..49035b174b48 100644
--- a/fs/fifo.c
+++ b/fs/fifo.c
@@ -15,30 +15,35 @@
15#include <linux/fs.h> 15#include <linux/fs.h>
16#include <linux/pipe_fs_i.h> 16#include <linux/pipe_fs_i.h>
17 17
18static void wait_for_partner(struct inode* inode, unsigned int* cnt) 18static void wait_for_partner(struct inode* inode, unsigned int *cnt)
19{ 19{
20 int cur = *cnt; 20 int cur = *cnt;
21 while(cur == *cnt) { 21
22 pipe_wait(inode); 22 while (cur == *cnt) {
23 if(signal_pending(current)) 23 pipe_wait(inode->i_pipe);
24 if (signal_pending(current))
24 break; 25 break;
25 } 26 }
26} 27}
27 28
28static void wake_up_partner(struct inode* inode) 29static void wake_up_partner(struct inode* inode)
29{ 30{
30 wake_up_interruptible(PIPE_WAIT(*inode)); 31 wake_up_interruptible(&inode->i_pipe->wait);
31} 32}
32 33
33static int fifo_open(struct inode *inode, struct file *filp) 34static int fifo_open(struct inode *inode, struct file *filp)
34{ 35{
36 struct pipe_inode_info *pipe;
35 int ret; 37 int ret;
36 38
37 mutex_lock(PIPE_MUTEX(*inode)); 39 mutex_lock(&inode->i_mutex);
38 if (!inode->i_pipe) { 40 pipe = inode->i_pipe;
41 if (!pipe) {
39 ret = -ENOMEM; 42 ret = -ENOMEM;
40 if(!pipe_new(inode)) 43 pipe = alloc_pipe_info(inode);
44 if (!pipe)
41 goto err_nocleanup; 45 goto err_nocleanup;
46 inode->i_pipe = pipe;
42 } 47 }
43 filp->f_version = 0; 48 filp->f_version = 0;
44 49
@@ -53,18 +58,18 @@ static int fifo_open(struct inode *inode, struct file *filp)
53 * opened, even when there is no process writing the FIFO. 58 * opened, even when there is no process writing the FIFO.
54 */ 59 */
55 filp->f_op = &read_fifo_fops; 60 filp->f_op = &read_fifo_fops;
56 PIPE_RCOUNTER(*inode)++; 61 pipe->r_counter++;
57 if (PIPE_READERS(*inode)++ == 0) 62 if (pipe->readers++ == 0)
58 wake_up_partner(inode); 63 wake_up_partner(inode);
59 64
60 if (!PIPE_WRITERS(*inode)) { 65 if (!pipe->writers) {
61 if ((filp->f_flags & O_NONBLOCK)) { 66 if ((filp->f_flags & O_NONBLOCK)) {
62 /* suppress POLLHUP until we have 67 /* suppress POLLHUP until we have
63 * seen a writer */ 68 * seen a writer */
64 filp->f_version = PIPE_WCOUNTER(*inode); 69 filp->f_version = pipe->w_counter;
65 } else 70 } else
66 { 71 {
67 wait_for_partner(inode, &PIPE_WCOUNTER(*inode)); 72 wait_for_partner(inode, &pipe->w_counter);
68 if(signal_pending(current)) 73 if(signal_pending(current))
69 goto err_rd; 74 goto err_rd;
70 } 75 }
@@ -78,16 +83,16 @@ static int fifo_open(struct inode *inode, struct file *filp)
78 * errno=ENXIO when there is no process reading the FIFO. 83 * errno=ENXIO when there is no process reading the FIFO.
79 */ 84 */
80 ret = -ENXIO; 85 ret = -ENXIO;
81 if ((filp->f_flags & O_NONBLOCK) && !PIPE_READERS(*inode)) 86 if ((filp->f_flags & O_NONBLOCK) && !pipe->readers)
82 goto err; 87 goto err;
83 88
84 filp->f_op = &write_fifo_fops; 89 filp->f_op = &write_fifo_fops;
85 PIPE_WCOUNTER(*inode)++; 90 pipe->w_counter++;
86 if (!PIPE_WRITERS(*inode)++) 91 if (!pipe->writers++)
87 wake_up_partner(inode); 92 wake_up_partner(inode);
88 93
89 if (!PIPE_READERS(*inode)) { 94 if (!pipe->readers) {
90 wait_for_partner(inode, &PIPE_RCOUNTER(*inode)); 95 wait_for_partner(inode, &pipe->r_counter);
91 if (signal_pending(current)) 96 if (signal_pending(current))
92 goto err_wr; 97 goto err_wr;
93 } 98 }
@@ -102,11 +107,11 @@ static int fifo_open(struct inode *inode, struct file *filp)
102 */ 107 */
103 filp->f_op = &rdwr_fifo_fops; 108 filp->f_op = &rdwr_fifo_fops;
104 109
105 PIPE_READERS(*inode)++; 110 pipe->readers++;
106 PIPE_WRITERS(*inode)++; 111 pipe->writers++;
107 PIPE_RCOUNTER(*inode)++; 112 pipe->r_counter++;
108 PIPE_WCOUNTER(*inode)++; 113 pipe->w_counter++;
109 if (PIPE_READERS(*inode) == 1 || PIPE_WRITERS(*inode) == 1) 114 if (pipe->readers == 1 || pipe->writers == 1)
110 wake_up_partner(inode); 115 wake_up_partner(inode);
111 break; 116 break;
112 117
@@ -116,27 +121,27 @@ static int fifo_open(struct inode *inode, struct file *filp)
116 } 121 }
117 122
118 /* Ok! */ 123 /* Ok! */
119 mutex_unlock(PIPE_MUTEX(*inode)); 124 mutex_unlock(&inode->i_mutex);
120 return 0; 125 return 0;
121 126
122err_rd: 127err_rd:
123 if (!--PIPE_READERS(*inode)) 128 if (!--pipe->readers)
124 wake_up_interruptible(PIPE_WAIT(*inode)); 129 wake_up_interruptible(&pipe->wait);
125 ret = -ERESTARTSYS; 130 ret = -ERESTARTSYS;
126 goto err; 131 goto err;
127 132
128err_wr: 133err_wr:
129 if (!--PIPE_WRITERS(*inode)) 134 if (!--pipe->writers)
130 wake_up_interruptible(PIPE_WAIT(*inode)); 135 wake_up_interruptible(&pipe->wait);
131 ret = -ERESTARTSYS; 136 ret = -ERESTARTSYS;
132 goto err; 137 goto err;
133 138
134err: 139err:
135 if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) 140 if (!pipe->readers && !pipe->writers)
136 free_pipe_info(inode); 141 free_pipe_info(inode);
137 142
138err_nocleanup: 143err_nocleanup:
139 mutex_unlock(PIPE_MUTEX(*inode)); 144 mutex_unlock(&inode->i_mutex);
140 return ret; 145 return ret;
141} 146}
142 147
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 23d1f52eb1b8..6c740f860665 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1,6 +1,6 @@
1/* 1/*
2 FUSE: Filesystem in Userspace 2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu> 3 Copyright (C) 2001-2006 Miklos Szeredi <miklos@szeredi.hu>
4 4
5 This program can be distributed under the terms of the GNU GPL. 5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING. 6 See the file COPYING.
@@ -23,13 +23,11 @@ static kmem_cache_t *fuse_req_cachep;
23 23
24static struct fuse_conn *fuse_get_conn(struct file *file) 24static struct fuse_conn *fuse_get_conn(struct file *file)
25{ 25{
26 struct fuse_conn *fc; 26 /*
27 spin_lock(&fuse_lock); 27 * Lockless access is OK, because file->private data is set
28 fc = file->private_data; 28 * once during mount and is valid until the file is released.
29 if (fc && !fc->connected) 29 */
30 fc = NULL; 30 return file->private_data;
31 spin_unlock(&fuse_lock);
32 return fc;
33} 31}
34 32
35static void fuse_request_init(struct fuse_req *req) 33static void fuse_request_init(struct fuse_req *req)
@@ -74,10 +72,8 @@ static void restore_sigs(sigset_t *oldset)
74 */ 72 */
75void fuse_reset_request(struct fuse_req *req) 73void fuse_reset_request(struct fuse_req *req)
76{ 74{
77 int preallocated = req->preallocated;
78 BUG_ON(atomic_read(&req->count) != 1); 75 BUG_ON(atomic_read(&req->count) != 1);
79 fuse_request_init(req); 76 fuse_request_init(req);
80 req->preallocated = preallocated;
81} 77}
82 78
83static void __fuse_get_request(struct fuse_req *req) 79static void __fuse_get_request(struct fuse_req *req)
@@ -92,80 +88,52 @@ static void __fuse_put_request(struct fuse_req *req)
92 atomic_dec(&req->count); 88 atomic_dec(&req->count);
93} 89}
94 90
95static struct fuse_req *do_get_request(struct fuse_conn *fc) 91struct fuse_req *fuse_get_req(struct fuse_conn *fc)
96{ 92{
97 struct fuse_req *req; 93 struct fuse_req *req;
98
99 spin_lock(&fuse_lock);
100 BUG_ON(list_empty(&fc->unused_list));
101 req = list_entry(fc->unused_list.next, struct fuse_req, list);
102 list_del_init(&req->list);
103 spin_unlock(&fuse_lock);
104 fuse_request_init(req);
105 req->preallocated = 1;
106 req->in.h.uid = current->fsuid;
107 req->in.h.gid = current->fsgid;
108 req->in.h.pid = current->pid;
109 return req;
110}
111
112/* This can return NULL, but only in case it's interrupted by a SIGKILL */
113struct fuse_req *fuse_get_request(struct fuse_conn *fc)
114{
115 int intr;
116 sigset_t oldset; 94 sigset_t oldset;
95 int err;
117 96
118 atomic_inc(&fc->num_waiting);
119 block_sigs(&oldset); 97 block_sigs(&oldset);
120 intr = down_interruptible(&fc->outstanding_sem); 98 err = wait_event_interruptible(fc->blocked_waitq, !fc->blocked);
121 restore_sigs(&oldset); 99 restore_sigs(&oldset);
122 if (intr) { 100 if (err)
123 atomic_dec(&fc->num_waiting); 101 return ERR_PTR(-EINTR);
124 return NULL;
125 }
126 return do_get_request(fc);
127}
128 102
129/* Must be called with fuse_lock held */ 103 req = fuse_request_alloc();
130static void fuse_putback_request(struct fuse_conn *fc, struct fuse_req *req) 104 if (!req)
131{ 105 return ERR_PTR(-ENOMEM);
132 if (req->preallocated) {
133 atomic_dec(&fc->num_waiting);
134 list_add(&req->list, &fc->unused_list);
135 } else
136 fuse_request_free(req);
137 106
138 /* If we are in debt decrease that first */ 107 atomic_inc(&fc->num_waiting);
139 if (fc->outstanding_debt) 108 fuse_request_init(req);
140 fc->outstanding_debt--; 109 req->in.h.uid = current->fsuid;
141 else 110 req->in.h.gid = current->fsgid;
142 up(&fc->outstanding_sem); 111 req->in.h.pid = current->pid;
112 return req;
143} 113}
144 114
145void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) 115void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
146{ 116{
147 if (atomic_dec_and_test(&req->count)) { 117 if (atomic_dec_and_test(&req->count)) {
148 spin_lock(&fuse_lock); 118 atomic_dec(&fc->num_waiting);
149 fuse_putback_request(fc, req); 119 fuse_request_free(req);
150 spin_unlock(&fuse_lock);
151 } 120 }
152} 121}
153 122
154static void fuse_put_request_locked(struct fuse_conn *fc, struct fuse_req *req) 123void fuse_release_background(struct fuse_conn *fc, struct fuse_req *req)
155{
156 if (atomic_dec_and_test(&req->count))
157 fuse_putback_request(fc, req);
158}
159
160void fuse_release_background(struct fuse_req *req)
161{ 124{
162 iput(req->inode); 125 iput(req->inode);
163 iput(req->inode2); 126 iput(req->inode2);
164 if (req->file) 127 if (req->file)
165 fput(req->file); 128 fput(req->file);
166 spin_lock(&fuse_lock); 129 spin_lock(&fc->lock);
167 list_del(&req->bg_entry); 130 list_del(&req->bg_entry);
168 spin_unlock(&fuse_lock); 131 if (fc->num_background == FUSE_MAX_BACKGROUND) {
132 fc->blocked = 0;
133 wake_up_all(&fc->blocked_waitq);
134 }
135 fc->num_background--;
136 spin_unlock(&fc->lock);
169} 137}
170 138
171/* 139/*
@@ -184,23 +152,23 @@ void fuse_release_background(struct fuse_req *req)
184 * interrupted and put in the background, it will return with an error 152 * interrupted and put in the background, it will return with an error
185 * and hence never be reset and reused. 153 * and hence never be reset and reused.
186 * 154 *
187 * Called with fuse_lock, unlocks it 155 * Called with fc->lock, unlocks it
188 */ 156 */
189static void request_end(struct fuse_conn *fc, struct fuse_req *req) 157static void request_end(struct fuse_conn *fc, struct fuse_req *req)
190{ 158{
191 list_del(&req->list); 159 list_del(&req->list);
192 req->state = FUSE_REQ_FINISHED; 160 req->state = FUSE_REQ_FINISHED;
193 if (!req->background) { 161 if (!req->background) {
162 spin_unlock(&fc->lock);
194 wake_up(&req->waitq); 163 wake_up(&req->waitq);
195 fuse_put_request_locked(fc, req); 164 fuse_put_request(fc, req);
196 spin_unlock(&fuse_lock);
197 } else { 165 } else {
198 void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; 166 void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
199 req->end = NULL; 167 req->end = NULL;
200 spin_unlock(&fuse_lock); 168 spin_unlock(&fc->lock);
201 down_read(&fc->sbput_sem); 169 down_read(&fc->sbput_sem);
202 if (fc->mounted) 170 if (fc->mounted)
203 fuse_release_background(req); 171 fuse_release_background(fc, req);
204 up_read(&fc->sbput_sem); 172 up_read(&fc->sbput_sem);
205 if (end) 173 if (end)
206 end(fc, req); 174 end(fc, req);
@@ -242,6 +210,9 @@ static void background_request(struct fuse_conn *fc, struct fuse_req *req)
242{ 210{
243 req->background = 1; 211 req->background = 1;
244 list_add(&req->bg_entry, &fc->background); 212 list_add(&req->bg_entry, &fc->background);
213 fc->num_background++;
214 if (fc->num_background == FUSE_MAX_BACKGROUND)
215 fc->blocked = 1;
245 if (req->inode) 216 if (req->inode)
246 req->inode = igrab(req->inode); 217 req->inode = igrab(req->inode);
247 if (req->inode2) 218 if (req->inode2)
@@ -250,16 +221,16 @@ static void background_request(struct fuse_conn *fc, struct fuse_req *req)
250 get_file(req->file); 221 get_file(req->file);
251} 222}
252 223
253/* Called with fuse_lock held. Releases, and then reacquires it. */ 224/* Called with fc->lock held. Releases, and then reacquires it. */
254static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) 225static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
255{ 226{
256 sigset_t oldset; 227 sigset_t oldset;
257 228
258 spin_unlock(&fuse_lock); 229 spin_unlock(&fc->lock);
259 block_sigs(&oldset); 230 block_sigs(&oldset);
260 wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED); 231 wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED);
261 restore_sigs(&oldset); 232 restore_sigs(&oldset);
262 spin_lock(&fuse_lock); 233 spin_lock(&fc->lock);
263 if (req->state == FUSE_REQ_FINISHED && !req->interrupted) 234 if (req->state == FUSE_REQ_FINISHED && !req->interrupted)
264 return; 235 return;
265 236
@@ -273,9 +244,9 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
273 locked state, there mustn't be any filesystem 244 locked state, there mustn't be any filesystem
274 operation (e.g. page fault), since that could lead 245 operation (e.g. page fault), since that could lead
275 to deadlock */ 246 to deadlock */
276 spin_unlock(&fuse_lock); 247 spin_unlock(&fc->lock);
277 wait_event(req->waitq, !req->locked); 248 wait_event(req->waitq, !req->locked);
278 spin_lock(&fuse_lock); 249 spin_lock(&fc->lock);
279 } 250 }
280 if (req->state == FUSE_REQ_PENDING) { 251 if (req->state == FUSE_REQ_PENDING) {
281 list_del(&req->list); 252 list_del(&req->list);
@@ -304,19 +275,10 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
304 req->in.h.unique = fc->reqctr; 275 req->in.h.unique = fc->reqctr;
305 req->in.h.len = sizeof(struct fuse_in_header) + 276 req->in.h.len = sizeof(struct fuse_in_header) +
306 len_args(req->in.numargs, (struct fuse_arg *) req->in.args); 277 len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
307 if (!req->preallocated) {
308 /* If request is not preallocated (either FORGET or
309 RELEASE), then still decrease outstanding_sem, so
310 user can't open infinite number of files while not
311 processing the RELEASE requests. However for
312 efficiency do it without blocking, so if down()
313 would block, just increase the debt instead */
314 if (down_trylock(&fc->outstanding_sem))
315 fc->outstanding_debt++;
316 }
317 list_add_tail(&req->list, &fc->pending); 278 list_add_tail(&req->list, &fc->pending);
318 req->state = FUSE_REQ_PENDING; 279 req->state = FUSE_REQ_PENDING;
319 wake_up(&fc->waitq); 280 wake_up(&fc->waitq);
281 kill_fasync(&fc->fasync, SIGIO, POLL_IN);
320} 282}
321 283
322/* 284/*
@@ -325,7 +287,7 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
325void request_send(struct fuse_conn *fc, struct fuse_req *req) 287void request_send(struct fuse_conn *fc, struct fuse_req *req)
326{ 288{
327 req->isreply = 1; 289 req->isreply = 1;
328 spin_lock(&fuse_lock); 290 spin_lock(&fc->lock);
329 if (!fc->connected) 291 if (!fc->connected)
330 req->out.h.error = -ENOTCONN; 292 req->out.h.error = -ENOTCONN;
331 else if (fc->conn_error) 293 else if (fc->conn_error)
@@ -338,15 +300,16 @@ void request_send(struct fuse_conn *fc, struct fuse_req *req)
338 300
339 request_wait_answer(fc, req); 301 request_wait_answer(fc, req);
340 } 302 }
341 spin_unlock(&fuse_lock); 303 spin_unlock(&fc->lock);
342} 304}
343 305
344static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req) 306static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
345{ 307{
346 spin_lock(&fuse_lock); 308 spin_lock(&fc->lock);
309 background_request(fc, req);
347 if (fc->connected) { 310 if (fc->connected) {
348 queue_request(fc, req); 311 queue_request(fc, req);
349 spin_unlock(&fuse_lock); 312 spin_unlock(&fc->lock);
350 } else { 313 } else {
351 req->out.h.error = -ENOTCONN; 314 req->out.h.error = -ENOTCONN;
352 request_end(fc, req); 315 request_end(fc, req);
@@ -362,9 +325,6 @@ void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
362void request_send_background(struct fuse_conn *fc, struct fuse_req *req) 325void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
363{ 326{
364 req->isreply = 1; 327 req->isreply = 1;
365 spin_lock(&fuse_lock);
366 background_request(fc, req);
367 spin_unlock(&fuse_lock);
368 request_send_nowait(fc, req); 328 request_send_nowait(fc, req);
369} 329}
370 330
@@ -373,16 +333,16 @@ void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
373 * anything that could cause a page-fault. If the request was already 333 * anything that could cause a page-fault. If the request was already
374 * interrupted bail out. 334 * interrupted bail out.
375 */ 335 */
376static int lock_request(struct fuse_req *req) 336static int lock_request(struct fuse_conn *fc, struct fuse_req *req)
377{ 337{
378 int err = 0; 338 int err = 0;
379 if (req) { 339 if (req) {
380 spin_lock(&fuse_lock); 340 spin_lock(&fc->lock);
381 if (req->interrupted) 341 if (req->interrupted)
382 err = -ENOENT; 342 err = -ENOENT;
383 else 343 else
384 req->locked = 1; 344 req->locked = 1;
385 spin_unlock(&fuse_lock); 345 spin_unlock(&fc->lock);
386 } 346 }
387 return err; 347 return err;
388} 348}
@@ -392,18 +352,19 @@ static int lock_request(struct fuse_req *req)
392 * requester thread is currently waiting for it to be unlocked, so 352 * requester thread is currently waiting for it to be unlocked, so
393 * wake it up. 353 * wake it up.
394 */ 354 */
395static void unlock_request(struct fuse_req *req) 355static void unlock_request(struct fuse_conn *fc, struct fuse_req *req)
396{ 356{
397 if (req) { 357 if (req) {
398 spin_lock(&fuse_lock); 358 spin_lock(&fc->lock);
399 req->locked = 0; 359 req->locked = 0;
400 if (req->interrupted) 360 if (req->interrupted)
401 wake_up(&req->waitq); 361 wake_up(&req->waitq);
402 spin_unlock(&fuse_lock); 362 spin_unlock(&fc->lock);
403 } 363 }
404} 364}
405 365
406struct fuse_copy_state { 366struct fuse_copy_state {
367 struct fuse_conn *fc;
407 int write; 368 int write;
408 struct fuse_req *req; 369 struct fuse_req *req;
409 const struct iovec *iov; 370 const struct iovec *iov;
@@ -416,11 +377,12 @@ struct fuse_copy_state {
416 unsigned len; 377 unsigned len;
417}; 378};
418 379
419static void fuse_copy_init(struct fuse_copy_state *cs, int write, 380static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc,
420 struct fuse_req *req, const struct iovec *iov, 381 int write, struct fuse_req *req,
421 unsigned long nr_segs) 382 const struct iovec *iov, unsigned long nr_segs)
422{ 383{
423 memset(cs, 0, sizeof(*cs)); 384 memset(cs, 0, sizeof(*cs));
385 cs->fc = fc;
424 cs->write = write; 386 cs->write = write;
425 cs->req = req; 387 cs->req = req;
426 cs->iov = iov; 388 cs->iov = iov;
@@ -450,7 +412,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
450 unsigned long offset; 412 unsigned long offset;
451 int err; 413 int err;
452 414
453 unlock_request(cs->req); 415 unlock_request(cs->fc, cs->req);
454 fuse_copy_finish(cs); 416 fuse_copy_finish(cs);
455 if (!cs->seglen) { 417 if (!cs->seglen) {
456 BUG_ON(!cs->nr_segs); 418 BUG_ON(!cs->nr_segs);
@@ -473,7 +435,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
473 cs->seglen -= cs->len; 435 cs->seglen -= cs->len;
474 cs->addr += cs->len; 436 cs->addr += cs->len;
475 437
476 return lock_request(cs->req); 438 return lock_request(cs->fc, cs->req);
477} 439}
478 440
479/* Do as much copy to/from userspace buffer as we can */ 441/* Do as much copy to/from userspace buffer as we can */
@@ -585,9 +547,9 @@ static void request_wait(struct fuse_conn *fc)
585 if (signal_pending(current)) 547 if (signal_pending(current))
586 break; 548 break;
587 549
588 spin_unlock(&fuse_lock); 550 spin_unlock(&fc->lock);
589 schedule(); 551 schedule();
590 spin_lock(&fuse_lock); 552 spin_lock(&fc->lock);
591 } 553 }
592 set_current_state(TASK_RUNNING); 554 set_current_state(TASK_RUNNING);
593 remove_wait_queue(&fc->waitq, &wait); 555 remove_wait_queue(&fc->waitq, &wait);
@@ -606,18 +568,21 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
606 unsigned long nr_segs, loff_t *off) 568 unsigned long nr_segs, loff_t *off)
607{ 569{
608 int err; 570 int err;
609 struct fuse_conn *fc;
610 struct fuse_req *req; 571 struct fuse_req *req;
611 struct fuse_in *in; 572 struct fuse_in *in;
612 struct fuse_copy_state cs; 573 struct fuse_copy_state cs;
613 unsigned reqsize; 574 unsigned reqsize;
575 struct fuse_conn *fc = fuse_get_conn(file);
576 if (!fc)
577 return -EPERM;
614 578
615 restart: 579 restart:
616 spin_lock(&fuse_lock); 580 spin_lock(&fc->lock);
617 fc = file->private_data; 581 err = -EAGAIN;
618 err = -EPERM; 582 if ((file->f_flags & O_NONBLOCK) && fc->connected &&
619 if (!fc) 583 list_empty(&fc->pending))
620 goto err_unlock; 584 goto err_unlock;
585
621 request_wait(fc); 586 request_wait(fc);
622 err = -ENODEV; 587 err = -ENODEV;
623 if (!fc->connected) 588 if (!fc->connected)
@@ -641,14 +606,14 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
641 request_end(fc, req); 606 request_end(fc, req);
642 goto restart; 607 goto restart;
643 } 608 }
644 spin_unlock(&fuse_lock); 609 spin_unlock(&fc->lock);
645 fuse_copy_init(&cs, 1, req, iov, nr_segs); 610 fuse_copy_init(&cs, fc, 1, req, iov, nr_segs);
646 err = fuse_copy_one(&cs, &in->h, sizeof(in->h)); 611 err = fuse_copy_one(&cs, &in->h, sizeof(in->h));
647 if (!err) 612 if (!err)
648 err = fuse_copy_args(&cs, in->numargs, in->argpages, 613 err = fuse_copy_args(&cs, in->numargs, in->argpages,
649 (struct fuse_arg *) in->args, 0); 614 (struct fuse_arg *) in->args, 0);
650 fuse_copy_finish(&cs); 615 fuse_copy_finish(&cs);
651 spin_lock(&fuse_lock); 616 spin_lock(&fc->lock);
652 req->locked = 0; 617 req->locked = 0;
653 if (!err && req->interrupted) 618 if (!err && req->interrupted)
654 err = -ENOENT; 619 err = -ENOENT;
@@ -663,12 +628,12 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
663 else { 628 else {
664 req->state = FUSE_REQ_SENT; 629 req->state = FUSE_REQ_SENT;
665 list_move_tail(&req->list, &fc->processing); 630 list_move_tail(&req->list, &fc->processing);
666 spin_unlock(&fuse_lock); 631 spin_unlock(&fc->lock);
667 } 632 }
668 return reqsize; 633 return reqsize;
669 634
670 err_unlock: 635 err_unlock:
671 spin_unlock(&fuse_lock); 636 spin_unlock(&fc->lock);
672 return err; 637 return err;
673} 638}
674 639
@@ -735,9 +700,9 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
735 struct fuse_copy_state cs; 700 struct fuse_copy_state cs;
736 struct fuse_conn *fc = fuse_get_conn(file); 701 struct fuse_conn *fc = fuse_get_conn(file);
737 if (!fc) 702 if (!fc)
738 return -ENODEV; 703 return -EPERM;
739 704
740 fuse_copy_init(&cs, 0, NULL, iov, nr_segs); 705 fuse_copy_init(&cs, fc, 0, NULL, iov, nr_segs);
741 if (nbytes < sizeof(struct fuse_out_header)) 706 if (nbytes < sizeof(struct fuse_out_header))
742 return -EINVAL; 707 return -EINVAL;
743 708
@@ -749,7 +714,7 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
749 oh.len != nbytes) 714 oh.len != nbytes)
750 goto err_finish; 715 goto err_finish;
751 716
752 spin_lock(&fuse_lock); 717 spin_lock(&fc->lock);
753 err = -ENOENT; 718 err = -ENOENT;
754 if (!fc->connected) 719 if (!fc->connected)
755 goto err_unlock; 720 goto err_unlock;
@@ -760,9 +725,9 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
760 goto err_unlock; 725 goto err_unlock;
761 726
762 if (req->interrupted) { 727 if (req->interrupted) {
763 spin_unlock(&fuse_lock); 728 spin_unlock(&fc->lock);
764 fuse_copy_finish(&cs); 729 fuse_copy_finish(&cs);
765 spin_lock(&fuse_lock); 730 spin_lock(&fc->lock);
766 request_end(fc, req); 731 request_end(fc, req);
767 return -ENOENT; 732 return -ENOENT;
768 } 733 }
@@ -770,12 +735,12 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
770 req->out.h = oh; 735 req->out.h = oh;
771 req->locked = 1; 736 req->locked = 1;
772 cs.req = req; 737 cs.req = req;
773 spin_unlock(&fuse_lock); 738 spin_unlock(&fc->lock);
774 739
775 err = copy_out_args(&cs, &req->out, nbytes); 740 err = copy_out_args(&cs, &req->out, nbytes);
776 fuse_copy_finish(&cs); 741 fuse_copy_finish(&cs);
777 742
778 spin_lock(&fuse_lock); 743 spin_lock(&fc->lock);
779 req->locked = 0; 744 req->locked = 0;
780 if (!err) { 745 if (!err) {
781 if (req->interrupted) 746 if (req->interrupted)
@@ -787,7 +752,7 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
787 return err ? err : nbytes; 752 return err ? err : nbytes;
788 753
789 err_unlock: 754 err_unlock:
790 spin_unlock(&fuse_lock); 755 spin_unlock(&fc->lock);
791 err_finish: 756 err_finish:
792 fuse_copy_finish(&cs); 757 fuse_copy_finish(&cs);
793 return err; 758 return err;
@@ -804,18 +769,19 @@ static ssize_t fuse_dev_write(struct file *file, const char __user *buf,
804 769
805static unsigned fuse_dev_poll(struct file *file, poll_table *wait) 770static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
806{ 771{
807 struct fuse_conn *fc = fuse_get_conn(file);
808 unsigned mask = POLLOUT | POLLWRNORM; 772 unsigned mask = POLLOUT | POLLWRNORM;
809 773 struct fuse_conn *fc = fuse_get_conn(file);
810 if (!fc) 774 if (!fc)
811 return -ENODEV; 775 return POLLERR;
812 776
813 poll_wait(file, &fc->waitq, wait); 777 poll_wait(file, &fc->waitq, wait);
814 778
815 spin_lock(&fuse_lock); 779 spin_lock(&fc->lock);
816 if (!list_empty(&fc->pending)) 780 if (!fc->connected)
817 mask |= POLLIN | POLLRDNORM; 781 mask = POLLERR;
818 spin_unlock(&fuse_lock); 782 else if (!list_empty(&fc->pending))
783 mask |= POLLIN | POLLRDNORM;
784 spin_unlock(&fc->lock);
819 785
820 return mask; 786 return mask;
821} 787}
@@ -823,7 +789,7 @@ static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
823/* 789/*
824 * Abort all requests on the given list (pending or processing) 790 * Abort all requests on the given list (pending or processing)
825 * 791 *
826 * This function releases and reacquires fuse_lock 792 * This function releases and reacquires fc->lock
827 */ 793 */
828static void end_requests(struct fuse_conn *fc, struct list_head *head) 794static void end_requests(struct fuse_conn *fc, struct list_head *head)
829{ 795{
@@ -832,7 +798,7 @@ static void end_requests(struct fuse_conn *fc, struct list_head *head)
832 req = list_entry(head->next, struct fuse_req, list); 798 req = list_entry(head->next, struct fuse_req, list);
833 req->out.h.error = -ECONNABORTED; 799 req->out.h.error = -ECONNABORTED;
834 request_end(fc, req); 800 request_end(fc, req);
835 spin_lock(&fuse_lock); 801 spin_lock(&fc->lock);
836 } 802 }
837} 803}
838 804
@@ -863,10 +829,10 @@ static void end_io_requests(struct fuse_conn *fc)
863 req->end = NULL; 829 req->end = NULL;
864 /* The end function will consume this reference */ 830 /* The end function will consume this reference */
865 __fuse_get_request(req); 831 __fuse_get_request(req);
866 spin_unlock(&fuse_lock); 832 spin_unlock(&fc->lock);
867 wait_event(req->waitq, !req->locked); 833 wait_event(req->waitq, !req->locked);
868 end(fc, req); 834 end(fc, req);
869 spin_lock(&fuse_lock); 835 spin_lock(&fc->lock);
870 } 836 }
871 } 837 }
872} 838}
@@ -893,35 +859,44 @@ static void end_io_requests(struct fuse_conn *fc)
893 */ 859 */
894void fuse_abort_conn(struct fuse_conn *fc) 860void fuse_abort_conn(struct fuse_conn *fc)
895{ 861{
896 spin_lock(&fuse_lock); 862 spin_lock(&fc->lock);
897 if (fc->connected) { 863 if (fc->connected) {
898 fc->connected = 0; 864 fc->connected = 0;
899 end_io_requests(fc); 865 end_io_requests(fc);
900 end_requests(fc, &fc->pending); 866 end_requests(fc, &fc->pending);
901 end_requests(fc, &fc->processing); 867 end_requests(fc, &fc->processing);
902 wake_up_all(&fc->waitq); 868 wake_up_all(&fc->waitq);
869 kill_fasync(&fc->fasync, SIGIO, POLL_IN);
903 } 870 }
904 spin_unlock(&fuse_lock); 871 spin_unlock(&fc->lock);
905} 872}
906 873
907static int fuse_dev_release(struct inode *inode, struct file *file) 874static int fuse_dev_release(struct inode *inode, struct file *file)
908{ 875{
909 struct fuse_conn *fc; 876 struct fuse_conn *fc = fuse_get_conn(file);
910
911 spin_lock(&fuse_lock);
912 fc = file->private_data;
913 if (fc) { 877 if (fc) {
878 spin_lock(&fc->lock);
914 fc->connected = 0; 879 fc->connected = 0;
915 end_requests(fc, &fc->pending); 880 end_requests(fc, &fc->pending);
916 end_requests(fc, &fc->processing); 881 end_requests(fc, &fc->processing);
917 } 882 spin_unlock(&fc->lock);
918 spin_unlock(&fuse_lock); 883 fasync_helper(-1, file, 0, &fc->fasync);
919 if (fc)
920 kobject_put(&fc->kobj); 884 kobject_put(&fc->kobj);
885 }
921 886
922 return 0; 887 return 0;
923} 888}
924 889
890static int fuse_dev_fasync(int fd, struct file *file, int on)
891{
892 struct fuse_conn *fc = fuse_get_conn(file);
893 if (!fc)
894 return -EPERM;
895
896 /* No locking - fasync_helper does its own locking */
897 return fasync_helper(fd, file, on, &fc->fasync);
898}
899
925const struct file_operations fuse_dev_operations = { 900const struct file_operations fuse_dev_operations = {
926 .owner = THIS_MODULE, 901 .owner = THIS_MODULE,
927 .llseek = no_llseek, 902 .llseek = no_llseek,
@@ -931,6 +906,7 @@ const struct file_operations fuse_dev_operations = {
931 .writev = fuse_dev_writev, 906 .writev = fuse_dev_writev,
932 .poll = fuse_dev_poll, 907 .poll = fuse_dev_poll,
933 .release = fuse_dev_release, 908 .release = fuse_dev_release,
909 .fasync = fuse_dev_fasync,
934}; 910};
935 911
936static struct miscdevice fuse_miscdevice = { 912static struct miscdevice fuse_miscdevice = {
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 256355b80256..8d7546e832e8 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -117,8 +117,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
117 return 0; 117 return 0;
118 118
119 fc = get_fuse_conn(inode); 119 fc = get_fuse_conn(inode);
120 req = fuse_get_request(fc); 120 req = fuse_get_req(fc);
121 if (!req) 121 if (IS_ERR(req))
122 return 0; 122 return 0;
123 123
124 fuse_lookup_init(req, entry->d_parent->d_inode, entry, &outarg); 124 fuse_lookup_init(req, entry->d_parent->d_inode, entry, &outarg);
@@ -188,9 +188,9 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
188 if (entry->d_name.len > FUSE_NAME_MAX) 188 if (entry->d_name.len > FUSE_NAME_MAX)
189 return ERR_PTR(-ENAMETOOLONG); 189 return ERR_PTR(-ENAMETOOLONG);
190 190
191 req = fuse_get_request(fc); 191 req = fuse_get_req(fc);
192 if (!req) 192 if (IS_ERR(req))
193 return ERR_PTR(-EINTR); 193 return ERR_PTR(PTR_ERR(req));
194 194
195 fuse_lookup_init(req, dir, entry, &outarg); 195 fuse_lookup_init(req, dir, entry, &outarg);
196 request_send(fc, req); 196 request_send(fc, req);
@@ -244,15 +244,14 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
244 struct file *file; 244 struct file *file;
245 int flags = nd->intent.open.flags - 1; 245 int flags = nd->intent.open.flags - 1;
246 246
247 err = -ENOSYS;
248 if (fc->no_create) 247 if (fc->no_create)
249 goto out; 248 return -ENOSYS;
250 249
251 err = -EINTR; 250 req = fuse_get_req(fc);
252 req = fuse_get_request(fc); 251 if (IS_ERR(req))
253 if (!req) 252 return PTR_ERR(req);
254 goto out;
255 253
254 err = -ENOMEM;
256 ff = fuse_file_alloc(); 255 ff = fuse_file_alloc();
257 if (!ff) 256 if (!ff)
258 goto out_put_request; 257 goto out_put_request;
@@ -314,7 +313,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
314 fuse_file_free(ff); 313 fuse_file_free(ff);
315 out_put_request: 314 out_put_request:
316 fuse_put_request(fc, req); 315 fuse_put_request(fc, req);
317 out:
318 return err; 316 return err;
319} 317}
320 318
@@ -375,9 +373,9 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode,
375{ 373{
376 struct fuse_mknod_in inarg; 374 struct fuse_mknod_in inarg;
377 struct fuse_conn *fc = get_fuse_conn(dir); 375 struct fuse_conn *fc = get_fuse_conn(dir);
378 struct fuse_req *req = fuse_get_request(fc); 376 struct fuse_req *req = fuse_get_req(fc);
379 if (!req) 377 if (IS_ERR(req))
380 return -EINTR; 378 return PTR_ERR(req);
381 379
382 memset(&inarg, 0, sizeof(inarg)); 380 memset(&inarg, 0, sizeof(inarg));
383 inarg.mode = mode; 381 inarg.mode = mode;
@@ -407,9 +405,9 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, int mode)
407{ 405{
408 struct fuse_mkdir_in inarg; 406 struct fuse_mkdir_in inarg;
409 struct fuse_conn *fc = get_fuse_conn(dir); 407 struct fuse_conn *fc = get_fuse_conn(dir);
410 struct fuse_req *req = fuse_get_request(fc); 408 struct fuse_req *req = fuse_get_req(fc);
411 if (!req) 409 if (IS_ERR(req))
412 return -EINTR; 410 return PTR_ERR(req);
413 411
414 memset(&inarg, 0, sizeof(inarg)); 412 memset(&inarg, 0, sizeof(inarg));
415 inarg.mode = mode; 413 inarg.mode = mode;
@@ -427,9 +425,9 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry,
427{ 425{
428 struct fuse_conn *fc = get_fuse_conn(dir); 426 struct fuse_conn *fc = get_fuse_conn(dir);
429 unsigned len = strlen(link) + 1; 427 unsigned len = strlen(link) + 1;
430 struct fuse_req *req = fuse_get_request(fc); 428 struct fuse_req *req = fuse_get_req(fc);
431 if (!req) 429 if (IS_ERR(req))
432 return -EINTR; 430 return PTR_ERR(req);
433 431
434 req->in.h.opcode = FUSE_SYMLINK; 432 req->in.h.opcode = FUSE_SYMLINK;
435 req->in.numargs = 2; 433 req->in.numargs = 2;
@@ -444,9 +442,9 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
444{ 442{
445 int err; 443 int err;
446 struct fuse_conn *fc = get_fuse_conn(dir); 444 struct fuse_conn *fc = get_fuse_conn(dir);
447 struct fuse_req *req = fuse_get_request(fc); 445 struct fuse_req *req = fuse_get_req(fc);
448 if (!req) 446 if (IS_ERR(req))
449 return -EINTR; 447 return PTR_ERR(req);
450 448
451 req->in.h.opcode = FUSE_UNLINK; 449 req->in.h.opcode = FUSE_UNLINK;
452 req->in.h.nodeid = get_node_id(dir); 450 req->in.h.nodeid = get_node_id(dir);
@@ -476,9 +474,9 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
476{ 474{
477 int err; 475 int err;
478 struct fuse_conn *fc = get_fuse_conn(dir); 476 struct fuse_conn *fc = get_fuse_conn(dir);
479 struct fuse_req *req = fuse_get_request(fc); 477 struct fuse_req *req = fuse_get_req(fc);
480 if (!req) 478 if (IS_ERR(req))
481 return -EINTR; 479 return PTR_ERR(req);
482 480
483 req->in.h.opcode = FUSE_RMDIR; 481 req->in.h.opcode = FUSE_RMDIR;
484 req->in.h.nodeid = get_node_id(dir); 482 req->in.h.nodeid = get_node_id(dir);
@@ -504,9 +502,9 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent,
504 int err; 502 int err;
505 struct fuse_rename_in inarg; 503 struct fuse_rename_in inarg;
506 struct fuse_conn *fc = get_fuse_conn(olddir); 504 struct fuse_conn *fc = get_fuse_conn(olddir);
507 struct fuse_req *req = fuse_get_request(fc); 505 struct fuse_req *req = fuse_get_req(fc);
508 if (!req) 506 if (IS_ERR(req))
509 return -EINTR; 507 return PTR_ERR(req);
510 508
511 memset(&inarg, 0, sizeof(inarg)); 509 memset(&inarg, 0, sizeof(inarg));
512 inarg.newdir = get_node_id(newdir); 510 inarg.newdir = get_node_id(newdir);
@@ -553,9 +551,9 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
553 struct fuse_link_in inarg; 551 struct fuse_link_in inarg;
554 struct inode *inode = entry->d_inode; 552 struct inode *inode = entry->d_inode;
555 struct fuse_conn *fc = get_fuse_conn(inode); 553 struct fuse_conn *fc = get_fuse_conn(inode);
556 struct fuse_req *req = fuse_get_request(fc); 554 struct fuse_req *req = fuse_get_req(fc);
557 if (!req) 555 if (IS_ERR(req))
558 return -EINTR; 556 return PTR_ERR(req);
559 557
560 memset(&inarg, 0, sizeof(inarg)); 558 memset(&inarg, 0, sizeof(inarg));
561 inarg.oldnodeid = get_node_id(inode); 559 inarg.oldnodeid = get_node_id(inode);
@@ -583,9 +581,9 @@ int fuse_do_getattr(struct inode *inode)
583 int err; 581 int err;
584 struct fuse_attr_out arg; 582 struct fuse_attr_out arg;
585 struct fuse_conn *fc = get_fuse_conn(inode); 583 struct fuse_conn *fc = get_fuse_conn(inode);
586 struct fuse_req *req = fuse_get_request(fc); 584 struct fuse_req *req = fuse_get_req(fc);
587 if (!req) 585 if (IS_ERR(req))
588 return -EINTR; 586 return PTR_ERR(req);
589 587
590 req->in.h.opcode = FUSE_GETATTR; 588 req->in.h.opcode = FUSE_GETATTR;
591 req->in.h.nodeid = get_node_id(inode); 589 req->in.h.nodeid = get_node_id(inode);
@@ -673,9 +671,9 @@ static int fuse_access(struct inode *inode, int mask)
673 if (fc->no_access) 671 if (fc->no_access)
674 return 0; 672 return 0;
675 673
676 req = fuse_get_request(fc); 674 req = fuse_get_req(fc);
677 if (!req) 675 if (IS_ERR(req))
678 return -EINTR; 676 return PTR_ERR(req);
679 677
680 memset(&inarg, 0, sizeof(inarg)); 678 memset(&inarg, 0, sizeof(inarg));
681 inarg.mask = mask; 679 inarg.mask = mask;
@@ -780,9 +778,9 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
780 if (is_bad_inode(inode)) 778 if (is_bad_inode(inode))
781 return -EIO; 779 return -EIO;
782 780
783 req = fuse_get_request(fc); 781 req = fuse_get_req(fc);
784 if (!req) 782 if (IS_ERR(req))
785 return -EINTR; 783 return PTR_ERR(req);
786 784
787 page = alloc_page(GFP_KERNEL); 785 page = alloc_page(GFP_KERNEL);
788 if (!page) { 786 if (!page) {
@@ -809,11 +807,11 @@ static char *read_link(struct dentry *dentry)
809{ 807{
810 struct inode *inode = dentry->d_inode; 808 struct inode *inode = dentry->d_inode;
811 struct fuse_conn *fc = get_fuse_conn(inode); 809 struct fuse_conn *fc = get_fuse_conn(inode);
812 struct fuse_req *req = fuse_get_request(fc); 810 struct fuse_req *req = fuse_get_req(fc);
813 char *link; 811 char *link;
814 812
815 if (!req) 813 if (IS_ERR(req))
816 return ERR_PTR(-EINTR); 814 return ERR_PTR(PTR_ERR(req));
817 815
818 link = (char *) __get_free_page(GFP_KERNEL); 816 link = (char *) __get_free_page(GFP_KERNEL);
819 if (!link) { 817 if (!link) {
@@ -933,9 +931,9 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr)
933 } 931 }
934 } 932 }
935 933
936 req = fuse_get_request(fc); 934 req = fuse_get_req(fc);
937 if (!req) 935 if (IS_ERR(req))
938 return -EINTR; 936 return PTR_ERR(req);
939 937
940 memset(&inarg, 0, sizeof(inarg)); 938 memset(&inarg, 0, sizeof(inarg));
941 iattr_to_fattr(attr, &inarg); 939 iattr_to_fattr(attr, &inarg);
@@ -995,9 +993,9 @@ static int fuse_setxattr(struct dentry *entry, const char *name,
995 if (fc->no_setxattr) 993 if (fc->no_setxattr)
996 return -EOPNOTSUPP; 994 return -EOPNOTSUPP;
997 995
998 req = fuse_get_request(fc); 996 req = fuse_get_req(fc);
999 if (!req) 997 if (IS_ERR(req))
1000 return -EINTR; 998 return PTR_ERR(req);
1001 999
1002 memset(&inarg, 0, sizeof(inarg)); 1000 memset(&inarg, 0, sizeof(inarg));
1003 inarg.size = size; 1001 inarg.size = size;
@@ -1035,9 +1033,9 @@ static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
1035 if (fc->no_getxattr) 1033 if (fc->no_getxattr)
1036 return -EOPNOTSUPP; 1034 return -EOPNOTSUPP;
1037 1035
1038 req = fuse_get_request(fc); 1036 req = fuse_get_req(fc);
1039 if (!req) 1037 if (IS_ERR(req))
1040 return -EINTR; 1038 return PTR_ERR(req);
1041 1039
1042 memset(&inarg, 0, sizeof(inarg)); 1040 memset(&inarg, 0, sizeof(inarg));
1043 inarg.size = size; 1041 inarg.size = size;
@@ -1085,9 +1083,9 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
1085 if (fc->no_listxattr) 1083 if (fc->no_listxattr)
1086 return -EOPNOTSUPP; 1084 return -EOPNOTSUPP;
1087 1085
1088 req = fuse_get_request(fc); 1086 req = fuse_get_req(fc);
1089 if (!req) 1087 if (IS_ERR(req))
1090 return -EINTR; 1088 return PTR_ERR(req);
1091 1089
1092 memset(&inarg, 0, sizeof(inarg)); 1090 memset(&inarg, 0, sizeof(inarg));
1093 inarg.size = size; 1091 inarg.size = size;
@@ -1131,9 +1129,9 @@ static int fuse_removexattr(struct dentry *entry, const char *name)
1131 if (fc->no_removexattr) 1129 if (fc->no_removexattr)
1132 return -EOPNOTSUPP; 1130 return -EOPNOTSUPP;
1133 1131
1134 req = fuse_get_request(fc); 1132 req = fuse_get_req(fc);
1135 if (!req) 1133 if (IS_ERR(req))
1136 return -EINTR; 1134 return PTR_ERR(req);
1137 1135
1138 req->in.h.opcode = FUSE_REMOVEXATTR; 1136 req->in.h.opcode = FUSE_REMOVEXATTR;
1139 req->in.h.nodeid = get_node_id(inode); 1137 req->in.h.nodeid = get_node_id(inode);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 975f2697e866..e4f041a11bb5 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -22,9 +22,9 @@ static int fuse_send_open(struct inode *inode, struct file *file, int isdir,
22 struct fuse_req *req; 22 struct fuse_req *req;
23 int err; 23 int err;
24 24
25 req = fuse_get_request(fc); 25 req = fuse_get_req(fc);
26 if (!req) 26 if (IS_ERR(req))
27 return -EINTR; 27 return PTR_ERR(req);
28 28
29 memset(&inarg, 0, sizeof(inarg)); 29 memset(&inarg, 0, sizeof(inarg));
30 inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); 30 inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
@@ -184,9 +184,9 @@ static int fuse_flush(struct file *file)
184 if (fc->no_flush) 184 if (fc->no_flush)
185 return 0; 185 return 0;
186 186
187 req = fuse_get_request(fc); 187 req = fuse_get_req(fc);
188 if (!req) 188 if (IS_ERR(req))
189 return -EINTR; 189 return PTR_ERR(req);
190 190
191 memset(&inarg, 0, sizeof(inarg)); 191 memset(&inarg, 0, sizeof(inarg));
192 inarg.fh = ff->fh; 192 inarg.fh = ff->fh;
@@ -223,9 +223,9 @@ int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
223 if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir)) 223 if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
224 return 0; 224 return 0;
225 225
226 req = fuse_get_request(fc); 226 req = fuse_get_req(fc);
227 if (!req) 227 if (IS_ERR(req))
228 return -EINTR; 228 return PTR_ERR(req);
229 229
230 memset(&inarg, 0, sizeof(inarg)); 230 memset(&inarg, 0, sizeof(inarg));
231 inarg.fh = ff->fh; 231 inarg.fh = ff->fh;
@@ -297,9 +297,9 @@ static int fuse_readpage(struct file *file, struct page *page)
297 if (is_bad_inode(inode)) 297 if (is_bad_inode(inode))
298 goto out; 298 goto out;
299 299
300 err = -EINTR; 300 req = fuse_get_req(fc);
301 req = fuse_get_request(fc); 301 err = PTR_ERR(req);
302 if (!req) 302 if (IS_ERR(req))
303 goto out; 303 goto out;
304 304
305 req->out.page_zeroing = 1; 305 req->out.page_zeroing = 1;
@@ -368,10 +368,10 @@ static int fuse_readpages_fill(void *_data, struct page *page)
368 (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read || 368 (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
369 req->pages[req->num_pages - 1]->index + 1 != page->index)) { 369 req->pages[req->num_pages - 1]->index + 1 != page->index)) {
370 fuse_send_readpages(req, data->file, inode); 370 fuse_send_readpages(req, data->file, inode);
371 data->req = req = fuse_get_request(fc); 371 data->req = req = fuse_get_req(fc);
372 if (!req) { 372 if (IS_ERR(req)) {
373 unlock_page(page); 373 unlock_page(page);
374 return -EINTR; 374 return PTR_ERR(req);
375 } 375 }
376 } 376 }
377 req->pages[req->num_pages] = page; 377 req->pages[req->num_pages] = page;
@@ -392,13 +392,17 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
392 392
393 data.file = file; 393 data.file = file;
394 data.inode = inode; 394 data.inode = inode;
395 data.req = fuse_get_request(fc); 395 data.req = fuse_get_req(fc);
396 if (!data.req) 396 if (IS_ERR(data.req))
397 return -EINTR; 397 return PTR_ERR(data.req);
398 398
399 err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data); 399 err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
400 if (!err) 400 if (!err) {
401 fuse_send_readpages(data.req, file, inode); 401 if (data.req->num_pages)
402 fuse_send_readpages(data.req, file, inode);
403 else
404 fuse_put_request(fc, data.req);
405 }
402 return err; 406 return err;
403} 407}
404 408
@@ -451,9 +455,9 @@ static int fuse_commit_write(struct file *file, struct page *page,
451 if (is_bad_inode(inode)) 455 if (is_bad_inode(inode))
452 return -EIO; 456 return -EIO;
453 457
454 req = fuse_get_request(fc); 458 req = fuse_get_req(fc);
455 if (!req) 459 if (IS_ERR(req))
456 return -EINTR; 460 return PTR_ERR(req);
457 461
458 req->num_pages = 1; 462 req->num_pages = 1;
459 req->pages[0] = page; 463 req->pages[0] = page;
@@ -528,9 +532,9 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
528 if (is_bad_inode(inode)) 532 if (is_bad_inode(inode))
529 return -EIO; 533 return -EIO;
530 534
531 req = fuse_get_request(fc); 535 req = fuse_get_req(fc);
532 if (!req) 536 if (IS_ERR(req))
533 return -EINTR; 537 return PTR_ERR(req);
534 538
535 while (count) { 539 while (count) {
536 size_t nres; 540 size_t nres;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index a16a04fcf41e..19c7185a7546 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -1,6 +1,6 @@
1/* 1/*
2 FUSE: Filesystem in Userspace 2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu> 3 Copyright (C) 2001-2006 Miklos Szeredi <miklos@szeredi.hu>
4 4
5 This program can be distributed under the terms of the GNU GPL. 5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING. 6 See the file COPYING.
@@ -18,8 +18,8 @@
18/** Max number of pages that can be used in a single read request */ 18/** Max number of pages that can be used in a single read request */
19#define FUSE_MAX_PAGES_PER_REQ 32 19#define FUSE_MAX_PAGES_PER_REQ 32
20 20
21/** If more requests are outstanding, then the operation will block */ 21/** Maximum number of outstanding background requests */
22#define FUSE_MAX_OUTSTANDING 10 22#define FUSE_MAX_BACKGROUND 10
23 23
24/** It could be as large as PATH_MAX, but would that have any uses? */ 24/** It could be as large as PATH_MAX, but would that have any uses? */
25#define FUSE_NAME_MAX 1024 25#define FUSE_NAME_MAX 1024
@@ -131,8 +131,8 @@ struct fuse_conn;
131 * A request to the client 131 * A request to the client
132 */ 132 */
133struct fuse_req { 133struct fuse_req {
134 /** This can be on either unused_list, pending processing or 134 /** This can be on either pending processing or io lists in
135 io lists in fuse_conn */ 135 fuse_conn */
136 struct list_head list; 136 struct list_head list;
137 137
138 /** Entry on the background list */ 138 /** Entry on the background list */
@@ -144,15 +144,12 @@ struct fuse_req {
144 /* 144 /*
145 * The following bitfields are either set once before the 145 * The following bitfields are either set once before the
146 * request is queued or setting/clearing them is protected by 146 * request is queued or setting/clearing them is protected by
147 * fuse_lock 147 * fuse_conn->lock
148 */ 148 */
149 149
150 /** True if the request has reply */ 150 /** True if the request has reply */
151 unsigned isreply:1; 151 unsigned isreply:1;
152 152
153 /** The request is preallocated */
154 unsigned preallocated:1;
155
156 /** The request was interrupted */ 153 /** The request was interrupted */
157 unsigned interrupted:1; 154 unsigned interrupted:1;
158 155
@@ -213,6 +210,9 @@ struct fuse_req {
213 * unmounted. 210 * unmounted.
214 */ 211 */
215struct fuse_conn { 212struct fuse_conn {
213 /** Lock protecting accessess to members of this structure */
214 spinlock_t lock;
215
216 /** The user id for this mount */ 216 /** The user id for this mount */
217 uid_t user_id; 217 uid_t user_id;
218 218
@@ -244,19 +244,20 @@ struct fuse_conn {
244 interrupted request) */ 244 interrupted request) */
245 struct list_head background; 245 struct list_head background;
246 246
247 /** Controls the maximum number of outstanding requests */ 247 /** Number of requests currently in the background */
248 struct semaphore outstanding_sem; 248 unsigned num_background;
249
250 /** Flag indicating if connection is blocked. This will be
251 the case before the INIT reply is received, and if there
252 are too many outstading backgrounds requests */
253 int blocked;
249 254
250 /** This counts the number of outstanding requests if 255 /** waitq for blocked connection */
251 outstanding_sem would go negative */ 256 wait_queue_head_t blocked_waitq;
252 unsigned outstanding_debt;
253 257
254 /** RW semaphore for exclusion with fuse_put_super() */ 258 /** RW semaphore for exclusion with fuse_put_super() */
255 struct rw_semaphore sbput_sem; 259 struct rw_semaphore sbput_sem;
256 260
257 /** The list of unused requests */
258 struct list_head unused_list;
259
260 /** The next unique request id */ 261 /** The next unique request id */
261 u64 reqctr; 262 u64 reqctr;
262 263
@@ -318,6 +319,9 @@ struct fuse_conn {
318 319
319 /** kobject */ 320 /** kobject */
320 struct kobject kobj; 321 struct kobject kobj;
322
323 /** O_ASYNC requests */
324 struct fasync_struct *fasync;
321}; 325};
322 326
323static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb) 327static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
@@ -349,21 +353,6 @@ static inline u64 get_node_id(struct inode *inode)
349extern const struct file_operations fuse_dev_operations; 353extern const struct file_operations fuse_dev_operations;
350 354
351/** 355/**
352 * This is the single global spinlock which protects FUSE's structures
353 *
354 * The following data is protected by this lock:
355 *
356 * - the private_data field of the device file
357 * - the s_fs_info field of the super block
358 * - unused_list, pending, processing lists in fuse_conn
359 * - background list in fuse_conn
360 * - the unique request ID counter reqctr in fuse_conn
361 * - the sb (super_block) field in fuse_conn
362 * - the file (device file) field in fuse_conn
363 */
364extern spinlock_t fuse_lock;
365
366/**
367 * Get a filled in inode 356 * Get a filled in inode
368 */ 357 */
369struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid, 358struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
@@ -461,11 +450,11 @@ void fuse_reset_request(struct fuse_req *req);
461/** 450/**
462 * Reserve a preallocated request 451 * Reserve a preallocated request
463 */ 452 */
464struct fuse_req *fuse_get_request(struct fuse_conn *fc); 453struct fuse_req *fuse_get_req(struct fuse_conn *fc);
465 454
466/** 455/**
467 * Decrement reference count of a request. If count goes to zero put 456 * Decrement reference count of a request. If count goes to zero free
468 * on unused list (preallocated) or free request (not preallocated). 457 * the request.
469 */ 458 */
470void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req); 459void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req);
471 460
@@ -487,7 +476,7 @@ void request_send_background(struct fuse_conn *fc, struct fuse_req *req);
487/** 476/**
488 * Release inodes and file associated with background request 477 * Release inodes and file associated with background request
489 */ 478 */
490void fuse_release_background(struct fuse_req *req); 479void fuse_release_background(struct fuse_conn *fc, struct fuse_req *req);
491 480
492/* Abort all requests */ 481/* Abort all requests */
493void fuse_abort_conn(struct fuse_conn *fc); 482void fuse_abort_conn(struct fuse_conn *fc);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 879e6fba9480..fd34037b0588 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1,6 +1,6 @@
1/* 1/*
2 FUSE: Filesystem in Userspace 2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu> 3 Copyright (C) 2001-2006 Miklos Szeredi <miklos@szeredi.hu>
4 4
5 This program can be distributed under the terms of the GNU GPL. 5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING. 6 See the file COPYING.
@@ -22,7 +22,6 @@ MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
22MODULE_DESCRIPTION("Filesystem in Userspace"); 22MODULE_DESCRIPTION("Filesystem in Userspace");
23MODULE_LICENSE("GPL"); 23MODULE_LICENSE("GPL");
24 24
25spinlock_t fuse_lock;
26static kmem_cache_t *fuse_inode_cachep; 25static kmem_cache_t *fuse_inode_cachep;
27static struct subsystem connections_subsys; 26static struct subsystem connections_subsys;
28 27
@@ -207,15 +206,17 @@ static void fuse_put_super(struct super_block *sb)
207 206
208 down_write(&fc->sbput_sem); 207 down_write(&fc->sbput_sem);
209 while (!list_empty(&fc->background)) 208 while (!list_empty(&fc->background))
210 fuse_release_background(list_entry(fc->background.next, 209 fuse_release_background(fc,
210 list_entry(fc->background.next,
211 struct fuse_req, bg_entry)); 211 struct fuse_req, bg_entry));
212 212
213 spin_lock(&fuse_lock); 213 spin_lock(&fc->lock);
214 fc->mounted = 0; 214 fc->mounted = 0;
215 fc->connected = 0; 215 fc->connected = 0;
216 spin_unlock(&fuse_lock); 216 spin_unlock(&fc->lock);
217 up_write(&fc->sbput_sem); 217 up_write(&fc->sbput_sem);
218 /* Flush all readers on this fs */ 218 /* Flush all readers on this fs */
219 kill_fasync(&fc->fasync, SIGIO, POLL_IN);
219 wake_up_all(&fc->waitq); 220 wake_up_all(&fc->waitq);
220 kobject_del(&fc->kobj); 221 kobject_del(&fc->kobj);
221 kobject_put(&fc->kobj); 222 kobject_put(&fc->kobj);
@@ -242,9 +243,9 @@ static int fuse_statfs(struct super_block *sb, struct kstatfs *buf)
242 struct fuse_statfs_out outarg; 243 struct fuse_statfs_out outarg;
243 int err; 244 int err;
244 245
245 req = fuse_get_request(fc); 246 req = fuse_get_req(fc);
246 if (!req) 247 if (IS_ERR(req))
247 return -EINTR; 248 return PTR_ERR(req);
248 249
249 memset(&outarg, 0, sizeof(outarg)); 250 memset(&outarg, 0, sizeof(outarg));
250 req->in.numargs = 0; 251 req->in.numargs = 0;
@@ -369,15 +370,7 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
369 370
370static void fuse_conn_release(struct kobject *kobj) 371static void fuse_conn_release(struct kobject *kobj)
371{ 372{
372 struct fuse_conn *fc = get_fuse_conn_kobj(kobj); 373 kfree(get_fuse_conn_kobj(kobj));
373
374 while (!list_empty(&fc->unused_list)) {
375 struct fuse_req *req;
376 req = list_entry(fc->unused_list.next, struct fuse_req, list);
377 list_del(&req->list);
378 fuse_request_free(req);
379 }
380 kfree(fc);
381} 374}
382 375
383static struct fuse_conn *new_conn(void) 376static struct fuse_conn *new_conn(void)
@@ -386,64 +379,25 @@ static struct fuse_conn *new_conn(void)
386 379
387 fc = kzalloc(sizeof(*fc), GFP_KERNEL); 380 fc = kzalloc(sizeof(*fc), GFP_KERNEL);
388 if (fc) { 381 if (fc) {
389 int i; 382 spin_lock_init(&fc->lock);
390 init_waitqueue_head(&fc->waitq); 383 init_waitqueue_head(&fc->waitq);
384 init_waitqueue_head(&fc->blocked_waitq);
391 INIT_LIST_HEAD(&fc->pending); 385 INIT_LIST_HEAD(&fc->pending);
392 INIT_LIST_HEAD(&fc->processing); 386 INIT_LIST_HEAD(&fc->processing);
393 INIT_LIST_HEAD(&fc->io); 387 INIT_LIST_HEAD(&fc->io);
394 INIT_LIST_HEAD(&fc->unused_list);
395 INIT_LIST_HEAD(&fc->background); 388 INIT_LIST_HEAD(&fc->background);
396 sema_init(&fc->outstanding_sem, 1); /* One for INIT */
397 init_rwsem(&fc->sbput_sem); 389 init_rwsem(&fc->sbput_sem);
398 kobj_set_kset_s(fc, connections_subsys); 390 kobj_set_kset_s(fc, connections_subsys);
399 kobject_init(&fc->kobj); 391 kobject_init(&fc->kobj);
400 atomic_set(&fc->num_waiting, 0); 392 atomic_set(&fc->num_waiting, 0);
401 for (i = 0; i < FUSE_MAX_OUTSTANDING; i++) {
402 struct fuse_req *req = fuse_request_alloc();
403 if (!req) {
404 kobject_put(&fc->kobj);
405 return NULL;
406 }
407 list_add(&req->list, &fc->unused_list);
408 }
409 fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; 393 fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
410 fc->bdi.unplug_io_fn = default_unplug_io_fn; 394 fc->bdi.unplug_io_fn = default_unplug_io_fn;
411 fc->reqctr = 0; 395 fc->reqctr = 0;
396 fc->blocked = 1;
412 } 397 }
413 return fc; 398 return fc;
414} 399}
415 400
416static struct fuse_conn *get_conn(struct file *file, struct super_block *sb)
417{
418 struct fuse_conn *fc;
419 int err;
420
421 err = -EINVAL;
422 if (file->f_op != &fuse_dev_operations)
423 goto out_err;
424
425 err = -ENOMEM;
426 fc = new_conn();
427 if (!fc)
428 goto out_err;
429
430 spin_lock(&fuse_lock);
431 err = -EINVAL;
432 if (file->private_data)
433 goto out_unlock;
434
435 kobject_get(&fc->kobj);
436 file->private_data = fc;
437 spin_unlock(&fuse_lock);
438 return fc;
439
440 out_unlock:
441 spin_unlock(&fuse_lock);
442 kobject_put(&fc->kobj);
443 out_err:
444 return ERR_PTR(err);
445}
446
447static struct inode *get_root_inode(struct super_block *sb, unsigned mode) 401static struct inode *get_root_inode(struct super_block *sb, unsigned mode)
448{ 402{
449 struct fuse_attr attr; 403 struct fuse_attr attr;
@@ -467,7 +421,6 @@ static struct super_operations fuse_super_operations = {
467 421
468static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) 422static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
469{ 423{
470 int i;
471 struct fuse_init_out *arg = &req->misc.init_out; 424 struct fuse_init_out *arg = &req->misc.init_out;
472 425
473 if (req->out.h.error || arg->major != FUSE_KERNEL_VERSION) 426 if (req->out.h.error || arg->major != FUSE_KERNEL_VERSION)
@@ -486,22 +439,13 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
486 fc->minor = arg->minor; 439 fc->minor = arg->minor;
487 fc->max_write = arg->minor < 5 ? 4096 : arg->max_write; 440 fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
488 } 441 }
489
490 /* After INIT reply is received other requests can go
491 out. So do (FUSE_MAX_OUTSTANDING - 1) number of
492 up()s on outstanding_sem. The last up() is done in
493 fuse_putback_request() */
494 for (i = 1; i < FUSE_MAX_OUTSTANDING; i++)
495 up(&fc->outstanding_sem);
496
497 fuse_put_request(fc, req); 442 fuse_put_request(fc, req);
443 fc->blocked = 0;
444 wake_up_all(&fc->blocked_waitq);
498} 445}
499 446
500static void fuse_send_init(struct fuse_conn *fc) 447static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
501{ 448{
502 /* This is called from fuse_read_super() so there's guaranteed
503 to be exactly one request available */
504 struct fuse_req *req = fuse_get_request(fc);
505 struct fuse_init_in *arg = &req->misc.init_in; 449 struct fuse_init_in *arg = &req->misc.init_in;
506 450
507 arg->major = FUSE_KERNEL_VERSION; 451 arg->major = FUSE_KERNEL_VERSION;
@@ -525,12 +469,9 @@ static void fuse_send_init(struct fuse_conn *fc)
525 469
526static unsigned long long conn_id(void) 470static unsigned long long conn_id(void)
527{ 471{
472 /* BKL is held for ->get_sb() */
528 static unsigned long long ctr = 1; 473 static unsigned long long ctr = 1;
529 unsigned long long val; 474 return ctr++;
530 spin_lock(&fuse_lock);
531 val = ctr++;
532 spin_unlock(&fuse_lock);
533 return val;
534} 475}
535 476
536static int fuse_fill_super(struct super_block *sb, void *data, int silent) 477static int fuse_fill_super(struct super_block *sb, void *data, int silent)
@@ -540,6 +481,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
540 struct fuse_mount_data d; 481 struct fuse_mount_data d;
541 struct file *file; 482 struct file *file;
542 struct dentry *root_dentry; 483 struct dentry *root_dentry;
484 struct fuse_req *init_req;
543 int err; 485 int err;
544 486
545 if (!parse_fuse_opt((char *) data, &d)) 487 if (!parse_fuse_opt((char *) data, &d))
@@ -555,10 +497,17 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
555 if (!file) 497 if (!file)
556 return -EINVAL; 498 return -EINVAL;
557 499
558 fc = get_conn(file, sb); 500 if (file->f_op != &fuse_dev_operations)
559 fput(file); 501 return -EINVAL;
560 if (IS_ERR(fc)) 502
561 return PTR_ERR(fc); 503 /* Setting file->private_data can't race with other mount()
504 instances, since BKL is held for ->get_sb() */
505 if (file->private_data)
506 return -EINVAL;
507
508 fc = new_conn();
509 if (!fc)
510 return -ENOMEM;
562 511
563 fc->flags = d.flags; 512 fc->flags = d.flags;
564 fc->user_id = d.user_id; 513 fc->user_id = d.user_id;
@@ -579,27 +528,40 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
579 goto err; 528 goto err;
580 } 529 }
581 530
531 init_req = fuse_request_alloc();
532 if (!init_req)
533 goto err_put_root;
534
582 err = kobject_set_name(&fc->kobj, "%llu", conn_id()); 535 err = kobject_set_name(&fc->kobj, "%llu", conn_id());
583 if (err) 536 if (err)
584 goto err_put_root; 537 goto err_free_req;
585 538
586 err = kobject_add(&fc->kobj); 539 err = kobject_add(&fc->kobj);
587 if (err) 540 if (err)
588 goto err_put_root; 541 goto err_free_req;
589 542
590 sb->s_root = root_dentry; 543 sb->s_root = root_dentry;
591 spin_lock(&fuse_lock);
592 fc->mounted = 1; 544 fc->mounted = 1;
593 fc->connected = 1; 545 fc->connected = 1;
594 spin_unlock(&fuse_lock); 546 kobject_get(&fc->kobj);
547 file->private_data = fc;
548 /*
549 * atomic_dec_and_test() in fput() provides the necessary
550 * memory barrier for file->private_data to be visible on all
551 * CPUs after this
552 */
553 fput(file);
595 554
596 fuse_send_init(fc); 555 fuse_send_init(fc, init_req);
597 556
598 return 0; 557 return 0;
599 558
559 err_free_req:
560 fuse_request_free(init_req);
600 err_put_root: 561 err_put_root:
601 dput(root_dentry); 562 dput(root_dentry);
602 err: 563 err:
564 fput(file);
603 kobject_put(&fc->kobj); 565 kobject_put(&fc->kobj);
604 return err; 566 return err;
605} 567}
@@ -753,7 +715,6 @@ static int __init fuse_init(void)
753 printk("fuse init (API version %i.%i)\n", 715 printk("fuse init (API version %i.%i)\n",
754 FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION); 716 FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION);
755 717
756 spin_lock_init(&fuse_lock);
757 res = fuse_fs_init(); 718 res = fuse_fs_init();
758 if (res) 719 if (res)
759 goto err; 720 goto err;
diff --git a/fs/inotify.c b/fs/inotify.c
index 367c487c014b..1f50302849c5 100644
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -538,7 +538,7 @@ void inotify_d_instantiate(struct dentry *entry, struct inode *inode)
538 WARN_ON(entry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED); 538 WARN_ON(entry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED);
539 spin_lock(&entry->d_lock); 539 spin_lock(&entry->d_lock);
540 parent = entry->d_parent; 540 parent = entry->d_parent;
541 if (inotify_inode_watched(parent->d_inode)) 541 if (parent->d_inode && inotify_inode_watched(parent->d_inode))
542 entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED; 542 entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
543 spin_unlock(&entry->d_lock); 543 spin_unlock(&entry->d_lock);
544} 544}
diff --git a/fs/namespace.c b/fs/namespace.c
index bf478addb852..2c5f1f80bdc2 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -899,11 +899,13 @@ static int do_change_type(struct nameidata *nd, int flag)
899/* 899/*
900 * do loopback mount. 900 * do loopback mount.
901 */ 901 */
902static int do_loopback(struct nameidata *nd, char *old_name, int recurse) 902static int do_loopback(struct nameidata *nd, char *old_name, unsigned long flags, int mnt_flags)
903{ 903{
904 struct nameidata old_nd; 904 struct nameidata old_nd;
905 struct vfsmount *mnt = NULL; 905 struct vfsmount *mnt = NULL;
906 int recurse = flags & MS_REC;
906 int err = mount_is_safe(nd); 907 int err = mount_is_safe(nd);
908
907 if (err) 909 if (err)
908 return err; 910 return err;
909 if (!old_name || !*old_name) 911 if (!old_name || !*old_name)
@@ -937,6 +939,7 @@ static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
937 spin_unlock(&vfsmount_lock); 939 spin_unlock(&vfsmount_lock);
938 release_mounts(&umount_list); 940 release_mounts(&umount_list);
939 } 941 }
942 mnt->mnt_flags = mnt_flags;
940 943
941out: 944out:
942 up_write(&namespace_sem); 945 up_write(&namespace_sem);
@@ -1350,7 +1353,7 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
1350 retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags, 1353 retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
1351 data_page); 1354 data_page);
1352 else if (flags & MS_BIND) 1355 else if (flags & MS_BIND)
1353 retval = do_loopback(&nd, dev_name, flags & MS_REC); 1356 retval = do_loopback(&nd, dev_name, flags, mnt_flags);
1354 else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) 1357 else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
1355 retval = do_change_type(&nd, flags); 1358 retval = do_change_type(&nd, flags);
1356 else if (flags & MS_MOVE) 1359 else if (flags & MS_MOVE)
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index cfe9ce881613..6e92b0fe5323 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -14,46 +14,46 @@
14 14
15int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) 15int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
16{ 16{
17 struct svc_cred *cred = &rqstp->rq_cred; 17 struct svc_cred cred = rqstp->rq_cred;
18 int i; 18 int i;
19 int ret; 19 int ret;
20 20
21 if (exp->ex_flags & NFSEXP_ALLSQUASH) { 21 if (exp->ex_flags & NFSEXP_ALLSQUASH) {
22 cred->cr_uid = exp->ex_anon_uid; 22 cred.cr_uid = exp->ex_anon_uid;
23 cred->cr_gid = exp->ex_anon_gid; 23 cred.cr_gid = exp->ex_anon_gid;
24 put_group_info(cred->cr_group_info); 24 cred.cr_group_info = groups_alloc(0);
25 cred->cr_group_info = groups_alloc(0);
26 } else if (exp->ex_flags & NFSEXP_ROOTSQUASH) { 25 } else if (exp->ex_flags & NFSEXP_ROOTSQUASH) {
27 struct group_info *gi; 26 struct group_info *gi;
28 if (!cred->cr_uid) 27 if (!cred.cr_uid)
29 cred->cr_uid = exp->ex_anon_uid; 28 cred.cr_uid = exp->ex_anon_uid;
30 if (!cred->cr_gid) 29 if (!cred.cr_gid)
31 cred->cr_gid = exp->ex_anon_gid; 30 cred.cr_gid = exp->ex_anon_gid;
32 gi = groups_alloc(cred->cr_group_info->ngroups); 31 gi = groups_alloc(cred.cr_group_info->ngroups);
33 if (gi) 32 if (gi)
34 for (i = 0; i < cred->cr_group_info->ngroups; i++) { 33 for (i = 0; i < cred.cr_group_info->ngroups; i++) {
35 if (!GROUP_AT(cred->cr_group_info, i)) 34 if (!GROUP_AT(cred.cr_group_info, i))
36 GROUP_AT(gi, i) = exp->ex_anon_gid; 35 GROUP_AT(gi, i) = exp->ex_anon_gid;
37 else 36 else
38 GROUP_AT(gi, i) = GROUP_AT(cred->cr_group_info, i); 37 GROUP_AT(gi, i) = GROUP_AT(cred.cr_group_info, i);
39 } 38 }
40 put_group_info(cred->cr_group_info); 39 cred.cr_group_info = gi;
41 cred->cr_group_info = gi; 40 } else
42 } 41 get_group_info(cred.cr_group_info);
43 42
44 if (cred->cr_uid != (uid_t) -1) 43 if (cred.cr_uid != (uid_t) -1)
45 current->fsuid = cred->cr_uid; 44 current->fsuid = cred.cr_uid;
46 else 45 else
47 current->fsuid = exp->ex_anon_uid; 46 current->fsuid = exp->ex_anon_uid;
48 if (cred->cr_gid != (gid_t) -1) 47 if (cred.cr_gid != (gid_t) -1)
49 current->fsgid = cred->cr_gid; 48 current->fsgid = cred.cr_gid;
50 else 49 else
51 current->fsgid = exp->ex_anon_gid; 50 current->fsgid = exp->ex_anon_gid;
52 51
53 if (!cred->cr_group_info) 52 if (!cred.cr_group_info)
54 return -ENOMEM; 53 return -ENOMEM;
55 ret = set_current_groups(cred->cr_group_info); 54 ret = set_current_groups(cred.cr_group_info);
56 if ((cred->cr_uid)) { 55 put_group_info(cred.cr_group_info);
56 if ((cred.cr_uid)) {
57 cap_t(current->cap_effective) &= ~CAP_NFSD_MASK; 57 cap_t(current->cap_effective) &= ~CAP_NFSD_MASK;
58 } else { 58 } else {
59 cap_t(current->cap_effective) |= (CAP_NFSD_MASK & 59 cap_t(current->cap_effective) |= (CAP_NFSD_MASK &
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index c340be0a3f59..4e0578121d9a 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -422,7 +422,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
422 if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0) 422 if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
423 goto out; 423 goto out;
424 err = path_lookup(buf, 0, &nd); 424 err = path_lookup(buf, 0, &nd);
425 if (err) goto out; 425 if (err) goto out_no_path;
426 426
427 exp.h.flags = 0; 427 exp.h.flags = 0;
428 exp.ex_client = dom; 428 exp.ex_client = dom;
@@ -475,6 +475,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
475 out: 475 out:
476 if (nd.dentry) 476 if (nd.dentry)
477 path_release(&nd); 477 path_release(&nd);
478 out_no_path:
478 if (dom) 479 if (dom)
479 auth_domain_put(dom); 480 auth_domain_put(dom);
480 kfree(buf); 481 kfree(buf);
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 6d2dfed1de08..f61142afea44 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -682,7 +682,7 @@ static struct svc_procedure nfsd_procedures3[22] = {
682 PROC(lookup, dirop, dirop, fhandle2, RC_NOCACHE, ST+FH+pAT+pAT), 682 PROC(lookup, dirop, dirop, fhandle2, RC_NOCACHE, ST+FH+pAT+pAT),
683 PROC(access, access, access, fhandle, RC_NOCACHE, ST+pAT+1), 683 PROC(access, access, access, fhandle, RC_NOCACHE, ST+pAT+1),
684 PROC(readlink, readlink, readlink, fhandle, RC_NOCACHE, ST+pAT+1+NFS3_MAXPATHLEN/4), 684 PROC(readlink, readlink, readlink, fhandle, RC_NOCACHE, ST+pAT+1+NFS3_MAXPATHLEN/4),
685 PROC(read, read, read, fhandle, RC_NOCACHE, ST+pAT+4+NFSSVC_MAXBLKSIZE), 685 PROC(read, read, read, fhandle, RC_NOCACHE, ST+pAT+4+NFSSVC_MAXBLKSIZE/4),
686 PROC(write, write, write, fhandle, RC_REPLBUFF, ST+WC+4), 686 PROC(write, write, write, fhandle, RC_REPLBUFF, ST+WC+4),
687 PROC(create, create, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC), 687 PROC(create, create, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC),
688 PROC(mkdir, mkdir, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC), 688 PROC(mkdir, mkdir, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC),
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 7391f4aabedb..edb107e61b91 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -710,9 +710,9 @@ calculate_posix_ace_count(struct nfs4_acl *n4acl)
710 /* Also, the remaining entries are for named users and 710 /* Also, the remaining entries are for named users and
711 * groups, and come in threes (mask, allow, deny): */ 711 * groups, and come in threes (mask, allow, deny): */
712 if (n4acl->naces < 7) 712 if (n4acl->naces < 7)
713 return -1; 713 return -EINVAL;
714 if ((n4acl->naces - 7) % 3) 714 if ((n4acl->naces - 7) % 3)
715 return -1; 715 return -EINVAL;
716 return 4 + (n4acl->naces - 7)/3; 716 return 4 + (n4acl->naces - 7)/3;
717 } 717 }
718} 718}
@@ -790,7 +790,7 @@ nfs4_acl_split(struct nfs4_acl *acl, struct nfs4_acl *dacl)
790 continue; 790 continue;
791 791
792 error = nfs4_acl_add_ace(dacl, ace->type, ace->flag, 792 error = nfs4_acl_add_ace(dacl, ace->type, ace->flag,
793 ace->access_mask, ace->whotype, ace->who) == -1; 793 ace->access_mask, ace->whotype, ace->who);
794 if (error < 0) 794 if (error < 0)
795 goto out; 795 goto out;
796 796
@@ -866,7 +866,7 @@ nfs4_acl_add_ace(struct nfs4_acl *acl, u32 type, u32 flag, u32 access_mask,
866 struct nfs4_ace *ace; 866 struct nfs4_ace *ace;
867 867
868 if ((ace = kmalloc(sizeof(*ace), GFP_KERNEL)) == NULL) 868 if ((ace = kmalloc(sizeof(*ace), GFP_KERNEL)) == NULL)
869 return -1; 869 return -ENOMEM;
870 870
871 ace->type = type; 871 ace->type = type;
872 ace->flag = flag; 872 ace->flag = flag;
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index c872bd07fc10..dbaf3f93f328 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -441,8 +441,9 @@ nfsd4_probe_callback(struct nfs4_client *clp)
441 goto out_clnt; 441 goto out_clnt;
442 } 442 }
443 443
444 /* the task holds a reference to the nfs4_client struct */
445 cb->cb_client = clnt; 444 cb->cb_client = clnt;
445
446 /* the task holds a reference to the nfs4_client struct */
446 atomic_inc(&clp->cl_count); 447 atomic_inc(&clp->cl_count);
447 448
448 msg.rpc_cred = nfsd4_lookupcred(clp,0); 449 msg.rpc_cred = nfsd4_lookupcred(clp,0);
@@ -460,13 +461,12 @@ nfsd4_probe_callback(struct nfs4_client *clp)
460out_rpciod: 461out_rpciod:
461 atomic_dec(&clp->cl_count); 462 atomic_dec(&clp->cl_count);
462 rpciod_down(); 463 rpciod_down();
464 cb->cb_client = NULL;
463out_clnt: 465out_clnt:
464 rpc_shutdown_client(clnt); 466 rpc_shutdown_client(clnt);
465 goto out_err;
466out_err: 467out_err:
467 dprintk("NFSD: warning: no callback path to client %.*s\n", 468 dprintk("NFSD: warning: no callback path to client %.*s\n",
468 (int)clp->cl_name.len, clp->cl_name.data); 469 (int)clp->cl_name.len, clp->cl_name.data);
469 cb->cb_client = NULL;
470} 470}
471 471
472static void 472static void
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 6d63f1d9e5f5..b0e095ea0c03 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -288,8 +288,6 @@ nfsd4_putrootfh(struct svc_rqst *rqstp, struct svc_fh *current_fh)
288 fh_put(current_fh); 288 fh_put(current_fh);
289 status = exp_pseudoroot(rqstp->rq_client, current_fh, 289 status = exp_pseudoroot(rqstp->rq_client, current_fh,
290 &rqstp->rq_chandle); 290 &rqstp->rq_chandle);
291 if (!status)
292 status = nfserrno(nfsd_setuser(rqstp, current_fh->fh_export));
293 return status; 291 return status;
294} 292}
295 293
@@ -975,7 +973,7 @@ struct nfsd4_voidargs { int dummy; };
975 */ 973 */
976static struct svc_procedure nfsd_procedures4[2] = { 974static struct svc_procedure nfsd_procedures4[2] = {
977 PROC(null, void, void, void, RC_NOCACHE, 1), 975 PROC(null, void, void, void, RC_NOCACHE, 1),
978 PROC(compound, compound, compound, compound, RC_NOCACHE, NFSD_BUFSIZE) 976 PROC(compound, compound, compound, compound, RC_NOCACHE, NFSD_BUFSIZE/4)
979}; 977};
980 978
981struct svc_version nfsd_version4 = { 979struct svc_version nfsd_version4 = {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 47ec112b266c..96c7578cbe1e 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -147,6 +147,42 @@ get_nfs4_file(struct nfs4_file *fi)
147 kref_get(&fi->fi_ref); 147 kref_get(&fi->fi_ref);
148} 148}
149 149
150static int num_delegations;
151
152/*
153 * Open owner state (share locks)
154 */
155
156/* hash tables for nfs4_stateowner */
157#define OWNER_HASH_BITS 8
158#define OWNER_HASH_SIZE (1 << OWNER_HASH_BITS)
159#define OWNER_HASH_MASK (OWNER_HASH_SIZE - 1)
160
161#define ownerid_hashval(id) \
162 ((id) & OWNER_HASH_MASK)
163#define ownerstr_hashval(clientid, ownername) \
164 (((clientid) + opaque_hashval((ownername.data), (ownername.len))) & OWNER_HASH_MASK)
165
166static struct list_head ownerid_hashtbl[OWNER_HASH_SIZE];
167static struct list_head ownerstr_hashtbl[OWNER_HASH_SIZE];
168
169/* hash table for nfs4_file */
170#define FILE_HASH_BITS 8
171#define FILE_HASH_SIZE (1 << FILE_HASH_BITS)
172#define FILE_HASH_MASK (FILE_HASH_SIZE - 1)
173/* hash table for (open)nfs4_stateid */
174#define STATEID_HASH_BITS 10
175#define STATEID_HASH_SIZE (1 << STATEID_HASH_BITS)
176#define STATEID_HASH_MASK (STATEID_HASH_SIZE - 1)
177
178#define file_hashval(x) \
179 hash_ptr(x, FILE_HASH_BITS)
180#define stateid_hashval(owner_id, file_id) \
181 (((owner_id) + (file_id)) & STATEID_HASH_MASK)
182
183static struct list_head file_hashtbl[FILE_HASH_SIZE];
184static struct list_head stateid_hashtbl[STATEID_HASH_SIZE];
185
150static struct nfs4_delegation * 186static struct nfs4_delegation *
151alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_fh *current_fh, u32 type) 187alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_fh *current_fh, u32 type)
152{ 188{
@@ -155,9 +191,12 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
155 struct nfs4_callback *cb = &stp->st_stateowner->so_client->cl_callback; 191 struct nfs4_callback *cb = &stp->st_stateowner->so_client->cl_callback;
156 192
157 dprintk("NFSD alloc_init_deleg\n"); 193 dprintk("NFSD alloc_init_deleg\n");
194 if (num_delegations > STATEID_HASH_SIZE * 4)
195 return NULL;
158 dp = kmem_cache_alloc(deleg_slab, GFP_KERNEL); 196 dp = kmem_cache_alloc(deleg_slab, GFP_KERNEL);
159 if (dp == NULL) 197 if (dp == NULL)
160 return dp; 198 return dp;
199 num_delegations++;
161 INIT_LIST_HEAD(&dp->dl_perfile); 200 INIT_LIST_HEAD(&dp->dl_perfile);
162 INIT_LIST_HEAD(&dp->dl_perclnt); 201 INIT_LIST_HEAD(&dp->dl_perclnt);
163 INIT_LIST_HEAD(&dp->dl_recall_lru); 202 INIT_LIST_HEAD(&dp->dl_recall_lru);
@@ -192,6 +231,7 @@ nfs4_put_delegation(struct nfs4_delegation *dp)
192 dprintk("NFSD: freeing dp %p\n",dp); 231 dprintk("NFSD: freeing dp %p\n",dp);
193 put_nfs4_file(dp->dl_file); 232 put_nfs4_file(dp->dl_file);
194 kmem_cache_free(deleg_slab, dp); 233 kmem_cache_free(deleg_slab, dp);
234 num_delegations--;
195 } 235 }
196} 236}
197 237
@@ -330,22 +370,29 @@ put_nfs4_client(struct nfs4_client *clp)
330} 370}
331 371
332static void 372static void
373shutdown_callback_client(struct nfs4_client *clp)
374{
375 struct rpc_clnt *clnt = clp->cl_callback.cb_client;
376
377 /* shutdown rpc client, ending any outstanding recall rpcs */
378 if (clnt) {
379 clp->cl_callback.cb_client = NULL;
380 rpc_shutdown_client(clnt);
381 rpciod_down();
382 }
383}
384
385static void
333expire_client(struct nfs4_client *clp) 386expire_client(struct nfs4_client *clp)
334{ 387{
335 struct nfs4_stateowner *sop; 388 struct nfs4_stateowner *sop;
336 struct nfs4_delegation *dp; 389 struct nfs4_delegation *dp;
337 struct nfs4_callback *cb = &clp->cl_callback;
338 struct rpc_clnt *clnt = clp->cl_callback.cb_client;
339 struct list_head reaplist; 390 struct list_head reaplist;
340 391
341 dprintk("NFSD: expire_client cl_count %d\n", 392 dprintk("NFSD: expire_client cl_count %d\n",
342 atomic_read(&clp->cl_count)); 393 atomic_read(&clp->cl_count));
343 394
344 /* shutdown rpc client, ending any outstanding recall rpcs */ 395 shutdown_callback_client(clp);
345 if (atomic_read(&cb->cb_set) == 1 && clnt) {
346 rpc_shutdown_client(clnt);
347 clnt = clp->cl_callback.cb_client = NULL;
348 }
349 396
350 INIT_LIST_HEAD(&reaplist); 397 INIT_LIST_HEAD(&reaplist);
351 spin_lock(&recall_lock); 398 spin_lock(&recall_lock);
@@ -936,40 +983,6 @@ out:
936 return status; 983 return status;
937} 984}
938 985
939/*
940 * Open owner state (share locks)
941 */
942
943/* hash tables for nfs4_stateowner */
944#define OWNER_HASH_BITS 8
945#define OWNER_HASH_SIZE (1 << OWNER_HASH_BITS)
946#define OWNER_HASH_MASK (OWNER_HASH_SIZE - 1)
947
948#define ownerid_hashval(id) \
949 ((id) & OWNER_HASH_MASK)
950#define ownerstr_hashval(clientid, ownername) \
951 (((clientid) + opaque_hashval((ownername.data), (ownername.len))) & OWNER_HASH_MASK)
952
953static struct list_head ownerid_hashtbl[OWNER_HASH_SIZE];
954static struct list_head ownerstr_hashtbl[OWNER_HASH_SIZE];
955
956/* hash table for nfs4_file */
957#define FILE_HASH_BITS 8
958#define FILE_HASH_SIZE (1 << FILE_HASH_BITS)
959#define FILE_HASH_MASK (FILE_HASH_SIZE - 1)
960/* hash table for (open)nfs4_stateid */
961#define STATEID_HASH_BITS 10
962#define STATEID_HASH_SIZE (1 << STATEID_HASH_BITS)
963#define STATEID_HASH_MASK (STATEID_HASH_SIZE - 1)
964
965#define file_hashval(x) \
966 hash_ptr(x, FILE_HASH_BITS)
967#define stateid_hashval(owner_id, file_id) \
968 (((owner_id) + (file_id)) & STATEID_HASH_MASK)
969
970static struct list_head file_hashtbl[FILE_HASH_SIZE];
971static struct list_head stateid_hashtbl[STATEID_HASH_SIZE];
972
973/* OPEN Share state helper functions */ 986/* OPEN Share state helper functions */
974static inline struct nfs4_file * 987static inline struct nfs4_file *
975alloc_init_file(struct inode *ino) 988alloc_init_file(struct inode *ino)
@@ -1186,8 +1199,7 @@ move_to_close_lru(struct nfs4_stateowner *sop)
1186{ 1199{
1187 dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop); 1200 dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop);
1188 1201
1189 unhash_stateowner(sop); 1202 list_move_tail(&sop->so_close_lru, &close_lru);
1190 list_add_tail(&sop->so_close_lru, &close_lru);
1191 sop->so_time = get_seconds(); 1203 sop->so_time = get_seconds();
1192} 1204}
1193 1205
@@ -1916,8 +1928,7 @@ nfs4_laundromat(void)
1916 } 1928 }
1917 dprintk("NFSD: purging unused open stateowner (so_id %d)\n", 1929 dprintk("NFSD: purging unused open stateowner (so_id %d)\n",
1918 sop->so_id); 1930 sop->so_id);
1919 list_del(&sop->so_close_lru); 1931 release_stateowner(sop);
1920 nfs4_put_stateowner(sop);
1921 } 1932 }
1922 if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT) 1933 if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT)
1923 clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT; 1934 clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT;
@@ -2495,36 +2506,27 @@ nfs4_transform_lock_offset(struct file_lock *lock)
2495 lock->fl_end = OFFSET_MAX; 2506 lock->fl_end = OFFSET_MAX;
2496} 2507}
2497 2508
2498static int 2509/* Hack!: For now, we're defining this just so we can use a pointer to it
2499nfs4_verify_lock_stateowner(struct nfs4_stateowner *sop, unsigned int hashval) 2510 * as a unique cookie to identify our (NFSv4's) posix locks. */
2500{ 2511static struct lock_manager_operations nfsd_posix_mng_ops = {
2501 struct nfs4_stateowner *local = NULL; 2512};
2502 int status = 0;
2503
2504 if (hashval >= LOCK_HASH_SIZE)
2505 goto out;
2506 list_for_each_entry(local, &lock_ownerid_hashtbl[hashval], so_idhash) {
2507 if (local == sop) {
2508 status = 1;
2509 goto out;
2510 }
2511 }
2512out:
2513 return status;
2514}
2515
2516 2513
2517static inline void 2514static inline void
2518nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny) 2515nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny)
2519{ 2516{
2520 struct nfs4_stateowner *sop = (struct nfs4_stateowner *) fl->fl_owner; 2517 struct nfs4_stateowner *sop;
2521 unsigned int hval = lockownerid_hashval(sop->so_id); 2518 unsigned int hval;
2522 2519
2523 deny->ld_sop = NULL; 2520 if (fl->fl_lmops == &nfsd_posix_mng_ops) {
2524 if (nfs4_verify_lock_stateowner(sop, hval)) { 2521 sop = (struct nfs4_stateowner *) fl->fl_owner;
2522 hval = lockownerid_hashval(sop->so_id);
2525 kref_get(&sop->so_ref); 2523 kref_get(&sop->so_ref);
2526 deny->ld_sop = sop; 2524 deny->ld_sop = sop;
2527 deny->ld_clientid = sop->so_client->cl_clientid; 2525 deny->ld_clientid = sop->so_client->cl_clientid;
2526 } else {
2527 deny->ld_sop = NULL;
2528 deny->ld_clientid.cl_boot = 0;
2529 deny->ld_clientid.cl_id = 0;
2528 } 2530 }
2529 deny->ld_start = fl->fl_start; 2531 deny->ld_start = fl->fl_start;
2530 deny->ld_length = ~(u64)0; 2532 deny->ld_length = ~(u64)0;
@@ -2736,6 +2738,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
2736 file_lock.fl_pid = current->tgid; 2738 file_lock.fl_pid = current->tgid;
2737 file_lock.fl_file = filp; 2739 file_lock.fl_file = filp;
2738 file_lock.fl_flags = FL_POSIX; 2740 file_lock.fl_flags = FL_POSIX;
2741 file_lock.fl_lmops = &nfsd_posix_mng_ops;
2739 2742
2740 file_lock.fl_start = lock->lk_offset; 2743 file_lock.fl_start = lock->lk_offset;
2741 if ((lock->lk_length == ~(u64)0) || 2744 if ((lock->lk_length == ~(u64)0) ||
@@ -2841,6 +2844,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
2841 file_lock.fl_owner = (fl_owner_t)lockt->lt_stateowner; 2844 file_lock.fl_owner = (fl_owner_t)lockt->lt_stateowner;
2842 file_lock.fl_pid = current->tgid; 2845 file_lock.fl_pid = current->tgid;
2843 file_lock.fl_flags = FL_POSIX; 2846 file_lock.fl_flags = FL_POSIX;
2847 file_lock.fl_lmops = &nfsd_posix_mng_ops;
2844 2848
2845 file_lock.fl_start = lockt->lt_offset; 2849 file_lock.fl_start = lockt->lt_offset;
2846 if ((lockt->lt_length == ~(u64)0) || LOFF_OVERFLOW(lockt->lt_offset, lockt->lt_length)) 2850 if ((lockt->lt_length == ~(u64)0) || LOFF_OVERFLOW(lockt->lt_offset, lockt->lt_length))
@@ -2900,6 +2904,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
2900 file_lock.fl_pid = current->tgid; 2904 file_lock.fl_pid = current->tgid;
2901 file_lock.fl_file = filp; 2905 file_lock.fl_file = filp;
2902 file_lock.fl_flags = FL_POSIX; 2906 file_lock.fl_flags = FL_POSIX;
2907 file_lock.fl_lmops = &nfsd_posix_mng_ops;
2903 file_lock.fl_start = locku->lu_offset; 2908 file_lock.fl_start = locku->lu_offset;
2904 2909
2905 if ((locku->lu_length == ~(u64)0) || LOFF_OVERFLOW(locku->lu_offset, locku->lu_length)) 2910 if ((locku->lu_length == ~(u64)0) || LOFF_OVERFLOW(locku->lu_offset, locku->lu_length))
@@ -3211,15 +3216,8 @@ __nfs4_state_shutdown(void)
3211 int i; 3216 int i;
3212 struct nfs4_client *clp = NULL; 3217 struct nfs4_client *clp = NULL;
3213 struct nfs4_delegation *dp = NULL; 3218 struct nfs4_delegation *dp = NULL;
3214 struct nfs4_stateowner *sop = NULL;
3215 struct list_head *pos, *next, reaplist; 3219 struct list_head *pos, *next, reaplist;
3216 3220
3217 list_for_each_safe(pos, next, &close_lru) {
3218 sop = list_entry(pos, struct nfs4_stateowner, so_close_lru);
3219 list_del(&sop->so_close_lru);
3220 nfs4_put_stateowner(sop);
3221 }
3222
3223 for (i = 0; i < CLIENT_HASH_SIZE; i++) { 3221 for (i = 0; i < CLIENT_HASH_SIZE; i++) {
3224 while (!list_empty(&conf_id_hashtbl[i])) { 3222 while (!list_empty(&conf_id_hashtbl[i])) {
3225 clp = list_entry(conf_id_hashtbl[i].next, struct nfs4_client, cl_idhash); 3223 clp = list_entry(conf_id_hashtbl[i].next, struct nfs4_client, cl_idhash);
@@ -3244,8 +3242,6 @@ __nfs4_state_shutdown(void)
3244 } 3242 }
3245 3243
3246 cancel_delayed_work(&laundromat_work); 3244 cancel_delayed_work(&laundromat_work);
3247 flush_workqueue(laundry_wq);
3248 destroy_workqueue(laundry_wq);
3249 nfsd4_shutdown_recdir(); 3245 nfsd4_shutdown_recdir();
3250 nfs4_init = 0; 3246 nfs4_init = 0;
3251} 3247}
@@ -3253,6 +3249,8 @@ __nfs4_state_shutdown(void)
3253void 3249void
3254nfs4_state_shutdown(void) 3250nfs4_state_shutdown(void)
3255{ 3251{
3252 cancel_rearming_delayed_workqueue(laundry_wq, &laundromat_work);
3253 destroy_workqueue(laundry_wq);
3256 nfs4_lock_state(); 3254 nfs4_lock_state();
3257 nfs4_release_reclaim(); 3255 nfs4_release_reclaim();
3258 __nfs4_state_shutdown(); 3256 __nfs4_state_shutdown();
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 03857fd81126..de3998f15f10 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -299,11 +299,10 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *ia
299 buf, dummy32, &ace.who); 299 buf, dummy32, &ace.who);
300 if (status) 300 if (status)
301 goto out_nfserr; 301 goto out_nfserr;
302 if (nfs4_acl_add_ace(*acl, ace.type, ace.flag, 302 status = nfs4_acl_add_ace(*acl, ace.type, ace.flag,
303 ace.access_mask, ace.whotype, ace.who) != 0) { 303 ace.access_mask, ace.whotype, ace.who);
304 status = -ENOMEM; 304 if (status)
305 goto out_nfserr; 305 goto out_nfserr;
306 }
307 } 306 }
308 } else 307 } else
309 *acl = NULL; 308 *acl = NULL;
@@ -2085,27 +2084,20 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_read
2085 WRITE32(eof); 2084 WRITE32(eof);
2086 WRITE32(maxcount); 2085 WRITE32(maxcount);
2087 ADJUST_ARGS(); 2086 ADJUST_ARGS();
2088 resp->xbuf->head[0].iov_len = ((char*)resp->p) - (char*)resp->xbuf->head[0].iov_base; 2087 resp->xbuf->head[0].iov_len = (char*)p
2089 2088 - (char*)resp->xbuf->head[0].iov_base;
2090 resp->xbuf->page_len = maxcount; 2089 resp->xbuf->page_len = maxcount;
2091 2090
2092 /* read zero bytes -> don't set up tail */ 2091 /* Use rest of head for padding and remaining ops: */
2093 if(!maxcount) 2092 resp->rqstp->rq_restailpage = 0;
2094 return 0; 2093 resp->xbuf->tail[0].iov_base = p;
2095
2096 /* set up page for remaining responses */
2097 svc_take_page(resp->rqstp);
2098 resp->xbuf->tail[0].iov_base =
2099 page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
2100 resp->rqstp->rq_restailpage = resp->rqstp->rq_resused-1;
2101 resp->xbuf->tail[0].iov_len = 0; 2094 resp->xbuf->tail[0].iov_len = 0;
2102 resp->p = resp->xbuf->tail[0].iov_base;
2103 resp->end = resp->p + PAGE_SIZE/4;
2104
2105 if (maxcount&3) { 2095 if (maxcount&3) {
2106 *(resp->p)++ = 0; 2096 RESERVE_SPACE(4);
2097 WRITE32(0);
2107 resp->xbuf->tail[0].iov_base += maxcount&3; 2098 resp->xbuf->tail[0].iov_base += maxcount&3;
2108 resp->xbuf->tail[0].iov_len = 4 - (maxcount&3); 2099 resp->xbuf->tail[0].iov_len = 4 - (maxcount&3);
2100 ADJUST_ARGS();
2109 } 2101 }
2110 return 0; 2102 return 0;
2111} 2103}
@@ -2142,21 +2134,20 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_r
2142 2134
2143 WRITE32(maxcount); 2135 WRITE32(maxcount);
2144 ADJUST_ARGS(); 2136 ADJUST_ARGS();
2145 resp->xbuf->head[0].iov_len = ((char*)resp->p) - (char*)resp->xbuf->head[0].iov_base; 2137 resp->xbuf->head[0].iov_len = (char*)p
2138 - (char*)resp->xbuf->head[0].iov_base;
2139 resp->xbuf->page_len = maxcount;
2146 2140
2147 svc_take_page(resp->rqstp); 2141 /* Use rest of head for padding and remaining ops: */
2148 resp->xbuf->tail[0].iov_base = 2142 resp->rqstp->rq_restailpage = 0;
2149 page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]); 2143 resp->xbuf->tail[0].iov_base = p;
2150 resp->rqstp->rq_restailpage = resp->rqstp->rq_resused-1;
2151 resp->xbuf->tail[0].iov_len = 0; 2144 resp->xbuf->tail[0].iov_len = 0;
2152 resp->p = resp->xbuf->tail[0].iov_base;
2153 resp->end = resp->p + PAGE_SIZE/4;
2154
2155 resp->xbuf->page_len = maxcount;
2156 if (maxcount&3) { 2145 if (maxcount&3) {
2157 *(resp->p)++ = 0; 2146 RESERVE_SPACE(4);
2147 WRITE32(0);
2158 resp->xbuf->tail[0].iov_base += maxcount&3; 2148 resp->xbuf->tail[0].iov_base += maxcount&3;
2159 resp->xbuf->tail[0].iov_len = 4 - (maxcount&3); 2149 resp->xbuf->tail[0].iov_len = 4 - (maxcount&3);
2150 ADJUST_ARGS();
2160 } 2151 }
2161 return 0; 2152 return 0;
2162} 2153}
@@ -2166,7 +2157,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_re
2166{ 2157{
2167 int maxcount; 2158 int maxcount;
2168 loff_t offset; 2159 loff_t offset;
2169 u32 *page, *savep; 2160 u32 *page, *savep, *tailbase;
2170 ENCODE_HEAD; 2161 ENCODE_HEAD;
2171 2162
2172 if (nfserr) 2163 if (nfserr)
@@ -2182,6 +2173,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_re
2182 WRITE32(0); 2173 WRITE32(0);
2183 ADJUST_ARGS(); 2174 ADJUST_ARGS();
2184 resp->xbuf->head[0].iov_len = ((char*)resp->p) - (char*)resp->xbuf->head[0].iov_base; 2175 resp->xbuf->head[0].iov_len = ((char*)resp->p) - (char*)resp->xbuf->head[0].iov_base;
2176 tailbase = p;
2185 2177
2186 maxcount = PAGE_SIZE; 2178 maxcount = PAGE_SIZE;
2187 if (maxcount > readdir->rd_maxcount) 2179 if (maxcount > readdir->rd_maxcount)
@@ -2226,14 +2218,12 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_re
2226 *p++ = htonl(readdir->common.err == nfserr_eof); 2218 *p++ = htonl(readdir->common.err == nfserr_eof);
2227 resp->xbuf->page_len = ((char*)p) - (char*)page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]); 2219 resp->xbuf->page_len = ((char*)p) - (char*)page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
2228 2220
2229 /* allocate a page for the tail */ 2221 /* Use rest of head for padding and remaining ops: */
2230 svc_take_page(resp->rqstp); 2222 resp->rqstp->rq_restailpage = 0;
2231 resp->xbuf->tail[0].iov_base = 2223 resp->xbuf->tail[0].iov_base = tailbase;
2232 page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
2233 resp->rqstp->rq_restailpage = resp->rqstp->rq_resused-1;
2234 resp->xbuf->tail[0].iov_len = 0; 2224 resp->xbuf->tail[0].iov_len = 0;
2235 resp->p = resp->xbuf->tail[0].iov_base; 2225 resp->p = resp->xbuf->tail[0].iov_base;
2236 resp->end = resp->p + PAGE_SIZE/4; 2226 resp->end = resp->p + (PAGE_SIZE - resp->xbuf->head[0].iov_len)/4;
2237 2227
2238 return 0; 2228 return 0;
2239err_no_verf: 2229err_no_verf:
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 3e6b75cd90fd..06cd0db0f32b 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -553,7 +553,7 @@ static struct svc_procedure nfsd_procedures2[18] = {
553 PROC(none, void, void, none, RC_NOCACHE, ST), 553 PROC(none, void, void, none, RC_NOCACHE, ST),
554 PROC(lookup, diropargs, diropres, fhandle, RC_NOCACHE, ST+FH+AT), 554 PROC(lookup, diropargs, diropres, fhandle, RC_NOCACHE, ST+FH+AT),
555 PROC(readlink, readlinkargs, readlinkres, none, RC_NOCACHE, ST+1+NFS_MAXPATHLEN/4), 555 PROC(readlink, readlinkargs, readlinkres, none, RC_NOCACHE, ST+1+NFS_MAXPATHLEN/4),
556 PROC(read, readargs, readres, fhandle, RC_NOCACHE, ST+AT+1+NFSSVC_MAXBLKSIZE), 556 PROC(read, readargs, readres, fhandle, RC_NOCACHE, ST+AT+1+NFSSVC_MAXBLKSIZE/4),
557 PROC(none, void, void, none, RC_NOCACHE, ST), 557 PROC(none, void, void, none, RC_NOCACHE, ST),
558 PROC(write, writeargs, attrstat, fhandle, RC_REPLBUFF, ST+AT), 558 PROC(write, writeargs, attrstat, fhandle, RC_REPLBUFF, ST+AT),
559 PROC(create, createargs, diropres, fhandle, RC_REPLBUFF, ST+FH+AT), 559 PROC(create, createargs, diropres, fhandle, RC_REPLBUFF, ST+FH+AT),
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 31018333dc38..6aa92d0e6876 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -371,7 +371,6 @@ out_nfserr:
371static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf) 371static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf)
372{ 372{
373 ssize_t buflen; 373 ssize_t buflen;
374 int error;
375 374
376 buflen = vfs_getxattr(dentry, key, NULL, 0); 375 buflen = vfs_getxattr(dentry, key, NULL, 0);
377 if (buflen <= 0) 376 if (buflen <= 0)
@@ -381,10 +380,7 @@ static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf)
381 if (!*buf) 380 if (!*buf)
382 return -ENOMEM; 381 return -ENOMEM;
383 382
384 error = vfs_getxattr(dentry, key, *buf, buflen); 383 return vfs_getxattr(dentry, key, *buf, buflen);
385 if (error < 0)
386 return error;
387 return buflen;
388} 384}
389#endif 385#endif
390 386
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index bff0f0d06867..21f38accd039 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -153,6 +153,7 @@ struct o2hb_region {
153struct o2hb_bio_wait_ctxt { 153struct o2hb_bio_wait_ctxt {
154 atomic_t wc_num_reqs; 154 atomic_t wc_num_reqs;
155 struct completion wc_io_complete; 155 struct completion wc_io_complete;
156 int wc_error;
156}; 157};
157 158
158static void o2hb_write_timeout(void *arg) 159static void o2hb_write_timeout(void *arg)
@@ -186,6 +187,7 @@ static inline void o2hb_bio_wait_init(struct o2hb_bio_wait_ctxt *wc,
186{ 187{
187 atomic_set(&wc->wc_num_reqs, num_ios); 188 atomic_set(&wc->wc_num_reqs, num_ios);
188 init_completion(&wc->wc_io_complete); 189 init_completion(&wc->wc_io_complete);
190 wc->wc_error = 0;
189} 191}
190 192
191/* Used in error paths too */ 193/* Used in error paths too */
@@ -218,8 +220,10 @@ static int o2hb_bio_end_io(struct bio *bio,
218{ 220{
219 struct o2hb_bio_wait_ctxt *wc = bio->bi_private; 221 struct o2hb_bio_wait_ctxt *wc = bio->bi_private;
220 222
221 if (error) 223 if (error) {
222 mlog(ML_ERROR, "IO Error %d\n", error); 224 mlog(ML_ERROR, "IO Error %d\n", error);
225 wc->wc_error = error;
226 }
223 227
224 if (bio->bi_size) 228 if (bio->bi_size)
225 return 1; 229 return 1;
@@ -390,6 +394,8 @@ static int o2hb_read_slots(struct o2hb_region *reg,
390 394
391bail_and_wait: 395bail_and_wait:
392 o2hb_wait_on_io(reg, &wc); 396 o2hb_wait_on_io(reg, &wc);
397 if (wc.wc_error && !status)
398 status = wc.wc_error;
393 399
394 if (bios) { 400 if (bios) {
395 for(i = 0; i < num_bios; i++) 401 for(i = 0; i < num_bios; i++)
@@ -790,20 +796,24 @@ static int o2hb_highest_node(unsigned long *nodes,
790 return highest; 796 return highest;
791} 797}
792 798
793static void o2hb_do_disk_heartbeat(struct o2hb_region *reg) 799static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
794{ 800{
795 int i, ret, highest_node, change = 0; 801 int i, ret, highest_node, change = 0;
796 unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)]; 802 unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)];
797 struct bio *write_bio; 803 struct bio *write_bio;
798 struct o2hb_bio_wait_ctxt write_wc; 804 struct o2hb_bio_wait_ctxt write_wc;
799 805
800 if (o2nm_configured_node_map(configured_nodes, sizeof(configured_nodes))) 806 ret = o2nm_configured_node_map(configured_nodes,
801 return; 807 sizeof(configured_nodes));
808 if (ret) {
809 mlog_errno(ret);
810 return ret;
811 }
802 812
803 highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES); 813 highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES);
804 if (highest_node >= O2NM_MAX_NODES) { 814 if (highest_node >= O2NM_MAX_NODES) {
805 mlog(ML_NOTICE, "ocfs2_heartbeat: no configured nodes found!\n"); 815 mlog(ML_NOTICE, "ocfs2_heartbeat: no configured nodes found!\n");
806 return; 816 return -EINVAL;
807 } 817 }
808 818
809 /* No sense in reading the slots of nodes that don't exist 819 /* No sense in reading the slots of nodes that don't exist
@@ -813,7 +823,7 @@ static void o2hb_do_disk_heartbeat(struct o2hb_region *reg)
813 ret = o2hb_read_slots(reg, highest_node + 1); 823 ret = o2hb_read_slots(reg, highest_node + 1);
814 if (ret < 0) { 824 if (ret < 0) {
815 mlog_errno(ret); 825 mlog_errno(ret);
816 return; 826 return ret;
817 } 827 }
818 828
819 /* With an up to date view of the slots, we can check that no 829 /* With an up to date view of the slots, we can check that no
@@ -831,7 +841,7 @@ static void o2hb_do_disk_heartbeat(struct o2hb_region *reg)
831 ret = o2hb_issue_node_write(reg, &write_bio, &write_wc); 841 ret = o2hb_issue_node_write(reg, &write_bio, &write_wc);
832 if (ret < 0) { 842 if (ret < 0) {
833 mlog_errno(ret); 843 mlog_errno(ret);
834 return; 844 return ret;
835 } 845 }
836 846
837 i = -1; 847 i = -1;
@@ -847,6 +857,15 @@ static void o2hb_do_disk_heartbeat(struct o2hb_region *reg)
847 */ 857 */
848 o2hb_wait_on_io(reg, &write_wc); 858 o2hb_wait_on_io(reg, &write_wc);
849 bio_put(write_bio); 859 bio_put(write_bio);
860 if (write_wc.wc_error) {
861 /* Do not re-arm the write timeout on I/O error - we
862 * can't be sure that the new block ever made it to
863 * disk */
864 mlog(ML_ERROR, "Write error %d on device \"%s\"\n",
865 write_wc.wc_error, reg->hr_dev_name);
866 return write_wc.wc_error;
867 }
868
850 o2hb_arm_write_timeout(reg); 869 o2hb_arm_write_timeout(reg);
851 870
852 /* let the person who launched us know when things are steady */ 871 /* let the person who launched us know when things are steady */
@@ -854,6 +873,8 @@ static void o2hb_do_disk_heartbeat(struct o2hb_region *reg)
854 if (atomic_dec_and_test(&reg->hr_steady_iterations)) 873 if (atomic_dec_and_test(&reg->hr_steady_iterations))
855 wake_up(&o2hb_steady_queue); 874 wake_up(&o2hb_steady_queue);
856 } 875 }
876
877 return 0;
857} 878}
858 879
859/* Subtract b from a, storing the result in a. a *must* have a larger 880/* Subtract b from a, storing the result in a. a *must* have a larger
@@ -913,7 +934,10 @@ static int o2hb_thread(void *data)
913 * likely to time itself out. */ 934 * likely to time itself out. */
914 do_gettimeofday(&before_hb); 935 do_gettimeofday(&before_hb);
915 936
916 o2hb_do_disk_heartbeat(reg); 937 i = 0;
938 do {
939 ret = o2hb_do_disk_heartbeat(reg);
940 } while (ret && ++i < 2);
917 941
918 do_gettimeofday(&after_hb); 942 do_gettimeofday(&after_hb);
919 elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb); 943 elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb);
diff --git a/fs/ocfs2/dlm/userdlm.c b/fs/ocfs2/dlm/userdlm.c
index c3764f4744ee..74ca4e5f9765 100644
--- a/fs/ocfs2/dlm/userdlm.c
+++ b/fs/ocfs2/dlm/userdlm.c
@@ -139,6 +139,10 @@ static void user_ast(void *opaque)
139 return; 139 return;
140 } 140 }
141 141
142 mlog_bug_on_msg(lockres->l_requested == LKM_IVMODE,
143 "Lockres %s, requested ivmode. flags 0x%x\n",
144 lockres->l_name, lockres->l_flags);
145
142 /* we're downconverting. */ 146 /* we're downconverting. */
143 if (lockres->l_requested < lockres->l_level) { 147 if (lockres->l_requested < lockres->l_level) {
144 if (lockres->l_requested <= 148 if (lockres->l_requested <=
@@ -229,23 +233,42 @@ static void user_unlock_ast(void *opaque, enum dlm_status status)
229 233
230 mlog(0, "UNLOCK AST called on lock %s\n", lockres->l_name); 234 mlog(0, "UNLOCK AST called on lock %s\n", lockres->l_name);
231 235
232 if (status != DLM_NORMAL) 236 if (status != DLM_NORMAL && status != DLM_CANCELGRANT)
233 mlog(ML_ERROR, "Dlm returns status %d\n", status); 237 mlog(ML_ERROR, "Dlm returns status %d\n", status);
234 238
235 spin_lock(&lockres->l_lock); 239 spin_lock(&lockres->l_lock);
236 if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) 240 /* The teardown flag gets set early during the unlock process,
241 * so test the cancel flag to make sure that this ast isn't
242 * for a concurrent cancel. */
243 if (lockres->l_flags & USER_LOCK_IN_TEARDOWN
244 && !(lockres->l_flags & USER_LOCK_IN_CANCEL)) {
237 lockres->l_level = LKM_IVMODE; 245 lockres->l_level = LKM_IVMODE;
238 else { 246 } else if (status == DLM_CANCELGRANT) {
247 mlog(0, "Lock %s, cancel fails, flags 0x%x\n",
248 lockres->l_name, lockres->l_flags);
249 /* We tried to cancel a convert request, but it was
250 * already granted. Don't clear the busy flag - the
251 * ast should've done this already. */
252 BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL));
253 lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
254 goto out_noclear;
255 } else {
256 BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL));
257 /* Cancel succeeded, we want to re-queue */
258 mlog(0, "Lock %s, cancel succeeds, flags 0x%x\n",
259 lockres->l_name, lockres->l_flags);
239 lockres->l_requested = LKM_IVMODE; /* cancel an 260 lockres->l_requested = LKM_IVMODE; /* cancel an
240 * upconvert 261 * upconvert
241 * request. */ 262 * request. */
242 lockres->l_flags &= ~USER_LOCK_IN_CANCEL; 263 lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
243 /* we want the unblock thread to look at it again 264 /* we want the unblock thread to look at it again
244 * now. */ 265 * now. */
245 __user_dlm_queue_lockres(lockres); 266 if (lockres->l_flags & USER_LOCK_BLOCKED)
267 __user_dlm_queue_lockres(lockres);
246 } 268 }
247 269
248 lockres->l_flags &= ~USER_LOCK_BUSY; 270 lockres->l_flags &= ~USER_LOCK_BUSY;
271out_noclear:
249 spin_unlock(&lockres->l_lock); 272 spin_unlock(&lockres->l_lock);
250 273
251 wake_up(&lockres->l_event); 274 wake_up(&lockres->l_event);
@@ -268,13 +291,26 @@ static void user_dlm_unblock_lock(void *opaque)
268 291
269 spin_lock(&lockres->l_lock); 292 spin_lock(&lockres->l_lock);
270 293
271 BUG_ON(!(lockres->l_flags & USER_LOCK_BLOCKED)); 294 mlog_bug_on_msg(!(lockres->l_flags & USER_LOCK_QUEUED),
272 BUG_ON(!(lockres->l_flags & USER_LOCK_QUEUED)); 295 "Lockres %s, flags 0x%x\n",
296 lockres->l_name, lockres->l_flags);
273 297
274 /* notice that we don't clear USER_LOCK_BLOCKED here. That's 298 /* notice that we don't clear USER_LOCK_BLOCKED here. If it's
275 * for user_ast to do. */ 299 * set, we want user_ast clear it. */
276 lockres->l_flags &= ~USER_LOCK_QUEUED; 300 lockres->l_flags &= ~USER_LOCK_QUEUED;
277 301
302 /* It's valid to get here and no longer be blocked - if we get
303 * several basts in a row, we might be queued by the first
304 * one, the unblock thread might run and clear the queued
305 * flag, and finally we might get another bast which re-queues
306 * us before our ast for the downconvert is called. */
307 if (!(lockres->l_flags & USER_LOCK_BLOCKED)) {
308 mlog(0, "Lockres %s, flags 0x%x: queued but not blocking\n",
309 lockres->l_name, lockres->l_flags);
310 spin_unlock(&lockres->l_lock);
311 goto drop_ref;
312 }
313
278 if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) { 314 if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
279 mlog(0, "lock is in teardown so we do nothing\n"); 315 mlog(0, "lock is in teardown so we do nothing\n");
280 spin_unlock(&lockres->l_lock); 316 spin_unlock(&lockres->l_lock);
@@ -282,7 +318,9 @@ static void user_dlm_unblock_lock(void *opaque)
282 } 318 }
283 319
284 if (lockres->l_flags & USER_LOCK_BUSY) { 320 if (lockres->l_flags & USER_LOCK_BUSY) {
285 mlog(0, "BUSY flag detected...\n"); 321 mlog(0, "Cancel lock %s, flags 0x%x\n",
322 lockres->l_name, lockres->l_flags);
323
286 if (lockres->l_flags & USER_LOCK_IN_CANCEL) { 324 if (lockres->l_flags & USER_LOCK_IN_CANCEL) {
287 spin_unlock(&lockres->l_lock); 325 spin_unlock(&lockres->l_lock);
288 goto drop_ref; 326 goto drop_ref;
@@ -296,14 +334,7 @@ static void user_dlm_unblock_lock(void *opaque)
296 LKM_CANCEL, 334 LKM_CANCEL,
297 user_unlock_ast, 335 user_unlock_ast,
298 lockres); 336 lockres);
299 if (status == DLM_CANCELGRANT) { 337 if (status != DLM_NORMAL)
300 /* If we got this, then the ast was fired
301 * before we could cancel. We cleanup our
302 * state, and restart the function. */
303 spin_lock(&lockres->l_lock);
304 lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
305 spin_unlock(&lockres->l_lock);
306 } else if (status != DLM_NORMAL)
307 user_log_dlm_error("dlmunlock", status, lockres); 338 user_log_dlm_error("dlmunlock", status, lockres);
308 goto drop_ref; 339 goto drop_ref;
309 } 340 }
@@ -581,6 +612,14 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
581 mlog(0, "asked to destroy %s\n", lockres->l_name); 612 mlog(0, "asked to destroy %s\n", lockres->l_name);
582 613
583 spin_lock(&lockres->l_lock); 614 spin_lock(&lockres->l_lock);
615 if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
616 mlog(0, "Lock is already torn down\n");
617 spin_unlock(&lockres->l_lock);
618 return 0;
619 }
620
621 lockres->l_flags |= USER_LOCK_IN_TEARDOWN;
622
584 while (lockres->l_flags & USER_LOCK_BUSY) { 623 while (lockres->l_flags & USER_LOCK_BUSY) {
585 spin_unlock(&lockres->l_lock); 624 spin_unlock(&lockres->l_lock);
586 625
@@ -606,7 +645,6 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
606 645
607 lockres->l_flags &= ~USER_LOCK_ATTACHED; 646 lockres->l_flags &= ~USER_LOCK_ATTACHED;
608 lockres->l_flags |= USER_LOCK_BUSY; 647 lockres->l_flags |= USER_LOCK_BUSY;
609 lockres->l_flags |= USER_LOCK_IN_TEARDOWN;
610 spin_unlock(&lockres->l_lock); 648 spin_unlock(&lockres->l_lock);
611 649
612 mlog(0, "unlocking lockres %s\n", lockres->l_name); 650 mlog(0, "unlocking lockres %s\n", lockres->l_name);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 34e903a6a46b..581eb451a41a 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -260,6 +260,17 @@ static int ocfs2_truncate_file(struct inode *inode,
260 if (new_i_size == le64_to_cpu(fe->i_size)) 260 if (new_i_size == le64_to_cpu(fe->i_size))
261 goto bail; 261 goto bail;
262 262
263 /* This forces other nodes to sync and drop their pages. Do
264 * this even if we have a truncate without allocation change -
265 * ocfs2 cluster sizes can be much greater than page size, so
266 * we have to truncate them anyway. */
267 status = ocfs2_data_lock(inode, 1);
268 if (status < 0) {
269 mlog_errno(status);
270 goto bail;
271 }
272 ocfs2_data_unlock(inode, 1);
273
263 if (le32_to_cpu(fe->i_clusters) == 274 if (le32_to_cpu(fe->i_clusters) ==
264 ocfs2_clusters_for_bytes(osb->sb, new_i_size)) { 275 ocfs2_clusters_for_bytes(osb->sb, new_i_size)) {
265 mlog(0, "fe->i_clusters = %u, so we do a simple truncate\n", 276 mlog(0, "fe->i_clusters = %u, so we do a simple truncate\n",
@@ -272,14 +283,6 @@ static int ocfs2_truncate_file(struct inode *inode,
272 goto bail; 283 goto bail;
273 } 284 }
274 285
275 /* This forces other nodes to sync and drop their pages */
276 status = ocfs2_data_lock(inode, 1);
277 if (status < 0) {
278 mlog_errno(status);
279 goto bail;
280 }
281 ocfs2_data_unlock(inode, 1);
282
283 /* alright, we're going to need to do a full blown alloc size 286 /* alright, we're going to need to do a full blown alloc size
284 * change. Orphan the inode so that recovery can complete the 287 * change. Orphan the inode so that recovery can complete the
285 * truncate if necessary. This does the task of marking 288 * truncate if necessary. This does the task of marking
diff --git a/fs/pipe.c b/fs/pipe.c
index 795df987cd38..e984beb93a0e 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -36,7 +36,7 @@
36 */ 36 */
37 37
38/* Drop the inode semaphore and wait for a pipe event, atomically */ 38/* Drop the inode semaphore and wait for a pipe event, atomically */
39void pipe_wait(struct inode * inode) 39void pipe_wait(struct pipe_inode_info *pipe)
40{ 40{
41 DEFINE_WAIT(wait); 41 DEFINE_WAIT(wait);
42 42
@@ -44,11 +44,14 @@ void pipe_wait(struct inode * inode)
44 * Pipes are system-local resources, so sleeping on them 44 * Pipes are system-local resources, so sleeping on them
45 * is considered a noninteractive wait: 45 * is considered a noninteractive wait:
46 */ 46 */
47 prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE|TASK_NONINTERACTIVE); 47 prepare_to_wait(&pipe->wait, &wait,
48 mutex_unlock(PIPE_MUTEX(*inode)); 48 TASK_INTERRUPTIBLE | TASK_NONINTERACTIVE);
49 if (pipe->inode)
50 mutex_unlock(&pipe->inode->i_mutex);
49 schedule(); 51 schedule();
50 finish_wait(PIPE_WAIT(*inode), &wait); 52 finish_wait(&pipe->wait, &wait);
51 mutex_lock(PIPE_MUTEX(*inode)); 53 if (pipe->inode)
54 mutex_lock(&pipe->inode->i_mutex);
52} 55}
53 56
54static int 57static int
@@ -91,7 +94,8 @@ pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
91 return 0; 94 return 0;
92} 95}
93 96
94static void anon_pipe_buf_release(struct pipe_inode_info *info, struct pipe_buffer *buf) 97static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
98 struct pipe_buffer *buf)
95{ 99{
96 struct page *page = buf->page; 100 struct page *page = buf->page;
97 101
@@ -100,30 +104,27 @@ static void anon_pipe_buf_release(struct pipe_inode_info *info, struct pipe_buff
100 /* 104 /*
101 * If nobody else uses this page, and we don't already have a 105 * If nobody else uses this page, and we don't already have a
102 * temporary page, let's keep track of it as a one-deep 106 * temporary page, let's keep track of it as a one-deep
103 * allocation cache 107 * allocation cache. (Otherwise just release our reference to it)
104 */ 108 */
105 if (page_count(page) == 1 && !info->tmp_page) { 109 if (page_count(page) == 1 && !pipe->tmp_page)
106 info->tmp_page = page; 110 pipe->tmp_page = page;
107 return; 111 else
108 } 112 page_cache_release(page);
109
110 /*
111 * Otherwise just release our reference to it
112 */
113 page_cache_release(page);
114} 113}
115 114
116static void *anon_pipe_buf_map(struct file *file, struct pipe_inode_info *info, struct pipe_buffer *buf) 115static void * anon_pipe_buf_map(struct file *file, struct pipe_inode_info *pipe,
116 struct pipe_buffer *buf)
117{ 117{
118 return kmap(buf->page); 118 return kmap(buf->page);
119} 119}
120 120
121static void anon_pipe_buf_unmap(struct pipe_inode_info *info, struct pipe_buffer *buf) 121static void anon_pipe_buf_unmap(struct pipe_inode_info *pipe,
122 struct pipe_buffer *buf)
122{ 123{
123 kunmap(buf->page); 124 kunmap(buf->page);
124} 125}
125 126
126static int anon_pipe_buf_steal(struct pipe_inode_info *info, 127static int anon_pipe_buf_steal(struct pipe_inode_info *pipe,
127 struct pipe_buffer *buf) 128 struct pipe_buffer *buf)
128{ 129{
129 buf->flags |= PIPE_BUF_FLAG_STOLEN; 130 buf->flags |= PIPE_BUF_FLAG_STOLEN;
@@ -143,7 +144,7 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
143 unsigned long nr_segs, loff_t *ppos) 144 unsigned long nr_segs, loff_t *ppos)
144{ 145{
145 struct inode *inode = filp->f_dentry->d_inode; 146 struct inode *inode = filp->f_dentry->d_inode;
146 struct pipe_inode_info *info; 147 struct pipe_inode_info *pipe;
147 int do_wakeup; 148 int do_wakeup;
148 ssize_t ret; 149 ssize_t ret;
149 struct iovec *iov = (struct iovec *)_iov; 150 struct iovec *iov = (struct iovec *)_iov;
@@ -156,13 +157,13 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
156 157
157 do_wakeup = 0; 158 do_wakeup = 0;
158 ret = 0; 159 ret = 0;
159 mutex_lock(PIPE_MUTEX(*inode)); 160 mutex_lock(&inode->i_mutex);
160 info = inode->i_pipe; 161 pipe = inode->i_pipe;
161 for (;;) { 162 for (;;) {
162 int bufs = info->nrbufs; 163 int bufs = pipe->nrbufs;
163 if (bufs) { 164 if (bufs) {
164 int curbuf = info->curbuf; 165 int curbuf = pipe->curbuf;
165 struct pipe_buffer *buf = info->bufs + curbuf; 166 struct pipe_buffer *buf = pipe->bufs + curbuf;
166 struct pipe_buf_operations *ops = buf->ops; 167 struct pipe_buf_operations *ops = buf->ops;
167 void *addr; 168 void *addr;
168 size_t chars = buf->len; 169 size_t chars = buf->len;
@@ -171,16 +172,17 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
171 if (chars > total_len) 172 if (chars > total_len)
172 chars = total_len; 173 chars = total_len;
173 174
174 addr = ops->map(filp, info, buf); 175 addr = ops->map(filp, pipe, buf);
175 if (IS_ERR(addr)) { 176 if (IS_ERR(addr)) {
176 if (!ret) 177 if (!ret)
177 ret = PTR_ERR(addr); 178 ret = PTR_ERR(addr);
178 break; 179 break;
179 } 180 }
180 error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars); 181 error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars);
181 ops->unmap(info, buf); 182 ops->unmap(pipe, buf);
182 if (unlikely(error)) { 183 if (unlikely(error)) {
183 if (!ret) ret = -EFAULT; 184 if (!ret)
185 ret = -EFAULT;
184 break; 186 break;
185 } 187 }
186 ret += chars; 188 ret += chars;
@@ -188,10 +190,10 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
188 buf->len -= chars; 190 buf->len -= chars;
189 if (!buf->len) { 191 if (!buf->len) {
190 buf->ops = NULL; 192 buf->ops = NULL;
191 ops->release(info, buf); 193 ops->release(pipe, buf);
192 curbuf = (curbuf + 1) & (PIPE_BUFFERS-1); 194 curbuf = (curbuf + 1) & (PIPE_BUFFERS-1);
193 info->curbuf = curbuf; 195 pipe->curbuf = curbuf;
194 info->nrbufs = --bufs; 196 pipe->nrbufs = --bufs;
195 do_wakeup = 1; 197 do_wakeup = 1;
196 } 198 }
197 total_len -= chars; 199 total_len -= chars;
@@ -200,9 +202,9 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
200 } 202 }
201 if (bufs) /* More to do? */ 203 if (bufs) /* More to do? */
202 continue; 204 continue;
203 if (!PIPE_WRITERS(*inode)) 205 if (!pipe->writers)
204 break; 206 break;
205 if (!PIPE_WAITING_WRITERS(*inode)) { 207 if (!pipe->waiting_writers) {
206 /* syscall merging: Usually we must not sleep 208 /* syscall merging: Usually we must not sleep
207 * if O_NONBLOCK is set, or if we got some data. 209 * if O_NONBLOCK is set, or if we got some data.
208 * But if a writer sleeps in kernel space, then 210 * But if a writer sleeps in kernel space, then
@@ -216,20 +218,22 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
216 } 218 }
217 } 219 }
218 if (signal_pending(current)) { 220 if (signal_pending(current)) {
219 if (!ret) ret = -ERESTARTSYS; 221 if (!ret)
222 ret = -ERESTARTSYS;
220 break; 223 break;
221 } 224 }
222 if (do_wakeup) { 225 if (do_wakeup) {
223 wake_up_interruptible_sync(PIPE_WAIT(*inode)); 226 wake_up_interruptible_sync(&pipe->wait);
224 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT); 227 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
225 } 228 }
226 pipe_wait(inode); 229 pipe_wait(pipe);
227 } 230 }
228 mutex_unlock(PIPE_MUTEX(*inode)); 231 mutex_unlock(&inode->i_mutex);
229 /* Signal writers asynchronously that there is more room. */ 232
233 /* Signal writers asynchronously that there is more room. */
230 if (do_wakeup) { 234 if (do_wakeup) {
231 wake_up_interruptible(PIPE_WAIT(*inode)); 235 wake_up_interruptible(&pipe->wait);
232 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT); 236 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
233 } 237 }
234 if (ret > 0) 238 if (ret > 0)
235 file_accessed(filp); 239 file_accessed(filp);
@@ -240,6 +244,7 @@ static ssize_t
240pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) 244pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
241{ 245{
242 struct iovec iov = { .iov_base = buf, .iov_len = count }; 246 struct iovec iov = { .iov_base = buf, .iov_len = count };
247
243 return pipe_readv(filp, &iov, 1, ppos); 248 return pipe_readv(filp, &iov, 1, ppos);
244} 249}
245 250
@@ -248,7 +253,7 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
248 unsigned long nr_segs, loff_t *ppos) 253 unsigned long nr_segs, loff_t *ppos)
249{ 254{
250 struct inode *inode = filp->f_dentry->d_inode; 255 struct inode *inode = filp->f_dentry->d_inode;
251 struct pipe_inode_info *info; 256 struct pipe_inode_info *pipe;
252 ssize_t ret; 257 ssize_t ret;
253 int do_wakeup; 258 int do_wakeup;
254 struct iovec *iov = (struct iovec *)_iov; 259 struct iovec *iov = (struct iovec *)_iov;
@@ -262,10 +267,10 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
262 267
263 do_wakeup = 0; 268 do_wakeup = 0;
264 ret = 0; 269 ret = 0;
265 mutex_lock(PIPE_MUTEX(*inode)); 270 mutex_lock(&inode->i_mutex);
266 info = inode->i_pipe; 271 pipe = inode->i_pipe;
267 272
268 if (!PIPE_READERS(*inode)) { 273 if (!pipe->readers) {
269 send_sig(SIGPIPE, current, 0); 274 send_sig(SIGPIPE, current, 0);
270 ret = -EPIPE; 275 ret = -EPIPE;
271 goto out; 276 goto out;
@@ -273,23 +278,25 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
273 278
274 /* We try to merge small writes */ 279 /* We try to merge small writes */
275 chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */ 280 chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */
276 if (info->nrbufs && chars != 0) { 281 if (pipe->nrbufs && chars != 0) {
277 int lastbuf = (info->curbuf + info->nrbufs - 1) & (PIPE_BUFFERS-1); 282 int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) &
278 struct pipe_buffer *buf = info->bufs + lastbuf; 283 (PIPE_BUFFERS-1);
284 struct pipe_buffer *buf = pipe->bufs + lastbuf;
279 struct pipe_buf_operations *ops = buf->ops; 285 struct pipe_buf_operations *ops = buf->ops;
280 int offset = buf->offset + buf->len; 286 int offset = buf->offset + buf->len;
287
281 if (ops->can_merge && offset + chars <= PAGE_SIZE) { 288 if (ops->can_merge && offset + chars <= PAGE_SIZE) {
282 void *addr; 289 void *addr;
283 int error; 290 int error;
284 291
285 addr = ops->map(filp, info, buf); 292 addr = ops->map(filp, pipe, buf);
286 if (IS_ERR(addr)) { 293 if (IS_ERR(addr)) {
287 error = PTR_ERR(addr); 294 error = PTR_ERR(addr);
288 goto out; 295 goto out;
289 } 296 }
290 error = pipe_iov_copy_from_user(offset + addr, iov, 297 error = pipe_iov_copy_from_user(offset + addr, iov,
291 chars); 298 chars);
292 ops->unmap(info, buf); 299 ops->unmap(pipe, buf);
293 ret = error; 300 ret = error;
294 do_wakeup = 1; 301 do_wakeup = 1;
295 if (error) 302 if (error)
@@ -304,16 +311,18 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
304 311
305 for (;;) { 312 for (;;) {
306 int bufs; 313 int bufs;
307 if (!PIPE_READERS(*inode)) { 314
315 if (!pipe->readers) {
308 send_sig(SIGPIPE, current, 0); 316 send_sig(SIGPIPE, current, 0);
309 if (!ret) ret = -EPIPE; 317 if (!ret)
318 ret = -EPIPE;
310 break; 319 break;
311 } 320 }
312 bufs = info->nrbufs; 321 bufs = pipe->nrbufs;
313 if (bufs < PIPE_BUFFERS) { 322 if (bufs < PIPE_BUFFERS) {
314 int newbuf = (info->curbuf + bufs) & (PIPE_BUFFERS-1); 323 int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1);
315 struct pipe_buffer *buf = info->bufs + newbuf; 324 struct pipe_buffer *buf = pipe->bufs + newbuf;
316 struct page *page = info->tmp_page; 325 struct page *page = pipe->tmp_page;
317 int error; 326 int error;
318 327
319 if (!page) { 328 if (!page) {
@@ -322,9 +331,9 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
322 ret = ret ? : -ENOMEM; 331 ret = ret ? : -ENOMEM;
323 break; 332 break;
324 } 333 }
325 info->tmp_page = page; 334 pipe->tmp_page = page;
326 } 335 }
327 /* Always wakeup, even if the copy fails. Otherwise 336 /* Always wake up, even if the copy fails. Otherwise
328 * we lock up (O_NONBLOCK-)readers that sleep due to 337 * we lock up (O_NONBLOCK-)readers that sleep due to
329 * syscall merging. 338 * syscall merging.
330 * FIXME! Is this really true? 339 * FIXME! Is this really true?
@@ -337,7 +346,8 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
337 error = pipe_iov_copy_from_user(kmap(page), iov, chars); 346 error = pipe_iov_copy_from_user(kmap(page), iov, chars);
338 kunmap(page); 347 kunmap(page);
339 if (unlikely(error)) { 348 if (unlikely(error)) {
340 if (!ret) ret = -EFAULT; 349 if (!ret)
350 ret = -EFAULT;
341 break; 351 break;
342 } 352 }
343 ret += chars; 353 ret += chars;
@@ -347,8 +357,8 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
347 buf->ops = &anon_pipe_buf_ops; 357 buf->ops = &anon_pipe_buf_ops;
348 buf->offset = 0; 358 buf->offset = 0;
349 buf->len = chars; 359 buf->len = chars;
350 info->nrbufs = ++bufs; 360 pipe->nrbufs = ++bufs;
351 info->tmp_page = NULL; 361 pipe->tmp_page = NULL;
352 362
353 total_len -= chars; 363 total_len -= chars;
354 if (!total_len) 364 if (!total_len)
@@ -357,27 +367,29 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
357 if (bufs < PIPE_BUFFERS) 367 if (bufs < PIPE_BUFFERS)
358 continue; 368 continue;
359 if (filp->f_flags & O_NONBLOCK) { 369 if (filp->f_flags & O_NONBLOCK) {
360 if (!ret) ret = -EAGAIN; 370 if (!ret)
371 ret = -EAGAIN;
361 break; 372 break;
362 } 373 }
363 if (signal_pending(current)) { 374 if (signal_pending(current)) {
364 if (!ret) ret = -ERESTARTSYS; 375 if (!ret)
376 ret = -ERESTARTSYS;
365 break; 377 break;
366 } 378 }
367 if (do_wakeup) { 379 if (do_wakeup) {
368 wake_up_interruptible_sync(PIPE_WAIT(*inode)); 380 wake_up_interruptible_sync(&pipe->wait);
369 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN); 381 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
370 do_wakeup = 0; 382 do_wakeup = 0;
371 } 383 }
372 PIPE_WAITING_WRITERS(*inode)++; 384 pipe->waiting_writers++;
373 pipe_wait(inode); 385 pipe_wait(pipe);
374 PIPE_WAITING_WRITERS(*inode)--; 386 pipe->waiting_writers--;
375 } 387 }
376out: 388out:
377 mutex_unlock(PIPE_MUTEX(*inode)); 389 mutex_unlock(&inode->i_mutex);
378 if (do_wakeup) { 390 if (do_wakeup) {
379 wake_up_interruptible(PIPE_WAIT(*inode)); 391 wake_up_interruptible(&pipe->wait);
380 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN); 392 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
381 } 393 }
382 if (ret > 0) 394 if (ret > 0)
383 file_update_time(filp); 395 file_update_time(filp);
@@ -389,6 +401,7 @@ pipe_write(struct file *filp, const char __user *buf,
389 size_t count, loff_t *ppos) 401 size_t count, loff_t *ppos)
390{ 402{
391 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count }; 403 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
404
392 return pipe_writev(filp, &iov, 1, ppos); 405 return pipe_writev(filp, &iov, 1, ppos);
393} 406}
394 407
@@ -399,7 +412,8 @@ bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
399} 412}
400 413
401static ssize_t 414static ssize_t
402bad_pipe_w(struct file *filp, const char __user *buf, size_t count, loff_t *ppos) 415bad_pipe_w(struct file *filp, const char __user *buf, size_t count,
416 loff_t *ppos)
403{ 417{
404 return -EBADF; 418 return -EBADF;
405} 419}
@@ -409,21 +423,22 @@ pipe_ioctl(struct inode *pino, struct file *filp,
409 unsigned int cmd, unsigned long arg) 423 unsigned int cmd, unsigned long arg)
410{ 424{
411 struct inode *inode = filp->f_dentry->d_inode; 425 struct inode *inode = filp->f_dentry->d_inode;
412 struct pipe_inode_info *info; 426 struct pipe_inode_info *pipe;
413 int count, buf, nrbufs; 427 int count, buf, nrbufs;
414 428
415 switch (cmd) { 429 switch (cmd) {
416 case FIONREAD: 430 case FIONREAD:
417 mutex_lock(PIPE_MUTEX(*inode)); 431 mutex_lock(&inode->i_mutex);
418 info = inode->i_pipe; 432 pipe = inode->i_pipe;
419 count = 0; 433 count = 0;
420 buf = info->curbuf; 434 buf = pipe->curbuf;
421 nrbufs = info->nrbufs; 435 nrbufs = pipe->nrbufs;
422 while (--nrbufs >= 0) { 436 while (--nrbufs >= 0) {
423 count += info->bufs[buf].len; 437 count += pipe->bufs[buf].len;
424 buf = (buf+1) & (PIPE_BUFFERS-1); 438 buf = (buf+1) & (PIPE_BUFFERS-1);
425 } 439 }
426 mutex_unlock(PIPE_MUTEX(*inode)); 440 mutex_unlock(&inode->i_mutex);
441
427 return put_user(count, (int __user *)arg); 442 return put_user(count, (int __user *)arg);
428 default: 443 default:
429 return -EINVAL; 444 return -EINVAL;
@@ -436,17 +451,17 @@ pipe_poll(struct file *filp, poll_table *wait)
436{ 451{
437 unsigned int mask; 452 unsigned int mask;
438 struct inode *inode = filp->f_dentry->d_inode; 453 struct inode *inode = filp->f_dentry->d_inode;
439 struct pipe_inode_info *info = inode->i_pipe; 454 struct pipe_inode_info *pipe = inode->i_pipe;
440 int nrbufs; 455 int nrbufs;
441 456
442 poll_wait(filp, PIPE_WAIT(*inode), wait); 457 poll_wait(filp, &pipe->wait, wait);
443 458
444 /* Reading only -- no need for acquiring the semaphore. */ 459 /* Reading only -- no need for acquiring the semaphore. */
445 nrbufs = info->nrbufs; 460 nrbufs = pipe->nrbufs;
446 mask = 0; 461 mask = 0;
447 if (filp->f_mode & FMODE_READ) { 462 if (filp->f_mode & FMODE_READ) {
448 mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0; 463 mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0;
449 if (!PIPE_WRITERS(*inode) && filp->f_version != PIPE_WCOUNTER(*inode)) 464 if (!pipe->writers && filp->f_version != pipe->w_counter)
450 mask |= POLLHUP; 465 mask |= POLLHUP;
451 } 466 }
452 467
@@ -456,7 +471,7 @@ pipe_poll(struct file *filp, poll_table *wait)
456 * Most Unices do not set POLLERR for FIFOs but on Linux they 471 * Most Unices do not set POLLERR for FIFOs but on Linux they
457 * behave exactly like pipes for poll(). 472 * behave exactly like pipes for poll().
458 */ 473 */
459 if (!PIPE_READERS(*inode)) 474 if (!pipe->readers)
460 mask |= POLLERR; 475 mask |= POLLERR;
461 } 476 }
462 477
@@ -466,17 +481,21 @@ pipe_poll(struct file *filp, poll_table *wait)
466static int 481static int
467pipe_release(struct inode *inode, int decr, int decw) 482pipe_release(struct inode *inode, int decr, int decw)
468{ 483{
469 mutex_lock(PIPE_MUTEX(*inode)); 484 struct pipe_inode_info *pipe;
470 PIPE_READERS(*inode) -= decr; 485
471 PIPE_WRITERS(*inode) -= decw; 486 mutex_lock(&inode->i_mutex);
472 if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) { 487 pipe = inode->i_pipe;
488 pipe->readers -= decr;
489 pipe->writers -= decw;
490
491 if (!pipe->readers && !pipe->writers) {
473 free_pipe_info(inode); 492 free_pipe_info(inode);
474 } else { 493 } else {
475 wake_up_interruptible(PIPE_WAIT(*inode)); 494 wake_up_interruptible(&pipe->wait);
476 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN); 495 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
477 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT); 496 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
478 } 497 }
479 mutex_unlock(PIPE_MUTEX(*inode)); 498 mutex_unlock(&inode->i_mutex);
480 499
481 return 0; 500 return 0;
482} 501}
@@ -487,9 +506,9 @@ pipe_read_fasync(int fd, struct file *filp, int on)
487 struct inode *inode = filp->f_dentry->d_inode; 506 struct inode *inode = filp->f_dentry->d_inode;
488 int retval; 507 int retval;
489 508
490 mutex_lock(PIPE_MUTEX(*inode)); 509 mutex_lock(&inode->i_mutex);
491 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode)); 510 retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_readers);
492 mutex_unlock(PIPE_MUTEX(*inode)); 511 mutex_unlock(&inode->i_mutex);
493 512
494 if (retval < 0) 513 if (retval < 0)
495 return retval; 514 return retval;
@@ -504,9 +523,9 @@ pipe_write_fasync(int fd, struct file *filp, int on)
504 struct inode *inode = filp->f_dentry->d_inode; 523 struct inode *inode = filp->f_dentry->d_inode;
505 int retval; 524 int retval;
506 525
507 mutex_lock(PIPE_MUTEX(*inode)); 526 mutex_lock(&inode->i_mutex);
508 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode)); 527 retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_writers);
509 mutex_unlock(PIPE_MUTEX(*inode)); 528 mutex_unlock(&inode->i_mutex);
510 529
511 if (retval < 0) 530 if (retval < 0)
512 return retval; 531 return retval;
@@ -519,16 +538,17 @@ static int
519pipe_rdwr_fasync(int fd, struct file *filp, int on) 538pipe_rdwr_fasync(int fd, struct file *filp, int on)
520{ 539{
521 struct inode *inode = filp->f_dentry->d_inode; 540 struct inode *inode = filp->f_dentry->d_inode;
541 struct pipe_inode_info *pipe = inode->i_pipe;
522 int retval; 542 int retval;
523 543
524 mutex_lock(PIPE_MUTEX(*inode)); 544 mutex_lock(&inode->i_mutex);
525 545
526 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode)); 546 retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
527 547
528 if (retval >= 0) 548 if (retval >= 0)
529 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode)); 549 retval = fasync_helper(fd, filp, on, &pipe->fasync_writers);
530 550
531 mutex_unlock(PIPE_MUTEX(*inode)); 551 mutex_unlock(&inode->i_mutex);
532 552
533 if (retval < 0) 553 if (retval < 0)
534 return retval; 554 return retval;
@@ -567,9 +587,9 @@ pipe_read_open(struct inode *inode, struct file *filp)
567{ 587{
568 /* We could have perhaps used atomic_t, but this and friends 588 /* We could have perhaps used atomic_t, but this and friends
569 below are the only places. So it doesn't seem worthwhile. */ 589 below are the only places. So it doesn't seem worthwhile. */
570 mutex_lock(PIPE_MUTEX(*inode)); 590 mutex_lock(&inode->i_mutex);
571 PIPE_READERS(*inode)++; 591 inode->i_pipe->readers++;
572 mutex_unlock(PIPE_MUTEX(*inode)); 592 mutex_unlock(&inode->i_mutex);
573 593
574 return 0; 594 return 0;
575} 595}
@@ -577,9 +597,9 @@ pipe_read_open(struct inode *inode, struct file *filp)
577static int 597static int
578pipe_write_open(struct inode *inode, struct file *filp) 598pipe_write_open(struct inode *inode, struct file *filp)
579{ 599{
580 mutex_lock(PIPE_MUTEX(*inode)); 600 mutex_lock(&inode->i_mutex);
581 PIPE_WRITERS(*inode)++; 601 inode->i_pipe->writers++;
582 mutex_unlock(PIPE_MUTEX(*inode)); 602 mutex_unlock(&inode->i_mutex);
583 603
584 return 0; 604 return 0;
585} 605}
@@ -587,12 +607,12 @@ pipe_write_open(struct inode *inode, struct file *filp)
587static int 607static int
588pipe_rdwr_open(struct inode *inode, struct file *filp) 608pipe_rdwr_open(struct inode *inode, struct file *filp)
589{ 609{
590 mutex_lock(PIPE_MUTEX(*inode)); 610 mutex_lock(&inode->i_mutex);
591 if (filp->f_mode & FMODE_READ) 611 if (filp->f_mode & FMODE_READ)
592 PIPE_READERS(*inode)++; 612 inode->i_pipe->readers++;
593 if (filp->f_mode & FMODE_WRITE) 613 if (filp->f_mode & FMODE_WRITE)
594 PIPE_WRITERS(*inode)++; 614 inode->i_pipe->writers++;
595 mutex_unlock(PIPE_MUTEX(*inode)); 615 mutex_unlock(&inode->i_mutex);
596 616
597 return 0; 617 return 0;
598} 618}
@@ -675,37 +695,38 @@ static struct file_operations rdwr_pipe_fops = {
675 .fasync = pipe_rdwr_fasync, 695 .fasync = pipe_rdwr_fasync,
676}; 696};
677 697
678void free_pipe_info(struct inode *inode) 698struct pipe_inode_info * alloc_pipe_info(struct inode *inode)
699{
700 struct pipe_inode_info *pipe;
701
702 pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
703 if (pipe) {
704 init_waitqueue_head(&pipe->wait);
705 pipe->r_counter = pipe->w_counter = 1;
706 pipe->inode = inode;
707 }
708
709 return pipe;
710}
711
712void __free_pipe_info(struct pipe_inode_info *pipe)
679{ 713{
680 int i; 714 int i;
681 struct pipe_inode_info *info = inode->i_pipe;
682 715
683 inode->i_pipe = NULL;
684 for (i = 0; i < PIPE_BUFFERS; i++) { 716 for (i = 0; i < PIPE_BUFFERS; i++) {
685 struct pipe_buffer *buf = info->bufs + i; 717 struct pipe_buffer *buf = pipe->bufs + i;
686 if (buf->ops) 718 if (buf->ops)
687 buf->ops->release(info, buf); 719 buf->ops->release(pipe, buf);
688 } 720 }
689 if (info->tmp_page) 721 if (pipe->tmp_page)
690 __free_page(info->tmp_page); 722 __free_page(pipe->tmp_page);
691 kfree(info); 723 kfree(pipe);
692} 724}
693 725
694struct inode* pipe_new(struct inode* inode) 726void free_pipe_info(struct inode *inode)
695{ 727{
696 struct pipe_inode_info *info; 728 __free_pipe_info(inode->i_pipe);
697 729 inode->i_pipe = NULL;
698 info = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
699 if (!info)
700 goto fail_page;
701 inode->i_pipe = info;
702
703 init_waitqueue_head(PIPE_WAIT(*inode));
704 PIPE_RCOUNTER(*inode) = PIPE_WCOUNTER(*inode) = 1;
705
706 return inode;
707fail_page:
708 return NULL;
709} 730}
710 731
711static struct vfsmount *pipe_mnt __read_mostly; 732static struct vfsmount *pipe_mnt __read_mostly;
@@ -713,6 +734,7 @@ static int pipefs_delete_dentry(struct dentry *dentry)
713{ 734{
714 return 1; 735 return 1;
715} 736}
737
716static struct dentry_operations pipefs_dentry_operations = { 738static struct dentry_operations pipefs_dentry_operations = {
717 .d_delete = pipefs_delete_dentry, 739 .d_delete = pipefs_delete_dentry,
718}; 740};
@@ -720,13 +742,17 @@ static struct dentry_operations pipefs_dentry_operations = {
720static struct inode * get_pipe_inode(void) 742static struct inode * get_pipe_inode(void)
721{ 743{
722 struct inode *inode = new_inode(pipe_mnt->mnt_sb); 744 struct inode *inode = new_inode(pipe_mnt->mnt_sb);
745 struct pipe_inode_info *pipe;
723 746
724 if (!inode) 747 if (!inode)
725 goto fail_inode; 748 goto fail_inode;
726 749
727 if(!pipe_new(inode)) 750 pipe = alloc_pipe_info(inode);
751 if (!pipe)
728 goto fail_iput; 752 goto fail_iput;
729 PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1; 753 inode->i_pipe = pipe;
754
755 pipe->readers = pipe->writers = 1;
730 inode->i_fop = &rdwr_pipe_fops; 756 inode->i_fop = &rdwr_pipe_fops;
731 757
732 /* 758 /*
@@ -741,10 +767,12 @@ static struct inode * get_pipe_inode(void)
741 inode->i_gid = current->fsgid; 767 inode->i_gid = current->fsgid;
742 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 768 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
743 inode->i_blksize = PAGE_SIZE; 769 inode->i_blksize = PAGE_SIZE;
770
744 return inode; 771 return inode;
745 772
746fail_iput: 773fail_iput:
747 iput(inode); 774 iput(inode);
775
748fail_inode: 776fail_inode:
749 return NULL; 777 return NULL;
750} 778}
@@ -757,7 +785,7 @@ int do_pipe(int *fd)
757 struct inode * inode; 785 struct inode * inode;
758 struct file *f1, *f2; 786 struct file *f1, *f2;
759 int error; 787 int error;
760 int i,j; 788 int i, j;
761 789
762 error = -ENFILE; 790 error = -ENFILE;
763 f1 = get_empty_filp(); 791 f1 = get_empty_filp();
@@ -790,6 +818,7 @@ int do_pipe(int *fd)
790 dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this); 818 dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this);
791 if (!dentry) 819 if (!dentry)
792 goto close_f12_inode_i_j; 820 goto close_f12_inode_i_j;
821
793 dentry->d_op = &pipefs_dentry_operations; 822 dentry->d_op = &pipefs_dentry_operations;
794 d_add(dentry, inode); 823 d_add(dentry, inode);
795 f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt)); 824 f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt));
@@ -813,6 +842,7 @@ int do_pipe(int *fd)
813 fd_install(j, f2); 842 fd_install(j, f2);
814 fd[0] = i; 843 fd[0] = i;
815 fd[1] = j; 844 fd[1] = j;
845
816 return 0; 846 return 0;
817 847
818close_f12_inode_i_j: 848close_f12_inode_i_j:
@@ -837,8 +867,9 @@ no_files:
837 * d_name - pipe: will go nicely and kill the special-casing in procfs. 867 * d_name - pipe: will go nicely and kill the special-casing in procfs.
838 */ 868 */
839 869
840static struct super_block *pipefs_get_sb(struct file_system_type *fs_type, 870static struct super_block *
841 int flags, const char *dev_name, void *data) 871pipefs_get_sb(struct file_system_type *fs_type, int flags,
872 const char *dev_name, void *data)
842{ 873{
843 return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC); 874 return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC);
844} 875}
@@ -852,6 +883,7 @@ static struct file_system_type pipe_fs_type = {
852static int __init init_pipe_fs(void) 883static int __init init_pipe_fs(void)
853{ 884{
854 int err = register_filesystem(&pipe_fs_type); 885 int err = register_filesystem(&pipe_fs_type);
886
855 if (!err) { 887 if (!err) {
856 pipe_mnt = kern_mount(&pipe_fs_type); 888 pipe_mnt = kern_mount(&pipe_fs_type);
857 if (IS_ERR(pipe_mnt)) { 889 if (IS_ERR(pipe_mnt)) {
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 7efa73d44c9a..20d4b2237fce 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -103,8 +103,8 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
103 size_t buflen, loff_t *fpos) 103 size_t buflen, loff_t *fpos)
104{ 104{
105 ssize_t acc = 0, tmp; 105 ssize_t acc = 0, tmp;
106 size_t tsz, nr_bytes; 106 size_t tsz;
107 u64 start; 107 u64 start, nr_bytes;
108 struct vmcore *curr_m = NULL; 108 struct vmcore *curr_m = NULL;
109 109
110 if (buflen == 0 || *fpos >= vmcore_size) 110 if (buflen == 0 || *fpos >= vmcore_size)
diff --git a/fs/read_write.c b/fs/read_write.c
index 6256ca81a718..5bc0e9234f9d 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -202,7 +202,7 @@ int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count
202 goto Einval; 202 goto Einval;
203 203
204 inode = file->f_dentry->d_inode; 204 inode = file->f_dentry->d_inode;
205 if (inode->i_flock && MANDATORY_LOCK(inode)) { 205 if (unlikely(inode->i_flock && MANDATORY_LOCK(inode))) {
206 int retval = locks_mandatory_area( 206 int retval = locks_mandatory_area(
207 read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, 207 read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE,
208 inode, file, pos, count); 208 inode, file, pos, count);
diff --git a/fs/select.c b/fs/select.c
index 071660fa7b01..a8109baa5e46 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -310,8 +310,9 @@ static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
310 fd_set __user *exp, s64 *timeout) 310 fd_set __user *exp, s64 *timeout)
311{ 311{
312 fd_set_bits fds; 312 fd_set_bits fds;
313 char *bits; 313 void *bits;
314 int ret, size, max_fdset; 314 int ret, max_fdset;
315 unsigned int size;
315 struct fdtable *fdt; 316 struct fdtable *fdt;
316 /* Allocate small arguments on the stack to save memory and be faster */ 317 /* Allocate small arguments on the stack to save memory and be faster */
317 long stack_fds[SELECT_STACK_ALLOC/sizeof(long)]; 318 long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];
@@ -333,20 +334,21 @@ static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
333 * since we used fdset we need to allocate memory in units of 334 * since we used fdset we need to allocate memory in units of
334 * long-words. 335 * long-words.
335 */ 336 */
336 ret = -ENOMEM;
337 size = FDS_BYTES(n); 337 size = FDS_BYTES(n);
338 if (6*size < SELECT_STACK_ALLOC) 338 bits = stack_fds;
339 bits = stack_fds; 339 if (size > sizeof(stack_fds) / 6) {
340 else 340 /* Not enough space in on-stack array; must use kmalloc */
341 ret = -ENOMEM;
341 bits = kmalloc(6 * size, GFP_KERNEL); 342 bits = kmalloc(6 * size, GFP_KERNEL);
342 if (!bits) 343 if (!bits)
343 goto out_nofds; 344 goto out_nofds;
344 fds.in = (unsigned long *) bits; 345 }
345 fds.out = (unsigned long *) (bits + size); 346 fds.in = bits;
346 fds.ex = (unsigned long *) (bits + 2*size); 347 fds.out = bits + size;
347 fds.res_in = (unsigned long *) (bits + 3*size); 348 fds.ex = bits + 2*size;
348 fds.res_out = (unsigned long *) (bits + 4*size); 349 fds.res_in = bits + 3*size;
349 fds.res_ex = (unsigned long *) (bits + 5*size); 350 fds.res_out = bits + 4*size;
351 fds.res_ex = bits + 5*size;
350 352
351 if ((ret = get_fd_set(n, inp, fds.in)) || 353 if ((ret = get_fd_set(n, inp, fds.in)) ||
352 (ret = get_fd_set(n, outp, fds.out)) || 354 (ret = get_fd_set(n, outp, fds.out)) ||
diff --git a/fs/splice.c b/fs/splice.c
index bfa42a277bb8..e50a460239dd 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -9,11 +9,12 @@
9 * that transfers data buffers to or from a pipe buffer. 9 * that transfers data buffers to or from a pipe buffer.
10 * 10 *
11 * Named by Larry McVoy, original implementation from Linus, extended by 11 * Named by Larry McVoy, original implementation from Linus, extended by
12 * Jens to support splicing to files and fixing the initial implementation 12 * Jens to support splicing to files, network, direct splicing, etc and
13 * bugs. 13 * fixing lots of bugs.
14 * 14 *
15 * Copyright (C) 2005 Jens Axboe <axboe@suse.de> 15 * Copyright (C) 2005-2006 Jens Axboe <axboe@suse.de>
16 * Copyright (C) 2005 Linus Torvalds <torvalds@osdl.org> 16 * Copyright (C) 2005-2006 Linus Torvalds <torvalds@osdl.org>
17 * Copyright (C) 2006 Ingo Molnar <mingo@elte.hu>
17 * 18 *
18 */ 19 */
19#include <linux/fs.h> 20#include <linux/fs.h>
@@ -84,26 +85,43 @@ static void *page_cache_pipe_buf_map(struct file *file,
84 struct pipe_buffer *buf) 85 struct pipe_buffer *buf)
85{ 86{
86 struct page *page = buf->page; 87 struct page *page = buf->page;
87 88 int err;
88 lock_page(page);
89 89
90 if (!PageUptodate(page)) { 90 if (!PageUptodate(page)) {
91 unlock_page(page); 91 lock_page(page);
92 return ERR_PTR(-EIO); 92
93 } 93 /*
94 * Page got truncated/unhashed. This will cause a 0-byte
95 * splice, if this is the first page.
96 */
97 if (!page->mapping) {
98 err = -ENODATA;
99 goto error;
100 }
94 101
95 if (!page->mapping) { 102 /*
103 * Uh oh, read-error from disk.
104 */
105 if (!PageUptodate(page)) {
106 err = -EIO;
107 goto error;
108 }
109
110 /*
111 * Page is ok afterall, fall through to mapping.
112 */
96 unlock_page(page); 113 unlock_page(page);
97 return ERR_PTR(-ENODATA);
98 } 114 }
99 115
100 return kmap(buf->page); 116 return kmap(page);
117error:
118 unlock_page(page);
119 return ERR_PTR(err);
101} 120}
102 121
103static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info, 122static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info,
104 struct pipe_buffer *buf) 123 struct pipe_buffer *buf)
105{ 124{
106 unlock_page(buf->page);
107 kunmap(buf->page); 125 kunmap(buf->page);
108} 126}
109 127
@@ -119,34 +137,30 @@ static struct pipe_buf_operations page_cache_pipe_buf_ops = {
119 * Pipe output worker. This sets up our pipe format with the page cache 137 * Pipe output worker. This sets up our pipe format with the page cache
120 * pipe buffer operations. Otherwise very similar to the regular pipe_writev(). 138 * pipe buffer operations. Otherwise very similar to the regular pipe_writev().
121 */ 139 */
122static ssize_t move_to_pipe(struct inode *inode, struct page **pages, 140static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages,
123 int nr_pages, unsigned long offset, 141 int nr_pages, unsigned long offset,
124 unsigned long len, unsigned int flags) 142 unsigned long len, unsigned int flags)
125{ 143{
126 struct pipe_inode_info *info;
127 int ret, do_wakeup, i; 144 int ret, do_wakeup, i;
128 145
129 ret = 0; 146 ret = 0;
130 do_wakeup = 0; 147 do_wakeup = 0;
131 i = 0; 148 i = 0;
132 149
133 mutex_lock(PIPE_MUTEX(*inode)); 150 if (pipe->inode)
151 mutex_lock(&pipe->inode->i_mutex);
134 152
135 info = inode->i_pipe;
136 for (;;) { 153 for (;;) {
137 int bufs; 154 if (!pipe->readers) {
138
139 if (!PIPE_READERS(*inode)) {
140 send_sig(SIGPIPE, current, 0); 155 send_sig(SIGPIPE, current, 0);
141 if (!ret) 156 if (!ret)
142 ret = -EPIPE; 157 ret = -EPIPE;
143 break; 158 break;
144 } 159 }
145 160
146 bufs = info->nrbufs; 161 if (pipe->nrbufs < PIPE_BUFFERS) {
147 if (bufs < PIPE_BUFFERS) { 162 int newbuf = (pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1);
148 int newbuf = (info->curbuf + bufs) & (PIPE_BUFFERS - 1); 163 struct pipe_buffer *buf = pipe->bufs + newbuf;
149 struct pipe_buffer *buf = info->bufs + newbuf;
150 struct page *page = pages[i++]; 164 struct page *page = pages[i++];
151 unsigned long this_len; 165 unsigned long this_len;
152 166
@@ -158,8 +172,9 @@ static ssize_t move_to_pipe(struct inode *inode, struct page **pages,
158 buf->offset = offset; 172 buf->offset = offset;
159 buf->len = this_len; 173 buf->len = this_len;
160 buf->ops = &page_cache_pipe_buf_ops; 174 buf->ops = &page_cache_pipe_buf_ops;
161 info->nrbufs = ++bufs; 175 pipe->nrbufs++;
162 do_wakeup = 1; 176 if (pipe->inode)
177 do_wakeup = 1;
163 178
164 ret += this_len; 179 ret += this_len;
165 len -= this_len; 180 len -= this_len;
@@ -168,7 +183,7 @@ static ssize_t move_to_pipe(struct inode *inode, struct page **pages,
168 break; 183 break;
169 if (!len) 184 if (!len)
170 break; 185 break;
171 if (bufs < PIPE_BUFFERS) 186 if (pipe->nrbufs < PIPE_BUFFERS)
172 continue; 187 continue;
173 188
174 break; 189 break;
@@ -187,22 +202,26 @@ static ssize_t move_to_pipe(struct inode *inode, struct page **pages,
187 } 202 }
188 203
189 if (do_wakeup) { 204 if (do_wakeup) {
190 wake_up_interruptible_sync(PIPE_WAIT(*inode)); 205 smp_mb();
191 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, 206 if (waitqueue_active(&pipe->wait))
192 POLL_IN); 207 wake_up_interruptible_sync(&pipe->wait);
208 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
193 do_wakeup = 0; 209 do_wakeup = 0;
194 } 210 }
195 211
196 PIPE_WAITING_WRITERS(*inode)++; 212 pipe->waiting_writers++;
197 pipe_wait(inode); 213 pipe_wait(pipe);
198 PIPE_WAITING_WRITERS(*inode)--; 214 pipe->waiting_writers--;
199 } 215 }
200 216
201 mutex_unlock(PIPE_MUTEX(*inode)); 217 if (pipe->inode)
218 mutex_unlock(&pipe->inode->i_mutex);
202 219
203 if (do_wakeup) { 220 if (do_wakeup) {
204 wake_up_interruptible(PIPE_WAIT(*inode)); 221 smp_mb();
205 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN); 222 if (waitqueue_active(&pipe->wait))
223 wake_up_interruptible(&pipe->wait);
224 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
206 } 225 }
207 226
208 while (i < nr_pages) 227 while (i < nr_pages)
@@ -211,15 +230,16 @@ static ssize_t move_to_pipe(struct inode *inode, struct page **pages,
211 return ret; 230 return ret;
212} 231}
213 232
214static int __generic_file_splice_read(struct file *in, struct inode *pipe, 233static int
215 size_t len, unsigned int flags) 234__generic_file_splice_read(struct file *in, struct pipe_inode_info *pipe,
235 size_t len, unsigned int flags)
216{ 236{
217 struct address_space *mapping = in->f_mapping; 237 struct address_space *mapping = in->f_mapping;
218 unsigned int offset, nr_pages; 238 unsigned int offset, nr_pages;
219 struct page *pages[PIPE_BUFFERS], *shadow[PIPE_BUFFERS]; 239 struct page *pages[PIPE_BUFFERS];
220 struct page *page; 240 struct page *page;
221 pgoff_t index, pidx; 241 pgoff_t index;
222 int i, j; 242 int i, error;
223 243
224 index = in->f_pos >> PAGE_CACHE_SHIFT; 244 index = in->f_pos >> PAGE_CACHE_SHIFT;
225 offset = in->f_pos & ~PAGE_CACHE_MASK; 245 offset = in->f_pos & ~PAGE_CACHE_MASK;
@@ -229,78 +249,94 @@ static int __generic_file_splice_read(struct file *in, struct inode *pipe,
229 nr_pages = PIPE_BUFFERS; 249 nr_pages = PIPE_BUFFERS;
230 250
231 /* 251 /*
232 * initiate read-ahead on this page range 252 * Initiate read-ahead on this page range. however, don't call into
233 */ 253 * read-ahead if this is a non-zero offset (we are likely doing small
234 do_page_cache_readahead(mapping, in, index, nr_pages); 254 * chunk splice and the page is already there) for a single page.
235
236 /*
237 * Get as many pages from the page cache as possible..
238 * Start IO on the page cache entries we create (we
239 * can assume that any pre-existing ones we find have
240 * already had IO started on them).
241 */ 255 */
242 i = find_get_pages(mapping, index, nr_pages, pages); 256 if (!offset || nr_pages > 1)
257 do_page_cache_readahead(mapping, in, index, nr_pages);
243 258
244 /* 259 /*
245 * common case - we found all pages and they are contiguous, 260 * Now fill in the holes:
246 * kick them off
247 */ 261 */
248 if (i && (pages[i - 1]->index == index + i - 1)) 262 error = 0;
249 goto splice_them; 263 for (i = 0; i < nr_pages; i++, index++) {
264find_page:
265 /*
266 * lookup the page for this index
267 */
268 page = find_get_page(mapping, index);
269 if (!page) {
270 /*
271 * If in nonblock mode then dont block on
272 * readpage (we've kicked readahead so there
273 * will be asynchronous progress):
274 */
275 if (flags & SPLICE_F_NONBLOCK)
276 break;
250 277
251 /* 278 /*
252 * fill shadow[] with pages at the right locations, so we only 279 * page didn't exist, allocate one
253 * have to fill holes 280 */
254 */ 281 page = page_cache_alloc_cold(mapping);
255 memset(shadow, 0, nr_pages * sizeof(struct page *)); 282 if (!page)
256 for (j = 0; j < i; j++) 283 break;
257 shadow[pages[j]->index - index] = pages[j];
258 284
259 /* 285 error = add_to_page_cache_lru(page, mapping, index,
260 * now fill in the holes 286 mapping_gfp_mask(mapping));
261 */ 287 if (unlikely(error)) {
262 for (i = 0, pidx = index; i < nr_pages; pidx++, i++) { 288 page_cache_release(page);
263 int error; 289 break;
290 }
264 291
265 if (shadow[i]) 292 goto readpage;
266 continue; 293 }
267 294
268 /* 295 /*
269 * no page there, look one up / create it 296 * If the page isn't uptodate, we may need to start io on it
270 */ 297 */
271 page = find_or_create_page(mapping, pidx, 298 if (!PageUptodate(page)) {
272 mapping_gfp_mask(mapping)); 299 lock_page(page);
273 if (!page) 300
274 break; 301 /*
302 * page was truncated, stop here. if this isn't the
303 * first page, we'll just complete what we already
304 * added
305 */
306 if (!page->mapping) {
307 unlock_page(page);
308 page_cache_release(page);
309 break;
310 }
311 /*
312 * page was already under io and is now done, great
313 */
314 if (PageUptodate(page)) {
315 unlock_page(page);
316 goto fill_it;
317 }
275 318
276 if (PageUptodate(page)) 319readpage:
277 unlock_page(page); 320 /*
278 else { 321 * need to read in the page
322 */
279 error = mapping->a_ops->readpage(in, page); 323 error = mapping->a_ops->readpage(in, page);
280 324
281 if (unlikely(error)) { 325 if (unlikely(error)) {
282 page_cache_release(page); 326 page_cache_release(page);
327 if (error == AOP_TRUNCATED_PAGE)
328 goto find_page;
283 break; 329 break;
284 } 330 }
285 } 331 }
286 shadow[i] = page; 332fill_it:
333 pages[i] = page;
287 } 334 }
288 335
289 if (!i) { 336 if (i)
290 for (i = 0; i < nr_pages; i++) { 337 return move_to_pipe(pipe, pages, i, offset, len, flags);
291 if (shadow[i])
292 page_cache_release(shadow[i]);
293 }
294 return 0;
295 }
296 338
297 memcpy(pages, shadow, i * sizeof(struct page *)); 339 return error;
298
299 /*
300 * Now we splice them into the pipe..
301 */
302splice_them:
303 return move_to_pipe(pipe, pages, i, offset, len, flags);
304} 340}
305 341
306/** 342/**
@@ -311,9 +347,8 @@ splice_them:
311 * @flags: splice modifier flags 347 * @flags: splice modifier flags
312 * 348 *
313 * Will read pages from given file and fill them into a pipe. 349 * Will read pages from given file and fill them into a pipe.
314 *
315 */ 350 */
316ssize_t generic_file_splice_read(struct file *in, struct inode *pipe, 351ssize_t generic_file_splice_read(struct file *in, struct pipe_inode_info *pipe,
317 size_t len, unsigned int flags) 352 size_t len, unsigned int flags)
318{ 353{
319 ssize_t spliced; 354 ssize_t spliced;
@@ -321,6 +356,7 @@ ssize_t generic_file_splice_read(struct file *in, struct inode *pipe,
321 356
322 ret = 0; 357 ret = 0;
323 spliced = 0; 358 spliced = 0;
359
324 while (len) { 360 while (len) {
325 ret = __generic_file_splice_read(in, pipe, len, flags); 361 ret = __generic_file_splice_read(in, pipe, len, flags);
326 362
@@ -360,10 +396,10 @@ static int pipe_to_sendpage(struct pipe_inode_info *info,
360 int more; 396 int more;
361 397
362 /* 398 /*
363 * sub-optimal, but we are limited by the pipe ->map. we don't 399 * Sub-optimal, but we are limited by the pipe ->map. We don't
364 * need a kmap'ed buffer here, we just want to make sure we 400 * need a kmap'ed buffer here, we just want to make sure we
365 * have the page pinned if the pipe page originates from the 401 * have the page pinned if the pipe page originates from the
366 * page cache 402 * page cache.
367 */ 403 */
368 ptr = buf->ops->map(file, info, buf); 404 ptr = buf->ops->map(file, info, buf);
369 if (IS_ERR(ptr)) 405 if (IS_ERR(ptr))
@@ -414,7 +450,7 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
414 int ret; 450 int ret;
415 451
416 /* 452 /*
417 * after this, page will be locked and unmapped 453 * make sure the data in this buffer is uptodate
418 */ 454 */
419 src = buf->ops->map(file, info, buf); 455 src = buf->ops->map(file, info, buf);
420 if (IS_ERR(src)) 456 if (IS_ERR(src))
@@ -424,7 +460,7 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
424 offset = sd->pos & ~PAGE_CACHE_MASK; 460 offset = sd->pos & ~PAGE_CACHE_MASK;
425 461
426 /* 462 /*
427 * reuse buf page, if SPLICE_F_MOVE is set 463 * Reuse buf page, if SPLICE_F_MOVE is set.
428 */ 464 */
429 if (sd->flags & SPLICE_F_MOVE) { 465 if (sd->flags & SPLICE_F_MOVE) {
430 /* 466 /*
@@ -434,6 +470,9 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
434 if (buf->ops->steal(info, buf)) 470 if (buf->ops->steal(info, buf))
435 goto find_page; 471 goto find_page;
436 472
473 /*
474 * this will also set the page locked
475 */
437 page = buf->page; 476 page = buf->page;
438 if (add_to_page_cache(page, mapping, index, gfp_mask)) 477 if (add_to_page_cache(page, mapping, index, gfp_mask))
439 goto find_page; 478 goto find_page;
@@ -445,7 +484,7 @@ find_page:
445 ret = -ENOMEM; 484 ret = -ENOMEM;
446 page = find_or_create_page(mapping, index, gfp_mask); 485 page = find_or_create_page(mapping, index, gfp_mask);
447 if (!page) 486 if (!page)
448 goto out; 487 goto out_nomem;
449 488
450 /* 489 /*
451 * If the page is uptodate, it is also locked. If it isn't 490 * If the page is uptodate, it is also locked. If it isn't
@@ -462,7 +501,7 @@ find_page:
462 501
463 if (!PageUptodate(page)) { 502 if (!PageUptodate(page)) {
464 /* 503 /*
465 * page got invalidated, repeat 504 * Page got invalidated, repeat.
466 */ 505 */
467 if (!page->mapping) { 506 if (!page->mapping) {
468 unlock_page(page); 507 unlock_page(page);
@@ -501,12 +540,14 @@ find_page:
501 } else if (ret) 540 } else if (ret)
502 goto out; 541 goto out;
503 542
543 mark_page_accessed(page);
504 balance_dirty_pages_ratelimited(mapping); 544 balance_dirty_pages_ratelimited(mapping);
505out: 545out:
506 if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) { 546 if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) {
507 page_cache_release(page); 547 page_cache_release(page);
508 unlock_page(page); 548 unlock_page(page);
509 } 549 }
550out_nomem:
510 buf->ops->unmap(info, buf); 551 buf->ops->unmap(info, buf);
511 return ret; 552 return ret;
512} 553}
@@ -519,11 +560,10 @@ typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *,
519 * key here is the 'actor' worker passed in that actually moves the data 560 * key here is the 'actor' worker passed in that actually moves the data
520 * to the wanted destination. See pipe_to_file/pipe_to_sendpage above. 561 * to the wanted destination. See pipe_to_file/pipe_to_sendpage above.
521 */ 562 */
522static ssize_t move_from_pipe(struct inode *inode, struct file *out, 563static ssize_t move_from_pipe(struct pipe_inode_info *pipe, struct file *out,
523 size_t len, unsigned int flags, 564 size_t len, unsigned int flags,
524 splice_actor *actor) 565 splice_actor *actor)
525{ 566{
526 struct pipe_inode_info *info;
527 int ret, do_wakeup, err; 567 int ret, do_wakeup, err;
528 struct splice_desc sd; 568 struct splice_desc sd;
529 569
@@ -535,22 +575,19 @@ static ssize_t move_from_pipe(struct inode *inode, struct file *out,
535 sd.file = out; 575 sd.file = out;
536 sd.pos = out->f_pos; 576 sd.pos = out->f_pos;
537 577
538 mutex_lock(PIPE_MUTEX(*inode)); 578 if (pipe->inode)
579 mutex_lock(&pipe->inode->i_mutex);
539 580
540 info = inode->i_pipe;
541 for (;;) { 581 for (;;) {
542 int bufs = info->nrbufs; 582 if (pipe->nrbufs) {
543 583 struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
544 if (bufs) {
545 int curbuf = info->curbuf;
546 struct pipe_buffer *buf = info->bufs + curbuf;
547 struct pipe_buf_operations *ops = buf->ops; 584 struct pipe_buf_operations *ops = buf->ops;
548 585
549 sd.len = buf->len; 586 sd.len = buf->len;
550 if (sd.len > sd.total_len) 587 if (sd.len > sd.total_len)
551 sd.len = sd.total_len; 588 sd.len = sd.total_len;
552 589
553 err = actor(info, buf, &sd); 590 err = actor(pipe, buf, &sd);
554 if (err) { 591 if (err) {
555 if (!ret && err != -ENODATA) 592 if (!ret && err != -ENODATA)
556 ret = err; 593 ret = err;
@@ -561,13 +598,14 @@ static ssize_t move_from_pipe(struct inode *inode, struct file *out,
561 ret += sd.len; 598 ret += sd.len;
562 buf->offset += sd.len; 599 buf->offset += sd.len;
563 buf->len -= sd.len; 600 buf->len -= sd.len;
601
564 if (!buf->len) { 602 if (!buf->len) {
565 buf->ops = NULL; 603 buf->ops = NULL;
566 ops->release(info, buf); 604 ops->release(pipe, buf);
567 curbuf = (curbuf + 1) & (PIPE_BUFFERS - 1); 605 pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1);
568 info->curbuf = curbuf; 606 pipe->nrbufs--;
569 info->nrbufs = --bufs; 607 if (pipe->inode)
570 do_wakeup = 1; 608 do_wakeup = 1;
571 } 609 }
572 610
573 sd.pos += sd.len; 611 sd.pos += sd.len;
@@ -576,11 +614,11 @@ static ssize_t move_from_pipe(struct inode *inode, struct file *out,
576 break; 614 break;
577 } 615 }
578 616
579 if (bufs) 617 if (pipe->nrbufs)
580 continue; 618 continue;
581 if (!PIPE_WRITERS(*inode)) 619 if (!pipe->writers)
582 break; 620 break;
583 if (!PIPE_WAITING_WRITERS(*inode)) { 621 if (!pipe->waiting_writers) {
584 if (ret) 622 if (ret)
585 break; 623 break;
586 } 624 }
@@ -598,31 +636,34 @@ static ssize_t move_from_pipe(struct inode *inode, struct file *out,
598 } 636 }
599 637
600 if (do_wakeup) { 638 if (do_wakeup) {
601 wake_up_interruptible_sync(PIPE_WAIT(*inode)); 639 smp_mb();
602 kill_fasync(PIPE_FASYNC_WRITERS(*inode),SIGIO,POLL_OUT); 640 if (waitqueue_active(&pipe->wait))
641 wake_up_interruptible_sync(&pipe->wait);
642 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
603 do_wakeup = 0; 643 do_wakeup = 0;
604 } 644 }
605 645
606 pipe_wait(inode); 646 pipe_wait(pipe);
607 } 647 }
608 648
609 mutex_unlock(PIPE_MUTEX(*inode)); 649 if (pipe->inode)
650 mutex_unlock(&pipe->inode->i_mutex);
610 651
611 if (do_wakeup) { 652 if (do_wakeup) {
612 wake_up_interruptible(PIPE_WAIT(*inode)); 653 smp_mb();
613 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT); 654 if (waitqueue_active(&pipe->wait))
655 wake_up_interruptible(&pipe->wait);
656 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
614 } 657 }
615 658
616 mutex_lock(&out->f_mapping->host->i_mutex);
617 out->f_pos = sd.pos; 659 out->f_pos = sd.pos;
618 mutex_unlock(&out->f_mapping->host->i_mutex);
619 return ret; 660 return ret;
620 661
621} 662}
622 663
623/** 664/**
624 * generic_file_splice_write - splice data from a pipe to a file 665 * generic_file_splice_write - splice data from a pipe to a file
625 * @inode: pipe inode 666 * @pipe: pipe info
626 * @out: file to write to 667 * @out: file to write to
627 * @len: number of bytes to splice 668 * @len: number of bytes to splice
628 * @flags: splice modifier flags 669 * @flags: splice modifier flags
@@ -631,14 +672,17 @@ static ssize_t move_from_pipe(struct inode *inode, struct file *out,
631 * the given pipe inode to the given file. 672 * the given pipe inode to the given file.
632 * 673 *
633 */ 674 */
634ssize_t generic_file_splice_write(struct inode *inode, struct file *out, 675ssize_t
635 size_t len, unsigned int flags) 676generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
677 size_t len, unsigned int flags)
636{ 678{
637 struct address_space *mapping = out->f_mapping; 679 struct address_space *mapping = out->f_mapping;
638 ssize_t ret = move_from_pipe(inode, out, len, flags, pipe_to_file); 680 ssize_t ret;
681
682 ret = move_from_pipe(pipe, out, len, flags, pipe_to_file);
639 683
640 /* 684 /*
641 * if file or inode is SYNC and we actually wrote some data, sync it 685 * If file or inode is SYNC and we actually wrote some data, sync it.
642 */ 686 */
643 if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(mapping->host)) 687 if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(mapping->host))
644 && ret > 0) { 688 && ret > 0) {
@@ -647,7 +691,7 @@ ssize_t generic_file_splice_write(struct inode *inode, struct file *out,
647 691
648 mutex_lock(&inode->i_mutex); 692 mutex_lock(&inode->i_mutex);
649 err = generic_osync_inode(mapping->host, mapping, 693 err = generic_osync_inode(mapping->host, mapping,
650 OSYNC_METADATA|OSYNC_DATA); 694 OSYNC_METADATA|OSYNC_DATA);
651 mutex_unlock(&inode->i_mutex); 695 mutex_unlock(&inode->i_mutex);
652 696
653 if (err) 697 if (err)
@@ -670,10 +714,10 @@ EXPORT_SYMBOL(generic_file_splice_write);
670 * is involved. 714 * is involved.
671 * 715 *
672 */ 716 */
673ssize_t generic_splice_sendpage(struct inode *inode, struct file *out, 717ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out,
674 size_t len, unsigned int flags) 718 size_t len, unsigned int flags)
675{ 719{
676 return move_from_pipe(inode, out, len, flags, pipe_to_sendpage); 720 return move_from_pipe(pipe, out, len, flags, pipe_to_sendpage);
677} 721}
678 722
679EXPORT_SYMBOL(generic_splice_sendpage); 723EXPORT_SYMBOL(generic_splice_sendpage);
@@ -681,19 +725,20 @@ EXPORT_SYMBOL(generic_splice_sendpage);
681/* 725/*
682 * Attempt to initiate a splice from pipe to file. 726 * Attempt to initiate a splice from pipe to file.
683 */ 727 */
684static long do_splice_from(struct inode *pipe, struct file *out, size_t len, 728static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
685 unsigned int flags) 729 size_t len, unsigned int flags)
686{ 730{
687 loff_t pos; 731 loff_t pos;
688 int ret; 732 int ret;
689 733
690 if (!out->f_op || !out->f_op->splice_write) 734 if (unlikely(!out->f_op || !out->f_op->splice_write))
691 return -EINVAL; 735 return -EINVAL;
692 736
693 if (!(out->f_mode & FMODE_WRITE)) 737 if (unlikely(!(out->f_mode & FMODE_WRITE)))
694 return -EBADF; 738 return -EBADF;
695 739
696 pos = out->f_pos; 740 pos = out->f_pos;
741
697 ret = rw_verify_area(WRITE, out, &pos, len); 742 ret = rw_verify_area(WRITE, out, &pos, len);
698 if (unlikely(ret < 0)) 743 if (unlikely(ret < 0))
699 return ret; 744 return ret;
@@ -704,19 +749,20 @@ static long do_splice_from(struct inode *pipe, struct file *out, size_t len,
704/* 749/*
705 * Attempt to initiate a splice from a file to a pipe. 750 * Attempt to initiate a splice from a file to a pipe.
706 */ 751 */
707static long do_splice_to(struct file *in, struct inode *pipe, size_t len, 752static long do_splice_to(struct file *in, struct pipe_inode_info *pipe,
708 unsigned int flags) 753 size_t len, unsigned int flags)
709{ 754{
710 loff_t pos, isize, left; 755 loff_t pos, isize, left;
711 int ret; 756 int ret;
712 757
713 if (!in->f_op || !in->f_op->splice_read) 758 if (unlikely(!in->f_op || !in->f_op->splice_read))
714 return -EINVAL; 759 return -EINVAL;
715 760
716 if (!(in->f_mode & FMODE_READ)) 761 if (unlikely(!(in->f_mode & FMODE_READ)))
717 return -EBADF; 762 return -EBADF;
718 763
719 pos = in->f_pos; 764 pos = in->f_pos;
765
720 ret = rw_verify_area(READ, in, &pos, len); 766 ret = rw_verify_area(READ, in, &pos, len);
721 if (unlikely(ret < 0)) 767 if (unlikely(ret < 0))
722 return ret; 768 return ret;
@@ -726,32 +772,168 @@ static long do_splice_to(struct file *in, struct inode *pipe, size_t len,
726 return 0; 772 return 0;
727 773
728 left = isize - in->f_pos; 774 left = isize - in->f_pos;
729 if (left < len) 775 if (unlikely(left < len))
730 len = left; 776 len = left;
731 777
732 return in->f_op->splice_read(in, pipe, len, flags); 778 return in->f_op->splice_read(in, pipe, len, flags);
733} 779}
734 780
781long do_splice_direct(struct file *in, struct file *out, size_t len,
782 unsigned int flags)
783{
784 struct pipe_inode_info *pipe;
785 long ret, bytes;
786 umode_t i_mode;
787 int i;
788
789 /*
790 * We require the input being a regular file, as we don't want to
791 * randomly drop data for eg socket -> socket splicing. Use the
792 * piped splicing for that!
793 */
794 i_mode = in->f_dentry->d_inode->i_mode;
795 if (unlikely(!S_ISREG(i_mode) && !S_ISBLK(i_mode)))
796 return -EINVAL;
797
798 /*
799 * neither in nor out is a pipe, setup an internal pipe attached to
800 * 'out' and transfer the wanted data from 'in' to 'out' through that
801 */
802 pipe = current->splice_pipe;
803 if (unlikely(!pipe)) {
804 pipe = alloc_pipe_info(NULL);
805 if (!pipe)
806 return -ENOMEM;
807
808 /*
809 * We don't have an immediate reader, but we'll read the stuff
810 * out of the pipe right after the move_to_pipe(). So set
811 * PIPE_READERS appropriately.
812 */
813 pipe->readers = 1;
814
815 current->splice_pipe = pipe;
816 }
817
818 /*
819 * Do the splice.
820 */
821 ret = 0;
822 bytes = 0;
823
824 while (len) {
825 size_t read_len, max_read_len;
826
827 /*
828 * Do at most PIPE_BUFFERS pages worth of transfer:
829 */
830 max_read_len = min(len, (size_t)(PIPE_BUFFERS*PAGE_SIZE));
831
832 ret = do_splice_to(in, pipe, max_read_len, flags);
833 if (unlikely(ret < 0))
834 goto out_release;
835
836 read_len = ret;
837
838 /*
839 * NOTE: nonblocking mode only applies to the input. We
840 * must not do the output in nonblocking mode as then we
841 * could get stuck data in the internal pipe:
842 */
843 ret = do_splice_from(pipe, out, read_len,
844 flags & ~SPLICE_F_NONBLOCK);
845 if (unlikely(ret < 0))
846 goto out_release;
847
848 bytes += ret;
849 len -= ret;
850
851 /*
852 * In nonblocking mode, if we got back a short read then
853 * that was due to either an IO error or due to the
854 * pagecache entry not being there. In the IO error case
855 * the _next_ splice attempt will produce a clean IO error
856 * return value (not a short read), so in both cases it's
857 * correct to break out of the loop here:
858 */
859 if ((flags & SPLICE_F_NONBLOCK) && (read_len < max_read_len))
860 break;
861 }
862
863 pipe->nrbufs = pipe->curbuf = 0;
864
865 return bytes;
866
867out_release:
868 /*
869 * If we did an incomplete transfer we must release
870 * the pipe buffers in question:
871 */
872 for (i = 0; i < PIPE_BUFFERS; i++) {
873 struct pipe_buffer *buf = pipe->bufs + i;
874
875 if (buf->ops) {
876 buf->ops->release(pipe, buf);
877 buf->ops = NULL;
878 }
879 }
880 pipe->nrbufs = pipe->curbuf = 0;
881
882 /*
883 * If we transferred some data, return the number of bytes:
884 */
885 if (bytes > 0)
886 return bytes;
887
888 return ret;
889}
890
891EXPORT_SYMBOL(do_splice_direct);
892
735/* 893/*
736 * Determine where to splice to/from. 894 * Determine where to splice to/from.
737 */ 895 */
738static long do_splice(struct file *in, struct file *out, size_t len, 896static long do_splice(struct file *in, loff_t __user *off_in,
739 unsigned int flags) 897 struct file *out, loff_t __user *off_out,
898 size_t len, unsigned int flags)
740{ 899{
741 struct inode *pipe; 900 struct pipe_inode_info *pipe;
901
902 pipe = in->f_dentry->d_inode->i_pipe;
903 if (pipe) {
904 if (off_in)
905 return -ESPIPE;
906 if (off_out) {
907 if (out->f_op->llseek == no_llseek)
908 return -EINVAL;
909 if (copy_from_user(&out->f_pos, off_out,
910 sizeof(loff_t)))
911 return -EFAULT;
912 }
742 913
743 pipe = in->f_dentry->d_inode;
744 if (pipe->i_pipe)
745 return do_splice_from(pipe, out, len, flags); 914 return do_splice_from(pipe, out, len, flags);
915 }
916
917 pipe = out->f_dentry->d_inode->i_pipe;
918 if (pipe) {
919 if (off_out)
920 return -ESPIPE;
921 if (off_in) {
922 if (in->f_op->llseek == no_llseek)
923 return -EINVAL;
924 if (copy_from_user(&in->f_pos, off_in, sizeof(loff_t)))
925 return -EFAULT;
926 }
746 927
747 pipe = out->f_dentry->d_inode;
748 if (pipe->i_pipe)
749 return do_splice_to(in, pipe, len, flags); 928 return do_splice_to(in, pipe, len, flags);
929 }
750 930
751 return -EINVAL; 931 return -EINVAL;
752} 932}
753 933
754asmlinkage long sys_splice(int fdin, int fdout, size_t len, unsigned int flags) 934asmlinkage long sys_splice(int fd_in, loff_t __user *off_in,
935 int fd_out, loff_t __user *off_out,
936 size_t len, unsigned int flags)
755{ 937{
756 long error; 938 long error;
757 struct file *in, *out; 939 struct file *in, *out;
@@ -761,13 +943,15 @@ asmlinkage long sys_splice(int fdin, int fdout, size_t len, unsigned int flags)
761 return 0; 943 return 0;
762 944
763 error = -EBADF; 945 error = -EBADF;
764 in = fget_light(fdin, &fput_in); 946 in = fget_light(fd_in, &fput_in);
765 if (in) { 947 if (in) {
766 if (in->f_mode & FMODE_READ) { 948 if (in->f_mode & FMODE_READ) {
767 out = fget_light(fdout, &fput_out); 949 out = fget_light(fd_out, &fput_out);
768 if (out) { 950 if (out) {
769 if (out->f_mode & FMODE_WRITE) 951 if (out->f_mode & FMODE_WRITE)
770 error = do_splice(in, out, len, flags); 952 error = do_splice(in, off_in,
953 out, off_out,
954 len, flags);
771 fput_light(out, fput_out); 955 fput_light(out, fput_out);
772 } 956 }
773 } 957 }
diff --git a/fs/sync.c b/fs/sync.c
index 8616006d2094..aab5ffe77e9f 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -61,7 +61,7 @@
61 * will be available after a crash. 61 * will be available after a crash.
62 */ 62 */
63asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes, 63asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes,
64 int flags) 64 unsigned int flags)
65{ 65{
66 int ret; 66 int ret;
67 struct file *file; 67 struct file *file;
@@ -126,7 +126,7 @@ out:
126 * `endbyte' is inclusive 126 * `endbyte' is inclusive
127 */ 127 */
128int do_sync_file_range(struct file *file, loff_t offset, loff_t endbyte, 128int do_sync_file_range(struct file *file, loff_t offset, loff_t endbyte,
129 int flags) 129 unsigned int flags)
130{ 130{
131 int ret; 131 int ret;
132 struct address_space *mapping; 132 struct address_space *mapping;
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 6cbbd165c60d..4d191ef39b67 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -870,12 +870,14 @@ xfs_page_state_convert(
870 pgoff_t end_index, last_index, tlast; 870 pgoff_t end_index, last_index, tlast;
871 ssize_t size, len; 871 ssize_t size, len;
872 int flags, err, iomap_valid = 0, uptodate = 1; 872 int flags, err, iomap_valid = 0, uptodate = 1;
873 int page_dirty, count = 0, trylock_flag = 0; 873 int page_dirty, count = 0;
874 int trylock = 0;
874 int all_bh = unmapped; 875 int all_bh = unmapped;
875 876
876 /* wait for other IO threads? */ 877 if (startio) {
877 if (startio && (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking)) 878 if (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking)
878 trylock_flag |= BMAPI_TRYLOCK; 879 trylock |= BMAPI_TRYLOCK;
880 }
879 881
880 /* Is this page beyond the end of the file? */ 882 /* Is this page beyond the end of the file? */
881 offset = i_size_read(inode); 883 offset = i_size_read(inode);
@@ -956,15 +958,13 @@ xfs_page_state_convert(
956 958
957 if (buffer_unwritten(bh)) { 959 if (buffer_unwritten(bh)) {
958 type = IOMAP_UNWRITTEN; 960 type = IOMAP_UNWRITTEN;
959 flags = BMAPI_WRITE|BMAPI_IGNSTATE; 961 flags = BMAPI_WRITE | BMAPI_IGNSTATE;
960 } else if (buffer_delay(bh)) { 962 } else if (buffer_delay(bh)) {
961 type = IOMAP_DELAY; 963 type = IOMAP_DELAY;
962 flags = BMAPI_ALLOCATE; 964 flags = BMAPI_ALLOCATE | trylock;
963 if (!startio)
964 flags |= trylock_flag;
965 } else { 965 } else {
966 type = IOMAP_NEW; 966 type = IOMAP_NEW;
967 flags = BMAPI_WRITE|BMAPI_MMAP; 967 flags = BMAPI_WRITE | BMAPI_MMAP;
968 } 968 }
969 969
970 if (!iomap_valid) { 970 if (!iomap_valid) {
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 9fb0312665ca..26fed0756f01 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -182,7 +182,7 @@ free_address(
182{ 182{
183 a_list_t *aentry; 183 a_list_t *aentry;
184 184
185 aentry = kmalloc(sizeof(a_list_t), GFP_ATOMIC & ~__GFP_HIGH); 185 aentry = kmalloc(sizeof(a_list_t), GFP_NOWAIT);
186 if (likely(aentry)) { 186 if (likely(aentry)) {
187 spin_lock(&as_lock); 187 spin_lock(&as_lock);
188 aentry->next = as_free_head; 188 aentry->next = as_free_head;
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index ae4c4754ed31..269721af02f3 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -252,7 +252,7 @@ xfs_file_sendfile_invis(
252STATIC ssize_t 252STATIC ssize_t
253xfs_file_splice_read( 253xfs_file_splice_read(
254 struct file *infilp, 254 struct file *infilp,
255 struct inode *pipe, 255 struct pipe_inode_info *pipe,
256 size_t len, 256 size_t len,
257 unsigned int flags) 257 unsigned int flags)
258{ 258{
@@ -266,7 +266,7 @@ xfs_file_splice_read(
266STATIC ssize_t 266STATIC ssize_t
267xfs_file_splice_read_invis( 267xfs_file_splice_read_invis(
268 struct file *infilp, 268 struct file *infilp,
269 struct inode *pipe, 269 struct pipe_inode_info *pipe,
270 size_t len, 270 size_t len,
271 unsigned int flags) 271 unsigned int flags)
272{ 272{
@@ -279,7 +279,7 @@ xfs_file_splice_read_invis(
279 279
280STATIC ssize_t 280STATIC ssize_t
281xfs_file_splice_write( 281xfs_file_splice_write(
282 struct inode *pipe, 282 struct pipe_inode_info *pipe,
283 struct file *outfilp, 283 struct file *outfilp,
284 size_t len, 284 size_t len,
285 unsigned int flags) 285 unsigned int flags)
@@ -293,7 +293,7 @@ xfs_file_splice_write(
293 293
294STATIC ssize_t 294STATIC ssize_t
295xfs_file_splice_write_invis( 295xfs_file_splice_write_invis(
296 struct inode *pipe, 296 struct pipe_inode_info *pipe,
297 struct file *outfilp, 297 struct file *outfilp,
298 size_t len, 298 size_t len,
299 unsigned int flags) 299 unsigned int flags)
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 149237304fb6..2e2e275c786f 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -673,8 +673,7 @@ xfs_vn_setattr(
673 if (ia_valid & ATTR_ATIME) { 673 if (ia_valid & ATTR_ATIME) {
674 vattr.va_mask |= XFS_AT_ATIME; 674 vattr.va_mask |= XFS_AT_ATIME;
675 vattr.va_atime = attr->ia_atime; 675 vattr.va_atime = attr->ia_atime;
676 if (ia_valid & ATTR_ATIME_SET) 676 inode->i_atime = attr->ia_atime;
677 inode->i_atime = attr->ia_atime;
678 } 677 }
679 if (ia_valid & ATTR_MTIME) { 678 if (ia_valid & ATTR_MTIME) {
680 vattr.va_mask |= XFS_AT_MTIME; 679 vattr.va_mask |= XFS_AT_MTIME;
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 90cd314acbaa..74a52937f208 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -338,7 +338,7 @@ ssize_t
338xfs_splice_read( 338xfs_splice_read(
339 bhv_desc_t *bdp, 339 bhv_desc_t *bdp,
340 struct file *infilp, 340 struct file *infilp,
341 struct inode *pipe, 341 struct pipe_inode_info *pipe,
342 size_t count, 342 size_t count,
343 int flags, 343 int flags,
344 int ioflags, 344 int ioflags,
@@ -380,7 +380,7 @@ xfs_splice_read(
380ssize_t 380ssize_t
381xfs_splice_write( 381xfs_splice_write(
382 bhv_desc_t *bdp, 382 bhv_desc_t *bdp,
383 struct inode *pipe, 383 struct pipe_inode_info *pipe,
384 struct file *outfilp, 384 struct file *outfilp,
385 size_t count, 385 size_t count,
386 int flags, 386 int flags,
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
index eaa5659713fb..55c689a86ad2 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ b/fs/xfs/linux-2.6/xfs_lrw.h
@@ -94,9 +94,9 @@ extern ssize_t xfs_sendfile(struct bhv_desc *, struct file *,
94 loff_t *, int, size_t, read_actor_t, 94 loff_t *, int, size_t, read_actor_t,
95 void *, struct cred *); 95 void *, struct cred *);
96extern ssize_t xfs_splice_read(struct bhv_desc *, struct file *, 96extern ssize_t xfs_splice_read(struct bhv_desc *, struct file *,
97 struct inode *, size_t, int, int, 97 struct pipe_inode_info *, size_t, int, int,
98 struct cred *); 98 struct cred *);
99extern ssize_t xfs_splice_write(struct bhv_desc *, struct inode *, 99extern ssize_t xfs_splice_write(struct bhv_desc *, struct pipe_inode_info *,
100 struct file *, size_t, int, int, 100 struct file *, size_t, int, int,
101 struct cred *); 101 struct cred *);
102 102
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 6f1c79a28f8b..88b09f186289 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -174,9 +174,9 @@ typedef ssize_t (*vop_sendfile_t)(bhv_desc_t *, struct file *,
174 loff_t *, int, size_t, read_actor_t, 174 loff_t *, int, size_t, read_actor_t,
175 void *, struct cred *); 175 void *, struct cred *);
176typedef ssize_t (*vop_splice_read_t)(bhv_desc_t *, struct file *, 176typedef ssize_t (*vop_splice_read_t)(bhv_desc_t *, struct file *,
177 struct inode *, size_t, int, int, 177 struct pipe_inode_info *, size_t, int, int,
178 struct cred *); 178 struct cred *);
179typedef ssize_t (*vop_splice_write_t)(bhv_desc_t *, struct inode *, 179typedef ssize_t (*vop_splice_write_t)(bhv_desc_t *, struct pipe_inode_info *,
180 struct file *, size_t, int, int, 180 struct file *, size_t, int, int,
181 struct cred *); 181 struct cred *);
182typedef int (*vop_ioctl_t)(bhv_desc_t *, struct inode *, struct file *, 182typedef int (*vop_ioctl_t)(bhv_desc_t *, struct inode *, struct file *,
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 4eeb856183b1..deddbd03c166 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -158,9 +158,10 @@ xfs_ialloc_ag_alloc(
158 */ 158 */
159 agi = XFS_BUF_TO_AGI(agbp); 159 agi = XFS_BUF_TO_AGI(agbp);
160 newino = be32_to_cpu(agi->agi_newino); 160 newino = be32_to_cpu(agi->agi_newino);
161 if(likely(newino != NULLAGINO)) { 161 args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
162 args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) + 162 XFS_IALLOC_BLOCKS(args.mp);
163 XFS_IALLOC_BLOCKS(args.mp); 163 if (likely(newino != NULLAGINO &&
164 (args.agbno < be32_to_cpu(agi->agi_length)))) {
164 args.fsbno = XFS_AGB_TO_FSB(args.mp, 165 args.fsbno = XFS_AGB_TO_FSB(args.mp,
165 be32_to_cpu(agi->agi_seqno), args.agbno); 166 be32_to_cpu(agi->agi_seqno), args.agbno);
166 args.type = XFS_ALLOCTYPE_THIS_BNO; 167 args.type = XFS_ALLOCTYPE_THIS_BNO;
@@ -182,8 +183,8 @@ xfs_ialloc_ag_alloc(
182 * Set the alignment for the allocation. 183 * Set the alignment for the allocation.
183 * If stripe alignment is turned on then align at stripe unit 184 * If stripe alignment is turned on then align at stripe unit
184 * boundary. 185 * boundary.
185 * If the cluster size is smaller than a filesystem block 186 * If the cluster size is smaller than a filesystem block
186 * then we're doing I/O for inodes in filesystem block size 187 * then we're doing I/O for inodes in filesystem block size
187 * pieces, so don't need alignment anyway. 188 * pieces, so don't need alignment anyway.
188 */ 189 */
189 isaligned = 0; 190 isaligned = 0;
@@ -192,7 +193,7 @@ xfs_ialloc_ag_alloc(
192 args.alignment = args.mp->m_dalign; 193 args.alignment = args.mp->m_dalign;
193 isaligned = 1; 194 isaligned = 1;
194 } else if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) && 195 } else if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) &&
195 args.mp->m_sb.sb_inoalignmt >= 196 args.mp->m_sb.sb_inoalignmt >=
196 XFS_B_TO_FSBT(args.mp, 197 XFS_B_TO_FSBT(args.mp,
197 XFS_INODE_CLUSTER_SIZE(args.mp))) 198 XFS_INODE_CLUSTER_SIZE(args.mp)))
198 args.alignment = args.mp->m_sb.sb_inoalignmt; 199 args.alignment = args.mp->m_sb.sb_inoalignmt;
@@ -220,7 +221,7 @@ xfs_ialloc_ag_alloc(
220 if ((error = xfs_alloc_vextent(&args))) 221 if ((error = xfs_alloc_vextent(&args)))
221 return error; 222 return error;
222 } 223 }
223 224
224 /* 225 /*
225 * If stripe alignment is turned on, then try again with cluster 226 * If stripe alignment is turned on, then try again with cluster
226 * alignment. 227 * alignment.
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index bb33113eef9f..b53854325266 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -421,7 +421,10 @@ finish_inode:
421 ip->i_chash = chlnew; 421 ip->i_chash = chlnew;
422 chlnew->chl_ip = ip; 422 chlnew->chl_ip = ip;
423 chlnew->chl_blkno = ip->i_blkno; 423 chlnew->chl_blkno = ip->i_blkno;
424 if (ch->ch_list)
425 ch->ch_list->chl_prev = chlnew;
424 chlnew->chl_next = ch->ch_list; 426 chlnew->chl_next = ch->ch_list;
427 chlnew->chl_prev = NULL;
425 ch->ch_list = chlnew; 428 ch->ch_list = chlnew;
426 chlnew = NULL; 429 chlnew = NULL;
427 } 430 }
@@ -723,23 +726,15 @@ xfs_iextract(
723 ASSERT(ip->i_cnext == ip && ip->i_cprev == ip); 726 ASSERT(ip->i_cnext == ip && ip->i_cprev == ip);
724 ASSERT(ip->i_chash != NULL); 727 ASSERT(ip->i_chash != NULL);
725 chm=NULL; 728 chm=NULL;
726 for (chl = ch->ch_list; chl != NULL; chl = chl->chl_next) { 729 chl = ip->i_chash;
727 if (chl->chl_blkno == ip->i_blkno) { 730 if (chl->chl_prev)
728 if (chm == NULL) { 731 chl->chl_prev->chl_next = chl->chl_next;
729 /* first item on the list */ 732 else
730 ch->ch_list = chl->chl_next; 733 ch->ch_list = chl->chl_next;
731 } else { 734 if (chl->chl_next)
732 chm->chl_next = chl->chl_next; 735 chl->chl_next->chl_prev = chl->chl_prev;
733 } 736 kmem_zone_free(xfs_chashlist_zone, chl);
734 kmem_zone_free(xfs_chashlist_zone, chl); 737 } else {
735 break;
736 } else {
737 ASSERT(chl->chl_ip != ip);
738 chm = chl;
739 }
740 }
741 ASSERT_ALWAYS(chl != NULL);
742 } else {
743 /* delete one inode from a non-empty list */ 738 /* delete one inode from a non-empty list */
744 iq = ip->i_cnext; 739 iq = ip->i_cnext;
745 iq->i_cprev = ip->i_cprev; 740 iq->i_cprev = ip->i_cprev;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 48146bdc6bdd..94b60dd03801 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2732,16 +2732,29 @@ xfs_iunpin(
2732 ASSERT(atomic_read(&ip->i_pincount) > 0); 2732 ASSERT(atomic_read(&ip->i_pincount) > 0);
2733 2733
2734 if (atomic_dec_and_test(&ip->i_pincount)) { 2734 if (atomic_dec_and_test(&ip->i_pincount)) {
2735 vnode_t *vp = XFS_ITOV_NULL(ip); 2735 /*
2736 * If the inode is currently being reclaimed, the
2737 * linux inode _and_ the xfs vnode may have been
2738 * freed so we cannot reference either of them safely.
2739 * Hence we should not try to do anything to them
2740 * if the xfs inode is currently in the reclaim
2741 * path.
2742 *
2743 * However, we still need to issue the unpin wakeup
2744 * call as the inode reclaim may be blocked waiting for
2745 * the inode to become unpinned.
2746 */
2747 if (!(ip->i_flags & (XFS_IRECLAIM|XFS_IRECLAIMABLE))) {
2748 vnode_t *vp = XFS_ITOV_NULL(ip);
2736 2749
2737 /* make sync come back and flush this inode */ 2750 /* make sync come back and flush this inode */
2738 if (vp) { 2751 if (vp) {
2739 struct inode *inode = vn_to_inode(vp); 2752 struct inode *inode = vn_to_inode(vp);
2740 2753
2741 if (!(inode->i_state & I_NEW)) 2754 if (!(inode->i_state & I_NEW))
2742 mark_inode_dirty_sync(inode); 2755 mark_inode_dirty_sync(inode);
2756 }
2743 } 2757 }
2744
2745 wake_up(&ip->i_ipin_wait); 2758 wake_up(&ip->i_ipin_wait);
2746 } 2759 }
2747} 2760}
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 39ef9c36ea55..3b544db1790b 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -189,6 +189,7 @@ typedef struct xfs_ihash {
189 */ 189 */
190typedef struct xfs_chashlist { 190typedef struct xfs_chashlist {
191 struct xfs_chashlist *chl_next; 191 struct xfs_chashlist *chl_next;
192 struct xfs_chashlist *chl_prev;
192 struct xfs_inode *chl_ip; 193 struct xfs_inode *chl_ip;
193 xfs_daddr_t chl_blkno; /* starting block number of 194 xfs_daddr_t chl_blkno; /* starting block number of
194 * the cluster */ 195 * the cluster */
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 049fabb7f7e0..c0b1c2906880 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -270,7 +270,7 @@ xfs_mount_validate_sb(
270 (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || 270 (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) ||
271 (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || 271 (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) ||
272 (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || 272 (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) ||
273 (sbp->sb_imax_pct > 100 || sbp->sb_imax_pct < 1))) { 273 (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */))) {
274 xfs_fs_mount_cmn_err(flags, "SB sanity check 1 failed"); 274 xfs_fs_mount_cmn_err(flags, "SB sanity check 1 failed");
275 return XFS_ERROR(EFSCORRUPTED); 275 return XFS_ERROR(EFSCORRUPTED);
276 } 276 }